diff --git a/CMakeLists.txt b/CMakeLists.txt index b51bc421fdbf60a5d6dbb2ed23e3ebe322aac420..f929b652080eb3dab678d67ad69f2666867ad687 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,13 @@ if (NOT PACKAGE_VERSION) "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}") endif() +if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (CMAKE_GENERATOR_TOOLSET STREQUAL "")) + message(WARNING "Visual Studio generators use the x86 host compiler by " + "default, even for 64-bit targets. This can result in linker " + "instability and out of memory errors. To use the 64-bit " + "host compiler, pass -Thost=x64 on the CMake command line.") +endif() + project(LLVM ${cmake_3_0_PROJ_VERSION} ${cmake_3_0_LANGUAGES} @@ -378,6 +385,7 @@ else() option(LLVM_ENABLE_LOCAL_SUBMODULE_VISIBILITY "Compile with -fmodules-local-submodule-visibility." ON) endif() option(LLVM_ENABLE_CXX1Y "Compile with C++1y enabled." OFF) +option(LLVM_ENABLE_CXX1Z "Compile with C++1z enabled." OFF) option(LLVM_ENABLE_LIBCXX "Use libc++ if available." OFF) option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF) option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) @@ -512,6 +520,9 @@ set(LLVM_INSTALL_OCAMLDOC_HTML_DIR "share/doc/llvm/ocaml-html" option (LLVM_BUILD_EXTERNAL_COMPILER_RT "Build compiler-rt as an external project." OFF) +option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO + "Show target and host info when tools are invoked with --version." ON) + # You can configure which libraries from LLVM you want to include in the # shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited # list of LLVM components. All component names handled by llvm-config are valid. @@ -527,6 +538,8 @@ if(LLVM_LINK_LLVM_DYLIB OR LLVM_BUILD_LLVM_C_DYLIB) endif() option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" ${LLVM_BUILD_LLVM_DYLIB_default}) +option(LLVM_DYLIB_SYMBOL_VERSIONING OFF) + option(LLVM_OPTIMIZED_TABLEGEN "Force TableGen to be built with optimization" OFF) if(CMAKE_CROSSCOMPILING OR (LLVM_OPTIMIZED_TABLEGEN AND (LLVM_ENABLE_ASSERTIONS OR CMAKE_CONFIGURATION_TYPES))) set(LLVM_USE_HOST_TOOLS ON) @@ -557,6 +570,10 @@ if (LLVM_BUILD_STATIC) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") endif() +# Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. +set(LLVM_TARGET_TRIPLE_ENV CACHE STRING "The name of environment variable to override default target. Disabled by blank.") +mark_as_advanced(LLVM_TARGET_TRIPLE_ENV) + # All options referred to from HandleLLVMOptions have to be specified # BEFORE this include, otherwise options will not be correctly set on # first cmake run @@ -841,7 +858,6 @@ if( LLVM_INCLUDE_UTILS ) add_subdirectory(utils/not) add_subdirectory(utils/llvm-lit) add_subdirectory(utils/yaml-bench) - add_subdirectory(utils/unittest) else() if ( LLVM_INCLUDE_TESTS ) message(FATAL_ERROR "Including tests when not building utils will not work. @@ -885,6 +901,10 @@ if( LLVM_INCLUDE_TESTS ) endif() add_subdirectory(test) add_subdirectory(unittests) + if( LLVM_INCLUDE_UTILS ) + add_subdirectory(utils/unittest) + endif() + if (WIN32) # This utility is used to prevent crashing tests from calling Dr. Watson on # Windows. diff --git a/CREDITS.TXT b/CREDITS.TXT index 15d822a680911f07fe48f33f9e733099e8edc396..20bd553ae2bc6cad93aaee2bdd935d076fa4d6c4 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -265,7 +265,7 @@ D: Release manager (1.7+) N: Sylvestre Ledru E: sylvestre@debian.org W: http://sylvestre.ledru.info/ -W: http://llvm.org/apt/ +W: http://apt.llvm.org/ D: Debian and Ubuntu packaging D: Continuous integration with jenkins diff --git a/bindings/go/llvm/DIBuilderBindings.cpp b/bindings/go/llvm/DIBuilderBindings.cpp index 53e223d67b4e8e9636c7a0be8533a7ccdf73c154..a0792e93d4ba560e7da040d73ddbbfd5df734df2 100644 --- a/bindings/go/llvm/DIBuilderBindings.cpp +++ b/bindings/go/llvm/DIBuilderBindings.cpp @@ -19,8 +19,6 @@ using namespace llvm; -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef) - LLVMDIBuilderRef LLVMNewDIBuilder(LLVMModuleRef mref) { Module *m = unwrap(mref); return wrap(new DIBuilder(*m)); diff --git a/bindings/go/llvm/IRBindings.h b/bindings/go/llvm/IRBindings.h index f4f490391d4f1107558ee889af0628a93cb39b66..25a00b13804423ad8ad491184d760b7675777161 100644 --- a/bindings/go/llvm/IRBindings.h +++ b/bindings/go/llvm/IRBindings.h @@ -26,7 +26,6 @@ extern "C" { #endif -typedef struct LLVMOpaqueMetadata *LLVMMetadataRef; struct LLVMDebugLocMetadata{ unsigned Line; unsigned Col; @@ -59,16 +58,6 @@ void LLVMSetSubprogram(LLVMValueRef Fn, LLVMMetadataRef SP); #ifdef __cplusplus } -namespace llvm { - -DEFINE_ISA_CONVERSION_FUNCTIONS(Metadata, LLVMMetadataRef) - -inline Metadata **unwrap(LLVMMetadataRef *Vals) { - return reinterpret_cast(Vals); -} - -} - #endif #endif diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go index fe191beb38132ef501e74178d9acf682a218cd90..2220970343071d09b4cc3906db47ad83d2e375a7 100644 --- a/bindings/go/llvm/ir.go +++ b/bindings/go/llvm/ir.go @@ -611,6 +611,12 @@ func (t Type) StructElementTypes() []Type { } // Operations on array, pointer, and vector types (sequence types) +func (t Type) Subtypes() (ret []Type) { + ret = make([]Type, C.LLVMGetNumContainedTypes(t.C)) + C.LLVMGetSubtypes(t.C, llvmTypeRefPtr(&ret[0])) + return +} + func ArrayType(elementType Type, elementCount int) (t Type) { t.C = C.LLVMArrayType(elementType.C, C.unsigned(elementCount)) return diff --git a/bindings/go/llvm/ir_test.go b/bindings/go/llvm/ir_test.go index c823615a4293c8a2e2997b9fdbaf0a444e0081a3..325ee4890f4c16fd0545357fe203856c4ee5d31e 100644 --- a/bindings/go/llvm/ir_test.go +++ b/bindings/go/llvm/ir_test.go @@ -134,3 +134,29 @@ func TestDebugLoc(t *testing.T) { t.Errorf("Got metadata %v as scope, though wanted %v", loc.Scope.C, scope.C) } } + +func TestSubtypes(t *testing.T) { + cont := NewContext() + defer cont.Dispose() + + int_pointer := PointerType(cont.Int32Type(), 0) + int_inner := int_pointer.Subtypes() + if len(int_inner) != 1 { + t.Errorf("Got size %d, though wanted 1") + } + if int_inner[0] != cont.Int32Type() { + t.Errorf("Expected int32 type") + } + + st_pointer := cont.StructType([]Type{cont.Int32Type(), cont.Int8Type()}, false) + st_inner := st_pointer.Subtypes() + if len(st_inner) != 2 { + t.Errorf("Got size %d, though wanted 2") + } + if st_inner[0] != cont.Int32Type() { + t.Errorf("Expected first struct field to be int32") + } + if st_inner[1] != cont.Int8Type() { + t.Errorf("Expected second struct field to be int8") + } +} diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml index 399fd2d27c201a86108c8f8c711920acc67b2bc5..6e8ca662ef67ea45e4b34b788271644c54ed5d23 100644 --- a/bindings/ocaml/llvm/llvm.ml +++ b/bindings/ocaml/llvm/llvm.ml @@ -459,6 +459,8 @@ external is_packed : lltype -> bool = "llvm_is_packed" external is_opaque : lltype -> bool = "llvm_is_opaque" (*--... Operations on pointer, vector, and array types .....................--*) + +external subtypes : lltype -> lltype array = "llvm_subtypes" external array_type : lltype -> int -> lltype = "llvm_array_type" external pointer_type : lltype -> lltype = "llvm_pointer_type" external qualified_pointer_type : lltype -> int -> lltype diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli index 4068126e2cbf1c0236afdc882eddd1321a0c9b90..c422e78f5d2dd0c770becfede3db37b49504399b 100644 --- a/bindings/ocaml/llvm/llvm.mli +++ b/bindings/ocaml/llvm/llvm.mli @@ -658,6 +658,9 @@ val is_opaque : lltype -> bool (** {7 Operations on pointer, vector, and array types} *) +(** [subtypes ty] returns [ty]'s subtypes *) +val subtypes : lltype -> lltype array + (** [array_type ty n] returns the array type containing [n] elements of type [ty]. See the method [llvm::ArrayType::get]. *) val array_type : lltype -> int -> lltype diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c index af04ea25c8ab403a6eaa5c5bf99364f5660c7998..4b6d1c5072bc50917268ca10f18683b65bec0b27 100644 --- a/bindings/ocaml/llvm/llvm_ocaml.c +++ b/bindings/ocaml/llvm/llvm_ocaml.c @@ -506,6 +506,20 @@ CAMLprim value llvm_is_opaque(LLVMTypeRef StructTy) { /*--... Operations on array, pointer, and vector types .....................--*/ +/* lltype -> lltype array */ +CAMLprim value llvm_subtypes(LLVMTypeRef Ty) { + CAMLparam0(); + CAMLlocal1(Arr); + + unsigned Size = LLVMGetNumContainedTypes(Ty); + + Arr = caml_alloc(Size, 0); + + LLVMGetSubtypes(Ty, (LLVMTypeRef *) Arr); + + CAMLreturn(Arr); +} + /* lltype -> int -> lltype */ CAMLprim LLVMTypeRef llvm_array_type(LLVMTypeRef ElementTy, value Count) { return LLVMArrayType(ElementTy, Int_val(Count)); diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 0331d0fa10abf161af11ffdfbf401681febcf6d8..de8e9bf9a494404a2cdb63ba8327e329cb983076 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -530,16 +530,6 @@ else() message(STATUS "Doxygen disabled.") endif() -if (LLVM_ENABLE_SPHINX) - message(STATUS "Sphinx enabled.") - find_package(Sphinx REQUIRED) - if (LLVM_BUILD_DOCS) - add_custom_target(sphinx ALL) - endif() -else() - message(STATUS "Sphinx disabled.") -endif() - set(LLVM_BINDINGS "") if(WIN32) message(STATUS "Go bindings disabled.") diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 7f7608cff33d3e0c683272656fc4bd9d5fa6592f..2b54bdbf290076117236c2056bc8556467ca6a4f 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -81,8 +81,9 @@ function(add_llvm_symbol_exports target_name export_file) # Gold and BFD ld require a version script rather than a plain list. set(native_export_file "${target_name}.exports") # FIXME: Don't write the "local:" line on OpenBSD. + # in the export file, also add a linker script to version LLVM symbols (form: LLVM_N.M) add_custom_command(OUTPUT ${native_export_file} - COMMAND echo "{" > ${native_export_file} + COMMAND echo "LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} {" > ${native_export_file} COMMAND grep -q "[[:alnum:]]" ${export_file} && echo " global:" >> ${native_export_file} || : COMMAND sed -e "s/$/;/" -e "s/^/ /" < ${export_file} >> ${native_export_file} COMMAND echo " local: *;" >> ${native_export_file} @@ -1132,6 +1133,19 @@ function(configure_lit_site_cfg input output) set(LIT_SITE_CFG_IN_HEADER "## Autogenerated from ${input}\n## Do not edit!") + # Override config_target_triple (and the env) + if(LLVM_TARGET_TRIPLE_ENV) + # This is expanded into the heading. + string(CONCAT LIT_SITE_CFG_IN_HEADER "${LIT_SITE_CFG_IN_HEADER}\n\n" + "import os\n" + "target_env = \"${LLVM_TARGET_TRIPLE_ENV}\"\n" + "config.target_triple = config.environment[target_env] = os.environ.get(target_env, \"${TARGET_TRIPLE}\")\n" + ) + + # This is expanded to; config.target_triple = ""+config.target_triple+"" + set(TARGET_TRIPLE "\"+config.target_triple+\"") + endif() + configure_file(${input} ${output} @ONLY) endfunction() diff --git a/cmake/modules/AddSphinxTarget.cmake b/cmake/modules/AddSphinxTarget.cmake index cfc7f38e9e7776e81f79993b713584de935fbb1a..4540c5c36c8e25ecdc3120ae29d9dafe388d0cf6 100644 --- a/cmake/modules/AddSphinxTarget.cmake +++ b/cmake/modules/AddSphinxTarget.cmake @@ -1,3 +1,16 @@ + +# Create sphinx target +if (LLVM_ENABLE_SPHINX) + message(STATUS "Sphinx enabled.") + find_package(Sphinx REQUIRED) + if (LLVM_BUILD_DOCS AND NOT TARGET sphinx) + add_custom_target(sphinx ALL) + endif() +else() + message(STATUS "Sphinx disabled.") +endif() + + # Handy function for creating the different Sphinx targets. # # ``builder`` should be one of the supported builders used by diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index dd44476bc996426a7b5a7832e6eade5f75ec81bb..c3325db117882d64457fab640b96542c6cd21fd8 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -17,6 +17,9 @@ else() set(LINKER_IS_LLD_LINK FALSE) endif() +set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO") +string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO) + # Ninja Job Pool support # The following only works with the Ninja generator in CMake >= 3.0. set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING @@ -32,16 +35,19 @@ endif() set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING "Define the maximum number of concurrent link jobs.") -if(LLVM_PARALLEL_LINK_JOBS) - if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja") - message(WARNING "Job pooling is only available with Ninja generators.") - else() +if(CMAKE_MAKE_PROGRAM MATCHES "ninja") + if(NOT LLVM_PARALLEL_LINK_JOBS AND uppercase_LLVM_ENABLE_LTO STREQUAL "THIN") + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") + set(LLVM_PARALLEL_LINK_JOBS "2") + endif() + if(LLVM_PARALLEL_LINK_JOBS) set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS}) set(CMAKE_JOB_POOL_LINK link_job_pool) endif() +elseif(LLVM_PARALLEL_LINK_JOBS) + message(WARNING "Job pooling is only available with Ninja generators.") endif() - if (LINKER_IS_LLD_LINK) # Pass /MANIFEST:NO so that CMake doesn't run mt.exe on our binaries. Adding # manifests with mt.exe breaks LLD's symbol tables and takes as much time as @@ -95,6 +101,10 @@ else() message(FATAL_ERROR "Unknown value for LLVM_ABI_BREAKING_CHECKS: \"${LLVM_ABI_BREAKING_CHECKS}\"!") endif() +if( LLVM_REVERSE_ITERATION ) + set( LLVM_ENABLE_REVERSE_ITERATION 1 ) +endif() + if(WIN32) set(LLVM_HAVE_LINK_VERSION_SCRIPT 0) if(CYGWIN) @@ -222,6 +232,13 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) endif( LLVM_BUILD_32_BITS ) endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) +# If building on a GNU specific 32-bit system, make sure off_t is 64 bits +# so that off_t can stored offset > 2GB +if( CMAKE_SIZEOF_VOID_P EQUAL 4 ) + add_definitions( -D_LARGEFILE_SOURCE ) + add_definitions( -D_FILE_OFFSET_BITS=64 ) +endif() + if( XCODE ) # For Xcode enable several build settings that correspond to # many warnings that are on by default in Clang but are @@ -368,6 +385,9 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) if (LLVM_ENABLE_CXX1Y) check_cxx_compiler_flag("-std=c++1y" CXX_SUPPORTS_CXX1Y) append_if(CXX_SUPPORTS_CXX1Y "-std=c++1y" CMAKE_CXX_FLAGS) + elseif(LLVM_ENABLE_CXX1Z) + check_cxx_compiler_flag("-std=c++1z" CXX_SUPPORTS_CXX1Z) + append_if(CXX_SUPPORTS_CXX1Z "-std=c++1z" CMAKE_CXX_FLAGS) else() check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11) if (CXX_SUPPORTS_CXX11) @@ -561,6 +581,10 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) add_flag_if_supported("-Wstring-conversion" STRING_CONVERSION_FLAG) endif (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) +if (LLVM_COMPILER_IS_GCC_COMPATIBLE AND NOT LLVM_ENABLE_WARNINGS) + append("-w" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) +endif() + macro(append_common_sanitizer_flags) if (NOT MSVC) # Append -fno-omit-frame-pointer and turn on debug info to get better @@ -713,8 +737,6 @@ append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate='${LLVM_PRO CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) -set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO") -string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO) if(LLVM_ENABLE_LTO AND LLVM_ON_WIN32 AND NOT LINKER_IS_LLD_LINK) message(FATAL_ERROR "When compiling for Windows, LLVM_ENABLE_LTO requires using lld as the linker (point CMAKE_LINKER at lld-link.exe)") endif() diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index da0858e54d441d6ca892dd91d1708486c21a14db..21421e4fdbd27520149f4b87db8b37d8751bad35 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -30,16 +30,26 @@ function(tablegen project ofn) endif() endif() + # We need both _TABLEGEN_TARGET and _TABLEGEN_EXE in the DEPENDS list + # (both the target and the file) to have .inc files rebuilt on + # a tablegen change, as cmake does not propagate file-level dependencies + # of custom targets. See the following ticket for more information: + # https://cmake.org/Bug/view.php?id=15858 + # The dependency on both, the target and the file, produces the same + # dependency twice in the result file when + # ("${${project}_TABLEGEN_TARGET}" STREQUAL "${${project}_TABLEGEN_EXE}") + # but lets us having smaller and cleaner code here. add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp # Generate tablegen output in a temporary file. COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} - ${LLVM_TABLEGEN_FLAGS} + ${LLVM_TABLEGEN_FLAGS} ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp # The file in LLVM_TARGET_DEFINITIONS may be not in the current # directory and local_tds may not contain it, so we must # explicitly list it here: - DEPENDS ${${project}_TABLEGEN_TARGET} ${local_tds} ${global_tds} + DEPENDS ${${project}_TABLEGEN_TARGET} ${${project}_TABLEGEN_EXE} + ${local_tds} ${global_tds} ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} COMMENT "Building ${ofn}..." ) diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake index e92540991a1092d7d15daa496456753857680156..552fe77cdfb6890d43b34ff0fc478e29577951b7 100644 --- a/cmake/modules/VersionFromVCS.cmake +++ b/cmake/modules/VersionFromVCS.cmake @@ -25,57 +25,67 @@ function(add_version_info_from_vcs VERS) set(LLVM_REPOSITORY ${Project_WC_URL} PARENT_SCOPE) endif() endif() - elseif( EXISTS ${SOURCE_DIR}/.git ) - set(result "${result}git") - # Try to get a ref-id + else() find_program(git_executable NAMES git git.exe git.cmd) if( git_executable ) - if( EXISTS ${SOURCE_DIR}/.git/svn ) - # Get the repository URL - execute_process(COMMAND - ${git_executable} svn info - WORKING_DIRECTORY ${SOURCE_DIR} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) - if( git_result EQUAL 0 ) - string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output}) - if(svn_url) - set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE) + # Run from a subdirectory to force git to print an absoute path. + execute_process(COMMAND ${git_executable} rev-parse --git-dir + WORKING_DIRECTORY ${SOURCE_DIR}/cmake + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_dir + ERROR_QUIET) + if(git_result EQUAL 0) + # Try to get a ref-id + string(STRIP "${git_dir}" git_dir) + set(result "${result}git") + if( EXISTS ${git_dir}/svn ) + # Get the repository URL + execute_process(COMMAND + ${git_executable} svn info + WORKING_DIRECTORY ${SOURCE_DIR} + TIMEOUT 5 + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output + ERROR_QUIET) + if( git_result EQUAL 0 ) + string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output}) + if(svn_url) + set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE) + endif() + endif() + + # Get the svn revision number for this git commit if one exists. + execute_process(COMMAND ${git_executable} svn find-rev HEAD + WORKING_DIRECTORY ${SOURCE_DIR} + TIMEOUT 5 + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_head_svn_rev_number + OUTPUT_STRIP_TRAILING_WHITESPACE) + if( git_result EQUAL 0 AND git_output) + set(SVN_REVISION ${git_head_svn_rev_number} PARENT_SCOPE) + set(git_svn_rev "-svn-${git_head_svn_rev_number}") + else() + set(git_svn_rev "") endif() endif() - # Get the svn revision number for this git commit if one exists. - execute_process(COMMAND ${git_executable} svn find-rev HEAD + # Get the git ref id + execute_process(COMMAND + ${git_executable} rev-parse --short HEAD WORKING_DIRECTORY ${SOURCE_DIR} TIMEOUT 5 RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_head_svn_rev_number + OUTPUT_VARIABLE git_ref_id OUTPUT_STRIP_TRAILING_WHITESPACE) - if( git_result EQUAL 0 AND git_output) - set(SVN_REVISION ${git_head_svn_rev_number} PARENT_SCOPE) - set(git_svn_rev "-svn-${git_head_svn_rev_number}") + + if( git_result EQUAL 0 ) + set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE) + set(result "${result}${git_svn_rev}-${git_ref_id}") else() - set(git_svn_rev "") + set(result "${result}${git_svn_rev}") endif() endif() - - # Get the git ref id - execute_process(COMMAND - ${git_executable} rev-parse --short HEAD - WORKING_DIRECTORY ${SOURCE_DIR} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_ref_id - OUTPUT_STRIP_TRAILING_WHITESPACE) - - if( git_result EQUAL 0 ) - set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE) - set(result "${result}${git_svn_rev}-${git_ref_id}") - else() - set(result "${result}${git_svn_rev}") - endif() endif() endif() set(${VERS} ${result} PARENT_SCOPE) diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst index 5ff0f207f227b8f42c4a971e519f37e42d4b52a1..caa697ca28cdfb1c7424f5be231e993e9cf43cc3 100644 --- a/docs/AMDGPUUsage.rst +++ b/docs/AMDGPUUsage.rst @@ -1,110 +1,3441 @@ -============================== -User Guide for AMDGPU Back-end -============================== +============================= +User Guide for AMDGPU Backend +============================= + +.. contents:: + :local: Introduction ============ -The AMDGPU back-end provides ISA code generation for AMD GPUs, starting with -the R600 family up until the current Volcanic Islands (GCN Gen 3). +The AMDGPU backend provides ISA code generation for AMD GPUs, starting with the +R600 family up until the current GCN families. It lives in the +``lib/Target/AMDGPU`` directory. -Refer to `AMDGPU section in Architecture & Platform Information for Compiler Writers `_ -for additional documentation. +LLVM +==== -Conventions -=========== +.. _amdgpu-target-triples: + +Target Triples +-------------- + +Use the ``clang -target ---`` option to +specify the target triple: + + .. table:: AMDGPU Target Triples + :name: amdgpu-target-triples-table + + ============ ======== ========= =========== + Architecture Vendor OS Environment + ============ ======== ========= =========== + r600 amd + amdgcn amd + amdgcn amd amdhsa + amdgcn amd amdhsa opencl + amdgcn amd amdhsa amdgizcl + amdgcn amd amdhsa amdgiz + amdgcn amd amdhsa hcc + ============ ======== ========= =========== + +``r600-amd--`` + Supports AMD GPUs HD2XXX-HD6XXX for graphics and compute shaders executed on + the MESA runtime. + +``amdgcn-amd--`` + Supports AMD GPUs GCN 6 onwards for graphics and compute shaders executed on + the MESA runtime. + +``amdgcn-amd-amdhsa-`` + Supports AMD GCN GPUs GFX6 onwards for compute kernels executed on HSA [HSA]_ + compatible runtimes such as AMD's ROCm [AMD-ROCm]_. + +``amdgcn-amd-amdhsa-opencl`` + Supports AMD GCN GPUs GFX6 onwards for OpenCL compute kernels executed on HSA + [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See + :ref:`amdgpu-opencl`. + +``amdgcn-amd-amdhsa-amdgizcl`` + Same as ``amdgcn-amd-amdhsa-opencl`` except a different address space mapping + is used (see :ref:`amdgpu-address-spaces`). + +``amdgcn-amd-amdhsa-amdgiz`` + Same as ``amdgcn-amd-amdhsa-`` except a different address space mapping is + used (see :ref:`amdgpu-address-spaces`). + +``amdgcn-amd-amdhsa-hcc`` + Supports AMD GCN GPUs GFX6 onwards for AMD HC language compute kernels + executed on HSA [HSA]_ compatible runtimes such as AMD's ROCm [AMD-ROCm]_. See + :ref:`amdgpu-hcc`. + +.. _amdgpu-processors: + +Processors +---------- + +Use the ``clang -mcpu `` option to specify the AMD GPU processor. The +names from both the *Processor* and *Alternative Processor* can be used. + + .. table:: AMDGPU Processors + :name: amdgpu-processors-table + + ========== =========== ============ ===== ======= ================== + Processor Alternative Target dGPU/ Runtime Example + Processor Triple APU Support Products + Architecture + ========== =========== ============ ===== ======= ================== + **R600** [AMD-R6xx]_ + -------------------------------------------------------------------- + r600 r600 dGPU + r630 r600 dGPU + rs880 r600 dGPU + rv670 r600 dGPU + **R700** [AMD-R7xx]_ + -------------------------------------------------------------------- + rv710 r600 dGPU + rv730 r600 dGPU + rv770 r600 dGPU + **Evergreen** [AMD-Evergreen]_ + -------------------------------------------------------------------- + cedar r600 dGPU + redwood r600 dGPU + sumo r600 dGPU + juniper r600 dGPU + cypress r600 dGPU + **Northern Islands** [AMD-Cayman-Trinity]_ + -------------------------------------------------------------------- + barts r600 dGPU + turks r600 dGPU + caicos r600 dGPU + cayman r600 dGPU + **GCN GFX6 (Southern Islands (SI))** [AMD-Souther-Islands]_ + -------------------------------------------------------------------- + gfx600 - SI amdgcn dGPU + - tahiti + gfx601 - pitcairn amdgcn dGPU + - verde + - oland + - hainan + **GCN GFX7 (Sea Islands (CI))** [AMD-Sea-Islands]_ + -------------------------------------------------------------------- + gfx700 - bonaire amdgcn dGPU - Radeon HD 7790 + - Radeon HD 8770 + - R7 260 + - R7 260X + \ - kaveri amdgcn APU - A6-7000 + - A6 Pro-7050B + - A8-7100 + - A8 Pro-7150B + - A10-7300 + - A10 Pro-7350B + - FX-7500 + - A8-7200P + - A10-7400P + - FX-7600P + gfx701 - hawaii amdgcn dGPU ROCm - FirePro W8100 + - FirePro W9100 + - FirePro S9150 + - FirePro S9170 + gfx702 dGPU ROCm - Radeon R9 290 + - Radeon R9 290x + - Radeon R390 + - Radeon R390x + gfx703 - kabini amdgcn APU - E1-2100 + - mullins - E1-2200 + - E1-2500 + - E2-3000 + - E2-3800 + - A4-5000 + - A4-5100 + - A6-5200 + - A4 Pro-3340B + **GCN GFX8 (Volcanic Islands (VI))** [AMD-Volcanic-Islands]_ + -------------------------------------------------------------------- + gfx800 - iceland amdgcn dGPU - FirePro S7150 + - FirePro S7100 + - FirePro W7100 + - Radeon R285 + - Radeon R9 380 + - Radeon R9 385 + - Mobile FirePro + M7170 + gfx801 - carrizo amdgcn APU - A6-8500P + - Pro A6-8500B + - A8-8600P + - Pro A8-8600B + - FX-8800P + - Pro A12-8800B + \ amdgcn APU ROCm - A10-8700P + - Pro A10-8700B + - A10-8780P + \ amdgcn APU - A10-9600P + - A10-9630P + - A12-9700P + - A12-9730P + - FX-9800P + - FX-9830P + \ amdgcn APU - E2-9010 + - A6-9210 + - A9-9410 + gfx802 - tonga amdgcn dGPU ROCm Same as gfx800 + gfx803 - fiji amdgcn dGPU ROCm - Radeon R9 Nano + - Radeon R9 Fury + - Radeon R9 FuryX + - Radeon Pro Duo + - FirePro S9300x2 + \ - polaris10 amdgcn dGPU ROCm - Radeon RX 470 + - Radeon RX 480 + \ - polaris11 amdgcn dGPU ROCm - Radeon RX 460 + gfx804 amdgcn dGPU Same as gfx803 + gfx810 - stoney amdgcn APU + **GCN GFX9** + -------------------------------------------------------------------- + gfx900 amdgcn dGPU - FirePro W9500 + - FirePro S9500 + - FirePro S9500x2 + gfx901 amdgcn dGPU ROCm Same as gfx900 + except XNACK is + enabled + gfx902 amdgcn APU *TBA* + + .. TODO + Add product + names. + gfx903 amdgcn APU Same as gfx902 + except XNACK is + enabled + ========== =========== ============ ===== ======= ================== + +.. _amdgpu-address-spaces: Address Spaces -------------- -The AMDGPU back-end uses the following address space mapping: +The AMDGPU backend uses the following address space mappings. + +The memory space names used in the table, aside from the region memory space, is +from the OpenCL standard. + +LLVM Address Space number is used throughout LLVM (for example, in LLVM IR). + + .. table:: Address Space Mapping + :name: amdgpu-address-space-mapping-table + + ================== ================= ================= ================= ================= + LLVM Address Space Memory Space + ------------------ ----------------------------------------------------------------------- + \ Current Default amdgiz/amdgizcl hcc Future Default + ================== ================= ================= ================= ================= + 0 Private (Scratch) Generic (Flat) Generic (Flat) Generic (Flat) + 1 Global Global Global Global + 2 Constant Constant Constant Region (GDS) + 3 Local (group/LDS) Local (group/LDS) Local (group/LDS) Local (group/LDS) + 4 Generic (Flat) Region (GDS) Region (GDS) Constant + 5 Region (GDS) Private (Scratch) Private (Scratch) Private (Scratch) + ================== ================= ================= ================= ================= + +Current Default + This is the current default address space mapping used for all languages + except hcc. This will shortly be deprecated. + +amdgiz/amdgizcl + This is the current address space mapping used when ``amdgiz`` or ``amdgizcl`` + is specified as the target triple environment value. + +hcc + This is the current address space mapping used when ``hcc`` is specified as + the target triple environment value.This will shortly be deprecated. + +Future Default + This will shortly be the only address space mapping for all languages using + AMDGPU backend. + +.. _amdgpu-memory-scopes: + +Memory Scopes +------------- + +This section provides LLVM memory synchronization scopes supported by the AMDGPU +backend memory model when the target triple OS is ``amdhsa`` (see +:ref:`amdgpu-amdhsa-memory-model` and :ref:`amdgpu-target-triples`). + +The memory model supported is based on the HSA memory model [HSA]_ which is +based in turn on HRF-indirect with scope inclusion [HRF]_. The happens-before +relation is transitive over the synchonizes-with relation independent of scope, +and synchonizes-with allows the memory scope instances to be inclusive (see +table :ref:`amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table`). + +This is different to the OpenCL [OpenCL]_ memory model which does not have scope +inclusion and requires the memory scopes to exactly match. However, this +is conservatively correct for OpenCL. + + .. table:: AMDHSA LLVM Sync Scopes for AMDHSA + :name: amdgpu-amdhsa-llvm-sync-scopes-amdhsa-table + + ================ ========================================================== + LLVM Sync Scope Description + ================ ========================================================== + *none* The default: ``system``. + + Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system``. + - ``agent`` and executed by a thread on the same agent. + - ``workgroup`` and executed by a thread in the same + workgroup. + - ``wavefront`` and executed by a thread in the same + wavefront. + + ``agent`` Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system`` or ``agent`` and executed by a thread on the + same agent. + - ``workgroup`` and executed by a thread in the same + workgroup. + - ``wavefront`` and executed by a thread in the same + wavefront. + + ``workgroup`` Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system``, ``agent`` or ``workgroup`` and executed by a + thread in the same workgroup. + - ``wavefront`` and executed by a thread in the same + wavefront. + + ``wavefront`` Synchronizes with, and participates in modification and + seq_cst total orderings with, other operations (except + image operations) for all address spaces (except private, + or generic that accesses private) provided the other + operation's sync scope is: + + - ``system``, ``agent``, ``workgroup`` or ``wavefront`` + and executed by a thread in the same wavefront. + + ``singlethread`` Only synchronizes with, and participates in modification + and seq_cst total orderings with, other operations (except + image operations) running in the same thread for all + address spaces (for example, in signal handlers). + ================ ========================================================== + +AMDGPU Intrinsics +----------------- + +The AMDGPU backend implements the following intrinsics. + +*This section is WIP.* + +.. TODO + List AMDGPU intrinsics + +Code Object +=========== + +The AMDGPU backend generates a standard ELF [ELF]_ relocatable code object that +can be linked by ``lld`` to produce a standard ELF shared code object which can +be loaded and executed on an AMDGPU target. + +Header +------ + +The AMDGPU backend uses the following ELF header: + + .. table:: AMDGPU ELF Header + :name: amdgpu-elf-header-table + + ========================== ========================= + Field Value + ========================== ========================= + ``e_ident[EI_CLASS]`` ``ELFCLASS64`` + ``e_ident[EI_DATA]`` ``ELFDATA2LSB`` + ``e_ident[EI_OSABI]`` ``ELFOSABI_AMDGPU_HSA`` + ``e_ident[EI_ABIVERSION]`` ``ELFABIVERSION_AMDGPU_HSA`` + ``e_type`` ``ET_REL`` or ``ET_DYN`` + ``e_machine`` ``EM_AMDGPU`` + ``e_entry`` 0 + ``e_flags`` 0 + ========================== ========================= + +.. + + .. table:: AMDGPU ELF Header Enumeration Values + :name: amdgpu-elf-header-enumeration-values-table + + ============================ ===== + Name Value + ============================ ===== + ``EM_AMDGPU`` 224 + ``ELFOSABI_AMDGPU_HSA`` 64 + ``ELFABIVERSION_AMDGPU_HSA`` 1 + ============================ ===== + +``e_ident[EI_CLASS]`` + The ELF class is always ``ELFCLASS64``. The AMDGPU backend only supports 64 bit + applications. + +``e_ident[EI_DATA]`` + All AMDGPU targets use ELFDATA2LSB for little-endian byte ordering. + +``e_ident[EI_OSABI]`` + The AMD GPU architecture specific OS ABI of ``ELFOSABI_AMDGPU_HSA`` is used to + specify that the code object conforms to the AMD HSA runtime ABI [HSA]_. + +``e_ident[EI_ABIVERSION]`` + The AMD GPU architecture specific OS ABI version of + ``ELFABIVERSION_AMDGPU_HSA`` is used to specify the version of AMD HSA runtime + ABI to which the code object conforms. + +``e_type`` + Can be one of the following values: + + + ``ET_REL`` + The type produced by the AMD GPU backend compiler as it is relocatable code + object. + + ``ET_DYN`` + The type produced by the linker as it is a shared code object. + + The AMD HSA runtime loader requires a ``ET_DYN`` code object. + +``e_machine`` + The value ``EM_AMDGPU`` is used for the machine for all members of the AMD GPU + architecture family. The specific member is specified in the + ``NT_AMD_AMDGPU_ISA`` entry in the ``.note`` section (see + :ref:`amdgpu-note-records`). + +``e_entry`` + The entry point is 0 as the entry points for individual kernels must be + selected in order to invoke them through AQL packets. + +``e_flags`` + The value is 0 as no flags are used. + +Sections +-------- + +An AMDGPU target ELF code object has the standard ELF sections which include: + + .. table:: AMDGPU ELF Sections + :name: amdgpu-elf-sections-table + + ================== ================ ================================= + Name Type Attributes + ================== ================ ================================= + ``.bss`` ``SHT_NOBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.data`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.debug_``\ *\** ``SHT_PROGBITS`` *none* + ``.dynamic`` ``SHT_DYNAMIC`` ``SHF_ALLOC`` + ``.dynstr`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.dynsym`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.got`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.hash`` ``SHT_HASH`` ``SHF_ALLOC`` + ``.note`` ``SHT_NOTE`` *none* + ``.rela``\ *name* ``SHT_RELA`` *none* + ``.rela.dyn`` ``SHT_RELA`` *none* + ``.rodata`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.shstrtab`` ``SHT_STRTAB`` *none* + ``.strtab`` ``SHT_STRTAB`` *none* + ``.symtab`` ``SHT_SYMTAB`` *none* + ``.text`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_EXECINSTR`` + ================== ================ ================================= + +These sections have their standard meanings (see [ELF]_) and are only generated +if needed. + +``.debug``\ *\** + The standard DWARF sections. See :ref:`amdgpu-dwarf` for information on the + DWARF produced by the AMDGPU backend. + +``.dynamic``, ``.dynstr``, ``.dynstr``, ``.hash`` + The standard sections used by a dynamic loader. + +``.note`` + See :ref:`amdgpu-note-records` for the note records supported by the AMDGPU + backend. + +``.rela``\ *name*, ``.rela.dyn`` + For relocatable code objects, *name* is the name of the section that the + relocation records apply. For example, ``.rela.text`` is the section name for + relocation records associated with the ``.text`` section. + + For linked shared code objects, ``.rela.dyn`` contains all the relocation + records from each of the relocatable code object's ``.rela``\ *name* sections. + + See :ref:`amdgpu-relocation-records` for the relocation records supported by + the AMDGPU backend. + +``.text`` + The executable machine code for the kernels and functions they call. Generated + as position independent code. See :ref:`amdgpu-code-conventions` for + information on conventions used in the isa generation. + +.. _amdgpu-note-records: + +Note Records +------------ + +As required by ``ELFCLASS64``, minimal zero byte padding must be generated after +the ``name`` field to ensure the ``desc`` field is 4 byte aligned. In addition, +minimal zero byte padding must be generated to ensure the ``desc`` field size is +a multiple of 4 bytes. The ``sh_addralign`` field of the ``.note`` section must +be at least 4 to indicate at least 8 byte alignment. + +The AMDGPU backend code object uses the following ELF note records in the +``.note`` section. The *Description* column specifies the layout of the note +record’s ``desc`` field. All fields are consecutive bytes. Note records with +variable size strings have a corresponding ``*_size`` field that specifies the +number of bytes, including the terminating null character, in the string. The +string(s) come immediately after the preceding fields. + +Additional note records can be present. + + .. table:: AMDGPU ELF Note Records + :name: amdgpu-elf-note-records-table - ================== =================== ============== - LLVM Address Space DWARF Address Space Memory Space - ================== =================== ============== - 0 1 Private - 1 N/A Global - 2 N/A Constant - 3 2 Local - 4 N/A Generic (Flat) - 5 N/A Region - ================== =================== ============== + ===== ========================== ========================================== + Name Type Description + ===== ========================== ========================================== + "AMD" ``NT_AMD_AMDGPU_METADATA`` + "AMD" ``NT_AMD_AMDGPU_ISA`` + ===== ========================== ========================================== -The terminology in the table, aside from the region memory space, is from the -OpenCL standard. +.. -LLVM Address Space is used throughout LLVM (for example, in LLVM IR). DWARF -Address Space is emitted in DWARF, and is used by tools, such as debugger, -profiler and others. + .. table:: AMDGPU ELF Note Record Enumeration Values + :name: amdgpu-elf-note-record-enumeration-values-table + + ============================= ===== + Name Value + ============================= ===== + *reserved* 0-9 + ``NT_AMD_AMDGPU_METADATA`` 10 + ``NT_AMD_AMDGPU_ISA`` 11 + ============================= ===== + +``NT_AMD_AMDGPU_ISA`` + Specifies the instruction set architecture used by the machine code contained + in the code object. + + This note record is required for code objects containing machine code for + processors matching the ``amdgcn`` architecture in table + :ref:`amdgpu-processors`. + + The null terminated string has the following syntax: + + *architecture*\ ``-``\ *vendor*\ ``-``\ *os*\ ``-``\ *environment*\ ``-``\ *processor* + + where: + + *architecture* + The architecture from table :ref:`amdgpu-target-triples-table`. + + This is always ``amdgcn`` when the target triple OS is ``amdhsa`` (see + :ref:`amdgpu-target-triples`). + + *vendor* + The vendor from table :ref:`amdgpu-target-triples-table`. + + For the AMDGPU backend this is always ``amd``. + + *os* + The OS from table :ref:`amdgpu-target-triples-table`. + + *environment* + An environment from table :ref:`amdgpu-target-triples-table`, or blank if + the environment has no affect on the execution of the code object. + + For the AMDGPU backend this is currently always blank. + *processor* + The processor from table :ref:`amdgpu-processors-table`. + + For example: + + ``amdgcn-amd-amdhsa--gfx901`` + +``NT_AMD_AMDGPU_METADATA`` + Specifies extensible metadata associated with the code object. See + :ref:`amdgpu-code-object-metadata` for the syntax of the code object metadata + string. + + This note record is required and must contain the minimum information + necessary to support the ROCM kernel queries. For example, the segment sizes + needed in a dispatch packet. In addition, a high level language runtime may + require other information to be included. For example, the AMD OpenCL runtime + records kernel argument information. + + .. TODO + Is the string null terminated? It probably should not if YAML allows it to + contain null characters, otherwise it should be. + +.. _amdgpu-code-object-metadata: + +Code Object Metadata +-------------------- + +The code object metadata is specified by the ``NT_AMD_AMDHSA_METADATA`` note +record (see :ref:`amdgpu-note-records`). + +The metadata is specified as a YAML formated string (see [YAML]_ and +:doc:`YamlIO`). + +The metadata is represented as a single YAML document comprised of the mapping +defined in table :ref:`amdgpu-amdhsa-code-object-metadata-mapping-table` and +referenced tables. + +For boolean values, the string values of ``false`` and ``true`` are used for +false and true respectively. + +Additional information can be added to the mappings. To avoid conflicts, any +non-AMD key names should be prefixed by "*vendor-name*.". + + .. table:: AMDHSA Code Object Metadata Mapping + :name: amdgpu-amdhsa-code-object-metadata-mapping-table + + ========== ============== ========= ======================================= + String Key Value Type Required? Description + ========== ============== ========= ======================================= + "Version" sequence of Required - The first integer is the major + 2 integers version. Currently 1. + - The second integer is the minor + version. Currently 0. + "Printf" sequence of Each string is encoded information + strings about a printf function call. The + encoded information is organized as + fields separated by colon (':'): + + ``ID:N:S[0]:S[1]:...:S[N-1]:FormatString`` + + where: + + ``ID`` + A 32 bit integer as a unique id for + each printf function call + + ``N`` + A 32 bit integer equal to the number + of arguments of printf function call + minus 1 + + ``S[i]`` (where i = 0, 1, ... , N-1) + 32 bit integers for the size in bytes + of the i-th FormatString argument of + the printf function call + + FormatString + The format string passed to the + printf function call. + "Kernels" sequence of Required Sequence of the mappings for each + mapping kernel in the code object. See + :ref:`amdgpu-amdhsa-code-object-kernel-metadata-mapping-table` + for the definition of the mapping. + ========== ============== ========= ======================================= + +.. + + .. table:: AMDHSA Code Object Kernel Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-metadata-mapping-table + + ================= ============== ========= ================================ + String Key Value Type Required? Description + ================= ============== ========= ================================ + "Name" string Required Source name of the kernel. + "SymbolName" string Required Name of the kernel + descriptor ELF symbol. + "Language" string Source language of the kernel. + Values include: + + - "OpenCL C" + - "OpenCL C++" + - "HCC" + - "OpenMP" + + "LanguageVersion" sequence of - The first integer is the major + 2 integers version. + - The second integer is the + minor version. + "Attrs" mapping Mapping of kernel attributes. + See + :ref:`amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table` + for the mapping definition. + "Arguments" sequence of Sequence of mappings of the + mapping kernel arguments. See + :ref:`amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table` + for the definition of the mapping. + "CodeProps" mapping Mapping of properties related to + the kernel code. See + :ref:`amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table` + for the mapping definition. + "DebugProps" mapping Mapping of properties related to + the kernel debugging. See + :ref:`amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table` + for the mapping definition. + ================= ============== ========= ================================ + +.. + + .. table:: AMDHSA Code Object Kernel Attribute Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table + + =================== ============== ========= ============================== + String Key Value Type Required? Description + =================== ============== ========= ============================== + "ReqdWorkGroupSize" sequence of The dispatch work-group size + 3 integers X, Y, Z must correspond to the + specified values. + + Corresponds to the OpenCL + ``reqd_work_group_size`` + attribute. + "WorkGroupSizeHint" sequence of The dispatch work-group size + 3 integers X, Y, Z is likely to be the + specified values. + + Corresponds to the OpenCL + ``work_group_size_hint`` + attribute. + "VecTypeHint" string The name of a scalar or vector + type. + + Corresponds to the OpenCL + ``vec_type_hint`` attribute. + =================== ============== ========= ============================== + +.. + + .. table:: AMDHSA Code Object Kernel Argument Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table + + ================= ============== ========= ================================ + String Key Value Type Required? Description + ================= ============== ========= ================================ + "Name" string Kernel argument name. + "TypeName" string Kernel argument type name. + "Size" integer Required Kernel argument size in bytes. + "Align" integer Required Kernel argument alignment in + bytes. Must be a power of two. + "ValueKind" string Required Kernel argument kind that + specifies how to set up the + corresponding argument. + Values include: + + "ByValue" + The argument is copied + directly into the kernarg. + + "GlobalBuffer" + A global address space pointer + to the buffer data is passed + in the kernarg. + + "DynamicSharedPointer" + A group address space pointer + to dynamically allocated LDS + is passed in the kernarg. + + "Sampler" + A global address space + pointer to a S# is passed in + the kernarg. + + "Image" + A global address space + pointer to a T# is passed in + the kernarg. + + "Pipe" + A global address space pointer + to an OpenCL pipe is passed in + the kernarg. + + "Queue" + A global address space pointer + to an OpenCL device enqueue + queue is passed in the + kernarg. + + "HiddenGlobalOffsetX" + The OpenCL grid dispatch + global offset for the X + dimension is passed in the + kernarg. + + "HiddenGlobalOffsetY" + The OpenCL grid dispatch + global offset for the Y + dimension is passed in the + kernarg. + + "HiddenGlobalOffsetZ" + The OpenCL grid dispatch + global offset for the Z + dimension is passed in the + kernarg. + + "HiddenNone" + An argument that is not used + by the kernel. Space needs to + be left for it, but it does + not need to be set up. + + "HiddenPrintfBuffer" + A global address space pointer + to the runtime printf buffer + is passed in kernarg. + + "HiddenDefaultQueue" + A global address space pointer + to the OpenCL device enqueue + queue that should be used by + the kernel by default is + passed in the kernarg. + + "HiddenCompletionAction" + *TBD* + + .. TODO + Add description. + + "ValueType" string Required Kernel argument value type. Only + present if "ValueKind" is + "ByValue". For vector data + types, the value is for the + element type. Values include: + + - "Struct" + - "I8" + - "U8" + - "I16" + - "U16" + - "F16" + - "I32" + - "U32" + - "F32" + - "I64" + - "U64" + - "F64" + + .. TODO + How can it be determined if a + vector type, and what size + vector? + "PointeeAlign" integer Alignment in bytes of pointee + type for pointer type kernel + argument. Must be a power + of 2. Only present if + "ValueKind" is + "DynamicSharedPointer". + "AddrSpaceQual" string Kernel argument address space + qualifier. Only present if + "ValueKind" is "GlobalBuffer" or + "DynamicSharedPointer". Values + are: + + - "Private" + - "Global" + - "Constant" + - "Local" + - "Generic" + - "Region" + + .. TODO + Is GlobalBuffer only Global + or Constant? Is + DynamicSharedPointer always + Local? Can HCC allow Generic? + How can Private or Region + ever happen? + "AccQual" string Kernel argument access + qualifier. Only present if + "ValueKind" is "Image" or + "Pipe". Values + are: + + - "ReadOnly" + - "WriteOnly" + - "ReadWrite" + + .. TODO + Does this apply to + GlobalBuffer? + "ActualAcc" string The actual memory accesses + performed by the kernel on the + kernel argument. Only present if + "ValueKind" is "GlobalBuffer", + "Image", or "Pipe". This may be + more restrictive than indicated + by "AccQual" to reflect what the + kernel actual does. If not + present then the runtime must + assume what is implied by + "AccQual" and "IsConst". Values + are: + + - "ReadOnly" + - "WriteOnly" + - "ReadWrite" + + "IsConst" boolean Indicates if the kernel argument + is const qualified. Only present + if "ValueKind" is + "GlobalBuffer". + + "IsRestrict" boolean Indicates if the kernel argument + is restrict qualified. Only + present if "ValueKind" is + "GlobalBuffer". + + "IsVolatile" boolean Indicates if the kernel argument + is volatile qualified. Only + present if "ValueKind" is + "GlobalBuffer". + + "IsPipe" boolean Indicates if the kernel argument + is pipe qualified. Only present + if "ValueKind" is "Pipe". + + .. TODO + Can GlobalBuffer be pipe + qualified? + ================= ============== ========= ================================ + +.. + + .. table:: AMDHSA Code Object Kernel Code Properties Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table + + ============================ ============== ========= ===================== + String Key Value Type Required? Description + ============================ ============== ========= ===================== + "KernargSegmentSize" integer Required The size in bytes of + the kernarg segment + that holds the values + of the arguments to + the kernel. + "GroupSegmentFixedSize" integer Required The amount of group + segment memory + required by a + work-group in + bytes. This does not + include any + dynamically allocated + group segment memory + that may be added + when the kernel is + dispatched. + "PrivateSegmentFixedSize" integer Required The amount of fixed + private address space + memory required for a + work-item in + bytes. If + IsDynamicCallstack + is 1 then additional + space must be added + to this value for the + call stack. + "KernargSegmentAlign" integer Required The maximum byte + alignment of + arguments in the + kernarg segment. Must + be a power of 2. + "WavefrontSize" integer Required Wavefront size. Must + be a power of 2. + "NumSGPRs" integer Number of scalar + registers used by a + wavefront for + GFX6-GFX9. This + includes the special + SGPRs for VCC, Flat + Scratch (GFX7-GFX9) + and XNACK (for + GFX8-GFX9). It does + not include the 16 + SGPR added if a trap + handler is + enabled. It is not + rounded up to the + allocation + granularity. + "NumVGPRs" integer Number of vector + registers used by + each work-item for + GFX6-GFX9 + "MaxFlatWorkgroupSize" integer Maximum flat + work-group size + supported by the + kernel in work-items. + "IsDynamicCallStack" boolean Indicates if the + generated machine + code is using a + dynamically sized + call stack. + "IsXNACKEnabled" boolean Indicates if the + generated machine + code is capable of + supporting XNACK. + ============================ ============== ========= ===================== + +.. + + .. table:: AMDHSA Code Object Kernel Debug Properties Metadata Mapping + :name: amdgpu-amdhsa-code-object-kernel-debug-properties-metadata-mapping-table + + =================================== ============== ========= ============== + String Key Value Type Required? Description + =================================== ============== ========= ============== + "DebuggerABIVersion" string + "ReservedNumVGPRs" integer + "ReservedFirstVGPR" integer + "PrivateSegmentBufferSGPR" integer + "WavefrontPrivateSegmentOffsetSGPR" integer + =================================== ============== ========= ============== + +.. TODO + Plan to remove the debug properties metadata. + +.. _amdgpu-symbols: + +Symbols +------- + +Symbols include the following: + + .. table:: AMDGPU ELF Symbols + :name: amdgpu-elf-symbols-table + + ===================== ============== ============= ================== + Name Type Section Description + ===================== ============== ============= ================== + *link-name* ``STT_OBJECT`` - ``.data`` Global variable + - ``.rodata`` + - ``.bss`` + *link-name*\ ``@kd`` ``STT_OBJECT`` - ``.rodata`` Kernel descriptor + *link-name* ``STT_FUNC`` - ``.text`` Kernel entry point + ===================== ============== ============= ================== + +Global variable + Global variables both used and defined by the compilation unit. + + If the symbol is defined in the compilation unit then it is allocated in the + appropriate section according to if it has initialized data or is readonly. + + If the symbol is external then its section is ``STN_UNDEF`` and the loader + will resolve relocations using the defintion provided by another code object + or explicitly defined by the runtime. + + All global symbols, whether defined in the compilation unit or external, are + accessed by the machine code indirectly throught a GOT table entry. This + allows them to be preemptable. The GOT table is only supported when the target + triple OS is ``amdhsa`` (see :ref:`amdgpu-target-triples`). + + .. TODO + Add description of linked shared object symbols. Seems undefined symbols + are marked as STT_NOTYPE. + +Kernel descriptor + Every HSA kernel has an associated kernel descriptor. It is the address of the + kernel descriptor that is used in the AQL dispatch packet used to invoke the + kernel, not the kernel entry point. The layout of the HSA kernel descriptor is + defined in :ref:`amdgpu-amdhsa-kernel-descriptor`. + +Kernel entry point + Every HSA kernel also has a symbol for its machine code entry point. + +.. _amdgpu-relocation-records: + +Relocation Records +------------------ + +AMDGPU backend generates ``Elf64_Rela`` relocation records. Supported +relocatable fields are: + +``word32`` + This specifies a 32-bit field occupying 4 bytes with arbitrary byte + alignment. These values use the same byte order as other word values in the + AMD GPU architecture. + +``word64`` + This specifies a 64-bit field occupying 8 bytes with arbitrary byte + alignment. These values use the same byte order as other word values in the + AMD GPU architecture. + +Following notations are used for specifying relocation calculations: + +**A** + Represents the addend used to compute the value of the relocatable field. + +**G** + Represents the offset into the global offset table at which the relocation + entry’s symbol will reside during execution. + +**GOT** + Represents the address of the global offset table. + +**P** + Represents the place (section offset for ``et_rel`` or address for ``et_dyn``) + of the storage unit being relocated (computed using ``r_offset``). + +**S** + Represents the value of the symbol whose index resides in the relocation + entry. + +The following relocation types are supported: + + .. table:: AMDGPU ELF Relocation Records + :name: amdgpu-elf-relocation-records-table + + ========================== ===== ========== ============================== + Relocation Type Value Field Calculation + ========================== ===== ========== ============================== + ``R_AMDGPU_NONE`` 0 *none* *none* + ``R_AMDGPU_ABS32_LO`` 1 ``word32`` (S + A) & 0xFFFFFFFF + ``R_AMDGPU_ABS32_HI`` 2 ``word32`` (S + A) >> 32 + ``R_AMDGPU_ABS64`` 3 ``word64`` S + A + ``R_AMDGPU_REL32`` 4 ``word32`` S + A - P + ``R_AMDGPU_REL64`` 5 ``word64`` S + A - P + ``R_AMDGPU_ABS32`` 6 ``word32`` S + A + ``R_AMDGPU_GOTPCREL`` 7 ``word32`` G + GOT + A - P + ``R_AMDGPU_GOTPCREL32_LO`` 8 ``word32`` (G + GOT + A - P) & 0xFFFFFFFF + ``R_AMDGPU_GOTPCREL32_HI`` 9 ``word32`` (G + GOT + A - P) >> 32 + ``R_AMDGPU_REL32_LO`` 10 ``word32`` (S + A - P) & 0xFFFFFFFF + ``R_AMDGPU_REL32_HI`` 11 ``word32`` (S + A - P) >> 32 + ========================== ===== ========== ============================== + +.. _amdgpu-dwarf: + +DWARF +----- + +Standard DWARF [DWARF]_ Version 2 sections can be generated. These contain +information that maps the code object executable code and data to the source +language constructs. It can be used by tools such as debuggers and profilers. + +Address Space Mapping +~~~~~~~~~~~~~~~~~~~~~ + +The following address space mapping is used: + + .. table:: AMDGPU DWARF Address Space Mapping + :name: amdgpu-dwarf-address-space-mapping-table + + =================== ================= + DWARF Address Space Memory Space + =================== ================= + 1 Private (Scratch) + 2 Local (group/LDS) + *omitted* Global + *omitted* Constant + *omitted* Generic (Flat) + *not supported* Region (GDS) + =================== ================= + +See :ref:`amdgpu-address-spaces` for infomration on the memory space terminology +used in the table. + +An ``address_class`` attribute is generated on pointer type DIEs to specify the +DWARF address space of the value of the pointer when it is in the *private* or +*local* address space. Otherwise the attribute is omitted. + +An ``XDEREF`` operation is generated in location list expressions for variables +that are allocated in the *private* and *local* address space. Otherwise no +``XDREF`` is omitted. + +Register Mapping +~~~~~~~~~~~~~~~~ + +*This section is WIP.* + +.. TODO + Define DWARF register enumeration. + + If want to present a wavefront state then should expose vector registers as + 64 wide (rather than per work-item view that LLVM uses). Either as seperate + registers, or a 64x4 byte single register. In either case use a new LANE op + (akin to XDREF) to select the current lane usage in a location + expression. This would also allow scalar register spilling to vector register + lanes to be expressed (currently no debug information is being generated for + spilling). If choose a wide single register approach then use LANE in + conjunction with PIECE operation to select the dword part of the register for + the current lane. If the separate register approach then use LANE to select + the register. + +Source Text +~~~~~~~~~~~ + +*This section is WIP.* + +.. TODO + DWARF extension to include runtime generated source text. + +.. _amdgpu-code-conventions: + +Code Conventions +================ + +AMDHSA +------ + +This section provides code conventions used when the target triple OS is +``amdhsa`` (see :ref:`amdgpu-target-triples`). + +Kernel Dispatch +~~~~~~~~~~~~~~~ + +The HSA architected queuing language (AQL) defines a user space memory interface +that can be used to control the dispatch of kernels, in an agent independent +way. An agent can have zero or more AQL queues created for it using the ROCm +runtime, in which AQL packets (all of which are 64 bytes) can be placed. See the +*HSA Platform System Architecture Specification* [HSA]_ for the AQL queue +mechanics and packet layouts. + +The packet processor of a kernel agent is responsible for detecting and +dispatching HSA kernels from the AQL queues associated with it. For AMD GPUs the +packet processor is implemented by the hardware command processor (CP), +asynchronous dispatch controller (ADC) and shader processor input controller +(SPI). + +The ROCm runtime can be used to allocate an AQL queue object. It uses the kernel +mode driver to initialize and register the AQL queue with CP. + +To dispatch a kernel the following actions are performed. This can occur in the +CPU host program, or from an HSA kernel executing on a GPU. + +1. A pointer to an AQL queue for the kernel agent on which the kernel is to be + executed is obtained. +2. A pointer to the kernel descriptor (see + :ref:`amdgpu-amdhsa-kernel-descriptor`) of the kernel to execute is + obtained. It must be for a kernel that is contained in a code object that that + was loaded by the ROCm runtime on the kernel agent with which the AQL queue is + associated. +3. Space is allocated for the kernel arguments using the ROCm runtime allocator + for a memory region with the kernarg property for the kernel agent that will + execute the kernel. It must be at least 16 byte aligned. +4. Kernel argument values are assigned to the kernel argument memory + allocation. The layout is defined in the *HSA Programmer’s Language Reference* + [HSA]_. For AMDGPU the kernel execution directly accesses the kernel argument + memory in the same way constant memory is accessed. (Note that the HSA + specification allows an implementation to copy the kernel argument contents to + another location that is accessed by the kernel.) +5. An AQL kernel dispatch packet is created on the AQL queue. The ROCm runtime + api uses 64 bit atomic operations to reserve space in the AQL queue for the + packet. The packet must be set up, and the final write must use an atomic + store release to set the packet kind to ensure the packet contents are + visible to the kernel agent. AQL defines a doorbell signal mechanism to + notify the kernel agent that the AQL queue has been updated. These rules, and + the layout of the AQL queue and kernel dispatch packet is defined in the *HSA + System Architecture Specification* [HSA]_. +6. A kernel dispatch packet includes information about the actual dispatch, + such as grid and work-group size, together with information from the code + object about the kernel, such as segment sizes. The ROCm runtime queries on + the kernel symbol can be used to obtain the code object values which are + recorded in the :ref:`amdgpu-code-object-metadata`. +7. CP executes micro-code and is responsible for detecting and setting up the + GPU to execute the wavefronts of a kernel dispatch. +8. CP ensures that when the a wavefront starts executing the kernel machine + code, the scalar general purpose registers (SGPR) and vector general purpose + registers (VGPR) are set up as required by the machine code. The required + setup is defined in the :ref:`amdgpu-amdhsa-kernel-descriptor`. The initial + register state is defined in + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`. +9. The prolog of the kernel machine code (see + :ref:`amdgpu-amdhsa-kernel-prolog`) sets up the machine state as necessary + before continuing executing the machine code that corresponds to the kernel. +10. When the kernel dispatch has completed execution, CP signals the completion + signal specified in the kernel dispatch packet if not 0. + +.. _amdgpu-amdhsa-memory-spaces: + +Memory Spaces +~~~~~~~~~~~~~ + +The memory space properties are: + + .. table:: AMDHSA Memory Spaces + :name: amdgpu-amdhsa-memory-spaces-table + + ================= =========== ======== ======= ================== + Memory Space Name HSA Segment Hardware Address NULL Value + Name Name Size + ================= =========== ======== ======= ================== + Private private scratch 32 0x00000000 + Local group LDS 32 0xFFFFFFFF + Global global global 64 0x0000000000000000 + Constant constant *same as 64 0x0000000000000000 + global* + Generic flat flat 64 0x0000000000000000 + Region N/A GDS 32 *not implemented + for AMDHSA* + ================= =========== ======== ======= ================== + +The global and constant memory spaces both use global virtual addresses, which +are the same virtual address space used by the CPU. However, some virtual +addresses may only be accessible to the CPU, some only accessible by the GPU, +and some by both. + +Using the constant memory space indicates that the data will not change during +the execution of the kernel. This allows scalar read instructions to be +used. The vector and scalar L1 caches are invalidated of volatile data before +each kernel dispatch execution to allow constant memory to change values between +kernel dispatches. + +The local memory space uses the hardware Local Data Store (LDS) which is +automatically allocated when the hardware creates work-groups of wavefronts, and +freed when all the wavefronts of a work-group have terminated. The data store +(DS) instructions can be used to access it. + +The private memory space uses the hardware scratch memory support. If the kernel +uses scratch, then the hardware allocates memory that is accessed using +wavefront lane dword (4 byte) interleaving. The mapping used from private +address to physical address is: + + ``wavefront-scratch-base + + (private-address * wavefront-size * 4) + + (wavefront-lane-id * 4)`` + +There are different ways that the wavefront scratch base address is determined +by a wavefront (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). This +memory can be accessed in an interleaved manner using buffer instruction with +the scratch buffer descriptor and per wave scratch offset, by the scratch +instructions, or by flat instructions. If each lane of a wavefront accesses the +same private address, the interleaving results in adjacent dwords being accessed +and hence requires fewer cache lines to be fetched. Multi-dword access is not +supported except by flat and scratch instructions in GFX9. + +The generic address space uses the hardware flat address support available in +GFX7-GFX9. This uses two fixed ranges of virtual addresses (the private and +local appertures), that are outside the range of addressible global memory, to +map from a flat address to a private or local address. + +FLAT instructions can take a flat address and access global, private (scratch) +and group (LDS) memory depending in if the address is within one of the +apperture ranges. Flat access to scratch requires hardware aperture setup and +setup in the kernel prologue (see :ref:`amdgpu-amdhsa-flat-scratch`). Flat +access to LDS requires hardware aperture setup and M0 (GFX7-GFX8) register setup +(see :ref:`amdgpu-amdhsa-m0`). + +To convert between a segment address and a flat address the base address of the +appertures address can be used. For GFX7-GFX8 these are available in the +:ref:`amdgpu-amdhsa-hsa-aql-queue` the address of which can be obtained with +Queue Ptr SGPR (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). For +GFX9 the appature base addresses are directly available as inline constant +registers ``SRC_SHARED_BASE/LIMIT`` and ``SRC_PRIVATE_BASE/LIMIT``. In 64 bit +address mode the apperture sizes are 2^32 bytes and the base is aligned to 2^32 +which makes it easier to convert from flat to segment or segment to flat. + +HSA Image and Samplers +~~~~~~~~~~~~~~~~~~~~~~ + +Image and sample handles created by the ROCm runtime are 64 bit addresses of a +hardware 32 byte V# and 48 byte S# object respectively. In order to support the +HSA ``query_sampler`` operations two extra dwords are used to store the HSA BRIG +enumeration values for the queries that are not trivially deducible from the S# +representation. + +HSA Signals +~~~~~~~~~~~ + +Signal handles created by the ROCm runtime are 64 bit addresses of a structure +allocated in memory accessible from both the CPU and GPU. The structure is +defined by the ROCm runtime and subject to change between releases (see +[AMD-ROCm-github]_). + +.. _amdgpu-amdhsa-hsa-aql-queue: + +HSA AQL Queue +~~~~~~~~~~~~~ + +The AQL queue structure is defined by the ROCm runtime and subject to change +between releases (see [AMD-ROCm-github]_). For some processors it contains +fields needed to implement certain language features such as the flat address +aperture bases. It also contains fields used by CP such as managing the +allocation of scratch memory. + +.. _amdgpu-amdhsa-kernel-descriptor: + +Kernel Descriptor +~~~~~~~~~~~~~~~~~ + +A kernel descriptor consists of the information needed by CP to initiate the +execution of a kernel, including the entry point address of the machine code +that implements the kernel. + +Kernel Descriptor for GFX6-GFX9 ++++++++++++++++++++++++++++++++ + +CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. + + .. table:: Kernel Descriptor for GFX6-GFX9 + :name: amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table + + ======= ======= =============================== =========================== + Bits Size Field Name Description + ======= ======= =============================== =========================== + 31:0 4 bytes group_segment_fixed_size The amount of fixed local + address space memory + required for a work-group + in bytes. This does not + include any dynamically + allocated local address + space memory that may be + added when the kernel is + dispatched. + 63:32 4 bytes private_segment_fixed_size The amount of fixed + private address space + memory required for a + work-item in bytes. If + is_dynamic_callstack is 1 + then additional space must + be added to this value for + the call stack. + 95:64 4 bytes max_flat_workgroup_size Maximum flat work-group + size supported by the + kernel in work-items. + 96 1 bit is_dynamic_call_stack Indicates if the generated + machine code is using a + dynamically sized call + stack. + 97 1 bit is_xnack_enabled Indicates if the generated + machine code is capable of + suppoting XNACK. + 127:98 30 bits Reserved. Must be 0. + 191:128 8 bytes kernel_code_entry_byte_offset Byte offset (possibly + negative) from base + address of kernel + descriptor to kernel's + entry point instruction + which must be 256 byte + aligned. + 383:192 24 Reserved. Must be 0. + bytes + 415:384 4 bytes compute_pgm_rsrc1 Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC1`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table`. + 447:416 4 bytes compute_pgm_rsrc2 Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC2`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table`. + 448 1 bit enable_sgpr_private_segment Enable the setup of the + _buffer SGPR user data registers + (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + The total number of SGPR + user data registers + requested must not exceed + 16 and match value in + ``compute_pgm_rsrc2.user_sgpr.user_sgpr_count``. + Any requests beyond 16 + will be ignored. + 449 1 bit enable_sgpr_dispatch_ptr *see above* + 450 1 bit enable_sgpr_queue_ptr *see above* + 451 1 bit enable_sgpr_kernarg_segment_ptr *see above* + 452 1 bit enable_sgpr_dispatch_id *see above* + 453 1 bit enable_sgpr_flat_scratch_init *see above* + 454 1 bit enable_sgpr_private_segment *see above* + _size + 455 1 bit enable_sgpr_grid_workgroup Not implemented in CP and + _count_X should always be 0. + 456 1 bit enable_sgpr_grid_workgroup Not implemented in CP and + _count_Y should always be 0. + 457 1 bit enable_sgpr_grid_workgroup Not implemented in CP and + _count_Z should always be 0. + 463:458 6 bits Reserved. Must be 0. + 511:464 4 Reserved. Must be 0. + bytes + 512 **Total size 64 bytes.** + ======= =================================================================== + +.. + + .. table:: compute_pgm_rsrc1 for GFX6-GFX9 + :name: amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 5:0 6 bits granulated_workitem_vgpr_count Number of vector registers + used by each work-item, + granularity is device + specific: + + GFX6-9 + roundup((max-vgpg + 1) + / 4) - 1 + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.VGPRS``. + 9:6 4 bits granulated_wavefront_sgpr_count Number of scalar registers + used by a wavefront, + granularity is device + specific: + + GFX6-8 + roundup((max-sgpg + 1) + / 8) - 1 + GFX9 + roundup((max-sgpg + 1) + / 16) - 1 + + Includes the special SGPRs + for VCC, Flat Scratch (for + GFX7 onwards) and XNACK + (for GFX8 onwards). It does + not include the 16 SGPR + added if a trap handler is + enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.SGPRS``. + 11:10 2 bits priority Must be 0. + + Start executing wavefront + at the specified priority. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.PRIORITY``. + 13:12 2 bits float_mode_round_32 Wavefront starts execution + with specified rounding + mode for single (32 + bit) floating point + precision floating point + operations. + + Floating point rounding + mode values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 15:14 2 bits float_mode_round_16_64 Wavefront starts execution + with specified rounding + denorm mode for half/double (16 + and 64 bit) floating point + precision floating point + operations. + + Floating point rounding + mode values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 17:16 2 bits float_mode_denorm_32 Wavefront starts execution + with specified denorm mode + for single (32 + bit) floating point + precision floating point + operations. + + Floating point denorm mode + values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 19:18 2 bits float_mode_denorm_16_64 Wavefront starts execution + with specified denorm mode + for half/double (16 + and 64 bit) floating point + precision floating point + operations. + + Floating point denorm mode + values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 20 1 bit priv Must be 0. + + Start executing wavefront + in privilege trap handler + mode. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.PRIV``. + 21 1 bit enable_dx10_clamp Wavefront starts execution + with DX10 clamp mode + enabled. Used by the vector + ALU to force DX-10 style + treatment of NaN's (when + set, clamp NaN to zero, + otherwise pass NaN + through). + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.DX10_CLAMP``. + 22 1 bit debug_mode Must be 0. + + Start executing wavefront + in single step mode. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.DEBUG_MODE``. + 23 1 bit enable_ieee_mode Wavefront starts execution + with IEEE mode + enabled. Floating point + opcodes that support + exception flag gathering + will quiet and propagate + signaling-NaN inputs per + IEEE 754-2008. Min_dx10 and + max_dx10 become IEEE + 754-2008 compliant due to + signaling-NaN propagation + and quieting. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.IEEE_MODE``. + 24 1 bit bulky Must be 0. + + Only one work-group allowed + to execute on a compute + unit. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.BULKY``. + 25 1 bit cdbg_user Must be 0. + + Flag that can be used to + control debugging code. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.CDBG_USER``. + 31:26 6 bits Reserved. Must be 0. + 32 **Total size 4 bytes** + ======= =================================================================================================================== + +.. + + .. table:: compute_pgm_rsrc2 for GFX6-GFX9 + :name: amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 0 1 bit enable_sgpr_private_segment Enable the setup of the + _wave_offset SGPR wave scratch offset + system register (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.SCRATCH_EN``. + 5:1 5 bits user_sgpr_count The total number of SGPR + user data registers + requested. This number must + match the number of user + data registers enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.USER_SGPR``. + 6 1 bit enable_trap_handler Set to 1 if code contains a + TRAP instruction which + requires a trap hander to + be enabled. + + CP sets + ``COMPUTE_PGM_RSRC2.TRAP_PRESENT`` + if the runtime has + installed a trap handler + regardless of the setting + of this field. + 7 1 bit enable_sgpr_workgroup_id_x Enable the setup of the + system SGPR register for + the work-group id in the X + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_X_EN``. + 8 1 bit enable_sgpr_workgroup_id_y Enable the setup of the + system SGPR register for + the work-group id in the Y + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_Y_EN``. + 9 1 bit enable_sgpr_workgroup_id_z Enable the setup of the + system SGPR register for + the work-group id in the Z + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_Z_EN``. + 10 1 bit enable_sgpr_workgroup_info Enable the setup of the + system SGPR register for + work-group information (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_SIZE_EN``. + 12:11 2 bits enable_vgpr_workitem_id Enable the setup of the + VGPR system registers used + for the work-item ID. + :ref:`amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table` + defines the values. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT``. + 13 1 bit enable_exception_address_watch Must be 0. + + Wavefront starts execution + with address watch + exceptions enabled which + are generated when L1 has + witnessed a thread access + an *address of + interest*. + + CP is responsible for + filling in the address + watch bit in + ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` + according to what the + runtime requests. + 14 1 bit enable_exception_memory Must be 0. + + Wavefront starts execution + with memory violation + exceptions exceptions + enabled which are generated + when a memory violation has + occurred for this wave from + L1 or LDS + (write-to-read-only-memory, + mis-aligned atomic, LDS + address out of range, + illegal address, etc.). + + CP sets the memory + violation bit in + ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` + according to what the + runtime requests. + 23:15 9 bits granulated_lds_size Must be 0. + + CP uses the rounded value + from the dispatch packet, + not this value, as the + dispatch may contain + dynamically allocated group + segment memory. CP writes + directly to + ``COMPUTE_PGM_RSRC2.LDS_SIZE``. + + Amount of group segment + (LDS) to allocate for each + work-group. Granularity is + device specific: + + GFX6: + roundup(lds-size / (64 * 4)) + GFX7-GFX9: + roundup(lds-size / (128 * 4)) + + 24 1 bit enable_exception_ieee_754_fp Wavefront starts execution + _invalid_operation with specified exceptions + enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.EXCP_EN`` + (set from bits 0..6). + + IEEE 754 FP Invalid + Operation + 25 1 bit enable_exception_fp_denormal FP Denormal one or more + _source input operands is a + denormal number + 26 1 bit enable_exception_ieee_754_fp IEEE 754 FP Division by + _division_by_zero Zero + 27 1 bit enable_exception_ieee_754_fp IEEE 754 FP FP Overflow + _overflow + 28 1 bit enable_exception_ieee_754_fp IEEE 754 FP Underflow + _underflow + 29 1 bit enable_exception_ieee_754_fp IEEE 754 FP Inexact + _inexact + 30 1 bit enable_exception_int_divide_by Integer Division by Zero + _zero (rcp_iflag_f32 instruction + only) + 31 1 bit Reserved. Must be 0. + 32 **Total size 4 bytes.** + ======= =================================================================================================================== + +.. + + .. table:: Floating Point Rounding Mode Enumeration Values + :name: amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table + + ===================================== ===== =============================== + Enumeration Name Value Description + ===================================== ===== =============================== + AMD_FLOAT_ROUND_MODE_NEAR_EVEN 0 Round Ties To Even + AMD_FLOAT_ROUND_MODE_PLUS_INFINITY 1 Round Toward +infinity + AMD_FLOAT_ROUND_MODE_MINUS_INFINITY 2 Round Toward -infinity + AMD_FLOAT_ROUND_MODE_ZERO 3 Round Toward 0 + ===================================== ===== =============================== + +.. + + .. table:: Floating Point Denorm Mode Enumeration Values + :name: amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table + + ===================================== ===== =============================== + Enumeration Name Value Description + ===================================== ===== =============================== + AMD_FLOAT_DENORM_MODE_FLUSH_SRC_DST 0 Flush Source and Destination + Denorms + AMD_FLOAT_DENORM_MODE_FLUSH_DST 1 Flush Output Denorms + AMD_FLOAT_DENORM_MODE_FLUSH_SRC 2 Flush Source Denorms + AMD_FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush + ===================================== ===== =============================== + +.. + + .. table:: System VGPR Work-Item ID Enumeration Values + :name: amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table + + ===================================== ===== =============================== + Enumeration Name Value Description + ===================================== ===== =============================== + AMD_SYSTEM_VGPR_WORKITEM_ID_X 0 Set work-item X dimension ID. + AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y 1 Set work-item X and Y + dimensions ID. + AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z 2 Set work-item X, Y and Z + dimensions ID. + AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3 Undefined. + ===================================== ===== =============================== + +.. _amdgpu-amdhsa-initial-kernel-execution-state: + +Initial Kernel Execution State +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This section defines the register state that will be set up by the packet +processor prior to the start of execution of every wavefront. This is limited by +the constraints of the hardware controllers of CP/ADC/SPI. + +The order of the SGPR registers is defined, but the compiler can specify which +ones are actually setup in the kernel descriptor using the ``enable_sgpr_*`` bit +fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). The register numbers used +for enabled registers are dense starting at SGPR0: the first enabled register is +SGPR0, the next enabled register is SGPR1 etc.; disabled registers do not have +an SGPR number. + +The initial SGPRs comprise up to 16 User SRGPs that are set by CP and apply to +all waves of the grid. It is possible to specify more than 16 User SGPRs using +the ``enable_sgpr_*`` bit fields, in which case only the first 16 are actually +initialized. These are then immediately followed by the System SGPRs that are +set up by ADC/SPI and can have different values for each wave of the grid +dispatch. + +SGPR register initial state is defined in +:ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + + .. table:: SGPR Register Set Up Order + :name: amdgpu-amdhsa-sgpr-register-set-up-order-table + + ========== ========================== ====== ============================== + SGPR Order Name Number Description + (kernel descriptor enable of + field) SGPRs + ========== ========================== ====== ============================== + First Private Segment Buffer 4 V# that can be used, together + (enable_sgpr_private with Scratch Wave Offset as an + _segment_buffer) offset, to access the private + memory space using a segment + address. + + CP uses the value provided by + the runtime. + then Dispatch Ptr 2 64 bit address of AQL dispatch + (enable_sgpr_dispatch_ptr) packet for kernel dispatch + actually executing. + then Queue Ptr 2 64 bit address of amd_queue_t + (enable_sgpr_queue_ptr) object for AQL queue on which + the dispatch packet was + queued. + then Kernarg Segment Ptr 2 64 bit address of Kernarg + (enable_sgpr_kernarg segment. This is directly + _segment_ptr) copied from the + kernarg_address in the kernel + dispatch packet. + + Having CP load it once avoids + loading it at the beginning of + every wavefront. + then Dispatch Id 2 64 bit Dispatch ID of the + (enable_sgpr_dispatch_id) dispatch packet being + executed. + then Flat Scratch Init 2 This is 2 SGPRs: + (enable_sgpr_flat_scratch + _init) GFX6 + Not supported. + GFX7-GFX8 + The first SGPR is a 32 bit + byte offset from + ``SH_HIDDEN_PRIVATE_BASE_VIMID`` + to per SPI base of memory + for scratch for the queue + executing the kernel + dispatch. CP obtains this + from the runtime. + + This is the same offset used + in computing the Scratch + Segment Buffer base + address. The value of + Scratch Wave Offset must be + added by the kernel machine + code and moved to SGPRn-4 + for use as the FLAT SCRATCH + BASE in flat memory + instructions. + + The second SGPR is 32 bit + byte size of a single + work-item’s scratch memory + usage. This is directly + loaded from the kernel + dispatch packet Private + Segment Byte Size and + rounded up to a multiple of + DWORD. + + The kernel code must move to + SGPRn-3 for use as the FLAT + SCRATCH SIZE in flat memory + instructions. Having CP load + it once avoids loading it at + the beginning of every + wavefront. + GFX9 + This is the 64 bit base + address of the per SPI + scratch backing memory + managed by SPI for the queue + executing the kernel + dispatch. CP obtains this + from the runtime (and + divides it if there are + multiple Shader Arrays each + with its own SPI). The value + of Scratch Wave Offset must + be added by the kernel + machine code and moved to + SGPRn-4 and SGPRn-3 for use + as the FLAT SCRATCH BASE in + flat memory instructions. + then Private Segment Size 1 The 32 bit byte size of a + (enable_sgpr_private single work-item’s scratch + _segment_size) memory allocation. This is the + value from the kernel dispatch + packet Private Segment Byte + Size rounded up by CP to a + multiple of DWORD. + + Having CP load it once avoids + loading it at the beginning of + every wavefront. + + This is not used for + GFX7-GFX8 since it is the same + value as the second SGPR of + Flat Scratch Init. However, it + may be needed for GFX9 which + changes the meaning of the + Flat Scratch Init value. + then Grid Work-Group Count X 1 32 bit count of the number of + (enable_sgpr_grid work-groups in the X dimension + _workgroup_count_X) for the grid being + executed. Computed from the + fields in the kernel dispatch + packet as ((grid_size.x + + workgroup_size.x - 1) / + workgroup_size.x). + then Grid Work-Group Count Y 1 32 bit count of the number of + (enable_sgpr_grid work-groups in the Y dimension + _workgroup_count_Y && for the grid being + less than 16 previous executed. Computed from the + SGPRs) fields in the kernel dispatch + packet as ((grid_size.y + + workgroup_size.y - 1) / + workgroupSize.y). + + Only initialized if <16 + previous SGPRs initialized. + then Grid Work-Group Count Z 1 32 bit count of the number of + (enable_sgpr_grid work-groups in the Z dimension + _workgroup_count_Z && for the grid being + less than 16 previous executed. Computed from the + SGPRs) fields in the kernel dispatch + packet as ((grid_size.z + + workgroup_size.z - 1) / + workgroupSize.z). + + Only initialized if <16 + previous SGPRs initialized. + then Work-Group Id X 1 32 bit work-group id in X + (enable_sgpr_workgroup_id dimension of grid for + _X) wavefront. + then Work-Group Id Y 1 32 bit work-group id in Y + (enable_sgpr_workgroup_id dimension of grid for + _Y) wavefront. + then Work-Group Id Z 1 32 bit work-group id in Z + (enable_sgpr_workgroup_id dimension of grid for + _Z) wavefront. + then Work-Group Info 1 {first_wave, 14’b0000, + (enable_sgpr_workgroup ordered_append_term[10:0], + _info) threadgroup_size_in_waves[5:0]} + then Scratch Wave Offset 1 32 bit byte offset from base + (enable_sgpr_private of scratch base of queue + _segment_wave_offset) executing the kernel + dispatch. Must be used as an + offset with Private + segment address when using + Scratch Segment Buffer. It + must be used to set up FLAT + SCRATCH for flat addressing + (see + :ref:`amdgpu-amdhsa-flat-scratch`). + ========== ========================== ====== ============================== + +The order of the VGPR registers is defined, but the compiler can specify which +ones are actually setup in the kernel descriptor using the ``enable_vgpr*`` bit +fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). The register numbers used +for enabled registers are dense starting at VGPR0: the first enabled register is +VGPR0, the next enabled register is VGPR1 etc.; disabled registers do not have a +VGPR number. + +VGPR register initial state is defined in +:ref:`amdgpu-amdhsa-vgpr-register-set-up-order-table`. + + .. table:: VGPR Register Set Up Order + :name: amdgpu-amdhsa-vgpr-register-set-up-order-table + + ========== ========================== ====== ============================== + VGPR Order Name Number Description + (kernel descriptor enable of + field) VGPRs + ========== ========================== ====== ============================== + First Work-Item Id X 1 32 bit work item id in X + (Always initialized) dimension of work-group for + wavefront lane. + then Work-Item Id Y 1 32 bit work item id in Y + (enable_vgpr_workitem_id dimension of work-group for + > 0) wavefront lane. + then Work-Item Id Z 1 32 bit work item id in Z + (enable_vgpr_workitem_id dimension of work-group for + > 1) wavefront lane. + ========== ========================== ====== ============================== + +The setting of registers is is done by GPU CP/ADC/SPI hardware as follows: + +1. SGPRs before the Work-Group Ids are set by CP using the 16 User Data + registers. +2. Work-group Id registers X, Y, Z are set by ADC which supports any + combination including none. +3. Scratch Wave Offset is set by SPI in a per wave basis which is why its value + cannot included with the flat scratch init value which is per queue. +4. The VGPRs are set by SPI which only supports specifying either (X), (X, Y) + or (X, Y, Z). + +Flat Scratch register pair are adjacent SGRRs so they can be moved as a 64 bit +value to the hardware required SGPRn-3 and SGPRn-4 respectively. + +The global segment can be accessed either using buffer instructions (GFX6 which +has V# 64 bit address support), flat instructions (GFX7-9), or global +instructions (GFX9). + +If buffer operations are used then the compiler can generate a V# with the +following properties: + +* base address of 0 +* no swizzle +* ATC: 1 if IOMMU present (such as APU) +* ptr64: 1 +* MTYPE set to support memory coherence that matches the runtime (such as CC for + APU and NC for dGPU). + +.. _amdgpu-amdhsa-kernel-prolog: + +Kernel Prolog +~~~~~~~~~~~~~ + +.. _amdgpu-amdhsa-m0: + +M0 +++ + +GFX6-GFX8 + The M0 register must be initialized with a value at least the total LDS size + if the kernel may access LDS via DS or flat operations. Total LDS size is + available in dispatch packet. For M0, it is also possible to use maximum + possible value of LDS for given target (0x7FFF for GFX6 and 0xFFFF for + GFX7-GFX8). +GFX9 + The M0 register is not used for range checking LDS accesses and so does not + need to be initialized in the prolog. + +.. _amdgpu-amdhsa-flat-scratch: + +Flat Scratch +++++++++++++ + +If the kernel may use flat operations to access scratch memory, the prolog code +must set up FLAT_SCRATCH register pair (FLAT_SCRATCH_LO/FLAT_SCRATCH_HI which +are in SGPRn-4/SGPRn-3). Initialization uses Flat Scratch Init and Scratch Wave +Offset SGPR registers (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`): + +GFX6 + Flat scratch is not supported. + +GFX7-8 + 1. The low word of Flat Scratch Init is 32 bit byte offset from + ``SH_HIDDEN_PRIVATE_BASE_VIMID`` to the base of scratch backing memory + being managed by SPI for the queue executing the kernel dispatch. This is + the same value used in the Scratch Segment Buffer V# base address. The + prolog must add the value of Scratch Wave Offset to get the wave's byte + scratch backing memory offset from ``SH_HIDDEN_PRIVATE_BASE_VIMID``. Since + FLAT_SCRATCH_LO is in units of 256 bytes, the offset must be right shifted + by 8 before moving into FLAT_SCRATCH_LO. + 2. The second word of Flat Scratch Init is 32 bit byte size of a single + work-items scratch memory usage. This is directly loaded from the kernel + dispatch packet Private Segment Byte Size and rounded up to a multiple of + DWORD. Having CP load it once avoids loading it at the beginning of every + wavefront. The prolog must move it to FLAT_SCRATCH_LO for use as FLAT SCRATCH + SIZE. +GFX9 + The Flat Scratch Init is the 64 bit address of the base of scratch backing + memory being managed by SPI for the queue executing the kernel dispatch. The + prolog must add the value of Scratch Wave Offset and moved to the FLAT_SCRATCH + pair for use as the flat scratch base in flat memory instructions. + +.. _amdgpu-amdhsa-memory-model: + +Memory Model +~~~~~~~~~~~~ + +This section describes the mapping of LLVM memory model onto AMDGPU machine code +(see :ref:`memmodel`). *The implementation is WIP.* + +.. TODO + Update when implementation complete. + + Support more relaxed OpenCL memory model to be controled by environment + component of target triple. + +The AMDGPU backend supports the memory synchronization scopes specified in +:ref:`amdgpu-memory-scopes`. + +The code sequences used to implement the memory model are defined in table +:ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`. + +The sequences specify the order of instructions that a single thread must +execute. The ``s_waitcnt`` and ``buffer_wbinvl1_vol`` are defined with respect +to other memory instructions executed by the same thread. This allows them to be +moved earlier or later which can allow them to be combined with other instances +of the same instruction, or hoisted/sunk out of loops to improve +performance. Only the instructions related to the memory model are given; +additional ``s_waitcnt`` instructions are required to ensure registers are +defined before being used. These may be able to be combined with the memory +model ``s_waitcnt`` instructions as described above. + +The AMDGPU memory model supports both the HSA [HSA]_ memory model, and the +OpenCL [OpenCL]_ memory model. The HSA memory model uses a single happens-before +relation for all address spaces (see :ref:`amdgpu-address-spaces`). The OpenCL +memory model which has separate happens-before relations for the global and +local address spaces, and only a fence specifying both global and local address +space joins the relationships. Since the LLVM ``memfence`` instruction does not +allow an address space to be specified the OpenCL fence has to convervatively +assume both local and global address space was specified. However, optimizations +can often be done to eliminate the additional ``s_waitcnt``instructions when +there are no intervening corresponding ``ds/flat_load/store/atomic`` memory +instructions. The code sequences in the table indicate what can be omitted for +the OpenCL memory. The target triple environment is used to determine if the +source language is OpenCL (see :ref:`amdgpu-opencl`). + +``ds/flat_load/store/atomic`` instructions to local memory are termed LDS +operations. + +``buffer/global/flat_load/store/atomic`` instructions to global memory are +termed vector memory operations. + +For GFX6-GFX9: + +* Each agent has multiple compute units (CU). +* Each CU has multiple SIMDs that execute wavefronts. +* The wavefronts for a single work-group are executed in the same CU but may be + executed by different SIMDs. +* Each CU has a single LDS memory shared by the wavefronts of the work-groups + executing on it. +* All LDS operations of a CU are performed as wavefront wide operations in a + global order and involve no caching. Completion is reported to a wavefront in + execution order. +* The LDS memory has multiple request queues shared by the SIMDs of a + CU. Therefore, the LDS operations performed by different waves of a work-group + can be reordered relative to each other, which can result in reordering the + visibility of vector memory operations with respect to LDS operations of other + wavefronts in the same work-group. A ``s_waitcnt lgkmcnt(0)`` is required to + ensure synchonization between LDS operations and vector memory operations + between waves of a work-group, but not between operations performed by the + same wavefront. +* The vector memory operations are performed as wavefront wide operations and + completion is reported to a wavefront in execution order. The exception is + that for GFX7-9 ``flat_load/store/atomic`` instructions can report out of + vector memory order if they access LDS memory, and out of LDS operation order + if they access global memory. +* The vector memory operations access a vector L1 cache shared by all wavefronts + on a CU. Therefore, no special action is required for coherence between + wavefronts in the same work-group. A ``buffer_wbinvl1_vol`` is required for + coherence between waves executing in different work-groups as they may be + executing on different CUs. +* The scalar memory operations access a scalar L1 cache shared by all wavefronts + on a group of CUs. The scalar and vector L1 caches are not coherent. However, + scalar operations are used in a restricted way so do not impact the memory + model. See :ref:`amdgpu-amdhsa-memory-spaces`. +* The vector and scalar memory operations use an L2 cache shared by all CUs on + the same agent. +* The L2 cache has independent channels to service disjoint ranges of virtual + addresses. +* Each CU has a separate request queue per channel. Therefore, the vector and + scalar memory operations performed by waves executing in different work-groups + (which may be executing on different CUs) of an agent can be reordered + relative to each other. A ``s_waitcnt vmcnt(0)`` is required to ensure + synchonization between vector memory operations of different CUs. It ensures a + previous vector memory operation has completed before executing a subsequent + vector memory or LDS operation and so can be used to meet the requirements of + acquire and release. +* The L2 cache can be kept coherent with other agents on some targets, or ranges + of virtual addresses can be set up to bypass it to ensure system coherence. + +Private address space uses ``buffer_load/store`` using the scratch V# (GFX6-8), +or ``scratch_load/store`` (GFX9). Since only a single thread is accessing the +memory, atomic memory orderings are not meaningful and all accesses are treated +as non-atomic. + +Constant address space uses ``buffer/global_load`` instructions (or equivalent +scalar memory instructions). Since the constant address space contents do not +change during the execution of a kernel dispatch it is not legal to perform +stores, and atomic memory orderings are not meaningful and all access are +treated as non-atomic. + +A memory synchronization scope wider than work-group is not meaningful for the +group (LDS) address space and is treated as work-group. + +The memory model does not support the region address space which is treated as +non-atomic. + +Acquire memory ordering is not meaningful on store atomic instructions and is +treated as non-atomic. + +Release memory ordering is not meaningful on load atomic instructions and is +treated a non-atomic. + +Acquire-release memory ordering is not meaningful on load or store atomic +instructions and is treated as acquire and release respectively. + +AMDGPU backend only uses scalar memory operations to access memory that is +proven to not change during the execution of the kernel dispatch. This includes +constant address space and global address space for program scope const +variables. Therefore the kernel machine code does not have to maintain the +scalar L1 cache to ensure it is coherent with the vector L1 cache. The scalar +and vector L1 caches are invalidated between kernel dispatches by CP since +constant address space data may change between kernel dispatch executions. See +:ref:`amdgpu-amdhsa-memory-spaces`. + +The one exeception is if scalar writes are used to spill SGPR registers. In this +case the AMDGPU backend ensures the memory location used to spill is never +accessed by vector memory operations at the same time. If scalar writes are used +then a ``s_dcache_wb`` is inserted before the ``s_endpgm`` and before a function +return since the locations may be used for vector memory instructions by a +future wave that uses the same scratch area, or a function call that creates a +frame at the same address, respectively. There is no need for a ``s_dcache_inv`` +as all scalar writes are write-before-read in the same thread. + +Scratch backing memory (which is used for the private address space) is accessed +with MTYPE NC_NV (non-coherenent non-volatile). Since the private address space +is only accessed by a single thread, and is always write-before-read, +there is never a need to invalidate these entries from the L1 cache. Hence all +cache invalidates are done as ``*_vol`` to only invalidate the volatile cache +lines. + +On dGPU the kernarg backing memory is accessed as UC (uncached) to avoid needing +to invalidate the L2 cache. This also causes it to be treated as non-volatile +and so is not invalidated by ``*_vol``. On APU it is accessed as CC (cache +coherent) and so the L2 cache will coherent with the CPU and other agents. + + .. table:: AMDHSA Memory Model Code Sequences GFX6-GFX9 + :name: amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table + + ============ ============ ============== ========== ======================= + LLVM Instr LLVM Memory LLVM Memory AMDGPU AMDGPU Machine Code + Ordering Sync Scope Address + Space + ============ ============ ============== ========== ======================= + **Non-Atomic** + --------------------------------------------------------------------------- + load *none* *none* - global non-volatile + - generic 1. buffer/global/flat_load + volatile + 1. buffer/global/flat_load + glc=1 + load *none* *none* - local 1. ds_load + store *none* *none* - global 1. buffer/global/flat_store + - generic + store *none* *none* - local 1. ds_store + **Unordered Atomic** + --------------------------------------------------------------------------- + load atomic unordered *any* *any* *Same as non-atomic*. + store atomic unordered *any* *any* *Same as non-atomic*. + atomicrmw unordered *any* *any* *Same as monotonic + atomic*. + **Monotonic Atomic** + --------------------------------------------------------------------------- + load atomic monotonic - singlethread - global 1. buffer/global/flat_load + - wavefront - generic + - workgroup + load atomic monotonic - singlethread - local 1. ds_load + - wavefront + - workgroup + load atomic monotonic - agent - global 1. buffer/global/flat_load + - system - generic glc=1 + store atomic monotonic - singlethread - global 1. buffer/global/flat_store + - wavefront - generic + - workgroup + - agent + - system + store atomic monotonic - singlethread - local 1. ds_store + - wavefront + - workgroup + atomicrmw monotonic - singlethread - global 1. buffer/global/flat_atomic + - wavefront - generic + - workgroup + - agent + - system + atomicrmw monotonic - singlethread - local 1. ds_atomic + - wavefront + - workgroup + **Acquire Atomic** + --------------------------------------------------------------------------- + load atomic acquire - singlethread - global 1. buffer/global/ds/flat_load + - wavefront - local + - generic + load atomic acquire - workgroup - global 1. buffer/global_load + load atomic acquire - workgroup - local 1. ds/flat_load + - generic 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + + load atomic acquire - agent - global 1. buffer/global_load + - system glc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale global data. + + load atomic acquire - agent - generic 1. flat_load glc=1 + - system 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the flat_load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acquire - workgroup - global 1. buffer/global_atomic + atomicrmw acquire - workgroup - local 1. ds/flat_atomic + - generic 2. waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the + atomicrmw value + being acquired. + + atomicrmw acquire - agent - global 1. buffer/global_atomic + - system 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - agent - generic 1. flat_atomic + - system 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acquire - singlethread *none* *none* + - wavefront + fence acquire - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + waitcnt. However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the + value read by the + fence-paired-atomic. + + fence acquire - agent *none* 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + group/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures that the + fence-paired atomic + has completed + before invalidating + the + cache. Therefore + any following + locations read must + be no older than + the value read by + the + fence-paired-atomic. + + 2. buffer_wbinvl1_vol + + - Must happen before + any following global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + **Release Atomic** + --------------------------------------------------------------------------- + store atomic release - singlethread - global 1. buffer/global/ds/flat_store + - wavefront - local + - generic + store atomic release - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + to local have + completed before + performing the + store that is being + released. + + 2. buffer/global/flat_store + store atomic release - workgroup - local 1. ds_store + store atomic release - agent - global 1. s_waitcnt vmcnt(0) & + - system - generic lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + to global have + completed before + performing the + store that is being + released. + + 2. buffer/global/ds/flat_store + atomicrmw release - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw release - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global/flat_atomic + atomicrmw release - workgroup - local 1. ds_atomic + atomicrmw release - agent - global 1. s_waitcnt vmcnt(0) & + - system - generic lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global and local + have completed + before performing + the atomicrmw that + is being released. + + 2. buffer/global/ds/flat_atomic* + fence release - singlethread *none* *none* + - wavefront + fence release - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + waitcnt. However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + to local have + completed before + performing the + following + fence-paired-atomic. + + fence release - agent *none* 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + to global have + completed before + performing the + following + fence-paired-atomic. + + **Acquire-Release Atomic** + --------------------------------------------------------------------------- + atomicrmw acq_rel - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acq_rel - workgroup - global 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + atomicrmw acq_rel - workgroup - local 1. ds_atomic + 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + + atomicrmw acq_rel - workgroup - generic 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit + waitcnt. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + atomicrmw acq_rel - agent - global 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + 3. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acq_rel - agent - generic 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acq_rel - singlethread *none* *none* + - wavefront + fence acq_rel - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + waitcnt. However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing any + following global + memory operations. + - Ensures that the + preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic) + has completed + before following + global memory + operations. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + local/generic store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + This satisfies the + requirements of + release. + + fence acq_rel - agent *none* 1. s_waitcnt vmcnt(0) & + - system lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures that the + preceding + global/local/generic + load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic) + has completed + before invalidating + the cache. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + global/local/generic + store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + This satisfies the + requirements of + release. + + 2. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. This + satisfies the + requirements of + acquire. + + **Sequential Consistent Atomic** + --------------------------------------------------------------------------- + load atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local load atomic acquire*. + - workgroup - generic + load atomic seq_cst - agent - global 1. s_waitcnt vmcnt(0) + - system - local + - generic - Must happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vmcnt(0) and so do + not need to be + considered.) + - Ensures any + preceding + sequential + consistent global + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + waitcnt vmcnt(0) of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + competing out of + order.) + + 2. *Following + instructions same as + corresponding load + atomic acquire*. + + store atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local store atomic release*. + - workgroup - generic + store atomic seq_cst - agent - global *Same as corresponding + - system - generic store atomic release*. + atomicrmw seq_cst - singlethread - global *Same as corresponding + - wavefront - local atomicrmw acq_rel*. + - workgroup - generic + atomicrmw seq_cst - agent - global *Same as corresponding + - system - generic atomicrmw acq_rel*. + fence seq_cst - singlethread *none* *Same as corresponding + - wavefront fence acq_rel*. + - workgroup + - agent + - system + ============ ============ ============== ========== ======================= + +The memory order also adds the single thread optimization constrains defined in +table +:ref:`amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table`. + + .. table:: AMDHSA Memory Model Single Thread Optimization Constraints GFX6-GFX9 + :name: amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table + + ============ ============================================================== + LLVM Memory Optimization Constraints + Ordering + ============ ============================================================== + unordered *none* + monotonic *none* + acquire - If a load atomic/atomicrmw then no following load/load + atomic/store/ store atomic/atomicrmw/fence instruction can + be moved before the acquire. + - If a fence then same as load atomic, plus no preceding + associated fence-paired-atomic can be moved after the fence. + release - If a store atomic/atomicrmw then no preceeding load/load + atomic/store/ store atomic/atomicrmw/fence instruction can + be moved after the release. + - If a fence then same as store atomic, plus no following + associated fence-paired-atomic can be moved before the + fence. + acq_rel Same constraints as both acquire and release. + seq_cst - If a load atomic then same constraints as acquire, plus no + preceding sequentially consistent load atomic/store + atomic/atomicrmw/fence instruction can be moved after the + seq_cst. + - If a store atomic then the same constraints as release, plus + no following sequentially consistent load atomic/store + atomic/atomicrmw/fence instruction can be moved before the + seq_cst. + - If an atomicrmw/fence then same constraints as acq_rel. + ============ ============================================================== + +Trap Handler ABI +~~~~~~~~~~~~~~~~ + +For code objects generated by AMDGPU backend for HSA [HSA]_ compatible runtimes +(such as ROCm [AMD-ROCm]_), the runtime installs a trap handler that supports +the ``s_trap`` instruction with the following usage: + + .. table:: AMDGPU Trap Handler for AMDHSA OS + :name: amdgpu-trap-handler-for-amdhsa-os-table + + =================== =============== =============== ======================= + Usage Code Sequence Trap Handler Description + Inputs + =================== =============== =============== ======================= + reserved ``s_trap 0x00`` Reserved by hardware. + ``debugtrap(arg)`` ``s_trap 0x01`` ``SGPR0-1``: Reserved for HSA + ``queue_ptr`` ``debugtrap`` + ``VGPR0``: intrinsic (not + ``arg`` implemented). + ``llvm.trap`` ``s_trap 0x02`` ``SGPR0-1``: Causes dispatch to be + ``queue_ptr`` terminated and its + associated queue put + into the error state. + ``llvm.debugtrap`` ``s_trap 0x03`` ``SGPR0-1``: If debugger not + ``queue_ptr`` installed handled + same as ``llvm.trap``. + debugger breakpoint ``s_trap 0x07`` Reserved for debugger + breakpoints. + debugger ``s_trap 0x08`` Reserved for debugger. + debugger ``s_trap 0xfe`` Reserved for debugger. + debugger ``s_trap 0xff`` Reserved for debugger. + =================== =============== =============== ======================= + +Non-AMDHSA +---------- Trap Handler ABI ----------------- -The OS element of the target triple controls the trap handler behavior. - -HSA OS -^^^^^^ -For code objects generated by AMDGPU back-end for the HSA OS, the runtime -installs a trap handler that supports the s_trap instruction with the following -usage: - - +--------------+-------------+-------------------+----------------------------+ - |Usage |Code Sequence|Trap Handler Inputs|Description | - +==============+=============+===================+============================+ - |reserved |s_trap 0x00 | |Reserved by hardware. | - +--------------+-------------+-------------------+----------------------------+ - |HSA debugtrap |s_trap 0x01 |SGPR0-1: queue_ptr |Reserved for HSA debugtrap | - |(arg) | |VGPR0: arg |intrinsic (not implemented).| - +--------------+-------------+-------------------+----------------------------+ - |llvm.trap |s_trap 0x02 |SGPR0-1: queue_ptr |Causes dispatch to be | - | | | |terminated and its | - | | | |associated queue put into | - | | | |the error state. | - +--------------+-------------+-------------------+----------------------------+ - |llvm.debugtrap| s_trap 0x03 |SGPR0-1: queue_ptr |If debugger not installed | - | | | |handled same as llvm.trap. | - +--------------+-------------+-------------------+----------------------------+ - |debugger |s_trap 0x07 | |Reserved for debugger | - |breakpoint | | |breakpoints. | - +--------------+-------------+-------------------+----------------------------+ - |debugger |s_trap 0x08 | |Reserved for debugger. | - +--------------+-------------+-------------------+----------------------------+ - |debugger |s_trap 0xfe | |Reserved for debugger. | - +--------------+-------------+-------------------+----------------------------+ - |debugger |s_trap 0xff | |Reserved for debugger. | - +--------------+-------------+-------------------+----------------------------+ - -Non-HSA OS -^^^^^^^^^^ -For code objects generated by AMDGPU back-end for non-HSA OS, the runtime does -not install a trap handler. The llvm.trap and llvm.debugtrap instructions are -handler as follows: - - =============== ============= =============================================== - Usage Code Sequence Description - =============== ============= =============================================== - llvm.trap s_endpgm Causes wavefront to be terminated. - llvm.debugtrap s_nop No operation. Compiler warning generated that - there is no trap handler installed. - =============== ============= =============================================== +~~~~~~~~~~~~~~~~ + +For code objects generated by AMDGPU backend for non-amdhsa OS, the runtime does +not install a trap handler. The ``llvm.trap`` and ``llvm.debugtrap`` +instructions are handled as follows: + + .. table:: AMDGPU Trap Handler for Non-AMDHSA OS + :name: amdgpu-trap-handler-for-non-amdhsa-os-table + + =============== =============== =========================================== + Usage Code Sequence Description + =============== =============== =========================================== + llvm.trap s_endpgm Causes wavefront to be terminated. + llvm.debugtrap *none* Compiler warning given that there is no + trap handler installed. + =============== =============== =========================================== + +Source Languages +================ + +.. _amdgpu-opencl: + +OpenCL +------ + +When generating code for the OpenCL language the target triple environment +should be ``opencl`` or ``amdgizcl`` (see :ref:`amdgpu-target-triples`). + +When the language is OpenCL the following differences occur: + +1. The OpenCL memory model is used (see :ref:`amdgpu-amdhsa-memory-model`). +2. The AMDGPU backend adds additional arguments to the kernel. +3. Additional metadata is generated (:ref:`amdgpu-code-object-metadata`). + +.. TODO + Specify what affect this has. Hidden arguments added. Additional metadata + generated. + +.. _amdgpu-hcc: + +HCC +--- + +When generating code for the OpenCL language the target triple environment +should be ``hcc`` (see :ref:`amdgpu-target-triples`). + +When the language is OpenCL the following differences occur: + +1. The HSA memory model is used (see :ref:`amdgpu-amdhsa-memory-model`). + +.. TODO + Specify what affect this has. Assembler -========= +--------- AMDGPU backend has LLVM-MC based assembler which is currently in development. -It supports Southern Islands ISA, Sea Islands and Volcanic Islands. +It supports AMDGCN GFX6-GFX8. -This document describes general syntax for instructions and operands. For more -information about instructions, their semantics and supported combinations -of operands, refer to one of Instruction Set Architecture manuals. +This section describes general syntax for instructions and operands. For more +information about instructions, their semantics and supported combinations of +operands, refer to one of instruction set architecture manuals +[AMD-Souther-Islands]_ [AMD-Sea-Islands]_ [AMD-Volcanic-Islands]_. -An instruction has the following syntax (register operands are -normally comma-separated while extra operands are space-separated): +An instruction has the following syntax (register operands are normally +comma-separated while extra operands are space-separated): * , ... ...* - Operands --------- +~~~~~~~~ The following syntax for register operands is supported: @@ -141,8 +3472,11 @@ The following extra operands are supported: - dst_unused (UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE) - abs, neg, sext -DS Instructions Examples ------------------------- +Instruction Examples +~~~~~~~~~~~~~~~~~~~~ + +DS +~~ .. code-block:: nasm @@ -154,8 +3488,8 @@ DS Instructions Examples For full list of supported instructions, refer to "LDS/GDS instructions" in ISA Manual. -FLAT Instruction Examples --------------------------- +FLAT +++++ .. code-block:: nasm @@ -167,8 +3501,8 @@ FLAT Instruction Examples For full list of supported instructions, refer to "FLAT instructions" in ISA Manual. -MUBUF Instruction Examples ---------------------------- +MUBUF ++++++ .. code-block:: nasm @@ -180,8 +3514,8 @@ MUBUF Instruction Examples For full list of supported instructions, refer to "MUBUF Instructions" in ISA Manual. -SMRD/SMEM Instruction Examples -------------------------------- +SMRD/SMEM ++++++++++ .. code-block:: nasm @@ -193,8 +3527,8 @@ SMRD/SMEM Instruction Examples For full list of supported instructions, refer to "Scalar Memory Operations" in ISA Manual. -SOP1 Instruction Examples --------------------------- +SOP1 +++++ .. code-block:: nasm @@ -208,8 +3542,8 @@ SOP1 Instruction Examples For full list of supported instructions, refer to "SOP1 Instructions" in ISA Manual. -SOP2 Instruction Examples -------------------------- +SOP2 +++++ .. code-block:: nasm @@ -225,8 +3559,8 @@ SOP2 Instruction Examples For full list of supported instructions, refer to "SOP2 Instructions" in ISA Manual. -SOPC Instruction Examples --------------------------- +SOPC +++++ .. code-block:: nasm @@ -237,8 +3571,8 @@ SOPC Instruction Examples For full list of supported instructions, refer to "SOPC Instructions" in ISA Manual. -SOPP Instruction Examples --------------------------- +SOPP +++++ .. code-block:: nasm @@ -260,8 +3594,8 @@ Unless otherwise mentioned, little verification is performed on the operands of SOPP Instructions, so it is up to the programmer to be familiar with the range or acceptable values. -Vector ALU Instruction Examples -------------------------------- +VALU +++++ For vector ALU instruction opcodes (VOP1, VOP2, VOP3, VOPC, VOP_DPP, VOP_SDWA), the assembler will automatically use optimal encoding based on its operands. @@ -315,19 +3649,20 @@ VOP_SDWA examples: For full list of supported instructions, refer to "Vector ALU instructions". HSA Code Object Directives --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ AMDGPU ABI defines auxiliary data in output code object. In assembly source, one can specify them with assembler directives. .hsa_code_object_version major, minor -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++++++++++++++++++++++++++++++++++++++ *major* and *minor* are integers that specify the version of the HSA code object that will be generated by the assembler. .hsa_code_object_isa [major, minor, stepping, vendor, arch] -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + *major*, *minor*, and *stepping* are all integers that describe the instruction set architecture (ISA) version of the assembly program. @@ -339,13 +3674,13 @@ By default, the assembler will derive the ISA version, *vendor*, and *arch* from the value of the -mcpu option that is passed to the assembler. .amdgpu_hsa_kernel (name) -^^^^^^^^^^^^^^^^^^^^^^^^^ ++++++++++++++++++++++++++ This directives specifies that the symbol with given name is a kernel entry point (label) and the object should contain corresponding symbol of type STT_AMDGPU_HSA_KERNEL. .amd_kernel_code_t -^^^^^^^^^^^^^^^^^^ +++++++++++++++++++ This directive marks the beginning of a list of key / value pairs that are used to specify the amd_kernel_code_t object that will be emitted by the assembler. @@ -404,3 +3739,25 @@ Here is an example of a minimal amd_kernel_code_t specification: s_endpgm .Lfunc_end0: .size hello_world, .Lfunc_end0-hello_world + +Additional Documentation +======================== + +.. [AMD-R6xx] `AMD R6xx shader ISA `__ +.. [AMD-R7xx] `AMD R7xx shader ISA `__ +.. [AMD-Evergreen] `AMD Evergreen shader ISA `__ +.. [AMD-Cayman-Trinity] `AMD Cayman/Trinity shader ISA `__ +.. [AMD-Souther-Islands] `AMD Southern Islands Series ISA `__ +.. [AMD-Sea-Islands] `AMD Sea Islands Series ISA `_ +.. [AMD-Volcanic-Islands] `AMD GCN3 Instruction Set Architecture `__ +.. [AMD-OpenCL_Programming-Guide] `AMD Accelerated Parallel Processing OpenCL Programming Guide `_ +.. [AMD-APP-SDK] `AMD Accelerated Parallel Processing APP SDK Documentation `__ +.. [AMD-ROCm] `ROCm: Open Platform for Development, Discovery and Education Around GPU Computing `__ +.. [AMD-ROCm-github] `ROCm github `__ +.. [HSA] `Heterogeneous System Architecture (HSA) Foundation `__ +.. [ELF] `Executable and Linkable Format (ELF) `__ +.. [DWARF] `DWARF Debugging Information Format `__ +.. [YAML] `YAML Ain’t Markup Language (YAML™) Version 1.2 `__ +.. [OpenCL] `The OpenCL Specification Version 2.0 `__ +.. [HRF] `Heterogeneous-race-free Memory Models `__ +.. [AMD-AMDGPU-Compute-Application-Binary-Interface] `AMDGPU Compute Application Binary Interface `__ diff --git a/docs/Benchmarking.rst b/docs/Benchmarking.rst new file mode 100644 index 0000000000000000000000000000000000000000..0f88db745a6862d8dad7d3e72f2fe9c991130b3e --- /dev/null +++ b/docs/Benchmarking.rst @@ -0,0 +1,87 @@ +================================== +Benchmarking tips +================================== + + +Introduction +============ + +For benchmarking a patch we want to reduce all possible sources of +noise as much as possible. How to do that is very OS dependent. + +Note that low noise is required, but not sufficient. It does not +exclude measurement bias. See +https://www.cis.upenn.edu/~cis501/papers/producing-wrong-data.pdf for +example. + +General +================================ + +* Use a high resolution timer, e.g. perf under linux. + +* Run the benchmark multiple times to be able to recognize noise. + +* Disable as many processes or services as possible on the target system. + +* Disable frequency scaling, turbo boost and address space + randomization (see OS specific section). + +* Static link if the OS supports it. That avoids any variation that + might be introduced by loading dynamic libraries. This can be done + by passing ``-DLLVM_BUILD_STATIC=ON`` to cmake. + +* Try to avoid storage. On some systems you can use tmpfs. Putting the + program, inputs and outputs on tmpfs avoids touching a real storage + system, which can have a pretty big variability. + + To mount it (on linux and freebsd at least):: + + mount -t tmpfs -o size=g none dir_to_mount + +Linux +===== + +* Disable address space randomization:: + + echo 0 > /proc/sys/kernel/randomize_va_space + +* Set scaling_governor to performance:: + + for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + do + echo performance > /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + done + +* Use https://github.com/lpechacek/cpuset to reserve cpus for just the + program you are benchmarking. If using perf, leave at least 2 cores + so that perf runs in one and your program in another:: + + cset shield -c N1,N2 -k on + + This will move all threads out of N1 and N2. The ``-k on`` means + that even kernel threads are moved out. + +* Disable the SMT pair of the cpus you will use for the benchmark. The + pair of cpu N can be found in + ``/sys/devices/system/cpu/cpuN/topology/thread_siblings_list`` and + disabled with:: + + echo 0 > /sys/devices/system/cpu/cpuX/online + + +* Run the program with:: + + cset shield --exec -- perf stat -r 10 + + This will run the command after ``--`` in the isolated cpus. The + particular perf command runs the ```` 10 times and reports + statistics. + +With these in place you can expect perf variations of less than 0.1%. + +Linux Intel +----------- + +* Disable turbo mode:: + + echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index a9a123595f7f5aa983201c33b1d5c1cb2525f95b..6ee3842c8d908f13eb9993fc46739afc75414cc2 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -550,6 +550,8 @@ LLVM IR is defined with the following blocks: * 17 --- `TYPE_BLOCK`_ --- This describes all of the types in the module. +* 23 --- `STRTAB_BLOCK`_ --- The bitcode file's string table. + .. _MODULE_BLOCK: MODULE_BLOCK Contents @@ -577,7 +579,7 @@ MODULE_CODE_VERSION Record ``[VERSION, version#]`` The ``VERSION`` record (code 1) contains a single value indicating the format -version. Versions 0 and 1 are supported at this time. The difference between +version. Versions 0, 1 and 2 are supported at this time. The difference between version 0 and 1 is in the encoding of instruction operands in each `FUNCTION_BLOCK`_. @@ -620,6 +622,12 @@ as unsigned VBRs. However, forward references are rare, except in the case of phi instructions. For phi instructions, operands are encoded as `Signed VBRs`_ to deal with forward references. +In version 2, the meaning of module records ``FUNCTION``, ``GLOBALVAR``, +``ALIAS``, ``IFUNC`` and ``COMDAT`` change such that the first two operands +specify an offset and size of a string in a string table (see `STRTAB_BLOCK +Contents`_), the function name is removed from the ``FNENTRY`` record in the +value symbol table, and the top-level ``VALUE_SYMTAB_BLOCK`` may only contain +``FNENTRY`` records. MODULE_CODE_TRIPLE Record ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -673,11 +681,14 @@ for each library name referenced. MODULE_CODE_GLOBALVAR Record ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat]`` +``[GLOBALVAR, strtab offset, strtab size, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat]`` The ``GLOBALVAR`` record (code 7) marks the declaration or definition of a global variable. The operand fields are: +* *strtab offset*, *strtab size*: Specifies the name of the global variable. + See `STRTAB_BLOCK Contents`_. + * *pointer type*: The type index of the pointer type used to point to this global variable @@ -755,11 +766,14 @@ global variable. The operand fields are: MODULE_CODE_FUNCTION Record ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn]`` +``[FUNCTION, strtab offset, strtab size, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn]`` The ``FUNCTION`` record (code 8) marks the declaration or definition of a function. The operand fields are: +* *strtab offset*, *strtab size*: Specifies the name of the function. + See `STRTAB_BLOCK Contents`_. + * *type*: The type index of the function type describing this function * *callingconv*: The calling convention number: @@ -817,11 +831,14 @@ function. The operand fields are: MODULE_CODE_ALIAS Record ^^^^^^^^^^^^^^^^^^^^^^^^ -``[ALIAS, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr]`` +``[ALIAS, strtab offset, strtab size, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr]`` The ``ALIAS`` record (code 9) marks the definition of an alias. The operand fields are +* *strtab offset*, *strtab size*: Specifies the name of the alias. + See `STRTAB_BLOCK Contents`_. + * *alias type*: The type index of the alias * *aliasee val#*: The value index of the aliased value @@ -1300,3 +1317,20 @@ METADATA_ATTACHMENT Contents ---------------------------- The ``METADATA_ATTACHMENT`` block (id 16) ... + +.. _STRTAB_BLOCK: + +STRTAB_BLOCK Contents +--------------------- + +The ``STRTAB`` block (id 23) contains a single record (``STRTAB_BLOB``, id 1) +with a single blob operand containing the bitcode file's string table. + +Strings in the string table are not null terminated. A record's *strtab +offset* and *strtab size* operands specify the byte offset and size of a +string within the string table. + +The string table is used by all preceding blocks in the bitcode file that are +not succeeded by another intervening ``STRTAB`` block. Normally a bitcode +file will have a single string table, but it may have more than one if it +was created by binary concatenation of multiple bitcode files. diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst index b941d0d1505064139d9875bc0fcc263e60c7cf22..9bd8bd4ae744afdfe446e90682cd3c505d01a72f 100644 --- a/docs/BranchWeightMetadata.rst +++ b/docs/BranchWeightMetadata.rst @@ -64,6 +64,20 @@ Branch weights are assigned to every destination. [ , i32 ... ] } +``CallInst`` +^^^^^^^^^^^^^^^^^^ + +Calls may have branch weight metadata, containing the execution count of +the call. It is currently used in SamplePGO mode only, to augment the +block and entry counts which may not be accurate with sampling. + +.. code-block:: none + + !0 = metadata !{ + metadata !"branch_weights", + i32 + } + Other ^^^^^ diff --git a/docs/CMake.rst b/docs/CMake.rst index 0a32d3957a53cd59f2efaff7266e197b6a759150..2deae9361874abd111babc29a02382c2c614db09 100644 --- a/docs/CMake.rst +++ b/docs/CMake.rst @@ -186,8 +186,8 @@ CMake manual, or execute ``cmake --help-variable VARIABLE_NAME``. Sets the build type for ``make``-based generators. Possible values are Release, Debug, RelWithDebInfo and MinSizeRel. If you are using an IDE such as Visual Studio, you should use the IDE settings to set the build type. - Be aware that Release and RelWithDebInfo are not using the same optimization - level on most platform. + Be aware that Release and RelWithDebInfo use different optimization levels on + most platforms. **CMAKE_INSTALL_PREFIX**:PATH Path where LLVM will be installed if "make install" is invoked or the diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index ad2178dc5875fb936e21a07c1dac5aa83419f1d0..4437610146c45a04316f2f1a1fab6fca5d1d984a 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -1,8 +1,8 @@ if (DOXYGEN_FOUND) if (LLVM_ENABLE_DOXYGEN) - set(abs_top_srcdir ${LLVM_MAIN_SRC_DIR}) - set(abs_top_builddir ${LLVM_BINARY_DIR}) + set(abs_top_srcdir ${CMAKE_CURRENT_SOURCE_DIR}) + set(abs_top_builddir ${CMAKE_CURRENT_BINARY_DIR}) if (HAVE_DOT) set(DOT ${LLVM_PATH_DOT}) @@ -103,8 +103,8 @@ endif() endif() if (LLVM_ENABLE_SPHINX) + include(AddSphinxTarget) if (SPHINX_FOUND) - include(AddSphinxTarget) if (${SPHINX_OUTPUT_HTML}) add_sphinx_target(html llvm) endif() diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst index 106fc8456f616f05f7f51170fe3b59cc3ca0861a..bcdc7228356653479d18e2b7bcb11e39b1a558df 100644 --- a/docs/CodeGenerator.rst +++ b/docs/CodeGenerator.rst @@ -2642,59 +2642,6 @@ to ensure valid register usage and operand types. The AMDGPU backend ------------------ -The AMDGPU code generator lives in the lib/Target/AMDGPU directory, and is an -open source native AMD GCN ISA code generator. - -Target triples supported -^^^^^^^^^^^^^^^^^^^^^^^^ - -The following are the known target triples that are supported by the AMDGPU -backend. - -* **amdgcn--** --- AMD GCN GPUs (AMDGPU.7.0.0+) -* **amdgcn--amdhsa** --- AMD GCN GPUs (AMDGPU.7.0.0+) with HSA support -* **r600--** --- AMD GPUs HD2XXX-HD6XXX - -Relocations -^^^^^^^^^^^ - -Supported relocatable fields are: - -* **word32** --- This specifies a 32-bit field occupying 4 bytes with arbitrary - byte alignment. These values use the same byte order as other word values in - the AMD GPU architecture -* **word64** --- This specifies a 64-bit field occupying 8 bytes with arbitrary - byte alignment. These values use the same byte order as other word values in - the AMD GPU architecture - -Following notations are used for specifying relocation calculations: - -* **A** --- Represents the addend used to compute the value of the relocatable - field -* **G** --- Represents the offset into the global offset table at which the - relocation entry’s symbol will reside during execution. -* **GOT** --- Represents the address of the global offset table. -* **P** --- Represents the place (section offset or address) of the storage unit - being relocated (computed using ``r_offset``) -* **S** --- Represents the value of the symbol whose index resides in the - relocation entry - -AMDGPU Backend generates *Elf64_Rela* relocation records with the following -supported relocation types: - - ========================== ===== ========== ============================== - Relocation type Value Field Calculation - ========================== ===== ========== ============================== - ``R_AMDGPU_NONE`` 0 ``none`` ``none`` - ``R_AMDGPU_ABS32_LO`` 1 ``word32`` (S + A) & 0xFFFFFFFF - ``R_AMDGPU_ABS32_HI`` 2 ``word32`` (S + A) >> 32 - ``R_AMDGPU_ABS64`` 3 ``word64`` S + A - ``R_AMDGPU_REL32`` 4 ``word32`` S + A - P - ``R_AMDGPU_REL64`` 5 ``word64`` S + A - P - ``R_AMDGPU_ABS32`` 6 ``word32`` S + A - ``R_AMDGPU_GOTPCREL`` 7 ``word32`` G + GOT + A - P - ``R_AMDGPU_GOTPCREL32_LO`` 8 ``word32`` (G + GOT + A - P) & 0xFFFFFFFF - ``R_AMDGPU_GOTPCREL32_HI`` 9 ``word32`` (G + GOT + A - P) >> 32 - ``R_AMDGPU_REL32_LO`` 10 ``word32`` (S + A - P) & 0xFFFFFFFF - ``R_AMDGPU_REL32_HI`` 11 ``word32`` (S + A - P) >> 32 - ========================== ===== ========== ============================== +The AMDGPU code generator lives in the ``lib/Target/AMDGPU`` +directory. This code generator is capable of targeting a variety of +AMD GPU processors. Refer to :doc:`AMDGPUUsage` for more information. diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst index 8ce999033b7f65e000458a571650881f040e075d..24375fb70d4e82e58c0498822711b0f28d33d456 100644 --- a/docs/CompilerWriterInfo.rst +++ b/docs/CompilerWriterInfo.rst @@ -72,16 +72,7 @@ Other documents, collections, notes AMDGPU ------ -* `AMD R6xx shader ISA `_ -* `AMD R7xx shader ISA `_ -* `AMD Evergreen shader ISA `_ -* `AMD Cayman/Trinity shader ISA `_ -* `AMD Southern Islands Series ISA `_ -* `AMD Sea Islands Series ISA `_ -* `AMD GCN3 Instruction Set Architecture `__ -* `AMD GPU Programming Guide `_ -* `AMD Compute Resources `_ -* `AMDGPU Compute Application Binary Interface `__ +Refer to :doc:`AMDGPUUsage` for additional documentation. RISC-V ------ diff --git a/docs/GetElementPtr.rst b/docs/GetElementPtr.rst index f39f1d9207a2a48fc609d0c79f858cd80611ad34..d13479dabca81c9aa8267e1ca121b133f198ee25 100644 --- a/docs/GetElementPtr.rst +++ b/docs/GetElementPtr.rst @@ -9,10 +9,11 @@ Introduction ============ This document seeks to dispel the mystery and confusion surrounding LLVM's -`GetElementPtr `_ (GEP) instruction. Questions -about the wily GEP instruction are probably the most frequently occurring -questions once a developer gets down to coding with LLVM. Here we lay out the -sources of confusion and show that the GEP instruction is really quite simple. +`GetElementPtr `_ (GEP) instruction. +Questions about the wily GEP instruction are probably the most frequently +occurring questions once a developer gets down to coding with LLVM. Here we lay +out the sources of confusion and show that the GEP instruction is really quite +simple. Address Computation =================== @@ -429,7 +430,8 @@ because LLVM has no restrictions on mixing types in addressing, loads or stores. LLVM's type-based alias analysis pass uses metadata to describe a different type system (such as the C type system), and performs type-based aliasing on top of -that. Further details are in the `language reference `_. +that. Further details are in the +`language reference `_. What happens if a GEP computation overflows? -------------------------------------------- diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst index a88860310f642937a553d512bfa3f28e6b5af044..0cb415ad764e5d758563a00914f56594c188f079 100644 --- a/docs/GettingStarted.rst +++ b/docs/GettingStarted.rst @@ -171,6 +171,8 @@ Linux PowerPC GCC, Clang Solaris V9 (Ultrasparc) GCC FreeBSD x86\ :sup:`1` GCC, Clang FreeBSD amd64 GCC, Clang +NetBSD x86\ :sup:`1` GCC, Clang +NetBSD amd64 GCC, Clang MacOS X\ :sup:`2` PowerPC GCC MacOS X x86 GCC, Clang Cygwin/Win32 x86\ :sup:`1, 3` GCC @@ -697,14 +699,14 @@ For developers to work with a git monorepo .. note:: - This set-up is using unofficial mirror hosted on GitHub, use with caution. + This set-up is using an unofficial mirror hosted on GitHub, use with caution. To set up a clone of all the llvm projects using a unified repository: .. code-block:: console % export TOP_LEVEL_DIR=`pwd` - % git clone https://github.com/llvm-project/llvm-project/ + % git clone https://github.com/llvm-project/llvm-project-20170507/ llvm-project % cd llvm-project % git config branch.master.rebase true diff --git a/docs/GettingStartedVS.rst b/docs/GettingStartedVS.rst index 1e46767679393a03382b28b57b435ef9702a5e17..50f7aa123c5587a468cb9b5cdb4c1799bfb729d6 100644 --- a/docs/GettingStartedVS.rst +++ b/docs/GettingStartedVS.rst @@ -100,6 +100,10 @@ Here's the short story for getting up and running quickly with LLVM: * CMake generates project files for all build types. To select a specific build type, use the Configuration manager from the VS IDE or the ``/property:Configuration`` command line option when using MSBuild. + * By default, the Visual Studio project files generated by CMake use the + 32-bit toolset. If you are developing on a 64-bit version of Windows and + want to use the 64-bit toolset, pass the ``-Thost=x64`` flag when + generating the Visual Studio solution. This requires CMake 3.8.0 or later. 6. Start Visual Studio diff --git a/docs/HowToAddABuilder.rst b/docs/HowToAddABuilder.rst index fcc2293de052e3b9ad455d33fd29ed00561533d2..08cbecdc2a5795d77754a0dc96c908565c5067ba 100644 --- a/docs/HowToAddABuilder.rst +++ b/docs/HowToAddABuilder.rst @@ -83,6 +83,8 @@ Here are the steps you can follow to do so: * slaves are added to ``buildbot/osuosl/master/config/slaves.py`` * builders are added to ``buildbot/osuosl/master/config/builders.py`` + Please make sure your builder name and its builddir are unique through the file. + It is possible to whitelist email addresses to unconditionally receive notifications on build failure; for this you'll need to add an ``InformativeMailNotifier`` to ``buildbot/osuosl/master/config/status.py``. This is particularly useful for the diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 363847af0a8a877311fe5bae506076d906c827cd..68aa500150ae326e0c1b0b81e6200ae2ad24ff44 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -161,7 +161,7 @@ symbol table entries. Here is an example of the "hello world" module: ; Definition of main function define i32 @main() { ; i32()* - ; Convert [13 x i8]* to i8 *... + ; Convert [13 x i8]* to i8*... %cast210 = getelementptr [13 x i8], [13 x i8]* @.str, i64 0, i64 0 ; Call puts function to write out the string to stdout. @@ -641,8 +641,9 @@ assume that the globals are densely packed in their section and try to iterate over them as an array, alignment padding would break this iteration. The maximum alignment is ``1 << 29``. -Globals can also have a :ref:`DLL storage class ` and -an optional list of attached :ref:`metadata `, +Globals can also have a :ref:`DLL storage class `, +an optional :ref:`global attributes ` and +an optional list of attached :ref:`metadata `. Variables and aliases can have a :ref:`Thread Local Storage Model `. @@ -1535,6 +1536,17 @@ example: ``sanitize_thread`` This attribute indicates that ThreadSanitizer checks (dynamic thread safety analysis) are enabled for this function. +``speculatable`` + This function attribute indicates that the function does not have any + effects besides calculating its result and does not have undefined behavior. + Note that ``speculatable`` is not enough to conclude that along any + particular execution path the number of calls to this function will not be + externally observable. This attribute is only valid on functions + and declarations, not on individual call sites. If a function is + incorrectly marked as speculatable and really does exhibit + undefined behavior, the undefined behavior may be observed even + if the call site is dead code. + ``ssp`` This attribute indicates that the function should emit a stack smashing protector. It is in the form of a "canary" --- a random value @@ -1613,6 +1625,14 @@ example: the ELF x86-64 abi, but it can be disabled for some compilation units. +.. _glattrs: + +Global Attributes +----------------- + +Attributes may be set to communicate additional information about a global variable. +Unlike :ref:`function attributes `, attributes on a global variable +are grouped into a single :ref:`attribute group `. .. _opbundles: @@ -3653,6 +3673,9 @@ Sparc: - ``I``: An immediate 13-bit signed integer. - ``r``: A 32-bit integer register. +- ``f``: Any floating-point register on SparcV8, or a floating point + register in the "low" half of the registers on SparcV9. +- ``e``: Any floating point register. (Same as ``f`` on SparcV8.) SystemZ: @@ -4010,26 +4033,26 @@ DICompileUnit """"""""""""" ``DICompileUnit`` nodes represent a compile unit. The ``enums:``, -``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:`` -fields are tuples containing the debug info to be emitted along with the compile -unit, regardless of code optimizations (some nodes are only emitted if there are -references to them from instructions). The ``debugInfoForProfiling:`` field is a -boolean indicating whether or not line-table discriminators are updated to -provide more-accurate debug info for profiling results. +``retainedTypes:``, ``globals:``, ``imports:`` and ``macros:`` fields are tuples +containing the debug info to be emitted along with the compile unit, regardless +of code optimizations (some nodes are only emitted if there are references to +them from instructions). The ``debugInfoForProfiling:`` field is a boolean +indicating whether or not line-table discriminators are updated to provide +more-accurate debug info for profiling results. .. code-block:: text !0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, flags: "-O2", runtimeVersion: 2, splitDebugFilename: "abc.debug", emissionKind: FullDebug, - enums: !2, retainedTypes: !3, subprograms: !4, - globals: !5, imports: !6, macros: !7, dwoId: 0x0abcd) + enums: !2, retainedTypes: !3, globals: !4, imports: !5, + macros: !6, dwoId: 0x0abcd) Compile unit descriptors provide the root scope for objects declared in a -specific compilation unit. File descriptors are defined using this scope. -These descriptors are collected by a named metadata ``!llvm.dbg.cu``. They -keep track of subprograms, global variables, type information, and imported -entities (declarations and namespaces). +specific compilation unit. File descriptors are defined using this scope. These +descriptors are collected by a named metadata node ``!llvm.dbg.cu``. They keep +track of global variables, type information, and imported entities (declarations +and namespaces). .. _DIFile: @@ -4303,8 +4326,8 @@ and ``scope:``. containingType: !4, virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10, flags: DIFlagPrototyped, - isOptimized: true, templateParams: !5, - declaration: !6, variables: !7) + isOptimized: true, unit: !5, templateParams: !6, + declaration: !7, variables: !8, thrownTypes: !9) .. _DILexicalBlock: @@ -4380,8 +4403,13 @@ referenced LLVM variable relates to the source language variable. The current supported vocabulary is limited: -- ``DW_OP_deref`` dereferences the working expression. -- ``DW_OP_plus, 93`` adds ``93`` to the working expression. +- ``DW_OP_deref`` dereferences the top of the expression stack. +- ``DW_OP_plus`` pops the last two entries from the expression stack, adds + them together and appends the result to the expression stack. +- ``DW_OP_minus`` pops the last two entries from the expression stack, subtracts + the last entry from the second last entry and appends the result to the + expression stack. +- ``DW_OP_plus_uconst, 93`` adds ``93`` to the working expression. - ``DW_OP_LLVM_fragment, 16, 8`` specifies the offset and size (``16`` and ``8`` here, respectively) of the variable fragment from the working expression. Note that contrary to DW_OP_bit_piece, the offset is describing the the location @@ -4392,21 +4420,21 @@ The current supported vocabulary is limited: address space identifier. - ``DW_OP_stack_value`` marks a constant value. -DIExpression nodes that contain a ``DW_OP_stack_value`` operator are standalone -location descriptions that describe constant values. This form is used to -describe global constants that have been optimized away. All other expressions -are modifiers to another location: A debug intrinsic ties a location and a -DIExpression together. Contrary to DWARF expressions, a DIExpression always -describes the *value* of a source variable and never its *address*. In DWARF -terminology, a DIExpression can always be considered an implicit location -description regardless whether it contains a ``DW_OP_stack_value`` or not. +DWARF specifies three kinds of simple location descriptions: Register, memory, +and implicit location descriptions. Register and memory location descriptions +describe the *location* of a source variable (in the sense that a debugger might +modify its value), whereas implicit locations describe merely the *value* of a +source variable. DIExpressions also follow this model: A DIExpression that +doesn't have a trailing ``DW_OP_stack_value`` will describe an *address* when +combined with a concrete location. -.. code-block:: text +.. code-block:: llvm !0 = !DIExpression(DW_OP_deref) - !1 = !DIExpression(DW_OP_plus, 3) + !1 = !DIExpression(DW_OP_plus_uconst, 3) + !1 = !DIExpression(DW_OP_constu, 3, DW_OP_plus) !2 = !DIExpression(DW_OP_bit_piece, 3, 7) - !3 = !DIExpression(DW_OP_deref, DW_OP_plus, 3, DW_OP_LLVM_fragment, 3, 7) + !3 = !DIExpression(DW_OP_deref, DW_OP_constu, 3, DW_OP_plus, DW_OP_LLVM_fragment, 3, 7) !4 = !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef) !5 = !DIExpression(DW_OP_constu, 42, DW_OP_stack_value) @@ -5164,6 +5192,72 @@ Example: !0 = !{i32* @a} +'``prof``' Metadata +^^^^^^^^^^^^^^^^^^^ + +The ``prof`` metadata is used to record profile data in the IR. +The first operand of the metadata node indicates the profile metadata +type. There are currently 3 types: +:ref:`branch_weights`, +:ref:`function_entry_count`, and +:ref:`VP`. + +.. _prof_node_branch_weights: + +branch_weights +"""""""""""""" + +Branch weight metadata attached to a branch, select, switch or call instruction +represents the likeliness of the associated branch being taken. +For more information, see :doc:`BranchWeightMetadata`. + +.. _prof_node_function_entry_count: + +function_entry_count +"""""""""""""""""""" + +Function entry count metadata can be attached to function definitions +to record the number of times the function is called. Used with BFI +information, it is also used to derive the basic block profile count. +For more information, see :doc:`BranchWeightMetadata`. + +.. _prof_node_VP: + +VP +"" + +VP (value profile) metadata can be attached to instructions that have +value profile information. Currently this is indirect calls (where it +records the hottest callees) and calls to memory intrinsics such as memcpy, +memmove, and memset (where it records the hottest byte lengths). + +Each VP metadata node contains "VP" string, then a uint32_t value for the value +profiling kind, a uint64_t value for the total number of times the instruction +is executed, followed by uint64_t value and execution count pairs. +The value profiling kind is 0 for indirect call targets and 1 for memory +operations. For indirect call targets, each profile value is a hash +of the callee function name, and for memory operations each value is the +byte length. + +Note that the value counts do not need to add up to the total count +listed in the third operand (in practice only the top hottest values +are tracked and reported). + +Indirect call example: + +.. code-block:: llvm + + call void %f(), !prof !1 + !1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410} + +Note that the VP type is 0 (the second operand), which indicates this is +an indirect call value profile data. The third operand indicates that the +indirect call executed 1600 times. The 4th and 6th operands give the +hashes of the 2 hottest target functions' names (this is the same hash used +to represent function names in the profile database), and the 5th and 7th +operands give the execution count that each of the respective prior target +functions was called. + Module Flags Metadata ===================== @@ -5330,40 +5424,6 @@ Some important flag interactions: - A module with ``Objective-C Garbage Collection`` set to 0 cannot be merged with a module with ``Objective-C GC Only`` set to 6. -Automatic Linker Flags Module Flags Metadata --------------------------------------------- - -Some targets support embedding flags to the linker inside individual object -files. Typically this is used in conjunction with language extensions which -allow source files to explicitly declare the libraries they depend on, and have -these automatically be transmitted to the linker via object files. - -These flags are encoded in the IR using metadata in the module flags section, -using the ``Linker Options`` key. The merge behavior for this flag is required -to be ``AppendUnique``, and the value for the key is expected to be a metadata -node which should be a list of other metadata nodes, each of which should be a -list of metadata strings defining linker options. - -For example, the following metadata section specifies two separate sets of -linker options, presumably to link against ``libz`` and the ``Cocoa`` -framework:: - - !0 = !{ i32 6, !"Linker Options", - !{ - !{ !"-lz" }, - !{ !"-framework", !"Cocoa" } } } - !llvm.module.flags = !{ !0 } - -The metadata encoding as lists of lists of options, as opposed to a collapsed -list of options, is chosen so that the IR encoding can use multiple option -strings to specify e.g., a single library, while still having that specifier be -preserved as an atomic element that can be recognized by a target specific -assembly writer or object file emitter. - -Each individual option is required to be either a valid option for the target's -linker, or an option that is reserved by the target specific assembly writer or -object file emitter. No other aspect of these options is defined by the IR. - C type width Module Flags Metadata ---------------------------------- @@ -5400,6 +5460,37 @@ enum is the smallest type which can represent all of its values:: !0 = !{i32 1, !"short_wchar", i32 1} !1 = !{i32 1, !"short_enum", i32 0} +Automatic Linker Flags Named Metadata +===================================== + +Some targets support embedding flags to the linker inside individual object +files. Typically this is used in conjunction with language extensions which +allow source files to explicitly declare the libraries they depend on, and have +these automatically be transmitted to the linker via object files. + +These flags are encoded in the IR using named metadata with the name +``!llvm.linker.options``. Each operand is expected to be a metadata node +which should be a list of other metadata nodes, each of which should be a +list of metadata strings defining linker options. + +For example, the following metadata section specifies two separate sets of +linker options, presumably to link against ``libz`` and the ``Cocoa`` +framework:: + + !0 = !{ !"-lz" }, + !1 = !{ !"-framework", !"Cocoa" } } } + !llvm.linker.options = !{ !0, !1 } + +The metadata encoding as lists of lists of options, as opposed to a collapsed +list of options, is chosen so that the IR encoding can use multiple option +strings to specify e.g., a single library, while still having that specifier be +preserved as an atomic element that can be recognized by a target specific +assembly writer or object file emitter. + +Each individual option is required to be either a valid option for the target's +linker, or an option that is reserved by the target specific assembly writer or +object file emitter. No other aspect of these options is defined by the IR. + .. _intrinsicglobalvariables: Intrinsic Global Variables @@ -5812,9 +5903,7 @@ This instruction requires several arguments: #. '``exception label``': the label reached when a callee returns via the :ref:`resume ` instruction or other exception handling mechanism. -#. The optional :ref:`function attributes ` list. Only - '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``' - attributes are valid here. +#. The optional :ref:`function attributes ` list. #. The optional :ref:`operand bundles ` list. Semantics: @@ -6671,15 +6760,14 @@ Semantics: The value produced is ``op1`` \* 2\ :sup:`op2` mod 2\ :sup:`n`, where ``n`` is the width of the result. If ``op2`` is (statically or dynamically) equal to or larger than the number of bits in -``op1``, the result is undefined. If the arguments are vectors, each -vector element of ``op1`` is shifted by the corresponding shift amount -in ``op2``. +``op1``, this instruction returns a :ref:`poison value `. +If the arguments are vectors, each vector element of ``op1`` is shifted +by the corresponding shift amount in ``op2``. -If the ``nuw`` keyword is present, then the shift produces a :ref:`poison -value ` if it shifts out any non-zero bits. If the -``nsw`` keyword is present, then the shift produces a :ref:`poison -value ` if it shifts out any bits that disagree with the -resultant sign bit. +If the ``nuw`` keyword is present, then the shift produces a poison +value if it shifts out any non-zero bits. +If the ``nsw`` keyword is present, then the shift produces a poison +value it shifts out any bits that disagree with the resultant sign bit. Example: """""""" @@ -6722,13 +6810,12 @@ Semantics: This instruction always performs a logical shift right operation. The most significant bits of the result will be filled with zero bits after the shift. If ``op2`` is (statically or dynamically) equal to or larger -than the number of bits in ``op1``, the result is undefined. If the -arguments are vectors, each vector element of ``op1`` is shifted by the -corresponding shift amount in ``op2``. +than the number of bits in ``op1``, this instruction returns a :ref:`poison +value `. If the arguments are vectors, each vector element +of ``op1`` is shifted by the corresponding shift amount in ``op2``. If the ``exact`` keyword is present, the result value of the ``lshr`` is -a :ref:`poison value ` if any of the bits shifted out are -non-zero. +a poison value if any of the bits shifted out are non-zero. Example: """""""" @@ -6773,13 +6860,12 @@ Semantics: This instruction always performs an arithmetic shift right operation, The most significant bits of the result will be filled with the sign bit of ``op1``. If ``op2`` is (statically or dynamically) equal to or larger -than the number of bits in ``op1``, the result is undefined. If the -arguments are vectors, each vector element of ``op1`` is shifted by the -corresponding shift amount in ``op2``. +than the number of bits in ``op1``, this instruction returns a :ref:`poison +value `. If the arguments are vectors, each vector element +of ``op1`` is shifted by the corresponding shift amount in ``op2``. If the ``exact`` keyword is present, the result value of the ``ashr`` is -a :ref:`poison value ` if any of the bits shifted out are -non-zero. +a poison value if any of the bits shifted out are non-zero. Example: """""""" @@ -7071,9 +7157,10 @@ Semantics: The elements of the two input vectors are numbered from left to right across both of the vectors. The shuffle mask operand specifies, for each element of the result vector, which element of the two input vectors the -result element gets. The element selector may be undef (meaning "don't -care") and the second operand may be undef if performing a shuffle from -only one vector. +result element gets. If the shuffle mask is undef, the result vector is +undef. If any element of the mask operand is undef, that element of the +result is undef. If the shuffle mask selects an undef element from one +of the input vectors, the resulting element is undef. Example: """""""" @@ -7900,7 +7987,7 @@ makes sense: ; get pointers for 8 elements from array B %ptrs = getelementptr double, double* %B, <8 x i32> %C ; load 8 elements from array B into A - %A = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs, + %A = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> %ptrs, i32 8, <8 x i1> %mask, <8 x double> %passthru) Conversion Operations @@ -8872,9 +8959,7 @@ This instruction requires several arguments: be of :ref:`first class ` type. If the function signature indicates the function accepts a variable number of arguments, the extra arguments can be specified. -#. The optional :ref:`function attributes ` list. Only - '``noreturn``', '``nounwind``', '``readonly``' , '``readnone``', - and '``convergent``' attributes are valid here. +#. The optional :ref:`function attributes ` list. #. The optional :ref:`operand bundles ` list. Semantics: @@ -9523,7 +9608,7 @@ Syntax: :: - declare i8 *@llvm.returnaddress(i32 ) + declare i8* @llvm.returnaddress(i32 ) Overview: """"""""" @@ -9561,7 +9646,7 @@ Syntax: :: - declare i8 *@llvm.addressofreturnaddress() + declare i8* @llvm.addressofreturnaddress() Overview: """"""""" @@ -11674,241 +11759,573 @@ Examples: %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c -Half Precision Floating Point Intrinsics ----------------------------------------- -For most target platforms, half precision floating point is a -storage-only format. This means that it is a dense encoding (in memory) -but does not support computation in the format. +Experimental Vector Reduction Intrinsics +---------------------------------------- -This means that code must first load the half-precision floating point -value as an i16, then convert it to float with -:ref:`llvm.convert.from.fp16 `. Computation can -then be performed on the float value (including extending to double -etc). To store the value back to memory, it is first converted to float -if needed, then converted to i16 with -:ref:`llvm.convert.to.fp16 `, then storing as an -i16 value. +Horizontal reductions of vectors can be expressed using the following +intrinsics. Each one takes a vector operand as an input and applies its +respective operation across all elements of the vector, returning a single +scalar result of the same element type. -.. _int_convert_to_fp16: -'``llvm.convert.to.fp16``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.vector.reduce.add.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i16 @llvm.convert.to.fp16.f32(float %a) - declare i16 @llvm.convert.to.fp16.f64(double %a) + declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %a) + declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %a) Overview: """"""""" -The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a -conventional floating point type to half precision floating point format. +The '``llvm.experimental.vector.reduce.add.*``' intrinsics do an integer ``ADD`` +reduction of a vector, returning the result as a scalar. The return type matches +the element-type of the vector input. Arguments: """""""""" +The argument to this intrinsic must be a vector of integer values. -The intrinsic function contains single argument - the value to be -converted. +'``llvm.experimental.vector.reduce.fadd.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Semantics: +Syntax: +""""""" + +:: + + declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %acc, <4 x float> %a) + declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double %acc, <2 x double> %a) + +Overview: +""""""""" + +The '``llvm.experimental.vector.reduce.fadd.*``' intrinsics do a floating point +``ADD`` reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input. + +If the intrinsic call has fast-math flags, then the reduction will not preserve +the associativity of an equivalent scalarized counterpart. If it does not have +fast-math flags, then the reduction will be *ordered*, implying that the +operation respects the associativity of a scalarized reduction. + + +Arguments: """""""""" +The first argument to this intrinsic is a scalar accumulator value, which is +only used when there are no fast-math flags attached. This argument may be undef +when fast-math flags are used. -The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a -conventional floating point format to half precision floating point format. The -return value is an ``i16`` which contains the converted number. +The second argument must be a vector of floating point values. Examples: """"""""" .. code-block:: llvm - %res = call i16 @llvm.convert.to.fp16.f32(float %a) - store i16 %res, i16* @x, align 2 + %fast = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %input) ; fast reduction + %ord = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %acc, <4 x float> %input) ; ordered reduction -.. _int_convert_from_fp16: -'``llvm.convert.from.fp16``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.vector.reduce.mul.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare float @llvm.convert.from.fp16.f32(i16 %a) - declare double @llvm.convert.from.fp16.f64(i16 %a) + declare i32 @llvm.experimental.vector.reduce.mul.i32.v4i32(<4 x i32> %a) + declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %a) Overview: """"""""" -The '``llvm.convert.from.fp16``' intrinsic function performs a -conversion from half precision floating point format to single precision -floating point format. +The '``llvm.experimental.vector.reduce.mul.*``' intrinsics do an integer ``MUL`` +reduction of a vector, returning the result as a scalar. The return type matches +the element-type of the vector input. Arguments: """""""""" +The argument to this intrinsic must be a vector of integer values. -The intrinsic function contains single argument - the value to be -converted. +'``llvm.experimental.vector.reduce.fmul.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Semantics: -"""""""""" +Syntax: +""""""" -The '``llvm.convert.from.fp16``' intrinsic function performs a -conversion from half single precision floating point format to single -precision floating point format. The input half-float value is -represented by an ``i16`` value. +:: -Examples: + declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %acc, <4 x float> %a) + declare double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double %acc, <2 x double> %a) + +Overview: """"""""" -.. code-block:: llvm +The '``llvm.experimental.vector.reduce.fmul.*``' intrinsics do a floating point +``MUL`` reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input. - %a = load i16, i16* @x, align 2 - %res = call float @llvm.convert.from.fp16(i16 %a) +If the intrinsic call has fast-math flags, then the reduction will not preserve +the associativity of an equivalent scalarized counterpart. If it does not have +fast-math flags, then the reduction will be *ordered*, implying that the +operation respects the associativity of a scalarized reduction. -.. _dbg_intrinsics: -Debugger Intrinsics -------------------- +Arguments: +"""""""""" +The first argument to this intrinsic is a scalar accumulator value, which is +only used when there are no fast-math flags attached. This argument may be undef +when fast-math flags are used. -The LLVM debugger intrinsics (which all start with ``llvm.dbg.`` -prefix), are described in the `LLVM Source Level -Debugging `_ -document. +The second argument must be a vector of floating point values. -Exception Handling Intrinsics ------------------------------ +Examples: +""""""""" -The LLVM exception handling intrinsics (which all start with -``llvm.eh.`` prefix), are described in the `LLVM Exception -Handling `_ document. +.. code-block:: llvm -.. _int_trampoline: + %fast = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %input) ; fast reduction + %ord = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %acc, <4 x float> %input) ; ordered reduction -Trampoline Intrinsics ---------------------- +'``llvm.experimental.vector.reduce.and.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -These intrinsics make it possible to excise one parameter, marked with -the :ref:`nest ` attribute, from a function. The result is a -callable function pointer lacking the nest parameter - the caller does -not need to provide a value for it. Instead, the value to use is stored -in advance in a "trampoline", a block of memory usually allocated on the -stack, which also contains code to splice the nest value into the -argument list. This is used to implement the GCC nested function address -extension. +Syntax: +""""""" -For example, if the function is ``i32 f(i8* nest %c, i32 %x, i32 %y)`` -then the resulting function pointer has signature ``i32 (i32, i32)*``. -It can be created as follows: +:: -.. code-block:: llvm + declare i32 @llvm.experimental.vector.reduce.and.i32.v4i32(<4 x i32> %a) - %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86 - %tramp1 = getelementptr [10 x i8], [10 x i8]* %tramp, i32 0, i32 0 - call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval) - %p = call i8* @llvm.adjust.trampoline(i8* %tramp1) - %fp = bitcast i8* %p to i32 (i32, i32)* +Overview: +""""""""" -The call ``%val = call i32 %fp(i32 %x, i32 %y)`` is then equivalent to -``%val = call i32 %f(i8* %nval, i32 %x, i32 %y)``. +The '``llvm.experimental.vector.reduce.and.*``' intrinsics do a bitwise ``AND`` +reduction of a vector, returning the result as a scalar. The return type matches +the element-type of the vector input. -.. _int_it: +Arguments: +"""""""""" +The argument to this intrinsic must be a vector of integer values. -'``llvm.init.trampoline``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.vector.reduce.or.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare void @llvm.init.trampoline(i8* , i8* , i8* ) + declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a) Overview: """"""""" -This fills the memory pointed to by ``tramp`` with executable code, -turning it into a trampoline. +The '``llvm.experimental.vector.reduce.or.*``' intrinsics do a bitwise ``OR`` reduction +of a vector, returning the result as a scalar. The return type matches the +element-type of the vector input. Arguments: """""""""" +The argument to this intrinsic must be a vector of integer values. -The ``llvm.init.trampoline`` intrinsic takes three arguments, all -pointers. The ``tramp`` argument must point to a sufficiently large and -sufficiently aligned block of memory; this memory is written to by the -intrinsic. Note that the size and the alignment are target-specific - -LLVM currently provides no portable way of determining them, so a -front-end that generates this intrinsic needs to have some -target-specific knowledge. The ``func`` argument must hold a function -bitcast to an ``i8*``. +'``llvm.experimental.vector.reduce.xor.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Semantics: -"""""""""" +Syntax: +""""""" -The block of memory pointed to by ``tramp`` is filled with target -dependent code, turning it into a function. Then ``tramp`` needs to be -passed to :ref:`llvm.adjust.trampoline ` to get a pointer which can -be :ref:`bitcast (to a new function) and called `. The new -function's signature is the same as that of ``func`` with any arguments -marked with the ``nest`` attribute removed. At most one such ``nest`` -argument is allowed, and it must be of pointer type. Calling the new -function is equivalent to calling ``func`` with the same argument list, -but with ``nval`` used for the missing ``nest`` argument. If, after -calling ``llvm.init.trampoline``, the memory pointed to by ``tramp`` is -modified, then the effect of any later call to the returned function -pointer is undefined. +:: -.. _int_at: + declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32> %a) -'``llvm.adjust.trampoline``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Overview: +""""""""" + +The '``llvm.experimental.vector.reduce.xor.*``' intrinsics do a bitwise ``XOR`` +reduction of a vector, returning the result as a scalar. The return type matches +the element-type of the vector input. + +Arguments: +"""""""""" +The argument to this intrinsic must be a vector of integer values. + +'``llvm.experimental.vector.reduce.smax.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i8* @llvm.adjust.trampoline(i8* ) + declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %a) Overview: """"""""" -This performs any required machine-specific adjustment to the address of -a trampoline (passed as ``tramp``). +The '``llvm.experimental.vector.reduce.smax.*``' intrinsics do a signed integer +``MAX`` reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input. Arguments: """""""""" +The argument to this intrinsic must be a vector of integer values. -``tramp`` must point to a block of memory which already has trampoline -code filled in by a previous call to -:ref:`llvm.init.trampoline `. +'``llvm.experimental.vector.reduce.smin.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Semantics: -"""""""""" +Syntax: +""""""" -On some architectures the address of the code to be executed needs to be -different than the address where the trampoline is actually stored. This -intrinsic returns the executable address corresponding to ``tramp`` -after performing the required machine specific adjustments. The pointer -returned can then be :ref:`bitcast and executed `. +:: -.. _int_mload_mstore: + declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %a) -Masked Vector Load and Store Intrinsics ---------------------------------------- +Overview: +""""""""" -LLVM provides intrinsics for predicated vector load and store operations. The predicate is specified by a mask operand, which holds one bit per vector element, switching the associated vector lane on or off. The memory addresses corresponding to the "off" lanes are not accessed. When all bits of the mask are on, the intrinsic is identical to a regular vector load or store. When all bits are off, no memory is accessed. +The '``llvm.experimental.vector.reduce.smin.*``' intrinsics do a signed integer +``MIN`` reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input. -.. _int_mload: +Arguments: +"""""""""" +The argument to this intrinsic must be a vector of integer values. -'``llvm.masked.load.*``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.vector.reduce.umax.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %a) + +Overview: +""""""""" + +The '``llvm.experimental.vector.reduce.umax.*``' intrinsics do an unsigned +integer ``MAX`` reduction of a vector, returning the result as a scalar. The +return type matches the element-type of the vector input. + +Arguments: +"""""""""" +The argument to this intrinsic must be a vector of integer values. + +'``llvm.experimental.vector.reduce.umin.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %a) + +Overview: +""""""""" + +The '``llvm.experimental.vector.reduce.umin.*``' intrinsics do an unsigned +integer ``MIN`` reduction of a vector, returning the result as a scalar. The +return type matches the element-type of the vector input. + +Arguments: +"""""""""" +The argument to this intrinsic must be a vector of integer values. + +'``llvm.experimental.vector.reduce.fmax.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %a) + declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %a) + +Overview: +""""""""" + +The '``llvm.experimental.vector.reduce.fmax.*``' intrinsics do a floating point +``MAX`` reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input. + +If the intrinsic call has the ``nnan`` fast-math flag then the operation can +assume that NaNs are not present in the input vector. + +Arguments: +"""""""""" +The argument to this intrinsic must be a vector of floating point values. + +'``llvm.experimental.vector.reduce.fmin.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a) + declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a) + +Overview: +""""""""" + +The '``llvm.experimental.vector.reduce.fmin.*``' intrinsics do a floating point +``MIN`` reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input. + +If the intrinsic call has the ``nnan`` fast-math flag then the operation can +assume that NaNs are not present in the input vector. + +Arguments: +"""""""""" +The argument to this intrinsic must be a vector of floating point values. + +Half Precision Floating Point Intrinsics +---------------------------------------- + +For most target platforms, half precision floating point is a +storage-only format. This means that it is a dense encoding (in memory) +but does not support computation in the format. + +This means that code must first load the half-precision floating point +value as an i16, then convert it to float with +:ref:`llvm.convert.from.fp16 `. Computation can +then be performed on the float value (including extending to double +etc). To store the value back to memory, it is first converted to float +if needed, then converted to i16 with +:ref:`llvm.convert.to.fp16 `, then storing as an +i16 value. + +.. _int_convert_to_fp16: + +'``llvm.convert.to.fp16``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i16 @llvm.convert.to.fp16.f32(float %a) + declare i16 @llvm.convert.to.fp16.f64(double %a) + +Overview: +""""""""" + +The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a +conventional floating point type to half precision floating point format. + +Arguments: +"""""""""" + +The intrinsic function contains single argument - the value to be +converted. + +Semantics: +"""""""""" + +The '``llvm.convert.to.fp16``' intrinsic function performs a conversion from a +conventional floating point format to half precision floating point format. The +return value is an ``i16`` which contains the converted number. + +Examples: +""""""""" + +.. code-block:: llvm + + %res = call i16 @llvm.convert.to.fp16.f32(float %a) + store i16 %res, i16* @x, align 2 + +.. _int_convert_from_fp16: + +'``llvm.convert.from.fp16``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare float @llvm.convert.from.fp16.f32(i16 %a) + declare double @llvm.convert.from.fp16.f64(i16 %a) + +Overview: +""""""""" + +The '``llvm.convert.from.fp16``' intrinsic function performs a +conversion from half precision floating point format to single precision +floating point format. + +Arguments: +"""""""""" + +The intrinsic function contains single argument - the value to be +converted. + +Semantics: +"""""""""" + +The '``llvm.convert.from.fp16``' intrinsic function performs a +conversion from half single precision floating point format to single +precision floating point format. The input half-float value is +represented by an ``i16`` value. + +Examples: +""""""""" + +.. code-block:: llvm + + %a = load i16, i16* @x, align 2 + %res = call float @llvm.convert.from.fp16(i16 %a) + +.. _dbg_intrinsics: + +Debugger Intrinsics +------------------- + +The LLVM debugger intrinsics (which all start with ``llvm.dbg.`` +prefix), are described in the `LLVM Source Level +Debugging `_ +document. + +Exception Handling Intrinsics +----------------------------- + +The LLVM exception handling intrinsics (which all start with +``llvm.eh.`` prefix), are described in the `LLVM Exception +Handling `_ document. + +.. _int_trampoline: + +Trampoline Intrinsics +--------------------- + +These intrinsics make it possible to excise one parameter, marked with +the :ref:`nest ` attribute, from a function. The result is a +callable function pointer lacking the nest parameter - the caller does +not need to provide a value for it. Instead, the value to use is stored +in advance in a "trampoline", a block of memory usually allocated on the +stack, which also contains code to splice the nest value into the +argument list. This is used to implement the GCC nested function address +extension. + +For example, if the function is ``i32 f(i8* nest %c, i32 %x, i32 %y)`` +then the resulting function pointer has signature ``i32 (i32, i32)*``. +It can be created as follows: + +.. code-block:: llvm + + %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86 + %tramp1 = getelementptr [10 x i8], [10 x i8]* %tramp, i32 0, i32 0 + call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval) + %p = call i8* @llvm.adjust.trampoline(i8* %tramp1) + %fp = bitcast i8* %p to i32 (i32, i32)* + +The call ``%val = call i32 %fp(i32 %x, i32 %y)`` is then equivalent to +``%val = call i32 %f(i8* %nval, i32 %x, i32 %y)``. + +.. _int_it: + +'``llvm.init.trampoline``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.init.trampoline(i8* , i8* , i8* ) + +Overview: +""""""""" + +This fills the memory pointed to by ``tramp`` with executable code, +turning it into a trampoline. + +Arguments: +"""""""""" + +The ``llvm.init.trampoline`` intrinsic takes three arguments, all +pointers. The ``tramp`` argument must point to a sufficiently large and +sufficiently aligned block of memory; this memory is written to by the +intrinsic. Note that the size and the alignment are target-specific - +LLVM currently provides no portable way of determining them, so a +front-end that generates this intrinsic needs to have some +target-specific knowledge. The ``func`` argument must hold a function +bitcast to an ``i8*``. + +Semantics: +"""""""""" + +The block of memory pointed to by ``tramp`` is filled with target +dependent code, turning it into a function. Then ``tramp`` needs to be +passed to :ref:`llvm.adjust.trampoline ` to get a pointer which can +be :ref:`bitcast (to a new function) and called `. The new +function's signature is the same as that of ``func`` with any arguments +marked with the ``nest`` attribute removed. At most one such ``nest`` +argument is allowed, and it must be of pointer type. Calling the new +function is equivalent to calling ``func`` with the same argument list, +but with ``nval`` used for the missing ``nest`` argument. If, after +calling ``llvm.init.trampoline``, the memory pointed to by ``tramp`` is +modified, then the effect of any later call to the returned function +pointer is undefined. + +.. _int_at: + +'``llvm.adjust.trampoline``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.adjust.trampoline(i8* ) + +Overview: +""""""""" + +This performs any required machine-specific adjustment to the address of +a trampoline (passed as ``tramp``). + +Arguments: +"""""""""" + +``tramp`` must point to a block of memory which already has trampoline +code filled in by a previous call to +:ref:`llvm.init.trampoline `. + +Semantics: +"""""""""" + +On some architectures the address of the code to be executed needs to be +different than the address where the trampoline is actually stored. This +intrinsic returns the executable address corresponding to ``tramp`` +after performing the required machine specific adjustments. The pointer +returned can then be :ref:`bitcast and executed `. + +.. _int_mload_mstore: + +Masked Vector Load and Store Intrinsics +--------------------------------------- + +LLVM provides intrinsics for predicated vector load and store operations. The predicate is specified by a mask operand, which holds one bit per vector element, switching the associated vector lane on or off. The memory addresses corresponding to the "off" lanes are not accessed. When all bits of the mask are on, the intrinsic is identical to a regular vector load or store. When all bits are off, no memory is accessed. + +.. _int_mload: + +'``llvm.masked.load.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" @@ -11916,444 +12333,904 @@ This is an overloaded intrinsic. The loaded data is a vector of any integer, flo :: - declare <16 x float> @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* , i32 , <16 x i1> , <16 x float> ) - declare <2 x double> @llvm.masked.load.v2f64.p0v2f64 (<2 x double>* , i32 , <2 x i1> , <2 x double> ) - ;; The data is a vector of pointers to double - declare <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64 (<8 x double*>* , i32 , <8 x i1> , <8 x double*> ) - ;; The data is a vector of function pointers - declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f (<8 x i32 ()*>* , i32 , <8 x i1> , <8 x i32 ()*> ) + declare <16 x float> @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* , i32 , <16 x i1> , <16 x float> ) + declare <2 x double> @llvm.masked.load.v2f64.p0v2f64 (<2 x double>* , i32 , <2 x i1> , <2 x double> ) + ;; The data is a vector of pointers to double + declare <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64 (<8 x double*>* , i32 , <8 x i1> , <8 x double*> ) + ;; The data is a vector of function pointers + declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f (<8 x i32 ()*>* , i32 , <8 x i1> , <8 x i32 ()*> ) + +Overview: +""""""""" + +Reads a vector from memory according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. The masked-off lanes in the result vector are taken from the corresponding lanes of the '``passthru``' operand. + + +Arguments: +"""""""""" + +The first operand is the base pointer for the load. The second operand is the alignment of the source location. It must be a constant integer value. The third operand, mask, is a vector of boolean values with the same number of elements as the return type. The fourth is a pass-through value that is used to fill the masked-off lanes of the result. The return type, underlying type of the base pointer and the type of the '``passthru``' operand are the same vector types. + + +Semantics: +"""""""""" + +The '``llvm.masked.load``' intrinsic is designed for conditional reading of selected vector elements in a single IR operation. It is useful for targets that support vector masked loads and allows vectorizing predicated basic blocks on these targets. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar load operations. +The result of this operation is equivalent to a regular vector load instruction followed by a 'select' between the loaded and the passthru values, predicated on the same mask. However, using this intrinsic prevents exceptions on memory access to masked-off lanes. + + +:: + + %res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* %ptr, i32 4, <16 x i1>%mask, <16 x float> %passthru) + + ;; The result of the two following instructions is identical aside from potential memory access exception + %loadlal = load <16 x float>, <16 x float>* %ptr, align 4 + %res = select <16 x i1> %mask, <16 x float> %loadlal, <16 x float> %passthru + +.. _int_mstore: + +'``llvm.masked.store.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. + +:: + + declare void @llvm.masked.store.v8i32.p0v8i32 (<8 x i32> , <8 x i32>* , i32 , <8 x i1> ) + declare void @llvm.masked.store.v16f32.p0v16f32 (<16 x float> , <16 x float>* , i32 , <16 x i1> ) + ;; The data is a vector of pointers to double + declare void @llvm.masked.store.v8p0f64.p0v8p0f64 (<8 x double*> , <8 x double*>* , i32 , <8 x i1> ) + ;; The data is a vector of function pointers + declare void @llvm.masked.store.v4p0f_i32f.p0v4p0f_i32f (<4 x i32 ()*> , <4 x i32 ()*>* , i32 , <4 x i1> ) + +Overview: +""""""""" + +Writes a vector to memory according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. + +Arguments: +"""""""""" + +The first operand is the vector value to be written to memory. The second operand is the base pointer for the store, it has the same underlying type as the value operand. The third operand is the alignment of the destination location. The fourth operand, mask, is a vector of boolean values. The types of the mask and the value operand must have the same number of vector elements. + + +Semantics: +"""""""""" + +The '``llvm.masked.store``' intrinsics is designed for conditional writing of selected vector elements in a single IR operation. It is useful for targets that support vector masked store and allows vectorizing predicated basic blocks on these targets. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations. +The result of this operation is equivalent to a load-modify-store sequence. However, using this intrinsic prevents exceptions and data races on memory access to masked-off lanes. + +:: + + call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %value, <16 x float>* %ptr, i32 4, <16 x i1> %mask) + + ;; The result of the following instructions is identical aside from potential data races and memory access exceptions + %oldval = load <16 x float>, <16 x float>* %ptr, align 4 + %res = select <16 x i1> %mask, <16 x float> %value, <16 x float> %oldval + store <16 x float> %res, <16 x float>* %ptr, align 4 + + +Masked Vector Gather and Scatter Intrinsics +------------------------------------------- + +LLVM provides intrinsics for vector gather and scatter operations. They are similar to :ref:`Masked Vector Load and Store `, except they are designed for arbitrary memory accesses, rather than sequential memory accesses. Gather and scatter also employ a mask operand, which holds one bit per vector element, switching the associated vector lane on or off. The memory addresses corresponding to the "off" lanes are not accessed. When all bits are off, no memory is accessed. + +.. _int_mgather: + +'``llvm.masked.gather.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer, floating point or pointer data type gathered together into one vector. + +:: + + declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32 (<16 x float*> , i32 , <16 x i1> , <16 x float> ) + declare <2 x double> @llvm.masked.gather.v2f64.v2p1f64 (<2 x double addrspace(1)*> , i32 , <2 x i1> , <2 x double> ) + declare <8 x float*> @llvm.masked.gather.v8p0f32.v8p0p0f32 (<8 x float**> , i32 , <8 x i1> , <8 x float*> ) + +Overview: +""""""""" + +Reads scalar values from arbitrary memory locations and gathers them into one vector. The memory locations are provided in the vector of pointers '``ptrs``'. The memory is accessed according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. The masked-off lanes in the result vector are taken from the corresponding lanes of the '``passthru``' operand. + + +Arguments: +"""""""""" + +The first operand is a vector of pointers which holds all memory addresses to read. The second operand is an alignment of the source addresses. It must be a constant integer value. The third operand, mask, is a vector of boolean values with the same number of elements as the return type. The fourth is a pass-through value that is used to fill the masked-off lanes of the result. The return type, underlying type of the vector of pointers and the type of the '``passthru``' operand are the same vector types. + + +Semantics: +"""""""""" + +The '``llvm.masked.gather``' intrinsic is designed for conditional reading of multiple scalar values from arbitrary memory locations in a single IR operation. It is useful for targets that support vector masked gathers and allows vectorizing basic blocks with data and control divergence. Other targets may support this intrinsic differently, for example by lowering it into a sequence of scalar load operations. +The semantics of this operation are equivalent to a sequence of conditional scalar loads with subsequent gathering all loaded values into a single vector. The mask restricts memory access to certain lanes and facilitates vectorization of predicated basic blocks. + + +:: + + %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64 (<4 x double*> %ptrs, i32 8, <4 x i1> , <4 x double> undef) + + ;; The gather with all-true mask is equivalent to the following instruction sequence + %ptr0 = extractelement <4 x double*> %ptrs, i32 0 + %ptr1 = extractelement <4 x double*> %ptrs, i32 1 + %ptr2 = extractelement <4 x double*> %ptrs, i32 2 + %ptr3 = extractelement <4 x double*> %ptrs, i32 3 + + %val0 = load double, double* %ptr0, align 8 + %val1 = load double, double* %ptr1, align 8 + %val2 = load double, double* %ptr2, align 8 + %val3 = load double, double* %ptr3, align 8 + + %vec0 = insertelement <4 x double>undef, %val0, 0 + %vec01 = insertelement <4 x double>%vec0, %val1, 1 + %vec012 = insertelement <4 x double>%vec01, %val2, 2 + %vec0123 = insertelement <4 x double>%vec012, %val3, 3 + +.. _int_mscatter: + +'``llvm.masked.scatter.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. Each vector element is stored in an arbitrary memory address. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element. + +:: + + declare void @llvm.masked.scatter.v8i32.v8p0i32 (<8 x i32> , <8 x i32*> , i32 , <8 x i1> ) + declare void @llvm.masked.scatter.v16f32.v16p1f32 (<16 x float> , <16 x float addrspace(1)*> , i32 , <16 x i1> ) + declare void @llvm.masked.scatter.v4p0f64.v4p0p0f64 (<4 x double*> , <4 x double**> , i32 , <4 x i1> ) + +Overview: +""""""""" + +Writes each element from the value vector to the corresponding memory address. The memory addresses are represented as a vector of pointers. Writing is done according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. + +Arguments: +"""""""""" + +The first operand is a vector value to be written to memory. The second operand is a vector of pointers, pointing to where the value elements should be stored. It has the same underlying type as the value operand. The third operand is an alignment of the destination addresses. The fourth operand, mask, is a vector of boolean values. The types of the mask and the value operand must have the same number of vector elements. + + +Semantics: +"""""""""" + +The '``llvm.masked.scatter``' intrinsics is designed for writing selected vector elements to arbitrary memory addresses in a single IR operation. The operation may be conditional, when not all bits in the mask are switched on. It is useful for targets that support vector masked scatter and allows vectorizing basic blocks with data and control divergence. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations. + +:: + + ;; This instruction unconditionally stores data vector in multiple addresses + call @llvm.masked.scatter.v8i32.v8p0i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4, <8 x i1> ) + + ;; It is equivalent to a list of scalar stores + %val0 = extractelement <8 x i32> %value, i32 0 + %val1 = extractelement <8 x i32> %value, i32 1 + .. + %val7 = extractelement <8 x i32> %value, i32 7 + %ptr0 = extractelement <8 x i32*> %ptrs, i32 0 + %ptr1 = extractelement <8 x i32*> %ptrs, i32 1 + .. + %ptr7 = extractelement <8 x i32*> %ptrs, i32 7 + ;; Note: the order of the following stores is important when they overlap: + store i32 %val0, i32* %ptr0, align 4 + store i32 %val1, i32* %ptr1, align 4 + .. + store i32 %val7, i32* %ptr7, align 4 + + +Memory Use Markers +------------------ + +This class of intrinsics provides information about the lifetime of +memory objects and ranges where variables are immutable. + +.. _int_lifestart: + +'``llvm.lifetime.start``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.lifetime.start(i64 , i8* nocapture ) + +Overview: +""""""""" + +The '``llvm.lifetime.start``' intrinsic specifies the start of a memory +object's lifetime. + +Arguments: +"""""""""" + +The first argument is a constant integer representing the size of the +object, or -1 if it is variable sized. The second argument is a pointer +to the object. + +Semantics: +"""""""""" + +This intrinsic indicates that before this point in the code, the value +of the memory pointed to by ``ptr`` is dead. This means that it is known +to never be used and has an undefined value. A load from the pointer +that precedes this intrinsic can be replaced with ``'undef'``. + +.. _int_lifeend: + +'``llvm.lifetime.end``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare void @llvm.lifetime.end(i64 , i8* nocapture ) + +Overview: +""""""""" + +The '``llvm.lifetime.end``' intrinsic specifies the end of a memory +object's lifetime. + +Arguments: +"""""""""" + +The first argument is a constant integer representing the size of the +object, or -1 if it is variable sized. The second argument is a pointer +to the object. + +Semantics: +"""""""""" + +This intrinsic indicates that after this point in the code, the value of +the memory pointed to by ``ptr`` is dead. This means that it is known to +never be used and has an undefined value. Any stores into the memory +object following this intrinsic may be removed as dead. + +'``llvm.invariant.start``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. The memory object can belong to any address space. + +:: + + declare {}* @llvm.invariant.start.p0i8(i64 , i8* nocapture ) + +Overview: +""""""""" + +The '``llvm.invariant.start``' intrinsic specifies that the contents of +a memory object will not change. + +Arguments: +"""""""""" + +The first argument is a constant integer representing the size of the +object, or -1 if it is variable sized. The second argument is a pointer +to the object. + +Semantics: +"""""""""" + +This intrinsic indicates that until an ``llvm.invariant.end`` that uses +the return value, the referenced memory location is constant and +unchanging. + +'``llvm.invariant.end``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. The memory object can belong to any address space. + +:: + + declare void @llvm.invariant.end.p0i8({}* , i64 , i8* nocapture ) + +Overview: +""""""""" + +The '``llvm.invariant.end``' intrinsic specifies that the contents of a +memory object are mutable. + +Arguments: +"""""""""" + +The first argument is the matching ``llvm.invariant.start`` intrinsic. +The second argument is a constant integer representing the size of the +object, or -1 if it is variable sized and the third argument is a +pointer to the object. + +Semantics: +"""""""""" + +This intrinsic indicates that the memory is mutable again. + +'``llvm.invariant.group.barrier``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare i8* @llvm.invariant.group.barrier(i8* ) + +Overview: +""""""""" + +The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant +established by invariant.group metadata no longer holds, to obtain a new pointer +value that does not carry the invariant information. + + +Arguments: +"""""""""" + +The ``llvm.invariant.group.barrier`` takes only one argument, which is +the pointer to the memory for which the ``invariant.group`` no longer holds. + +Semantics: +"""""""""" + +Returns another pointer that aliases its argument but which is considered different +for the purposes of ``load``/``store`` ``invariant.group`` metadata. + +Constrained Floating Point Intrinsics +------------------------------------- + +These intrinsics are used to provide special handling of floating point +operations when specific rounding mode or floating point exception behavior is +required. By default, LLVM optimization passes assume that the rounding mode is +round-to-nearest and that floating point exceptions will not be monitored. +Constrained FP intrinsics are used to support non-default rounding modes and +accurately preserve exception behavior without compromising LLVM's ability to +optimize FP code when the default behavior is used. + +Each of these intrinsics corresponds to a normal floating point operation. The +first two arguments and the return value are the same as the corresponding FP +operation. + +The third argument is a metadata argument specifying the rounding mode to be +assumed. This argument must be one of the following strings: + +:: + + "round.dynamic" + "round.tonearest" + "round.downward" + "round.upward" + "round.towardzero" + +If this argument is "round.dynamic" optimization passes must assume that the +rounding mode is unknown and may change at runtime. No transformations that +depend on rounding mode may be performed in this case. + +The other possible values for the rounding mode argument correspond to the +similarly named IEEE rounding modes. If the argument is any of these values +optimization passes may perform transformations as long as they are consistent +with the specified rounding mode. + +For example, 'x-0'->'x' is not a valid transformation if the rounding mode is +"round.downward" or "round.dynamic" because if the value of 'x' is +0 then +'x-0' should evaluate to '-0' when rounding downward. However, this +transformation is legal for all other rounding modes. + +For values other than "round.dynamic" optimization passes may assume that the +actual runtime rounding mode (as defined in a target-specific manner) matches +the specified rounding mode, but this is not guaranteed. Using a specific +non-dynamic rounding mode which does not match the actual rounding mode at +runtime results in undefined behavior. + +The fourth argument to the constrained floating point intrinsics specifies the +required exception behavior. This argument must be one of the following +strings: + +:: + + "fpexcept.ignore" + "fpexcept.maytrap" + "fpexcept.strict" + +If this argument is "fpexcept.ignore" optimization passes may assume that the +exception status flags will not be read and that floating point exceptions will +be masked. This allows transformations to be performed that may change the +exception semantics of the original code. For example, FP operations may be +speculatively executed in this case whereas they must not be for either of the +other possible values of this argument. + +If the exception behavior argument is "fpexcept.maytrap" optimization passes +must avoid transformations that may raise exceptions that would not have been +raised by the original code (such as speculatively executing FP operations), but +passes are not required to preserve all exceptions that are implied by the +original code. For example, exceptions may be potentially hidden by constant +folding. + +If the exception behavior argument is "fpexcept.strict" all transformations must +strictly preserve the floating point exception semantics of the original code. +Any FP exception that would have been raised by the original code must be raised +by the transformed code, and the transformed code must not raise any FP +exceptions that would not have been raised by the original code. This is the +exception behavior argument that will be used if the code being compiled reads +the FP exception status flags, but this mode can also be used with code that +unmasks FP exceptions. + +The number and order of floating point exceptions is NOT guaranteed. For +example, a series of FP operations that each may raise exceptions may be +vectorized into a single instruction that raises each unique exception a single +time. + + +'``llvm.experimental.constrained.fadd``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare + @llvm.experimental.constrained.fadd( , , + metadata , + metadata ) Overview: """"""""" -Reads a vector from memory according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. The masked-off lanes in the result vector are taken from the corresponding lanes of the '``passthru``' operand. +The '``llvm.experimental.constrained.fadd``' intrinsic returns the sum of its +two operands. Arguments: """""""""" -The first operand is the base pointer for the load. The second operand is the alignment of the source location. It must be a constant integer value. The third operand, mask, is a vector of boolean values with the same number of elements as the return type. The fourth is a pass-through value that is used to fill the masked-off lanes of the result. The return type, underlying type of the base pointer and the type of the '``passthru``' operand are the same vector types. +The first two arguments to the '``llvm.experimental.constrained.fadd``' +intrinsic must be :ref:`floating point ` or :ref:`vector ` +of floating point values. Both arguments must have identical types. +The third and fourth arguments specify the rounding mode and exception +behavior as described above. Semantics: """""""""" -The '``llvm.masked.load``' intrinsic is designed for conditional reading of selected vector elements in a single IR operation. It is useful for targets that support vector masked loads and allows vectorizing predicated basic blocks on these targets. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar load operations. -The result of this operation is equivalent to a regular vector load instruction followed by a 'select' between the loaded and the passthru values, predicated on the same mask. However, using this intrinsic prevents exceptions on memory access to masked-off lanes. +The value produced is the floating point sum of the two value operands and has +the same type as the operands. + +'``llvm.experimental.constrained.fsub``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" :: - %res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* %ptr, i32 4, <16 x i1>%mask, <16 x float> %passthru) + declare + @llvm.experimental.constrained.fsub( , , + metadata , + metadata ) - ;; The result of the two following instructions is identical aside from potential memory access exception - %loadlal = load <16 x float>, <16 x float>* %ptr, align 4 - %res = select <16 x i1> %mask, <16 x float> %loadlal, <16 x float> %passthru +Overview: +""""""""" -.. _int_mstore: +The '``llvm.experimental.constrained.fsub``' intrinsic returns the difference +of its two operands. + + +Arguments: +"""""""""" + +The first two arguments to the '``llvm.experimental.constrained.fsub``' +intrinsic must be :ref:`floating point ` or :ref:`vector ` +of floating point values. Both arguments must have identical types. + +The third and fourth arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +The value produced is the floating point difference of the two value operands +and has the same type as the operands. -'``llvm.masked.store.*``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +'``llvm.experimental.constrained.fmul``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" -This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. :: - declare void @llvm.masked.store.v8i32.p0v8i32 (<8 x i32> , <8 x i32>* , i32 , <8 x i1> ) - declare void @llvm.masked.store.v16f32.p0v16f32 (<16 x float> , <16 x float>* , i32 , <16 x i1> ) - ;; The data is a vector of pointers to double - declare void @llvm.masked.store.v8p0f64.p0v8p0f64 (<8 x double*> , <8 x double*>* , i32 , <8 x i1> ) - ;; The data is a vector of function pointers - declare void @llvm.masked.store.v4p0f_i32f.p0v4p0f_i32f (<4 x i32 ()*> , <4 x i32 ()*>* , i32 , <4 x i1> ) + declare + @llvm.experimental.constrained.fmul( , , + metadata , + metadata ) Overview: """"""""" -Writes a vector to memory according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. +The '``llvm.experimental.constrained.fmul``' intrinsic returns the product of +its two operands. + Arguments: """""""""" -The first operand is the vector value to be written to memory. The second operand is the base pointer for the store, it has the same underlying type as the value operand. The third operand is the alignment of the destination location. The fourth operand, mask, is a vector of boolean values. The types of the mask and the value operand must have the same number of vector elements. +The first two arguments to the '``llvm.experimental.constrained.fmul``' +intrinsic must be :ref:`floating point ` or :ref:`vector ` +of floating point values. Both arguments must have identical types. +The third and fourth arguments specify the rounding mode and exception +behavior as described above. Semantics: """""""""" -The '``llvm.masked.store``' intrinsics is designed for conditional writing of selected vector elements in a single IR operation. It is useful for targets that support vector masked store and allows vectorizing predicated basic blocks on these targets. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations. -The result of this operation is equivalent to a load-modify-store sequence. However, using this intrinsic prevents exceptions and data races on memory access to masked-off lanes. +The value produced is the floating point product of the two value operands and +has the same type as the operands. + + +'``llvm.experimental.constrained.fdiv``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" :: - call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %value, <16 x float>* %ptr, i32 4, <16 x i1> %mask) + declare + @llvm.experimental.constrained.fdiv( , , + metadata , + metadata ) - ;; The result of the following instructions is identical aside from potential data races and memory access exceptions - %oldval = load <16 x float>, <16 x float>* %ptr, align 4 - %res = select <16 x i1> %mask, <16 x float> %value, <16 x float> %oldval - store <16 x float> %res, <16 x float>* %ptr, align 4 +Overview: +""""""""" +The '``llvm.experimental.constrained.fdiv``' intrinsic returns the quotient of +its two operands. -Masked Vector Gather and Scatter Intrinsics -------------------------------------------- -LLVM provides intrinsics for vector gather and scatter operations. They are similar to :ref:`Masked Vector Load and Store `, except they are designed for arbitrary memory accesses, rather than sequential memory accesses. Gather and scatter also employ a mask operand, which holds one bit per vector element, switching the associated vector lane on or off. The memory addresses corresponding to the "off" lanes are not accessed. When all bits are off, no memory is accessed. +Arguments: +"""""""""" -.. _int_mgather: +The first two arguments to the '``llvm.experimental.constrained.fdiv``' +intrinsic must be :ref:`floating point ` or :ref:`vector ` +of floating point values. Both arguments must have identical types. -'``llvm.masked.gather.*``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The third and fourth arguments specify the rounding mode and exception +behavior as described above. + +Semantics: +"""""""""" + +The value produced is the floating point quotient of the two value operands and +has the same type as the operands. + + +'``llvm.experimental.constrained.frem``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" -This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer, floating point or pointer data type gathered together into one vector. :: - declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> , i32 , <16 x i1> , <16 x float> ) - declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> , i32 , <2 x i1> , <2 x double> ) - declare <8 x float*> @llvm.masked.gather.v8p0f32 (<8 x float**> , i32 , <8 x i1> , <8 x float*> ) + declare + @llvm.experimental.constrained.frem( , , + metadata , + metadata ) Overview: """"""""" -Reads scalar values from arbitrary memory locations and gathers them into one vector. The memory locations are provided in the vector of pointers '``ptrs``'. The memory is accessed according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. The masked-off lanes in the result vector are taken from the corresponding lanes of the '``passthru``' operand. +The '``llvm.experimental.constrained.frem``' intrinsic returns the remainder +from the division of its two operands. Arguments: """""""""" -The first operand is a vector of pointers which holds all memory addresses to read. The second operand is an alignment of the source addresses. It must be a constant integer value. The third operand, mask, is a vector of boolean values with the same number of elements as the return type. The fourth is a pass-through value that is used to fill the masked-off lanes of the result. The return type, underlying type of the vector of pointers and the type of the '``passthru``' operand are the same vector types. +The first two arguments to the '``llvm.experimental.constrained.frem``' +intrinsic must be :ref:`floating point ` or :ref:`vector ` +of floating point values. Both arguments must have identical types. +The third and fourth arguments specify the rounding mode and exception +behavior as described above. The rounding mode argument has no effect, since +the result of frem is never rounded, but the argument is included for +consistency with the other constrained floating point intrinsics. Semantics: """""""""" -The '``llvm.masked.gather``' intrinsic is designed for conditional reading of multiple scalar values from arbitrary memory locations in a single IR operation. It is useful for targets that support vector masked gathers and allows vectorizing basic blocks with data and control divergence. Other targets may support this intrinsic differently, for example by lowering it into a sequence of scalar load operations. -The semantics of this operation are equivalent to a sequence of conditional scalar loads with subsequent gathering all loaded values into a single vector. The mask restricts memory access to certain lanes and facilitates vectorization of predicated basic blocks. - - -:: +The value produced is the floating point remainder from the division of the two +value operands and has the same type as the operands. The remainder has the +same sign as the dividend. - %res = call <4 x double> @llvm.masked.gather.v4f64 (<4 x double*> %ptrs, i32 8, <4 x i1> , <4 x double> undef) - ;; The gather with all-true mask is equivalent to the following instruction sequence - %ptr0 = extractelement <4 x double*> %ptrs, i32 0 - %ptr1 = extractelement <4 x double*> %ptrs, i32 1 - %ptr2 = extractelement <4 x double*> %ptrs, i32 2 - %ptr3 = extractelement <4 x double*> %ptrs, i32 3 +Constrained libm-equivalent Intrinsics +-------------------------------------- - %val0 = load double, double* %ptr0, align 8 - %val1 = load double, double* %ptr1, align 8 - %val2 = load double, double* %ptr2, align 8 - %val3 = load double, double* %ptr3, align 8 +In addition to the basic floating point operations for which constrained +intrinsics are described above, there are constrained versions of various +operations which provide equivalent behavior to a corresponding libm function. +These intrinsics allow the precise behavior of these operations with respect to +rounding mode and exception behavior to be controlled. - %vec0 = insertelement <4 x double>undef, %val0, 0 - %vec01 = insertelement <4 x double>%vec0, %val1, 1 - %vec012 = insertelement <4 x double>%vec01, %val2, 2 - %vec0123 = insertelement <4 x double>%vec012, %val3, 3 +As with the basic constrained floating point intrinsics, the rounding mode +and exception behavior arguments only control the behavior of the optimizer. +They do not change the runtime floating point environment. -.. _int_mscatter: -'``llvm.masked.scatter.*``' Intrinsics -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.constrained.sqrt``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" -This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. Each vector element is stored in an arbitrary memory address. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element. :: - declare void @llvm.masked.scatter.v8i32 (<8 x i32> , <8 x i32*> , i32 , <8 x i1> ) - declare void @llvm.masked.scatter.v16f32 (<16 x float> , <16 x float*> , i32 , <16 x i1> ) - declare void @llvm.masked.scatter.v4p0f64 (<4 x double*> , <4 x double**> , i32 , <4 x i1> ) + declare + @llvm.experimental.constrained.sqrt( , + metadata , + metadata ) Overview: """"""""" -Writes each element from the value vector to the corresponding memory address. The memory addresses are represented as a vector of pointers. Writing is done according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. +The '``llvm.experimental.constrained.sqrt``' intrinsic returns the square root +of the specified value, returning the same value as the libm '``sqrt``' +functions would, but without setting ``errno``. Arguments: """""""""" -The first operand is a vector value to be written to memory. The second operand is a vector of pointers, pointing to where the value elements should be stored. It has the same underlying type as the value operand. The third operand is an alignment of the destination addresses. The fourth operand, mask, is a vector of boolean values. The types of the mask and the value operand must have the same number of vector elements. +The first argument and the return type are floating point numbers of the same +type. +The second and third arguments specify the rounding mode and exception +behavior as described above. Semantics: """""""""" -The '``llvm.masked.scatter``' intrinsics is designed for writing selected vector elements to arbitrary memory addresses in a single IR operation. The operation may be conditional, when not all bits in the mask are switched on. It is useful for targets that support vector masked scatter and allows vectorizing basic blocks with data and control divergence. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations. - -:: - - ;; This instruction unconditionally stores data vector in multiple addresses - call @llvm.masked.scatter.v8i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4, <8 x i1> ) - - ;; It is equivalent to a list of scalar stores - %val0 = extractelement <8 x i32> %value, i32 0 - %val1 = extractelement <8 x i32> %value, i32 1 - .. - %val7 = extractelement <8 x i32> %value, i32 7 - %ptr0 = extractelement <8 x i32*> %ptrs, i32 0 - %ptr1 = extractelement <8 x i32*> %ptrs, i32 1 - .. - %ptr7 = extractelement <8 x i32*> %ptrs, i32 7 - ;; Note: the order of the following stores is important when they overlap: - store i32 %val0, i32* %ptr0, align 4 - store i32 %val1, i32* %ptr1, align 4 - .. - store i32 %val7, i32* %ptr7, align 4 - - -Memory Use Markers ------------------- - -This class of intrinsics provides information about the lifetime of -memory objects and ranges where variables are immutable. +This function returns the nonnegative square root of the specified value. +If the value is less than negative zero, a floating point exception occurs +and the the return value is architecture specific. -.. _int_lifestart: -'``llvm.lifetime.start``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.constrained.pow``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare void @llvm.lifetime.start(i64 , i8* nocapture ) + declare + @llvm.experimental.constrained.pow( , , + metadata , + metadata ) Overview: """"""""" -The '``llvm.lifetime.start``' intrinsic specifies the start of a memory -object's lifetime. +The '``llvm.experimental.constrained.pow``' intrinsic returns the first operand +raised to the (positive or negative) power specified by the second operand. Arguments: """""""""" -The first argument is a constant integer representing the size of the -object, or -1 if it is variable sized. The second argument is a pointer -to the object. +The first two arguments and the return value are floating point numbers of the +same type. The second argument specifies the power to which the first argument +should be raised. + +The third and fourth arguments specify the rounding mode and exception +behavior as described above. Semantics: """""""""" -This intrinsic indicates that before this point in the code, the value -of the memory pointed to by ``ptr`` is dead. This means that it is known -to never be used and has an undefined value. A load from the pointer -that precedes this intrinsic can be replaced with ``'undef'``. +This function returns the first value raised to the second power, +returning the same values as the libm ``pow`` functions would, and +handles error conditions in the same way. -.. _int_lifeend: -'``llvm.lifetime.end``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.constrained.powi``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare void @llvm.lifetime.end(i64 , i8* nocapture ) + declare + @llvm.experimental.constrained.powi( , i32 , + metadata , + metadata ) Overview: """"""""" -The '``llvm.lifetime.end``' intrinsic specifies the end of a memory -object's lifetime. +The '``llvm.experimental.constrained.powi``' intrinsic returns the first operand +raised to the (positive or negative) power specified by the second operand. The +order of evaluation of multiplications is not defined. When a vector of floating +point type is used, the second argument remains a scalar integer value. + Arguments: """""""""" -The first argument is a constant integer representing the size of the -object, or -1 if it is variable sized. The second argument is a pointer -to the object. +The first argument and the return value are floating point numbers of the same +type. The second argument is a 32-bit signed integer specifying the power to +which the first argument should be raised. + +The third and fourth arguments specify the rounding mode and exception +behavior as described above. Semantics: """""""""" -This intrinsic indicates that after this point in the code, the value of -the memory pointed to by ``ptr`` is dead. This means that it is known to -never be used and has an undefined value. Any stores into the memory -object following this intrinsic may be removed as dead. +This function returns the first value raised to the second power with an +unspecified sequence of rounding operations. -'``llvm.invariant.start``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +'``llvm.experimental.constrained.sin``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" -This is an overloaded intrinsic. The memory object can belong to any address space. :: - declare {}* @llvm.invariant.start.p0i8(i64 , i8* nocapture ) + declare + @llvm.experimental.constrained.sin( , + metadata , + metadata ) Overview: """"""""" -The '``llvm.invariant.start``' intrinsic specifies that the contents of -a memory object will not change. +The '``llvm.experimental.constrained.sin``' intrinsic returns the sine of the +first operand. Arguments: """""""""" -The first argument is a constant integer representing the size of the -object, or -1 if it is variable sized. The second argument is a pointer -to the object. +The first argument and the return type are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. Semantics: """""""""" -This intrinsic indicates that until an ``llvm.invariant.end`` that uses -the return value, the referenced memory location is constant and -unchanging. +This function returns the sine of the specified operand, returning the +same values as the libm ``sin`` functions would, and handles error +conditions in the same way. -'``llvm.invariant.end``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +'``llvm.experimental.constrained.cos``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" -This is an overloaded intrinsic. The memory object can belong to any address space. :: - declare void @llvm.invariant.end.p0i8({}* , i64 , i8* nocapture ) + declare + @llvm.experimental.constrained.cos( , + metadata , + metadata ) Overview: """"""""" -The '``llvm.invariant.end``' intrinsic specifies that the contents of a -memory object are mutable. +The '``llvm.experimental.constrained.cos``' intrinsic returns the cosine of the +first operand. Arguments: """""""""" -The first argument is the matching ``llvm.invariant.start`` intrinsic. -The second argument is a constant integer representing the size of the -object, or -1 if it is variable sized and the third argument is a -pointer to the object. +The first argument and the return type are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. Semantics: """""""""" -This intrinsic indicates that the memory is mutable again. +This function returns the cosine of the specified operand, returning the +same values as the libm ``cos`` functions would, and handles error +conditions in the same way. -'``llvm.invariant.group.barrier``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +'``llvm.experimental.constrained.exp``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" :: - declare i8* @llvm.invariant.group.barrier(i8* ) + declare + @llvm.experimental.constrained.exp( , + metadata , + metadata ) Overview: """"""""" -The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant -established by invariant.group metadata no longer holds, to obtain a new pointer -value that does not carry the invariant information. - +The '``llvm.experimental.constrained.exp``' intrinsic computes the base-e +exponential of the specified value. Arguments: """""""""" -The ``llvm.invariant.group.barrier`` takes only one argument, which is -the pointer to the memory for which the ``invariant.group`` no longer holds. +The first argument and the return value are floating point numbers of the same +type. + +The second and third arguments specify the rounding mode and exception +behavior as described above. Semantics: """""""""" -Returns another pointer that aliases its argument but which is considered different -for the purposes of ``load``/``store`` ``invariant.group`` metadata. - -Constrained Floating Point Intrinsics -------------------------------------- +This function returns the same values as the libm ``exp`` functions +would, and handles error conditions in the same way. -These intrinsics are used to provide special handling of floating point -operations when specific rounding mode or floating point exception behavior is -required. By default, LLVM optimization passes assume that the rounding mode is -round-to-nearest and that floating point exceptions will not be monitored. -Constrained FP intrinsics are used to support non-default rounding modes and -accurately preserve exception behavior without compromising LLVM's ability to -optimize FP code when the default behavior is used. -Each of these intrinsics corresponds to a normal floating point operation. The -first two arguments and the return value are the same as the corresponding FP -operation. +'``llvm.experimental.constrained.exp2``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The third argument is a metadata argument specifying the rounding mode to be -assumed. This argument must be one of the following strings: +Syntax: +""""""" :: - "round.dynamic" - "round.tonearest" - "round.downward" - "round.upward" - "round.towardzero" - -If this argument is "round.dynamic" optimization passes must assume that the -rounding mode is unknown and may change at runtime. No transformations that -depend on rounding mode may be performed in this case. -The other possible values for the rounding mode argument correspond to the -similarly named IEEE rounding modes. If the argument is any of these values -optimization passes may perform transformations as long as they are consistent -with the specified rounding mode. + declare + @llvm.experimental.constrained.exp2( , + metadata , + metadata ) -For example, 'x-0'->'x' is not a valid transformation if the rounding mode is -"round.downward" or "round.dynamic" because if the value of 'x' is +0 then -'x-0' should evaluate to '-0' when rounding downward. However, this -transformation is legal for all other rounding modes. +Overview: +""""""""" -For values other than "round.dynamic" optimization passes may assume that the -actual runtime rounding mode (as defined in a target-specific manner) matches -the specified rounding mode, but this is not guaranteed. Using a specific -non-dynamic rounding mode which does not match the actual rounding mode at -runtime results in undefined behavior. +The '``llvm.experimental.constrained.exp2``' intrinsic computes the base-2 +exponential of the specified value. -The fourth argument to the constrained floating point intrinsics specifies the -required exception behavior. This argument must be one of the following -strings: -:: - "fpexcept.ignore" - "fpexcept.maytrap" - "fpexcept.strict" +Arguments: +"""""""""" -If this argument is "fpexcept.ignore" optimization passes may assume that the -exception status flags will not be read and that floating point exceptions will -be masked. This allows transformations to be performed that may change the -exception semantics of the original code. For example, FP operations may be -speculatively executed in this case whereas they must not be for either of the -other possible values of this argument. +The first argument and the return value are floating point numbers of the same +type. -If the exception behavior argument is "fpexcept.maytrap" optimization passes -must avoid transformations that may raise exceptions that would not have been -raised by the original code (such as speculatively executing FP operations), but -passes are not required to preserve all exceptions that are implied by the -original code. For example, exceptions may be potentially hidden by constant -folding. +The second and third arguments specify the rounding mode and exception +behavior as described above. -If the exception behavior argument is "fpexcept.strict" all transformations must -strictly preserve the floating point exception semantics of the original code. -Any FP exception that would have been raised by the original code must be raised -by the transformed code, and the transformed code must not raise any FP -exceptions that would not have been raised by the original code. This is the -exception behavior argument that will be used if the code being compiled reads -the FP exception status flags, but this mode can also be used with code that -unmasks FP exceptions. +Semantics: +"""""""""" -The number and order of floating point exceptions is NOT guaranteed. For -example, a series of FP operations that each may raise exceptions may be -vectorized into a single instruction that raises each unique exception a single -time. +This function returns the same values as the libm ``exp2`` functions +would, and handles error conditions in the same way. -'``llvm.experimental.constrained.fadd``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.constrained.log``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" @@ -12361,36 +13238,35 @@ Syntax: :: declare - @llvm.experimental.constrained.fadd( , , - metadata , - metadata ) + @llvm.experimental.constrained.log( , + metadata , + metadata ) Overview: """"""""" -The '``llvm.experimental.constrained.fadd``' intrinsic returns the sum of its -two operands. - +The '``llvm.experimental.constrained.log``' intrinsic computes the base-e +logarithm of the specified value. Arguments: """""""""" -The first two arguments to the '``llvm.experimental.constrained.fadd``' -intrinsic must be :ref:`floating point ` or :ref:`vector ` -of floating point values. Both arguments must have identical types. +The first argument and the return value are floating point numbers of the same +type. -The third and fourth arguments specify the rounding mode and exception +The second and third arguments specify the rounding mode and exception behavior as described above. + Semantics: """""""""" -The value produced is the floating point sum of the two value operands and has -the same type as the operands. +This function returns the same values as the libm ``log`` functions +would, and handles error conditions in the same way. -'``llvm.experimental.constrained.fsub``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.constrained.log10``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" @@ -12398,35 +13274,33 @@ Syntax: :: declare - @llvm.experimental.constrained.fsub( , , - metadata , - metadata ) + @llvm.experimental.constrained.log10( , + metadata , + metadata ) Overview: """"""""" -The '``llvm.experimental.constrained.fsub``' intrinsic returns the difference -of its two operands. - +The '``llvm.experimental.constrained.log10``' intrinsic computes the base-10 +logarithm of the specified value. Arguments: """""""""" -The first two arguments to the '``llvm.experimental.constrained.fsub``' -intrinsic must be :ref:`floating point ` or :ref:`vector ` -of floating point values. Both arguments must have identical types. +The first argument and the return value are floating point numbers of the same +type. -The third and fourth arguments specify the rounding mode and exception +The second and third arguments specify the rounding mode and exception behavior as described above. Semantics: """""""""" -The value produced is the floating point difference of the two value operands -and has the same type as the operands. +This function returns the same values as the libm ``log10`` functions +would, and handles error conditions in the same way. -'``llvm.experimental.constrained.fmul``' Intrinsic +'``llvm.experimental.constrained.log2``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: @@ -12435,35 +13309,33 @@ Syntax: :: declare - @llvm.experimental.constrained.fmul( , , + @llvm.experimental.constrained.log2( , metadata , - metadata ) + metadata ) Overview: """"""""" -The '``llvm.experimental.constrained.fmul``' intrinsic returns the product of -its two operands. - +The '``llvm.experimental.constrained.log2``' intrinsic computes the base-2 +logarithm of the specified value. Arguments: """""""""" -The first two arguments to the '``llvm.experimental.constrained.fmul``' -intrinsic must be :ref:`floating point ` or :ref:`vector ` -of floating point values. Both arguments must have identical types. +The first argument and the return value are floating point numbers of the same +type. -The third and fourth arguments specify the rounding mode and exception +The second and third arguments specify the rounding mode and exception behavior as described above. Semantics: """""""""" -The value produced is the floating point product of the two value operands and -has the same type as the operands. +This function returns the same values as the libm ``log2`` functions +would, and handles error conditions in the same way. -'``llvm.experimental.constrained.fdiv``' Intrinsic +'``llvm.experimental.constrained.rint``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: @@ -12472,36 +13344,38 @@ Syntax: :: declare - @llvm.experimental.constrained.fdiv( , , + @llvm.experimental.constrained.rint( , metadata , - metadata ) + metadata ) Overview: """"""""" -The '``llvm.experimental.constrained.fdiv``' intrinsic returns the quotient of -its two operands. - +The '``llvm.experimental.constrained.rint``' intrinsic returns the first +operand rounded to the nearest integer. It may raise an inexact floating point +exception if the operand is not an integer. Arguments: """""""""" -The first two arguments to the '``llvm.experimental.constrained.fdiv``' -intrinsic must be :ref:`floating point ` or :ref:`vector ` -of floating point values. Both arguments must have identical types. +The first argument and the return value are floating point numbers of the same +type. -The third and fourth arguments specify the rounding mode and exception +The second and third arguments specify the rounding mode and exception behavior as described above. Semantics: """""""""" -The value produced is the floating point quotient of the two value operands and -has the same type as the operands. +This function returns the same values as the libm ``rint`` functions +would, and handles error conditions in the same way. The rounding mode is +described, not determined, by the rounding mode argument. The actual rounding +mode is determined by the runtime floating point environment. The rounding +mode argument is only intended as information to the compiler. -'``llvm.experimental.constrained.frem``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.experimental.constrained.nearbyint``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" @@ -12509,35 +13383,35 @@ Syntax: :: declare - @llvm.experimental.constrained.frem( , , - metadata , - metadata ) + @llvm.experimental.constrained.nearbyint( , + metadata , + metadata ) Overview: """"""""" -The '``llvm.experimental.constrained.frem``' intrinsic returns the remainder -from the division of its two operands. +The '``llvm.experimental.constrained.nearbyint``' intrinsic returns the first +operand rounded to the nearest integer. It will not raise an inexact floating +point exception if the operand is not an integer. Arguments: """""""""" -The first two arguments to the '``llvm.experimental.constrained.frem``' -intrinsic must be :ref:`floating point ` or :ref:`vector ` -of floating point values. Both arguments must have identical types. +The first argument and the return value are floating point numbers of the same +type. -The third and fourth arguments specify the rounding mode and exception -behavior as described above. The rounding mode argument has no effect, since -the result of frem is never rounded, but the argument is included for -consistency with the other constrained floating point intrinsics. +The second and third arguments specify the rounding mode and exception +behavior as described above. Semantics: """""""""" -The value produced is the floating point remainder from the division of the two -value operands and has the same type as the operands. The remainder has the -same sign as the dividend. +This function returns the same values as the libm ``nearbyint`` functions +would, and handles error conditions in the same way. The rounding mode is +described, not determined, by the rounding mode argument. The actual rounding +mode is determined by the runtime floating point environment. The rounding +mode argument is only intended as information to the compiler. General Intrinsics @@ -13194,62 +14068,66 @@ Element Wise Atomic Memory Intrinsics These intrinsics are similar to the standard library memory intrinsics except that they perform memory transfer as a sequence of atomic memory accesses. -.. _int_memcpy_element_atomic: +.. _int_memcpy_element_unordered_atomic: -'``llvm.memcpy.element.atomic``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +'``llvm.memcpy.element.unordered.atomic``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" -This is an overloaded intrinsic. You can use ``llvm.memcpy.element.atomic`` on +This is an overloaded intrinsic. You can use ``llvm.memcpy.element.unordered.atomic`` on any integer bit width and for different address spaces. Not all targets support all bit widths however. :: - declare void @llvm.memcpy.element.atomic.p0i8.p0i8(i8* , i8* , - i64 , i32 ) + declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* , + i8* , + i32 , + i32 ) + declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* , + i8* , + i64 , + i32 ) Overview: """"""""" -The '``llvm.memcpy.element.atomic.*``' intrinsic performs copy of a block of -memory from the source location to the destination location as a sequence of -unordered atomic memory accesses where each access is a multiple of -``element_size`` bytes wide and aligned at an element size boundary. For example -each element is accessed atomically in source and destination buffers. +The '``llvm.memcpy.element.unordered.atomic.*``' intrinsic is a specialization of the +'``llvm.memcpy.*``' intrinsic. It differs in that the ``dest`` and ``src`` are treated +as arrays with elements that are exactly ``element_size`` bytes, and the copy between +buffers uses a sequence of :ref:`unordered atomic ` load/store operations +that are a positive integer multiple of the ``element_size`` in size. Arguments: """""""""" -The first argument is a pointer to the destination, the second is a -pointer to the source. The third argument is an integer argument -specifying the number of elements to copy, the fourth argument is size of -the single element in bytes. +The first three arguments are the same as they are in the :ref:`@llvm.memcpy ` +intrinsic, with the added constraint that ``len`` is required to be a positive integer +multiple of the ``element_size``. If ``len`` is not a positive integer multiple of +``element_size``, then the behaviour of the intrinsic is undefined. -``element_size`` should be a power of two, greater than zero and less than -a target-specific atomic access size limit. +``element_size`` must be a compile-time constant positive power of two no greater than +target-specific atomic access size limit. -For each of the input pointers ``align`` parameter attribute must be specified. -It must be a power of two and greater than or equal to the ``element_size``. -Caller guarantees that both the source and destination pointers are aligned to -that boundary. +For each of the input pointers ``align`` parameter attribute must be specified. It +must be a power of two no less than the ``element_size``. Caller guarantees that +both the source and destination pointers are aligned to that boundary. Semantics: """""""""" -The '``llvm.memcpy.element.atomic.*``' intrinsic copies -'``num_elements`` * ``element_size``' bytes of memory from the source location to -the destination location. These locations are not allowed to overlap. Memory copy -is performed as a sequence of unordered atomic memory accesses where each access -is guaranteed to be a multiple of ``element_size`` bytes wide and aligned at an -element size boundary. +The '``llvm.memcpy.element.unordered.atomic.*``' intrinsic copies ``len`` bytes of +memory from the source location to the destination location. These locations are not +allowed to overlap. The memory copy is performed as a sequence of load/store operations +where each access is guaranteed to be a multiple of ``element_size`` bytes wide and +aligned at an ``element_size`` boundary. The order of the copy is unspecified. The same value may be read from the source buffer many times, but only one write is issued to the destination buffer per -element. It is well defined to have concurrent reads and writes to both source -and destination provided those reads and writes are at least unordered atomic. +element. It is well defined to have concurrent reads and writes to both source and +destination provided those reads and writes are unordered atomic when specified. This intrinsic does not provide any additional ordering guarantees over those provided by a set of unordered loads from the source location and stores to the @@ -13258,8 +14136,8 @@ destination. Lowering: """"""""" -In the most general case call to the '``llvm.memcpy.element.atomic.*``' is lowered -to a call to the symbol ``__llvm_memcpy_element_atomic_*``. Where '*' is replaced -with an actual element size. +In the most general case call to the '``llvm.memcpy.element.unordered.atomic.*``' is +lowered to a call to the symbol ``__llvm_memcpy_element_unordered_atomic_*``. Where '*' +is replaced with an actual element size. -Optimizer is allowed to inline memory copy when it's profitable to do so. +The optimizer is allowed to inline the memory copy when it's profitable to do so. diff --git a/docs/Lexicon.rst b/docs/Lexicon.rst index 5d16091e27e5ceed8c7f9c9b4a525b8ba09cc28a..ce7ed318fe4b6082bae409a12832a7131595085a 100644 --- a/docs/Lexicon.rst +++ b/docs/Lexicon.rst @@ -38,6 +38,13 @@ B **BB Vectorization** Basic-Block Vectorization +**BDCE** + Bit-tracking dead code elimination. Some bit-wise instructions (shifts, + ands, ors, etc.) "kill" some of their input bits -- that is, they make it + such that those bits can be either zero or one without affecting control or + data flow of a program. The BDCE pass removes instructions that only + compute these dead bits. + **BURS** Bottom Up Rewriting System --- A method of instruction selection for code generation. An example is the `BURG @@ -102,6 +109,13 @@ G Garbage Collection. The practice of using reachability analysis instead of explicit memory management to reclaim unused memory. +**GVN** + Global Value Numbering. GVN is a pass that partitions values computed by a + function into congruence classes. Values ending up in the same congruence + class are guaranteed to be the same for every execution of the program. + In that respect, congruency is a compile-time approximation of equivalence + of values at runtime. + H - @@ -242,6 +256,14 @@ S Superword-Level Parallelism, same as :ref:`Basic-Block Vectorization `. +**Splat** + Splat refers to a vector of identical scalar elements. + + The term is based on the PowerPC Altivec instructions that provided + this functionality in hardware. For example, "vsplth" and the corresponding + software intrinsic "vec_splat()". Examples of other hardware names for this + action include "duplicate" (ARM) and "broadcast" (x86). + **SRoA** Scalar Replacement of Aggregates diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst index a75dd38c7ea86bd3f377d4e204b2c809b105bbc2..5acfa04ce1f45b4c8722c01963fd88fbe514bd6f 100644 --- a/docs/LibFuzzer.rst +++ b/docs/LibFuzzer.rst @@ -87,10 +87,16 @@ Some important things to remember about fuzz targets: * Usually, the narrower the target the better. E.g. if your target can parse several data formats, split it into several targets, one per format. -Building --------- +Fuzzer Usage +------------ + +Very recent versions of Clang (> April 20 2017) include libFuzzer, +and no installation is necessary. +In order to fuzz your binary, use the `-fsanitize=fuzzer` flag during the compilation:: -Next, build the libFuzzer library as a static archive, without any sanitizer + clang -fsanitize=fuzzer,address mytarget.c + +Otherwise, build the libFuzzer library as a static archive, without any sanitizer options. Note that the libFuzzer library contains the ``main()`` function: .. code-block:: console @@ -299,6 +305,10 @@ The most important command line options are: - 1 : close ``stdout`` - 2 : close ``stderr`` - 3 : close both ``stdout`` and ``stderr``. +``-print_coverage`` + If 1, print coverage information as text at exit. +``-dump_coverage`` + If 1, dump coverage information as a .sancov file at exit. For the full list of flags run the fuzzer binary with ``-help=1``. @@ -537,12 +547,19 @@ You can get the coverage for your corpus like this: .. code-block:: console - ASAN_OPTIONS=coverage=1 ./fuzzer CORPUS_DIR -runs=0 + ./fuzzer CORPUS_DIR -runs=0 -print_coverage=1 This will run all tests in the CORPUS_DIR but will not perform any fuzzing. -At the end of the process it will dump a single ``.sancov`` file with coverage -information. See SanitizerCoverage_ for details on querying the file using the -``sancov`` tool. +At the end of the process it will print text describing what code has been covered and what hasn't. + +Alternatively, use + +.. code-block:: console + + ./fuzzer CORPUS_DIR -runs=0 -dump_coverage=1 + +which will dump a ``.sancov`` file with coverage information. +See SanitizerCoverage_ for details on querying the file using the ``sancov`` tool. You may also use other ways to visualize coverage, e.g. using `Clang coverage `_, @@ -728,6 +745,7 @@ to crash on invalid inputs. Examples: regular expression matchers, text or binary format parsers, compression, network, crypto. + Trophies ======== * GLIBC: https://sourceware.org/glibc/wiki/FuzzingLibc @@ -772,6 +790,8 @@ Trophies * Ffmpeg: `[1] `__ `[2] `__ `[3] `__ `[4] `__ +* `Wireshark `_ + .. _pcre2: http://www.pcre.org/ .. _AFL: http://lcamtuf.coredump.cx/afl/ .. _Radamsa: https://github.com/aoh/radamsa diff --git a/docs/MIRLangRef.rst b/docs/MIRLangRef.rst index f6ee6ccd0506b859ca2c23880e943f60c8a658fe..b4ca8f2347a79973fbf9b266d82fbd3959d7950a 100644 --- a/docs/MIRLangRef.rst +++ b/docs/MIRLangRef.rst @@ -39,37 +39,87 @@ MIR Testing Guide You can use the MIR format for testing in two different ways: - You can write MIR tests that invoke a single code generation pass using the - ``run-pass`` option in llc. + ``-run-pass`` option in llc. -- You can use llc's ``stop-after`` option with existing or new LLVM assembly +- You can use llc's ``-stop-after`` option with existing or new LLVM assembly tests and check the MIR output of a specific code generation pass. Testing Individual Code Generation Passes ----------------------------------------- -The ``run-pass`` option in llc allows you to create MIR tests that invoke -just a single code generation pass. When this option is used, llc will parse -an input MIR file, run the specified code generation pass, and print the -resulting MIR to the standard output stream. +The ``-run-pass`` option in llc allows you to create MIR tests that invoke just +a single code generation pass. When this option is used, llc will parse an +input MIR file, run the specified code generation pass(es), and output the +resulting MIR code. -You can generate an input MIR file for the test by using the ``stop-after`` -option in llc. For example, if you would like to write a test for the -post register allocation pseudo instruction expansion pass, you can specify -the machine copy propagation pass in the ``stop-after`` option, as it runs -just before the pass that we are trying to test: +You can generate an input MIR file for the test by using the ``-stop-after`` or +``-stop-before`` option in llc. For example, if you would like to write a test +for the post register allocation pseudo instruction expansion pass, you can +specify the machine copy propagation pass in the ``-stop-after`` option, as it +runs just before the pass that we are trying to test: - ``llc -stop-after machine-cp bug-trigger.ll > test.mir`` + ``llc -stop-after=machine-cp bug-trigger.ll > test.mir`` After generating the input MIR file, you'll have to add a run line that uses the ``-run-pass`` option to it. In order to test the post register allocation pseudo instruction expansion pass on X86-64, a run line like the one shown below can be used: - ``# RUN: llc -run-pass postrapseudos -march=x86-64 %s -o /dev/null | FileCheck %s`` + ``# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=postrapseudos | FileCheck %s`` The MIR files are target dependent, so they have to be placed in the target -specific test directories. They also need to specify a target triple or a -target architecture either in the run line or in the embedded LLVM IR module. +specific test directories (``lib/CodeGen/TARGETNAME``). They also need to +specify a target triple or a target architecture either in the run line or in +the embedded LLVM IR module. + +Simplifying MIR files +^^^^^^^^^^^^^^^^^^^^^ + +The MIR code coming out of ``-stop-after``/``-stop-before`` is very verbose; +Tests are more accessible and future proof when simplified: + +- Use the ``-simplify-mir`` option with llc. + +- Machine function attributes often have default values or the test works just + as well with default values. Typical candidates for this are: `alignment:`, + `exposesReturnsTwice`, `legalized`, `regBankSelected`, `selected`. + The whole `frameInfo` section is often unnecessary if there is no special + frame usage in the function. `tracksRegLiveness` on the other hand is often + necessary for some passes that care about block livein lists. + +- The (global) `liveins:` list is typically only interesting for early + instruction selection passes and can be removed when testing later passes. + The per-block `liveins:` on the other hand are necessary if + `tracksRegLiveness` is true. + +- Branch probability data in block `successors:` lists can be dropped if the + test doesn't depend on it. Example: + `successors: %bb.1(0x40000000), %bb.2(0x40000000)` can be replaced with + `successors: %bb.1, %bb.2`. + +- MIR code contains a whole IR module. This is necessary because there are + no equivalents in MIR for global variables, references to external functions, + function attributes, metadata, debug info. Instead some MIR data references + the IR constructs. You can often remove them if the test doesn't depend on + them. + +- Alias Analysis is performed on IR values. These are referenced by memory + operands in MIR. Example: `:: (load 8 from %ir.foobar, !alias.scope !9)`. + If the test doesn't depend on (good) alias analysis the references can be + dropped: `:: (load 8)` + +- MIR blocks can reference IR blocks for debug printing, profile information + or debug locations. Example: `bb.42.myblock` in MIR references the IR block + `myblock`. It is usually possible to drop the `.myblock` reference and simply + use `bb.42`. + +- If there are no memory operands or blocks referencing the IR then the + IR function can be replaced by a parameterless dummy function like + `define @func() { ret void }`. + +- It is possible to drop the whole IR section of the MIR file if it only + contains dummy functions (see above). The .mir loader will create the + IR functions automatically in this case. Limitations ----------- diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst index 8d1984b65cd9964f9ae07b2fdf3977cd6d48423c..cc8484cc1e3e360ffa5e4d3824dd88bc94375230 100644 --- a/docs/Phabricator.rst +++ b/docs/Phabricator.rst @@ -54,7 +54,8 @@ reviewer understand your code. To get a full diff, use one of the following commands (or just use Arcanist to upload your patch): -* ``git diff -U999999 other-branch`` +* ``git show HEAD -U999999 > mypatch.patch`` +* ``git format-patch -U999999 @{u}`` * ``svn diff --diff-cmd=diff -x -U999999`` To upload a new patch: diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 4fb67e1e6d5f8f4e091b6719526f6eafe00f0183..d115a9cf6de8ed134e9f4df7009a4a60585b4da4 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -776,22 +776,21 @@ readability. Using cantFail to simplify safe callsites """"""""""""""""""""""""""""""""""""""""" -Some functions may only fail for a subset of their inputs. For such functions -call-sites using known-safe inputs can assume that the result will be a success -value. +Some functions may only fail for a subset of their inputs, so calls using known +safe inputs can be assumed to succeed. The cantFail functions encapsulate this by wrapping an assertion that their argument is a success value and, in the case of Expected, unwrapping the -T value from the Expected argument: +T value: .. code-block:: c++ - Error mayFail(int X); - Expected mayFail2(int X); + Error onlyFailsForSomeXValues(int X); + Expected onlyFailsForSomeXValues2(int X); void foo() { - cantFail(mayFail(KnownSafeValue)); - int Y = cantFail(mayFail2(KnownSafeValue)); + cantFail(onlyFailsForSomeXValues(KnownSafeValue)); + int Y = cantFail(onlyFailsForSomeXValues2(KnownSafeValue)); ... } @@ -801,8 +800,8 @@ terminate the program on an error input, cantFile simply asserts that the result is success. In debug builds this will result in an assertion failure if an error is encountered. In release builds the behavior of cantFail for failure values is undefined. As such, care must be taken in the use of cantFail: clients must be -certain that a cantFail wrapped call really can not fail under any -circumstances. +certain that a cantFail wrapped call really can not fail with the given +arguments. Use of the cantFail functions should be rare in library code, but they are likely to be of more use in tool and unit-test code where inputs and/or diff --git a/docs/Proposals/VectorizationPlan.rst b/docs/Proposals/VectorizationPlan.rst new file mode 100644 index 0000000000000000000000000000000000000000..82ce4b2de17afb325df27147b23f88ec0c23bbba --- /dev/null +++ b/docs/Proposals/VectorizationPlan.rst @@ -0,0 +1,182 @@ +================== +Vectorization Plan +================== + +.. contents:: + :local: + +Abstract +======== +The vectorization transformation can be rather complicated, involving several +potential alternatives, especially for outer-loops [1]_ but also possibly for +innermost loops. These alternatives may have significant performance impact, +both positive and negative. A cost model is therefore employed to identify the +best alternative, including the alternative of avoiding any transformation +altogether. + +The Vectorization Plan is an explicit model for describing vectorization +candidates. It serves for both optimizing candidates including estimating their +cost reliably, and for performing their final translation into IR. This +facilitates dealing with multiple vectorization candidates. + +High-level Design +================= + +Vectorization Workflow +---------------------- +VPlan-based vectorization involves three major steps, taking a "scenario-based +approach" to vectorization planning: + +1. Legal Step: check if a loop can be legally vectorized; encode contraints and + artifacts if so. +2. Plan Step: + + a. Build initial VPlans following the constraints and decisions taken by + Legal Step 1, and compute their cost. + b. Apply optimizations to the VPlans, possibly forking additional VPlans. + Prune sub-optimal VPlans having relatively high cost. +3. Execute Step: materialize the best VPlan. Note that this is the only step + that modifies the IR. + +Design Guidelines +----------------- +In what follows, the term "input IR" refers to code that is fed into the +vectorizer whereas the term "output IR" refers to code that is generated by the +vectorizer. The output IR contains code that has been vectorized or "widened" +according to a loop Vectorization Factor (VF), and/or loop unroll-and-jammed +according to an Unroll Factor (UF). +The design of VPlan follows several high-level guidelines: + +1. Analysis-like: building and manipulating VPlans must not modify the input IR. + In particular, if the best option is not to vectorize at all, the + vectorization process terminates before reaching Step 3, and compilation + should proceed as if VPlans had not been built. + +2. Align Cost & Execute: each VPlan must support both estimating the cost and + generating the output IR code, such that the cost estimation evaluates the + to-be-generated code reliably. + +3. Support vectorizing additional constructs: + + a. Outer-loop vectorization. In particular, VPlan must be able to model the + control-flow of the output IR which may include multiple basic-blocks and + nested loops. + b. SLP vectorization. + c. Combinations of the above, including nested vectorization: vectorizing + both an inner loop and an outer-loop at the same time (each with its own + VF and UF), mixed vectorization: vectorizing a loop with SLP patterns + inside [4]_, (re)vectorizing input IR containing vector code. + d. Function vectorization [2]_. + +4. Support multiple candidates efficiently. In particular, similar candidates + related to a range of possible VF's and UF's must be represented efficiently. + Potential versioning needs to be supported efficiently. + +5. Support vectorizing idioms, such as interleaved groups of strided loads or + stores. This is achieved by modeling a sequence of output instructions using + a "Recipe", which is responsible for computing its cost and generating its + code. + +6. Encapsulate Single-Entry Single-Exit regions (SESE). During vectorization + such regions may need to be, for example, predicated and linearized, or + replicated VF*UF times to handle scalarized and predicated instructions. + Innerloops are also modelled as SESE regions. + +Low-level Design +================ +The low-level design of VPlan comprises of the following classes. + +:LoopVectorizationPlanner: + A LoopVectorizationPlanner is designed to handle the vectorization of a loop + or a loop nest. It can construct, optimize and discard one or more VPlans, + each VPlan modelling a distinct way to vectorize the loop or the loop nest. + Once the best VPlan is determined, including the best VF and UF, this VPlan + drives the generation of output IR. + +:VPlan: + A model of a vectorized candidate for a given input IR loop or loop nest. This + candidate is represented using a Hierarchical CFG. VPlan supports estimating + the cost and driving the generation of the output IR code it represents. + +:Hierarchical CFG: + A control-flow graph whose nodes are basic-blocks or Hierarchical CFG's. The + Hierarchical CFG data structure is similar to the Tile Tree [5]_, where + cross-Tile edges are lifted to connect Tiles instead of the original + basic-blocks as in Sharir [6]_, promoting the Tile encapsulation. The terms + Region and Block are used rather than Tile [5]_ to avoid confusion with loop + tiling. + +:VPBlockBase: + The building block of the Hierarchical CFG. A pure-virtual base-class of + VPBasicBlock and VPRegionBlock, see below. VPBlockBase models the hierarchical + control-flow relations with other VPBlocks. Note that in contrast to the IR + BasicBlock, a VPBlockBase models its control-flow successors and predecessors + directly, rather than through a Terminator branch or through predecessor + branches that "use" the VPBlockBase. + +:VPBasicBlock: + VPBasicBlock is a subclass of VPBlockBase, and serves as the leaves of the + Hierarchical CFG. It represents a sequence of output IR instructions that will + appear consecutively in an output IR basic-block. The instructions of this + basic-block originate from one or more VPBasicBlocks. VPBasicBlock holds a + sequence of zero or more VPRecipes that model the cost and generation of the + output IR instructions. + +:VPRegionBlock: + VPRegionBlock is a subclass of VPBlockBase. It models a collection of + VPBasicBlocks and VPRegionBlocks which form a SESE subgraph of the output IR + CFG. A VPRegionBlock may indicate that its contents are to be replicated a + constant number of times when output IR is generated, effectively representing + a loop with constant trip-count that will be completely unrolled. This is used + to support scalarized and predicated instructions with a single model for + multiple candidate VF's and UF's. + +:VPRecipeBase: + A pure-virtual base class modeling a sequence of one or more output IR + instructions, possibly based on one or more input IR instructions. These + input IR instructions are referred to as "Ingredients" of the Recipe. A Recipe + may specify how its ingredients are to be transformed to produce the output IR + instructions; e.g., cloned once, replicated multiple times or widened + according to selected VF. + +:VPTransformState: + Stores information used for generating output IR, passed from + LoopVectorizationPlanner to its selected VPlan for execution, and used to pass + additional information down to VPBlocks and VPRecipes. + +Related LLVM components +----------------------- +1. SLP Vectorizer: one can compare the VPlan model with LLVM's existing SLP + tree, where TSLP [3]_ adds Plan Step 2.b. + +2. RegionInfo: one can compare VPlan's H-CFG with the Region Analysis as used by + Polly [7]_. + +References +---------- +.. [1] "Outer-loop vectorization: revisited for short SIMD architectures", Dorit + Nuzman and Ayal Zaks, PACT 2008. + +.. [2] "Proposal for function vectorization and loop vectorization with function + calls", Xinmin Tian, [`cfe-dev + `_]., + March 2, 2016. + See also `review `_. + +.. [3] "Throttling Automatic Vectorization: When Less is More", Vasileios + Porpodas and Tim Jones, PACT 2015 and LLVM Developers' Meeting 2015. + +.. [4] "Exploiting mixed SIMD parallelism by reducing data reorganization + overhead", Hao Zhou and Jingling Xue, CGO 2016. + +.. [5] "Register Allocation via Hierarchical Graph Coloring", David Callahan and + Brian Koblenz, PLDI 1991 + +.. [6] "Structural analysis: A new approach to flow analysis in optimizing + compilers", M. Sharir, Journal of Computer Languages, Jan. 1980 + +.. [7] "Enabling Polyhedral Optimizations in LLVM", Tobias Grosser, Diploma + thesis, 2011. + +.. [8] "Introducing VPlan to the Loop Vectorizer", Gil Rapaport and Ayal Zaks, + European LLVM Developers' Meeting 2017. diff --git a/docs/README.txt b/docs/README.txt index 6c6e5b90ecf278bae2adedc93689175a88311ea3..f1c74261ce4d153f01aea9af633670826ec7b710 100644 --- a/docs/README.txt +++ b/docs/README.txt @@ -51,3 +51,18 @@ running: cd docs/ make -f Makefile.sphinx linkcheck + +Doxygen page Output +============== + +Install doxygen and dot2tex . + + cd + cmake -DLLVM_ENABLE_DOXYGEN=On + make doxygen-llvm # for LLVM docs + make doxygen-clang # for clang docs + +It will generate html in + + /docs/doxygen/html # for LLVM docs + /tools/clang/docs/doxygen/html # for clang docs diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index dbffb53d5a51911c5a8b08e8d0733d5579f6fdd4..95025fb91c72536c74639e960acdc298fcda3a7e 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -40,6 +40,21 @@ Non-comprehensive list of changes in this release functionality, or simply have a lot to talk about), see the `NOTE` below for adding a new subsection. +* LLVM's ``WeakVH`` has been renamed to ``WeakTrackingVH`` and a new ``WeakVH`` + has been introduced. The new ``WeakVH`` nulls itself out on deletion, but + does not track values across RAUW. + +* A new library named ``BinaryFormat`` has been created which holds a collection + of code which previously lived in ``Support``. This includes the + ``file_magic`` structure and ``identify_magic`` functions, as well as all the + structure and type definitions for DWARF, ELF, COFF, WASM, and MachO file + formats. + +* The tool ``llvm-pdbdump`` has been renamed ``llvm-pdbutil`` to better reflect + its nature as a general purpose PDB manipulation / diagnostics tool that does + more than just dumping contents. + + * ... next change ... .. NOTE diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst index 41f8dbfab3dce32ea12642171db549f9a5f8bfb6..a9f5c3a0814724a4c058e197ca54746b4716a7db 100644 --- a/docs/SourceLevelDebugging.rst +++ b/docs/SourceLevelDebugging.rst @@ -180,11 +180,27 @@ provide debug information at various points in generated code. void @llvm.dbg.declare(metadata, metadata, metadata) -This intrinsic provides information about a local element (e.g., variable). -The first argument is metadata holding the alloca for the variable. The second +This intrinsic provides information about a local element (e.g., variable). The +first argument is metadata holding the alloca for the variable. The second argument is a `local variable `_ containing a description of the variable. The third argument is a `complex expression -`_. +`_. An `llvm.dbg.declare` instrinsic describes the +*location* of a source variable. + +.. code-block:: llvm + + %i.addr = alloca i32, align 4 + call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !1, metadata !2), !dbg !3 + !1 = !DILocalVariable(name: "i", ...) ; int i + !2 = !DIExpression() + !3 = !DILocation(...) + ... + %buffer = alloca [256 x i8], align 8 + ; The address of i is buffer+64. + call void @llvm.dbg.declare(metadata [256 x i8]* %buffer, metadata !1, metadata !2) + !1 = !DILocalVariable(name: "i", ...) ; int i + !2 = !DIExpression(DW_OP_plus, 64) + ``llvm.dbg.value`` ^^^^^^^^^^^^^^^^^^ diff --git a/docs/StackMaps.rst b/docs/StackMaps.rst index a78fde16c2be983b326defd23987fc603a56481b..99c5e5fbe4de13bf3df4c4bc02af60f84ef360f0 100644 --- a/docs/StackMaps.rst +++ b/docs/StackMaps.rst @@ -319,7 +319,7 @@ format of this section follows: .. code-block:: none Header { - uint8 : Stack Map Version (current version is 2) + uint8 : Stack Map Version (current version is 3) uint8 : Reserved (expected to be 0) uint16 : Reserved (expected to be 0) } @@ -341,10 +341,13 @@ format of this section follows: uint16 : NumLocations Location[NumLocations] { uint8 : Register | Direct | Indirect | Constant | ConstantIndex - uint8 : Reserved (location flags) + uint8 : Reserved (expected to be 0) + uint16 : Location Size uint16 : Dwarf RegNum + uint16 : Reserved (expected to be 0) int32 : Offset or SmallConstant } + uint32 : Padding (only if required to align to 8 byte) uint16 : Padding uint16 : NumLiveOuts LiveOuts[NumLiveOuts] diff --git a/docs/Statepoints.rst b/docs/Statepoints.rst index 7f2b20544812f6e3da79e11c2984097b9c7d93e5..73e09ae8b620b290208d0982f73c877bc5252163 100644 --- a/docs/Statepoints.rst +++ b/docs/Statepoints.rst @@ -9,15 +9,22 @@ Garbage Collection Safepoints in LLVM Status ======= -This document describes a set of experimental extensions to LLVM. Use -with caution. Because the intrinsics have experimental status, -compatibility across LLVM releases is not guaranteed. - -LLVM currently supports an alternate mechanism for conservative -garbage collection support using the ``gcroot`` intrinsic. The mechanism -described here shares little in common with the alternate ``gcroot`` -implementation and it is hoped that this mechanism will eventually -replace the gc_root mechanism. +This document describes a set of extensions to LLVM to support garbage +collection. By now, these mechanisms are well proven with commercial java +implementation with a fully relocating collector having shipped using them. +There are a couple places where bugs might still linger; these are called out +below. + +They are still listed as "experimental" to indicate that no forward or backward +compatibility guarantees are offered across versions. If your use case is such +that you need some form of forward compatibility guarantee, please raise the +issue on the llvm-dev mailing list. + +LLVM still supports an alternate mechanism for conservative garbage collection +support using the ``gcroot`` intrinsic. The ``gcroot`` mechanism is mostly of +historical interest at this point with one exception - its implementation of +shadow stacks has been used successfully by a number of language frontends and +is still supported. Overview ======== @@ -86,9 +93,36 @@ the collector must be able to: This document describes the mechanism by which an LLVM based compiler can provide this information to a language runtime/collector, and -ensure that all pointers can be read and updated if desired. The -heart of the approach is to construct (or rewrite) the IR in a manner -where the possible updates performed by the garbage collector are +ensure that all pointers can be read and updated if desired. + +At a high level, LLVM has been extended to support compiling to an abstract +machine which extends the actual target with a non-integral pointer type +suitable for representing a garbage collected reference to an object. In +particular, such non-integral pointer type have no defined mapping to an +integer representation. This semantic quirk allows the runtime to pick a +integer mapping for each point in the program allowing relocations of objects +without visible effects. + +Warning: Non-Integral Pointer Types are a newly added concept in LLVM IR. +It's possible that we've missed disabling some of the optimizations which +assume an integral value for pointers. If you find such a case, please +file a bug or share a patch. + +Warning: There is one currently known semantic hole in the definition of +non-integral pointers which has not been addressed upstream. To work around +this, you need to disable speculation of loads unless the memory type +(non-integral pointer vs anything else) is known to unchanged. That is, it is +not safe to speculate a load if doing causes a non-integral pointer value to +be loaded as any other type or vice versa. In practice, this restriction is +well isolated to isSafeToSpeculate in ValueTracking.cpp. + +This high level abstract machine model is used for most of the LLVM optimizer. +Before starting code generation, we switch representations to an explicit form. +In theory, a frontend could directly generate this low level explicit form, but +doing so is likely to inhibit optimization. + +The heart of the explicit approach is to construct (or rewrite) the IR in a +manner where the possible updates performed by the garbage collector are explicitly visible in the IR. Doing so requires that we: #. create a new SSA value for each potentially relocated pointer, and @@ -104,7 +138,7 @@ explicitly visible in the IR. Doing so requires that we: At the most abstract level, inserting a safepoint can be thought of as replacing a call instruction with a call to a multiple return value function which both calls the original target of the call, returns -it's result, and returns updated values for any live pointers to +its result, and returns updated values for any live pointers to garbage collected objects. Note that the task of identifying all live pointers to garbage @@ -200,7 +234,9 @@ The relevant parts of the StackMap section for our example are: .short 7 .long 0 -This example was taken from the tests for the :ref:`RewriteStatepointsForGC` utility pass. As such, it's full StackMap can be easily examined with the following command. +This example was taken from the tests for the :ref:`RewriteStatepointsForGC` +utility pass. As such, its full StackMap can be easily examined with the +following command. .. code-block:: bash @@ -536,7 +572,7 @@ Semantics: """""""""" The return value of ``gc.relocate`` is the potentially relocated value -of the pointer specified by it's arguments. It is unspecified how the +of the pointer specified by its arguments. It is unspecified how the value of the returned pointer relates to the argument to the ``gc.statepoint`` other than that a) it points to the same source language object with the same offset, and b) the 'based-on' @@ -654,11 +690,15 @@ Utility Passes for Safepoint Insertion RewriteStatepointsForGC ^^^^^^^^^^^^^^^^^^^^^^^^ -The pass RewriteStatepointsForGC transforms a functions IR by replacing a -``gc.statepoint`` (with an optional ``gc.result``) with a full relocation -sequence, including all required ``gc.relocates``. To function, the pass -requires that the GC strategy specified for the function be able to reliably -distinguish between GC references and non-GC references in IR it is given. +The pass RewriteStatepointsForGC transforms a function's IR to lower from the +abstract machine model described above to the explicit statepoint model of +relocations. To do this, it replaces all calls or invokes of functions which +might contain a safepoint poll with a ``gc.statepoint`` and associated full +relocation sequence, including all required ``gc.relocates``. + +Note that by default, this pass only runs for the "statepoint-example" or +"core-clr" gc strategies. You will need to add your custom strategy to this +whitelist or use one of the predefined ones. As an example, given this code: @@ -666,7 +706,7 @@ As an example, given this code: define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" { - call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + call void @foo() ret i8 addrspace(1)* %obj } @@ -683,7 +723,8 @@ The pass would produce this IR: In the above examples, the addrspace(1) marker on the pointers is the mechanism that the ``statepoint-example`` GC strategy uses to distinguish references from -non references. Address space 1 is not globally reserved for this purpose. +non references. The pass assumes that all addrspace(1) pointers are non-integral +pointer types. Address space 1 is not globally reserved for this purpose. This pass can be used an utility function by a language frontend that doesn't want to manually reason about liveness, base pointers, or relocation when @@ -701,23 +742,34 @@ can be relaxed to producing interior derived pointers provided the target collector can find the associated allocation from an arbitrary interior derived pointer. -In practice, RewriteStatepointsForGC can be run much later in the pass +By default RewriteStatepointsForGC passes in ``0xABCDEF00`` as the statepoint +ID and ``0`` as the number of patchable bytes to the newly constructed +``gc.statepoint``. These values can be configured on a per-callsite +basis using the attributes ``"statepoint-id"`` and +``"statepoint-num-patch-bytes"``. If a call site is marked with a +``"statepoint-id"`` function attribute and its value is a positive +integer (represented as a string), then that value is used as the ID +of the newly constructed ``gc.statepoint``. If a call site is marked +with a ``"statepoint-num-patch-bytes"`` function attribute and its +value is a positive integer, then that value is used as the 'num patch +bytes' parameter of the newly constructed ``gc.statepoint``. The +``"statepoint-id"`` and ``"statepoint-num-patch-bytes"`` attributes +are not propagated to the ``gc.statepoint`` call or invoke if they +could be successfully parsed. + +In practice, RewriteStatepointsForGC should be run much later in the pass pipeline, after most optimization is already done. This helps to improve the quality of the generated code when compiled with garbage collection support. -In the long run, this is the intended usage model. At this time, a few details -have yet to be worked out about the semantic model required to guarantee this -is always correct. As such, please use with caution and report bugs. .. _PlaceSafepoints: PlaceSafepoints ^^^^^^^^^^^^^^^^ -The pass PlaceSafepoints transforms a function's IR by replacing any call or -invoke instructions with appropriate ``gc.statepoint`` and ``gc.result`` pairs, -and inserting safepoint polls sufficient to ensure running code checks for a -safepoint request on a timely manner. This pass is expected to be run before -RewriteStatepointsForGC and thus does not produce full relocation sequences. +The pass PlaceSafepoints inserts safepoint polls sufficient to ensure running +code checks for a safepoint request on a timely manner. This pass is expected +to be run before RewriteStatepointsForGC and thus does not produce full +relocation sequences. As an example, given input IR of the following: @@ -740,13 +792,16 @@ This pass would produce the following IR: .. code-block:: text define void @test() gc "statepoint-example" { - %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) - %safepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0) + call void @do_safepoint() + call void @foo() ret void } -In this case, we've added an (unconditional) entry safepoint poll and converted the call into a ``gc.statepoint``. Note that despite appearances, the entry poll is not necessarily redundant. We'd have to know that ``foo`` and ``test`` were not mutually recursive for the poll to be redundant. In practice, you'd probably want to your poll definition to contain a conditional branch of some form. - +In this case, we've added an (unconditional) entry safepoint poll. Note that +despite appearances, the entry poll is not necessarily redundant. We'd have to +know that ``foo`` and ``test`` were not mutually recursive for the poll to be +redundant. In practice, you'd probably want to your poll definition to contain +a conditional branch of some form. At the moment, PlaceSafepoints can insert safepoint polls at method entry and loop backedges locations. Extending this to work with return polls would be @@ -763,26 +818,13 @@ of this function is inserted at each poll site desired. While calls or invokes inside this method are transformed to a ``gc.statepoints``, recursive poll insertion is not performed. -By default PlaceSafepoints passes in ``0xABCDEF00`` as the statepoint -ID and ``0`` as the number of patchable bytes to the newly constructed -``gc.statepoint``. These values can be configured on a per-callsite -basis using the attributes ``"statepoint-id"`` and -``"statepoint-num-patch-bytes"``. If a call site is marked with a -``"statepoint-id"`` function attribute and its value is a positive -integer (represented as a string), then that value is used as the ID -of the newly constructed ``gc.statepoint``. If a call site is marked -with a ``"statepoint-num-patch-bytes"`` function attribute and its -value is a positive integer, then that value is used as the 'num patch -bytes' parameter of the newly constructed ``gc.statepoint``. The -``"statepoint-id"`` and ``"statepoint-num-patch-bytes"`` attributes -are not propagated to the ``gc.statepoint`` call or invoke if they -could be successfully parsed. - -If you are scheduling the RewriteStatepointsForGC pass late in the pass order, -you should probably schedule this pass immediately before it. The exception -would be if you need to preserve abstract frame information (e.g. for -deoptimization or introspection) at safepoints. In that case, ask on the -llvm-dev mailing list for suggestions. +This pass is useful for any language frontend which only has to support +garbage collection semantics at safepoints. If you need other abstract +frame information at safepoints (e.g. for deoptimization or introspection), +you can insert safepoint polls in the frontend. If you have the later case, +please ask on llvm-dev for suggestions. There's been a good amount of work +done on making such a scheme work well in practice which is not yet documented +here. Supported Architectures @@ -794,13 +836,6 @@ Today, only X86_64 is supported. Problem Areas and Active Work ============================= -#. As the existing users of the late rewriting model have matured, we've found - cases where the optimizer breaks the assumption that an SSA value of - gc-pointer type actually contains a gc-pointer and vice-versa. We need to - clarify our expectations and propose at least one small IR change. (Today, - the gc-pointer distinction is managed via address spaces. This turns out - not to be quite strong enough.) - #. Support for languages which allow unmanaged pointers to garbage collected objects (i.e. pass a pointer to an object to a C routine) via pinning. diff --git a/docs/TableGen/LangIntro.rst b/docs/TableGen/LangIntro.rst index d8bd17d750b8ecc771633b7511146cccc7df9eea..460ff9067f201afa184370f4de9e7beff5ccf69c 100644 --- a/docs/TableGen/LangIntro.rst +++ b/docs/TableGen/LangIntro.rst @@ -58,6 +58,10 @@ types are: The 'string' type represents an ordered sequence of characters of arbitrary length. +``code`` + The `code` type represents a code fragment, which can be single/multi-line + string literal. + ``bits`` A 'bits' type is an arbitrary, but fixed, size integer that is broken up into individual bits. This type is useful because it can handle some bits @@ -105,7 +109,7 @@ supported include: hexadecimal integer value ``"foo"`` - string value + a single-line string value, can be assigned to ``string`` or ``code`` variable. ``[{ ... }]`` usually called a "code fragment", but is just a multiline string literal @@ -126,7 +130,8 @@ supported include: access to one bit of a value ``value{15-17}`` - access to multiple bits of a value + access to an ordered sequence of bits of a value, in particular ``value{15-17}`` + produces an order that is the reverse of ``value{17-15}``. ``DEF`` reference to a record definition diff --git a/docs/Vectorizers.rst b/docs/Vectorizers.rst index 65c19aa2bc0cbfa4f7c0c9f88a91c2b2779a7368..92d6200e169f8c0b61fae78aaae8da23d9b60714 100644 --- a/docs/Vectorizers.rst +++ b/docs/Vectorizers.rst @@ -44,12 +44,12 @@ Users can control the vectorization SIMD width using the command line flag "-for $ clang -mllvm -force-vector-width=8 ... $ opt -loop-vectorize -force-vector-width=8 ... -Users can control the unroll factor using the command line flag "-force-vector-unroll" +Users can control the unroll factor using the command line flag "-force-vector-interleave" .. code-block:: console - $ clang -mllvm -force-vector-unroll=2 ... - $ opt -loop-vectorize -force-vector-unroll=2 ... + $ clang -mllvm -force-vector-interleave=2 ... + $ opt -loop-vectorize -force-vector-interleave=2 ... Pragma loop hint directives ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -99,7 +99,9 @@ Optimization remarks are enabled using: indicates if vectorization was specified. ``-Rpass-analysis=loop-vectorize`` identifies the statements that caused -vectorization to fail. +vectorization to fail. If in addition ``-fsave-optimization-record`` is +provided, multiple causes of vectorization failure may be listed (this behavior +might change in the future). Consider the following loop: @@ -380,6 +382,17 @@ And Linpack-pc with the same configuration. Result is Mflops, higher is better. .. image:: linpack-pc.png +Ongoing Development Directions +------------------------------ + +.. toctree:: + :hidden: + + Proposals/VectorizationPlan + +:doc:`Proposals/VectorizationPlan` + Modeling the process and upgrading the infrastructure of LLVM's Loop Vectorizer. + .. _slp-vectorizer: The SLP Vectorizer diff --git a/docs/doxygen.cfg.in b/docs/doxygen.cfg.in index 451eaf4d2fcc9d6c835c330d732701139da7d20f..e3c7f479ac4e140e60922f53a8e14b86fa9e5232 100644 --- a/docs/doxygen.cfg.in +++ b/docs/doxygen.cfg.in @@ -58,7 +58,7 @@ PROJECT_LOGO = # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. -OUTPUT_DIRECTORY = @abs_top_builddir@/docs/doxygen +OUTPUT_DIRECTORY = @abs_top_builddir@/doxygen # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and @@ -132,7 +132,7 @@ INLINE_INHERITED_MEMB = NO # shortest path that makes the file name unique will be used # The default value is: YES. -FULL_PATH_NAMES = NO +FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand @@ -144,7 +144,7 @@ FULL_PATH_NAMES = NO # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. -STRIP_FROM_PATH = ../.. +STRIP_FROM_PATH = @abs_top_srcdir@/.. # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which @@ -153,7 +153,8 @@ STRIP_FROM_PATH = ../.. # specify the list of include paths that are normally passed to the compiler # using the -I flag. -STRIP_FROM_INC_PATH = +STRIP_FROM_INC_PATH = @abs_top_srcdir@/../include +STRIP_FROM_INC_PATH += @abs_top_srcdir@/../lib # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't @@ -513,7 +514,7 @@ SHOW_GROUPED_MEMB_INC = NO # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. -FORCE_LOCAL_INCLUDES = NO +FORCE_LOCAL_INCLUDES = YES # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. @@ -743,9 +744,9 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = @abs_top_srcdir@/include \ - @abs_top_srcdir@/lib \ - @abs_top_srcdir@/docs/doxygen-mainpage.dox +INPUT = @abs_top_srcdir@/../include \ + @abs_top_srcdir@/../lib \ + @abs_top_srcdir@/doxygen-mainpage.dox # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -813,7 +814,7 @@ EXCLUDE_SYMBOLS = # that contain example code fragments that are included (see the \include # command). -EXAMPLE_PATH = @abs_top_srcdir@/examples +EXAMPLE_PATH = @abs_top_srcdir@/../examples # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and @@ -833,7 +834,7 @@ EXAMPLE_RECURSIVE = YES # that contain images that are to be included in the documentation (see the # \image command). -IMAGE_PATH = @abs_top_srcdir@/docs/img +IMAGE_PATH = @abs_top_srcdir@/img # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program diff --git a/docs/index.rst b/docs/index.rst index fe47eb1bcb7f7b38343412429548d84ae98999a5..54b60823653045a197b241c5e40dc75b0ca7da52 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -90,6 +90,7 @@ representation. CodeOfConduct CompileCudaWithLLVM ReportingGuide + Benchmarking :doc:`GettingStarted` Discusses how to get up and running quickly with the LLVM infrastructure. @@ -359,10 +360,10 @@ For API clients and LLVM developers. Answers some questions about the new Attributes infrastructure. :doc:`NVPTXUsage` - This document describes using the NVPTX back-end to compile GPU kernels. + This document describes using the NVPTX backend to compile GPU kernels. :doc:`AMDGPUUsage` - This document describes how to use the AMDGPU back-end. + This document describes using the AMDGPU backend to compile GPU kernels. :doc:`StackMaps` LLVM support for mapping instruction addresses to the location of @@ -527,6 +528,7 @@ can be better. CodeOfConduct Proposals/GitHubMove + Proposals/VectorizationPlan :doc:`CodeOfConduct` Proposal to adopt a code of conduct on the LLVM social spaces (lists, events, @@ -535,6 +537,8 @@ can be better. :doc:`Proposals/GitHubMove` Proposal to move from SVN/Git to GitHub. +:doc:`Proposals/VectorizationPlan` + Proposal to model the process and upgrade the infrastructure of LLVM's Loop Vectorizer. Indices and tables ================== diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index a8b82e1da778a0309a12e117b9795d5cb70fbf8e..d4c2a8cc5ad9b97780a9c66ec553517c1277033f 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -49,7 +49,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" -#include "llvm/IR/Verifier.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" @@ -59,7 +59,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h index a14fd1dc20eca72df000180fdde53587bc923b15..847662cc11befbdd3fde487375e61ec2d777e9e5 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,7 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" @@ -44,7 +44,7 @@ private: IRCompileLayer CompileLayer; public: - typedef decltype(CompileLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(CompileLayer)::ModuleSetHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp index 945b9706d4d73f2b72d29ca968bfd3071eefe750..163caa6872d7b91b01d7101390211e5d235076a4 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp @@ -1092,7 +1092,7 @@ Function *FunctionAST::codegen() { TheFunction->eraseFromParent(); if (P.isBinaryOp()) - BinopPrecedence.erase(Proto->getOperatorName()); + BinopPrecedence.erase(P.getOperatorName()); return nullptr; } diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h index 2039be4571a59956b7225518bd98dcdde1c5644c..a5ac2f017b748b34dcff01653c8ecbcdcf1d2ce3 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,7 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" @@ -47,13 +47,13 @@ private: RTDyldObjectLinkingLayer<> ObjectLayer; IRCompileLayer CompileLayer; - typedef std::function(std::unique_ptr)> - OptimizeFunction; + using OptimizeFunction = + std::function(std::unique_ptr)>; IRTransformLayer OptimizeLayer; public: - typedef decltype(OptimizeLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(OptimizeLayer)::ModuleSetHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp index 945b9706d4d73f2b72d29ca968bfd3071eefe750..163caa6872d7b91b01d7101390211e5d235076a4 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp @@ -1092,7 +1092,7 @@ Function *FunctionAST::codegen() { TheFunction->eraseFromParent(); if (P.isBinaryOp()) - BinopPrecedence.erase(Proto->getOperatorName()); + BinopPrecedence.erase(P.getOperatorName()); return nullptr; } diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h index d22d41855072ca62bb03e9dea03f574e1f720d79..7acb9c748880c82fcebe8a1ba2661a8965b691ff 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" @@ -49,8 +50,8 @@ private: RTDyldObjectLinkingLayer<> ObjectLayer; IRCompileLayer CompileLayer; - typedef std::function(std::unique_ptr)> - OptimizeFunction; + using OptimizeFunction = + std::function(std::unique_ptr)>; IRTransformLayer OptimizeLayer; @@ -58,7 +59,7 @@ private: CompileOnDemandLayer CODLayer; public: - typedef decltype(CODLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(CODLayer)::ModuleSetHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp index 945b9706d4d73f2b72d29ca968bfd3071eefe750..163caa6872d7b91b01d7101390211e5d235076a4 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp @@ -1092,7 +1092,7 @@ Function *FunctionAST::codegen() { TheFunction->eraseFromParent(); if (P.isBinaryOp()) - BinopPrecedence.erase(Proto->getOperatorName()); + BinopPrecedence.erase(P.getOperatorName()); return nullptr; } diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h index e0a78410f7134953450ab3978ec6a439b3a942ee..03e42230ae71b6a767ccbd606621281468aea893 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,10 +17,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" -#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" @@ -76,8 +76,8 @@ private: RTDyldObjectLinkingLayer<> ObjectLayer; IRCompileLayer CompileLayer; - typedef std::function(std::unique_ptr)> - OptimizeFunction; + using OptimizeFunction = + std::function(std::unique_ptr)>; IRTransformLayer OptimizeLayer; @@ -85,7 +85,7 @@ private: std::unique_ptr IndirectStubsMgr; public: - typedef decltype(OptimizeLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(OptimizeLayer)::ModuleSetHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), @@ -106,7 +106,6 @@ public: TargetMachine &getTargetMachine() { return *TM; } ModuleHandle addModule(std::unique_ptr M) { - // Build our symbol resolver: // Lambda 1: Look back into the JIT itself to find symbols that are part of // the same "logical dylib". diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h index 70a896fe8f007b908826aaa8ca351133803e47fe..0ee9d094ab82f9ad39fc7941cef7bc2b8c5600b8 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,9 +20,8 @@ #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" @@ -73,7 +72,7 @@ namespace llvm { namespace orc { // Typedef the remote-client API. -typedef remote::OrcRemoteTargetClient MyRemote; +using MyRemote = remote::OrcRemoteTargetClient; class KaleidoscopeJIT { private: @@ -82,8 +81,8 @@ private: RTDyldObjectLinkingLayer<> ObjectLayer; IRCompileLayer CompileLayer; - typedef std::function(std::unique_ptr)> - OptimizeFunction; + using OptimizeFunction = + std::function(std::unique_ptr)>; IRTransformLayer OptimizeLayer; @@ -92,7 +91,7 @@ private: MyRemote &Remote; public: - typedef decltype(OptimizeLayer)::ModuleSetHandleT ModuleHandle; + using ModuleHandle = decltype(OptimizeLayer)::ModuleSetHandleT; KaleidoscopeJIT(MyRemote &Remote) : TM(EngineBuilder().selectTarget(Triple(Remote.getTargetTriple()), "", @@ -124,7 +123,6 @@ public: TargetMachine &getTargetMachine() { return *TM; } ModuleHandle addModule(std::unique_ptr M) { - // Build our symbol resolver: // Lambda 1: Look back into the JIT itself to find symbols that are part of // the same "logical dylib". diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter5/Server/server.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter5/Server/server.cpp index da6e8ac65234840b01f84e4cc5a6e3888879f291..e50a7ecf96bcd7cb01be4c24a205b14166936e2d 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter5/Server/server.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter5/Server/server.cpp @@ -1,17 +1,19 @@ +#include "../RemoteJITUtils.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h" +#include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetSelect.h" -#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h" -#include "llvm/ExecutionEngine/Orc/OrcABISupport.h" - -#include "../RemoteJITUtils.h" - +#include +#include #include -#include +#include #include #include - using namespace llvm; using namespace llvm::orc; @@ -22,7 +24,7 @@ cl::opt Port("port", ExitOnError ExitOnErr; -typedef int (*MainFun)(int, const char*[]); +using MainFun = int (*)(int, const char*[]); template NativePtrT MakeNative(uint64_t P) { @@ -36,7 +38,6 @@ void printExprResult(double Val) { // --- LAZY COMPILE TEST --- int main(int argc, char* argv[]) { - if (argc == 0) ExitOnErr.setBanner("jit_server: "); else @@ -59,14 +60,14 @@ int main(int argc, char* argv[]) { int sockfd = socket(PF_INET, SOCK_STREAM, 0); sockaddr_in servAddr, clientAddr; socklen_t clientAddrLen = sizeof(clientAddr); - bzero(&servAddr, sizeof(servAddr)); + memset(&servAddr, 0, sizeof(servAddr)); servAddr.sin_family = PF_INET; servAddr.sin_family = INADDR_ANY; servAddr.sin_port = htons(Port); { // avoid "Address already in use" error. - int yes=1; + int yes = 1; if (setsockopt(sockfd,SOL_SOCKET,SO_REUSEADDR,&yes,sizeof(int)) == -1) { errs() << "Error calling setsockopt.\n"; return 1; @@ -98,7 +99,8 @@ int main(int argc, char* argv[]) { }; FDRPCChannel TCPChannel(newsockfd, newsockfd); - typedef remote::OrcRemoteTargetServer MyServerT; + + using MyServerT = remote::OrcRemoteTargetServer; MyServerT Server(TCPChannel, SymbolLookup, RegisterEHFrames, DeregisterEHFrames); diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp index 1e0ddca29b61f197555e25b971fd6d4ee4f80af0..0c2221735589cc232c3573f699eb4012edfb8c7b 100644 --- a/examples/Kaleidoscope/Chapter6/toy.cpp +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -932,7 +932,7 @@ Function *FunctionAST::codegen() { TheFunction->eraseFromParent(); if (P.isBinaryOp()) - BinopPrecedence.erase(Proto->getOperatorName()); + BinopPrecedence.erase(P.getOperatorName()); return nullptr; } diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp index 2f8cb682a847141dfc6fa348a25af61107b71d17..79ac7b33d7a1628e1933e0ba6aad4966ad1116db 100644 --- a/examples/Kaleidoscope/Chapter7/toy.cpp +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -1099,7 +1099,7 @@ Function *FunctionAST::codegen() { TheFunction->eraseFromParent(); if (P.isBinaryOp()) - BinopPrecedence.erase(Proto->getOperatorName()); + BinopPrecedence.erase(P.getOperatorName()); return nullptr; } diff --git a/examples/Kaleidoscope/Chapter8/toy.cpp b/examples/Kaleidoscope/Chapter8/toy.cpp index cdf650973b86022fc65162ad053ca3345312ee1f..3ed98fcfdb5cd31b829315ebefad76bf1fc652ef 100644 --- a/examples/Kaleidoscope/Chapter8/toy.cpp +++ b/examples/Kaleidoscope/Chapter8/toy.cpp @@ -1097,7 +1097,7 @@ Function *FunctionAST::codegen() { TheFunction->eraseFromParent(); if (P.isBinaryOp()) - BinopPrecedence.erase(Proto->getOperatorName()); + BinopPrecedence.erase(P.getOperatorName()); return nullptr; } diff --git a/examples/Kaleidoscope/include/KaleidoscopeJIT.h b/examples/Kaleidoscope/include/KaleidoscopeJIT.h index 1dca39deba3c39e052796b2b61d1c3efa3f81af7..9a682f7ab7440070e8ac927a9479b8582bf76c7c 100644 --- a/examples/Kaleidoscope/include/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/include/KaleidoscopeJIT.h @@ -1,4 +1,4 @@ -//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,7 +19,6 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/RTDyldMemoryManager.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" @@ -40,9 +39,9 @@ namespace orc { class KaleidoscopeJIT { public: - typedef RTDyldObjectLinkingLayer<> ObjLayerT; - typedef IRCompileLayer CompileLayerT; - typedef CompileLayerT::ModuleSetHandleT ModuleHandleT; + using ObjLayerT = RTDyldObjectLinkingLayer<>; + using CompileLayerT = IRCompileLayer; + using ModuleHandleT = CompileLayerT::ModuleSetHandleT; KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 7f5c05d21e650758a07cf73d9456c7e2e3ed71df..22cef23007c363badc38d2754d937c23236496db 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -1039,6 +1039,20 @@ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy); */ LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty); +/** + * Returns type's subtypes + * + * @see llvm::Type::subtypes() + */ +void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr); + +/** + * Return the number of types in the derived type. + * + * @see llvm::Type::getNumContainedTypes() + */ +unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp); + /** * Create a fixed size array type that refers to a specific type. * @@ -2130,6 +2144,16 @@ LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, */ LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count); +/** + * Obtain a Metadata as a Value. + */ +LLVMValueRef LLVMMetadataAsValue(LLVMContextRef C, LLVMMetadataRef MD); + +/** + * Obtain a Value as a Metadata. + */ +LLVMMetadataRef LLVMValueAsMetadata(LLVMValueRef Val); + /** * Obtain the underlying string from a MDString value. * diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h index b72a91a8b137efa23ec15665e1cdbd0d4ec3ae12..51830fe139c6e844ade2bdb9c182fec277144cd0 100644 --- a/include/llvm-c/ExecutionEngine.h +++ b/include/llvm-c/ExecutionEngine.h @@ -19,9 +19,9 @@ #ifndef LLVM_C_EXECUTIONENGINE_H #define LLVM_C_EXECUTIONENGINE_H -#include "llvm-c/Types.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Support.h b/include/llvm-c/Support.h index 735d1fbc78cc40a76ef52c146cae295e73385017..6de184ccab49e3e6c441c67d534bd7d492324a3b 100644 --- a/include/llvm-c/Support.h +++ b/include/llvm-c/Support.h @@ -14,8 +14,8 @@ #ifndef LLVM_C_SUPPORT_H #define LLVM_C_SUPPORT_H -#include "llvm/Support/DataTypes.h" #include "llvm-c/Types.h" +#include "llvm/Support/DataTypes.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h index 1d1f61f1a5b4840cd73121e955ba2698f1688424..f4f7f7698c45bc7b1fe4c57771bf518c82479686 100644 --- a/include/llvm-c/TargetMachine.h +++ b/include/llvm-c/TargetMachine.h @@ -19,8 +19,8 @@ #ifndef LLVM_C_TARGETMACHINE_H #define LLVM_C_TARGETMACHINE_H -#include "llvm-c/Types.h" #include "llvm-c/Target.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Types.h b/include/llvm-c/Types.h index 3d472a6bf47d0e6dd505b65d02b4bb79bac37416..d63ea4de933dea687156c677c3390ca95189bc66 100644 --- a/include/llvm-c/Types.h +++ b/include/llvm-c/Types.h @@ -82,6 +82,13 @@ typedef struct LLVMOpaqueValue *LLVMValueRef; */ typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef; +/** + * Represents an LLVM Metadata. + * + * This models llvm::Metadata. + */ +typedef struct LLVMOpaqueMetadata *LLVMMetadataRef; + /** * Represents an LLVM basic block builder. * @@ -89,6 +96,13 @@ typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef; */ typedef struct LLVMOpaqueBuilder *LLVMBuilderRef; +/** + * Represents an LLVM debug info builder. + * + * This models llvm::DIBuilder. + */ +typedef struct LLVMOpaqueDIBuilder *LLVMDIBuilderRef; + /** * Interface used to provide a module to JIT or interpreter. * This is now just a synonym for llvm::Module, but we have to keep using the diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index e7e5036e69307b7ceb5d8afdab5662bde70093bf..bef6efde1f0123a020bf8696ccb14a44da72756d 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -397,6 +397,12 @@ public: /// consider inserting before falling back to scientific /// notation. 0 means to always use scientific notation. /// + /// \param TruncateZero Indicate whether to remove the trailing zero in + /// fraction part or not. Also setting this parameter to false forcing + /// producing of output more similar to default printf behavior. + /// Specifically the lower e is used as exponent delimiter and exponent + /// always contains no less than two digits. + /// /// Number Precision MaxPadding Result /// ------ --------- ---------- ------ /// 1.01E+4 5 2 10100 @@ -406,7 +412,7 @@ public: /// 1.01E-2 4 2 0.0101 /// 1.01E-2 4 1 1.01E-2 void toString(SmallVectorImpl &Str, unsigned FormatPrecision = 0, - unsigned FormatMaxPadding = 3) const; + unsigned FormatMaxPadding = 3, bool TruncateZero = true) const; /// If this value has an exact multiplicative inverse, store it in inv and /// return true. @@ -649,7 +655,7 @@ public: bool isInteger() const; void toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const; + unsigned FormatMaxPadding, bool TruncateZero = true) const; bool getExactInverse(APFloat *inv) const; @@ -1144,9 +1150,9 @@ public: APFloat &operator=(APFloat &&RHS) = default; void toString(SmallVectorImpl &Str, unsigned FormatPrecision = 0, - unsigned FormatMaxPadding = 3) const { + unsigned FormatMaxPadding = 3, bool TruncateZero = true) const { APFLOAT_DISPATCH_ON_SEMANTICS( - toString(Str, FormatPrecision, FormatMaxPadding)); + toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero)); } void print(raw_ostream &) const; diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index 045df3c908756aa90498af34fda37c614a1cf71d..ef9c66d2d700bfe744fc0989792fdc643a823f5e 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -78,23 +78,29 @@ public: APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT }; -private: - unsigned BitWidth; ///< The number of bits in this APInt. + static const WordType WORD_MAX = ~WordType(0); +private: /// This union is used to store the integer value. When the /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. union { uint64_t VAL; ///< Used to store the <= 64 bits integer value. uint64_t *pVal; ///< Used to store the >64 bits integer value. - }; + } U; + + unsigned BitWidth; ///< The number of bits in this APInt. friend struct DenseMapAPIntKeyInfo; + friend class APSInt; + /// \brief Fast internal constructor /// /// This constructor is used only internally for speed of construction of /// temporaries. It is unsafe for general use so it is not public. - APInt(uint64_t *val, unsigned bits) : BitWidth(bits), pVal(val) {} + APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { + U.pVal = val; + } /// \brief Determine if this APInt just has one word to store value. /// @@ -134,28 +140,28 @@ private: /// zero'd out. APInt &clearUnusedBits() { // Compute how many bits are used in the final word - unsigned wordBits = BitWidth % APINT_BITS_PER_WORD; - if (wordBits == 0) - // If all bits are used, we want to leave the value alone. This also - // avoids the undefined behavior of >> when the shift is the same size as - // the word size (64). - return *this; + unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1; // Mask out the high bits. - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - wordBits); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - WordBits); if (isSingleWord()) - VAL &= mask; + U.VAL &= mask; else - pVal[getNumWords() - 1] &= mask; + U.pVal[getNumWords() - 1] &= mask; return *this; } /// \brief Get the word corresponding to a bit position /// \returns the corresponding word for the specified bit position. uint64_t getWord(unsigned bitPosition) const { - return isSingleWord() ? VAL : pVal[whichWord(bitPosition)]; + return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; } + /// Utility method to change the bit width of this APInt to new bit width, + /// allocating and/or deallocating as necessary. There is no guarantee on the + /// value of any bits upon return. Caller should populate the bits after. + void reallocate(unsigned NewBitWidth); + /// \brief Convert a char array into an APInt /// /// \param radix 2, 8, 10, 16, or 36 @@ -176,8 +182,9 @@ private: /// provides a more convenient form of divide for internal use since KnuthDiv /// has specific constraints on its inputs. If those constraints are not met /// then it provides a simpler form of divide. - static void divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, - unsigned rhsWords, APInt *Quotient, APInt *Remainder); + static void divide(const WordType *LHS, unsigned lhsWords, + const WordType *RHS, unsigned rhsWords, WordType *Quotient, + WordType *Remainder); /// out-of-line slow case for inline constructor void initSlowCase(uint64_t val, bool isSigned); @@ -189,17 +196,20 @@ private: void initSlowCase(const APInt &that); /// out-of-line slow case for shl - APInt shlSlowCase(unsigned shiftAmt) const; + void shlSlowCase(unsigned ShiftAmt); + + /// out-of-line slow case for lshr. + void lshrSlowCase(unsigned ShiftAmt); + + /// out-of-line slow case for ashr. + void ashrSlowCase(unsigned ShiftAmt); /// out-of-line slow case for operator= - APInt &AssignSlowCase(const APInt &RHS); + void AssignSlowCase(const APInt &RHS); /// out-of-line slow case for operator== bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY; - /// out-of-line slow case for operator== - bool EqualSlowCase(uint64_t Val) const LLVM_READONLY; - /// out-of-line slow case for countLeadingZeros unsigned countLeadingZerosSlowCase() const LLVM_READONLY; @@ -209,6 +219,12 @@ private: /// out-of-line slow case for countPopulation unsigned countPopulationSlowCase() const LLVM_READONLY; + /// out-of-line slow case for intersects. + bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY; + + /// out-of-line slow case for isSubsetOf. + bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY; + /// out-of-line slow case for setBits. void setBitsSlowCase(unsigned loBit, unsigned hiBit); @@ -216,13 +232,21 @@ private: void flipAllBitsSlowCase(); /// out-of-line slow case for operator&=. - APInt& AndAssignSlowCase(const APInt& RHS); + void AndAssignSlowCase(const APInt& RHS); /// out-of-line slow case for operator|=. - APInt& OrAssignSlowCase(const APInt& RHS); + void OrAssignSlowCase(const APInt& RHS); /// out-of-line slow case for operator^=. - APInt& XorAssignSlowCase(const APInt& RHS); + void XorAssignSlowCase(const APInt& RHS); + + /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compare(const APInt &RHS) const LLVM_READONLY; + + /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compareSigned(const APInt &RHS) const LLVM_READONLY; public: /// \name Constructors @@ -242,7 +266,7 @@ public: : BitWidth(numBits) { assert(BitWidth && "bitwidth too small"); if (isSingleWord()) { - VAL = val; + U.VAL = val; clearUnusedBits(); } else { initSlowCase(val, isSigned); @@ -284,20 +308,21 @@ public: /// @brief Copy Constructor. APInt(const APInt &that) : BitWidth(that.BitWidth) { if (isSingleWord()) - VAL = that.VAL; + U.VAL = that.U.VAL; else initSlowCase(that); } /// \brief Move Constructor. - APInt(APInt &&that) : BitWidth(that.BitWidth), VAL(that.VAL) { + APInt(APInt &&that) : BitWidth(that.BitWidth) { + memcpy(&U, &that.U, sizeof(U)); that.BitWidth = 0; } /// \brief Destructor. ~APInt() { if (needsCleanup()) - delete[] pVal; + delete[] U.pVal; } /// \brief Default constructor that creates an uninteresting APInt @@ -305,7 +330,7 @@ public: /// /// This is useful for object deserialization (pair this with the static /// method Read). - explicit APInt() : BitWidth(1), VAL(0) {} + explicit APInt() : BitWidth(1) { U.VAL = 0; } /// \brief Returns whether this instance allocated memory. bool needsCleanup() const { return !isSingleWord(); } @@ -330,23 +355,48 @@ public: /// This tests the high bit of the APInt to determine if it is unset. bool isNonNegative() const { return !isNegative(); } + /// \brief Determine if sign bit of this APInt is set. + /// + /// This tests the high bit of this APInt to determine if it is set. + /// + /// \returns true if this APInt has its sign bit set, false otherwise. + bool isSignBitSet() const { return (*this)[BitWidth-1]; } + + /// \brief Determine if sign bit of this APInt is clear. + /// + /// This tests the high bit of this APInt to determine if it is clear. + /// + /// \returns true if this APInt has its sign bit clear, false otherwise. + bool isSignBitClear() const { return !isSignBitSet(); } + /// \brief Determine if this APInt Value is positive. /// /// This tests if the value of this APInt is positive (> 0). Note /// that 0 is not a positive value. /// /// \returns true if this APInt is positive. - bool isStrictlyPositive() const { return isNonNegative() && !!*this; } + bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); } /// \brief Determine if all bits are set /// /// This checks to see if the value has all bits of the APInt are set or not. bool isAllOnesValue() const { if (isSingleWord()) - return VAL == UINT64_MAX >> (APINT_BITS_PER_WORD - BitWidth); + return U.VAL == WORD_MAX >> (APINT_BITS_PER_WORD - BitWidth); return countPopulationSlowCase() == BitWidth; } + /// \brief Determine if all bits are clear + /// + /// This checks to see if the value has all bits of the APInt are clear or + /// not. + bool isNullValue() const { return !*this; } + + /// \brief Determine if this is a value of 1. + /// + /// This checks to see if the value of this APInt is one. + bool isOneValue() const { return getActiveBits() == 1; } + /// \brief Determine if this is the largest unsigned value. /// /// This checks to see if the value of this APInt is the maximum unsigned @@ -365,7 +415,7 @@ public: /// /// This checks to see if the value of this APInt is the minimum unsigned /// value for the APInt's bit width. - bool isMinValue() const { return !*this; } + bool isMinValue() const { return isNullValue(); } /// \brief Determine if this is the smallest signed value. /// @@ -392,14 +442,14 @@ public: /// \returns true if the argument APInt value is a power of two > 0. bool isPowerOf2() const { if (isSingleWord()) - return isPowerOf2_64(VAL); + return isPowerOf2_64(U.VAL); return countPopulationSlowCase() == 1; } - /// \brief Check if the APInt's value is returned by getSignBit. + /// \brief Check if the APInt's value is returned by getSignMask. /// - /// \returns true if this is the value returned by getSignBit. - bool isSignBit() const { return isMinSignedValue(); } + /// \returns true if this is the value returned by getSignMask. + bool isSignMask() const { return isMinSignedValue(); } /// \brief Convert APInt to a boolean value. /// @@ -409,8 +459,7 @@ public: /// If this value is smaller than the specified limit, return it, otherwise /// return the limit value. This causes the value to saturate to the limit. uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) const { - return (getActiveBits() > 64 || getZExtValue() > Limit) ? Limit - : getZExtValue(); + return ugt(Limit) ? Limit : getZExtValue(); } /// \brief Check if the APInt consists of a repeated bit pattern. @@ -426,9 +475,10 @@ public: assert(numBits != 0 && "numBits must be non-zero"); assert(numBits <= BitWidth && "numBits out of range"); if (isSingleWord()) - return VAL == (UINT64_MAX >> (APINT_BITS_PER_WORD - numBits)); - unsigned Ones = countTrailingOnes(); - return (numBits == Ones) && ((Ones + countLeadingZeros()) == BitWidth); + return U.VAL == (WORD_MAX >> (APINT_BITS_PER_WORD - numBits)); + unsigned Ones = countTrailingOnesSlowCase(); + return (numBits == Ones) && + ((Ones + countLeadingZerosSlowCase()) == BitWidth); } /// \returns true if this APInt is a non-empty sequence of ones starting at @@ -436,18 +486,19 @@ public: /// Ex. isMask(0x0000FFFFU) == true. bool isMask() const { if (isSingleWord()) - return isMask_64(VAL); - unsigned Ones = countTrailingOnes(); - return (Ones > 0) && ((Ones + countLeadingZeros()) == BitWidth); + return isMask_64(U.VAL); + unsigned Ones = countTrailingOnesSlowCase(); + return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); } /// \brief Return true if this APInt value contains a sequence of ones with /// the remainder zero. bool isShiftedMask() const { if (isSingleWord()) - return isShiftedMask_64(VAL); - unsigned Ones = countPopulation(); - return (Ones + countTrailingZeros() + countLeadingZeros()) == BitWidth; + return isShiftedMask_64(U.VAL); + unsigned Ones = countPopulationSlowCase(); + unsigned LeadZ = countLeadingZerosSlowCase(); + return (Ones + LeadZ + countTrailingZeros()) == BitWidth; } /// @} @@ -476,11 +527,11 @@ public: return API; } - /// \brief Get the SignBit for a specific bit width. + /// \brief Get the SignMask for a specific bit width. /// /// This is just a wrapper function of getSignedMinValue(), and it helps code - /// readability when we want to get a SignBit. - static APInt getSignBit(unsigned BitWidth) { + /// readability when we want to get a SignMask. + static APInt getSignMask(unsigned BitWidth) { return getSignedMinValue(BitWidth); } @@ -488,7 +539,7 @@ public: /// /// \returns the all-ones value for an APInt of the specified bit-width. static APInt getAllOnesValue(unsigned numBits) { - return APInt(numBits, UINT64_MAX, true); + return APInt(numBits, WORD_MAX, true); } /// \brief Get the '0' value. @@ -580,15 +631,7 @@ public: } /// \brief Return a value containing V broadcasted over NewLen bits. - static APInt getSplat(unsigned NewLen, const APInt &V) { - assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); - - APInt Val = V.zextOrSelf(NewLen); - for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) - Val |= Val << I; - - return Val; - } + static APInt getSplat(unsigned NewLen, const APInt &V); /// \brief Determine if two APInts have the same value, after zero-extending /// one of them (if needed!) to ensure that the bit-widths match. @@ -610,8 +653,8 @@ public: /// conversions. const uint64_t *getRawData() const { if (isSingleWord()) - return &VAL; - return &pVal[0]; + return &U.VAL; + return &U.pVal[0]; } /// @} @@ -620,7 +663,9 @@ public: /// \brief Postfix increment operator. /// - /// \returns a new APInt value representing *this incremented by one + /// Increments *this by 1. + /// + /// \returns a new APInt value representing the original value of *this. const APInt operator++(int) { APInt API(*this); ++(*this); @@ -634,7 +679,9 @@ public: /// \brief Postfix decrement operator. /// - /// \returns a new APInt representing *this decremented by one. + /// Decrements *this by 1. + /// + /// \returns a new APInt value representing the original value of *this. const APInt operator--(int) { APInt API(*this); --(*this); @@ -652,7 +699,9 @@ public: /// /// \returns true if *this is zero, false otherwise. bool operator!() const { - return *this == 0; + if (isSingleWord()) + return U.VAL == 0; + return countLeadingZerosSlowCase() == BitWidth; } /// @} @@ -665,34 +714,27 @@ public: APInt &operator=(const APInt &RHS) { // If the bitwidths are the same, we can avoid mucking with memory if (isSingleWord() && RHS.isSingleWord()) { - VAL = RHS.VAL; + U.VAL = RHS.U.VAL; BitWidth = RHS.BitWidth; return clearUnusedBits(); } - return AssignSlowCase(RHS); + AssignSlowCase(RHS); + return *this; } /// @brief Move assignment operator. APInt &operator=(APInt &&that) { - if (!isSingleWord()) { - // The MSVC STL shipped in 2013 requires that self move assignment be a - // no-op. Otherwise algorithms like stable_sort will produce answers - // where half of the output is left in a moved-from state. - if (this == &that) - return *this; - delete[] pVal; - } + assert(this != &that && "Self-move not supported"); + if (!isSingleWord()) + delete[] U.pVal; // Use memcpy so that type based alias analysis sees both VAL and pVal // as modified. - memcpy(&VAL, &that.VAL, sizeof(uint64_t)); + memcpy(&U, &that.U, sizeof(U)); - // If 'this == &that', avoid zeroing our own bitwidth by storing to 'that' - // first. - unsigned ThatBitWidth = that.BitWidth; + BitWidth = that.BitWidth; that.BitWidth = 0; - BitWidth = ThatBitWidth; return *this; } @@ -706,11 +748,11 @@ public: /// \returns *this after assignment of RHS value. APInt &operator=(uint64_t RHS) { if (isSingleWord()) { - VAL = RHS; + U.VAL = RHS; clearUnusedBits(); } else { - pVal[0] = RHS; - memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + U.pVal[0] = RHS; + memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); } return *this; } @@ -723,11 +765,11 @@ public: /// \returns *this after ANDing with RHS. APInt &operator&=(const APInt &RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) { - VAL &= RHS.VAL; - return *this; - } - return AndAssignSlowCase(RHS); + if (isSingleWord()) + U.VAL &= RHS.U.VAL; + else + AndAssignSlowCase(RHS); + return *this; } /// \brief Bitwise AND assignment operator. @@ -737,11 +779,11 @@ public: /// the LHS. APInt &operator&=(uint64_t RHS) { if (isSingleWord()) { - VAL &= RHS; + U.VAL &= RHS; return *this; } - pVal[0] &= RHS; - memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + U.pVal[0] &= RHS; + memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); return *this; } @@ -753,11 +795,11 @@ public: /// \returns *this after ORing with RHS. APInt &operator|=(const APInt &RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) { - VAL |= RHS.VAL; - return *this; - } - return OrAssignSlowCase(RHS); + if (isSingleWord()) + U.VAL |= RHS.U.VAL; + else + OrAssignSlowCase(RHS); + return *this; } /// \brief Bitwise OR assignment operator. @@ -767,10 +809,10 @@ public: /// the LHS. APInt &operator|=(uint64_t RHS) { if (isSingleWord()) { - VAL |= RHS; + U.VAL |= RHS; clearUnusedBits(); } else { - pVal[0] |= RHS; + U.pVal[0] |= RHS; } return *this; } @@ -783,11 +825,11 @@ public: /// \returns *this after XORing with RHS. APInt &operator^=(const APInt &RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) { - VAL ^= RHS.VAL; - return *this; - } - return XorAssignSlowCase(RHS); + if (isSingleWord()) + U.VAL ^= RHS.U.VAL; + else + XorAssignSlowCase(RHS); + return *this; } /// \brief Bitwise XOR assignment operator. @@ -797,10 +839,10 @@ public: /// the LHS. APInt &operator^=(uint64_t RHS) { if (isSingleWord()) { - VAL ^= RHS; + U.VAL ^= RHS; clearUnusedBits(); } else { - pVal[0] ^= RHS; + U.pVal[0] ^= RHS; } return *this; } @@ -811,6 +853,7 @@ public: /// /// \returns *this APInt &operator*=(const APInt &RHS); + APInt &operator*=(uint64_t RHS); /// \brief Addition assignment operator. /// @@ -832,12 +875,27 @@ public: /// /// Shifts *this left by shiftAmt and assigns the result to *this. /// - /// \returns *this after shifting left by shiftAmt - APInt &operator<<=(unsigned shiftAmt) { - *this = shl(shiftAmt); + /// \returns *this after shifting left by ShiftAmt + APInt &operator<<=(unsigned ShiftAmt) { + assert(ShiftAmt <= BitWidth && "Invalid shift amount"); + if (isSingleWord()) { + if (ShiftAmt == BitWidth) + U.VAL = 0; + else + U.VAL <<= ShiftAmt; + return clearUnusedBits(); + } + shlSlowCase(ShiftAmt); return *this; } + /// \brief Left-shift assignment function. + /// + /// Shifts *this left by shiftAmt and assigns the result to *this. + /// + /// \returns *this after shifting left by ShiftAmt + APInt &operator<<=(const APInt &ShiftAmt); + /// @} /// \name Binary Operators /// @{ @@ -860,24 +918,56 @@ public: /// \brief Arithmetic right-shift function. /// /// Arithmetic right-shift this APInt by shiftAmt. - APInt ashr(unsigned shiftAmt) const; + APInt ashr(unsigned ShiftAmt) const { + APInt R(*this); + R.ashrInPlace(ShiftAmt); + return R; + } + + /// Arithmetic right-shift this APInt by ShiftAmt in place. + void ashrInPlace(unsigned ShiftAmt) { + assert(ShiftAmt <= BitWidth && "Invalid shift amount"); + if (isSingleWord()) { + int64_t SExtVAL = SignExtend64(U.VAL, BitWidth); + if (ShiftAmt == BitWidth) + U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit. + else + U.VAL = SExtVAL >> ShiftAmt; + clearUnusedBits(); + return; + } + ashrSlowCase(ShiftAmt); + } /// \brief Logical right-shift function. /// /// Logical right-shift this APInt by shiftAmt. - APInt lshr(unsigned shiftAmt) const; + APInt lshr(unsigned shiftAmt) const { + APInt R(*this); + R.lshrInPlace(shiftAmt); + return R; + } + + /// Logical right-shift this APInt by ShiftAmt in place. + void lshrInPlace(unsigned ShiftAmt) { + assert(ShiftAmt <= BitWidth && "Invalid shift amount"); + if (isSingleWord()) { + if (ShiftAmt == BitWidth) + U.VAL = 0; + else + U.VAL >>= ShiftAmt; + return; + } + lshrSlowCase(ShiftAmt); + } /// \brief Left-shift function. /// /// Left-shift this APInt by shiftAmt. APInt shl(unsigned shiftAmt) const { - assert(shiftAmt <= BitWidth && "Invalid shift amount"); - if (isSingleWord()) { - if (shiftAmt >= BitWidth) - return APInt(BitWidth, 0); // avoid undefined shift results - return APInt(BitWidth, VAL << shiftAmt); - } - return shlSlowCase(shiftAmt); + APInt R(*this); + R <<= shiftAmt; + return R; } /// \brief Rotate left by rotateAmt. @@ -889,17 +979,35 @@ public: /// \brief Arithmetic right-shift function. /// /// Arithmetic right-shift this APInt by shiftAmt. - APInt ashr(const APInt &shiftAmt) const; + APInt ashr(const APInt &ShiftAmt) const { + APInt R(*this); + R.ashrInPlace(ShiftAmt); + return R; + } + + /// Arithmetic right-shift this APInt by shiftAmt in place. + void ashrInPlace(const APInt &shiftAmt); /// \brief Logical right-shift function. /// /// Logical right-shift this APInt by shiftAmt. - APInt lshr(const APInt &shiftAmt) const; + APInt lshr(const APInt &ShiftAmt) const { + APInt R(*this); + R.lshrInPlace(ShiftAmt); + return R; + } + + /// Logical right-shift this APInt by ShiftAmt in place. + void lshrInPlace(const APInt &ShiftAmt); /// \brief Left-shift function. /// /// Left-shift this APInt by shiftAmt. - APInt shl(const APInt &shiftAmt) const; + APInt shl(const APInt &ShiftAmt) const { + APInt R(*this); + R <<= ShiftAmt; + return R; + } /// \brief Rotate left by rotateAmt. APInt rotl(const APInt &rotateAmt) const; @@ -914,11 +1022,13 @@ public: /// /// \returns a new APInt value containing the division result APInt udiv(const APInt &RHS) const; + APInt udiv(uint64_t RHS) const; /// \brief Signed division function for APInt. /// /// Signed divide this APInt by APInt RHS. APInt sdiv(const APInt &RHS) const; + APInt sdiv(int64_t RHS) const; /// \brief Unsigned remainder operation. /// @@ -930,11 +1040,13 @@ public: /// /// \returns a new APInt value containing the remainder result APInt urem(const APInt &RHS) const; + uint64_t urem(uint64_t RHS) const; /// \brief Function for signed remainder operation. /// /// Signed remainder operation on APInt. APInt srem(const APInt &RHS) const; + int64_t srem(int64_t RHS) const; /// \brief Dual division/remainder interface. /// @@ -945,9 +1057,13 @@ public: /// udivrem(X, Y, X, Y), for example. static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder); + static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, + uint64_t &Remainder); static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder); + static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient, + int64_t &Remainder); // Operations that return overflow indicators. APInt sadd_ov(const APInt &RHS, bool &Overflow) const; @@ -965,9 +1081,7 @@ public: /// \returns the bit value at bitPosition bool operator[](unsigned bitPosition) const { assert(bitPosition < getBitWidth() && "Bit position out of bounds!"); - return (maskBit(bitPosition) & - (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != - 0; + return (maskBit(bitPosition) & getWord(bitPosition)) != 0; } /// @} @@ -981,7 +1095,7 @@ public: bool operator==(const APInt &RHS) const { assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths"); if (isSingleWord()) - return VAL == RHS.VAL; + return U.VAL == RHS.U.VAL; return EqualSlowCase(RHS); } @@ -992,9 +1106,7 @@ public: /// /// \returns true if *this == Val bool operator==(uint64_t Val) const { - if (isSingleWord()) - return VAL == Val; - return EqualSlowCase(Val); + return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val; } /// \brief Equality comparison. @@ -1035,7 +1147,7 @@ public: /// the validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered unsigned. - bool ult(const APInt &RHS) const LLVM_READONLY; + bool ult(const APInt &RHS) const { return compare(RHS) < 0; } /// \brief Unsigned less than comparison /// @@ -1044,7 +1156,8 @@ public: /// /// \returns true if *this < RHS when considered unsigned. bool ult(uint64_t RHS) const { - return getActiveBits() > 64 ? false : getZExtValue() < RHS; + // Only need to check active bits if not a single word. + return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS; } /// \brief Signed less than comparison @@ -1053,7 +1166,7 @@ public: /// validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered signed. - bool slt(const APInt &RHS) const LLVM_READONLY; + bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; } /// \brief Signed less than comparison /// @@ -1062,7 +1175,8 @@ public: /// /// \returns true if *this < RHS when considered signed. bool slt(int64_t RHS) const { - return getMinSignedBits() > 64 ? isNegative() : getSExtValue() < RHS; + return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative() + : getSExtValue() < RHS; } /// \brief Unsigned less or equal comparison @@ -1071,7 +1185,7 @@ public: /// validity of the less-or-equal relationship. /// /// \returns true if *this <= RHS when both are considered unsigned. - bool ule(const APInt &RHS) const { return ult(RHS) || eq(RHS); } + bool ule(const APInt &RHS) const { return compare(RHS) <= 0; } /// \brief Unsigned less or equal comparison /// @@ -1087,7 +1201,7 @@ public: /// validity of the less-or-equal relationship. /// /// \returns true if *this <= RHS when both are considered signed. - bool sle(const APInt &RHS) const { return slt(RHS) || eq(RHS); } + bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; } /// \brief Signed less or equal comparison /// @@ -1103,7 +1217,7 @@ public: /// the validity of the greater-than relationship. /// /// \returns true if *this > RHS when both are considered unsigned. - bool ugt(const APInt &RHS) const { return !ult(RHS) && !eq(RHS); } + bool ugt(const APInt &RHS) const { return !ule(RHS); } /// \brief Unsigned greater than comparison /// @@ -1112,7 +1226,8 @@ public: /// /// \returns true if *this > RHS when considered unsigned. bool ugt(uint64_t RHS) const { - return getActiveBits() > 64 ? true : getZExtValue() > RHS; + // Only need to check active bits if not a single word. + return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS; } /// \brief Signed greather than comparison @@ -1121,7 +1236,7 @@ public: /// validity of the greater-than relationship. /// /// \returns true if *this > RHS when both are considered signed. - bool sgt(const APInt &RHS) const { return !slt(RHS) && !eq(RHS); } + bool sgt(const APInt &RHS) const { return !sle(RHS); } /// \brief Signed greater than comparison /// @@ -1130,7 +1245,8 @@ public: /// /// \returns true if *this > RHS when considered signed. bool sgt(int64_t RHS) const { - return getMinSignedBits() > 64 ? !isNegative() : getSExtValue() > RHS; + return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative() + : getSExtValue() > RHS; } /// \brief Unsigned greater or equal comparison @@ -1168,9 +1284,18 @@ public: /// This operation tests if there are any pairs of corresponding bits /// between this APInt and RHS that are both set. bool intersects(const APInt &RHS) const { - APInt temp(*this); - temp &= RHS; - return temp != 0; + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) + return (U.VAL & RHS.U.VAL) != 0; + return intersectsSlowCase(RHS); + } + + /// This operation checks that all bits set in this APInt are also set in RHS. + bool isSubsetOf(const APInt &RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) + return (U.VAL & ~RHS.U.VAL) == 0; + return isSubsetOfSlowCase(RHS); } /// @} @@ -1229,10 +1354,10 @@ public: /// \brief Set every bit to 1. void setAllBits() { if (isSingleWord()) - VAL = UINT64_MAX; + U.VAL = WORD_MAX; else // Set all the bits in all the words. - memset(pVal, -1, getNumWords() * APINT_WORD_SIZE); + memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE); // Clear the unused ones clearUnusedBits(); } @@ -1240,7 +1365,14 @@ public: /// \brief Set a given bit to 1. /// /// Set the given bit to 1 whose position is given as "bitPosition". - void setBit(unsigned bitPosition); + void setBit(unsigned BitPosition) { + assert(BitPosition <= BitWidth && "BitPosition out of range"); + WordType Mask = maskBit(BitPosition); + if (isSingleWord()) + U.VAL |= Mask; + else + U.pVal[whichWord(BitPosition)] |= Mask; + } /// Set the sign bit to 1. void setSignBit() { @@ -1251,20 +1383,16 @@ public: void setBits(unsigned loBit, unsigned hiBit) { assert(hiBit <= BitWidth && "hiBit out of range"); assert(loBit <= BitWidth && "loBit out of range"); + assert(loBit <= hiBit && "loBit greater than hiBit"); if (loBit == hiBit) return; - if (loBit > hiBit) { - setLowBits(hiBit); - setHighBits(BitWidth - loBit); - return; - } if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); mask <<= loBit; if (isSingleWord()) - VAL |= mask; + U.VAL |= mask; else - pVal[0] |= mask; + U.pVal[0] |= mask; } else { setBitsSlowCase(loBit, hiBit); } @@ -1288,20 +1416,32 @@ public: /// \brief Set every bit to 0. void clearAllBits() { if (isSingleWord()) - VAL = 0; + U.VAL = 0; else - memset(pVal, 0, getNumWords() * APINT_WORD_SIZE); + memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE); } /// \brief Set a given bit to 0. /// /// Set the given bit to 0 whose position is given as "bitPosition". - void clearBit(unsigned bitPosition); + void clearBit(unsigned BitPosition) { + assert(BitPosition <= BitWidth && "BitPosition out of range"); + WordType Mask = ~maskBit(BitPosition); + if (isSingleWord()) + U.VAL &= Mask; + else + U.pVal[whichWord(BitPosition)] &= Mask; + } + + /// Set the sign bit to 0. + void clearSignBit() { + clearBit(BitWidth - 1); + } /// \brief Toggle every bit to its opposite value. void flipAllBits() { if (isSingleWord()) { - VAL ^= UINT64_MAX; + U.VAL ^= WORD_MAX; clearUnusedBits(); } else { flipAllBitsSlowCase(); @@ -1314,6 +1454,12 @@ public: /// as "bitPosition". void flipBit(unsigned bitPosition); + /// Negate this APInt in place. + void negate() { + flipAllBits(); + ++(*this); + } + /// Insert the bits from a smaller APInt starting at bitPosition. void insertBits(const APInt &SubBits, unsigned bitPosition); @@ -1381,9 +1527,9 @@ public: /// uint64_t. Otherwise an assertion will result. uint64_t getZExtValue() const { if (isSingleWord()) - return VAL; + return U.VAL; assert(getActiveBits() <= 64 && "Too many bits for uint64_t"); - return pVal[0]; + return U.pVal[0]; } /// \brief Get sign extended value @@ -1393,10 +1539,9 @@ public: /// int64_t. Otherwise an assertion will result. int64_t getSExtValue() const { if (isSingleWord()) - return int64_t(VAL << (APINT_BITS_PER_WORD - BitWidth)) >> - (APINT_BITS_PER_WORD - BitWidth); + return SignExtend64(U.VAL, BitWidth); assert(getMinSignedBits() <= 64 && "Too many bits for int64_t"); - return int64_t(pVal[0]); + return int64_t(U.pVal[0]); } /// \brief Get bits required for string value. @@ -1416,7 +1561,7 @@ public: unsigned countLeadingZeros() const { if (isSingleWord()) { unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth; - return llvm::countLeadingZeros(VAL) - unusedBits; + return llvm::countLeadingZeros(U.VAL) - unusedBits; } return countLeadingZerosSlowCase(); } @@ -1457,7 +1602,7 @@ public: /// of ones from the least significant bit to the first zero bit. unsigned countTrailingOnes() const { if (isSingleWord()) - return llvm::countTrailingOnes(VAL); + return llvm::countTrailingOnes(U.VAL); return countTrailingOnesSlowCase(); } @@ -1469,7 +1614,7 @@ public: /// \returns 0 if the value is zero, otherwise returns the number of set bits. unsigned countPopulation() const { if (isSingleWord()) - return llvm::countPopulation(VAL); + return llvm::countPopulation(U.VAL); return countPopulationSlowCase(); } @@ -1524,12 +1669,7 @@ public: /// re-interprets the bits as a double. Note that it is valid to do this on /// any bit width. Exactly 64 bits will be translated. double bitsToDouble() const { - union { - uint64_t I; - double D; - } T; - T.I = (isSingleWord() ? VAL : pVal[0]); - return T.D; + return BitsToDouble(getWord(0)); } /// \brief Converts APInt bits to a double @@ -1538,12 +1678,7 @@ public: /// re-interprets the bits as a float. Note that it is valid to do this on /// any bit width. Exactly 32 bits will be translated. float bitsToFloat() const { - union { - unsigned I; - float F; - } T; - T.I = unsigned((isSingleWord() ? VAL : pVal[0])); - return T.F; + return BitsToFloat(getWord(0)); } /// \brief Converts a double to APInt bits. @@ -1551,12 +1686,7 @@ public: /// The conversion does not do a translation from double to integer, it just /// re-interprets the bits of the double. static APInt doubleToBits(double V) { - union { - uint64_t I; - double D; - } T; - T.D = V; - return APInt(sizeof T * CHAR_BIT, T.I); + return APInt(sizeof(double) * CHAR_BIT, DoubleToBits(V)); } /// \brief Converts a float to APInt bits. @@ -1564,12 +1694,7 @@ public: /// The conversion does not do a translation from float to integer, it just /// re-interprets the bits of the float. static APInt floatToBits(float V) { - union { - unsigned I; - float F; - } T; - T.F = V; - return APInt(sizeof T * CHAR_BIT, T.I); + return APInt(sizeof(float) * CHAR_BIT, FloatToBits(V)); } /// @} @@ -1597,13 +1722,13 @@ public: /// referencing 2 in a space where 2 does no exist. unsigned nearestLogBase2() const { // Special case when we have a bitwidth of 1. If VAL is 1, then we - // get 0. If VAL is 0, we get UINT64_MAX which gets truncated to + // get 0. If VAL is 0, we get WORD_MAX which gets truncated to // UINT32_MAX. if (BitWidth == 1) - return VAL - 1; + return U.VAL - 1; // Handle the zero case. - if (!getBoolValue()) + if (isNullValue()) return UINT32_MAX; // The non-zero case is handled by computing: @@ -1698,10 +1823,14 @@ public: /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag. static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned); + /// DST += RHS. Returns the carry flag. + static WordType tcAddPart(WordType *, WordType, unsigned); /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag. static WordType tcSubtract(WordType *, const WordType *, WordType carry, unsigned); + /// DST -= RHS. Returns the carry flag. + static WordType tcSubtractPart(WordType *, WordType, unsigned); /// DST += SRC * MULTIPLIER + PART if add is true /// DST = SRC * MULTIPLIER + PART if add is false @@ -1726,10 +1855,9 @@ public: unsigned); /// DST = LHS * RHS, where DST has width the sum of the widths of the - /// operands. No overflow occurs. DST must be disjoint from both - /// operands. Returns the number of parts required to hold the result. - static unsigned tcFullMultiply(WordType *, const WordType *, - const WordType *, unsigned, unsigned); + /// operands. No overflow occurs. DST must be disjoint from both operands. + static void tcFullMultiply(WordType *, const WordType *, + const WordType *, unsigned, unsigned); /// If RHS is zero LHS and REMAINDER are left unchanged, return one. /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set @@ -1744,13 +1872,13 @@ public: WordType *remainder, WordType *scratch, unsigned parts); - /// Shift a bignum left COUNT bits. Shifted in bits are zero. There are no - /// restrictions on COUNT. - static void tcShiftLeft(WordType *, unsigned parts, unsigned count); + /// Shift a bignum left Count bits. Shifted in bits are zero. There are no + /// restrictions on Count. + static void tcShiftLeft(WordType *, unsigned Words, unsigned Count); - /// Shift a bignum right COUNT bits. Shifted in bits are zero. There are no - /// restrictions on COUNT. - static void tcShiftRight(WordType *, unsigned parts, unsigned count); + /// Shift a bignum right Count bits. Shifted in bits are zero. There are no + /// restrictions on Count. + static void tcShiftRight(WordType *, unsigned Words, unsigned Count); /// The obvious AND, OR and XOR and complement operations. static void tcAnd(WordType *, const WordType *, unsigned); @@ -1762,10 +1890,14 @@ public: static int tcCompare(const WordType *, const WordType *, unsigned); /// Increment a bignum in-place. Return the carry flag. - static WordType tcIncrement(WordType *, unsigned); + static WordType tcIncrement(WordType *dst, unsigned parts) { + return tcAddPart(dst, 1, parts); + } /// Decrement a bignum in-place. Return the borrow flag. - static WordType tcDecrement(WordType *, unsigned); + static WordType tcDecrement(WordType *dst, unsigned parts) { + return tcSubtractPart(dst, 1, parts); + } /// Set the least significant BITS and clear the rest. static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits); @@ -1867,8 +1999,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) { } inline APInt operator-(APInt v) { - v.flipAllBits(); - ++v; + v.negate(); return v; } @@ -1898,7 +2029,7 @@ inline APInt operator-(APInt a, const APInt &b) { } inline APInt operator-(const APInt &a, APInt &&b) { - b = -std::move(b); + b.negate(); b += a; return std::move(b); } @@ -1909,11 +2040,21 @@ inline APInt operator-(APInt a, uint64_t RHS) { } inline APInt operator-(uint64_t LHS, APInt b) { - b = -std::move(b); + b.negate(); b += LHS; return b; } +inline APInt operator*(APInt a, uint64_t RHS) { + a *= RHS; + return a; +} + +inline APInt operator*(uint64_t LHS, APInt b) { + b *= LHS; + return b; +} + namespace APIntOps { @@ -1937,10 +2078,10 @@ inline const APInt &umax(const APInt &A, const APInt &B) { return A.ugt(B) ? A : B; } -/// \brief Compute GCD of two APInt values. +/// \brief Compute GCD of two unsigned APInt values. /// /// This function returns the greatest common divisor of the two APInt values -/// using Euclid's algorithm. +/// using Stein's algorithm. /// /// \returns the greatest common divisor of A and B. APInt GreatestCommonDivisor(APInt A, APInt B); diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h index 813b3686d6b1ae6efff88adcbb136ff32fca2115..dabbf3314bd022e33d7e8aa35f94683e41d91725 100644 --- a/include/llvm/ADT/APSInt.h +++ b/include/llvm/ADT/APSInt.h @@ -125,7 +125,10 @@ public: return IsUnsigned ? APSInt(lshr(Amt), true) : APSInt(ashr(Amt), false); } APSInt& operator>>=(unsigned Amt) { - *this = *this >> Amt; + if (IsUnsigned) + lshrInPlace(Amt); + else + ashrInPlace(Amt); return *this; } @@ -179,7 +182,7 @@ public: return APSInt(static_cast(*this) << Bits, IsUnsigned); } APSInt& operator<<=(unsigned Amt) { - *this = *this << Amt; + static_cast(*this) <<= Amt; return *this; } @@ -235,19 +238,16 @@ public: assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!"); return APSInt(static_cast(*this) & RHS, IsUnsigned); } - APSInt And(const APSInt &RHS) const { return this->operator&(RHS); } APSInt operator|(const APSInt& RHS) const { assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!"); return APSInt(static_cast(*this) | RHS, IsUnsigned); } - APSInt Or(const APSInt &RHS) const { return this->operator|(RHS); } APSInt operator^(const APSInt &RHS) const { assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!"); return APSInt(static_cast(*this) ^ RHS, IsUnsigned); } - APSInt Xor(const APSInt &RHS) const { return this->operator^(RHS); } APSInt operator*(const APSInt& RHS) const { assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!"); @@ -288,12 +288,12 @@ public: /// \brief Compare underlying values of two numbers. static int compareValues(const APSInt &I1, const APSInt &I2) { if (I1.getBitWidth() == I2.getBitWidth() && I1.isSigned() == I2.isSigned()) - return I1 == I2 ? 0 : I1 > I2 ? 1 : -1; + return I1.IsUnsigned ? I1.compare(I2) : I1.compareSigned(I2); // Check for a bit-width mismatch. if (I1.getBitWidth() > I2.getBitWidth()) return compareValues(I1, I2.extend(I1.getBitWidth())); - else if (I2.getBitWidth() > I1.getBitWidth()) + if (I2.getBitWidth() > I1.getBitWidth()) return compareValues(I1.extend(I2.getBitWidth()), I2); // We have a signedness mismatch. Check for negative values and do an @@ -308,7 +308,7 @@ public: return 1; } - return I1.eq(I2) ? 0 : I1.ugt(I2) ? 1 : -1; + return I1.compare(I2); } static APSInt get(int64_t X) { return APSInt(APInt(64, X), false); } diff --git a/include/llvm/ADT/AllocatorList.h b/include/llvm/ADT/AllocatorList.h index 05a549f96ec70a94482610b06dff67a015f7311c..178c6742a87b90d41a75c78ada41e3e51d38cd8b 100644 --- a/include/llvm/ADT/AllocatorList.h +++ b/include/llvm/ADT/AllocatorList.h @@ -10,10 +10,16 @@ #ifndef LLVM_ADT_ALLOCATORLIST_H #define LLVM_ADT_ALLOCATORLIST_H +#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/simple_ilist.h" #include "llvm/Support/Allocator.h" +#include +#include +#include +#include #include +#include namespace llvm { @@ -39,7 +45,8 @@ template class AllocatorList : AllocatorT { T V; }; - typedef simple_ilist list_type; + using list_type = simple_ilist; + list_type List; AllocatorT &getAlloc() { return *this; } @@ -51,13 +58,17 @@ template class AllocatorList : AllocatorT { struct Cloner { AllocatorList &AL; + Cloner(AllocatorList &AL) : AL(AL) {} + Node *operator()(const Node &N) const { return AL.create(N.V); } }; struct Disposer { AllocatorList &AL; + Disposer(AllocatorList &AL) : AL(AL) {} + void operator()(Node *N) const { N->~Node(); AL.getAlloc().Deallocate(N); @@ -65,13 +76,13 @@ template class AllocatorList : AllocatorT { }; public: - typedef T value_type; - typedef T *pointer; - typedef T &reference; - typedef const T *const_pointer; - typedef const T &const_reference; - typedef typename list_type::size_type size_type; - typedef typename list_type::difference_type difference_type; + using value_type = T; + using pointer = T *; + using reference = T &; + using const_pointer = const T *; + using const_reference = const T &; + using size_type = typename list_type::size_type; + using difference_type = typename list_type::difference_type; private: template @@ -83,20 +94,18 @@ private: friend class IteratorImpl; friend AllocatorList; - typedef iterator_adaptor_base, - IteratorBase, std::bidirectional_iterator_tag, - ValueT> - base_type; + using base_type = + iterator_adaptor_base, IteratorBase, + std::bidirectional_iterator_tag, ValueT>; public: - typedef ValueT value_type; - typedef ValueT *pointer; - typedef ValueT &reference; + using value_type = ValueT; + using pointer = ValueT *; + using reference = ValueT &; IteratorImpl() = default; IteratorImpl(const IteratorImpl &) = default; IteratorImpl &operator=(const IteratorImpl &) = default; - ~IteratorImpl() = default; explicit IteratorImpl(const IteratorBase &I) : base_type(I) {} @@ -106,6 +115,8 @@ private: OtherIteratorBase, IteratorBase>::value>::type * = nullptr) : base_type(X.wrapped()) {} + ~IteratorImpl() = default; + reference operator*() const { return base_type::wrapped()->V; } pointer operator->() const { return &operator*(); } @@ -118,30 +129,34 @@ private: }; public: - typedef IteratorImpl iterator; - typedef IteratorImpl - reverse_iterator; - typedef IteratorImpl - const_iterator; - typedef IteratorImpl - const_reverse_iterator; + using iterator = IteratorImpl; + using reverse_iterator = + IteratorImpl; + using const_iterator = + IteratorImpl; + using const_reverse_iterator = + IteratorImpl; AllocatorList() = default; AllocatorList(AllocatorList &&X) : AllocatorT(std::move(X.getAlloc())), List(std::move(X.List)) {} + AllocatorList(const AllocatorList &X) { List.cloneFrom(X.List, Cloner(*this), Disposer(*this)); } + AllocatorList &operator=(AllocatorList &&X) { clear(); // Dispose of current nodes explicitly. List = std::move(X.List); getAlloc() = std::move(X.getAlloc()); return *this; } + AllocatorList &operator=(const AllocatorList &X) { List.cloneFrom(X.List, Cloner(*this), Disposer(*this)); return *this; } + ~AllocatorList() { clear(); } void swap(AllocatorList &RHS) { diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index 6b35d0aec8b2b2f90756ac5177bd464843515870..925ebafc3feda7e7a3bf16a17d19f4371b3c2d9f 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -1,4 +1,4 @@ -//===--- ArrayRef.h - Array Reference Wrapper -------------------*- C++ -*-===// +//===- ArrayRef.h - Array Reference Wrapper ---------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,12 +12,21 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Compiler.h" +#include #include +#include +#include +#include +#include +#include +#include #include namespace llvm { + /// ArrayRef - Represent a constant reference to an array (0 or more elements /// consecutively in memory), i.e. a start pointer and a length. It allows /// various APIs to take consecutive elements easily and conveniently. @@ -32,28 +41,27 @@ namespace llvm { template class LLVM_NODISCARD ArrayRef { public: - typedef const T *iterator; - typedef const T *const_iterator; - typedef size_t size_type; - - typedef std::reverse_iterator reverse_iterator; + using iterator = const T *; + using const_iterator = const T *; + using size_type = size_t; + using reverse_iterator = std::reverse_iterator; private: /// The start of the array, in an external buffer. - const T *Data; + const T *Data = nullptr; /// The number of elements. - size_type Length; + size_type Length = 0; public: /// @name Constructors /// @{ /// Construct an empty ArrayRef. - /*implicit*/ ArrayRef() : Data(nullptr), Length(0) {} + /*implicit*/ ArrayRef() = default; /// Construct an empty ArrayRef from None. - /*implicit*/ ArrayRef(NoneType) : Data(nullptr), Length(0) {} + /*implicit*/ ArrayRef(NoneType) {} /// Construct an ArrayRef from a single element. /*implicit*/ ArrayRef(const T &OneElt) @@ -282,9 +290,8 @@ namespace llvm { template class LLVM_NODISCARD MutableArrayRef : public ArrayRef { public: - typedef T *iterator; - - typedef std::reverse_iterator reverse_iterator; + using iterator = T *; + using reverse_iterator = std::reverse_iterator; /// Construct an empty MutableArrayRef. /*implicit*/ MutableArrayRef() : ArrayRef() {} @@ -416,19 +423,23 @@ namespace llvm { /// This is a MutableArrayRef that owns its array. template class OwningArrayRef : public MutableArrayRef { public: - OwningArrayRef() {} + OwningArrayRef() = default; OwningArrayRef(size_t Size) : MutableArrayRef(new T[Size], Size) {} + OwningArrayRef(ArrayRef Data) : MutableArrayRef(new T[Data.size()], Data.size()) { std::copy(Data.begin(), Data.end(), this->begin()); } + OwningArrayRef(OwningArrayRef &&Other) { *this = Other; } + OwningArrayRef &operator=(OwningArrayRef &&Other) { delete[] this->data(); this->MutableArrayRef::operator=(Other); Other.MutableArrayRef::operator=(MutableArrayRef()); return *this; } + ~OwningArrayRef() { delete[] this->data(); } }; @@ -517,13 +528,14 @@ namespace llvm { // ArrayRefs can be treated like a POD type. template struct isPodLike; - template struct isPodLike > { + template struct isPodLike> { static const bool value = true; }; template hash_code hash_value(ArrayRef S) { return hash_combine_range(S.begin(), S.end()); } + } // end namespace llvm #endif // LLVM_ADT_ARRAYREF_H diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index 8240d01ae977c7dc75179f997cf5eadb954ca590..e68ef5f53d106544d48f031909caee7dcbb3f1fd 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -14,6 +14,8 @@ #ifndef LLVM_ADT_BITVECTOR_H #define LLVM_ADT_BITVECTOR_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/MathExtras.h" #include #include @@ -25,6 +27,50 @@ namespace llvm { +/// ForwardIterator for the bits that are set. +/// Iterators get invalidated when resize / reserve is called. +template class const_set_bits_iterator_impl { + const BitVectorT &Parent; + int Current = 0; + + void advance() { + assert(Current != -1 && "Trying to advance past end."); + Current = Parent.find_next(Current); + } + +public: + const_set_bits_iterator_impl(const BitVectorT &Parent, int Current) + : Parent(Parent), Current(Current) {} + explicit const_set_bits_iterator_impl(const BitVectorT &Parent) + : const_set_bits_iterator_impl(Parent, Parent.find_first()) {} + const_set_bits_iterator_impl(const const_set_bits_iterator_impl &) = default; + + const_set_bits_iterator_impl operator++(int) { + auto Prev = *this; + advance(); + return Prev; + } + + const_set_bits_iterator_impl &operator++() { + advance(); + return *this; + } + + unsigned operator*() const { return Current; } + + bool operator==(const const_set_bits_iterator_impl &Other) const { + assert(&Parent == &Other.Parent && + "Comparing iterators from different BitVectors"); + return Current == Other.Current; + } + + bool operator!=(const const_set_bits_iterator_impl &Other) const { + assert(&Parent == &Other.Parent && + "Comparing iterators from different BitVectors"); + return Current != Other.Current; + } +}; + class BitVector { typedef unsigned long BitWord; @@ -33,9 +79,8 @@ class BitVector { static_assert(BITWORD_SIZE == 64 || BITWORD_SIZE == 32, "Unsupported word size"); - BitWord *Bits; // Actual bits. - unsigned Size; // Size of bitvector in bits. - unsigned Capacity; // Number of BitWords allocated in the Bits array. + MutableArrayRef Bits; // Actual bits. + unsigned Size; // Size of bitvector in bits. public: typedef unsigned size_type; @@ -73,18 +118,28 @@ public: } }; + typedef const_set_bits_iterator_impl const_set_bits_iterator; + typedef const_set_bits_iterator set_iterator; - /// BitVector default ctor - Creates an empty bitvector. - BitVector() : Size(0), Capacity(0) { - Bits = nullptr; + const_set_bits_iterator set_bits_begin() const { + return const_set_bits_iterator(*this); + } + const_set_bits_iterator set_bits_end() const { + return const_set_bits_iterator(*this, -1); } + iterator_range set_bits() const { + return make_range(set_bits_begin(), set_bits_end()); + } + + /// BitVector default ctor - Creates an empty bitvector. + BitVector() : Size(0) {} /// BitVector ctor - Creates a bitvector of specified number of bits. All /// bits are initialized to the specified value. explicit BitVector(unsigned s, bool t = false) : Size(s) { - Capacity = NumBitWords(s); - Bits = (BitWord *)std::malloc(Capacity * sizeof(BitWord)); - init_words(Bits, Capacity, t); + size_t Capacity = NumBitWords(s); + Bits = allocate(Capacity); + init_words(Bits, t); if (t) clear_unused_bits(); } @@ -92,25 +147,21 @@ public: /// BitVector copy ctor. BitVector(const BitVector &RHS) : Size(RHS.size()) { if (Size == 0) { - Bits = nullptr; - Capacity = 0; + Bits = MutableArrayRef(); return; } - Capacity = NumBitWords(RHS.size()); - Bits = (BitWord *)std::malloc(Capacity * sizeof(BitWord)); - std::memcpy(Bits, RHS.Bits, Capacity * sizeof(BitWord)); + size_t Capacity = NumBitWords(RHS.size()); + Bits = allocate(Capacity); + std::memcpy(Bits.data(), RHS.Bits.data(), Capacity * sizeof(BitWord)); } - BitVector(BitVector &&RHS) - : Bits(RHS.Bits), Size(RHS.Size), Capacity(RHS.Capacity) { - RHS.Bits = nullptr; - RHS.Size = RHS.Capacity = 0; + BitVector(BitVector &&RHS) : Bits(RHS.Bits), Size(RHS.Size) { + RHS.Bits = MutableArrayRef(); + RHS.Size = 0; } - ~BitVector() { - std::free(Bits); - } + ~BitVector() { std::free(Bits.data()); } /// empty - Tests whether there are no bits in this bitvector. bool empty() const { return Size == 0; } @@ -152,84 +203,177 @@ public: return !any(); } - /// find_first - Returns the index of the first set bit, -1 if none - /// of the bits are set. - int find_first() const { - for (unsigned i = 0; i < NumBitWords(size()); ++i) - if (Bits[i] != 0) - return i * BITWORD_SIZE + countTrailingZeros(Bits[i]); + /// find_first_in - Returns the index of the first set bit in the range + /// [Begin, End). Returns -1 if all bits in the range are unset. + int find_first_in(unsigned Begin, unsigned End) const { + assert(Begin <= End && End <= Size); + if (Begin == End) + return -1; + + unsigned FirstWord = Begin / BITWORD_SIZE; + unsigned LastWord = (End - 1) / BITWORD_SIZE; + + // Check subsequent words. + for (unsigned i = FirstWord; i <= LastWord; ++i) { + BitWord Copy = Bits[i]; + + if (i == FirstWord) { + unsigned FirstBit = Begin % BITWORD_SIZE; + Copy &= maskTrailingZeros(FirstBit); + } + + if (i == LastWord) { + unsigned LastBit = (End - 1) % BITWORD_SIZE; + Copy &= maskTrailingOnes(LastBit + 1); + } + if (Copy != 0) + return i * BITWORD_SIZE + countTrailingZeros(Copy); + } return -1; } - /// find_first_unset - Returns the index of the first unset bit, -1 if all - /// of the bits are set. - int find_first_unset() const { - for (unsigned i = 0; i < NumBitWords(size()); ++i) - if (Bits[i] != ~0UL) { - unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Bits[i]); - return Result < size() ? Result : -1; + /// find_last_in - Returns the index of the last set bit in the range + /// [Begin, End). Returns -1 if all bits in the range are unset. + int find_last_in(unsigned Begin, unsigned End) const { + assert(Begin <= End && End <= Size); + if (Begin == End) + return -1; + + unsigned LastWord = (End - 1) / BITWORD_SIZE; + unsigned FirstWord = Begin / BITWORD_SIZE; + + for (unsigned i = LastWord + 1; i >= FirstWord + 1; --i) { + unsigned CurrentWord = i - 1; + + BitWord Copy = Bits[CurrentWord]; + if (CurrentWord == LastWord) { + unsigned LastBit = (End - 1) % BITWORD_SIZE; + Copy &= maskTrailingOnes(LastBit + 1); + } + + if (CurrentWord == FirstWord) { + unsigned FirstBit = Begin % BITWORD_SIZE; + Copy &= maskTrailingZeros(FirstBit); } + + if (Copy != 0) + return (CurrentWord + 1) * BITWORD_SIZE - countLeadingZeros(Copy) - 1; + } + return -1; } - /// find_next - Returns the index of the next set bit following the - /// "Prev" bit. Returns -1 if the next set bit is not found. - int find_next(unsigned Prev) const { - ++Prev; - if (Prev >= Size) + /// find_first_unset_in - Returns the index of the first unset bit in the + /// range [Begin, End). Returns -1 if all bits in the range are set. + int find_first_unset_in(unsigned Begin, unsigned End) const { + assert(Begin <= End && End <= Size); + if (Begin == End) return -1; - unsigned WordPos = Prev / BITWORD_SIZE; - unsigned BitPos = Prev % BITWORD_SIZE; - BitWord Copy = Bits[WordPos]; - // Mask off previous bits. - Copy &= ~0UL << BitPos; - - if (Copy != 0) - return WordPos * BITWORD_SIZE + countTrailingZeros(Copy); + unsigned FirstWord = Begin / BITWORD_SIZE; + unsigned LastWord = (End - 1) / BITWORD_SIZE; // Check subsequent words. - for (unsigned i = WordPos+1; i < NumBitWords(size()); ++i) - if (Bits[i] != 0) - return i * BITWORD_SIZE + countTrailingZeros(Bits[i]); + for (unsigned i = FirstWord; i <= LastWord; ++i) { + BitWord Copy = Bits[i]; + + if (i == FirstWord) { + unsigned FirstBit = Begin % BITWORD_SIZE; + Copy |= maskTrailingOnes(FirstBit); + } + + if (i == LastWord) { + unsigned LastBit = (End - 1) % BITWORD_SIZE; + Copy |= maskTrailingZeros(LastBit + 1); + } + if (Copy != ~0UL) { + unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Copy); + return Result < size() ? Result : -1; + } + } return -1; } - /// find_next_unset - Returns the index of the next usnet bit following the - /// "Prev" bit. Returns -1 if all remaining bits are set. - int find_next_unset(unsigned Prev) const { - ++Prev; - if (Prev >= Size) + /// find_last_unset_in - Returns the index of the last unset bit in the + /// range [Begin, End). Returns -1 if all bits in the range are set. + int find_last_unset_in(unsigned Begin, unsigned End) const { + assert(Begin <= End && End <= Size); + if (Begin == End) return -1; - unsigned WordPos = Prev / BITWORD_SIZE; - unsigned BitPos = Prev % BITWORD_SIZE; - BitWord Copy = Bits[WordPos]; - // Mask in previous bits. - BitWord Mask = (1 << BitPos) - 1; - Copy |= Mask; + unsigned LastWord = (End - 1) / BITWORD_SIZE; + unsigned FirstWord = Begin / BITWORD_SIZE; - if (Copy != ~0UL) - return next_unset_in_word(WordPos, Copy); + for (unsigned i = LastWord + 1; i >= FirstWord + 1; --i) { + unsigned CurrentWord = i - 1; - // Check subsequent words. - for (unsigned i = WordPos + 1; i < NumBitWords(size()); ++i) - if (Bits[i] != ~0UL) - return next_unset_in_word(i, Bits[i]); + BitWord Copy = Bits[CurrentWord]; + if (CurrentWord == LastWord) { + unsigned LastBit = (End - 1) % BITWORD_SIZE; + Copy |= maskTrailingZeros(LastBit + 1); + } + + if (CurrentWord == FirstWord) { + unsigned FirstBit = Begin % BITWORD_SIZE; + Copy |= maskTrailingOnes(FirstBit); + } + + if (Copy != ~0UL) { + unsigned Result = + (CurrentWord + 1) * BITWORD_SIZE - countLeadingOnes(Copy) - 1; + return Result < Size ? Result : -1; + } + } return -1; } - /// clear - Clear all bits. + /// find_first - Returns the index of the first set bit, -1 if none + /// of the bits are set. + int find_first() const { return find_first_in(0, Size); } + + /// find_last - Returns the index of the last set bit, -1 if none of the bits + /// are set. + int find_last() const { return find_last_in(0, Size); } + + /// find_next - Returns the index of the next set bit following the + /// "Prev" bit. Returns -1 if the next set bit is not found. + int find_next(unsigned Prev) const { return find_first_in(Prev + 1, Size); } + + /// find_prev - Returns the index of the first set bit that precedes the + /// the bit at \p PriorTo. Returns -1 if all previous bits are unset. + int find_prev(unsigned PriorTo) const { return find_last_in(0, PriorTo); } + + /// find_first_unset - Returns the index of the first unset bit, -1 if all + /// of the bits are set. + int find_first_unset() const { return find_first_unset_in(0, Size); } + + /// find_next_unset - Returns the index of the next unset bit following the + /// "Prev" bit. Returns -1 if all remaining bits are set. + int find_next_unset(unsigned Prev) const { + return find_first_unset_in(Prev + 1, Size); + } + + /// find_last_unset - Returns the index of the last unset bit, -1 if all of + /// the bits are set. + int find_last_unset() const { return find_last_unset_in(0, Size); } + + /// find_prev_unset - Returns the index of the first unset bit that precedes + /// the bit at \p PriorTo. Returns -1 if all previous bits are set. + int find_prev_unset(unsigned PriorTo) { + return find_last_unset_in(0, PriorTo); + } + + /// clear - Removes all bits from the bitvector. Does not change capacity. void clear() { Size = 0; } /// resize - Grow or shrink the bitvector. void resize(unsigned N, bool t = false) { - if (N > Capacity * BITWORD_SIZE) { - unsigned OldCapacity = Capacity; + if (N > getBitCapacity()) { + unsigned OldCapacity = Bits.size(); grow(N); - init_words(&Bits[OldCapacity], (Capacity-OldCapacity), t); + init_words(Bits.drop_front(OldCapacity), t); } // Set any old unused bits that are now included in the BitVector. This @@ -246,19 +390,19 @@ public: } void reserve(unsigned N) { - if (N > Capacity * BITWORD_SIZE) + if (N > getBitCapacity()) grow(N); } // Set, reset, flip BitVector &set() { - init_words(Bits, Capacity, true); + init_words(Bits, true); clear_unused_bits(); return *this; } BitVector &set(unsigned Idx) { - assert(Bits && "Bits never allocated"); + assert(Bits.data() && "Bits never allocated"); Bits[Idx / BITWORD_SIZE] |= BitWord(1) << (Idx % BITWORD_SIZE); return *this; } @@ -293,7 +437,7 @@ public: } BitVector &reset() { - init_words(Bits, Capacity, false); + init_words(Bits, false); return *this; } @@ -455,27 +599,126 @@ public: return *this; } + BitVector &operator>>=(unsigned N) { + assert(N <= Size); + if (LLVM_UNLIKELY(empty() || N == 0)) + return *this; + + unsigned NumWords = NumBitWords(Size); + assert(NumWords >= 1); + + wordShr(N / BITWORD_SIZE); + + unsigned BitDistance = N % BITWORD_SIZE; + if (BitDistance == 0) + return *this; + + // When the shift size is not a multiple of the word size, then we have + // a tricky situation where each word in succession needs to extract some + // of the bits from the next word and or them into this word while + // shifting this word to make room for the new bits. This has to be done + // for every word in the array. + + // Since we're shifting each word right, some bits will fall off the end + // of each word to the right, and empty space will be created on the left. + // The final word in the array will lose bits permanently, so starting at + // the beginning, work forwards shifting each word to the right, and + // OR'ing in the bits from the end of the next word to the beginning of + // the current word. + + // Example: + // Starting with {0xAABBCCDD, 0xEEFF0011, 0x22334455} and shifting right + // by 4 bits. + // Step 1: Word[0] >>= 4 ; 0x0ABBCCDD + // Step 2: Word[0] |= 0x10000000 ; 0x1ABBCCDD + // Step 3: Word[1] >>= 4 ; 0x0EEFF001 + // Step 4: Word[1] |= 0x50000000 ; 0x5EEFF001 + // Step 5: Word[2] >>= 4 ; 0x02334455 + // Result: { 0x1ABBCCDD, 0x5EEFF001, 0x02334455 } + const BitWord Mask = maskTrailingOnes(BitDistance); + const unsigned LSH = BITWORD_SIZE - BitDistance; + + for (unsigned I = 0; I < NumWords - 1; ++I) { + Bits[I] >>= BitDistance; + Bits[I] |= (Bits[I + 1] & Mask) << LSH; + } + + Bits[NumWords - 1] >>= BitDistance; + + return *this; + } + + BitVector &operator<<=(unsigned N) { + assert(N <= Size); + if (LLVM_UNLIKELY(empty() || N == 0)) + return *this; + + unsigned NumWords = NumBitWords(Size); + assert(NumWords >= 1); + + wordShl(N / BITWORD_SIZE); + + unsigned BitDistance = N % BITWORD_SIZE; + if (BitDistance == 0) + return *this; + + // When the shift size is not a multiple of the word size, then we have + // a tricky situation where each word in succession needs to extract some + // of the bits from the previous word and or them into this word while + // shifting this word to make room for the new bits. This has to be done + // for every word in the array. This is similar to the algorithm outlined + // in operator>>=, but backwards. + + // Since we're shifting each word left, some bits will fall off the end + // of each word to the left, and empty space will be created on the right. + // The first word in the array will lose bits permanently, so starting at + // the end, work backwards shifting each word to the left, and OR'ing + // in the bits from the end of the next word to the beginning of the + // current word. + + // Example: + // Starting with {0xAABBCCDD, 0xEEFF0011, 0x22334455} and shifting left + // by 4 bits. + // Step 1: Word[2] <<= 4 ; 0x23344550 + // Step 2: Word[2] |= 0x0000000E ; 0x2334455E + // Step 3: Word[1] <<= 4 ; 0xEFF00110 + // Step 4: Word[1] |= 0x0000000A ; 0xEFF0011A + // Step 5: Word[0] <<= 4 ; 0xABBCCDD0 + // Result: { 0xABBCCDD0, 0xEFF0011A, 0x2334455E } + const BitWord Mask = maskLeadingOnes(BitDistance); + const unsigned RSH = BITWORD_SIZE - BitDistance; + + for (int I = NumWords - 1; I > 0; --I) { + Bits[I] <<= BitDistance; + Bits[I] |= (Bits[I - 1] & Mask) >> RSH; + } + Bits[0] <<= BitDistance; + clear_unused_bits(); + + return *this; + } + // Assignment operator. const BitVector &operator=(const BitVector &RHS) { if (this == &RHS) return *this; Size = RHS.size(); unsigned RHSWords = NumBitWords(Size); - if (Size <= Capacity * BITWORD_SIZE) { + if (Size <= getBitCapacity()) { if (Size) - std::memcpy(Bits, RHS.Bits, RHSWords * sizeof(BitWord)); + std::memcpy(Bits.data(), RHS.Bits.data(), RHSWords * sizeof(BitWord)); clear_unused_bits(); return *this; } // Grow the bitvector to have enough elements. - Capacity = RHSWords; - assert(Capacity > 0 && "negative capacity?"); - BitWord *NewBits = (BitWord *)std::malloc(Capacity * sizeof(BitWord)); - std::memcpy(NewBits, RHS.Bits, Capacity * sizeof(BitWord)); + unsigned NewCapacity = RHSWords; + assert(NewCapacity > 0 && "negative capacity?"); + auto NewBits = allocate(NewCapacity); + std::memcpy(NewBits.data(), RHS.Bits.data(), NewCapacity * sizeof(BitWord)); // Destroy the old bits. - std::free(Bits); + std::free(Bits.data()); Bits = NewBits; return *this; @@ -484,13 +727,12 @@ public: const BitVector &operator=(BitVector &&RHS) { if (this == &RHS) return *this; - std::free(Bits); + std::free(Bits.data()); Bits = RHS.Bits; Size = RHS.Size; - Capacity = RHS.Capacity; - RHS.Bits = nullptr; - RHS.Size = RHS.Capacity = 0; + RHS.Bits = MutableArrayRef(); + RHS.Size = 0; return *this; } @@ -498,7 +740,6 @@ public: void swap(BitVector &RHS) { std::swap(Bits, RHS.Bits); std::swap(Size, RHS.Size); - std::swap(Capacity, RHS.Capacity); } //===--------------------------------------------------------------------===// @@ -538,6 +779,59 @@ public: } private: + /// \brief Perform a logical left shift of \p Count words by moving everything + /// \p Count words to the right in memory. + /// + /// While confusing, words are stored from least significant at Bits[0] to + /// most significant at Bits[NumWords-1]. A logical shift left, however, + /// moves the current least significant bit to a higher logical index, and + /// fills the previous least significant bits with 0. Thus, we actually + /// need to move the bytes of the memory to the right, not to the left. + /// Example: + /// Words = [0xBBBBAAAA, 0xDDDDFFFF, 0x00000000, 0xDDDD0000] + /// represents a BitVector where 0xBBBBAAAA contain the least significant + /// bits. So if we want to shift the BitVector left by 2 words, we need to + /// turn this into 0x00000000 0x00000000 0xBBBBAAAA 0xDDDDFFFF by using a + /// memmove which moves right, not left. + void wordShl(uint32_t Count) { + if (Count == 0) + return; + + uint32_t NumWords = NumBitWords(Size); + + auto Src = Bits.take_front(NumWords).drop_back(Count); + auto Dest = Bits.take_front(NumWords).drop_front(Count); + + // Since we always move Word-sized chunks of data with src and dest both + // aligned to a word-boundary, we don't need to worry about endianness + // here. + std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord)); + std::memset(Bits.data(), 0, Count * sizeof(BitWord)); + clear_unused_bits(); + } + + /// \brief Perform a logical right shift of \p Count words by moving those + /// words to the left in memory. See wordShl for more information. + /// + void wordShr(uint32_t Count) { + if (Count == 0) + return; + + uint32_t NumWords = NumBitWords(Size); + + auto Src = Bits.take_front(NumWords).drop_front(Count); + auto Dest = Bits.take_front(NumWords).drop_back(Count); + assert(Dest.size() == Src.size()); + + std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord)); + std::memset(Dest.end(), 0, Count * sizeof(BitWord)); + } + + MutableArrayRef allocate(size_t NumWords) { + BitWord *RawBits = (BitWord *)std::malloc(NumWords * sizeof(BitWord)); + return MutableArrayRef(RawBits, NumWords); + } + int next_unset_in_word(int WordIndex, BitWord Word) const { unsigned Result = WordIndex * BITWORD_SIZE + countTrailingOnes(Word); return Result < size() ? Result : -1; @@ -551,8 +845,8 @@ private: void set_unused_bits(bool t = true) { // Set high words first. unsigned UsedWords = NumBitWords(Size); - if (Capacity > UsedWords) - init_words(&Bits[UsedWords], (Capacity-UsedWords), t); + if (Bits.size() > UsedWords) + init_words(Bits.drop_front(UsedWords), t); // Then set any stray high bits of the last used word. unsigned ExtraBits = Size % BITWORD_SIZE; @@ -571,16 +865,17 @@ private: } void grow(unsigned NewSize) { - Capacity = std::max(NumBitWords(NewSize), Capacity * 2); - assert(Capacity > 0 && "realloc-ing zero space"); - Bits = (BitWord *)std::realloc(Bits, Capacity * sizeof(BitWord)); - + size_t NewCapacity = std::max(NumBitWords(NewSize), Bits.size() * 2); + assert(NewCapacity > 0 && "realloc-ing zero space"); + BitWord *NewBits = + (BitWord *)std::realloc(Bits.data(), NewCapacity * sizeof(BitWord)); + Bits = MutableArrayRef(NewBits, NewCapacity); clear_unused_bits(); } - void init_words(BitWord *B, unsigned NumWords, bool t) { - if (NumWords > 0) - memset(B, 0 - (int)t, NumWords*sizeof(BitWord)); + void init_words(MutableArrayRef B, bool t) { + if (B.size() > 0) + memset(B.data(), 0 - (int)t, B.size() * sizeof(BitWord)); } template @@ -612,7 +907,8 @@ private: public: /// Return the size (in bytes) of the bit vector. - size_t getMemorySize() const { return Capacity * sizeof(BitWord); } + size_t getMemorySize() const { return Bits.size() * sizeof(BitWord); } + size_t getBitCapacity() const { return Bits.size() * BITWORD_SIZE; } }; static inline size_t capacity_in_bytes(const BitVector &X) { diff --git a/include/llvm/ADT/BreadthFirstIterator.h b/include/llvm/ADT/BreadthFirstIterator.h index eaeecb6e057ffecf0e506193a205a9e3e51f6c56..6bc63c283b0975be2b883a68366147b988275998 100644 --- a/include/llvm/ADT/BreadthFirstIterator.h +++ b/include/llvm/ADT/BreadthFirstIterator.h @@ -25,7 +25,6 @@ #include "llvm/ADT/iterator_range.h" #include #include -#include #include namespace llvm { @@ -49,13 +48,13 @@ template , public bf_iterator_storage { - typedef std::iterator super; + using super = std::iterator; - typedef typename GT::NodeRef NodeRef; - typedef typename GT::ChildIteratorType ChildItTy; + using NodeRef = typename GT::NodeRef; + using ChildItTy = typename GT::ChildIteratorType; // First element is the node reference, second is the next child to visit. - typedef std::pair> QueueElement; + using QueueElement = std::pair>; // Visit queue - used to maintain BFS ordering. // Optional<> because we need markers for levels. @@ -109,7 +108,7 @@ private: } public: - typedef typename super::pointer pointer; + using pointer = typename super::pointer; // Provide static begin and end methods as our public "constructors" static bf_iterator begin(const GraphT &G) { diff --git a/include/llvm/ADT/DAGDeltaAlgorithm.h b/include/llvm/ADT/DAGDeltaAlgorithm.h index 5ea0fe872868247c75197892253b3dc9d3195d0a..41fdd43efb8a3aae2d4bb8b1ee3d4e0c0d597c86 100644 --- a/include/llvm/ADT/DAGDeltaAlgorithm.h +++ b/include/llvm/ADT/DAGDeltaAlgorithm.h @@ -1,4 +1,4 @@ -//===--- DAGDeltaAlgorithm.h - A DAG Minimization Algorithm ----*- C++ -*--===// +//===- DAGDeltaAlgorithm.h - A DAG Minimization Algorithm ------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -40,12 +40,12 @@ class DAGDeltaAlgorithm { virtual void anchor(); public: - typedef unsigned change_ty; - typedef std::pair edge_ty; + using change_ty = unsigned; + using edge_ty = std::pair; // FIXME: Use a decent data structure. - typedef std::set changeset_ty; - typedef std::vector changesetlist_ty; + using changeset_ty = std::set; + using changesetlist_ty = std::vector; public: virtual ~DAGDeltaAlgorithm() = default; diff --git a/include/llvm/ADT/DeltaAlgorithm.h b/include/llvm/ADT/DeltaAlgorithm.h index a26f37dfdc7dcee97a4f5e1f9afd7cdb061ac026..6becb2a6010446dad8c0ba26e54a26d43dcd5d62 100644 --- a/include/llvm/ADT/DeltaAlgorithm.h +++ b/include/llvm/ADT/DeltaAlgorithm.h @@ -1,4 +1,4 @@ -//===--- DeltaAlgorithm.h - A Set Minimization Algorithm -------*- C++ -*--===// +//===- DeltaAlgorithm.h - A Set Minimization Algorithm ---------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -35,10 +35,10 @@ namespace llvm { /// predicate. class DeltaAlgorithm { public: - typedef unsigned change_ty; + using change_ty = unsigned; // FIXME: Use a decent data structure. - typedef std::set changeset_ty; - typedef std::vector changesetlist_ty; + using changeset_ty = std::set; + using changesetlist_ty = std::vector; private: /// Cache of failed test results. Successful test results are never cached @@ -90,4 +90,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_ADT_DELTAALGORITHM_H diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index fd8d3bf368a8866f52ceaa41da6e98b51b94a3b9..b311e69ec9d378c21cc538cf4c4d08d2ec746488 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -25,8 +25,8 @@ #include #include #include -#include #include +#include #include namespace llvm { @@ -57,14 +57,15 @@ class DenseMapBase : public DebugEpochBase { using const_arg_type_t = typename const_pointer_or_const_ref::type; public: - typedef unsigned size_type; - typedef KeyT key_type; - typedef ValueT mapped_type; - typedef BucketT value_type; - - typedef DenseMapIterator iterator; - typedef DenseMapIterator - const_iterator; + using size_type = unsigned; + using key_type = KeyT; + using mapped_type = ValueT; + using value_type = BucketT; + + using iterator = DenseMapIterator; + using const_iterator = + DenseMapIterator; + inline iterator begin() { // When the map is empty, avoid the overhead of AdvancePastEmptyBuckets(). return empty() ? end() : iterator(getBuckets(), getBucketsEnd(), *this); @@ -387,15 +388,18 @@ protected: static unsigned getHashValue(const KeyT &Val) { return KeyInfoT::getHashValue(Val); } + template static unsigned getHashValue(const LookupKeyT &Val) { return KeyInfoT::getHashValue(Val); } + static const KeyT getEmptyKey() { static_assert(std::is_base_of::value, "Must pass the derived type to this template!"); return KeyInfoT::getEmptyKey(); } + static const KeyT getTombstoneKey() { return KeyInfoT::getTombstoneKey(); } @@ -404,39 +408,51 @@ private: unsigned getNumEntries() const { return static_cast(this)->getNumEntries(); } + void setNumEntries(unsigned Num) { static_cast(this)->setNumEntries(Num); } + void incrementNumEntries() { setNumEntries(getNumEntries() + 1); } + void decrementNumEntries() { setNumEntries(getNumEntries() - 1); } + unsigned getNumTombstones() const { return static_cast(this)->getNumTombstones(); } + void setNumTombstones(unsigned Num) { static_cast(this)->setNumTombstones(Num); } + void incrementNumTombstones() { setNumTombstones(getNumTombstones() + 1); } + void decrementNumTombstones() { setNumTombstones(getNumTombstones() - 1); } + const BucketT *getBuckets() const { return static_cast(this)->getBuckets(); } + BucketT *getBuckets() { return static_cast(this)->getBuckets(); } + unsigned getNumBuckets() const { return static_cast(this)->getNumBuckets(); } + BucketT *getBucketsEnd() { return getBuckets() + getNumBuckets(); } + const BucketT *getBucketsEnd() const { return getBuckets() + getNumBuckets(); } @@ -587,10 +603,11 @@ template > class DenseMap : public DenseMapBase, KeyT, ValueT, KeyInfoT, BucketT> { + friend class DenseMapBase; + // Lift some types from the dependent base class into this class for // simplicity of referring to them. - typedef DenseMapBase BaseT; - friend class DenseMapBase; + using BaseT = DenseMapBase; BucketT *Buckets; unsigned NumEntries; @@ -705,6 +722,7 @@ private: unsigned getNumEntries() const { return NumEntries; } + void setNumEntries(unsigned Num) { NumEntries = Num; } @@ -712,6 +730,7 @@ private: unsigned getNumTombstones() const { return NumTombstones; } + void setNumTombstones(unsigned Num) { NumTombstones = Num; } @@ -743,10 +762,12 @@ class SmallDenseMap : public DenseMapBase< SmallDenseMap, KeyT, ValueT, KeyInfoT, BucketT> { + friend class DenseMapBase; + // Lift some types from the dependent base class into this class for // simplicity of referring to them. - typedef DenseMapBase BaseT; - friend class DenseMapBase; + using BaseT = DenseMapBase; + static_assert(isPowerOf2_64(InlineBuckets), "InlineBuckets must be a power of 2."); @@ -972,6 +993,7 @@ private: unsigned getNumEntries() const { return NumEntries; } + void setNumEntries(unsigned Num) { // NumEntries is hardcoded to be 31 bits wide. assert(Num < (1U << 31) && "Cannot support more than 1<<31 entries"); @@ -981,6 +1003,7 @@ private: unsigned getNumTombstones() const { return NumTombstones; } + void setNumTombstones(unsigned Num) { NumTombstones = Num; } @@ -992,15 +1015,18 @@ private: // 'storage.buffer' static type is 'char *'. return reinterpret_cast(storage.buffer); } + BucketT *getInlineBuckets() { return const_cast( const_cast(this)->getInlineBuckets()); } + const LargeRep *getLargeRep() const { assert(!Small); // Note, same rule about aliasing as with getInlineBuckets. return reinterpret_cast(storage.buffer); } + LargeRep *getLargeRep() { return const_cast( const_cast(this)->getLargeRep()); @@ -1009,10 +1035,12 @@ private: const BucketT *getBuckets() const { return Small ? getInlineBuckets() : getLargeRep()->Buckets; } + BucketT *getBuckets() { return const_cast( const_cast(this)->getBuckets()); } + unsigned getNumBuckets() const { return Small ? InlineBuckets : getLargeRep()->NumBuckets; } @@ -1037,23 +1065,25 @@ private: template class DenseMapIterator : DebugEpochBase::HandleBase { - typedef DenseMapIterator ConstIterator; friend class DenseMapIterator; friend class DenseMapIterator; + using ConstIterator = DenseMapIterator; + public: - typedef ptrdiff_t difference_type; - typedef typename std::conditional::type - value_type; - typedef value_type *pointer; - typedef value_type &reference; - typedef std::forward_iterator_tag iterator_category; + using difference_type = ptrdiff_t; + using value_type = + typename std::conditional::type; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::forward_iterator_tag; private: - pointer Ptr, End; + pointer Ptr = nullptr; + pointer End = nullptr; public: - DenseMapIterator() : Ptr(nullptr), End(nullptr) {} + DenseMapIterator() = default; DenseMapIterator(pointer Pos, pointer E, const DebugEpochBase &Epoch, bool NoAdvance = false) diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h index bb973ac65063428ed5ea8e7e2059a413961b885c..a96904c7dbbf6ed7c8e701a4bed0012b231a2d15 100644 --- a/include/llvm/ADT/DenseMapInfo.h +++ b/include/llvm/ADT/DenseMapInfo.h @@ -18,7 +18,10 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/PointerLikeTypeTraits.h" -#include "llvm/Support/type_traits.h" +#include +#include +#include +#include namespace llvm { @@ -38,15 +41,18 @@ struct DenseMapInfo { Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return reinterpret_cast(Val); } + static inline T* getTombstoneKey() { uintptr_t Val = static_cast(-2); Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return reinterpret_cast(Val); } + static unsigned getHashValue(const T *PtrVal) { return (unsigned((uintptr_t)PtrVal) >> 4) ^ (unsigned((uintptr_t)PtrVal) >> 9); } + static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; } }; @@ -55,6 +61,7 @@ template<> struct DenseMapInfo { static inline char getEmptyKey() { return ~0; } static inline char getTombstoneKey() { return ~0 - 1; } static unsigned getHashValue(const char& Val) { return Val * 37U; } + static bool isEqual(const char &LHS, const char &RHS) { return LHS == RHS; } @@ -65,6 +72,7 @@ template <> struct DenseMapInfo { static inline unsigned short getEmptyKey() { return 0xFFFF; } static inline unsigned short getTombstoneKey() { return 0xFFFF - 1; } static unsigned getHashValue(const unsigned short &Val) { return Val * 37U; } + static bool isEqual(const unsigned short &LHS, const unsigned short &RHS) { return LHS == RHS; } @@ -75,6 +83,7 @@ template<> struct DenseMapInfo { static inline unsigned getEmptyKey() { return ~0U; } static inline unsigned getTombstoneKey() { return ~0U - 1; } static unsigned getHashValue(const unsigned& Val) { return Val * 37U; } + static bool isEqual(const unsigned& LHS, const unsigned& RHS) { return LHS == RHS; } @@ -84,9 +93,11 @@ template<> struct DenseMapInfo { template<> struct DenseMapInfo { static inline unsigned long getEmptyKey() { return ~0UL; } static inline unsigned long getTombstoneKey() { return ~0UL - 1L; } + static unsigned getHashValue(const unsigned long& Val) { return (unsigned)(Val * 37UL); } + static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) { return LHS == RHS; } @@ -96,9 +107,11 @@ template<> struct DenseMapInfo { template<> struct DenseMapInfo { static inline unsigned long long getEmptyKey() { return ~0ULL; } static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; } + static unsigned getHashValue(const unsigned long long& Val) { return (unsigned)(Val * 37ULL); } + static bool isEqual(const unsigned long long& LHS, const unsigned long long& RHS) { return LHS == RHS; @@ -118,6 +131,7 @@ template<> struct DenseMapInfo { static inline int getEmptyKey() { return 0x7fffffff; } static inline int getTombstoneKey() { return -0x7fffffff - 1; } static unsigned getHashValue(const int& Val) { return (unsigned)(Val * 37U); } + static bool isEqual(const int& LHS, const int& RHS) { return LHS == RHS; } @@ -128,10 +142,13 @@ template<> struct DenseMapInfo { static inline long getEmptyKey() { return (1UL << (sizeof(long) * 8 - 1)) - 1UL; } + static inline long getTombstoneKey() { return getEmptyKey() - 1L; } + static unsigned getHashValue(const long& Val) { return (unsigned)(Val * 37UL); } + static bool isEqual(const long& LHS, const long& RHS) { return LHS == RHS; } @@ -141,9 +158,11 @@ template<> struct DenseMapInfo { template<> struct DenseMapInfo { static inline long long getEmptyKey() { return 0x7fffffffffffffffLL; } static inline long long getTombstoneKey() { return -0x7fffffffffffffffLL-1; } + static unsigned getHashValue(const long long& Val) { return (unsigned)(Val * 37ULL); } + static bool isEqual(const long long& LHS, const long long& RHS) { return LHS == RHS; @@ -152,19 +171,21 @@ template<> struct DenseMapInfo { // Provide DenseMapInfo for all pairs whose members have info. template -struct DenseMapInfo > { - typedef std::pair Pair; - typedef DenseMapInfo FirstInfo; - typedef DenseMapInfo SecondInfo; +struct DenseMapInfo> { + using Pair = std::pair; + using FirstInfo = DenseMapInfo; + using SecondInfo = DenseMapInfo; static inline Pair getEmptyKey() { return std::make_pair(FirstInfo::getEmptyKey(), SecondInfo::getEmptyKey()); } + static inline Pair getTombstoneKey() { return std::make_pair(FirstInfo::getTombstoneKey(), SecondInfo::getTombstoneKey()); } + static unsigned getHashValue(const Pair& PairVal) { uint64_t key = (uint64_t)FirstInfo::getHashValue(PairVal.first) << 32 | (uint64_t)SecondInfo::getHashValue(PairVal.second); @@ -178,6 +199,7 @@ struct DenseMapInfo > { key ^= (key >> 31); return (unsigned)key; } + static bool isEqual(const Pair &LHS, const Pair &RHS) { return FirstInfo::isEqual(LHS.first, RHS.first) && SecondInfo::isEqual(LHS.second, RHS.second); @@ -190,16 +212,19 @@ template <> struct DenseMapInfo { return StringRef(reinterpret_cast(~static_cast(0)), 0); } + static inline StringRef getTombstoneKey() { return StringRef(reinterpret_cast(~static_cast(1)), 0); } + static unsigned getHashValue(StringRef Val) { assert(Val.data() != getEmptyKey().data() && "Cannot hash the empty key!"); assert(Val.data() != getTombstoneKey().data() && "Cannot hash the tombstone key!"); return (unsigned)(hash_value(Val)); } + static bool isEqual(StringRef LHS, StringRef RHS) { if (RHS.data() == getEmptyKey().data()) return LHS.data() == getEmptyKey().data(); @@ -215,16 +240,19 @@ template struct DenseMapInfo> { return ArrayRef(reinterpret_cast(~static_cast(0)), size_t(0)); } + static inline ArrayRef getTombstoneKey() { return ArrayRef(reinterpret_cast(~static_cast(1)), size_t(0)); } + static unsigned getHashValue(ArrayRef Val) { assert(Val.data() != getEmptyKey().data() && "Cannot hash the empty key!"); assert(Val.data() != getTombstoneKey().data() && "Cannot hash the tombstone key!"); return (unsigned)(hash_value(Val)); } + static bool isEqual(ArrayRef LHS, ArrayRef RHS) { if (RHS.data() == getEmptyKey().data()) return LHS.data() == getEmptyKey().data(); @@ -236,4 +264,4 @@ template struct DenseMapInfo> { } // end namespace llvm -#endif +#endif // LLVM_ADT_DENSEMAPINFO_H diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h index fcf304c3ecc41060846c2f327515a04536a25917..7e5171c3f3a44559f5c488b1dc9c707fa45b86f9 100644 --- a/include/llvm/ADT/DenseSet.h +++ b/include/llvm/ADT/DenseSet.h @@ -15,11 +15,18 @@ #define LLVM_ADT_DENSESET_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/type_traits.h" +#include +#include #include +#include +#include namespace llvm { namespace detail { + struct DenseSetEmpty {}; // Use the empty base class trick so we can create a DenseMap where the buckets @@ -48,13 +55,14 @@ class DenseSetImpl { static_assert(sizeof(typename MapTy::value_type) == sizeof(ValueT), "DenseMap buckets unexpectedly large!"); MapTy TheMap; + template using const_arg_type_t = typename const_pointer_or_const_ref::type; public: - typedef ValueT key_type; - typedef ValueT value_type; - typedef unsigned size_type; + using key_type = ValueT; + using value_type = ValueT; + using size_type = unsigned; explicit DenseSetImpl(unsigned InitialReserve = 0) : TheMap(InitialReserve) {} @@ -100,11 +108,11 @@ public: friend class ConstIterator; public: - typedef typename MapTy::iterator::difference_type difference_type; - typedef ValueT value_type; - typedef value_type *pointer; - typedef value_type &reference; - typedef std::forward_iterator_tag iterator_category; + using difference_type = typename MapTy::iterator::difference_type; + using value_type = ValueT; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::forward_iterator_tag; Iterator() = default; Iterator(const typename MapTy::iterator &i) : I(i) {} @@ -126,16 +134,14 @@ public: friend class Iterator; public: - typedef typename MapTy::const_iterator::difference_type difference_type; - typedef ValueT value_type; - typedef value_type *pointer; - typedef value_type &reference; - typedef std::forward_iterator_tag iterator_category; - - ConstIterator(const Iterator &B) : I(B.I) {} + using difference_type = typename MapTy::const_iterator::difference_type; + using value_type = ValueT; + using pointer = value_type *; + using reference = value_type &; + using iterator_category = std::forward_iterator_tag; ConstIterator() = default; - + ConstIterator(const Iterator &B) : I(B.I) {} ConstIterator(const typename MapTy::const_iterator &i) : I(i) {} const ValueT &operator*() const { return I->getFirst(); } @@ -147,8 +153,8 @@ public: bool operator!=(const ConstIterator& X) const { return I != X.I; } }; - typedef Iterator iterator; - typedef ConstIterator const_iterator; + using iterator = Iterator; + using const_iterator = ConstIterator; iterator begin() { return Iterator(TheMap.begin()); } iterator end() { return Iterator(TheMap.end()); } @@ -208,7 +214,7 @@ public: } }; -} // namespace detail +} // end namespace detail /// Implements a dense probed hash-table based set. template > @@ -246,4 +252,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_ADT_DENSESET_H diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h index b020d48cb3f082d4366da13498ea8a84fcb80baa..e964d7fa23911d1867d53ce0317688effcc4ea78 100644 --- a/include/llvm/ADT/DepthFirstIterator.h +++ b/include/llvm/ADT/DepthFirstIterator.h @@ -68,13 +68,14 @@ public: // cross edges in the spanning tree but is not used in the common case. template struct df_iterator_default_set : public SmallPtrSet { - typedef SmallPtrSet BaseSet; - typedef typename BaseSet::iterator iterator; - std::pair insert(NodeRef N) { return BaseSet::insert(N) ; } + using BaseSet = SmallPtrSet; + using iterator = typename BaseSet::iterator; + + std::pair insert(NodeRef N) { return BaseSet::insert(N); } template void insert(IterT Begin, IterT End) { BaseSet::insert(Begin,End); } - void completed(NodeRef) { } + void completed(NodeRef) {} }; // Generic Depth First Iterator @@ -85,15 +86,14 @@ template , public df_iterator_storage { - typedef std::iterator super; - - typedef typename GT::NodeRef NodeRef; - typedef typename GT::ChildIteratorType ChildItTy; + using super = std::iterator; + using NodeRef = typename GT::NodeRef; + using ChildItTy = typename GT::ChildIteratorType; // First element is node reference, second is the 'next child' to visit. // The second child is initialized lazily to pick up graph changes during the // DFS. - typedef std::pair> StackElement; + using StackElement = std::pair>; // VisitStack - Used to maintain the ordering. Top = current block std::vector VisitStack; @@ -103,12 +103,15 @@ private: this->Visited.insert(Node); VisitStack.push_back(StackElement(Node, None)); } + inline df_iterator() = default; // End is when stack is empty + inline df_iterator(NodeRef Node, SetType &S) : df_iterator_storage(S) { if (this->Visited.insert(Node).second) VisitStack.push_back(StackElement(Node, None)); } + inline df_iterator(SetType &S) : df_iterator_storage(S) { // End is when stack is empty @@ -142,7 +145,7 @@ private: } public: - typedef typename super::pointer pointer; + using pointer = typename super::pointer; // Provide static begin and end methods as our public "constructors" static df_iterator begin(const GraphT &G) { diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h index 8fcac178ffc9744d3e2204e58ab25a9c7780ae71..af293d4c1422a657161eb5c51a82ffe6ad193696 100644 --- a/include/llvm/ADT/EquivalenceClasses.h +++ b/include/llvm/ADT/EquivalenceClasses.h @@ -1,4 +1,4 @@ -//===-- llvm/ADT/EquivalenceClasses.h - Generic Equiv. Classes --*- C++ -*-===// +//===- llvm/ADT/EquivalenceClasses.h - Generic Equiv. Classes ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -69,6 +69,7 @@ class EquivalenceClasses { /// leader is determined by a bit stolen from one of the pointers. class ECValue { friend class EquivalenceClasses; + mutable const ECValue *Leader, *Next; ElemTy Data; @@ -141,14 +142,14 @@ public: // /// iterator* - Provides a way to iterate over all values in the set. - typedef typename std::set::const_iterator iterator; + using iterator = typename std::set::const_iterator; + iterator begin() const { return TheMapping.begin(); } iterator end() const { return TheMapping.end(); } bool empty() const { return TheMapping.empty(); } /// member_* Iterate over the members of an equivalence class. - /// class member_iterator; member_iterator member_begin(iterator I) const { // Only leaders provide anything to iterate over. @@ -204,7 +205,6 @@ public: /// equivalence class it is in. This does the path-compression part that /// makes union-find "union findy". This returns an end iterator if the value /// is not in the equivalence class. - /// member_iterator findLeader(iterator I) const { if (I == TheMapping.end()) return member_end(); return member_iterator(I->getLeader()); @@ -241,15 +241,17 @@ public: class member_iterator : public std::iterator { - typedef std::iterator super; - const ECValue *Node; friend class EquivalenceClasses; + using super = std::iterator; + + const ECValue *Node; + public: - typedef size_t size_type; - typedef typename super::pointer pointer; - typedef typename super::reference reference; + using size_type = size_t; + using pointer = typename super::pointer; + using reference = typename super::reference; explicit member_iterator() = default; explicit member_iterator(const ECValue *N) : Node(N) {} diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h index dab18297dd3b40acbb50da40fd9e3781862de8dd..c5987a947e18249ad832e183c906bc77d60e073d 100644 --- a/include/llvm/ADT/FoldingSet.h +++ b/include/llvm/ADT/FoldingSet.h @@ -40,7 +40,7 @@ namespace llvm { /// FoldingSetNode. The node class must also define a Profile method used to /// establish the unique bits of data for the node. The Profile method is /// passed a FoldingSetNodeID object which is used to gather the bits. Just -/// call one of the Add* functions defined in the FoldingSetImpl::NodeID class. +/// call one of the Add* functions defined in the FoldingSetBase::NodeID class. /// NOTE: That the folding set does not own the nodes and it is the /// responsibility of the user to dispose of the nodes. /// @@ -104,13 +104,13 @@ class FoldingSetNodeID; class StringRef; //===----------------------------------------------------------------------===// -/// FoldingSetImpl - Implements the folding set functionality. The main +/// FoldingSetBase - Implements the folding set functionality. The main /// structure is an array of buckets. Each bucket is indexed by the hash of /// the nodes it contains. The bucket itself points to the nodes contained /// in the bucket via a singly linked list. The last node in the list points /// back to the bucket to facilitate node removal. /// -class FoldingSetImpl { +class FoldingSetBase { virtual void anchor(); // Out of line virtual method. protected: @@ -126,10 +126,10 @@ protected: /// is greater than twice the number of buckets. unsigned NumNodes; - explicit FoldingSetImpl(unsigned Log2InitSize = 6); - FoldingSetImpl(FoldingSetImpl &&Arg); - FoldingSetImpl &operator=(FoldingSetImpl &&RHS); - ~FoldingSetImpl(); + explicit FoldingSetBase(unsigned Log2InitSize = 6); + FoldingSetBase(FoldingSetBase &&Arg); + FoldingSetBase &operator=(FoldingSetBase &&RHS); + ~FoldingSetBase(); public: //===--------------------------------------------------------------------===// @@ -152,33 +152,6 @@ public: /// clear - Remove all nodes from the folding set. void clear(); - /// RemoveNode - Remove a node from the folding set, returning true if one - /// was removed or false if the node was not in the folding set. - bool RemoveNode(Node *N); - - /// GetOrInsertNode - If there is an existing simple Node exactly - /// equal to the specified node, return it. Otherwise, insert 'N' and return - /// it instead. - Node *GetOrInsertNode(Node *N); - - /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, - /// return it. If not, return the insertion token that will make insertion - /// faster. - Node *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos); - - /// InsertNode - Insert the specified node into the folding set, knowing that - /// it is not already in the folding set. InsertPos must be obtained from - /// FindNodeOrInsertPos. - void InsertNode(Node *N, void *InsertPos); - - /// InsertNode - Insert the specified node into the folding set, knowing that - /// it is not already in the folding set. - void InsertNode(Node *N) { - Node *Inserted = GetOrInsertNode(N); - (void)Inserted; - assert(Inserted == N && "Node already inserted!"); - } - /// size - Returns the number of nodes in the folding set. unsigned size() const { return NumNodes; } @@ -220,6 +193,28 @@ protected: /// ComputeNodeHash - Instantiations of the FoldingSet template implement /// this function to compute a hash value for the given node. virtual unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const = 0; + + // The below methods are protected to encourage subclasses to provide a more + // type-safe API. + + /// RemoveNode - Remove a node from the folding set, returning true if one + /// was removed or false if the node was not in the folding set. + bool RemoveNode(Node *N); + + /// GetOrInsertNode - If there is an existing simple Node exactly + /// equal to the specified node, return it. Otherwise, insert 'N' and return + /// it instead. + Node *GetOrInsertNode(Node *N); + + /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, + /// return it. If not, return the insertion token that will make insertion + /// faster. + Node *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos); + + /// InsertNode - Insert the specified node into the folding set, knowing that + /// it is not already in the folding set. InsertPos must be obtained from + /// FindNodeOrInsertPos. + void InsertNode(Node *N, void *InsertPos); }; //===----------------------------------------------------------------------===// @@ -293,7 +288,7 @@ public: FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {} /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, - /// used to lookup the node in the FoldingSetImpl. + /// used to lookup the node in the FoldingSetBase. unsigned ComputeHash() const; bool operator==(FoldingSetNodeIDRef) const; @@ -345,7 +340,7 @@ public: inline void clear() { Bits.clear(); } /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used - /// to lookup the node in the FoldingSetImpl. + /// to lookup the node in the FoldingSetBase. unsigned ComputeHash() const; /// operator== - Used to compare two nodes to each other. @@ -368,7 +363,7 @@ public: }; // Convenience type to hide the implementation of the folding set. -typedef FoldingSetImpl::Node FoldingSetNode; +typedef FoldingSetBase::Node FoldingSetNode; template class FoldingSetIterator; template class FoldingSetBucketIterator; @@ -407,6 +402,71 @@ DefaultContextualFoldingSetTrait::ComputeHash(T &X, return TempID.ComputeHash(); } +//===----------------------------------------------------------------------===// +/// FoldingSetImpl - An implementation detail that lets us share code between +/// FoldingSet and ContextualFoldingSet. +template class FoldingSetImpl : public FoldingSetBase { +protected: + explicit FoldingSetImpl(unsigned Log2InitSize) + : FoldingSetBase(Log2InitSize) {} + + FoldingSetImpl(FoldingSetImpl &&Arg) = default; + FoldingSetImpl &operator=(FoldingSetImpl &&RHS) = default; + ~FoldingSetImpl() = default; + +public: + typedef FoldingSetIterator iterator; + iterator begin() { return iterator(Buckets); } + iterator end() { return iterator(Buckets+NumBuckets); } + + typedef FoldingSetIterator const_iterator; + const_iterator begin() const { return const_iterator(Buckets); } + const_iterator end() const { return const_iterator(Buckets+NumBuckets); } + + typedef FoldingSetBucketIterator bucket_iterator; + + bucket_iterator bucket_begin(unsigned hash) { + return bucket_iterator(Buckets + (hash & (NumBuckets-1))); + } + + bucket_iterator bucket_end(unsigned hash) { + return bucket_iterator(Buckets + (hash & (NumBuckets-1)), true); + } + + /// RemoveNode - Remove a node from the folding set, returning true if one + /// was removed or false if the node was not in the folding set. + bool RemoveNode(T *N) { return FoldingSetBase::RemoveNode(N); } + + /// GetOrInsertNode - If there is an existing simple Node exactly + /// equal to the specified node, return it. Otherwise, insert 'N' and + /// return it instead. + T *GetOrInsertNode(T *N) { + return static_cast(FoldingSetBase::GetOrInsertNode(N)); + } + + /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, + /// return it. If not, return the insertion token that will make insertion + /// faster. + T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { + return static_cast(FoldingSetBase::FindNodeOrInsertPos(ID, InsertPos)); + } + + /// InsertNode - Insert the specified node into the folding set, knowing that + /// it is not already in the folding set. InsertPos must be obtained from + /// FindNodeOrInsertPos. + void InsertNode(T *N, void *InsertPos) { + FoldingSetBase::InsertNode(N, InsertPos); + } + + /// InsertNode - Insert the specified node into the folding set, knowing that + /// it is not already in the folding set. + void InsertNode(T *N) { + T *Inserted = GetOrInsertNode(N); + (void)Inserted; + assert(Inserted == N && "Node already inserted!"); + } +}; + //===----------------------------------------------------------------------===// /// FoldingSet - This template class is used to instantiate a specialized /// implementation of the folding set to the node class T. T must be a @@ -416,8 +476,10 @@ DefaultContextualFoldingSetTrait::ComputeHash(T &X, /// moved-from state is not a valid state for anything other than /// move-assigning and destroying. This is primarily to enable movable APIs /// that incorporate these objects. -template class FoldingSet final : public FoldingSetImpl { -private: +template class FoldingSet final : public FoldingSetImpl { + using Super = FoldingSetImpl; + using Node = typename Super::Node; + /// GetNodeProfile - Each instantiatation of the FoldingSet needs to provide a /// way to convert nodes into a unique specifier. void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const override { @@ -442,45 +504,10 @@ private: public: explicit FoldingSet(unsigned Log2InitSize = 6) - : FoldingSetImpl(Log2InitSize) {} - - FoldingSet(FoldingSet &&Arg) : FoldingSetImpl(std::move(Arg)) {} - FoldingSet &operator=(FoldingSet &&RHS) { - (void)FoldingSetImpl::operator=(std::move(RHS)); - return *this; - } - - typedef FoldingSetIterator iterator; - iterator begin() { return iterator(Buckets); } - iterator end() { return iterator(Buckets+NumBuckets); } - - typedef FoldingSetIterator const_iterator; - const_iterator begin() const { return const_iterator(Buckets); } - const_iterator end() const { return const_iterator(Buckets+NumBuckets); } - - typedef FoldingSetBucketIterator bucket_iterator; - - bucket_iterator bucket_begin(unsigned hash) { - return bucket_iterator(Buckets + (hash & (NumBuckets-1))); - } - - bucket_iterator bucket_end(unsigned hash) { - return bucket_iterator(Buckets + (hash & (NumBuckets-1)), true); - } + : Super(Log2InitSize) {} - /// GetOrInsertNode - If there is an existing simple Node exactly - /// equal to the specified node, return it. Otherwise, insert 'N' and - /// return it instead. - T *GetOrInsertNode(Node *N) { - return static_cast(FoldingSetImpl::GetOrInsertNode(N)); - } - - /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, - /// return it. If not, return the insertion token that will make insertion - /// faster. - T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { - return static_cast(FoldingSetImpl::FindNodeOrInsertPos(ID, InsertPos)); - } + FoldingSet(FoldingSet &&Arg) = default; + FoldingSet &operator=(FoldingSet &&RHS) = default; }; //===----------------------------------------------------------------------===// @@ -493,74 +520,42 @@ public: /// function with signature /// void Profile(FoldingSetNodeID &, Ctx); template -class ContextualFoldingSet final : public FoldingSetImpl { +class ContextualFoldingSet final : public FoldingSetImpl { // Unfortunately, this can't derive from FoldingSet because the - // construction vtable for FoldingSet requires + // construction of the vtable for FoldingSet requires // FoldingSet::GetNodeProfile to be instantiated, which in turn // requires a single-argument T::Profile(). -private: + using Super = FoldingSetImpl; + using Node = typename Super::Node; + Ctx Context; /// GetNodeProfile - Each instantiatation of the FoldingSet needs to provide a /// way to convert nodes into a unique specifier. - void GetNodeProfile(FoldingSetImpl::Node *N, - FoldingSetNodeID &ID) const override { + void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const override { T *TN = static_cast(N); ContextualFoldingSetTrait::Profile(*TN, ID, Context); } - bool NodeEquals(FoldingSetImpl::Node *N, const FoldingSetNodeID &ID, - unsigned IDHash, FoldingSetNodeID &TempID) const override { + bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash, + FoldingSetNodeID &TempID) const override { T *TN = static_cast(N); return ContextualFoldingSetTrait::Equals(*TN, ID, IDHash, TempID, Context); } - unsigned ComputeNodeHash(FoldingSetImpl::Node *N, - FoldingSetNodeID &TempID) const override { + unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const override { T *TN = static_cast(N); return ContextualFoldingSetTrait::ComputeHash(*TN, TempID, Context); } public: explicit ContextualFoldingSet(Ctx Context, unsigned Log2InitSize = 6) - : FoldingSetImpl(Log2InitSize), Context(Context) + : Super(Log2InitSize), Context(Context) {} Ctx getContext() const { return Context; } - - typedef FoldingSetIterator iterator; - iterator begin() { return iterator(Buckets); } - iterator end() { return iterator(Buckets+NumBuckets); } - - typedef FoldingSetIterator const_iterator; - const_iterator begin() const { return const_iterator(Buckets); } - const_iterator end() const { return const_iterator(Buckets+NumBuckets); } - - typedef FoldingSetBucketIterator bucket_iterator; - - bucket_iterator bucket_begin(unsigned hash) { - return bucket_iterator(Buckets + (hash & (NumBuckets-1))); - } - - bucket_iterator bucket_end(unsigned hash) { - return bucket_iterator(Buckets + (hash & (NumBuckets-1)), true); - } - - /// GetOrInsertNode - If there is an existing simple Node exactly - /// equal to the specified node, return it. Otherwise, insert 'N' - /// and return it instead. - T *GetOrInsertNode(Node *N) { - return static_cast(FoldingSetImpl::GetOrInsertNode(N)); - } - - /// FindNodeOrInsertPos - Look up the node specified by ID. If it - /// exists, return it. If not, return the insertion token that will - /// make insertion faster. - T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { - return static_cast(FoldingSetImpl::FindNodeOrInsertPos(ID, InsertPos)); - } }; //===----------------------------------------------------------------------===// diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h index 2c88c4271b4895d6d305c25f5f430fb984ab7f79..225d9eb847f00d216b8598c232805984ece328ea 100644 --- a/include/llvm/ADT/GraphTraits.h +++ b/include/llvm/ADT/GraphTraits.h @@ -1,4 +1,4 @@ -//===-- llvm/ADT/GraphTraits.h - Graph traits template ----------*- C++ -*-===// +//===- llvm/ADT/GraphTraits.h - Graph traits template -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -41,7 +41,6 @@ struct GraphTraits { // static ChildIteratorType child_end (NodeRef) // Return iterators that point to the beginning and ending of the child // node list for the specified node. - // // typedef ...iterator nodes_iterator; - dereference to a NodeRef // static nodes_iterator nodes_begin(GraphType *G) @@ -50,8 +49,6 @@ struct GraphTraits { // static unsigned size (GraphType *G) // Return total number of nodes in the graph - // - // If anyone tries to use this class without having an appropriate // specialization, make an error. If you get this error, it's because you @@ -59,11 +56,9 @@ struct GraphTraits { // graph, or you need to define it for a new graph type. Either that or // your argument to XXX_begin(...) is unknown or needs to have the proper .h // file #include'd. - // - typedef typename GraphType::UnknownGraphTypeError NodeRef; + using NodeRef = typename GraphType::UnknownGraphTypeError; }; - // Inverse - This class is used as a little marker class to tell the graph // iterator to iterate over the graph in a graph defined "Inverse" ordering. // Not all graphs define an inverse ordering, and if they do, it depends on @@ -74,7 +69,7 @@ struct GraphTraits { // for (; I != E; ++I) { ... } // // Which is equivalent to: -// df_iterator > I = idf_begin(M), E = idf_end(M); +// df_iterator> I = idf_begin(M), E = idf_end(M); // for (; I != E; ++I) { ... } // template @@ -115,6 +110,7 @@ inverse_children(const typename GraphTraits::NodeRef &G) { return make_range(GraphTraits>::child_begin(G), GraphTraits>::child_end(G)); } -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_ADT_GRAPHTRAITS_H diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h index e5f51bafe995da64cd79aa9708827c4cfc450348..60d63e09d4268a1f1cb630f66b087027bcfbe0fc 100644 --- a/include/llvm/ADT/ImmutableList.h +++ b/include/llvm/ADT/ImmutableList.h @@ -63,8 +63,8 @@ public: template class ImmutableList { public: - typedef T value_type; - typedef ImmutableListFactory Factory; + using value_type = T; + using Factory = ImmutableListFactory; private: const ImmutableListImpl* X; @@ -141,8 +141,8 @@ public: template class ImmutableListFactory { - typedef ImmutableListImpl ListTy; - typedef FoldingSet CacheTy; + using ListTy = ImmutableListImpl; + using CacheTy = FoldingSet; CacheTy Cache; uintptr_t Allocator; diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h index f197d407ba3bc6667eceab7b393e64640846c5ac..10d1e1f0139baf43644cc86d7bf5f11ec48c26dc 100644 --- a/include/llvm/ADT/ImmutableMap.h +++ b/include/llvm/ADT/ImmutableMap.h @@ -26,12 +26,12 @@ namespace llvm { /// only the first element (the key) is used by isEqual and isLess. template struct ImutKeyValueInfo { - typedef const std::pair value_type; - typedef const value_type& value_type_ref; - typedef const T key_type; - typedef const T& key_type_ref; - typedef const S data_type; - typedef const S& data_type_ref; + using value_type = const std::pair; + using value_type_ref = const value_type&; + using key_type = const T; + using key_type_ref = const T&; + using data_type = const S; + using data_type_ref = const S&; static inline key_type_ref KeyOfValue(value_type_ref V) { return V.first; @@ -62,13 +62,13 @@ template > class ImmutableMap { public: - typedef typename ValInfo::value_type value_type; - typedef typename ValInfo::value_type_ref value_type_ref; - typedef typename ValInfo::key_type key_type; - typedef typename ValInfo::key_type_ref key_type_ref; - typedef typename ValInfo::data_type data_type; - typedef typename ValInfo::data_type_ref data_type_ref; - typedef ImutAVLTree TreeTy; + using value_type = typename ValInfo::value_type; + using value_type_ref = typename ValInfo::value_type_ref; + using key_type = typename ValInfo::key_type; + using key_type_ref = typename ValInfo::key_type_ref; + using data_type = typename ValInfo::data_type; + using data_type_ref = typename ValInfo::data_type_ref; + using TreeTy = ImutAVLTree; protected: TreeTy* Root; @@ -86,6 +86,10 @@ public: if (Root) { Root->retain(); } } + ~ImmutableMap() { + if (Root) { Root->release(); } + } + ImmutableMap &operator=(const ImmutableMap &X) { if (Root != X.Root) { if (X.Root) { X.Root->retain(); } @@ -95,10 +99,6 @@ public: return *this; } - ~ImmutableMap() { - if (Root) { Root->release(); } - } - class Factory { typename TreeTy::Factory F; const bool Canonicalize; @@ -166,12 +166,14 @@ private: template struct CBWrapper { Callback C; + void operator()(value_type_ref V) { C(V.first,V.second); } }; template struct CBWrapperRef { Callback &C; + CBWrapperRef(Callback& c) : C(c) {} void operator()(value_type_ref V) { C(V.first,V.second); } @@ -254,14 +256,14 @@ template > class ImmutableMapRef { public: - typedef typename ValInfo::value_type value_type; - typedef typename ValInfo::value_type_ref value_type_ref; - typedef typename ValInfo::key_type key_type; - typedef typename ValInfo::key_type_ref key_type_ref; - typedef typename ValInfo::data_type data_type; - typedef typename ValInfo::data_type_ref data_type_ref; - typedef ImutAVLTree TreeTy; - typedef typename TreeTy::Factory FactoryTy; + using value_type = typename ValInfo::value_type; + using value_type_ref = typename ValInfo::value_type_ref; + using key_type = typename ValInfo::key_type; + using key_type_ref = typename ValInfo::key_type_ref; + using data_type = typename ValInfo::data_type; + using data_type_ref = typename ValInfo::data_type_ref; + using TreeTy = ImutAVLTree; + using FactoryTy = typename TreeTy::Factory; protected: TreeTy *Root; @@ -292,6 +294,11 @@ public: } } + ~ImmutableMapRef() { + if (Root) + Root->release(); + } + ImmutableMapRef &operator=(const ImmutableMapRef &X) { if (Root != X.Root) { if (X.Root) @@ -306,11 +313,6 @@ public: return *this; } - ~ImmutableMapRef() { - if (Root) - Root->release(); - } - static inline ImmutableMapRef getEmptyMap(FactoryTy *F) { return ImmutableMapRef(0, F); } diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h index 0724a28306a0338b29518f4f6ac90fde949b307f..9d580c5a3d4165885502dc4559960da317261047 100644 --- a/include/llvm/ADT/ImmutableSet.h +++ b/include/llvm/ADT/ImmutableSet.h @@ -16,16 +16,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorHandling.h" #include -#include -#include #include +#include #include #include +#include namespace llvm { @@ -41,18 +41,16 @@ template class ImutAVLTreeGenericIterator; template class ImutAVLTree { public: - typedef typename ImutInfo::key_type_ref key_type_ref; - typedef typename ImutInfo::value_type value_type; - typedef typename ImutInfo::value_type_ref value_type_ref; + using key_type_ref = typename ImutInfo::key_type_ref; + using value_type = typename ImutInfo::value_type; + using value_type_ref = typename ImutInfo::value_type_ref; + using Factory = ImutAVLFactory; + using iterator = ImutAVLTreeInOrderIterator; - typedef ImutAVLFactory Factory; friend class ImutAVLFactory; friend class ImutIntervalAVLFactory; - friend class ImutAVLTreeGenericIterator; - typedef ImutAVLTreeInOrderIterator iterator; - //===----------------------------------------------------===// // Public Interface. //===----------------------------------------------------===// @@ -225,17 +223,17 @@ private: Factory *factory; ImutAVLTree *left; ImutAVLTree *right; - ImutAVLTree *prev; - ImutAVLTree *next; + ImutAVLTree *prev = nullptr; + ImutAVLTree *next = nullptr; - unsigned height : 28; - unsigned IsMutable : 1; - unsigned IsDigestCached : 1; - unsigned IsCanonicalized : 1; + unsigned height : 28; + bool IsMutable : 1; + bool IsDigestCached : 1; + bool IsCanonicalized : 1; value_type value; - uint32_t digest; - uint32_t refCount; + uint32_t digest = 0; + uint32_t refCount = 0; //===----------------------------------------------------===// // Internal methods (node manipulation; used by Factory). @@ -246,9 +244,8 @@ private: /// ImutAVLFactory. ImutAVLTree(Factory *f, ImutAVLTree* l, ImutAVLTree* r, value_type_ref v, unsigned height) - : factory(f), left(l), right(r), prev(nullptr), next(nullptr), - height(height), IsMutable(true), IsDigestCached(false), - IsCanonicalized(0), value(v), digest(0), refCount(0) + : factory(f), left(l), right(r), height(height), IsMutable(true), + IsDigestCached(false), IsCanonicalized(false), value(v) { if (left) left->retain(); if (right) right->retain(); @@ -369,11 +366,11 @@ public: template class ImutAVLFactory { friend class ImutAVLTree; - typedef ImutAVLTree TreeTy; - typedef typename TreeTy::value_type_ref value_type_ref; - typedef typename TreeTy::key_type_ref key_type_ref; - typedef DenseMap CacheTy; + using TreeTy = ImutAVLTree; + using value_type_ref = typename TreeTy::value_type_ref; + using key_type_ref = typename TreeTy::key_type_ref; + using CacheTy = DenseMap; CacheTy Cache; uintptr_t Allocator; @@ -659,7 +656,7 @@ public: enum VisitFlag { VisitedNone=0x0, VisitedLeft=0x1, VisitedRight=0x3, Flags=0x3 }; - typedef ImutAVLTree TreeTy; + using TreeTy = ImutAVLTree; ImutAVLTreeGenericIterator() = default; ImutAVLTreeGenericIterator(const TreeTy *Root) { @@ -764,11 +761,12 @@ template class ImutAVLTreeInOrderIterator : public std::iterator> { - typedef ImutAVLTreeGenericIterator InternalIteratorTy; + using InternalIteratorTy = ImutAVLTreeGenericIterator; + InternalIteratorTy InternalItr; public: - typedef ImutAVLTree TreeTy; + using TreeTy = ImutAVLTree; ImutAVLTreeInOrderIterator(const TreeTy* Root) : InternalItr(Root) { if (Root) @@ -840,8 +838,8 @@ struct ImutAVLValueIterator /// and generic handling of pointers is done below. template struct ImutProfileInfo { - typedef const T value_type; - typedef const T& value_type_ref; + using value_type = const T; + using value_type_ref = const T&; static void Profile(FoldingSetNodeID &ID, value_type_ref X) { FoldingSetTrait::Profile(X,ID); @@ -851,8 +849,8 @@ struct ImutProfileInfo { /// Profile traits for integers. template struct ImutProfileInteger { - typedef const T value_type; - typedef const T& value_type_ref; + using value_type = const T; + using value_type_ref = const T&; static void Profile(FoldingSetNodeID &ID, value_type_ref X) { ID.AddInteger(X); @@ -878,8 +876,8 @@ PROFILE_INTEGER_INFO(unsigned long long) /// Profile traits for booleans. template <> struct ImutProfileInfo { - typedef const bool value_type; - typedef const bool& value_type_ref; + using value_type = const bool; + using value_type_ref = const bool&; static void Profile(FoldingSetNodeID &ID, value_type_ref X) { ID.AddBoolean(X); @@ -890,8 +888,8 @@ struct ImutProfileInfo { /// references to unique objects. template struct ImutProfileInfo { - typedef const T* value_type; - typedef value_type value_type_ref; + using value_type = const T*; + using value_type_ref = value_type; static void Profile(FoldingSetNodeID &ID, value_type_ref X) { ID.AddPointer(X); @@ -910,12 +908,12 @@ struct ImutProfileInfo { /// std::equal_to<> and std::less<> to perform comparison of elements. template struct ImutContainerInfo : public ImutProfileInfo { - typedef typename ImutProfileInfo::value_type value_type; - typedef typename ImutProfileInfo::value_type_ref value_type_ref; - typedef value_type key_type; - typedef value_type_ref key_type_ref; - typedef bool data_type; - typedef bool data_type_ref; + using value_type = typename ImutProfileInfo::value_type; + using value_type_ref = typename ImutProfileInfo::value_type_ref; + using key_type = value_type; + using key_type_ref = value_type_ref; + using data_type = bool; + using data_type_ref = bool; static key_type_ref KeyOfValue(value_type_ref D) { return D; } static data_type_ref DataOfValue(value_type_ref) { return true; } @@ -936,12 +934,12 @@ struct ImutContainerInfo : public ImutProfileInfo { /// their addresses. template struct ImutContainerInfo : public ImutProfileInfo { - typedef typename ImutProfileInfo::value_type value_type; - typedef typename ImutProfileInfo::value_type_ref value_type_ref; - typedef value_type key_type; - typedef value_type_ref key_type_ref; - typedef bool data_type; - typedef bool data_type_ref; + using value_type = typename ImutProfileInfo::value_type; + using value_type_ref = typename ImutProfileInfo::value_type_ref; + using key_type = value_type; + using key_type_ref = value_type_ref; + using data_type = bool; + using data_type_ref = bool; static key_type_ref KeyOfValue(value_type_ref D) { return D; } static data_type_ref DataOfValue(value_type_ref) { return true; } @@ -960,9 +958,9 @@ struct ImutContainerInfo : public ImutProfileInfo { template > class ImmutableSet { public: - typedef typename ValInfo::value_type value_type; - typedef typename ValInfo::value_type_ref value_type_ref; - typedef ImutAVLTree TreeTy; + using value_type = typename ValInfo::value_type; + using value_type_ref = typename ValInfo::value_type_ref; + using TreeTy = ImutAVLTree; private: TreeTy *Root; @@ -980,6 +978,10 @@ public: if (Root) { Root->retain(); } } + ~ImmutableSet() { + if (Root) { Root->release(); } + } + ImmutableSet &operator=(const ImmutableSet &X) { if (Root != X.Root) { if (X.Root) { X.Root->retain(); } @@ -989,10 +991,6 @@ public: return *this; } - ~ImmutableSet() { - if (Root) { Root->release(); } - } - class Factory { typename TreeTy::Factory F; const bool Canonicalize; @@ -1084,7 +1082,7 @@ public: // Iterators. //===--------------------------------------------------===// - typedef ImutAVLValueIterator iterator; + using iterator = ImutAVLValueIterator; iterator begin() const { return iterator(Root); } iterator end() const { return iterator(); } @@ -1112,10 +1110,10 @@ public: template > class ImmutableSetRef { public: - typedef typename ValInfo::value_type value_type; - typedef typename ValInfo::value_type_ref value_type_ref; - typedef ImutAVLTree TreeTy; - typedef typename TreeTy::Factory FactoryTy; + using value_type = typename ValInfo::value_type; + using value_type_ref = typename ValInfo::value_type_ref; + using TreeTy = ImutAVLTree; + using FactoryTy = typename TreeTy::Factory; private: TreeTy *Root; @@ -1138,6 +1136,10 @@ public: if (Root) { Root->retain(); } } + ~ImmutableSetRef() { + if (Root) { Root->release(); } + } + ImmutableSetRef &operator=(const ImmutableSetRef &X) { if (Root != X.Root) { if (X.Root) { X.Root->retain(); } @@ -1147,9 +1149,6 @@ public: } return *this; } - ~ImmutableSetRef() { - if (Root) { Root->release(); } - } static ImmutableSetRef getEmptySet(FactoryTy *F) { return ImmutableSetRef(0, F); @@ -1196,7 +1195,7 @@ public: // Iterators. //===--------------------------------------------------===// - typedef ImutAVLValueIterator iterator; + using iterator = ImutAVLValueIterator; iterator begin() const { return iterator(Root); } iterator end() const { return iterator(); } diff --git a/include/llvm/ADT/IndexedMap.h b/include/llvm/ADT/IndexedMap.h index 5ba85c02792095ff75ef49e21ee1d172481234eb..2ee80d2cde63a9c67ce7cc370d54be5d6b39fe04 100644 --- a/include/llvm/ADT/IndexedMap.h +++ b/include/llvm/ADT/IndexedMap.h @@ -20,28 +20,28 @@ #ifndef LLVM_ADT_INDEXEDMAP_H #define LLVM_ADT_INDEXEDMAP_H -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include -#include namespace llvm { -template > +template > class IndexedMap { - typedef typename ToIndexT::argument_type IndexT; + using IndexT = typename ToIndexT::argument_type; // Prefer SmallVector with zero inline storage over std::vector. IndexedMaps // can grow very large and SmallVector grows more efficiently as long as T // is trivially copyable. - typedef SmallVector StorageT; + using StorageT = SmallVector; + StorageT storage_; T nullVal_; ToIndexT toIndex_; public: - IndexedMap() : nullVal_(T()) { } + IndexedMap() : nullVal_(T()) {} - explicit IndexedMap(const T& val) : nullVal_(val) { } + explicit IndexedMap(const T& val) : nullVal_(val) {} typename StorageT::reference operator[](IndexT n) { assert(toIndex_(n) < storage_.size() && "index out of bounds!"); @@ -80,6 +80,6 @@ template > } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ADT_INDEXEDMAP_H diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h index 430b9671bd1d75ab8b7d01ab6f673a7175228dbf..f71366811218b098c265a803486279873b426fc7 100644 --- a/include/llvm/ADT/IntervalMap.h +++ b/include/llvm/ADT/IntervalMap.h @@ -106,6 +106,7 @@ #include "llvm/Support/RecyclingAllocator.h" #include #include +#include #include #include #include @@ -186,7 +187,7 @@ struct IntervalMapHalfOpenInfo { /// It should be considered private to the implementation. namespace IntervalMapImpl { -typedef std::pair IdxPair; +using IdxPair = std::pair; //===----------------------------------------------------------------------===// //--- IntervalMapImpl::NodeBase ---// @@ -445,7 +446,7 @@ struct NodeSizer { LeafSize = DesiredLeafSize > MinLeafSize ? DesiredLeafSize : MinLeafSize }; - typedef NodeBase, ValT, LeafSize> LeafBase; + using LeafBase = NodeBase, ValT, LeafSize>; enum { // Now that we have the leaf branching factor, compute the actual allocation @@ -461,8 +462,8 @@ struct NodeSizer { /// This typedef is very likely to be identical for all IntervalMaps with /// reasonably sized entries, so the same allocator can be shared among /// different kinds of maps. - typedef RecyclingAllocator Allocator; + using Allocator = + RecyclingAllocator; }; //===----------------------------------------------------------------------===// @@ -930,12 +931,12 @@ template ::LeafSize, typename Traits = IntervalMapInfo> class IntervalMap { - typedef IntervalMapImpl::NodeSizer Sizer; - typedef IntervalMapImpl::LeafNode Leaf; - typedef IntervalMapImpl::BranchNode - Branch; - typedef IntervalMapImpl::LeafNode RootLeaf; - typedef IntervalMapImpl::IdxPair IdxPair; + using Sizer = IntervalMapImpl::NodeSizer; + using Leaf = IntervalMapImpl::LeafNode; + using Branch = + IntervalMapImpl::BranchNode; + using RootLeaf = IntervalMapImpl::LeafNode; + using IdxPair = IntervalMapImpl::IdxPair; // The RootLeaf capacity is given as a template parameter. We must compute the // corresponding RootBranch capacity. @@ -945,8 +946,8 @@ class IntervalMap { RootBranchCap = DesiredRootBranchCap ? DesiredRootBranchCap : 1 }; - typedef IntervalMapImpl::BranchNode - RootBranch; + using RootBranch = + IntervalMapImpl::BranchNode; // When branched, we store a global start key as well as the branch node. struct RootBranchData { @@ -955,10 +956,10 @@ class IntervalMap { }; public: - typedef typename Sizer::Allocator Allocator; - typedef KeyT KeyType; - typedef ValT ValueType; - typedef Traits KeyTraits; + using Allocator = typename Sizer::Allocator; + using KeyType = KeyT; + using ValueType = ValT; + using KeyTraits = Traits; private: // The root data is either a RootLeaf or a RootBranchData instance. @@ -1290,7 +1291,7 @@ protected: friend class IntervalMap; // The map referred to. - IntervalMap *map; + IntervalMap *map = nullptr; // We store a full path from the root to the current position. // The path may be partially filled, but never between iterator calls. @@ -1338,7 +1339,7 @@ protected: public: /// const_iterator - Create an iterator that isn't pointing anywhere. - const_iterator() : map(nullptr) {} + const_iterator() = default; /// setMap - Change the map iterated over. This call must be followed by a /// call to goToBegin(), goToEnd(), or find() @@ -1509,7 +1510,8 @@ const_iterator::treeAdvanceTo(KeyT x) { template class IntervalMap::iterator : public const_iterator { friend class IntervalMap; - typedef IntervalMapImpl::IdxPair IdxPair; + + using IdxPair = IntervalMapImpl::IdxPair; explicit iterator(IntervalMap &map) : const_iterator(map) {} @@ -2003,7 +2005,7 @@ iterator::overflow(unsigned Level) { // Elements have been rearranged, now update node sizes and stops. bool SplitRoot = false; unsigned Pos = 0; - for (;;) { + while (true) { KeyT Stop = Node[Pos]->stop(NewSize[Pos]-1); if (NewNode && Pos == NewNode) { SplitRoot = insertNode(Level, NodeRef(Node[Pos], NewSize[Pos]), Stop); @@ -2045,8 +2047,9 @@ iterator::overflow(unsigned Level) { /// template class IntervalMapOverlaps { - typedef typename MapA::KeyType KeyType; - typedef typename MapA::KeyTraits Traits; + using KeyType = typename MapA::KeyType; + using Traits = typename MapA::KeyTraits; + typename MapA::const_iterator posA; typename MapB::const_iterator posB; @@ -2071,7 +2074,7 @@ class IntervalMapOverlaps { // Already overlapping. return; - for (;;) { + while (true) { // Make a.end > b.start. posA.advanceTo(posB.start()); if (!posA.valid() || !Traits::stopLess(posB.stop(), posA.start())) diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h index a77cf04ea4d1df2ee49021e2613121b02811215f..430ef86afbd95f1f3103535b8397cdbfa9cf2a51 100644 --- a/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -1,4 +1,4 @@ -//== llvm/ADT/IntrusiveRefCntPtr.h - Smart Refcounting Pointer ---*- C++ -*-==// +//==- llvm/ADT/IntrusiveRefCntPtr.h - Smart Refcounting Pointer --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -73,9 +73,10 @@ template class RefCountedBase { public: RefCountedBase() = default; - RefCountedBase(const RefCountedBase &) : RefCount(0) {} + RefCountedBase(const RefCountedBase &) {} void Retain() const { ++RefCount; } + void Release() const { assert(RefCount > 0 && "Reference count is already zero."); if (--RefCount == 0) @@ -136,7 +137,7 @@ template class IntrusiveRefCntPtr { T *Obj = nullptr; public: - typedef T element_type; + using element_type = T; explicit IntrusiveRefCntPtr() = default; IntrusiveRefCntPtr(T *obj) : Obj(obj) { retain(); } @@ -153,13 +154,13 @@ public: retain(); } + ~IntrusiveRefCntPtr() { release(); } + IntrusiveRefCntPtr &operator=(IntrusiveRefCntPtr S) { swap(S); return *this; } - ~IntrusiveRefCntPtr() { release(); } - T &operator*() const { return *Obj; } T *operator->() const { return Obj; } T *get() const { return Obj; } @@ -183,6 +184,7 @@ private: if (Obj) IntrusiveRefCntPtrInfo::retain(Obj); } + void release() { if (Obj) IntrusiveRefCntPtrInfo::release(Obj); @@ -248,14 +250,16 @@ bool operator!=(const IntrusiveRefCntPtr &A, std::nullptr_t B) { template struct simplify_type; template struct simplify_type> { - typedef T *SimpleType; + using SimpleType = T *; + static SimpleType getSimplifiedValue(IntrusiveRefCntPtr &Val) { return Val.get(); } }; template struct simplify_type> { - typedef /*const*/ T *SimpleType; + using SimpleType = /*const*/ T *; + static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr &Val) { return Val.get(); } diff --git a/include/llvm/ADT/MapVector.h b/include/llvm/ADT/MapVector.h index ac1885758cb9cf7831ee63656c3fbc9abc247b14..26a555ee1d3bdda94eb3d6af88e2835c176bbdcf 100644 --- a/include/llvm/ADT/MapVector.h +++ b/include/llvm/ADT/MapVector.h @@ -19,6 +19,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include +#include +#include +#include +#include +#include #include namespace llvm { @@ -27,20 +33,20 @@ namespace llvm { /// in a deterministic order. The values are kept in a std::vector and the /// mapping is done with DenseMap from Keys to indexes in that vector. template, - typename VectorType = std::vector > > + typename MapType = DenseMap, + typename VectorType = std::vector>> class MapVector { - typedef typename VectorType::value_type value_type; - typedef typename VectorType::size_type size_type; + using value_type = typename VectorType::value_type; + using size_type = typename VectorType::size_type; MapType Map; VectorType Vector; public: - typedef typename VectorType::iterator iterator; - typedef typename VectorType::const_iterator const_iterator; - typedef typename VectorType::reverse_iterator reverse_iterator; - typedef typename VectorType::const_reverse_iterator const_reverse_iterator; + using iterator = typename VectorType::iterator; + using const_iterator = typename VectorType::const_iterator; + using reverse_iterator = typename VectorType::reverse_iterator; + using const_reverse_iterator = typename VectorType::const_reverse_iterator; /// Clear the MapVector and return the underlying vector. VectorType takeVector() { @@ -220,4 +226,4 @@ struct SmallMapVector } // end namespace llvm -#endif +#endif // LLVM_ADT_MAPVECTOR_H diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h index 701872c9f63fc9df796dd5dd29215086cb7581ae..b782d9da17ac485ddfa169850ca7e4f26f6787e7 100644 --- a/include/llvm/ADT/Optional.h +++ b/include/llvm/ADT/Optional.h @@ -1,4 +1,4 @@ -//===-- Optional.h - Simple variant for passing optional values ---*- C++ -*-=// +//===- Optional.h - Simple variant for passing optional values --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,6 +19,8 @@ #include "llvm/ADT/None.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/type_traits.h" +#include #include #include #include @@ -28,15 +30,18 @@ namespace llvm { template class Optional { AlignedCharArrayUnion storage; - bool hasVal; + bool hasVal = false; + public: - typedef T value_type; + using value_type = T; + + Optional(NoneType) {} + explicit Optional() {} - Optional(NoneType) : hasVal(false) {} - explicit Optional() : hasVal(false) {} Optional(const T &y) : hasVal(true) { new (storage.buffer) T(y); } + Optional(const Optional &O) : hasVal(O.hasVal) { if (hasVal) new (storage.buffer) T(*O); @@ -45,12 +50,18 @@ public: Optional(T &&y) : hasVal(true) { new (storage.buffer) T(std::forward(y)); } + Optional(Optional &&O) : hasVal(O) { if (O) { new (storage.buffer) T(std::move(*O)); O.reset(); } } + + ~Optional() { + reset(); + } + Optional &operator=(T &&y) { if (hasVal) **this = std::move(y); @@ -60,6 +71,7 @@ public: } return *this; } + Optional &operator=(Optional &&O) { if (!O) reset(); @@ -112,10 +124,6 @@ public: } } - ~Optional() { - reset(); - } - const T* getPointer() const { assert(hasVal); return reinterpret_cast(storage.buffer); } T* getPointer() { assert(hasVal); return reinterpret_cast(storage.buffer); } const T& getValue() const LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); } @@ -144,8 +152,7 @@ public: #endif }; -template struct isPodLike; -template struct isPodLike > { +template struct isPodLike> { // An Optional is pod-like if T is. static const bool value = isPodLike::value; }; @@ -284,6 +291,6 @@ template bool operator>=(const T &X, const Optional &Y) { return !(X < Y); } -} // end llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ADT_OPTIONAL_H diff --git a/include/llvm/ADT/PackedVector.h b/include/llvm/ADT/PackedVector.h index 8f925f1ff5cbc12cb459f3f8c180a4570b39b9a5..95adc2926813bed35b63db59c121303930391fd5 100644 --- a/include/llvm/ADT/PackedVector.h +++ b/include/llvm/ADT/PackedVector.h @@ -76,8 +76,8 @@ template class PackedVector : public PackedVectorBase::is_signed> { BitVectorTy Bits; - typedef PackedVectorBase::is_signed> base; + using base = PackedVectorBase::is_signed>; public: class reference { @@ -99,7 +99,7 @@ public: }; PackedVector() = default; - explicit PackedVector(unsigned size) : Bits(size << (BitNum-1)) { } + explicit PackedVector(unsigned size) : Bits(size << (BitNum-1)) {} bool empty() const { return Bits.empty(); } diff --git a/include/llvm/ADT/PointerEmbeddedInt.h b/include/llvm/ADT/PointerEmbeddedInt.h index 2279d43405fa756dcaed7bfe27771101b869fa54..34323b5b8af49acc53bc7bf5753366e72153abcb 100644 --- a/include/llvm/ADT/PointerEmbeddedInt.h +++ b/include/llvm/ADT/PointerEmbeddedInt.h @@ -13,7 +13,10 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PointerLikeTypeTraits.h" +#include #include +#include +#include namespace llvm { @@ -29,7 +32,7 @@ namespace llvm { /// Also, the default constructed value zero initializes the integer. template class PointerEmbeddedInt { - uintptr_t Value; + uintptr_t Value = 0; // Note: This '<' is correct; using '<=' would result in some shifts // overflowing their storage types. @@ -54,15 +57,12 @@ class PointerEmbeddedInt { explicit PointerEmbeddedInt(uintptr_t Value, RawValueTag) : Value(Value) {} public: - PointerEmbeddedInt() : Value(0) {} + PointerEmbeddedInt() = default; - PointerEmbeddedInt(IntT I) { - *this = I; - } + PointerEmbeddedInt(IntT I) { *this = I; } PointerEmbeddedInt &operator=(IntT I) { - assert((std::is_signed::value ? llvm::isInt(I) - : llvm::isUInt(I)) && + assert((std::is_signed::value ? isInt(I) : isUInt(I)) && "Integer has bits outside those preserved!"); Value = static_cast(I) << Shift; return *this; @@ -81,15 +81,17 @@ public: // types. template class PointerLikeTypeTraits> { - typedef PointerEmbeddedInt T; + using T = PointerEmbeddedInt; public: static inline void *getAsVoidPointer(const T &P) { return reinterpret_cast(P.Value); } + static inline T getFromVoidPointer(void *P) { return T(reinterpret_cast(P), typename T::RawValueTag()); } + static inline T getFromVoidPointer(const void *P) { return T(reinterpret_cast(P), typename T::RawValueTag()); } @@ -101,17 +103,19 @@ public: // itself can be a key. template struct DenseMapInfo> { - typedef PointerEmbeddedInt T; - - typedef DenseMapInfo IntInfo; + using T = PointerEmbeddedInt; + using IntInfo = DenseMapInfo; static inline T getEmptyKey() { return IntInfo::getEmptyKey(); } static inline T getTombstoneKey() { return IntInfo::getTombstoneKey(); } + static unsigned getHashValue(const T &Arg) { return IntInfo::getHashValue(Arg); } + static bool isEqual(const T &LHS, const T &RHS) { return LHS == RHS; } }; -} -#endif +} // end namespace llvm + +#endif // LLVM_ADT_POINTEREMBEDDEDINT_H diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index 9eb15524c0f3059918d08e37e5596e3ea9e53f31..aeab641f5715ab9ccef2c05e4c398f42f2b75042 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -19,8 +19,8 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/PointerLikeTypeTraits.h" #include -#include #include +#include namespace llvm { @@ -158,7 +158,7 @@ public: assert( get() == Val.getPointer() && "Can't get the address because PointerLikeTypeTraits changes the ptr"); - return (PT1 *)Val.getAddrOfPointer(); + return const_cast(reinterpret_cast(Val.getAddrOfPointer())); } /// Assignment from nullptr which just clears the union. diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h index 8fc08eb252eb214c6e041747f8cb454a3d5cf49b..dc8a9b6e78b20961c694d173c93db36fc4198471 100644 --- a/include/llvm/ADT/PostOrderIterator.h +++ b/include/llvm/ADT/PostOrderIterator.h @@ -17,9 +17,9 @@ #define LLVM_ADT_POSTORDERITERATOR_H #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/iterator_range.h" #include #include #include @@ -96,24 +96,14 @@ template , public po_iterator_storage { - typedef std::iterator super; - typedef typename GT::NodeRef NodeRef; - typedef typename GT::ChildIteratorType ChildItTy; + using super = std::iterator; + using NodeRef = typename GT::NodeRef; + using ChildItTy = typename GT::ChildIteratorType; // VisitStack - Used to maintain the ordering. Top = current block // First element is basic block pointer, second is the 'next child' to visit std::vector> VisitStack; - void traverseChild() { - while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) { - NodeRef BB = *VisitStack.back().second++; - if (this->insertEdge(Optional(VisitStack.back().first), BB)) { - // If the block is not visited... - VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB))); - } - } - } - po_iterator(NodeRef BB) { this->insertEdge(Optional(), BB); VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB))); @@ -134,8 +124,18 @@ class po_iterator : po_iterator_storage(S) { } // End is when stack is empty. + void traverseChild() { + while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) { + NodeRef BB = *VisitStack.back().second++; + if (this->insertEdge(Optional(VisitStack.back().first), BB)) { + // If the block is not visited... + VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB))); + } + } + } + public: - typedef typename super::pointer pointer; + using pointer = typename super::pointer; // Provide static "constructors"... static po_iterator begin(GraphT G) { @@ -286,7 +286,8 @@ inverse_post_order_ext(const T &G, SetType &S) { template> class ReversePostOrderTraversal { - typedef typename GT::NodeRef NodeRef; + using NodeRef = typename GT::NodeRef; + std::vector Blocks; // Block list in normal PO order void Initialize(NodeRef BB) { @@ -294,7 +295,7 @@ class ReversePostOrderTraversal { } public: - typedef typename std::vector::reverse_iterator rpo_iterator; + using rpo_iterator = typename std::vector::reverse_iterator; ReversePostOrderTraversal(GraphT G) { Initialize(GT::getEntryNode(G)); } diff --git a/include/llvm/ADT/PriorityWorklist.h b/include/llvm/ADT/PriorityWorklist.h index 3198dd438700d1940d3da9e4f9ddd37a726f0a12..aa531f3337d9c673de8133a56c3c618f9a94ad2b 100644 --- a/include/llvm/ADT/PriorityWorklist.h +++ b/include/llvm/ADT/PriorityWorklist.h @@ -18,12 +18,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Sequence.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include #include #include +#include +#include #include namespace llvm { @@ -55,11 +56,11 @@ template , typename MapT = DenseMap> class PriorityWorklist { public: - typedef T value_type; - typedef T key_type; - typedef T& reference; - typedef const T& const_reference; - typedef typename MapT::size_type size_type; + using value_type = T; + using key_type = T; + using reference = T&; + using const_reference = const T&; + using size_type = typename MapT::size_type; /// Construct an empty PriorityWorklist PriorityWorklist() = default; diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h index 9a8a7b168fce2126e0d4e7dcbecad10ed9f08019..784a58dc002f594e4fb5c98e5d6085a1a17cab43 100644 --- a/include/llvm/ADT/SCCIterator.h +++ b/include/llvm/ADT/SCCIterator.h @@ -1,4 +1,4 @@ -//===---- ADT/SCCIterator.h - Strongly Connected Comp. Iter. ----*- C++ -*-===// +//===- ADT/SCCIterator.h - Strongly Connected Comp. Iter. -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -43,10 +43,10 @@ template > class scc_iterator : public iterator_facade_base< scc_iterator, std::forward_iterator_tag, const std::vector, ptrdiff_t> { - typedef typename GT::NodeRef NodeRef; - typedef typename GT::ChildIteratorType ChildItTy; - typedef std::vector SccTy; - typedef typename scc_iterator::reference reference; + using NodeRef = typename GT::NodeRef; + using ChildItTy = typename GT::ChildIteratorType; + using SccTy = std::vector; + using reference = typename scc_iterator::reference; /// Element of VisitStack during DFS. struct StackElement { @@ -232,16 +232,6 @@ template scc_iterator scc_end(const T &G) { return scc_iterator::end(G); } -/// \brief Construct the begin iterator for a deduced graph type T's Inverse. -template scc_iterator> scc_begin(const Inverse &G) { - return scc_iterator>::begin(G); -} - -/// \brief Construct the end iterator for a deduced graph type T's Inverse. -template scc_iterator> scc_end(const Inverse &G) { - return scc_iterator>::end(G); -} - } // end namespace llvm #endif // LLVM_ADT_SCCITERATOR_H diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 15945adbe589a4634e2dbf2738866467db2b9a94..8c28412bb607869285feda87b0ea786dd35d859f 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -706,6 +706,18 @@ struct is_one_of { std::is_same::value || is_one_of::value; }; +/// \brief traits class for checking whether type T is a base class for all +/// the given types in the variadic list. +template struct are_base_of { + static const bool value = true; +}; + +template +struct are_base_of { + static const bool value = + std::is_base_of::value && are_base_of::value; +}; + //===----------------------------------------------------------------------===// // Extra additions for arrays //===----------------------------------------------------------------------===// @@ -1079,7 +1091,7 @@ private: /// /// std::vector Items = {'A', 'B', 'C', 'D'}; /// for (auto X : enumerate(Items)) { -/// printf("Item %d - %c\n", X.Index, X.Value); +/// printf("Item %d - %c\n", X.index(), X.value()); /// } /// /// Output: diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h index d52128e294a32690ba6dfd8908ccfa481222663a..22b0c1bdaf4d0cb0fd4ae3dd6d66782588dea6dd 100644 --- a/include/llvm/ADT/ScopedHashTable.h +++ b/include/llvm/ADT/ScopedHashTable.h @@ -109,6 +109,7 @@ private: ScopedHashTableVal *getLastValInScope() { return LastValInScope; } + void setLastValInScope(ScopedHashTableVal *Val) { LastValInScope = Val; } @@ -151,13 +152,14 @@ class ScopedHashTable { public: /// ScopeTy - This is a helpful typedef that allows clients to get easy access /// to the name of the scope for this hash table. - typedef ScopedHashTableScope ScopeTy; - typedef unsigned size_type; + using ScopeTy = ScopedHashTableScope; + using size_type = unsigned; private: friend class ScopedHashTableScope; - typedef ScopedHashTableVal ValTy; + using ValTy = ScopedHashTableVal; + DenseMap TopLevelMap; ScopeTy *CurScope = nullptr; @@ -165,7 +167,7 @@ private: public: ScopedHashTable() = default; - ScopedHashTable(AllocatorTy A) : CurScope(0), Allocator(A) {} + ScopedHashTable(AllocatorTy A) : Allocator(A) {} ScopedHashTable(const ScopedHashTable &) = delete; ScopedHashTable &operator=(const ScopedHashTable &) = delete; @@ -194,7 +196,7 @@ public: insertIntoScope(CurScope, Key, Val); } - typedef ScopedHashTableIterator iterator; + using iterator = ScopedHashTableIterator; iterator end() { return iterator(0); } diff --git a/include/llvm/ADT/Sequence.h b/include/llvm/ADT/Sequence.h index 5d36831cc128ec03a22e820d9af1865a7d07bd8e..3d4a897bf9a9e7efc1f12d62e8937391a47e56d1 100644 --- a/include/llvm/ADT/Sequence.h +++ b/include/llvm/ADT/Sequence.h @@ -13,27 +13,31 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_ADT_SEQ_H -#define LLVM_ADT_SEQ_H +#ifndef LLVM_ADT_SEQUENCE_H +#define LLVM_ADT_SEQUENCE_H #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include +#include +#include namespace llvm { namespace detail { + template class value_sequence_iterator : public iterator_facade_base, std::random_access_iterator_tag, const ValueT> { - typedef typename value_sequence_iterator::iterator_facade_base BaseT; + using BaseT = typename value_sequence_iterator::iterator_facade_base; ValueT Value; public: - typedef typename BaseT::difference_type difference_type; - typedef typename BaseT::reference reference; + using difference_type = typename BaseT::difference_type; + using reference = typename BaseT::reference; value_sequence_iterator() = default; value_sequence_iterator(const value_sequence_iterator &) = default; @@ -65,7 +69,8 @@ public: reference operator*() const { return Value; } }; -} // End detail namespace. + +} // end namespace detail template iterator_range> seq(ValueT Begin, @@ -74,6 +79,6 @@ iterator_range> seq(ValueT Begin, detail::value_sequence_iterator(End)); } -} +} // end namespace llvm -#endif +#endif // LLVM_ADT_SEQUENCE_H diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h index 13378aa3a04efc3b0ce8914ab5412bd2eb8e0b4b..04ed52fc543f35d73cdc1dae32f3b8575ee05c8e 100644 --- a/include/llvm/ADT/SetVector.h +++ b/include/llvm/ADT/SetVector.h @@ -40,17 +40,17 @@ template , typename Set = DenseSet> class SetVector { public: - typedef T value_type; - typedef T key_type; - typedef T& reference; - typedef const T& const_reference; - typedef Set set_type; - typedef Vector vector_type; - typedef typename vector_type::const_iterator iterator; - typedef typename vector_type::const_iterator const_iterator; - typedef typename vector_type::const_reverse_iterator reverse_iterator; - typedef typename vector_type::const_reverse_iterator const_reverse_iterator; - typedef typename vector_type::size_type size_type; + using value_type = T; + using key_type = T; + using reference = T&; + using const_reference = const T&; + using set_type = Set; + using vector_type = Vector; + using iterator = typename vector_type::const_iterator; + using const_iterator = typename vector_type::const_iterator; + using reverse_iterator = typename vector_type::const_reverse_iterator; + using const_reverse_iterator = typename vector_type::const_reverse_iterator; + using size_type = typename vector_type::size_type; /// \brief Construct an empty SetVector SetVector() = default; diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index edb37da38da1bec88e97163ba8e5b445e9d22ab9..b6391746639b0817edcc69d48e01ead483183695 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -15,8 +15,15 @@ #define LLVM_ADT_SMALLBITVECTOR_H #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/MathExtras.h" +#include #include +#include +#include +#include +#include +#include namespace llvm { @@ -29,7 +36,7 @@ class SmallBitVector { // TODO: In "large" mode, a pointer to a BitVector is used, leading to an // unnecessary level of indirection. It would be more efficient to use a // pointer to memory containing size, allocation size, and the array of bits. - uintptr_t X; + uintptr_t X = 1; enum { // The number of bits in this class. @@ -54,7 +61,8 @@ class SmallBitVector { "Unsupported word size"); public: - typedef unsigned size_type; + using size_type = unsigned; + // Encapsulation of a single bit. class reference { SmallBitVector &TheVector; @@ -117,9 +125,7 @@ private: } // Return the size. - size_t getSmallSize() const { - return getSmallRawBits() >> SmallNumDataBits; - } + size_t getSmallSize() const { return getSmallRawBits() >> SmallNumDataBits; } void setSmallSize(size_t Size) { setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits)); @@ -137,7 +143,7 @@ private: public: /// Creates an empty bitvector. - SmallBitVector() : X(1) {} + SmallBitVector() = default; /// Creates a bitvector of specified number of bits. All bits are initialized /// to the specified value. @@ -165,6 +171,21 @@ public: delete getPointer(); } + using const_set_bits_iterator = const_set_bits_iterator_impl; + using set_iterator = const_set_bits_iterator; + + const_set_bits_iterator set_bits_begin() const { + return const_set_bits_iterator(*this); + } + + const_set_bits_iterator set_bits_end() const { + return const_set_bits_iterator(*this, -1); + } + + iterator_range set_bits() const { + return make_range(set_bits_begin(), set_bits_end()); + } + /// Tests whether there are no bits in this bitvector. bool empty() const { return isSmall() ? getSmallSize() == 0 : getPointer()->empty(); @@ -216,6 +237,16 @@ public: return getPointer()->find_first(); } + int find_last() const { + if (isSmall()) { + uintptr_t Bits = getSmallBits(); + if (Bits == 0) + return -1; + return NumBaseBits - countLeadingZeros(Bits); + } + return getPointer()->find_last(); + } + /// Returns the index of the first unset bit, -1 if all of the bits are set. int find_first_unset() const { if (isSmall()) { @@ -228,6 +259,17 @@ public: return getPointer()->find_first_unset(); } + int find_last_unset() const { + if (isSmall()) { + if (count() == getSmallSize()) + return -1; + + uintptr_t Bits = getSmallBits(); + return NumBaseBits - countLeadingOnes(Bits); + } + return getPointer()->find_last_unset(); + } + /// Returns the index of the next set bit following the "Prev" bit. /// Returns -1 if the next set bit is not found. int find_next(unsigned Prev) const { @@ -259,6 +301,24 @@ public: return getPointer()->find_next_unset(Prev); } + /// find_prev - Returns the index of the first set bit that precedes the + /// the bit at \p PriorTo. Returns -1 if all previous bits are unset. + int find_prev(unsigned PriorTo) const { + if (isSmall()) { + if (PriorTo == 0) + return -1; + + --PriorTo; + uintptr_t Bits = getSmallBits(); + Bits &= maskTrailingOnes(PriorTo + 1); + if (Bits == 0) + return -1; + + return NumBaseBits - countLeadingZeros(Bits) - 1; + } + return getPointer()->find_prev(PriorTo); + } + /// Clear all bits. void clear() { if (!isSmall()) @@ -508,6 +568,22 @@ public: return *this; } + SmallBitVector &operator<<=(unsigned N) { + if (isSmall()) + setSmallBits(getSmallBits() << N); + else + getPointer()->operator<<=(N); + return *this; + } + + SmallBitVector &operator>>=(unsigned N) { + if (isSmall()) + setSmallBits(getSmallBits() >> N); + else + getPointer()->operator>>=(N); + return *this; + } + // Assignment operator. const SmallBitVector &operator=(const SmallBitVector &RHS) { if (isSmall()) { @@ -611,14 +687,16 @@ operator^(const SmallBitVector &LHS, const SmallBitVector &RHS) { return Result; } -} // End llvm namespace +} // end namespace llvm namespace std { - /// Implement std::swap in terms of BitVector swap. - inline void - swap(llvm::SmallBitVector &LHS, llvm::SmallBitVector &RHS) { - LHS.swap(RHS); - } + +/// Implement std::swap in terms of BitVector swap. +inline void +swap(llvm::SmallBitVector &LHS, llvm::SmallBitVector &RHS) { + LHS.swap(RHS); } -#endif +} // end namespace std + +#endif // LLVM_ADT_SMALLBITVECTOR_H diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index 196ab6338047cce727796ae40b0fcf4fc9d96394..a2ad74b1e04aeb56bf2734e64eb180e715ce1f5f 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -21,20 +21,22 @@ #include "llvm/Support/type_traits.h" #include #include -#include #include +#include #include #include #include -#if LLVM_ENABLE_ABI_BREAKING_CHECKS namespace llvm { + +#if LLVM_ENABLE_ABI_BREAKING_CHECKS template struct ReverseIterate { static bool value; }; +#if LLVM_ENABLE_REVERSE_ITERATION +template bool ReverseIterate::value = true; +#else template bool ReverseIterate::value = false; -} #endif - -namespace llvm { +#endif /// SmallPtrSetImplBase - This is the common code shared among all the /// SmallPtrSet<>'s, which is almost everything. SmallPtrSet has two modes, one @@ -92,7 +94,7 @@ protected: } public: - typedef unsigned size_type; + using size_type = unsigned; SmallPtrSetImplBase &operator=(const SmallPtrSetImplBase &) = delete; @@ -273,14 +275,14 @@ protected: /// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet. template class SmallPtrSetIterator : public SmallPtrSetIteratorImpl { - typedef PointerLikeTypeTraits PtrTraits; + using PtrTraits = PointerLikeTypeTraits; public: - typedef PtrTy value_type; - typedef PtrTy reference; - typedef PtrTy pointer; - typedef std::ptrdiff_t difference_type; - typedef std::forward_iterator_tag iterator_category; + using value_type = PtrTy; + using reference = PtrTy; + using pointer = PtrTy; + using difference_type = std::ptrdiff_t; + using iterator_category = std::forward_iterator_tag; explicit SmallPtrSetIterator(const void *const *BP, const void *const *E) : SmallPtrSetIteratorImpl(BP, E) {} @@ -351,8 +353,8 @@ struct RoundUpToPowerOfTwo { template class SmallPtrSetImpl : public SmallPtrSetImplBase { using ConstPtrType = typename add_const_past_pointer::type; - typedef PointerLikeTypeTraits PtrTraits; - typedef PointerLikeTypeTraits ConstPtrTraits; + using PtrTraits = PointerLikeTypeTraits; + using ConstPtrTraits = PointerLikeTypeTraits; protected: // Constructors that forward to the base. @@ -365,8 +367,10 @@ protected: : SmallPtrSetImplBase(SmallStorage, SmallSize) {} public: - typedef SmallPtrSetIterator iterator; - typedef SmallPtrSetIterator const_iterator; + using iterator = SmallPtrSetIterator; + using const_iterator = SmallPtrSetIterator; + using key_type = ConstPtrType; + using value_type = PtrType; SmallPtrSetImpl(const SmallPtrSetImpl &) = delete; @@ -431,7 +435,7 @@ class SmallPtrSet : public SmallPtrSetImpl { // DenseSet<> instead if you expect many elements in the set. static_assert(SmallSize <= 32, "SmallSize should be small"); - typedef SmallPtrSetImpl BaseT; + using BaseT = SmallPtrSetImpl; // Make sure that SmallSize is a power of two, round up if not. enum { SmallSizePowTwo = RoundUpToPowerOfTwo::Val }; diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h index 6dac1677b7a26e99fb8e9f0d29bdcae9f1e255a8..d52d0f07f9a6327a375cad4a8ed5daa7da69971a 100644 --- a/include/llvm/ADT/SmallSet.h +++ b/include/llvm/ADT/SmallSet.h @@ -39,8 +39,9 @@ class SmallSet { /// we will never use. SmallVector Vector; std::set Set; - typedef typename SmallVector::const_iterator VIterator; - typedef typename SmallVector::iterator mutable_iterator; + + using VIterator = typename SmallVector::const_iterator; + using mutable_iterator = typename SmallVector::iterator; // In small mode SmallPtrSet uses linear search for the elements, so it is // not a good idea to choose this value too high. You may consider using a @@ -48,7 +49,7 @@ class SmallSet { static_assert(N <= 32, "N should be small"); public: - typedef size_t size_type; + using size_type = size_t; SmallSet() = default; diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index b9588214023ccf808b1c3346b66ee2bdd5e407e5..bf2a62f43affce5b9f03eeda03ab97904561f87c 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -71,7 +71,7 @@ private: // Allocate raw space for N elements of type T. If T has a ctor or dtor, we // don't want it to be automatically run, so we need to represent the space as // something else. Use an array of char of sufficient alignment. - typedef AlignedCharArrayUnion U; + using U = AlignedCharArrayUnion; U FirstEl; // Space after 'FirstEl' is clobbered, do not add any instance vars after it. @@ -96,19 +96,19 @@ protected: void setEnd(T *P) { this->EndX = P; } public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T value_type; - typedef T *iterator; - typedef const T *const_iterator; + using size_type = size_t; + using difference_type = ptrdiff_t; + using value_type = T; + using iterator = T *; + using const_iterator = const T *; - typedef std::reverse_iterator const_reverse_iterator; - typedef std::reverse_iterator reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using reverse_iterator = std::reverse_iterator; - typedef T &reference; - typedef const T &const_reference; - typedef T *pointer; - typedef const T *const_pointer; + using reference = T &; + using const_reference = const T &; + using pointer = T *; + using const_pointer = const T *; // forward iterator creation methods. LLVM_ATTRIBUTE_ALWAYS_INLINE @@ -319,12 +319,12 @@ public: /// reduce code duplication based on the SmallVector 'N' template parameter. template class SmallVectorImpl : public SmallVectorTemplateBase::value> { - typedef SmallVectorTemplateBase::value > SuperClass; + using SuperClass = SmallVectorTemplateBase::value>; public: - typedef typename SuperClass::iterator iterator; - typedef typename SuperClass::const_iterator const_iterator; - typedef typename SuperClass::size_type size_type; + using iterator = typename SuperClass::iterator; + using const_iterator = typename SuperClass::const_iterator; + using size_type = typename SuperClass::size_type; protected: // Default ctor - Initialize to empty. @@ -388,7 +388,10 @@ public: void swap(SmallVectorImpl &RHS); /// Add the specified range to the end of the SmallVector. - template + template ::iterator_category, + std::input_iterator_tag>::value>::type> void append(in_iter in_start, in_iter in_end) { size_type NumInputs = std::distance(in_start, in_end); // Grow allocated space if needed. @@ -415,6 +418,9 @@ public: append(IL.begin(), IL.end()); } + // FIXME: Consider assigning over existing elements, rather than clearing & + // re-initializing them - for all assign(...) variants. + void assign(size_type NumElts, const T &Elt) { clear(); if (this->capacity() < NumElts) @@ -423,6 +429,15 @@ public: std::uninitialized_fill(this->begin(), this->end(), Elt); } + template ::iterator_category, + std::input_iterator_tag>::value>::type> + void assign(in_iter in_start, in_iter in_end) { + clear(); + append(in_start, in_end); + } + void assign(std::initializer_list IL) { clear(); append(IL); @@ -571,7 +586,10 @@ public: return I; } - template + template ::iterator_category, + std::input_iterator_tag>::value>::type> iterator insert(iterator I, ItTy From, ItTy To) { // Convert iterator to elt# to avoid invalidating iterator when we reserve() size_t InsertElt = I - this->begin(); @@ -845,15 +863,17 @@ class SmallVector : public SmallVectorImpl { SmallVectorStorage Storage; public: - SmallVector() : SmallVectorImpl(N) { - } + SmallVector() : SmallVectorImpl(N) {} explicit SmallVector(size_t Size, const T &Value = T()) : SmallVectorImpl(N) { this->assign(Size, Value); } - template + template ::iterator_category, + std::input_iterator_tag>::value>::type> SmallVector(ItTy S, ItTy E) : SmallVectorImpl(N) { this->append(S, E); } @@ -883,16 +903,16 @@ public: SmallVectorImpl::operator=(::std::move(RHS)); } - const SmallVector &operator=(SmallVector &&RHS) { - SmallVectorImpl::operator=(::std::move(RHS)); - return *this; - } - SmallVector(SmallVectorImpl &&RHS) : SmallVectorImpl(N) { if (!RHS.empty()) SmallVectorImpl::operator=(::std::move(RHS)); } + const SmallVector &operator=(SmallVector &&RHS) { + SmallVectorImpl::operator=(::std::move(RHS)); + return *this; + } + const SmallVector &operator=(SmallVectorImpl &&RHS) { SmallVectorImpl::operator=(::std::move(RHS)); return *this; diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h index a82cef6028f948764389970cf1191e5e653d248a..4cbf40c76805eeb8475c98b9e51d3a89a13980e8 100644 --- a/include/llvm/ADT/SparseBitVector.h +++ b/include/llvm/ADT/SparseBitVector.h @@ -1,4 +1,4 @@ -//===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector -*- C++ -*- ===// +//===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -41,8 +41,8 @@ namespace llvm { template struct SparseBitVectorElement { public: - typedef unsigned long BitWord; - typedef unsigned size_type; + using BitWord = unsigned long; + using size_type = unsigned; enum { BITWORD_SIZE = sizeof(BitWord) * CHAR_BIT, BITWORDS_PER_ELEMENT = (ElementSize + BITWORD_SIZE - 1) / BITWORD_SIZE, @@ -100,7 +100,7 @@ public: Bits[Idx / BITWORD_SIZE] |= 1L << (Idx % BITWORD_SIZE); } - bool test_and_set (unsigned Idx) { + bool test_and_set(unsigned Idx) { bool old = test(Idx); if (!old) { set(Idx); @@ -254,9 +254,9 @@ public: template class SparseBitVector { - typedef std::list> ElementList; - typedef typename ElementList::iterator ElementListIter; - typedef typename ElementList::const_iterator ElementListConstIter; + using ElementList = std::list>; + using ElementListIter = typename ElementList::iterator; + using ElementListConstIter = typename ElementList::const_iterator; enum { BITWORD_SIZE = SparseBitVectorElement::BITWORD_SIZE }; @@ -421,14 +421,12 @@ class SparseBitVector { }; public: - typedef SparseBitVectorIterator iterator; + using iterator = SparseBitVectorIterator; SparseBitVector() { CurrElementIter = Elements.begin(); } - ~SparseBitVector() = default; - // SparseBitVector copy ctor. SparseBitVector(const SparseBitVector &RHS) { ElementListConstIter ElementIter = RHS.Elements.begin(); @@ -440,6 +438,8 @@ public: CurrElementIter = Elements.begin (); } + ~SparseBitVector() = default; + // Clear. void clear() { Elements.clear(); diff --git a/include/llvm/ADT/SparseMultiSet.h b/include/llvm/ADT/SparseMultiSet.h index 08da4b68ebaac8ab179ec98e8756e047b6d3c008..c91e0d70f65a3c19da8057a467d5a46ca2ceb2ed 100644 --- a/include/llvm/ADT/SparseMultiSet.h +++ b/include/llvm/ADT/SparseMultiSet.h @@ -1,4 +1,4 @@ -//===--- llvm/ADT/SparseMultiSet.h - Sparse multiset ------------*- C++ -*-===// +//===- llvm/ADT/SparseMultiSet.h - Sparse multiset --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,9 +21,9 @@ #ifndef LLVM_ADT_SPARSEMULTISET_H #define LLVM_ADT_SPARSEMULTISET_H +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseSet.h" -#include "llvm/ADT/STLExtras.h" #include #include #include @@ -101,7 +101,7 @@ class SparseMultiSet { unsigned Prev; unsigned Next; - SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) { } + SMSNode(ValueT D, unsigned P, unsigned N) : Data(D), Prev(P), Next(N) {} /// List tails have invalid Nexts. bool isTail() const { @@ -118,8 +118,8 @@ class SparseMultiSet { bool isValid() const { return Prev != INVALID; } }; - typedef typename KeyFunctorT::argument_type KeyT; - typedef SmallVector DenseT; + using KeyT = typename KeyFunctorT::argument_type; + using DenseT = SmallVector; DenseT Dense; SparseT *Sparse = nullptr; unsigned Universe = 0; @@ -183,12 +183,12 @@ class SparseMultiSet { } public: - typedef ValueT value_type; - typedef ValueT &reference; - typedef const ValueT &const_reference; - typedef ValueT *pointer; - typedef const ValueT *const_pointer; - typedef unsigned size_type; + using value_type = ValueT; + using reference = ValueT &; + using const_reference = const ValueT &; + using pointer = ValueT *; + using const_pointer = const ValueT *; + using size_type = unsigned; SparseMultiSet() = default; SparseMultiSet(const SparseMultiSet &) = delete; @@ -227,7 +227,7 @@ public: unsigned SparseIdx; iterator_base(SMSPtrTy P, unsigned I, unsigned SI) - : SMS(P), Idx(I), SparseIdx(SI) { } + : SMS(P), Idx(I), SparseIdx(SI) {} /// Whether our iterator has fallen outside our dense vector. bool isEnd() const { @@ -248,11 +248,11 @@ public: void setNext(unsigned N) { SMS->Dense[Idx].Next = N; } public: - typedef std::iterator super; - typedef typename super::value_type value_type; - typedef typename super::difference_type difference_type; - typedef typename super::pointer pointer; - typedef typename super::reference reference; + using super = std::iterator; + using value_type = typename super::value_type; + using difference_type = typename super::difference_type; + using pointer = typename super::pointer; + using reference = typename super::reference; reference operator*() const { assert(isKeyed() && SMS->sparseIndex(SMS->Dense[Idx].Data) == SparseIdx && @@ -308,11 +308,12 @@ public: return I; } }; - typedef iterator_base iterator; - typedef iterator_base const_iterator; + + using iterator = iterator_base; + using const_iterator = iterator_base; // Convenience types - typedef std::pair RangePair; + using RangePair = std::pair; /// Returns an iterator past this container. Note that such an iterator cannot /// be decremented, but will compare equal to other end iterators. diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h index 00c18c743219a5f5eca8f1928a4a6443581f9731..25ade8831922f54eed38e604f7e98817cba0aaf4 100644 --- a/include/llvm/ADT/SparseSet.h +++ b/include/llvm/ADT/SparseSet.h @@ -1,4 +1,4 @@ -//===--- llvm/ADT/SparseSet.h - Sparse set ----------------------*- C++ -*-===// +//===- llvm/ADT/SparseSet.h - Sparse set ------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -125,9 +125,9 @@ class SparseSet { !std::numeric_limits::is_signed, "SparseT must be an unsigned integer type"); - typedef typename KeyFunctorT::argument_type KeyT; - typedef SmallVector DenseT; - typedef unsigned size_type; + using KeyT = typename KeyFunctorT::argument_type; + using DenseT = SmallVector; + using size_type = unsigned; DenseT Dense; SparseT *Sparse = nullptr; unsigned Universe = 0; @@ -135,11 +135,11 @@ class SparseSet { SparseSetValFunctor ValIndexOf; public: - typedef ValueT value_type; - typedef ValueT &reference; - typedef const ValueT &const_reference; - typedef ValueT *pointer; - typedef const ValueT *const_pointer; + using value_type = ValueT; + using reference = ValueT &; + using const_reference = const ValueT &; + using pointer = ValueT *; + using const_pointer = const ValueT *; SparseSet() = default; SparseSet(const SparseSet &) = delete; @@ -168,8 +168,8 @@ public: } // Import trivial vector stuff from DenseT. - typedef typename DenseT::iterator iterator; - typedef typename DenseT::const_iterator const_iterator; + using iterator = typename DenseT::iterator; + using const_iterator = typename DenseT::const_iterator; const_iterator begin() const { return Dense.begin(); } const_iterator end() const { return Dense.end(); } diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h index 53fa2a50fcbafc519f15ca251aa2f1413f0f7fa5..d5ebba409c3d34dc4856f5b8d88812f5e1a34294 100644 --- a/include/llvm/ADT/Statistic.h +++ b/include/llvm/ADT/Statistic.h @@ -101,6 +101,16 @@ public: return init(); } + void updateMax(unsigned V) { + unsigned PrevMax = Value.load(std::memory_order_relaxed); + // Keep trying to update max until we succeed or another thread produces + // a bigger max than us. + while (V > PrevMax && !Value.compare_exchange_weak( + PrevMax, V, std::memory_order_relaxed)) { + } + init(); + } + #else // Statistics are disabled in release builds. const Statistic &operator=(unsigned Val) { @@ -131,6 +141,8 @@ public: return *this; } + void updateMax(unsigned V) {} + #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) protected: diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index 8214782bfe800e3a3c00605d5dfeecd0a0aeb6a5..ffcf998a3d3230900e94ba2e25e0f34a27b8ae97 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -1,4 +1,4 @@ -//===-- llvm/ADT/StringExtras.h - Useful string functions -------*- C++ -*-===// +//===- llvm/ADT/StringExtras.h - Useful string functions --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,13 +14,20 @@ #ifndef LLVM_ADT_STRINGEXTRAS_H #define LLVM_ADT_STRINGEXTRAS_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/DataTypes.h" +#include +#include +#include +#include #include +#include +#include namespace llvm { -class raw_ostream; + template class SmallVectorImpl; +class raw_ostream; /// hexdigit - Return the hexadecimal character for the /// given number \p X (which should be less than 16). @@ -34,6 +41,11 @@ static inline StringRef toStringRef(bool B) { return StringRef(B ? "true" : "false"); } +/// Construct a string ref from an array ref of unsigned chars. +static inline StringRef toStringRef(ArrayRef Input) { + return StringRef(reinterpret_cast(Input.begin()), Input.size()); +} + /// Interpret the given character \p C as a hexadecimal digit and return its /// value. /// @@ -62,7 +74,7 @@ static inline std::string utohexstr(uint64_t X, bool LowerCase = false) { /// Convert buffer \p Input to its hexadecimal representation. /// The returned string is double the size of \p Input. -static inline std::string toHex(StringRef Input) { +inline std::string toHex(StringRef Input) { static const char *const LUT = "0123456789ABCDEF"; size_t Length = Input.size(); @@ -76,6 +88,47 @@ static inline std::string toHex(StringRef Input) { return Output; } +inline std::string toHex(ArrayRef Input) { + return toHex(toStringRef(Input)); +} + +static inline uint8_t hexFromNibbles(char MSB, char LSB) { + unsigned U1 = hexDigitValue(MSB); + unsigned U2 = hexDigitValue(LSB); + assert(U1 != -1U && U2 != -1U); + + return static_cast((U1 << 4) | U2); +} + +/// Convert hexadecimal string \p Input to its binary representation. +/// The return string is half the size of \p Input. +static inline std::string fromHex(StringRef Input) { + if (Input.empty()) + return std::string(); + + std::string Output; + Output.reserve((Input.size() + 1) / 2); + if (Input.size() % 2 == 1) { + Output.push_back(hexFromNibbles('0', Input.front())); + Input = Input.drop_front(); + } + + assert(Input.size() % 2 == 0); + while (!Input.empty()) { + uint8_t Hex = hexFromNibbles(Input[0], Input[1]); + Output.push_back(Hex); + Input = Input.drop_front(2); + } + return Output; +} + +/// \brief Convert the string \p S to an integer of the specified type using +/// the radix \p Base. If \p Base is 0, auto-detects the radix. +/// Returns true if the number was successfully converted, false otherwise. +template bool to_integer(StringRef S, N &Num, unsigned Base = 0) { + return !S.getAsInteger(Base, Num); +} + static inline std::string utostr(uint64_t X, bool isNeg = false) { char Buffer[21]; char *BufPtr = std::end(Buffer); @@ -91,7 +144,6 @@ static inline std::string utostr(uint64_t X, bool isNeg = false) { return std::string(BufPtr, std::end(Buffer)); } - static inline std::string itostr(int64_t X) { if (X < 0) return utostr(static_cast(-X), true); @@ -224,13 +276,14 @@ template inline size_t join_items_size(const A1 &A, Args &&... Items) { return join_one_item_size(A) + join_items_size(std::forward(Items)...); } -} + +} // end namespace detail /// Joins the strings in the range [Begin, End), adding Separator between /// the elements. template inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) { - typedef typename std::iterator_traits::iterator_category tag; + using tag = typename std::iterator_traits::iterator_category; return detail::join_impl(Begin, End, Separator, tag()); } @@ -258,6 +311,6 @@ inline std::string join_items(Sep Separator, Args &&... Items) { return Result; } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ADT_STRINGEXTRAS_H diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index c36fda7d690652b4f513aa9a253c07a9afc0daf2..d573148665a1ae39244efaec17bf0443700c0c30 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -1,4 +1,4 @@ -//===--- StringMap.h - String Hash table map interface ----------*- C++ -*-===// +//===- StringMap.h - String Hash table map interface ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,25 +16,23 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/PointerLikeTypeTraits.h" +#include #include #include #include #include #include -#include +#include #include namespace llvm { - template - class StringMapConstIterator; - template - class StringMapIterator; - template class StringMapKeyIterator; - template - class StringMapEntry; +template class StringMapConstIterator; +template class StringMapIterator; +template class StringMapKeyIterator; /// StringMapEntryBase - Shared base class of StringMapEntry instances. class StringMapEntryBase { @@ -53,17 +51,15 @@ protected: // Array of NumBuckets pointers to entries, null pointers are holes. // TheTable[NumBuckets] contains a sentinel value for easy iteration. Followed // by an array of the actual hash values as unsigned integers. - StringMapEntryBase **TheTable; - unsigned NumBuckets; - unsigned NumItems; - unsigned NumTombstones; + StringMapEntryBase **TheTable = nullptr; + unsigned NumBuckets = 0; + unsigned NumItems = 0; + unsigned NumTombstones = 0; unsigned ItemSize; protected: explicit StringMapImpl(unsigned itemSize) - : TheTable(nullptr), - // Initialize the map with zero buckets to allocation. - NumBuckets(0), NumItems(0), NumTombstones(0), ItemSize(itemSize) {} + : ItemSize(itemSize) {} StringMapImpl(StringMapImpl &&RHS) : TheTable(RHS.TheTable), NumBuckets(RHS.NumBuckets), NumItems(RHS.NumItems), NumTombstones(RHS.NumTombstones), @@ -225,9 +221,10 @@ class StringMap : public StringMapImpl { AllocatorTy Allocator; public: - typedef StringMapEntry MapEntryTy; + using MapEntryTy = StringMapEntry; StringMap() : StringMapImpl(static_cast(sizeof(MapEntryTy))) {} + explicit StringMap(unsigned InitialSize) : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))) {} @@ -248,12 +245,6 @@ public: StringMap(StringMap &&RHS) : StringMapImpl(std::move(RHS)), Allocator(std::move(RHS.Allocator)) {} - StringMap &operator=(StringMap RHS) { - StringMapImpl::swap(RHS); - std::swap(Allocator, RHS.Allocator); - return *this; - } - StringMap(const StringMap &RHS) : StringMapImpl(static_cast(sizeof(MapEntryTy))), Allocator(RHS.Allocator) { @@ -289,16 +280,37 @@ public: // not worthwhile. } + StringMap &operator=(StringMap RHS) { + StringMapImpl::swap(RHS); + std::swap(Allocator, RHS.Allocator); + return *this; + } + + ~StringMap() { + // Delete all the elements in the map, but don't reset the elements + // to default values. This is a copy of clear(), but avoids unnecessary + // work not required in the destructor. + if (!empty()) { + for (unsigned I = 0, E = NumBuckets; I != E; ++I) { + StringMapEntryBase *Bucket = TheTable[I]; + if (Bucket && Bucket != getTombstoneVal()) { + static_cast(Bucket)->Destroy(Allocator); + } + } + } + free(TheTable); + } + AllocatorTy &getAllocator() { return Allocator; } const AllocatorTy &getAllocator() const { return Allocator; } - typedef const char* key_type; - typedef ValueTy mapped_type; - typedef StringMapEntry value_type; - typedef size_t size_type; + using key_type = const char*; + using mapped_type = ValueTy; + using value_type = StringMapEntry; + using size_type = size_t; - typedef StringMapConstIterator const_iterator; - typedef StringMapIterator iterator; + using const_iterator = StringMapConstIterator; + using iterator = StringMapIterator; iterator begin() { return iterator(TheTable, NumBuckets == 0); @@ -313,7 +325,7 @@ public: return const_iterator(TheTable+NumBuckets, true); } - llvm::iterator_range> keys() const { + iterator_range> keys() const { return make_range(StringMapKeyIterator(begin()), StringMapKeyIterator(end())); } @@ -433,21 +445,6 @@ public: erase(I); return true; } - - ~StringMap() { - // Delete all the elements in the map, but don't reset the elements - // to default values. This is a copy of clear(), but avoids unnecessary - // work not required in the destructor. - if (!empty()) { - for (unsigned I = 0, E = NumBuckets; I != E; ++I) { - StringMapEntryBase *Bucket = TheTable[I]; - if (Bucket && Bucket != getTombstoneVal()) { - static_cast(Bucket)->Destroy(Allocator); - } - } - } - free(TheTable); - } }; template @@ -542,7 +539,6 @@ class StringMapKeyIterator public: StringMapKeyIterator() = default; - explicit StringMapKeyIterator(StringMapConstIterator Iter) : base(std::move(Iter)) {} diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index ce48f6d3bad32b78473c8645de06788ba60988b0..f6c93a858db1de8ed44c32c0cb4ebb666289598a 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -1,4 +1,4 @@ -//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===// +//===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,16 +15,18 @@ #include "llvm/Support/Compiler.h" #include #include +#include #include #include #include +#include #include namespace llvm { - template - class SmallVectorImpl; + class APInt; class hash_code; + template class SmallVectorImpl; class StringRef; /// Helper functions for StringRef::getAsInteger. @@ -46,10 +48,11 @@ namespace llvm { /// general safe to store a StringRef. class StringRef { public: - typedef const char *iterator; - typedef const char *const_iterator; static const size_t npos = ~size_t(0); - typedef size_t size_type; + + using iterator = const char *; + using const_iterator = const char *; + using size_type = size_t; private: /// The start of the string, in an external buffer. @@ -906,6 +909,7 @@ namespace llvm { // StringRefs can be treated like a POD type. template struct isPodLike; template <> struct isPodLike { static const bool value = true; }; -} -#endif +} // end namespace llvm + +#endif // LLVM_ADT_STRINGREF_H diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h index c32c2a49743853e92371eef3229ff27d20ec4136..9af44c07df795c7c5dd0032dae95540efd7c34ab 100644 --- a/include/llvm/ADT/StringSet.h +++ b/include/llvm/ADT/StringSet.h @@ -1,4 +1,4 @@ -//===--- StringSet.h - The LLVM Compiler Driver -----------------*- C++ -*-===// +//===- StringSet.h - The LLVM Compiler Driver -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,13 +15,19 @@ #define LLVM_ADT_STRINGSET_H #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include +#include +#include namespace llvm { /// StringSet - A wrapper for StringMap that provides set-like functionality. - template - class StringSet : public llvm::StringMap { - typedef llvm::StringMap base; + template + class StringSet : public StringMap { + using base = StringMap; + public: StringSet() = default; StringSet(std::initializer_list S) { @@ -40,6 +46,7 @@ namespace llvm { base::insert(std::make_pair(*It, '\0')); } }; -} + +} // end namespace llvm #endif // LLVM_ADT_STRINGSET_H diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h index ca43b6046193dd8e0d545f3d1a73acfd4352b441..79740713f75b03ae90b9ea4531c8e993b0e34978 100644 --- a/include/llvm/ADT/TinyPtrVector.h +++ b/include/llvm/ADT/TinyPtrVector.h @@ -30,9 +30,9 @@ namespace llvm { template class TinyPtrVector { public: - typedef SmallVector VecTy; - typedef typename VecTy::value_type value_type; - typedef PointerUnion PtrUnion; + using VecTy = SmallVector; + using value_type = typename VecTy::value_type; + using PtrUnion = PointerUnion; private: PtrUnion Val; @@ -167,10 +167,10 @@ public: return Val.template get()->size(); } - typedef EltTy *iterator; - typedef const EltTy *const_iterator; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; + using iterator = EltTy *; + using const_iterator = const EltTy *; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; iterator begin() { if (Val.template is()) diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index e271075b7e2add9e6c17b86e51a8c55eebc05711..26a991812a3a5a0ac2d1d9ae6c3714ee93cabd9d 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -59,6 +59,7 @@ public: mips64, // MIPS64: mips64 mips64el, // MIPS64EL: mips64el msp430, // MSP430: msp430 + nios2, // NIOSII: nios2 ppc, // PPC: powerpc ppc64, // PPC64: powerpc64, ppu ppc64le, // PPC64LE: powerpc64le @@ -140,7 +141,8 @@ public: Myriad, AMD, Mesa, - LastVendorType = Mesa + SUSE, + LastVendorType = SUSE }; enum OSType { UnknownOS, @@ -237,7 +239,9 @@ public: /// Default constructor is the same as an empty string and leaves all /// triple fields unknown. - Triple() : Data(), Arch(), Vendor(), OS(), Environment(), ObjectFormat() {} + Triple() + : Data(), Arch(), SubArch(), Vendor(), OS(), Environment(), + ObjectFormat() {} explicit Triple(const Twine &Str); Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr); @@ -251,6 +255,10 @@ public: ObjectFormat == Other.ObjectFormat; } + bool operator!=(const Triple &Other) const { + return !(*this == Other); + } + /// @} /// @name Normalization /// @{ @@ -721,6 +729,12 @@ public: /// \returns true if the triple is little endian, false otherwise. bool isLittleEndian() const; + /// Test whether target triples are compatible. + bool isCompatibleWith(const Triple &Other) const; + + /// Merge target triples. + std::string merge(const Triple &Other) const; + /// @} /// @name Static helpers for IDs. /// @{ diff --git a/include/llvm/ADT/UniqueVector.h b/include/llvm/ADT/UniqueVector.h index e1ab4b56023f8eeb47bd40524a57f6a092480f71..b17fb2392bafc84f4a8bc81a1227fb88d5f9176b 100644 --- a/include/llvm/ADT/UniqueVector.h +++ b/include/llvm/ADT/UniqueVector.h @@ -1,4 +1,4 @@ -//===-- llvm/ADT/UniqueVector.h ---------------------------------*- C++ -*-===// +//===- llvm/ADT/UniqueVector.h ----------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -24,16 +24,15 @@ namespace llvm { /// Entries can be fetched using operator[] with the entry ID. template class UniqueVector { public: - typedef typename std::vector VectorType; - typedef typename VectorType::iterator iterator; - typedef typename VectorType::const_iterator const_iterator; + using VectorType = typename std::vector; + using iterator = typename VectorType::iterator; + using const_iterator = typename VectorType::const_iterator; private: // Map - Used to handle the correspondence of entry to ID. std::map Map; // Vector - ID ordered vector of entries. Entries can be indexed by ID - 1. - // VectorType Vector; public: @@ -68,7 +67,6 @@ public: } /// operator[] - Returns a reference to the entry with the specified ID. - /// const T &operator[](unsigned ID) const { assert(ID-1 < size() && "ID is 0 or out of range!"); return Vector[ID - 1]; @@ -87,21 +85,18 @@ public: const_iterator end() const { return Vector.end(); } /// size - Returns the number of entries in the vector. - /// size_t size() const { return Vector.size(); } /// empty - Returns true if the vector is empty. - /// bool empty() const { return Vector.empty(); } /// reset - Clears all the entries. - /// void reset() { Map.clear(); Vector.resize(0, 0); } }; -} // End of namespace llvm +} // end namespace llvm #endif // LLVM_ADT_UNIQUEVECTOR_H diff --git a/include/llvm/ADT/ilist_base.h b/include/llvm/ADT/ilist_base.h index 1ffc864bea2f36511a64984f2987a80489a92938..3d818a48d41d455832d49fe49ce90310c1dfb68f 100644 --- a/include/llvm/ADT/ilist_base.h +++ b/include/llvm/ADT/ilist_base.h @@ -1,4 +1,4 @@ -//===- llvm/ADT/ilist_base.h - Intrusive List Base ---------------*- C++ -*-==// +//===- llvm/ADT/ilist_base.h - Intrusive List Base --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,15 +12,13 @@ #include "llvm/ADT/ilist_node_base.h" #include -#include -#include namespace llvm { /// Implementations of list algorithms using ilist_node_base. template class ilist_base { public: - typedef ilist_node_base node_base_type; + using node_base_type = ilist_node_base; static void insertBeforeImpl(node_base_type &Next, node_base_type &N) { node_base_type &Prev = *Next.getPrev(); diff --git a/include/llvm/ADT/ilist_iterator.h b/include/llvm/ADT/ilist_iterator.h index c848d1a134f19cf04204476d4cbd858583c73ed7..671e644e0154283717756c52c598b83cfac3a78f 100644 --- a/include/llvm/ADT/ilist_iterator.h +++ b/include/llvm/ADT/ilist_iterator.h @@ -1,4 +1,4 @@ -//===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator -------*- C++ -*-==// +//===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,28 +23,30 @@ namespace ilist_detail { /// Find const-correct node types. template struct IteratorTraits; template struct IteratorTraits { - typedef typename OptionsT::value_type value_type; - typedef typename OptionsT::pointer pointer; - typedef typename OptionsT::reference reference; - typedef ilist_node_impl *node_pointer; - typedef ilist_node_impl &node_reference; + using value_type = typename OptionsT::value_type; + using pointer = typename OptionsT::pointer; + using reference = typename OptionsT::reference; + using node_pointer = ilist_node_impl *; + using node_reference = ilist_node_impl &; }; template struct IteratorTraits { - typedef const typename OptionsT::value_type value_type; - typedef typename OptionsT::const_pointer pointer; - typedef typename OptionsT::const_reference reference; - typedef const ilist_node_impl *node_pointer; - typedef const ilist_node_impl &node_reference; + using value_type = const typename OptionsT::value_type; + using pointer = typename OptionsT::const_pointer; + using reference = typename OptionsT::const_reference; + using node_pointer = const ilist_node_impl *; + using node_reference = const ilist_node_impl &; }; template struct IteratorHelper; template <> struct IteratorHelper : ilist_detail::NodeAccess { - typedef ilist_detail::NodeAccess Access; + using Access = ilist_detail::NodeAccess; + template static void increment(T *&I) { I = Access::getNext(*I); } template static void decrement(T *&I) { I = Access::getPrev(*I); } }; template <> struct IteratorHelper : ilist_detail::NodeAccess { - typedef ilist_detail::NodeAccess Access; + using Access = ilist_detail::NodeAccess; + template static void increment(T *&I) { I = Access::getPrev(*I); } template static void decrement(T *&I) { I = Access::getNext(*I); } }; @@ -58,24 +60,23 @@ class ilist_iterator : ilist_detail::SpecificNodeAccess { friend ilist_iterator; friend ilist_iterator; - typedef ilist_detail::IteratorTraits Traits; - typedef ilist_detail::SpecificNodeAccess Access; + using Traits = ilist_detail::IteratorTraits; + using Access = ilist_detail::SpecificNodeAccess; public: - typedef typename Traits::value_type value_type; - typedef typename Traits::pointer pointer; - typedef typename Traits::reference reference; - typedef ptrdiff_t difference_type; - typedef std::bidirectional_iterator_tag iterator_category; - - typedef typename OptionsT::const_pointer const_pointer; - typedef typename OptionsT::const_reference const_reference; + using value_type = typename Traits::value_type; + using pointer = typename Traits::pointer; + using reference = typename Traits::reference; + using difference_type = ptrdiff_t; + using iterator_category = std::bidirectional_iterator_tag; + using const_pointer = typename OptionsT::const_pointer; + using const_reference = typename OptionsT::const_reference; private: - typedef typename Traits::node_pointer node_pointer; - typedef typename Traits::node_reference node_reference; + using node_pointer = typename Traits::node_pointer; + using node_reference = typename Traits::node_reference; - node_pointer NodePtr; + node_pointer NodePtr = nullptr; public: /// Create from an ilist_node. @@ -83,7 +84,7 @@ public: explicit ilist_iterator(pointer NP) : NodePtr(Access::getNodePtr(NP)) {} explicit ilist_iterator(reference NR) : NodePtr(Access::getNodePtr(&NR)) {} - ilist_iterator() : NodePtr(nullptr) {} + ilist_iterator() = default; // This is templated so that we can allow constructing a const iterator from // a nonconst iterator... @@ -184,8 +185,8 @@ template struct simplify_type; /// FIXME: remove this, since there is no implicit conversion to NodeTy. template struct simplify_type> { - typedef ilist_iterator iterator; - typedef typename iterator::pointer SimpleType; + using iterator = ilist_iterator; + using SimpleType = typename iterator::pointer; static SimpleType getSimplifiedValue(const iterator &Node) { return &*Node; } }; diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h index 7244d0f4058602116b6c5524defba09a5e9bf1cc..3362611697cb0e92bb3fa4bcc1275f8f06749f5b 100644 --- a/include/llvm/ADT/ilist_node.h +++ b/include/llvm/ADT/ilist_node.h @@ -1,4 +1,4 @@ -//==-- llvm/ADT/ilist_node.h - Intrusive Linked List Helper ------*- C++ -*-==// +//===- llvm/ADT/ilist_node.h - Intrusive Linked List Helper -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,11 +21,10 @@ namespace llvm { namespace ilist_detail { + struct NodeAccess; -} // end namespace ilist_detail -template -struct ilist_traits; +} // end namespace ilist_detail template class ilist_iterator; template class ilist_sentinel; @@ -39,9 +38,9 @@ template class ilist_sentinel; /// provide type safety: you can't insert nodes of \a ilist_node_impl into the /// wrong \a simple_ilist or \a iplist. template class ilist_node_impl : OptionsT::node_base_type { - typedef typename OptionsT::value_type value_type; - typedef typename OptionsT::node_base_type node_base_type; - typedef typename OptionsT::list_base_type list_base_type; + using value_type = typename OptionsT::value_type; + using node_base_type = typename OptionsT::node_base_type; + using list_base_type = typename OptionsT::list_base_type; friend typename OptionsT::list_base_type; friend struct ilist_detail::NodeAccess; @@ -52,17 +51,18 @@ template class ilist_node_impl : OptionsT::node_base_type { friend class ilist_iterator; protected: - ilist_node_impl() = default; + using self_iterator = ilist_iterator; + using const_self_iterator = ilist_iterator; + using reverse_self_iterator = ilist_iterator; + using const_reverse_self_iterator = ilist_iterator; - typedef ilist_iterator self_iterator; - typedef ilist_iterator const_self_iterator; - typedef ilist_iterator reverse_self_iterator; - typedef ilist_iterator const_reverse_self_iterator; + ilist_node_impl() = default; private: ilist_node_impl *getPrev() { return static_cast(node_base_type::getPrev()); } + ilist_node_impl *getNext() { return static_cast(node_base_type::getNext()); } @@ -70,6 +70,7 @@ private: const ilist_node_impl *getPrev() const { return static_cast(node_base_type::getPrev()); } + const ilist_node_impl *getNext() const { return static_cast(node_base_type::getNext()); } @@ -80,9 +81,11 @@ private: public: self_iterator getIterator() { return self_iterator(*this); } const_self_iterator getIterator() const { return const_self_iterator(*this); } + reverse_self_iterator getReverseIterator() { return reverse_self_iterator(*this); } + const_reverse_self_iterator getReverseIterator() const { return const_reverse_self_iterator(*this); } @@ -151,6 +154,7 @@ class ilist_node }; namespace ilist_detail { + /// An access class for ilist_node private API. /// /// This gives access to the private parts of ilist nodes. Nodes for an ilist @@ -163,15 +167,18 @@ protected: static ilist_node_impl *getNodePtr(typename OptionsT::pointer N) { return N; } + template static const ilist_node_impl * getNodePtr(typename OptionsT::const_pointer N) { return N; } + template static typename OptionsT::pointer getValuePtr(ilist_node_impl *N) { return static_cast(N); } + template static typename OptionsT::const_pointer getValuePtr(const ilist_node_impl *N) { @@ -182,15 +189,18 @@ protected: static ilist_node_impl *getPrev(ilist_node_impl &N) { return N.getPrev(); } + template static ilist_node_impl *getNext(ilist_node_impl &N) { return N.getNext(); } + template static const ilist_node_impl * getPrev(const ilist_node_impl &N) { return N.getPrev(); } + template static const ilist_node_impl * getNext(const ilist_node_impl &N) { @@ -200,23 +210,27 @@ protected: template struct SpecificNodeAccess : NodeAccess { protected: - typedef typename OptionsT::pointer pointer; - typedef typename OptionsT::const_pointer const_pointer; - typedef ilist_node_impl node_type; + using pointer = typename OptionsT::pointer; + using const_pointer = typename OptionsT::const_pointer; + using node_type = ilist_node_impl; static node_type *getNodePtr(pointer N) { return NodeAccess::getNodePtr(N); } + static const node_type *getNodePtr(const_pointer N) { return NodeAccess::getNodePtr(N); } + static pointer getValuePtr(node_type *N) { return NodeAccess::getValuePtr(N); } + static const_pointer getValuePtr(const node_type *N) { return NodeAccess::getValuePtr(N); } }; + } // end namespace ilist_detail template @@ -265,6 +279,7 @@ public: getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr)); return List.getPrevNode(*static_cast(this)); } + /// \brief Get the previous node, or \c nullptr for the list head. const NodeTy *getPrevNode() const { return const_cast(this)->getPrevNode(); @@ -278,6 +293,7 @@ public: getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr)); return List.getNextNode(*static_cast(this)); } + /// \brief Get the next node, or \c nullptr for the list tail. const NodeTy *getNextNode() const { return const_cast(this)->getNextNode(); @@ -285,6 +301,6 @@ public: /// @} }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_ADT_ILIST_NODE_H diff --git a/include/llvm/ADT/iterator.h b/include/llvm/ADT/iterator.h index 28dcdf9613ef2479885f344f53f63bed18640825..15720a67c047b6b1ead7bf8421f5f24e3a5b2f1a 100644 --- a/include/llvm/ADT/iterator.h +++ b/include/llvm/ADT/iterator.h @@ -11,9 +11,11 @@ #define LLVM_ADT_ITERATOR_H #include "llvm/ADT/iterator_range.h" +#include #include #include #include +#include namespace llvm { @@ -206,7 +208,7 @@ template < class iterator_adaptor_base : public iterator_facade_base { - typedef typename iterator_adaptor_base::iterator_facade_base BaseT; + using BaseT = typename iterator_adaptor_base::iterator_facade_base; protected: WrappedIteratorT I; @@ -221,7 +223,7 @@ protected: const WrappedIteratorT &wrapped() const { return I; } public: - typedef DifferenceTypeT difference_type; + using difference_type = DifferenceTypeT; DerivedT &operator+=(difference_type n) { static_assert( @@ -279,7 +281,7 @@ public: /// which is implemented with some iterator over T*s: /// /// \code -/// typedef pointee_iterator::iterator> iterator; +/// using iterator = pointee_iterator::iterator>; /// \endcode template #include +#include namespace llvm { diff --git a/include/llvm/ADT/simple_ilist.h b/include/llvm/ADT/simple_ilist.h index a1ab59170840f85f9971a6104cf12a51a9d5541a..4c7598a1acb4ed744cf0abd1e8fc3ff146d57a65 100644 --- a/include/llvm/ADT/simple_ilist.h +++ b/include/llvm/ADT/simple_ilist.h @@ -13,9 +13,14 @@ #include "llvm/ADT/ilist_base.h" #include "llvm/ADT/ilist_iterator.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/ilist_node_options.h" +#include "llvm/Support/Compiler.h" #include #include #include +#include +#include +#include namespace llvm { @@ -77,23 +82,23 @@ class simple_ilist typename ilist_detail::compute_node_options::type> { static_assert(ilist_detail::check_options::value, "Unrecognized node option!"); - typedef - typename ilist_detail::compute_node_options::type OptionsT; - typedef typename OptionsT::list_base_type list_base_type; + using OptionsT = + typename ilist_detail::compute_node_options::type; + using list_base_type = typename OptionsT::list_base_type; ilist_sentinel Sentinel; public: - typedef typename OptionsT::value_type value_type; - typedef typename OptionsT::pointer pointer; - typedef typename OptionsT::reference reference; - typedef typename OptionsT::const_pointer const_pointer; - typedef typename OptionsT::const_reference const_reference; - typedef ilist_iterator iterator; - typedef ilist_iterator const_iterator; - typedef ilist_iterator reverse_iterator; - typedef ilist_iterator const_reverse_iterator; - typedef size_t size_type; - typedef ptrdiff_t difference_type; + using value_type = typename OptionsT::value_type; + using pointer = typename OptionsT::pointer; + using reference = typename OptionsT::reference; + using const_pointer = typename OptionsT::const_pointer; + using const_reference = typename OptionsT::const_reference; + using iterator = ilist_iterator; + using const_iterator = ilist_iterator; + using reverse_iterator = ilist_iterator; + using const_reverse_iterator = ilist_iterator; + using size_type = size_t; + using difference_type = ptrdiff_t; simple_ilist() = default; ~simple_ilist() = default; diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 1b8b9751faa19cba5e22f104166ca5722908ac18..e00ae4f3beecec2bf18eb3f7b863b333bd50f026 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -38,11 +38,11 @@ #ifndef LLVM_ANALYSIS_ALIASANALYSIS_H #define LLVM_ANALYSIS_ALIASANALYSIS_H +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PassManager.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/TargetLibraryInfo.h" namespace llvm { class BasicAAResult; diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index f833f417c7dd2e6e9ad473a316c6932fe56ceda6..58d72afdc1b6fc96de7c57d0e115ec2405e73ffa 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -21,8 +21,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include @@ -43,7 +43,7 @@ class AssumptionCache { /// \brief Vector of weak value handles to calls of the @llvm.assume /// intrinsic. - SmallVector AssumeHandles; + SmallVector AssumeHandles; class AffectedValueCallbackVH final : public CallbackVH { AssumptionCache *AC; @@ -62,12 +62,12 @@ class AssumptionCache { /// \brief A map of values about which an assumption might be providing /// information to the relevant set of assumptions. using AffectedValuesMap = - DenseMap, - AffectedValueCallbackVH::DMI>; + DenseMap, + AffectedValueCallbackVH::DMI>; AffectedValuesMap AffectedValues; /// Get the vector of assumptions which affect a value from the cache. - SmallVector &getOrInsertAffectedValues(Value *V); + SmallVector &getOrInsertAffectedValues(Value *V); /// Copy affected values in the cache for OV to be affected values for NV. void copyAffectedValuesInCache(Value *OV, Value *NV); @@ -120,20 +120,20 @@ public: /// FIXME: We should replace this with pointee_iterator> /// when we can write that to filter out the null values. Then caller code /// will become simpler. - MutableArrayRef assumptions() { + MutableArrayRef assumptions() { if (!Scanned) scanFunction(); return AssumeHandles; } /// \brief Access the list of assumptions which affect this value. - MutableArrayRef assumptionsFor(const Value *V) { + MutableArrayRef assumptionsFor(const Value *V) { if (!Scanned) scanFunction(); auto AVI = AffectedValues.find_as(const_cast(V)); if (AVI == AffectedValues.end()) - return MutableArrayRef(); + return MutableArrayRef(); return AVI->second; } diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index e3d81fea49ea1ed7efae65d1d9f4a9e46a4b9bfe..3e05e09900a5fe684adf249ddda892ab8d4e7797 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -1164,9 +1164,8 @@ template struct BlockEdgesAdder { void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr, const LoopData *OuterLoop) { const BlockT *BB = BFI.RPOT[Irr.Node.Index]; - for (auto I = Successor::child_begin(BB), E = Successor::child_end(BB); - I != E; ++I) - G.addEdge(Irr, BFI.getNode(*I), OuterLoop); + for (const auto Succ : children(BB)) + G.addEdge(Irr, BFI.getNode(Succ), OuterLoop); } }; } @@ -1210,10 +1209,9 @@ BlockFrequencyInfoImpl::propagateMassToSuccessors(LoopData *OuterLoop, return false; } else { const BlockT *BB = getBlock(Node); - for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB); - SI != SE; ++SI) - if (!addToDist(Dist, OuterLoop, Node, getNode(*SI), - getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI)))) + for (const auto Succ : children(BB)) + if (!addToDist(Dist, OuterLoop, Node, getNode(Succ), + getWeightFromBranchProb(BPI->getEdgeProbability(BB, Succ)))) // Irreducible backedge. return false; } diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index 6a876679543d4026ab4b163957bb6aaf037d944b..94d3d4de6c9dd2a6b733b5ad8ca7e4f7005fa713 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -26,6 +26,7 @@ namespace llvm { class LoopInfo; +class TargetLibraryInfo; class raw_ostream; /// \brief Analysis providing branch probability information. @@ -43,8 +44,9 @@ class raw_ostream; class BranchProbabilityInfo { public: BranchProbabilityInfo() {} - BranchProbabilityInfo(const Function &F, const LoopInfo &LI) { - calculate(F, LI); + BranchProbabilityInfo(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI = nullptr) { + calculate(F, LI, TLI); } BranchProbabilityInfo(BranchProbabilityInfo &&Arg) @@ -116,7 +118,8 @@ public: return IsLikely ? LikelyProb : LikelyProb.getCompl(); } - void calculate(const Function &F, const LoopInfo &LI); + void calculate(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI = nullptr); /// Forget analysis results for the given basic block. void eraseBlock(const BasicBlock *BB); @@ -171,7 +174,7 @@ private: bool calcColdCallHeuristics(const BasicBlock *BB); bool calcPointerHeuristics(const BasicBlock *BB); bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI); - bool calcZeroHeuristics(const BasicBlock *BB); + bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); bool calcFloatingPointHeuristics(const BasicBlock *BB); bool calcInvokeHeuristics(const BasicBlock *BB); }; diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index 398bbfb0c4132752e478899c4ab5d311eee66a0d..a15a9e18c8153e0eb56d7038817c2c5cda3a256e 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -646,7 +646,7 @@ public: LazyCallGraph::SCC *C = &InitialC; // Collect value handles for all of the indirect call sites. - SmallVector CallHandles; + SmallVector CallHandles; // Struct to track the counts of direct and indirect calls in each function // of the SCC. @@ -658,7 +658,7 @@ public: // Put value handles on all of the indirect calls and return the number of // direct calls for each function in the SCC. auto ScanSCC = [](LazyCallGraph::SCC &C, - SmallVectorImpl &CallHandles) { + SmallVectorImpl &CallHandles) { assert(CallHandles.empty() && "Must start with a clear set of handles."); SmallVector CallCounts; @@ -671,7 +671,7 @@ public: ++Count.Direct; } else { ++Count.Indirect; - CallHandles.push_back(WeakVH(&I)); + CallHandles.push_back(WeakTrackingVH(&I)); } } } @@ -699,7 +699,7 @@ public: "Cannot have changed the size of the SCC!"); // Check whether any of the handles were devirtualized. - auto IsDevirtualizedHandle = [&](WeakVH &CallH) { + auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) { if (!CallH) return false; auto CS = CallSite(CallH); diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h index ea85436ee580e49a0559b8766921ea40fdcfad10..01469a25c96cfea194336435f61cbbb00144441e 100644 --- a/include/llvm/Analysis/CallGraph.h +++ b/include/llvm/Analysis/CallGraph.h @@ -41,12 +41,6 @@ /// of all of the caller-callee relationships, which is useful for /// transformations. /// -/// The CallGraph class also attempts to figure out what the root of the -/// CallGraph is, which it currently does by looking for a function named -/// 'main'. If no function named 'main' is found, the external node is used as -/// the entry node, reflecting the fact that any function without internal -/// linkage could be called into (which is common for libraries). -/// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_CALLGRAPH_H @@ -82,10 +76,6 @@ class CallGraph { /// \brief A map from \c Function* to \c CallGraphNode*. FunctionMapTy FunctionMap; - /// \brief Root is root of the call graph, or the external node if a 'main' - /// function couldn't be found. - CallGraphNode *Root; - /// \brief This node has edges to all external functions and those internal /// functions that have their address taken. CallGraphNode *ExternalCallingNode; @@ -172,7 +162,7 @@ class CallGraphNode { public: /// \brief A pair of the calling instruction (a call or invoke) /// and the call graph node being called. - typedef std::pair CallRecord; + typedef std::pair CallRecord; public: typedef std::vector CalledFunctionsVector; diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h index ff6ca1959153abbcf90febe6b3c1d2c88fd0f154..42034741b8e3c5676aace405c1a8c8e03572390d 100644 --- a/include/llvm/Analysis/ConstantFolding.h +++ b/include/llvm/Analysis/ConstantFolding.h @@ -31,6 +31,7 @@ class DataLayout; class Function; class GlobalValue; class Instruction; +class ImmutableCallSite; class TargetLibraryInfo; class Type; @@ -125,11 +126,12 @@ Constant *ConstantFoldLoadThroughGEPIndices(Constant *C, /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. -bool canConstantFoldCallTo(const Function *F); +bool canConstantFoldCallTo(ImmutableCallSite CS, const Function *F); /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. -Constant *ConstantFoldCall(Function *F, ArrayRef Operands, +Constant *ConstantFoldCall(ImmutableCallSite CS, Function *F, + ArrayRef Operands, const TargetLibraryInfo *TLI = nullptr); /// \brief Check whether the given call has no side-effects. diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h index c603274a7161b8e7929d3b3f57a61cb40673c796..e52c66f361c3d301593b74a17cb6d21c4679959b 100644 --- a/include/llvm/Analysis/DemandedBits.h +++ b/include/llvm/Analysis/DemandedBits.h @@ -22,11 +22,11 @@ #ifndef LLVM_ANALYSIS_DEMANDED_BITS_H #define LLVM_ANALYSIS_DEMANDED_BITS_H -#include "llvm/Pass.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" namespace llvm { @@ -35,6 +35,7 @@ class Function; class Instruction; class DominatorTree; class AssumptionCache; +struct KnownBits; class DemandedBits { public: @@ -58,8 +59,7 @@ private: void determineLiveOperandBits(const Instruction *UserI, const Instruction *I, unsigned OperandNo, const APInt &AOut, APInt &AB, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2); + KnownBits &Known, KnownBits &Known2); bool Analyzed; diff --git a/include/llvm/Analysis/DominanceFrontierImpl.h b/include/llvm/Analysis/DominanceFrontierImpl.h index 629ae3809045720ed4908715a9fcf232eb1c8e3c..9f8cacc24f2ce69c5c63fa3d59a68d202b79034d 100644 --- a/include/llvm/Analysis/DominanceFrontierImpl.h +++ b/include/llvm/Analysis/DominanceFrontierImpl.h @@ -174,12 +174,10 @@ ForwardDominanceFrontierBase::calculate(const DomTreeT &DT, // Visit each block only once. if (visited.insert(currentBB).second) { // Loop over CFG successors to calculate DFlocal[currentNode] - for (auto SI = BlockTraits::child_begin(currentBB), - SE = BlockTraits::child_end(currentBB); - SI != SE; ++SI) { + for (const auto Succ : children(currentBB)) { // Does Node immediately dominate this successor? - if (DT[*SI]->getIDom() != currentNode) - S.insert(*SI); + if (DT[Succ]->getIDom() != currentNode) + S.insert(Succ); } } diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index bb572dd5603b4f12ebe30a9a67701c578676a13a..035b974c5c1dec9b203931544e78f0a331e4646e 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -80,7 +80,7 @@ private: /// OperandValToReplace - The Value of the operand in the user instruction /// that this IVStrideUse is representing. - WeakVH OperandValToReplace; + WeakTrackingVH OperandValToReplace; /// PostIncLoops - The set of loops for which Expr has been adjusted to /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept. diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index 17e5cb6db02da3f1dd72f29337561e172eb310de..ce0b7895f253c8259f9a2c8fc7973057fe722dc4 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -14,8 +14,8 @@ #ifndef LLVM_ANALYSIS_INLINECOST_H #define LLVM_ANALYSIS_INLINECOST_H -#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include #include @@ -160,6 +160,10 @@ InlineParams getInlineParams(int Threshold); /// the -Oz flag. InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel); +/// Return the cost associated with a callsite, including paramater passing +/// and the call/return instruction. +int getCallsiteCost(CallSite CS, const DataLayout &DL); + /// \brief Get an InlineCost object representing the cost of inlining this /// callsite. /// diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index b829e995db055fff064d57ea6e60e7d1e83f5c26..be0f32ef444a490c5abd26263699d854b7d156ea 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -35,301 +35,210 @@ #include "llvm/IR/User.h" namespace llvm { - template - class ArrayRef; - class AssumptionCache; - class DominatorTree; - class Instruction; - class DataLayout; - class FastMathFlags; - class OptimizationRemarkEmitter; - class TargetLibraryInfo; - class Type; - class Value; - - /// Given operands for an Add, fold the result or return null. - Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a Sub, fold the result or return null. - Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an FAdd, fold the result or return null. - Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an FSub, fold the result or return null. - Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an FMul, fold the result or return null. - Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a Mul, fold the result or return null. - Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an SDiv, fold the result or return null. - Value *SimplifySDivInst(Value *LHS, Value *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a UDiv, fold the result or return null. - Value *SimplifyUDivInst(Value *LHS, Value *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an FDiv, fold the result or return null. - Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an SRem, fold the result or return null. - Value *SimplifySRemInst(Value *LHS, Value *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a URem, fold the result or return null. - Value *SimplifyURemInst(Value *LHS, Value *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an FRem, fold the result or return null. - Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a Shl, fold the result or return null. - Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a LShr, fold the result or return null. - Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a AShr, fold the result or return nulll. - Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an And, fold the result or return null. - Value *SimplifyAndInst(Value *LHS, Value *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an Or, fold the result or return null. - Value *SimplifyOrInst(Value *LHS, Value *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an Xor, fold the result or return null. - Value *SimplifyXorInst(Value *LHS, Value *RHS, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an ICmpInst, fold the result or return null. - Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an FCmpInst, fold the result or return null. - Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - FastMathFlags FMF, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a SelectInst, fold the result or return null. - Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a GetElementPtrInst, fold the result or return null. - Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an InsertValueInst, fold the result or return null. - Value *SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef Idxs, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an ExtractValueInst, fold the result or return null. - Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an ExtractElementInst, fold the result or return null. - Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a CastInst, fold the result or return null. - Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a ShuffleVectorInst, fold the result or return null. - Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, - Type *RetTy, const DataLayout &DL, +class Function; +template class AnalysisManager; +template class ArrayRef; +class AssumptionCache; +class DominatorTree; +class Instruction; +class ImmutableCallSite; +class DataLayout; +class FastMathFlags; +struct LoopStandardAnalysisResults; +class OptimizationRemarkEmitter; +class Pass; +class TargetLibraryInfo; +class Type; +class Value; + +struct SimplifyQuery { + const DataLayout &DL; + const TargetLibraryInfo *TLI = nullptr; + const DominatorTree *DT = nullptr; + AssumptionCache *AC = nullptr; + const Instruction *CxtI = nullptr; + + SimplifyQuery(const DataLayout &DL, const Instruction *CXTI = nullptr) + : DL(DL), CxtI(CXTI) {} + + SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI, + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr, + const Instruction *CXTI = nullptr) + : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI) {} + SimplifyQuery getWithInstruction(Instruction *I) const { + SimplifyQuery Copy(*this); + Copy.CxtI = I; + return Copy; + } +}; + +// NOTE: the explicit multiple argument versions of these functions are +// deprecated. +// Please use the SimplifyQuery versions in new code. + +/// Given operands for an Add, fold the result or return null. +Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, + const SimplifyQuery &Q); + +/// Given operands for a Sub, fold the result or return null. +Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, + const SimplifyQuery &Q); + +/// Given operands for an FAdd, fold the result or return null. +Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for an FSub, fold the result or return null. +Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for an FMul, fold the result or return null. +Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for a Mul, fold the result or return null. +Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an SDiv, fold the result or return null. +Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for a UDiv, fold the result or return null. +Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an FDiv, fold the result or return null. +Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for an SRem, fold the result or return null. +Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for a URem, fold the result or return null. +Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an FRem, fold the result or return null. +Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for a Shl, fold the result or return null. +Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Q); + +/// Given operands for a LShr, fold the result or return null. +Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q); + +/// Given operands for a AShr, fold the result or return nulll. +Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q); + +/// Given operands for an And, fold the result or return null. +Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an Or, fold the result or return null. +Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an Xor, fold the result or return null. +Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an ICmpInst, fold the result or return null. +Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q); + +/// Given operands for an FCmpInst, fold the result or return null. +Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); + +/// Given operands for a SelectInst, fold the result or return null. +Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const SimplifyQuery &Q); + +/// Given operands for a GetElementPtrInst, fold the result or return null. +Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, + const SimplifyQuery &Q); + +/// Given operands for an InsertValueInst, fold the result or return null. +Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, + const SimplifyQuery &Q); + +/// Given operands for an ExtractValueInst, fold the result or return null. +Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, + const SimplifyQuery &Q); + +/// Given operands for an ExtractElementInst, fold the result or return null. +Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, + const SimplifyQuery &Q); + +/// Given operands for a CastInst, fold the result or return null. +Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, + const SimplifyQuery &Q); + +/// Given operands for a ShuffleVectorInst, fold the result or return null. +Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const SimplifyQuery &Q); + +//=== Helper functions for higher up the class hierarchy. + +/// Given operands for a CmpInst, fold the result or return null. +Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q); + +/// Given operands for a BinaryOperator, fold the result or return null. +Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const SimplifyQuery &Q); + +/// Given operands for an FP BinaryOperator, fold the result or return null. +/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the +/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. +Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); + +/// Given a function and iterators over arguments, fold the result or return +/// null. +Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const SimplifyQuery &Q); + +/// Given a function and set of arguments, fold the result or return null. +Value *SimplifyCall(ImmutableCallSite CS, Value *V, ArrayRef Args, + const SimplifyQuery &Q); + +/// See if we can compute a simplified version of this instruction. If not, +/// return null. +Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, + OptimizationRemarkEmitter *ORE = nullptr); + +/// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. +/// +/// This first performs a normal RAUW of I with SimpleV. It then recursively +/// attempts to simplify those users updated by the operation. The 'I' +/// instruction must not be equal to the simplified value 'SimpleV'. +/// +/// The function returns true if any simplifications were performed. +bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - //=== Helper functions for higher up the class hierarchy. - - - /// Given operands for a CmpInst, fold the result or return null. - Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for a BinaryOperator, fold the result or return null. - Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given operands for an FP BinaryOperator, fold the result or return null. - /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the - /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. - Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given a function and iterators over arguments, fold the result or return - /// null. - Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// Given a function and set of arguments, fold the result or return null. - Value *SimplifyCall(Value *V, ArrayRef Args, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr); - - /// See if we can compute a simplified version of this instruction. If not, - /// return null. - Value *SimplifyInstruction(Instruction *I, const DataLayout &DL, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr, - OptimizationRemarkEmitter *ORE = nullptr); - - /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. - /// - /// This first performs a normal RAUW of I with SimpleV. It then recursively - /// attempts to simplify those users updated by the operation. The 'I' - /// instruction must not be equal to the simplified value 'SimpleV'. - /// - /// The function returns true if any simplifications were performed. - bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); - - /// Recursively attempt to simplify an instruction. - /// - /// This routine uses SimplifyInstruction to simplify 'I', and if successful - /// replaces uses of 'I' with the simplified value. It then recurses on each - /// of the users impacted. It returns true if any simplifications were - /// performed. - bool recursivelySimplifyInstruction(Instruction *I, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); + AssumptionCache *AC = nullptr); + +/// Recursively attempt to simplify an instruction. +/// +/// This routine uses SimplifyInstruction to simplify 'I', and if successful +/// replaces uses of 'I' with the simplified value. It then recurses on each +/// of the users impacted. It returns true if any simplifications were +/// performed. +bool recursivelySimplifyInstruction(Instruction *I, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); + +// These helper functions return a SimplifyQuery structure that contains as +// many of the optional analysis we use as are currently valid. This is the +// strongly preferred way of constructing SimplifyQuery in passes. +const SimplifyQuery getBestSimplifyQuery(Pass &, Function &); +template +const SimplifyQuery getBestSimplifyQuery(AnalysisManager &, + Function &); +const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &, + const DataLayout &); } // end namespace llvm #endif diff --git a/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/include/llvm/Analysis/LazyBranchProbabilityInfo.h index 067d7ebfd1f53808be3ddd9561a2b7ac68e04e7f..e1d404b1ada23444338f9ce8ebf4b88599a98e0b 100644 --- a/include/llvm/Analysis/LazyBranchProbabilityInfo.h +++ b/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -24,6 +24,7 @@ namespace llvm { class AnalysisUsage; class Function; class LoopInfo; +class TargetLibraryInfo; /// \brief This is an alternative analysis pass to /// BranchProbabilityInfoWrapperPass. The difference is that with this pass the @@ -55,14 +56,15 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { /// analysis without paying for the overhead if BPI doesn't end up being used. class LazyBranchProbabilityInfo { public: - LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI) - : Calculated(false), F(F), LI(LI) {} + LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI, + const TargetLibraryInfo *TLI) + : Calculated(false), F(F), LI(LI), TLI(TLI) {} /// Retrieve the BPI with the branch probabilities computed. BranchProbabilityInfo &getCalculated() { if (!Calculated) { assert(F && LI && "call setAnalysis"); - BPI.calculate(*F, *LI); + BPI.calculate(*F, *LI, TLI); Calculated = true; } return BPI; @@ -77,6 +79,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { bool Calculated; const Function *F; const LoopInfo *LI; + const TargetLibraryInfo *TLI; }; std::unique_ptr LBPI; diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h index 49e088e533dc175db576d0ea2cab1139b5d43085..7b178fc7bcc21db47978c8f63ccc58374d192037 100644 --- a/include/llvm/Analysis/LazyValueInfo.h +++ b/include/llvm/Analysis/LazyValueInfo.h @@ -100,8 +100,11 @@ public: /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); - /// Print the \LazyValueInfoCache. - void printCache(Function &F, raw_ostream &OS); + /// Print the \LazyValueInfo Analysis. + /// We pass in the DTree that is required for identifying which basic blocks + /// we can solve/print for, in the LVIPrinter. The DT is optional + /// in LVI, so we need to pass it here as an argument. + void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS); // For old PM pass. Delete once LazyValueInfoWrapperPass is gone. void releaseMemory(); diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 996794b660a9e8771fc9afd862b5186fad7433b4..096df1e421a77edb868a35e18684ade6a7f532db 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -158,11 +158,8 @@ public: /// True if terminator in the block can branch to another block that is /// outside of the current loop. bool isLoopExiting(const BlockT *BB) const { - typedef GraphTraits BlockTraits; - for (typename BlockTraits::ChildIteratorType SI = - BlockTraits::child_begin(BB), - SE = BlockTraits::child_end(BB); SI != SE; ++SI) { - if (!contains(*SI)) + for (const auto &Succ : children(BB)) { + if (!contains(Succ)) return true; } return false; @@ -186,11 +183,8 @@ public: unsigned NumBackEdges = 0; BlockT *H = getHeader(); - typedef GraphTraits > InvBlockTraits; - for (typename InvBlockTraits::ChildIteratorType I = - InvBlockTraits::child_begin(H), - E = InvBlockTraits::child_end(H); I != E; ++I) - if (contains(*I)) + for (const auto Pred : children >(H)) + if (contains(Pred)) ++NumBackEdges; return NumBackEdges; @@ -249,12 +243,9 @@ public: /// contains a branch back to the header. void getLoopLatches(SmallVectorImpl &LoopLatches) const { BlockT *H = getHeader(); - typedef GraphTraits > InvBlockTraits; - for (typename InvBlockTraits::ChildIteratorType I = - InvBlockTraits::child_begin(H), - E = InvBlockTraits::child_end(H); I != E; ++I) - if (contains(*I)) - LoopLatches.push_back(*I); + for (const auto Pred : children>(H)) + if (contains(Pred)) + LoopLatches.push_back(Pred); } //===--------------------------------------------------------------------===// diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index 761f8721b54fda6ba14b0d4fff1849545c4a0824..6ff4335f1ad5d252a39f18ca830bbd36e0c6d620 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -17,8 +17,8 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/Dominators.h" @@ -34,14 +34,11 @@ namespace llvm { template void LoopBase:: getExitingBlocks(SmallVectorImpl &ExitingBlocks) const { - typedef GraphTraits BlockTraits; - for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) - for (typename BlockTraits::ChildIteratorType I = - BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); - I != E; ++I) - if (!contains(*I)) { + for (const auto BB : blocks()) + for (const auto &Succ : children(BB)) + if (!contains(Succ)) { // Not in current loop? It must be an exit block. - ExitingBlocks.push_back(*BI); + ExitingBlocks.push_back(BB); break; } } @@ -63,14 +60,11 @@ BlockT *LoopBase::getExitingBlock() const { template void LoopBase:: getExitBlocks(SmallVectorImpl &ExitBlocks) const { - typedef GraphTraits BlockTraits; - for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) - for (typename BlockTraits::ChildIteratorType I = - BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); - I != E; ++I) - if (!contains(*I)) + for (const auto BB : blocks()) + for (const auto &Succ : children(BB)) + if (!contains(Succ)) // Not in current loop? It must be an exit block. - ExitBlocks.push_back(*I); + ExitBlocks.push_back(Succ); } /// getExitBlock - If getExitBlocks would return exactly one block, @@ -88,14 +82,11 @@ BlockT *LoopBase::getExitBlock() const { template void LoopBase:: getExitEdges(SmallVectorImpl &ExitEdges) const { - typedef GraphTraits BlockTraits; - for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) - for (typename BlockTraits::ChildIteratorType I = - BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); - I != E; ++I) - if (!contains(*I)) + for (const auto BB : blocks()) + for (const auto &Succ : children(BB)) + if (!contains(Succ)) // Not in current loop? It must be an exit block. - ExitEdges.push_back(Edge(*BI, *I)); + ExitEdges.emplace_back(BB, Succ); } /// getLoopPreheader - If there is a preheader for this loop, return it. A @@ -134,15 +125,11 @@ BlockT *LoopBase::getLoopPredecessor() const { // Loop over the predecessors of the header node... BlockT *Header = getHeader(); - typedef GraphTraits > InvBlockTraits; - for (typename InvBlockTraits::ChildIteratorType PI = - InvBlockTraits::child_begin(Header), - PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) { - typename InvBlockTraits::NodeRef N = *PI; - if (!contains(N)) { // If the block is not in the loop... - if (Out && Out != N) + for (const auto Pred : children>(Header)) { + if (!contains(Pred)) { // If the block is not in the loop... + if (Out && Out != Pred) return nullptr; // Multiple predecessors outside the loop - Out = N; + Out = Pred; } } @@ -156,17 +143,11 @@ BlockT *LoopBase::getLoopPredecessor() const { template BlockT *LoopBase::getLoopLatch() const { BlockT *Header = getHeader(); - typedef GraphTraits > InvBlockTraits; - typename InvBlockTraits::ChildIteratorType PI = - InvBlockTraits::child_begin(Header); - typename InvBlockTraits::ChildIteratorType PE = - InvBlockTraits::child_end(Header); BlockT *Latch = nullptr; - for (; PI != PE; ++PI) { - typename InvBlockTraits::NodeRef N = *PI; - if (contains(N)) { + for (const auto Pred : children>(Header)) { + if (contains(Pred)) { if (Latch) return nullptr; - Latch = N; + Latch = Pred; } } @@ -239,8 +220,8 @@ void LoopBase::verifyLoop() const { BI = df_ext_begin(getHeader(), VisitSet), BE = df_ext_end(getHeader(), VisitSet); - // Keep track of the number of BBs visited. - unsigned NumVisited = 0; + // Keep track of the BBs visited. + SmallPtrSet VisitedBBs; // Check the individual blocks. for ( ; BI != BE; ++BI) { @@ -278,10 +259,18 @@ void LoopBase::verifyLoop() const { assert(BB != &getHeader()->getParent()->front() && "Loop contains function entry block!"); - NumVisited++; + VisitedBBs.insert(BB); } - assert(NumVisited == getNumBlocks() && "Unreachable block in loop"); + if (VisitedBBs.size() != getNumBlocks()) { + dbgs() << "The following blocks are unreachable in the loop: "; + for (auto BB : Blocks) { + if (!VisitedBBs.count(BB)) { + dbgs() << *BB << "\n"; + } + } + assert(false && "Unreachable block in loop"); + } // Check the subloops. for (iterator I = begin(), E = end(); I != E; ++I) @@ -394,11 +383,9 @@ static void discoverAndMapSubloop(LoopT *L, ArrayRef Backedges, // within this subloop tree itself. Note that a predecessor may directly // reach another subloop that is not yet discovered to be a subloop of // this loop, which we must traverse. - for (typename InvBlockTraits::ChildIteratorType PI = - InvBlockTraits::child_begin(PredBB), - PE = InvBlockTraits::child_end(PredBB); PI != PE; ++PI) { - if (LI->getLoopFor(*PI) != Subloop) - ReverseCFGWorklist.push_back(*PI); + for (const auto Pred : children>(PredBB)) { + if (LI->getLoopFor(Pred) != Subloop) + ReverseCFGWorklist.push_back(Pred); } } } @@ -482,13 +469,7 @@ analyze(const DominatorTreeBase &DomTree) { SmallVector Backedges; // Check each predecessor of the potential loop header. - typedef GraphTraits > InvBlockTraits; - for (typename InvBlockTraits::ChildIteratorType PI = - InvBlockTraits::child_begin(Header), - PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) { - - BlockT *Backedge = *PI; - + for (const auto Backedge : children>(Header)) { // If Header dominates predBB, this is a new loop. Collect the backedges. if (DomTree.dominates(Header, Backedge) && DomTree.isReachableFromEntry(Backedge)) { diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h index 496ae189e57ba7b959b168393b203512c3d4d42c..75e7688bbdc26cc8ecd99ed608835dd12009d059 100644 --- a/include/llvm/Analysis/LoopPass.h +++ b/include/llvm/Analysis/LoopPass.h @@ -126,9 +126,8 @@ public: } public: - // Add a new loop into the loop queue as a child of the given parent, or at - // the top level if \c ParentLoop is null. - Loop &addLoop(Loop *ParentLoop); + // Add a new loop into the loop queue. + void addLoop(Loop &L); //===--------------------------------------------------------------------===// /// SimpleAnalysis - Provides simple interface to update analysis info diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index c5514316f75f043159e1f3a3bbae409706a12c90..60dafccd84bdf5211a32925b4f3a221e04111b8a 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -53,6 +53,11 @@ bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory similar to malloc or calloc. +bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast = false); + /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, @@ -230,7 +235,7 @@ class ObjectSizeOffsetEvaluator : public InstVisitor { typedef IRBuilder BuilderTy; - typedef std::pair WeakEvalType; + typedef std::pair WeakEvalType; typedef DenseMap CacheMapTy; typedef SmallPtrSet PtrSetTy; diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index a401887016c94b21f7b5becc3206249e369f915b..1dbbf6cc6addf540e2e1491cc49ac13d099cf8c7 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -15,8 +15,8 @@ #define LLVM_ANALYSIS_MEMORYDEPENDENCEANALYSIS_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/PointerEmbeddedInt.h" +#include "llvm/ADT/PointerSumType.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h index db31ae9f4f109196e70da6cf6556c0aa1bb8ac96..462e4594266e004d3ccd0b5b6d4b1b36b99f698e 100644 --- a/include/llvm/Analysis/MemorySSA.h +++ b/include/llvm/Analysis/MemorySSA.h @@ -84,6 +84,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DerivedUser.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" @@ -127,7 +128,7 @@ using const_memoryaccess_def_iterator = // \brief The base for all memory accesses. All memory accesses in a block are // linked together using an intrusive list. class MemoryAccess - : public User, + : public DerivedUser, public ilist_node>, public ilist_node> { public: @@ -145,15 +146,13 @@ public: MemoryAccess(const MemoryAccess &) = delete; MemoryAccess &operator=(const MemoryAccess &) = delete; - ~MemoryAccess() override; - void *operator new(size_t, unsigned) = delete; void *operator new(size_t) = delete; BasicBlock *getBlock() const { return Block; } - virtual void print(raw_ostream &OS) const = 0; - virtual void dump() const; + void print(raw_ostream &OS) const; + void dump() const; /// \brief The user iterators for a memory access typedef user_iterator iterator; @@ -207,11 +206,12 @@ protected: /// \brief Used for debugging and tracking things about MemoryAccesses. /// Guaranteed unique among MemoryAccesses, no guarantees otherwise. - virtual unsigned getID() const = 0; + inline unsigned getID() const; - MemoryAccess(LLVMContext &C, unsigned Vty, BasicBlock *BB, - unsigned NumOperands) - : User(Type::getVoidTy(C), Vty, nullptr, NumOperands), Block(BB) {} + MemoryAccess(LLVMContext &C, unsigned Vty, DeleteValueTy DeleteValue, + BasicBlock *BB, unsigned NumOperands) + : DerivedUser(Type::getVoidTy(C), Vty, nullptr, NumOperands, DeleteValue), + Block(BB) {} private: BasicBlock *Block; @@ -231,7 +231,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) { /// MemoryDef instead. class MemoryUseOrDef : public MemoryAccess { public: - void *operator new(size_t, unsigned) = delete; void *operator new(size_t) = delete; DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); @@ -248,21 +247,21 @@ public: // Sadly, these have to be public because they are needed in some of the // iterators. - virtual bool isOptimized() const = 0; - virtual MemoryAccess *getOptimized() const = 0; - virtual void setOptimized(MemoryAccess *) = 0; + inline bool isOptimized() const; + inline MemoryAccess *getOptimized() const; + inline void setOptimized(MemoryAccess *); /// \brief Reset the ID of what this MemoryUse was optimized to, causing it to /// be rewalked by the walker if necessary. /// This really should only be called by tests. - virtual void resetOptimized() = 0; + inline void resetOptimized(); protected: friend class MemorySSA; friend class MemorySSAUpdater; MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty, - Instruction *MI, BasicBlock *BB) - : MemoryAccess(C, Vty, BB, 1), MemoryInst(MI) { + DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB) + : MemoryAccess(C, Vty, DeleteValue, BB, 1), MemoryInst(MI) { setDefiningAccess(DMA); } void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false) { @@ -292,42 +291,40 @@ public: DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB) - : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB), OptimizedID(0) {} + : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB), + OptimizedID(0) {} // allocate space for exactly one operand void *operator new(size_t s) { return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; static inline bool classof(const Value *MA) { return MA->getValueID() == MemoryUseVal; } - void print(raw_ostream &OS) const override; + void print(raw_ostream &OS) const; - virtual void setOptimized(MemoryAccess *DMA) override { + void setOptimized(MemoryAccess *DMA) { OptimizedID = DMA->getID(); setOperand(0, DMA); } - virtual bool isOptimized() const override { + bool isOptimized() const { return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID(); } - virtual MemoryAccess *getOptimized() const override { + MemoryAccess *getOptimized() const { return getDefiningAccess(); } - virtual void resetOptimized() override { + void resetOptimized() { OptimizedID = INVALID_MEMORYACCESS_ID; } protected: friend class MemorySSA; - unsigned getID() const override { - llvm_unreachable("MemoryUses do not have IDs"); - } - private: + static void deleteMe(DerivedUser *Self); + unsigned int OptimizedID; }; @@ -350,38 +347,38 @@ public: MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB, unsigned Ver) - : MemoryUseOrDef(C, DMA, MemoryDefVal, MI, BB), ID(Ver), - Optimized(nullptr), OptimizedID(INVALID_MEMORYACCESS_ID) {} + : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB), + ID(Ver), Optimized(nullptr), OptimizedID(INVALID_MEMORYACCESS_ID) {} // allocate space for exactly one operand void *operator new(size_t s) { return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; static inline bool classof(const Value *MA) { return MA->getValueID() == MemoryDefVal; } - virtual void setOptimized(MemoryAccess *MA) override { + void setOptimized(MemoryAccess *MA) { Optimized = MA; OptimizedID = getDefiningAccess()->getID(); } - virtual MemoryAccess *getOptimized() const override { return Optimized; } - virtual bool isOptimized() const override { + MemoryAccess *getOptimized() const { return Optimized; } + bool isOptimized() const { return getOptimized() && getDefiningAccess() && OptimizedID == getDefiningAccess()->getID(); } - virtual void resetOptimized() override { + void resetOptimized() { OptimizedID = INVALID_MEMORYACCESS_ID; } - void print(raw_ostream &OS) const override; + void print(raw_ostream &OS) const; -protected: friend class MemorySSA; - unsigned getID() const override { return ID; } + unsigned getID() const { return ID; } private: + static void deleteMe(DerivedUser *Self); + const unsigned ID; MemoryAccess *Optimized; unsigned int OptimizedID; @@ -432,12 +429,11 @@ public: DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0) - : MemoryAccess(C, MemoryPhiVal, BB, 0), ID(Ver), ReservedSpace(NumPreds) { + : MemoryAccess(C, MemoryPhiVal, deleteMe, BB, 0), ID(Ver), + ReservedSpace(NumPreds) { allocHungoffUses(ReservedSpace); } - void *operator new(size_t, unsigned) = delete; - // Block iterator interface. This provides access to the list of incoming // basic blocks, which parallels the list of incoming values. typedef BasicBlock **block_iterator; @@ -534,7 +530,9 @@ public: return V->getValueID() == MemoryPhiVal; } - void print(raw_ostream &OS) const override; + void print(raw_ostream &OS) const; + + unsigned getID() const { return ID; } protected: friend class MemorySSA; @@ -546,8 +544,6 @@ protected: User::allocHungoffUses(N, /* IsPhi */ true); } - unsigned getID() const final { return ID; } - private: // For debugging only const unsigned ID; @@ -561,8 +557,45 @@ private: ReservedSpace = std::max(E + E / 2, 2u); growHungoffUses(ReservedSpace, /* IsPhi */ true); } + + static void deleteMe(DerivedUser *Self); }; +inline unsigned MemoryAccess::getID() const { + assert((isa(this) || isa(this)) && + "only memory defs and phis have ids"); + if (const auto *MD = dyn_cast(this)) + return MD->getID(); + return cast(this)->getID(); +} + +inline bool MemoryUseOrDef::isOptimized() const { + if (const auto *MD = dyn_cast(this)) + return MD->isOptimized(); + return cast(this)->isOptimized(); +} + +inline MemoryAccess *MemoryUseOrDef::getOptimized() const { + if (const auto *MD = dyn_cast(this)) + return MD->getOptimized(); + return cast(this)->getOptimized(); +} + +inline void MemoryUseOrDef::setOptimized(MemoryAccess *MA) { + if (auto *MD = dyn_cast(this)) + MD->setOptimized(MA); + else + cast(this)->setOptimized(MA); +} + +inline void MemoryUseOrDef::resetOptimized() { + if (auto *MD = dyn_cast(this)) + MD->resetOptimized(); + else + cast(this)->resetOptimized(); +} + + template <> struct OperandTraits : public HungoffOperandTraits<2> {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess) diff --git a/include/llvm/Analysis/MemorySSAUpdater.h b/include/llvm/Analysis/MemorySSAUpdater.h index d30eeeaa95b6a54a91efa45d0a7322271b75a013..b36b2f01dac62cf3cc9e2dabc7836740a6baa9b3 100644 --- a/include/llvm/Analysis/MemorySSAUpdater.h +++ b/include/llvm/Analysis/MemorySSAUpdater.h @@ -34,6 +34,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" @@ -45,7 +46,6 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Analysis/MemorySSA.h" namespace llvm { diff --git a/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/include/llvm/Analysis/ObjCARCAnalysisUtils.h index 5f4d8ecbbfbbe17b0c0dfd383af0e5149f45ba23..e80412a30564125ab54f24a6ea791eef74d418c4 100644 --- a/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -23,8 +23,8 @@ #ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H #define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/Passes.h" diff --git a/include/llvm/Analysis/ObjCARCInstKind.h b/include/llvm/Analysis/ObjCARCInstKind.h index 3b37ddf78f587b0574cd54fc8fb96694cc24fee2..02ff035782388f8954e610ae1da2c68a6bf3270d 100644 --- a/include/llvm/Analysis/ObjCARCInstKind.h +++ b/include/llvm/Analysis/ObjCARCInstKind.h @@ -10,8 +10,8 @@ #ifndef LLVM_ANALYSIS_OBJCARCINSTKIND_H #define LLVM_ANALYSIS_OBJCARCINSTKIND_H -#include "llvm/IR/Instructions.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" namespace llvm { namespace objcarc { diff --git a/include/llvm/Analysis/OrderedBasicBlock.h b/include/llvm/Analysis/OrderedBasicBlock.h index 5aa813eb483246f9ac4faff9de5e412ca510e3d9..2e716af1f60ddbbf77f92dbeebb80a1ab337c6d8 100644 --- a/include/llvm/Analysis/OrderedBasicBlock.h +++ b/include/llvm/Analysis/OrderedBasicBlock.h @@ -58,6 +58,7 @@ public: /// comes before \p B in \p BB. This is a simplification that considers /// cached instruction positions and ignores other basic blocks, being /// only relevant to compare relative instructions positions inside \p BB. + /// Returns false for A == B. bool dominates(const Instruction *A, const Instruction *B); }; diff --git a/include/llvm/Analysis/ProfileSummaryInfo.h b/include/llvm/Analysis/ProfileSummaryInfo.h index 1aec35c3e677e6a975504eb38145810fcf8c49e9..6aaabe1d1889084bb291a513cb9b4ce333e249b7 100644 --- a/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/include/llvm/Analysis/ProfileSummaryInfo.h @@ -54,9 +54,36 @@ public: ProfileSummaryInfo(Module &M) : M(M) {} ProfileSummaryInfo(ProfileSummaryInfo &&Arg) : M(Arg.M), Summary(std::move(Arg.Summary)) {} + + /// \brief Returns true if profile summary is available. + bool hasProfileSummary() { return computeSummary(); } + + /// \brief Returns true if module \c M has sample profile. + bool hasSampleProfile() { + return hasProfileSummary() && + Summary->getKind() == ProfileSummary::PSK_Sample; + } + + /// \brief Returns true if module \c M has instrumentation profile. + bool hasInstrumentationProfile() { + return hasProfileSummary() && + Summary->getKind() == ProfileSummary::PSK_Instr; + } + + /// Handle the invalidation of this information. + /// + /// When used as a result of \c ProfileSummaryAnalysis this method will be + /// called when the module this was computed for changes. Since profile + /// summary is immutable after it is annotated on the module, we return false + /// here. + bool invalidate(Module &, const PreservedAnalyses &, + ModuleAnalysisManager::Invalidator &) { + return false; + } + /// Returns the profile count for \p CallInst. - static Optional getProfileCount(const Instruction *CallInst, - BlockFrequencyInfo *BFI); + Optional getProfileCount(const Instruction *CallInst, + BlockFrequencyInfo *BFI); /// \brief Returns true if \p F has hot function entry. bool isFunctionEntryHot(const Function *F); /// Returns true if \p F has hot function entry or hot call edge. diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index caeb21db613e7a8c5a0b77af6b110e5abc2d5611..16ee07fa317712d8ec3cf2f2d61e387698b36283 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -708,10 +708,24 @@ class RegionInfoBase { /// The top level region. RegionT *TopLevelRegion; -private: /// Map every BB to the smallest region, that contains BB. BBtoRegionMap BBtoRegion; +protected: + /// \brief Update refences to a RegionInfoT held by the RegionT managed here + /// + /// This is a post-move helper. Regions hold references to the owning + /// RegionInfo object. After a move these need to be fixed. + template + void updateRegionTree(RegionInfoT &RI, TheRegionT *R) { + if (!R) + return; + R->RI = &RI; + for (auto &SubR : *R) + updateRegionTree(RI, SubR.get()); + } + +private: /// \brief Wipe this region tree's state without releasing any resources. /// /// This is essentially a post-move helper only. It leaves the object in an @@ -879,10 +893,12 @@ public: ~RegionInfo() override; - RegionInfo(RegionInfo &&Arg) - : Base(std::move(static_cast(Arg))) {} + RegionInfo(RegionInfo &&Arg) : Base(std::move(static_cast(Arg))) { + updateRegionTree(*this, TopLevelRegion); + } RegionInfo &operator=(RegionInfo &&RHS) { Base::operator=(std::move(static_cast(RHS))); + updateRegionTree(*this, TopLevelRegion); return *this; } diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h index b5f38139abf20556fe9eef9075e8f7642a94c794..515b362e540710c0f8a607b8aafc54b99b0a6b69 100644 --- a/include/llvm/Analysis/RegionPass.h +++ b/include/llvm/Analysis/RegionPass.h @@ -78,6 +78,11 @@ public: return PMT_RegionPassManager; } //@} + +protected: + /// Optional passes call this function to check whether the pass should be + /// skipped. This is the case when optimization bisect is over the limit. + bool skipRegion(Region &R) const; }; /// @brief The pass manager to schedule RegionPasses. diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 9a50de540f2b1b4d6e205844a714e19b231c1d4d..2a4b768256d1b3ab1ecfecbf0773b3cb571ad7fb 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -568,27 +568,16 @@ private: Predicates.insert(P); } - /*implicit*/ ExitLimit(const SCEV *E) - : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {} + /*implicit*/ ExitLimit(const SCEV *E); ExitLimit( const SCEV *E, const SCEV *M, bool MaxOrZero, - ArrayRef *> PredSetList) - : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { - assert((isa(ExactNotTaken) || - !isa(MaxNotTaken)) && - "Exact is not allowed to be less precise than Max"); - for (auto *PredSet : PredSetList) - for (auto *P : *PredSet) - addPredicate(P); - } + ArrayRef *> PredSetList); ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero, - const SmallPtrSetImpl &PredSet) - : ExitLimit(E, M, MaxOrZero, {&PredSet}) {} + const SmallPtrSetImpl &PredSet); - ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero) - : ExitLimit(E, M, MaxOrZero, None) {} + ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero); /// Test whether this ExitLimit contains any computed information, or /// whether it's all SCEVCouldNotCompute values. @@ -647,7 +636,7 @@ private: /// @} public: - BackedgeTakenInfo() : MaxAndComplete(nullptr, 0) {} + BackedgeTakenInfo() : MaxAndComplete(nullptr, 0), MaxOrZero(false) {} BackedgeTakenInfo(BackedgeTakenInfo &&) = default; BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default; @@ -667,10 +656,12 @@ private: /// Test whether this BackedgeTakenInfo contains complete information. bool hasFullInfo() const { return isComplete(); } - /// Return an expression indicating the exact backedge-taken count of the - /// loop if it is known or SCEVCouldNotCompute otherwise. This is the - /// number of times the loop header can be guaranteed to execute, minus - /// one. + /// Return an expression indicating the exact *backedge-taken* + /// count of the loop if it is known or SCEVCouldNotCompute + /// otherwise. If execution makes it to the backedge on every + /// iteration (i.e. there are no abnormal exists like exception + /// throws and thread exits) then this is the number of times the + /// loop header will execute minus one. /// /// If the SCEV predicate associated with the answer can be different /// from AlwaysTrue, we must add a (non null) Predicates argument. @@ -782,13 +773,13 @@ private: /// Set the memoized range for the given SCEV. const ConstantRange &setRange(const SCEV *S, RangeSignHint Hint, - const ConstantRange &CR) { + ConstantRange CR) { DenseMap &Cache = Hint == HINT_RANGE_UNSIGNED ? UnsignedRanges : SignedRanges; - auto Pair = Cache.insert({S, CR}); + auto Pair = Cache.try_emplace(S, std::move(CR)); if (!Pair.second) - Pair.first->second = CR; + Pair.first->second = std::move(CR); return Pair.first->second; } @@ -816,6 +807,10 @@ private: /// Helper function called from createNodeForPHI. const SCEV *createAddRecFromPHI(PHINode *PN); + /// A helper function for createAddRecFromPHI to handle simple cases. + const SCEV *createSimpleAffineAddRec(PHINode *PN, Value *BEValueV, + Value *StartValueV); + /// Helper function called from createNodeForPHI. const SCEV *createNodeFromSelectLikePHI(PHINode *PN); @@ -877,6 +872,47 @@ private: bool ControlsExit, bool AllowPredicates = false); + // Helper functions for computeExitLimitFromCond to avoid exponential time + // complexity. + + class ExitLimitCache { + // It may look like we need key on the whole (L, TBB, FBB, ControlsExit, + // AllowPredicates) tuple, but recursive calls to + // computeExitLimitFromCondCached from computeExitLimitFromCondImpl only + // vary the in \c ExitCond and \c ControlsExit parameters. We remember the + // initial values of the other values to assert our assumption. + SmallDenseMap, ExitLimit> TripCountMap; + + const Loop *L; + BasicBlock *TBB; + BasicBlock *FBB; + bool AllowPredicates; + + public: + ExitLimitCache(const Loop *L, BasicBlock *TBB, BasicBlock *FBB, + bool AllowPredicates) + : L(L), TBB(TBB), FBB(FBB), AllowPredicates(AllowPredicates) {} + + Optional find(const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, + bool AllowPredicates); + + void insert(const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, bool AllowPredicates, + const ExitLimit &EL); + }; + + typedef ExitLimitCache ExitLimitCacheTy; + ExitLimit computeExitLimitFromCondCached(ExitLimitCacheTy &Cache, + const Loop *L, Value *ExitCond, + BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, + bool AllowPredicates); + ExitLimit computeExitLimitFromCondImpl(ExitLimitCacheTy &Cache, const Loop *L, + Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, + bool AllowPredicates); + /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of the ICmpInst /// ExitCond, TBB, and FBB. If AllowPredicates is set, this call will try @@ -1159,33 +1195,50 @@ public: const SCEV *getConstant(const APInt &Val); const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false); const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty); + + typedef SmallDenseMap, const SCEV *, 8> + ExtendCacheTy; const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty); + const SCEV *getZeroExtendExprCached(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache); + const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache); + const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty); + const SCEV *getSignExtendExprCached(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache); + const SCEV *getSignExtendExprImpl(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache); const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty); const SCEV *getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, unsigned Depth = 0); const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { SmallVector Ops = {LHS, RHS}; - return getAddExpr(Ops, Flags); + return getAddExpr(Ops, Flags, Depth); } const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { SmallVector Ops = {Op0, Op1, Op2}; - return getAddExpr(Ops, Flags); + return getAddExpr(Ops, Flags, Depth); } const SCEV *getMulExpr(SmallVectorImpl &Ops, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { SmallVector Ops = {LHS, RHS}; - return getMulExpr(Ops, Flags); + return getMulExpr(Ops, Flags, Depth); } const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0) { SmallVector Ops = {Op0, Op1, Op2}; - return getMulExpr(Ops, Flags); + return getMulExpr(Ops, Flags, Depth); } const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getUDivExactExpr(const SCEV *LHS, const SCEV *RHS); @@ -1239,7 +1292,8 @@ public: /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1. const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap, + unsigned Depth = 0); /// Return a SCEV corresponding to a conversion of the input value to the /// specified type. If the type must be extended, it is zero extended. @@ -1352,11 +1406,11 @@ public: const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock); /// If the specified loop has a predictable backedge-taken count, return it, - /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count - /// is the number of times the loop header will be branched to from within - /// the loop. This is one less than the trip count of the loop, since it - /// doesn't count the first iteration, when the header is branched to from - /// outside the loop. + /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count is + /// the number of times the loop header will be branched to from within the + /// loop, assuming there are no abnormal exists like exception throws. This is + /// one less than the trip count of the loop, since it doesn't count the first + /// iteration, when the header is branched to from outside the loop. /// /// Note that it is not valid to call this method on a loop without a /// loop-invariant backedge-taken count (see @@ -1371,8 +1425,10 @@ public: const SCEV *getPredicatedBackedgeTakenCount(const Loop *L, SCEVUnionPredicate &Predicates); - /// Similar to getBackedgeTakenCount, except return the least SCEV value - /// that is known never to be less than the actual backedge taken count. + /// When successful, this returns a SCEVConstant that is greater than or equal + /// to (i.e. a "conservative over-approximation") of the value returend by + /// getBackedgeTakenCount. If such a value cannot be computed, it returns the + /// SCEVCouldNotCompute object. const SCEV *getMaxBackedgeTakenCount(const Loop *L); /// Return true if the backedge taken count is either the value returned by @@ -1483,6 +1539,11 @@ public: /// specified loop. bool isLoopInvariant(const SCEV *S, const Loop *L); + /// Determine if the SCEV can be evaluated at loop's entry. It is true if it + /// doesn't depend on a SCEVUnknown of an instruction which is dominated by + /// the header of loop L. + bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L); + /// Return true if the given SCEV changes value in a known way in the /// specified loop. This property being true implies that the value is /// variant in the loop AND that we can emit an expression to compute the @@ -1512,7 +1573,7 @@ public: /// delinearization). void findArrayDimensions(SmallVectorImpl &Terms, SmallVectorImpl &Sizes, - const SCEV *ElementSize) const; + const SCEV *ElementSize); void print(raw_ostream &OS) const; void verify() const; @@ -1638,10 +1699,14 @@ private: bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned, bool NoWrap); - /// Get add expr already created or create a new one + /// Get add expr already created or create a new one. const SCEV *getOrCreateAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags); + /// Get mul expr already created or create a new one. + const SCEV *getOrCreateMulExpr(SmallVectorImpl &Ops, + SCEV::NoWrapFlags Flags); + private: FoldingSet UniqueSCEVs; FoldingSet UniquePreds; diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 517592a3d049360571f69f34cbcb830d32e433e0..7d16f34e54cb125035159a7c438bdd18496b2ede 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -189,7 +189,7 @@ namespace llvm { /// replace congruent phis with their most canonical representative. Return /// the number of phis eliminated. unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl &DeadInsts, + SmallVectorImpl &DeadInsts, const TargetTransformInfo *TTI = nullptr); /// Insert code to directly compute the specified SCEV expression into the diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index fdcd8be00dde88aab4dc5faebb658d8424b27efd..2c693bceb24db307cb2196ddb597f823fe348ee8 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -595,58 +595,82 @@ namespace llvm { const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); - return SE.getTruncateExpr(Operand, Expr->getType()); + return Operand == Expr->getOperand() + ? Expr + : SE.getTruncateExpr(Operand, Expr->getType()); } const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); - return SE.getZeroExtendExpr(Operand, Expr->getType()); + return Operand == Expr->getOperand() + ? Expr + : SE.getZeroExtendExpr(Operand, Expr->getType()); } const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); - return SE.getSignExtendExpr(Operand, Expr->getType()); + return Operand == Expr->getOperand() + ? Expr + : SE.getSignExtendExpr(Operand, Expr->getType()); } const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { SmallVector Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getAddExpr(Operands); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getAddExpr(Operands); } const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { SmallVector Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getMulExpr(Operands); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getMulExpr(Operands); } const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - return SE.getUDivExpr(((SC*)this)->visit(Expr->getLHS()), - ((SC*)this)->visit(Expr->getRHS())); + auto *LHS = ((SC *)this)->visit(Expr->getLHS()); + auto *RHS = ((SC *)this)->visit(Expr->getRHS()); + bool Changed = LHS != Expr->getLHS() || RHS != Expr->getRHS(); + return !Changed ? Expr : SE.getUDivExpr(LHS, RHS); } const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { SmallVector Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getAddRecExpr(Operands, Expr->getLoop(), - Expr->getNoWrapFlags()); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr + : SE.getAddRecExpr(Operands, Expr->getLoop(), + Expr->getNoWrapFlags()); } const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { SmallVector Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getSMaxExpr(Operands); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC *)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getSMaxExpr(Operands); } const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { SmallVector Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); - return SE.getUMaxExpr(Operands); + bool Changed = false; + for (auto *Op : Expr->operands()) { + Operands.push_back(((SC*)this)->visit(Op)); + Changed |= Op != Operands.back(); + } + return !Changed ? Expr : SE.getUMaxExpr(Operands); } const SCEV *visitUnknown(const SCEVUnknown *Expr) { diff --git a/include/llvm/Analysis/ScalarEvolutionNormalization.h b/include/llvm/Analysis/ScalarEvolutionNormalization.h index 7c6423a21cfae97dfec3fdb67053d8f374401256..51c92121c8f0ea7e98494266379102b8c7d31978 100644 --- a/include/llvm/Analysis/ScalarEvolutionNormalization.h +++ b/include/llvm/Analysis/ScalarEvolutionNormalization.h @@ -36,43 +36,34 @@ #ifndef LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H #define LLVM_ANALYSIS_SCALAREVOLUTIONNORMALIZATION_H +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" namespace llvm { -class Instruction; -class DominatorTree; class Loop; class ScalarEvolution; class SCEV; -class Value; -/// TransformKind - Different types of transformations that -/// TransformForPostIncUse can do. -enum TransformKind { - /// Normalize - Normalize according to the given loops. - Normalize, - /// NormalizeAutodetect - Detect post-inc opportunities on new expressions, - /// update the given loop set, and normalize. - NormalizeAutodetect, - /// Denormalize - Perform the inverse transform on the expression with the - /// given loop set. - Denormalize -}; - -/// PostIncLoopSet - A set of loops. typedef SmallPtrSet PostIncLoopSet; -/// TransformForPostIncUse - Transform the given expression according to the -/// given transformation kind. -const SCEV *TransformForPostIncUse(TransformKind Kind, - const SCEV *S, - Instruction *User, - Value *OperandValToReplace, - PostIncLoopSet &Loops, - ScalarEvolution &SE, - DominatorTree &DT); +typedef function_ref NormalizePredTy; + +/// Normalize \p S to be post-increment for all loops present in \p +/// Loops. +const SCEV *normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, + ScalarEvolution &SE); + +/// Normalize \p S for all add recurrence sub-expressions for which \p +/// Pred returns true. +const SCEV *normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred, + ScalarEvolution &SE); -} +/// Denormalize \p S to be post-increment for all loops present in \p +/// Loops. +const SCEV *denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, + ScalarEvolution &SE); +} // namespace llvm #endif diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index 637fc7ed30ddeb0c188ad15a50780ac543067f52..9cbe917c146d956d0c72bd79bda4cb2475d232ae 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -161,6 +161,60 @@ TLI_DEFINE_STRING_INTERNAL("_Znwm") /// void *new(unsigned long, nothrow); TLI_DEFINE_ENUM_INTERNAL(ZnwmRKSt9nothrow_t) TLI_DEFINE_STRING_INTERNAL("_ZnwmRKSt9nothrow_t") +/// double __acos_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(acos_finite) +TLI_DEFINE_STRING_INTERNAL("__acos_finite") +/// float __acosf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(acosf_finite) +TLI_DEFINE_STRING_INTERNAL("__acosf_finite") +/// double __acosh_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(acosh_finite) +TLI_DEFINE_STRING_INTERNAL("__acosh_finite") +/// float __acoshf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(acoshf_finite) +TLI_DEFINE_STRING_INTERNAL("__acoshf_finite") +/// long double __acoshl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(acoshl_finite) +TLI_DEFINE_STRING_INTERNAL("__acoshl_finite") +/// long double __acosl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(acosl_finite) +TLI_DEFINE_STRING_INTERNAL("__acosl_finite") +/// double __asin_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(asin_finite) +TLI_DEFINE_STRING_INTERNAL("__asin_finite") +/// float __asinf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(asinf_finite) +TLI_DEFINE_STRING_INTERNAL("__asinf_finite") +/// long double __asinl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(asinl_finite) +TLI_DEFINE_STRING_INTERNAL("__asinl_finite") +/// double atan2_finite(double y, double x); +TLI_DEFINE_ENUM_INTERNAL(atan2_finite) +TLI_DEFINE_STRING_INTERNAL("__atan2_finite") +/// float atan2f_finite(float y, float x); +TLI_DEFINE_ENUM_INTERNAL(atan2f_finite) +TLI_DEFINE_STRING_INTERNAL("__atan2f_finite") +/// long double atan2l_finite(long double y, long double x); +TLI_DEFINE_ENUM_INTERNAL(atan2l_finite) +TLI_DEFINE_STRING_INTERNAL("__atan2l_finite") +/// double __atanh_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(atanh_finite) +TLI_DEFINE_STRING_INTERNAL("__atanh_finite") +/// float __atanhf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(atanhf_finite) +TLI_DEFINE_STRING_INTERNAL("__atanhf_finite") +/// long double __atanhl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(atanhl_finite) +TLI_DEFINE_STRING_INTERNAL("__atanhl_finite") +/// double __cosh_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(cosh_finite) +TLI_DEFINE_STRING_INTERNAL("__cosh_finite") +/// float __coshf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(coshf_finite) +TLI_DEFINE_STRING_INTERNAL("__coshf_finite") +/// long double __coshl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(coshl_finite) +TLI_DEFINE_STRING_INTERNAL("__coshl_finite") /// double __cospi(double x); TLI_DEFINE_ENUM_INTERNAL(cospi) TLI_DEFINE_STRING_INTERNAL("__cospi") @@ -180,12 +234,66 @@ TLI_DEFINE_STRING_INTERNAL("__cxa_guard_acquire") /// void __cxa_guard_release(guard_t *guard); TLI_DEFINE_ENUM_INTERNAL(cxa_guard_release) TLI_DEFINE_STRING_INTERNAL("__cxa_guard_release") +/// double __exp10_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(exp10_finite) +TLI_DEFINE_STRING_INTERNAL("__exp10_finite") +/// float __exp10f_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(exp10f_finite) +TLI_DEFINE_STRING_INTERNAL("__exp10f_finite") +/// long double __exp10l_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(exp10l_finite) +TLI_DEFINE_STRING_INTERNAL("__exp10l_finite") +/// double __exp2_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(exp2_finite) +TLI_DEFINE_STRING_INTERNAL("__exp2_finite") +/// float __exp2f_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(exp2f_finite) +TLI_DEFINE_STRING_INTERNAL("__exp2f_finite") +/// long double __exp2l_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(exp2l_finite) +TLI_DEFINE_STRING_INTERNAL("__exp2l_finite") +/// double __exp_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(exp_finite) +TLI_DEFINE_STRING_INTERNAL("__exp_finite") +/// float __expf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(expf_finite) +TLI_DEFINE_STRING_INTERNAL("__expf_finite") +/// long double __expl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(expl_finite) +TLI_DEFINE_STRING_INTERNAL("__expl_finite") /// int __isoc99_scanf (const char *format, ...) TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_scanf) TLI_DEFINE_STRING_INTERNAL("__isoc99_scanf") /// int __isoc99_sscanf(const char *s, const char *format, ...) TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_sscanf) TLI_DEFINE_STRING_INTERNAL("__isoc99_sscanf") +/// double __log10_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(log10_finite) +TLI_DEFINE_STRING_INTERNAL("__log10_finite") +/// float __log10f_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(log10f_finite) +TLI_DEFINE_STRING_INTERNAL("__log10f_finite") +/// long double __log10l_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(log10l_finite) +TLI_DEFINE_STRING_INTERNAL("__log10l_finite") +/// double __log2_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(log2_finite) +TLI_DEFINE_STRING_INTERNAL("__log2_finite") +/// float __log2f_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(log2f_finite) +TLI_DEFINE_STRING_INTERNAL("__log2f_finite") +/// long double __log2l_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(log2l_finite) +TLI_DEFINE_STRING_INTERNAL("__log2l_finite") +/// double __log_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(log_finite) +TLI_DEFINE_STRING_INTERNAL("__log_finite") +/// float __logf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(logf_finite) +TLI_DEFINE_STRING_INTERNAL("__logf_finite") +/// long double __logl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(logl_finite) +TLI_DEFINE_STRING_INTERNAL("__logl_finite") /// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memcpy_chk) TLI_DEFINE_STRING_INTERNAL("__memcpy_chk") @@ -199,13 +307,30 @@ TLI_DEFINE_STRING_INTERNAL("__memset_chk") // int __nvvm_reflect(const char *) TLI_DEFINE_ENUM_INTERNAL(nvvm_reflect) TLI_DEFINE_STRING_INTERNAL("__nvvm_reflect") - +/// double __pow_finite(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(pow_finite) +TLI_DEFINE_STRING_INTERNAL("__pow_finite") +/// float _powf_finite(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(powf_finite) +TLI_DEFINE_STRING_INTERNAL("__powf_finite") +/// long double __powl_finite(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(powl_finite) +TLI_DEFINE_STRING_INTERNAL("__powl_finite") /// double __sincospi_stret(double x); TLI_DEFINE_ENUM_INTERNAL(sincospi_stret) TLI_DEFINE_STRING_INTERNAL("__sincospi_stret") /// float __sincospif_stret(float x); TLI_DEFINE_ENUM_INTERNAL(sincospif_stret) TLI_DEFINE_STRING_INTERNAL("__sincospif_stret") +/// double __sinh_finite(double x); +TLI_DEFINE_ENUM_INTERNAL(sinh_finite) +TLI_DEFINE_STRING_INTERNAL("__sinh_finite") +/// float _sinhf_finite(float x); +TLI_DEFINE_ENUM_INTERNAL(sinhf_finite) +TLI_DEFINE_STRING_INTERNAL("__sinhf_finite") +/// long double __sinhl_finite(long double x); +TLI_DEFINE_ENUM_INTERNAL(sinhl_finite) +TLI_DEFINE_STRING_INTERNAL("__sinhl_finite") /// double __sinpi(double x); TLI_DEFINE_ENUM_INTERNAL(sinpi) TLI_DEFINE_STRING_INTERNAL("__sinpi") @@ -1115,6 +1240,9 @@ TLI_DEFINE_STRING_INTERNAL("vsprintf") /// int vsscanf(const char *s, const char *format, va_list arg); TLI_DEFINE_ENUM_INTERNAL(vsscanf) TLI_DEFINE_STRING_INTERNAL("vsscanf") +/// size_t wcslen (const wchar_t* wcs); +TLI_DEFINE_ENUM_INTERNAL(wcslen) +TLI_DEFINE_STRING_INTERNAL("wcslen") /// ssize_t write(int fildes, const void *buf, size_t nbyte); TLI_DEFINE_ENUM_INTERNAL(write) TLI_DEFINE_STRING_INTERNAL("write") diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index 944250cfd6ac14e50796af6f5150877443e5d672..d75e7833279b0248397f43fb72c7ede0eefba503 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -191,6 +192,14 @@ public: void setShouldSignExtI32Param(bool Val) { ShouldSignExtI32Param = Val; } + + /// Returns the size of the wchar_t type in bytes. + unsigned getWCharSize(const Module &M) const; + + /// Returns size of the default wchar_t type on target \p T. This is mostly + /// intended to verify that the size in the frontend matches LLVM. All other + /// queries should use getWCharSize() instead. + static unsigned getTargetWCharSize(const Triple &T); }; /// Provides information about what library functions are available for @@ -231,6 +240,13 @@ public: return Impl->getLibFunc(FDecl, F); } + /// If a callsite does not have the 'nobuiltin' attribute, return if the + /// called function is a known library function and set F to that function. + bool getLibFunc(ImmutableCallSite CS, LibFunc &F) const { + return !CS.isNoBuiltin() && CS.getCalledFunction() && + getLibFunc(*(CS.getCalledFunction()), F); + } + /// Tests whether a library function is available. bool has(LibFunc F) const { return Impl->getState(F) != TargetLibraryInfoImpl::Unavailable; @@ -307,6 +323,11 @@ public: return Attribute::None; } + /// \copydoc TargetLibraryInfoImpl::getWCharSize() + unsigned getWCharSize(const Module &M) const { + return Impl->getWCharSize(M); + } + /// Handle invalidation from the pass manager. /// /// If we try to invalidate this info, just return false. It cannot become diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 67196687d55610cde207e4cc4c209e0ea92c534a..af2ebb7b6b44f72892a68bce541019ccd314a8a8 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -197,6 +197,12 @@ public: int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const; + /// \return The estimated number of case clusters when lowering \p 'SI'. + /// \p JTSize Set a jump table size only when \p SI is suitable for a jump + /// table. + unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) const; + /// \brief Estimate the cost of a given IR user when lowered. /// /// This can estimate the cost of either a ConstantExpr or Instruction when @@ -229,6 +235,11 @@ public: /// starting with the sources of divergence. bool isSourceOfDivergence(const Value *V) const; + // \brief Returns true for the target specific + // set of operations which produce uniform result + // even taking non-unform arguments + bool isAlwaysUniform(const Value *V) const; + /// Returns the address space ID for a target's 'flat' address space. Note /// this is not necessarily the same as addrspace(0), which LLVM sometimes /// refers to as the generic address space. The flat address space is a @@ -261,6 +272,19 @@ public: /// incurs significant execution cost. bool isLoweredToCall(const Function *F) const; + struct LSRCost { + /// TODO: Some of these could be merged. Also, a lexical ordering + /// isn't always optimal. + unsigned Insns; + unsigned NumRegs; + unsigned AddRecCost; + unsigned NumIVMuls; + unsigned NumBaseAdds; + unsigned ImmCost; + unsigned SetupCost; + unsigned ScaleCost; + }; + /// Parameters that control the generic loop unrolling transformation. struct UnrollingPreferences { /// The cost threshold for the unrolled loop. Should be relative to the @@ -379,6 +403,10 @@ public: bool HasBaseReg, int64_t Scale, unsigned AddrSpace = 0) const; + /// \brief Return true if LSR cost of C1 is lower than C1. + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) const; + /// \brief Return true if the target supports masked load/store /// AVX2 and AVX-512 targets allow masks for consecutive load and store bool isLegalMaskedStore(Type *DataType) const; @@ -390,6 +418,9 @@ public: bool isLegalMaskedScatter(Type *DataType) const; bool isLegalMaskedGather(Type *DataType) const; + /// Return true if target doesn't mind addresses in vectors. + bool prefersVectorizedAddressing() const; + /// \brief Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store /// of the specified type. @@ -445,6 +476,9 @@ public: /// \brief Don't restrict interleaved unrolling to small loops. bool enableAggressiveInterleaving(bool LoopHasReductions) const; + /// \brief Enable inline expansion of memcmp + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const; + /// \brief Enable matching of interleaved access groups. bool enableInterleavedAccessVectorization() const; @@ -531,6 +565,9 @@ public: /// \return The width of the largest scalar or vector register type. unsigned getRegisterBitWidth(bool Vector) const; + /// \return The width of the smallest vector register type. + unsigned getMinVectorRegisterBitWidth() const; + /// \return True if it should be considered for address type promotion. /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is /// profitable without finding other extensions fed by the same input. @@ -690,6 +727,10 @@ public: /// if false is returned. bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; + /// \returns The maximum element size, in bytes, for an element + /// unordered-atomic memory intrinsic. + unsigned getAtomicMemIntrinsicMaxElementSize() const; + /// \returns A value which is the result of the given memory intrinsic. New /// instructions may be created to extract the result from the given intrinsic /// memory operation. Returns nullptr if the target cannot create a result @@ -734,6 +775,22 @@ public: unsigned ChainSizeInBytes, VectorType *VecTy) const; + /// Flags describing the kind of vector reduction. + struct ReductionFlags { + ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {} + bool IsMaxOp; ///< If the op a min/max kind, true if it's a max operation. + bool IsSigned; ///< Whether the operation is a signed int reduction. + bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present. + }; + + /// \returns True if the target wants to handle the given reduction idiom in + /// the intrinsics form instead of the shuffle form. + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const; + + /// \returns True if the target wants to expand the given reduction intrinsic + /// into a shuffle sequence. + bool shouldExpandReduction(const IntrinsicInst *II) const; /// @} private: @@ -764,9 +821,12 @@ public: ArrayRef ParamTys) = 0; virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) = 0; + virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) = 0; virtual int getUserCost(const User *U) = 0; virtual bool hasBranchDivergence() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; + virtual bool isAlwaysUniform(const Value *V) = 0; virtual unsigned getFlatAddressSpace() = 0; virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0; @@ -776,10 +836,13 @@ public: int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; + virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) = 0; virtual bool isLegalMaskedStore(Type *DataType) = 0; virtual bool isLegalMaskedLoad(Type *DataType) = 0; virtual bool isLegalMaskedScatter(Type *DataType) = 0; virtual bool isLegalMaskedGather(Type *DataType) = 0; + virtual bool prefersVectorizedAddressing() = 0; virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; @@ -797,6 +860,7 @@ public: unsigned VF) = 0; virtual bool supportsEfficientVectorElementLoadStore() = 0; virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; + virtual bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) = 0; virtual bool enableInterleavedAccessVectorization() = 0; virtual bool isFPVectorizationPotentiallyUnsafe() = 0; virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, @@ -815,7 +879,8 @@ public: virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; - virtual unsigned getRegisterBitWidth(bool Vector) = 0; + virtual unsigned getRegisterBitWidth(bool Vector) const = 0; + virtual unsigned getMinVectorRegisterBitWidth() = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; virtual unsigned getCacheLineSize() = 0; @@ -868,6 +933,7 @@ public: virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) = 0; virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) = 0; + virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) = 0; virtual bool areInlineCompatible(const Function *Caller, @@ -887,6 +953,9 @@ public: virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const = 0; + virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags) const = 0; + virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; }; template @@ -935,6 +1004,10 @@ public: return Impl.isSourceOfDivergence(V); } + bool isAlwaysUniform(const Value *V) override { + return Impl.isAlwaysUniform(V); + } + unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } @@ -957,6 +1030,10 @@ public: return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); } + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) override { + return Impl.isLSRCostLess(C1, C2); + } bool isLegalMaskedStore(Type *DataType) override { return Impl.isLegalMaskedStore(DataType); } @@ -969,6 +1046,9 @@ public: bool isLegalMaskedGather(Type *DataType) override { return Impl.isLegalMaskedGather(DataType); } + bool prefersVectorizedAddressing() override { + return Impl.prefersVectorizedAddressing(); + } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) override { @@ -1009,6 +1089,9 @@ public: bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); } + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) override { + return Impl.expandMemCmp(I, MaxLoadSize); + } bool enableInterleavedAccessVectorization() override { return Impl.enableInterleavedAccessVectorization(); } @@ -1046,9 +1129,12 @@ public: unsigned getNumberOfRegisters(bool Vector) override { return Impl.getNumberOfRegisters(Vector); } - unsigned getRegisterBitWidth(bool Vector) override { + unsigned getRegisterBitWidth(bool Vector) const override { return Impl.getRegisterBitWidth(Vector); } + unsigned getMinVectorRegisterBitWidth() override { + return Impl.getMinVectorRegisterBitWidth(); + } bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { return Impl.shouldConsiderAddressTypePromotion( @@ -1067,6 +1153,10 @@ public: unsigned getMaxInterleaveFactor(unsigned VF) override { return Impl.getMaxInterleaveFactor(VF); } + unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) override { + return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize); + } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, @@ -1149,6 +1239,9 @@ public: MemIntrinsicInfo &Info) override { return Impl.getTgtMemIntrinsic(Inst, Info); } + unsigned getAtomicMemIntrinsicMaxElementSize() const override { + return Impl.getAtomicMemIntrinsicMaxElementSize(); + } Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) override { return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); @@ -1188,6 +1281,13 @@ public: VectorType *VecTy) const override { return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const override { + return Impl.useReductionIntrinsic(Opcode, Ty, Flags); + } + bool shouldExpandReduction(const IntrinsicInst *II) const override { + return Impl.shouldExpandReduction(II); + } }; template diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 9ab6b7445ab86cd157b9393852d0219833cb5162..24ac3b1213e160cdf7c465b08a7d20955ce6832d 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -17,13 +17,13 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" -#include "llvm/Analysis/VectorUtils.h" namespace llvm { @@ -114,6 +114,12 @@ public: return TTI::TCC_Free; } + unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) { + JTSize = 0; + return SI.getNumCases(); + } + unsigned getCallCost(FunctionType *FTy, int NumArgs) { assert(FTy && "FunctionType must be provided to this routine."); @@ -171,6 +177,8 @@ public: bool isSourceOfDivergence(const Value *V) { return false; } + bool isAlwaysUniform(const Value *V) { return false; } + unsigned getFlatAddressSpace () { return -1; } @@ -223,6 +231,13 @@ public: return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); } + bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { + return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, + C1.ScaleCost, C1.ImmCost, C1.SetupCost) < + std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, + C2.ScaleCost, C2.ImmCost, C2.SetupCost); + } + bool isLegalMaskedStore(Type *DataType) { return false; } bool isLegalMaskedLoad(Type *DataType) { return false; } @@ -231,6 +246,8 @@ public: bool isLegalMaskedGather(Type *DataType) { return false; } + bool prefersVectorizedAddressing() { return true; } + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { // Guess that all legal addressing mode are free. @@ -266,6 +283,8 @@ public: bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize) { return false; } + bool enableInterleavedAccessVectorization() { return false; } bool isFPVectorizationPotentiallyUnsafe() { return false; } @@ -303,7 +322,9 @@ public: unsigned getNumberOfRegisters(bool Vector) { return 8; } - unsigned getRegisterBitWidth(bool Vector) { return 32; } + unsigned getRegisterBitWidth(bool Vector) const { return 32; } + + unsigned getMinVectorRegisterBitWidth() { return 128; } bool shouldConsiderAddressTypePromotion(const Instruction &I, @@ -408,6 +429,15 @@ public: return false; } + unsigned getAtomicMemIntrinsicMaxElementSize() const { + // Note for overrides: You must ensure for all element unordered-atomic + // memory intrinsics that all power-of-2 element sizes up to, and + // including, the return value of this method have a corresponding + // runtime lib call. These runtime lib call definitions can be found + // in RuntimeLibcalls.h + return 0; + } + Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) { return nullptr; @@ -450,6 +480,16 @@ public: VectorType *VecTy) const { return VF; } + + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + return false; + } + + bool shouldExpandReduction(const IntrinsicInst *II) const { + return true; + } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. diff --git a/include/llvm/Analysis/TypeMetadataUtils.h b/include/llvm/Analysis/TypeMetadataUtils.h index 17906ba4e392657b63d8b2f684b599531964cbde..422e153a5a78cc761b1a1e410668bd6ced7f013b 100644 --- a/include/llvm/Analysis/TypeMetadataUtils.h +++ b/include/llvm/Analysis/TypeMetadataUtils.h @@ -20,6 +20,13 @@ namespace llvm { +/// The type of CFI jumptable needed for a function. +enum CfiFunctionLinkage { + CFL_Definition = 0, + CFL_Declaration = 1, + CFL_WeakDeclaration = 2 +}; + /// A call site that could be devirtualized. struct DevirtCallSite { /// The offset from the address point to the virtual function. diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index e3c2f3bed2277537c90fb51a5240eb5f66d78991..e953ec8ab6abed7118d8c07928bb0e5bfe8f50b6 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -29,6 +29,7 @@ template class ArrayRef; class DominatorTree; class GEPOperator; class Instruction; + struct KnownBits; class Loop; class LoopInfo; class OptimizationRemarkEmitter; @@ -49,17 +50,23 @@ template class ArrayRef; /// where V is a vector, the known zero and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. - void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, + void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, OptimizationRemarkEmitter *ORE = nullptr); + /// Returns the known bits rather than passing by reference. + KnownBits computeKnownBits(const Value *V, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); /// Compute known bits from the range metadata. /// \p KnownZero the set of bits that are known to be zero /// \p KnownOne the set of bits that are known to be one void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - APInt &KnownZero, APInt &KnownOne); + KnownBits &Known); /// Return true if LHS and RHS have no common bits set. bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, @@ -67,14 +74,6 @@ template class ArrayRef; const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); - /// Determine whether the sign bit is known to be zero or one. Convenience - /// wrapper around computeKnownBits. - void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout &DL, unsigned Depth = 0, - AssumptionCache *AC = nullptr, - const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); - /// Return true if the given value is known to have exactly one bit set when /// defined. For vectors return true if every element is known to be a power /// of two when defined. Supports values with integer or pointer type and @@ -86,6 +85,8 @@ template class ArrayRef; const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); + bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI); + /// Return true if the given value is known to be non-zero when defined. For /// vectors, return true if every element is known to be non-zero when /// defined. For pointers, if the context instruction and dominator tree are @@ -220,9 +221,38 @@ template class ArrayRef; DL); } - /// Returns true if the GEP is based on a pointer to a string (array of i8), - /// and is indexing into this string. - bool isGEPBasedOnPointerToString(const GEPOperator *GEP); + /// Returns true if the GEP is based on a pointer to a string (array of + // \p CharSize integers) and is indexing into this string. + bool isGEPBasedOnPointerToString(const GEPOperator *GEP, + unsigned CharSize = 8); + + /// Represents offset+length into a ConstantDataArray. + struct ConstantDataArraySlice { + /// ConstantDataArray pointer. nullptr indicates a zeroinitializer (a valid + /// initializer, it just doesn't fit the ConstantDataArray interface). + const ConstantDataArray *Array; + /// Slice starts at this Offset. + uint64_t Offset; + /// Length of the slice. + uint64_t Length; + + /// Moves the Offset and adjusts Length accordingly. + void move(uint64_t Delta) { + assert(Delta < Length); + Offset += Delta; + Length -= Delta; + } + /// Convenience accessor for elements in the slice. + uint64_t operator[](unsigned I) const { + return Array==nullptr ? 0 : Array->getElementAsInteger(I + Offset); + } + }; + + /// Returns true if the value \p V is a pointer into a ContantDataArray. + /// If successful \p Index will point to a ConstantDataArray info object + /// with an appropriate offset. + bool getConstantDataArrayInfo(const Value *V, ConstantDataArraySlice &Slice, + unsigned ElementSize, uint64_t Offset = 0); /// This function computes the length of a null-terminated C string pointed to /// by V. If successful, it returns true and returns the string in Str. If @@ -235,7 +265,7 @@ template class ArrayRef; /// If we can compute the length of the string pointed to by the specified /// pointer, return 'len+1'. If we can't, return 0. - uint64_t GetStringLength(const Value *V); + uint64_t GetStringLength(const Value *V, unsigned CharSize = 8); /// This method strips off any GEP address adjustments and pointer casts from /// the specified value, returning the original object being addressed. Note @@ -416,7 +446,7 @@ template class ArrayRef; /// /// Note that this currently only considers the basic block that is /// the parent of I. - bool isKnownNotFullPoison(const Instruction *PoisonI); + bool programUndefinedIfFullPoison(const Instruction *PoisonI); /// \brief Specific patterns of select instructions we can match. enum SelectPatternFlavor { diff --git a/include/llvm/BinaryFormat/COFF.h b/include/llvm/BinaryFormat/COFF.h new file mode 100644 index 0000000000000000000000000000000000000000..5171c72b9e670865f0850e97975e3c4ef0406c61 --- /dev/null +++ b/include/llvm/BinaryFormat/COFF.h @@ -0,0 +1,713 @@ +//===-- llvm/BinaryFormat/COFF.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an definitions used in Windows COFF Files. +// +// Structures and enums defined within this file where created using +// information from Microsoft's publicly available PE/COFF format document: +// +// Microsoft Portable Executable and Common Object File Format Specification +// Revision 8.1 - February 15, 2008 +// +// As of 5/2/2010, hosted by Microsoft at: +// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_COFF_H +#define LLVM_BINARYFORMAT_COFF_H + +#include "llvm/Support/DataTypes.h" +#include +#include + +namespace llvm { +namespace COFF { + +// The maximum number of sections that a COFF object can have (inclusive). +const int32_t MaxNumberOfSections16 = 65279; + +// The PE signature bytes that follows the DOS stub header. +static const char PEMagic[] = {'P', 'E', '\0', '\0'}; + +static const char BigObjMagic[] = { + '\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b', + '\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8', +}; + +static const char ClGlObjMagic[] = { + '\x38', '\xfe', '\xb3', '\x0c', '\xa5', '\xd9', '\xab', '\x4d', + '\xac', '\x9b', '\xd6', '\xb6', '\x22', '\x26', '\x53', '\xc2', +}; + +// Sizes in bytes of various things in the COFF format. +enum { + Header16Size = 20, + Header32Size = 56, + NameSize = 8, + Symbol16Size = 18, + Symbol32Size = 20, + SectionSize = 40, + RelocationSize = 10 +}; + +struct header { + uint16_t Machine; + int32_t NumberOfSections; + uint32_t TimeDateStamp; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; + uint16_t SizeOfOptionalHeader; + uint16_t Characteristics; +}; + +struct BigObjHeader { + enum : uint16_t { MinBigObjectVersion = 2 }; + + uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). + uint16_t Sig2; ///< Must be 0xFFFF. + uint16_t Version; + uint16_t Machine; + uint32_t TimeDateStamp; + uint8_t UUID[16]; + uint32_t unused1; + uint32_t unused2; + uint32_t unused3; + uint32_t unused4; + uint32_t NumberOfSections; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; +}; + +enum MachineTypes { + MT_Invalid = 0xffff, + + IMAGE_FILE_MACHINE_UNKNOWN = 0x0, + IMAGE_FILE_MACHINE_AM33 = 0x13, + IMAGE_FILE_MACHINE_AMD64 = 0x8664, + IMAGE_FILE_MACHINE_ARM = 0x1C0, + IMAGE_FILE_MACHINE_ARMNT = 0x1C4, + IMAGE_FILE_MACHINE_ARM64 = 0xAA64, + IMAGE_FILE_MACHINE_EBC = 0xEBC, + IMAGE_FILE_MACHINE_I386 = 0x14C, + IMAGE_FILE_MACHINE_IA64 = 0x200, + IMAGE_FILE_MACHINE_M32R = 0x9041, + IMAGE_FILE_MACHINE_MIPS16 = 0x266, + IMAGE_FILE_MACHINE_MIPSFPU = 0x366, + IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466, + IMAGE_FILE_MACHINE_POWERPC = 0x1F0, + IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1, + IMAGE_FILE_MACHINE_R4000 = 0x166, + IMAGE_FILE_MACHINE_SH3 = 0x1A2, + IMAGE_FILE_MACHINE_SH3DSP = 0x1A3, + IMAGE_FILE_MACHINE_SH4 = 0x1A6, + IMAGE_FILE_MACHINE_SH5 = 0x1A8, + IMAGE_FILE_MACHINE_THUMB = 0x1C2, + IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 +}; + +enum Characteristics { + C_Invalid = 0, + + /// The file does not contain base relocations and must be loaded at its + /// preferred base. If this cannot be done, the loader will error. + IMAGE_FILE_RELOCS_STRIPPED = 0x0001, + /// The file is valid and can be run. + IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002, + /// COFF line numbers have been stripped. This is deprecated and should be + /// 0. + IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004, + /// COFF symbol table entries for local symbols have been removed. This is + /// deprecated and should be 0. + IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008, + /// Aggressively trim working set. This is deprecated and must be 0. + IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010, + /// Image can handle > 2GiB addresses. + IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020, + /// Little endian: the LSB precedes the MSB in memory. This is deprecated + /// and should be 0. + IMAGE_FILE_BYTES_REVERSED_LO = 0x0080, + /// Machine is based on a 32bit word architecture. + IMAGE_FILE_32BIT_MACHINE = 0x0100, + /// Debugging info has been removed. + IMAGE_FILE_DEBUG_STRIPPED = 0x0200, + /// If the image is on removable media, fully load it and copy it to swap. + IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400, + /// If the image is on network media, fully load it and copy it to swap. + IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800, + /// The image file is a system file, not a user program. + IMAGE_FILE_SYSTEM = 0x1000, + /// The image file is a DLL. + IMAGE_FILE_DLL = 0x2000, + /// This file should only be run on a uniprocessor machine. + IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000, + /// Big endian: the MSB precedes the LSB in memory. This is deprecated + /// and should be 0. + IMAGE_FILE_BYTES_REVERSED_HI = 0x8000 +}; + +enum ResourceTypeID { + RID_Cursor = 1, + RID_Bitmap = 2, + RID_Icon = 3, + RID_Menu = 4, + RID_Dialog = 5, + RID_String = 6, + RID_FontDir = 7, + RID_Font = 8, + RID_Accelerator = 9, + RID_RCData = 10, + RID_MessageTable = 11, + RID_Group_Cursor = 12, + RID_Group_Icon = 14, + RID_Version = 16, + RID_DLGInclude = 17, + RID_PlugPlay = 19, + RID_VXD = 20, + RID_AniCursor = 21, + RID_AniIcon = 22, + RID_HTML = 23, + RID_Manifest = 24, +}; + +struct symbol { + char Name[NameSize]; + uint32_t Value; + int32_t SectionNumber; + uint16_t Type; + uint8_t StorageClass; + uint8_t NumberOfAuxSymbols; +}; + +enum SymbolSectionNumber : int32_t { + IMAGE_SYM_DEBUG = -2, + IMAGE_SYM_ABSOLUTE = -1, + IMAGE_SYM_UNDEFINED = 0 +}; + +/// Storage class tells where and what the symbol represents +enum SymbolStorageClass { + SSC_Invalid = 0xff, + + IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function + IMAGE_SYM_CLASS_NULL = 0, ///< No symbol + IMAGE_SYM_CLASS_AUTOMATIC = 1, ///< Stack variable + IMAGE_SYM_CLASS_EXTERNAL = 2, ///< External symbol + IMAGE_SYM_CLASS_STATIC = 3, ///< Static + IMAGE_SYM_CLASS_REGISTER = 4, ///< Register variable + IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, ///< External definition + IMAGE_SYM_CLASS_LABEL = 6, ///< Label + IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, ///< Undefined label + IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, ///< Member of structure + IMAGE_SYM_CLASS_ARGUMENT = 9, ///< Function argument + IMAGE_SYM_CLASS_STRUCT_TAG = 10, ///< Structure tag + IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, ///< Member of union + IMAGE_SYM_CLASS_UNION_TAG = 12, ///< Union tag + IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, ///< Type definition + IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, ///< Undefined static + IMAGE_SYM_CLASS_ENUM_TAG = 15, ///< Enumeration tag + IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, ///< Member of enumeration + IMAGE_SYM_CLASS_REGISTER_PARAM = 17, ///< Register parameter + IMAGE_SYM_CLASS_BIT_FIELD = 18, ///< Bit field + /// ".bb" or ".eb" - beginning or end of block + IMAGE_SYM_CLASS_BLOCK = 100, + /// ".bf" or ".ef" - beginning or end of function + IMAGE_SYM_CLASS_FUNCTION = 101, + IMAGE_SYM_CLASS_END_OF_STRUCT = 102, ///< End of structure + IMAGE_SYM_CLASS_FILE = 103, ///< File name + /// Line number, reformatted as symbol + IMAGE_SYM_CLASS_SECTION = 104, + IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, ///< Duplicate tag + /// External symbol in dmert public lib + IMAGE_SYM_CLASS_CLR_TOKEN = 107 +}; + +enum SymbolBaseType { + IMAGE_SYM_TYPE_NULL = 0, ///< No type information or unknown base type. + IMAGE_SYM_TYPE_VOID = 1, ///< Used with void pointers and functions. + IMAGE_SYM_TYPE_CHAR = 2, ///< A character (signed byte). + IMAGE_SYM_TYPE_SHORT = 3, ///< A 2-byte signed integer. + IMAGE_SYM_TYPE_INT = 4, ///< A natural integer type on the target. + IMAGE_SYM_TYPE_LONG = 5, ///< A 4-byte signed integer. + IMAGE_SYM_TYPE_FLOAT = 6, ///< A 4-byte floating-point number. + IMAGE_SYM_TYPE_DOUBLE = 7, ///< An 8-byte floating-point number. + IMAGE_SYM_TYPE_STRUCT = 8, ///< A structure. + IMAGE_SYM_TYPE_UNION = 9, ///< An union. + IMAGE_SYM_TYPE_ENUM = 10, ///< An enumerated type. + IMAGE_SYM_TYPE_MOE = 11, ///< A member of enumeration (a specific value). + IMAGE_SYM_TYPE_BYTE = 12, ///< A byte; unsigned 1-byte integer. + IMAGE_SYM_TYPE_WORD = 13, ///< A word; unsigned 2-byte integer. + IMAGE_SYM_TYPE_UINT = 14, ///< An unsigned integer of natural size. + IMAGE_SYM_TYPE_DWORD = 15 ///< An unsigned 4-byte integer. +}; + +enum SymbolComplexType { + IMAGE_SYM_DTYPE_NULL = 0, ///< No complex type; simple scalar variable. + IMAGE_SYM_DTYPE_POINTER = 1, ///< A pointer to base type. + IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type. + IMAGE_SYM_DTYPE_ARRAY = 3, ///< An array of base type. + + /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT)) + SCT_COMPLEX_TYPE_SHIFT = 4 +}; + +enum AuxSymbolType { IMAGE_AUX_SYMBOL_TYPE_TOKEN_DEF = 1 }; + +struct section { + char Name[NameSize]; + uint32_t VirtualSize; + uint32_t VirtualAddress; + uint32_t SizeOfRawData; + uint32_t PointerToRawData; + uint32_t PointerToRelocations; + uint32_t PointerToLineNumbers; + uint16_t NumberOfRelocations; + uint16_t NumberOfLineNumbers; + uint32_t Characteristics; +}; + +enum SectionCharacteristics : uint32_t { + SC_Invalid = 0xffffffff, + + IMAGE_SCN_TYPE_NOLOAD = 0x00000002, + IMAGE_SCN_TYPE_NO_PAD = 0x00000008, + IMAGE_SCN_CNT_CODE = 0x00000020, + IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, + IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080, + IMAGE_SCN_LNK_OTHER = 0x00000100, + IMAGE_SCN_LNK_INFO = 0x00000200, + IMAGE_SCN_LNK_REMOVE = 0x00000800, + IMAGE_SCN_LNK_COMDAT = 0x00001000, + IMAGE_SCN_GPREL = 0x00008000, + IMAGE_SCN_MEM_PURGEABLE = 0x00020000, + IMAGE_SCN_MEM_16BIT = 0x00020000, + IMAGE_SCN_MEM_LOCKED = 0x00040000, + IMAGE_SCN_MEM_PRELOAD = 0x00080000, + IMAGE_SCN_ALIGN_1BYTES = 0x00100000, + IMAGE_SCN_ALIGN_2BYTES = 0x00200000, + IMAGE_SCN_ALIGN_4BYTES = 0x00300000, + IMAGE_SCN_ALIGN_8BYTES = 0x00400000, + IMAGE_SCN_ALIGN_16BYTES = 0x00500000, + IMAGE_SCN_ALIGN_32BYTES = 0x00600000, + IMAGE_SCN_ALIGN_64BYTES = 0x00700000, + IMAGE_SCN_ALIGN_128BYTES = 0x00800000, + IMAGE_SCN_ALIGN_256BYTES = 0x00900000, + IMAGE_SCN_ALIGN_512BYTES = 0x00A00000, + IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000, + IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, + IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, + IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, + IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, + IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, + IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, + IMAGE_SCN_MEM_NOT_PAGED = 0x08000000, + IMAGE_SCN_MEM_SHARED = 0x10000000, + IMAGE_SCN_MEM_EXECUTE = 0x20000000, + IMAGE_SCN_MEM_READ = 0x40000000, + IMAGE_SCN_MEM_WRITE = 0x80000000 +}; + +struct relocation { + uint32_t VirtualAddress; + uint32_t SymbolTableIndex; + uint16_t Type; +}; + +enum RelocationTypeI386 { + IMAGE_REL_I386_ABSOLUTE = 0x0000, + IMAGE_REL_I386_DIR16 = 0x0001, + IMAGE_REL_I386_REL16 = 0x0002, + IMAGE_REL_I386_DIR32 = 0x0006, + IMAGE_REL_I386_DIR32NB = 0x0007, + IMAGE_REL_I386_SEG12 = 0x0009, + IMAGE_REL_I386_SECTION = 0x000A, + IMAGE_REL_I386_SECREL = 0x000B, + IMAGE_REL_I386_TOKEN = 0x000C, + IMAGE_REL_I386_SECREL7 = 0x000D, + IMAGE_REL_I386_REL32 = 0x0014 +}; + +enum RelocationTypeAMD64 { + IMAGE_REL_AMD64_ABSOLUTE = 0x0000, + IMAGE_REL_AMD64_ADDR64 = 0x0001, + IMAGE_REL_AMD64_ADDR32 = 0x0002, + IMAGE_REL_AMD64_ADDR32NB = 0x0003, + IMAGE_REL_AMD64_REL32 = 0x0004, + IMAGE_REL_AMD64_REL32_1 = 0x0005, + IMAGE_REL_AMD64_REL32_2 = 0x0006, + IMAGE_REL_AMD64_REL32_3 = 0x0007, + IMAGE_REL_AMD64_REL32_4 = 0x0008, + IMAGE_REL_AMD64_REL32_5 = 0x0009, + IMAGE_REL_AMD64_SECTION = 0x000A, + IMAGE_REL_AMD64_SECREL = 0x000B, + IMAGE_REL_AMD64_SECREL7 = 0x000C, + IMAGE_REL_AMD64_TOKEN = 0x000D, + IMAGE_REL_AMD64_SREL32 = 0x000E, + IMAGE_REL_AMD64_PAIR = 0x000F, + IMAGE_REL_AMD64_SSPAN32 = 0x0010 +}; + +enum RelocationTypesARM { + IMAGE_REL_ARM_ABSOLUTE = 0x0000, + IMAGE_REL_ARM_ADDR32 = 0x0001, + IMAGE_REL_ARM_ADDR32NB = 0x0002, + IMAGE_REL_ARM_BRANCH24 = 0x0003, + IMAGE_REL_ARM_BRANCH11 = 0x0004, + IMAGE_REL_ARM_TOKEN = 0x0005, + IMAGE_REL_ARM_BLX24 = 0x0008, + IMAGE_REL_ARM_BLX11 = 0x0009, + IMAGE_REL_ARM_SECTION = 0x000E, + IMAGE_REL_ARM_SECREL = 0x000F, + IMAGE_REL_ARM_MOV32A = 0x0010, + IMAGE_REL_ARM_MOV32T = 0x0011, + IMAGE_REL_ARM_BRANCH20T = 0x0012, + IMAGE_REL_ARM_BRANCH24T = 0x0014, + IMAGE_REL_ARM_BLX23T = 0x0015 +}; + +enum RelocationTypesARM64 { + IMAGE_REL_ARM64_ABSOLUTE = 0x0000, + IMAGE_REL_ARM64_ADDR32 = 0x0001, + IMAGE_REL_ARM64_ADDR32NB = 0x0002, + IMAGE_REL_ARM64_BRANCH26 = 0x0003, + IMAGE_REL_ARM64_PAGEBASE_REL2 = 0x0004, + IMAGE_REL_ARM64_REL21 = 0x0005, + IMAGE_REL_ARM64_PAGEOFFSET_12A = 0x0006, + IMAGE_REL_ARM64_PAGEOFFSET_12L = 0x0007, + IMAGE_REL_ARM64_SECREL = 0x0008, + IMAGE_REL_ARM64_SECREL_LOW12A = 0x0009, + IMAGE_REL_ARM64_SECREL_HIGH12A = 0x000A, + IMAGE_REL_ARM64_SECREL_LOW12L = 0x000B, + IMAGE_REL_ARM64_TOKEN = 0x000C, + IMAGE_REL_ARM64_SECTION = 0x000D, + IMAGE_REL_ARM64_ADDR64 = 0x000E, + IMAGE_REL_ARM64_BRANCH19 = 0x000F, + IMAGE_REL_ARM64_BRANCH14 = 0x0010, +}; + +enum COMDATType { + IMAGE_COMDAT_SELECT_NODUPLICATES = 1, + IMAGE_COMDAT_SELECT_ANY, + IMAGE_COMDAT_SELECT_SAME_SIZE, + IMAGE_COMDAT_SELECT_EXACT_MATCH, + IMAGE_COMDAT_SELECT_ASSOCIATIVE, + IMAGE_COMDAT_SELECT_LARGEST, + IMAGE_COMDAT_SELECT_NEWEST +}; + +// Auxiliary Symbol Formats +struct AuxiliaryFunctionDefinition { + uint32_t TagIndex; + uint32_t TotalSize; + uint32_t PointerToLinenumber; + uint32_t PointerToNextFunction; + char unused[2]; +}; + +struct AuxiliarybfAndefSymbol { + uint8_t unused1[4]; + uint16_t Linenumber; + uint8_t unused2[6]; + uint32_t PointerToNextFunction; + uint8_t unused3[2]; +}; + +struct AuxiliaryWeakExternal { + uint32_t TagIndex; + uint32_t Characteristics; + uint8_t unused[10]; +}; + +enum WeakExternalCharacteristics { + IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1, + IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2, + IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3 +}; + +struct AuxiliarySectionDefinition { + uint32_t Length; + uint16_t NumberOfRelocations; + uint16_t NumberOfLinenumbers; + uint32_t CheckSum; + uint32_t Number; + uint8_t Selection; + char unused; +}; + +struct AuxiliaryCLRToken { + uint8_t AuxType; + uint8_t unused1; + uint32_t SymbolTableIndex; + char unused2[12]; +}; + +union Auxiliary { + AuxiliaryFunctionDefinition FunctionDefinition; + AuxiliarybfAndefSymbol bfAndefSymbol; + AuxiliaryWeakExternal WeakExternal; + AuxiliarySectionDefinition SectionDefinition; +}; + +/// @brief The Import Directory Table. +/// +/// There is a single array of these and one entry per imported DLL. +struct ImportDirectoryTableEntry { + uint32_t ImportLookupTableRVA; + uint32_t TimeDateStamp; + uint32_t ForwarderChain; + uint32_t NameRVA; + uint32_t ImportAddressTableRVA; +}; + +/// @brief The PE32 Import Lookup Table. +/// +/// There is an array of these for each imported DLL. It represents either +/// the ordinal to import from the target DLL, or a name to lookup and import +/// from the target DLL. +/// +/// This also happens to be the same format used by the Import Address Table +/// when it is initially written out to the image. +struct ImportLookupTableEntry32 { + uint32_t data; + + /// @brief Is this entry specified by ordinal, or name? + bool isOrdinal() const { return data & 0x80000000; } + + /// @brief Get the ordinal value of this entry. isOrdinal must be true. + uint16_t getOrdinal() const { + assert(isOrdinal() && "ILT entry is not an ordinal!"); + return data & 0xFFFF; + } + + /// @brief Set the ordinal value and set isOrdinal to true. + void setOrdinal(uint16_t o) { + data = o; + data |= 0x80000000; + } + + /// @brief Get the Hint/Name entry RVA. isOrdinal must be false. + uint32_t getHintNameRVA() const { + assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!"); + return data; + } + + /// @brief Set the Hint/Name entry RVA and set isOrdinal to false. + void setHintNameRVA(uint32_t rva) { data = rva; } +}; + +/// @brief The DOS compatible header at the front of all PEs. +struct DOSHeader { + uint16_t Magic; + uint16_t UsedBytesInTheLastPage; + uint16_t FileSizeInPages; + uint16_t NumberOfRelocationItems; + uint16_t HeaderSizeInParagraphs; + uint16_t MinimumExtraParagraphs; + uint16_t MaximumExtraParagraphs; + uint16_t InitialRelativeSS; + uint16_t InitialSP; + uint16_t Checksum; + uint16_t InitialIP; + uint16_t InitialRelativeCS; + uint16_t AddressOfRelocationTable; + uint16_t OverlayNumber; + uint16_t Reserved[4]; + uint16_t OEMid; + uint16_t OEMinfo; + uint16_t Reserved2[10]; + uint32_t AddressOfNewExeHeader; +}; + +struct PE32Header { + enum { PE32 = 0x10b, PE32_PLUS = 0x20b }; + + uint16_t Magic; + uint8_t MajorLinkerVersion; + uint8_t MinorLinkerVersion; + uint32_t SizeOfCode; + uint32_t SizeOfInitializedData; + uint32_t SizeOfUninitializedData; + uint32_t AddressOfEntryPoint; // RVA + uint32_t BaseOfCode; // RVA + uint32_t BaseOfData; // RVA + uint32_t ImageBase; + uint32_t SectionAlignment; + uint32_t FileAlignment; + uint16_t MajorOperatingSystemVersion; + uint16_t MinorOperatingSystemVersion; + uint16_t MajorImageVersion; + uint16_t MinorImageVersion; + uint16_t MajorSubsystemVersion; + uint16_t MinorSubsystemVersion; + uint32_t Win32VersionValue; + uint32_t SizeOfImage; + uint32_t SizeOfHeaders; + uint32_t CheckSum; + uint16_t Subsystem; + // FIXME: This should be DllCharacteristics to match the COFF spec. + uint16_t DLLCharacteristics; + uint32_t SizeOfStackReserve; + uint32_t SizeOfStackCommit; + uint32_t SizeOfHeapReserve; + uint32_t SizeOfHeapCommit; + uint32_t LoaderFlags; + // FIXME: This should be NumberOfRvaAndSizes to match the COFF spec. + uint32_t NumberOfRvaAndSize; +}; + +struct DataDirectory { + uint32_t RelativeVirtualAddress; + uint32_t Size; +}; + +enum DataDirectoryIndex { + EXPORT_TABLE = 0, + IMPORT_TABLE, + RESOURCE_TABLE, + EXCEPTION_TABLE, + CERTIFICATE_TABLE, + BASE_RELOCATION_TABLE, + DEBUG_DIRECTORY, + ARCHITECTURE, + GLOBAL_PTR, + TLS_TABLE, + LOAD_CONFIG_TABLE, + BOUND_IMPORT, + IAT, + DELAY_IMPORT_DESCRIPTOR, + CLR_RUNTIME_HEADER, + + NUM_DATA_DIRECTORIES +}; + +enum WindowsSubsystem { + IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem. + IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes + IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem. + IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, ///< The Windows character subsystem. + IMAGE_SUBSYSTEM_OS2_CUI = 5, ///< The OS/2 character subsytem. + IMAGE_SUBSYSTEM_POSIX_CUI = 7, ///< The POSIX character subsystem. + IMAGE_SUBSYSTEM_NATIVE_WINDOWS = 8, ///< Native Windows 9x driver. + IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, ///< Windows CE. + IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application. + IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot + /// services. + IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, ///< An EFI driver with run-time + /// services. + IMAGE_SUBSYSTEM_EFI_ROM = 13, ///< An EFI ROM image. + IMAGE_SUBSYSTEM_XBOX = 14, ///< XBOX. + IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 ///< A BCD application. +}; + +enum DLLCharacteristics { + /// ASLR with 64 bit address space. + IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020, + /// DLL can be relocated at load time. + IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040, + /// Code integrity checks are enforced. + IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080, + ///< Image is NX compatible. + IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100, + /// Isolation aware, but do not isolate the image. + IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200, + /// Does not use structured exception handling (SEH). No SEH handler may be + /// called in this image. + IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400, + /// Do not bind the image. + IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800, + ///< Image should execute in an AppContainer. + IMAGE_DLL_CHARACTERISTICS_APPCONTAINER = 0x1000, + ///< A WDM driver. + IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000, + ///< Image supports Control Flow Guard. + IMAGE_DLL_CHARACTERISTICS_GUARD_CF = 0x4000, + /// Terminal Server aware. + IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000 +}; + +enum DebugType { + IMAGE_DEBUG_TYPE_UNKNOWN = 0, + IMAGE_DEBUG_TYPE_COFF = 1, + IMAGE_DEBUG_TYPE_CODEVIEW = 2, + IMAGE_DEBUG_TYPE_FPO = 3, + IMAGE_DEBUG_TYPE_MISC = 4, + IMAGE_DEBUG_TYPE_EXCEPTION = 5, + IMAGE_DEBUG_TYPE_FIXUP = 6, + IMAGE_DEBUG_TYPE_OMAP_TO_SRC = 7, + IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8, + IMAGE_DEBUG_TYPE_BORLAND = 9, + IMAGE_DEBUG_TYPE_RESERVED10 = 10, + IMAGE_DEBUG_TYPE_CLSID = 11, + IMAGE_DEBUG_TYPE_VC_FEATURE = 12, + IMAGE_DEBUG_TYPE_POGO = 13, + IMAGE_DEBUG_TYPE_ILTCG = 14, + IMAGE_DEBUG_TYPE_MPX = 15, + IMAGE_DEBUG_TYPE_REPRO = 16, +}; + +enum BaseRelocationType { + IMAGE_REL_BASED_ABSOLUTE = 0, + IMAGE_REL_BASED_HIGH = 1, + IMAGE_REL_BASED_LOW = 2, + IMAGE_REL_BASED_HIGHLOW = 3, + IMAGE_REL_BASED_HIGHADJ = 4, + IMAGE_REL_BASED_MIPS_JMPADDR = 5, + IMAGE_REL_BASED_ARM_MOV32A = 5, + IMAGE_REL_BASED_ARM_MOV32T = 7, + IMAGE_REL_BASED_MIPS_JMPADDR16 = 9, + IMAGE_REL_BASED_DIR64 = 10 +}; + +enum ImportType { IMPORT_CODE = 0, IMPORT_DATA = 1, IMPORT_CONST = 2 }; + +enum ImportNameType { + /// Import is by ordinal. This indicates that the value in the Ordinal/Hint + /// field of the import header is the import's ordinal. If this constant is + /// not specified, then the Ordinal/Hint field should always be interpreted + /// as the import's hint. + IMPORT_ORDINAL = 0, + /// The import name is identical to the public symbol name + IMPORT_NAME = 1, + /// The import name is the public symbol name, but skipping the leading ?, + /// @, or optionally _. + IMPORT_NAME_NOPREFIX = 2, + /// The import name is the public symbol name, but skipping the leading ?, + /// @, or optionally _, and truncating at the first @. + IMPORT_NAME_UNDECORATE = 3 +}; + +struct ImportHeader { + uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). + uint16_t Sig2; ///< Must be 0xFFFF. + uint16_t Version; + uint16_t Machine; + uint32_t TimeDateStamp; + uint32_t SizeOfData; + uint16_t OrdinalHint; + uint16_t TypeInfo; + + ImportType getType() const { return static_cast(TypeInfo & 0x3); } + + ImportNameType getNameType() const { + return static_cast((TypeInfo & 0x1C) >> 2); + } +}; + +enum CodeViewIdentifiers { + DEBUG_SECTION_MAGIC = 0x4, +}; + +inline bool isReservedSectionNumber(int32_t SectionNumber) { + return SectionNumber <= 0; +} + +} // End namespace COFF. +} // End namespace llvm. + +#endif diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def new file mode 100644 index 0000000000000000000000000000000000000000..3df3300de4668427f2d4abf2d48571f912c82df3 --- /dev/null +++ b/include/llvm/BinaryFormat/Dwarf.def @@ -0,0 +1,838 @@ +//===- llvm/Support/Dwarf.def - Dwarf definitions ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Macros for running through Dwarf enumerators. +// +//===----------------------------------------------------------------------===// + +// TODO: Add other DW-based macros. +#if !(defined HANDLE_DW_TAG || defined HANDLE_DW_AT || \ + defined HANDLE_DW_FORM || defined HANDLE_DW_OP || \ + defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \ + defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \ + defined HANDLE_DW_CC || defined HANDLE_DW_LNS || \ + defined HANDLE_DW_LNE || defined HANDLE_DW_LNCT || \ + defined HANDLE_DW_MACRO || defined HANDLE_DW_RLE || \ + defined HANDLE_DW_CFA || defined HANDLE_DW_APPLE_PROPERTY || \ + defined HANDLE_DW_UT) +#error "Missing macro definition of HANDLE_DW*" +#endif + +#ifndef HANDLE_DW_TAG +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_AT +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_FORM +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_OP +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_LANG +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_ATE +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) +#endif + +#ifndef HANDLE_DW_VIRTUALITY +#define HANDLE_DW_VIRTUALITY(ID, NAME) +#endif + +#ifndef HANDLE_DW_DEFAULTED +#define HANDLE_DW_DEFAULTED(ID, NAME) +#endif + +#ifndef HANDLE_DW_CC +#define HANDLE_DW_CC(ID, NAME) +#endif + +#ifndef HANDLE_DW_LNS +#define HANDLE_DW_LNS(ID, NAME) +#endif + +#ifndef HANDLE_DW_LNE +#define HANDLE_DW_LNE(ID, NAME) +#endif + +#ifndef HANDLE_DW_LNCT +#define HANDLE_DW_LNCT(ID, NAME) +#endif + +#ifndef HANDLE_DW_MACRO +#define HANDLE_DW_MACRO(ID, NAME) +#endif + +#ifndef HANDLE_DW_RLE +#define HANDLE_DW_RLE(ID, NAME) +#endif + +#ifndef HANDLE_DW_CFA +#define HANDLE_DW_CFA(ID, NAME) +#endif + +#ifndef HANDLE_DW_APPLE_PROPERTY +#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) +#endif + +#ifndef HANDLE_DW_UT +#define HANDLE_DW_UT(ID, NAME) +#endif + +HANDLE_DW_TAG(0x0000, null, 2, DWARF) +HANDLE_DW_TAG(0x0001, array_type, 2, DWARF) +HANDLE_DW_TAG(0x0002, class_type, 2, DWARF) +HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF) +HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF) +HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF) +HANDLE_DW_TAG(0x000a, label, 2, DWARF) +HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF) +HANDLE_DW_TAG(0x000d, member, 2, DWARF) +HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF) +HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF) +HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF) +HANDLE_DW_TAG(0x0012, string_type, 2, DWARF) +HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF) +HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF) +HANDLE_DW_TAG(0x0016, typedef, 2, DWARF) +HANDLE_DW_TAG(0x0017, union_type, 2, DWARF) +HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF) +HANDLE_DW_TAG(0x0019, variant, 2, DWARF) +HANDLE_DW_TAG(0x001a, common_block, 2, DWARF) +HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF) +HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF) +HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF) +HANDLE_DW_TAG(0x001e, module, 2, DWARF) +HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF) +HANDLE_DW_TAG(0x0020, set_type, 2, DWARF) +HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF) +HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF) +HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF) +HANDLE_DW_TAG(0x0024, base_type, 2, DWARF) +HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF) +HANDLE_DW_TAG(0x0026, const_type, 2, DWARF) +HANDLE_DW_TAG(0x0027, constant, 2, DWARF) +HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF) +HANDLE_DW_TAG(0x0029, file_type, 2, DWARF) +HANDLE_DW_TAG(0x002a, friend, 2, DWARF) +HANDLE_DW_TAG(0x002b, namelist, 2, DWARF) +HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF) +HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF) +HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF) +HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF) +HANDLE_DW_TAG(0x0032, try_block, 2, DWARF) +HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF) +HANDLE_DW_TAG(0x0034, variable, 2, DWARF) +HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF) +HANDLE_DW_TAG(0x0037, restrict_type, 3, DWARF) +HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF) +HANDLE_DW_TAG(0x0039, namespace, 3, DWARF) +HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF) +HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF) +HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF) +HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF) +HANDLE_DW_TAG(0x003f, condition, 3, DWARF) +HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF) +HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF) +HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF) +HANDLE_DW_TAG(0x0045, generic_subrange, 5, DWARF) +HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF) +HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF) +HANDLE_DW_TAG(0x0048, call_site, 5, DWARF) +HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF) +HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF) +HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF) +// Vendor extensions: +HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS) +HANDLE_DW_TAG(0x4101, format_label, 0, GNU) +HANDLE_DW_TAG(0x4102, function_template, 0, GNU) +HANDLE_DW_TAG(0x4103, class_template, 0, GNU) +HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU) +HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU) +HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU) +HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE) +HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND) +HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND) +HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND) +HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND) +HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND) + +// Attributes. +HANDLE_DW_AT(0x01, sibling, 2, DWARF) +HANDLE_DW_AT(0x02, location, 2, DWARF) +HANDLE_DW_AT(0x03, name, 2, DWARF) +HANDLE_DW_AT(0x09, ordering, 2, DWARF) +HANDLE_DW_AT(0x0b, byte_size, 2, DWARF) +HANDLE_DW_AT(0x0c, bit_offset, 2, DWARF) +HANDLE_DW_AT(0x0d, bit_size, 2, DWARF) +HANDLE_DW_AT(0x10, stmt_list, 2, DWARF) +HANDLE_DW_AT(0x11, low_pc, 2, DWARF) +HANDLE_DW_AT(0x12, high_pc, 2, DWARF) +HANDLE_DW_AT(0x13, language, 2, DWARF) +HANDLE_DW_AT(0x15, discr, 2, DWARF) +HANDLE_DW_AT(0x16, discr_value, 2, DWARF) +HANDLE_DW_AT(0x17, visibility, 2, DWARF) +HANDLE_DW_AT(0x18, import, 2, DWARF) +HANDLE_DW_AT(0x19, string_length, 2, DWARF) +HANDLE_DW_AT(0x1a, common_reference, 2, DWARF) +HANDLE_DW_AT(0x1b, comp_dir, 2, DWARF) +HANDLE_DW_AT(0x1c, const_value, 2, DWARF) +HANDLE_DW_AT(0x1d, containing_type, 2, DWARF) +HANDLE_DW_AT(0x1e, default_value, 2, DWARF) +HANDLE_DW_AT(0x20, inline, 2, DWARF) +HANDLE_DW_AT(0x21, is_optional, 2, DWARF) +HANDLE_DW_AT(0x22, lower_bound, 2, DWARF) +HANDLE_DW_AT(0x25, producer, 2, DWARF) +HANDLE_DW_AT(0x27, prototyped, 2, DWARF) +HANDLE_DW_AT(0x2a, return_addr, 2, DWARF) +HANDLE_DW_AT(0x2c, start_scope, 2, DWARF) +HANDLE_DW_AT(0x2e, bit_stride, 2, DWARF) +HANDLE_DW_AT(0x2f, upper_bound, 2, DWARF) +HANDLE_DW_AT(0x31, abstract_origin, 2, DWARF) +HANDLE_DW_AT(0x32, accessibility, 2, DWARF) +HANDLE_DW_AT(0x33, address_class, 2, DWARF) +HANDLE_DW_AT(0x34, artificial, 2, DWARF) +HANDLE_DW_AT(0x35, base_types, 2, DWARF) +HANDLE_DW_AT(0x36, calling_convention, 2, DWARF) +HANDLE_DW_AT(0x37, count, 2, DWARF) +HANDLE_DW_AT(0x38, data_member_location, 2, DWARF) +HANDLE_DW_AT(0x39, decl_column, 2, DWARF) +HANDLE_DW_AT(0x3a, decl_file, 2, DWARF) +HANDLE_DW_AT(0x3b, decl_line, 2, DWARF) +HANDLE_DW_AT(0x3c, declaration, 2, DWARF) +HANDLE_DW_AT(0x3d, discr_list, 2, DWARF) +HANDLE_DW_AT(0x3e, encoding, 2, DWARF) +HANDLE_DW_AT(0x3f, external, 2, DWARF) +HANDLE_DW_AT(0x40, frame_base, 2, DWARF) +HANDLE_DW_AT(0x41, friend, 2, DWARF) +HANDLE_DW_AT(0x42, identifier_case, 2, DWARF) +HANDLE_DW_AT(0x43, macro_info, 2, DWARF) +HANDLE_DW_AT(0x44, namelist_item, 2, DWARF) +HANDLE_DW_AT(0x45, priority, 2, DWARF) +HANDLE_DW_AT(0x46, segment, 2, DWARF) +HANDLE_DW_AT(0x47, specification, 2, DWARF) +HANDLE_DW_AT(0x48, static_link, 2, DWARF) +HANDLE_DW_AT(0x49, type, 2, DWARF) +HANDLE_DW_AT(0x4a, use_location, 2, DWARF) +HANDLE_DW_AT(0x4b, variable_parameter, 2, DWARF) +HANDLE_DW_AT(0x4c, virtuality, 2, DWARF) +HANDLE_DW_AT(0x4d, vtable_elem_location, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_AT(0x4e, allocated, 3, DWARF) +HANDLE_DW_AT(0x4f, associated, 3, DWARF) +HANDLE_DW_AT(0x50, data_location, 3, DWARF) +HANDLE_DW_AT(0x51, byte_stride, 3, DWARF) +HANDLE_DW_AT(0x52, entry_pc, 3, DWARF) +HANDLE_DW_AT(0x53, use_UTF8, 3, DWARF) +HANDLE_DW_AT(0x54, extension, 3, DWARF) +HANDLE_DW_AT(0x55, ranges, 3, DWARF) +HANDLE_DW_AT(0x56, trampoline, 3, DWARF) +HANDLE_DW_AT(0x57, call_column, 3, DWARF) +HANDLE_DW_AT(0x58, call_file, 3, DWARF) +HANDLE_DW_AT(0x59, call_line, 3, DWARF) +HANDLE_DW_AT(0x5a, description, 3, DWARF) +HANDLE_DW_AT(0x5b, binary_scale, 3, DWARF) +HANDLE_DW_AT(0x5c, decimal_scale, 3, DWARF) +HANDLE_DW_AT(0x5d, small, 3, DWARF) +HANDLE_DW_AT(0x5e, decimal_sign, 3, DWARF) +HANDLE_DW_AT(0x5f, digit_count, 3, DWARF) +HANDLE_DW_AT(0x60, picture_string, 3, DWARF) +HANDLE_DW_AT(0x61, mutable, 3, DWARF) +HANDLE_DW_AT(0x62, threads_scaled, 3, DWARF) +HANDLE_DW_AT(0x63, explicit, 3, DWARF) +HANDLE_DW_AT(0x64, object_pointer, 3, DWARF) +HANDLE_DW_AT(0x65, endianity, 3, DWARF) +HANDLE_DW_AT(0x66, elemental, 3, DWARF) +HANDLE_DW_AT(0x67, pure, 3, DWARF) +HANDLE_DW_AT(0x68, recursive, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_AT(0x69, signature, 4, DWARF) +HANDLE_DW_AT(0x6a, main_subprogram, 4, DWARF) +HANDLE_DW_AT(0x6b, data_bit_offset, 4, DWARF) +HANDLE_DW_AT(0x6c, const_expr, 4, DWARF) +HANDLE_DW_AT(0x6d, enum_class, 4, DWARF) +HANDLE_DW_AT(0x6e, linkage_name, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_AT(0x6f, string_length_bit_size, 5, DWARF) +HANDLE_DW_AT(0x70, string_length_byte_size, 5, DWARF) +HANDLE_DW_AT(0x71, rank, 5, DWARF) +HANDLE_DW_AT(0x72, str_offsets_base, 5, DWARF) +HANDLE_DW_AT(0x73, addr_base, 5, DWARF) +HANDLE_DW_AT(0x74, rnglists_base, 5, DWARF) +HANDLE_DW_AT(0x75, dwo_id, 0, DWARF) ///< Retracted from DWARF v5. +HANDLE_DW_AT(0x76, dwo_name, 5, DWARF) +HANDLE_DW_AT(0x77, reference, 5, DWARF) +HANDLE_DW_AT(0x78, rvalue_reference, 5, DWARF) +HANDLE_DW_AT(0x79, macros, 5, DWARF) +HANDLE_DW_AT(0x7a, call_all_calls, 5, DWARF) +HANDLE_DW_AT(0x7b, call_all_source_calls, 5, DWARF) +HANDLE_DW_AT(0x7c, call_all_tail_calls, 5, DWARF) +HANDLE_DW_AT(0x7d, call_return_pc, 5, DWARF) +HANDLE_DW_AT(0x7e, call_value, 5, DWARF) +HANDLE_DW_AT(0x7f, call_origin, 5, DWARF) +HANDLE_DW_AT(0x80, call_parameter, 5, DWARF) +HANDLE_DW_AT(0x81, call_pc, 5, DWARF) +HANDLE_DW_AT(0x82, call_tail_call, 5, DWARF) +HANDLE_DW_AT(0x83, call_target, 5, DWARF) +HANDLE_DW_AT(0x84, call_target_clobbered, 5, DWARF) +HANDLE_DW_AT(0x85, call_data_location, 5, DWARF) +HANDLE_DW_AT(0x86, call_data_value, 5, DWARF) +HANDLE_DW_AT(0x87, noreturn, 5, DWARF) +HANDLE_DW_AT(0x88, alignment, 5, DWARF) +HANDLE_DW_AT(0x89, export_symbols, 5, DWARF) +HANDLE_DW_AT(0x8a, deleted, 5, DWARF) +HANDLE_DW_AT(0x8b, defaulted, 5, DWARF) +HANDLE_DW_AT(0x8c, loclists_base, 5, DWARF) +// Vendor extensions: +HANDLE_DW_AT(0x2002, MIPS_loop_begin, 0, MIPS) +HANDLE_DW_AT(0x2003, MIPS_tail_loop_begin, 0, MIPS) +HANDLE_DW_AT(0x2004, MIPS_epilog_begin, 0, MIPS) +HANDLE_DW_AT(0x2005, MIPS_loop_unroll_factor, 0, MIPS) +HANDLE_DW_AT(0x2006, MIPS_software_pipeline_depth, 0, MIPS) +HANDLE_DW_AT(0x2007, MIPS_linkage_name, 0, MIPS) +HANDLE_DW_AT(0x2008, MIPS_stride, 0, MIPS) +HANDLE_DW_AT(0x2009, MIPS_abstract_name, 0, MIPS) +HANDLE_DW_AT(0x200a, MIPS_clone_origin, 0, MIPS) +HANDLE_DW_AT(0x200b, MIPS_has_inlines, 0, MIPS) +HANDLE_DW_AT(0x200c, MIPS_stride_byte, 0, MIPS) +HANDLE_DW_AT(0x200d, MIPS_stride_elem, 0, MIPS) +HANDLE_DW_AT(0x200e, MIPS_ptr_dopetype, 0, MIPS) +HANDLE_DW_AT(0x200f, MIPS_allocatable_dopetype, 0, MIPS) +HANDLE_DW_AT(0x2010, MIPS_assumed_shape_dopetype, 0, MIPS) +// This one appears to have only been implemented by Open64 for +// fortran and may conflict with other extensions. +HANDLE_DW_AT(0x2011, MIPS_assumed_size, 0, MIPS) +// GNU extensions +HANDLE_DW_AT(0x2101, sf_names, 0, GNU) +HANDLE_DW_AT(0x2102, src_info, 0, GNU) +HANDLE_DW_AT(0x2103, mac_info, 0, GNU) +HANDLE_DW_AT(0x2104, src_coords, 0, GNU) +HANDLE_DW_AT(0x2105, body_begin, 0, GNU) +HANDLE_DW_AT(0x2106, body_end, 0, GNU) +HANDLE_DW_AT(0x2107, GNU_vector, 0, GNU) +HANDLE_DW_AT(0x2110, GNU_template_name, 0, GNU) +HANDLE_DW_AT(0x210f, GNU_odr_signature, 0, GNU) +HANDLE_DW_AT(0x2119, GNU_macros, 0, GNU) +// Extensions for Fission proposal. +HANDLE_DW_AT(0x2130, GNU_dwo_name, 0, GNU) +HANDLE_DW_AT(0x2131, GNU_dwo_id, 0, GNU) +HANDLE_DW_AT(0x2132, GNU_ranges_base, 0, GNU) +HANDLE_DW_AT(0x2133, GNU_addr_base, 0, GNU) +HANDLE_DW_AT(0x2134, GNU_pubnames, 0, GNU) +HANDLE_DW_AT(0x2135, GNU_pubtypes, 0, GNU) +HANDLE_DW_AT(0x2136, GNU_discriminator, 0, GNU) +// Borland extensions. +HANDLE_DW_AT(0x3b11, BORLAND_property_read, 0, BORLAND) +HANDLE_DW_AT(0x3b12, BORLAND_property_write, 0, BORLAND) +HANDLE_DW_AT(0x3b13, BORLAND_property_implements, 0, BORLAND) +HANDLE_DW_AT(0x3b14, BORLAND_property_index, 0, BORLAND) +HANDLE_DW_AT(0x3b15, BORLAND_property_default, 0, BORLAND) +HANDLE_DW_AT(0x3b20, BORLAND_Delphi_unit, 0, BORLAND) +HANDLE_DW_AT(0x3b21, BORLAND_Delphi_class, 0, BORLAND) +HANDLE_DW_AT(0x3b22, BORLAND_Delphi_record, 0, BORLAND) +HANDLE_DW_AT(0x3b23, BORLAND_Delphi_metaclass, 0, BORLAND) +HANDLE_DW_AT(0x3b24, BORLAND_Delphi_constructor, 0, BORLAND) +HANDLE_DW_AT(0x3b25, BORLAND_Delphi_destructor, 0, BORLAND) +HANDLE_DW_AT(0x3b26, BORLAND_Delphi_anonymous_method, 0, BORLAND) +HANDLE_DW_AT(0x3b27, BORLAND_Delphi_interface, 0, BORLAND) +HANDLE_DW_AT(0x3b28, BORLAND_Delphi_ABI, 0, BORLAND) +HANDLE_DW_AT(0x3b29, BORLAND_Delphi_return, 0, BORLAND) +HANDLE_DW_AT(0x3b30, BORLAND_Delphi_frameptr, 0, BORLAND) +HANDLE_DW_AT(0x3b31, BORLAND_closure, 0, BORLAND) +// LLVM project extensions. +HANDLE_DW_AT(0x3e00, LLVM_include_path, 0, LLVM) +HANDLE_DW_AT(0x3e01, LLVM_config_macros, 0, LLVM) +HANDLE_DW_AT(0x3e02, LLVM_isysroot, 0, LLVM) +// Apple extensions. +HANDLE_DW_AT(0x3fe1, APPLE_optimized, 0, APPLE) +HANDLE_DW_AT(0x3fe2, APPLE_flags, 0, APPLE) +HANDLE_DW_AT(0x3fe3, APPLE_isa, 0, APPLE) +HANDLE_DW_AT(0x3fe4, APPLE_block, 0, APPLE) +HANDLE_DW_AT(0x3fe5, APPLE_major_runtime_vers, 0, APPLE) +HANDLE_DW_AT(0x3fe6, APPLE_runtime_class, 0, APPLE) +HANDLE_DW_AT(0x3fe7, APPLE_omit_frame_ptr, 0, APPLE) +HANDLE_DW_AT(0x3fe8, APPLE_property_name, 0, APPLE) +HANDLE_DW_AT(0x3fe9, APPLE_property_getter, 0, APPLE) +HANDLE_DW_AT(0x3fea, APPLE_property_setter, 0, APPLE) +HANDLE_DW_AT(0x3feb, APPLE_property_attribute, 0, APPLE) +HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type, 0, APPLE) +HANDLE_DW_AT(0x3fed, APPLE_property, 0, APPLE) + +// Attribute form encodings. +HANDLE_DW_FORM(0x01, addr, 2, DWARF) +HANDLE_DW_FORM(0x03, block2, 2, DWARF) +HANDLE_DW_FORM(0x04, block4, 2, DWARF) +HANDLE_DW_FORM(0x05, data2, 2, DWARF) +HANDLE_DW_FORM(0x06, data4, 2, DWARF) +HANDLE_DW_FORM(0x07, data8, 2, DWARF) +HANDLE_DW_FORM(0x08, string, 2, DWARF) +HANDLE_DW_FORM(0x09, block, 2, DWARF) +HANDLE_DW_FORM(0x0a, block1, 2, DWARF) +HANDLE_DW_FORM(0x0b, data1, 2, DWARF) +HANDLE_DW_FORM(0x0c, flag, 2, DWARF) +HANDLE_DW_FORM(0x0d, sdata, 2, DWARF) +HANDLE_DW_FORM(0x0e, strp, 2, DWARF) +HANDLE_DW_FORM(0x0f, udata, 2, DWARF) +HANDLE_DW_FORM(0x10, ref_addr, 2, DWARF) +HANDLE_DW_FORM(0x11, ref1, 2, DWARF) +HANDLE_DW_FORM(0x12, ref2, 2, DWARF) +HANDLE_DW_FORM(0x13, ref4, 2, DWARF) +HANDLE_DW_FORM(0x14, ref8, 2, DWARF) +HANDLE_DW_FORM(0x15, ref_udata, 2, DWARF) +HANDLE_DW_FORM(0x16, indirect, 2, DWARF) +// New in DWARF v4: +HANDLE_DW_FORM(0x17, sec_offset, 4, DWARF) +HANDLE_DW_FORM(0x18, exprloc, 4, DWARF) +HANDLE_DW_FORM(0x19, flag_present, 4, DWARF) +// This was defined out of sequence. +HANDLE_DW_FORM(0x20, ref_sig8, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_FORM(0x1a, strx, 5, DWARF) +HANDLE_DW_FORM(0x1b, addrx, 5, DWARF) +HANDLE_DW_FORM(0x1c, ref_sup4, 5, DWARF) +HANDLE_DW_FORM(0x1d, strp_sup, 5, DWARF) +HANDLE_DW_FORM(0x1e, data16, 5, DWARF) +HANDLE_DW_FORM(0x1f, line_strp, 5, DWARF) +HANDLE_DW_FORM(0x21, implicit_const, 5, DWARF) +HANDLE_DW_FORM(0x22, loclistx, 5, DWARF) +HANDLE_DW_FORM(0x23, rnglistx, 5, DWARF) +HANDLE_DW_FORM(0x24, ref_sup8, 5, DWARF) +HANDLE_DW_FORM(0x25, strx1, 5, DWARF) +HANDLE_DW_FORM(0x26, strx2, 5, DWARF) +HANDLE_DW_FORM(0x27, strx3, 5, DWARF) +HANDLE_DW_FORM(0x28, strx4, 5, DWARF) +HANDLE_DW_FORM(0x29, addrx1, 5, DWARF) +HANDLE_DW_FORM(0x2a, addrx2, 5, DWARF) +HANDLE_DW_FORM(0x2b, addrx3, 5, DWARF) +HANDLE_DW_FORM(0x2c, addrx4, 5, DWARF) +// Extensions for Fission proposal +HANDLE_DW_FORM(0x1f01, GNU_addr_index, 0, GNU) +HANDLE_DW_FORM(0x1f02, GNU_str_index, 0, GNU) +// Alternate debug sections proposal (output of "dwz" tool). +HANDLE_DW_FORM(0x1f20, GNU_ref_alt, 0, GNU) +HANDLE_DW_FORM(0x1f21, GNU_strp_alt, 0, GNU) + +// DWARF Expression operators. +HANDLE_DW_OP(0x03, addr, 2, DWARF) +HANDLE_DW_OP(0x06, deref, 2, DWARF) +HANDLE_DW_OP(0x08, const1u, 2, DWARF) +HANDLE_DW_OP(0x09, const1s, 2, DWARF) +HANDLE_DW_OP(0x0a, const2u, 2, DWARF) +HANDLE_DW_OP(0x0b, const2s, 2, DWARF) +HANDLE_DW_OP(0x0c, const4u, 2, DWARF) +HANDLE_DW_OP(0x0d, const4s, 2, DWARF) +HANDLE_DW_OP(0x0e, const8u, 2, DWARF) +HANDLE_DW_OP(0x0f, const8s, 2, DWARF) +HANDLE_DW_OP(0x10, constu, 2, DWARF) +HANDLE_DW_OP(0x11, consts, 2, DWARF) +HANDLE_DW_OP(0x12, dup, 2, DWARF) +HANDLE_DW_OP(0x13, drop, 2, DWARF) +HANDLE_DW_OP(0x14, over, 2, DWARF) +HANDLE_DW_OP(0x15, pick, 2, DWARF) +HANDLE_DW_OP(0x16, swap, 2, DWARF) +HANDLE_DW_OP(0x17, rot, 2, DWARF) +HANDLE_DW_OP(0x18, xderef, 2, DWARF) +HANDLE_DW_OP(0x19, abs, 2, DWARF) +HANDLE_DW_OP(0x1a, and, 2, DWARF) +HANDLE_DW_OP(0x1b, div, 2, DWARF) +HANDLE_DW_OP(0x1c, minus, 2, DWARF) +HANDLE_DW_OP(0x1d, mod, 2, DWARF) +HANDLE_DW_OP(0x1e, mul, 2, DWARF) +HANDLE_DW_OP(0x1f, neg, 2, DWARF) +HANDLE_DW_OP(0x20, not, 2, DWARF) +HANDLE_DW_OP(0x21, or, 2, DWARF) +HANDLE_DW_OP(0x22, plus, 2, DWARF) +HANDLE_DW_OP(0x23, plus_uconst, 2, DWARF) +HANDLE_DW_OP(0x24, shl, 2, DWARF) +HANDLE_DW_OP(0x25, shr, 2, DWARF) +HANDLE_DW_OP(0x26, shra, 2, DWARF) +HANDLE_DW_OP(0x27, xor, 2, DWARF) +HANDLE_DW_OP(0x28, bra, 2, DWARF) +HANDLE_DW_OP(0x29, eq, 2, DWARF) +HANDLE_DW_OP(0x2a, ge, 2, DWARF) +HANDLE_DW_OP(0x2b, gt, 2, DWARF) +HANDLE_DW_OP(0x2c, le, 2, DWARF) +HANDLE_DW_OP(0x2d, lt, 2, DWARF) +HANDLE_DW_OP(0x2e, ne, 2, DWARF) +HANDLE_DW_OP(0x2f, skip, 2, DWARF) +HANDLE_DW_OP(0x30, lit0, 2, DWARF) +HANDLE_DW_OP(0x31, lit1, 2, DWARF) +HANDLE_DW_OP(0x32, lit2, 2, DWARF) +HANDLE_DW_OP(0x33, lit3, 2, DWARF) +HANDLE_DW_OP(0x34, lit4, 2, DWARF) +HANDLE_DW_OP(0x35, lit5, 2, DWARF) +HANDLE_DW_OP(0x36, lit6, 2, DWARF) +HANDLE_DW_OP(0x37, lit7, 2, DWARF) +HANDLE_DW_OP(0x38, lit8, 2, DWARF) +HANDLE_DW_OP(0x39, lit9, 2, DWARF) +HANDLE_DW_OP(0x3a, lit10, 2, DWARF) +HANDLE_DW_OP(0x3b, lit11, 2, DWARF) +HANDLE_DW_OP(0x3c, lit12, 2, DWARF) +HANDLE_DW_OP(0x3d, lit13, 2, DWARF) +HANDLE_DW_OP(0x3e, lit14, 2, DWARF) +HANDLE_DW_OP(0x3f, lit15, 2, DWARF) +HANDLE_DW_OP(0x40, lit16, 2, DWARF) +HANDLE_DW_OP(0x41, lit17, 2, DWARF) +HANDLE_DW_OP(0x42, lit18, 2, DWARF) +HANDLE_DW_OP(0x43, lit19, 2, DWARF) +HANDLE_DW_OP(0x44, lit20, 2, DWARF) +HANDLE_DW_OP(0x45, lit21, 2, DWARF) +HANDLE_DW_OP(0x46, lit22, 2, DWARF) +HANDLE_DW_OP(0x47, lit23, 2, DWARF) +HANDLE_DW_OP(0x48, lit24, 2, DWARF) +HANDLE_DW_OP(0x49, lit25, 2, DWARF) +HANDLE_DW_OP(0x4a, lit26, 2, DWARF) +HANDLE_DW_OP(0x4b, lit27, 2, DWARF) +HANDLE_DW_OP(0x4c, lit28, 2, DWARF) +HANDLE_DW_OP(0x4d, lit29, 2, DWARF) +HANDLE_DW_OP(0x4e, lit30, 2, DWARF) +HANDLE_DW_OP(0x4f, lit31, 2, DWARF) +HANDLE_DW_OP(0x50, reg0, 2, DWARF) +HANDLE_DW_OP(0x51, reg1, 2, DWARF) +HANDLE_DW_OP(0x52, reg2, 2, DWARF) +HANDLE_DW_OP(0x53, reg3, 2, DWARF) +HANDLE_DW_OP(0x54, reg4, 2, DWARF) +HANDLE_DW_OP(0x55, reg5, 2, DWARF) +HANDLE_DW_OP(0x56, reg6, 2, DWARF) +HANDLE_DW_OP(0x57, reg7, 2, DWARF) +HANDLE_DW_OP(0x58, reg8, 2, DWARF) +HANDLE_DW_OP(0x59, reg9, 2, DWARF) +HANDLE_DW_OP(0x5a, reg10, 2, DWARF) +HANDLE_DW_OP(0x5b, reg11, 2, DWARF) +HANDLE_DW_OP(0x5c, reg12, 2, DWARF) +HANDLE_DW_OP(0x5d, reg13, 2, DWARF) +HANDLE_DW_OP(0x5e, reg14, 2, DWARF) +HANDLE_DW_OP(0x5f, reg15, 2, DWARF) +HANDLE_DW_OP(0x60, reg16, 2, DWARF) +HANDLE_DW_OP(0x61, reg17, 2, DWARF) +HANDLE_DW_OP(0x62, reg18, 2, DWARF) +HANDLE_DW_OP(0x63, reg19, 2, DWARF) +HANDLE_DW_OP(0x64, reg20, 2, DWARF) +HANDLE_DW_OP(0x65, reg21, 2, DWARF) +HANDLE_DW_OP(0x66, reg22, 2, DWARF) +HANDLE_DW_OP(0x67, reg23, 2, DWARF) +HANDLE_DW_OP(0x68, reg24, 2, DWARF) +HANDLE_DW_OP(0x69, reg25, 2, DWARF) +HANDLE_DW_OP(0x6a, reg26, 2, DWARF) +HANDLE_DW_OP(0x6b, reg27, 2, DWARF) +HANDLE_DW_OP(0x6c, reg28, 2, DWARF) +HANDLE_DW_OP(0x6d, reg29, 2, DWARF) +HANDLE_DW_OP(0x6e, reg30, 2, DWARF) +HANDLE_DW_OP(0x6f, reg31, 2, DWARF) +HANDLE_DW_OP(0x70, breg0, 2, DWARF) +HANDLE_DW_OP(0x71, breg1, 2, DWARF) +HANDLE_DW_OP(0x72, breg2, 2, DWARF) +HANDLE_DW_OP(0x73, breg3, 2, DWARF) +HANDLE_DW_OP(0x74, breg4, 2, DWARF) +HANDLE_DW_OP(0x75, breg5, 2, DWARF) +HANDLE_DW_OP(0x76, breg6, 2, DWARF) +HANDLE_DW_OP(0x77, breg7, 2, DWARF) +HANDLE_DW_OP(0x78, breg8, 2, DWARF) +HANDLE_DW_OP(0x79, breg9, 2, DWARF) +HANDLE_DW_OP(0x7a, breg10, 2, DWARF) +HANDLE_DW_OP(0x7b, breg11, 2, DWARF) +HANDLE_DW_OP(0x7c, breg12, 2, DWARF) +HANDLE_DW_OP(0x7d, breg13, 2, DWARF) +HANDLE_DW_OP(0x7e, breg14, 2, DWARF) +HANDLE_DW_OP(0x7f, breg15, 2, DWARF) +HANDLE_DW_OP(0x80, breg16, 2, DWARF) +HANDLE_DW_OP(0x81, breg17, 2, DWARF) +HANDLE_DW_OP(0x82, breg18, 2, DWARF) +HANDLE_DW_OP(0x83, breg19, 2, DWARF) +HANDLE_DW_OP(0x84, breg20, 2, DWARF) +HANDLE_DW_OP(0x85, breg21, 2, DWARF) +HANDLE_DW_OP(0x86, breg22, 2, DWARF) +HANDLE_DW_OP(0x87, breg23, 2, DWARF) +HANDLE_DW_OP(0x88, breg24, 2, DWARF) +HANDLE_DW_OP(0x89, breg25, 2, DWARF) +HANDLE_DW_OP(0x8a, breg26, 2, DWARF) +HANDLE_DW_OP(0x8b, breg27, 2, DWARF) +HANDLE_DW_OP(0x8c, breg28, 2, DWARF) +HANDLE_DW_OP(0x8d, breg29, 2, DWARF) +HANDLE_DW_OP(0x8e, breg30, 2, DWARF) +HANDLE_DW_OP(0x8f, breg31, 2, DWARF) +HANDLE_DW_OP(0x90, regx, 2, DWARF) +HANDLE_DW_OP(0x91, fbreg, 2, DWARF) +HANDLE_DW_OP(0x92, bregx, 2, DWARF) +HANDLE_DW_OP(0x93, piece, 2, DWARF) +HANDLE_DW_OP(0x94, deref_size, 2, DWARF) +HANDLE_DW_OP(0x95, xderef_size, 2, DWARF) +HANDLE_DW_OP(0x96, nop, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_OP(0x97, push_object_address, 3, DWARF) +HANDLE_DW_OP(0x98, call2, 3, DWARF) +HANDLE_DW_OP(0x99, call4, 3, DWARF) +HANDLE_DW_OP(0x9a, call_ref, 3, DWARF) +HANDLE_DW_OP(0x9b, form_tls_address, 3, DWARF) +HANDLE_DW_OP(0x9c, call_frame_cfa, 3, DWARF) +HANDLE_DW_OP(0x9d, bit_piece, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_OP(0x9e, implicit_value, 4, DWARF) +HANDLE_DW_OP(0x9f, stack_value, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_OP(0xa0, implicit_pointer, 5, DWARF) +HANDLE_DW_OP(0xa1, addrx, 5, DWARF) +HANDLE_DW_OP(0xa2, constx, 5, DWARF) +HANDLE_DW_OP(0xa3, entry_value, 5, DWARF) +HANDLE_DW_OP(0xa4, const_type, 5, DWARF) +HANDLE_DW_OP(0xa5, regval_type, 5, DWARF) +HANDLE_DW_OP(0xa6, deref_type, 5, DWARF) +HANDLE_DW_OP(0xa7, xderef_type, 5, DWARF) +HANDLE_DW_OP(0xa8, convert, 5, DWARF) +HANDLE_DW_OP(0xa9, reinterpret, 5, DWARF) +// Vendor extensions: +// Extensions for GNU-style thread-local storage. +HANDLE_DW_OP(0xe0, GNU_push_tls_address, 0, GNU) +// Extensions for Fission proposal. +HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU) +HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU) + +// DWARF languages. +HANDLE_DW_LANG(0x0001, C89, 2, DWARF) +HANDLE_DW_LANG(0x0002, C, 2, DWARF) +HANDLE_DW_LANG(0x0003, Ada83, 2, DWARF) +HANDLE_DW_LANG(0x0004, C_plus_plus, 2, DWARF) +HANDLE_DW_LANG(0x0005, Cobol74, 2, DWARF) +HANDLE_DW_LANG(0x0006, Cobol85, 2, DWARF) +HANDLE_DW_LANG(0x0007, Fortran77, 2, DWARF) +HANDLE_DW_LANG(0x0008, Fortran90, 2, DWARF) +HANDLE_DW_LANG(0x0009, Pascal83, 2, DWARF) +HANDLE_DW_LANG(0x000a, Modula2, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_LANG(0x000b, Java, 3, DWARF) +HANDLE_DW_LANG(0x000c, C99, 3, DWARF) +HANDLE_DW_LANG(0x000d, Ada95, 3, DWARF) +HANDLE_DW_LANG(0x000e, Fortran95, 3, DWARF) +HANDLE_DW_LANG(0x000f, PLI, 3, DWARF) +HANDLE_DW_LANG(0x0010, ObjC, 3, DWARF) +HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 3, DWARF) +HANDLE_DW_LANG(0x0012, UPC, 3, DWARF) +HANDLE_DW_LANG(0x0013, D, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_LANG(0x0014, Python, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_LANG(0x0015, OpenCL, 5, DWARF) +HANDLE_DW_LANG(0x0016, Go, 5, DWARF) +HANDLE_DW_LANG(0x0017, Modula3, 5, DWARF) +HANDLE_DW_LANG(0x0018, Haskell, 5, DWARF) +HANDLE_DW_LANG(0x0019, C_plus_plus_03, 5, DWARF) +HANDLE_DW_LANG(0x001a, C_plus_plus_11, 5, DWARF) +HANDLE_DW_LANG(0x001b, OCaml, 5, DWARF) +HANDLE_DW_LANG(0x001c, Rust, 5, DWARF) +HANDLE_DW_LANG(0x001d, C11, 5, DWARF) +HANDLE_DW_LANG(0x001e, Swift, 5, DWARF) +HANDLE_DW_LANG(0x001f, Julia, 5, DWARF) +HANDLE_DW_LANG(0x0020, Dylan, 5, DWARF) +HANDLE_DW_LANG(0x0021, C_plus_plus_14, 5, DWARF) +HANDLE_DW_LANG(0x0022, Fortran03, 5, DWARF) +HANDLE_DW_LANG(0x0023, Fortran08, 5, DWARF) +HANDLE_DW_LANG(0x0024, RenderScript, 5, DWARF) +HANDLE_DW_LANG(0x0025, BLISS, 5, DWARF) +// Vendor extensions: +HANDLE_DW_LANG(0x8001, Mips_Assembler, 0, MIPS) +HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, GOOGLE) +HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, BORLAND) + +// DWARF attribute type encodings. +HANDLE_DW_ATE(0x01, address, 2, DWARF) +HANDLE_DW_ATE(0x02, boolean, 2, DWARF) +HANDLE_DW_ATE(0x03, complex_float, 2, DWARF) +HANDLE_DW_ATE(0x04, float, 2, DWARF) +HANDLE_DW_ATE(0x05, signed, 2, DWARF) +HANDLE_DW_ATE(0x06, signed_char, 2, DWARF) +HANDLE_DW_ATE(0x07, unsigned, 2, DWARF) +HANDLE_DW_ATE(0x08, unsigned_char, 2, DWARF) +// New in DWARF v3: +HANDLE_DW_ATE(0x09, imaginary_float, 3, DWARF) +HANDLE_DW_ATE(0x0a, packed_decimal, 3, DWARF) +HANDLE_DW_ATE(0x0b, numeric_string, 3, DWARF) +HANDLE_DW_ATE(0x0c, edited, 3, DWARF) +HANDLE_DW_ATE(0x0d, signed_fixed, 3, DWARF) +HANDLE_DW_ATE(0x0e, unsigned_fixed, 3, DWARF) +HANDLE_DW_ATE(0x0f, decimal_float, 3, DWARF) +// New in DWARF v4: +HANDLE_DW_ATE(0x10, UTF, 4, DWARF) +// New in DWARF v5: +HANDLE_DW_ATE(0x11, UCS, 5, DWARF) +HANDLE_DW_ATE(0x12, ASCII, 5, DWARF) + +// DWARF virtuality codes. +HANDLE_DW_VIRTUALITY(0x00, none) +HANDLE_DW_VIRTUALITY(0x01, virtual) +HANDLE_DW_VIRTUALITY(0x02, pure_virtual) + +// DWARF v5 Defaulted Member Encodings. +HANDLE_DW_DEFAULTED(0x00, no) +HANDLE_DW_DEFAULTED(0x01, in_class) +HANDLE_DW_DEFAULTED(0x02, out_of_class) + +// DWARF calling convention codes. +HANDLE_DW_CC(0x01, normal) +HANDLE_DW_CC(0x02, program) +HANDLE_DW_CC(0x03, nocall) +// New in DWARF v5: +HANDLE_DW_CC(0x04, pass_by_reference) +HANDLE_DW_CC(0x05, pass_by_value) +// Vendor extensions: +HANDLE_DW_CC(0x41, GNU_borland_fastcall_i386) +HANDLE_DW_CC(0xb0, BORLAND_safecall) +HANDLE_DW_CC(0xb1, BORLAND_stdcall) +HANDLE_DW_CC(0xb2, BORLAND_pascal) +HANDLE_DW_CC(0xb3, BORLAND_msfastcall) +HANDLE_DW_CC(0xb4, BORLAND_msreturn) +HANDLE_DW_CC(0xb5, BORLAND_thiscall) +HANDLE_DW_CC(0xb6, BORLAND_fastcall) +HANDLE_DW_CC(0xc0, LLVM_vectorcall) + +// Line Number Extended Opcode Encodings +HANDLE_DW_LNE(0x01, end_sequence) +HANDLE_DW_LNE(0x02, set_address) +HANDLE_DW_LNE(0x03, define_file) +// New in DWARF v4: +HANDLE_DW_LNE(0x04, set_discriminator) + +// Line Number Standard Opcode Encodings. +HANDLE_DW_LNS(0x00, extended_op) +HANDLE_DW_LNS(0x01, copy) +HANDLE_DW_LNS(0x02, advance_pc) +HANDLE_DW_LNS(0x03, advance_line) +HANDLE_DW_LNS(0x04, set_file) +HANDLE_DW_LNS(0x05, set_column) +HANDLE_DW_LNS(0x06, negate_stmt) +HANDLE_DW_LNS(0x07, set_basic_block) +HANDLE_DW_LNS(0x08, const_add_pc) +HANDLE_DW_LNS(0x09, fixed_advance_pc) +// New in DWARF v3: +HANDLE_DW_LNS(0x0a, set_prologue_end) +HANDLE_DW_LNS(0x0b, set_epilogue_begin) +HANDLE_DW_LNS(0x0c, set_isa) + +// DWARF v5 Line number header entry format. +HANDLE_DW_LNCT(0x01, path) +HANDLE_DW_LNCT(0x02, directory_index) +HANDLE_DW_LNCT(0x03, timestamp) +HANDLE_DW_LNCT(0x04, size) +HANDLE_DW_LNCT(0x05, MD5) + +// DWARF v5 Macro information. +HANDLE_DW_MACRO(0x01, define) +HANDLE_DW_MACRO(0x02, undef) +HANDLE_DW_MACRO(0x03, start_file) +HANDLE_DW_MACRO(0x04, end_file) +HANDLE_DW_MACRO(0x05, define_strp) +HANDLE_DW_MACRO(0x06, undef_strp) +HANDLE_DW_MACRO(0x07, import) +HANDLE_DW_MACRO(0x08, define_sup) +HANDLE_DW_MACRO(0x09, undef_sup) +HANDLE_DW_MACRO(0x0a, import_sup) +HANDLE_DW_MACRO(0x0b, define_strx) +HANDLE_DW_MACRO(0x0c, undef_strx) + +// DWARF v5 Range List Entry encoding values. +HANDLE_DW_RLE(0x00, end_of_list) +HANDLE_DW_RLE(0x01, base_addressx) +HANDLE_DW_RLE(0x02, startx_endx) +HANDLE_DW_RLE(0x03, startx_length) +HANDLE_DW_RLE(0x04, offset_pair) +HANDLE_DW_RLE(0x05, base_address) +HANDLE_DW_RLE(0x06, start_end) +HANDLE_DW_RLE(0x07, start_length) + +// Call frame instruction encodings. +HANDLE_DW_CFA(0x00, nop) +HANDLE_DW_CFA(0x40, advance_loc) +HANDLE_DW_CFA(0x80, offset) +HANDLE_DW_CFA(0xc0, restore) +HANDLE_DW_CFA(0x01, set_loc) +HANDLE_DW_CFA(0x02, advance_loc1) +HANDLE_DW_CFA(0x03, advance_loc2) +HANDLE_DW_CFA(0x04, advance_loc4) +HANDLE_DW_CFA(0x05, offset_extended) +HANDLE_DW_CFA(0x06, restore_extended) +HANDLE_DW_CFA(0x07, undefined) +HANDLE_DW_CFA(0x08, same_value) +HANDLE_DW_CFA(0x09, register) +HANDLE_DW_CFA(0x0a, remember_state) +HANDLE_DW_CFA(0x0b, restore_state) +HANDLE_DW_CFA(0x0c, def_cfa) +HANDLE_DW_CFA(0x0d, def_cfa_register) +HANDLE_DW_CFA(0x0e, def_cfa_offset) +// New in DWARF v3: +HANDLE_DW_CFA(0x0f, def_cfa_expression) +HANDLE_DW_CFA(0x10, expression) +HANDLE_DW_CFA(0x11, offset_extended_sf) +HANDLE_DW_CFA(0x12, def_cfa_sf) +HANDLE_DW_CFA(0x13, def_cfa_offset_sf) +HANDLE_DW_CFA(0x14, val_offset) +HANDLE_DW_CFA(0x15, val_offset_sf) +HANDLE_DW_CFA(0x16, val_expression) +// Vendor extensions: +HANDLE_DW_CFA(0x1d, MIPS_advance_loc8) +HANDLE_DW_CFA(0x2d, GNU_window_save) +HANDLE_DW_CFA(0x2e, GNU_args_size) + +// Apple Objective-C Property Attributes. +// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind! +HANDLE_DW_APPLE_PROPERTY(0x01, readonly) +HANDLE_DW_APPLE_PROPERTY(0x02, getter) +HANDLE_DW_APPLE_PROPERTY(0x04, assign) +HANDLE_DW_APPLE_PROPERTY(0x08, readwrite) +HANDLE_DW_APPLE_PROPERTY(0x10, retain) +HANDLE_DW_APPLE_PROPERTY(0x20, copy) +HANDLE_DW_APPLE_PROPERTY(0x40, nonatomic) +HANDLE_DW_APPLE_PROPERTY(0x80, setter) +HANDLE_DW_APPLE_PROPERTY(0x100, atomic) +HANDLE_DW_APPLE_PROPERTY(0x200, weak) +HANDLE_DW_APPLE_PROPERTY(0x400, strong) +HANDLE_DW_APPLE_PROPERTY(0x800, unsafe_unretained) +HANDLE_DW_APPLE_PROPERTY(0x1000, nullability) +HANDLE_DW_APPLE_PROPERTY(0x2000, null_resettable) +HANDLE_DW_APPLE_PROPERTY(0x4000, class) + +// DWARF v5 Unit Types. +HANDLE_DW_UT(0x01, compile) +HANDLE_DW_UT(0x02, type) +HANDLE_DW_UT(0x03, partial) +HANDLE_DW_UT(0x04, skeleton) +HANDLE_DW_UT(0x05, split_compile) +HANDLE_DW_UT(0x06, split_type) + +#undef HANDLE_DW_TAG +#undef HANDLE_DW_AT +#undef HANDLE_DW_FORM +#undef HANDLE_DW_OP +#undef HANDLE_DW_LANG +#undef HANDLE_DW_ATE +#undef HANDLE_DW_VIRTUALITY +#undef HANDLE_DW_DEFAULTED +#undef HANDLE_DW_CC +#undef HANDLE_DW_LNS +#undef HANDLE_DW_LNE +#undef HANDLE_DW_LNCT +#undef HANDLE_DW_MACRO +#undef HANDLE_DW_RLE +#undef HANDLE_DW_CFA +#undef HANDLE_DW_APPLE_PROPERTY +#undef HANDLE_DW_UT diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h similarity index 78% rename from include/llvm/Support/Dwarf.h rename to include/llvm/BinaryFormat/Dwarf.h index 84056682924ebb596cd6f72ffe7fb27a46c0743d..b7a056b18119d55ef79cb13e8620e41ccf9d5155 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/BinaryFormat/Dwarf.h @@ -1,4 +1,4 @@ -//===-- llvm/Support/Dwarf.h ---Dwarf Constants------------------*- C++ -*-===// +//===-- llvm/BinaryFormat/Dwarf.h ---Dwarf Constants-------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,8 +17,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_DWARF_H -#define LLVM_SUPPORT_DWARF_H +#ifndef LLVM_BINARYFORMAT_DWARF_H +#define LLVM_BINARYFORMAT_DWARF_H #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" @@ -37,7 +37,7 @@ namespace dwarf { // enumeration base type. enum LLVMConstants : uint32_t { - // LLVM mock tags (see also llvm/Support/Dwarf.def). + // LLVM mock tags (see also llvm/BinaryFormat/Dwarf.def). DW_TAG_invalid = ~0U, // Tag for invalid results. DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results. DW_MACINFO_invalid = ~0U, // Macinfo type for invalid results. @@ -46,7 +46,15 @@ enum LLVMConstants : uint32_t { DWARF_VERSION = 4, // Default dwarf version we output. DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes. DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames. - DW_ARANGES_VERSION = 2 // Section version number for .debug_aranges. + DW_ARANGES_VERSION = 2, // Section version number for .debug_aranges. + // Identifiers we use to distinguish vendor extensions. + DWARF_VENDOR_DWARF = 0, // Defined in v2 or later of the DWARF standard. + DWARF_VENDOR_APPLE = 1, + DWARF_VENDOR_BORLAND = 2, + DWARF_VENDOR_GNU = 3, + DWARF_VENDOR_GOOGLE = 4, + DWARF_VENDOR_LLVM = 5, + DWARF_VENDOR_MIPS = 6 }; // Special ID values that distinguish a CIE from a FDE in DWARF CFI. @@ -55,8 +63,8 @@ const uint32_t DW_CIE_ID = UINT32_MAX; const uint64_t DW64_CIE_ID = UINT64_MAX; enum Tag : uint16_t { -#define HANDLE_DW_TAG(ID, NAME) DW_TAG_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" DW_TAG_lo_user = 0x4080, DW_TAG_hi_user = 0xffff, DW_TAG_user_base = 0x1000 // Recommended base for user tags. @@ -92,29 +100,29 @@ inline bool isType(Tag T) { /// Attributes. enum Attribute : uint16_t { -#define HANDLE_DW_AT(ID, NAME) DW_AT_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) DW_AT_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" DW_AT_lo_user = 0x2000, DW_AT_hi_user = 0x3fff, }; enum Form : uint16_t { -#define HANDLE_DW_FORM(ID, NAME) DW_FORM_##NAME = ID, -#include "llvm/Support/Dwarf.def" - DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF. +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) DW_FORM_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF. }; enum LocationAtom { -#define HANDLE_DW_OP(ID, NAME) DW_OP_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) DW_OP_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" DW_OP_lo_user = 0xe0, DW_OP_hi_user = 0xff, DW_OP_LLVM_fragment = 0x1000 ///< Only used in LLVM metadata. }; enum TypeKind { -#define HANDLE_DW_ATE(ID, NAME) DW_ATE_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" DW_ATE_lo_user = 0x80, DW_ATE_hi_user = 0xff }; @@ -153,19 +161,19 @@ enum VisibilityAttribute { enum VirtualityAttribute { #define HANDLE_DW_VIRTUALITY(ID, NAME) DW_VIRTUALITY_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_VIRTUALITY_max = 0x02 }; enum DefaultedMemberAttribute { #define HANDLE_DW_DEFAULTED(ID, NAME) DW_DEFAULTED_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_DEFAULTED_max = 0x02 }; enum SourceLanguage { -#define HANDLE_DW_LANG(ID, NAME) DW_LANG_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" DW_LANG_lo_user = 0x8000, DW_LANG_hi_user = 0xffff }; @@ -179,9 +187,9 @@ enum CaseSensitivity { }; enum CallingConvention { - // Calling convention codes +// Calling convention codes #define HANDLE_DW_CC(ID, NAME) DW_CC_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_CC_lo_user = 0x40, DW_CC_hi_user = 0xff }; @@ -209,20 +217,20 @@ enum DiscriminantList { /// Line Number Standard Opcode Encodings. enum LineNumberOps : uint8_t { #define HANDLE_DW_LNS(ID, NAME) DW_LNS_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" }; /// Line Number Extended Opcode Encodings. enum LineNumberExtendedOps { #define HANDLE_DW_LNE(ID, NAME) DW_LNE_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_LNE_lo_user = 0x80, DW_LNE_hi_user = 0xff }; -enum LinerNumberEntryFormat { -#define HANDLE_DW_LNCT(ID, NAME) DW_DEFAULTED_##NAME = ID, -#include "llvm/Support/Dwarf.def" +enum LineNumberEntryFormat { +#define HANDLE_DW_LNCT(ID, NAME) DW_LNCT_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" DW_LNCT_lo_user = 0x2000, DW_LNCT_hi_user = 0x3fff, }; @@ -239,7 +247,7 @@ enum MacinfoRecordType { /// DWARF v5 macro information entry type encodings. enum MacroEntryType { #define HANDLE_DW_MACRO(ID, NAME) DW_MACRO_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_MACRO_lo_user = 0xe0, DW_MACRO_hi_user = 0xff }; @@ -247,14 +255,13 @@ enum MacroEntryType { /// DWARF v5 range list entry encoding values. enum RangeListEntries { #define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" }; - /// Call frame instruction encodings. enum CallFrameInfo { #define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_CFA_extended = 0x00, DW_CFA_lo_user = 0x1c, @@ -302,13 +309,13 @@ enum LocationListEntry : unsigned char { /// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind! enum ApplePropertyAttributes { #define HANDLE_DW_APPLE_PROPERTY(ID, NAME) DW_APPLE_PROPERTY_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" }; /// Constants for unit types in DWARF v5. enum UnitType : unsigned char { #define HANDLE_DW_UT(ID, NAME) DW_UT_##NAME = ID, -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" DW_UT_lo_user = 0x80, DW_UT_hi_user = 0xff }; @@ -347,10 +354,7 @@ enum GDBIndexEntryKind { GIEK_UNUSED7 }; -enum GDBIndexEntryLinkage { - GIEL_EXTERNAL, - GIEL_STATIC -}; +enum GDBIndexEntryLinkage { GIEL_EXTERNAL, GIEL_STATIC }; /// \defgroup DwarfConstantsDumping Dwarf constants dumping functions /// @@ -406,6 +410,40 @@ unsigned getAttributeEncoding(StringRef EncodingString); unsigned getMacinfo(StringRef MacinfoString); /// @} +/// \defgroup DwarfConstantsVersioning Dwarf version for constants +/// +/// For constants defined by DWARF, returns the DWARF version when the constant +/// was first defined. For vendor extensions, if there is a version-related +/// policy for when to emit it, returns a version number for that policy. +/// Otherwise returns 0. +/// +/// @{ +unsigned TagVersion(Tag T); +unsigned AttributeVersion(Attribute A); +unsigned FormVersion(Form F); +unsigned OperationVersion(LocationAtom O); +unsigned AttributeEncodingVersion(TypeKind E); +unsigned LanguageVersion(SourceLanguage L); +/// @} + +/// \defgroup DwarfConstantsVendor Dwarf "vendor" for constants +/// +/// These functions return an identifier describing "who" defined the constant, +/// either the DWARF standard itself or the vendor who defined the extension. +/// +/// @{ +unsigned TagVendor(Tag T); +unsigned AttributeVendor(Attribute A); +unsigned FormVendor(Form F); +unsigned OperationVendor(LocationAtom O); +unsigned AttributeEncodingVendor(TypeKind E); +unsigned LanguageVendor(SourceLanguage L); +/// @} + +/// Tells whether the specified form is defined in the specified version, +/// or is an extension if extensions are allowed. +bool isValidFormForVersion(Form F, unsigned Version, bool ExtensionsOk = true); + /// \brief Returns the symbolic string representing Val when used as a value /// for attribute Attr. StringRef AttributeValueString(uint16_t Attr, unsigned Val); @@ -428,8 +466,8 @@ struct PubIndexEntryDescriptor { /* implicit */ PubIndexEntryDescriptor(GDBIndexEntryKind Kind) : Kind(Kind), Linkage(GIEL_EXTERNAL) {} explicit PubIndexEntryDescriptor(uint8_t Value) - : Kind(static_cast((Value & KIND_MASK) >> - KIND_OFFSET)), + : Kind( + static_cast((Value & KIND_MASK) >> KIND_OFFSET)), Linkage(static_cast((Value & LINKAGE_MASK) >> LINKAGE_OFFSET)) {} uint8_t toBits() const { diff --git a/include/llvm/Support/ELF.h b/include/llvm/BinaryFormat/ELF.h similarity index 98% rename from include/llvm/Support/ELF.h rename to include/llvm/BinaryFormat/ELF.h index 33f20a809d6ca11dd7c219971dbdd539627c315d..a4450ee13b4094f3e362f94d0eda55c003098972 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/BinaryFormat/ELF.h @@ -1,4 +1,4 @@ -//===-- llvm/Support/ELF.h - ELF constants and data structures --*- C++ -*-===// +//===- llvm/BinaryFormat/ELF.h - ELF constants and structures ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,30 +17,28 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_ELF_H -#define LLVM_SUPPORT_ELF_H +#ifndef LLVM_BINARYFORMAT_ELF_H +#define LLVM_BINARYFORMAT_ELF_H -#include "llvm/Support/Compiler.h" -#include "llvm/Support/DataTypes.h" +#include #include namespace llvm { - namespace ELF { -typedef uint32_t Elf32_Addr; // Program address -typedef uint32_t Elf32_Off; // File offset -typedef uint16_t Elf32_Half; -typedef uint32_t Elf32_Word; -typedef int32_t Elf32_Sword; +using Elf32_Addr = uint32_t; // Program address +using Elf32_Off = uint32_t; // File offset +using Elf32_Half = uint16_t; +using Elf32_Word = uint32_t; +using Elf32_Sword = int32_t; -typedef uint64_t Elf64_Addr; -typedef uint64_t Elf64_Off; -typedef uint16_t Elf64_Half; -typedef uint32_t Elf64_Word; -typedef int32_t Elf64_Sword; -typedef uint64_t Elf64_Xword; -typedef int64_t Elf64_Sxword; +using Elf64_Addr = uint64_t; +using Elf64_Off = uint64_t; +using Elf64_Half = uint16_t; +using Elf64_Word = uint32_t; +using Elf64_Sword = int32_t; +using Elf64_Xword = uint64_t; +using Elf64_Sxword = int64_t; // Object file magic string. static const char ElfMagic[] = {0x7f, 'E', 'L', 'F', '\0'}; @@ -75,9 +73,11 @@ struct Elf32_Ehdr { Elf32_Half e_shentsize; // Size of an entry in the section header table Elf32_Half e_shnum; // Number of entries in the section header table Elf32_Half e_shstrndx; // Sect hdr table index of sect name string table + bool checkMagic() const { return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0; } + unsigned char getFileClass() const { return e_ident[EI_CLASS]; } unsigned char getDataEncoding() const { return e_ident[EI_DATA]; } }; @@ -99,9 +99,11 @@ struct Elf64_Ehdr { Elf64_Half e_shentsize; Elf64_Half e_shnum; Elf64_Half e_shstrndx; + bool checkMagic() const { return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0; } + unsigned char getFileClass() const { return e_ident[EI_CLASS]; } unsigned char getDataEncoding() const { return e_ident[EI_DATA]; } }; @@ -683,6 +685,7 @@ enum : unsigned { SHT_GROUP = 17, // Section group. SHT_SYMTAB_SHNDX = 18, // Indices for SHN_XINDEX entries. SHT_LOOS = 0x60000000, // Lowest operating system-specific type. + SHT_LLVM_ODRTAB = 0x6fff4c00, // LLVM ODR table. SHT_GNU_ATTRIBUTES = 0x6ffffff5, // Object attributes. SHT_GNU_HASH = 0x6ffffff6, // GNU-style hash table. SHT_GNU_verdef = 0x6ffffffd, // GNU version definitions. @@ -808,12 +811,7 @@ enum : unsigned { SHF_MIPS_STRING = 0x80000000, // Make code section unreadable when in execute-only mode - SHF_ARM_PURECODE = 0x20000000, - - SHF_AMDGPU_HSA_GLOBAL = 0x00100000, - SHF_AMDGPU_HSA_READONLY = 0x00200000, - SHF_AMDGPU_HSA_CODE = 0x00400000, - SHF_AMDGPU_HSA_AGENT = 0x00800000 + SHF_ARM_PURECODE = 0x20000000 }; // Section Group Flags @@ -897,9 +895,7 @@ enum { STT_HIPROC = 15, // Highest processor-specific symbol type // AMDGPU symbol types - STT_AMDGPU_HSA_KERNEL = 10, - STT_AMDGPU_HSA_INDIRECT_FUNCTION = 11, - STT_AMDGPU_HSA_METADATA = 12 + STT_AMDGPU_HSA_KERNEL = 10 }; enum { @@ -1050,12 +1046,6 @@ enum { PT_MIPS_OPTIONS = 0x70000002, // Options segment. PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment. - // AMDGPU program header types. - PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000, - PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001, - PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002, - PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003, - // WebAssembly program header types. PT_WEBASSEMBLY_FUNCTIONS = PT_LOPROC + 0, // Function definitions. }; @@ -1369,7 +1359,6 @@ enum { }; } // end namespace ELF - } // end namespace llvm -#endif +#endif // LLVM_BINARYFORMAT_ELF_H diff --git a/include/llvm/Support/ELFRelocs/AArch64.def b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def similarity index 89% rename from include/llvm/Support/ELFRelocs/AArch64.def rename to include/llvm/BinaryFormat/ELFRelocs/AArch64.def index c21df07d2dbc8f0efae3513ce428705738da5fe5..4afcd7d1f09394ac57547125cb7a22d7af8a89ab 100644 --- a/include/llvm/Support/ELFRelocs/AArch64.def +++ b/include/llvm/BinaryFormat/ELFRelocs/AArch64.def @@ -109,8 +109,8 @@ ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC, 0x22f) ELF_RELOC(R_AARCH64_TLSDESC_LD_PREL19, 0x230) ELF_RELOC(R_AARCH64_TLSDESC_ADR_PREL21, 0x231) ELF_RELOC(R_AARCH64_TLSDESC_ADR_PAGE21, 0x232) -ELF_RELOC(R_AARCH64_TLSDESC_LD64_LO12_NC, 0x233) -ELF_RELOC(R_AARCH64_TLSDESC_ADD_LO12_NC, 0x234) +ELF_RELOC(R_AARCH64_TLSDESC_LD64_LO12, 0x233) +ELF_RELOC(R_AARCH64_TLSDESC_ADD_LO12, 0x234) ELF_RELOC(R_AARCH64_TLSDESC_OFF_G1, 0x235) ELF_RELOC(R_AARCH64_TLSDESC_OFF_G0_NC, 0x236) ELF_RELOC(R_AARCH64_TLSDESC_LDR, 0x237) @@ -144,21 +144,28 @@ ELF_RELOC(R_AARCH64_P32_ADR_PREL_LO21, 0x00a) ELF_RELOC(R_AARCH64_P32_ADR_PREL_PG_HI21, 0x00b) ELF_RELOC(R_AARCH64_P32_ADD_ABS_LO12_NC, 0x00c) ELF_RELOC(R_AARCH64_P32_LDST8_ABS_LO12_NC, 0x00d) +ELF_RELOC(R_AARCH64_P32_LDST16_ABS_LO12_NC, 0x00e) +ELF_RELOC(R_AARCH64_P32_LDST32_ABS_LO12_NC, 0x00f) +ELF_RELOC(R_AARCH64_P32_LDST64_ABS_LO12_NC, 0x010) +ELF_RELOC(R_AARCH64_P32_LDST128_ABS_LO12_NC, 0x011) ELF_RELOC(R_AARCH64_P32_TSTBR14, 0x012) ELF_RELOC(R_AARCH64_P32_CONDBR19, 0x013) ELF_RELOC(R_AARCH64_P32_JUMP26, 0x014) ELF_RELOC(R_AARCH64_P32_CALL26, 0x015) -ELF_RELOC(R_AARCH64_P32_LDST16_ABS_LO12_NC, 0x00e) -ELF_RELOC(R_AARCH64_P32_LDST32_ABS_LO12_NC, 0x00f) -ELF_RELOC(R_AARCH64_P32_LDST64_ABS_LO12_NC, 0x010) ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G0, 0x016) ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G0_NC, 0x017) ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G1, 0x018) -ELF_RELOC(R_AARCH64_P32_LDST128_ABS_LO12_NC, 0x011) ELF_RELOC(R_AARCH64_P32_GOT_LD_PREL19, 0x019) ELF_RELOC(R_AARCH64_P32_ADR_GOT_PAGE, 0x01a) -ELF_RELOC(R_AARCH64_P32_LD64_GOT_LO12_NC, 0x01b) +ELF_RELOC(R_AARCH64_P32_LD32_GOT_LO12_NC, 0x01b) ELF_RELOC(R_AARCH64_P32_LD32_GOTPAGE_LO14, 0x01c) +ELF_RELOC(R_AARCH64_P32_TLSGD_ADR_PREL21, 0x050) +ELF_RELOC(R_AARCH64_P32_TLSGD_ADR_PAGE21, 0x051) +ELF_RELOC(R_AARCH64_P32_TLSGD_ADD_LO12_NC, 0x052) +ELF_RELOC(R_AARCH64_P32_TLSLD_ADR_PREL21, 0x053) +ELF_RELOC(R_AARCH64_P32_TLSLD_ADR_PAGE21, 0x054) +ELF_RELOC(R_AARCH64_P32_TLSLD_ADD_LO12_NC, 0x055) +ELF_RELOC(R_AARCH64_P32_TLSLD_LD_PREL19, 0x056) ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1, 0x057) ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0, 0x058) ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC, 0x059) @@ -173,6 +180,8 @@ ELF_RELOC(R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12, 0x061) ELF_RELOC(R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC, 0x062) ELF_RELOC(R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12, 0x063) ELF_RELOC(R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12_NC, 0x064) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12, 0x065) +ELF_RELOC(R_AARCH64_P32_TLSLD_LDST128_DTPREL_LO12_NC,0x066) ELF_RELOC(R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21, 0x067) ELF_RELOC(R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC, 0x068) ELF_RELOC(R_AARCH64_P32_TLSIE_LD_GOTTPREL_PREL19, 0x069) @@ -190,12 +199,20 @@ ELF_RELOC(R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12, 0x074) ELF_RELOC(R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC, 0x075) ELF_RELOC(R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12, 0x076) ELF_RELOC(R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12_NC, 0x077) -ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PAGE21, 0x051) -ELF_RELOC(R_AARCH64_P32_TLSDESC_LD32_LO12_NC, 0x07d) -ELF_RELOC(R_AARCH64_P32_TLSDESC_ADD_LO12_NC, 0x034) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12, 0x078) +ELF_RELOC(R_AARCH64_P32_TLSLE_LDST128_TPREL_LO12_NC, 0x079) +ELF_RELOC(R_AARCH64_P32_TLSDESC_LD_PREL19, 0x07a) +ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PREL21, 0x07b) +ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PAGE21, 0x07c) +ELF_RELOC(R_AARCH64_P32_TLSDESC_LD32_LO12, 0x07d) +ELF_RELOC(R_AARCH64_P32_TLSDESC_ADD_LO12, 0x07e) ELF_RELOC(R_AARCH64_P32_TLSDESC_CALL, 0x07f) ELF_RELOC(R_AARCH64_P32_COPY, 0x0b4) ELF_RELOC(R_AARCH64_P32_GLOB_DAT, 0x0b5) ELF_RELOC(R_AARCH64_P32_JUMP_SLOT, 0x0b6) ELF_RELOC(R_AARCH64_P32_RELATIVE, 0x0b7) +ELF_RELOC(R_AARCH64_P32_TLS_DTPREL, 0x0b8) +ELF_RELOC(R_AARCH64_P32_TLS_DTPMOD, 0x0b9) +ELF_RELOC(R_AARCH64_P32_TLS_TPREL, 0x0ba) +ELF_RELOC(R_AARCH64_P32_TLSDESC, 0x0bb) ELF_RELOC(R_AARCH64_P32_IRELATIVE, 0x0bc) diff --git a/include/llvm/Support/ELFRelocs/AMDGPU.def b/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def similarity index 100% rename from include/llvm/Support/ELFRelocs/AMDGPU.def rename to include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def diff --git a/include/llvm/Support/ELFRelocs/ARM.def b/include/llvm/BinaryFormat/ELFRelocs/ARM.def similarity index 100% rename from include/llvm/Support/ELFRelocs/ARM.def rename to include/llvm/BinaryFormat/ELFRelocs/ARM.def diff --git a/include/llvm/Support/ELFRelocs/AVR.def b/include/llvm/BinaryFormat/ELFRelocs/AVR.def similarity index 100% rename from include/llvm/Support/ELFRelocs/AVR.def rename to include/llvm/BinaryFormat/ELFRelocs/AVR.def diff --git a/include/llvm/Support/ELFRelocs/BPF.def b/include/llvm/BinaryFormat/ELFRelocs/BPF.def similarity index 100% rename from include/llvm/Support/ELFRelocs/BPF.def rename to include/llvm/BinaryFormat/ELFRelocs/BPF.def diff --git a/include/llvm/Support/ELFRelocs/Hexagon.def b/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def similarity index 95% rename from include/llvm/Support/ELFRelocs/Hexagon.def rename to include/llvm/BinaryFormat/ELFRelocs/Hexagon.def index 74e1d405cebdcc110eaaa43d32421c978e1278a1..5021e2b26ce5f3da881f531b9f2cf24b83051fff 100644 --- a/include/llvm/Support/ELFRelocs/Hexagon.def +++ b/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def @@ -99,3 +99,8 @@ ELF_RELOC(R_HEX_LD_GOT_32_6_X, 91) ELF_RELOC(R_HEX_LD_GOT_16_X, 92) ELF_RELOC(R_HEX_LD_GOT_11_X, 93) ELF_RELOC(R_HEX_23_REG, 94) +ELF_RELOC(R_HEX_GD_PLT_B22_PCREL_X, 95) +ELF_RELOC(R_HEX_GD_PLT_B32_PCREL_X, 96) +ELF_RELOC(R_HEX_LD_PLT_B22_PCREL_X, 97) +ELF_RELOC(R_HEX_LD_PLT_B32_PCREL_X, 98) +ELF_RELOC(R_HEX_27_REG, 99) diff --git a/include/llvm/Support/ELFRelocs/Lanai.def b/include/llvm/BinaryFormat/ELFRelocs/Lanai.def similarity index 100% rename from include/llvm/Support/ELFRelocs/Lanai.def rename to include/llvm/BinaryFormat/ELFRelocs/Lanai.def diff --git a/include/llvm/Support/ELFRelocs/Mips.def b/include/llvm/BinaryFormat/ELFRelocs/Mips.def similarity index 100% rename from include/llvm/Support/ELFRelocs/Mips.def rename to include/llvm/BinaryFormat/ELFRelocs/Mips.def diff --git a/include/llvm/Support/ELFRelocs/PowerPC.def b/include/llvm/BinaryFormat/ELFRelocs/PowerPC.def similarity index 100% rename from include/llvm/Support/ELFRelocs/PowerPC.def rename to include/llvm/BinaryFormat/ELFRelocs/PowerPC.def diff --git a/include/llvm/Support/ELFRelocs/PowerPC64.def b/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def similarity index 100% rename from include/llvm/Support/ELFRelocs/PowerPC64.def rename to include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def diff --git a/include/llvm/Support/ELFRelocs/RISCV.def b/include/llvm/BinaryFormat/ELFRelocs/RISCV.def similarity index 100% rename from include/llvm/Support/ELFRelocs/RISCV.def rename to include/llvm/BinaryFormat/ELFRelocs/RISCV.def diff --git a/include/llvm/Support/ELFRelocs/Sparc.def b/include/llvm/BinaryFormat/ELFRelocs/Sparc.def similarity index 100% rename from include/llvm/Support/ELFRelocs/Sparc.def rename to include/llvm/BinaryFormat/ELFRelocs/Sparc.def diff --git a/include/llvm/Support/ELFRelocs/SystemZ.def b/include/llvm/BinaryFormat/ELFRelocs/SystemZ.def similarity index 100% rename from include/llvm/Support/ELFRelocs/SystemZ.def rename to include/llvm/BinaryFormat/ELFRelocs/SystemZ.def diff --git a/include/llvm/Support/ELFRelocs/WebAssembly.def b/include/llvm/BinaryFormat/ELFRelocs/WebAssembly.def similarity index 100% rename from include/llvm/Support/ELFRelocs/WebAssembly.def rename to include/llvm/BinaryFormat/ELFRelocs/WebAssembly.def diff --git a/include/llvm/Support/ELFRelocs/i386.def b/include/llvm/BinaryFormat/ELFRelocs/i386.def similarity index 100% rename from include/llvm/Support/ELFRelocs/i386.def rename to include/llvm/BinaryFormat/ELFRelocs/i386.def diff --git a/include/llvm/Support/ELFRelocs/x86_64.def b/include/llvm/BinaryFormat/ELFRelocs/x86_64.def similarity index 100% rename from include/llvm/Support/ELFRelocs/x86_64.def rename to include/llvm/BinaryFormat/ELFRelocs/x86_64.def diff --git a/include/llvm/Support/MachO.def b/include/llvm/BinaryFormat/MachO.def similarity index 100% rename from include/llvm/Support/MachO.def rename to include/llvm/BinaryFormat/MachO.def diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h new file mode 100644 index 0000000000000000000000000000000000000000..8ab6dde800c2b5e7be8726f9ff762215ef0af5e1 --- /dev/null +++ b/include/llvm/BinaryFormat/MachO.h @@ -0,0 +1,1984 @@ +//===-- llvm/BinaryFormat/MachO.h - The MachO file format -------*- C++/-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines manifest constants for the MachO object file format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_MACHO_H +#define LLVM_BINARYFORMAT_MACHO_H + +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Host.h" + +namespace llvm { +namespace MachO { +// Enums from +enum : uint32_t { + // Constants for the "magic" field in llvm::MachO::mach_header and + // llvm::MachO::mach_header_64 + MH_MAGIC = 0xFEEDFACEu, + MH_CIGAM = 0xCEFAEDFEu, + MH_MAGIC_64 = 0xFEEDFACFu, + MH_CIGAM_64 = 0xCFFAEDFEu, + FAT_MAGIC = 0xCAFEBABEu, + FAT_CIGAM = 0xBEBAFECAu, + FAT_MAGIC_64 = 0xCAFEBABFu, + FAT_CIGAM_64 = 0xBFBAFECAu +}; + +enum HeaderFileType { + // Constants for the "filetype" field in llvm::MachO::mach_header and + // llvm::MachO::mach_header_64 + MH_OBJECT = 0x1u, + MH_EXECUTE = 0x2u, + MH_FVMLIB = 0x3u, + MH_CORE = 0x4u, + MH_PRELOAD = 0x5u, + MH_DYLIB = 0x6u, + MH_DYLINKER = 0x7u, + MH_BUNDLE = 0x8u, + MH_DYLIB_STUB = 0x9u, + MH_DSYM = 0xAu, + MH_KEXT_BUNDLE = 0xBu +}; + +enum { + // Constant bits for the "flags" field in llvm::MachO::mach_header and + // llvm::MachO::mach_header_64 + MH_NOUNDEFS = 0x00000001u, + MH_INCRLINK = 0x00000002u, + MH_DYLDLINK = 0x00000004u, + MH_BINDATLOAD = 0x00000008u, + MH_PREBOUND = 0x00000010u, + MH_SPLIT_SEGS = 0x00000020u, + MH_LAZY_INIT = 0x00000040u, + MH_TWOLEVEL = 0x00000080u, + MH_FORCE_FLAT = 0x00000100u, + MH_NOMULTIDEFS = 0x00000200u, + MH_NOFIXPREBINDING = 0x00000400u, + MH_PREBINDABLE = 0x00000800u, + MH_ALLMODSBOUND = 0x00001000u, + MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000u, + MH_CANONICAL = 0x00004000u, + MH_WEAK_DEFINES = 0x00008000u, + MH_BINDS_TO_WEAK = 0x00010000u, + MH_ALLOW_STACK_EXECUTION = 0x00020000u, + MH_ROOT_SAFE = 0x00040000u, + MH_SETUID_SAFE = 0x00080000u, + MH_NO_REEXPORTED_DYLIBS = 0x00100000u, + MH_PIE = 0x00200000u, + MH_DEAD_STRIPPABLE_DYLIB = 0x00400000u, + MH_HAS_TLV_DESCRIPTORS = 0x00800000u, + MH_NO_HEAP_EXECUTION = 0x01000000u, + MH_APP_EXTENSION_SAFE = 0x02000000u +}; + +enum : uint32_t { + // Flags for the "cmd" field in llvm::MachO::load_command + LC_REQ_DYLD = 0x80000000u +}; + +#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) LCName = LCValue, + +enum LoadCommandType : uint32_t { +#include "llvm/BinaryFormat/MachO.def" +}; + +#undef HANDLE_LOAD_COMMAND + +enum : uint32_t { + // Constant bits for the "flags" field in llvm::MachO::segment_command + SG_HIGHVM = 0x1u, + SG_FVMLIB = 0x2u, + SG_NORELOC = 0x4u, + SG_PROTECTED_VERSION_1 = 0x8u, + + // Constant masks for the "flags" field in llvm::MachO::section and + // llvm::MachO::section_64 + SECTION_TYPE = 0x000000ffu, // SECTION_TYPE + SECTION_ATTRIBUTES = 0xffffff00u, // SECTION_ATTRIBUTES + SECTION_ATTRIBUTES_USR = 0xff000000u, // SECTION_ATTRIBUTES_USR + SECTION_ATTRIBUTES_SYS = 0x00ffff00u // SECTION_ATTRIBUTES_SYS +}; + +/// These are the section type and attributes fields. A MachO section can +/// have only one Type, but can have any of the attributes specified. +enum SectionType : uint32_t { + // Constant masks for the "flags[7:0]" field in llvm::MachO::section and + // llvm::MachO::section_64 (mask "flags" with SECTION_TYPE) + + /// S_REGULAR - Regular section. + S_REGULAR = 0x00u, + /// S_ZEROFILL - Zero fill on demand section. + S_ZEROFILL = 0x01u, + /// S_CSTRING_LITERALS - Section with literal C strings. + S_CSTRING_LITERALS = 0x02u, + /// S_4BYTE_LITERALS - Section with 4 byte literals. + S_4BYTE_LITERALS = 0x03u, + /// S_8BYTE_LITERALS - Section with 8 byte literals. + S_8BYTE_LITERALS = 0x04u, + /// S_LITERAL_POINTERS - Section with pointers to literals. + S_LITERAL_POINTERS = 0x05u, + /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers. + S_NON_LAZY_SYMBOL_POINTERS = 0x06u, + /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers. + S_LAZY_SYMBOL_POINTERS = 0x07u, + /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in + /// the Reserved2 field. + S_SYMBOL_STUBS = 0x08u, + /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for + /// initialization. + S_MOD_INIT_FUNC_POINTERS = 0x09u, + /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for + /// termination. + S_MOD_TERM_FUNC_POINTERS = 0x0au, + /// S_COALESCED - Section contains symbols that are to be coalesced. + S_COALESCED = 0x0bu, + /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4 + /// gigabytes). + S_GB_ZEROFILL = 0x0cu, + /// S_INTERPOSING - Section with only pairs of function pointers for + /// interposing. + S_INTERPOSING = 0x0du, + /// S_16BYTE_LITERALS - Section with only 16 byte literals. + S_16BYTE_LITERALS = 0x0eu, + /// S_DTRACE_DOF - Section contains DTrace Object Format. + S_DTRACE_DOF = 0x0fu, + /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to + /// lazy loaded dylibs. + S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10u, + /// S_THREAD_LOCAL_REGULAR - Thread local data section. + S_THREAD_LOCAL_REGULAR = 0x11u, + /// S_THREAD_LOCAL_ZEROFILL - Thread local zerofill section. + S_THREAD_LOCAL_ZEROFILL = 0x12u, + /// S_THREAD_LOCAL_VARIABLES - Section with thread local variable + /// structure data. + S_THREAD_LOCAL_VARIABLES = 0x13u, + /// S_THREAD_LOCAL_VARIABLE_POINTERS - Section with pointers to thread + /// local structures. + S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14u, + /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local + /// variable initialization pointers to functions. + S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u, + + LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS +}; + +enum : uint32_t { + // Constant masks for the "flags[31:24]" field in llvm::MachO::section and + // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_USR) + + /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine + /// instructions. + S_ATTR_PURE_INSTRUCTIONS = 0x80000000u, + /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be + /// in a ranlib table of contents. + S_ATTR_NO_TOC = 0x40000000u, + /// S_ATTR_STRIP_STATIC_SYMS - Ok to strip static symbols in this section + /// in files with the MY_DYLDLINK flag. + S_ATTR_STRIP_STATIC_SYMS = 0x20000000u, + /// S_ATTR_NO_DEAD_STRIP - No dead stripping. + S_ATTR_NO_DEAD_STRIP = 0x10000000u, + /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks. + S_ATTR_LIVE_SUPPORT = 0x08000000u, + /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by + /// dyld. + S_ATTR_SELF_MODIFYING_CODE = 0x04000000u, + /// S_ATTR_DEBUG - A debug section. + S_ATTR_DEBUG = 0x02000000u, + + // Constant masks for the "flags[23:8]" field in llvm::MachO::section and + // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_SYS) + + /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions. + S_ATTR_SOME_INSTRUCTIONS = 0x00000400u, + /// S_ATTR_EXT_RELOC - Section has external relocation entries. + S_ATTR_EXT_RELOC = 0x00000200u, + /// S_ATTR_LOC_RELOC - Section has local relocation entries. + S_ATTR_LOC_RELOC = 0x00000100u, + + // Constant masks for the value of an indirect symbol in an indirect + // symbol table + INDIRECT_SYMBOL_LOCAL = 0x80000000u, + INDIRECT_SYMBOL_ABS = 0x40000000u +}; + +enum DataRegionType { + // Constants for the "kind" field in a data_in_code_entry structure + DICE_KIND_DATA = 1u, + DICE_KIND_JUMP_TABLE8 = 2u, + DICE_KIND_JUMP_TABLE16 = 3u, + DICE_KIND_JUMP_TABLE32 = 4u, + DICE_KIND_ABS_JUMP_TABLE32 = 5u +}; + +enum RebaseType { + REBASE_TYPE_POINTER = 1u, + REBASE_TYPE_TEXT_ABSOLUTE32 = 2u, + REBASE_TYPE_TEXT_PCREL32 = 3u +}; + +enum { REBASE_OPCODE_MASK = 0xF0u, REBASE_IMMEDIATE_MASK = 0x0Fu }; + +enum RebaseOpcode { + REBASE_OPCODE_DONE = 0x00u, + REBASE_OPCODE_SET_TYPE_IMM = 0x10u, + REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x20u, + REBASE_OPCODE_ADD_ADDR_ULEB = 0x30u, + REBASE_OPCODE_ADD_ADDR_IMM_SCALED = 0x40u, + REBASE_OPCODE_DO_REBASE_IMM_TIMES = 0x50u, + REBASE_OPCODE_DO_REBASE_ULEB_TIMES = 0x60u, + REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB = 0x70u, + REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB = 0x80u +}; + +enum BindType { + BIND_TYPE_POINTER = 1u, + BIND_TYPE_TEXT_ABSOLUTE32 = 2u, + BIND_TYPE_TEXT_PCREL32 = 3u +}; + +enum BindSpecialDylib { + BIND_SPECIAL_DYLIB_SELF = 0, + BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1, + BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2 +}; + +enum { + BIND_SYMBOL_FLAGS_WEAK_IMPORT = 0x1u, + BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x8u, + + BIND_OPCODE_MASK = 0xF0u, + BIND_IMMEDIATE_MASK = 0x0Fu +}; + +enum BindOpcode { + BIND_OPCODE_DONE = 0x00u, + BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10u, + BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20u, + BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30u, + BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40u, + BIND_OPCODE_SET_TYPE_IMM = 0x50u, + BIND_OPCODE_SET_ADDEND_SLEB = 0x60u, + BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70u, + BIND_OPCODE_ADD_ADDR_ULEB = 0x80u, + BIND_OPCODE_DO_BIND = 0x90u, + BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0u, + BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0u, + BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0u +}; + +enum { + EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u, + EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04u, + EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08u, + EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10u +}; + +enum ExportSymbolKind { + EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00u, + EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01u, + EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u +}; + +enum { + // Constant masks for the "n_type" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 + N_STAB = 0xe0, + N_PEXT = 0x10, + N_TYPE = 0x0e, + N_EXT = 0x01 +}; + +enum NListType : uint8_t { + // Constants for the "n_type & N_TYPE" llvm::MachO::nlist and + // llvm::MachO::nlist_64 + N_UNDF = 0x0u, + N_ABS = 0x2u, + N_SECT = 0xeu, + N_PBUD = 0xcu, + N_INDR = 0xau +}; + +enum SectionOrdinal { + // Constants for the "n_sect" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 + NO_SECT = 0u, + MAX_SECT = 0xffu +}; + +enum { + // Constant masks for the "n_desc" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 + // The low 3 bits are the for the REFERENCE_TYPE. + REFERENCE_TYPE = 0x7, + REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, + REFERENCE_FLAG_UNDEFINED_LAZY = 1, + REFERENCE_FLAG_DEFINED = 2, + REFERENCE_FLAG_PRIVATE_DEFINED = 3, + REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, + REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5, + // Flag bits (some overlap with the library ordinal bits). + N_ARM_THUMB_DEF = 0x0008u, + REFERENCED_DYNAMICALLY = 0x0010u, + N_NO_DEAD_STRIP = 0x0020u, + N_WEAK_REF = 0x0040u, + N_WEAK_DEF = 0x0080u, + N_SYMBOL_RESOLVER = 0x0100u, + N_ALT_ENTRY = 0x0200u, + // For undefined symbols coming from libraries, see GET_LIBRARY_ORDINAL() + // as these are in the top 8 bits. + SELF_LIBRARY_ORDINAL = 0x0, + MAX_LIBRARY_ORDINAL = 0xfd, + DYNAMIC_LOOKUP_ORDINAL = 0xfe, + EXECUTABLE_ORDINAL = 0xff +}; + +enum StabType { + // Constant values for the "n_type" field in llvm::MachO::nlist and + // llvm::MachO::nlist_64 when "(n_type & N_STAB) != 0" + N_GSYM = 0x20u, + N_FNAME = 0x22u, + N_FUN = 0x24u, + N_STSYM = 0x26u, + N_LCSYM = 0x28u, + N_BNSYM = 0x2Eu, + N_PC = 0x30u, + N_AST = 0x32u, + N_OPT = 0x3Cu, + N_RSYM = 0x40u, + N_SLINE = 0x44u, + N_ENSYM = 0x4Eu, + N_SSYM = 0x60u, + N_SO = 0x64u, + N_OSO = 0x66u, + N_LSYM = 0x80u, + N_BINCL = 0x82u, + N_SOL = 0x84u, + N_PARAMS = 0x86u, + N_VERSION = 0x88u, + N_OLEVEL = 0x8Au, + N_PSYM = 0xA0u, + N_EINCL = 0xA2u, + N_ENTRY = 0xA4u, + N_LBRAC = 0xC0u, + N_EXCL = 0xC2u, + N_RBRAC = 0xE0u, + N_BCOMM = 0xE2u, + N_ECOMM = 0xE4u, + N_ECOML = 0xE8u, + N_LENG = 0xFEu +}; + +enum : uint32_t { + // Constant values for the r_symbolnum field in an + // llvm::MachO::relocation_info structure when r_extern is 0. + R_ABS = 0, + + // Constant bits for the r_address field in an + // llvm::MachO::relocation_info structure. + R_SCATTERED = 0x80000000 +}; + +enum RelocationInfoType { + // Constant values for the r_type field in an + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + GENERIC_RELOC_VANILLA = 0, + GENERIC_RELOC_PAIR = 1, + GENERIC_RELOC_SECTDIFF = 2, + GENERIC_RELOC_PB_LA_PTR = 3, + GENERIC_RELOC_LOCAL_SECTDIFF = 4, + GENERIC_RELOC_TLV = 5, + + // Constant values for the r_type field in a PowerPC architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + PPC_RELOC_VANILLA = GENERIC_RELOC_VANILLA, + PPC_RELOC_PAIR = GENERIC_RELOC_PAIR, + PPC_RELOC_BR14 = 2, + PPC_RELOC_BR24 = 3, + PPC_RELOC_HI16 = 4, + PPC_RELOC_LO16 = 5, + PPC_RELOC_HA16 = 6, + PPC_RELOC_LO14 = 7, + PPC_RELOC_SECTDIFF = 8, + PPC_RELOC_PB_LA_PTR = 9, + PPC_RELOC_HI16_SECTDIFF = 10, + PPC_RELOC_LO16_SECTDIFF = 11, + PPC_RELOC_HA16_SECTDIFF = 12, + PPC_RELOC_JBSR = 13, + PPC_RELOC_LO14_SECTDIFF = 14, + PPC_RELOC_LOCAL_SECTDIFF = 15, + + // Constant values for the r_type field in an ARM architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + ARM_RELOC_VANILLA = GENERIC_RELOC_VANILLA, + ARM_RELOC_PAIR = GENERIC_RELOC_PAIR, + ARM_RELOC_SECTDIFF = GENERIC_RELOC_SECTDIFF, + ARM_RELOC_LOCAL_SECTDIFF = 3, + ARM_RELOC_PB_LA_PTR = 4, + ARM_RELOC_BR24 = 5, + ARM_THUMB_RELOC_BR22 = 6, + ARM_THUMB_32BIT_BRANCH = 7, // obsolete + ARM_RELOC_HALF = 8, + ARM_RELOC_HALF_SECTDIFF = 9, + + // Constant values for the r_type field in an ARM64 architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure. + + // For pointers. + ARM64_RELOC_UNSIGNED = 0, + // Must be followed by an ARM64_RELOC_UNSIGNED + ARM64_RELOC_SUBTRACTOR = 1, + // A B/BL instruction with 26-bit displacement. + ARM64_RELOC_BRANCH26 = 2, + // PC-rel distance to page of target. + ARM64_RELOC_PAGE21 = 3, + // Offset within page, scaled by r_length. + ARM64_RELOC_PAGEOFF12 = 4, + // PC-rel distance to page of GOT slot. + ARM64_RELOC_GOT_LOAD_PAGE21 = 5, + // Offset within page of GOT slot, scaled by r_length. + ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6, + // For pointers to GOT slots. + ARM64_RELOC_POINTER_TO_GOT = 7, + // PC-rel distance to page of TLVP slot. + ARM64_RELOC_TLVP_LOAD_PAGE21 = 8, + // Offset within page of TLVP slot, scaled by r_length. + ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9, + // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. + ARM64_RELOC_ADDEND = 10, + + // Constant values for the r_type field in an x86_64 architecture + // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info + // structure + X86_64_RELOC_UNSIGNED = 0, + X86_64_RELOC_SIGNED = 1, + X86_64_RELOC_BRANCH = 2, + X86_64_RELOC_GOT_LOAD = 3, + X86_64_RELOC_GOT = 4, + X86_64_RELOC_SUBTRACTOR = 5, + X86_64_RELOC_SIGNED_1 = 6, + X86_64_RELOC_SIGNED_2 = 7, + X86_64_RELOC_SIGNED_4 = 8, + X86_64_RELOC_TLV = 9 +}; + +// Values for segment_command.initprot. +// From +enum { VM_PROT_READ = 0x1, VM_PROT_WRITE = 0x2, VM_PROT_EXECUTE = 0x4 }; + +// Values for platform field in build_version_command. +enum { + PLATFORM_MACOS = 1, + PLATFORM_IOS = 2, + PLATFORM_TVOS = 3, + PLATFORM_WATCHOS = 4, + PLATFORM_BRIDGEOS = 5 +}; + +// Values for tools enum in build_tool_version. +enum { TOOL_CLANG = 1, TOOL_SWIFT = 2, TOOL_LD = 3 }; + +// Structs from + +struct mach_header { + uint32_t magic; + uint32_t cputype; + uint32_t cpusubtype; + uint32_t filetype; + uint32_t ncmds; + uint32_t sizeofcmds; + uint32_t flags; +}; + +struct mach_header_64 { + uint32_t magic; + uint32_t cputype; + uint32_t cpusubtype; + uint32_t filetype; + uint32_t ncmds; + uint32_t sizeofcmds; + uint32_t flags; + uint32_t reserved; +}; + +struct load_command { + uint32_t cmd; + uint32_t cmdsize; +}; + +struct segment_command { + uint32_t cmd; + uint32_t cmdsize; + char segname[16]; + uint32_t vmaddr; + uint32_t vmsize; + uint32_t fileoff; + uint32_t filesize; + uint32_t maxprot; + uint32_t initprot; + uint32_t nsects; + uint32_t flags; +}; + +struct segment_command_64 { + uint32_t cmd; + uint32_t cmdsize; + char segname[16]; + uint64_t vmaddr; + uint64_t vmsize; + uint64_t fileoff; + uint64_t filesize; + uint32_t maxprot; + uint32_t initprot; + uint32_t nsects; + uint32_t flags; +}; + +struct section { + char sectname[16]; + char segname[16]; + uint32_t addr; + uint32_t size; + uint32_t offset; + uint32_t align; + uint32_t reloff; + uint32_t nreloc; + uint32_t flags; + uint32_t reserved1; + uint32_t reserved2; +}; + +struct section_64 { + char sectname[16]; + char segname[16]; + uint64_t addr; + uint64_t size; + uint32_t offset; + uint32_t align; + uint32_t reloff; + uint32_t nreloc; + uint32_t flags; + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; +}; + +struct fvmlib { + uint32_t name; + uint32_t minor_version; + uint32_t header_addr; +}; + +// The fvmlib_command is obsolete and no longer supported. +struct fvmlib_command { + uint32_t cmd; + uint32_t cmdsize; + struct fvmlib fvmlib; +}; + +struct dylib { + uint32_t name; + uint32_t timestamp; + uint32_t current_version; + uint32_t compatibility_version; +}; + +struct dylib_command { + uint32_t cmd; + uint32_t cmdsize; + struct dylib dylib; +}; + +struct sub_framework_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t umbrella; +}; + +struct sub_client_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t client; +}; + +struct sub_umbrella_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t sub_umbrella; +}; + +struct sub_library_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t sub_library; +}; + +// The prebound_dylib_command is obsolete and no longer supported. +struct prebound_dylib_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; + uint32_t nmodules; + uint32_t linked_modules; +}; + +struct dylinker_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; +}; + +struct thread_command { + uint32_t cmd; + uint32_t cmdsize; +}; + +struct routines_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t init_address; + uint32_t init_module; + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; + uint32_t reserved4; + uint32_t reserved5; + uint32_t reserved6; +}; + +struct routines_command_64 { + uint32_t cmd; + uint32_t cmdsize; + uint64_t init_address; + uint64_t init_module; + uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; + uint64_t reserved4; + uint64_t reserved5; + uint64_t reserved6; +}; + +struct symtab_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t symoff; + uint32_t nsyms; + uint32_t stroff; + uint32_t strsize; +}; + +struct dysymtab_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t iundefsym; + uint32_t nundefsym; + uint32_t tocoff; + uint32_t ntoc; + uint32_t modtaboff; + uint32_t nmodtab; + uint32_t extrefsymoff; + uint32_t nextrefsyms; + uint32_t indirectsymoff; + uint32_t nindirectsyms; + uint32_t extreloff; + uint32_t nextrel; + uint32_t locreloff; + uint32_t nlocrel; +}; + +struct dylib_table_of_contents { + uint32_t symbol_index; + uint32_t module_index; +}; + +struct dylib_module { + uint32_t module_name; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t irefsym; + uint32_t nrefsym; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextrel; + uint32_t nextrel; + uint32_t iinit_iterm; + uint32_t ninit_nterm; + uint32_t objc_module_info_addr; + uint32_t objc_module_info_size; +}; + +struct dylib_module_64 { + uint32_t module_name; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t irefsym; + uint32_t nrefsym; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextrel; + uint32_t nextrel; + uint32_t iinit_iterm; + uint32_t ninit_nterm; + uint32_t objc_module_info_size; + uint64_t objc_module_info_addr; +}; + +struct dylib_reference { + uint32_t isym : 24, flags : 8; +}; + +// The twolevel_hints_command is obsolete and no longer supported. +struct twolevel_hints_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t offset; + uint32_t nhints; +}; + +// The twolevel_hints_command is obsolete and no longer supported. +struct twolevel_hint { + uint32_t isub_image : 8, itoc : 24; +}; + +// The prebind_cksum_command is obsolete and no longer supported. +struct prebind_cksum_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cksum; +}; + +struct uuid_command { + uint32_t cmd; + uint32_t cmdsize; + uint8_t uuid[16]; +}; + +struct rpath_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t path; +}; + +struct linkedit_data_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t dataoff; + uint32_t datasize; +}; + +struct data_in_code_entry { + uint32_t offset; + uint16_t length; + uint16_t kind; +}; + +struct source_version_command { + uint32_t cmd; + uint32_t cmdsize; + uint64_t version; +}; + +struct encryption_info_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cryptoff; + uint32_t cryptsize; + uint32_t cryptid; +}; + +struct encryption_info_command_64 { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cryptoff; + uint32_t cryptsize; + uint32_t cryptid; + uint32_t pad; +}; + +struct version_min_command { + uint32_t cmd; // LC_VERSION_MIN_MACOSX or + // LC_VERSION_MIN_IPHONEOS + uint32_t cmdsize; // sizeof(struct version_min_command) + uint32_t version; // X.Y.Z is encoded in nibbles xxxx.yy.zz + uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz +}; + +struct note_command { + uint32_t cmd; // LC_NOTE + uint32_t cmdsize; // sizeof(struct note_command) + char data_owner[16]; // owner name for this LC_NOTE + uint64_t offset; // file offset of this data + uint64_t size; // length of data region +}; + +struct build_tool_version { + uint32_t tool; // enum for the tool + uint32_t version; // version of the tool +}; + +struct build_version_command { + uint32_t cmd; // LC_BUILD_VERSION + uint32_t cmdsize; // sizeof(struct build_version_command) + + // ntools * sizeof(struct build_tool_version) + uint32_t platform; // platform + uint32_t minos; // X.Y.Z is encoded in nibbles xxxx.yy.zz + uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz + uint32_t ntools; // number of tool entries following this +}; + +struct dyld_info_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t rebase_off; + uint32_t rebase_size; + uint32_t bind_off; + uint32_t bind_size; + uint32_t weak_bind_off; + uint32_t weak_bind_size; + uint32_t lazy_bind_off; + uint32_t lazy_bind_size; + uint32_t export_off; + uint32_t export_size; +}; + +struct linker_option_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t count; +}; + +// The symseg_command is obsolete and no longer supported. +struct symseg_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t offset; + uint32_t size; +}; + +// The ident_command is obsolete and no longer supported. +struct ident_command { + uint32_t cmd; + uint32_t cmdsize; +}; + +// The fvmfile_command is obsolete and no longer supported. +struct fvmfile_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; + uint32_t header_addr; +}; + +struct tlv_descriptor_32 { + uint32_t thunk; + uint32_t key; + uint32_t offset; +}; + +struct tlv_descriptor_64 { + uint64_t thunk; + uint64_t key; + uint64_t offset; +}; + +struct tlv_descriptor { + uintptr_t thunk; + uintptr_t key; + uintptr_t offset; +}; + +struct entry_point_command { + uint32_t cmd; + uint32_t cmdsize; + uint64_t entryoff; + uint64_t stacksize; +}; + +// Structs from +struct fat_header { + uint32_t magic; + uint32_t nfat_arch; +}; + +struct fat_arch { + uint32_t cputype; + uint32_t cpusubtype; + uint32_t offset; + uint32_t size; + uint32_t align; +}; + +struct fat_arch_64 { + uint32_t cputype; + uint32_t cpusubtype; + uint64_t offset; + uint64_t size; + uint32_t align; + uint32_t reserved; +}; + +// Structs from +struct relocation_info { + int32_t r_address; + uint32_t r_symbolnum : 24, r_pcrel : 1, r_length : 2, r_extern : 1, + r_type : 4; +}; + +struct scattered_relocation_info { +#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) + uint32_t r_scattered : 1, r_pcrel : 1, r_length : 2, r_type : 4, + r_address : 24; +#else + uint32_t r_address : 24, r_type : 4, r_length : 2, r_pcrel : 1, + r_scattered : 1; +#endif + int32_t r_value; +}; + +// Structs NOT from , but that make LLVM's life easier +struct any_relocation_info { + uint32_t r_word0, r_word1; +}; + +// Structs from +struct nlist_base { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; +}; + +struct nlist { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + int16_t n_desc; + uint32_t n_value; +}; + +struct nlist_64 { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; + uint64_t n_value; +}; + +// Byte order swapping functions for MachO structs + +inline void swapStruct(fat_header &mh) { + sys::swapByteOrder(mh.magic); + sys::swapByteOrder(mh.nfat_arch); +} + +inline void swapStruct(fat_arch &mh) { + sys::swapByteOrder(mh.cputype); + sys::swapByteOrder(mh.cpusubtype); + sys::swapByteOrder(mh.offset); + sys::swapByteOrder(mh.size); + sys::swapByteOrder(mh.align); +} + +inline void swapStruct(fat_arch_64 &mh) { + sys::swapByteOrder(mh.cputype); + sys::swapByteOrder(mh.cpusubtype); + sys::swapByteOrder(mh.offset); + sys::swapByteOrder(mh.size); + sys::swapByteOrder(mh.align); + sys::swapByteOrder(mh.reserved); +} + +inline void swapStruct(mach_header &mh) { + sys::swapByteOrder(mh.magic); + sys::swapByteOrder(mh.cputype); + sys::swapByteOrder(mh.cpusubtype); + sys::swapByteOrder(mh.filetype); + sys::swapByteOrder(mh.ncmds); + sys::swapByteOrder(mh.sizeofcmds); + sys::swapByteOrder(mh.flags); +} + +inline void swapStruct(mach_header_64 &H) { + sys::swapByteOrder(H.magic); + sys::swapByteOrder(H.cputype); + sys::swapByteOrder(H.cpusubtype); + sys::swapByteOrder(H.filetype); + sys::swapByteOrder(H.ncmds); + sys::swapByteOrder(H.sizeofcmds); + sys::swapByteOrder(H.flags); + sys::swapByteOrder(H.reserved); +} + +inline void swapStruct(load_command &lc) { + sys::swapByteOrder(lc.cmd); + sys::swapByteOrder(lc.cmdsize); +} + +inline void swapStruct(symtab_command &lc) { + sys::swapByteOrder(lc.cmd); + sys::swapByteOrder(lc.cmdsize); + sys::swapByteOrder(lc.symoff); + sys::swapByteOrder(lc.nsyms); + sys::swapByteOrder(lc.stroff); + sys::swapByteOrder(lc.strsize); +} + +inline void swapStruct(segment_command_64 &seg) { + sys::swapByteOrder(seg.cmd); + sys::swapByteOrder(seg.cmdsize); + sys::swapByteOrder(seg.vmaddr); + sys::swapByteOrder(seg.vmsize); + sys::swapByteOrder(seg.fileoff); + sys::swapByteOrder(seg.filesize); + sys::swapByteOrder(seg.maxprot); + sys::swapByteOrder(seg.initprot); + sys::swapByteOrder(seg.nsects); + sys::swapByteOrder(seg.flags); +} + +inline void swapStruct(segment_command &seg) { + sys::swapByteOrder(seg.cmd); + sys::swapByteOrder(seg.cmdsize); + sys::swapByteOrder(seg.vmaddr); + sys::swapByteOrder(seg.vmsize); + sys::swapByteOrder(seg.fileoff); + sys::swapByteOrder(seg.filesize); + sys::swapByteOrder(seg.maxprot); + sys::swapByteOrder(seg.initprot); + sys::swapByteOrder(seg.nsects); + sys::swapByteOrder(seg.flags); +} + +inline void swapStruct(section_64 §) { + sys::swapByteOrder(sect.addr); + sys::swapByteOrder(sect.size); + sys::swapByteOrder(sect.offset); + sys::swapByteOrder(sect.align); + sys::swapByteOrder(sect.reloff); + sys::swapByteOrder(sect.nreloc); + sys::swapByteOrder(sect.flags); + sys::swapByteOrder(sect.reserved1); + sys::swapByteOrder(sect.reserved2); +} + +inline void swapStruct(section §) { + sys::swapByteOrder(sect.addr); + sys::swapByteOrder(sect.size); + sys::swapByteOrder(sect.offset); + sys::swapByteOrder(sect.align); + sys::swapByteOrder(sect.reloff); + sys::swapByteOrder(sect.nreloc); + sys::swapByteOrder(sect.flags); + sys::swapByteOrder(sect.reserved1); + sys::swapByteOrder(sect.reserved2); +} + +inline void swapStruct(dyld_info_command &info) { + sys::swapByteOrder(info.cmd); + sys::swapByteOrder(info.cmdsize); + sys::swapByteOrder(info.rebase_off); + sys::swapByteOrder(info.rebase_size); + sys::swapByteOrder(info.bind_off); + sys::swapByteOrder(info.bind_size); + sys::swapByteOrder(info.weak_bind_off); + sys::swapByteOrder(info.weak_bind_size); + sys::swapByteOrder(info.lazy_bind_off); + sys::swapByteOrder(info.lazy_bind_size); + sys::swapByteOrder(info.export_off); + sys::swapByteOrder(info.export_size); +} + +inline void swapStruct(dylib_command &d) { + sys::swapByteOrder(d.cmd); + sys::swapByteOrder(d.cmdsize); + sys::swapByteOrder(d.dylib.name); + sys::swapByteOrder(d.dylib.timestamp); + sys::swapByteOrder(d.dylib.current_version); + sys::swapByteOrder(d.dylib.compatibility_version); +} + +inline void swapStruct(sub_framework_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.umbrella); +} + +inline void swapStruct(sub_umbrella_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.sub_umbrella); +} + +inline void swapStruct(sub_library_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.sub_library); +} + +inline void swapStruct(sub_client_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.client); +} + +inline void swapStruct(routines_command &r) { + sys::swapByteOrder(r.cmd); + sys::swapByteOrder(r.cmdsize); + sys::swapByteOrder(r.init_address); + sys::swapByteOrder(r.init_module); + sys::swapByteOrder(r.reserved1); + sys::swapByteOrder(r.reserved2); + sys::swapByteOrder(r.reserved3); + sys::swapByteOrder(r.reserved4); + sys::swapByteOrder(r.reserved5); + sys::swapByteOrder(r.reserved6); +} + +inline void swapStruct(routines_command_64 &r) { + sys::swapByteOrder(r.cmd); + sys::swapByteOrder(r.cmdsize); + sys::swapByteOrder(r.init_address); + sys::swapByteOrder(r.init_module); + sys::swapByteOrder(r.reserved1); + sys::swapByteOrder(r.reserved2); + sys::swapByteOrder(r.reserved3); + sys::swapByteOrder(r.reserved4); + sys::swapByteOrder(r.reserved5); + sys::swapByteOrder(r.reserved6); +} + +inline void swapStruct(thread_command &t) { + sys::swapByteOrder(t.cmd); + sys::swapByteOrder(t.cmdsize); +} + +inline void swapStruct(dylinker_command &d) { + sys::swapByteOrder(d.cmd); + sys::swapByteOrder(d.cmdsize); + sys::swapByteOrder(d.name); +} + +inline void swapStruct(uuid_command &u) { + sys::swapByteOrder(u.cmd); + sys::swapByteOrder(u.cmdsize); +} + +inline void swapStruct(rpath_command &r) { + sys::swapByteOrder(r.cmd); + sys::swapByteOrder(r.cmdsize); + sys::swapByteOrder(r.path); +} + +inline void swapStruct(source_version_command &s) { + sys::swapByteOrder(s.cmd); + sys::swapByteOrder(s.cmdsize); + sys::swapByteOrder(s.version); +} + +inline void swapStruct(entry_point_command &e) { + sys::swapByteOrder(e.cmd); + sys::swapByteOrder(e.cmdsize); + sys::swapByteOrder(e.entryoff); + sys::swapByteOrder(e.stacksize); +} + +inline void swapStruct(encryption_info_command &e) { + sys::swapByteOrder(e.cmd); + sys::swapByteOrder(e.cmdsize); + sys::swapByteOrder(e.cryptoff); + sys::swapByteOrder(e.cryptsize); + sys::swapByteOrder(e.cryptid); +} + +inline void swapStruct(encryption_info_command_64 &e) { + sys::swapByteOrder(e.cmd); + sys::swapByteOrder(e.cmdsize); + sys::swapByteOrder(e.cryptoff); + sys::swapByteOrder(e.cryptsize); + sys::swapByteOrder(e.cryptid); + sys::swapByteOrder(e.pad); +} + +inline void swapStruct(dysymtab_command &dst) { + sys::swapByteOrder(dst.cmd); + sys::swapByteOrder(dst.cmdsize); + sys::swapByteOrder(dst.ilocalsym); + sys::swapByteOrder(dst.nlocalsym); + sys::swapByteOrder(dst.iextdefsym); + sys::swapByteOrder(dst.nextdefsym); + sys::swapByteOrder(dst.iundefsym); + sys::swapByteOrder(dst.nundefsym); + sys::swapByteOrder(dst.tocoff); + sys::swapByteOrder(dst.ntoc); + sys::swapByteOrder(dst.modtaboff); + sys::swapByteOrder(dst.nmodtab); + sys::swapByteOrder(dst.extrefsymoff); + sys::swapByteOrder(dst.nextrefsyms); + sys::swapByteOrder(dst.indirectsymoff); + sys::swapByteOrder(dst.nindirectsyms); + sys::swapByteOrder(dst.extreloff); + sys::swapByteOrder(dst.nextrel); + sys::swapByteOrder(dst.locreloff); + sys::swapByteOrder(dst.nlocrel); +} + +inline void swapStruct(any_relocation_info &reloc) { + sys::swapByteOrder(reloc.r_word0); + sys::swapByteOrder(reloc.r_word1); +} + +inline void swapStruct(nlist_base &S) { + sys::swapByteOrder(S.n_strx); + sys::swapByteOrder(S.n_desc); +} + +inline void swapStruct(nlist &sym) { + sys::swapByteOrder(sym.n_strx); + sys::swapByteOrder(sym.n_desc); + sys::swapByteOrder(sym.n_value); +} + +inline void swapStruct(nlist_64 &sym) { + sys::swapByteOrder(sym.n_strx); + sys::swapByteOrder(sym.n_desc); + sys::swapByteOrder(sym.n_value); +} + +inline void swapStruct(linkedit_data_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.dataoff); + sys::swapByteOrder(C.datasize); +} + +inline void swapStruct(linker_option_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.count); +} + +inline void swapStruct(version_min_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.version); + sys::swapByteOrder(C.sdk); +} + +inline void swapStruct(note_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.size); +} + +inline void swapStruct(build_version_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.platform); + sys::swapByteOrder(C.minos); + sys::swapByteOrder(C.sdk); + sys::swapByteOrder(C.ntools); +} + +inline void swapStruct(build_tool_version &C) { + sys::swapByteOrder(C.tool); + sys::swapByteOrder(C.version); +} + +inline void swapStruct(data_in_code_entry &C) { + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.length); + sys::swapByteOrder(C.kind); +} + +inline void swapStruct(uint32_t &C) { sys::swapByteOrder(C); } + +// The prebind_cksum_command is obsolete and no longer supported. +inline void swapStruct(prebind_cksum_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.cksum); +} + +// The twolevel_hints_command is obsolete and no longer supported. +inline void swapStruct(twolevel_hints_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.nhints); +} + +// The prebound_dylib_command is obsolete and no longer supported. +inline void swapStruct(prebound_dylib_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.name); + sys::swapByteOrder(C.nmodules); + sys::swapByteOrder(C.linked_modules); +} + +// The fvmfile_command is obsolete and no longer supported. +inline void swapStruct(fvmfile_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.name); + sys::swapByteOrder(C.header_addr); +} + +// The symseg_command is obsolete and no longer supported. +inline void swapStruct(symseg_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.size); +} + +// The ident_command is obsolete and no longer supported. +inline void swapStruct(ident_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); +} + +inline void swapStruct(fvmlib &C) { + sys::swapByteOrder(C.name); + sys::swapByteOrder(C.minor_version); + sys::swapByteOrder(C.header_addr); +} + +// The fvmlib_command is obsolete and no longer supported. +inline void swapStruct(fvmlib_command &C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + swapStruct(C.fvmlib); +} + +// Get/Set functions from + +static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) { + return (((n_desc) >> 8u) & 0xffu); +} + +static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) { + n_desc = (((n_desc)&0x00ff) | (((ordinal)&0xff) << 8)); +} + +static inline uint8_t GET_COMM_ALIGN(uint16_t n_desc) { + return (n_desc >> 8u) & 0x0fu; +} + +static inline void SET_COMM_ALIGN(uint16_t &n_desc, uint8_t align) { + n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u)); +} + +// Enums from +enum : uint32_t { + // Capability bits used in the definition of cpu_type. + CPU_ARCH_MASK = 0xff000000, // Mask for architecture bits + CPU_ARCH_ABI64 = 0x01000000 // 64 bit ABI +}; + +// Constants for the cputype field. +enum CPUType { + CPU_TYPE_ANY = -1, + CPU_TYPE_X86 = 7, + CPU_TYPE_I386 = CPU_TYPE_X86, + CPU_TYPE_X86_64 = CPU_TYPE_X86 | CPU_ARCH_ABI64, + /* CPU_TYPE_MIPS = 8, */ + CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC + CPU_TYPE_ARM = 12, + CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64, + CPU_TYPE_SPARC = 14, + CPU_TYPE_POWERPC = 18, + CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64 +}; + +enum : uint32_t { + // Capability bits used in the definition of cpusubtype. + CPU_SUBTYPE_MASK = 0xff000000, // Mask for architecture bits + CPU_SUBTYPE_LIB64 = 0x80000000, // 64 bit libraries + + // Special CPU subtype constants. + CPU_SUBTYPE_MULTIPLE = ~0u +}; + +// Constants for the cpusubtype field. +enum CPUSubTypeX86 { + CPU_SUBTYPE_I386_ALL = 3, + CPU_SUBTYPE_386 = 3, + CPU_SUBTYPE_486 = 4, + CPU_SUBTYPE_486SX = 0x84, + CPU_SUBTYPE_586 = 5, + CPU_SUBTYPE_PENT = CPU_SUBTYPE_586, + CPU_SUBTYPE_PENTPRO = 0x16, + CPU_SUBTYPE_PENTII_M3 = 0x36, + CPU_SUBTYPE_PENTII_M5 = 0x56, + CPU_SUBTYPE_CELERON = 0x67, + CPU_SUBTYPE_CELERON_MOBILE = 0x77, + CPU_SUBTYPE_PENTIUM_3 = 0x08, + CPU_SUBTYPE_PENTIUM_3_M = 0x18, + CPU_SUBTYPE_PENTIUM_3_XEON = 0x28, + CPU_SUBTYPE_PENTIUM_M = 0x09, + CPU_SUBTYPE_PENTIUM_4 = 0x0a, + CPU_SUBTYPE_PENTIUM_4_M = 0x1a, + CPU_SUBTYPE_ITANIUM = 0x0b, + CPU_SUBTYPE_ITANIUM_2 = 0x1b, + CPU_SUBTYPE_XEON = 0x0c, + CPU_SUBTYPE_XEON_MP = 0x1c, + + CPU_SUBTYPE_X86_ALL = 3, + CPU_SUBTYPE_X86_64_ALL = 3, + CPU_SUBTYPE_X86_ARCH1 = 4, + CPU_SUBTYPE_X86_64_H = 8 +}; +static inline int CPU_SUBTYPE_INTEL(int Family, int Model) { + return Family | (Model << 4); +} +static inline int CPU_SUBTYPE_INTEL_FAMILY(CPUSubTypeX86 ST) { + return ((int)ST) & 0x0f; +} +static inline int CPU_SUBTYPE_INTEL_MODEL(CPUSubTypeX86 ST) { + return ((int)ST) >> 4; +} +enum { CPU_SUBTYPE_INTEL_FAMILY_MAX = 15, CPU_SUBTYPE_INTEL_MODEL_ALL = 0 }; + +enum CPUSubTypeARM { + CPU_SUBTYPE_ARM_ALL = 0, + CPU_SUBTYPE_ARM_V4T = 5, + CPU_SUBTYPE_ARM_V6 = 6, + CPU_SUBTYPE_ARM_V5 = 7, + CPU_SUBTYPE_ARM_V5TEJ = 7, + CPU_SUBTYPE_ARM_XSCALE = 8, + CPU_SUBTYPE_ARM_V7 = 9, + // unused ARM_V7F = 10, + CPU_SUBTYPE_ARM_V7S = 11, + CPU_SUBTYPE_ARM_V7K = 12, + CPU_SUBTYPE_ARM_V6M = 14, + CPU_SUBTYPE_ARM_V7M = 15, + CPU_SUBTYPE_ARM_V7EM = 16 +}; + +enum CPUSubTypeARM64 { CPU_SUBTYPE_ARM64_ALL = 0 }; + +enum CPUSubTypeSPARC { CPU_SUBTYPE_SPARC_ALL = 0 }; + +enum CPUSubTypePowerPC { + CPU_SUBTYPE_POWERPC_ALL = 0, + CPU_SUBTYPE_POWERPC_601 = 1, + CPU_SUBTYPE_POWERPC_602 = 2, + CPU_SUBTYPE_POWERPC_603 = 3, + CPU_SUBTYPE_POWERPC_603e = 4, + CPU_SUBTYPE_POWERPC_603ev = 5, + CPU_SUBTYPE_POWERPC_604 = 6, + CPU_SUBTYPE_POWERPC_604e = 7, + CPU_SUBTYPE_POWERPC_620 = 8, + CPU_SUBTYPE_POWERPC_750 = 9, + CPU_SUBTYPE_POWERPC_7400 = 10, + CPU_SUBTYPE_POWERPC_7450 = 11, + CPU_SUBTYPE_POWERPC_970 = 100, + + CPU_SUBTYPE_MC980000_ALL = CPU_SUBTYPE_POWERPC_ALL, + CPU_SUBTYPE_MC98601 = CPU_SUBTYPE_POWERPC_601 +}; + +struct x86_thread_state32_t { + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t edi; + uint32_t esi; + uint32_t ebp; + uint32_t esp; + uint32_t ss; + uint32_t eflags; + uint32_t eip; + uint32_t cs; + uint32_t ds; + uint32_t es; + uint32_t fs; + uint32_t gs; +}; + +struct x86_thread_state64_t { + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rdi; + uint64_t rsi; + uint64_t rbp; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + uint64_t rip; + uint64_t rflags; + uint64_t cs; + uint64_t fs; + uint64_t gs; +}; + +enum x86_fp_control_precis { + x86_FP_PREC_24B = 0, + x86_FP_PREC_53B = 2, + x86_FP_PREC_64B = 3 +}; + +enum x86_fp_control_rc { + x86_FP_RND_NEAR = 0, + x86_FP_RND_DOWN = 1, + x86_FP_RND_UP = 2, + x86_FP_CHOP = 3 +}; + +struct fp_control_t { + unsigned short invalid : 1, denorm : 1, zdiv : 1, ovrfl : 1, undfl : 1, + precis : 1, : 2, pc : 2, rc : 2, : 1, : 3; +}; + +struct fp_status_t { + unsigned short invalid : 1, denorm : 1, zdiv : 1, ovrfl : 1, undfl : 1, + precis : 1, stkflt : 1, errsumm : 1, c0 : 1, c1 : 1, c2 : 1, tos : 3, + c3 : 1, busy : 1; +}; + +struct mmst_reg_t { + char mmst_reg[10]; + char mmst_rsrv[6]; +}; + +struct xmm_reg_t { + char xmm_reg[16]; +}; + +struct x86_float_state64_t { + int32_t fpu_reserved[2]; + fp_control_t fpu_fcw; + fp_status_t fpu_fsw; + uint8_t fpu_ftw; + uint8_t fpu_rsrv1; + uint16_t fpu_fop; + uint32_t fpu_ip; + uint16_t fpu_cs; + uint16_t fpu_rsrv2; + uint32_t fpu_dp; + uint16_t fpu_ds; + uint16_t fpu_rsrv3; + uint32_t fpu_mxcsr; + uint32_t fpu_mxcsrmask; + mmst_reg_t fpu_stmm0; + mmst_reg_t fpu_stmm1; + mmst_reg_t fpu_stmm2; + mmst_reg_t fpu_stmm3; + mmst_reg_t fpu_stmm4; + mmst_reg_t fpu_stmm5; + mmst_reg_t fpu_stmm6; + mmst_reg_t fpu_stmm7; + xmm_reg_t fpu_xmm0; + xmm_reg_t fpu_xmm1; + xmm_reg_t fpu_xmm2; + xmm_reg_t fpu_xmm3; + xmm_reg_t fpu_xmm4; + xmm_reg_t fpu_xmm5; + xmm_reg_t fpu_xmm6; + xmm_reg_t fpu_xmm7; + xmm_reg_t fpu_xmm8; + xmm_reg_t fpu_xmm9; + xmm_reg_t fpu_xmm10; + xmm_reg_t fpu_xmm11; + xmm_reg_t fpu_xmm12; + xmm_reg_t fpu_xmm13; + xmm_reg_t fpu_xmm14; + xmm_reg_t fpu_xmm15; + char fpu_rsrv4[6 * 16]; + uint32_t fpu_reserved1; +}; + +struct x86_exception_state64_t { + uint16_t trapno; + uint16_t cpu; + uint32_t err; + uint64_t faultvaddr; +}; + +inline void swapStruct(x86_thread_state32_t &x) { + sys::swapByteOrder(x.eax); + sys::swapByteOrder(x.ebx); + sys::swapByteOrder(x.ecx); + sys::swapByteOrder(x.edx); + sys::swapByteOrder(x.edi); + sys::swapByteOrder(x.esi); + sys::swapByteOrder(x.ebp); + sys::swapByteOrder(x.esp); + sys::swapByteOrder(x.ss); + sys::swapByteOrder(x.eflags); + sys::swapByteOrder(x.eip); + sys::swapByteOrder(x.cs); + sys::swapByteOrder(x.ds); + sys::swapByteOrder(x.es); + sys::swapByteOrder(x.fs); + sys::swapByteOrder(x.gs); +} + +inline void swapStruct(x86_thread_state64_t &x) { + sys::swapByteOrder(x.rax); + sys::swapByteOrder(x.rbx); + sys::swapByteOrder(x.rcx); + sys::swapByteOrder(x.rdx); + sys::swapByteOrder(x.rdi); + sys::swapByteOrder(x.rsi); + sys::swapByteOrder(x.rbp); + sys::swapByteOrder(x.rsp); + sys::swapByteOrder(x.r8); + sys::swapByteOrder(x.r9); + sys::swapByteOrder(x.r10); + sys::swapByteOrder(x.r11); + sys::swapByteOrder(x.r12); + sys::swapByteOrder(x.r13); + sys::swapByteOrder(x.r14); + sys::swapByteOrder(x.r15); + sys::swapByteOrder(x.rip); + sys::swapByteOrder(x.rflags); + sys::swapByteOrder(x.cs); + sys::swapByteOrder(x.fs); + sys::swapByteOrder(x.gs); +} + +inline void swapStruct(x86_float_state64_t &x) { + sys::swapByteOrder(x.fpu_reserved[0]); + sys::swapByteOrder(x.fpu_reserved[1]); + // TODO swap: fp_control_t fpu_fcw; + // TODO swap: fp_status_t fpu_fsw; + sys::swapByteOrder(x.fpu_fop); + sys::swapByteOrder(x.fpu_ip); + sys::swapByteOrder(x.fpu_cs); + sys::swapByteOrder(x.fpu_rsrv2); + sys::swapByteOrder(x.fpu_dp); + sys::swapByteOrder(x.fpu_ds); + sys::swapByteOrder(x.fpu_rsrv3); + sys::swapByteOrder(x.fpu_mxcsr); + sys::swapByteOrder(x.fpu_mxcsrmask); + sys::swapByteOrder(x.fpu_reserved1); +} + +inline void swapStruct(x86_exception_state64_t &x) { + sys::swapByteOrder(x.trapno); + sys::swapByteOrder(x.cpu); + sys::swapByteOrder(x.err); + sys::swapByteOrder(x.faultvaddr); +} + +struct x86_state_hdr_t { + uint32_t flavor; + uint32_t count; +}; + +struct x86_thread_state_t { + x86_state_hdr_t tsh; + union { + x86_thread_state64_t ts64; + x86_thread_state32_t ts32; + } uts; +}; + +struct x86_float_state_t { + x86_state_hdr_t fsh; + union { + x86_float_state64_t fs64; + } ufs; +}; + +struct x86_exception_state_t { + x86_state_hdr_t esh; + union { + x86_exception_state64_t es64; + } ues; +}; + +inline void swapStruct(x86_state_hdr_t &x) { + sys::swapByteOrder(x.flavor); + sys::swapByteOrder(x.count); +} + +enum X86ThreadFlavors { + x86_THREAD_STATE32 = 1, + x86_FLOAT_STATE32 = 2, + x86_EXCEPTION_STATE32 = 3, + x86_THREAD_STATE64 = 4, + x86_FLOAT_STATE64 = 5, + x86_EXCEPTION_STATE64 = 6, + x86_THREAD_STATE = 7, + x86_FLOAT_STATE = 8, + x86_EXCEPTION_STATE = 9, + x86_DEBUG_STATE32 = 10, + x86_DEBUG_STATE64 = 11, + x86_DEBUG_STATE = 12 +}; + +inline void swapStruct(x86_thread_state_t &x) { + swapStruct(x.tsh); + if (x.tsh.flavor == x86_THREAD_STATE64) + swapStruct(x.uts.ts64); +} + +inline void swapStruct(x86_float_state_t &x) { + swapStruct(x.fsh); + if (x.fsh.flavor == x86_FLOAT_STATE64) + swapStruct(x.ufs.fs64); +} + +inline void swapStruct(x86_exception_state_t &x) { + swapStruct(x.esh); + if (x.esh.flavor == x86_EXCEPTION_STATE64) + swapStruct(x.ues.es64); +} + +const uint32_t x86_THREAD_STATE32_COUNT = + sizeof(x86_thread_state32_t) / sizeof(uint32_t); + +const uint32_t x86_THREAD_STATE64_COUNT = + sizeof(x86_thread_state64_t) / sizeof(uint32_t); +const uint32_t x86_FLOAT_STATE64_COUNT = + sizeof(x86_float_state64_t) / sizeof(uint32_t); +const uint32_t x86_EXCEPTION_STATE64_COUNT = + sizeof(x86_exception_state64_t) / sizeof(uint32_t); + +const uint32_t x86_THREAD_STATE_COUNT = + sizeof(x86_thread_state_t) / sizeof(uint32_t); +const uint32_t x86_FLOAT_STATE_COUNT = + sizeof(x86_float_state_t) / sizeof(uint32_t); +const uint32_t x86_EXCEPTION_STATE_COUNT = + sizeof(x86_exception_state_t) / sizeof(uint32_t); + +struct arm_thread_state32_t { + uint32_t r[13]; + uint32_t sp; + uint32_t lr; + uint32_t pc; + uint32_t cpsr; +}; + +inline void swapStruct(arm_thread_state32_t &x) { + for (int i = 0; i < 13; i++) + sys::swapByteOrder(x.r[i]); + sys::swapByteOrder(x.sp); + sys::swapByteOrder(x.lr); + sys::swapByteOrder(x.pc); + sys::swapByteOrder(x.cpsr); +} + +struct arm_thread_state64_t { + uint64_t x[29]; + uint64_t fp; + uint64_t lr; + uint64_t sp; + uint64_t pc; + uint32_t cpsr; + uint32_t pad; +}; + +inline void swapStruct(arm_thread_state64_t &x) { + for (int i = 0; i < 29; i++) + sys::swapByteOrder(x.x[i]); + sys::swapByteOrder(x.fp); + sys::swapByteOrder(x.lr); + sys::swapByteOrder(x.sp); + sys::swapByteOrder(x.pc); + sys::swapByteOrder(x.cpsr); +} + +struct arm_state_hdr_t { + uint32_t flavor; + uint32_t count; +}; + +struct arm_thread_state_t { + arm_state_hdr_t tsh; + union { + arm_thread_state32_t ts32; + } uts; +}; + +inline void swapStruct(arm_state_hdr_t &x) { + sys::swapByteOrder(x.flavor); + sys::swapByteOrder(x.count); +} + +enum ARMThreadFlavors { + ARM_THREAD_STATE = 1, + ARM_VFP_STATE = 2, + ARM_EXCEPTION_STATE = 3, + ARM_DEBUG_STATE = 4, + ARN_THREAD_STATE_NONE = 5, + ARM_THREAD_STATE64 = 6, + ARM_EXCEPTION_STATE64 = 7 +}; + +inline void swapStruct(arm_thread_state_t &x) { + swapStruct(x.tsh); + if (x.tsh.flavor == ARM_THREAD_STATE) + swapStruct(x.uts.ts32); +} + +const uint32_t ARM_THREAD_STATE_COUNT = + sizeof(arm_thread_state32_t) / sizeof(uint32_t); + +const uint32_t ARM_THREAD_STATE64_COUNT = + sizeof(arm_thread_state64_t) / sizeof(uint32_t); + +struct ppc_thread_state32_t { + uint32_t srr0; + uint32_t srr1; + uint32_t r0; + uint32_t r1; + uint32_t r2; + uint32_t r3; + uint32_t r4; + uint32_t r5; + uint32_t r6; + uint32_t r7; + uint32_t r8; + uint32_t r9; + uint32_t r10; + uint32_t r11; + uint32_t r12; + uint32_t r13; + uint32_t r14; + uint32_t r15; + uint32_t r16; + uint32_t r17; + uint32_t r18; + uint32_t r19; + uint32_t r20; + uint32_t r21; + uint32_t r22; + uint32_t r23; + uint32_t r24; + uint32_t r25; + uint32_t r26; + uint32_t r27; + uint32_t r28; + uint32_t r29; + uint32_t r30; + uint32_t r31; + uint32_t ct; + uint32_t xer; + uint32_t lr; + uint32_t ctr; + uint32_t mq; + uint32_t vrsave; +}; + +inline void swapStruct(ppc_thread_state32_t &x) { + sys::swapByteOrder(x.srr0); + sys::swapByteOrder(x.srr1); + sys::swapByteOrder(x.r0); + sys::swapByteOrder(x.r1); + sys::swapByteOrder(x.r2); + sys::swapByteOrder(x.r3); + sys::swapByteOrder(x.r4); + sys::swapByteOrder(x.r5); + sys::swapByteOrder(x.r6); + sys::swapByteOrder(x.r7); + sys::swapByteOrder(x.r8); + sys::swapByteOrder(x.r9); + sys::swapByteOrder(x.r10); + sys::swapByteOrder(x.r11); + sys::swapByteOrder(x.r12); + sys::swapByteOrder(x.r13); + sys::swapByteOrder(x.r14); + sys::swapByteOrder(x.r15); + sys::swapByteOrder(x.r16); + sys::swapByteOrder(x.r17); + sys::swapByteOrder(x.r18); + sys::swapByteOrder(x.r19); + sys::swapByteOrder(x.r20); + sys::swapByteOrder(x.r21); + sys::swapByteOrder(x.r22); + sys::swapByteOrder(x.r23); + sys::swapByteOrder(x.r24); + sys::swapByteOrder(x.r25); + sys::swapByteOrder(x.r26); + sys::swapByteOrder(x.r27); + sys::swapByteOrder(x.r28); + sys::swapByteOrder(x.r29); + sys::swapByteOrder(x.r30); + sys::swapByteOrder(x.r31); + sys::swapByteOrder(x.ct); + sys::swapByteOrder(x.xer); + sys::swapByteOrder(x.lr); + sys::swapByteOrder(x.ctr); + sys::swapByteOrder(x.mq); + sys::swapByteOrder(x.vrsave); +} + +struct ppc_state_hdr_t { + uint32_t flavor; + uint32_t count; +}; + +struct ppc_thread_state_t { + ppc_state_hdr_t tsh; + union { + ppc_thread_state32_t ts32; + } uts; +}; + +inline void swapStruct(ppc_state_hdr_t &x) { + sys::swapByteOrder(x.flavor); + sys::swapByteOrder(x.count); +} + +enum PPCThreadFlavors { + PPC_THREAD_STATE = 1, + PPC_FLOAT_STATE = 2, + PPC_EXCEPTION_STATE = 3, + PPC_VECTOR_STATE = 4, + PPC_THREAD_STATE64 = 5, + PPC_EXCEPTION_STATE64 = 6, + PPC_THREAD_STATE_NONE = 7 +}; + +inline void swapStruct(ppc_thread_state_t &x) { + swapStruct(x.tsh); + if (x.tsh.flavor == PPC_THREAD_STATE) + swapStruct(x.uts.ts32); +} + +const uint32_t PPC_THREAD_STATE_COUNT = + sizeof(ppc_thread_state32_t) / sizeof(uint32_t); + +// Define a union of all load command structs +#define LOAD_COMMAND_STRUCT(LCStruct) LCStruct LCStruct##_data; + +union macho_load_command { +#include "llvm/BinaryFormat/MachO.def" +}; + +} // end namespace MachO +} // end namespace llvm + +#endif diff --git a/include/llvm/BinaryFormat/Magic.h b/include/llvm/BinaryFormat/Magic.h new file mode 100644 index 0000000000000000000000000000000000000000..c0e23db5e1aeec066d8ea5cbff84db7cdd22915d --- /dev/null +++ b/include/llvm/BinaryFormat/Magic.h @@ -0,0 +1,73 @@ +//===- llvm/BinaryFormat/Magic.h - File magic identification ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BINARYFORMAT_MAGIC_H +#define LLVM_BINARYFORMAT_MAGIC_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" + +#include + +namespace llvm { +/// file_magic - An "enum class" enumeration of file types based on magic (the +/// first N bytes of the file). +struct file_magic { + enum Impl { + unknown = 0, ///< Unrecognized file + bitcode, ///< Bitcode file + archive, ///< ar style archive file + elf, ///< ELF Unknown type + elf_relocatable, ///< ELF Relocatable object file + elf_executable, ///< ELF Executable image + elf_shared_object, ///< ELF dynamically linked shared lib + elf_core, ///< ELF core image + macho_object, ///< Mach-O Object file + macho_executable, ///< Mach-O Executable + macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM + macho_core, ///< Mach-O Core File + macho_preload_executable, ///< Mach-O Preloaded Executable + macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib + macho_dynamic_linker, ///< The Mach-O dynamic linker + macho_bundle, ///< Mach-O Bundle file + macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub + macho_dsym_companion, ///< Mach-O dSYM companion file + macho_kext_bundle, ///< Mach-O kext bundle file + macho_universal_binary, ///< Mach-O universal binary + coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file + coff_object, ///< COFF object file + coff_import_library, ///< COFF import library + pecoff_executable, ///< PECOFF executable file + windows_resource, ///< Windows compiled resource file (.res) + wasm_object ///< WebAssembly Object file + }; + + bool is_object() const { return V != unknown; } + + file_magic() = default; + file_magic(Impl V) : V(V) {} + operator Impl() const { return V; } + +private: + Impl V = unknown; +}; + +/// @brief Identify the type of a binary file based on how magical it is. +file_magic identify_magic(StringRef magic); + +/// @brief Get and identify \a path's type based on its content. +/// +/// @param path Input path. +/// @param result Set to the type of file, or file_magic::unknown. +/// @returns errc::success if result has been successfully set, otherwise a +/// platform-specific error_code. +std::error_code identify_magic(const Twine &path, file_magic &result); +} // namespace llvm + +#endif diff --git a/include/llvm/Support/Wasm.h b/include/llvm/BinaryFormat/Wasm.h similarity index 79% rename from include/llvm/Support/Wasm.h rename to include/llvm/BinaryFormat/Wasm.h index 8e6c418c8189a3ca7b8a91787a2f0033149fc33e..fcd8ad957040fc4184ba0a94378f47afc10207e9 100644 --- a/include/llvm/Support/Wasm.h +++ b/include/llvm/BinaryFormat/Wasm.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_WASM_H -#define LLVM_SUPPORT_WASM_H +#ifndef LLVM_BINARYFORMAT_WASM_H +#define LLVM_BINARYFORMAT_WASM_H #include "llvm/ADT/ArrayRef.h" @@ -24,6 +24,8 @@ namespace wasm { const char WasmMagic[] = {'\0', 'a', 's', 'm'}; // Wasm binary format version const uint32_t WasmVersion = 0x1; +// Wasm uses a 64k page size +const uint32_t WasmPageSize = 65536; struct WasmObjectHeader { StringRef Magic; @@ -35,17 +37,6 @@ struct WasmSignature { int32_t ReturnType; }; -struct WasmImport { - StringRef Module; - StringRef Field; - uint32_t Kind; - union { - uint32_t SigIndex; - int32_t GlobalType; - }; - bool GlobalMutable; -}; - struct WasmExport { StringRef Name; uint32_t Kind; @@ -80,6 +71,18 @@ struct WasmGlobal { WasmInitExpr InitExpr; }; +struct WasmImport { + StringRef Module; + StringRef Field; + uint32_t Kind; + union { + uint32_t SigIndex; + WasmGlobal Global; + WasmTable Table; + WasmLimits Memory; + }; +}; + struct WasmLocalDecl { int32_t Type; uint32_t Count; @@ -103,10 +106,10 @@ struct WasmElemSegment { }; struct WasmRelocation { - uint32_t Type; // The type of the relocation. - int32_t Index; // Index into function to global index space. - uint64_t Offset; // Offset from the start of the section. - uint64_t Addend; // A value to add to the symbol. + uint32_t Type; // The type of the relocation. + int32_t Index; // Index into function to global index space. + uint64_t Offset; // Offset from the start of the section. + int64_t Addend; // A value to add to the symbol. }; enum : unsigned { @@ -126,36 +129,36 @@ enum : unsigned { // Type immediate encodings used in various contexts. enum { - WASM_TYPE_I32 = -0x01, - WASM_TYPE_I64 = -0x02, - WASM_TYPE_F32 = -0x03, - WASM_TYPE_F64 = -0x04, - WASM_TYPE_ANYFUNC = -0x10, - WASM_TYPE_FUNC = -0x20, - WASM_TYPE_NORESULT = -0x40, // for blocks with no result values + WASM_TYPE_I32 = -0x01, + WASM_TYPE_I64 = -0x02, + WASM_TYPE_F32 = -0x03, + WASM_TYPE_F64 = -0x04, + WASM_TYPE_ANYFUNC = -0x10, + WASM_TYPE_FUNC = -0x20, + WASM_TYPE_NORESULT = -0x40, // for blocks with no result values }; // Kinds of externals (for imports and exports). enum : unsigned { WASM_EXTERNAL_FUNCTION = 0x0, - WASM_EXTERNAL_TABLE = 0x1, - WASM_EXTERNAL_MEMORY = 0x2, - WASM_EXTERNAL_GLOBAL = 0x3, + WASM_EXTERNAL_TABLE = 0x1, + WASM_EXTERNAL_MEMORY = 0x2, + WASM_EXTERNAL_GLOBAL = 0x3, }; // Opcodes used in initializer expressions. enum : unsigned { - WASM_OPCODE_END = 0x0b, + WASM_OPCODE_END = 0x0b, WASM_OPCODE_GET_GLOBAL = 0x23, - WASM_OPCODE_I32_CONST = 0x41, - WASM_OPCODE_I64_CONST = 0x42, - WASM_OPCODE_F32_CONST = 0x43, - WASM_OPCODE_F64_CONST = 0x44, + WASM_OPCODE_I32_CONST = 0x41, + WASM_OPCODE_I64_CONST = 0x42, + WASM_OPCODE_F32_CONST = 0x43, + WASM_OPCODE_F64_CONST = 0x44, }; enum : unsigned { - WASM_NAMES_FUNCTION = 0x1, - WASM_NAMES_LOCAL = 0x2, + WASM_NAMES_FUNCTION = 0x1, + WASM_NAMES_LOCAL = 0x2, }; enum : unsigned { diff --git a/include/llvm/Support/WasmRelocs/WebAssembly.def b/include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def similarity index 100% rename from include/llvm/Support/WasmRelocs/WebAssembly.def rename to include/llvm/BinaryFormat/WasmRelocs/WebAssembly.def diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h index 9e042b17241f7efaaa3baf420ae2977b3e99e7b7..0e17e9a0a2781cb3a3b186e1e830ebf95105ee94 100644 --- a/include/llvm/Bitcode/BitcodeReader.h +++ b/include/llvm/Bitcode/BitcodeReader.h @@ -40,12 +40,23 @@ namespace llvm { return std::move(*Val); } + struct BitcodeFileContents; + + /// Basic information extracted from a bitcode module to be used for LTO. + struct BitcodeLTOInfo { + bool IsThinLTO; + bool HasSummary; + }; + /// Represents a module in a bitcode file. class BitcodeModule { // This covers the identification (if present) and module blocks. ArrayRef Buffer; StringRef ModuleIdentifier; + // The string table used to interpret this module. + StringRef Strtab; + // The bitstream location of the IDENTIFICATION_BLOCK. uint64_t IdentificationBit; @@ -58,8 +69,8 @@ namespace llvm { IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} // Calls the ctor. - friend Expected> - getBitcodeModuleList(MemoryBufferRef Buffer); + friend Expected + getBitcodeFileContents(MemoryBufferRef Buffer); Expected> getModuleImpl(LLVMContext &Context, bool MaterializeAll, @@ -70,6 +81,7 @@ namespace llvm { StringRef getBuffer() const { return StringRef((const char *)Buffer.begin(), Buffer.size()); } + StringRef getStrtab() const { return Strtab; } StringRef getModuleIdentifier() const { return ModuleIdentifier; } @@ -84,13 +96,26 @@ namespace llvm { /// Read the entire bitcode module and return it. Expected> parseModule(LLVMContext &Context); - /// Check if the given bitcode buffer contains a summary block. - Expected hasSummary(); + /// Returns information about the module to be used for LTO: whether to + /// compile with ThinLTO, and whether it has a summary. + Expected getLTOInfo(); /// Parse the specified bitcode buffer, returning the module summary index. Expected> getSummary(); + + /// Parse the specified bitcode buffer and merge its module summary index + /// into CombinedIndex. + Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, + uint64_t ModuleId); }; + struct BitcodeFileContents { + std::vector Mods; + }; + + /// Returns the contents of a bitcode file. + Expected getBitcodeFileContents(MemoryBufferRef Buffer); + /// Returns a list of modules in the specified bitcode buffer. Expected> getBitcodeModuleList(MemoryBufferRef Buffer); @@ -130,13 +155,26 @@ namespace llvm { Expected> parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context); - /// Check if the given bitcode buffer contains a summary block. - Expected hasGlobalValueSummary(MemoryBufferRef Buffer); + /// Returns LTO information for the specified bitcode file. + Expected getBitcodeLTOInfo(MemoryBufferRef Buffer); /// Parse the specified bitcode buffer, returning the module summary index. Expected> getModuleSummaryIndex(MemoryBufferRef Buffer); + /// Parse the specified bitcode buffer and merge the index into CombinedIndex. + Error readModuleSummaryIndex(MemoryBufferRef Buffer, + ModuleSummaryIndex &CombinedIndex, + uint64_t ModuleId); + + /// Parse the module summary index out of an IR file and return the module + /// summary index object if found, or an empty summary if not. If Path refers + /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then + /// this function will return nullptr. + Expected> + getModuleSummaryIndexForFile(StringRef Path, + bool IgnoreEmptyThinLTOIndexFile = false); + /// isBitcodeWrapper - Return true if the given bytes are the magic bytes /// for an LLVM IR bitcode wrapper. /// diff --git a/include/llvm/Bitcode/BitcodeWriter.h b/include/llvm/Bitcode/BitcodeWriter.h index 271cb2d81bbb20bf8f8e08b21539deaa1437756c..7c3c4b2e0cbd3728c9aba57b45ced1d11ccacb0d 100644 --- a/include/llvm/Bitcode/BitcodeWriter.h +++ b/include/llvm/Bitcode/BitcodeWriter.h @@ -15,6 +15,7 @@ #define LLVM_BITCODE_BITCODEWRITER_H #include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/MC/StringTableBuilder.h" #include namespace llvm { @@ -26,12 +27,25 @@ namespace llvm { SmallVectorImpl &Buffer; std::unique_ptr Stream; + StringTableBuilder StrtabBuilder{StringTableBuilder::RAW}; + bool WroteStrtab = false; + + void writeBlob(unsigned Block, unsigned Record, StringRef Blob); + public: /// Create a BitcodeWriter that writes to Buffer. BitcodeWriter(SmallVectorImpl &Buffer); ~BitcodeWriter(); + /// Write the bitcode file's string table. This must be called exactly once + /// after all modules have been written. + void writeStrtab(); + + /// Copy the string table for another module into this bitcode file. This + /// should be called after copying the module itself into the bitcode file. + void copyStrtab(StringRef Strtab); + /// Write the specified module to the buffer specified at construction time. /// /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a @@ -53,6 +67,10 @@ namespace llvm { void writeModule(const Module *M, bool ShouldPreserveUseListOrder = false, const ModuleSummaryIndex *Index = nullptr, bool GenerateHash = false, ModuleHash *ModHash = nullptr); + + void writeIndex( + const ModuleSummaryIndex *Index, + const std::map *ModuleToSummariesForIndex); }; /// \brief Write the specified module to the specified raw output stream. diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index e2d2fbb0f449aae2051a88eb02264e2eadb90d4d..4e3e177cac8f1315af749f35997fe164b39cbf96 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -22,7 +22,7 @@ namespace llvm { namespace bitc { -// The only top-level block type defined is for a module. +// The only top-level block types are MODULE, IDENTIFICATION and STRTAB. enum BlockIDs { // Blocks MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, @@ -52,7 +52,11 @@ enum BlockIDs { OPERAND_BUNDLE_TAGS_BLOCK_ID, - METADATA_KIND_BLOCK_ID + METADATA_KIND_BLOCK_ID, + + STRTAB_BLOCK_ID, + + FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, @@ -232,6 +236,18 @@ enum GlobalValueSummarySymtabCodes { // llvm.type.checked.load intrinsic with all constant integer arguments. // [typeid, offset, n x arg] FS_TYPE_CHECKED_LOAD_CONST_VCALL = 15, + // Assigns a GUID to a value ID. This normally appears only in combined + // summaries, but it can also appear in per-module summaries for PGO data. + // [valueid, guid] + FS_VALUE_GUID = 16, + // The list of local functions with CFI jump tables. Function names are + // strings in strtab. + // [n * name] + FS_CFI_FUNCTION_DEFS = 17, + // The list of external functions with CFI jump tables. Function names are + // strings in strtab. + // [n * name] + FS_CFI_FUNCTION_DECLS = 18, }; enum MetadataCodes { @@ -539,7 +555,8 @@ enum AttributeKindCodes { ATTR_KIND_INACCESSIBLEMEM_ONLY = 49, ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY = 50, ATTR_KIND_ALLOC_SIZE = 51, - ATTR_KIND_WRITEONLY = 52 + ATTR_KIND_WRITEONLY = 52, + ATTR_KIND_SPECULATABLE = 53 }; enum ComdatSelectionKindCodes { @@ -550,6 +567,10 @@ enum ComdatSelectionKindCodes { COMDAT_SELECTION_KIND_SAME_SIZE = 5, }; +enum StrtabCodes { + STRTAB_BLOB = 1, +}; + } // End bitc namespace } // End llvm namespace diff --git a/include/llvm/CMakeLists.txt b/include/llvm/CMakeLists.txt index ff805396eb0c85af96dde63c972bd56b957c904f..1d5ca3ba92b0e153b3424d83316cee9534522a4f 100644 --- a/include/llvm/CMakeLists.txt +++ b/include/llvm/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(IR) +add_subdirectory(Support) # If we're doing an out-of-tree build, copy a module map for generated # header files into the build area. diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index 772043fa3ce3632fb92d24119eaee93c283363db..c898667f147427967d514694f86afed8dbb4a370 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -34,6 +34,7 @@ namespace llvm { class AsmPrinterHandler; +class BasicBlock; class BlockAddress; class Constant; class ConstantArray; @@ -43,6 +44,7 @@ class DIEAbbrev; class DwarfDebug; class GCMetadataPrinter; class GlobalIndirectSymbol; +class GlobalObject; class GlobalValue; class GlobalVariable; class GCStrategy; @@ -65,6 +67,8 @@ class MCSubtargetInfo; class MCSymbol; class MCTargetOptions; class MDNode; +class Module; +class raw_ostream; class TargetLoweringObjectFile; class TargetMachine; @@ -109,9 +113,12 @@ public: /// Map global GOT equivalent MCSymbols to GlobalVariables and keep track of /// its number of uses by other globals. - typedef std::pair GOTEquivUsePair; + using GOTEquivUsePair = std::pair; MapVector GlobalGOTEquivs; + /// Enable print [latency:throughput] in output + bool EnablePrintSchedInfo = false; + private: MCSymbol *CurrentFnBegin = nullptr; MCSymbol *CurrentFnEnd = nullptr; @@ -223,6 +230,7 @@ public: FUNCTION_EXIT = 1, TAIL_CALL = 2, LOG_ARGS_ENTER = 3, + CUSTOM_EVENT = 4, }; // The table will contain these structs that point to the sled, the function @@ -239,7 +247,7 @@ public: }; // All the sleds to be emitted. - std::vector Sleds; + SmallVector Sleds; // Helper function to record a given XRay sled. void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind); diff --git a/include/llvm/CodeGen/AtomicExpandUtils.h b/include/llvm/CodeGen/AtomicExpandUtils.h index ac18eac8a1cefbf1c0f8a5a29defcbc92c37cfd6..1f9c96b18e1b4542958787a17789b9c9b659a5ee 100644 --- a/include/llvm/CodeGen/AtomicExpandUtils.h +++ b/include/llvm/CodeGen/AtomicExpandUtils.h @@ -1,4 +1,4 @@ -//===-- AtomicExpandUtils.h - Utilities for expanding atomic instructions -===// +//===- AtomicExpandUtils.h - Utilities for expanding atomic instructions --===// // // The LLVM Compiler Infrastructure // @@ -7,19 +7,24 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_ATOMICEXPANDUTILS_H +#define LLVM_CODEGEN_ATOMICEXPANDUTILS_H + #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/Support/AtomicOrdering.h" namespace llvm { -class Value; -class AtomicRMWInst; +class AtomicRMWInst; +class Value; /// Parameters (see the expansion example below): /// (the builder, %addr, %loaded, %new_val, ordering, /// /* OUT */ %success, /* OUT */ %new_loaded) -typedef function_ref &, Value *, Value *, Value *, - AtomicOrdering, Value *&, Value *&)> CreateCmpXchgInstFun; +using CreateCmpXchgInstFun = + function_ref &, Value *, Value *, Value *, AtomicOrdering, + Value *&, Value *&)>; /// \brief Expand an atomic RMW instruction into a loop utilizing /// cmpxchg. You'll want to make sure your target machine likes cmpxchg @@ -42,7 +47,8 @@ typedef function_ref &, Value *, Value *, Value *, /// loop: /// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] /// %new = some_op iN %loaded, %incr -/// ; This is what -atomic-expand will produce using this function on i686 targets: +/// ; This is what -atomic-expand will produce using this function on i686 +/// targets: /// %pair = cmpxchg iN* %addr, iN %loaded, iN %new_val /// %new_loaded = extractvalue { iN, i1 } %pair, 0 /// %success = extractvalue { iN, i1 } %pair, 1 @@ -52,6 +58,8 @@ typedef function_ref &, Value *, Value *, Value *, /// [...] /// /// Returns true if the containing function was modified. -bool -expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); -} +bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_ATOMICEXPANDUTILS_H diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index e30e947f787f18ad27f89f0cc6855098d211d541..5eb7a0f61eec4588fe47899d3674f6402aaf3f28 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -17,11 +17,11 @@ #define LLVM_CODEGEN_BASICTTIIMPL_H #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" namespace llvm { @@ -93,6 +93,8 @@ public: bool isSourceOfDivergence(const Value *V) { return false; } + bool isAlwaysUniform(const Value *V) { return false; } + unsigned getFlatAddressSpace() { // Return an invalid address space. return -1; @@ -117,6 +119,10 @@ public: return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace); } + bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { + return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); + } + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { TargetLoweringBase::AddrMode AM; @@ -171,6 +177,62 @@ public: return BaseT::getIntrinsicCost(IID, RetTy, ParamTys); } + unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JumpTableSize) { + /// Try to find the estimated number of clusters. Note that the number of + /// clusters identified in this function could be different from the actural + /// numbers found in lowering. This function ignore switches that are + /// lowered with a mix of jump table / bit test / BTree. This function was + /// initially intended to be used when estimating the cost of switch in + /// inline cost heuristic, but it's a generic cost model to be used in other + /// places (e.g., in loop unrolling). + unsigned N = SI.getNumCases(); + const TargetLoweringBase *TLI = getTLI(); + const DataLayout &DL = this->getDataLayout(); + + JumpTableSize = 0; + bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); + + // Early exit if both a jump table and bit test are not allowed. + if (N < 1 || (!IsJTAllowed && DL.getPointerSizeInBits() < N)) + return N; + + APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); + APInt MinCaseVal = MaxCaseVal; + for (auto CI : SI.cases()) { + const APInt &CaseVal = CI.getCaseValue()->getValue(); + if (CaseVal.sgt(MaxCaseVal)) + MaxCaseVal = CaseVal; + if (CaseVal.slt(MinCaseVal)) + MinCaseVal = CaseVal; + } + + // Check if suitable for a bit test + if (N <= DL.getPointerSizeInBits()) { + SmallPtrSet Dests; + for (auto I : SI.cases()) + Dests.insert(I.getCaseSuccessor()); + + if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, + DL)) + return 1; + } + + // Check if suitable for a jump table. + if (IsJTAllowed) { + if (N < 2 || N < TLI->getMinimumJumpTableEntries()) + return N; + uint64_t Range = + (MaxCaseVal - MinCaseVal).getLimitedValue(UINT64_MAX - 1) + 1; + // Check whether a range of clusters is dense enough for a jump table + if (TLI->isSuitableForJumpTable(&SI, N, Range)) { + JumpTableSize = Range; + return 1; + } + } + return N; + } + unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); } unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); } @@ -286,7 +348,7 @@ public: unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; } - unsigned getRegisterBitWidth(bool Vector) { return 32; } + unsigned getRegisterBitWidth(bool Vector) const { return 32; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. @@ -1024,46 +1086,46 @@ public: return 0; } + /// Try to calculate arithmetic and shuffle op costs for reduction operations. + /// We're assuming that reduction operation are performing the following way: + /// 1. Non-pairwise reduction + /// %val1 = shufflevector %val, %undef, + /// + /// \----------------v-------------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %red1 = op %val, val1 + /// After this operation we have a vector %red1 where only the first n/2 + /// elements are meaningful, the second n/2 elements are undefined and can be + /// dropped. All other operations are actually working with the vector of + /// length n/2, not n, though the real vector length is still n. + /// %val2 = shufflevector %red1, %undef, + /// + /// \----------------v-------------/ \----------v------------/ + /// n/4 elements 3*n/4 elements + /// %red2 = op %red1, val2 - working with the vector of + /// length n/2, the resulting vector has length n/4 etc. + /// 2. Pairwise reduction: + /// Everything is the same except for an additional shuffle operation which + /// is used to produce operands for pairwise kind of reductions. + /// %val1 = shufflevector %val, %undef, + /// + /// \-------------v----------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %val2 = shufflevector %val, %undef, + /// + /// \-------------v----------/ \----------v------------/ + /// n/2 elements n/2 elements + /// %red1 = op %val1, val2 + /// Again, the operation is performed on vector, but the resulting + /// vector %red1 is vector. + /// + /// The cost model should take into account that the actual length of the + /// vector is reduced on each iteration. unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) { assert(Ty->isVectorTy() && "Expect a vector type"); Type *ScalarTy = Ty->getVectorElementType(); unsigned NumVecElts = Ty->getVectorNumElements(); unsigned NumReduxLevels = Log2_32(NumVecElts); - // Try to calculate arithmetic and shuffle op costs for reduction operations. - // We're assuming that reduction operation are performing the following way: - // 1. Non-pairwise reduction - // %val1 = shufflevector %val, %undef, - // - // \----------------v-------------/ \----------v------------/ - // n/2 elements n/2 elements - // %red1 = op %val, val1 - // After this operation we have a vector %red1 with only maningfull the - // first n/2 elements, the second n/2 elements are undefined and can be - // dropped. All other operations are actually working with the vector of - // length n/2, not n. though the real vector length is still n. - // %val2 = shufflevector %red1, %undef, - // - // \----------------v-------------/ \----------v------------/ - // n/4 elements 3*n/4 elements - // %red2 = op %red1, val2 - working with the vector of - // length n/2, the resulting vector has length n/4 etc. - // 2. Pairwise reduction: - // Everything is the same except for an additional shuffle operation which - // is used to produce operands for pairwise kind of reductions. - // %val1 = shufflevector %val, %undef, - // - // \-------------v----------/ \----------v------------/ - // n/2 elements n/2 elements - // %val2 = shufflevector %val, %undef, - // - // \-------------v----------/ \----------v------------/ - // n/2 elements n/2 elements - // %red1 = op %val1, val2 - // Again, the operation is performed on vector, but the resulting - // vector %red1 is vector. - // - // The cost model should take into account that the actual length of the - // vector is reduced on each iteration. unsigned ArithCost = 0; unsigned ShuffleCost = 0; auto *ConcreteTTI = static_cast(this); diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index 317a5d3f54c8a5c9da4ce8408d1dbb0508146594..0d898827efc617058eef2fc6fda10fa96f5fa531 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -346,29 +346,21 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M) { for (auto &F : M) { auto &Ctx = F.getContext(); - AttributeList Attrs = F.getAttributes(), NewAttrs; + AttributeList Attrs = F.getAttributes(); + AttrBuilder NewAttrs; if (!CPU.empty()) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "target-cpu", CPU); - + NewAttrs.addAttribute("target-cpu", CPU); if (!Features.empty()) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "target-features", Features); - + NewAttrs.addAttribute("target-features", Features); if (DisableFPElim.getNumOccurrences() > 0) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "no-frame-pointer-elim", - DisableFPElim ? "true" : "false"); - + NewAttrs.addAttribute("no-frame-pointer-elim", + DisableFPElim ? "true" : "false"); if (DisableTailCalls.getNumOccurrences() > 0) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "disable-tail-calls", - toStringRef(DisableTailCalls)); - + NewAttrs.addAttribute("disable-tail-calls", + toStringRef(DisableTailCalls)); if (StackRealign) - NewAttrs = NewAttrs.addAttribute(Ctx, AttributeList::FunctionIndex, - "stackrealign"); + NewAttrs.addAttribute("stackrealign"); if (TrapFuncName.getNumOccurrences() > 0) for (auto &B : F) @@ -382,8 +374,8 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features, Attribute::get(Ctx, "trap-func-name", TrapFuncName)); // Let NewAttrs override Attrs. - NewAttrs = Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs); - F.setAttributes(NewAttrs); + F.setAttributes( + Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); } } diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index 8de140e91bf3717f44f144be39e65d23d1916bee..77c37ac7abeae9ee683f79595015f44b6231dfb7 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-=====// +//===- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -29,17 +29,22 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include #include +#include +#include +#include namespace llvm { -class MCInstrDesc; +class DefaultVLIWScheduler; +class InstrItineraryData; +class MachineFunction; class MachineInstr; class MachineLoopInfo; -class MachineDominatorTree; -class InstrItineraryData; -class DefaultVLIWScheduler; +class MCInstrDesc; class SUnit; +class TargetInstrInfo; // -------------------------------------------------------------------- // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp @@ -64,17 +69,18 @@ class SUnit; #define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms. #define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term. -typedef uint64_t DFAInput; -typedef int64_t DFAStateInput; +using DFAInput = uint64_t; +using DFAStateInput = int64_t; + #define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable. // -------------------------------------------------------------------- class DFAPacketizer { private: - typedef std::pair UnsignPair; + using UnsignPair = std::pair; const InstrItineraryData *InstrItins; - int CurrentState; + int CurrentState = 0; const DFAStateInput (*DFAStateInputTable)[2]; const unsigned *DFAStateEntryTable; @@ -101,24 +107,23 @@ public: // Check if the resources occupied by a MCInstrDesc are available in // the current state. - bool canReserveResources(const llvm::MCInstrDesc *MID); + bool canReserveResources(const MCInstrDesc *MID); // Reserve the resources occupied by a MCInstrDesc and change the current // state to reflect that change. - void reserveResources(const llvm::MCInstrDesc *MID); + void reserveResources(const MCInstrDesc *MID); // Check if the resources occupied by a machine instruction are available // in the current state. - bool canReserveResources(llvm::MachineInstr &MI); + bool canReserveResources(MachineInstr &MI); // Reserve the resources occupied by a machine instruction and change the // current state to reflect that change. - void reserveResources(llvm::MachineInstr &MI); + void reserveResources(MachineInstr &MI); const InstrItineraryData *getInstrItins() const { return InstrItins; } }; - // VLIWPacketizerList implements a simple VLIW packetizer using DFA. The // packetizer works on machine basic blocks. For each instruction I in BB, // the packetizer consults the DFA to see if machine resources are available @@ -205,6 +210,6 @@ public: void addMutation(std::unique_ptr Mutation); }; -} // namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_DFAPACKETIZER_H diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index 95c4b4248bbd0dcd091f67f5a44f4134953f103b..5ed5faa2c415013e97243a43255fa15b6bb7ef00 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -1,4 +1,4 @@ -//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===// +//===- lib/CodeGen/DIE.h - DWARF Info Entries -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,16 +21,17 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DwarfStringPoolEntry.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/Dwarf.h" #include #include #include #include #include #include +#include #include namespace llvm { @@ -53,11 +54,11 @@ class DIEAbbrevData { dwarf::Form Form; /// Dwarf attribute value for DW_FORM_implicit_const - int64_t Value; + int64_t Value = 0; public: DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) - : Attribute(A), Form(F), Value(0) {} + : Attribute(A), Form(F) {} DIEAbbrevData(dwarf::Attribute A, int64_t V) : Attribute(A), Form(dwarf::DW_FORM_implicit_const), Value(V) {} @@ -136,13 +137,14 @@ class DIEAbbrevSet { /// storage container. BumpPtrAllocator &Alloc; /// \brief FoldingSet that uniques the abbreviations. - llvm::FoldingSet AbbreviationsSet; + FoldingSet AbbreviationsSet; /// A list of all the unique abbreviations in use. std::vector Abbreviations; public: DIEAbbrevSet(BumpPtrAllocator &A) : Alloc(A) {} ~DIEAbbrevSet(); + /// Generate the abbreviation declaration for a DIE and return a pointer to /// the generated abbreviation. /// @@ -289,13 +291,11 @@ public: /// A pointer to another debug information entry. An instance of this class can /// also be used as a proxy for a debug information entry not yet defined /// (ie. types.) -class DIE; class DIEEntry { DIE *Entry; - DIEEntry() = delete; - public: + DIEEntry() = delete; explicit DIEEntry(DIE &E) : Entry(&E) {} DIE &getEntry() const { return *Entry; } @@ -348,10 +348,10 @@ private: /// /// All values that aren't standard layout (or are larger than 8 bytes) /// should be stored by reference instead of by value. - typedef AlignedCharArrayUnion - ValTy; + using ValTy = AlignedCharArrayUnion; + static_assert(sizeof(ValTy) <= sizeof(uint64_t) || sizeof(ValTy) <= sizeof(void *), "Expected all large types to be stored via pointer"); @@ -383,11 +383,11 @@ private: return; #define HANDLE_DIEVALUE_SMALL(T) \ case is##T: \ - destruct(); + destruct(); \ return; #define HANDLE_DIEVALUE_LARGE(T) \ case is##T: \ - destruct(); + destruct(); \ return; #include "llvm/CodeGen/DIEValue.def" } @@ -486,10 +486,12 @@ struct IntrusiveBackListNode { }; struct IntrusiveBackListBase { - typedef IntrusiveBackListNode Node; + using Node = IntrusiveBackListNode; + Node *Last = nullptr; bool empty() const { return !Last; } + void push_back(Node &N) { assert(N.Next.getPointer() == &N && "Expected unlinked node"); assert(N.Next.getInt() == true && "Expected unlinked node"); @@ -505,6 +507,7 @@ struct IntrusiveBackListBase { template class IntrusiveBackList : IntrusiveBackListBase { public: using IntrusiveBackListBase::empty; + void push_back(T &N) { IntrusiveBackListBase::push_back(N); } T &back() { return *static_cast(Last); } const T &back() const { return *static_cast(Last); } @@ -513,6 +516,7 @@ public: class iterator : public iterator_facade_base { friend class const_iterator; + Node *N = nullptr; public: @@ -585,10 +589,12 @@ public: class DIEValueList { struct Node : IntrusiveBackListNode { DIEValue V; + explicit Node(DIEValue V) : V(V) {} }; - typedef IntrusiveBackList ListTy; + using ListTy = IntrusiveBackList; + ListTy List; public: @@ -597,9 +603,10 @@ public: : public iterator_adaptor_base { friend class const_value_iterator; - typedef iterator_adaptor_base iterator_adaptor; + + using iterator_adaptor = + iterator_adaptor_base; public: value_iterator() = default; @@ -612,9 +619,9 @@ public: class const_value_iterator : public iterator_adaptor_base< const_value_iterator, ListTy::const_iterator, std::forward_iterator_tag, const DIEValue> { - typedef iterator_adaptor_base iterator_adaptor; + using iterator_adaptor = + iterator_adaptor_base; public: const_value_iterator() = default; @@ -627,8 +634,8 @@ public: const DIEValue &operator*() const { return wrapped()->V; } }; - typedef iterator_range value_range; - typedef iterator_range const_value_range; + using value_range = iterator_range; + using const_value_range = iterator_range; value_iterator addValue(BumpPtrAllocator &Alloc, const DIEValue &V) { List.push_back(*new (Alloc) Node(V)); @@ -657,15 +664,15 @@ class DIE : IntrusiveBackListNode, public DIEValueList { friend class DIEUnit; /// Dwarf unit relative offset. - unsigned Offset; + unsigned Offset = 0; /// Size of instance + children. - unsigned Size; + unsigned Size = 0; unsigned AbbrevNumber = ~0u; /// Dwarf tag code. dwarf::Tag Tag = (dwarf::Tag)0; /// Set to true to force a DIE to emit an abbreviation that says it has /// children even when it doesn't. This is used for unit testing purposes. - bool ForceChildren; + bool ForceChildren = false; /// Children DIEs. IntrusiveBackList Children; @@ -673,20 +680,19 @@ class DIE : IntrusiveBackListNode, public DIEValueList { /// DIEUnit which contains this DIE as its unit DIE. PointerUnion Owner; - DIE() = delete; - explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag), - ForceChildren(false) {} + explicit DIE(dwarf::Tag Tag) : Tag(Tag) {} public: + DIE() = delete; + DIE(const DIE &RHS) = delete; + DIE(DIE &&RHS) = delete; + DIE &operator=(const DIE &RHS) = delete; + DIE &operator=(const DIE &&RHS) = delete; + static DIE *get(BumpPtrAllocator &Alloc, dwarf::Tag Tag) { return new (Alloc) DIE(Tag); } - DIE(const DIE &RHS) = delete; - DIE(DIE &&RHS) = delete; - void operator=(const DIE &RHS) = delete; - void operator=(const DIE &&RHS) = delete; - // Accessors. unsigned getAbbrevNumber() const { return AbbrevNumber; } dwarf::Tag getTag() const { return Tag; } @@ -696,10 +702,10 @@ public: bool hasChildren() const { return ForceChildren || !Children.empty(); } void setForceChildren(bool B) { ForceChildren = B; } - typedef IntrusiveBackList::iterator child_iterator; - typedef IntrusiveBackList::const_iterator const_child_iterator; - typedef iterator_range child_range; - typedef iterator_range const_child_range; + using child_iterator = IntrusiveBackList::iterator; + using const_child_iterator = IntrusiveBackList::const_iterator; + using child_range = iterator_range; + using const_child_range = iterator_range; child_range children() { return make_range(Children.begin(), Children.end()); @@ -793,6 +799,9 @@ class DIEUnit { uint32_t Length; /// The length in bytes of all of the DIEs in this unit. const uint16_t Version; /// The Dwarf version number for this unit. const uint8_t AddrSize; /// The size in bytes of an address for this unit. +protected: + ~DIEUnit() = default; + public: DIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag); DIEUnit(const DIEUnit &RHS) = delete; @@ -808,6 +817,10 @@ public: this->Section = Section; } + virtual const MCSymbol *getCrossSectionRelativeBaseAddress() const { + return nullptr; + } + /// Return the section that this DIEUnit will be emitted into. /// /// \returns Section pointer which can be NULL. @@ -822,15 +835,19 @@ public: const DIE &getUnitDie() const { return Die; } }; - +struct BasicDIEUnit final : DIEUnit { + BasicDIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag) + : DIEUnit(Version, AddrSize, UnitTag) {} +}; + //===--------------------------------------------------------------------===// /// DIELoc - Represents an expression location. // class DIELoc : public DIEValueList { - mutable unsigned Size; // Size in bytes excluding size header. + mutable unsigned Size = 0; // Size in bytes excluding size header. public: - DIELoc() : Size(0) {} + DIELoc() = default; /// ComputeSize - Calculate the size of the location expression. /// @@ -861,10 +878,10 @@ public: /// DIEBlock - Represents a block of values. // class DIEBlock : public DIEValueList { - mutable unsigned Size; // Size in bytes excluding size header. + mutable unsigned Size = 0; // Size in bytes excluding size header. public: - DIEBlock() : Size(0) {} + DIEBlock() = default; /// ComputeSize - Calculate the size of the location expression. /// diff --git a/include/llvm/CodeGen/ExecutionDepsFix.h b/include/llvm/CodeGen/ExecutionDepsFix.h index 1d5b9684e105503d80e64add6cc889a36a42a455..f4db8b7322dae6f21af85b9d394aa4b33ac276c8 100644 --- a/include/llvm/CodeGen/ExecutionDepsFix.h +++ b/include/llvm/CodeGen/ExecutionDepsFix.h @@ -1,4 +1,4 @@ -//===- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-=// +//==- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -20,19 +20,30 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H #define LLVM_CODEGEN_EXECUTIONDEPSFIX_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include #include namespace llvm { +class MachineBasicBlock; +class MachineInstr; +class TargetInstrInfo; + /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track /// of execution domains. /// @@ -50,7 +61,7 @@ namespace llvm { /// domains. struct DomainValue { // Basic reference counting. - unsigned Refs; + unsigned Refs = 0; // Bitmask of available domains. For an open DomainValue, it is the still // possible domains for collapsing. For a collapsed DomainValue it is the @@ -65,6 +76,8 @@ struct DomainValue { // Twiddleable instructions using or defining these registers. SmallVector Instrs; + DomainValue() { clear(); } + // A collapsed DomainValue has no instructions to twiddle - it simply keeps // track of the domains where the registers are already available. bool isCollapsed() const { return Instrs.empty(); } @@ -97,8 +110,6 @@ struct DomainValue { return countTrailingZeros(AvailableDomains); } - DomainValue() : Refs(0) { clear(); } - // Clear this DomainValue and point to next which has all its data. void clear() { AvailableDomains = 0; @@ -136,29 +147,27 @@ class ExecutionDepsFix : public MachineFunctionPass { // Keeps clearance and domain information for all registers. Note that this // is different from the usual definition notion of liveness. The CPU // doesn't care whether or not we consider a register killed. - LiveReg *OutRegs; + LiveReg *OutRegs = nullptr; // Whether we have gotten to this block in primary processing yet. - bool PrimaryCompleted; + bool PrimaryCompleted = false; // The number of predecessors for which primary processing has completed - unsigned IncomingProcessed; + unsigned IncomingProcessed = 0; // The value of `IncomingProcessed` at the start of primary processing - unsigned PrimaryIncoming; + unsigned PrimaryIncoming = 0; // The number of predecessors for which all processing steps are done. - unsigned IncomingCompleted; + unsigned IncomingCompleted = 0; - MBBInfo() - : OutRegs(nullptr), PrimaryCompleted(false), IncomingProcessed(0), - PrimaryIncoming(0), IncomingCompleted(0) {} + MBBInfo() = default; }; - typedef DenseMap MBBInfoMap; + using MBBInfoMap = DenseMap; MBBInfoMap MBBInfos; /// List of undefined register reads in this block in forward order. - std::vector > UndefReads; + std::vector> UndefReads; /// Storage for register unit liveness. LivePhysRegs LiveRegSet; @@ -166,6 +175,7 @@ class ExecutionDepsFix : public MachineFunctionPass { /// Current instruction number. /// The first instruction in each basic block is 0. int CurInstr; + public: ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC) : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} @@ -217,4 +227,4 @@ private: } // end namepsace llvm -#endif +#endif // LLVM_CODEGEN_EXECUTIONDEPSFIX_H diff --git a/include/llvm/CodeGen/ExpandReductions.h b/include/llvm/CodeGen/ExpandReductions.h new file mode 100644 index 0000000000000000000000000000000000000000..c6aaaad967b38fd3f35267dcc68d0f5eadab8aa0 --- /dev/null +++ b/include/llvm/CodeGen/ExpandReductions.h @@ -0,0 +1,24 @@ +//===----- ExpandReductions.h - Expand experimental reduction intrinsics --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXPANDREDUCTIONS_H +#define LLVM_CODEGEN_EXPANDREDUCTIONS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class ExpandReductionsPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_CODEGEN_EXPANDREDUCTIONS_H diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 79c96283e7337fe2448322832e41c380c940adca..74e4179e73e989c53a95074e4945778f31412ed9 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -17,11 +17,12 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InstrTypes.h" @@ -30,19 +31,43 @@ #include #include #include -#include namespace llvm { +class AllocaInst; +class BasicBlock; +class CallInst; +class Constant; +class ConstantFP; +class DataLayout; +class FunctionLoweringInfo; +class LoadInst; class MachineConstantPool; +class MachineFrameInfo; +class MachineFunction; +class MachineInstr; +class MachineMemOperand; +class MachineOperand; +class MachineRegisterInfo; +class MCContext; +class MCInstrDesc; +class MCSymbol; +class TargetInstrInfo; +class TargetLibraryInfo; +class TargetMachine; +class TargetRegisterClass; +class TargetRegisterInfo; +class Type; +class User; +class Value; /// \brief This is a fast-path instruction selection class that generates poor /// code and doesn't support illegal types or non-trivial lowering, but runs /// quickly. class FastISel { public: - typedef TargetLoweringBase::ArgListEntry ArgListEntry; - typedef TargetLoweringBase::ArgListTy ArgListTy; + using ArgListEntry = TargetLoweringBase::ArgListEntry; + using ArgListTy = TargetLoweringBase::ArgListTy; struct CallLoweringInfo { Type *RetTy = nullptr; bool RetSExt : 1; @@ -83,12 +108,12 @@ public: RetTy = ResultTy; Callee = Target; - IsInReg = Call.paramHasAttr(0, Attribute::InReg); + IsInReg = Call.hasRetAttr(Attribute::InReg); DoesNotReturn = Call.doesNotReturn(); IsVarArg = FuncTy->isVarArg(); IsReturnValueUsed = !Call.getInstruction()->use_empty(); - RetSExt = Call.paramHasAttr(0, Attribute::SExt); - RetZExt = Call.paramHasAttr(0, Attribute::ZExt); + RetSExt = Call.hasRetAttr(Attribute::SExt); + RetZExt = Call.hasRetAttr(Attribute::ZExt); CallConv = Call.getCallingConv(); Args = std::move(ArgsList); @@ -107,12 +132,12 @@ public: Callee = Call.getCalledValue(); Symbol = Target; - IsInReg = Call.paramHasAttr(0, Attribute::InReg); + IsInReg = Call.hasRetAttr(Attribute::InReg); DoesNotReturn = Call.doesNotReturn(); IsVarArg = FuncTy->isVarArg(); IsReturnValueUsed = !Call.getInstruction()->use_empty(); - RetSExt = Call.paramHasAttr(0, Attribute::SExt); - RetZExt = Call.paramHasAttr(0, Attribute::ZExt); + RetSExt = Call.hasRetAttr(Attribute::SExt); + RetZExt = Call.hasRetAttr(Attribute::ZExt); CallConv = Call.getCallingConv(); Args = std::move(ArgsList); @@ -202,6 +227,8 @@ protected: MachineInstr *EmitStartPt; public: + virtual ~FastISel(); + /// \brief Return the position of the last instruction emitted for /// materializing constants for use in the current block. MachineInstr *getLastLocalValue() { return LastLocalValue; } @@ -293,8 +320,6 @@ public: /// \brief Reset InsertPt to the given old insert position. void leaveLocalValueArea(SavePoint Old); - virtual ~FastISel(); - protected: explicit FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, @@ -334,7 +359,7 @@ protected: /// \brief This method is called by target-independent code to request that an /// instruction with the given type, opcode, and register and immediate - // operands be emitted. + /// operands be emitted. virtual unsigned fastEmit_ri(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm); @@ -506,6 +531,7 @@ protected: bool selectCast(const User *I, unsigned Opcode); bool selectExtractValue(const User *I); bool selectInsertValue(const User *I); + bool selectXRayCustomEvent(const CallInst *II); private: /// \brief Handle PHI nodes in successor blocks. diff --git a/include/llvm/CodeGen/FaultMaps.h b/include/llvm/CodeGen/FaultMaps.h index 0f0005b83c543f8404d9cb1f27670d9d82bfc39d..98ff526dfe946bab8258246f7d1de66a0c952c9e 100644 --- a/include/llvm/CodeGen/FaultMaps.h +++ b/include/llvm/CodeGen/FaultMaps.h @@ -56,7 +56,7 @@ private: HandlerOffsetExpr(HandlerOffset) {} }; - typedef std::vector FunctionFaultInfos; + using FunctionFaultInfos = std::vector; // We'd like to keep a stable iteration order for FunctionInfos to help // FileCheck based testing. @@ -78,20 +78,17 @@ private: /// generated by the version of LLVM that includes it. No guarantees are made /// with respect to forward or backward compatibility. class FaultMapParser { - typedef uint8_t FaultMapVersionType; - static const size_t FaultMapVersionOffset = 0; + using FaultMapVersionType = uint8_t; + using Reserved0Type = uint8_t; + using Reserved1Type = uint16_t; + using NumFunctionsType = uint32_t; - typedef uint8_t Reserved0Type; + static const size_t FaultMapVersionOffset = 0; static const size_t Reserved0Offset = FaultMapVersionOffset + sizeof(FaultMapVersionType); - - typedef uint16_t Reserved1Type; static const size_t Reserved1Offset = Reserved0Offset + sizeof(Reserved0Type); - - typedef uint32_t NumFunctionsType; static const size_t NumFunctionsOffset = Reserved1Offset + sizeof(Reserved1Type); - static const size_t FunctionInfosOffset = NumFunctionsOffset + sizeof(NumFunctionsType); @@ -105,14 +102,13 @@ class FaultMapParser { public: class FunctionFaultInfoAccessor { - typedef uint32_t FaultKindType; - static const size_t FaultKindOffset = 0; + using FaultKindType = uint32_t; + using FaultingPCOffsetType = uint32_t; + using HandlerPCOffsetType = uint32_t; - typedef uint32_t FaultingPCOffsetType; + static const size_t FaultKindOffset = 0; static const size_t FaultingPCOffsetOffset = FaultKindOffset + sizeof(FaultKindType); - - typedef uint32_t HandlerPCOffsetType; static const size_t HandlerPCOffsetOffset = FaultingPCOffsetOffset + sizeof(FaultingPCOffsetType); @@ -140,20 +136,17 @@ public: }; class FunctionInfoAccessor { - typedef uint64_t FunctionAddrType; - static const size_t FunctionAddrOffset = 0; + using FunctionAddrType = uint64_t; + using NumFaultingPCsType = uint32_t; + using ReservedType = uint32_t; - typedef uint32_t NumFaultingPCsType; + static const size_t FunctionAddrOffset = 0; static const size_t NumFaultingPCsOffset = FunctionAddrOffset + sizeof(FunctionAddrType); - - typedef uint32_t ReservedType; static const size_t ReservedOffset = NumFaultingPCsOffset + sizeof(NumFaultingPCsType); - static const size_t FunctionFaultInfosOffset = ReservedOffset + sizeof(ReservedType); - static const size_t FunctionInfoHeaderSize = FunctionFaultInfosOffset; const uint8_t *P = nullptr; diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index 75cd7da9d6b930e1a2ff523ae0da39aaf5d422ed..f32a58915118f1ba2d772c2b957ba3f8bf1696db 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -1,4 +1,4 @@ -//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===// +//===- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen ---===// // // The LLVM Compiler Infrastructure // @@ -23,28 +23,28 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Target/TargetRegisterInfo.h" +#include +#include #include namespace llvm { -class AllocaInst; +class Argument; class BasicBlock; class BranchProbabilityInfo; class Function; -class GlobalVariable; class Instruction; -class MachineInstr; -class MachineBasicBlock; class MachineFunction; -class MachineModuleInfo; +class MachineInstr; class MachineRegisterInfo; -class SelectionDAG; class MVT; +class SelectionDAG; class TargetLowering; -class Value; //===--------------------------------------------------------------------===// /// FunctionLoweringInfo - This contains information that is global to a @@ -73,25 +73,29 @@ public: /// A map from swifterror value in a basic block to the virtual register it is /// currently represented by. - llvm::DenseMap, unsigned> + DenseMap, unsigned> SwiftErrorVRegDefMap; /// A list of upward exposed vreg uses that need to be satisfied by either a /// copy def or a phi node at the beginning of the basic block representing /// the predecessor(s) swifterror value. - llvm::DenseMap, unsigned> + DenseMap, unsigned> SwiftErrorVRegUpwardsUse; + /// A map from instructions that define/use a swifterror value to the virtual + /// register that represents that def/use. + llvm::DenseMap, unsigned> + SwiftErrorVRegDefUses; + /// The swifterror argument of the current function. const Value *SwiftErrorArg; - typedef SmallVector SwiftErrorValues; + using SwiftErrorValues = SmallVector; /// A function can only have a single swifterror argument. And if it does /// have a swifterror argument, it must be the first entry in /// SwiftErrorVals. SwiftErrorValues SwiftErrorVals; - /// Get or create the swifterror value virtual register in /// SwiftErrorVRegDefMap for this basic block. unsigned getOrCreateSwiftErrorVReg(const MachineBasicBlock *, @@ -102,6 +106,13 @@ public: void setCurrentSwiftErrorVReg(const MachineBasicBlock *MBB, const Value *, unsigned); + /// Get or create the swifterror value virtual register for a def of a + /// swifterror by an instruction. + std::pair getOrCreateSwiftErrorVRegDefAt(const Instruction *); + std::pair + getOrCreateSwiftErrorVRegUseAt(const Instruction *, const MachineBasicBlock *, + const Value *); + /// ValueMap - Since we emit code for the function a basic block at a time, /// we must remember which virtual registers hold the values for /// cross-basic-block values. @@ -117,7 +128,7 @@ public: /// slot), and we track that here. struct StatepointSpillMap { - typedef DenseMap> SlotMapTy; + using SlotMapTy = DenseMap>; /// Maps uniqued llvm IR values to the slots they were spilled in. If a /// value is mapped to None it means we visited the value but didn't spill @@ -171,9 +182,9 @@ public: struct LiveOutInfo { unsigned NumSignBits : 31; unsigned IsValid : 1; - APInt KnownOne, KnownZero; - LiveOutInfo() : NumSignBits(0), IsValid(true), KnownOne(1, 0), - KnownZero(1, 0) {} + KnownBits Known = 1; + + LiveOutInfo() : NumSignBits(0), IsValid(true) {} }; /// Record the preferred extend type (ISD::SIGN_EXTEND or ISD::ZERO_EXTEND) @@ -247,16 +258,16 @@ public: /// AddLiveOutRegInfo - Adds LiveOutInfo for a register. void AddLiveOutRegInfo(unsigned Reg, unsigned NumSignBits, - const APInt &KnownZero, const APInt &KnownOne) { + const KnownBits &Known) { // Only install this information if it tells us something. - if (NumSignBits == 1 && KnownZero == 0 && KnownOne == 0) + if (NumSignBits == 1 && Known.isUnknown()) return; LiveOutRegInfo.grow(Reg); LiveOutInfo &LOI = LiveOutRegInfo[Reg]; LOI.NumSignBits = NumSignBits; - LOI.KnownOne = KnownOne; - LOI.KnownZero = KnownZero; + LOI.Known.One = Known.One; + LOI.Known.Zero = Known.Zero; } /// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination @@ -298,4 +309,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h index e6afcbc8ded28a81fc775331976752943fab2464..ad2599fc120e54e064e5bfe21063a934459c98ca 100644 --- a/include/llvm/CodeGen/GCMetadata.h +++ b/include/llvm/CodeGen/GCMetadata.h @@ -1,4 +1,4 @@ -//===-- GCMetadata.h - Garbage collector metadata ---------------*- C++ -*-===// +//===- GCMetadata.h - Garbage collector metadata ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -36,15 +36,20 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" +#include +#include +#include #include -#include +#include namespace llvm { -class AsmPrinter; + class Constant; +class Function; class MCSymbol; /// GCPoint - Metadata for a collector-safe point in machine code. @@ -62,20 +67,20 @@ struct GCPoint { /// collector. struct GCRoot { int Num; ///< Usually a frame index. - int StackOffset; ///< Offset from the stack pointer. + int StackOffset = -1; ///< Offset from the stack pointer. const Constant *Metadata; ///< Metadata straight from the call ///< to llvm.gcroot. - GCRoot(int N, const Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {} + GCRoot(int N, const Constant *MD) : Num(N), Metadata(MD) {} }; /// Garbage collection metadata for a single function. Currently, this /// information only applies to GCStrategies which use GCRoot. class GCFunctionInfo { public: - typedef std::vector::iterator iterator; - typedef std::vector::iterator roots_iterator; - typedef std::vector::const_iterator live_iterator; + using iterator = std::vector::iterator; + using roots_iterator = std::vector::iterator; + using live_iterator = std::vector::const_iterator; private: const Function &F; @@ -99,11 +104,9 @@ public: ~GCFunctionInfo(); /// getFunction - Return the function to which this metadata applies. - /// const Function &getFunction() const { return F; } /// getStrategy - Return the GC strategy for the function. - /// GCStrategy &getStrategy() { return S; } /// addStackRoot - Registers a root that lives on the stack. Num is the @@ -126,24 +129,20 @@ public: } /// getFrameSize/setFrameSize - Records the function's frame size. - /// uint64_t getFrameSize() const { return FrameSize; } void setFrameSize(uint64_t S) { FrameSize = S; } /// begin/end - Iterators for safe points. - /// iterator begin() { return SafePoints.begin(); } iterator end() { return SafePoints.end(); } size_t size() const { return SafePoints.size(); } /// roots_begin/roots_end - Iterators for all roots in the function. - /// roots_iterator roots_begin() { return Roots.begin(); } roots_iterator roots_end() { return Roots.end(); } size_t roots_size() const { return Roots.size(); } /// live_begin/live_end - Iterators for live roots at a given safe point. - /// live_iterator live_begin(const iterator &p) { return roots_begin(); } live_iterator live_end(const iterator &p) { return roots_end(); } size_t live_size(const iterator &p) const { return roots_size(); } @@ -166,7 +165,7 @@ public: /// List of per function info objects. In theory, Each of these /// may be associated with a different GC. - typedef std::vector> FuncInfoVec; + using FuncInfoVec = std::vector>; FuncInfoVec::iterator funcinfo_begin() { return Functions.begin(); } FuncInfoVec::iterator funcinfo_end() { return Functions.end(); } @@ -177,11 +176,11 @@ private: /// Non-owning map to bypass linear search when finding the GCFunctionInfo /// associated with a particular Function. - typedef DenseMap finfo_map_type; + using finfo_map_type = DenseMap; finfo_map_type FInfoMap; public: - typedef SmallVector,1>::const_iterator iterator; + using iterator = SmallVector, 1>::const_iterator; static char ID; @@ -202,6 +201,7 @@ public: /// will soon change. GCFunctionInfo &getFunctionInfo(const Function &F); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_GCMETADATA_H diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h index 220847029113858998efe2884619f70f2f28f1bb..1cc69a7b71af5fd01c535d37922a57e2bcb0bb80 100644 --- a/include/llvm/CodeGen/GCMetadataPrinter.h +++ b/include/llvm/CodeGen/GCMetadataPrinter.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables -*- C++ -*-===// +//===- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,45 +20,48 @@ #ifndef LLVM_CODEGEN_GCMETADATAPRINTER_H #define LLVM_CODEGEN_GCMETADATAPRINTER_H -#include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/Support/Registry.h" namespace llvm { +class AsmPrinter; class GCMetadataPrinter; +class GCModuleInfo; +class GCStrategy; +class Module; /// GCMetadataPrinterRegistry - The GC assembly printer registry uses all the /// defaults from Registry. -typedef Registry GCMetadataPrinterRegistry; +using GCMetadataPrinterRegistry = Registry; /// GCMetadataPrinter - Emits GC metadata as assembly code. Instances are /// created, managed, and owned by the AsmPrinter. class GCMetadataPrinter { private: - GCStrategy *S; friend class AsmPrinter; + GCStrategy *S; + protected: // May only be subclassed. GCMetadataPrinter(); -private: +public: GCMetadataPrinter(const GCMetadataPrinter &) = delete; GCMetadataPrinter &operator=(const GCMetadataPrinter &) = delete; + virtual ~GCMetadataPrinter(); -public: GCStrategy &getStrategy() { return *S; } /// Called before the assembly for the module is generated by /// the AsmPrinter (but after target specific hooks.) virtual void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {} + /// Called after the assembly for the module is generated by /// the AsmPrinter (but before target specific hooks) virtual void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) {} - - virtual ~GCMetadataPrinter(); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_GCMETADATAPRINTER_H diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h index 5b1fafea25b57d7ff6b931f931fe9ff259dfce48..16168e785f812b9a73e7724e4266ccfa9a799582 100644 --- a/include/llvm/CodeGen/GCStrategy.h +++ b/include/llvm/CodeGen/GCStrategy.h @@ -174,7 +174,7 @@ public: /// Note that to use a custom GCMetadataPrinter w/gc.roots, you must also /// register your GCMetadataPrinter subclass with the /// GCMetadataPrinterRegistery as well. -typedef Registry GCRegistry; +using GCRegistry = Registry; } // end namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 31ffdc0e2e78c2c117145fa027dfcd67d3a51759..e292e8913db0605c95eda5744bf5efe86b6841f1 100644 --- a/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -78,7 +78,7 @@ private: /// this function. DenseMap FrameIndices; - /// Methods for translating form LLVM IR to MachineInstr. + /// \name Methods for translating form LLVM IR to MachineInstr. /// \see ::translate for general information on the translate methods. /// @{ diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index d8096aeb215ada2ad247d1130abe98c301a0c2fe..1a865c3f0dce7eb55dda912b072c019e76efa8de 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -17,16 +17,45 @@ #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H #include "llvm/ADT/Optional.h" +#include #include +#include namespace llvm { class MachineInstr; +class MachineInstrBuilder; +class MachineFunction; class MachineOperand; class MachineRegisterInfo; class RegisterBankInfo; class TargetInstrInfo; class TargetRegisterInfo; +/// Container class for CodeGen predicate results. +/// This is convenient because std::bitset does not have a constructor +/// with an initializer list of set bits. +/// +/// Each InstructionSelector subclass should define a PredicateBitset class with: +/// const unsigned MAX_SUBTARGET_PREDICATES = 192; +/// using PredicateBitset = PredicateBitsetImpl; +/// and updating the constant to suit the target. Tablegen provides a suitable +/// definition for the predicates in use in GenGlobalISel.inc when +/// GET_GLOBALISEL_PREDICATE_BITSET is defined. +template +class PredicateBitsetImpl : public std::bitset { +public: + // Cannot inherit constructors because it's not supported by VC++.. + PredicateBitsetImpl() = default; + + PredicateBitsetImpl(const std::bitset &B) + : std::bitset(B) {} + + PredicateBitsetImpl(std::initializer_list Init) { + for (auto I : Init) + std::bitset::set(I); + } +}; + /// Provides the logic to select generic machine instructions. class InstructionSelector { public: @@ -46,6 +75,8 @@ public: virtual bool select(MachineInstr &I) const = 0; protected: + typedef std::function ComplexRendererFn; + InstructionSelector(); /// Mutate the newly-selected instruction \p I to constrain its (possibly @@ -62,9 +93,6 @@ protected: const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const; - Optional getConstantVRegVal(unsigned VReg, - const MachineRegisterInfo &MRI) const; - bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 8fecafdc08d0e9fdf1eba436716413feda1f7b50..5197ba869c0a6677e57074078e3c0c15739a903b 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -21,9 +21,11 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H #define LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H +#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" namespace llvm { // Forward declarations. @@ -99,6 +101,12 @@ private: const LegalizerInfo &LI; }; +/// Helper function that replaces \p MI with a libcall. +LegalizerHelper::LegalizeResult +replaceWithLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, + RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, + ArrayRef Args); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 30d67eb4992333428f6d3b2ded3aa752ad3bfcc3..21354ae20ed140db26ae76f2fb4cd0341a8b5a9f 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -145,7 +145,7 @@ public: /// Iterate the given function (typically something like doubling the width) /// on Ty until we find a legal type for this operation. - LLT findLegalType(const InstrAspect &Aspect, + Optional findLegalType(const InstrAspect &Aspect, function_ref NextType) const { LegalizeAction Action; const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx]; @@ -153,8 +153,12 @@ public: do { Ty = NextType(Ty); auto ActionIt = Map.find(Ty); - if (ActionIt == Map.end()) - Action = DefaultActions.find(Aspect.Opcode)->second; + if (ActionIt == Map.end()) { + auto DefaultIt = DefaultActions.find(Aspect.Opcode); + if (DefaultIt == DefaultActions.end()) + return None; + Action = DefaultIt->second; + } else Action = ActionIt->second; } while(Action != Legal); @@ -163,11 +167,14 @@ public: /// Find what type it's actually OK to perform the given operation on, given /// the general approach we've decided to take. - LLT findLegalType(const InstrAspect &Aspect, LegalizeAction Action) const; + Optional findLegalType(const InstrAspect &Aspect, LegalizeAction Action) const; std::pair findLegalAction(const InstrAspect &Aspect, LegalizeAction Action) const { - return std::make_pair(Action, findLegalType(Aspect, Action)); + auto LegalType = findLegalType(Aspect, Action); + if (!LegalType) + return std::make_pair(LegalizeAction::Unsupported, LLT()); + return std::make_pair(Action, *LegalType); } /// Find the specified \p Aspect in the primary (explicitly set) Actions diff --git a/include/llvm/CodeGen/GlobalISel/Localizer.h b/include/llvm/CodeGen/GlobalISel/Localizer.h new file mode 100644 index 0000000000000000000000000000000000000000..0a46eb9e7840d0cc9e375e8d6c93c8b5e353b639 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -0,0 +1,78 @@ +//== llvm/CodeGen/GlobalISel/Localizer.h - Localizer -------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file describes the interface of the Localizer pass. +/// This pass moves/duplicates constant-like instructions close to their uses. +/// Its primarily goal is to workaround the deficiencies of the fast register +/// allocator. +/// With GlobalISel constants are all materialized in the entry block of +/// a function. However, the fast allocator cannot rematerialize constants and +/// has a lot more live-ranges to deal with and will most likely end up +/// spilling a lot. +/// By pushing the constants close to their use, we only create small +/// live-ranges. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H +#define LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H + +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { +// Forward declarations. +class MachineRegisterInfo; + +/// This pass implements the localization mechanism described at the +/// top of this file. One specificity of the implementation is that +/// it will materialize one and only one instance of a constant per +/// basic block, thus enabling reuse of that constant within that block. +/// Moreover, it only materializes constants in blocks where they +/// are used. PHI uses are considered happening at the end of the +/// related predecessor. +class Localizer : public MachineFunctionPass { +public: + static char ID; + +private: + /// MRI contains all the register class/bank information that this + /// pass uses and updates. + MachineRegisterInfo *MRI; + + /// Check whether or not \p MI needs to be moved close to its uses. + static bool shouldLocalize(const MachineInstr &MI); + + /// Check if \p MOUse is used in the same basic block as \p Def. + /// If the use is in the same block, we say it is local. + /// When the use is not local, \p InsertMBB will contain the basic + /// block when to insert \p Def to have a local use. + static bool isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, + MachineBasicBlock *&InsertMBB); + + /// Initialize the field members using \p MF. + void init(MachineFunction &MF); + +public: + Localizer(); + + StringRef getPassName() const override { return "Localizer"; } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::IsSSA) + .set(MachineFunctionProperties::Property::Legalized) + .set(MachineFunctionProperties::Property::RegBankSelected); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // End namespace llvm. + +#endif diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 472f50576d966ca8149255ab8410a19c6cac13f7..4e7b8350038b81c7b3ea261c0b8ea6701e306bb5 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -16,9 +16,9 @@ #include "llvm/CodeGen/GlobalISel/Types.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/LowLevelType.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" @@ -40,12 +40,12 @@ class MachineIRBuilder { MachineFunction *MF; /// Information used to access the description of the opcodes. const TargetInstrInfo *TII; - /// Information used to verify types are consistent. - const MachineRegisterInfo *MRI; + /// Information used to verify types are consistent and to create virtual registers. + MachineRegisterInfo *MRI; /// Debug location to be set to any instruction we create. DebugLoc DL; - /// Fields describing the insertion point. + /// \name Fields describing the insertion point. /// @{ MachineBasicBlock *MBB; MachineBasicBlock::iterator II; @@ -84,7 +84,7 @@ public: void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II); /// @} - /// Setters for the insertion point. + /// \name Setters for the insertion point. /// @{ /// Set the MachineFunction where to build instructions. void setMF(MachineFunction &); @@ -98,7 +98,7 @@ public: void setInstr(MachineInstr &MI); /// @} - /// Control where instructions we create are recorded (typically for + /// \name Control where instructions we create are recorded (typically for /// visiting again later during legalization). /// @{ void recordInsertions(std::function InsertedInstr); @@ -229,6 +229,26 @@ public: MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0, unsigned Op1); + /// Materialize and insert \p Res = G_GEP \p Op0, (G_CONSTANT \p Value) + /// + /// G_GEP adds \p Value bytes to the pointer specified by \p Op0, + /// storing the resulting pointer in \p Res. If \p Value is zero then no + /// G_GEP or G_CONSTANT will be created and \pre Op0 will be assigned to + /// \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Op0 must be a generic virtual register with pointer type. + /// \pre \p ValueTy must be a scalar type. + /// \pre \p Res must be 0. This is to detect confusion between + /// materializeGEP() and buildGEP(). + /// \post \p Res will either be a new generic virtual register of the same + /// type as \p Op0 or \p Op0 itself. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + Optional materializeGEP(unsigned &Res, unsigned Op0, + const LLT &ValueTy, + uint64_t Value); + /// Build and insert \p Res = G_PTR_MASK \p Op0, \p NumBits /// /// G_PTR_MASK clears the low bits of a pointer operand without destroying its diff --git a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h index daa8dcf2061b7d944fa764134763ed6ed6b3143b..f610bc02b6f2640c77df02deac68b4484c9d7fe2 100644 --- a/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -309,7 +309,7 @@ public: Impossible }; - /// Convenient types for a list of insertion points. + /// \name Convenient types for a list of insertion points. /// @{ typedef SmallVector, 2> InsertionPoints; typedef InsertionPoints::iterator insertpt_iterator; @@ -341,7 +341,7 @@ public: const TargetRegisterInfo &TRI, Pass &P, RepairingKind Kind = RepairingKind::Insert); - /// Getters. + /// \name Getters. /// @{ RepairingKind getKind() const { return Kind; } unsigned getOpIdx() const { return OpIdx; } @@ -349,7 +349,7 @@ public: bool hasSplit() { return HasSplit; } /// @} - /// Overloaded methods to add an insertion point. + /// \name Overloaded methods to add an insertion point. /// @{ /// Add a MBBInsertionPoint to the list of InsertPoints. void addInsertPoint(MachineBasicBlock &MBB, bool Beginning); @@ -362,7 +362,7 @@ public: void addInsertPoint(InsertPoint &Point); /// @} - /// Accessors related to the insertion points. + /// \name Accessors related to the insertion points. /// @{ insertpt_iterator begin() { return InsertPoints.begin(); } insertpt_iterator end() { return InsertPoints.end(); } @@ -561,7 +561,7 @@ private: /// Find the best mapping for \p MI from \p PossibleMappings. /// \return a reference on the best mapping in \p PossibleMappings. - RegisterBankInfo::InstructionMapping & + const RegisterBankInfo::InstructionMapping & findBestMapping(MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings, SmallVectorImpl &RepairPts); diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h index 600733ac6a2d80880ad2410670bfaf335f16a0df..e3549d8988cdae002532958dc2e53e93be405f10 100644 --- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -264,7 +264,7 @@ public: /// Convenient type to represent the alternatives for mapping an /// instruction. /// \todo When we move to TableGen this should be an array ref. - typedef SmallVector InstructionMappings; + typedef SmallVector InstructionMappings; /// Helper class used to get/create the virtual registers that will be used /// to replace the MachineOperand when applying a mapping. @@ -310,7 +310,7 @@ public: OperandsMapper(MachineInstr &MI, const InstructionMapping &InstrMapping, MachineRegisterInfo &MRI); - /// Getters. + /// \name Getters. /// @{ /// The MachineInstr being remapped. MachineInstr &getMI() const { return MI; } @@ -378,17 +378,25 @@ protected: /// Keep dynamically allocated PartialMapping in a separate map. /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap> MapOfPartialMappings; + mutable DenseMap> + MapOfPartialMappings; /// Keep dynamically allocated ValueMapping in a separate map. /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap > MapOfValueMappings; + mutable DenseMap> + MapOfValueMappings; /// Keep dynamically allocated array of ValueMapping in a separate map. /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap> MapOfOperandsMappings; + mutable DenseMap> + MapOfOperandsMappings; - /// Create a RegisterBankInfo that can accomodate up to \p NumRegBanks + /// Keep dynamically allocated InstructionMapping in a separate map. + /// This shouldn't be needed when everything gets TableGen'ed. + mutable DenseMap> + MapOfInstructionMappings; + + /// Create a RegisterBankInfo that can accommodate up to \p NumRegBanks /// RegisterBank instances. RegisterBankInfo(RegisterBank **RegBanks, unsigned NumRegBanks); @@ -425,14 +433,14 @@ protected: /// register, a register class, or a register bank. /// In other words, this method will likely fail to find a mapping for /// any generic opcode that has not been lowered by target specific code. - InstructionMapping getInstrMappingImpl(const MachineInstr &MI) const; + const InstructionMapping &getInstrMappingImpl(const MachineInstr &MI) const; /// Get the uniquely generated PartialMapping for the /// given arguments. const PartialMapping &getPartialMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const; - /// Methods to get a uniquely generated ValueMapping. + /// \name Methods to get a uniquely generated ValueMapping. /// @{ /// The most common ValueMapping consists of a single PartialMapping. @@ -445,7 +453,7 @@ protected: unsigned NumBreakDowns) const; /// @} - /// Methods to get a uniquely generated array of ValueMapping. + /// \name Methods to get a uniquely generated array of ValueMapping. /// @{ /// Get the uniquely generated array of ValueMapping for the @@ -478,6 +486,33 @@ protected: std::initializer_list OpdsMapping) const; /// @} + /// \name Methods to get a uniquely generated InstructionMapping. + /// @{ + +private: + /// Method to get a uniquely generated InstructionMapping. + const InstructionMapping & + getInstructionMappingImpl(bool IsInvalid, unsigned ID = InvalidMappingID, + unsigned Cost = 0, + const ValueMapping *OperandsMapping = nullptr, + unsigned NumOperands = 0) const; + +public: + /// Method to get a uniquely generated InstructionMapping. + const InstructionMapping & + getInstructionMapping(unsigned ID, unsigned Cost, + const ValueMapping *OperandsMapping, + unsigned NumOperands) const { + return getInstructionMappingImpl(/*IsInvalid*/ false, ID, Cost, + OperandsMapping, NumOperands); + } + + /// Method to get a uniquely generated invalid InstructionMapping. + const InstructionMapping &getInvalidInstructionMapping() const { + return getInstructionMappingImpl(/*IsInvalid*/ true); + } + /// @} + /// Get the register bank for the \p OpIdx-th operand of \p MI form /// the encoding constraints, if any. /// @@ -603,7 +638,8 @@ public: /// /// \note If returnedVal does not verify MI, this would probably mean /// that the target does not support that instruction. - virtual InstructionMapping getInstrMapping(const MachineInstr &MI) const; + virtual const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const; /// Get the alternative mappings for \p MI. /// Alternative in the sense different from getInstrMapping. diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h index 52bf965a3cb3f953e4a27783ce388e59f208b083..69d507069808213adfe082e9edfa11e2b4f88f7d 100644 --- a/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/include/llvm/CodeGen/GlobalISel/Utils.h @@ -30,6 +30,7 @@ class TargetInstrInfo; class TargetPassConfig; class TargetRegisterInfo; class Twine; +class ConstantFP; /// Try to constrain Reg so that it is usable by argument OpIdx of the /// provided MCInstrDesc \p II. If this fails, create a new virtual @@ -60,5 +61,10 @@ void reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, const char *PassName, StringRef Msg, const MachineInstr &MI); +Optional getConstantVRegVal(unsigned VReg, + const MachineRegisterInfo &MRI); +const ConstantFP* getConstantFPVRegVal(unsigned VReg, + const MachineRegisterInfo &MRI); + } // End namespace llvm. #endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index ee3fd0bdda2a997aa8ce33bfaef49d13d387f103..bc5d2353f63e31a3655113547d685e4572f9b8aa 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -216,6 +216,9 @@ namespace ISD { /// These nodes take two operands of the same value type, and produce two /// results. The first result is the normal add or sub result, the second /// result is the carry flag result. + /// FIXME: These nodes are deprecated in favor of ADDCARRY and SUBCARRY. + /// They are kept around for now to provide a smooth transition path + /// toward the use of ADDCARRY/SUBCARRY and will eventually be removed. ADDC, SUBC, /// Carry-using nodes for multiple precision addition and subtraction. These @@ -227,6 +230,16 @@ namespace ISD { /// values. ADDE, SUBE, + /// Carry-using nodes for multiple precision addition and subtraction. + /// These nodes take three operands: The first two are the normal lhs and + /// rhs to the add or sub, and the third is a boolean indicating if there + /// is an incoming carry. These nodes produce two results: the normal + /// result of the add or sub, and the output carry so they can be chained + /// together. The use of this opcode is preferable to adde/sube if the + /// target supports it, as the carry is a regular value rather than a + /// glue, which allows further optimisation. + ADDCARRY, SUBCARRY, + /// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition. /// These nodes take two operands: the normal LHS and RHS to the add. They /// produce two results: the normal result of the add, and a boolean that @@ -251,6 +264,14 @@ namespace ISD { /// optimized. STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM, + /// Constrained versions of libm-equivalent floating point intrinsics. + /// These will be lowered to the equivalent non-constrained pseudo-op + /// (or expanded to the equivalent library call) before final selection. + /// They are used to limit optimizations while the DAG is being optimized. + STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS, + STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2, + STRICT_FRINT, STRICT_FNEARBYINT, + /// FMA - Perform a * b + c with no intermediate rounding step. FMA, @@ -389,12 +410,22 @@ namespace ISD { /// then the result type must also be a vector type. SETCC, - /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but + /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, and /// op #2 is a *carry value*. This operator checks the result of /// "LHS - RHS - Carry", and can be used to compare two wide integers: /// (setcce lhshi rhshi (subc lhslo rhslo) cc). Only valid for integers. + /// FIXME: This node is deprecated in favor of SETCCCARRY. + /// It is kept around for now to provide a smooth transition path + /// toward the use of SETCCCARRY and will eventually be removed. SETCCE, + /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but + /// op #2 is a boolean indicating if there is an incoming carry. This + /// operator checks the result of "LHS - RHS - Carry", and can be used to + /// compare two wide integers: (setcce lhshi rhshi (subc lhslo rhslo) cc). + /// Only valid for integers. + SETCCCARRY, + /// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded /// integer shift operations. The operation ordering is: /// [Lo,Hi] = op [LoLHS,HiLHS], Amt @@ -631,6 +662,13 @@ namespace ISD { /// of a call sequence, and carry arbitrary information that target might /// want to know. The first operand is a chain, the rest are specified by /// the target and not touched by the DAG optimizers. + /// Targets that may use stack to pass call arguments define additional + /// operands: + /// - size of the call frame part that must be set up within the + /// CALLSEQ_START..CALLSEQ_END pair, + /// - part of the call frame prepared prior to CALLSEQ_START. + /// Both these parameters must be constants, their sum is the total call + /// frame size. /// CALLSEQ_START..CALLSEQ_END pairs may not be nested. CALLSEQ_START, // Beginning of a call sequence CALLSEQ_END, // End of a call sequence @@ -770,6 +808,20 @@ namespace ISD { /// known nonzero constant. The only operand here is the chain. GET_DYNAMIC_AREA_OFFSET, + /// Generic reduction nodes. These nodes represent horizontal vector + /// reduction operations, producing a scalar result. + /// The STRICT variants perform reductions in sequential order. The first + /// operand is an initial scalar accumulator value, and the second operand + /// is the vector to reduce. + VECREDUCE_STRICT_FADD, VECREDUCE_STRICT_FMUL, + /// These reductions are non-strict, and have a single vector operand. + VECREDUCE_FADD, VECREDUCE_FMUL, + VECREDUCE_ADD, VECREDUCE_MUL, + VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR, + VECREDUCE_SMAX, VECREDUCE_SMIN, VECREDUCE_UMAX, VECREDUCE_UMIN, + /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants. + VECREDUCE_FMAX, VECREDUCE_FMIN, + /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h index 6c35832f963c7868153d71a70701225f921465d5..79fa12ec2fbbbd95ef16b41c59f172c025ea27a2 100644 --- a/include/llvm/CodeGen/LexicalScopes.h +++ b/include/llvm/CodeGen/LexicalScopes.h @@ -31,12 +31,13 @@ namespace llvm { class MachineBasicBlock; class MachineFunction; class MachineInstr; +class MDNode; //===----------------------------------------------------------------------===// /// InsnRange - This is used to track range of instructions with identical /// lexical scope. /// -typedef std::pair InsnRange; +using InsnRange = std::pair; //===----------------------------------------------------------------------===// /// LexicalScope - This class is used to track scope information. diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index b792cba4b78a5e2dfddda60bbfe8154feb98e73f..f4fa872c7f5bb8a65da474ad2c5b71ae259b3dee 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/LiveInterval.h - Interval representation ---*- C++ -*-===// +//===- llvm/CodeGen/LiveInterval.h - Interval representation ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,22 +21,30 @@ #ifndef LLVM_CODEGEN_LIVEINTERVAL_H #define LLVM_CODEGEN_LIVEINTERVAL_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntEqClasses.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/MathExtras.h" +#include #include -#include +#include +#include +#include #include +#include +#include namespace llvm { + class CoalescerPair; class LiveIntervals; - class MachineInstr; class MachineRegisterInfo; - class TargetRegisterInfo; class raw_ostream; - template class SmallPtrSet; /// VNInfo - Value Number Information. /// This class holds information about a machine level values, including @@ -44,7 +52,7 @@ namespace llvm { /// class VNInfo { public: - typedef BumpPtrAllocator Allocator; + using Allocator = BumpPtrAllocator; /// The ID number of this value. unsigned id; @@ -53,14 +61,10 @@ namespace llvm { SlotIndex def; /// VNInfo constructor. - VNInfo(unsigned i, SlotIndex d) - : id(i), def(d) - { } + VNInfo(unsigned i, SlotIndex d) : id(i), def(d) {} /// VNInfo constructor, copies values from orig, except for the value number. - VNInfo(unsigned i, const VNInfo &orig) - : id(i), def(orig.def) - { } + VNInfo(unsigned i, const VNInfo &orig) : id(i), def(orig.def) {} /// Copy from the parameter into this VNInfo. void copyFrom(VNInfo &src) { @@ -152,16 +156,16 @@ namespace llvm { /// segment with a new value number is used. class LiveRange { public: - /// This represents a simple continuous liveness interval for a value. /// The start point is inclusive, the end point exclusive. These intervals /// are rendered as [start,end). struct Segment { SlotIndex start; // Start point of the interval (inclusive) SlotIndex end; // End point of the interval (exclusive) - VNInfo *valno; // identifier for the value contained in this segment. + VNInfo *valno = nullptr; // identifier for the value contained in this + // segment. - Segment() : valno(nullptr) {} + Segment() = default; Segment(SlotIndex S, SlotIndex E, VNInfo *V) : start(S), end(E), valno(V) { @@ -189,8 +193,8 @@ namespace llvm { void dump() const; }; - typedef SmallVector Segments; - typedef SmallVector VNInfoList; + using Segments = SmallVector; + using VNInfoList = SmallVector; Segments segments; // the liveness segments VNInfoList valnos; // value#'s @@ -198,22 +202,24 @@ namespace llvm { // The segment set is used temporarily to accelerate initial computation // of live ranges of physical registers in computeRegUnitRange. // After that the set is flushed to the segment vector and deleted. - typedef std::set SegmentSet; + using SegmentSet = std::set; std::unique_ptr segmentSet; - typedef Segments::iterator iterator; + using iterator = Segments::iterator; + using const_iterator = Segments::const_iterator; + iterator begin() { return segments.begin(); } iterator end() { return segments.end(); } - typedef Segments::const_iterator const_iterator; const_iterator begin() const { return segments.begin(); } const_iterator end() const { return segments.end(); } - typedef VNInfoList::iterator vni_iterator; + using vni_iterator = VNInfoList::iterator; + using const_vni_iterator = VNInfoList::const_iterator; + vni_iterator vni_begin() { return valnos.begin(); } vni_iterator vni_end() { return valnos.end(); } - typedef VNInfoList::const_iterator const_vni_iterator; const_vni_iterator vni_begin() const { return valnos.begin(); } const_vni_iterator vni_end() const { return valnos.end(); } @@ -631,40 +637,37 @@ namespace llvm { /// or stack slot. class LiveInterval : public LiveRange { public: - typedef LiveRange super; + using super = LiveRange; /// A live range for subregisters. The LaneMask specifies which parts of the /// super register are covered by the interval. /// (@sa TargetRegisterInfo::getSubRegIndexLaneMask()). class SubRange : public LiveRange { public: - SubRange *Next; + SubRange *Next = nullptr; LaneBitmask LaneMask; /// Constructs a new SubRange object. - SubRange(LaneBitmask LaneMask) - : Next(nullptr), LaneMask(LaneMask) { - } + SubRange(LaneBitmask LaneMask) : LaneMask(LaneMask) {} /// Constructs a new SubRange object by copying liveness from @p Other. SubRange(LaneBitmask LaneMask, const LiveRange &Other, BumpPtrAllocator &Allocator) - : LiveRange(Other, Allocator), Next(nullptr), LaneMask(LaneMask) { - } + : LiveRange(Other, Allocator), LaneMask(LaneMask) {} void print(raw_ostream &OS) const; void dump() const; }; private: - SubRange *SubRanges; ///< Single linked list of subregister live ranges. + SubRange *SubRanges = nullptr; ///< Single linked list of subregister live + /// ranges. public: const unsigned reg; // the register or stack slot of this interval. float weight; // weight of this interval - LiveInterval(unsigned Reg, float Weight) - : SubRanges(nullptr), reg(Reg), weight(Weight) {} + LiveInterval(unsigned Reg, float Weight) : reg(Reg), weight(Weight) {} ~LiveInterval() { clearSubRanges(); @@ -673,8 +676,10 @@ namespace llvm { template class SingleLinkedListIterator { T *P; + public: SingleLinkedListIterator(T *P) : P(P) {} + SingleLinkedListIterator &operator++() { P = P->Next; return *this; @@ -698,7 +703,9 @@ namespace llvm { } }; - typedef SingleLinkedListIterator subrange_iterator; + using subrange_iterator = SingleLinkedListIterator; + using const_subrange_iterator = SingleLinkedListIterator; + subrange_iterator subrange_begin() { return subrange_iterator(SubRanges); } @@ -706,7 +713,6 @@ namespace llvm { return subrange_iterator(nullptr); } - typedef SingleLinkedListIterator const_subrange_iterator; const_subrange_iterator subrange_begin() const { return const_subrange_iterator(SubRanges); } @@ -759,12 +765,12 @@ namespace llvm { /// isSpillable - Can this interval be spilled? bool isSpillable() const { - return weight != llvm::huge_valf; + return weight != huge_valf; } /// markNotSpillable - Mark interval as not spillable void markNotSpillable() { - weight = llvm::huge_valf; + weight = huge_valf; } /// For a given lane mask @p LaneMask, compute indexes at which the @@ -931,5 +937,7 @@ namespace llvm { void Distribute(LiveInterval &LI, LiveInterval *LIV[], MachineRegisterInfo &MRI); }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVEINTERVAL_H diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index f5b1f87720ad3d549e6db65b948419e89f98fd97..820e883624837b439893185fb571bb07314f88d7 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -1,4 +1,4 @@ -//===-- LiveIntervalAnalysis.h - Live Interval Analysis ---------*- C++ -*-===// +//===- LiveIntervalAnalysis.h - Live Interval Analysis ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,6 +20,7 @@ #ifndef LLVM_CODEGEN_LIVEINTERVALANALYSIS_H #define LLVM_CODEGEN_LIVEINTERVALANALYSIS_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -27,27 +28,29 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Support/Allocator.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetRegisterInfo.h" -#include +#include +#include +#include namespace llvm { extern cl::opt UseSegmentSetForPhysRegs; - class BitVector; - class BlockFrequency; - class LiveRangeCalc; - class LiveVariables; - class MachineDominatorTree; - class MachineLoopInfo; - class TargetRegisterInfo; - class MachineRegisterInfo; - class TargetInstrInfo; - class TargetRegisterClass; - class VirtRegMap; - class MachineBlockFrequencyInfo; +class BitVector; +class LiveRangeCalc; +class MachineBlockFrequencyInfo; +class MachineDominatorTree; +class MachineFunction; +class MachineInstr; +class MachineRegisterInfo; +class raw_ostream; +class TargetInstrInfo; +class VirtRegMap; class LiveIntervals : public MachineFunctionPass { MachineFunction* MF; @@ -56,8 +59,8 @@ extern cl::opt UseSegmentSetForPhysRegs; const TargetInstrInfo* TII; AliasAnalysis *AA; SlotIndexes* Indexes; - MachineDominatorTree *DomTree; - LiveRangeCalc *LRCalc; + MachineDominatorTree *DomTree = nullptr; + LiveRangeCalc *LRCalc = nullptr; /// Special pool allocator for VNInfo's (LiveInterval val#). VNInfo::Allocator VNInfoAllocator; @@ -95,6 +98,7 @@ extern cl::opt UseSegmentSetForPhysRegs; public: static char ID; + LiveIntervals(); ~LiveIntervals() override; @@ -189,7 +193,7 @@ extern cl::opt UseSegmentSetForPhysRegs; void pruneValue(LiveRange &LR, SlotIndex Kill, SmallVectorImpl *EndPoints); - /// This function should be used. Its intend is to tell you that + /// This function should not be used. Its intend is to tell you that /// you are doing something wrong if you call pruveValue directly on a /// LiveInterval. Indeed, you are supposed to call pruneValue on the main /// LiveRange and all the LiveRange of the subranges if any. @@ -466,6 +470,7 @@ extern cl::opt UseSegmentSetForPhysRegs; class HMEditor; }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVEINTERVALANALYSIS_H diff --git a/include/llvm/CodeGen/LiveIntervalUnion.h b/include/llvm/CodeGen/LiveIntervalUnion.h index 57e3deb038af3b029111c6e1a1beb243aa32a650..b922e543c8560375a27b5a79752753fa931decf6 100644 --- a/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/include/llvm/CodeGen/LiveIntervalUnion.h @@ -26,12 +26,14 @@ namespace llvm { +class raw_ostream; class TargetRegisterInfo; #ifndef NDEBUG // forward declaration template class SparseBitVector; -typedef SparseBitVector<128> LiveVirtRegBitSet; + +using LiveVirtRegBitSet = SparseBitVector<128>; #endif /// Union of live intervals that are strong candidates for coalescing into a @@ -42,19 +44,19 @@ class LiveIntervalUnion { // A set of live virtual register segments that supports fast insertion, // intersection, and removal. // Mapping SlotIndex intervals to virtual register numbers. - typedef IntervalMap LiveSegments; + using LiveSegments = IntervalMap; public: // SegmentIter can advance to the next segment ordered by starting position // which may belong to a different live virtual register. We also must be able // to reach the current segment's containing virtual register. - typedef LiveSegments::iterator SegmentIter; + using SegmentIter = LiveSegments::iterator; /// Const version of SegmentIter. - typedef LiveSegments::const_iterator ConstSegmentIter; + using ConstSegmentIter = LiveSegments::const_iterator; // LiveIntervalUnions share an external allocator. - typedef LiveSegments::Allocator Allocator; + using Allocator = LiveSegments::Allocator; private: unsigned Tag = 0; // unique tag for current contents. @@ -76,7 +78,7 @@ public: SlotIndex startIndex() const { return Segments.start(); } // Provide public access to the underlying map to allow overlap iteration. - typedef LiveSegments Map; + using Map = LiveSegments; const Map &getMap() const { return Segments; } /// getTag - Return an opaque tag representing the current state of the union. diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index 9e04c467fadc06479dd2f725b6fc22b6dcd239d6..f9c741dd75b2d586679da0cefc751b8a74144a83 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -7,23 +7,24 @@ // //===----------------------------------------------------------------------===// // -// This file implements the LivePhysRegs utility for tracking liveness of -// physical registers. This can be used for ad-hoc liveness tracking after -// register allocation. You can start with the live-ins/live-outs at the -// beginning/end of a block and update the information while walking the -// instructions inside the block. This implementation tracks the liveness on a -// sub-register granularity. -// -// We assume that the high bits of a physical super-register are not preserved -// unless the instruction has an implicit-use operand reading the super- -// register. -// -// X86 Example: -// %YMM0 = ... -// %XMM0 = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0) -// -// %YMM0 = ... -// %XMM0 = ..., %YMM0 (%YMM0 and all its sub-registers are alive) +/// \file +/// This file implements the LivePhysRegs utility for tracking liveness of +/// physical registers. This can be used for ad-hoc liveness tracking after +/// register allocation. You can start with the live-ins/live-outs at the +/// beginning/end of a block and update the information while walking the +/// instructions inside the block. This implementation tracks the liveness on a +/// sub-register granularity. +/// +/// We assume that the high bits of a physical super-register are not preserved +/// unless the instruction has an implicit-use operand reading the super- +/// register. +/// +/// X86 Example: +/// %YMM0 = ... +/// %XMM0 = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0) +/// +/// %YMM0 = ... +/// %XMM0 = ..., %YMM0 (%YMM0 and all its sub-registers are alive) //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_LIVEPHYSREGS_H @@ -39,40 +40,42 @@ namespace llvm { class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; +class raw_ostream; -/// \brief A set of live physical registers with functions to track liveness +/// \brief A set of physical registers with utility functions to track liveness /// when walking backward/forward through a basic block. class LivePhysRegs { const TargetRegisterInfo *TRI = nullptr; SparseSet LiveRegs; - LivePhysRegs(const LivePhysRegs&) = delete; - LivePhysRegs &operator=(const LivePhysRegs&) = delete; - public: - /// \brief Constructs a new empty LivePhysRegs set. + /// Constructs an unitialized set. init() needs to be called to initialize it. LivePhysRegs() = default; - /// \brief Constructs and initialize an empty LivePhysRegs set. - LivePhysRegs(const TargetRegisterInfo *TRI) : TRI(TRI) { - assert(TRI && "Invalid TargetRegisterInfo pointer."); - LiveRegs.setUniverse(TRI->getNumRegs()); + /// Constructs and initializes an empty set. + LivePhysRegs(const TargetRegisterInfo &TRI) : TRI(&TRI) { + LiveRegs.setUniverse(TRI.getNumRegs()); } - /// \brief Clear and initialize the LivePhysRegs set. + LivePhysRegs(const LivePhysRegs&) = delete; + LivePhysRegs &operator=(const LivePhysRegs&) = delete; + + /// (re-)initializes and clears the set. void init(const TargetRegisterInfo &TRI) { this->TRI = &TRI; LiveRegs.clear(); LiveRegs.setUniverse(TRI.getNumRegs()); } - /// \brief Clears the LivePhysRegs set. + /// Clears the set. void clear() { LiveRegs.clear(); } - /// \brief Returns true if the set is empty. + /// Returns true if the set is empty. bool empty() const { return LiveRegs.empty(); } - /// \brief Adds a physical register and all its sub-registers to the set. + /// Adds a physical register and all its sub-registers to the set. void addReg(unsigned Reg) { assert(TRI && "LivePhysRegs is not initialized."); assert(Reg <= TRI->getNumRegs() && "Expected a physical register."); @@ -90,12 +93,13 @@ public: LiveRegs.erase(*R); } - /// \brief Removes physical registers clobbered by the regmask operand @p MO. + /// Removes physical registers clobbered by the regmask operand \p MO. void removeRegsInMask(const MachineOperand &MO, - SmallVectorImpl> *Clobbers); + SmallVectorImpl> *Clobbers = + nullptr); - /// \brief Returns true if register @p Reg is contained in the set. This also - /// works if only the super register of @p Reg has been defined, because + /// \brief Returns true if register \p Reg is contained in the set. This also + /// works if only the super register of \p Reg has been defined, because /// addReg() always adds all sub-registers to the set as well. /// Note: Returns false if just some sub registers are live, use available() /// when searching a free register. @@ -104,48 +108,48 @@ public: /// Returns true if register \p Reg and no aliasing register is in the set. bool available(const MachineRegisterInfo &MRI, unsigned Reg) const; - /// \brief Simulates liveness when stepping backwards over an - /// instruction(bundle): Remove Defs, add uses. This is the recommended way of - /// calculating liveness. + /// Simulates liveness when stepping backwards over an instruction(bundle). + /// Remove Defs, add uses. This is the recommended way of calculating + /// liveness. void stepBackward(const MachineInstr &MI); - /// \brief Simulates liveness when stepping forward over an - /// instruction(bundle): Remove killed-uses, add defs. This is the not - /// recommended way, because it depends on accurate kill flags. If possible - /// use stepBackward() instead of this function. - /// The clobbers set will be the list of registers either defined or clobbered - /// by a regmask. The operand will identify whether this is a regmask or - /// register operand. + /// Simulates liveness when stepping forward over an instruction(bundle). + /// Remove killed-uses, add defs. This is the not recommended way, because it + /// depends on accurate kill flags. If possible use stepBackward() instead of + /// this function. The clobbers set will be the list of registers either + /// defined or clobbered by a regmask. The operand will identify whether this + /// is a regmask or register operand. void stepForward(const MachineInstr &MI, SmallVectorImpl> &Clobbers); - /// Adds all live-in registers of basic block @p MBB. + /// Adds all live-in registers of basic block \p MBB. /// Live in registers are the registers in the blocks live-in list and the /// pristine registers. void addLiveIns(const MachineBasicBlock &MBB); - /// Adds all live-out registers of basic block @p MBB. + /// Adds all live-out registers of basic block \p MBB. /// Live out registers are the union of the live-in registers of the successor /// blocks and pristine registers. Live out registers of the end block are the /// callee saved registers. void addLiveOuts(const MachineBasicBlock &MBB); - /// Like addLiveOuts() but does not add pristine registers/callee saved + /// Adds all live-out registers of basic block \p MBB but skips pristine /// registers. void addLiveOutsNoPristines(const MachineBasicBlock &MBB); - typedef SparseSet::const_iterator const_iterator; + using const_iterator = SparseSet::const_iterator; + const_iterator begin() const { return LiveRegs.begin(); } const_iterator end() const { return LiveRegs.end(); } - /// \brief Prints the currently live registers to @p OS. + /// Prints the currently live registers to \p OS. void print(raw_ostream &OS) const; - /// \brief Dumps the currently live registers to the debug output. + /// Dumps the currently live registers to the debug output. void dump() const; private: - /// Adds live-in registers from basic block @p MBB, taking associated + /// \brief Adds live-in registers from basic block \p MBB, taking associated /// lane masks into consideration. void addBlockLiveIns(const MachineBasicBlock &MBB); }; @@ -155,11 +159,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { return OS; } -/// Compute the live-in list for \p MBB assuming all of its successors live-in -/// lists are up-to-date. Uses the given LivePhysReg instance \p LiveRegs; This -/// is just here to avoid repeated heap allocations when calling this multiple -/// times in a pass. -void computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, +/// \brief Computes the live-in list for \p MBB assuming all of its successors +/// live-in lists are up-to-date. Uses the given LivePhysReg instance \p +/// LiveRegs; This is just here to avoid repeated heap allocations when calling +/// this multiple times in a pass. +void computeLiveIns(LivePhysRegs &LiveRegs, const MachineRegisterInfo &MRI, MachineBasicBlock &MBB); } // end namespace llvm diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index 4250777682ba516994bce094cf3f620c4917cf21..362d9854a271a3fdcf5c1d4215954d96c9c00dc7 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -1,4 +1,4 @@ -//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===// +//===- LiveRangeEdit.h - Basic tools for split and spill --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,19 +19,28 @@ #define LLVM_CODEGEN_LIVERANGEEDIT_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include namespace llvm { class LiveIntervals; class MachineBlockFrequencyInfo; +class MachineInstr; class MachineLoopInfo; +class MachineOperand; +class TargetInstrInfo; +class TargetRegisterInfo; class VirtRegMap; class LiveRangeEdit : private MachineRegisterInfo::Delegate { @@ -39,7 +48,10 @@ public: /// Callback methods for LiveRangeEdit owners. class Delegate { virtual void anchor(); + public: + virtual ~Delegate() = default; + /// Called immediately before erasing a dead machine instruction. virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} @@ -53,8 +65,6 @@ public: /// Called after cloning a virtual register. /// This is used for new registers representing connected components of Old. virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} - - virtual ~Delegate() {} }; private: @@ -70,7 +80,7 @@ private: const unsigned FirstNew; /// ScannedRemattable - true when remattable values have been identified. - bool ScannedRemattable; + bool ScannedRemattable = false; /// DeadRemats - The saved instructions which have already been dead after /// rematerialization but not deleted yet -- to be done in postOptimization. @@ -78,11 +88,11 @@ private: /// Remattable - Values defined by remattable instructions as identified by /// tii.isTriviallyReMaterializable(). - SmallPtrSet Remattable; + SmallPtrSet Remattable; /// Rematted - Values that were actually rematted, and so need to have their /// live range trimmed or entirely removed. - SmallPtrSet Rematted; + SmallPtrSet Rematted; /// scanRemattable - Identify the Parent values that may rematerialize. void scanRemattable(AliasAnalysis *aa); @@ -94,11 +104,11 @@ private: /// foldAsLoad - If LI has a single use and a single def that can be folded as /// a load, eliminate the register by folding the def into the use. - bool foldAsLoad(LiveInterval *LI, SmallVectorImpl &Dead); + bool foldAsLoad(LiveInterval *LI, SmallVectorImpl &Dead); + + using ToShrinkSet = SetVector, + SmallPtrSet>; - typedef SetVector, - SmallPtrSet > ToShrinkSet; /// Helper for eliminateDeadDefs. void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, AliasAnalysis *AA); @@ -129,26 +139,26 @@ public: SmallPtrSet *deadRemats = nullptr) : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis), VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate), - FirstNew(newRegs.size()), ScannedRemattable(false), - DeadRemats(deadRemats) { + FirstNew(newRegs.size()), DeadRemats(deadRemats) { MRI.setDelegate(this); } ~LiveRangeEdit() override { MRI.resetDelegate(this); } LiveInterval &getParent() const { - assert(Parent && "No parent LiveInterval"); - return *Parent; + assert(Parent && "No parent LiveInterval"); + return *Parent; } + unsigned getReg() const { return getParent().reg; } /// Iterator for accessing the new registers added by this edit. - typedef SmallVectorImpl::const_iterator iterator; - iterator begin() const { return NewRegs.begin()+FirstNew; } + using iterator = SmallVectorImpl::const_iterator; + iterator begin() const { return NewRegs.begin() + FirstNew; } iterator end() const { return NewRegs.end(); } - unsigned size() const { return NewRegs.size()-FirstNew; } + unsigned size() const { return NewRegs.size() - FirstNew; } bool empty() const { return size() == 0; } - unsigned get(unsigned idx) const { return NewRegs[idx+FirstNew]; } + unsigned get(unsigned idx) const { return NewRegs[idx + FirstNew]; } /// pop_back - It allows LiveRangeEdit users to drop new registers. /// The context is when an original def instruction of a register is @@ -176,26 +186,25 @@ public: return createEmptyIntervalFrom(getReg()); } - unsigned create() { - return createFrom(getReg()); - } + unsigned create() { return createFrom(getReg()); } /// anyRematerializable - Return true if any parent values may be /// rematerializable. /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(AliasAnalysis*); + bool anyRematerializable(AliasAnalysis *); /// checkRematerializable - Manually add VNI to the list of rematerializable /// values if DefMI may be rematerializable. bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - AliasAnalysis*); + AliasAnalysis *); /// Remat - Information needed to rematerialize at a specific location. struct Remat { - VNInfo *ParentVNI; // parent_'s value at the remat location. - MachineInstr *OrigMI; // Instruction defining OrigVNI. It contains the - // real expr for remat. - explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(nullptr) {} + VNInfo *ParentVNI; // parent_'s value at the remat location. + MachineInstr *OrigMI = nullptr; // Instruction defining OrigVNI. It contains + // the real expr for remat. + + explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI) {} }; /// canRematerializeAt - Determine if ParentVNI can be rematerialized at @@ -209,10 +218,8 @@ public: /// liveness is not updated. /// Return the SlotIndex of the new instruction. SlotIndex rematerializeAt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, - const Remat &RM, - const TargetRegisterInfo&, + MachineBasicBlock::iterator MI, unsigned DestReg, + const Remat &RM, const TargetRegisterInfo &, bool Late = false); /// markRematerialized - explicitly mark a value as rematerialized after doing @@ -248,11 +255,10 @@ public: /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. - void calculateRegClassAndHint(MachineFunction&, - const MachineLoopInfo&, - const MachineBlockFrequencyInfo&); + void calculateRegClassAndHint(MachineFunction &, const MachineLoopInfo &, + const MachineBlockFrequencyInfo &); }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_LIVERANGEEDIT_H diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index 5de76c8b87bf75bf2c29a6bfb437af4d48601824..fa1ec867ea3dc418c79e4a671704677d6ebd5f83 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -16,9 +16,9 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include namespace llvm { diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h index 3ffbe3d775b42ebbb6828d3383c64fcf1c101d91..c90ae7b184f4eb4071f9181c5c64804e99dbd531 100644 --- a/include/llvm/CodeGen/LiveStackAnalysis.h +++ b/include/llvm/CodeGen/LiveStackAnalysis.h @@ -1,4 +1,4 @@ -//===-- LiveStackAnalysis.h - Live Stack Slot Analysis ----------*- C++ -*-===// +//===- LiveStackAnalysis.h - Live Stack Slot Analysis -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,13 +18,16 @@ #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Pass.h" +#include #include #include namespace llvm { +class TargetRegisterClass; +class TargetRegisterInfo; + class LiveStacks : public MachineFunctionPass { const TargetRegisterInfo *TRI; @@ -33,8 +36,7 @@ class LiveStacks : public MachineFunctionPass { VNInfo::Allocator VNInfoAllocator; /// S2IMap - Stack slot indices to live interval mapping. - /// - typedef std::unordered_map SS2IntervalMap; + using SS2IntervalMap = std::unordered_map; SS2IntervalMap S2IMap; /// S2RCMap - Stack slot indices to register class mapping. @@ -42,12 +44,14 @@ class LiveStacks : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid + LiveStacks() : MachineFunctionPass(ID) { initializeLiveStacksPass(*PassRegistry::getPassRegistry()); } - typedef SS2IntervalMap::iterator iterator; - typedef SS2IntervalMap::const_iterator const_iterator; + using iterator = SS2IntervalMap::iterator; + using const_iterator = SS2IntervalMap::const_iterator; + const_iterator begin() const { return S2IMap.begin(); } const_iterator end() const { return S2IMap.end(); } iterator begin() { return S2IMap.begin(); } @@ -93,6 +97,7 @@ public: /// print - Implement the dump method. void print(raw_ostream &O, const Module * = nullptr) const override; }; -} -#endif /* LLVM_CODEGEN_LIVESTACK_ANALYSIS_H */ +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVESTACK_ANALYSIS_H diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h index dd0780397f4293db4301a92eee9c02079fd06b2b..b631a8c0122a290f4ce30572d6e28d4394e23879 100644 --- a/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -18,7 +18,6 @@ #ifndef LLVM_CODEGEN_MIRPARSER_MIRPARSER_H #define LLVM_CODEGEN_MIRPARSER_MIRPARSER_H -#include "llvm/CodeGen/MachineFunctionInitializer.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include @@ -27,29 +26,30 @@ namespace llvm { class StringRef; class MIRParserImpl; +class MachineModuleInfo; class SMDiagnostic; /// This class initializes machine functions by applying the state loaded from /// a MIR file. -class MIRParser : public MachineFunctionInitializer { +class MIRParser { std::unique_ptr Impl; public: MIRParser(std::unique_ptr Impl); MIRParser(const MIRParser &) = delete; - ~MIRParser() override; + ~MIRParser(); - /// Parse the optional LLVM IR module that's embedded in the MIR file. + /// Parses the optional LLVM IR module in the MIR file. /// /// A new, empty module is created if the LLVM IR isn't present. - /// Returns null if a parsing error occurred. - std::unique_ptr parseLLVMModule(); + /// \returns nullptr if a parsing error occurred. + std::unique_ptr parseIRModule(); - /// Initialize the machine function to the state that's described in the MIR - /// file. + /// \brief Parses MachineFunctions in the MIR file and add them to the given + /// MachineModuleInfo \p MMI. /// - /// Return true if error occurred. - bool initializeMachineFunction(MachineFunction &MF) override; + /// \returns true if an error occurred. + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); }; /// This function is the main interface to the MIR serialization format parser. diff --git a/lib/CodeGen/MIRPrinter.h b/include/llvm/CodeGen/MIRPrinter.h similarity index 58% rename from lib/CodeGen/MIRPrinter.h rename to include/llvm/CodeGen/MIRPrinter.h index 16aa9038b6b2ef9d9d2d23730f632b2eb198d825..c73adc3f2b114594b52a35b3d2c9dd6f3776af31 100644 --- a/lib/CodeGen/MIRPrinter.h +++ b/include/llvm/CodeGen/MIRPrinter.h @@ -17,9 +17,11 @@ namespace llvm { +class MachineBasicBlock; class MachineFunction; class Module; class raw_ostream; +template class SmallVectorImpl; /// Print LLVM IR using the MIR serialization format to the given output stream. void printMIR(raw_ostream &OS, const Module &M); @@ -28,6 +30,17 @@ void printMIR(raw_ostream &OS, const Module &M); /// output stream. void printMIR(raw_ostream &OS, const MachineFunction &MF); +/// Determine a possible list of successors of a basic block based on the +/// basic block machine operand being used inside the block. This should give +/// you the correct list of successor blocks in most cases except for things +/// like jump tables where the basic block references can't easily be found. +/// The MIRPRinter will skip printing successors if they match the result of +/// this funciton and the parser will use this function to construct a list if +/// it is missing. +void guessSuccessors(const MachineBasicBlock &MBB, + SmallVectorImpl &Successors, + bool &IsFallthrough); + } // end namespace llvm #endif diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 38cf8aa165a457104f9b522218470727096bae4b..1b1ba6a05837ca79f7ce6c40a6ee076bb382445e 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -72,6 +72,9 @@ template <> struct ScalarTraits { struct BlockStringValue { StringValue Value; + bool operator==(const BlockStringValue &Other) const { + return Value == Other.Value; + } }; template <> struct BlockScalarTraits { @@ -146,6 +149,10 @@ struct VirtualRegisterDefinition { StringValue Class; StringValue PreferredRegister; // TODO: Serialize the target specific register hints. + bool operator==(const VirtualRegisterDefinition &Other) const { + return ID == Other.ID && Class == Other.Class && + PreferredRegister == Other.PreferredRegister; + } }; template <> struct MappingTraits { @@ -162,6 +169,10 @@ template <> struct MappingTraits { struct MachineFunctionLiveIn { StringValue Register; StringValue VirtualRegister; + bool operator==(const MachineFunctionLiveIn &Other) const { + return Register == Other.Register && + VirtualRegister == Other.VirtualRegister; + } }; template <> struct MappingTraits { @@ -196,6 +207,14 @@ struct MachineStackObject { StringValue DebugVar; StringValue DebugExpr; StringValue DebugLoc; + bool operator==(const MachineStackObject &Other) const { + return ID == Other.ID && Name == Other.Name && Type == Other.Type && + Offset == Other.Offset && Size == Other.Size && + Alignment == Other.Alignment && + CalleeSavedRegister == Other.CalleeSavedRegister && + LocalOffset == Other.LocalOffset && DebugVar == Other.DebugVar && + DebugExpr == Other.DebugExpr && DebugLoc == Other.DebugLoc; + } }; template <> struct ScalarEnumerationTraits { @@ -214,13 +233,13 @@ template <> struct MappingTraits { YamlIO.mapOptional( "type", Object.Type, MachineStackObject::DefaultType); // Don't print the default type. - YamlIO.mapOptional("offset", Object.Offset); + YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); - YamlIO.mapOptional("alignment", Object.Alignment); + YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("local-offset", Object.LocalOffset); + YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional()); YamlIO.mapOptional("di-variable", Object.DebugVar, StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("di-expression", Object.DebugExpr, @@ -244,6 +263,12 @@ struct FixedMachineStackObject { bool IsImmutable = false; bool IsAliased = false; StringValue CalleeSavedRegister; + bool operator==(const FixedMachineStackObject &Other) const { + return ID == Other.ID && Type == Other.Type && Offset == Other.Offset && + Size == Other.Size && Alignment == Other.Alignment && + IsImmutable == Other.IsImmutable && IsAliased == Other.IsAliased && + CalleeSavedRegister == Other.CalleeSavedRegister; + } }; template <> @@ -261,12 +286,12 @@ template <> struct MappingTraits { YamlIO.mapOptional( "type", Object.Type, FixedMachineStackObject::DefaultType); // Don't print the default type. - YamlIO.mapOptional("offset", Object.Offset); - YamlIO.mapOptional("size", Object.Size); - YamlIO.mapOptional("alignment", Object.Alignment); + YamlIO.mapOptional("offset", Object.Offset, (int64_t)0); + YamlIO.mapOptional("size", Object.Size, (uint64_t)0); + YamlIO.mapOptional("alignment", Object.Alignment, (unsigned)0); if (Object.Type != FixedMachineStackObject::SpillSlot) { - YamlIO.mapOptional("isImmutable", Object.IsImmutable); - YamlIO.mapOptional("isAliased", Object.IsAliased); + YamlIO.mapOptional("isImmutable", Object.IsImmutable, false); + YamlIO.mapOptional("isAliased", Object.IsAliased, false); } YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. @@ -279,13 +304,17 @@ struct MachineConstantPoolValue { UnsignedValue ID; StringValue Value; unsigned Alignment = 0; + bool operator==(const MachineConstantPoolValue &Other) const { + return ID == Other.ID && Value == Other.Value && + Alignment == Other.Alignment; + } }; template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) { YamlIO.mapRequired("id", Constant.ID); - YamlIO.mapOptional("value", Constant.Value); - YamlIO.mapOptional("alignment", Constant.Alignment); + YamlIO.mapOptional("value", Constant.Value, StringValue()); + YamlIO.mapOptional("alignment", Constant.Alignment, (unsigned)0); } }; @@ -293,16 +322,22 @@ struct MachineJumpTable { struct Entry { UnsignedValue ID; std::vector Blocks; + bool operator==(const Entry &Other) const { + return ID == Other.ID && Blocks == Other.Blocks; + } }; MachineJumpTableInfo::JTEntryKind Kind = MachineJumpTableInfo::EK_Custom32; std::vector Entries; + bool operator==(const MachineJumpTable &Other) const { + return Kind == Other.Kind && Entries == Other.Entries; + } }; template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineJumpTable::Entry &Entry) { YamlIO.mapRequired("id", Entry.ID); - YamlIO.mapOptional("blocks", Entry.Blocks); + YamlIO.mapOptional("blocks", Entry.Blocks, std::vector()); } }; @@ -322,7 +357,8 @@ namespace yaml { template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineJumpTable &JT) { YamlIO.mapRequired("kind", JT.Kind); - YamlIO.mapOptional("entries", JT.Entries); + YamlIO.mapOptional("entries", JT.Entries, + std::vector()); } }; @@ -345,31 +381,49 @@ struct MachineFrameInfo { bool HasCalls = false; StringValue StackProtector; // TODO: Serialize FunctionContextIdx - unsigned MaxCallFrameSize = 0; + unsigned MaxCallFrameSize = ~0u; ///< ~0u means: not computed yet. bool HasOpaqueSPAdjustment = false; bool HasVAStart = false; bool HasMustTailInVarArgFunc = false; StringValue SavePoint; StringValue RestorePoint; + bool operator==(const MachineFrameInfo &Other) const { + return IsFrameAddressTaken == Other.IsFrameAddressTaken && + IsReturnAddressTaken == Other.IsReturnAddressTaken && + HasStackMap == Other.HasStackMap && + HasPatchPoint == Other.HasPatchPoint && + StackSize == Other.StackSize && + OffsetAdjustment == Other.OffsetAdjustment && + MaxAlignment == Other.MaxAlignment && + AdjustsStack == Other.AdjustsStack && HasCalls == Other.HasCalls && + StackProtector == Other.StackProtector && + MaxCallFrameSize == Other.MaxCallFrameSize && + HasOpaqueSPAdjustment == Other.HasOpaqueSPAdjustment && + HasVAStart == Other.HasVAStart && + HasMustTailInVarArgFunc == Other.HasMustTailInVarArgFunc && + SavePoint == Other.SavePoint && RestorePoint == Other.RestorePoint; + } }; template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineFrameInfo &MFI) { - YamlIO.mapOptional("isFrameAddressTaken", MFI.IsFrameAddressTaken); - YamlIO.mapOptional("isReturnAddressTaken", MFI.IsReturnAddressTaken); - YamlIO.mapOptional("hasStackMap", MFI.HasStackMap); - YamlIO.mapOptional("hasPatchPoint", MFI.HasPatchPoint); - YamlIO.mapOptional("stackSize", MFI.StackSize); - YamlIO.mapOptional("offsetAdjustment", MFI.OffsetAdjustment); - YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment); - YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack); - YamlIO.mapOptional("hasCalls", MFI.HasCalls); + YamlIO.mapOptional("isFrameAddressTaken", MFI.IsFrameAddressTaken, false); + YamlIO.mapOptional("isReturnAddressTaken", MFI.IsReturnAddressTaken, false); + YamlIO.mapOptional("hasStackMap", MFI.HasStackMap, false); + YamlIO.mapOptional("hasPatchPoint", MFI.HasPatchPoint, false); + YamlIO.mapOptional("stackSize", MFI.StackSize, (uint64_t)0); + YamlIO.mapOptional("offsetAdjustment", MFI.OffsetAdjustment, (int)0); + YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment, (unsigned)0); + YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack, false); + YamlIO.mapOptional("hasCalls", MFI.HasCalls, false); YamlIO.mapOptional("stackProtector", MFI.StackProtector, StringValue()); // Don't print it out when it's empty. - YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize); - YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment); - YamlIO.mapOptional("hasVAStart", MFI.HasVAStart); - YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc); + YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize, (unsigned)~0); + YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment, + false); + YamlIO.mapOptional("hasVAStart", MFI.HasVAStart, false); + YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc, + false); YamlIO.mapOptional("savePoint", MFI.SavePoint, StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("restorePoint", MFI.RestorePoint, @@ -381,7 +435,6 @@ struct MachineFunction { StringRef Name; unsigned Alignment = 0; bool ExposesReturnsTwice = false; - bool NoVRegs; // GISel MachineFunctionProperties. bool Legalized = false; bool RegBankSelected = false; @@ -404,23 +457,28 @@ struct MachineFunction { template <> struct MappingTraits { static void mapping(IO &YamlIO, MachineFunction &MF) { YamlIO.mapRequired("name", MF.Name); - YamlIO.mapOptional("alignment", MF.Alignment); - YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice); - YamlIO.mapOptional("noVRegs", MF.NoVRegs); - YamlIO.mapOptional("legalized", MF.Legalized); - YamlIO.mapOptional("regBankSelected", MF.RegBankSelected); - YamlIO.mapOptional("selected", MF.Selected); - YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness); - YamlIO.mapOptional("registers", MF.VirtualRegisters); - YamlIO.mapOptional("liveins", MF.LiveIns); - YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters); - YamlIO.mapOptional("frameInfo", MF.FrameInfo); - YamlIO.mapOptional("fixedStack", MF.FixedStackObjects); - YamlIO.mapOptional("stack", MF.StackObjects); - YamlIO.mapOptional("constants", MF.Constants); + YamlIO.mapOptional("alignment", MF.Alignment, (unsigned)0); + YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice, false); + YamlIO.mapOptional("legalized", MF.Legalized, false); + YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false); + YamlIO.mapOptional("selected", MF.Selected, false); + YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false); + YamlIO.mapOptional("registers", MF.VirtualRegisters, + std::vector()); + YamlIO.mapOptional("liveins", MF.LiveIns, + std::vector()); + YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters, + Optional>()); + YamlIO.mapOptional("frameInfo", MF.FrameInfo, MachineFrameInfo()); + YamlIO.mapOptional("fixedStack", MF.FixedStackObjects, + std::vector()); + YamlIO.mapOptional("stack", MF.StackObjects, + std::vector()); + YamlIO.mapOptional("constants", MF.Constants, + std::vector()); if (!YamlIO.outputting() || !MF.JumpTableInfo.Entries.empty()) - YamlIO.mapOptional("jumpTable", MF.JumpTableInfo); - YamlIO.mapOptional("body", MF.Body); + YamlIO.mapOptional("jumpTable", MF.JumpTableInfo, MachineJumpTable()); + YamlIO.mapOptional("body", MF.Body, BlockStringValue()); } }; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 18d40564856d56ca31aa29fbae648789a741ad72..051908c40df719a08a16be3b064980feaecca296 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineBasicBlock.h ------------------------*- C++ -*-===// +//===- llvm/CodeGen/MachineBasicBlock.h -------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,41 +15,50 @@ #define LLVM_CODEGEN_MACHINEBASICBLOCK_H #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/ADT/simple_ilist.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Support/BranchProbability.h" +#include "llvm/CodeGen/MachineInstrBundleIterator.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/Support/BranchProbability.h" +#include +#include #include +#include +#include +#include namespace llvm { -class Pass; class BasicBlock; class MachineFunction; class MCSymbol; -class MIPrinter; +class ModuleSlotTracker; +class Pass; class SlotIndexes; class StringRef; class raw_ostream; -class MachineBranchProbabilityInfo; +class TargetRegisterClass; +class TargetRegisterInfo; template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. + MachineBasicBlock *Parent; - typedef simple_ilist>::iterator - instr_iterator; + using instr_iterator = + simple_ilist>::iterator; public: void addNodeToList(MachineInstr *N); void removeNodeFromList(MachineInstr *N); void transferNodesFromList(ilist_traits &OldList, instr_iterator First, instr_iterator Last); - void deleteNode(MachineInstr *MI); }; @@ -69,7 +78,8 @@ public: }; private: - typedef ilist> Instructions; + using Instructions = ilist>; + Instructions Insts; const BasicBlock *BB; int Number; @@ -83,12 +93,12 @@ private: /// same order as Successors, or it is empty if we don't use it (disable /// optimization). std::vector Probs; - typedef std::vector::iterator probability_iterator; - typedef std::vector::const_iterator - const_probability_iterator; + using probability_iterator = std::vector::iterator; + using const_probability_iterator = + std::vector::const_iterator; /// Keep track of the physical registers that are livein of the basicblock. - typedef std::vector LiveInVector; + using LiveInVector = std::vector; LiveInVector LiveIns; /// Alignment of the basic block. Zero if the basic block does not need to be @@ -113,7 +123,7 @@ private: mutable MCSymbol *CachedMCSymbol = nullptr; // Intrusive list support - MachineBasicBlock() {} + MachineBasicBlock() = default; explicit MachineBasicBlock(MachineFunction &MF, const BasicBlock *BB); @@ -145,16 +155,16 @@ public: const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - typedef Instructions::iterator instr_iterator; - typedef Instructions::const_iterator const_instr_iterator; - typedef Instructions::reverse_iterator reverse_instr_iterator; - typedef Instructions::const_reverse_iterator const_reverse_instr_iterator; + using instr_iterator = Instructions::iterator; + using const_instr_iterator = Instructions::const_iterator; + using reverse_instr_iterator = Instructions::reverse_iterator; + using const_reverse_instr_iterator = Instructions::const_reverse_iterator; - typedef MachineInstrBundleIterator iterator; - typedef MachineInstrBundleIterator const_iterator; - typedef MachineInstrBundleIterator reverse_iterator; - typedef MachineInstrBundleIterator - const_reverse_iterator; + using iterator = MachineInstrBundleIterator; + using const_iterator = MachineInstrBundleIterator; + using reverse_iterator = MachineInstrBundleIterator; + using const_reverse_iterator = + MachineInstrBundleIterator; unsigned size() const { return (unsigned)Insts.size(); } bool empty() const { return Insts.empty(); } @@ -178,8 +188,8 @@ public: reverse_instr_iterator instr_rend () { return Insts.rend(); } const_reverse_instr_iterator instr_rend () const { return Insts.rend(); } - typedef iterator_range instr_range; - typedef iterator_range const_instr_range; + using instr_range = iterator_range; + using const_instr_range = iterator_range; instr_range instrs() { return instr_range(instr_begin(), instr_end()); } const_instr_range instrs() const { return const_instr_range(instr_begin(), instr_end()); @@ -213,18 +223,18 @@ public: } // Machine-CFG iterators - typedef std::vector::iterator pred_iterator; - typedef std::vector::const_iterator const_pred_iterator; - typedef std::vector::iterator succ_iterator; - typedef std::vector::const_iterator const_succ_iterator; - typedef std::vector::reverse_iterator - pred_reverse_iterator; - typedef std::vector::const_reverse_iterator - const_pred_reverse_iterator; - typedef std::vector::reverse_iterator - succ_reverse_iterator; - typedef std::vector::const_reverse_iterator - const_succ_reverse_iterator; + using pred_iterator = std::vector::iterator; + using const_pred_iterator = std::vector::const_iterator; + using succ_iterator = std::vector::iterator; + using const_succ_iterator = std::vector::const_iterator; + using pred_reverse_iterator = + std::vector::reverse_iterator; + using const_pred_reverse_iterator = + std::vector::const_reverse_iterator; + using succ_reverse_iterator = + std::vector::reverse_iterator; + using const_succ_reverse_iterator = + std::vector::const_reverse_iterator; pred_iterator pred_begin() { return Predecessors.begin(); } const_pred_iterator pred_begin() const { return Predecessors.begin(); } pred_iterator pred_end() { return Predecessors.end(); } @@ -307,7 +317,7 @@ public: // Iteration support for live in sets. These sets are kept in sorted // order by their register number. - typedef LiveInVector::const_iterator livein_iterator; + using livein_iterator = LiveInVector::const_iterator; #ifndef NDEBUG /// Unlike livein_begin, this method does not check that the liveness /// information is accurate. Still for debug purposes it may be useful @@ -325,6 +335,9 @@ public: return make_range(livein_begin(), livein_end()); } + /// Remove entry from the livein set and return iterator to the next. + livein_iterator removeLiveIn(livein_iterator I); + /// Get the clobber mask for the start of this basic block. Funclets use this /// to prevent register allocation across funclet transitions. const uint32_t *getBeginClobberMask(const TargetRegisterInfo *TRI) const; @@ -455,7 +468,6 @@ public: /// other block. bool isLayoutSuccessor(const MachineBasicBlock *MBB) const; - /// Return the fallthrough block if the block can implicitly /// transfer control to the block after it by falling off the end of /// it. This should return null if it can reach the block after @@ -695,7 +707,7 @@ public: LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, - unsigned Neighborhood=10) const; + unsigned Neighborhood = 10) const; // Debugging methods. void dump() const; @@ -714,7 +726,6 @@ public: /// Return the MCSymbol for this basic block. MCSymbol *getSymbol() const; - private: /// Return probability iterator corresponding to the I successor iterator. probability_iterator getProbabilityIterator(succ_iterator I); @@ -764,8 +775,8 @@ struct MBB2NumberFunctor : // template <> struct GraphTraits { - typedef MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::succ_iterator ChildIteratorType; + using NodeRef = MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::succ_iterator; static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; } static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } @@ -773,8 +784,8 @@ template <> struct GraphTraits { }; template <> struct GraphTraits { - typedef const MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; + using NodeRef = const MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::const_succ_iterator; static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; } static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } @@ -787,28 +798,30 @@ template <> struct GraphTraits { // to be when traversing the predecessor edges of a MBB // instead of the successor edges. // -template <> struct GraphTraits > { - typedef MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::pred_iterator ChildIteratorType; +template <> struct GraphTraits> { + using NodeRef = MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::pred_iterator; + static NodeRef getEntryNode(Inverse G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); } static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } }; -template <> struct GraphTraits > { - typedef const MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::const_pred_iterator ChildIteratorType; +template <> struct GraphTraits> { + using NodeRef = const MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::const_pred_iterator; + static NodeRef getEntryNode(Inverse G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); } static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } }; - - /// MachineInstrSpan provides an interface to get an iteration range /// containing the instruction it was initialized with, along with all /// those instructions inserted prior to or following that instruction @@ -816,6 +829,7 @@ template <> struct GraphTraits > { class MachineInstrSpan { MachineBasicBlock &MBB; MachineBasicBlock::iterator I, B, E; + public: MachineInstrSpan(MachineBasicBlock::iterator I) : MBB(*I->getParent()), @@ -854,6 +868,6 @@ inline IterT skipDebugInstructionsBackward(IterT It, IterT Begin) { return It; } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEBASICBLOCK_H diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index cd1c204981ed86760ad2d377dfb83dc7c5e086b5..cba79c818a761d134388b8b6239cd5b31b9269c6 100644 --- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -1,4 +1,4 @@ -//===- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -*- C++ -*-----===// +//===- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,26 +17,28 @@ #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/BlockFrequency.h" -#include +#include +#include namespace llvm { +template class BlockFrequencyInfoImpl; class MachineBasicBlock; class MachineBranchProbabilityInfo; +class MachineFunction; class MachineLoopInfo; -template class BlockFrequencyInfoImpl; +class raw_ostream; /// MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation /// to estimate machine basic block frequencies. class MachineBlockFrequencyInfo : public MachineFunctionPass { - typedef BlockFrequencyInfoImpl ImplType; + using ImplType = BlockFrequencyInfoImpl; std::unique_ptr MBFI; public: static char ID; MachineBlockFrequencyInfo(); - ~MachineBlockFrequencyInfo() override; void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -74,9 +76,8 @@ public: const MachineBasicBlock *MBB) const; uint64_t getEntryFreq() const; - }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEBLOCKFREQUENCYINFO_H diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h index 11238016d447b2082210f86e97fcdebe07b7568e..8c54ae925470811b74ebf9476ad8e7bf6993fd3a 100644 --- a/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/include/llvm/CodeGen/MachineCombinerPattern.h @@ -48,6 +48,8 @@ enum class MachineCombinerPattern { FMULADDD_OP2, FMULSUBD_OP1, FMULSUBD_OP2, + FNMULSUBS_OP1, + FNMULSUBD_OP1, FMLAv1i32_indexed_OP1, FMLAv1i32_indexed_OP2, FMLAv1i64_indexed_OP1, diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h index d2036c4a29a554e38b96087be22612b84d5cdaa3..1705a0f7e59b3cc7e3ccaaf715e64986eaa0b0e5 100644 --- a/include/llvm/CodeGen/MachineConstantPool.h +++ b/include/llvm/CodeGen/MachineConstantPool.h @@ -1,4 +1,4 @@ -//===-- CodeGen/MachineConstantPool.h - Abstract Constant Pool --*- C++ -*-===// +//===- CodeGen/MachineConstantPool.h - Abstract Constant Pool ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,29 +18,28 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/MC/SectionKind.h" -#include #include #include namespace llvm { class Constant; -class FoldingSetNodeID; class DataLayout; -class TargetMachine; -class Type; +class FoldingSetNodeID; class MachineConstantPool; class raw_ostream; +class Type; /// Abstract base class for all machine specific constantpool value subclasses. /// class MachineConstantPoolValue { virtual void anchor(); + Type *Ty; public: explicit MachineConstantPoolValue(Type *ty) : Ty(ty) {} - virtual ~MachineConstantPoolValue() {} + virtual ~MachineConstantPoolValue() = default; /// getType - get type of this MachineConstantPoolValue. /// @@ -81,6 +80,7 @@ public: : Alignment(A) { Val.ConstVal = V; } + MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A) : Alignment(A) { Val.MachineCPVal = V; @@ -153,13 +153,12 @@ public: /// print - Used by the MachineFunction printer to print information about /// constant pool objects. Implemented in MachineFunction.cpp - /// void print(raw_ostream &OS) const; /// dump - Call print(cerr) to be called from the debugger. void dump() const; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINECONSTANTPOOL_H diff --git a/include/llvm/CodeGen/MachineDominanceFrontier.h b/include/llvm/CodeGen/MachineDominanceFrontier.h index 4131194a0c0fc8dca39b8dcbbd84be03e8a69f37..370ffbe4862eb641a163c86460778cf9508bfa64 100644 --- a/include/llvm/CodeGen/MachineDominanceFrontier.h +++ b/include/llvm/CodeGen/MachineDominanceFrontier.h @@ -11,23 +11,28 @@ #define LLVM_CODEGEN_MACHINEDOMINANCEFRONTIER_H #include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/DominanceFrontierImpl.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" - +#include "llvm/Support/GenericDomTree.h" +#include namespace llvm { class MachineDominanceFrontier : public MachineFunctionPass { ForwardDominanceFrontierBase Base; + public: - typedef DominatorTreeBase DomTreeT; - typedef DomTreeNodeBase DomTreeNodeT; - typedef DominanceFrontierBase::DomSetType DomSetType; - typedef DominanceFrontierBase::iterator iterator; - typedef DominanceFrontierBase::const_iterator const_iterator; + using DomTreeT = DominatorTreeBase; + using DomTreeNodeT = DomTreeNodeBase; + using DomSetType = DominanceFrontierBase::DomSetType; + using iterator = DominanceFrontierBase::iterator; + using const_iterator = + DominanceFrontierBase::const_iterator; - void operator=(const MachineDominanceFrontier &) = delete; MachineDominanceFrontier(const MachineDominanceFrontier &) = delete; + MachineDominanceFrontier & + operator=(const MachineDominanceFrontier &) = delete; static char ID; @@ -104,6 +109,6 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEDOMINANCEFRONTIER_H diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 30b6cfdd1c36d78a0c35a7744a59ae928b2789f1..74a7c3ea04aea4e06bc0c8505707e49da85ed25d 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/MachineDominators.h - Machine Dom Calculation --*- C++ -*-==// +//==- llvm/CodeGen/MachineDominators.h - Machine Dom Calculation -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -16,12 +16,15 @@ #define LLVM_CODEGEN_MACHINEDOMINATORS_H #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/GenericDomTreeConstruction.h" +#include #include +#include namespace llvm { @@ -33,7 +36,7 @@ inline void DominatorTreeBase::addRoot(MachineBasicBlock* MBB extern template class DomTreeNodeBase; extern template class DominatorTreeBase; -typedef DomTreeNodeBase MachineDomTreeNode; +using MachineDomTreeNode = DomTreeNodeBase; //===------------------------------------- /// DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to @@ -52,6 +55,7 @@ class MachineDominatorTree : public MachineFunctionPass { /// The splitting of a critical edge is local and thus, it is possible /// to apply several of those changes at the same time. mutable SmallVector CriticalEdgesToSplit; + /// \brief Remember all the basic blocks that are inserted during /// edge splitting. /// Invariant: NewBBs == all the basic blocks contained in the NewBB @@ -259,8 +263,8 @@ public: template struct MachineDomTreeGraphTraitsBase { - typedef Node *NodeRef; - typedef ChildIterator ChildIteratorType; + using NodeRef = Node *; + using ChildIteratorType = ChildIterator; static NodeRef getEntryNode(NodeRef N) { return N; } static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } @@ -287,6 +291,6 @@ template <> struct GraphTraits } }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEDOMINATORS_H diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 5c9728b0a51ed128c3c078d44816b716b912b008..689f3cd9fd12b260388440e2bbfeab04ed6c0f31 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -21,15 +21,9 @@ namespace llvm { class raw_ostream; -class DataLayout; -class TargetRegisterClass; -class Type; class MachineFunction; class MachineBasicBlock; -class TargetFrameLowering; -class TargetMachine; class BitVector; -class Value; class AllocaInst; /// The CalleeSavedInfo class tracks the information need to locate where a @@ -226,7 +220,7 @@ class MachineFrameInfo { /// setup/destroy pseudo instructions (as defined in the TargetFrameInfo /// class). This information is important for frame pointer elimination. /// It is only valid during and after prolog/epilog code insertion. - unsigned MaxCallFrameSize = 0; + unsigned MaxCallFrameSize = ~0u; /// The prolog/epilog code inserter fills in this vector with each /// callee saved register saved in the frame. Beyond its use by the prolog/ @@ -526,12 +520,29 @@ public: bool hasTailCall() const { return HasTailCall; } void setHasTailCall() { HasTailCall = true; } + /// Computes the maximum size of a callframe and the AdjustsStack property. + /// This only works for targets defining + /// TargetInstrInfo::getCallFrameSetupOpcode(), getCallFrameDestroyOpcode(), + /// and getFrameSize(). + /// This is usually computed by the prologue epilogue inserter but some + /// targets may call this to compute it earlier. + void computeMaxCallFrameSize(const MachineFunction &MF); + /// Return the maximum size of a call frame that must be /// allocated for an outgoing function call. This is only available if /// CallFrameSetup/Destroy pseudo instructions are used by the target, and /// then only during or after prolog/epilog code insertion. /// - unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } + unsigned getMaxCallFrameSize() const { + // TODO: Enable this assert when targets are fixed. + //assert(isMaxCallFrameSizeComputed() && "MaxCallFrameSize not computed yet"); + if (!isMaxCallFrameSizeComputed()) + return 0; + return MaxCallFrameSize; + } + bool isMaxCallFrameSizeComputed() const { + return MaxCallFrameSize != ~0u; + } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } /// Create a new object at a fixed location on the stack. diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 5859a4e61fdd57a8e41f9de0138886a337ae0dbb..f67da7b01c54b5c756a9bdce0ab791ee1901f5f5 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineFunction.h --------------------------*- C++ -*-===// +//===- llvm/CodeGen/MachineFunction.h ---------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,38 +18,61 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTION_H #define LLVM_CODEGEN_MACHINEFUNCTION_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/ilist.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/iterator.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Recycler.h" +#include +#include +#include +#include +#include namespace llvm { -class Value; +class BasicBlock; +class BlockAddress; +class DataLayout; +class DIExpression; +class DILocalVariable; +class DILocation; class Function; -class GCModuleInfo; -class MachineRegisterInfo; -class MachineFrameInfo; +class GlobalValue; class MachineConstantPool; +class MachineFrameInfo; +class MachineFunction; class MachineJumpTableInfo; class MachineModuleInfo; +class MachineRegisterInfo; class MCContext; +class MCInstrDesc; class Pass; class PseudoSourceValueManager; +class raw_ostream; +class SlotIndexes; class TargetMachine; -class TargetSubtargetInfo; class TargetRegisterClass; -struct MachinePointerInfo; +class TargetSubtargetInfo; struct WinEHFuncInfo; template <> struct ilist_alloc_traits { @@ -137,27 +160,33 @@ public: bool hasProperty(Property P) const { return Properties[static_cast(P)]; } + MachineFunctionProperties &set(Property P) { Properties.set(static_cast(P)); return *this; } + MachineFunctionProperties &reset(Property P) { Properties.reset(static_cast(P)); return *this; } + /// Reset all the properties. MachineFunctionProperties &reset() { Properties.reset(); return *this; } + MachineFunctionProperties &set(const MachineFunctionProperties &MFP) { Properties |= MFP.Properties; return *this; } + MachineFunctionProperties &reset(const MachineFunctionProperties &MFP) { Properties.reset(MFP.Properties); return *this; } + // Returns true if all properties set in V (i.e. required by a pass) are set // in this. bool verifyRequiredProperties(const MachineFunctionProperties &V) const { @@ -180,18 +209,17 @@ struct SEHHandler { const BlockAddress *RecoverBA; }; - /// This structure is used to retain landing pad info for the current function. struct LandingPadInfo { MachineBasicBlock *LandingPadBlock; // Landing pad block. SmallVector BeginLabels; // Labels prior to invoke. SmallVector EndLabels; // Labels after invoke. SmallVector SEHHandlers; // SEH handlers active at this lpad. - MCSymbol *LandingPadLabel; // Label at beginning of landing pad. - std::vector TypeIds; // List of type ids (filters negative). + MCSymbol *LandingPadLabel = nullptr; // Label at beginning of landing pad. + std::vector TypeIds; // List of type ids (filters negative). explicit LandingPadInfo(MachineBasicBlock *MBB) - : LandingPadBlock(MBB), LandingPadLabel(nullptr) {} + : LandingPadBlock(MBB) {} }; class MachineFunction { @@ -239,7 +267,7 @@ class MachineFunction { Recycler BasicBlockRecycler; // List of machine basic blocks in function - typedef ilist BasicBlockListType; + using BasicBlockListType = ilist; BasicBlockListType BasicBlocks; /// FunctionNumber - This provides a unique ID for each function emitted in @@ -281,7 +309,7 @@ class MachineFunction { std::vector LandingPads; /// Map a landing pad's EH symbol to the call site indexes. - DenseMap > LPadToCallSiteMap; + DenseMap> LPadToCallSiteMap; /// Map of invoke call site index values to associated begin EH_LABEL. DenseMap CallSiteMap; @@ -303,9 +331,6 @@ class MachineFunction { /// \} - MachineFunction(const MachineFunction &) = delete; - void operator=(const MachineFunction&) = delete; - /// Clear all the members of this MachineFunction, but the ones used /// to initialize again the MachineFunction. /// More specifically, this deallocates all the dynamically allocated @@ -316,8 +341,8 @@ class MachineFunction { /// In particular, the XXXInfo data structure. /// \pre Fn, Target, MMI, and FunctionNumber are properly set. void init(); -public: +public: struct VariableDbgInfo { const DILocalVariable *Var; const DIExpression *Expr; @@ -328,11 +353,13 @@ public: unsigned Slot, const DILocation *Loc) : Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {} }; - typedef SmallVector VariableDbgInfoMapTy; + using VariableDbgInfoMapTy = SmallVector; VariableDbgInfoMapTy VariableDbgInfos; MachineFunction(const Function *Fn, const TargetMachine &TM, unsigned FunctionNum, MachineModuleInfo &MMI); + MachineFunction(const MachineFunction &) = delete; + MachineFunction &operator=(const MachineFunction &) = delete; ~MachineFunction(); /// Reset the instance as if it was just created. @@ -350,19 +377,15 @@ public: const DataLayout &getDataLayout() const; /// getFunction - Return the LLVM function that this machine code represents - /// const Function *getFunction() const { return Fn; } /// getName - Return the name of the corresponding LLVM function. - /// StringRef getName() const; /// getFunctionNumber - Return a unique ID for the current function. - /// unsigned getFunctionNumber() const { return FunctionNumber; } /// getTarget - Return the target machine this machine code is compiled with - /// const TargetMachine &getTarget() const { return Target; } /// getSubtarget - Return the subtarget for which this machine code is being @@ -378,14 +401,12 @@ public: } /// getRegInfo - Return information about the registers currently in use. - /// MachineRegisterInfo &getRegInfo() { return *RegInfo; } const MachineRegisterInfo &getRegInfo() const { return *RegInfo; } /// getFrameInfo - Return the frame info object for the current function. /// This object contains information about objects allocated on the stack /// frame of the current function in an abstract way. - /// MachineFrameInfo &getFrameInfo() { return *FrameInfo; } const MachineFrameInfo &getFrameInfo() const { return *FrameInfo; } @@ -402,7 +423,6 @@ public: /// getConstantPool - Return the constant pool object for the current /// function. - /// MachineConstantPool *getConstantPool() { return ConstantPool; } const MachineConstantPool *getConstantPool() const { return ConstantPool; } @@ -413,11 +433,9 @@ public: WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; } /// getAlignment - Return the alignment (log2, not bytes) of the function. - /// unsigned getAlignment() const { return Alignment; } /// setAlignment - Set the alignment (log2, not bytes) of the function. - /// void setAlignment(unsigned A) { Alignment = A; } /// ensureAlignment - Make sure the function is at least 1 << A bytes aligned. @@ -487,7 +505,6 @@ public: bool shouldSplitStack() const; /// getNumBlockIDs - Return the number of MBB ID's allocated. - /// unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); } /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and @@ -499,7 +516,6 @@ public: /// print - Print out the MachineFunction in a format suitable for debugging /// to the specified stream. - /// void print(raw_ostream &OS, const SlotIndexes* = nullptr) const; /// viewCFG - This function is meant for use from the debugger. You can just @@ -507,7 +523,6 @@ public: /// program, displaying the CFG of the current function with the code for each /// basic block inside. This depends on there being a 'dot' and 'gv' program /// in your path. - /// void viewCFG() const; /// viewCFGOnly - This function is meant for use from the debugger. It works @@ -518,7 +533,6 @@ public: void viewCFGOnly() const; /// dump - Print the current MachineFunction to cerr, useful for debugger use. - /// void dump() const; /// Run the current MachineFunction through the machine code verifier, useful @@ -528,10 +542,10 @@ public: bool AbortOnError = true) const; // Provide accessors for the MachineBasicBlock list... - typedef BasicBlockListType::iterator iterator; - typedef BasicBlockListType::const_iterator const_iterator; - typedef BasicBlockListType::const_reverse_iterator const_reverse_iterator; - typedef BasicBlockListType::reverse_iterator reverse_iterator; + using iterator = BasicBlockListType::iterator; + using const_iterator = BasicBlockListType::const_iterator; + using const_reverse_iterator = BasicBlockListType::const_reverse_iterator; + using reverse_iterator = BasicBlockListType::reverse_iterator; /// Support for MachineBasicBlock::getNextNode(). static BasicBlockListType MachineFunction::* @@ -590,11 +604,9 @@ public: //===--------------------------------------------------------------------===// // Internal functions used to automatically number MachineBasicBlocks - // /// \brief Adds the MBB to the internal numbering. Returns the unique number /// assigned to the MBB. - /// unsigned addToMBBNumbering(MachineBasicBlock *MBB) { MBBNumbering.push_back(MBB); return (unsigned)MBBNumbering.size()-1; @@ -610,7 +622,6 @@ public: /// CreateMachineInstr - Allocate a new MachineInstr. Use this instead /// of `new MachineInstr'. - /// MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImp = false); @@ -623,16 +634,13 @@ public: MachineInstr *CloneMachineInstr(const MachineInstr *Orig); /// DeleteMachineInstr - Delete the given MachineInstr. - /// void DeleteMachineInstr(MachineInstr *MI); /// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this /// instead of `new MachineBasicBlock'. - /// MachineBasicBlock *CreateMachineBasicBlock(const BasicBlock *bb = nullptr); /// DeleteMachineBasicBlock - Delete the given MachineBasicBlock. - /// void DeleteMachineBasicBlock(MachineBasicBlock *MBB); /// getMachineMemOperand - Allocate a new MachineMemOperand. @@ -653,7 +661,7 @@ public: MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size); - typedef ArrayRecycler::Capacity OperandCapacity; + using OperandCapacity = ArrayRecycler::Capacity; /// Allocate an array of MachineOperands. This is only intended for use by /// internal MachineInstr functions. @@ -700,7 +708,6 @@ public: //===--------------------------------------------------------------------===// // Label Manipulation. - // /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a @@ -858,13 +865,16 @@ template <> struct GraphTraits : static NodeRef getEntryNode(MachineFunction *F) { return &F->front(); } // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - typedef pointer_iterator nodes_iterator; + using nodes_iterator = pointer_iterator; + static nodes_iterator nodes_begin(MachineFunction *F) { return nodes_iterator(F->begin()); } + static nodes_iterator nodes_end(MachineFunction *F) { return nodes_iterator(F->end()); } + static unsigned size (MachineFunction *F) { return F->size(); } }; template <> struct GraphTraits : @@ -872,37 +882,39 @@ template <> struct GraphTraits : static NodeRef getEntryNode(const MachineFunction *F) { return &F->front(); } // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - typedef pointer_iterator nodes_iterator; + using nodes_iterator = pointer_iterator; + static nodes_iterator nodes_begin(const MachineFunction *F) { return nodes_iterator(F->begin()); } + static nodes_iterator nodes_end (const MachineFunction *F) { return nodes_iterator(F->end()); } + static unsigned size (const MachineFunction *F) { return F->size(); } }; - // Provide specializations of GraphTraits to be able to treat a function as a // graph of basic blocks... and to walk it in inverse order. Inverse order for // a function is considered to be when traversing the predecessor edges of a BB // instead of the successor edges. // -template <> struct GraphTraits > : - public GraphTraits > { +template <> struct GraphTraits> : + public GraphTraits> { static NodeRef getEntryNode(Inverse G) { return &G.Graph->front(); } }; -template <> struct GraphTraits > : - public GraphTraits > { +template <> struct GraphTraits> : + public GraphTraits> { static NodeRef getEntryNode(Inverse G) { return &G.Graph->front(); } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEFUNCTION_H diff --git a/include/llvm/CodeGen/MachineFunctionInitializer.h b/include/llvm/CodeGen/MachineFunctionInitializer.h deleted file mode 100644 index c644c9783e2fefa5fae145c17246878f4b4aac38..0000000000000000000000000000000000000000 --- a/include/llvm/CodeGen/MachineFunctionInitializer.h +++ /dev/null @@ -1,38 +0,0 @@ -//===- MachineFunctionInitializer.h - machine function initializer ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares an interface that allows custom machine function -// initialization. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H -#define LLVM_CODEGEN_MACHINEFUNCTIONINITIALIZER_H - -namespace llvm { - -class MachineFunction; - -/// This interface provides a way to initialize machine functions after they are -/// created by the machine function analysis pass. -class MachineFunctionInitializer { - virtual void anchor(); - -public: - virtual ~MachineFunctionInitializer() {} - - /// Initialize the machine function. - /// - /// Return true if error occurred. - virtual bool initializeMachineFunction(MachineFunction &MF) = 0; -}; - -} // end namespace llvm - -#endif diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h index 653d1175d04b42c1194dc95e07f797d224eb211d..6d978daa20181bc68073ebc2c90102779f0f56d8 100644 --- a/include/llvm/CodeGen/MachineFunctionPass.h +++ b/include/llvm/CodeGen/MachineFunctionPass.h @@ -19,8 +19,8 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTIONPASS_H #define LLVM_CODEGEN_MACHINEFUNCTIONPASS_H -#include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Pass.h" namespace llvm { diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index e7e728c1be28b7f0e08613d230ad003b68af4a76..95401e98b2974d470018af0d60322b233080a91f 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineInstr.h - MachineInstr class --------*- C++ -*-===// +//===- llvm/CodeGen/MachineInstr.h - MachineInstr class ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,7 +17,6 @@ #define LLVM_CODEGEN_MACHINEINSTR_H #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" @@ -28,19 +27,27 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/ArrayRecycler.h" #include "llvm/Target/TargetOpcodes.h" +#include +#include +#include +#include namespace llvm { -class StringRef; template class ArrayRef; -template class SmallVectorImpl; -class DILocalVariable; class DIExpression; +class DILocalVariable; +class MachineBasicBlock; +class MachineFunction; +class MachineMemOperand; +class MachineRegisterInfo; +class ModuleSlotTracker; +class raw_ostream; +template class SmallVectorImpl; +class StringRef; class TargetInstrInfo; class TargetRegisterClass; class TargetRegisterInfo; -class MachineFunction; -class MachineMemOperand; //===----------------------------------------------------------------------===// /// Representation of each machine instruction. @@ -53,7 +60,7 @@ class MachineInstr : public ilist_node_with_parent> { public: - typedef MachineMemOperand **mmo_iterator; + using mmo_iterator = MachineMemOperand **; /// Flags to specify different kinds of comments to output in /// assembly code. These flags carry semantic information not @@ -72,43 +79,39 @@ public: BundledPred = 1 << 2, // Instruction has bundled predecessors. BundledSucc = 1 << 3 // Instruction has bundled successors. }; + private: const MCInstrDesc *MCID; // Instruction descriptor. - MachineBasicBlock *Parent; // Pointer to the owning basic block. + MachineBasicBlock *Parent = nullptr; // Pointer to the owning basic block. // Operands are allocated by an ArrayRecycler. - MachineOperand *Operands; // Pointer to the first operand. - unsigned NumOperands; // Number of operands on instruction. - typedef ArrayRecycler::Capacity OperandCapacity; + MachineOperand *Operands = nullptr; // Pointer to the first operand. + unsigned NumOperands = 0; // Number of operands on instruction. + using OperandCapacity = ArrayRecycler::Capacity; OperandCapacity CapOperands; // Capacity of the Operands array. - uint8_t Flags; // Various bits of additional + uint8_t Flags = 0; // Various bits of additional // information about machine // instruction. - uint8_t AsmPrinterFlags; // Various bits of information used by + uint8_t AsmPrinterFlags = 0; // Various bits of information used by // the AsmPrinter to emit helpful // comments. This is *not* semantic // information. Do not use this for // anything other than to convey comment // information to AsmPrinter. - uint8_t NumMemRefs; // Information on memory references. + uint8_t NumMemRefs = 0; // Information on memory references. // Note that MemRefs == nullptr, means 'don't know', not 'no memory access'. // Calling code must treat missing information conservatively. If the number // of memory operands required to be precise exceeds the maximum value of // NumMemRefs - currently 256 - we remove the operands entirely. Note also // that this is a non-owning reference to a shared copy on write buffer owned // by the MachineFunction and created via MF.allocateMemRefsArray. - mmo_iterator MemRefs; + mmo_iterator MemRefs = nullptr; DebugLoc debugLoc; // Source line information. - MachineInstr(const MachineInstr&) = delete; - void operator=(const MachineInstr&) = delete; - // Use MachineFunction::DeleteMachineInstr() instead. - ~MachineInstr() = delete; - // Intrusive list support friend struct ilist_traits; friend struct ilist_callback_traits; @@ -128,6 +131,11 @@ private: friend class MachineFunction; public: + MachineInstr(const MachineInstr &) = delete; + MachineInstr &operator=(const MachineInstr &) = delete; + // Use MachineFunction::DeleteMachineInstr() instead. + ~MachineInstr() = delete; + const MachineBasicBlock* getParent() const { return Parent; } MachineBasicBlock* getParent() { return Parent; } @@ -178,7 +186,6 @@ public: Flags &= ~((uint8_t)Flag); } - /// Return true if MI is in a bundle (but not the first MI in a bundle). /// /// A bundle looks like this before it's finalized: @@ -263,7 +270,6 @@ public: /// earlier. /// /// If this method returns, the caller should try to recover from the error. - /// void emitError(StringRef Msg) const; /// Returns the target instruction descriptor of this MachineInstr. @@ -273,7 +279,6 @@ public: unsigned getOpcode() const { return MCID->Opcode; } /// Access to explicit operands of the instruction. - /// unsigned getNumOperands() const { return NumOperands; } const MachineOperand& getOperand(unsigned i) const { @@ -289,8 +294,8 @@ public: unsigned getNumExplicitOperands() const; /// iterator/begin/end - Iterate over all operands of a machine instruction. - typedef MachineOperand *mop_iterator; - typedef const MachineOperand *const_mop_iterator; + using mop_iterator = MachineOperand *; + using const_mop_iterator = const MachineOperand *; mop_iterator operands_begin() { return Operands; } mop_iterator operands_end() { return Operands + NumOperands; } @@ -713,7 +718,6 @@ public: return hasProperty(MCID::ExtraDefRegAllocReq, Type); } - enum MICheckType { CheckDefs, // Check all operands for equality CheckKillDead, // Check all operands including kill / dead markers @@ -767,6 +771,7 @@ public: /// Returns true if the MachineInstr represents a label. bool isLabel() const { return isEHLabel() || isGCLabel(); } + bool isCFIInstruction() const { return getOpcode() == TargetOpcode::CFI_INSTRUCTION; } @@ -775,6 +780,7 @@ public: bool isPosition() const { return isLabel() || isCFIInstruction(); } bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; } + /// A DBG_VALUE is indirect iff the first operand is a register and /// the second operand is an immediate. bool isIndirectDebugValue() const { @@ -787,29 +793,38 @@ public: bool isKill() const { return getOpcode() == TargetOpcode::KILL; } bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; } bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; } + bool isMSInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM && getInlineAsmDialect(); } + bool isStackAligningInlineAsm() const; InlineAsm::AsmDialect getInlineAsmDialect() const; + bool isInsertSubreg() const { return getOpcode() == TargetOpcode::INSERT_SUBREG; } + bool isSubregToReg() const { return getOpcode() == TargetOpcode::SUBREG_TO_REG; } + bool isRegSequence() const { return getOpcode() == TargetOpcode::REG_SEQUENCE; } + bool isBundle() const { return getOpcode() == TargetOpcode::BUNDLE; } + bool isCopy() const { return getOpcode() == TargetOpcode::COPY; } + bool isFullCopy() const { return isCopy() && !getOperand(0).getSubReg() && !getOperand(1).getSubReg(); } + bool isExtractSubreg() const { return getOpcode() == TargetOpcode::EXTRACT_SUBREG; } @@ -826,26 +841,35 @@ public: getOperand(0).getSubReg() == getOperand(1).getSubReg(); } - /// Return true if this is a transient instruction that is - /// either very likely to be eliminated during register allocation (such as - /// copy-like instructions), or if this instruction doesn't have an - /// execution-time cost. + /// Return true if this instruction doesn't produce any output in the form of + /// executable instructions. + bool isMetaInstruction() const { + switch (getOpcode()) { + default: + return false; + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + case TargetOpcode::CFI_INSTRUCTION: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + case TargetOpcode::DBG_VALUE: + return true; + } + } + + /// Return true if this is a transient instruction that is either very likely + /// to be eliminated during register allocation (such as copy-like + /// instructions), or if this instruction doesn't have an execution-time cost. bool isTransient() const { - switch(getOpcode()) { - default: return false; + switch (getOpcode()) { + default: + return isMetaInstruction(); // Copy-like instructions are usually eliminated during register allocation. case TargetOpcode::PHI: case TargetOpcode::COPY: case TargetOpcode::INSERT_SUBREG: case TargetOpcode::SUBREG_TO_REG: case TargetOpcode::REG_SEQUENCE: - // Pseudo-instructions that don't produce any real output. - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - case TargetOpcode::CFI_INSTRUCTION: - case TargetOpcode::EH_LABEL: - case TargetOpcode::GC_LABEL: - case TargetOpcode::DBG_VALUE: return true; } } @@ -969,7 +993,6 @@ public: /// /// The flag operand is an immediate that can be decoded with methods like /// InlineAsm::hasRegClassConstraint(). - /// int findInlineAsmFlagIdx(unsigned OpIdx, unsigned *GroupNo = nullptr) const; /// Compute the static register class constraint for operand OpIdx. @@ -978,7 +1001,6 @@ public: /// /// Returns NULL if the static register class constraint cannot be /// determined. - /// const TargetRegisterClass* getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, @@ -1319,6 +1341,6 @@ inline raw_ostream& operator<<(raw_ostream &OS, const MachineInstr &MI) { return OS; } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEINSTR_H diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index ef4226d30fe3627b61c78b8ea431f6a7fdfc26ef..412c55d542ea6af91f5f65b96ecba70b5b1922ba 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -413,6 +413,11 @@ MachineInstrBuilder BuildMI(MachineBasicBlock &BB, unsigned Reg, unsigned Offset, const MDNode *Variable, const MDNode *Expr); +/// Clone a DBG_VALUE whose value has been spilled to FrameIndex. +MachineInstr *buildDbgValueForSpill(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + const MachineInstr &Orig, int FrameIndex); + inline unsigned getDefRegState(bool B) { return B ? RegState::Define : 0; } diff --git a/include/llvm/CodeGen/MachineInstrBundleIterator.h b/include/llvm/CodeGen/MachineInstrBundleIterator.h index 3104185385eaf8a5fed02b44d593d9a57ce40f6a..5fe4964ff116571762823806d3f6c1ba1437aa47 100644 --- a/include/llvm/CodeGen/MachineInstrBundleIterator.h +++ b/include/llvm/CodeGen/MachineInstrBundleIterator.h @@ -15,34 +15,37 @@ #define LLVM_CODEGEN_MACHINEINSTRBUNDLEITERATOR_H #include "llvm/ADT/ilist.h" +#include "llvm/ADT/simple_ilist.h" +#include #include +#include namespace llvm { template struct MachineInstrBundleIteratorTraits; template struct MachineInstrBundleIteratorTraits { - typedef simple_ilist> list_type; - typedef typename list_type::iterator instr_iterator; - typedef typename list_type::iterator nonconst_instr_iterator; - typedef typename list_type::const_iterator const_instr_iterator; + using list_type = simple_ilist>; + using instr_iterator = typename list_type::iterator; + using nonconst_instr_iterator = typename list_type::iterator; + using const_instr_iterator = typename list_type::const_iterator; }; template struct MachineInstrBundleIteratorTraits { - typedef simple_ilist> list_type; - typedef typename list_type::reverse_iterator instr_iterator; - typedef typename list_type::reverse_iterator nonconst_instr_iterator; - typedef typename list_type::const_reverse_iterator const_instr_iterator; + using list_type = simple_ilist>; + using instr_iterator = typename list_type::reverse_iterator; + using nonconst_instr_iterator = typename list_type::reverse_iterator; + using const_instr_iterator = typename list_type::const_reverse_iterator; }; template struct MachineInstrBundleIteratorTraits { - typedef simple_ilist> list_type; - typedef typename list_type::const_iterator instr_iterator; - typedef typename list_type::iterator nonconst_instr_iterator; - typedef typename list_type::const_iterator const_instr_iterator; + using list_type = simple_ilist>; + using instr_iterator = typename list_type::const_iterator; + using nonconst_instr_iterator = typename list_type::iterator; + using const_instr_iterator = typename list_type::const_iterator; }; template struct MachineInstrBundleIteratorTraits { - typedef simple_ilist> list_type; - typedef typename list_type::const_reverse_iterator instr_iterator; - typedef typename list_type::reverse_iterator nonconst_instr_iterator; - typedef typename list_type::const_reverse_iterator const_instr_iterator; + using list_type = simple_ilist>; + using instr_iterator = typename list_type::const_reverse_iterator; + using nonconst_instr_iterator = typename list_type::reverse_iterator; + using const_instr_iterator = typename list_type::const_reverse_iterator; }; template struct MachineInstrBundleIteratorHelper; @@ -104,27 +107,27 @@ template <> struct MachineInstrBundleIteratorHelper { /// inside bundles (i.e. walk top level MIs only). template class MachineInstrBundleIterator : MachineInstrBundleIteratorHelper { - typedef MachineInstrBundleIteratorTraits Traits; - typedef typename Traits::instr_iterator instr_iterator; + using Traits = MachineInstrBundleIteratorTraits; + using instr_iterator = typename Traits::instr_iterator; + instr_iterator MII; public: - typedef typename instr_iterator::value_type value_type; - typedef typename instr_iterator::difference_type difference_type; - typedef typename instr_iterator::pointer pointer; - typedef typename instr_iterator::reference reference; - typedef std::bidirectional_iterator_tag iterator_category; - - typedef typename instr_iterator::const_pointer const_pointer; - typedef typename instr_iterator::const_reference const_reference; + using value_type = typename instr_iterator::value_type; + using difference_type = typename instr_iterator::difference_type; + using pointer = typename instr_iterator::pointer; + using reference = typename instr_iterator::reference; + using const_pointer = typename instr_iterator::const_pointer; + using const_reference = typename instr_iterator::const_reference; + using iterator_category = std::bidirectional_iterator_tag; private: - typedef typename Traits::nonconst_instr_iterator nonconst_instr_iterator; - typedef typename Traits::const_instr_iterator const_instr_iterator; - typedef MachineInstrBundleIterator< - typename nonconst_instr_iterator::value_type, IsReverse> - nonconst_iterator; - typedef MachineInstrBundleIterator reverse_iterator; + using nonconst_instr_iterator = typename Traits::nonconst_instr_iterator; + using const_instr_iterator = typename Traits::const_instr_iterator; + using nonconst_iterator = + MachineInstrBundleIterator; + using reverse_iterator = MachineInstrBundleIterator; public: MachineInstrBundleIterator(instr_iterator MI) : MII(MI) { @@ -138,12 +141,14 @@ public: "MachineInstrBundleIterator with a " "bundled MI"); } + MachineInstrBundleIterator(pointer MI) : MII(MI) { // FIXME: This conversion should be explicit. assert((!MI || !MI->isBundledWithPred()) && "It's not legal to initialize " "MachineInstrBundleIterator " "with a bundled MI"); } + // Template allows conversion from const to nonconst. template MachineInstrBundleIterator( @@ -151,6 +156,7 @@ public: typename std::enable_if::value, void *>::type = nullptr) : MII(I.getInstrIterator()) {} + MachineInstrBundleIterator() : MII(nullptr) {} /// Explicit conversion between forward/reverse iterators. @@ -280,4 +286,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEINSTRBUNDLEITERATOR_H diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index 5c814f22f99b253e13e97bf2770c8b786033d1d0..58cffaade9d2ae22d0fc6d398116ae38b0430512 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -33,6 +33,8 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" namespace llvm { @@ -71,6 +73,7 @@ public: private: friend class LoopInfoBase; + explicit MachineLoop(MachineBasicBlock *MBB) : LoopBase(MBB) {} }; @@ -79,11 +82,9 @@ private: extern template class LoopInfoBase; class MachineLoopInfo : public MachineFunctionPass { - LoopInfoBase LI; friend class LoopBase; - void operator=(const MachineLoopInfo &) = delete; - MachineLoopInfo(const MachineLoopInfo &) = delete; + LoopInfoBase LI; public: static char ID; // Pass identification, replacement for typeid @@ -91,6 +92,8 @@ public: MachineLoopInfo() : MachineFunctionPass(ID) { initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); } + MachineLoopInfo(const MachineLoopInfo &) = delete; + MachineLoopInfo &operator=(const MachineLoopInfo &) = delete; LoopInfoBase& getBase() { return LI; } @@ -103,7 +106,7 @@ public: bool SpeculativePreheader = false) const; /// The iterator interface to the top-level loops in the current function. - typedef LoopInfoBase::iterator iterator; + using iterator = LoopInfoBase::iterator; inline iterator begin() const { return LI.begin(); } inline iterator end() const { return LI.end(); } bool empty() const { return LI.empty(); } @@ -166,11 +169,10 @@ public: } }; - // Allow clients to walk the list of nested loops... template <> struct GraphTraits { - typedef const MachineLoop *NodeRef; - typedef MachineLoopInfo::iterator ChildIteratorType; + using NodeRef = const MachineLoop *; + using ChildIteratorType = MachineLoopInfo::iterator; static NodeRef getEntryNode(const MachineLoop *L) { return L; } static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } @@ -178,14 +180,14 @@ template <> struct GraphTraits { }; template <> struct GraphTraits { - typedef MachineLoop *NodeRef; - typedef MachineLoopInfo::iterator ChildIteratorType; + using NodeRef = MachineLoop *; + using ChildIteratorType = MachineLoopInfo::iterator; static NodeRef getEntryNode(MachineLoop *L) { return L; } static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } static ChildIteratorType child_end(NodeRef N) { return N->end(); } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINELOOPINFO_H diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index a311124a35bae5f95aa12837fede8fc6f364b3e2..4d83f27eac3c35f9886873ae500a4a0353ffa154 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -21,7 +21,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" -#include "llvm/IR/Value.h" // PointerLikeTypeTraits +#include "llvm/IR/Value.h" // PointerLikeTypeTraits #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/DataTypes.h" diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 182d23ef3c904c5bde1ea5e69f619c017f8a939d..d64941a9e725a27c8e5379c93edce41cf4dad265 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -31,35 +31,25 @@ #ifndef LLVM_CODEGEN_MACHINEMODULEINFO_H #define LLVM_CODEGEN_MACHINEMODULEINFO_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Pass.h" -#include "llvm/Support/DataTypes.h" +#include +#include +#include namespace llvm { -//===----------------------------------------------------------------------===// -// Forward declarations. -class BlockAddress; +class BasicBlock; class CallInst; -class Constant; -class GlobalVariable; -class LandingPadInst; -class MDNode; -class MMIAddrLabelMap; -class MachineBasicBlock; +class Function; class MachineFunction; -class MachineFunctionInitializer; +class MMIAddrLabelMap; class Module; -class PointerType; -class StructType; +class TargetMachine; //===----------------------------------------------------------------------===// /// This class can be derived from and used by targets to hold private @@ -69,11 +59,12 @@ class StructType; /// class MachineModuleInfoImpl { public: - typedef PointerIntPair StubValueTy; + using StubValueTy = PointerIntPair; + using SymbolListTy = std::vector>; + virtual ~MachineModuleInfoImpl(); - typedef std::vector > SymbolListTy; -protected: +protected: /// Return the entries from a DenseMap in a deterministic sorted orer. /// Clears the map. static SymbolListTy getSortedStubs(DenseMap&); @@ -116,7 +107,7 @@ class MachineModuleInfo : public ImmutablePass { // TODO: Ideally, what we'd like is to have a switch that allows emitting // synchronous (precise at call-sites only) CFA into .eh_frame. However, - // even under this switch, we'd like .debug_frame to be precise when using. + // even under this switch, we'd like .debug_frame to be precise when using // -g. At this moment, there's no way to specify that some CFI directives // go into .eh_frame only, while others go into .debug_frame only. @@ -134,7 +125,6 @@ class MachineModuleInfo : public ImmutablePass { /// comments in lib/Target/X86/X86FrameLowering.cpp for more details. bool UsesMorestackAddr; - MachineFunctionInitializer *MFInitializer; /// Maps IR Functions to their corresponding MachineFunctions. DenseMap> MachineFunctions; /// Next unique number available for a MachineFunction. @@ -158,14 +148,13 @@ public: void setModule(const Module *M) { TheModule = M; } const Module *getModule() const { return TheModule; } - void setMachineFunctionInitializer(MachineFunctionInitializer *MFInit) { - MFInitializer = MFInit; - } - /// Returns the MachineFunction constructed for the IR function \p F. - /// Creates a new MachineFunction and runs the MachineFunctionInitializer - /// if none exists yet. - MachineFunction &getMachineFunction(const Function &F); + /// Creates a new MachineFunction if none exists yet. + MachineFunction &getOrCreateMachineFunction(const Function &F); + + /// \bried Returns the MachineFunction associated to IR function \p F if there + /// is one, otherwise nullptr. + MachineFunction *getMachineFunction(const Function &F) const; /// Delete the MachineFunction \p MF and reset the link in the IR Function to /// Machine Function map. @@ -252,6 +241,6 @@ public: /// which will link in MSVCRT's floating-point support. void computeUsesVAFloatArgument(const CallInst &I, MachineModuleInfo &MMI); -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEMODULEINFO_H diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h index f28a79c5b5cca3c98b9c78dfd94b3ae267ade0a8..34b21ceddd4340ec26aab7fbad4faab2ffe23daa 100644 --- a/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -15,9 +15,9 @@ #ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H #define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/Wasm.h" +#include "llvm/CodeGen/ValueTypes.h" namespace llvm { class MCSymbol; @@ -77,33 +77,6 @@ public: SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); } }; -/// MachineModuleInfoWasm - This is a MachineModuleInfoImpl implementation -/// for Wasm targets. -class MachineModuleInfoWasm : public MachineModuleInfoImpl { - /// WebAssembly global variables defined by CodeGen. - std::vector Globals; - - /// The WebAssembly global variable which is the stack pointer. - unsigned StackPointerGlobal; - - virtual void anchor(); // Out of line virtual method. -public: - MachineModuleInfoWasm(const MachineModuleInfo &) - : StackPointerGlobal(-1U) {} - - void addGlobal(const wasm::Global &G) { Globals.push_back(G); } - const std::vector &getGlobals() const { return Globals; } - - bool hasStackPointerGlobal() const { - return StackPointerGlobal != -1U; - } - unsigned getStackPointerGlobal() const { - assert(hasStackPointerGlobal() && "Stack ptr global hasn't been set"); - return StackPointerGlobal; - } - void setStackPointerGlobal(unsigned Global) { StackPointerGlobal = Global; } -}; - } // end namespace llvm #endif diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 81b43126adeba7899e61c19d3f99af1e65ede466..2560399bcf545747fc91f9504e39cf8301476b28 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -14,8 +14,8 @@ #ifndef LLVM_CODEGEN_MACHINEOPERAND_H #define LLVM_CODEGEN_MACHINEOPERAND_H -#include "llvm/Support/DataTypes.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/DataTypes.h" #include namespace llvm { @@ -65,7 +65,6 @@ public: MO_CFIIndex, ///< MCCFIInstruction index. MO_IntrinsicID, ///< Intrinsic ID for ISel MO_Predicate, ///< Generic predicate for ISel - MO_Placeholder, ///< Placeholder for GlobalISel ComplexPattern result. }; private: @@ -768,11 +767,6 @@ public: return Op; } - static MachineOperand CreatePlaceholder() { - MachineOperand Op(MachineOperand::MO_Placeholder); - return Op; - } - friend class MachineInstr; friend class MachineRegisterInfo; private: diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h index 21f847c7e5bab443f141c36b10cae5eb80a2935d..8394b58d0a16c81600572f885031af9d99ccb707 100644 --- a/include/llvm/CodeGen/MachineRegionInfo.h +++ b/include/llvm/CodeGen/MachineRegionInfo.h @@ -10,83 +10,77 @@ #ifndef LLVM_CODEGEN_MACHINEREGIONINFO_H #define LLVM_CODEGEN_MACHINEREGIONINFO_H +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" - +#include namespace llvm { -class MachineDominatorTree; struct MachinePostDominatorTree; class MachineRegion; class MachineRegionNode; class MachineRegionInfo; -template<> -struct RegionTraits { - typedef MachineFunction FuncT; - typedef MachineBasicBlock BlockT; - typedef MachineRegion RegionT; - typedef MachineRegionNode RegionNodeT; - typedef MachineRegionInfo RegionInfoT; - typedef MachineDominatorTree DomTreeT; - typedef MachineDomTreeNode DomTreeNodeT; - typedef MachinePostDominatorTree PostDomTreeT; - typedef MachineDominanceFrontier DomFrontierT; - typedef MachineInstr InstT; - typedef MachineLoop LoopT; - typedef MachineLoopInfo LoopInfoT; +template <> struct RegionTraits { + using FuncT = MachineFunction; + using BlockT = MachineBasicBlock; + using RegionT = MachineRegion; + using RegionNodeT = MachineRegionNode; + using RegionInfoT = MachineRegionInfo; + using DomTreeT = MachineDominatorTree; + using DomTreeNodeT = MachineDomTreeNode; + using PostDomTreeT = MachinePostDominatorTree; + using DomFrontierT = MachineDominanceFrontier; + using InstT = MachineInstr; + using LoopT = MachineLoop; + using LoopInfoT = MachineLoopInfo; static unsigned getNumSuccessors(MachineBasicBlock *BB) { return BB->succ_size(); } }; - class MachineRegionNode : public RegionNodeBase> { public: - inline MachineRegionNode(MachineRegion *Parent, - MachineBasicBlock *Entry, + inline MachineRegionNode(MachineRegion *Parent, MachineBasicBlock *Entry, bool isSubRegion = false) - : RegionNodeBase>(Parent, Entry, isSubRegion) { - - } + : RegionNodeBase>(Parent, Entry, + isSubRegion) {} bool operator==(const MachineRegion &RN) const { - return this == reinterpret_cast(&RN); + return this == reinterpret_cast(&RN); } }; class MachineRegion : public RegionBase> { public: MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit, - MachineRegionInfo* RI, - MachineDominatorTree *DT, MachineRegion *Parent = nullptr); + MachineRegionInfo *RI, MachineDominatorTree *DT, + MachineRegion *Parent = nullptr); ~MachineRegion(); bool operator==(const MachineRegionNode &RN) const { - return &RN == reinterpret_cast(this); + return &RN == reinterpret_cast(this); } }; class MachineRegionInfo : public RegionInfoBase> { public: explicit MachineRegionInfo(); - ~MachineRegionInfo() override; // updateStatistics - Update statistic about created regions. void updateStatistics(MachineRegion *R) final; - void recalculate(MachineFunction &F, - MachineDominatorTree *DT, - MachinePostDominatorTree *PDT, - MachineDominanceFrontier *DF); + void recalculate(MachineFunction &F, MachineDominatorTree *DT, + MachinePostDominatorTree *PDT, MachineDominanceFrontier *DF); }; class MachineRegionInfoPass : public MachineFunctionPass { @@ -94,17 +88,13 @@ class MachineRegionInfoPass : public MachineFunctionPass { public: static char ID; - explicit MachineRegionInfoPass(); + explicit MachineRegionInfoPass(); ~MachineRegionInfoPass() override; - MachineRegionInfo &getRegionInfo() { - return RI; - } + MachineRegionInfo &getRegionInfo() { return RI; } - const MachineRegionInfo &getRegionInfo() const { - return RI; - } + const MachineRegionInfo &getRegionInfo() const { return RI; } /// @name MachineFunctionPass interface //@{ @@ -117,66 +107,76 @@ public: //@} }; - template <> template <> -inline MachineBasicBlock* RegionNodeBase>::getNodeAs() const { +inline MachineBasicBlock * +RegionNodeBase>::getNodeAs() + const { assert(!isSubRegion() && "This is not a MachineBasicBlock RegionNode!"); return getEntry(); } -template<> -template<> -inline MachineRegion* RegionNodeBase>::getNodeAs() const { +template <> +template <> +inline MachineRegion * +RegionNodeBase>::getNodeAs() + const { assert(isSubRegion() && "This is not a subregion RegionNode!"); - auto Unconst = const_cast>*>(this); - return reinterpret_cast(Unconst); + auto Unconst = + const_cast> *>(this); + return reinterpret_cast(Unconst); } - RegionNodeGraphTraits(MachineRegionNode, MachineBasicBlock, MachineRegion); -RegionNodeGraphTraits(const MachineRegionNode, MachineBasicBlock, MachineRegion); +RegionNodeGraphTraits(const MachineRegionNode, MachineBasicBlock, + MachineRegion); RegionGraphTraits(MachineRegion, MachineRegionNode); RegionGraphTraits(const MachineRegion, const MachineRegionNode); -template <> struct GraphTraits - : public GraphTraits > { - typedef df_iterator, false, - GraphTraits>> - nodes_iterator; +template <> +struct GraphTraits + : public GraphTraits> { + using nodes_iterator = df_iterator, + false, GraphTraits>>; static NodeRef getEntryNode(MachineRegionInfo *RI) { - return GraphTraits >::getEntryNode(RI->getTopLevelRegion()); + return GraphTraits>::getEntryNode( + RI->getTopLevelRegion()); } - static nodes_iterator nodes_begin(MachineRegionInfo* RI) { + + static nodes_iterator nodes_begin(MachineRegionInfo *RI) { return nodes_iterator::begin(getEntryNode(RI)); } + static nodes_iterator nodes_end(MachineRegionInfo *RI) { return nodes_iterator::end(getEntryNode(RI)); } }; -template <> struct GraphTraits - : public GraphTraits { - typedef df_iterator, false, - GraphTraits>> - nodes_iterator; +template <> +struct GraphTraits + : public GraphTraits { + using nodes_iterator = df_iterator, + false, GraphTraits>>; static NodeRef getEntryNode(MachineRegionInfoPass *RI) { - return GraphTraits::getEntryNode(&RI->getRegionInfo()); + return GraphTraits::getEntryNode(&RI->getRegionInfo()); } - static nodes_iterator nodes_begin(MachineRegionInfoPass* RI) { - return GraphTraits::nodes_begin(&RI->getRegionInfo()); + + static nodes_iterator nodes_begin(MachineRegionInfoPass *RI) { + return GraphTraits::nodes_begin(&RI->getRegionInfo()); } + static nodes_iterator nodes_end(MachineRegionInfoPass *RI) { - return GraphTraits::nodes_end(&RI->getRegionInfo()); + return GraphTraits::nodes_end(&RI->getRegionInfo()); } }; extern template class RegionBase>; extern template class RegionNodeBase>; extern template class RegionInfoBase>; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_MACHINEREGIONINFO_H diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 6e5c6473ff4a49d9c829ce109078a7e3932f7772..8347f00cbc7a4422efbed45a57ca7110c88bf865 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -14,11 +14,13 @@ #ifndef LLVM_CODEGEN_MACHINEREGISTERINFO_H #define LLVM_CODEGEN_MACHINEREGISTERINFO_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -28,21 +30,21 @@ #include "llvm/MC/LaneBitmask.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include #include #include #include #include #include #include +#include namespace llvm { class PSetIterator; /// Convenient type to represent either a register class or a register bank. -typedef PointerUnion - RegClassOrRegBank; +using RegClassOrRegBank = + PointerUnion; /// MachineRegisterInfo - Keep track of information for virtual and physical /// registers, including vreg register classes, use/def chains for registers, @@ -125,7 +127,7 @@ private: /// started. BitVector ReservedRegs; - typedef DenseMap VRegToTypeMap; + using VRegToTypeMap = DenseMap; /// Map generic virtual registers to their actual size. mutable std::unique_ptr VRegToType; @@ -266,8 +268,8 @@ public: /// reg_iterator/reg_begin/reg_end - Walk all defs and uses of the specified /// register. - typedef defusechain_iterator - reg_iterator; + using reg_iterator = + defusechain_iterator; reg_iterator reg_begin(unsigned RegNo) const { return reg_iterator(getRegUseDefListHead(RegNo)); } @@ -279,8 +281,8 @@ public: /// reg_instr_iterator/reg_instr_begin/reg_instr_end - Walk all defs and uses /// of the specified register, stepping by MachineInstr. - typedef defusechain_instr_iterator - reg_instr_iterator; + using reg_instr_iterator = + defusechain_instr_iterator; reg_instr_iterator reg_instr_begin(unsigned RegNo) const { return reg_instr_iterator(getRegUseDefListHead(RegNo)); } @@ -295,8 +297,8 @@ public: /// reg_bundle_iterator/reg_bundle_begin/reg_bundle_end - Walk all defs and uses /// of the specified register, stepping by bundle. - typedef defusechain_instr_iterator - reg_bundle_iterator; + using reg_bundle_iterator = + defusechain_instr_iterator; reg_bundle_iterator reg_bundle_begin(unsigned RegNo) const { return reg_bundle_iterator(getRegUseDefListHead(RegNo)); } @@ -314,8 +316,8 @@ public: /// reg_nodbg_iterator/reg_nodbg_begin/reg_nodbg_end - Walk all defs and uses /// of the specified register, skipping those marked as Debug. - typedef defusechain_iterator - reg_nodbg_iterator; + using reg_nodbg_iterator = + defusechain_iterator; reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const { return reg_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -331,8 +333,8 @@ public: /// reg_instr_nodbg_iterator/reg_instr_nodbg_begin/reg_instr_nodbg_end - Walk /// all defs and uses of the specified register, stepping by MachineInstr, /// skipping those marked as Debug. - typedef defusechain_instr_iterator - reg_instr_nodbg_iterator; + using reg_instr_nodbg_iterator = + defusechain_instr_iterator; reg_instr_nodbg_iterator reg_instr_nodbg_begin(unsigned RegNo) const { return reg_instr_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -348,8 +350,8 @@ public: /// reg_bundle_nodbg_iterator/reg_bundle_nodbg_begin/reg_bundle_nodbg_end - Walk /// all defs and uses of the specified register, stepping by bundle, /// skipping those marked as Debug. - typedef defusechain_instr_iterator - reg_bundle_nodbg_iterator; + using reg_bundle_nodbg_iterator = + defusechain_instr_iterator; reg_bundle_nodbg_iterator reg_bundle_nodbg_begin(unsigned RegNo) const { return reg_bundle_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -369,8 +371,8 @@ public: } /// def_iterator/def_begin/def_end - Walk all defs of the specified register. - typedef defusechain_iterator - def_iterator; + using def_iterator = + defusechain_iterator; def_iterator def_begin(unsigned RegNo) const { return def_iterator(getRegUseDefListHead(RegNo)); } @@ -382,8 +384,8 @@ public: /// def_instr_iterator/def_instr_begin/def_instr_end - Walk all defs of the /// specified register, stepping by MachineInst. - typedef defusechain_instr_iterator - def_instr_iterator; + using def_instr_iterator = + defusechain_instr_iterator; def_instr_iterator def_instr_begin(unsigned RegNo) const { return def_instr_iterator(getRegUseDefListHead(RegNo)); } @@ -398,8 +400,8 @@ public: /// def_bundle_iterator/def_bundle_begin/def_bundle_end - Walk all defs of the /// specified register, stepping by bundle. - typedef defusechain_instr_iterator - def_bundle_iterator; + using def_bundle_iterator = + defusechain_instr_iterator; def_bundle_iterator def_bundle_begin(unsigned RegNo) const { return def_bundle_iterator(getRegUseDefListHead(RegNo)); } @@ -425,8 +427,8 @@ public: } /// use_iterator/use_begin/use_end - Walk all uses of the specified register. - typedef defusechain_iterator - use_iterator; + using use_iterator = + defusechain_iterator; use_iterator use_begin(unsigned RegNo) const { return use_iterator(getRegUseDefListHead(RegNo)); } @@ -438,8 +440,8 @@ public: /// use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the /// specified register, stepping by MachineInstr. - typedef defusechain_instr_iterator - use_instr_iterator; + using use_instr_iterator = + defusechain_instr_iterator; use_instr_iterator use_instr_begin(unsigned RegNo) const { return use_instr_iterator(getRegUseDefListHead(RegNo)); } @@ -454,8 +456,8 @@ public: /// use_bundle_iterator/use_bundle_begin/use_bundle_end - Walk all uses of the /// specified register, stepping by bundle. - typedef defusechain_instr_iterator - use_bundle_iterator; + using use_bundle_iterator = + defusechain_instr_iterator; use_bundle_iterator use_bundle_begin(unsigned RegNo) const { return use_bundle_iterator(getRegUseDefListHead(RegNo)); } @@ -482,8 +484,8 @@ public: /// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the /// specified register, skipping those marked as Debug. - typedef defusechain_iterator - use_nodbg_iterator; + using use_nodbg_iterator = + defusechain_iterator; use_nodbg_iterator use_nodbg_begin(unsigned RegNo) const { return use_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -499,8 +501,8 @@ public: /// use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk /// all uses of the specified register, stepping by MachineInstr, skipping /// those marked as Debug. - typedef defusechain_instr_iterator - use_instr_nodbg_iterator; + using use_instr_nodbg_iterator = + defusechain_instr_iterator; use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const { return use_instr_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -516,8 +518,8 @@ public: /// use_bundle_nodbg_iterator/use_bundle_nodbg_begin/use_bundle_nodbg_end - Walk /// all uses of the specified register, stepping by bundle, skipping /// those marked as Debug. - typedef defusechain_instr_iterator - use_bundle_nodbg_iterator; + using use_bundle_nodbg_iterator = + defusechain_instr_iterator; use_bundle_nodbg_iterator use_bundle_nodbg_begin(unsigned RegNo) const { return use_bundle_nodbg_iterator(getRegUseDefListHead(RegNo)); } @@ -593,7 +595,6 @@ public: /// Return the register class of the specified virtual register. /// This shouldn't be used directly unless \p Reg has a register class. /// \see getRegClassOrNull when this might happen. - /// const TargetRegisterClass *getRegClass(unsigned Reg) const { assert(VRegInfo[Reg].first.is() && "Register class not set, wrong accessor"); @@ -620,7 +621,6 @@ public: /// a register bank or has been assigned a register class. /// \note It is possible to get the register bank from the register class via /// RegisterBankInfo::getRegBankFromRegClass. - /// const RegisterBank *getRegBankOrNull(unsigned Reg) const { const RegClassOrRegBank &Val = VRegInfo[Reg].first; return Val.dyn_cast(); @@ -629,26 +629,27 @@ public: /// Return the register bank or register class of \p Reg. /// \note Before the register bank gets assigned (i.e., before the /// RegBankSelect pass) \p Reg may not have either. - /// const RegClassOrRegBank &getRegClassOrRegBank(unsigned Reg) const { return VRegInfo[Reg].first; } /// setRegClass - Set the register class of the specified virtual register. - /// void setRegClass(unsigned Reg, const TargetRegisterClass *RC); /// Set the register bank to \p RegBank for \p Reg. - /// void setRegBank(unsigned Reg, const RegisterBank &RegBank); + void setRegClassOrRegBank(unsigned Reg, + const RegClassOrRegBank &RCOrRB){ + VRegInfo[Reg].first = RCOrRB; + } + /// constrainRegClass - Constrain the register class of the specified virtual /// register to be a common subclass of RC and the current register class, /// but only if the new class has at least MinNumRegs registers. Return the /// new register class, or NULL if no such class exists. /// This should only be used when the constraint is known to be trivial, like /// GR32 -> GR32_NOSP. Beware of increasing register pressure. - /// const TargetRegisterClass *constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs = 0); @@ -660,12 +661,10 @@ public: /// This method can be used after constraints have been removed from a /// virtual register, for example after removing instructions or splitting /// the live range. - /// bool recomputeRegClass(unsigned Reg); /// createVirtualRegister - Create and return a new virtual register in the /// function with the specified register class. - /// unsigned createVirtualRegister(const TargetRegisterClass *RegClass); /// Accessor for VRegToType. This accessor should only be used @@ -699,7 +698,6 @@ public: unsigned createIncompleteVirtualRegister(); /// getNumVirtRegs - Return the number of virtual registers created. - /// unsigned getNumVirtRegs() const { return VRegInfo.size(); } /// clearVirtRegs - Remove all virtual registers (after physreg assignment). @@ -805,7 +803,6 @@ public: /// /// Reserved registers may belong to an allocatable register class, but the /// target has explicitly requested that they are not used. - /// bool isReserved(unsigned PhysReg) const { return getReservedRegs().test(PhysReg); } @@ -833,8 +830,8 @@ public: // Iteration support for the live-ins set. It's kept in sorted order // by register number. - typedef std::vector>::const_iterator - livein_iterator; + using livein_iterator = + std::vector>::const_iterator; livein_iterator livein_begin() const { return LiveIns.begin(); } livein_iterator livein_end() const { return LiveIns.end(); } bool livein_empty() const { return LiveIns.empty(); } @@ -905,10 +902,10 @@ public: } public: - typedef std::iterator::reference reference; - typedef std::iterator::pointer pointer; + using reference = std::iterator::reference; + using pointer = std::iterator::pointer; defusechain_iterator() = default; @@ -1011,10 +1008,10 @@ public: } public: - typedef std::iterator::reference reference; - typedef std::iterator::pointer pointer; + using reference = std::iterator::reference; + using pointer = std::iterator::pointer; defusechain_instr_iterator() = default; diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 6b2a16e1d36e672e1213d812644f703164079ef8..3b02ec400abac3485b43deb8828ce1116eed98c0 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -104,10 +104,15 @@ extern cl::opt ForceBottomUp; class LiveIntervals; class MachineDominatorTree; +class MachineFunction; +class MachineInstr; class MachineLoopInfo; class RegisterClassInfo; class SchedDFSResult; class ScheduleHazardRecognizer; +class TargetInstrInfo; +class TargetPassConfig; +class TargetRegisterInfo; /// MachineSchedContext provides enough context from the MachineScheduler pass /// for the target to instantiate a scheduler. @@ -129,10 +134,10 @@ struct MachineSchedContext { /// schedulers. class MachineSchedRegistry : public MachinePassRegistryNode { public: - typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineSchedContext *); + using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *); // RegisterPassParser requires a (misnamed) FunctionPassCtor type. - typedef ScheduleDAGCtor FunctionPassCtor; + using FunctionPassCtor = ScheduleDAGCtor; static MachinePassRegistry Registry; @@ -527,7 +532,7 @@ public: unsigned size() const { return Queue.size(); } - typedef std::vector::iterator iterator; + using iterator = std::vector::iterator; iterator begin() { return Queue.begin(); } diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h index e4744fd5e260b877c35f6054ebbbcb6eef99b1a2..d991e4c216d958f61f9aeb255eb0ca68305bc14c 100644 --- a/include/llvm/CodeGen/MachineValueType.h +++ b/include/llvm/CodeGen/MachineValueType.h @@ -26,157 +26,250 @@ namespace llvm { /// Machine Value Type. Every type that is supported natively by some /// processor targeted by LLVM occurs here. This means that any legal value /// type can be represented by an MVT. -class MVT { + class MVT { public: - enum SimpleValueType : int8_t { - // Simple value types less than zero are considered extended value types. - INVALID_SIMPLE_VALUE_TYPE = -1, + enum SimpleValueType : uint8_t { + // Simple value types that aren't explicitly part of this enumeration + // are considered extended value types. + INVALID_SIMPLE_VALUE_TYPE = 0, // If you change this numbering, you must change the values in // ValueTypes.td as well! - Other = 0, // This is a non-standard value - i1 = 1, // This is a 1 bit integer value - i8 = 2, // This is an 8 bit integer value - i16 = 3, // This is a 16 bit integer value - i32 = 4, // This is a 32 bit integer value - i64 = 5, // This is a 64 bit integer value - i128 = 6, // This is a 128 bit integer value + Other = 1, // This is a non-standard value + i1 = 2, // This is a 1 bit integer value + i8 = 3, // This is an 8 bit integer value + i16 = 4, // This is a 16 bit integer value + i32 = 5, // This is a 32 bit integer value + i64 = 6, // This is a 64 bit integer value + i128 = 7, // This is a 128 bit integer value FIRST_INTEGER_VALUETYPE = i1, LAST_INTEGER_VALUETYPE = i128, - f16 = 7, // This is a 16 bit floating point value - f32 = 8, // This is a 32 bit floating point value - f64 = 9, // This is a 64 bit floating point value - f80 = 10, // This is a 80 bit floating point value - f128 = 11, // This is a 128 bit floating point value - ppcf128 = 12, // This is a PPC 128-bit floating point value + f16 = 8, // This is a 16 bit floating point value + f32 = 9, // This is a 32 bit floating point value + f64 = 10, // This is a 64 bit floating point value + f80 = 11, // This is a 80 bit floating point value + f128 = 12, // This is a 128 bit floating point value + ppcf128 = 13, // This is a PPC 128-bit floating point value FIRST_FP_VALUETYPE = f16, LAST_FP_VALUETYPE = ppcf128, - v2i1 = 13, // 2 x i1 - v4i1 = 14, // 4 x i1 - v8i1 = 15, // 8 x i1 - v16i1 = 16, // 16 x i1 - v32i1 = 17, // 32 x i1 - v64i1 = 18, // 64 x i1 - v512i1 = 19, // 512 x i1 - v1024i1 = 20, // 1024 x i1 - - v1i8 = 21, // 1 x i8 - v2i8 = 22, // 2 x i8 - v4i8 = 23, // 4 x i8 - v8i8 = 24, // 8 x i8 - v16i8 = 25, // 16 x i8 - v32i8 = 26, // 32 x i8 - v64i8 = 27, // 64 x i8 - v128i8 = 28, //128 x i8 - v256i8 = 29, //256 x i8 - - v1i16 = 30, // 1 x i16 - v2i16 = 31, // 2 x i16 - v4i16 = 32, // 4 x i16 - v8i16 = 33, // 8 x i16 - v16i16 = 34, // 16 x i16 - v32i16 = 35, // 32 x i16 - v64i16 = 36, // 64 x i16 - v128i16 = 37, //128 x i16 - - v1i32 = 38, // 1 x i32 - v2i32 = 39, // 2 x i32 - v4i32 = 40, // 4 x i32 - v8i32 = 41, // 8 x i32 - v16i32 = 42, // 16 x i32 - v32i32 = 43, // 32 x i32 - v64i32 = 44, // 64 x i32 - - v1i64 = 45, // 1 x i64 - v2i64 = 46, // 2 x i64 - v4i64 = 47, // 4 x i64 - v8i64 = 48, // 8 x i64 - v16i64 = 49, // 16 x i64 - v32i64 = 50, // 32 x i64 - - v1i128 = 51, // 1 x i128 - - FIRST_INTEGER_VECTOR_VALUETYPE = v2i1, - LAST_INTEGER_VECTOR_VALUETYPE = v1i128, - - v2f16 = 52, // 2 x f16 - v4f16 = 53, // 4 x f16 - v8f16 = 54, // 8 x f16 - v1f32 = 55, // 1 x f32 - v2f32 = 56, // 2 x f32 - v4f32 = 57, // 4 x f32 - v8f32 = 58, // 8 x f32 - v16f32 = 59, // 16 x f32 - v1f64 = 60, // 1 x f64 - v2f64 = 61, // 2 x f64 - v4f64 = 62, // 4 x f64 - v8f64 = 63, // 8 x f64 + v1i1 = 14, // 1 x i1 + v2i1 = 15, // 2 x i1 + v4i1 = 16, // 4 x i1 + v8i1 = 17, // 8 x i1 + v16i1 = 18, // 16 x i1 + v32i1 = 19, // 32 x i1 + v64i1 = 20, // 64 x i1 + v512i1 = 21, // 512 x i1 + v1024i1 = 22, // 1024 x i1 + + v1i8 = 23, // 1 x i8 + v2i8 = 24, // 2 x i8 + v4i8 = 25, // 4 x i8 + v8i8 = 26, // 8 x i8 + v16i8 = 27, // 16 x i8 + v32i8 = 28, // 32 x i8 + v64i8 = 29, // 64 x i8 + v128i8 = 30, //128 x i8 + v256i8 = 31, //256 x i8 + + v1i16 = 32, // 1 x i16 + v2i16 = 33, // 2 x i16 + v4i16 = 34, // 4 x i16 + v8i16 = 35, // 8 x i16 + v16i16 = 36, // 16 x i16 + v32i16 = 37, // 32 x i16 + v64i16 = 38, // 64 x i16 + v128i16 = 39, //128 x i16 + + v1i32 = 40, // 1 x i32 + v2i32 = 41, // 2 x i32 + v4i32 = 42, // 4 x i32 + v8i32 = 43, // 8 x i32 + v16i32 = 44, // 16 x i32 + v32i32 = 45, // 32 x i32 + v64i32 = 46, // 64 x i32 + + v1i64 = 47, // 1 x i64 + v2i64 = 48, // 2 x i64 + v4i64 = 49, // 4 x i64 + v8i64 = 50, // 8 x i64 + v16i64 = 51, // 16 x i64 + v32i64 = 52, // 32 x i64 + + v1i128 = 53, // 1 x i128 + + // Scalable integer types + nxv1i1 = 54, // n x 1 x i1 + nxv2i1 = 55, // n x 2 x i1 + nxv4i1 = 56, // n x 4 x i1 + nxv8i1 = 57, // n x 8 x i1 + nxv16i1 = 58, // n x 16 x i1 + nxv32i1 = 59, // n x 32 x i1 + + nxv1i8 = 60, // n x 1 x i8 + nxv2i8 = 61, // n x 2 x i8 + nxv4i8 = 62, // n x 4 x i8 + nxv8i8 = 63, // n x 8 x i8 + nxv16i8 = 64, // n x 16 x i8 + nxv32i8 = 65, // n x 32 x i8 + + nxv1i16 = 66, // n x 1 x i16 + nxv2i16 = 67, // n x 2 x i16 + nxv4i16 = 68, // n x 4 x i16 + nxv8i16 = 69, // n x 8 x i16 + nxv16i16 = 70, // n x 16 x i16 + nxv32i16 = 71, // n x 32 x i16 + + nxv1i32 = 72, // n x 1 x i32 + nxv2i32 = 73, // n x 2 x i32 + nxv4i32 = 74, // n x 4 x i32 + nxv8i32 = 75, // n x 8 x i32 + nxv16i32 = 76, // n x 16 x i32 + nxv32i32 = 77, // n x 32 x i32 + + nxv1i64 = 78, // n x 1 x i64 + nxv2i64 = 79, // n x 2 x i64 + nxv4i64 = 80, // n x 4 x i64 + nxv8i64 = 81, // n x 8 x i64 + nxv16i64 = 82, // n x 16 x i64 + nxv32i64 = 83, // n x 32 x i64 + + FIRST_INTEGER_VECTOR_VALUETYPE = v1i1, + LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, + + FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1, + LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, + + v2f16 = 84, // 2 x f16 + v4f16 = 85, // 4 x f16 + v8f16 = 86, // 8 x f16 + v1f32 = 87, // 1 x f32 + v2f32 = 88, // 2 x f32 + v4f32 = 89, // 4 x f32 + v8f32 = 90, // 8 x f32 + v16f32 = 91, // 16 x f32 + v1f64 = 92, // 1 x f64 + v2f64 = 93, // 2 x f64 + v4f64 = 94, // 4 x f64 + v8f64 = 95, // 8 x f64 + + nxv2f16 = 96, // n x 2 x f16 + nxv4f16 = 97, // n x 4 x f16 + nxv8f16 = 98, // n x 8 x f16 + nxv1f32 = 99, // n x 1 x f32 + nxv2f32 = 100, // n x 2 x f32 + nxv4f32 = 101, // n x 4 x f32 + nxv8f32 = 102, // n x 8 x f32 + nxv16f32 = 103, // n x 16 x f32 + nxv1f64 = 104, // n x 1 x f64 + nxv2f64 = 105, // n x 2 x f64 + nxv4f64 = 106, // n x 4 x f64 + nxv8f64 = 107, // n x 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, - LAST_FP_VECTOR_VALUETYPE = v8f64, + LAST_FP_VECTOR_VALUETYPE = nxv8f64, - FIRST_VECTOR_VALUETYPE = v2i1, - LAST_VECTOR_VALUETYPE = v8f64, + FIRST_FP_SCALABLE_VALUETYPE = nxv2f16, + LAST_FP_SCALABLE_VALUETYPE = nxv8f64, - x86mmx = 64, // This is an X86 MMX value + FIRST_VECTOR_VALUETYPE = v1i1, + LAST_VECTOR_VALUETYPE = nxv8f64, - Glue = 65, // This glues nodes together during pre-RA sched + x86mmx = 108, // This is an X86 MMX value - isVoid = 66, // This has no value + Glue = 109, // This glues nodes together during pre-RA sched - Untyped = 67, // This value takes a register, but has - // unspecified type. The register class - // will be determined by the opcode. + isVoid = 110, // This has no value - FIRST_VALUETYPE = 0, // This is always the beginning of the list. - LAST_VALUETYPE = 68, // This always remains at the end of the list. + Untyped = 111, // This value takes a register, but has + // unspecified type. The register class + // will be determined by the opcode. + + FIRST_VALUETYPE = 1, // This is always the beginning of the list. + LAST_VALUETYPE = 112, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors // This value must be a multiple of 32. - MAX_ALLOWED_VALUETYPE = 96, + MAX_ALLOWED_VALUETYPE = 128, // A value of type llvm::TokenTy - token = 120, + token = 248, // This is MDNode or MDString. - Metadata = 121, + Metadata = 249, // An int value the size of the pointer of the current // target to any address space. This must only be used internal to // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR. - iPTRAny = 122, + iPTRAny = 250, // A vector with any length and element size. This is used // for intrinsics that have overloadings based on vector types. // This is only for tblgen's consumption! - vAny = 123, + vAny = 251, // Any floating-point or vector floating-point value. This is used // for intrinsics that have overloadings based on floating-point types. // This is only for tblgen's consumption! - fAny = 124, + fAny = 252, // An integer or vector integer value of any bit width. This is // used for intrinsics that have overloadings based on integer bit widths. // This is only for tblgen's consumption! - iAny = 125, + iAny = 253, // An int value the size of the pointer of the current // target. This should only be used internal to tblgen! - iPTR = 126, + iPTR = 254, // Any type. This is used for intrinsics that have overloadings. // This is only for tblgen's consumption! - Any = 127 + Any = 255 }; SimpleValueType SimpleTy; + + // A class to represent the number of elements in a vector + // + // For fixed-length vectors, the total number of elements is equal to 'Min' + // For scalable vectors, the total number of elements is a multiple of 'Min' + class ElementCount { + public: + unsigned Min; + bool Scalable; + + ElementCount(unsigned Min, bool Scalable) + : Min(Min), Scalable(Scalable) {} + + ElementCount operator*(unsigned RHS) { + return { Min * RHS, Scalable }; + } + + ElementCount& operator*=(unsigned RHS) { + Min *= RHS; + return *this; + } + + ElementCount operator/(unsigned RHS) { + return { Min / RHS, Scalable }; + } + + ElementCount& operator/=(unsigned RHS) { + Min /= RHS; + return *this; + } + + bool operator==(const ElementCount& RHS) { + return Min == RHS.Min && Scalable == RHS.Scalable; + } + }; + constexpr MVT() : SimpleTy(INVALID_SIMPLE_VALUE_TYPE) {} constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {} @@ -221,6 +314,15 @@ class MVT { SimpleTy <= MVT::LAST_VECTOR_VALUETYPE); } + /// Return true if this is a vector value type where the + /// runtime length is machine dependent + bool isScalableVector() const { + return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) || + (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE && + SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE)); + } + /// Return true if this is a 16-bit vector type. bool is16BitVector() const { return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 || @@ -311,6 +413,7 @@ class MVT { switch (SimpleTy) { default: llvm_unreachable("Not a vector MVT!"); + case v1i1: case v2i1: case v4i1: case v8i1: @@ -318,7 +421,13 @@ class MVT { case v32i1: case v64i1: case v512i1: - case v1024i1: return i1; + case v1024i1: + case nxv1i1: + case nxv2i1: + case nxv4i1: + case nxv8i1: + case nxv16i1: + case nxv32i1: return i1; case v1i8: case v2i8: case v4i8: @@ -327,7 +436,13 @@ class MVT { case v32i8: case v64i8: case v128i8: - case v256i8: return i8; + case v256i8: + case nxv1i8: + case nxv2i8: + case nxv4i8: + case nxv8i8: + case nxv16i8: + case nxv32i8: return i8; case v1i16: case v2i16: case v4i16: @@ -335,33 +450,63 @@ class MVT { case v16i16: case v32i16: case v64i16: - case v128i16: return i16; + case v128i16: + case nxv1i16: + case nxv2i16: + case nxv4i16: + case nxv8i16: + case nxv16i16: + case nxv32i16: return i16; case v1i32: case v2i32: case v4i32: case v8i32: case v16i32: case v32i32: - case v64i32: return i32; + case v64i32: + case nxv1i32: + case nxv2i32: + case nxv4i32: + case nxv8i32: + case nxv16i32: + case nxv32i32: return i32; case v1i64: case v2i64: case v4i64: case v8i64: case v16i64: - case v32i64: return i64; + case v32i64: + case nxv1i64: + case nxv2i64: + case nxv4i64: + case nxv8i64: + case nxv16i64: + case nxv32i64: return i64; case v1i128: return i128; case v2f16: case v4f16: - case v8f16: return f16; + case v8f16: + case nxv2f16: + case nxv4f16: + case nxv8f16: return f16; case v1f32: case v2f32: case v4f32: case v8f32: - case v16f32: return f32; + case v16f32: + case nxv1f32: + case nxv2f32: + case nxv4f32: + case nxv8f32: + case nxv16f32: return f32; case v1f64: case v2f64: case v4f64: - case v8f64: return f64; + case v8f64: + case nxv1f64: + case nxv2f64: + case nxv4f64: + case nxv8f64: return f64; } } @@ -382,13 +527,24 @@ class MVT { case v32i8: case v32i16: case v32i32: - case v32i64: return 32; + case v32i64: + case nxv32i1: + case nxv32i8: + case nxv32i16: + case nxv32i32: + case nxv32i64: return 32; case v16i1: case v16i8: case v16i16: case v16i32: case v16i64: - case v16f32: return 16; + case v16f32: + case nxv16i1: + case nxv16i8: + case nxv16i16: + case nxv16i32: + case nxv16i64: + case nxv16f32: return 16; case v8i1: case v8i8: case v8i16: @@ -396,7 +552,15 @@ class MVT { case v8i64: case v8f16: case v8f32: - case v8f64: return 8; + case v8f64: + case nxv8i1: + case nxv8i8: + case nxv8i16: + case nxv8i32: + case nxv8i64: + case nxv8f16: + case nxv8f32: + case nxv8f64: return 8; case v4i1: case v4i8: case v4i16: @@ -404,7 +568,15 @@ class MVT { case v4i64: case v4f16: case v4f32: - case v4f64: return 4; + case v4f64: + case nxv4i1: + case nxv4i8: + case nxv4i16: + case nxv4i32: + case nxv4i64: + case nxv4f16: + case nxv4f32: + case nxv4f64: return 4; case v2i1: case v2i8: case v2i16: @@ -412,17 +584,37 @@ class MVT { case v2i64: case v2f16: case v2f32: - case v2f64: return 2; + case v2f64: + case nxv2i1: + case nxv2i8: + case nxv2i16: + case nxv2i32: + case nxv2i64: + case nxv2f16: + case nxv2f32: + case nxv2f64: return 2; + case v1i1: case v1i8: case v1i16: case v1i32: case v1i64: case v1i128: case v1f32: - case v1f64: return 1; + case v1f64: + case nxv1i1: + case nxv1i8: + case nxv1i16: + case nxv1i32: + case nxv1i64: + case nxv1f32: + case nxv1f64: return 1; } } + MVT::ElementCount getVectorElementCount() const { + return { getVectorNumElements(), isScalableVector() }; + } + unsigned getSizeInBits() const { switch (SimpleTy) { default: @@ -442,17 +634,26 @@ class MVT { "in codegen and has no size"); case Metadata: llvm_unreachable("Value type is metadata."); - case i1 : return 1; - case v2i1: return 2; - case v4i1: return 4; + case i1: + case v1i1: + case nxv1i1: return 1; + case v2i1: + case nxv2i1: return 2; + case v4i1: + case nxv4i1: return 4; case i8 : case v1i8: - case v8i1: return 8; + case v8i1: + case nxv1i8: + case nxv8i1: return 8; case i16 : case f16: case v16i1: case v2i8: - case v1i16: return 16; + case v1i16: + case nxv16i1: + case nxv2i8: + case nxv1i16: return 16; case f32 : case i32 : case v32i1: @@ -460,7 +661,13 @@ class MVT { case v2i16: case v2f16: case v1f32: - case v1i32: return 32; + case v1i32: + case nxv32i1: + case nxv4i8: + case nxv2i16: + case nxv1i32: + case nxv2f16: + case nxv1f32: return 32; case x86mmx: case f64 : case i64 : @@ -471,7 +678,14 @@ class MVT { case v1i64: case v4f16: case v2f32: - case v1f64: return 64; + case v1f64: + case nxv8i8: + case nxv4i16: + case nxv2i32: + case nxv1i64: + case nxv4f16: + case nxv2f32: + case nxv1f64: return 64; case f80 : return 80; case f128: case ppcf128: @@ -483,29 +697,50 @@ class MVT { case v1i128: case v8f16: case v4f32: - case v2f64: return 128; + case v2f64: + case nxv16i8: + case nxv8i16: + case nxv4i32: + case nxv2i64: + case nxv8f16: + case nxv4f32: + case nxv2f64: return 128; case v32i8: case v16i16: case v8i32: case v4i64: case v8f32: - case v4f64: return 256; + case v4f64: + case nxv32i8: + case nxv16i16: + case nxv8i32: + case nxv4i64: + case nxv8f32: + case nxv4f64: return 256; case v512i1: case v64i8: case v32i16: case v16i32: case v8i64: case v16f32: - case v8f64: return 512; + case v8f64: + case nxv32i16: + case nxv16i32: + case nxv8i64: + case nxv16f32: + case nxv8f64: return 512; case v1024i1: case v128i8: case v64i16: case v32i32: - case v16i64: return 1024; + case v16i64: + case nxv32i32: + case nxv16i64: return 1024; case v256i8: case v128i16: case v64i32: - case v32i64: return 2048; + case v32i64: + case nxv32i64: return 2048; } } @@ -587,6 +822,7 @@ class MVT { default: break; case MVT::i1: + if (NumElements == 1) return MVT::v1i1; if (NumElements == 2) return MVT::v2i1; if (NumElements == 4) return MVT::v4i1; if (NumElements == 8) return MVT::v8i1; @@ -659,6 +895,84 @@ class MVT { return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); } + static MVT getScalableVectorVT(MVT VT, unsigned NumElements) { + switch(VT.SimpleTy) { + default: + break; + case MVT::i1: + if (NumElements == 1) return MVT::nxv1i1; + if (NumElements == 2) return MVT::nxv2i1; + if (NumElements == 4) return MVT::nxv4i1; + if (NumElements == 8) return MVT::nxv8i1; + if (NumElements == 16) return MVT::nxv16i1; + if (NumElements == 32) return MVT::nxv32i1; + break; + case MVT::i8: + if (NumElements == 1) return MVT::nxv1i8; + if (NumElements == 2) return MVT::nxv2i8; + if (NumElements == 4) return MVT::nxv4i8; + if (NumElements == 8) return MVT::nxv8i8; + if (NumElements == 16) return MVT::nxv16i8; + if (NumElements == 32) return MVT::nxv32i8; + break; + case MVT::i16: + if (NumElements == 1) return MVT::nxv1i16; + if (NumElements == 2) return MVT::nxv2i16; + if (NumElements == 4) return MVT::nxv4i16; + if (NumElements == 8) return MVT::nxv8i16; + if (NumElements == 16) return MVT::nxv16i16; + if (NumElements == 32) return MVT::nxv32i16; + break; + case MVT::i32: + if (NumElements == 1) return MVT::nxv1i32; + if (NumElements == 2) return MVT::nxv2i32; + if (NumElements == 4) return MVT::nxv4i32; + if (NumElements == 8) return MVT::nxv8i32; + if (NumElements == 16) return MVT::nxv16i32; + if (NumElements == 32) return MVT::nxv32i32; + break; + case MVT::i64: + if (NumElements == 1) return MVT::nxv1i64; + if (NumElements == 2) return MVT::nxv2i64; + if (NumElements == 4) return MVT::nxv4i64; + if (NumElements == 8) return MVT::nxv8i64; + if (NumElements == 16) return MVT::nxv16i64; + if (NumElements == 32) return MVT::nxv32i64; + break; + case MVT::f16: + if (NumElements == 2) return MVT::nxv2f16; + if (NumElements == 4) return MVT::nxv4f16; + if (NumElements == 8) return MVT::nxv8f16; + break; + case MVT::f32: + if (NumElements == 1) return MVT::nxv1f32; + if (NumElements == 2) return MVT::nxv2f32; + if (NumElements == 4) return MVT::nxv4f32; + if (NumElements == 8) return MVT::nxv8f32; + if (NumElements == 16) return MVT::nxv16f32; + break; + case MVT::f64: + if (NumElements == 1) return MVT::nxv1f64; + if (NumElements == 2) return MVT::nxv2f64; + if (NumElements == 4) return MVT::nxv4f64; + if (NumElements == 8) return MVT::nxv8f64; + break; + } + return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); + } + + static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) { + if (IsScalable) + return getScalableVectorVT(VT, NumElements); + return getVectorVT(VT, NumElements); + } + + static MVT getVectorVT(MVT VT, MVT::ElementCount EC) { + if (EC.Scalable) + return getScalableVectorVT(VT, EC.Min); + return getVectorVT(VT, EC.Min); + } + /// Return the value type corresponding to the specified type. This returns /// all pointers as iPTR. If HandleUnknown is true, unknown types are /// returned as Other, otherwise they are invalid. @@ -709,6 +1023,14 @@ class MVT { MVT::FIRST_FP_VECTOR_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1)); } + static mvt_range integer_scalable_vector_valuetypes() { + return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1)); + } + static mvt_range fp_scalable_vector_valuetypes() { + return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1)); + } /// @} }; diff --git a/include/llvm/CodeGen/PBQP/CostAllocator.h b/include/llvm/CodeGen/PBQP/CostAllocator.h index 02d39fe383f1f2dbf7cb275c04a553608b5f6a48..bde451ae1fccd24748e7b53e3043f4b742d0e9cf 100644 --- a/include/llvm/CodeGen/PBQP/CostAllocator.h +++ b/include/llvm/CodeGen/PBQP/CostAllocator.h @@ -1,4 +1,4 @@ -//===---------- CostAllocator.h - PBQP Cost Allocator -----------*- C++ -*-===// +//===- CostAllocator.h - PBQP Cost Allocator --------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,26 +19,28 @@ #define LLVM_CODEGEN_PBQP_COSTALLOCATOR_H #include "llvm/ADT/DenseSet.h" +#include +#include #include -#include namespace llvm { namespace PBQP { -template -class ValuePool { +template class ValuePool { public: - typedef std::shared_ptr PoolRef; + using PoolRef = std::shared_ptr; private: - class PoolEntry : public std::enable_shared_from_this { public: template PoolEntry(ValuePool &Pool, ValueKeyT Value) : Pool(Pool), Value(std::move(Value)) {} + ~PoolEntry() { Pool.removeEntry(this); } - const ValueT& getValue() const { return Value; } + + const ValueT &getValue() const { return Value; } + private: ValuePool &Pool; ValueT Value; @@ -46,10 +48,10 @@ private: class PoolEntryDSInfo { public: - static inline PoolEntry* getEmptyKey() { return nullptr; } + static inline PoolEntry *getEmptyKey() { return nullptr; } - static inline PoolEntry* getTombstoneKey() { - return reinterpret_cast(static_cast(1)); + static inline PoolEntry *getTombstoneKey() { + return reinterpret_cast(static_cast(1)); } template @@ -66,8 +68,7 @@ private: } template - static - bool isEqual(const ValueKeyT1 &C1, const ValueKeyT2 &C2) { + static bool isEqual(const ValueKeyT1 &C1, const ValueKeyT2 &C2) { return C1 == C2; } @@ -83,10 +84,9 @@ private: return P1 == P2; return isEqual(P1->getValue(), P2); } - }; - typedef DenseSet EntrySetT; + using EntrySetT = DenseSet; EntrySetT EntrySet; @@ -105,28 +105,31 @@ public: } }; -template -class PoolCostAllocator { +template class PoolCostAllocator { private: - typedef ValuePool VectorCostPool; - typedef ValuePool MatrixCostPool; + using VectorCostPool = ValuePool; + using MatrixCostPool = ValuePool; + public: - typedef VectorT Vector; - typedef MatrixT Matrix; - typedef typename VectorCostPool::PoolRef VectorPtr; - typedef typename MatrixCostPool::PoolRef MatrixPtr; + using Vector = VectorT; + using Matrix = MatrixT; + using VectorPtr = typename VectorCostPool::PoolRef; + using MatrixPtr = typename MatrixCostPool::PoolRef; + + template VectorPtr getVector(VectorKeyT v) { + return VectorPool.getValue(std::move(v)); + } - template - VectorPtr getVector(VectorKeyT v) { return VectorPool.getValue(std::move(v)); } + template MatrixPtr getMatrix(MatrixKeyT m) { + return MatrixPool.getValue(std::move(m)); + } - template - MatrixPtr getMatrix(MatrixKeyT m) { return MatrixPool.getValue(std::move(m)); } private: VectorCostPool VectorPool; MatrixCostPool MatrixPool; }; -} // namespace PBQP -} // namespace llvm +} // end namespace PBQP +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_PBQP_COSTALLOCATOR_H diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h index 83487e6a808af5bc012260670ccc8726bc9a6eea..e94878ced10dc9a1dfe2458c8a62fb062f9361bf 100644 --- a/include/llvm/CodeGen/PBQP/Graph.h +++ b/include/llvm/CodeGen/PBQP/Graph.h @@ -1,4 +1,4 @@ -//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===// +//===- Graph.h - PBQP Graph -------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,14 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_CODEGEN_PBQP_GRAPH_H #define LLVM_CODEGEN_PBQP_GRAPH_H #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" #include #include +#include #include -#include #include namespace llvm { @@ -28,8 +26,8 @@ namespace PBQP { class GraphBase { public: - typedef unsigned NodeId; - typedef unsigned EdgeId; + using NodeId = unsigned; + using EdgeId = unsigned; /// @brief Returns a value representing an invalid (non-existent) node. static NodeId invalidNodeId() { @@ -48,32 +46,32 @@ namespace PBQP { template class Graph : public GraphBase { private: - typedef typename SolverT::CostAllocator CostAllocator; + using CostAllocator = typename SolverT::CostAllocator; + public: - typedef typename SolverT::RawVector RawVector; - typedef typename SolverT::RawMatrix RawMatrix; - typedef typename SolverT::Vector Vector; - typedef typename SolverT::Matrix Matrix; - typedef typename CostAllocator::VectorPtr VectorPtr; - typedef typename CostAllocator::MatrixPtr MatrixPtr; - typedef typename SolverT::NodeMetadata NodeMetadata; - typedef typename SolverT::EdgeMetadata EdgeMetadata; - typedef typename SolverT::GraphMetadata GraphMetadata; + using RawVector = typename SolverT::RawVector; + using RawMatrix = typename SolverT::RawMatrix; + using Vector = typename SolverT::Vector; + using Matrix = typename SolverT::Matrix; + using VectorPtr = typename CostAllocator::VectorPtr; + using MatrixPtr = typename CostAllocator::MatrixPtr; + using NodeMetadata = typename SolverT::NodeMetadata; + using EdgeMetadata = typename SolverT::EdgeMetadata; + using GraphMetadata = typename SolverT::GraphMetadata; private: - class NodeEntry { public: - typedef std::vector AdjEdgeList; - typedef AdjEdgeList::size_type AdjEdgeIdx; - typedef AdjEdgeList::const_iterator AdjEdgeItr; + using AdjEdgeList = std::vector; + using AdjEdgeIdx = AdjEdgeList::size_type; + using AdjEdgeItr = AdjEdgeList::const_iterator; + + NodeEntry(VectorPtr Costs) : Costs(std::move(Costs)) {} static AdjEdgeIdx getInvalidAdjEdgeIdx() { return std::numeric_limits::max(); } - NodeEntry(VectorPtr Costs) : Costs(std::move(Costs)) {} - AdjEdgeIdx addAdjEdgeId(EdgeId EId) { AdjEdgeIdx Idx = AdjEdgeIds.size(); AdjEdgeIds.push_back(EId); @@ -96,6 +94,7 @@ namespace PBQP { VectorPtr Costs; NodeMetadata Metadata; + private: AdjEdgeList AdjEdgeIds; }; @@ -150,8 +149,10 @@ namespace PBQP { NodeId getN1Id() const { return NIds[0]; } NodeId getN2Id() const { return NIds[1]; } + MatrixPtr Costs; EdgeMetadata Metadata; + private: NodeId NIds[2]; typename NodeEntry::AdjEdgeIdx ThisEdgeAdjIdxs[2]; @@ -161,18 +162,20 @@ namespace PBQP { GraphMetadata Metadata; CostAllocator CostAlloc; - SolverT *Solver; + SolverT *Solver = nullptr; - typedef std::vector NodeVector; - typedef std::vector FreeNodeVector; + using NodeVector = std::vector; + using FreeNodeVector = std::vector; NodeVector Nodes; FreeNodeVector FreeNodeIds; - typedef std::vector EdgeVector; - typedef std::vector FreeEdgeVector; + using EdgeVector = std::vector; + using FreeEdgeVector = std::vector; EdgeVector Edges; FreeEdgeVector FreeEdgeIds; + Graph(const Graph &Other) {} + // ----- INTERNAL METHODS ----- NodeEntry &getNode(NodeId NId) { @@ -220,20 +223,18 @@ namespace PBQP { return EId; } - Graph(const Graph &Other) {} void operator=(const Graph &Other) {} public: - - typedef typename NodeEntry::AdjEdgeItr AdjEdgeItr; + using AdjEdgeItr = typename NodeEntry::AdjEdgeItr; class NodeItr { public: - typedef std::forward_iterator_tag iterator_category; - typedef NodeId value_type; - typedef int difference_type; - typedef NodeId* pointer; - typedef NodeId& reference; + using iterator_category = std::forward_iterator_tag; + using value_type = NodeId; + using difference_type = int; + using pointer = NodeId *; + using reference = NodeId &; NodeItr(NodeId CurNId, const Graph &G) : CurNId(CurNId), EndNId(G.Nodes.size()), FreeNodeIds(G.FreeNodeIds) { @@ -283,53 +284,65 @@ namespace PBQP { class NodeIdSet { public: - NodeIdSet(const Graph &G) : G(G) { } + NodeIdSet(const Graph &G) : G(G) {} + NodeItr begin() const { return NodeItr(0, G); } NodeItr end() const { return NodeItr(G.Nodes.size(), G); } + bool empty() const { return G.Nodes.empty(); } + typename NodeVector::size_type size() const { return G.Nodes.size() - G.FreeNodeIds.size(); } + private: const Graph& G; }; class EdgeIdSet { public: - EdgeIdSet(const Graph &G) : G(G) { } + EdgeIdSet(const Graph &G) : G(G) {} + EdgeItr begin() const { return EdgeItr(0, G); } EdgeItr end() const { return EdgeItr(G.Edges.size(), G); } + bool empty() const { return G.Edges.empty(); } + typename NodeVector::size_type size() const { return G.Edges.size() - G.FreeEdgeIds.size(); } + private: const Graph& G; }; class AdjEdgeIdSet { public: - AdjEdgeIdSet(const NodeEntry &NE) : NE(NE) { } + AdjEdgeIdSet(const NodeEntry &NE) : NE(NE) {} + typename NodeEntry::AdjEdgeItr begin() const { return NE.getAdjEdgeIds().begin(); } + typename NodeEntry::AdjEdgeItr end() const { return NE.getAdjEdgeIds().end(); } + bool empty() const { return NE.getAdjEdgeIds().empty(); } + typename NodeEntry::AdjEdgeList::size_type size() const { return NE.getAdjEdgeIds().size(); } + private: const NodeEntry &NE; }; /// @brief Construct an empty PBQP graph. - Graph() : Solver(nullptr) {} + Graph() = default; /// @brief Construct an empty PBQP graph with the given graph metadata. - Graph(GraphMetadata Metadata) - : Metadata(std::move(Metadata)), Solver(nullptr) {} + Graph(GraphMetadata Metadata) : Metadata(std::move(Metadata)) {} /// @brief Get a reference to the graph metadata. GraphMetadata& getMetadata() { return Metadata; } @@ -656,7 +669,7 @@ namespace PBQP { } }; -} // namespace PBQP -} // namespace llvm +} // end namespace PBQP +} // end namespace llvm #endif // LLVM_CODEGEN_PBQP_GRAPH_HPP diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h index 278787550a43c7af2e49465caa61adc16ed071b2..ba405e816d1076438e9f226a542516f7fa835750 100644 --- a/include/llvm/CodeGen/PBQP/Math.h +++ b/include/llvm/CodeGen/PBQP/Math.h @@ -1,4 +1,4 @@ -//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===// +//===- Math.h - PBQP Vector and Matrix classes ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,20 +11,22 @@ #define LLVM_CODEGEN_PBQP_MATH_H #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include #include #include +#include namespace llvm { namespace PBQP { -typedef float PBQPNum; +using PBQPNum = float; /// \brief PBQP Vector class. class Vector { friend hash_code hash_value(const Vector &); -public: +public: /// \brief Construct a PBQP vector of the given size. explicit Vector(unsigned Length) : Length(Length), Data(llvm::make_unique(Length)) {} @@ -120,8 +122,8 @@ OStream& operator<<(OStream &OS, const Vector &V) { class Matrix { private: friend hash_code hash_value(const Matrix &); -public: +public: /// \brief Construct a PBQP Matrix with the given dimensions. Matrix(unsigned Rows, unsigned Cols) : Rows(Rows), Cols(Cols), Data(llvm::make_unique(Rows * Cols)) { @@ -253,9 +255,11 @@ OStream& operator<<(OStream &OS, const Matrix &M) { template class MDVector : public Vector { public: - MDVector(const Vector &v) : Vector(v), md(*this) { } + MDVector(const Vector &v) : Vector(v), md(*this) {} MDVector(Vector &&v) : Vector(std::move(v)), md(*this) { } + const Metadata& getMetadata() const { return md; } + private: Metadata md; }; @@ -268,9 +272,11 @@ inline hash_code hash_value(const MDVector &V) { template class MDMatrix : public Matrix { public: - MDMatrix(const Matrix &m) : Matrix(m), md(*this) { } + MDMatrix(const Matrix &m) : Matrix(m), md(*this) {} MDMatrix(Matrix &&m) : Matrix(std::move(m)), md(*this) { } + const Metadata& getMetadata() const { return md; } + private: Metadata md; }; @@ -280,7 +286,7 @@ inline hash_code hash_value(const MDMatrix &M) { return hash_value(static_cast(M)); } -} // namespace PBQP -} // namespace llvm +} // end namespace PBQP +} // end namespace llvm #endif // LLVM_CODEGEN_PBQP_MATH_H diff --git a/include/llvm/CodeGen/PBQP/ReductionRules.h b/include/llvm/CodeGen/PBQP/ReductionRules.h index d4a544bfe72144416d5defaa557a7d7db70cf5a2..8aeb5193676084137c376426eb8e42c57ef7fb28 100644 --- a/include/llvm/CodeGen/PBQP/ReductionRules.h +++ b/include/llvm/CodeGen/PBQP/ReductionRules.h @@ -1,4 +1,4 @@ -//===----------- ReductionRules.h - Reduction Rules -------------*- C++ -*-===// +//===- ReductionRules.h - Reduction Rules -----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,6 +17,8 @@ #include "Graph.h" #include "Math.h" #include "Solution.h" +#include +#include namespace llvm { namespace PBQP { @@ -27,11 +29,11 @@ namespace PBQP { /// neighbor. Notify the problem domain. template void applyR1(GraphT &G, typename GraphT::NodeId NId) { - typedef typename GraphT::NodeId NodeId; - typedef typename GraphT::EdgeId EdgeId; - typedef typename GraphT::Vector Vector; - typedef typename GraphT::Matrix Matrix; - typedef typename GraphT::RawVector RawVector; + using NodeId = typename GraphT::NodeId; + using EdgeId = typename GraphT::EdgeId; + using Vector = typename GraphT::Vector; + using Matrix = typename GraphT::Matrix; + using RawVector = typename GraphT::RawVector; assert(G.getNodeDegree(NId) == 1 && "R1 applied to node with degree != 1."); @@ -71,11 +73,11 @@ namespace PBQP { template void applyR2(GraphT &G, typename GraphT::NodeId NId) { - typedef typename GraphT::NodeId NodeId; - typedef typename GraphT::EdgeId EdgeId; - typedef typename GraphT::Vector Vector; - typedef typename GraphT::Matrix Matrix; - typedef typename GraphT::RawMatrix RawMatrix; + using NodeId = typename GraphT::NodeId; + using EdgeId = typename GraphT::EdgeId; + using Vector = typename GraphT::Vector; + using Matrix = typename GraphT::Matrix; + using RawMatrix = typename GraphT::RawMatrix; assert(G.getNodeDegree(NId) == 2 && "R2 applied to node with degree != 2."); @@ -177,9 +179,9 @@ namespace PBQP { // state. template Solution backpropagate(GraphT& G, StackT stack) { - typedef GraphBase::NodeId NodeId; - typedef typename GraphT::Matrix Matrix; - typedef typename GraphT::RawVector RawVector; + using NodeId = GraphBase::NodeId; + using Matrix = typename GraphT::Matrix; + using RawVector = typename GraphT::RawVector; Solution s; @@ -215,7 +217,7 @@ namespace PBQP { return s; } -} // namespace PBQP -} // namespace llvm +} // end namespace PBQP +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_PBQP_REDUCTIONRULES_H diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h index d96b5eac45200de1f7e93e3f7d5de9c53b0f0706..8d5d2374679d3aa783c3dd34dbd2ac511c493ef8 100644 --- a/include/llvm/CodeGen/PBQP/Solution.h +++ b/include/llvm/CodeGen/PBQP/Solution.h @@ -26,7 +26,7 @@ namespace PBQP { /// To get the selection for each node in the problem use the getSelection method. class Solution { private: - typedef std::map SelectionsMap; + using SelectionsMap = std::map; SelectionsMap selections; unsigned r0Reductions = 0; diff --git a/include/llvm/CodeGen/PBQPRAConstraint.h b/include/llvm/CodeGen/PBQPRAConstraint.h index 833b9bad613ff03f533ed9a0cec5f78cfc0f1bb6..269b7a7b3a35d5cae61f651a7c5258628beccd72 100644 --- a/include/llvm/CodeGen/PBQPRAConstraint.h +++ b/include/llvm/CodeGen/PBQPRAConstraint.h @@ -1,4 +1,4 @@ -//===-- RegAllocPBQP.h ------------------------------------------*- C++ -*-===// +//===- RegAllocPBQP.h -------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,23 +16,22 @@ #ifndef LLVM_CODEGEN_PBQPRACONSTRAINT_H #define LLVM_CODEGEN_PBQPRACONSTRAINT_H +#include #include #include namespace llvm { + namespace PBQP { namespace RegAlloc { + // Forward declare PBQP graph class. class PBQPRAGraph; -} -} -class LiveIntervals; -class MachineBlockFrequencyInfo; -class MachineFunction; -class TargetRegisterInfo; +} // end namespace RegAlloc +} // end namespace PBQP -typedef PBQP::RegAlloc::PBQPRAGraph PBQPRAGraph; +using PBQPRAGraph = PBQP::RegAlloc::PBQPRAGraph; /// @brief Abstract base for classes implementing PBQP register allocation /// constraints (e.g. Spill-costs, interference, coalescing). @@ -40,6 +39,7 @@ class PBQPRAConstraint { public: virtual ~PBQPRAConstraint() = 0; virtual void apply(PBQPRAGraph &G) = 0; + private: virtual void anchor(); }; @@ -59,11 +59,13 @@ public: if (C) Constraints.push_back(std::move(C)); } + private: std::vector> Constraints; + void anchor() override; }; -} +} // end namespace llvm -#endif /* LLVM_CODEGEN_PBQPRACONSTRAINT_H */ +#endif // LLVM_CODEGEN_PBQPRACONSTRAINT_H diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 42299b5294108362e8e506260b7e82ef59e0df78..96cfce5b84dfe613a087773a527d17ea8839ba63 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -33,7 +33,7 @@ class raw_ostream; /// List of target independent CodeGen pass IDs. namespace llvm { - FunctionPass *createAtomicExpandPass(const TargetMachine *TM); + FunctionPass *createAtomicExpandPass(); /// createUnreachableBlockEliminationPass - The LLVM code generator does not /// work well with unreachable basic blocks (what live ranges make sense for a @@ -66,7 +66,11 @@ namespace llvm { /// createCodeGenPreparePass - Transform the code to expose more pattern /// matching during instruction selection. - FunctionPass *createCodeGenPreparePass(const TargetMachine *TM = nullptr); + FunctionPass *createCodeGenPreparePass(); + + /// createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather + /// and scatter intrinsics with scalar code when target doesn't support them. + FunctionPass *createScalarizeMaskedMemIntrinPass(); /// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg /// load-linked/store-conditional loops. @@ -129,9 +133,16 @@ namespace llvm { // instruction and update the MachineFunctionInfo with that information. extern char &ShrinkWrapID; + /// LiveRangeShrink pass. Move instruction close to its definition to shrink + /// the definition's live range. + extern char &LiveRangeShrinkID; + /// Greedy register allocator. extern char &RAGreedyID; + /// Basic register allocator. + extern char &RABasicID; + /// VirtRegRewriter pass. Rewrite virtual registers to physical registers as /// assigned in VirtRegMap. extern char &VirtRegRewriterID; @@ -169,7 +180,7 @@ namespace llvm { /// PrologEpilogCodeInserter - This pass inserts prolog and epilog code, /// and eliminates abstract frame references. extern char &PrologEpilogCodeInserterID; - MachineFunctionPass *createPrologEpilogInserterPass(const TargetMachine *TM); + MachineFunctionPass *createPrologEpilogInserterPass(); /// ExpandPostRAPseudos - This pass expands pseudo instructions after /// register allocation. @@ -297,7 +308,7 @@ namespace llvm { /// createStackProtectorPass - This pass adds stack protectors to functions. /// - FunctionPass *createStackProtectorPass(const TargetMachine *TM); + FunctionPass *createStackProtectorPass(); /// createMachineVerifierPass - This pass verifies cenerated machine code /// instructions for correctness. @@ -306,11 +317,11 @@ namespace llvm { /// createDwarfEHPass - This pass mulches exception handling code into a form /// adapted to code generation. Required if using dwarf exception handling. - FunctionPass *createDwarfEHPass(const TargetMachine *TM); + FunctionPass *createDwarfEHPass(); /// createWinEHPass - Prepares personality functions used by MSVC on Windows, /// in addition to the Itanium LSDA based personalities. - FunctionPass *createWinEHPass(const TargetMachine *TM); + FunctionPass *createWinEHPass(); /// createSjLjEHPreparePass - This pass adapts exception handling code to use /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. @@ -354,12 +365,12 @@ namespace llvm { /// InterleavedAccess Pass - This pass identifies and matches interleaved /// memory accesses to target specific intrinsics. /// - FunctionPass *createInterleavedAccessPass(const TargetMachine *TM); + FunctionPass *createInterleavedAccessPass(); /// LowerEmuTLS - This pass generates __emutls_[vt].xyz variables for all /// TLS variables for the emulated TLS model. /// - ModulePass *createLowerEmuTLSPass(const TargetMachine *TM); + ModulePass *createLowerEmuTLSPass(); /// This pass lowers the @llvm.load.relative intrinsic to instructions. /// This is unsafe to do earlier because a pass may combine the constant @@ -376,7 +387,7 @@ namespace llvm { /// This pass splits the stack into a safe stack and an unsafe stack to /// protect against stack-based overflow vulnerabilities. - FunctionPass *createSafeStackPass(const TargetMachine *TM = nullptr); + FunctionPass *createSafeStackPass(); /// This pass detects subregister lanes in a virtual register that are used /// independently of other lanes and splits them into separate virtual @@ -405,35 +416,10 @@ namespace llvm { /// printing assembly. ModulePass *createMachineOutlinerPass(); -} // End llvm namespace + /// This pass expands the experimental reduction intrinsics into sequences of + /// shuffles. + FunctionPass *createExpandReductionsPass(); -/// Target machine pass initializer for passes with dependencies. Use with -/// INITIALIZE_TM_PASS_END. -#define INITIALIZE_TM_PASS_BEGIN INITIALIZE_PASS_BEGIN - -/// Target machine pass initializer for passes with dependencies. Use with -/// INITIALIZE_TM_PASS_BEGIN. -#define INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis) \ - PassInfo *PI = new PassInfo( \ - name, arg, &passName::ID, \ - PassInfo::NormalCtor_t(callDefaultCtor), cfg, analysis, \ - PassInfo::TargetMachineCtor_t(callTargetMachineCtor)); \ - Registry.registerPass(*PI, true); \ - return PI; \ - } \ - static llvm::once_flag Initialize##passName##PassFlag; \ - void llvm::initialize##passName##Pass(PassRegistry &Registry) { \ - llvm::call_once(Initialize##passName##PassFlag, \ - initialize##passName##PassOnce, std::ref(Registry)); \ - } - -/// This initializer registers TargetMachine constructor, so the pass being -/// initialized can use target dependent interfaces. Please do not move this -/// macro to be together with INITIALIZE_PASS, which is a complete target -/// independent initializer, and we don't want to make libScalarOpts depend -/// on libCodeGen. -#define INITIALIZE_TM_PASS(passName, arg, name, cfg, analysis) \ - INITIALIZE_TM_PASS_BEGIN(passName, arg, name, cfg, analysis) \ - INITIALIZE_TM_PASS_END(passName, arg, name, cfg, analysis) +} // End llvm namespace #endif diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h index 8872a5dc54a1d66686e672a0499cce2c2325eee9..5b342863eb50de45ab445c61767298aaa0f4dbf1 100644 --- a/include/llvm/CodeGen/RegAllocPBQP.h +++ b/include/llvm/CodeGen/RegAllocPBQP.h @@ -130,10 +130,10 @@ inline hash_code hash_value(const AllowedRegVector &OptRegs) { /// \brief Holds graph-level metadata relevant to PBQP RA problems. class GraphMetadata { private: - typedef ValuePool AllowedRegVecPool; + using AllowedRegVecPool = ValuePool; public: - typedef AllowedRegVecPool::PoolRef AllowedRegVecRef; + using AllowedRegVecRef = AllowedRegVecPool::PoolRef; GraphMetadata(MachineFunction &MF, LiveIntervals &LIS, @@ -167,17 +167,17 @@ private: /// \brief Holds solver state and other metadata relevant to each PBQP RA node. class NodeMetadata { public: - typedef RegAlloc::AllowedRegVector AllowedRegVector; + using AllowedRegVector = RegAlloc::AllowedRegVector; // The node's reduction state. The order in this enum is important, // as it is assumed nodes can only progress up (i.e. towards being // optimally reducible) when reducing the graph. - typedef enum { + using ReductionState = enum { Unprocessed, NotProvablyAllocatable, ConservativelyAllocatable, OptimallyReducible - } ReductionState; + }; NodeMetadata() = default; @@ -267,23 +267,23 @@ private: class RegAllocSolverImpl { private: - typedef MDMatrix RAMatrix; + using RAMatrix = MDMatrix; public: - typedef PBQP::Vector RawVector; - typedef PBQP::Matrix RawMatrix; - typedef PBQP::Vector Vector; - typedef RAMatrix Matrix; - typedef PBQP::PoolCostAllocator CostAllocator; + using RawVector = PBQP::Vector; + using RawMatrix = PBQP::Matrix; + using Vector = PBQP::Vector; + using Matrix = RAMatrix; + using CostAllocator = PBQP::PoolCostAllocator; - typedef GraphBase::NodeId NodeId; - typedef GraphBase::EdgeId EdgeId; + using NodeId = GraphBase::NodeId; + using EdgeId = GraphBase::EdgeId; - typedef RegAlloc::NodeMetadata NodeMetadata; - struct EdgeMetadata { }; - typedef RegAlloc::GraphMetadata GraphMetadata; + using NodeMetadata = RegAlloc::NodeMetadata; + struct EdgeMetadata {}; + using GraphMetadata = RegAlloc::GraphMetadata; - typedef PBQP::Graph Graph; + using Graph = PBQP::Graph; RegAllocSolverImpl(Graph &G) : G(G) {} @@ -426,7 +426,7 @@ private: std::vector reduce() { assert(!G.empty() && "Cannot reduce empty graph."); - typedef GraphBase::NodeId NodeId; + using NodeId = GraphBase::NodeId; std::vector NodeStack; // Consume worklists. @@ -459,7 +459,6 @@ private: ConservativelyAllocatableNodes.erase(NItr); NodeStack.push_back(NId); G.disconnectAllNeighborsFromNode(NId); - } else if (!NotProvablyAllocatableNodes.empty()) { NodeSet::iterator NItr = std::min_element(NotProvablyAllocatableNodes.begin(), @@ -493,7 +492,7 @@ private: }; Graph& G; - typedef std::set NodeSet; + using NodeSet = std::set; NodeSet OptimallyReducibleNodes; NodeSet ConservativelyAllocatableNodes; NodeSet NotProvablyAllocatableNodes; @@ -501,7 +500,7 @@ private: class PBQPRAGraph : public PBQP::Graph { private: - typedef PBQP::Graph BaseT; + using BaseT = PBQP::Graph; public: PBQPRAGraph(GraphMetadata Metadata) : BaseT(std::move(Metadata)) {} diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h index 5c7e9999cc9a39112a0258427d6ba664d68ac412..481747dc163eea6a997e9b6e82b62580fc429c33 100644 --- a/include/llvm/CodeGen/RegAllocRegistry.h +++ b/include/llvm/CodeGen/RegAllocRegistry.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/RegAllocRegistry.h -------------------------*- C++ -*-===// +//===- llvm/CodeGen/RegAllocRegistry.h --------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,16 +19,16 @@ namespace llvm { +class FunctionPass; + //===----------------------------------------------------------------------===// /// /// RegisterRegAlloc class - Track the registration of register allocators. /// //===----------------------------------------------------------------------===// class RegisterRegAlloc : public MachinePassRegistryNode { - public: - - typedef FunctionPass *(*FunctionPassCtor)(); + using FunctionPassCtor = FunctionPass *(*)(); static MachinePassRegistry Registry; @@ -36,22 +36,26 @@ public: : MachinePassRegistryNode(N, D, (MachinePassCtor)C) { Registry.Add(this); } + ~RegisterRegAlloc() { Registry.Remove(this); } // Accessors. - // RegisterRegAlloc *getNext() const { return (RegisterRegAlloc *)MachinePassRegistryNode::getNext(); } + static RegisterRegAlloc *getList() { return (RegisterRegAlloc *)Registry.getList(); } + static FunctionPassCtor getDefault() { return (FunctionPassCtor)Registry.getDefault(); } + static void setDefault(FunctionPassCtor C) { Registry.setDefault((MachinePassCtor)C); } + static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } @@ -59,5 +63,4 @@ public: } // end namespace llvm - -#endif +#endif // LLVM_CODEGEN_REGALLOCREGISTRY_H diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index a3ea41d5236e3be47d39ba9d2d82b26617552cf6..e997aaf269e31fd412273ed47b0aba434c49fd0d 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -32,7 +32,9 @@ namespace llvm { class LiveIntervals; +class MachineFunction; class MachineInstr; +class MachineRegisterInfo; class RegisterClassInfo; struct RegisterMaskPair { @@ -147,12 +149,14 @@ class PressureDiff { PressureChange PressureChanges[MaxPSets]; - typedef PressureChange* iterator; + using iterator = PressureChange *; + iterator nonconst_begin() { return &PressureChanges[0]; } iterator nonconst_end() { return &PressureChanges[MaxPSets]; } public: - typedef const PressureChange* const_iterator; + using const_iterator = const PressureChange *; + const_iterator begin() const { return &PressureChanges[0]; } const_iterator end() const { return &PressureChanges[MaxPSets]; } @@ -269,7 +273,7 @@ private: } }; - typedef SparseSet RegSet; + using RegSet = SparseSet; RegSet Regs; unsigned NumRegUnits; diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 1f939e72e1396bf9f473ecc08be019fa47b14008..489c72b81a9854dddce4eda1d3d3ff56eac21048 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -156,12 +156,24 @@ public: /// available and do the appropriate bookkeeping. SPAdj is the stack /// adjustment due to call frame, it's passed along to eliminateFrameIndex(). /// Returns the scavenged register. + /// This is deprecated as it depends on the quality of the kill flags being + /// present; Use scavengeRegisterBackwards() instead! unsigned scavengeRegister(const TargetRegisterClass *RegClass, MachineBasicBlock::iterator I, int SPAdj); unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj) { return scavengeRegister(RegClass, MBBI, SPAdj); } + /// Make a register of the specific register class available from the current + /// position backwards to the place before \p To. If \p RestoreAfter is true + /// this includes the instruction following the current position. + /// SPAdj is the stack adjustment due to call frame, it's passed along to + /// eliminateFrameIndex(). + /// Returns the scavenged register. + unsigned scavengeRegisterBackwards(const TargetRegisterClass &RC, + MachineBasicBlock::iterator To, + bool RestoreAfter, int SPAdj); + /// Tell the scavenger a register is used. void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); @@ -202,8 +214,18 @@ private: /// Mark live-in registers of basic block as used. void setLiveInsUsed(const MachineBasicBlock &MBB); + + /// Spill a register after position \p After and reload it before position + /// \p UseMI. + ScavengedInfo &spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, + MachineBasicBlock::iterator After, + MachineBasicBlock::iterator &UseMI); }; +/// Replaces all frame index virtual registers with physical registers. Uses the +/// register scavenger to find an appropriate register to use. +void scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS); + } // end namespace llvm #endif // LLVM_CODEGEN_REGISTERSCAVENGING_H diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h index 3f88032cb6385a516951168a19c9365592ec6749..0a04bc6a89f4dbd90639577a578d7fb22544fddf 100644 --- a/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/include/llvm/CodeGen/RegisterUsageInfo.h @@ -1,4 +1,4 @@ -//==- RegisterUsageInfo.h - Register Usage Informartion Storage -*- C++ -*-===// +//==- RegisterUsageInfo.h - Register Usage Informartion Storage --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -20,15 +20,15 @@ #define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H #include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" +#include +#include namespace llvm { +class Function; +class TargetMachine; + class PhysicalRegisterUsageInfo : public ImmutablePass { virtual void anchor(); @@ -70,6 +70,7 @@ private: const TargetMachine *TM; }; -} -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index ddfabb0c44d633072d8c8c8cecdfcdd097ffdd2e..8c3aacaa8efc19017d18088d3192fbc5c7453bbf 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -333,12 +333,12 @@ namespace RTLIB { MEMSET, MEMMOVE, - // ELEMENT-WISE ATOMIC MEMORY - MEMCPY_ELEMENT_ATOMIC_1, - MEMCPY_ELEMENT_ATOMIC_2, - MEMCPY_ELEMENT_ATOMIC_4, - MEMCPY_ELEMENT_ATOMIC_8, - MEMCPY_ELEMENT_ATOMIC_16, + // ELEMENT-WISE UNORDERED-ATOMIC MEMORY of different element sizes + MEMCPY_ELEMENT_UNORDERED_ATOMIC_1, + MEMCPY_ELEMENT_UNORDERED_ATOMIC_2, + MEMCPY_ELEMENT_UNORDERED_ATOMIC_4, + MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, + MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, // EXCEPTION HANDLING UNWIND_RESUME, @@ -511,9 +511,10 @@ namespace RTLIB { /// UNKNOWN_LIBCALL if there is none. Libcall getSYNC(unsigned Opc, MVT VT); - /// getMEMCPY_ELEMENT_ATOMIC - Return MEMCPY_ELEMENT_ATOMIC_* value for the - /// given element size or UNKNOW_LIBCALL if there is none. - Libcall getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize); + /// getMEMCPY_ELEMENT_UNORDERED_ATOMIC - Return + /// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or + /// UNKNOW_LIBCALL if there is none. + Libcall getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize); } } diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 99afd8c5c9ab9ba9010639a0dd986352d8885ec3..4d72eda5c71ae4572e49f959c2c84121477d02ea 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -18,9 +18,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" @@ -52,14 +52,14 @@ class TargetRegisterInfo; /// These are the different kinds of scheduling dependencies. enum Kind { Data, ///< Regular data dependence (aka true-dependence). - Anti, ///< A register anti-dependedence (aka WAR). + Anti, ///< A register anti-dependence (aka WAR). Output, ///< A register output-dependence (aka WAW). Order ///< Any other ordering dependency. }; // Strong dependencies must be respected by the scheduler. Artificial // dependencies may be removed only if they are redundant with another - // strong depedence. + // strong dependence. // // Weak dependencies may be violated by the scheduling strategy, but only if // the strategy can prove it is correct to do so. @@ -342,7 +342,7 @@ class TargetRegisterInfo; /// BoundaryNodes can have DAG edges, including Data edges, but they do not /// correspond to schedulable entities (e.g. instructions) and do not have a /// valid ID. Consequently, always check for boundary nodes before accessing - /// an assoicative data structure keyed on node ID. + /// an associative data structure keyed on node ID. bool isBoundaryNode() const { return NodeNum == BoundaryID; } /// Assigns the representative SDNode for this SUnit. This may be used diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 21e1740aa6b88f4f365b89eb47cbfc0311e82fa7..218e22e4023499984a4bf0117b2594f3a8bd61c6 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -1,4 +1,4 @@ -//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// +//===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,21 +15,38 @@ #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H -#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseMultiSet.h" #include "llvm/ADT/SparseSet.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Support/Compiler.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Target/TargetRegisterInfo.h" +#include +#include #include +#include +#include namespace llvm { + + class LiveIntervals; class MachineFrameInfo; + class MachineFunction; + class MachineInstr; class MachineLoopInfo; - class MachineDominatorTree; - class RegPressureTracker; + class MachineOperand; + struct MCSchedClassDesc; class PressureDiffs; + class PseudoSourceValue; + class RegPressureTracker; + class UndefValue; + class Value; /// An individual mapping from virtual register number to SUnit. struct VReg2SUnit { @@ -69,31 +86,34 @@ namespace llvm { /// Use a SparseMultiSet to track physical registers. Storage is only /// allocated once for the pass. It can be cleared in constant time and reused /// without any frees. - typedef SparseMultiSet, uint16_t> - Reg2SUnitsMap; + using Reg2SUnitsMap = + SparseMultiSet, uint16_t>; /// Use SparseSet as a SparseMap by relying on the fact that it never /// compares ValueT's, only unsigned keys. This allows the set to be cleared /// between scheduling regions in constant time as long as ValueT does not /// require a destructor. - typedef SparseSet VReg2SUnitMap; + using VReg2SUnitMap = SparseSet; /// Track local uses of virtual registers. These uses are gathered by the DAG /// builder and may be consulted by the scheduler to avoid iterating an entire /// vreg use list. - typedef SparseMultiSet VReg2SUnitMultiMap; + using VReg2SUnitMultiMap = SparseMultiSet; + + using VReg2SUnitOperIdxMultiMap = + SparseMultiSet; - typedef SparseMultiSet - VReg2SUnitOperIdxMultiMap; + using ValueType = PointerUnion; - typedef PointerUnion ValueType; struct UnderlyingObject : PointerIntPair { UnderlyingObject(ValueType V, bool MayAlias) : PointerIntPair(V, MayAlias) {} + ValueType getValue() const { return getPointer(); } bool mayAlias() const { return getInt(); } }; - typedef SmallVector UnderlyingObjectsVector; + + using UnderlyingObjectsVector = SmallVector; /// A ScheduleDAG for scheduling lists of MachineInstr. class ScheduleDAGInstrs : public ScheduleDAG { @@ -113,10 +133,10 @@ namespace llvm { /// reordering. A specialized scheduler can override /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate /// it has taken responsibility for scheduling the terminator correctly. - bool CanHandleTerminators; + bool CanHandleTerminators = false; /// Whether lane masks should get tracked. - bool TrackLaneMasks; + bool TrackLaneMasks = false; // State specific to the current scheduling region. // ------------------------------------------------ @@ -154,12 +174,12 @@ namespace llvm { /// Tracks the last instructions in this region using each virtual register. VReg2SUnitOperIdxMultiMap CurrentVRegUses; - AliasAnalysis *AAForDep; + AliasAnalysis *AAForDep = nullptr; /// Remember a generic side-effecting instruction as we proceed. /// No other SU ever gets scheduled around it (except in the special /// case of a huge region that gets reduced). - SUnit *BarrierChain; + SUnit *BarrierChain = nullptr; public: /// A list of SUnits, used in Value2SUsMap, during DAG construction. @@ -167,7 +187,7 @@ namespace llvm { /// implementation of this data structure, such as a singly linked list /// with a memory pool (SmallVector was tried but slow and SparseSet is not /// applicable). - typedef std::list SUList; + using SUList = std::list; protected: /// \brief A map from ValueType to SUList, used during DAG construction, as @@ -215,23 +235,23 @@ namespace llvm { /// For an unanalyzable memory access, this Value is used in maps. UndefValue *UnknownValue; - typedef std::vector> - DbgValueVector; + using DbgValueVector = + std::vector>; /// Remember instruction that precedes DBG_VALUE. /// These are generated by buildSchedGraph but persist so they can be /// referenced when emitting the final schedule. DbgValueVector DbgValues; - MachineInstr *FirstDbgValue; + MachineInstr *FirstDbgValue = nullptr; /// Set of live physical registers for updating kill flags. - BitVector LiveRegs; + LivePhysRegs LiveRegs; public: explicit ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags = false); - ~ScheduleDAGInstrs() override {} + ~ScheduleDAGInstrs() override = default; /// Gets the machine model for instruction scheduling. const TargetSchedModel *getSchedModel() const { return &SchedModel; } @@ -311,7 +331,7 @@ namespace llvm { std::string getDAGName() const override; /// Fixes register kill flags that scheduling has made invalid. - void fixupKills(MachineBasicBlock *MBB); + void fixupKills(MachineBasicBlock &MBB); protected: void initSUnits(); @@ -353,6 +373,7 @@ namespace llvm { return nullptr; return I->second; } + } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h index c2013661cfff8d89b5ff163a7041961eb3964d17..d6a8c791392cadd320c77636fcc2ee824da71a67 100644 --- a/include/llvm/CodeGen/ScheduleDFS.h +++ b/include/llvm/CodeGen/ScheduleDFS.h @@ -17,9 +17,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include #include #include +#include namespace llvm { diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h index a7a6227664dee922cb37e96aa73e171d3d492783..badf927d0e95618b7f5c091649f01f24e0893045 100644 --- a/include/llvm/CodeGen/SchedulerRegistry.h +++ b/include/llvm/CodeGen/SchedulerRegistry.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/SchedulerRegistry.h ------------------------*- C++ -*-===// +//===- llvm/CodeGen/SchedulerRegistry.h -------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,7 +16,7 @@ #define LLVM_CODEGEN_SCHEDULERREGISTRY_H #include "llvm/CodeGen/MachinePassRegistry.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CodeGen.h" namespace llvm { @@ -26,15 +26,13 @@ namespace llvm { /// //===----------------------------------------------------------------------===// -class SelectionDAGISel; class ScheduleDAGSDNodes; -class SelectionDAG; -class MachineBasicBlock; +class SelectionDAGISel; class RegisterScheduler : public MachinePassRegistryNode { public: - typedef ScheduleDAGSDNodes *(*FunctionPassCtor)(SelectionDAGISel*, - CodeGenOpt::Level); + using FunctionPassCtor = ScheduleDAGSDNodes *(*)(SelectionDAGISel*, + CodeGenOpt::Level); static MachinePassRegistry Registry; @@ -45,13 +43,14 @@ public: // Accessors. - // RegisterScheduler *getNext() const { return (RegisterScheduler *)MachinePassRegistryNode::getNext(); } + static RegisterScheduler *getList() { return (RegisterScheduler *)Registry.getList(); } + static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } @@ -103,4 +102,4 @@ ScheduleDAGSDNodes *createDAGLinearizer(SelectionDAGISel *IS, } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SCHEDULERREGISTRY_H diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 6f0509543e7d88d3204ca9e28348930c6a20b8dd..f3f3003b7e20ac2cb6cdba6715b30d1e3446550e 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ---------*- C++ -*-===// +//===- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,34 +15,72 @@ #ifndef LLVM_CODEGEN_SELECTIONDAG_H #define LLVM_CODEGEN_SELECTIONDAG_H +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/ilist.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/RecyclingAllocator.h" -#include "llvm/Target/TargetMachine.h" +#include #include +#include +#include #include #include +#include +#include #include namespace llvm { +class BlockAddress; +class Constant; +class ConstantFP; +class ConstantInt; +class DataLayout; +struct fltSemantics; +class GlobalValue; +struct KnownBits; +class LLVMContext; +class MachineBasicBlock; class MachineConstantPoolValue; -class MachineFunction; -class MDNode; +class MCSymbol; class OptimizationRemarkEmitter; class SDDbgValue; -class TargetLowering; +class SelectionDAG; class SelectionDAGTargetInfo; +class TargetLowering; +class TargetMachine; +class TargetSubtargetInfo; +class Value; class SDVTListNode : public FoldingSetNode { friend struct FoldingSetTrait; + /// A reference to an Interned FoldingSetNodeID for this node. /// The Allocator in SelectionDAG holds the data. /// SDVTList contains all types which are frequently accessed in SelectionDAG. @@ -54,11 +92,13 @@ class SDVTListNode : public FoldingSetNode { /// The hash value for SDVTList is fixed, so cache it to avoid /// hash calculation. unsigned HashValue; + public: SDVTListNode(const FoldingSetNodeIDRef ID, const EVT *VT, unsigned int Num) : FastID(ID), VTs(VT), NumVTs(Num) { HashValue = ID.ComputeHash(); } + SDVTList getSDVTList() { SDVTList result = {VTs, NumVTs}; return result; @@ -71,12 +111,14 @@ template<> struct FoldingSetTrait : DefaultFoldingSetTrait DbgValues; SmallVector ByvalParmDbgValues; - typedef DenseMap > DbgValMapType; + using DbgValMapType = DenseMap>; DbgValMapType DbgValMap; - void operator=(const SDDbgInfo&) = delete; - SDDbgInfo(const SDDbgInfo&) = delete; public: - SDDbgInfo() {} + SDDbgInfo() = default; + SDDbgInfo(const SDDbgInfo &) = delete; + SDDbgInfo &operator=(const SDDbgInfo &) = delete; void add(SDDbgValue *V, const SDNode *Node, bool isParameter) { if (isParameter) { @@ -143,14 +185,14 @@ public: return ArrayRef(); } - typedef SmallVectorImpl::iterator DbgIterator; + using DbgIterator = SmallVectorImpl::iterator; + DbgIterator DbgBegin() { return DbgValues.begin(); } DbgIterator DbgEnd() { return DbgValues.end(); } DbgIterator ByvalParmDbgBegin() { return ByvalParmDbgValues.begin(); } DbgIterator ByvalParmDbgEnd() { return ByvalParmDbgValues.end(); } }; -class SelectionDAG; void checkForCycles(const SelectionDAG *DAG, bool force = false); /// This is used to represent a portion of an LLVM function in a low-level @@ -166,8 +208,8 @@ void checkForCycles(const SelectionDAG *DAG, bool force = false); /// class SelectionDAG { const TargetMachine &TM; - const SelectionDAGTargetInfo *TSI; - const TargetLowering *TLI; + const SelectionDAGTargetInfo *TSI = nullptr; + const TargetLowering *TLI = nullptr; MachineFunction *MF; LLVMContext *Context; CodeGenOpt::Level OptLevel; @@ -187,9 +229,9 @@ class SelectionDAG { /// The AllocatorType for allocating SDNodes. We use /// pool allocation with recycling. - typedef RecyclingAllocator - NodeAllocatorType; + using NodeAllocatorType = RecyclingAllocator; /// Pool allocation for nodes. NodeAllocatorType NodeAllocator; @@ -242,9 +284,11 @@ public: struct DAGNodeDeletedListener : public DAGUpdateListener { std::function Callback; + DAGNodeDeletedListener(SelectionDAG &DAG, std::function Callback) : DAGUpdateListener(DAG), Callback(std::move(Callback)) {} + void NodeDeleted(SDNode *N, SDNode *E) override { Callback(N, E); } }; @@ -253,7 +297,7 @@ public: /// have legal types. This is important after type legalization since /// any illegally typed nodes generated after this point will not experience /// type legalization. - bool NewNodesMustHaveLegalTypes; + bool NewNodesMustHaveLegalTypes = false; private: /// DAGUpdateListener is a friend so it can manipulate the listener stack. @@ -261,7 +305,7 @@ private: /// Linked list of registered DAGUpdateListener instances. /// This stack is maintained by DAGUpdateListener RAII. - DAGUpdateListener *UpdateListeners; + DAGUpdateListener *UpdateListeners = nullptr; /// Implementation of setSubgraphColor. /// Return whether we had to truncate the search. @@ -315,11 +359,10 @@ private: Node->OperandList = nullptr; } - void operator=(const SelectionDAG&) = delete; - SelectionDAG(const SelectionDAG&) = delete; - public: - explicit SelectionDAG(const TargetMachine &TM, llvm::CodeGenOpt::Level); + explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level); + SelectionDAG(const SelectionDAG &) = delete; + SelectionDAG &operator=(const SelectionDAG &) = delete; ~SelectionDAG(); /// Prepare this SelectionDAG to process code in the given MachineFunction. @@ -363,12 +406,16 @@ public: /// Convenience for setting subgraph color attribute. void setSubgraphColor(SDNode *N, const char *Color); - typedef ilist::const_iterator allnodes_const_iterator; + using allnodes_const_iterator = ilist::const_iterator; + allnodes_const_iterator allnodes_begin() const { return AllNodes.begin(); } allnodes_const_iterator allnodes_end() const { return AllNodes.end(); } - typedef ilist::iterator allnodes_iterator; + + using allnodes_iterator = ilist::iterator; + allnodes_iterator allnodes_begin() { return AllNodes.begin(); } allnodes_iterator allnodes_end() { return AllNodes.end(); } + ilist::size_type allnodes_size() const { return AllNodes.size(); } @@ -405,7 +452,7 @@ public: /// certain types of nodes together, or eliminating superfluous nodes. The /// Level argument controls whether Combine is allowed to produce nodes and /// types that are illegal on the target. - void Combine(CombineLevel Level, AliasAnalysis &AA, + void Combine(CombineLevel Level, AliasAnalysis *AA, CodeGenOpt::Level OptLevel); /// This transforms the SelectionDAG into a SelectionDAG that @@ -474,7 +521,6 @@ public: //===--------------------------------------------------------------------===// // Node creation methods. - // /// \brief Create a ConstantSDNode wrapping a constant value. /// If VT is a vector type, the constant is splatted into a BUILD_VECTOR. @@ -654,6 +700,15 @@ public: return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); } + /// Return an ISD::BUILD_VECTOR node. The number of elements in VT, + /// which must be a vector type, must match the number of operands in Ops. + /// The operands must have the same type as (or, for integers, a type wider + /// than) VT's element type. + SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef Ops) { + // VerifySDNode (via InsertNode) checks BUILD_VECTOR later. + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + /// Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all /// elements. VT must be a vector type. Op's type must be the same as (or, /// for integers, a type wider than) VT's element type. @@ -678,6 +733,10 @@ public: /// Example: shuffle A, B, <0,5,2,7> -> shuffle B, A, <4,1,6,3> SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV); + /// Convert Op, which must be of float type, to the + /// float type VT, by either extending or rounding (by truncation). + SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT); + /// Convert Op, which must be of integer type, to the /// integer type VT, by either any-extending or truncating it. SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT); @@ -723,11 +782,15 @@ public: /// \brief Create a logical NOT operation as (XOR Val, BooleanOne). SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT); - /// Return a new CALLSEQ_START node, which always must have a glue result - /// (to ensure it's not CSE'd). CALLSEQ_START does not have a useful SDLoc. - SDValue getCALLSEQ_START(SDValue Chain, SDValue Op, const SDLoc &DL) { + /// Return a new CALLSEQ_START node, that starts new call frame, in which + /// InSize bytes are set up inside CALLSEQ_START..CALLSEQ_END sequence and + /// OutSize specifies part of the frame set up prior to the sequence. + SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, + const SDLoc &DL) { SDVTList VTs = getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, Op }; + SDValue Ops[] = { Chain, + getIntPtrConstant(InSize, DL, true), + getIntPtrConstant(OutSize, DL, true) }; return getNode(ISD::CALLSEQ_START, DL, VTs, Ops); } @@ -764,7 +827,7 @@ public: SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef Ops); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - ArrayRef Ops, const SDNodeFlags *Flags = nullptr); + ArrayRef Ops, const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef ResultTys, ArrayRef Ops); SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, @@ -772,9 +835,10 @@ public: // Specialize based on number of operands. SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT); - SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N, + const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, - SDValue N2, const SDNodeFlags *Flags = nullptr); + SDValue N2, const SDNodeFlags Flags = SDNodeFlags()); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, SDValue N3); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, @@ -968,7 +1032,7 @@ public: bool IsExpanding = false); SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, - MachineMemOperand *MMO, bool IsTruncating = false, + MachineMemOperand *MMO, bool IsTruncating = false, bool IsCompressing = false); SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef Ops, MachineMemOperand *MMO); @@ -1051,6 +1115,11 @@ public: SDNode *MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef Ops); + /// Mutate the specified strict FP node to its non-strict equivalent, + /// unlinking the node from its chain and dropping the metadata arguments. + /// The node must be a strict FP node. + SDNode *mutateStrictFPToFP(SDNode *Node); + /// These are used for target selectors to create a new node /// with specified return type(s), MachineInstr opcode, and operands. /// @@ -1094,7 +1163,7 @@ public: /// Get the specified node if it's already available, or else return NULL. SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs, ArrayRef Ops, - const SDNodeFlags *Flags = nullptr); + const SDNodeFlags Flags = SDNodeFlags()); /// Creates a SDDbgValue node. SDDbgValue *getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, @@ -1148,6 +1217,12 @@ public: void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, unsigned Num); + /// If an existing load has uses of its chain, create a token factor node with + /// that chain and the new memory node's chain and update users of the old + /// chain to the token factor. This ensures that the new memory node will have + /// the same relative memory dependency position as the old load. + void makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New); + /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their /// topological order. Returns the number of nodes. @@ -1160,39 +1235,6 @@ public: AllNodes.insert(Position, AllNodes.remove(N)); } - /// Returns true if the opcode is a commutative binary operation. - static bool isCommutativeBinOp(unsigned Opcode) { - // FIXME: This should get its info from the td file, so that we can include - // target info. - switch (Opcode) { - case ISD::ADD: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: - case ISD::MUL: - case ISD::MULHU: - case ISD::MULHS: - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: - case ISD::FADD: - case ISD::FMUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::SADDO: - case ISD::UADDO: - case ISD::ADDC: - case ISD::ADDE: - case ISD::FMINNUM: - case ISD::FMAXNUM: - case ISD::FMINNAN: - case ISD::FMAXNAN: - return true; - default: return false; - } - } - /// Returns an APFloat semantics tag appropriate for the given type. If VT is /// a vector type, the element semantics are returned. static const fltSemantics &EVTToAPFloatSemantics(EVT VT) { @@ -1227,9 +1269,11 @@ public: SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); } SDDbgInfo::DbgIterator DbgEnd() { return DbgInfo->DbgEnd(); } + SDDbgInfo::DbgIterator ByvalParmDbgBegin() { return DbgInfo->ByvalParmDbgBegin(); } + SDDbgInfo::DbgIterator ByvalParmDbgEnd() { return DbgInfo->ByvalParmDbgEnd(); } @@ -1257,7 +1301,7 @@ public: SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef Ops, - const SDNodeFlags *Flags = nullptr); + const SDNodeFlags Flags = SDNodeFlags()); /// Constant fold a setcc to true or false. SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, @@ -1274,21 +1318,19 @@ public: const; /// Determine which bits of Op are known to be either zero or one and return - /// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are - /// those that are shared by every vector element. + /// them in Known. For vectors, the known bits are those that are shared by + /// every vector element. /// Targets can implement the computeKnownBitsForTargetNode method in the /// TargetLowering class to allow target nodes to be understood. - void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, - unsigned Depth = 0) const; + void computeKnownBits(SDValue Op, KnownBits &Known, unsigned Depth = 0) const; /// Determine which bits of Op are known to be either zero or one and return - /// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows - /// us to only collect the known bits that are shared by the requested vector - /// elements. + /// them in Known. The DemandedElts argument allows us to only collect the + /// known bits that are shared by the requested vector elements. /// Targets can implement the computeKnownBitsForTargetNode method in the /// TargetLowering class to allow target nodes to be understood. - void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, - const APInt &DemandedElts, unsigned Depth = 0) const; + void computeKnownBits(SDValue Op, KnownBits &Known, const APInt &DemandedElts, + unsigned Depth = 0) const; /// Used to represent the possible overflow behavior of an operation. /// Never: the operation cannot overflow. @@ -1430,10 +1472,6 @@ private: void allnodes_clear(); - SDNode *GetBinarySDNode(unsigned Opcode, const SDLoc &DL, SDVTList VTs, - SDValue N1, SDValue N2, - const SDNodeFlags *Flags = nullptr); - /// Look up the node specified by ID in CSEMap. If it exists, return it. If /// not, return the insertion token that will make insertion faster. This /// overload is for nodes other than Constant or ConstantFP, use the other one @@ -1461,10 +1499,12 @@ private: }; template <> struct GraphTraits : public GraphTraits { - typedef pointer_iterator nodes_iterator; + using nodes_iterator = pointer_iterator; + static nodes_iterator nodes_begin(SelectionDAG *G) { return nodes_iterator(G->allnodes_begin()); } + static nodes_iterator nodes_end(SelectionDAG *G) { return nodes_iterator(G->allnodes_end()); } @@ -1475,7 +1515,6 @@ SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs, ArrayRef Ops, const SDLoc &dl, EVT MemVT, MachineMemOperand *MMO) { - /// Compose node ID and try to find an existing node. FoldingSetNodeID ID; unsigned Opcode = @@ -1506,6 +1545,6 @@ SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs, return SDValue(N, 0); } -} // end namespace llvm +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_SELECTIONDAG_H diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 81cc0b39cf873cf4bda066cf029571e9d213ea53..0cd26d35a4829baeb2e480743d95c50765dd6f30 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -24,11 +24,11 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineValueType.h" @@ -37,6 +37,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -53,14 +54,18 @@ namespace llvm { -class SelectionDAG; +class APInt; +class Constant; +template struct DenseMapInfo; class GlobalValue; class MachineBasicBlock; class MachineConstantPoolValue; +class MCSymbol; +class raw_ostream; class SDNode; +class SelectionDAG; +class Type; class Value; -class MCSymbol; -template struct DenseMapInfo; void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr, bool force = false); @@ -229,13 +234,15 @@ template <> struct isPodLike { static const bool value = true; }; /// Allow casting operators to work directly on /// SDValues as if they were SDNode*'s. template<> struct simplify_type { - typedef SDNode* SimpleType; + using SimpleType = SDNode *; + static SimpleType getSimplifiedValue(SDValue &Val) { return Val.getNode(); } }; template<> struct simplify_type { - typedef /*const*/ SDNode* SimpleType; + using SimpleType = /*const*/ SDNode *; + static SimpleType getSimplifiedValue(const SDValue &Val) { return Val.getNode(); } @@ -330,7 +337,8 @@ private: /// simplify_type specializations - Allow casting operators to work directly on /// SDValues as if they were SDNode*'s. template<> struct simplify_type { - typedef SDNode* SimpleType; + using SimpleType = SDNode *; + static SimpleType getSimplifiedValue(SDUse &Val) { return Val.getNode(); } @@ -341,6 +349,11 @@ template<> struct simplify_type { /// the backend. struct SDNodeFlags { private: + // This bit is used to determine if the flags are in a defined state. + // Flag bits can only be masked out during intersection if the masking flags + // are defined. + bool AnyDefined : 1; + bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; @@ -355,22 +368,57 @@ private: public: /// Default constructor turns off all optimization flags. SDNodeFlags() - : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), - UnsafeAlgebra(false), NoNaNs(false), NoInfs(false), + : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), + Exact(false), UnsafeAlgebra(false), NoNaNs(false), NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false), AllowContract(false) {} + /// Sets the state of the flags to the defined state. + void setDefined() { AnyDefined = true; } + /// Returns true if the flags are in a defined state. + bool isDefined() const { return AnyDefined; } + // These are mutators for each flag. - void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } - void setNoSignedWrap(bool b) { NoSignedWrap = b; } - void setExact(bool b) { Exact = b; } - void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; } - void setNoNaNs(bool b) { NoNaNs = b; } - void setNoInfs(bool b) { NoInfs = b; } - void setNoSignedZeros(bool b) { NoSignedZeros = b; } - void setAllowReciprocal(bool b) { AllowReciprocal = b; } - void setVectorReduction(bool b) { VectorReduction = b; } - void setAllowContract(bool b) { AllowContract = b; } + void setNoUnsignedWrap(bool b) { + setDefined(); + NoUnsignedWrap = b; + } + void setNoSignedWrap(bool b) { + setDefined(); + NoSignedWrap = b; + } + void setExact(bool b) { + setDefined(); + Exact = b; + } + void setUnsafeAlgebra(bool b) { + setDefined(); + UnsafeAlgebra = b; + } + void setNoNaNs(bool b) { + setDefined(); + NoNaNs = b; + } + void setNoInfs(bool b) { + setDefined(); + NoInfs = b; + } + void setNoSignedZeros(bool b) { + setDefined(); + NoSignedZeros = b; + } + void setAllowReciprocal(bool b) { + setDefined(); + AllowReciprocal = b; + } + void setVectorReduction(bool b) { + setDefined(); + VectorReduction = b; + } + void setAllowContract(bool b) { + setDefined(); + AllowContract = b; + } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } @@ -385,17 +433,20 @@ public: bool hasAllowContract() const { return AllowContract; } /// Clear any flags in this flag set that aren't also set in Flags. - void intersectWith(const SDNodeFlags *Flags) { - NoUnsignedWrap &= Flags->NoUnsignedWrap; - NoSignedWrap &= Flags->NoSignedWrap; - Exact &= Flags->Exact; - UnsafeAlgebra &= Flags->UnsafeAlgebra; - NoNaNs &= Flags->NoNaNs; - NoInfs &= Flags->NoInfs; - NoSignedZeros &= Flags->NoSignedZeros; - AllowReciprocal &= Flags->AllowReciprocal; - VectorReduction &= Flags->VectorReduction; - AllowContract &= Flags->AllowContract; + /// If the given Flags are undefined then don't do anything. + void intersectWith(const SDNodeFlags Flags) { + if (!Flags.isDefined()) + return; + NoUnsignedWrap &= Flags.NoUnsignedWrap; + NoSignedWrap &= Flags.NoSignedWrap; + Exact &= Flags.Exact; + UnsafeAlgebra &= Flags.UnsafeAlgebra; + NoNaNs &= Flags.NoNaNs; + NoInfs &= Flags.NoInfs; + NoSignedZeros &= Flags.NoSignedZeros; + AllowReciprocal &= Flags.AllowReciprocal; + VectorReduction &= Flags.VectorReduction; + AllowContract &= Flags.AllowContract; } }; @@ -527,6 +578,8 @@ private: /// Return a pointer to the specified value type. static const EVT *getValueTypeList(EVT VT); + SDNodeFlags Flags; + public: /// Unique and persistent id per SDNode in the DAG. /// Used for debug printing. @@ -567,6 +620,32 @@ public: SDNodeBits.IsMemIntrinsic; } + /// Test if this node is a strict floating point pseudo-op. + bool isStrictFPOpcode() { + switch (NodeType) { + default: + return false; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + return true; + } + } + /// Test if this node has a post-isel opcode, directly /// corresponding to a MachineInstr opcode. bool isMachineOpcode() const { return NodeType < 0; } @@ -624,10 +703,10 @@ public: explicit use_iterator(SDUse *op) : Op(op) {} public: - typedef std::iterator::reference reference; - typedef std::iterator::pointer pointer; + using reference = std::iterator::reference; + using pointer = std::iterator::pointer; use_iterator() = default; use_iterator(const use_iterator &I) : Op(I.Op) {} @@ -753,7 +832,7 @@ public: return OperandList[Num]; } - typedef SDUse* op_iterator; + using op_iterator = SDUse *; op_iterator op_begin() const { return OperandList; } op_iterator op_end() const { return OperandList+NumOperands; } @@ -799,12 +878,12 @@ public: return nullptr; } - /// This could be defined as a virtual function and implemented more simply - /// and directly, but it is not to avoid creating a vtable for this class. - const SDNodeFlags *getFlags() const; + const SDNodeFlags getFlags() const { return Flags; } + void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; } /// Clear any flags in this node that aren't also set in Flags. - void intersectFlagsWith(const SDNodeFlags *Flags); + /// If Flags is not in a defined state then this has no effect. + void intersectFlagsWith(const SDNodeFlags Flags); /// Return the number of values defined/returned by this operator. unsigned getNumValues() const { return NumValues; } @@ -825,7 +904,8 @@ public: return getValueType(ResNo).getSizeInBits(); } - typedef const EVT* value_iterator; + using value_iterator = const EVT *; + value_iterator value_begin() const { return ValueList; } value_iterator value_end() const { return ValueList+NumValues; } @@ -1032,43 +1112,6 @@ inline void SDUse::setNode(SDNode *N) { if (N) N->addUse(*this); } -/// Returns true if the opcode is a binary operation with flags. -static bool isBinOpWithFlags(unsigned Opcode) { - switch (Opcode) { - case ISD::SDIV: - case ISD::UDIV: - case ISD::SRA: - case ISD::SRL: - case ISD::MUL: - case ISD::ADD: - case ISD::SUB: - case ISD::SHL: - case ISD::FADD: - case ISD::FDIV: - case ISD::FMUL: - case ISD::FREM: - case ISD::FSUB: - return true; - default: - return false; - } -} - -/// This class is an extension of BinarySDNode -/// used from those opcodes that have associated extra flags. -class BinaryWithFlagsSDNode : public SDNode { -public: - SDNodeFlags Flags; - - BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, - SDVTList VTs, const SDNodeFlags &NodeFlags) - : SDNode(Opc, Order, dl, VTs), Flags(NodeFlags) {} - - static bool classof(const SDNode *N) { - return isBinOpWithFlags(N->getOpcode()); - } -}; - /// This class is used to form a handle around another node that /// is persistent and is updated across invocations of replaceAllUsesWith on its /// operand. This node should be directly created by end-users and not added to @@ -1788,8 +1831,7 @@ class BlockAddressSDNode : public SDNode { BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, int64_t o, unsigned char Flags) : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)), - BA(ba), Offset(o), TargetFlags(Flags) { - } + BA(ba), Offset(o), TargetFlags(Flags) {} public: const BlockAddress *getBlockAddress() const { return BA; } @@ -2120,7 +2162,7 @@ public: /// instruction selection proper phase. class MachineSDNode : public SDNode { public: - typedef MachineMemOperand **mmo_iterator; + using mmo_iterator = MachineMemOperand **; private: friend class SelectionDAG; @@ -2192,8 +2234,8 @@ public: }; template <> struct GraphTraits { - typedef SDNode *NodeRef; - typedef SDNodeIterator ChildIteratorType; + using NodeRef = SDNode *; + using ChildIteratorType = SDNodeIterator; static NodeRef getEntryNode(SDNode *N) { return N; } @@ -2210,12 +2252,12 @@ template <> struct GraphTraits { /// /// This needs to be a union because the largest node differs on 32 bit systems /// with 4 and 8 byte pointer alignment, respectively. -typedef AlignedCharArrayUnion - LargestSDNode; +using LargestSDNode = AlignedCharArrayUnion; /// The SDNode class with the greatest alignment requirement. -typedef GlobalAddressSDNode MostAlignedSDNode; +using MostAlignedSDNode = GlobalAddressSDNode; namespace ISD { diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 14fc3a499a082dc9bb707c2d769692d01c7ef753..a7b16e7a9ed229ef8f719e5e43b4360b3b16d5b1 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -24,13 +24,22 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/ilist.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include +#include +#include +#include namespace llvm { +class raw_ostream; + /// This class represents an entry in the slot index list held in the /// SlotIndexes pass. It should not be used directly. See the /// SlotIndex & SlotIndexes classes for the public interface to this @@ -40,7 +49,6 @@ namespace llvm { unsigned index; public: - IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {} MachineInstr* getInstr() const { return mi; } @@ -301,7 +309,7 @@ namespace llvm { return os; } - typedef std::pair IdxMBBPair; + using IdxMBBPair = std::pair; inline bool operator<(SlotIndex V, const IdxMBBPair &IM) { return V < IM.first; @@ -325,7 +333,7 @@ namespace llvm { // IndexListEntry allocator. BumpPtrAllocator ileAllocator; - typedef ilist IndexList; + using IndexList = ilist; IndexList indexList; #ifdef EXPENSIVE_CHECKS @@ -334,7 +342,7 @@ namespace llvm { MachineFunction *mf; - typedef DenseMap Mi2IndexMap; + using Mi2IndexMap = DenseMap; Mi2IndexMap mi2iMap; /// MBBRanges - Map MBB number to (start, stop) indexes. @@ -436,7 +444,7 @@ namespace llvm { const MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "MI must be inserted inna basic block"); MachineBasicBlock::const_iterator I = MI, B = MBB->begin(); - for (;;) { + while (true) { if (I == B) return getMBBStartIdx(MBB); --I; @@ -453,7 +461,7 @@ namespace llvm { const MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "MI must be inserted inna basic block"); MachineBasicBlock::const_iterator I = MI, E = MBB->end(); - for (;;) { + while (true) { ++I; if (I == E) return getMBBEndIdx(MBB); @@ -497,21 +505,25 @@ namespace llvm { /// Iterator over the idx2MBBMap (sorted pairs of slot index of basic block /// begin and basic block) - typedef SmallVectorImpl::const_iterator MBBIndexIterator; + using MBBIndexIterator = SmallVectorImpl::const_iterator; + /// Move iterator to the next IdxMBBPair where the SlotIndex is greater or /// equal to \p To. MBBIndexIterator advanceMBBIndex(MBBIndexIterator I, SlotIndex To) const { return std::lower_bound(I, idx2MBBMap.end(), To); } + /// Get an iterator pointing to the IdxMBBPair with the biggest SlotIndex /// that is greater or equal to \p Idx. MBBIndexIterator findMBBIndex(SlotIndex Idx) const { return advanceMBBIndex(idx2MBBMap.begin(), Idx); } + /// Returns an iterator for the begin of the idx2MBBMap. MBBIndexIterator MBBIndexBegin() const { return idx2MBBMap.begin(); } + /// Return an iterator for the end of the idx2MBBMap. MBBIndexIterator MBBIndexEnd() const { return idx2MBBMap.end(); diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index a18936feea7b0c56ac7ac1a8c808520dfee815bc..8263946ed928058e2880cc5a9d24f18d28d46f86 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -145,21 +145,27 @@ public: /// /// Statepoint operands take the form: /// , , , , -/// [call arguments], , , +/// [call arguments...], +/// , , /// , , -/// , , [other args], -/// [gc values] +/// , , [deopt args...], +/// +/// Note that the last two sets of arguments are not currently length +/// prefixed. class StatepointOpers { -private: + // TODO:: we should change the STATEPOINT representation so that CC and + // Flags should be part of meta operands, with args and deopt operands, and + // gc operands all prefixed by their length and a type code. This would be + // much more consistent. +public: // These values are aboolute offsets into the operands of the statepoint // instruction. enum { IDPos, NBytesPos, NCallArgsPos, CallTargetPos, MetaEnd }; // These values are relative offests from the start of the statepoint meta // arguments (i.e. the end of the call arguments). - enum { CCOffset = 1, FlagsOffset = 3, NumVMSArgsOffset = 5 }; + enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 }; -public: explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {} /// Get starting index of non call related arguments @@ -220,7 +226,7 @@ public: // OpTypes are used to encode information about the following logical // operand (which may consist of several MachineOperands) for the // OpParser. - typedef enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp } OpType; + using OpType = enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp }; StackMaps(AsmPrinter &AP); @@ -248,9 +254,10 @@ public: private: static const char *WSMP; - typedef SmallVector LocationVec; - typedef SmallVector LiveOutVec; - typedef MapVector ConstantPool; + + using LocationVec = SmallVector; + using LiveOutVec = SmallVector; + using ConstantPool = MapVector; struct FunctionInfo { uint64_t StackSize = 0; @@ -273,8 +280,8 @@ private: LiveOuts(std::move(LiveOuts)) {} }; - typedef MapVector FnInfoMap; - typedef std::vector CallsiteInfoList; + using FnInfoMap = MapVector; + using CallsiteInfoList = std::vector; AsmPrinter &AP; CallsiteInfoList CSInfos; diff --git a/include/llvm/CodeGen/StackProtector.h b/include/llvm/CodeGen/StackProtector.h index 0655f19a323e49a83d5fd387074bf03240cc71c3..72de212d0df9a97399ef49208a91051b19683a80 100644 --- a/include/llvm/CodeGen/StackProtector.h +++ b/include/llvm/CodeGen/StackProtector.h @@ -19,17 +19,20 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" -#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" namespace llvm { +class BasicBlock; +class DominatorTree; class Function; +class Instruction; class Module; -class PHINode; +class TargetLoweringBase; +class TargetMachine; +class Type; class StackProtector : public FunctionPass { public: @@ -47,7 +50,7 @@ public: }; /// A mapping of AllocaInsts to their required SSP layout. - typedef ValueMap SSPLayoutMap; + using SSPLayoutMap = ValueMap; private: const TargetMachine *TM = nullptr; @@ -55,7 +58,7 @@ private: /// TLI - Keep a pointer of a TargetLowering to consult for determining /// target type sizes. const TargetLoweringBase *TLI = nullptr; - const Triple Trip; + Triple Trip; Function *F; Module *M; @@ -114,19 +117,11 @@ private: public: static char ID; // Pass identification, replacement for typeid. - StackProtector() : FunctionPass(ID) { + StackProtector() : FunctionPass(ID), SSPBufferSize(8) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } - StackProtector(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), Trip(TM->getTargetTriple()), - SSPBufferSize(8) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addPreserved(); - } + void getAnalysisUsage(AnalysisUsage &AU) const override; SSPLayoutKind getSSPLayout(const AllocaInst *AI) const; diff --git a/include/llvm/CodeGen/TailDuplicator.h b/include/llvm/CodeGen/TailDuplicator.h index b667245fd3c0ae85f2557236701fe571ab5a9930..483c0ab1eec9e1e77fad7bc6560778e10cd3f536 100644 --- a/include/llvm/CodeGen/TailDuplicator.h +++ b/include/llvm/CodeGen/TailDuplicator.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/TailDuplicator.h ---------------------------*- C++ -*-===// +//===- llvm/CodeGen/TailDuplicator.h ----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,19 +15,27 @@ #ifndef LLVM_CODEGEN_TAILDUPLICATOR_H #define LLVM_CODEGEN_TAILDUPLICATOR_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include namespace llvm { -extern cl::opt TailDupIndirectBranchSize; +class MachineBasicBlock; +class MachineBranchProbabilityInfo; +class MachineFunction; +class MachineInstr; +class MachineModuleInfo; +class MachineRegisterInfo; +class TargetRegisterInfo; /// Utility class to perform tail duplication. class TailDuplicator { @@ -46,7 +54,7 @@ class TailDuplicator { // For each virtual register in SSAUpdateVals keep a list of source virtual // registers. - typedef std::vector> AvailableValsTy; + using AvailableValsTy = std::vector>; DenseMap SSAUpdateVals; @@ -62,11 +70,14 @@ public: void initMF(MachineFunction &MF, const MachineBranchProbabilityInfo *MBPI, bool LayoutMode, unsigned TailDupSize = 0); + bool tailDuplicateBlocks(); static bool isSimpleBB(MachineBasicBlock *TailBB); bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB); + /// Returns true if TailBB can successfully be duplicated into PredBB bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB); + /// Tail duplicate a single basic block into its predecessors, and then clean /// up. /// If \p DuplicatePreds is not null, it will be updated to contain the list @@ -77,10 +88,10 @@ public: bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl *DuplicatedPreds = nullptr, - llvm::function_ref *RemovalCallback = nullptr); + function_ref *RemovalCallback = nullptr); private: - typedef TargetInstrInfo::RegSubRegPair RegSubRegPair; + using RegSubRegPair = TargetInstrInfo::RegSubRegPair; void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, MachineBasicBlock *BB); @@ -112,9 +123,9 @@ private: void removeDeadBlock( MachineBasicBlock *MBB, - llvm::function_ref *RemovalCallback = nullptr); + function_ref *RemovalCallback = nullptr); }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_TAILDUPLICATOR_H diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index adf2b3ea1c9b3f79883330bf30f1725cda33da8d..e4d3cc9cecfcc2c6995fb6e5a6526124bf044e2c 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -41,6 +41,10 @@ public: TargetLoweringObjectFileELF() = default; ~TargetLoweringObjectFileELF() override = default; + /// Emit Obj-C garbage collection and linker options. + void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const override; + void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, const MCSymbol *Sym) const override; @@ -94,9 +98,8 @@ public: void Initialize(MCContext &Ctx, const TargetMachine &TM) override; /// Emit the module flags that specify the garbage collection information. - void emitModuleFlags(MCStreamer &Streamer, - ArrayRef ModuleFlags, - const TargetMachine &TM) const override; + void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const override; MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const override; @@ -149,11 +152,9 @@ public: MCSection *getSectionForJumpTable(const Function &F, const TargetMachine &TM) const override; - /// Emit Obj-C garbage collection and linker options. Only linker option - /// emission is implemented for COFF. - void emitModuleFlags(MCStreamer &Streamer, - ArrayRef ModuleFlags, - const TargetMachine &TM) const override; + /// Emit Obj-C garbage collection and linker options. + void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const override; MCSection *getStaticCtorSection(unsigned Priority, const MCSymbol *KeySym) const override; diff --git a/include/llvm/CodeGen/TargetPassConfig.h b/include/llvm/CodeGen/TargetPassConfig.h index f0c826dc1d457cf6637809fc2863b7e52e9f0ad1..c109b7489ccad0fba0774e75cdc38a8fc9892b08 100644 --- a/include/llvm/CodeGen/TargetPassConfig.h +++ b/include/llvm/CodeGen/TargetPassConfig.h @@ -22,7 +22,7 @@ namespace llvm { class PassConfigImpl; class ScheduleDAGInstrs; -class TargetMachine; +class LLVMTargetMachine; struct MachineSchedContext; // The old pass manager infrastructure is hidden in a legacy namespace now. @@ -103,7 +103,7 @@ private: bool AddingMachinePasses; protected: - TargetMachine *TM; + LLVMTargetMachine *TM; PassConfigImpl *Impl; // Internal data structures bool Initialized; // Flagged after all passes are configured. @@ -119,8 +119,12 @@ protected: /// callers. bool RequireCodeGenSCCOrder; + /// Add the actual instruction selection passes. This does not include + /// preparation passes on IR. + bool addCoreISelPasses(); + public: - TargetPassConfig(TargetMachine *tm, PassManagerBase &pm); + TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm); // Dummy constructor. TargetPassConfig(); @@ -206,6 +210,13 @@ public: /// has not be overriden on the command line with '-regalloc=...' bool usingDefaultRegAlloc() const; + /// High level function that adds all passes necessary to go from llvm IR + /// representation to the MI representation. + /// Adds IR based lowering and target specific optimization passes and finally + /// the core instruction selection passes. + /// \returns true if an error occured, false otherwise. + bool addISelPasses(); + /// Add common target configurable passes that perform LLVM IR to IR /// transforms following machine independent optimization. virtual void addIRPasses(); diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 0c5a84e0e3b8f25dfb26cc15416cc818ff77c913..f236679764688c7797b3ec6e39afb0b732fdfc8e 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -16,6 +16,7 @@ #ifndef LLVM_CODEGEN_TARGETSCHEDULE_H #define LLVM_CODEGEN_TARGETSCHEDULE_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" @@ -55,6 +56,9 @@ public: /// Return the MCSchedClassDesc for this instruction. const MCSchedClassDesc *resolveSchedClass(const MachineInstr *MI) const; + /// \brief TargetSubtargetInfo getter. + const TargetSubtargetInfo *getSubtargetInfo() const { return STI; } + /// \brief TargetInstrInfo getter. const TargetInstrInfo *getInstrInfo() const { return TII; } @@ -120,7 +124,7 @@ public: } #endif - typedef const MCWriteProcResEntry *ProcResIter; + using ProcResIter = const MCWriteProcResEntry *; // \brief Get an iterator into the processor resources consumed by this // scheduling class. @@ -189,6 +193,10 @@ public: /// This is typically one cycle. unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *DepMI) const; + + /// \brief Compute the reciprocal throughput of the given instruction. + Optional computeInstrRThroughput(const MachineInstr *MI) const; + Optional computeInstrRThroughput(unsigned Opcode) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index 0a3063663cef81fab397e4691880a229d6fb57d5..b404b4ca701f931f17db19d79a8e01e52c6f043c 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -44,7 +44,7 @@ namespace llvm { bool operator!=(EVT VT) const { if (V.SimpleTy != VT.V.SimpleTy) return true; - if (V.SimpleTy < 0) + if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) return LLVMTy != VT.LLVMTy; return false; } @@ -60,31 +60,48 @@ namespace llvm { /// bits. static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) { MVT M = MVT::getIntegerVT(BitWidth); - if (M.SimpleTy >= 0) + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) return M; return getExtendedIntegerVT(Context, BitWidth); } /// Returns the EVT that represents a vector NumElements in length, where /// each element is of type VT. - static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) { - MVT M = MVT::getVectorVT(VT.V, NumElements); - if (M.SimpleTy >= 0) + static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, + bool IsScalable = false) { + MVT M = MVT::getVectorVT(VT.V, NumElements, IsScalable); + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) return M; + + assert(!IsScalable && "We don't support extended scalable types yet"); return getExtendedVectorVT(Context, VT, NumElements); } + /// Returns the EVT that represents a vector EC.Min elements in length, + /// where each element is of type VT. + static EVT getVectorVT(LLVMContext &Context, EVT VT, MVT::ElementCount EC) { + MVT M = MVT::getVectorVT(VT.V, EC); + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) + return M; + assert (!EC.Scalable && "We don't support extended scalable types yet"); + return getExtendedVectorVT(Context, VT, EC.Min); + } + /// Return a vector with the same number of elements as this vector, but /// with the element type converted to an integer type with the same /// bitwidth. EVT changeVectorElementTypeToInteger() const { - if (!isSimple()) + if (!isSimple()) { + assert (!isScalableVector() && + "We don't support extended scalable types yet"); return changeExtendedVectorElementTypeToInteger(); + } MVT EltTy = getSimpleVT().getVectorElementType(); unsigned BitWidth = EltTy.getSizeInBits(); MVT IntTy = MVT::getIntegerVT(BitWidth); - MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements()); - assert(VecTy.SimpleTy >= 0 && + MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements(), + isScalableVector()); + assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && "Simple vector VT not representable by simple integer vector VT!"); return VecTy; } @@ -104,7 +121,7 @@ namespace llvm { /// Test if the given EVT is simple (as opposed to being extended). bool isSimple() const { - return V.SimpleTy >= 0; + return V.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE; } /// Test if the given EVT is extended (as opposed to being simple). @@ -132,6 +149,17 @@ namespace llvm { return isSimple() ? V.isVector() : isExtendedVector(); } + /// Return true if this is a vector type where the runtime + /// length is machine dependent + bool isScalableVector() const { + // FIXME: We don't support extended scalable types yet, because the + // matching IR type doesn't exist. Once it has been added, this can + // be changed to call isExtendedScalableVector. + if (!isSimple()) + return false; + return V.isScalableVector(); + } + /// Return true if this is a 16-bit vector type. bool is16BitVector() const { return isSimple() ? V.is16BitVector() : isExtended16BitVector(); @@ -247,6 +275,17 @@ namespace llvm { return getExtendedVectorNumElements(); } + // Given a (possibly scalable) vector type, return the ElementCount + MVT::ElementCount getVectorElementCount() const { + assert((isVector()) && "Invalid vector type!"); + if (isSimple()) + return V.getVectorElementCount(); + + assert(!isScalableVector() && + "We don't support extended scalable types yet"); + return {getExtendedVectorNumElements(), false}; + } + /// Return the size of the specified value type in bits. unsigned getSizeInBits() const { if (isSimple()) @@ -301,7 +340,17 @@ namespace llvm { EVT widenIntegerVectorElementType(LLVMContext &Context) const { EVT EltVT = getVectorElementType(); EltVT = EVT::getIntegerVT(Context, 2 * EltVT.getSizeInBits()); - return EVT::getVectorVT(Context, EltVT, getVectorNumElements()); + return EVT::getVectorVT(Context, EltVT, getVectorElementCount()); + } + + // Return a VT for a vector type with the same element type but + // half the number of elements. The type returned may be an + // extended type. + EVT getHalfNumVectorElementsVT(LLVMContext &Context) const { + EVT EltVT = getVectorElementType(); + auto EltCnt = getVectorElementCount(); + assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); + return EVT::getVectorVT(Context, EltVT, EltCnt / 2); } /// Returns true if the given vector is a power of 2. @@ -316,7 +365,8 @@ namespace llvm { if (!isPow2VectorType()) { unsigned NElts = getVectorNumElements(); unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); - return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts); + return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts, + isScalableVector()); } else { return *this; diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index f7b1661d7451b5775ce787a4eac95d234ebc463a..b1e62daa5aaeb0ae4133f66121bcc9fa849b35bd 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -19,101 +19,149 @@ class ValueType { int Value = value; } -def OtherVT: ValueType<0 , 0>; // "Other" value -def i1 : ValueType<1 , 1>; // One bit boolean value -def i8 : ValueType<8 , 2>; // 8-bit integer value -def i16 : ValueType<16 , 3>; // 16-bit integer value -def i32 : ValueType<32 , 4>; // 32-bit integer value -def i64 : ValueType<64 , 5>; // 64-bit integer value -def i128 : ValueType<128, 6>; // 128-bit integer value -def f16 : ValueType<16 , 7>; // 16-bit floating point value -def f32 : ValueType<32 , 8>; // 32-bit floating point value -def f64 : ValueType<64 , 9>; // 64-bit floating point value -def f80 : ValueType<80 , 10>; // 80-bit floating point value -def f128 : ValueType<128, 11>; // 128-bit floating point value -def ppcf128: ValueType<128, 12>; // PPC 128-bit floating point value - -def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value -def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value -def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value -def v16i1 : ValueType<16, 16>; // 16 x i1 vector value -def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value -def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value -def v512i1 : ValueType<512, 19>; // 512 x i1 vector value -def v1024i1: ValueType<1024,20>; //1024 x i1 vector value - -def v1i8 : ValueType<16, 21>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 22>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 23>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 24>; // 8 x i8 vector value -def v16i8 : ValueType<128, 25>; // 16 x i8 vector value -def v32i8 : ValueType<256, 26>; // 32 x i8 vector value -def v64i8 : ValueType<512, 27>; // 64 x i8 vector value -def v128i8 : ValueType<1024,28>; //128 x i8 vector value -def v256i8 : ValueType<2048,29>; //256 x i8 vector value - -def v1i16 : ValueType<16 , 30>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 31>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 32>; // 4 x i16 vector value -def v8i16 : ValueType<128, 33>; // 8 x i16 vector value -def v16i16 : ValueType<256, 34>; // 16 x i16 vector value -def v32i16 : ValueType<512, 35>; // 32 x i16 vector value -def v64i16 : ValueType<1024,36>; // 64 x i16 vector value -def v128i16: ValueType<2048,37>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 38>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 39>; // 2 x i32 vector value -def v4i32 : ValueType<128, 40>; // 4 x i32 vector value -def v8i32 : ValueType<256, 41>; // 8 x i32 vector value -def v16i32 : ValueType<512, 42>; // 16 x i32 vector value -def v32i32 : ValueType<1024,43>; // 32 x i32 vector value -def v64i32 : ValueType<2048,44>; // 32 x i32 vector value - -def v1i64 : ValueType<64 , 45>; // 1 x i64 vector value -def v2i64 : ValueType<128, 46>; // 2 x i64 vector value -def v4i64 : ValueType<256, 47>; // 4 x i64 vector value -def v8i64 : ValueType<512, 48>; // 8 x i64 vector value -def v16i64 : ValueType<1024,49>; // 16 x i64 vector value -def v32i64 : ValueType<2048,50>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 51>; // 1 x i128 vector value - -def v2f16 : ValueType<32 , 52>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 53>; // 4 x f16 vector value -def v8f16 : ValueType<128, 54>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 55>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 56>; // 2 x f32 vector value -def v4f32 : ValueType<128, 57>; // 4 x f32 vector value -def v8f32 : ValueType<256, 58>; // 8 x f32 vector value -def v16f32 : ValueType<512, 59>; // 16 x f32 vector value -def v1f64 : ValueType<64, 60>; // 1 x f64 vector value -def v2f64 : ValueType<128, 61>; // 2 x f64 vector value -def v4f64 : ValueType<256, 62>; // 4 x f64 vector value -def v8f64 : ValueType<512, 63>; // 8 x f64 vector value - - -def x86mmx : ValueType<64 , 64>; // X86 MMX value -def FlagVT : ValueType<0 , 65>; // Pre-RA sched glue -def isVoid : ValueType<0 , 66>; // Produces no value -def untyped: ValueType<8 , 67>; // Produces an untyped value -def token : ValueType<0 , 120>; // TokenTy -def MetadataVT: ValueType<0, 121>; // Metadata +def OtherVT: ValueType<0 , 1>; // "Other" value +def i1 : ValueType<1 , 2>; // One bit boolean value +def i8 : ValueType<8 , 3>; // 8-bit integer value +def i16 : ValueType<16 , 4>; // 16-bit integer value +def i32 : ValueType<32 , 5>; // 32-bit integer value +def i64 : ValueType<64 , 6>; // 64-bit integer value +def i128 : ValueType<128, 7>; // 128-bit integer value +def f16 : ValueType<16 , 8>; // 16-bit floating point value +def f32 : ValueType<32 , 9>; // 32-bit floating point value +def f64 : ValueType<64 , 10>; // 64-bit floating point value +def f80 : ValueType<80 , 11>; // 80-bit floating point value +def f128 : ValueType<128, 12>; // 128-bit floating point value +def ppcf128: ValueType<128, 13>; // PPC 128-bit floating point value + +def v1i1 : ValueType<1 , 14>; // 1 x i1 vector value +def v2i1 : ValueType<2 , 15>; // 2 x i1 vector value +def v4i1 : ValueType<4 , 16>; // 4 x i1 vector value +def v8i1 : ValueType<8 , 17>; // 8 x i1 vector value +def v16i1 : ValueType<16, 18>; // 16 x i1 vector value +def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value +def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value +def v512i1 : ValueType<512, 21>; // 512 x i1 vector value +def v1024i1: ValueType<1024,22>; //1024 x i1 vector value + +def v1i8 : ValueType<8, 23>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 24>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 25>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 26>; // 8 x i8 vector value +def v16i8 : ValueType<128, 27>; // 16 x i8 vector value +def v32i8 : ValueType<256, 28>; // 32 x i8 vector value +def v64i8 : ValueType<512, 29>; // 64 x i8 vector value +def v128i8 : ValueType<1024,30>; //128 x i8 vector value +def v256i8 : ValueType<2048,31>; //256 x i8 vector value + +def v1i16 : ValueType<16 , 32>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 33>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 34>; // 4 x i16 vector value +def v8i16 : ValueType<128, 35>; // 8 x i16 vector value +def v16i16 : ValueType<256, 36>; // 16 x i16 vector value +def v32i16 : ValueType<512, 37>; // 32 x i16 vector value +def v64i16 : ValueType<1024,38>; // 64 x i16 vector value +def v128i16: ValueType<2048,39>; //128 x i16 vector value + +def v1i32 : ValueType<32 , 40>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 41>; // 2 x i32 vector value +def v4i32 : ValueType<128, 42>; // 4 x i32 vector value +def v8i32 : ValueType<256, 43>; // 8 x i32 vector value +def v16i32 : ValueType<512, 44>; // 16 x i32 vector value +def v32i32 : ValueType<1024,45>; // 32 x i32 vector value +def v64i32 : ValueType<2048,46>; // 32 x i32 vector value + +def v1i64 : ValueType<64 , 47>; // 1 x i64 vector value +def v2i64 : ValueType<128, 48>; // 2 x i64 vector value +def v4i64 : ValueType<256, 49>; // 4 x i64 vector value +def v8i64 : ValueType<512, 50>; // 8 x i64 vector value +def v16i64 : ValueType<1024,51>; // 16 x i64 vector value +def v32i64 : ValueType<2048,52>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 53>; // 1 x i128 vector value + +def nxv1i1 : ValueType<1, 54>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 55>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 56>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 57>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 58>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 59>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 60>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 61>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 62>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 63>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 64>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 65>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 66>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 67>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 68>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 69>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 70>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 71>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 72>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 73>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 74>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 75>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 76>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,77>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 78>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 79>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 80>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 81>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,82>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,83>; // n x 32 x i64 vector value + +def v2f16 : ValueType<32 , 84>; // 2 x f16 vector value +def v4f16 : ValueType<64 , 85>; // 4 x f16 vector value +def v8f16 : ValueType<128, 86>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 87>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 88>; // 2 x f32 vector value +def v4f32 : ValueType<128, 89>; // 4 x f32 vector value +def v8f32 : ValueType<256, 90>; // 8 x f32 vector value +def v16f32 : ValueType<512, 91>; // 16 x f32 vector value +def v1f64 : ValueType<64, 92>; // 1 x f64 vector value +def v2f64 : ValueType<128, 93>; // 2 x f64 vector value +def v4f64 : ValueType<256, 94>; // 4 x f64 vector value +def v8f64 : ValueType<512, 95>; // 8 x f64 vector value + +def nxv2f16 : ValueType<32 , 96>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 97>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 98>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 99>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 100>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 101>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 102>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 103>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 104>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 105>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 106>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 107>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 108>; // X86 MMX value +def FlagVT : ValueType<0 , 109>; // Pre-RA sched glue +def isVoid : ValueType<0 , 110>; // Produces no value +def untyped: ValueType<8 , 111>; // Produces an untyped value +def token : ValueType<0 , 248>; // TokenTy +def MetadataVT: ValueType<0, 249>; // Metadata // Pseudo valuetype mapped to the current pointer size to any address space. // Should only be used in TableGen. -def iPTRAny : ValueType<0, 122>; +def iPTRAny : ValueType<0, 250>; // Pseudo valuetype to represent "vector of any size" -def vAny : ValueType<0 , 123>; +def vAny : ValueType<0 , 251>; // Pseudo valuetype to represent "float of any format" -def fAny : ValueType<0 , 124>; +def fAny : ValueType<0 , 252>; // Pseudo valuetype to represent "integer of any bit width" -def iAny : ValueType<0 , 125>; +def iAny : ValueType<0 , 253>; // Pseudo valuetype mapped to the current pointer size. -def iPTR : ValueType<0 , 126>; +def iPTR : ValueType<0 , 254>; // Pseudo valuetype to represent "any type of any size". -def Any : ValueType<0 , 127>; +def Any : ValueType<0 , 255>; diff --git a/include/llvm/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h index d7e92094877d1fbbf83e4a0e1d5bcb27d916fe72..b9076353fd07d090511a25364302f55cba4bf119 100644 --- a/include/llvm/CodeGen/VirtRegMap.h +++ b/include/llvm/CodeGen/VirtRegMap.h @@ -102,14 +102,7 @@ namespace llvm { /// @brief creates a mapping for the specified virtual register to /// the specified physical register - void assignVirt2Phys(unsigned virtReg, unsigned physReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg) && - TargetRegisterInfo::isPhysicalRegister(physReg)); - assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && - "attempt to assign physical register to already mapped " - "virtual register"); - Virt2PhysMap[virtReg] = physReg; - } + void assignVirt2Phys(unsigned virtReg, MCPhysReg physReg); /// @brief clears the specified virtual register's, physical /// register mapping diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h index dd730495a5f61e4ff1490442c94be5c1a5969f54..8043024626a0c5ff168901e7eee30d9a7eb17217 100644 --- a/include/llvm/CodeGen/WinEHFuncInfo.h +++ b/include/llvm/CodeGen/WinEHFuncInfo.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/WinEHFuncInfo.h ----------------------------*- C++ -*-===// +//===- llvm/CodeGen/WinEHFuncInfo.h -----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,28 +17,26 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/IR/Instructions.h" +#include +#include +#include namespace llvm { + class AllocaInst; class BasicBlock; -class CatchReturnInst; -class Constant; +class FuncletPadInst; class Function; class GlobalVariable; +class Instruction; class InvokeInst; -class IntrinsicInst; -class LandingPadInst; -class MCExpr; -class MCSymbol; class MachineBasicBlock; -class Value; +class MCSymbol; // The following structs respresent the .xdata tables for various // Windows-related EH personalities. -typedef PointerUnion MBBOrBasicBlock; +using MBBOrBasicBlock = PointerUnion; struct CxxUnwindMapEntry { int ToState; @@ -99,18 +97,18 @@ struct WinEHFuncInfo { SmallVector TryBlockMap; SmallVector SEHUnwindMap; SmallVector ClrEHUnwindMap; - int UnwindHelpFrameIdx = INT_MAX; - int PSPSymFrameIdx = INT_MAX; + int UnwindHelpFrameIdx = std::numeric_limits::max(); + int PSPSymFrameIdx = std::numeric_limits::max(); int getLastStateNumber() const { return CxxUnwindMap.size() - 1; } void addIPToStateRange(const InvokeInst *II, MCSymbol *InvokeBegin, MCSymbol *InvokeEnd); - int EHRegNodeFrameIndex = INT_MAX; - int EHRegNodeEndOffset = INT_MAX; - int EHGuardFrameIndex = INT_MAX; - int SEHSetFrameOffset = INT_MAX; + int EHRegNodeFrameIndex = std::numeric_limits::max(); + int EHRegNodeEndOffset = std::numeric_limits::max(); + int EHGuardFrameIndex = std::numeric_limits::max(); + int SEHSetFrameOffset = std::numeric_limits::max(); WinEHFuncInfo(); }; @@ -125,5 +123,7 @@ void calculateSEHStateNumbers(const Function *ParentFn, WinEHFuncInfo &FuncInfo); void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo); -} + +} // end namespace llvm + #endif // LLVM_CODEGEN_WINEHFUNCINFO_H diff --git a/include/llvm/Config/abi-breaking.h.cmake b/include/llvm/Config/abi-breaking.h.cmake index 4ce487b8f5f3c67e6e579961809e2789c9286d11..7ae401e5b8a8c7df2826869e3569d6a4cb517642 100644 --- a/include/llvm/Config/abi-breaking.h.cmake +++ b/include/llvm/Config/abi-breaking.h.cmake @@ -15,6 +15,9 @@ /* Define to enable checks that alter the LLVM C++ ABI */ #cmakedefine01 LLVM_ENABLE_ABI_BREAKING_CHECKS +/* Define to enable reverse iteration of unordered llvm containers */ +#cmakedefine01 LLVM_ENABLE_REVERSE_ITERATION + /* Allow selectively disabling link-time mismatch checking so that header-only ADT content from LLVM can be used without linking libSupport. */ #if !LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index a3c919d39804f8d6b537692a81092cc1ac947a07..1289551f0739a607c235ab9e93ae1d04eae8a083 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -59,9 +59,6 @@ /* Define to 1 if you have the header file. */ #cmakedefine HAVE_ERRNO_H ${HAVE_ERRNO_H} -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_EXECINFO_H ${HAVE_EXECINFO_H} - /* Define to 1 if you have the header file. */ #cmakedefine HAVE_FCNTL_H ${HAVE_FCNTL_H} @@ -353,33 +350,15 @@ /* Host triple LLVM will be executed on */ #cmakedefine LLVM_HOST_TRIPLE "${LLVM_HOST_TRIPLE}" -/* LLVM architecture name for the native architecture, if available */ -#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH} - -/* LLVM name for the native AsmParser init function, if available */ -#cmakedefine LLVM_NATIVE_ASMPARSER LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser - -/* LLVM name for the native AsmPrinter init function, if available */ -#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter - -/* LLVM name for the native Disassembler init function, if available */ -#cmakedefine LLVM_NATIVE_DISASSEMBLER LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler - -/* LLVM name for the native Target init function, if available */ -#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target - -/* LLVM name for the native TargetInfo init function, if available */ -#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo - -/* LLVM name for the native target MC init function, if available */ -#cmakedefine LLVM_NATIVE_TARGETMC LLVMInitialize${LLVM_NATIVE_ARCH}TargetMC - /* Define if this is Unixish platform */ #cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX} /* Define if this is Win32ish platform */ #cmakedefine LLVM_ON_WIN32 ${LLVM_ON_WIN32} +/* Define if overriding target triple is enabled */ +#cmakedefine LLVM_TARGET_TRIPLE_ENV "${LLVM_TARGET_TRIPLE_ENV}" + /* Define if we have the Intel JIT API runtime support library */ #cmakedefine01 LLVM_USE_INTEL_JITEVENTS @@ -389,6 +368,9 @@ /* LLVM version information */ #cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}" +/* Whether tools show host and target info when invoked with --version */ +#cmakedefine01 LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO + /* Major version of the LLVM API */ #define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR} diff --git a/include/llvm/DebugInfo/CodeView/CVRecord.h b/include/llvm/DebugInfo/CodeView/CVRecord.h index 487f3b6446fa5697bb4e6e96d759cf8187b37a03..4c6bbedc6bbddb18e7628fd553a8edb7df01641b 100644 --- a/include/llvm/DebugInfo/CodeView/CVRecord.h +++ b/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -14,6 +14,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" @@ -32,6 +33,10 @@ public: uint32_t length() const { return RecordData.size(); } Kind kind() const { return Type; } ArrayRef data() const { return RecordData; } + StringRef str_data() const { + return StringRef(reinterpret_cast(RecordData.data()), + RecordData.size()); + } ArrayRef content() const { return RecordData.drop_front(sizeof(RecordPrefix)); @@ -46,12 +51,19 @@ public: Optional Hash; }; +template struct RemappedRecord { + explicit RemappedRecord(const CVRecord &R) : OriginalRecord(R) {} + + CVRecord OriginalRecord; + SmallVector, 8> Mappings; +}; + } // end namespace codeview template struct VarStreamArrayExtractor> { Error operator()(BinaryStreamRef Stream, uint32_t &Len, - codeview::CVRecord &Item) const { + codeview::CVRecord &Item) { using namespace codeview; const RecordPrefix *Prefix = nullptr; BinaryStreamReader Reader(Stream); diff --git a/include/llvm/DebugInfo/CodeView/CVTypeDumper.h b/include/llvm/DebugInfo/CodeView/CVTypeDumper.h deleted file mode 100644 index 02f14ea2107b2c8486845ef7cb2782375cbc1d75..0000000000000000000000000000000000000000 --- a/include/llvm/DebugInfo/CodeView/CVTypeDumper.h +++ /dev/null @@ -1,61 +0,0 @@ -//===-- CVTypeDumper.h - CodeView type info dumper --------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_CODEVIEW_CVTYPEDUMPER_H -#define LLVM_DEBUGINFO_CODEVIEW_CVTYPEDUMPER_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" -#include "llvm/Support/ScopedPrinter.h" - -namespace llvm { - -namespace codeview { - -class TypeServerHandler; - -/// Dumper for CodeView type streams found in COFF object files and PDB files. -class CVTypeDumper { -public: - explicit CVTypeDumper(TypeDatabase &TypeDB, - TypeServerHandler *Handler = nullptr) - : TypeDB(TypeDB), Handler(Handler) {} - - /// Dumps one type record. Returns false if there was a type parsing error, - /// and true otherwise. This should be called in order, since the dumper - /// maintains state about previous records which are necessary for cross - /// type references. - Error dump(const CVType &Record, TypeVisitorCallbacks &Dumper); - - /// Dumps the type records in Types. Returns false if there was a type stream - /// parse error, and true otherwise. - Error dump(const CVTypeArray &Types, TypeVisitorCallbacks &Dumper); - - /// Dumps the type records in Data. Returns false if there was a type stream - /// parse error, and true otherwise. Use this method instead of the - /// CVTypeArray overload when type records are laid out contiguously in - /// memory. - Error dump(ArrayRef Data, TypeVisitorCallbacks &Dumper); - - static void printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, - TypeIndex TI, TypeDatabase &DB); - -private: - TypeDatabase &TypeDB; - TypeServerHandler *Handler; -}; - -} // end namespace codeview -} // end namespace llvm - -#endif // LLVM_DEBUGINFO_CODEVIEW_TYPEDUMPER_H diff --git a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h index e9012db7602d28018474a36126187dfc6e83dae2..70ccc867cd38517281c591a8358416f42d771875 100644 --- a/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h +++ b/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h @@ -10,39 +10,49 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H #define LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H -#include "llvm/ADT/TinyPtrVector.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeServerHandler.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/Error.h" namespace llvm { namespace codeview { - -class CVTypeVisitor { -public: - explicit CVTypeVisitor(TypeVisitorCallbacks &Callbacks); - - void addTypeServerHandler(TypeServerHandler &Handler); - - Error visitTypeRecord(CVType &Record); - Error visitMemberRecord(CVMemberRecord &Record); - - /// Visits the type records in Data. Sets the error flag on parse failures. - Error visitTypeStream(const CVTypeArray &Types); - Error visitTypeStream(CVTypeRange Types); - - Error visitFieldListMemberStream(ArrayRef FieldList); - Error visitFieldListMemberStream(BinaryStreamReader Reader); - -private: - /// The interface to the class that gets notified of each visitation. - TypeVisitorCallbacks &Callbacks; - - TinyPtrVector Handlers; +class TypeCollection; +class TypeServerHandler; +class TypeVisitorCallbacks; + +enum VisitorDataSource { + VDS_BytesPresent, // The record bytes are passed into the the visitation + // function. The algorithm should first deserialize them + // before passing them on through the pipeline. + VDS_BytesExternal // The record bytes are not present, and it is the + // responsibility of the visitor callback interface to + // supply the bytes. }; +Error visitTypeRecord(CVType &Record, TypeIndex Index, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source = VDS_BytesPresent, + TypeServerHandler *TS = nullptr); +Error visitTypeRecord(CVType &Record, TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source = VDS_BytesPresent, + TypeServerHandler *TS = nullptr); + +Error visitMemberRecord(CVMemberRecord Record, TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source = VDS_BytesPresent); +Error visitMemberRecord(TypeLeafKind Kind, ArrayRef Record, + TypeVisitorCallbacks &Callbacks); + +Error visitMemberRecordStream(ArrayRef FieldList, + TypeVisitorCallbacks &Callbacks); + +Error visitTypeStream(const CVTypeArray &Types, TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source = VDS_BytesPresent, + TypeServerHandler *TS = nullptr); +Error visitTypeStream(CVTypeRange Types, TypeVisitorCallbacks &Callbacks, + TypeServerHandler *TS = nullptr); +Error visitTypeStream(TypeCollection &Types, TypeVisitorCallbacks &Callbacks, + TypeServerHandler *TS = nullptr); + } // end namespace codeview } // end namespace llvm diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h index 2791c9dc374651b8c1289cecdb1cc323f36b1dcd..6820e26b754c0b9e60a80ee7fb30ab28442cb6d0 100644 --- a/include/llvm/DebugInfo/CodeView/CodeView.h +++ b/include/llvm/DebugInfo/CodeView/CodeView.h @@ -6,6 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// Defines constants and basic types describing CodeView debug information. +// +//===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H #define LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H @@ -13,6 +17,8 @@ #include #include +#include "llvm/Support/Endian.h" + namespace llvm { namespace codeview { @@ -20,28 +26,28 @@ namespace codeview { /// documentation and headers talk about this as the "leaf" type. enum class TypeRecordKind : uint16_t { #define TYPE_RECORD(lf_ename, value, name) name = value, -#include "TypeRecords.def" +#include "CodeViewTypes.def" }; /// Duplicate copy of the above enum, but using the official CV names. Useful /// for reference purposes and when dealing with unknown record types. enum TypeLeafKind : uint16_t { #define CV_TYPE(name, val) name = val, -#include "TypeRecords.def" +#include "CodeViewTypes.def" }; /// Distinguishes individual records in the Symbols subsection of a .debug$S /// section. Equivalent to SYM_ENUM_e in cvinfo.h. enum class SymbolRecordKind : uint16_t { #define SYMBOL_RECORD(lf_ename, value, name) name = value, -#include "CVSymbolTypes.def" +#include "CodeViewSymbols.def" }; /// Duplicate copy of the above enum, but using the official CV names. Useful /// for reference purposes and when dealing with unknown record types. enum SymbolKind : uint16_t { #define CV_SYMBOL(name, val) name = val, -#include "CVSymbolTypes.def" +#include "CodeViewSymbols.def" }; #define CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(Class) \ @@ -278,7 +284,7 @@ CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(MethodOptions) /// Equivalent to CV_LABEL_TYPE_e. enum class LabelType : uint16_t { Near = 0x0, - Far = 0x4, + Far = 0x4, }; /// Equivalent to CV_modifier_t. @@ -291,7 +297,7 @@ enum class ModifierOptions : uint16_t { }; CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(ModifierOptions) -enum class ModuleSubstreamKind : uint32_t { +enum class DebugSubsectionKind : uint32_t { None = 0, Symbols = 0xf1, Lines = 0xf2, @@ -412,6 +418,8 @@ CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(ProcSymFlags) /// Corresponds to COMPILESYM2::Flags bitfield. enum class CompileSym2Flags : uint32_t { + None = 0, + SourceLanguageMask = 0xFF, EC = 1 << 8, NoDbgInfo = 1 << 9, LTCG = 1 << 10, @@ -426,6 +434,8 @@ CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(CompileSym2Flags) /// Corresponds to COMPILESYM3::Flags bitfield. enum class CompileSym3Flags : uint32_t { + None = 0, + SourceLanguageMask = 0xFF, EC = 1 << 8, NoDbgInfo = 1 << 9, LTCG = 1 << 10, @@ -442,6 +452,7 @@ enum class CompileSym3Flags : uint32_t { CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(CompileSym3Flags) enum class ExportFlags : uint16_t { + None = 0, IsConstant = 1 << 0, IsData = 1 << 1, IsPrivate = 1 << 2, @@ -546,9 +557,54 @@ enum class TrampolineType : uint16_t { TrampIncremental, BranchIsland }; // These values correspond to the CV_SourceChksum_t enumeration. enum class FileChecksumKind : uint8_t { None, MD5, SHA1, SHA256 }; -enum LineFlags : uint32_t { - HaveColumns = 1, // CV_LINES_HAVE_COLUMNS -}; +enum LineFlags : uint16_t { + LF_None = 0, + LF_HaveColumns = 1, // CV_LINES_HAVE_COLUMNS +}; + +/// Data in the the SUBSEC_FRAMEDATA subection. +struct FrameData { + support::ulittle32_t RvaStart; + support::ulittle32_t CodeSize; + support::ulittle32_t LocalSize; + support::ulittle32_t ParamsSize; + support::ulittle32_t MaxStackSize; + support::ulittle32_t FrameFunc; + support::ulittle16_t PrologSize; + support::ulittle16_t SavedRegsSize; + support::ulittle32_t Flags; + enum : uint32_t { + HasSEH = 1 << 0, + HasEH = 1 << 1, + IsFunctionStart = 1 << 2, + }; +}; + +// Corresponds to LocalIdAndGlobalIdPair structure. +// This structure information allows cross-referencing between PDBs. For +// example, when a PDB is being built during compilation it is not yet known +// what other modules may end up in the PDB at link time. So certain types of +// IDs may clash between the various compile time PDBs. For each affected +// module, a subsection would be put into the PDB containing a mapping from its +// local IDs to a single ID namespace for all items in the PDB file. +struct CrossModuleExport { + support::ulittle32_t Local; + support::ulittle32_t Global; +}; + +struct CrossModuleImport { + support::ulittle32_t ModuleNameOffset; + support::ulittle32_t Count; // Number of elements + // support::ulittle32_t ids[Count]; // id from referenced module +}; + +enum class CodeViewContainer { ObjectFile, Pdb }; + +inline uint32_t alignOf(CodeViewContainer Container) { + if (Container == CodeViewContainer::ObjectFile) + return 1; + return 4; +} } } diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h index b3976826a316c7f85a128c05e9a58927c28d5f9a..db944c7057f728f764585550fcce6fd76e7d6fc7 100644 --- a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -136,6 +136,7 @@ public: Error mapByteVectorTail(ArrayRef &Bytes); Error mapByteVectorTail(std::vector &Bytes); + Error padToAlignment(uint32_t Align); Error skipPadding(); private: diff --git a/include/llvm/DebugInfo/CodeView/CVSymbolTypes.def b/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def similarity index 100% rename from include/llvm/DebugInfo/CodeView/CVSymbolTypes.def rename to include/llvm/DebugInfo/CodeView/CodeViewSymbols.def diff --git a/include/llvm/DebugInfo/CodeView/TypeRecords.def b/include/llvm/DebugInfo/CodeView/CodeViewTypes.def similarity index 100% rename from include/llvm/DebugInfo/CodeView/TypeRecords.def rename to include/llvm/DebugInfo/CodeView/CodeViewTypes.def diff --git a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..9fc90f13d3473480f13223511c3649e8c0f5301c --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h @@ -0,0 +1,98 @@ +//===- DebugChecksumsSubsection.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCHECKSUMSSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCHECKSUMSSUBSECTION_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace codeview { + +class DebugStringTableSubsection; + +struct FileChecksumEntry { + uint32_t FileNameOffset; // Byte offset of filename in global stringtable. + FileChecksumKind Kind; // The type of checksum. + ArrayRef Checksum; // The bytes of the checksum. +}; +} +} + +namespace llvm { +template <> struct VarStreamArrayExtractor { +public: + typedef void ContextType; + + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::FileChecksumEntry &Item); +}; +} + +namespace llvm { +namespace codeview { +class DebugChecksumsSubsectionRef final : public DebugSubsectionRef { + typedef VarStreamArray FileChecksumArray; + typedef FileChecksumArray::Iterator Iterator; + +public: + DebugChecksumsSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::FileChecksums) {} + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::FileChecksums; + } + + bool valid() const { return Checksums.valid(); } + + Error initialize(BinaryStreamReader Reader); + Error initialize(BinaryStreamRef Stream); + + Iterator begin() const { return Checksums.begin(); } + Iterator end() const { return Checksums.end(); } + + const FileChecksumArray &getArray() const { return Checksums; } + +private: + FileChecksumArray Checksums; +}; + +class DebugChecksumsSubsection final : public DebugSubsection { +public: + explicit DebugChecksumsSubsection(DebugStringTableSubsection &Strings); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::FileChecksums; + } + + void addChecksum(StringRef FileName, FileChecksumKind Kind, + ArrayRef Bytes); + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + uint32_t mapChecksumOffset(StringRef FileName) const; + +private: + DebugStringTableSubsection &Strings; + + DenseMap OffsetMap; + uint32_t SerializedSize = 0; + llvm::BumpPtrAllocator Storage; + std::vector Checksums; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..f755b23422c77f9e1b9e68bd722e8c5539ffc272 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h @@ -0,0 +1,64 @@ +//===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSEXSUBSECTION_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +#include + +namespace llvm { +namespace codeview { +class DebugCrossModuleExportsSubsectionRef final : public DebugSubsectionRef { + typedef FixedStreamArray ReferenceArray; + typedef ReferenceArray::Iterator Iterator; + +public: + DebugCrossModuleExportsSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::CrossScopeExports) {} + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::CrossScopeExports; + } + + Error initialize(BinaryStreamReader Reader); + Error initialize(BinaryStreamRef Stream); + + Iterator begin() const { return References.begin(); } + Iterator end() const { return References.end(); } + +private: + FixedStreamArray References; +}; + +class DebugCrossModuleExportsSubsection final : public DebugSubsection { +public: + DebugCrossModuleExportsSubsection() + : DebugSubsection(DebugSubsectionKind::CrossScopeExports) {} + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::CrossScopeExports; + } + + void addMapping(uint32_t Local, uint32_t Global); + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + +private: + std::map Mappings; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..ea3a9a43d50b392b3d62c95cce4bd7c604b380c8 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h @@ -0,0 +1,88 @@ +//===- DebugCrossExSubsection.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGCROSSIMPSUBSECTION_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +namespace llvm { +namespace codeview { + +struct CrossModuleImportItem { + const CrossModuleImport *Header = nullptr; + llvm::FixedStreamArray Imports; +}; +} +} + +namespace llvm { +template <> struct VarStreamArrayExtractor { +public: + typedef void ContextType; + + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::CrossModuleImportItem &Item); +}; +} + +namespace llvm { +namespace codeview { +class DebugStringTableSubsection; + +class DebugCrossModuleImportsSubsectionRef final : public DebugSubsectionRef { + typedef VarStreamArray ReferenceArray; + typedef ReferenceArray::Iterator Iterator; + +public: + DebugCrossModuleImportsSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::CrossScopeImports) {} + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::CrossScopeImports; + } + + Error initialize(BinaryStreamReader Reader); + Error initialize(BinaryStreamRef Stream); + + Iterator begin() const { return References.begin(); } + Iterator end() const { return References.end(); } + +private: + ReferenceArray References; +}; + +class DebugCrossModuleImportsSubsection final : public DebugSubsection { +public: + explicit DebugCrossModuleImportsSubsection( + DebugStringTableSubsection &Strings) + : DebugSubsection(DebugSubsectionKind::CrossScopeImports), + Strings(Strings) {} + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::CrossScopeImports; + } + + void addImport(StringRef Module, uint32_t ImportId); + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + +private: + DebugStringTableSubsection &Strings; + StringMap> Mappings; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h b/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..1e329c7c3f1414f6e79dea3c77cd3cb8f38dc392 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h @@ -0,0 +1,60 @@ +//===- DebugFrameDataSubsection.h ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGFRAMEDATASUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGFRAMEDATASUBSECTION_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { +class DebugFrameDataSubsectionRef final : public DebugSubsectionRef { +public: + DebugFrameDataSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::FrameData) {} + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::FrameData; + } + + Error initialize(BinaryStreamReader Reader); + + FixedStreamArray::Iterator begin() const { return Frames.begin(); } + FixedStreamArray::Iterator end() const { return Frames.end(); } + + const void *getRelocPtr() const { return RelocPtr; } + +private: + const uint32_t *RelocPtr = nullptr; + FixedStreamArray Frames; +}; + +class DebugFrameDataSubsection final : public DebugSubsection { +public: + DebugFrameDataSubsection() + : DebugSubsection(DebugSubsectionKind::FrameData) {} + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::FrameData; + } + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + + void addFrameData(const FrameData &Frame); + void setFrames(ArrayRef Frames); + +private: + std::vector Frames; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..7484af6631051355cd2fb2ce42bd82ea38ab3ef1 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h @@ -0,0 +1,111 @@ +//===- DebugInlineeLinesSubsection.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_BUGINLINEELINESSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_BUGINLINEELINESSUBSECTION_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { + +class DebugInlineeLinesSubsectionRef; +class DebugChecksumsSubsection; + +enum class InlineeLinesSignature : uint32_t { + Normal, // CV_INLINEE_SOURCE_LINE_SIGNATURE + ExtraFiles // CV_INLINEE_SOURCE_LINE_SIGNATURE_EX +}; + +struct InlineeSourceLineHeader { + TypeIndex Inlinee; // ID of the function that was inlined. + support::ulittle32_t FileID; // Offset into FileChecksums subsection. + support::ulittle32_t SourceLineNum; // First line of inlined code. + // If extra files present: + // ulittle32_t ExtraFileCount; + // ulittle32_t Files[]; +}; + +struct InlineeSourceLine { + const InlineeSourceLineHeader *Header; + FixedStreamArray ExtraFiles; +}; +} + +template <> struct VarStreamArrayExtractor { + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::InlineeSourceLine &Item); + bool HasExtraFiles = false; +}; + +namespace codeview { +class DebugInlineeLinesSubsectionRef final : public DebugSubsectionRef { + typedef VarStreamArray LinesArray; + typedef LinesArray::Iterator Iterator; + +public: + DebugInlineeLinesSubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::InlineeLines; + } + + Error initialize(BinaryStreamReader Reader); + bool hasExtraFiles() const; + + Iterator begin() const { return Lines.begin(); } + Iterator end() const { return Lines.end(); } + +private: + InlineeLinesSignature Signature; + VarStreamArray Lines; +}; + +class DebugInlineeLinesSubsection final : public DebugSubsection { +public: + struct Entry { + std::vector ExtraFiles; + InlineeSourceLineHeader Header; + }; + + DebugInlineeLinesSubsection(DebugChecksumsSubsection &Checksums, + bool HasExtraFiles = false); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::InlineeLines; + } + + Error commit(BinaryStreamWriter &Writer) const override; + uint32_t calculateSerializedSize() const override; + + void addInlineSite(TypeIndex FuncId, StringRef FileName, uint32_t SourceLine); + void addExtraFile(StringRef FileName); + + bool hasExtraFiles() const { return HasExtraFiles; } + void setHasExtraFiles(bool Has) { HasExtraFiles = Has; } + + std::vector::const_iterator begin() const { return Entries.begin(); } + std::vector::const_iterator end() const { return Entries.end(); } + +private: + DebugChecksumsSubsection &Checksums; + + bool HasExtraFiles = false; + uint32_t ExtraFileCount = 0; + + std::vector Entries; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..f1feb1336cc53e291317be40b4f47ae165a8a84d --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h @@ -0,0 +1,143 @@ +//===- DebugLinesSubsection.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGLINEFRAGMENT_H +#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGLINEFRAGMENT_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { + +class DebugChecksumsSubsection; +class DebugStringTableSubsection; + +// Corresponds to the `CV_DebugSLinesHeader_t` structure. +struct LineFragmentHeader { + support::ulittle32_t RelocOffset; // Code offset of line contribution. + support::ulittle16_t RelocSegment; // Code segment of line contribution. + support::ulittle16_t Flags; // See LineFlags enumeration. + support::ulittle32_t CodeSize; // Code size of this line contribution. +}; + +// Corresponds to the `CV_DebugSLinesFileBlockHeader_t` structure. +struct LineBlockFragmentHeader { + support::ulittle32_t NameIndex; // Offset of FileChecksum entry in File + // checksums buffer. The checksum entry then + // contains another offset into the string + // table of the actual name. + support::ulittle32_t NumLines; // Number of lines + support::ulittle32_t BlockSize; // Code size of block, in bytes. + // The following two variable length arrays appear immediately after the + // header. The structure definitions follow. + // LineNumberEntry Lines[NumLines]; + // ColumnNumberEntry Columns[NumLines]; +}; + +// Corresponds to `CV_Line_t` structure +struct LineNumberEntry { + support::ulittle32_t Offset; // Offset to start of code bytes for line number + support::ulittle32_t Flags; // Start:24, End:7, IsStatement:1 +}; + +// Corresponds to `CV_Column_t` structure +struct ColumnNumberEntry { + support::ulittle16_t StartColumn; + support::ulittle16_t EndColumn; +}; + +struct LineColumnEntry { + support::ulittle32_t NameIndex; + FixedStreamArray LineNumbers; + FixedStreamArray Columns; +}; + +class LineColumnExtractor { +public: + Error operator()(BinaryStreamRef Stream, uint32_t &Len, + LineColumnEntry &Item); + + const LineFragmentHeader *Header = nullptr; +}; + +class DebugLinesSubsectionRef final : public DebugSubsectionRef { + friend class LineColumnExtractor; + typedef VarStreamArray LineInfoArray; + typedef LineInfoArray::Iterator Iterator; + +public: + DebugLinesSubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::Lines; + } + + Error initialize(BinaryStreamReader Reader); + + Iterator begin() const { return LinesAndColumns.begin(); } + Iterator end() const { return LinesAndColumns.end(); } + + const LineFragmentHeader *header() const { return Header; } + + bool hasColumnInfo() const; + +private: + const LineFragmentHeader *Header = nullptr; + LineInfoArray LinesAndColumns; +}; + +class DebugLinesSubsection final : public DebugSubsection { + struct Block { + Block(uint32_t ChecksumBufferOffset) + : ChecksumBufferOffset(ChecksumBufferOffset) {} + + uint32_t ChecksumBufferOffset; + std::vector Lines; + std::vector Columns; + }; + +public: + DebugLinesSubsection(DebugChecksumsSubsection &Checksums, + DebugStringTableSubsection &Strings); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::Lines; + } + + void createBlock(StringRef FileName); + void addLineInfo(uint32_t Offset, const LineInfo &Line); + void addLineAndColumnInfo(uint32_t Offset, const LineInfo &Line, + uint32_t ColStart, uint32_t ColEnd); + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + + void setRelocationAddress(uint16_t Segment, uint32_t Offset); + void setCodeSize(uint32_t Size); + void setFlags(LineFlags Flags); + + bool hasColumnInfo() const; + +private: + DebugChecksumsSubsection &Checksums; + + uint32_t RelocOffset = 0; + uint16_t RelocSegment = 0; + uint32_t CodeSize = 0; + LineFlags Flags = LF_None; + std::vector Blocks; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..be0a2344965b1cb4a2052a63a65a78e8e9919458 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h @@ -0,0 +1,89 @@ +//===- DebugStringTableSubsection.h - CodeView String Table -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSTRINGTABLESUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSTRINGTABLESUBSECTION_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Error.h" + +#include + +namespace llvm { + +class BinaryStreamReader; +class BinaryStreamRef; +class BinaryStreamWriter; + +namespace codeview { + +/// Represents a read-only view of a CodeView string table. This is a very +/// simple flat buffer consisting of null-terminated strings, where strings +/// are retrieved by their offset in the buffer. DebugStringTableSubsectionRef +/// does not own the underlying storage for the buffer. +class DebugStringTableSubsectionRef : public DebugSubsectionRef { +public: + DebugStringTableSubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::StringTable; + } + + Error initialize(BinaryStreamRef Contents); + Error initialize(BinaryStreamReader &Reader); + + Expected getString(uint32_t Offset) const; + + bool valid() const { return Stream.valid(); } + + BinaryStreamRef getBuffer() const { return Stream; } + +private: + BinaryStreamRef Stream; +}; + +/// Represents a read-write view of a CodeView string table. +/// DebugStringTableSubsection owns the underlying storage for the table, and is +/// capable of serializing the string table into a format understood by +/// DebugStringTableSubsectionRef. +class DebugStringTableSubsection : public DebugSubsection { +public: + DebugStringTableSubsection(); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::StringTable; + } + + // If string S does not exist in the string table, insert it. + // Returns the ID for S. + uint32_t insert(StringRef S); + + // Return the ID for string S. Assumes S exists in the table. + uint32_t getStringId(StringRef S) const; + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + + uint32_t size() const; + + StringMap::const_iterator begin() const { return Strings.begin(); } + + StringMap::const_iterator end() const { return Strings.end(); } + +private: + StringMap Strings; + uint32_t StringSize = 1; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..e427e0006a55be7e83d161b9cafd084f8830af20 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugSubsection.h @@ -0,0 +1,52 @@ +//===- DebugSubsection.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H +#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Casting.h" + +namespace llvm { +namespace codeview { + +class DebugSubsectionRef { +public: + explicit DebugSubsectionRef(DebugSubsectionKind Kind) : Kind(Kind) {} + virtual ~DebugSubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { return true; } + + DebugSubsectionKind kind() const { return Kind; } + +protected: + DebugSubsectionKind Kind; +}; + +class DebugSubsection { +public: + explicit DebugSubsection(DebugSubsectionKind Kind) : Kind(Kind) {} + virtual ~DebugSubsection(); + + static bool classof(const DebugSubsection *S) { return true; } + + DebugSubsectionKind kind() const { return Kind; } + + virtual Error commit(BinaryStreamWriter &Writer) const = 0; + virtual uint32_t calculateSerializedSize() const = 0; + +protected: + DebugSubsectionKind Kind; +}; + +} // namespace codeview +} // namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENT_H diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h new file mode 100644 index 0000000000000000000000000000000000000000..6947317420643638e5b429311a3460ab939fc152 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h @@ -0,0 +1,83 @@ +//===- DebugSubsection.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H +#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { + +class DebugSubsection; + +// Corresponds to the `CV_DebugSSubsectionHeader_t` structure. +struct DebugSubsectionHeader { + support::ulittle32_t Kind; // codeview::DebugSubsectionKind enum + support::ulittle32_t Length; // number of bytes occupied by this record. +}; + +class DebugSubsectionRecord { +public: + DebugSubsectionRecord(); + DebugSubsectionRecord(DebugSubsectionKind Kind, BinaryStreamRef Data, + CodeViewContainer Container); + + static Error initialize(BinaryStreamRef Stream, DebugSubsectionRecord &Info, + CodeViewContainer Container); + + uint32_t getRecordLength() const; + DebugSubsectionKind kind() const; + BinaryStreamRef getRecordData() const; + +private: + CodeViewContainer Container; + DebugSubsectionKind Kind; + BinaryStreamRef Data; +}; + +class DebugSubsectionRecordBuilder { +public: + DebugSubsectionRecordBuilder(std::shared_ptr Subsection, + CodeViewContainer Container); + uint32_t calculateSerializedLength(); + Error commit(BinaryStreamWriter &Writer) const; + +private: + std::shared_ptr Subsection; + CodeViewContainer Container; +}; + +} // namespace codeview + +template <> struct VarStreamArrayExtractor { + Error operator()(BinaryStreamRef Stream, uint32_t &Length, + codeview::DebugSubsectionRecord &Info) { + // FIXME: We need to pass the container type through to this function. In + // practice this isn't super important since the subsection header describes + // its length and we can just skip it. It's more important when writing. + if (auto EC = codeview::DebugSubsectionRecord::initialize( + Stream, Info, codeview::CodeViewContainer::Pdb)) + return EC; + Length = alignTo(Info.getRecordLength(), 4); + return Error::success(); + } +}; + +namespace codeview { +typedef VarStreamArray DebugSubsectionArray; +} +} // namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTRECORD_H diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h new file mode 100644 index 0000000000000000000000000000000000000000..75f749dfa933407b5c858f7ba7b8ce5403aa7e51 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h @@ -0,0 +1,114 @@ +//===- DebugSubsectionVisitor.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H +#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { + +namespace codeview { + +class DebugChecksumsSubsectionRef; +class DebugSubsectionRecord; +class DebugInlineeLinesSubsectionRef; +class DebugCrossModuleExportsSubsectionRef; +class DebugCrossModuleImportsSubsectionRef; +class DebugFrameDataSubsectionRef; +class DebugLinesSubsectionRef; +class DebugStringTableSubsectionRef; +class DebugSymbolRVASubsectionRef; +class DebugSymbolsSubsectionRef; +class DebugUnknownSubsectionRef; +class StringsAndChecksumsRef; + +class DebugSubsectionVisitor { +public: + virtual ~DebugSubsectionVisitor() = default; + + virtual Error visitUnknown(DebugUnknownSubsectionRef &Unknown) { + return Error::success(); + } + virtual Error visitLines(DebugLinesSubsectionRef &Lines, + const StringsAndChecksumsRef &State) = 0; + virtual Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums, + const StringsAndChecksumsRef &State) = 0; + virtual Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees, + const StringsAndChecksumsRef &State) = 0; + virtual Error + visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &CSE, + const StringsAndChecksumsRef &State) = 0; + virtual Error + visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &CSE, + const StringsAndChecksumsRef &State) = 0; + + virtual Error visitStringTable(DebugStringTableSubsectionRef &ST, + const StringsAndChecksumsRef &State) = 0; + + virtual Error visitSymbols(DebugSymbolsSubsectionRef &CSE, + const StringsAndChecksumsRef &State) = 0; + + virtual Error visitFrameData(DebugFrameDataSubsectionRef &FD, + const StringsAndChecksumsRef &State) = 0; + virtual Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &RVAs, + const StringsAndChecksumsRef &State) = 0; +}; + +Error visitDebugSubsection(const DebugSubsectionRecord &R, + DebugSubsectionVisitor &V, + const StringsAndChecksumsRef &State); + +namespace detail { +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V, + StringsAndChecksumsRef &State) { + State.initialize(std::forward(FragmentRange)); + + for (const DebugSubsectionRecord &L : FragmentRange) { + if (auto EC = visitDebugSubsection(L, V, State)) + return EC; + } + return Error::success(); +} +} // namespace detail + +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V) { + StringsAndChecksumsRef State; + return detail::visitDebugSubsections(std::forward(FragmentRange), V, + State); +} + +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V, + const DebugStringTableSubsectionRef &Strings) { + StringsAndChecksumsRef State(Strings); + return detail::visitDebugSubsections(std::forward(FragmentRange), V, + State); +} + +template +Error visitDebugSubsections(T &&FragmentRange, DebugSubsectionVisitor &V, + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums) { + StringsAndChecksumsRef State(Strings, Checksums); + return detail::visitDebugSubsections(std::forward(FragmentRange), V, + State); +} + +} // end namespace codeview + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..ad58a293cb09aed7a0961ba1fe5c48e42428afba --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h @@ -0,0 +1,59 @@ +//===- DebugSymbolRVASubsection.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLRVASUBSECTION_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { + +class DebugSymbolRVASubsectionRef final : public DebugSubsectionRef { +public: + typedef FixedStreamArray ArrayType; + + DebugSymbolRVASubsectionRef(); + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::CoffSymbolRVA; + } + + ArrayType::Iterator begin() const { return RVAs.begin(); } + ArrayType::Iterator end() const { return RVAs.end(); } + + Error initialize(BinaryStreamReader &Reader); + +private: + ArrayType RVAs; +}; + +class DebugSymbolRVASubsection final : public DebugSubsection { +public: + DebugSymbolRVASubsection(); + + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::CoffSymbolRVA; + } + + Error commit(BinaryStreamWriter &Writer) const override; + uint32_t calculateSerializedSize() const override; + + void addRVA(uint32_t RVA) { RVAs.push_back(support::ulittle32_t(RVA)); } + +private: + std::vector RVAs; +}; +} // namespace codeview +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..dfda7deb6cb41934d1d9afe8fdb6e62e842d47b9 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h @@ -0,0 +1,56 @@ +//===- DebugSymbolsSubsection.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { +class DebugSymbolsSubsectionRef final : public DebugSubsectionRef { +public: + DebugSymbolsSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::Symbols) {} + + static bool classof(const DebugSubsectionRef *S) { + return S->kind() == DebugSubsectionKind::Symbols; + } + + Error initialize(BinaryStreamReader Reader); + + CVSymbolArray::Iterator begin() const { return Records.begin(); } + CVSymbolArray::Iterator end() const { return Records.end(); } + +private: + CVSymbolArray Records; +}; + +class DebugSymbolsSubsection final : public DebugSubsection { +public: + DebugSymbolsSubsection() : DebugSubsection(DebugSubsectionKind::Symbols) {} + static bool classof(const DebugSubsection *S) { + return S->kind() == DebugSubsectionKind::Symbols; + } + + uint32_t calculateSerializedSize() const override; + Error commit(BinaryStreamWriter &Writer) const override; + + void addSymbol(CVSymbol Symbol); + +private: + uint32_t Length = 0; + std::vector Records; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h b/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h new file mode 100644 index 0000000000000000000000000000000000000000..ea9a96ca8d68e27e61e3d8dfb68b235598462dd4 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/DebugUnknownSubsection.h @@ -0,0 +1,32 @@ +//===- DebugUnknownSubsection.h -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGUNKNOWNFRAGMENT_H +#define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGUNKNOWNFRAGMENT_H + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/Support/BinaryStreamRef.h" + +namespace llvm { +namespace codeview { + +class DebugUnknownSubsectionRef final : public DebugSubsectionRef { +public: + DebugUnknownSubsectionRef(DebugSubsectionKind Kind, BinaryStreamRef Data) + : DebugSubsectionRef(Kind), Data(Data) {} + + BinaryStreamRef getData() const { return Data; } + +private: + BinaryStreamRef Data; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/EnumTables.h b/include/llvm/DebugInfo/CodeView/EnumTables.h index 10d1c581a196601bbd8bafd21cacd9f8f035c0c5..013e440613fc1a31bd4e25b5389ee6f396714965 100644 --- a/include/llvm/DebugInfo/CodeView/EnumTables.h +++ b/include/llvm/DebugInfo/CodeView/EnumTables.h @@ -11,8 +11,8 @@ #define LLVM_DEBUGINFO_CODEVIEW_ENUMTABLES_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ScopedPrinter.h" #include diff --git a/include/llvm/DebugInfo/CodeView/Formatters.h b/include/llvm/DebugInfo/CodeView/Formatters.h index 37a91098a8b65e3e625b88e21e26adce1a02a11e..1fbb0dd6f9b00321a2ccee26e4f902ee2e42f044 100644 --- a/include/llvm/DebugInfo/CodeView/Formatters.h +++ b/include/llvm/DebugInfo/CodeView/Formatters.h @@ -12,7 +12,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatProviders.h" +#include "llvm/Support/FormatVariadic.h" namespace llvm { namespace codeview { @@ -35,6 +38,20 @@ inline detail::GuidAdapter fmt_guid(ArrayRef Item) { return detail::GuidAdapter(Item); } } + +template <> struct format_provider { +public: + static void format(const codeview::TypeIndex &V, llvm::raw_ostream &Stream, + StringRef Style) { + if (V.isNoneType()) + Stream << ""; + else { + Stream << formatv("{0:X+4}", V.getIndex()); + if (V.isSimple()) + Stream << " (" << codeview::TypeIndex::simpleTypeName(V) << ")"; + } + } +}; } #endif diff --git a/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h new file mode 100644 index 0000000000000000000000000000000000000000..950815ef897e4d22a686fc50bf5a067434ecd074 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h @@ -0,0 +1,108 @@ +//===- LazyRandomTypeCollection.h ---------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_LAZYRANDOMTYPECOLLECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_LAZYRANDOMTYPECOLLECTION_H + +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeDatabase.h" +#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { + +class TypeDatabase; +class TypeVisitorCallbacks; + +/// \brief Provides amortized O(1) random access to a CodeView type stream. +/// Normally to access a type from a type stream, you must know its byte +/// offset into the type stream, because type records are variable-lengthed. +/// However, this is not the way we prefer to access them. For example, given +/// a symbol record one of the fields may be the TypeIndex of the symbol's +/// type record. Or given a type record such as an array type, there might +/// be a TypeIndex for the element type. Sequential access is perfect when +/// we're just dumping every entry, but it's very poor for real world usage. +/// +/// Type streams in PDBs contain an additional field which is a list of pairs +/// containing indices and their corresponding offsets, roughly every ~8KB of +/// record data. This general idea need not be confined to PDBs though. By +/// supplying such an array, the producer of a type stream can allow the +/// consumer much better access time, because the consumer can find the nearest +/// index in this array, and do a linear scan forward only from there. +/// +/// LazyRandomTypeCollection implements this algorithm, but additionally goes +/// one step further by caching offsets of every record that has been visited at +/// least once. This way, even repeated visits of the same record will never +/// require more than one linear scan. For a type stream of N elements divided +/// into M chunks of roughly equal size, this yields a worst case lookup time +/// of O(N/M) and an amortized time of O(1). +class LazyRandomTypeCollection : public TypeCollection { + typedef FixedStreamArray PartialOffsetArray; + +public: + explicit LazyRandomTypeCollection(uint32_t RecordCountHint); + LazyRandomTypeCollection(StringRef Data, uint32_t RecordCountHint); + LazyRandomTypeCollection(ArrayRef Data, uint32_t RecordCountHint); + LazyRandomTypeCollection(const CVTypeArray &Types, uint32_t RecordCountHint, + PartialOffsetArray PartialOffsets); + LazyRandomTypeCollection(const CVTypeArray &Types, uint32_t RecordCountHint); + + void reset(ArrayRef Data); + void reset(StringRef Data); + + CVType getType(TypeIndex Index) override; + StringRef getTypeName(TypeIndex Index) override; + bool contains(TypeIndex Index) override; + uint32_t size() override; + uint32_t capacity() override; + Optional getFirst() override; + Optional getNext(TypeIndex Prev) override; + +private: + const TypeDatabase &database() const { return Database; } + Error ensureTypeExists(TypeIndex Index); + + Error visitRangeForType(TypeIndex TI); + Error fullScanForType(TypeIndex TI); + Error visitRange(TypeIndex Begin, uint32_t BeginOffset, TypeIndex End); + Error visitOneRecord(TypeIndex TI, uint32_t Offset, CVType &Record); + + BumpPtrAllocator Allocator; + StringSaver NameStorage; + + SmallVector TypeNames; + + /// Visited records get automatically added to the type database. + TypeDatabase Database; + + /// The type array to allow random access visitation of. + CVTypeArray Types; + + /// The database visitor which adds new records to the database. + TypeDatabaseVisitor DatabaseVisitor; + + /// A vector mapping type indices to type offset. For every record that has + /// been visited, contains the absolute offset of that record in the record + /// array. + std::vector KnownOffsets; + + /// An array of index offsets for the given type stream, allowing log(N) + /// lookups of a type record by index. Similar to KnownOffsets but only + /// contains offsets for some type indices, some of which may not have + /// ever been visited. + PartialOffsetArray PartialOffsets; +}; + +} // end namespace codeview +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_CODEVIEW_LAZYRANDOMTYPECOLLECTION_H diff --git a/include/llvm/DebugInfo/CodeView/Line.h b/include/llvm/DebugInfo/CodeView/Line.h index 975b503fe30b7b08180649426f3aa79d46b9c520..ac229c3375137dda10dd88a80df18664d6604a3a 100644 --- a/include/llvm/DebugInfo/CodeView/Line.h +++ b/include/llvm/DebugInfo/CodeView/Line.h @@ -127,27 +127,6 @@ public: bool isNeverStepInto() const { return LineInf.isNeverStepInto(); } }; -enum class InlineeLinesSignature : uint32_t { - Normal, // CV_INLINEE_SOURCE_LINE_SIGNATURE - ExtraFiles // CV_INLINEE_SOURCE_LINE_SIGNATURE_EX -}; - -struct InlineeSourceLine { - TypeIndex Inlinee; // ID of the function that was inlined. - ulittle32_t FileID; // Offset into FileChecksums subsection. - ulittle32_t SourceLineNum; // First line of inlined code. - // If extra files present: - // ulittle32_t ExtraFileCount; - // ulittle32_t Files[]; -}; - -struct FileChecksum { - ulittle32_t FileNameOffset; // Byte offset of filename in global string table. - uint8_t ChecksumSize; // Number of bytes of checksum. - uint8_t ChecksumKind; // FileChecksumKind - // Checksum bytes follow. -}; - } // namespace codeview } // namespace llvm diff --git a/include/llvm/DebugInfo/CodeView/ModuleSubstream.h b/include/llvm/DebugInfo/CodeView/ModuleSubstream.h deleted file mode 100644 index a1c5c93cc3f8e14c2d0045e20f2297484793a6af..0000000000000000000000000000000000000000 --- a/include/llvm/DebugInfo/CodeView/ModuleSubstream.h +++ /dev/null @@ -1,87 +0,0 @@ -//===- ModuleSubstream.h ----------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAM_H -#define LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAM_H - -#include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamRef.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" - -namespace llvm { -namespace codeview { - -// Corresponds to the `CV_DebugSSubsectionHeader_t` structure. -struct ModuleSubsectionHeader { - support::ulittle32_t Kind; // codeview::ModuleSubstreamKind enum - support::ulittle32_t Length; // number of bytes occupied by this record. -}; - -// Corresponds to the `CV_DebugSLinesHeader_t` structure. -struct LineSubstreamHeader { - support::ulittle32_t RelocOffset; // Code offset of line contribution. - support::ulittle16_t RelocSegment; // Code segment of line contribution. - support::ulittle16_t Flags; // See LineFlags enumeration. - support::ulittle32_t CodeSize; // Code size of this line contribution. -}; - -// Corresponds to the `CV_DebugSLinesFileBlockHeader_t` structure. -struct LineFileBlockHeader { - support::ulittle32_t NameIndex; // Index in DBI name buffer of filename. - support::ulittle32_t NumLines; // Number of lines - support::ulittle32_t BlockSize; // Code size of block, in bytes. - // The following two variable length arrays appear immediately after the - // header. The structure definitions follow. - // LineNumberEntry Lines[NumLines]; - // ColumnNumberEntry Columns[NumLines]; -}; - -// Corresponds to `CV_Line_t` structure -struct LineNumberEntry { - support::ulittle32_t Offset; // Offset to start of code bytes for line number - support::ulittle32_t Flags; // Start:24, End:7, IsStatement:1 -}; - -// Corresponds to `CV_Column_t` structure -struct ColumnNumberEntry { - support::ulittle16_t StartColumn; - support::ulittle16_t EndColumn; -}; - -class ModuleSubstream { -public: - ModuleSubstream(); - ModuleSubstream(ModuleSubstreamKind Kind, BinaryStreamRef Data); - static Error initialize(BinaryStreamRef Stream, ModuleSubstream &Info); - uint32_t getRecordLength() const; - ModuleSubstreamKind getSubstreamKind() const; - BinaryStreamRef getRecordData() const; - -private: - ModuleSubstreamKind Kind; - BinaryStreamRef Data; -}; - -typedef VarStreamArray ModuleSubstreamArray; -} // namespace codeview - -template <> struct VarStreamArrayExtractor { - Error operator()(BinaryStreamRef Stream, uint32_t &Length, - codeview::ModuleSubstream &Info) const { - if (auto EC = codeview::ModuleSubstream::initialize(Stream, Info)) - return EC; - Length = Info.getRecordLength(); - return Error::success(); - } -}; -} // namespace llvm - -#endif // LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAM_H diff --git a/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h b/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h deleted file mode 100644 index 1a40654a3f3379eb1e2a1da77d899bfb8e946091..0000000000000000000000000000000000000000 --- a/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h +++ /dev/null @@ -1,132 +0,0 @@ -//===- ModuleSubstreamVisitor.h ---------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAMVISITOR_H -#define LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAMVISITOR_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/CodeView/Line.h" -#include "llvm/DebugInfo/CodeView/ModuleSubstream.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/BinaryStreamRef.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" -#include - -namespace llvm { - -namespace codeview { - -struct LineColumnEntry { - support::ulittle32_t NameIndex; - FixedStreamArray LineNumbers; - FixedStreamArray Columns; -}; - -struct FileChecksumEntry { - uint32_t FileNameOffset; // Byte offset of filename in global stringtable. - FileChecksumKind Kind; // The type of checksum. - ArrayRef Checksum; // The bytes of the checksum. -}; - -typedef VarStreamArray LineInfoArray; -typedef VarStreamArray FileChecksumArray; - -class IModuleSubstreamVisitor { -public: - virtual ~IModuleSubstreamVisitor() = default; - - virtual Error visitUnknown(ModuleSubstreamKind Kind, - BinaryStreamRef Data) = 0; - virtual Error visitSymbols(BinaryStreamRef Data); - virtual Error visitLines(BinaryStreamRef Data, - const LineSubstreamHeader *Header, - const LineInfoArray &Lines); - virtual Error visitStringTable(BinaryStreamRef Data); - virtual Error visitFileChecksums(BinaryStreamRef Data, - const FileChecksumArray &Checksums); - virtual Error visitFrameData(BinaryStreamRef Data); - virtual Error visitInlineeLines(BinaryStreamRef Data); - virtual Error visitCrossScopeImports(BinaryStreamRef Data); - virtual Error visitCrossScopeExports(BinaryStreamRef Data); - virtual Error visitILLines(BinaryStreamRef Data); - virtual Error visitFuncMDTokenMap(BinaryStreamRef Data); - virtual Error visitTypeMDTokenMap(BinaryStreamRef Data); - virtual Error visitMergedAssemblyInput(BinaryStreamRef Data); - virtual Error visitCoffSymbolRVA(BinaryStreamRef Data); -}; - -Error visitModuleSubstream(const ModuleSubstream &R, - IModuleSubstreamVisitor &V); -} // end namespace codeview - -template <> class VarStreamArrayExtractor { -public: - VarStreamArrayExtractor(const codeview::LineSubstreamHeader *Header) - : Header(Header) {} - - Error operator()(BinaryStreamRef Stream, uint32_t &Len, - codeview::LineColumnEntry &Item) const { - using namespace codeview; - const LineFileBlockHeader *BlockHeader; - BinaryStreamReader Reader(Stream); - if (auto EC = Reader.readObject(BlockHeader)) - return EC; - bool HasColumn = Header->Flags & LineFlags::HaveColumns; - uint32_t LineInfoSize = - BlockHeader->NumLines * - (sizeof(LineNumberEntry) + (HasColumn ? sizeof(ColumnNumberEntry) : 0)); - if (BlockHeader->BlockSize < sizeof(LineFileBlockHeader)) - return make_error(cv_error_code::corrupt_record, - "Invalid line block record size"); - uint32_t Size = BlockHeader->BlockSize - sizeof(LineFileBlockHeader); - if (LineInfoSize > Size) - return make_error(cv_error_code::corrupt_record, - "Invalid line block record size"); - // The value recorded in BlockHeader->BlockSize includes the size of - // LineFileBlockHeader. - Len = BlockHeader->BlockSize; - Item.NameIndex = BlockHeader->NameIndex; - if (auto EC = Reader.readArray(Item.LineNumbers, BlockHeader->NumLines)) - return EC; - if (HasColumn) { - if (auto EC = Reader.readArray(Item.Columns, BlockHeader->NumLines)) - return EC; - } - return Error::success(); - } - -private: - const codeview::LineSubstreamHeader *Header; -}; - -template <> class VarStreamArrayExtractor { -public: - Error operator()(BinaryStreamRef Stream, uint32_t &Len, - codeview::FileChecksumEntry &Item) const { - using namespace codeview; - const FileChecksum *Header; - BinaryStreamReader Reader(Stream); - if (auto EC = Reader.readObject(Header)) - return EC; - Item.FileNameOffset = Header->FileNameOffset; - Item.Kind = static_cast(Header->ChecksumKind); - if (auto EC = Reader.readBytes(Item.Checksum, Header->ChecksumSize)) - return EC; - Len = sizeof(FileChecksum) + Header->ChecksumSize; - return Error::success(); - } -}; - -} // end namespace llvm - -#endif // LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAMVISITOR_H diff --git a/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h b/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h new file mode 100644 index 0000000000000000000000000000000000000000..708b317164fc7ba0bb1d1e5a02d4cde670558c26 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h @@ -0,0 +1,106 @@ +//===- StringsAndChecksums.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_STRINGS_AND_CHECKSUMS_H +#define LLVM_DEBUGINFO_CODEVIEW_STRINGS_AND_CHECKSUMS_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" + +#include + +namespace llvm { +namespace codeview { + +class DebugSubsectionRecord; +class DebugChecksumsSubsectionRef; +class DebugStringTableSubsectionRef; +class DebugChecksumsSubsection; +class DebugStringTableSubsection; + +class StringsAndChecksumsRef { +public: + // If no subsections are known about initially, we find as much as we can. + StringsAndChecksumsRef(); + + // If only a string table subsection is given, we find a checksums subsection. + explicit StringsAndChecksumsRef(const DebugStringTableSubsectionRef &Strings); + + // If both subsections are given, we don't need to find anything. + StringsAndChecksumsRef(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums); + + void setChecksums(const DebugChecksumsSubsectionRef &CS); + + template void initialize(T &&FragmentRange) { + for (const DebugSubsectionRecord &R : FragmentRange) { + if (Strings && Checksums) + return; + if (R.kind() == DebugSubsectionKind::FileChecksums) { + initializeChecksums(R); + continue; + } + if (R.kind() == DebugSubsectionKind::StringTable && !Strings) { + // While in practice we should never encounter a string table even + // though the string table is already initialized, in theory it's + // possible. PDBs are supposed to have one global string table and + // then this subsection should not appear. Whereas object files are + // supposed to have this subsection appear exactly once. However, + // for testing purposes it's nice to be able to test this subsection + // independently of one format or the other, so for some tests we + // manually construct a PDB that contains this subsection in addition + // to a global string table. + initializeStrings(R); + continue; + } + } + } + + const DebugStringTableSubsectionRef &strings() const { return *Strings; } + const DebugChecksumsSubsectionRef &checksums() const { return *Checksums; } + + bool hasStrings() const { return Strings != nullptr; } + bool hasChecksums() const { return Checksums != nullptr; } + +private: + void initializeStrings(const DebugSubsectionRecord &SR); + void initializeChecksums(const DebugSubsectionRecord &FCR); + + std::unique_ptr OwnedStrings; + std::unique_ptr OwnedChecksums; + + const DebugStringTableSubsectionRef *Strings = nullptr; + const DebugChecksumsSubsectionRef *Checksums = nullptr; +}; + +class StringsAndChecksums { +public: + using StringsPtr = std::shared_ptr; + using ChecksumsPtr = std::shared_ptr; + // If no subsections are known about initially, we find as much as we can. + StringsAndChecksums() {} + + void setStrings(const StringsPtr &SP) { Strings = SP; } + void setChecksums(const ChecksumsPtr &CP) { Checksums = CP; } + + const StringsPtr &strings() const { return Strings; } + const ChecksumsPtr &checksums() const { return Checksums; } + + bool hasStrings() const { return Strings != nullptr; } + bool hasChecksums() const { return Checksums != nullptr; } + +private: + StringsPtr Strings; + ChecksumsPtr Checksums; +}; + +} // namespace codeview +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h index c1a5152930fff26458cd4e925f6a6edb3331d121..7080b04807570a218278f1c07195c8b0fc86654d 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h +++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h @@ -24,9 +24,9 @@ namespace codeview { class SymbolVisitorDelegate; class SymbolDeserializer : public SymbolVisitorCallbacks { struct MappingInfo { - explicit MappingInfo(ArrayRef RecordData) + MappingInfo(ArrayRef RecordData, CodeViewContainer Container) : Stream(RecordData, llvm::support::little), Reader(Stream), - Mapping(Reader) {} + Mapping(Reader, Container) {} BinaryByteStream Stream; BinaryStreamReader Reader; @@ -34,12 +34,26 @@ class SymbolDeserializer : public SymbolVisitorCallbacks { }; public: - explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate) - : Delegate(Delegate) {} + template static Error deserializeAs(CVSymbol Symbol, T &Record) { + // If we're just deserializing one record, then don't worry about alignment + // as there's nothing that comes after. + SymbolDeserializer S(nullptr, CodeViewContainer::ObjectFile); + if (auto EC = S.visitSymbolBegin(Symbol)) + return EC; + if (auto EC = S.visitKnownRecord(Symbol, Record)) + return EC; + if (auto EC = S.visitSymbolEnd(Symbol)) + return EC; + return Error::success(); + } + + explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate, + CodeViewContainer Container) + : Delegate(Delegate), Container(Container) {} Error visitSymbolBegin(CVSymbol &Record) override { assert(!Mapping && "Already in a symbol mapping!"); - Mapping = llvm::make_unique(Record.content()); + Mapping = llvm::make_unique(Record.content(), Container); return Mapping->Mapping.visitSymbolBegin(Record); } Error visitSymbolEnd(CVSymbol &Record) override { @@ -54,7 +68,7 @@ public: return visitKnownRecordImpl(CVR, Record); \ } #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" private: template Error visitKnownRecordImpl(CVSymbol &CVR, T &Record) { @@ -67,6 +81,7 @@ private: } SymbolVisitorDelegate *Delegate; + CodeViewContainer Container; std::unique_ptr Mapping; }; } diff --git a/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/include/llvm/DebugInfo/CodeView/SymbolDumper.h index a5419b37e7761a1e586eeed278efc80be504ef83..293daa851bddfe46bc302d1eb5a6279acf446933 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolDumper.h +++ b/include/llvm/DebugInfo/CodeView/SymbolDumper.h @@ -20,15 +20,17 @@ namespace llvm { class ScopedPrinter; namespace codeview { -class TypeDatabase; +class TypeCollection; /// Dumper for CodeView symbol streams found in COFF object files and PDB files. class CVSymbolDumper { public: - CVSymbolDumper(ScopedPrinter &W, TypeDatabase &TypeDB, + CVSymbolDumper(ScopedPrinter &W, TypeCollection &Types, + CodeViewContainer Container, std::unique_ptr ObjDelegate, bool PrintRecordBytes) - : W(W), TypeDB(TypeDB), ObjDelegate(std::move(ObjDelegate)), + : W(W), Types(Types), Container(Container), + ObjDelegate(std::move(ObjDelegate)), PrintRecordBytes(PrintRecordBytes) {} /// Dumps one type record. Returns false if there was a type parsing error, @@ -43,7 +45,8 @@ public: private: ScopedPrinter &W; - TypeDatabase &TypeDB; + TypeCollection &Types; + CodeViewContainer Container; std::unique_ptr ObjDelegate; bool PrintRecordBytes; diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h index c5a5549bf818ae74e007cb20d822bd67b42a1e7f..5f85ed28cb3a1968facdcad0e11914f8e048388f 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -35,8 +35,6 @@ protected: public: SymbolRecordKind getKind() const { return Kind; } - -private: SymbolRecordKind Kind; }; @@ -365,7 +363,7 @@ public: : SymbolRecord(SymbolRecordKind::PublicSym32), RecordOffset(RecordOffset) {} - uint32_t Index; + TypeIndex Index; uint32_t Offset; uint16_t Segment; StringRef Name; @@ -381,7 +379,7 @@ public: : SymbolRecord(SymbolRecordKind::RegisterSym), RecordOffset(RecordOffset) {} - uint32_t Index; + TypeIndex Index; RegisterId Register; StringRef Name; @@ -681,7 +679,7 @@ public: : SymbolRecord(SymbolRecordKind::FileStaticSym), RecordOffset(RecordOffset) {} - uint32_t Index; + TypeIndex Index; uint32_t ModFilenameOffset; LocalSymFlags Flags; StringRef Name; @@ -816,7 +814,7 @@ public: uint32_t CodeOffset; uint16_t Register; - uint8_t CookieKind; + FrameCookieKind CookieKind; uint8_t Flags; uint32_t RecordOffset; @@ -873,7 +871,7 @@ public: uint32_t Offset; TypeIndex Type; - uint16_t Register; + RegisterId Register; StringRef Name; uint32_t RecordOffset; diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h index 0a1837a0d935f7e78273f6cc20076839b48520b1..391e8f127665a790f64a280188980f990f8c3633 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h +++ b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h @@ -20,8 +20,12 @@ class BinaryStreamWriter; namespace codeview { class SymbolRecordMapping : public SymbolVisitorCallbacks { public: - explicit SymbolRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {} - explicit SymbolRecordMapping(BinaryStreamWriter &Writer) : IO(Writer) {} + explicit SymbolRecordMapping(BinaryStreamReader &Reader, + CodeViewContainer Container) + : IO(Reader), Container(Container) {} + explicit SymbolRecordMapping(BinaryStreamWriter &Writer, + CodeViewContainer Container) + : IO(Writer), Container(Container) {} Error visitSymbolBegin(CVSymbol &Record) override; Error visitSymbolEnd(CVSymbol &Record) override; @@ -29,12 +33,13 @@ public: #define SYMBOL_RECORD(EnumName, EnumVal, Name) \ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override; #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" private: Optional Kind; CodeViewRecordIO IO; + CodeViewContainer Container; }; } } diff --git a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h index f2e99bd8332605daa18373d497cb97d0db4648eb..42adbdb4e20f315c5437e17af94304057a462329 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h +++ b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h @@ -45,7 +45,19 @@ class SymbolSerializer : public SymbolVisitorCallbacks { } public: - explicit SymbolSerializer(BumpPtrAllocator &Storage); + template + static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage, + CodeViewContainer Container) { + CVSymbol Result; + Result.Type = static_cast(Sym.Kind); + SymbolSerializer Serializer(Storage, Container); + consumeError(Serializer.visitSymbolBegin(Result)); + consumeError(Serializer.visitKnownRecord(Result, Sym)); + consumeError(Serializer.visitSymbolEnd(Result)); + return Result; + } + + SymbolSerializer(BumpPtrAllocator &Storage, CodeViewContainer Container); virtual Error visitSymbolBegin(CVSymbol &Record) override; virtual Error visitSymbolEnd(CVSymbol &Record) override; @@ -55,7 +67,7 @@ public: return visitKnownRecordImpl(CVR, Record); \ } #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" private: template diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h index 96a93bf7e576406aa570c286a0f49b834f10a4d3..5f4205bd6e082b1c9f672ac1aa7628f534d546f7 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h +++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h @@ -59,7 +59,7 @@ public: return Error::success(); \ } #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" private: std::vector Pipeline; diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h index aaa9d2e85e136c1189887600816f0c58a648a7b9..2ef7eabdaa9d34c4c78fd12df322010d40851c32 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h +++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h @@ -39,7 +39,7 @@ public: return Error::success(); \ } #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" }; } // end namespace codeview diff --git a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h index 2bef3f61adfcccecfca550db2d418460dfdd1374..a2a3c6f18fba1594ca65d0018d07e4e453c9c812 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h +++ b/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h @@ -19,13 +19,15 @@ class BinaryStreamReader; namespace codeview { +class DebugStringTableSubsectionRef; + class SymbolVisitorDelegate { public: virtual ~SymbolVisitorDelegate() = default; virtual uint32_t getRecordOffset(BinaryStreamReader Reader) = 0; virtual StringRef getFileNameForFileOffset(uint32_t FileOffset) = 0; - virtual StringRef getStringTable() = 0; + virtual DebugStringTableSubsectionRef getStringTable() = 0; }; } // end namespace codeview diff --git a/include/llvm/DebugInfo/CodeView/TypeCollection.h b/include/llvm/DebugInfo/CodeView/TypeCollection.h new file mode 100644 index 0000000000000000000000000000000000000000..0f856f57a72751720cfe77aa34d5775eac697b19 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeCollection.h @@ -0,0 +1,38 @@ +//===- TypeCollection.h - A collection of CodeView type records -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPECOLLECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPECOLLECTION_H + +#include "llvm/ADT/StringRef.h" + +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" + +namespace llvm { +namespace codeview { +class TypeCollection { +public: + virtual ~TypeCollection() = default; + + bool empty() { return size() == 0; } + + virtual Optional getFirst() = 0; + virtual Optional getNext(TypeIndex Prev) = 0; + + virtual CVType getType(TypeIndex Index) = 0; + virtual StringRef getTypeName(TypeIndex Index) = 0; + virtual bool contains(TypeIndex Index) = 0; + virtual uint32_t size() = 0; + virtual uint32_t capacity() = 0; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeDatabase.h b/include/llvm/DebugInfo/CodeView/TypeDatabase.h index 54ad862cfa7e582edba734f59451bafa00993481..a743e7f70855f6e37cafad9321c52de7761f0c69 100644 --- a/include/llvm/DebugInfo/CodeView/TypeDatabase.h +++ b/include/llvm/DebugInfo/CodeView/TypeDatabase.h @@ -10,8 +10,10 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEDATABASE_H #define LLVM_DEBUGINFO_CODEVIEW_TYPEDATABASE_H +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/Allocator.h" @@ -19,15 +21,17 @@ namespace llvm { namespace codeview { -class TypeDatabase { +class TypeDatabase : public TypeCollection { + friend class RandomAccessTypeVisitor; + public: - TypeDatabase() : TypeNameStorage(Allocator) {} + explicit TypeDatabase(uint32_t Capacity); - /// Gets the type index for the next type record. - TypeIndex getNextTypeIndex() const; + /// Records the name of a type, and reserves its type index. + TypeIndex appendType(StringRef Name, const CVType &Data); /// Records the name of a type, and reserves its type index. - void recordType(StringRef Name, const CVType &Data); + void recordType(StringRef Name, TypeIndex Index, const CVType &Data); /// Saves the name in a StringSet and creates a stable StringRef. StringRef saveTypeName(StringRef TypeName); @@ -35,14 +39,35 @@ public: StringRef getTypeName(TypeIndex Index) const; const CVType &getTypeRecord(TypeIndex Index) const; + CVType &getTypeRecord(TypeIndex Index); - bool containsTypeIndex(TypeIndex Index) const; - + bool contains(TypeIndex Index) const; uint32_t size() const; + uint32_t capacity() const; + bool empty() const; + + CVType getType(TypeIndex Index) override; + StringRef getTypeName(TypeIndex Index) override; + bool contains(TypeIndex Index) override; + uint32_t size() override; + uint32_t capacity() override; + + Optional getFirst() override; + Optional getNext(TypeIndex Prev) override; + + Optional largestTypeIndexLessThan(TypeIndex TI) const; private: + TypeIndex getAppendIndex() const; + + void grow(); + void grow(TypeIndex Index); + BumpPtrAllocator Allocator; + uint32_t Count = 0; + TypeIndex LargestTypeIndex; + /// All user defined type records in .debug$T live in here. Type indices /// greater than 0x1000 are user defined. Subtract 0x1000 from the index to /// index into this vector. @@ -50,6 +75,8 @@ private: SmallVector TypeRecords; StringSaver TypeNameStorage; + + BitVector ValidRecords; }; } } diff --git a/include/llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h b/include/llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h index 39d234cf9814d27a8207626b9ba7c13c410d083c..77dbc91a7d38c9bdf0c7d0adfc15c9b9cd046a26 100644 --- a/include/llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h +++ b/include/llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h @@ -10,6 +10,8 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEDATABASEVISITOR_H #define LLVM_DEBUGINFO_CODEVIEW_TYPEDATABASEVISITOR_H +#include "llvm/ADT/PointerUnion.h" + #include "llvm/DebugInfo/CodeView/TypeDatabase.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" @@ -21,11 +23,12 @@ namespace codeview { /// Dumper for CodeView type streams found in COFF object files and PDB files. class TypeDatabaseVisitor : public TypeVisitorCallbacks { public: - explicit TypeDatabaseVisitor(TypeDatabase &TypeDB) : TypeDB(TypeDB) {} + explicit TypeDatabaseVisitor(TypeDatabase &TypeDB) : TypeDB(&TypeDB) {} /// Paired begin/end actions for all types. Receives all record data, /// including the fixed-length record prefix. Error visitTypeBegin(CVType &Record) override; + Error visitTypeBegin(CVType &Record, TypeIndex Index) override; Error visitTypeEnd(CVType &Record) override; Error visitMemberBegin(CVMemberRecord &Record) override; Error visitMemberEnd(CVMemberRecord &Record) override; @@ -36,15 +39,21 @@ public: Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override; #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: + StringRef getTypeName(TypeIndex Index) const; + StringRef saveTypeName(StringRef Name); + bool IsInFieldList = false; /// Name of the current type. Only valid before visitTypeEnd. StringRef Name; + /// Current type index. Only valid before visitTypeEnd, and if we are + /// visiting a random access type database. + Optional CurrentTypeIndex; - TypeDatabase &TypeDB; + TypeDatabase *TypeDB; }; } // end namespace codeview diff --git a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h index 0e34437891702351d32abc4d166ecbebd5cfd4dc..965cdfd85f48961d98e2b2420acf9b799002f28a 100644 --- a/include/llvm/DebugInfo/CodeView/TypeDeserializer.h +++ b/include/llvm/DebugInfo/CodeView/TypeDeserializer.h @@ -40,12 +40,28 @@ class TypeDeserializer : public TypeVisitorCallbacks { public: TypeDeserializer() = default; + template static Error deserializeAs(CVType &CVT, T &Record) { + Record.Kind = static_cast(CVT.kind()); + MappingInfo I(CVT.content()); + if (auto EC = I.Mapping.visitTypeBegin(CVT)) + return EC; + if (auto EC = I.Mapping.visitKnownRecord(CVT, Record)) + return EC; + if (auto EC = I.Mapping.visitTypeEnd(CVT)) + return EC; + return Error::success(); + } + Error visitTypeBegin(CVType &Record) override { assert(!Mapping && "Already in a type mapping!"); Mapping = llvm::make_unique(Record.content()); return Mapping->Mapping.visitTypeBegin(Record); } + Error visitTypeBegin(CVType &Record, TypeIndex Index) override { + return visitTypeBegin(Record); + } + Error visitTypeEnd(CVType &Record) override { assert(Mapping && "Not in a type mapping!"); auto EC = Mapping->Mapping.visitTypeEnd(Record); @@ -60,7 +76,7 @@ public: #define MEMBER_RECORD(EnumName, EnumVal, Name) #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: template @@ -112,7 +128,7 @@ public: } #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: template diff --git a/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h b/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h index 00bb09137e488d4c494c0ed9e6522e86929888e6..afb8b3636361ba94a4277dc5447cd5b2de64feba 100644 --- a/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h +++ b/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h @@ -12,7 +12,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSet.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" @@ -22,17 +21,20 @@ class ScopedPrinter; namespace codeview { +class TypeCollection; + /// Dumper for CodeView type streams found in COFF object files and PDB files. class TypeDumpVisitor : public TypeVisitorCallbacks { public: - TypeDumpVisitor(TypeDatabase &TypeDB, ScopedPrinter *W, bool PrintRecordBytes) - : W(W), PrintRecordBytes(PrintRecordBytes), TypeDB(TypeDB) {} + TypeDumpVisitor(TypeCollection &TpiTypes, ScopedPrinter *W, + bool PrintRecordBytes) + : W(W), PrintRecordBytes(PrintRecordBytes), TpiTypes(TpiTypes) {} /// When dumping types from an IPI stream in a PDB, a type index may refer to /// a type or an item ID. The dumper will lookup the "name" of the index in /// the item database if appropriate. If ItemDB is null, it will use TypeDB, /// which is correct when dumping types from an object file (/Z7). - void setItemDB(TypeDatabase &DB) { ItemDB = &DB; } + void setIpiTypes(TypeCollection &Types) { IpiTypes = &Types; } void printTypeIndex(StringRef FieldName, TypeIndex TI) const; @@ -45,6 +47,7 @@ public: /// Paired begin/end actions for all types. Receives all record data, /// including the fixed-length record prefix. Error visitTypeBegin(CVType &Record) override; + Error visitTypeBegin(CVType &Record, TypeIndex Index) override; Error visitTypeEnd(CVType &Record) override; Error visitMemberBegin(CVMemberRecord &Record) override; Error visitMemberEnd(CVMemberRecord &Record) override; @@ -55,7 +58,7 @@ public: Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override; #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: void printMemberAttributes(MemberAttributes Attrs); @@ -65,14 +68,16 @@ private: /// Get the database of indices for the stream that we are dumping. If ItemDB /// is set, then we must be dumping an item (IPI) stream. This will also /// always get the appropriate DB for printing item names. - TypeDatabase &getSourceDB() const { return ItemDB ? *ItemDB : TypeDB; } + TypeCollection &getSourceTypes() const { + return IpiTypes ? *IpiTypes : TpiTypes; + } ScopedPrinter *W; bool PrintRecordBytes = false; - TypeDatabase &TypeDB; - TypeDatabase *ItemDB = nullptr; + TypeCollection &TpiTypes; + TypeCollection *IpiTypes = nullptr; }; } // end namespace codeview diff --git a/include/llvm/DebugInfo/CodeView/TypeIndex.h b/include/llvm/DebugInfo/CodeView/TypeIndex.h index 3c11d248fa721d305acd96c39679d28386b9bd49..10d51c2d6244f12d5971344461a9d4d286288e89 100644 --- a/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -15,8 +15,13 @@ #include namespace llvm { + +class ScopedPrinter; + namespace codeview { +class TypeCollection; + enum class SimpleTypeKind : uint32_t { None = 0x0000, // uncharacterized type (no type) Void = 0x0003, // void @@ -106,6 +111,15 @@ public: bool isNoneType() const { return *this == None(); } + uint32_t toArrayIndex() const { + assert(!isSimple()); + return getIndex() - FirstNonSimpleIndex; + } + + static TypeIndex fromArrayIndex(uint32_t Index) { + return TypeIndex(Index + FirstNonSimpleIndex); + } + SimpleTypeKind getSimpleKind() const { assert(isSimple()); return static_cast(Index & SimpleKindMask); @@ -159,6 +173,39 @@ public: static TypeIndex Float32() { return TypeIndex(SimpleTypeKind::Float32); } static TypeIndex Float64() { return TypeIndex(SimpleTypeKind::Float64); } + TypeIndex &operator+=(unsigned N) { + Index += N; + return *this; + } + + TypeIndex &operator++() { + Index += 1; + return *this; + } + + TypeIndex operator++(int) { + TypeIndex Copy = *this; + operator++(); + return Copy; + } + + TypeIndex &operator-=(unsigned N) { + assert(Index >= N); + Index -= N; + return *this; + } + + TypeIndex &operator--() { + Index -= 1; + return *this; + } + + TypeIndex operator--(int) { + TypeIndex Copy = *this; + operator--(); + return Copy; + } + friend inline bool operator==(const TypeIndex &A, const TypeIndex &B) { return A.getIndex() == B.getIndex(); } @@ -183,10 +230,40 @@ public: return A.getIndex() >= B.getIndex(); } + friend inline TypeIndex operator+(const TypeIndex &A, uint32_t N) { + TypeIndex Result(A); + Result += N; + return Result; + } + + friend inline TypeIndex operator-(const TypeIndex &A, uint32_t N) { + assert(A.getIndex() >= N); + TypeIndex Result(A); + Result -= N; + return Result; + } + + friend inline uint32_t operator-(const TypeIndex &A, const TypeIndex &B) { + assert(A >= B); + return A.toArrayIndex() - B.toArrayIndex(); + } + + static StringRef simpleTypeName(TypeIndex TI); + private: support::ulittle32_t Index; }; +// Used for pseudo-indexing an array of type records. An array of such records +// sorted by TypeIndex can allow log(N) lookups even though such a type record +// stream does not provide random access. +struct TypeIndexOffset { + TypeIndex Type; + support::ulittle32_t Offset; +}; + +void printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, TypeIndex TI, + TypeCollection &Types); } } diff --git a/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h new file mode 100644 index 0000000000000000000000000000000000000000..82ceb50383166e124adeabcea0920679865ee6ac --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h @@ -0,0 +1,33 @@ +//===- TypeIndexDiscovery.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { +enum class TiRefKind { TypeRef, IndexRef }; +struct TiReference { + TiRefKind Kind; + uint32_t Offset; + uint32_t Count; +}; + +void discoverTypeIndices(ArrayRef RecordData, + SmallVectorImpl &Refs); +void discoverTypeIndices(const CVType &Type, + SmallVectorImpl &Refs); +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeName.h b/include/llvm/DebugInfo/CodeView/TypeName.h new file mode 100644 index 0000000000000000000000000000000000000000..a987b4afd283a528d2f4ad55560c1e92fec8242d --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeName.h @@ -0,0 +1,22 @@ +//===- TypeName.h --------------------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPENAME_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPENAME_H + +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +namespace llvm { +namespace codeview { +std::string computeTypeName(TypeCollection &Types, TypeIndex Index); +} +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h index 1f10872c8768040b20a87265cdbde2a8ed317059..3a64a437aa4d903c85937af833b340191d1e3359 100644 --- a/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -35,6 +35,7 @@ using support::ulittle16_t; using support::ulittle32_t; typedef CVRecord CVType; +typedef RemappedRecord RemappedType; struct CVMemberRecord { TypeLeafKind Kind; @@ -122,13 +123,13 @@ protected: public: TypeRecordKind getKind() const { return Kind; } -private: TypeRecordKind Kind; }; // LF_MODIFIER class ModifierRecord : public TypeRecord { public: + ModifierRecord() = default; explicit ModifierRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ModifierRecord(TypeIndex ModifiedType, ModifierOptions Modifiers) : TypeRecord(TypeRecordKind::Modifier), ModifiedType(ModifiedType), @@ -144,6 +145,7 @@ public: // LF_PROCEDURE class ProcedureRecord : public TypeRecord { public: + ProcedureRecord() = default; explicit ProcedureRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ProcedureRecord(TypeIndex ReturnType, CallingConvention CallConv, FunctionOptions Options, uint16_t ParameterCount, @@ -168,6 +170,7 @@ public: // LF_MFUNCTION class MemberFunctionRecord : public TypeRecord { public: + MemberFunctionRecord() = default; explicit MemberFunctionRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} MemberFunctionRecord(TypeIndex ReturnType, TypeIndex ClassType, @@ -202,6 +205,7 @@ public: // LF_LABEL class LabelRecord : public TypeRecord { public: + LabelRecord() = default; explicit LabelRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} LabelRecord(LabelType Mode) : TypeRecord(TypeRecordKind::Label), Mode(Mode) {} @@ -212,6 +216,7 @@ public: // LF_MFUNC_ID class MemberFuncIdRecord : public TypeRecord { public: + MemberFuncIdRecord() = default; explicit MemberFuncIdRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} MemberFuncIdRecord(TypeIndex ClassType, TypeIndex FunctionType, StringRef Name) @@ -229,6 +234,7 @@ public: // LF_ARGLIST class ArgListRecord : public TypeRecord { public: + ArgListRecord() = default; explicit ArgListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ArgListRecord(TypeRecordKind Kind, ArrayRef Indices) @@ -242,6 +248,7 @@ public: // LF_SUBSTR_LIST class StringListRecord : public TypeRecord { public: + StringListRecord() = default; explicit StringListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} StringListRecord(TypeRecordKind Kind, ArrayRef Indices) @@ -266,6 +273,7 @@ public: static const uint32_t PointerSizeShift = 13; static const uint32_t PointerSizeMask = 0xFF; + PointerRecord() = default; explicit PointerRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} PointerRecord(TypeIndex ReferentType, uint32_t Attrs) @@ -278,15 +286,9 @@ public: Attrs(calcAttrs(PK, PM, PO, Size)) {} PointerRecord(TypeIndex ReferentType, PointerKind PK, PointerMode PM, - PointerOptions PO, uint8_t Size, - const MemberPointerInfo &Member) - : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType), - Attrs(calcAttrs(PK, PM, PO, Size)), MemberInfo(Member) {} - - PointerRecord(TypeIndex ReferentType, uint32_t Attrs, - const MemberPointerInfo &Member) + PointerOptions PO, uint8_t Size, const MemberPointerInfo &MPI) : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType), - Attrs(Attrs), MemberInfo(Member) {} + Attrs(calcAttrs(PK, PM, PO, Size)), MemberInfo(MPI) {} TypeIndex getReferentType() const { return ReferentType; } @@ -346,6 +348,7 @@ private: // LF_NESTTYPE class NestedTypeRecord : public TypeRecord { public: + NestedTypeRecord() = default; explicit NestedTypeRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} NestedTypeRecord(TypeIndex Type, StringRef Name) : TypeRecord(TypeRecordKind::NestedType), Type(Type), Name(Name) {} @@ -360,6 +363,7 @@ public: // LF_FIELDLIST class FieldListRecord : public TypeRecord { public: + FieldListRecord() = default; explicit FieldListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} explicit FieldListRecord(ArrayRef Data) : TypeRecord(TypeRecordKind::FieldList), Data(Data) {} @@ -370,6 +374,7 @@ public: // LF_ARRAY class ArrayRecord : public TypeRecord { public: + ArrayRecord() = default; explicit ArrayRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ArrayRecord(TypeIndex ElementType, TypeIndex IndexType, uint64_t Size, StringRef Name) @@ -389,6 +394,7 @@ public: class TagRecord : public TypeRecord { protected: + TagRecord() = default; explicit TagRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} TagRecord(TypeRecordKind Kind, uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, StringRef Name, StringRef UniqueName) @@ -421,6 +427,7 @@ public: // LF_CLASS, LF_STRUCTURE, LF_INTERFACE class ClassRecord : public TagRecord { public: + ClassRecord() = default; explicit ClassRecord(TypeRecordKind Kind) : TagRecord(Kind) {} ClassRecord(TypeRecordKind Kind, uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, TypeIndex DerivationList, @@ -452,6 +459,7 @@ public: // LF_UNION struct UnionRecord : public TagRecord { + UnionRecord() = default; explicit UnionRecord(TypeRecordKind Kind) : TagRecord(Kind) {} UnionRecord(uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, uint64_t Size, StringRef Name, StringRef UniqueName) @@ -473,6 +481,7 @@ struct UnionRecord : public TagRecord { // LF_ENUM class EnumRecord : public TagRecord { public: + EnumRecord() = default; explicit EnumRecord(TypeRecordKind Kind) : TagRecord(Kind) {} EnumRecord(uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, StringRef Name, StringRef UniqueName, TypeIndex UnderlyingType) @@ -487,6 +496,7 @@ public: // LF_BITFIELD class BitFieldRecord : public TypeRecord { public: + BitFieldRecord() = default; explicit BitFieldRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} BitFieldRecord(TypeIndex Type, uint8_t BitSize, uint8_t BitOffset) : TypeRecord(TypeRecordKind::BitField), Type(Type), BitSize(BitSize), @@ -503,6 +513,7 @@ public: // LF_VTSHAPE class VFTableShapeRecord : public TypeRecord { public: + VFTableShapeRecord() = default; explicit VFTableShapeRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} explicit VFTableShapeRecord(ArrayRef Slots) : TypeRecord(TypeRecordKind::VFTableShape), SlotsRef(Slots) {} @@ -523,6 +534,7 @@ public: // LF_TYPESERVER2 class TypeServer2Record : public TypeRecord { public: + TypeServer2Record() = default; explicit TypeServer2Record(TypeRecordKind Kind) : TypeRecord(Kind) {} TypeServer2Record(StringRef Guid, uint32_t Age, StringRef Name) : TypeRecord(TypeRecordKind::TypeServer2), Guid(Guid), Age(Age), @@ -542,6 +554,7 @@ public: // LF_STRING_ID class StringIdRecord : public TypeRecord { public: + StringIdRecord() = default; explicit StringIdRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} StringIdRecord(TypeIndex Id, StringRef String) : TypeRecord(TypeRecordKind::StringId), Id(Id), String(String) {} @@ -556,6 +569,7 @@ public: // LF_FUNC_ID class FuncIdRecord : public TypeRecord { public: + FuncIdRecord() = default; explicit FuncIdRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} FuncIdRecord(TypeIndex ParentScope, TypeIndex FunctionType, StringRef Name) : TypeRecord(TypeRecordKind::FuncId), ParentScope(ParentScope), @@ -575,6 +589,7 @@ public: // LF_UDT_SRC_LINE class UdtSourceLineRecord : public TypeRecord { public: + UdtSourceLineRecord() = default; explicit UdtSourceLineRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} UdtSourceLineRecord(TypeIndex UDT, TypeIndex SourceFile, uint32_t LineNumber) : TypeRecord(TypeRecordKind::UdtSourceLine), UDT(UDT), @@ -592,6 +607,7 @@ public: // LF_UDT_MOD_SRC_LINE class UdtModSourceLineRecord : public TypeRecord { public: + UdtModSourceLineRecord() = default; explicit UdtModSourceLineRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} UdtModSourceLineRecord(TypeIndex UDT, TypeIndex SourceFile, uint32_t LineNumber, uint16_t Module) @@ -612,6 +628,7 @@ public: // LF_BUILDINFO class BuildInfoRecord : public TypeRecord { public: + BuildInfoRecord() = default; explicit BuildInfoRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} BuildInfoRecord(ArrayRef ArgIndices) : TypeRecord(TypeRecordKind::BuildInfo), @@ -624,6 +641,7 @@ public: // LF_VFTABLE class VFTableRecord : public TypeRecord { public: + VFTableRecord() = default; explicit VFTableRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} VFTableRecord(TypeIndex CompleteClass, TypeIndex OverriddenVFTable, uint32_t VFPtrOffset, StringRef Name, @@ -651,7 +669,7 @@ public: // LF_ONEMETHOD class OneMethodRecord : public TypeRecord { public: - OneMethodRecord() : TypeRecord(TypeRecordKind::OneMethod) {} + OneMethodRecord() = default; explicit OneMethodRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} OneMethodRecord(TypeIndex Type, MemberAttributes Attrs, int32_t VFTableOffset, StringRef Name) @@ -683,6 +701,7 @@ public: // LF_METHODLIST class MethodOverloadListRecord : public TypeRecord { public: + MethodOverloadListRecord() = default; explicit MethodOverloadListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} MethodOverloadListRecord(ArrayRef Methods) : TypeRecord(TypeRecordKind::MethodOverloadList), Methods(Methods) {} @@ -694,6 +713,7 @@ public: /// For method overload sets. LF_METHOD class OverloadedMethodRecord : public TypeRecord { public: + OverloadedMethodRecord() = default; explicit OverloadedMethodRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} OverloadedMethodRecord(uint16_t NumOverloads, TypeIndex MethodList, StringRef Name) @@ -711,6 +731,7 @@ public: // LF_MEMBER class DataMemberRecord : public TypeRecord { public: + DataMemberRecord() = default; explicit DataMemberRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} DataMemberRecord(MemberAttributes Attrs, TypeIndex Type, uint64_t Offset, StringRef Name) @@ -735,6 +756,7 @@ public: // LF_STMEMBER class StaticDataMemberRecord : public TypeRecord { public: + StaticDataMemberRecord() = default; explicit StaticDataMemberRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} StaticDataMemberRecord(MemberAttributes Attrs, TypeIndex Type, StringRef Name) : TypeRecord(TypeRecordKind::StaticDataMember), Attrs(Attrs), Type(Type), @@ -755,6 +777,7 @@ public: // LF_ENUMERATE class EnumeratorRecord : public TypeRecord { public: + EnumeratorRecord() = default; explicit EnumeratorRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} EnumeratorRecord(MemberAttributes Attrs, APSInt Value, StringRef Name) : TypeRecord(TypeRecordKind::Enumerator), Attrs(Attrs), @@ -775,6 +798,7 @@ public: // LF_VFUNCTAB class VFPtrRecord : public TypeRecord { public: + VFPtrRecord() = default; explicit VFPtrRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} VFPtrRecord(TypeIndex Type) : TypeRecord(TypeRecordKind::VFPtr), Type(Type) {} @@ -787,6 +811,7 @@ public: // LF_BCLASS, LF_BINTERFACE class BaseClassRecord : public TypeRecord { public: + BaseClassRecord() = default; explicit BaseClassRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} BaseClassRecord(MemberAttributes Attrs, TypeIndex Type, uint64_t Offset) : TypeRecord(TypeRecordKind::BaseClass), Attrs(Attrs), Type(Type), @@ -807,6 +832,7 @@ public: // LF_VBCLASS, LF_IVBCLASS class VirtualBaseClassRecord : public TypeRecord { public: + VirtualBaseClassRecord() = default; explicit VirtualBaseClassRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} VirtualBaseClassRecord(TypeRecordKind Kind, MemberAttributes Attrs, TypeIndex BaseType, TypeIndex VBPtrType, @@ -836,6 +862,7 @@ public: /// together. The first will end in an LF_INDEX record that points to the next. class ListContinuationRecord : public TypeRecord { public: + ListContinuationRecord() = default; explicit ListContinuationRecord(TypeRecordKind Kind) : TypeRecord(Kind) {} ListContinuationRecord(TypeIndex ContinuationIndex) : TypeRecord(TypeRecordKind::ListContinuation), diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h index 924ca0470fad4760ccde7be410bdd1c434169bb1..6156223b256017036d8901ffd7f4725f711fa4b6 100644 --- a/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h +++ b/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h @@ -37,7 +37,7 @@ public: Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override; #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: Optional TypeKind; diff --git a/include/llvm/DebugInfo/CodeView/TypeSerializer.h b/include/llvm/DebugInfo/CodeView/TypeSerializer.h index 1f4873c4f96938fb1616c98b0ccd15b2a2da7939..f785d4509547503a46a242700e4ef3603742dced 100644 --- a/include/llvm/DebugInfo/CodeView/TypeSerializer.h +++ b/include/llvm/DebugInfo/CodeView/TypeSerializer.h @@ -17,7 +17,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" @@ -26,6 +25,8 @@ namespace llvm { namespace codeview { +class TypeHasher; + class TypeSerializer : public TypeVisitorCallbacks { struct SubRecord { SubRecord(TypeLeafKind K, uint32_t S) : Kind(K), Size(S) {} @@ -45,14 +46,13 @@ class TypeSerializer : public TypeVisitorCallbacks { } }; - typedef SmallVector, 2> RecordList; + typedef SmallVector, 2> MutableRecordList; static constexpr uint8_t ContinuationLength = 8; BumpPtrAllocator &RecordStorage; RecordSegment CurrentSegment; - RecordList FieldListSegments; + MutableRecordList FieldListSegments; - TypeIndex LastTypeIndex; Optional TypeKind; Optional MemberKind; std::vector RecordBuffer; @@ -60,26 +60,37 @@ class TypeSerializer : public TypeVisitorCallbacks { BinaryStreamWriter Writer; TypeRecordMapping Mapping; - RecordList SeenRecords; - StringMap HashedRecords; + /// Private type record hashing implementation details are handled here. + std::unique_ptr Hasher; + + /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). + SmallVector, 2> SeenRecords; + + /// Temporary storage that we use to copy a record's data while re-writing + /// its type indices. + SmallVector RemapStorage; + + TypeIndex nextTypeIndex() const; bool isInFieldList() const; - TypeIndex calcNextTypeIndex() const; - TypeIndex incrementTypeIndex(); MutableArrayRef getCurrentSubRecordData(); MutableArrayRef getCurrentRecordData(); Error writeRecordPrefix(TypeLeafKind Kind); - TypeIndex insertRecordBytesPrivate(MutableArrayRef Record); Expected> addPadding(MutableArrayRef Record); public: - explicit TypeSerializer(BumpPtrAllocator &Storage); + explicit TypeSerializer(BumpPtrAllocator &Storage, bool Hash = true); + ~TypeSerializer(); + + void reset(); + + BumpPtrAllocator &getAllocator() { return RecordStorage; } - ArrayRef> records() const; - TypeIndex getLastTypeIndex() const; - TypeIndex insertRecordBytes(MutableArrayRef Record); + ArrayRef> records() const; + TypeIndex insertRecordBytes(ArrayRef &Record); + TypeIndex insertRecord(const RemappedType &Record); Expected visitTypeEndGetIndex(CVType &Record); Error visitTypeBegin(CVType &Record) override; @@ -97,7 +108,7 @@ public: return visitKnownMemberImpl(CVR, Record); \ } #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: template diff --git a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h index 2246f197e78436382c469be03bf7ffb962e64783..3ad2b4e9c92fcfcf0d9d4853e6db1e3cd7b345e2 100644 --- a/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h +++ b/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h @@ -12,19 +12,85 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/Error.h" namespace llvm { namespace codeview { +class TypeIndex; class TypeServerHandler; +class TypeTableBuilder; -/// Merges one type stream into another. Returns true on success. -Error mergeTypeStreams(TypeTableBuilder &DestIdStream, - TypeTableBuilder &DestTypeStream, +/// \brief Merge one set of type records into another. This method assumes +/// that all records are type records, and there are no Id records present. +/// +/// \param Dest The table to store the re-written type records into. +/// +/// \param SourceToDest A vector, indexed by the TypeIndex in the source +/// type stream, that contains the index of the corresponding type record +/// in the destination stream. +/// +/// \param Handler (optional) If non-null, an interface that gets invoked +/// to handle type server records. +/// +/// \param Types The collection of types to merge in. +/// +/// \returns Error::success() if the operation succeeded, otherwise an +/// appropriate error code. +Error mergeTypeRecords(TypeTableBuilder &Dest, + SmallVectorImpl &SourceToDest, TypeServerHandler *Handler, const CVTypeArray &Types); +/// \brief Merge one set of id records into another. This method assumes +/// that all records are id records, and there are no Type records present. +/// However, since Id records can refer back to Type records, this method +/// assumes that the referenced type records have also been merged into +/// another type stream (for example using the above method), and accepts +/// the mapping from source to dest for that stream so that it can re-write +/// the type record mappings accordingly. +/// +/// \param Dest The table to store the re-written id records into. +/// +/// \param Types The mapping to use for the type records that these id +/// records refer to. +/// +/// \param SourceToDest A vector, indexed by the TypeIndex in the source +/// id stream, that contains the index of the corresponding id record +/// in the destination stream. +/// +/// \param Ids The collection of id records to merge in. +/// +/// \returns Error::success() if the operation succeeded, otherwise an +/// appropriate error code. +Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef Types, + SmallVectorImpl &SourceToDest, + const CVTypeArray &Ids); + +/// \brief Merge a unified set of type and id records, splitting them into +/// separate output streams. +/// +/// \param DestIds The table to store the re-written id records into. +/// +/// \param DestTypes the table to store the re-written type records into. +/// +/// \param SourceToDest A vector, indexed by the TypeIndex in the source +/// id stream, that contains the index of the corresponding id record +/// in the destination stream. +/// +/// \param Handler (optional) If non-null, an interface that gets invoked +/// to handle type server records. +/// +/// \param IdsAndTypes The collection of id records to merge in. +/// +/// \returns Error::success() if the operation succeeded, otherwise an +/// appropriate error code. +Error mergeTypeAndIdRecords(TypeTableBuilder &DestIds, + TypeTableBuilder &DestTypes, + SmallVectorImpl &SourceToDest, + TypeServerHandler *Handler, + const CVTypeArray &IdsAndTypes); + } // end namespace codeview } // end namespace llvm diff --git a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h index 102bee4b0801e1a316b05ef7bd9c46fe2b145af3..1069dcd45334939bc86074b608f41dfaf5c23b29 100644 --- a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h +++ b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h @@ -13,8 +13,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeSerializer.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeSerializer.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include @@ -37,8 +37,9 @@ private: TypeSerializer Serializer; public: - explicit TypeTableBuilder(BumpPtrAllocator &Allocator) - : Allocator(Allocator), Serializer(Allocator) {} + explicit TypeTableBuilder(BumpPtrAllocator &Allocator, + bool WriteUnique = true) + : Allocator(Allocator), Serializer(Allocator, WriteUnique) {} TypeTableBuilder(const TypeTableBuilder &) = delete; TypeTableBuilder &operator=(const TypeTableBuilder &) = delete; @@ -64,10 +65,14 @@ public: return *ExpectedIndex; } - TypeIndex writeSerializedRecord(MutableArrayRef Record) { + TypeIndex writeSerializedRecord(ArrayRef Record) { return Serializer.insertRecordBytes(Record); } + TypeIndex writeSerializedRecord(const RemappedType &Record) { + return Serializer.insertRecord(Record); + } + template void ForEachRecord(TFunc Func) { uint32_t Index = TypeIndex::FirstNonSimpleIndex; @@ -77,23 +82,24 @@ public: } } - ArrayRef> records() const { - return Serializer.records(); - } + ArrayRef> records() const { return Serializer.records(); } }; class FieldListRecordBuilder { TypeTableBuilder &TypeTable; + BumpPtrAllocator Allocator; TypeSerializer TempSerializer; CVType Type; public: explicit FieldListRecordBuilder(TypeTableBuilder &TypeTable) - : TypeTable(TypeTable), TempSerializer(TypeTable.getAllocator()) { + : TypeTable(TypeTable), TempSerializer(Allocator, false) { Type.Type = TypeLeafKind::LF_FIELDLIST; } void begin() { + TempSerializer.reset(); + if (auto EC = TempSerializer.visitTypeBegin(Type)) consumeError(std::move(EC)); } @@ -109,23 +115,19 @@ public: consumeError(std::move(EC)); } - TypeIndex end() { + TypeIndex end(bool Write) { + TypeIndex Index; if (auto EC = TempSerializer.visitTypeEnd(Type)) { consumeError(std::move(EC)); return TypeIndex(); } - TypeIndex Index; - for (auto Record : TempSerializer.records()) { - Index = TypeTable.writeSerializedRecord(Record); + if (Write) { + for (auto Record : TempSerializer.records()) + Index = TypeTable.writeSerializedRecord(Record); } - return Index; - } - /// Stop building the record. - void reset() { - if (auto EC = TempSerializer.visitTypeEnd(Type)) - consumeError(std::move(EC)); + return Index; } }; diff --git a/include/llvm/DebugInfo/CodeView/TypeTableCollection.h b/include/llvm/DebugInfo/CodeView/TypeTableCollection.h new file mode 100644 index 0000000000000000000000000000000000000000..42b62ba2b6ce55b229c6a1aca39f2bf3ee54a99e --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeTableCollection.h @@ -0,0 +1,42 @@ +//===- TypeTableCollection.h ---------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPETABLECOLLECTION_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPETABLECOLLECTION_H + +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeDatabase.h" + +namespace llvm { +namespace codeview { + +class TypeTableCollection : public TypeCollection { +public: + explicit TypeTableCollection(ArrayRef> Records); + + Optional getFirst() override; + Optional getNext(TypeIndex Prev) override; + + CVType getType(TypeIndex Index) override; + StringRef getTypeName(TypeIndex Index) override; + bool contains(TypeIndex Index) override; + uint32_t size() override; + uint32_t capacity() override; + +private: + bool hasCapacityFor(TypeIndex Index) const; + void ensureTypeExists(TypeIndex Index); + + ArrayRef> Records; + TypeDatabase Database; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h index f251296910411884dc41c84e3f361a69114c3f30..126fb8abb0da814b45fdb6d4b47a200174071684 100644 --- a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h +++ b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h @@ -47,6 +47,14 @@ public: return Error::success(); } + Error visitTypeBegin(CVType &Record, TypeIndex Index) override { + for (auto Visitor : Pipeline) { + if (auto EC = Visitor->visitTypeBegin(Record, Index)) + return EC; + } + return Error::success(); + } + Error visitTypeEnd(CVType &Record) override { for (auto Visitor : Pipeline) { if (auto EC = Visitor->visitTypeEnd(Record)) @@ -86,7 +94,7 @@ public: } #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: template Error visitKnownRecordImpl(CVType &CVR, T &Record) { diff --git a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h index 5e27df346b003eca88bb8fffc01051ffc61aa824..d7a473306bc20fd817e3f26c9f725d5747450f19 100644 --- a/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h +++ b/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h @@ -17,8 +17,6 @@ namespace llvm { namespace codeview { class TypeVisitorCallbacks { - friend class CVTypeVisitor; - public: virtual ~TypeVisitorCallbacks() = default; @@ -26,8 +24,15 @@ public: virtual Error visitUnknownType(CVType &Record) { return Error::success(); } /// Paired begin/end actions for all types. Receives all record data, /// including the fixed-length record prefix. visitTypeBegin() should return - /// the type of the Record, or an error if it cannot be determined. + /// the type of the Record, or an error if it cannot be determined. Exactly + /// one of the two visitTypeBegin methods will be called, depending on whether + /// records are being visited sequentially or randomly. An implementation + /// should be prepared to handle both (or assert if it can't handle random + /// access visitation). virtual Error visitTypeBegin(CVType &Record) { return Error::success(); } + virtual Error visitTypeBegin(CVType &Record, TypeIndex Index) { + return Error::success(); + } virtual Error visitTypeEnd(CVType &Record) { return Error::success(); } virtual Error visitUnknownMember(CVMemberRecord &Record) { @@ -53,7 +58,11 @@ public: #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +#undef TYPE_RECORD +#undef TYPE_RECORD_ALIAS +#undef MEMBER_RECORD +#undef MEMBER_RECORD_ALIAS }; } // end namespace codeview diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index e3386a8dcd24c69fa3cb8dceb35ae4326bd8e0c3..2e82a774cc23b38bfb4cf7db36a99e3d9bdc20a0 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -135,6 +135,7 @@ enum DIDumpType { DIDT_GnuPubnames, DIDT_GnuPubtypes, DIDT_Str, + DIDT_StrOffsets, DIDT_StrDwo, DIDT_StrOffsetsDwo, DIDT_AppleNames, @@ -146,6 +147,15 @@ enum DIDumpType { DIDT_TUIndex, }; +/// Container for dump options that control which debug information will be +/// dumped. +struct DIDumpOptions { + DIDumpType DumpType = DIDT_All; + bool DumpEH = false; + bool SummarizeTypes = false; + bool Brief = false; +}; + class DIContext { public: enum DIContextKind { @@ -158,9 +168,12 @@ public: DIContextKind getKind() const { return Kind; } - virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All, - bool DumpEH = false, bool SummarizeTypes = false) = 0; + virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts) = 0; + virtual bool verify(raw_ostream &OS, DIDumpType DumpType = DIDT_All) { + // No verifier? Just say things went well. + return true; + } virtual DILineInfo getLineInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0; virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address, diff --git a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index 7324f6e3eb387fc1bc67527f3cc74ffa355479e6..e363cff158033e25fb8a28ed69d4e5f63e267556 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -10,11 +10,11 @@ #ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H #define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include #include #include diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h index f95a013d7552381e14254050fe40ef11f09f6617..3012b39dcc528efe102cf2da038242af389575cb 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -11,9 +11,9 @@ #define LLVM_DEBUGINFO_DWARFACCELERATORTABLE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include #include @@ -50,6 +50,10 @@ public: : AccelSection(AccelSection), StringSection(StringSection), Relocs(Relocs) {} bool extract(); + uint32_t getNumBuckets(); + uint32_t getNumHashes(); + uint32_t getSizeHdr(); + uint32_t getHeaderDataLength(); void dump(raw_ostream &OS) const; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h index 5919aaddea409857d8d904c869d293c96bb8dc32..f0672bb0ca75853d316917d54030871c9218e735 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFAttribute.h +++ b/include/llvm/DebugInfo/DWARF/DWARFAttribute.h @@ -10,8 +10,8 @@ #ifndef LLVM_DEBUGINFO_DWARFATTRIBUTE_H #define LLVM_DEBUGINFO_DWARFATTRIBUTE_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/Support/Dwarf.h" #include namespace llvm { @@ -31,10 +31,10 @@ struct DWARFAttribute { dwarf::Attribute Attr; /// The form and value for this attribute. DWARFFormValue Value; - + DWARFAttribute(uint32_t O, dwarf::Attribute A = dwarf::Attribute(0), dwarf::Form F = dwarf::Form(0)) : Attr(A), Value(F) {} - + bool isValid() const { return Offset != 0 && Attr != dwarf::Attribute(0); } diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h index b2a4d247ccc6be27f5a6b9c3fe7fbcea66e79cda..b4e4721e3d51982cef98cf5505250722c6e94371 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h @@ -18,9 +18,10 @@ namespace llvm { class DWARFCompileUnit : public DWARFUnit { public: DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, - bool IsDWO, const DWARFUnitSectionBase &UnitSection, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, const DWARFSection &SOS, + const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO, + const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *Entry) : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, IsDWO, UnitSection, Entry) {} @@ -28,7 +29,7 @@ public: // VTable anchor. ~DWARFCompileUnit() override; - void dump(raw_ostream &OS); + void dump(raw_ostream &OS, DIDumpOptions DumpOpts); static const DWARFSectionKind Section = DW_SECT_INFO; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index f941cdd1060a58782ce768eb7d73ba56bc02f1b4..c72604a12bfda6c7f9f9f3161e47e416effacf5d 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -11,12 +11,12 @@ #define LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" @@ -43,12 +43,11 @@ namespace llvm { class MemoryBuffer; class raw_ostream; -// In place of applying the relocations to the data we've read from disk we use -// a separate mapping table to the side and checking that at locations in the -// dwarf where we expect relocated values. This adds a bit of complexity to the -// dwarf parsing/extraction at the benefit of not allocating memory for the -// entire size of the debug info sections. -typedef DenseMap> RelocAddrMap; +/// Reads a value from data extractor and applies a relocation to the result if +/// one exists for the given offset. +uint64_t getRelocatedValue(const DataExtractor &Data, uint32_t Size, + uint32_t *Off, const RelocAddrMap *Relocs, + uint64_t *SecNdx = nullptr); /// DWARFContext /// This data structure is the top level entity that deals with dwarf debug @@ -73,6 +72,17 @@ class DWARFContext : public DIContext { std::unique_ptr AbbrevDWO; std::unique_ptr LocDWO; + /// The maximum DWARF version of all units. + unsigned MaxVersion; + + struct DWOFile { + object::OwningBinary File; + std::unique_ptr Context; + }; + StringMap> DWOFiles; + std::weak_ptr DWP; + bool CheckedForDWP = false; + /// Read compile units from the debug_info section (if necessary) /// and store them in CUs. void parseCompileUnits(); @@ -90,7 +100,7 @@ class DWARFContext : public DIContext { void parseDWOTypeUnits(); public: - DWARFContext() : DIContext(CK_DWARF) {} + DWARFContext() : DIContext(CK_DWARF), MaxVersion(0) {} DWARFContext(DWARFContext &) = delete; DWARFContext &operator=(DWARFContext &) = delete; @@ -98,8 +108,9 @@ public: return DICtx->getKind() == CK_DWARF; } - void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All, - bool DumpEH = false, bool SummarizeTypes = false) override; + void dump(raw_ostream &OS, DIDumpOptions DumpOpts) override; + + bool verify(raw_ostream &OS, DIDumpType DumpType = DIDT_All) override; typedef DWARFUnitSection::iterator_range cu_iterator_range; typedef DWARFUnitSection::iterator_range tu_iterator_range; @@ -165,6 +176,18 @@ public: return DWOCUs[index].get(); } + DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash); + + /// Get a DIE given an exact offset. + DWARFDie getDIEForOffset(uint32_t Offset); + + unsigned getMaxVersion() const { return MaxVersion; } + + void setMaxVersionIfGreater(unsigned Version) { + if (Version > MaxVersion) + MaxVersion = Version; + } + const DWARFUnitIndex &getCUIndex(); DWARFGdbIndex &getGdbIndex(); const DWARFUnitIndex &getTUIndex(); @@ -203,6 +226,7 @@ public: DIInliningInfo getInliningInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; + virtual StringRef getFileName() const = 0; virtual bool isLittleEndian() const = 0; virtual uint8_t getAddressSize() const = 0; virtual const DWARFSection &getInfoSection() = 0; @@ -216,13 +240,18 @@ public: virtual StringRef getEHFrameSection() = 0; virtual const DWARFSection &getLineSection() = 0; virtual StringRef getStringSection() = 0; - virtual StringRef getRangeSection() = 0; + virtual const DWARFSection& getRangeSection() = 0; virtual StringRef getMacinfoSection() = 0; virtual StringRef getPubNamesSection() = 0; virtual StringRef getPubTypesSection() = 0; virtual StringRef getGnuPubNamesSection() = 0; virtual StringRef getGnuPubTypesSection() = 0; + /// DWARF v5 + /// @{ + virtual const DWARFSection &getStringOffsetSection() = 0; + /// @} + // Sections for DWARF5 split dwarf proposal. virtual const DWARFSection &getInfoDWOSection() = 0; virtual const TypeSectionMap &getTypesDWOSections() = 0; @@ -230,9 +259,9 @@ public: virtual const DWARFSection &getLineDWOSection() = 0; virtual const DWARFSection &getLocDWOSection() = 0; virtual StringRef getStringDWOSection() = 0; - virtual StringRef getStringOffsetDWOSection() = 0; - virtual StringRef getRangeDWOSection() = 0; - virtual StringRef getAddrSection() = 0; + virtual const DWARFSection &getStringOffsetDWOSection() = 0; + virtual const DWARFSection &getRangeDWOSection() = 0; + virtual const DWARFSection &getAddrSection() = 0; virtual const DWARFSection& getAppleNamesSection() = 0; virtual const DWARFSection& getAppleTypesSection() = 0; virtual const DWARFSection& getAppleNamespacesSection() = 0; @@ -245,6 +274,8 @@ public: return version == 2 || version == 3 || version == 4 || version == 5; } + std::shared_ptr getDWOContext(StringRef AbsolutePath); + private: /// Return the compile unit that includes an offset (relative to .debug_info). DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset); @@ -260,6 +291,7 @@ private: class DWARFContextInMemory : public DWARFContext { virtual void anchor(); + StringRef FileName; bool IsLittleEndian; uint8_t AddressSize; DWARFSection InfoSection; @@ -271,13 +303,18 @@ class DWARFContextInMemory : public DWARFContext { StringRef EHFrameSection; DWARFSection LineSection; StringRef StringSection; - StringRef RangeSection; + DWARFSection RangeSection; StringRef MacinfoSection; StringRef PubNamesSection; StringRef PubTypesSection; StringRef GnuPubNamesSection; StringRef GnuPubTypesSection; + /// DWARF v5 + /// @{ + DWARFSection StringOffsetSection; + /// @} + // Sections for DWARF5 split dwarf proposal. DWARFSection InfoDWOSection; TypeSectionMap TypesDWOSections; @@ -285,9 +322,9 @@ class DWARFContextInMemory : public DWARFContext { DWARFSection LineDWOSection; DWARFSection LocDWOSection; StringRef StringDWOSection; - StringRef StringOffsetDWOSection; - StringRef RangeDWOSection; - StringRef AddrSection; + DWARFSection StringOffsetDWOSection; + DWARFSection RangeDWOSection; + DWARFSection AddrSection; DWARFSection AppleNamesSection; DWARFSection AppleTypesSection; DWARFSection AppleNamespacesSection; @@ -300,6 +337,11 @@ class DWARFContextInMemory : public DWARFContext { StringRef *MapSectionToMember(StringRef Name); + /// If Sec is compressed section, decompresses and updates its contents + /// provided by Data. Otherwise leaves it unchanged. + Error maybeDecompress(const object::SectionRef &Sec, StringRef Name, + StringRef &Data); + public: DWARFContextInMemory(const object::ObjectFile &Obj, const LoadedObjectInfo *L = nullptr); @@ -308,6 +350,7 @@ public: uint8_t AddrSize, bool isLittleEndian = sys::IsLittleEndianHost); + StringRef getFileName() const override { return FileName; } bool isLittleEndian() const override { return IsLittleEndian; } uint8_t getAddressSize() const override { return AddressSize; } const DWARFSection &getInfoSection() override { return InfoSection; } @@ -319,7 +362,7 @@ public: StringRef getEHFrameSection() override { return EHFrameSection; } const DWARFSection &getLineSection() override { return LineSection; } StringRef getStringSection() override { return StringSection; } - StringRef getRangeSection() override { return RangeSection; } + const DWARFSection &getRangeSection() override { return RangeSection; } StringRef getMacinfoSection() override { return MacinfoSection; } StringRef getPubNamesSection() override { return PubNamesSection; } StringRef getPubTypesSection() override { return PubTypesSection; } @@ -330,6 +373,11 @@ public: const DWARFSection& getAppleNamespacesSection() override { return AppleNamespacesSection; } const DWARFSection& getAppleObjCSection() override { return AppleObjCSection; } + // DWARF v5 + const DWARFSection &getStringOffsetSection() override { + return StringOffsetSection; + } + // Sections for DWARF5 split dwarf proposal. const DWARFSection &getInfoDWOSection() override { return InfoDWOSection; } @@ -342,15 +390,13 @@ public: const DWARFSection &getLocDWOSection() override { return LocDWOSection; } StringRef getStringDWOSection() override { return StringDWOSection; } - StringRef getStringOffsetDWOSection() override { + const DWARFSection &getStringOffsetDWOSection() override { return StringOffsetDWOSection; } - StringRef getRangeDWOSection() override { return RangeDWOSection; } + const DWARFSection &getRangeDWOSection() override { return RangeDWOSection; } - StringRef getAddrSection() override { - return AddrSection; - } + const DWARFSection &getAddrSection() override { return AddrSection; } StringRef getCUIndexSection() override { return CUIndexSection; } StringRef getGdbIndexSection() override { return GdbIndexSection; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h index 40eb7e9a88364affaa99d13c2b50218a04bd799e..2d82104ea09846e34916c3625b4afca53a92d6d7 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h @@ -22,19 +22,19 @@ class raw_ostream; class DWARFDebugArangeSet { public: struct Header { - // The total length of the entries for that set, not including the length - // field itself. + /// The total length of the entries for that set, not including the length + /// field itself. uint32_t Length; - // The offset from the beginning of the .debug_info section of the - // compilation unit entry referenced by the table. + /// The offset from the beginning of the .debug_info section of the + /// compilation unit entry referenced by the table. uint32_t CuOffset; - // The DWARF version number. + /// The DWARF version number. uint16_t Version; - // The size in bytes of an address on the target architecture. For segmented - // addressing, this is the size of the offset portion of the address. + /// The size in bytes of an address on the target architecture. For segmented + /// addressing, this is the size of the offset portion of the address. uint8_t AddrSize; - // The size in bytes of a segment descriptor on the target architecture. - // If the target system uses a flat address space, this value is 0. + /// The size in bytes of a segment descriptor on the target architecture. + /// If the target system uses a flat address space, this value is 0. uint8_t SegSize; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h index c06771d6afb4397ca7628099b2112c11649152ba..2237aa361d187fd5421166c537e528c73bbb282c 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h @@ -28,7 +28,7 @@ private: void clear(); void extract(DataExtractor DebugArangesData); - // Call appendRange multiple times and then call construct. + /// Call appendRange multiple times and then call construct. void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); void construct(); @@ -58,9 +58,9 @@ private: return LowPC < other.LowPC; } - uint64_t LowPC; // Start of address range. - uint32_t Length; // End of address range (not including this address). - uint32_t CUOffset; // Offset of the compile unit or die. + uint64_t LowPC; /// Start of address range. + uint32_t Length; /// End of address range (not including this address). + uint32_t CUOffset; /// Offset of the compile unit or die. }; struct RangeEndpoint { diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h index fc2423a2708b8d2237c45b5ae36a2f256ef3a3a8..5c591b3de4916077f0d854644583a01bd14add2f 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -10,8 +10,8 @@ #ifndef LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H #define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" -#include "llvm/Support/Dwarf.h" #include namespace llvm { diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index e5bb24707b638402869892acb40c5b8bf3142ba3..39a7ef71de97d7a1bc3b47b4acba8619da6eaae2 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -24,12 +24,13 @@ class raw_ostream; class DWARFDebugLine { public: - DWARFDebugLine(const RelocAddrMap* LineInfoRelocMap) : RelocMap(LineInfoRelocMap) {} + DWARFDebugLine(const RelocAddrMap *LineInfoRelocMap) + : RelocMap(LineInfoRelocMap) {} struct FileNameEntry { FileNameEntry() = default; - const char *Name = nullptr; + StringRef Name; uint64_t DirIdx = 0; uint64_t ModTime = 0; uint64_t Length = 0; @@ -38,50 +39,50 @@ public: struct Prologue { Prologue(); - // The size in bytes of the statement information for this compilation unit - // (not including the total_length field itself). + /// The size in bytes of the statement information for this compilation unit + /// (not including the total_length field itself). uint64_t TotalLength; - // Version identifier for the statement information format. + /// Version identifier for the statement information format. uint16_t Version; - // The number of bytes following the prologue_length field to the beginning - // of the first byte of the statement program itself. + /// In v5, size in bytes of an address (or segment offset). + uint8_t AddressSize; + /// In v5, size in bytes of a segment selector. + uint8_t SegSelectorSize; + /// The number of bytes following the prologue_length field to the beginning + /// of the first byte of the statement program itself. uint64_t PrologueLength; - // The size in bytes of the smallest target machine instruction. Statement - // program opcodes that alter the address register first multiply their - // operands by this value. + /// The size in bytes of the smallest target machine instruction. Statement + /// program opcodes that alter the address register first multiply their + /// operands by this value. uint8_t MinInstLength; - // The maximum number of individual operations that may be encoded in an - // instruction. + /// The maximum number of individual operations that may be encoded in an + /// instruction. uint8_t MaxOpsPerInst; - // The initial value of theis_stmtregister. + /// The initial value of theis_stmtregister. uint8_t DefaultIsStmt; - // This parameter affects the meaning of the special opcodes. See below. + /// This parameter affects the meaning of the special opcodes. See below. int8_t LineBase; - // This parameter affects the meaning of the special opcodes. See below. + /// This parameter affects the meaning of the special opcodes. See below. uint8_t LineRange; - // The number assigned to the first special opcode. + /// The number assigned to the first special opcode. uint8_t OpcodeBase; std::vector StandardOpcodeLengths; - std::vector IncludeDirectories; + std::vector IncludeDirectories; std::vector FileNames; bool IsDWARF64; - uint32_t sizeofTotalLength() const { - return IsDWARF64 ? 12 : 4; - } + uint32_t sizeofTotalLength() const { return IsDWARF64 ? 12 : 4; } - uint32_t sizeofPrologueLength() const { - return IsDWARF64 ? 8 : 4; - } + uint32_t sizeofPrologueLength() const { return IsDWARF64 ? 8 : 4; } - // Length of the prologue in bytes. + /// Length of the prologue in bytes. uint32_t getLength() const { return PrologueLength + sizeofTotalLength() + sizeof(Version) + sizeofPrologueLength(); } - // Length of the line table data in bytes (not including the prologue). + /// Length of the line table data in bytes (not including the prologue). uint32_t getStatementTableLength() const { return TotalLength + sizeofTotalLength() - getLength(); } @@ -92,70 +93,70 @@ public: void clear(); void dump(raw_ostream &OS) const; - bool parse(DataExtractor debug_line_data, uint32_t *offset_ptr); + bool parse(DataExtractor DebugLineData, uint32_t *OffsetPtr); }; - // Standard .debug_line state machine structure. + /// Standard .debug_line state machine structure. struct Row { - explicit Row(bool default_is_stmt = false); + explicit Row(bool DefaultIsStmt = false); /// Called after a row is appended to the matrix. void postAppend(); - void reset(bool default_is_stmt); + void reset(bool DefaultIsStmt); void dump(raw_ostream &OS) const; - - static bool orderByAddress(const Row& LHS, const Row& RHS) { + static void dumpTableHeader(raw_ostream &OS); + static bool orderByAddress(const Row &LHS, const Row &RHS) { return LHS.Address < RHS.Address; } - // The program-counter value corresponding to a machine instruction - // generated by the compiler. + /// The program-counter value corresponding to a machine instruction + /// generated by the compiler. uint64_t Address; - // An unsigned integer indicating a source line number. Lines are numbered - // beginning at 1. The compiler may emit the value 0 in cases where an - // instruction cannot be attributed to any source line. + /// An unsigned integer indicating a source line number. Lines are numbered + /// beginning at 1. The compiler may emit the value 0 in cases where an + /// instruction cannot be attributed to any source line. uint32_t Line; - // An unsigned integer indicating a column number within a source line. - // Columns are numbered beginning at 1. The value 0 is reserved to indicate - // that a statement begins at the 'left edge' of the line. + /// An unsigned integer indicating a column number within a source line. + /// Columns are numbered beginning at 1. The value 0 is reserved to indicate + /// that a statement begins at the 'left edge' of the line. uint16_t Column; - // An unsigned integer indicating the identity of the source file - // corresponding to a machine instruction. + /// An unsigned integer indicating the identity of the source file + /// corresponding to a machine instruction. uint16_t File; - // An unsigned integer representing the DWARF path discriminator value - // for this location. + /// An unsigned integer representing the DWARF path discriminator value + /// for this location. uint32_t Discriminator; - // An unsigned integer whose value encodes the applicable instruction set - // architecture for the current instruction. + /// An unsigned integer whose value encodes the applicable instruction set + /// architecture for the current instruction. uint8_t Isa; - // A boolean indicating that the current instruction is the beginning of a - // statement. - uint8_t IsStmt:1, - // A boolean indicating that the current instruction is the - // beginning of a basic block. - BasicBlock:1, - // A boolean indicating that the current address is that of the - // first byte after the end of a sequence of target machine - // instructions. - EndSequence:1, - // A boolean indicating that the current address is one (of possibly - // many) where execution should be suspended for an entry breakpoint - // of a function. - PrologueEnd:1, - // A boolean indicating that the current address is one (of possibly - // many) where execution should be suspended for an exit breakpoint - // of a function. - EpilogueBegin:1; + /// A boolean indicating that the current instruction is the beginning of a + /// statement. + uint8_t IsStmt : 1, + /// A boolean indicating that the current instruction is the + /// beginning of a basic block. + BasicBlock : 1, + /// A boolean indicating that the current address is that of the + /// first byte after the end of a sequence of target machine + /// instructions. + EndSequence : 1, + /// A boolean indicating that the current address is one (of possibly + /// many) where execution should be suspended for an entry breakpoint + /// of a function. + PrologueEnd : 1, + /// A boolean indicating that the current address is one (of possibly + /// many) where execution should be suspended for an exit breakpoint + /// of a function. + EpilogueBegin : 1; }; - // Represents a series of contiguous machine instructions. Line table for each - // compilation unit may consist of multiple sequences, which are not - // guaranteed to be in the order of ascending instruction address. + /// Represents a series of contiguous machine instructions. Line table for + /// each compilation unit may consist of multiple sequences, which are not + /// guaranteed to be in the order of ascending instruction address. struct Sequence { Sequence(); - // Sequence describes instructions at address range [LowPC, HighPC) - // and is described by line table rows [FirstRowIndex, LastRowIndex). + /// Sequence describes instructions at address range [LowPC, HighPC) + /// and is described by line table rows [FirstRowIndex, LastRowIndex). uint64_t LowPC; uint64_t HighPC; unsigned FirstRowIndex; @@ -164,7 +165,7 @@ public: void reset(); - static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) { + static bool orderByLowPC(const Sequence &LHS, const Sequence &RHS) { return LHS.LowPC < RHS.LowPC; } @@ -172,42 +173,38 @@ public: return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex); } - bool containsPC(uint64_t pc) const { - return (LowPC <= pc && pc < HighPC); - } + bool containsPC(uint64_t PC) const { return (LowPC <= PC && PC < HighPC); } }; struct LineTable { LineTable(); - // Represents an invalid row + /// Represents an invalid row const uint32_t UnknownRowIndex = UINT32_MAX; - void appendRow(const DWARFDebugLine::Row &R) { - Rows.push_back(R); - } + void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); } void appendSequence(const DWARFDebugLine::Sequence &S) { Sequences.push_back(S); } - // Returns the index of the row with file/line info for a given address, - // or UnknownRowIndex if there is no such row. - uint32_t lookupAddress(uint64_t address) const; + /// Returns the index of the row with file/line info for a given address, + /// or UnknownRowIndex if there is no such row. + uint32_t lookupAddress(uint64_t Address) const; - bool lookupAddressRange(uint64_t address, uint64_t size, - std::vector &result) const; + bool lookupAddressRange(uint64_t Address, uint64_t Size, + std::vector &Result) const; bool hasFileAtIndex(uint64_t FileIndex) const; - // Extracts filename by its index in filename table in prologue. - // Returns true on success. + /// Extracts filename by its index in filename table in prologue. + /// Returns true on success. bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir, DILineInfoSpecifier::FileLineInfoKind Kind, std::string &Result) const; - // Fills the Result argument with the file and line information - // corresponding to Address. Returns true on success. + /// Fills the Result argument with the file and line information + /// corresponding to Address. Returns true on success. bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir, DILineInfoSpecifier::FileLineInfoKind Kind, DILineInfo &Result) const; @@ -216,8 +213,8 @@ public: void clear(); /// Parse prologue and all rows. - bool parse(DataExtractor debug_line_data, const RelocAddrMap *RMap, - uint32_t *offset_ptr); + bool parse(DataExtractor DebugLineData, const RelocAddrMap *RMap, + uint32_t *OffsetPtr); struct Prologue Prologue; typedef std::vector RowVector; @@ -228,25 +225,25 @@ public: SequenceVector Sequences; private: - uint32_t findRowInSeq(const DWARFDebugLine::Sequence &seq, - uint64_t address) const; + uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq, + uint64_t Address) const; }; - const LineTable *getLineTable(uint32_t offset) const; - const LineTable *getOrParseLineTable(DataExtractor debug_line_data, - uint32_t offset); + const LineTable *getLineTable(uint32_t Offset) const; + const LineTable *getOrParseLineTable(DataExtractor DebugLineData, + uint32_t Offset); private: struct ParsingState { ParsingState(struct LineTable *LT); void resetRowAndSequence(); - void appendRowToMatrix(uint32_t offset); + void appendRowToMatrix(uint32_t Offset); - // Line table we're currently parsing. + /// Line table we're currently parsing. struct LineTable *LineTable; - // The row number that starts at zero for the prologue, and increases for - // each row added to the matrix. + /// The row number that starts at zero for the prologue, and increases for + /// each row added to the matrix. unsigned RowNumber; struct Row Row; struct Sequence Sequence; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h index 9d36bb7ad211c5440d5d0ee08f17a781fe756cec..a309fd104f938414792a23dba036852e884118dc 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h @@ -12,7 +12,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" #include #include diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index 018a049a3ed8184361fb8132fbda37fe94a3e5b9..437060bc8fec14e6e6e8086c0f7b2a4b459efcbb 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -10,7 +10,9 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGRANGELIST_H +#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/DataExtractor.h" + #include #include #include @@ -20,37 +22,45 @@ namespace llvm { class raw_ostream; +struct DWARFAddressRange { + uint64_t LowPC; + uint64_t HighPC; + uint64_t SectionIndex; +}; + /// DWARFAddressRangesVector - represents a set of absolute address ranges. -typedef std::vector> DWARFAddressRangesVector; +typedef std::vector DWARFAddressRangesVector; class DWARFDebugRangeList { public: struct RangeListEntry { - // A beginning address offset. This address offset has the size of an - // address and is relative to the applicable base address of the - // compilation unit referencing this range list. It marks the beginning - // of an address range. + /// A beginning address offset. This address offset has the size of an + /// address and is relative to the applicable base address of the + /// compilation unit referencing this range list. It marks the beginning + /// of an address range. uint64_t StartAddress; - // An ending address offset. This address offset again has the size of - // an address and is relative to the applicable base address of the - // compilation unit referencing this range list. It marks the first - // address past the end of the address range. The ending address must - // be greater than or equal to the beginning address. + /// An ending address offset. This address offset again has the size of + /// an address and is relative to the applicable base address of the + /// compilation unit referencing this range list. It marks the first + /// address past the end of the address range. The ending address must + /// be greater than or equal to the beginning address. uint64_t EndAddress; + /// A section index this range belongs to. + uint64_t SectionIndex; - // The end of any given range list is marked by an end of list entry, - // which consists of a 0 for the beginning address offset - // and a 0 for the ending address offset. + /// The end of any given range list is marked by an end of list entry, + /// which consists of a 0 for the beginning address offset + /// and a 0 for the ending address offset. bool isEndOfListEntry() const { return (StartAddress == 0) && (EndAddress == 0); } - // A base address selection entry consists of: - // 1. The value of the largest representable address offset - // (for example, 0xffffffff when the size of an address is 32 bits). - // 2. An address, which defines the appropriate base address for - // use in interpreting the beginning and ending address offsets of - // subsequent entries of the location list. + /// A base address selection entry consists of: + /// 1. The value of the largest representable address offset + /// (for example, 0xffffffff when the size of an address is 32 bits). + /// 2. An address, which defines the appropriate base address for + /// use in interpreting the beginning and ending address offsets of + /// subsequent entries of the location list. bool isBaseAddressSelectionEntry(uint8_t AddressSize) const { assert(AddressSize == 4 || AddressSize == 8); if (AddressSize == 4) @@ -61,7 +71,7 @@ public: }; private: - // Offset in .debug_ranges section. + /// Offset in .debug_ranges section. uint32_t Offset; uint8_t AddressSize; std::vector Entries; @@ -71,7 +81,7 @@ public: void clear(); void dump(raw_ostream &OS) const; - bool extract(DataExtractor data, uint32_t *offset_ptr); + bool extract(DataExtractor data, uint32_t *offset_ptr, const RelocAddrMap& Relocs); const std::vector &getEntries() { return Entries; } /// getAbsoluteRanges - Returns absolute address ranges defined by this range diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h index 33e24fe3adc909a0f829fcd7098065f6a82974a5..b216491b615a2cb4ab94cdb819ce4ccd83172c9e 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -11,23 +11,23 @@ #define LLVM_DEBUGINFO_DWARFDIE_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/Optional.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFAttribute.h" #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" -#include "llvm/Support/Dwarf.h" #include #include #include namespace llvm { - + class DWARFUnit; class raw_ostream; - + //===----------------------------------------------------------------------===// /// Utility class that carries the DWARF compile/type unit and the debug info /// entry in an object. @@ -47,7 +47,7 @@ class DWARFDie { public: DWARFDie() = default; DWARFDie(DWARFUnit *Unit, const DWARFDebugInfoEntry * D) : U(Unit), Die(D) {} - + bool isValid() const { return U && Die; } explicit operator bool() const { return isValid(); } const DWARFDebugInfoEntry *getDebugInfoEntry() const { return Die; } @@ -68,7 +68,7 @@ public: assert(isValid() && "must check validity prior to calling"); return Die->getOffset(); } - + dwarf::Tag getTag() const { auto AbbrevDecl = getAbbreviationDeclarationPtr(); if (AbbrevDecl) @@ -80,7 +80,7 @@ public: assert(isValid() && "must check validity prior to calling"); return Die->hasChildren(); } - + /// Returns true for a valid DIE that terminates a sibling chain. bool isNULL() const { return getAbbreviationDeclarationPtr() == nullptr; @@ -97,13 +97,13 @@ public: /// \returns a valid DWARFDie instance if this object has a parent or an /// invalid DWARFDie instance if it doesn't. DWARFDie getParent() const; - + /// Get the sibling of this DIE object. /// /// \returns a valid DWARFDie instance if this object has a sibling or an /// invalid DWARFDie instance if it doesn't. DWARFDie getSibling() const; - + /// Get the first child of this DIE object. /// /// \returns a valid DWARFDie instance if this object has children or an @@ -113,15 +113,16 @@ public: return DWARFDie(U, Die + 1); return DWARFDie(); } - + /// Dump the DIE and all of its attributes to the supplied stream. /// /// \param OS the stream to use for output. /// \param recurseDepth the depth to recurse to when dumping this DIE and its /// children. /// \param indent the number of characters to indent each line that is output. - void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0) const; - + void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0, + DIDumpOptions DumpOpts = DIDumpOptions()) const; + /// Extract the specified attribute from this DIE. /// /// Extract an attribute value from this DIE only. This call doesn't look @@ -132,7 +133,7 @@ public: /// \returns an optional DWARFFormValue that will have the form value if the /// attribute was successfully extracted. Optional find(dwarf::Attribute Attr) const; - + /// Extract the first value of any attribute in Attrs from this DIE. /// /// Extract the first attribute that matches from this DIE only. This call @@ -180,7 +181,7 @@ public: /// /// \returns anm optional absolute section offset value for the attribute. Optional getRangesBaseAttribute() const; - + /// Get the DW_AT_high_pc attribute value as an address. /// /// In DWARF version 4 and later the high PC can be encoded as an offset from @@ -195,8 +196,9 @@ public: /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU. /// Returns true if both attributes are present. - bool getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const; - + bool getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC, + uint64_t &SectionIndex) const; + /// Get the address ranges for this DIE. /// /// Get the hi/low PC range if both attributes are available or exrtracts the @@ -208,7 +210,7 @@ public: /// \returns a address range vector that might be empty if no address range /// information is available. DWARFAddressRangesVector getAddressRanges() const; - + /// Get all address ranges for any DW_TAG_subprogram DIEs in this DIE or any /// of its children. /// @@ -218,19 +220,19 @@ public: /// /// \param Ranges the addres range vector to fill in. void collectChildrenAddressRanges(DWARFAddressRangesVector &Ranges) const; - + bool addressRangeContainsAddress(const uint64_t Address) const; - + /// If a DIE represents a subprogram (or inlined subroutine), returns its /// mangled name (or short name, if mangled is missing). This name may be /// fetched from specification or abstract origin for this subprogram. /// Returns null if no name is found. const char *getSubroutineName(DINameKind Kind) const; - + /// Return the DIE name resolving DW_AT_sepcification or DW_AT_abstract_origin /// references if necessary. Returns null if no name is found. const char *getName(DINameKind Kind) const; - + /// Returns the declaration line (start line) for a DIE, assuming it specifies /// a subprogram. This may be fetched from specification or abstract origin /// for this subprogram by resolving DW_AT_sepcification or @@ -247,15 +249,10 @@ public: /// DW_AT_call_line attribute in this DIE. /// \param CallColumn filled in with non-zero if successful, zero if there is /// no DW_AT_call_column attribute in this DIE. + /// \param CallDiscriminator filled in with non-zero if successful, zero if + /// there is no DW_AT_GNU_discriminator attribute in this DIE. void getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, - uint32_t &CallColumn) const; - - /// Get inlined chain for a given address, rooted at the current DIE. - /// Returns empty chain if address is not contained in address range - /// of current DIE. - void - getInlinedChainForAddress(const uint64_t Address, - SmallVectorImpl &InlinedChain) const; + uint32_t &CallColumn, uint32_t &CallDiscriminator) const; class attribute_iterator; @@ -263,14 +260,14 @@ public: /// /// \returns an iterator range for the attributes of the current DIE. iterator_range attributes() const; - + class iterator; - + iterator begin() const; iterator end() const; iterator_range children() const; }; - + class DWARFDie::attribute_iterator : public iterator_facade_base { @@ -280,7 +277,7 @@ class DWARFDie::attribute_iterator : DWARFAttribute AttrValue; /// The attribute index within the abbreviation declaration in Die. uint32_t Index; - + /// Update the attribute index and attempt to read the attribute value. If the /// attribute is able to be read, update AttrValue and the Index member /// variable. If the attribute value is not able to be read, an appropriate diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index c8d7a0c1ac7a3014f53979af387d3e0b0c2f833e..d6a3b52f2fe1ac47ece4a91de9be30d068f2292f 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -13,8 +13,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include namespace llvm { @@ -39,26 +39,27 @@ public: private: struct ValueType { - ValueType() { - uval = 0; - } + ValueType() { uval = 0; } union { uint64_t uval; int64_t sval; - const char* cstr; + const char *cstr; }; - const uint8_t* data = nullptr; + const uint8_t *data = nullptr; + uint64_t SectionIndex; /// Section index for reference forms. }; - dwarf::Form Form; // Form for this value. - ValueType Value; // Contains all data for the form. - const DWARFUnit *U = nullptr; // Remember the DWARFUnit at extract time. + dwarf::Form Form; /// Form for this value. + ValueType Value; /// Contains all data for the form. + const DWARFUnit *U = nullptr; /// Remember the DWARFUnit at extract time. public: DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F) {} dwarf::Form getForm() const { return Form; } + uint64_t getRawUValue() const { return Value.uval; } + uint64_t getSectionIndex() const { return Value.SectionIndex; } void setForm(dwarf::Form F) { Form = F; } void setUValue(uint64_t V) { Value.uval = V; } void setSValue(int64_t V) { Value.sval = V; } @@ -73,17 +74,20 @@ public: const DWARFUnit *getUnit() const { return U; } void dump(raw_ostream &OS) const; - /// \brief extracts a value in data at offset *offset_ptr. + /// Extracts a value in \p Data at offset \p *OffsetPtr. /// /// The passed DWARFUnit is allowed to be nullptr, in which /// case no relocation processing will be performed and some /// kind of forms that depend on Unit information are disallowed. + /// \param Data The DataExtractor to use. + /// \param OffsetPtr The offset within DataExtractor where the data starts. + /// \param U The optional DWARFUnit supplying information for some forms. /// \returns whether the extraction succeeded. bool extractValue(const DataExtractor &Data, uint32_t *OffsetPtr, const DWARFUnit *U); bool isInlinedCStr() const { - return Value.data != nullptr && Value.data == (const uint8_t*)Value.cstr; + return Value.data != nullptr && Value.data == (const uint8_t *)Value.cstr; } /// getAsFoo functions below return the extracted value as Foo if only @@ -134,45 +138,45 @@ public: uint8_t AddrSize, llvm::dwarf::DwarfFormat Format); - /// Skip a form in \p debug_info_data at offset specified by \p offset_ptr. + /// Skip a form in \p DebugInfoData at offset specified by \p OffsetPtr. /// /// Skips the bytes for this form in the debug info and updates the offset. /// - /// \param debug_info_data the .debug_info data to use to skip the value. - /// \param offset_ptr a reference to the offset that will be updated. + /// \param DebugInfoData the .debug_info data to use to skip the value. + /// \param OffsetPtr a reference to the offset that will be updated. /// \param U the DWARFUnit to use when skipping the form in case the form /// size differs according to data in the DWARFUnit. /// \returns true on success, false if the form was not skipped. - bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr, + bool skipValue(DataExtractor DebugInfoData, uint32_t *OffsetPtr, const DWARFUnit *U) const; - /// Skip a form in \p debug_info_data at offset specified by \p offset_ptr. + /// Skip a form in \p DebugInfoData at offset specified by \p OffsetPtr. /// /// Skips the bytes for this form in the debug info and updates the offset. /// - /// \param form the DW_FORM enumeration that indicates the form to skip. - /// \param debug_info_data the .debug_info data to use to skip the value. - /// \param offset_ptr a reference to the offset that will be updated. + /// \param Form the DW_FORM enumeration that indicates the form to skip. + /// \param DebugInfoData the .debug_info data to use to skip the value. + /// \param OffsetPtr a reference to the offset that will be updated. /// \param U the DWARFUnit to use when skipping the form in case the form /// size differs according to data in the DWARFUnit. /// \returns true on success, false if the form was not skipped. - static bool skipValue(dwarf::Form form, DataExtractor debug_info_data, - uint32_t *offset_ptr, const DWARFUnit *U); + static bool skipValue(dwarf::Form Form, DataExtractor DebugInfoData, + uint32_t *OffsetPtr, const DWARFUnit *U); - /// Skip a form in \p debug_info_data at offset specified by \p offset_ptr. + /// Skip a form in \p DebugInfoData at offset specified by \p OffsetPtr. /// /// Skips the bytes for this form in the debug info and updates the offset. /// - /// \param form the DW_FORM enumeration that indicates the form to skip. - /// \param debug_info_data the .debug_info data to use to skip the value. - /// \param offset_ptr a reference to the offset that will be updated. + /// \param Form the DW_FORM enumeration that indicates the form to skip. + /// \param DebugInfoData the .debug_info data to use to skip the value. + /// \param OffsetPtr a reference to the offset that will be updated. /// \param Version DWARF version number. /// \param AddrSize size of an address in bytes. /// \param Format enum value from llvm::dwarf::DwarfFormat. /// \returns true on success, false if the form was not skipped. - static bool skipValue(dwarf::Form form, DataExtractor debug_info_data, - uint32_t *offset_ptr, uint16_t Version, - uint8_t AddrSize, llvm::dwarf::DwarfFormat Format); + static bool skipValue(dwarf::Form Form, DataExtractor DebugInfoData, + uint32_t *OffsetPtr, uint16_t Version, uint8_t AddrSize, + llvm::dwarf::DwarfFormat Format); private: void dumpString(raw_ostream &OS) const; @@ -180,149 +184,146 @@ private: namespace dwarf { - /// Take an optional DWARFFormValue and try to extract a string value from it. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \returns an optional value that contains a value if the form value - /// was valid and was a string. - inline Optional toString(const Optional& V) { - if (V) - return V->getAsCString(); - return None; - } - - /// Take an optional DWARFFormValue and extract a string value from it. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \param Default the default value to return in case of failure. - /// \returns the string value or Default if the V doesn't have a value or the - /// form value's encoding wasn't a string. - inline const char* - toString(const Optional& V, const char *Default) { - return toString(V).getValueOr(Default); - } - - /// Take an optional DWARFFormValue and try to extract an unsigned constant. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \returns an optional value that contains a value if the form value - /// was valid and has a unsigned constant form. - inline Optional toUnsigned(const Optional& V) { - if (V) - return V->getAsUnsignedConstant(); - return None; - } - - /// Take an optional DWARFFormValue and extract a unsigned constant. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \param Default the default value to return in case of failure. - /// \returns the extracted unsigned value or Default if the V doesn't have a - /// value or the form value's encoding wasn't an unsigned constant form. - inline uint64_t - toUnsigned(const Optional& V, uint64_t Default) { - return toUnsigned(V).getValueOr(Default); - } - - /// Take an optional DWARFFormValue and try to extract an reference. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \returns an optional value that contains a value if the form value - /// was valid and has a reference form. - inline Optional toReference(const Optional& V) { - if (V) - return V->getAsReference(); - return None; - } - - /// Take an optional DWARFFormValue and extract a reference. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \param Default the default value to return in case of failure. - /// \returns the extracted reference value or Default if the V doesn't have a - /// value or the form value's encoding wasn't a reference form. - inline uint64_t - toReference(const Optional& V, uint64_t Default) { - return toReference(V).getValueOr(Default); - } - - /// Take an optional DWARFFormValue and try to extract an signed constant. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \returns an optional value that contains a value if the form value - /// was valid and has a signed constant form. - inline Optional toSigned(const Optional& V) { - if (V) - return V->getAsSignedConstant(); - return None; - } - - /// Take an optional DWARFFormValue and extract a signed integer. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \param Default the default value to return in case of failure. - /// \returns the extracted signed integer value or Default if the V doesn't - /// have a value or the form value's encoding wasn't a signed integer form. - inline int64_t - toSigned(const Optional& V, int64_t Default) { - return toSigned(V).getValueOr(Default); - } - - /// Take an optional DWARFFormValue and try to extract an address. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \returns an optional value that contains a value if the form value - /// was valid and has a address form. - inline Optional toAddress(const Optional& V) { - if (V) - return V->getAsAddress(); - return None; - } - - /// Take an optional DWARFFormValue and extract a address. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \param Default the default value to return in case of failure. - /// \returns the extracted address value or Default if the V doesn't have a - /// value or the form value's encoding wasn't an address form. - inline uint64_t - toAddress(const Optional& V, uint64_t Default) { - return toAddress(V).getValueOr(Default); - } - - /// Take an optional DWARFFormValue and try to extract an section offset. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \returns an optional value that contains a value if the form value - /// was valid and has a section offset form. - inline Optional toSectionOffset(const Optional& V) { - if (V) - return V->getAsSectionOffset(); - return None; - } - - /// Take an optional DWARFFormValue and extract a section offset. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \param Default the default value to return in case of failure. - /// \returns the extracted section offset value or Default if the V doesn't - /// have a value or the form value's encoding wasn't a section offset form. - inline uint64_t - toSectionOffset(const Optional& V, uint64_t Default) { - return toSectionOffset(V).getValueOr(Default); - } - - /// Take an optional DWARFFormValue and try to extract block data. - /// - /// \param V and optional DWARFFormValue to attempt to extract the value from. - /// \returns an optional value that contains a value if the form value - /// was valid and has a block form. - inline Optional> - toBlock(const Optional& V) { - if (V) - return V->getAsBlock(); - return None; - } +/// Take an optional DWARFFormValue and try to extract a string value from it. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \returns an optional value that contains a value if the form value +/// was valid and was a string. +inline Optional toString(const Optional &V) { + if (V) + return V->getAsCString(); + return None; +} + +/// Take an optional DWARFFormValue and extract a string value from it. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \param Default the default value to return in case of failure. +/// \returns the string value or Default if the V doesn't have a value or the +/// form value's encoding wasn't a string. +inline const char *toString(const Optional &V, + const char *Default) { + return toString(V).getValueOr(Default); +} + +/// Take an optional DWARFFormValue and try to extract an unsigned constant. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \returns an optional value that contains a value if the form value +/// was valid and has a unsigned constant form. +inline Optional toUnsigned(const Optional &V) { + if (V) + return V->getAsUnsignedConstant(); + return None; +} + +/// Take an optional DWARFFormValue and extract a unsigned constant. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \param Default the default value to return in case of failure. +/// \returns the extracted unsigned value or Default if the V doesn't have a +/// value or the form value's encoding wasn't an unsigned constant form. +inline uint64_t toUnsigned(const Optional &V, + uint64_t Default) { + return toUnsigned(V).getValueOr(Default); +} + +/// Take an optional DWARFFormValue and try to extract an reference. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \returns an optional value that contains a value if the form value +/// was valid and has a reference form. +inline Optional toReference(const Optional &V) { + if (V) + return V->getAsReference(); + return None; +} + +/// Take an optional DWARFFormValue and extract a reference. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \param Default the default value to return in case of failure. +/// \returns the extracted reference value or Default if the V doesn't have a +/// value or the form value's encoding wasn't a reference form. +inline uint64_t toReference(const Optional &V, + uint64_t Default) { + return toReference(V).getValueOr(Default); +} + +/// Take an optional DWARFFormValue and try to extract an signed constant. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \returns an optional value that contains a value if the form value +/// was valid and has a signed constant form. +inline Optional toSigned(const Optional &V) { + if (V) + return V->getAsSignedConstant(); + return None; +} + +/// Take an optional DWARFFormValue and extract a signed integer. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \param Default the default value to return in case of failure. +/// \returns the extracted signed integer value or Default if the V doesn't +/// have a value or the form value's encoding wasn't a signed integer form. +inline int64_t toSigned(const Optional &V, int64_t Default) { + return toSigned(V).getValueOr(Default); +} + +/// Take an optional DWARFFormValue and try to extract an address. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \returns an optional value that contains a value if the form value +/// was valid and has a address form. +inline Optional toAddress(const Optional &V) { + if (V) + return V->getAsAddress(); + return None; +} + +/// Take an optional DWARFFormValue and extract a address. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \param Default the default value to return in case of failure. +/// \returns the extracted address value or Default if the V doesn't have a +/// value or the form value's encoding wasn't an address form. +inline uint64_t toAddress(const Optional &V, uint64_t Default) { + return toAddress(V).getValueOr(Default); +} + +/// Take an optional DWARFFormValue and try to extract an section offset. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \returns an optional value that contains a value if the form value +/// was valid and has a section offset form. +inline Optional toSectionOffset(const Optional &V) { + if (V) + return V->getAsSectionOffset(); + return None; +} + +/// Take an optional DWARFFormValue and extract a section offset. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \param Default the default value to return in case of failure. +/// \returns the extracted section offset value or Default if the V doesn't +/// have a value or the form value's encoding wasn't a section offset form. +inline uint64_t toSectionOffset(const Optional &V, + uint64_t Default) { + return toSectionOffset(V).getValueOr(Default); +} + +/// Take an optional DWARFFormValue and try to extract block data. +/// +/// \param V and optional DWARFFormValue to attempt to extract the value from. +/// \returns an optional value that contains a value if the form value +/// was valid and has a block form. +inline Optional> toBlock(const Optional &V) { + if (V) + return V->getAsBlock(); + return None; +} } // end namespace dwarf diff --git a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h index 7a52218663b9d3a9a13a4bc91be1a4011d58a11e..8d1ac5c83c234ed0886657f4fd680bcb711c7b2f 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h +++ b/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h @@ -29,25 +29,25 @@ class DWARFGdbIndex { uint32_t ConstantPoolOffset; struct CompUnitEntry { - uint64_t Offset; // Offset of a CU in the .debug_info section. - uint64_t Length; // Length of that CU. + uint64_t Offset; /// Offset of a CU in the .debug_info section. + uint64_t Length; /// Length of that CU. }; SmallVector CuList; struct AddressEntry { - uint64_t LowAddress; // The low address. - uint64_t HighAddress; // The high address. - uint32_t CuIndex; // The CU index. + uint64_t LowAddress; /// The low address. + uint64_t HighAddress; /// The high address. + uint32_t CuIndex; /// The CU index. }; SmallVector AddressArea; struct SymTableEntry { - uint32_t NameOffset; // Offset of the symbol's name in the constant pool. - uint32_t VecOffset; // Offset of the CU vector in the constant pool. + uint32_t NameOffset; /// Offset of the symbol's name in the constant pool. + uint32_t VecOffset; /// Offset of the CU vector in the constant pool. }; SmallVector SymbolTable; - // Each value is CU index + attributes. + /// Each value is CU index + attributes. SmallVector>, 0> ConstantPoolVectors; diff --git a/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h b/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h index af01bddeed153db52083c4fe6f549d47a363ae78..f143de334737ac004adfde2d8da6062b4caf4e24 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h +++ b/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h @@ -16,7 +16,19 @@ namespace llvm { -typedef DenseMap> RelocAddrMap; +/// RelocAddrEntry contains relocated value and section index. +/// Section index is -1LL if relocation points to absolute symbol. +struct RelocAddrEntry { + uint64_t SectionIndex; + uint64_t Value; +}; + +/// In place of applying the relocations to the data we've read from disk we use +/// a separate mapping table to the side and checking that at locations in the +/// dwarf where we expect relocated values. This adds a bit of complexity to the +/// dwarf parsing/extraction at the benefit of not allocating memory for the +/// entire size of the debug info sections. +typedef DenseMap RelocAddrMap; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h index 703316005887cc241a912d86640e2f61e6480239..2041d40eb53af3f7e65a792347117b35cc28ee55 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h @@ -30,8 +30,9 @@ private: public: DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, bool IsDWO, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS, + StringRef LS, bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *Entry) : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, IsDWO, diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 40eb4434bd61e2918b3c4fb56f446074c7d89427..945b8999ff22f6972df99a3f8556bdfb54d18452 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -10,11 +10,12 @@ #ifndef LLVM_DEBUGINFO_DWARF_DWARFUNIT_H #define LLVM_DEBUGINFO_DWARF_DWARFUNIT_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" @@ -24,11 +25,11 @@ #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include #include #include #include +#include #include #include @@ -55,8 +56,9 @@ protected: ~DWARFUnitSectionBase() = default; virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, const DWARFSection &SOS, + const DWARFSection *AOS, StringRef LS, bool isLittleEndian, bool isDWO) = 0; }; @@ -87,9 +89,9 @@ public: private: void parseImpl(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, - bool IsDWO) override { + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, const DWARFSection &SOS, const DWARFSection *AOS, + StringRef LS, bool LE, bool IsDWO) override { if (Parsed) return; const auto &Index = getDWARFUnitIndex(Context, UnitType::Section); @@ -110,16 +112,17 @@ private: class DWARFUnit { DWARFContext &Context; - // Section containing this DWARFUnit. + /// Section containing this DWARFUnit. const DWARFSection &InfoSection; const DWARFDebugAbbrev *Abbrev; - StringRef RangeSection; + const DWARFSection *RangeSection; uint32_t RangeSectionBase; StringRef LineSection; StringRef StringSection; - StringRef StringOffsetSection; - StringRef AddrOffsetSection; + const DWARFSection &StringOffsetSection; + uint64_t StringOffsetSectionBase = 0; + const DWARFSection *AddrOffsetSection; uint32_t AddrOffsetSectionBase; bool isLittleEndian; bool isDWO; @@ -132,22 +135,17 @@ class DWARFUnit { uint8_t UnitType; uint8_t AddrSize; uint64_t BaseAddr; - // The compile unit debug information entry items. + /// The compile unit debug information entry items. std::vector DieArray; + + /// Map from range's start address to end address and corresponding DIE. + /// IntervalMap does not support range removal, as a result, we use the + /// std::map::upper_bound for address range lookup. + std::map> AddrDieMap; typedef iterator_range::iterator> die_iterator_range; - class DWOHolder { - object::OwningBinary DWOFile; - std::unique_ptr DWOContext; - DWARFUnit *DWOU = nullptr; - - public: - DWOHolder(StringRef DWOPath); - - DWARFUnit *getUnit() const { return DWOU; } - }; - std::unique_ptr DWO; + std::shared_ptr DWO; const DWARFUnitIndex::Entry *IndexEntry; @@ -165,9 +163,9 @@ protected: public: DWARFUnit(DWARFContext &Context, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, bool IsDWO, - const DWARFUnitSectionBase &UnitSection, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, StringRef SS, + const DWARFSection &SOS, const DWARFSection *AOS, StringRef LS, + bool LE, bool IsDWO, const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *IndexEntry = nullptr); virtual ~DWARFUnit(); @@ -176,21 +174,27 @@ public: StringRef getLineSection() const { return LineSection; } StringRef getStringSection() const { return StringSection; } - StringRef getStringOffsetSection() const { return StringOffsetSection; } + const DWARFSection &getStringOffsetSection() const { + return StringOffsetSection; + } - void setAddrOffsetSection(StringRef AOS, uint32_t Base) { + void setAddrOffsetSection(const DWARFSection *AOS, uint32_t Base) { AddrOffsetSection = AOS; AddrOffsetSectionBase = Base; } - void setRangesSection(StringRef RS, uint32_t Base) { + /// Recursively update address to Die map. + void updateAddressDieMap(DWARFDie Die); + + void setRangesSection(const DWARFSection *RS, uint32_t Base) { RangeSection = RS; RangeSectionBase = Base; } bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const; // FIXME: Result should be uint64_t in DWARF64. - bool getStringOffsetSectionItem(uint32_t Index, uint32_t &Result) const; + bool getStringOffsetSectionItem(uint32_t Index, uint64_t &Result) const; + uint64_t getStringOffsetSectionRelocation(uint32_t Index) const; DataExtractor getDebugInfoExtractor() const { return DataExtractor(InfoSection.Data, isLittleEndian, AddrSize); @@ -201,6 +205,9 @@ public: } const RelocAddrMap *getRelocMap() const { return &InfoSection.Relocs; } + const RelocAddrMap &getStringOffsetsRelocMap() const { + return StringOffsetSection.Relocs; + } bool extract(DataExtractor debug_info, uint32_t* offset_ptr); @@ -303,9 +310,9 @@ public: [](const DWARFDebugInfoEntry &LHS, uint32_t Offset) { return LHS.getOffset() < Offset; }); - if (it == DieArray.end()) - return DWARFDie(); - return DWARFDie(this, &*it); + if (it != DieArray.end() && it->getOffset() == Offset) + return DWARFDie(this, &*it); + return DWARFDie(); } uint32_t getLineTableOffset() const { @@ -339,10 +346,10 @@ private: /// it was actually constructed. bool parseDWO(); - /// getSubprogramForAddress - Returns subprogram DIE with address range + /// getSubroutineForAddress - Returns subprogram DIE with address range /// encompassing the provided address. The pointer is alive as long as parsed /// compile unit DIEs are not cleared. - DWARFDie getSubprogramForAddress(uint64_t Address); + DWARFDie getSubroutineForAddress(uint64_t Address); }; } // end namespace llvm diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h new file mode 100644 index 0000000000000000000000000000000000000000..9eb5c45faba8b36255f89baeb58ef601f832a111 --- /dev/null +++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -0,0 +1,108 @@ +//===- DWARFVerifier.h ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H +#define LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H + +#include +#include +#include + +namespace llvm { +class raw_ostream; +struct DWARFAttribute; +class DWARFContext; +class DWARFDie; +class DWARFUnit; +class DWARFAcceleratorTable; + +/// A class that verifies DWARF debug information given a DWARF Context. +class DWARFVerifier { + raw_ostream &OS; + DWARFContext &DCtx; + /// A map that tracks all references (converted absolute references) so we + /// can verify each reference points to a valid DIE and not an offset that + /// lies between to valid DIEs. + std::map> ReferenceToDIEOffsets; + uint32_t NumDebugInfoErrors = 0; + uint32_t NumDebugLineErrors = 0; + uint32_t NumAppleNamesErrors = 0; + + /// Verifies the attribute's DWARF attribute and its value. + /// + /// This function currently checks for: + /// - DW_AT_ranges values is a valid .debug_ranges offset + /// - DW_AT_stmt_list is a valid .debug_line offset + /// + /// \param Die The DWARF DIE that owns the attribute value + /// \param AttrValue The DWARF attribute value to check + void verifyDebugInfoAttribute(const DWARFDie &Die, DWARFAttribute &AttrValue); + + /// Verifies the attribute's DWARF form. + /// + /// This function currently checks for: + /// - All DW_FORM_ref values that are CU relative have valid CU offsets + /// - All DW_FORM_ref_addr values have valid .debug_info offsets + /// - All DW_FORM_strp values have valid .debug_str offsets + /// + /// \param Die The DWARF DIE that owns the attribute value + /// \param AttrValue The DWARF attribute value to check + void verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue); + + /// Verifies the all valid references that were found when iterating through + /// all of the DIE attributes. + /// + /// This function will verify that all references point to DIEs whose DIE + /// offset matches. This helps to ensure if a DWARF link phase moved things + /// around, that it doesn't create invalid references by failing to relocate + /// CU relative and absolute references. + void verifyDebugInfoReferences(); + + /// Verify the the DW_AT_stmt_list encoding and value and ensure that no + /// compile units that have the same DW_AT_stmt_list value. + void verifyDebugLineStmtOffsets(); + + /// Verify that all of the rows in the line table are valid. + /// + /// This function currently checks for: + /// - addresses within a sequence that decrease in value + /// - invalid file indexes + void verifyDebugLineRows(); + +public: + DWARFVerifier(raw_ostream &S, DWARFContext &D) + : OS(S), DCtx(D) {} + /// Verify the information in the .debug_info section. + /// + /// Any errors are reported to the stream that was this object was + /// constructed with. + /// + /// \returns true if the .debug_info verifies successfully, false otherwise. + bool handleDebugInfo(); + + /// Verify the information in the .debug_line section. + /// + /// Any errors are reported to the stream that was this object was + /// constructed with. + /// + /// \returns true if the .debug_line verifies successfully, false otherwise. + bool handleDebugLine(); + + /// Verify the information in the .apple_names accelerator table. + /// + /// Any errors are reported to the stream that was this object was + /// constructed with. + /// + /// \returns true if the .apple_names verifies successfully, false otherwise. + bool handleAppleNames(); +}; + +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H diff --git a/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/include/llvm/DebugInfo/MSF/MappedBlockStream.h index c91f6f725c80685398ac51a425f8e62bf754d3e0..02f3cb09b0045298e70e3391c3942e6b8c178d85 100644 --- a/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -17,7 +17,6 @@ #include "llvm/DebugInfo/MSF/MSFStreamLayout.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryStream.h" -#include "llvm/Support/BinaryStream.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -43,18 +42,20 @@ class MappedBlockStream : public BinaryStream { friend class WritableMappedBlockStream; public: static std::unique_ptr - createStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &Layout, BinaryStreamRef MsfData); + createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, + BinaryStreamRef MsfData, BumpPtrAllocator &Allocator); static std::unique_ptr createIndexedStream(const MSFLayout &Layout, BinaryStreamRef MsfData, - uint32_t StreamIndex); + uint32_t StreamIndex, BumpPtrAllocator &Allocator); static std::unique_ptr - createFpmStream(const MSFLayout &Layout, BinaryStreamRef MsfData); + createFpmStream(const MSFLayout &Layout, BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); static std::unique_ptr - createDirectoryStream(const MSFLayout &Layout, BinaryStreamRef MsfData); + createDirectoryStream(const MSFLayout &Layout, BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); llvm::support::endianness getEndian() const override { return llvm::support::little; @@ -67,20 +68,17 @@ public: uint32_t getLength() override; - uint32_t getNumBytesCopied() const; - - llvm::BumpPtrAllocator &getAllocator() { return Pool; } + llvm::BumpPtrAllocator &getAllocator() { return Allocator; } void invalidateCache(); uint32_t getBlockSize() const { return BlockSize; } - uint32_t getNumBlocks() const { return NumBlocks; } + uint32_t getNumBlocks() const { return StreamLayout.Blocks.size(); } uint32_t getStreamLength() const { return StreamLayout.Length; } protected: - MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &StreamLayout, - BinaryStreamRef MsfData); + MappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &StreamLayout, + BinaryStreamRef MsfData, BumpPtrAllocator &Allocator); private: const MSFStreamLayout &getStreamLayout() const { return StreamLayout; } @@ -91,31 +89,40 @@ private: ArrayRef &Buffer); const uint32_t BlockSize; - const uint32_t NumBlocks; const MSFStreamLayout StreamLayout; BinaryStreamRef MsfData; typedef MutableArrayRef CacheEntry; - llvm::BumpPtrAllocator Pool; + + // We just store the allocator by reference. We use this to allocate + // contiguous memory for things like arrays or strings that cross a block + // boundary, and this memory is expected to outlive the stream. For example, + // someone could create a stream, read some stuff, then close the stream, and + // we would like outstanding references to fields to remain valid since the + // entire file is mapped anyway. Because of that, the user must supply the + // allocator to allocate broken records from. + BumpPtrAllocator &Allocator; DenseMap> CacheMap; }; class WritableMappedBlockStream : public WritableBinaryStream { public: static std::unique_ptr - createStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &Layout, WritableBinaryStreamRef MsfData); + createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, + WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator); static std::unique_ptr createIndexedStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, - uint32_t StreamIndex); + uint32_t StreamIndex, BumpPtrAllocator &Allocator); static std::unique_ptr createDirectoryStream(const MSFLayout &Layout, - WritableBinaryStreamRef MsfData); + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); static std::unique_ptr - createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData); + createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); llvm::support::endianness getEndian() const override { return llvm::support::little; @@ -139,9 +146,10 @@ public: uint32_t getStreamLength() const { return ReadInterface.getStreamLength(); } protected: - WritableMappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks, + WritableMappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &StreamLayout, - WritableBinaryStreamRef MsfData); + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator); private: MappedBlockStream ReadInterface; diff --git a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h index 9bf0738315653d1e9fec065e7e1b3712b1614450..9713dce362d28d873471669fc6e34282851995b0 100644 --- a/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h +++ b/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h @@ -34,12 +34,11 @@ public: std::unique_ptr getChildAtIndex(uint32_t Index) const override { std::unique_ptr Child = Enumerator->getChildAtIndex(Index); - return make_concrete_child(std::move(Child)); + return unique_dyn_cast_or_null(Child); } std::unique_ptr getNext() override { - std::unique_ptr Child = Enumerator->getNext(); - return make_concrete_child(std::move(Child)); + return unique_dyn_cast_or_null(Enumerator->getNext()); } void reset() override { Enumerator->reset(); } @@ -50,11 +49,6 @@ public: } private: - std::unique_ptr - make_concrete_child(std::unique_ptr Child) const { - ChildType *ConcreteChild = dyn_cast_or_null(Child.release()); - return std::unique_ptr(ConcreteChild); - } std::unique_ptr Enumerator; }; diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h index 941e16a35fac2139d598af48e90c0677b99a326e..ffae6645e94b722028b3383b7f79c31fe9cddff6 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_PDB_DIA_DIAENUMDEBUGSTREAMS_H #include "DIASupport.h" +#include "llvm/DebugInfo/PDB/IPDBDataStream.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" namespace llvm { diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h index 106b84cecfffa7518a002a0cbb349b334086aef4..08f0de124ede5117e76d52502cbf019385652d21 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h @@ -12,6 +12,7 @@ #include "DIASupport.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBLineNumber.h" namespace llvm { namespace pdb { diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h index 6c00d6a5e29d36a171fee437301d0ee0748397d5..e69d18f5ba3705f5557a1a6d3104d4d894c0472f 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h @@ -12,6 +12,7 @@ #include "DIASupport.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBSourceFile.h" namespace llvm { namespace pdb { diff --git a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h index b206ff59a6a49a6c185851bfe879599db5bdd028..f779cd1f4be35171adf15d608a0109442193afd2 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h @@ -12,6 +12,7 @@ #include "DIASupport.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" namespace llvm { namespace pdb { diff --git a/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h b/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h index 1e40c46f8a27eb25e59a55174b6326ffdb48bf6c..3710eb29e7f98bbe9d80a01c749e1ac4ae1e8e3f 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIARawSymbol.h @@ -102,6 +102,8 @@ public: uint32_t getVirtualBaseDispIndex() const override; uint32_t getVirtualBaseOffset() const override; uint32_t getVirtualTableShapeId() const override; + std::unique_ptr + getVirtualBaseTableType() const override; PDB_DataKind getDataKind() const override; PDB_SymType getSymTag() const override; PDB_UniqueId getGuid() const override; diff --git a/include/llvm/DebugInfo/PDB/DIA/DIASession.h b/include/llvm/DebugInfo/PDB/DIA/DIASession.h index 350442556bef88b176fbd45ee3b738bee6ae89f6..3f5818631e7bc1c9c1c56a868ab18fdf9da3b7a6 100644 --- a/include/llvm/DebugInfo/PDB/DIA/DIASession.h +++ b/include/llvm/DebugInfo/PDB/DIA/DIASession.h @@ -31,7 +31,7 @@ public: uint64_t getLoadAddress() const override; void setLoadAddress(uint64_t Address) override; - std::unique_ptr getGlobalScope() override; + std::unique_ptr getGlobalScope() const override; std::unique_ptr getSymbolById(uint32_t SymbolId) const override; std::unique_ptr diff --git a/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h b/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h index 49866b8bb2f22eb5beef36881addb45bde430fca..fab086c62c72e84dec8632d7d59e7fc3b241d6a9 100644 --- a/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h +++ b/include/llvm/DebugInfo/PDB/IPDBRawSymbol.h @@ -21,6 +21,9 @@ class raw_ostream; namespace pdb { +class PDBSymbolTypeVTable; +class PDBSymbolTypeVTableShape; + /// IPDBRawSymbol defines an interface used to represent an arbitrary symbol. /// It exposes a monolithic interface consisting of accessors for the union of /// all properties that are valid for any symbol type. This interface is then @@ -110,6 +113,8 @@ public: virtual Variant getValue() const = 0; virtual uint32_t getVirtualBaseDispIndex() const = 0; virtual uint32_t getVirtualBaseOffset() const = 0; + virtual std::unique_ptr + getVirtualBaseTableType() const = 0; virtual uint32_t getVirtualTableShapeId() const = 0; virtual PDB_DataKind getDataKind() const = 0; virtual PDB_SymType getSymTag() const = 0; diff --git a/include/llvm/DebugInfo/PDB/IPDBSession.h b/include/llvm/DebugInfo/PDB/IPDBSession.h index 696736a907a6f260edec5d357e22e0d6a3eb801a..85d9fe1248599cb92a8bf2650923e18902601d1b 100644 --- a/include/llvm/DebugInfo/PDB/IPDBSession.h +++ b/include/llvm/DebugInfo/PDB/IPDBSession.h @@ -29,20 +29,12 @@ public: virtual uint64_t getLoadAddress() const = 0; virtual void setLoadAddress(uint64_t Address) = 0; - virtual std::unique_ptr getGlobalScope() = 0; + virtual std::unique_ptr getGlobalScope() const = 0; virtual std::unique_ptr getSymbolById(uint32_t SymbolId) const = 0; template std::unique_ptr getConcreteSymbolById(uint32_t SymbolId) const { - auto Symbol(getSymbolById(SymbolId)); - if (!Symbol) - return nullptr; - - T *ConcreteSymbol = dyn_cast(Symbol.get()); - if (!ConcreteSymbol) - return nullptr; - (void)Symbol.release(); - return std::unique_ptr(ConcreteSymbol); + return unique_dyn_cast_or_null(getSymbolById(SymbolId)); } virtual std::unique_ptr diff --git a/include/llvm/DebugInfo/PDB/Native/ModInfo.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h similarity index 64% rename from include/llvm/DebugInfo/PDB/Native/ModInfo.h rename to include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h index d26d0d6184496951ac0ff2e2b7af4c5e78c28d0e..8200f51e3da9d2b6bfb5f464d6796fb8adb8709a 100644 --- a/include/llvm/DebugInfo/PDB/Native/ModInfo.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h @@ -1,4 +1,4 @@ -//===- ModInfo.h - PDB module information -----------------------*- C++ -*-===// +//===- DbiModuleDescriptor.h - PDB module information -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_PDB_RAW_MODINFO_H -#define LLVM_DEBUGINFO_PDB_RAW_MODINFO_H +#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTOR_H +#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTOR_H #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" @@ -22,21 +22,21 @@ namespace llvm { namespace pdb { -class ModInfo { +class DbiModuleDescriptor { friend class DbiStreamBuilder; public: - ModInfo(); - ModInfo(const ModInfo &Info); - ~ModInfo(); + DbiModuleDescriptor(); + DbiModuleDescriptor(const DbiModuleDescriptor &Info); + ~DbiModuleDescriptor(); - static Error initialize(BinaryStreamRef Stream, ModInfo &Info); + static Error initialize(BinaryStreamRef Stream, DbiModuleDescriptor &Info); bool hasECInfo() const; uint16_t getTypeServerIndex() const; uint16_t getModuleStreamIndex() const; uint32_t getSymbolDebugInfoByteSize() const; - uint32_t getLineInfoByteSize() const; + uint32_t getC11LineInfoByteSize() const; uint32_t getC13LineInfoByteSize() const; uint32_t getNumberOfFiles() const; uint32_t getSourceFileNameIndex() const; @@ -53,20 +53,12 @@ private: const ModuleInfoHeader *Layout = nullptr; }; -struct ModuleInfoEx { - ModuleInfoEx(const ModInfo &Info) : Info(Info) {} - ModuleInfoEx(const ModuleInfoEx &Ex) = default; - - ModInfo Info; - std::vector SourceFiles; -}; - } // end namespace pdb -template <> struct VarStreamArrayExtractor { +template <> struct VarStreamArrayExtractor { Error operator()(BinaryStreamRef Stream, uint32_t &Length, - pdb::ModInfo &Info) const { - if (auto EC = pdb::ModInfo::initialize(Stream, Info)) + pdb::DbiModuleDescriptor &Info) { + if (auto EC = pdb::DbiModuleDescriptor::initialize(Stream, Info)) return EC; Length = Info.getRecordLength(); return Error::success(); @@ -75,4 +67,4 @@ template <> struct VarStreamArrayExtractor { } // end namespace llvm -#endif // LLVM_DEBUGINFO_PDB_RAW_MODINFO_H +#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTOR_H diff --git a/include/llvm/DebugInfo/PDB/Native/ModInfoBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h similarity index 57% rename from include/llvm/DebugInfo/PDB/Native/ModInfoBuilder.h rename to include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h index 605fd2483c3b8ad411a9835dabe930972339fad9..a89e26ae943c9b4a6e33704af1d46bd67860effa 100644 --- a/include/llvm/DebugInfo/PDB/Native/ModInfoBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h @@ -1,4 +1,4 @@ -//===- ModInfoBuilder.h - PDB module information ----------------*- C++ -*-===// +//===- DbiModuleDescriptorBuilder.h - PDB module information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,10 +7,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGINFO_PDB_RAW_MODINFOBUILDER_H -#define LLVM_DEBUGINFO_PDB_RAW_MODINFOBUILDER_H +#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTORBUILDER_H +#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTORBUILDER_H #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/Error.h" @@ -21,28 +24,40 @@ namespace llvm { class BinaryStreamWriter; +namespace codeview { +class DebugSubsectionRecordBuilder; +} + namespace msf { class MSFBuilder; struct MSFLayout; } namespace pdb { -class ModInfoBuilder { +class DbiModuleDescriptorBuilder { friend class DbiStreamBuilder; public: - ModInfoBuilder(StringRef ModuleName, uint32_t ModIndex, msf::MSFBuilder &Msf); + DbiModuleDescriptorBuilder(StringRef ModuleName, uint32_t ModIndex, + msf::MSFBuilder &Msf); + ~DbiModuleDescriptorBuilder(); - ModInfoBuilder(const ModInfoBuilder &) = delete; - ModInfoBuilder &operator=(const ModInfoBuilder &) = delete; + DbiModuleDescriptorBuilder(const DbiModuleDescriptorBuilder &) = delete; + DbiModuleDescriptorBuilder & + operator=(const DbiModuleDescriptorBuilder &) = delete; void setObjFileName(StringRef Name); void addSymbol(codeview::CVSymbol Symbol); + void + addDebugSubsection(std::shared_ptr Subsection); + uint16_t getStreamIndex() const; StringRef getModuleName() const { return ModuleName; } StringRef getObjFileName() const { return ObjFileName; } + unsigned getModuleIndex() const { return Layout.Mod; } + ArrayRef source_files() const { return makeArrayRef(SourceFiles); } @@ -56,6 +71,8 @@ public: WritableBinaryStreamRef MsfBuffer); private: + uint32_t calculateC13DebugInfoSize() const; + void addSourceFile(StringRef Path); msf::MSFBuilder &MSF; @@ -64,6 +81,10 @@ private: std::string ObjFileName; std::vector SourceFiles; std::vector Symbols; + + std::vector> + C13Builders; + ModuleInfoHeader Layout; }; @@ -71,4 +92,4 @@ private: } // end namespace llvm -#endif // LLVM_DEBUGINFO_PDB_RAW_MODINFOBUILDER_H +#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULEDESCRIPTORBUILDER_H diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h new file mode 100644 index 0000000000000000000000000000000000000000..2885081628f6b25513ed0336fd3ceae0509cd06f --- /dev/null +++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h @@ -0,0 +1,116 @@ +//===- DbiModuleList.h - PDB module information list ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H +#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { +namespace codeview {} +namespace pdb { + +class DbiModuleList; +struct FileInfoSubstreamHeader; + +class DbiModuleSourceFilesIterator + : public iterator_facade_base { + typedef iterator_facade_base + BaseType; + +public: + DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi, + uint16_t Filei); + DbiModuleSourceFilesIterator() = default; + DbiModuleSourceFilesIterator & + operator=(const DbiModuleSourceFilesIterator &R) = default; + + bool operator==(const DbiModuleSourceFilesIterator &R) const; + + const StringRef &operator*() const { return ThisValue; } + StringRef &operator*() { return ThisValue; } + + bool operator<(const DbiModuleSourceFilesIterator &RHS) const; + std::ptrdiff_t operator-(const DbiModuleSourceFilesIterator &R) const; + DbiModuleSourceFilesIterator &operator+=(std::ptrdiff_t N); + DbiModuleSourceFilesIterator &operator-=(std::ptrdiff_t N); + +private: + void setValue(); + + bool isEnd() const; + bool isCompatible(const DbiModuleSourceFilesIterator &R) const; + bool isUniversalEnd() const; + + StringRef ThisValue; + const DbiModuleList *Modules{nullptr}; + uint32_t Modi{0}; + uint16_t Filei{0}; +}; + +class DbiModuleList { + friend DbiModuleSourceFilesIterator; + +public: + Error initialize(BinaryStreamRef ModInfo, BinaryStreamRef FileInfo); + + Expected getFileName(uint32_t Index) const; + uint32_t getModuleCount() const; + uint32_t getSourceFileCount() const; + uint16_t getSourceFileCount(uint32_t Modi) const; + + iterator_range + source_files(uint32_t Modi) const; + + DbiModuleDescriptor getModuleDescriptor(uint32_t Modi) const; + +private: + Error initializeModInfo(BinaryStreamRef ModInfo); + Error initializeFileInfo(BinaryStreamRef FileInfo); + + VarStreamArray Descriptors; + + FixedStreamArray FileNameOffsets; + FixedStreamArray ModFileCountArray; + + // For each module, there are multiple filenames, which can be obtained by + // knowing the index of the file. Given the index of the file, one can use + // that as an offset into the FileNameOffsets array, which contains the + // absolute offset of the file name in NamesBuffer. Thus, for each module + // we store the first index in the FileNameOffsets array for this module. + // The number of files for the corresponding module is stored in + // ModFileCountArray. + std::vector ModuleInitialFileIndex; + + // In order to provide random access into the Descriptors array, we iterate it + // once up front to find the offsets of the individual items and store them in + // this array. + std::vector ModuleDescriptorOffsets; + + const FileInfoSubstreamHeader *FileInfoHeader = nullptr; + + BinaryStreamRef ModInfoSubstream; + BinaryStreamRef FileInfoSubstream; + BinaryStreamRef NamesBuffer; +}; +} +} + +#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H \ No newline at end of file diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h index f49f5aaefacadda7784f9c170afe51d22425255c..7123e88cd64235a13fe4c6d9742b3ccfd191e6f3 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h @@ -10,16 +10,15 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H #define LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H -#include "llvm/DebugInfo/CodeView/ModuleSubstream.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/DebugInfo/PDB/Native/StringTable.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -68,9 +67,7 @@ public: /// not present, returns InvalidStreamIndex. uint32_t getDebugStreamIndex(DbgHeaderType Type) const; - ArrayRef modules() const; - - Expected getFileNameForIndex(uint32_t Index) const; + const DbiModuleList &modules() const; FixedStreamArray getSectionHeaders(); @@ -80,35 +77,30 @@ public: void visitSectionContributions(ISectionContribVisitor &Visitor) const; private: - Error initializeModInfoArray(); Error initializeSectionContributionData(); Error initializeSectionHeadersData(); Error initializeSectionMapData(); - Error initializeFileInfo(); Error initializeFpoRecords(); PDBFile &Pdb; std::unique_ptr Stream; - std::vector ModuleInfos; - StringTable ECNames; + PDBStringTable ECNames; - BinaryStreamRef ModInfoSubstream; BinaryStreamRef SecContrSubstream; BinaryStreamRef SecMapSubstream; - BinaryStreamRef FileInfoSubstream; BinaryStreamRef TypeServerMapSubstream; BinaryStreamRef ECSubstream; - BinaryStreamRef NamesBuffer; + DbiModuleList Modules; FixedStreamArray DbgStreams; - PdbRaw_DbiSecContribVer SectionContribVersion; + PdbRaw_DbiSecContribVer SectionContribVersion = + PdbRaw_DbiSecContribVer::DbiSecContribVer60; FixedStreamArray SectionContribs; FixedStreamArray SectionContribs2; FixedStreamArray SectionMap; - FixedStreamArray FileNameOffsets; std::unique_ptr SectionHeaderStream; FixedStreamArray SectionHeaders; diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h index 16426bd93847a685e5eb34b0ba32433377919028..aeb2e2ab026a519a6c3a09a3dd0b4320cdd1724c 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h @@ -31,7 +31,7 @@ struct coff_section; namespace pdb { class DbiStream; struct DbiStreamHeader; -class ModInfoBuilder; +class DbiModuleDescriptorBuilder; class PDBFile; class DbiStreamBuilder { @@ -49,7 +49,6 @@ public: void setPdbDllRbld(uint16_t R); void setFlags(uint16_t F); void setMachineType(PDB_Machine M); - void setSectionContribs(ArrayRef SecMap); void setSectionMap(ArrayRef SecMap); // Add given bytes as a new stream. @@ -57,17 +56,16 @@ public: uint32_t calculateSerializedLength() const; - Expected addModuleInfo(StringRef ModuleName); + Expected addModuleInfo(StringRef ModuleName); Error addModuleSourceFile(StringRef Module, StringRef File); + Expected getSourceFileNameIndex(StringRef FileName); Error finalizeMsfLayout(); Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef MsfBuffer); - // A helper function to create Section Contributions from COFF input - // section headers. - static std::vector - createSectionContribs(ArrayRef SecHdrs); + void addSectionContrib(DbiModuleDescriptorBuilder *ModuleDbi, + const llvm::object::coff_section *SecHdr); // A helper function to create a Section Map from a COFF section header. static std::vector @@ -81,6 +79,7 @@ private: Error finalize(); uint32_t calculateModiSubstreamSize() const; + uint32_t calculateNamesOffset() const; uint32_t calculateSectionContribsStreamSize() const; uint32_t calculateSectionMapStreamSize() const; uint32_t calculateFileInfoSubstreamSize() const; @@ -103,14 +102,14 @@ private: const DbiStreamHeader *Header; - StringMap> ModiMap; - std::vector ModiList; + StringMap> ModiMap; + std::vector ModiList; StringMap SourceFileNames; WritableBinaryStreamRef NamesBuffer; MutableBinaryByteStream FileInfoBuffer; - ArrayRef SectionContribs; + std::vector SectionContribs; ArrayRef SectionMap; llvm::SmallVector DbgStreams; }; diff --git a/include/llvm/DebugInfo/PDB/Native/InfoStream.h b/include/llvm/DebugInfo/PDB/Native/InfoStream.h index 1c38c2b6194fc1f7ae81677ed0cb95e6b6cce422..fc91fc7097bd4cf843507be207fea883b8f2a7fe 100644 --- a/include/llvm/DebugInfo/PDB/Native/InfoStream.h +++ b/include/llvm/DebugInfo/PDB/Native/InfoStream.h @@ -35,6 +35,7 @@ public: uint32_t getStreamSize() const; + bool containsIdStream() const; PdbRaw_ImplVer getVersion() const; uint32_t getSignature() const; uint32_t getAge() const; diff --git a/include/llvm/DebugInfo/PDB/Native/ModStream.h b/include/llvm/DebugInfo/PDB/Native/ModStream.h deleted file mode 100644 index d65e195dbb95b080f5183f6b4d32d7c75d905c2d..0000000000000000000000000000000000000000 --- a/include/llvm/DebugInfo/PDB/Native/ModStream.h +++ /dev/null @@ -1,62 +0,0 @@ -//===- ModStream.h - PDB Module Info Stream Access ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_PDB_RAW_MODSTREAM_H -#define LLVM_DEBUGINFO_PDB_RAW_MODSTREAM_H - -#include "llvm/ADT/iterator_range.h" -#include "llvm/DebugInfo/CodeView/CVRecord.h" -#include "llvm/DebugInfo/CodeView/ModuleSubstream.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamRef.h" -#include "llvm/Support/Error.h" - -namespace llvm { -namespace pdb { -class PDBFile; -class ModInfo; - -class ModStream { -public: - ModStream(const ModInfo &Module, - std::unique_ptr Stream); - ~ModStream(); - - Error reload(); - - uint32_t signature() const { return Signature; } - - iterator_range - symbols(bool *HadError) const; - - iterator_range - lines(bool *HadError) const; - - Error commit(); - -private: - const ModInfo &Mod; - - uint32_t Signature; - - std::unique_ptr Stream; - - codeview::CVSymbolArray SymbolsSubstream; - BinaryStreamRef LinesSubstream; - BinaryStreamRef C13LinesSubstream; - BinaryStreamRef GlobalRefsSubstream; - - codeview::ModuleSubstreamArray LineInfo; -}; -} -} - -#endif diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h new file mode 100644 index 0000000000000000000000000000000000000000..c744696ae25088c3afae44c6e504288e67b73311 --- /dev/null +++ b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h @@ -0,0 +1,76 @@ +//===- ModuleDebugStream.h - PDB Module Info Stream Access ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_MODULEDEBUGSTREAM_H +#define LLVM_DEBUGINFO_PDB_RAW_MODULEDEBUGSTREAM_H + +#include "llvm/ADT/iterator_range.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace pdb { +class PDBFile; +class DbiModuleDescriptor; + +class ModuleDebugStreamRef { + typedef codeview::DebugSubsectionArray::Iterator DebugSubsectionIterator; + +public: + ModuleDebugStreamRef(const DbiModuleDescriptor &Module, + std::unique_ptr Stream); + ModuleDebugStreamRef(ModuleDebugStreamRef &&Other) = default; + ~ModuleDebugStreamRef(); + + Error reload(); + + uint32_t signature() const { return Signature; } + + iterator_range + symbols(bool *HadError) const; + + const codeview::CVSymbolArray &getSymbolArray() const { + return SymbolsSubstream; + } + + ModuleDebugStreamRef &operator=(ModuleDebugStreamRef &&Other) = default; + + llvm::iterator_range subsections() const; + + bool hasDebugSubsections() const; + + Error commit(); + + Expected + findChecksumsSubsection() const; + +private: + const DbiModuleDescriptor &Mod; + + uint32_t Signature; + + std::shared_ptr Stream; + + codeview::CVSymbolArray SymbolsSubstream; + BinaryStreamRef C11LinesSubstream; + BinaryStreamRef C13LinesSubstream; + BinaryStreamRef GlobalRefsSubstream; + + codeview::DebugSubsectionArray Subsections; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeDumperBase.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.h similarity index 100% rename from include/llvm/DebugInfo/CodeView/TypeDumperBase.h rename to include/llvm/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.h diff --git a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h index 8eeaf3e0ea49b9d326814044a35dee19a3300bfe..22ed61910d94a6f2584696ed0363b38aa8deebf7 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h @@ -10,7 +10,7 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVECOMPILANDSYMBOL_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVECOMPILANDSYMBOL_H -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" namespace llvm { @@ -18,7 +18,7 @@ namespace pdb { class NativeCompilandSymbol : public NativeRawSymbol { public: - NativeCompilandSymbol(NativeSession &Session, const ModuleInfoEx &MI); + NativeCompilandSymbol(NativeSession &Session, DbiModuleDescriptor MI); PDB_SymType getSymTag() const override; bool isEditAndContinueEnabled() const override; uint32_t getLexicalParentId() const override; @@ -26,7 +26,7 @@ public: std::string getName() const override; private: - ModuleInfoEx Module; + DbiModuleDescriptor Module; }; } // namespace pdb diff --git a/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h b/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h index 60a55ee50cc48bac5d2cdc2e8230a81b691e54e8..6aa1460dbb4e6d4c6440bf9e550100007cc9f1a8 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeEnumModules.h @@ -11,18 +11,18 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMMODULES_H #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" namespace llvm { namespace pdb { +class DbiModuleList; class NativeSession; class NativeEnumModules : public IPDBEnumChildren { public: - explicit NativeEnumModules(NativeSession &Session, - ArrayRef Modules, - uint32_t Index = 0); + NativeEnumModules(NativeSession &Session, const DbiModuleList &Modules, + uint32_t Index = 0); uint32_t getChildCount() const override; std::unique_ptr getChildAtIndex(uint32_t Index) const override; @@ -32,7 +32,7 @@ public: private: NativeSession &Session; - ArrayRef Modules; + const DbiModuleList &Modules; uint32_t Index; }; } diff --git a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h index 655bed9ac17c1815939d344f5b6263cc013d688b..e1e78035ff38953967cd463ecbd20aa5157a78de 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h @@ -101,6 +101,8 @@ public: uint32_t getVirtualBaseDispIndex() const override; uint32_t getVirtualBaseOffset() const override; uint32_t getVirtualTableShapeId() const override; + std::unique_ptr + getVirtualBaseTableType() const override; PDB_DataKind getDataKind() const override; PDB_SymType getSymTag() const override; PDB_UniqueId getGuid() const override; diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/include/llvm/DebugInfo/PDB/Native/NativeSession.h index bbe207738e02115ce599ebef63051ecdf9faebac..e6da266f796d545a7c96f8b64f4b6ffa5c88e314 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeSession.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeSession.h @@ -32,7 +32,7 @@ public: uint64_t getLoadAddress() const override; void setLoadAddress(uint64_t Address) override; - std::unique_ptr getGlobalScope() override; + std::unique_ptr getGlobalScope() const override; std::unique_ptr getSymbolById(uint32_t SymbolId) const override; std::unique_ptr diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/include/llvm/DebugInfo/PDB/Native/PDBFile.h index fbca62d6e9d936e7f4fc59c43d9d4602a8fd7979..4d3c569c3cdfb12c775673d86bef3127380551fb 100644 --- a/include/llvm/DebugInfo/PDB/Native/PDBFile.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBFile.h @@ -33,7 +33,7 @@ namespace pdb { class DbiStream; class GlobalsStream; class InfoStream; -class StringTable; +class PDBStringTable; class PDBFileBuilder; class PublicsStream; class SymbolStream; @@ -95,7 +95,7 @@ public: Expected getPDBIpiStream(); Expected getPDBPublicsStream(); Expected getPDBSymbolStream(); - Expected getStringTable(); + Expected getStringTable(); BumpPtrAllocator &getAllocator() { return Allocator; } @@ -106,7 +106,9 @@ public: bool hasPDBPublicsStream(); bool hasPDBSymbolStream(); bool hasPDBTpiStream() const; - bool hasStringTable(); + bool hasPDBStringTable(); + + uint32_t getPointerSize(); private: Expected> @@ -131,7 +133,7 @@ private: std::unique_ptr Symbols; std::unique_ptr DirectoryStream; std::unique_ptr StringTableStream; - std::unique_ptr Strings; + std::unique_ptr Strings; }; } } diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h index 3898af5afc9e42adda0f8fa066a3e846e6c5ad4d..cd7d3b0637933656f461f1732357b4570b185d3a 100644 --- a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h @@ -15,8 +15,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" @@ -46,12 +46,14 @@ public: DbiStreamBuilder &getDbiBuilder(); TpiStreamBuilder &getTpiBuilder(); TpiStreamBuilder &getIpiBuilder(); - StringTableBuilder &getStringTableBuilder(); + PDBStringTableBuilder &getStringTableBuilder(); Error commit(StringRef Filename); -private: + Expected getNamedStreamIndex(StringRef Name) const; Error addNamedStream(StringRef Name, uint32_t Size); + +private: Expected finalizeMsfLayout(); BumpPtrAllocator &Allocator; @@ -62,7 +64,7 @@ private: std::unique_ptr Tpi; std::unique_ptr Ipi; - StringTableBuilder Strings; + PDBStringTableBuilder Strings; NamedStreamMap NamedStreams; }; } diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h new file mode 100644 index 0000000000000000000000000000000000000000..86ef1136b41d778b465e7ffc9554bfda6931defc --- /dev/null +++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h @@ -0,0 +1,66 @@ +//===- PDBStringTable.h - PDB String Table -----------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLE_H +#define LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/Support/BinaryStreamArray.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace llvm { +class BinaryStreamReader; + +namespace msf { +class MappedBlockStream; +} + +namespace pdb { + +struct PDBStringTableHeader; + +class PDBStringTable { +public: + Error reload(BinaryStreamReader &Reader); + + uint32_t getByteSize() const; + uint32_t getNameCount() const; + uint32_t getHashVersion() const; + uint32_t getSignature() const; + + Expected getStringForID(uint32_t ID) const; + Expected getIDForString(StringRef Str) const; + + FixedStreamArray name_ids() const; + + const codeview::DebugStringTableSubsectionRef &getStringTable() const; + +private: + Error readHeader(BinaryStreamReader &Reader); + Error readStrings(BinaryStreamReader &Reader); + Error readHashTable(BinaryStreamReader &Reader); + Error readEpilogue(BinaryStreamReader &Reader); + + const PDBStringTableHeader *Header = nullptr; + codeview::DebugStringTableSubsectionRef Strings; + FixedStreamArray IDs; + uint32_t ByteSize = 0; + uint32_t NameCount = 0; +}; + +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_RAW_STRINGTABLE_H diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h new file mode 100644 index 0000000000000000000000000000000000000000..b57707ee792319dbab4b04fc280b86a1ba9c0cd3 --- /dev/null +++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h @@ -0,0 +1,59 @@ +//===- PDBStringTableBuilder.h - PDB String Table Builder -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file creates the "/names" stream. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLEBUILDER_H +#define LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLEBUILDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +class BinaryStreamWriter; +class WritableBinaryStreamRef; + +namespace msf { +struct MSFLayout; +} + +namespace pdb { + +class PDBFileBuilder; + +class PDBStringTableBuilder { +public: + // If string S does not exist in the string table, insert it. + // Returns the ID for S. + uint32_t insert(StringRef S); + + uint32_t calculateSerializedSize() const; + Error commit(BinaryStreamWriter &Writer) const; + + void setStrings(const codeview::DebugStringTableSubsection &Strings); + +private: + uint32_t calculateHashTableSize() const; + Error writeHeader(BinaryStreamWriter &Writer) const; + Error writeStrings(BinaryStreamWriter &Writer) const; + Error writeHashTable(BinaryStreamWriter &Writer) const; + Error writeEpilogue(BinaryStreamWriter &Writer) const; + + codeview::DebugStringTableSubsection Strings; +}; + +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_RAW_PDBSTRINGTABLEBUILDER_H diff --git a/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h b/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h index d965e1008e95aad65a32e373f6804413bbb8ebae..196ba4d6ffbdc3c57a18b3542a5abc443507aba3 100644 --- a/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h @@ -11,9 +11,7 @@ #define LLVM_DEBUGINFO_PDB_PDBTYPESERVERHANDLER_H #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/ADT/StringSet.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeServerHandler.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" @@ -40,7 +38,7 @@ private: bool RevisitAlways; std::unique_ptr Session; - SmallVector, 4> SearchPaths; + StringSet<> SearchPaths; }; } } diff --git a/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/include/llvm/DebugInfo/PDB/Native/PublicsStream.h index 4a541edd6a7b464457594b04827742a0228b34f5..4570c80c76d7c33318565fd6fd41002c7022c4f5 100644 --- a/include/llvm/DebugInfo/PDB/Native/PublicsStream.h +++ b/include/llvm/DebugInfo/PDB/Native/PublicsStream.h @@ -35,6 +35,7 @@ public: uint32_t getSymHash() const; uint32_t getAddrMap() const; uint32_t getNumBuckets() const { return NumBuckets; } + Expected getSymbolArray() const; iterator_range getSymbols(bool *HadError) const; FixedStreamArray getHashBuckets() const { diff --git a/include/llvm/DebugInfo/PDB/Native/RawConstants.h b/include/llvm/DebugInfo/PDB/Native/RawConstants.h index f5d4df8feb2ed2307771b4e2c46380a64267ee75..bb1d097b5123fcb6c37b83ccf67fd6fd913b9a83 100644 --- a/include/llvm/DebugInfo/PDB/Native/RawConstants.h +++ b/include/llvm/DebugInfo/PDB/Native/RawConstants.h @@ -12,7 +12,6 @@ #include "llvm/ADT/BitmaskEnum.h" #include "llvm/DebugInfo/CodeView/CodeView.h" - #include namespace llvm { @@ -99,15 +98,19 @@ enum class DbgHeaderType : uint16_t { }; enum class OMFSegDescFlags : uint16_t { + None = 0, Read = 1 << 0, // Segment is readable. Write = 1 << 1, // Segment is writable. Execute = 1 << 2, // Segment is executable. AddressIs32Bit = 1 << 3, // Descriptor describes a 32-bit linear address. IsSelector = 1 << 8, // Frame represents a selector. IsAbsoluteAddress = 1 << 9, // Frame represents an absolute address. - IsGroup = 1 << 10 // If set, descriptor represents a group. + IsGroup = 1 << 10, // If set, descriptor represents a group. + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ IsGroup) }; +LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); + } // end namespace pdb } // end namespace llvm diff --git a/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/include/llvm/DebugInfo/PDB/Native/RawTypes.h index 1b2631efce70e7d9d43cc172cb9572c38f814bd3..771272d6a47d1d0862a326fb1099a1ca6c02ba40 100644 --- a/include/llvm/DebugInfo/PDB/Native/RawTypes.h +++ b/include/llvm/DebugInfo/PDB/Native/RawTypes.h @@ -73,13 +73,6 @@ struct SecMapEntry { support::ulittle32_t SecByteLength; // Byte count of the segment or group. }; -// Used for serialized hash table in TPI stream. -// In the reference, it is an array of TI and cbOff pair. -struct TypeIndexOffset { - codeview::TypeIndex Type; - support::ulittle32_t Offset; -}; - /// Some of the values are stored in bitfields. Since this needs to be portable /// across compilers and architectures (big / little endian in particular) we /// can't use the actual structures below, but must instead do the shifting @@ -200,7 +193,7 @@ struct FileInfoSubstreamHeader { }; struct ModInfoFlags { - /// uint16_t fWritten : 1; // True if ModInfo is dirty + /// uint16_t fWritten : 1; // True if DbiModuleDescriptor is dirty /// uint16_t fECEnabled : 1; // Is EC symbolic info present? (What is EC?) /// uint16_t unused : 6; // Reserved /// uint16_t iTSM : 8; // Type Server Index for this module @@ -211,7 +204,7 @@ struct ModInfoFlags { }; /// The header preceeding each entry in the Module Info substream of the DBI -/// stream. +/// stream. Corresponds to the type MODI in the reference implementation. struct ModuleInfoHeader { /// Currently opened module. This field is a pointer in the reference /// implementation, but that won't work on 64-bit systems, and anyway it @@ -231,8 +224,8 @@ struct ModuleInfoHeader { /// Size of local symbol debug info in above stream support::ulittle32_t SymBytes; - /// Size of line number debug info in above stream - support::ulittle32_t LineBytes; + /// Size of C11 line number info in above stream + support::ulittle32_t C11Bytes; /// Size of C13 line number info in above stream support::ulittle32_t C13Bytes; @@ -243,9 +236,12 @@ struct ModuleInfoHeader { /// Padding so the next field is 4-byte aligned. char Padding1[2]; - /// Array of [0..NumFiles) DBI name buffer offsets. This field is a pointer - /// in the reference implementation, but as with `Mod`, we ignore it for now - /// since it is unused. + /// Array of [0..NumFiles) DBI name buffer offsets. In the reference + /// implementation this field is a pointer. But since you can't portably + /// serialize a pointer, on 64-bit platforms they copy all the values except + /// this one into the 32-bit version of the struct and use that for + /// serialization. Regardless, this field is unused, it is only there to + /// store a pointer that can be accessed at runtime. support::ulittle32_t FileNameOffs; /// Name Index for src file name @@ -307,13 +303,13 @@ struct InfoStreamHeader { }; /// The header preceeding the /names stream. -struct StringTableHeader { - support::ulittle32_t Signature; - support::ulittle32_t HashVersion; - support::ulittle32_t ByteSize; +struct PDBStringTableHeader { + support::ulittle32_t Signature; // PDBStringTableSignature + support::ulittle32_t HashVersion; // 1 or 2 + support::ulittle32_t ByteSize; // Number of bytes of names buffer. }; -const uint32_t StringTableSignature = 0xEFFEEFFE; +const uint32_t PDBStringTableSignature = 0xEFFEEFFE; } // namespace pdb } // namespace llvm diff --git a/include/llvm/DebugInfo/PDB/Native/StringTable.h b/include/llvm/DebugInfo/PDB/Native/StringTable.h deleted file mode 100644 index dd5e30e6182708e0089def42e0917adca5073540..0000000000000000000000000000000000000000 --- a/include/llvm/DebugInfo/PDB/Native/StringTable.h +++ /dev/null @@ -1,56 +0,0 @@ -//===- StringTable.h - PDB String Table -------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_PDB_RAW_STRINGTABLE_H -#define LLVM_DEBUGINFO_PDB_RAW_STRINGTABLE_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/BinaryStreamArray.h" -#include "llvm/Support/BinaryStreamRef.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" -#include -#include - -namespace llvm { -class BinaryStreamReader; - -namespace pdb { - -class StringTable { -public: - StringTable(); - - Error load(BinaryStreamReader &Stream); - - uint32_t getByteSize() const; - - uint32_t getNameCount() const { return NameCount; } - uint32_t getHashVersion() const { return HashVersion; } - uint32_t getSignature() const { return Signature; } - - StringRef getStringForID(uint32_t ID) const; - uint32_t getIDForString(StringRef Str) const; - - FixedStreamArray name_ids() const; - -private: - BinaryStreamRef NamesBuffer; - FixedStreamArray IDs; - uint32_t ByteSize = 0; - uint32_t Signature = 0; - uint32_t HashVersion = 0; - uint32_t NameCount = 0; -}; - -} // end namespace pdb -} // end namespace llvm - -#endif // LLVM_DEBUGINFO_PDB_RAW_STRINGTABLE_H diff --git a/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h b/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h deleted file mode 100644 index dd0f40b1978d8afc871c1e4506dbf310d0b86116..0000000000000000000000000000000000000000 --- a/include/llvm/DebugInfo/PDB/Native/StringTableBuilder.h +++ /dev/null @@ -1,44 +0,0 @@ -//===- StringTableBuilder.h - PDB String Table Builder ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file creates the "/names" stream. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_DEBUGINFO_PDB_RAW_STRINGTABLEBUILDER_H -#define LLVM_DEBUGINFO_PDB_RAW_STRINGTABLEBUILDER_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Error.h" -#include - -namespace llvm { -class BinaryStreamWriter; - -namespace pdb { - -class StringTableBuilder { -public: - // If string S does not exist in the string table, insert it. - // Returns the ID for S. - uint32_t insert(StringRef S); - - uint32_t finalize(); - Error commit(BinaryStreamWriter &Writer) const; - -private: - DenseMap Strings; - uint32_t StringSize = 1; -}; - -} // end namespace pdb -} // end namespace llvm - -#endif // LLVM_DEBUGINFO_PDB_RAW_STRINGTABLEBUILDER_H diff --git a/include/llvm/DebugInfo/PDB/Native/SymbolStream.h b/include/llvm/DebugInfo/PDB/Native/SymbolStream.h index 41d5e6ad64a0e43a59efbd48c86b5cb814eeface..17695f587849ee620131fb7b718fcd17b9149bb6 100644 --- a/include/llvm/DebugInfo/PDB/Native/SymbolStream.h +++ b/include/llvm/DebugInfo/PDB/Native/SymbolStream.h @@ -27,6 +27,10 @@ public: ~SymbolStream(); Error reload(); + const codeview::CVSymbolArray &getSymbolArray() const { + return SymbolRecords; + } + iterator_range getSymbols(bool *HadError) const; diff --git a/include/llvm/DebugInfo/PDB/Native/TpiHashing.h b/include/llvm/DebugInfo/PDB/Native/TpiHashing.h index dd2698c354a20125cc8e3421f1117ec9e4154dba..156abb59a6be1b9e44a5949e96f6e3d582124fb7 100644 --- a/include/llvm/DebugInfo/PDB/Native/TpiHashing.h +++ b/include/llvm/DebugInfo/PDB/Native/TpiHashing.h @@ -38,7 +38,7 @@ public: #define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) #define MEMBER_RECORD(EnumName, EnumVal, Name) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" private: template diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/include/llvm/DebugInfo/PDB/Native/TpiStream.h index 62dde0ef08b767765f1e3fe2be9865d0e043bbb5..0ee697696ca54147b2d16f006713d57bb817a48f 100644 --- a/include/llvm/DebugInfo/PDB/Native/TpiStream.h +++ b/include/llvm/DebugInfo/PDB/Native/TpiStream.h @@ -21,6 +21,9 @@ #include "llvm/Support/Error.h" namespace llvm { +namespace codeview { +class LazyRandomTypeCollection; +} namespace msf { class MappedBlockStream; } @@ -31,8 +34,7 @@ class TpiStream { friend class TpiStreamBuilder; public: - TpiStream(const PDBFile &File, - std::unique_ptr Stream); + TpiStream(PDBFile &File, std::unique_ptr Stream); ~TpiStream(); Error reload(); @@ -40,31 +42,34 @@ public: uint32_t TypeIndexBegin() const; uint32_t TypeIndexEnd() const; - uint32_t NumTypeRecords() const; + uint32_t getNumTypeRecords() const; uint16_t getTypeHashStreamIndex() const; uint16_t getTypeHashStreamAuxIndex() const; uint32_t getHashKeySize() const; - uint32_t NumHashBuckets() const; + uint32_t getNumHashBuckets() const; FixedStreamArray getHashValues() const; - FixedStreamArray getTypeIndexOffsets() const; + FixedStreamArray getTypeIndexOffsets() const; HashTable &getHashAdjusters(); codeview::CVTypeRange types(bool *HadError) const; + const codeview::CVTypeArray &typeArray() const { return TypeRecords; } + + codeview::LazyRandomTypeCollection &typeCollection() { return *Types; } Error commit(); private: - Error verifyHashValues(); - - const PDBFile &Pdb; + PDBFile &Pdb; std::unique_ptr Stream; + std::unique_ptr Types; + codeview::CVTypeArray TypeRecords; std::unique_ptr HashStream; FixedStreamArray HashValues; - FixedStreamArray TypeIndexOffsets; + FixedStreamArray TypeIndexOffsets; HashTable HashAdjusters; const TpiStreamHeader *Header; diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h index a29ed0b610d364c5a106b8ed41fc1431ed91c2e8..411720d6f56b5694308a627066d05bd7b3921890 100644 --- a/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h @@ -58,6 +58,8 @@ public: Error finalizeMsfLayout(); + uint32_t getRecordCount() const { return TypeRecords.size(); } + Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer); uint32_t calculateSerializedLength(); @@ -72,10 +74,10 @@ private: size_t TypeRecordBytes = 0; - Optional VerHeader; + PdbRaw_TpiVer VerHeader = PdbRaw_TpiVer::PdbTpiV80; std::vector> TypeRecords; std::vector TypeHashes; - std::vector TypeIndexOffsets; + std::vector TypeIndexOffsets; uint32_t HashStreamIndex = kInvalidStreamIndex; std::unique_ptr HashValueStream; diff --git a/include/llvm/DebugInfo/PDB/PDBContext.h b/include/llvm/DebugInfo/PDB/PDBContext.h index 84ab8ed173cb0b2be0b2dec71ad0effaa58a3e93..0ce49f5ef92237fac249f4a9d13ecaf93e6d712b 100644 --- a/include/llvm/DebugInfo/PDB/PDBContext.h +++ b/include/llvm/DebugInfo/PDB/PDBContext.h @@ -41,8 +41,7 @@ namespace pdb { return DICtx->getKind() == CK_PDB; } - void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All, - bool DumpEH = false, bool SummarizeTypes = false) override; + void dump(raw_ostream &OS, DIDumpOptions DIDumpOpts) override; DILineInfo getLineInfoForAddress( uint64_t Address, diff --git a/include/llvm/DebugInfo/PDB/PDBSymbol.h b/include/llvm/DebugInfo/PDB/PDBSymbol.h index 652f2136105650d00bc931b9b6eec83b3d4b70b4..9e883d2f99a7abc4d008101093ca3b4ed0d10335 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbol.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbol.h @@ -62,6 +62,7 @@ class PDBSymbol { protected: PDBSymbol(const IPDBSession &PDBSession, std::unique_ptr Symbol); + PDBSymbol(PDBSymbol &Symbol); public: static std::unique_ptr @@ -88,20 +89,18 @@ public: template std::unique_ptr findOneChild() const { auto Enumerator(findAllChildren()); + if (!Enumerator) + return nullptr; return Enumerator->getNext(); } - template T *cast() { return llvm::dyn_cast(this); } - - template const T *cast() const { - return llvm::dyn_cast(this); - } - std::unique_ptr clone() const; template std::unique_ptr> findAllChildren() const { auto BaseIter = RawSymbol->findChildren(T::Tag); + if (!BaseIter) + return nullptr; return llvm::make_unique>(std::move(BaseIter)); } std::unique_ptr findAllChildren(PDB_SymType Type) const; @@ -128,18 +127,11 @@ protected: template std::unique_ptr getConcreteSymbolByIdHelper(uint32_t Id) const { - auto Sym = getSymbolByIdHelper(Id); - if (!Sym) - return nullptr; - ConcreteType *Result = Sym->cast(); - if (!Result) - return nullptr; - Sym.release(); - return std::unique_ptr(Result); + return unique_dyn_cast_or_null(getSymbolByIdHelper(Id)); } const IPDBSession &Session; - const std::unique_ptr RawSymbol; + std::unique_ptr RawSymbol; }; } // namespace llvm diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolExe.h b/include/llvm/DebugInfo/PDB/PDBSymbolExe.h index 5b3f50d153eb6078007570b1f497430c33403466..2c2d74665040e06ec2b82989f72369b012b6d0d5 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbolExe.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbolExe.h @@ -37,6 +37,8 @@ public: FORWARD_SYMBOL_METHOD(getSignature) FORWARD_SYMBOL_METHOD(getSymbolsFileName) + uint32_t getPointerByteSize() const; + private: void dumpChildren(raw_ostream &OS, StringRef Label, PDB_SymType ChildType, int Indent) const; diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h b/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h index 5686f8716a0cc820aaff78630c5a11d5d714f2a6..c2f02ea6f126ca14a7127760e0288d31b4eba434 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h @@ -27,6 +27,8 @@ public: void dump(PDBSymDumper &Dumper) const override; + bool isDestructor() const; + std::unique_ptr> getArguments() const; DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::Function) diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h index 0924efb8aa9c004f2d78db377a2f746e8ecca8ad..d607a3d8117028a397fb5b7e950721f2f9096d99 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h @@ -13,6 +13,9 @@ #include "PDBSymbol.h" #include "PDBTypes.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" + namespace llvm { class raw_ostream; diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h index 47a4525a47bac91f232c3526dcc7d217d9e586ba..e9e7fe8c9865ca44554d84dd9d7d383bec2026d6 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h @@ -10,7 +10,9 @@ #ifndef LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEUDT_H #define LLVM_DEBUGINFO_PDB_PDBSYMBOLTYPEUDT_H +#include "IPDBSession.h" #include "PDBSymbol.h" +#include "PDBSymbolTypeBaseClass.h" #include "PDBTypes.h" namespace llvm { @@ -18,11 +20,17 @@ namespace llvm { class raw_ostream; namespace pdb { + class PDBSymbolTypeUDT : public PDBSymbol { public: PDBSymbolTypeUDT(const IPDBSession &PDBSession, std::unique_ptr UDTSymbol); + std::unique_ptr clone() const { + return getSession().getConcreteSymbolById( + getSymIndexId()); + } + DECLARE_PDB_SYMBOL_CONCRETE_TYPE(PDB_SymType::UDT) void dump(PDBSymDumper &Dumper) const override; diff --git a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h index 17612ff460643ea08802486ac4326e2130ebeda7..e270c2b7eb952de7698b7f11c9f5f05f781547e2 100644 --- a/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h +++ b/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h @@ -28,6 +28,7 @@ public: void dump(PDBSymDumper &Dumper) const override; FORWARD_SYMBOL_ID_METHOD(getClassParent) + FORWARD_SYMBOL_METHOD(getOffset) FORWARD_SYMBOL_METHOD(isConstType) FORWARD_SYMBOL_ID_METHOD(getLexicalParent) FORWARD_SYMBOL_ID_METHOD(getType) diff --git a/include/llvm/DebugInfo/PDB/UDTLayout.h b/include/llvm/DebugInfo/PDB/UDTLayout.h new file mode 100644 index 0000000000000000000000000000000000000000..6bc3660fbe51717dc3a98b83378ffd1784907799 --- /dev/null +++ b/include/llvm/DebugInfo/PDB/UDTLayout.h @@ -0,0 +1,189 @@ +//===- UDTLayout.h - UDT layout info ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_UDTLAYOUT_H +#define LLVM_DEBUGINFO_PDB_UDTLAYOUT_H + +#include "PDBSymbol.h" +#include "PDBTypes.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" + +#include +#include + +namespace llvm { + +class raw_ostream; + +namespace pdb { + +class PDBSymTypeBaseClass; +class PDBSymbolData; +class PDBSymbolTypeUDT; +class PDBSymbolTypeVTable; + +class ClassLayout; +class BaseClassLayout; +class LayoutItemBase; +class UDTLayoutBase; + +class LayoutItemBase { +public: + LayoutItemBase(const UDTLayoutBase *Parent, const PDBSymbol *Symbol, + const std::string &Name, uint32_t OffsetInParent, + uint32_t Size, bool IsElided); + virtual ~LayoutItemBase() {} + + uint32_t deepPaddingSize() const; + virtual uint32_t immediatePadding() const { return 0; } + virtual uint32_t tailPadding() const; + + const UDTLayoutBase *getParent() const { return Parent; } + StringRef getName() const { return Name; } + uint32_t getOffsetInParent() const { return OffsetInParent; } + uint32_t getSize() const { return SizeOf; } + uint32_t getLayoutSize() const { return LayoutSize; } + const PDBSymbol *getSymbol() const { return Symbol; } + const BitVector &usedBytes() const { return UsedBytes; } + bool isElided() const { return IsElided; } + virtual bool isVBPtr() const { return false; } + + uint32_t containsOffset(uint32_t Off) const { + uint32_t Begin = getOffsetInParent(); + uint32_t End = Begin + getSize(); + return (Off >= Begin && Off < End); + } + +protected: + const PDBSymbol *Symbol = nullptr; + const UDTLayoutBase *Parent = nullptr; + BitVector UsedBytes; + std::string Name; + uint32_t OffsetInParent = 0; + uint32_t SizeOf = 0; + uint32_t LayoutSize = 0; + bool IsElided = false; +}; + +class VBPtrLayoutItem : public LayoutItemBase { +public: + VBPtrLayoutItem(const UDTLayoutBase &Parent, + std::unique_ptr Sym, uint32_t Offset, + uint32_t Size); + virtual bool isVBPtr() const { return true; } + +private: + std::unique_ptr Type; +}; + +class DataMemberLayoutItem : public LayoutItemBase { +public: + DataMemberLayoutItem(const UDTLayoutBase &Parent, + std::unique_ptr DataMember); + + const PDBSymbolData &getDataMember(); + bool hasUDTLayout() const; + const ClassLayout &getUDTLayout() const; + +private: + std::unique_ptr DataMember; + std::unique_ptr UdtLayout; +}; + +class VTableLayoutItem : public LayoutItemBase { +public: + VTableLayoutItem(const UDTLayoutBase &Parent, + std::unique_ptr VTable); + + uint32_t getElementSize() const { return ElementSize; } + +private: + uint32_t ElementSize = 0; + std::unique_ptr VTable; +}; + +class UDTLayoutBase : public LayoutItemBase { + template using UniquePtrVector = std::vector>; + +public: + UDTLayoutBase(const UDTLayoutBase *Parent, const PDBSymbol &Sym, + const std::string &Name, uint32_t OffsetInParent, uint32_t Size, + bool IsElided); + + uint32_t tailPadding() const override; + + ArrayRef layout_items() const { return LayoutItems; } + + ArrayRef bases() const { return AllBases; } + ArrayRef regular_bases() const { return NonVirtualBases; } + ArrayRef virtual_bases() const { return VirtualBases; } + + uint32_t directVirtualBaseCount() const { return DirectVBaseCount; } + + ArrayRef> funcs() const { return Funcs; } + + ArrayRef> other_items() const { return Other; } + +protected: + bool hasVBPtrAtOffset(uint32_t Off) const; + void initializeChildren(const PDBSymbol &Sym); + + void addChildToLayout(std::unique_ptr Child); + + uint32_t DirectVBaseCount = 0; + + UniquePtrVector Other; + UniquePtrVector Funcs; + UniquePtrVector ChildStorage; + std::vector LayoutItems; + + std::vector AllBases; + ArrayRef NonVirtualBases; + ArrayRef VirtualBases; + + VTableLayoutItem *VTable = nullptr; + VBPtrLayoutItem *VBPtr = nullptr; +}; + +class BaseClassLayout : public UDTLayoutBase { +public: + BaseClassLayout(const UDTLayoutBase &Parent, uint32_t OffsetInParent, + bool Elide, std::unique_ptr Base); + + const PDBSymbolTypeBaseClass &getBase() const { return *Base; } + bool isVirtualBase() const { return IsVirtualBase; } + bool isEmptyBase() { return SizeOf == 1 && LayoutSize == 0; } + +private: + std::unique_ptr Base; + bool IsVirtualBase; +}; + +class ClassLayout : public UDTLayoutBase { +public: + explicit ClassLayout(const PDBSymbolTypeUDT &UDT); + explicit ClassLayout(std::unique_ptr UDT); + + ClassLayout(ClassLayout &&Other) = default; + + const PDBSymbolTypeUDT &getClass() const { return UDT; } + uint32_t immediatePadding() const override; + +private: + BitVector ImmediateUsedBytes; + std::unique_ptr OwnedStorage; + const PDBSymbolTypeUDT &UDT; +}; +} +} // namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_UDTLAYOUT_H diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h index 9253adf7eedd491628c14b138c4fc48e6504d55b..5103cc03a6bdb161229008588bacb69f2af3a427 100644 --- a/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -56,8 +56,9 @@ public: Expected symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset); void flush(); - static std::string DemangleName(const std::string &Name, - const SymbolizableModule *ModInfo); + static std::string + DemangleName(const std::string &Name, + const SymbolizableModule *DbiModuleDescriptor); private: // Bundles together object file with code/data and object file with diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index f68337c43271893dca1f9cbc2f667794f9925071..1586f7b80669e3f18ec7c147cc8e8ad03c8d4cb2 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -27,10 +27,10 @@ #include "llvm/Support/Mutex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include #include #include #include -#include namespace llvm { diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 7e7f7358938a4e6efab06d7529e69534666b2db0..2fccf8a0f625ea7a91bc2e3fa371d5fd157c8b87 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -20,9 +20,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -172,6 +172,11 @@ private: return nullptr; } + void removeModulesFromBaseLayer(BaseLayerT &BaseLayer) { + for (auto &BLH : BaseLayerHandles) + BaseLayer.removeModuleSet(BLH); + } + std::unique_ptr ExternalSymbolResolver; std::unique_ptr> MemMgr; std::unique_ptr StubsMgr; @@ -204,6 +209,11 @@ public: CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)), CloneStubsIntoPartitions(CloneStubsIntoPartitions) {} + ~CompileOnDemandLayer() { + while (!LogicalDylibs.empty()) + removeModuleSet(LogicalDylibs.begin()); + } + /// @brief Add a module to the compile-on-demand layer. template @@ -239,6 +249,7 @@ public: /// This will remove all modules in the layers below that were derived from /// the module represented by H. void removeModuleSet(ModuleSetHandleT H) { + H->removeModulesFromBaseLayer(BaseLayer); LogicalDylibs.erase(H); } @@ -478,6 +489,8 @@ private: return 0; } + LD.BaseLayerHandles.push_back(PartH); + return CalledAddr; } diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index a32278b8a81ecb4963aca9421a527bccbc03363d..71d847c0626449da15bcc5c731ea6e795ff02e0a 100644 --- a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -14,8 +14,8 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H #define LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index f16dd021ea5186fdda1fedf95e221bb3b9e1a224..f81d054440fc9d226a744b35ddb3214cb4f83a63 100644 --- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -14,8 +14,8 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H #define LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H -#include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/Object/ObjectFile.h" #include diff --git a/include/llvm/ExecutionEngine/Orc/OrcError.h b/include/llvm/ExecutionEngine/Orc/OrcError.h index 2fe4a5ee0588998a51ef21087638fb7cadcb4fb9..cbb40fad0223095a5ed31e40d2d166f792bf785e 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcError.h +++ b/include/llvm/ExecutionEngine/Orc/OrcError.h @@ -27,26 +27,16 @@ enum class OrcErrorCode : int { RemoteMProtectAddrUnrecognized, RemoteIndirectStubsOwnerDoesNotExist, RemoteIndirectStubsOwnerIdAlreadyInUse, + RPCConnectionClosed, + RPCCouldNotNegotiateFunction, RPCResponseAbandoned, UnexpectedRPCCall, UnexpectedRPCResponse, - UnknownRPCFunction + UnknownErrorCodeFromRemote }; std::error_code orcError(OrcErrorCode ErrCode); -class RPCFunctionNotSupported : public ErrorInfo { -public: - static char ID; - - RPCFunctionNotSupported(std::string RPCFunctionSignature); - std::error_code convertToErrorCode() const override; - void log(raw_ostream &OS) const override; - const std::string &getFunctionSignature() const; -private: - std::string RPCFunctionSignature; -}; - } // End namespace orc. } // End namespace llvm. diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h index 02f59d6a831a88de72db48f314bd137e53330c6d..a19c30631c573e33665ae71b06537f2962d2be48 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h @@ -144,16 +144,16 @@ public: void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override { - UnfinalizedEHFrames.push_back( - std::make_pair(LoadAddr, static_cast(Size))); + UnfinalizedEHFrames.push_back({LoadAddr, Size}); } - void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override { - auto Err = Client.deregisterEHFrames(LoadAddr, Size); - // FIXME: Add error poll. - assert(!Err && "Failed to register remote EH frames."); - (void)Err; + void deregisterEHFrames() override { + for (auto &Frame : RegisteredEHFrames) { + auto Err = Client.deregisterEHFrames(Frame.Addr, Frame.Size); + // FIXME: Add error poll. + assert(!Err && "Failed to register remote EH frames."); + (void)Err; + } } void notifyObjectLoaded(RuntimeDyld &Dyld, @@ -320,7 +320,7 @@ public: Unfinalized.clear(); for (auto &EHFrame : UnfinalizedEHFrames) { - if (auto Err = Client.registerEHFrames(EHFrame.first, EHFrame.second)) { + if (auto Err = Client.registerEHFrames(EHFrame.Addr, EHFrame.Size)) { // FIXME: Replace this once finalizeMemory can return an Error. handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) { if (ErrMsg) { @@ -331,7 +331,8 @@ public: return false; } } - UnfinalizedEHFrames.clear(); + RegisteredEHFrames = std::move(UnfinalizedEHFrames); + UnfinalizedEHFrames = {}; return false; } @@ -387,7 +388,13 @@ public: ResourceIdMgr::ResourceId Id; std::vector Unmapped; std::vector Unfinalized; - std::vector> UnfinalizedEHFrames; + + struct EHFrame { + JITTargetAddress Addr; + uint64_t Size; + }; + std::vector UnfinalizedEHFrames; + std::vector RegisteredEHFrames; }; /// Remote indirect stubs manager. diff --git a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h index 359a9d81b22b558d03aa780e3d35dd099c5768c9..1cb2448a3a44b38490f47e8b6a3b9c621f37d50a 100644 --- a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h +++ b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h @@ -12,6 +12,7 @@ #include "OrcError.h" #include "llvm/Support/thread.h" +#include #include #include @@ -114,6 +115,35 @@ public: static const char* getName() { return "std::string"; } }; +template <> +class RPCTypeName { +public: + static const char* getName() { return "Error"; } +}; + +template +class RPCTypeName> { +public: + static const char* getName() { + std::lock_guard Lock(NameMutex); + if (Name.empty()) + raw_string_ostream(Name) << "Expected<" + << RPCTypeNameSequence() + << ">"; + return Name.data(); + } + +private: + static std::mutex NameMutex; + static std::string Name; +}; + +template +std::mutex RPCTypeName>::NameMutex; + +template +std::string RPCTypeName>::Name; + template class RPCTypeName> { public: @@ -243,8 +273,10 @@ class SequenceSerialization { public: template - static Error serialize(ChannelT &C, const CArgT &CArg) { - return SerializationTraits::serialize(C, CArg); + static Error serialize(ChannelT &C, CArgT &&CArg) { + return SerializationTraits::type>:: + serialize(C, std::forward(CArg)); } template @@ -258,19 +290,21 @@ class SequenceSerialization { public: template - static Error serialize(ChannelT &C, const CArgT &CArg, - const CArgTs&... CArgs) { + static Error serialize(ChannelT &C, CArgT &&CArg, + CArgTs &&... CArgs) { if (auto Err = - SerializationTraits::serialize(C, CArg)) + SerializationTraits::type>:: + serialize(C, std::forward(CArg))) return Err; if (auto Err = SequenceTraits::emitSeparator(C)) return Err; - return SequenceSerialization::serialize(C, CArgs...); + return SequenceSerialization:: + serialize(C, std::forward(CArgs)...); } template static Error deserialize(ChannelT &C, CArgT &CArg, - CArgTs&... CArgs) { + CArgTs &... CArgs) { if (auto Err = SerializationTraits::deserialize(C, CArg)) return Err; @@ -281,8 +315,9 @@ public: }; template -Error serializeSeq(ChannelT &C, const ArgTs &... Args) { - return SequenceSerialization::serialize(C, Args...); +Error serializeSeq(ChannelT &C, ArgTs &&... Args) { + return SequenceSerialization::type...>:: + serialize(C, std::forward(Args)...); } template @@ -290,6 +325,207 @@ Error deserializeSeq(ChannelT &C, ArgTs &... Args) { return SequenceSerialization::deserialize(C, Args...); } +template +class SerializationTraits { +public: + + using WrappedErrorSerializer = + std::function; + + using WrappedErrorDeserializer = + std::function; + + template + static void registerErrorType(std::string Name, SerializeFtor Serialize, + DeserializeFtor Deserialize) { + assert(!Name.empty() && + "The empty string is reserved for the Success value"); + + const std::string *KeyName = nullptr; + { + // We're abusing the stability of std::map here: We take a reference to the + // key of the deserializers map to save us from duplicating the string in + // the serializer. This should be changed to use a stringpool if we switch + // to a map type that may move keys in memory. + std::lock_guard Lock(DeserializersMutex); + auto I = + Deserializers.insert(Deserializers.begin(), + std::make_pair(std::move(Name), + std::move(Deserialize))); + KeyName = &I->first; + } + + { + assert(KeyName != nullptr && "No keyname pointer"); + std::lock_guard Lock(SerializersMutex); + // FIXME: Move capture Serialize once we have C++14. + Serializers[ErrorInfoT::classID()] = + [KeyName, Serialize](ChannelT &C, const ErrorInfoBase &EIB) -> Error { + assert(EIB.dynamicClassID() == ErrorInfoT::classID() && + "Serializer called for wrong error type"); + if (auto Err = serializeSeq(C, *KeyName)) + return Err; + return Serialize(C, static_cast(EIB)); + }; + } + } + + static Error serialize(ChannelT &C, Error &&Err) { + std::lock_guard Lock(SerializersMutex); + + if (!Err) + return serializeSeq(C, std::string()); + + return handleErrors(std::move(Err), + [&C](const ErrorInfoBase &EIB) { + auto SI = Serializers.find(EIB.dynamicClassID()); + if (SI == Serializers.end()) + return serializeAsStringError(C, EIB); + return (SI->second)(C, EIB); + }); + } + + static Error deserialize(ChannelT &C, Error &Err) { + std::lock_guard Lock(DeserializersMutex); + + std::string Key; + if (auto Err = deserializeSeq(C, Key)) + return Err; + + if (Key.empty()) { + ErrorAsOutParameter EAO(&Err); + Err = Error::success(); + return Error::success(); + } + + auto DI = Deserializers.find(Key); + assert(DI != Deserializers.end() && "No deserializer for error type"); + return (DI->second)(C, Err); + } + +private: + + static Error serializeAsStringError(ChannelT &C, const ErrorInfoBase &EIB) { + std::string ErrMsg; + { + raw_string_ostream ErrMsgStream(ErrMsg); + EIB.log(ErrMsgStream); + } + return serialize(C, make_error(std::move(ErrMsg), + inconvertibleErrorCode())); + } + + static std::recursive_mutex SerializersMutex; + static std::recursive_mutex DeserializersMutex; + static std::map Serializers; + static std::map Deserializers; +}; + +template +std::recursive_mutex SerializationTraits::SerializersMutex; + +template +std::recursive_mutex SerializationTraits::DeserializersMutex; + +template +std::map::WrappedErrorSerializer> +SerializationTraits::Serializers; + +template +std::map::WrappedErrorDeserializer> +SerializationTraits::Deserializers; + +/// Registers a serializer and deserializer for the given error type on the +/// given channel type. +template +void registerErrorSerialization(std::string Name, SerializeFtor &&Serialize, + DeserializeFtor &&Deserialize) { + SerializationTraits::template registerErrorType( + std::move(Name), + std::forward(Serialize), + std::forward(Deserialize)); +} + +/// Registers serialization/deserialization for StringError. +template +void registerStringError() { + static bool AlreadyRegistered = false; + if (!AlreadyRegistered) { + registerErrorSerialization( + "StringError", + [](ChannelT &C, const StringError &SE) { + return serializeSeq(C, SE.getMessage()); + }, + [](ChannelT &C, Error &Err) -> Error { + ErrorAsOutParameter EAO(&Err); + std::string Msg; + if (auto E2 = deserializeSeq(C, Msg)) + return E2; + Err = + make_error(std::move(Msg), + orcError( + OrcErrorCode::UnknownErrorCodeFromRemote)); + return Error::success(); + }); + AlreadyRegistered = true; + } +} + +/// SerializationTraits for Expected from an Expected. +template +class SerializationTraits, Expected> { +public: + + static Error serialize(ChannelT &C, Expected &&ValOrErr) { + if (ValOrErr) { + if (auto Err = serializeSeq(C, true)) + return Err; + return SerializationTraits::serialize(C, *ValOrErr); + } + if (auto Err = serializeSeq(C, false)) + return Err; + return serializeSeq(C, ValOrErr.takeError()); + } + + static Error deserialize(ChannelT &C, Expected &ValOrErr) { + ExpectedAsOutParameter EAO(&ValOrErr); + bool HasValue; + if (auto Err = deserializeSeq(C, HasValue)) + return Err; + if (HasValue) + return SerializationTraits::deserialize(C, *ValOrErr); + Error Err = Error::success(); + if (auto E2 = deserializeSeq(C, Err)) + return E2; + ValOrErr = std::move(Err); + return Error::success(); + } +}; + +/// SerializationTraits for Expected from a T2. +template +class SerializationTraits, T2> { +public: + + static Error serialize(ChannelT &C, T2 &&Val) { + return serializeSeq(C, Expected(std::forward(Val))); + } +}; + +/// SerializationTraits for Expected from an Error. +template +class SerializationTraits, Error> { +public: + + static Error serialize(ChannelT &C, Error &&Err) { + return serializeSeq(C, Expected(std::move(Err))); + } +}; + /// SerializationTraits default specialization for std::pair. template class SerializationTraits> { diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h index fe7e1ba6ff78bb6c3274989b180726dabc353891..6212f64ff3195696dfdae71ec61d5100696f22a9 100644 --- a/include/llvm/ExecutionEngine/Orc/RPCUtils.h +++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h @@ -32,6 +32,109 @@ namespace llvm { namespace orc { namespace rpc { +/// Base class of all fatal RPC errors (those that necessarily result in the +/// termination of the RPC session). +class RPCFatalError : public ErrorInfo { +public: + static char ID; +}; + +/// RPCConnectionClosed is returned from RPC operations if the RPC connection +/// has already been closed due to either an error or graceful disconnection. +class ConnectionClosed : public ErrorInfo { +public: + static char ID; + std::error_code convertToErrorCode() const override; + void log(raw_ostream &OS) const override; +}; + +/// BadFunctionCall is returned from handleOne when the remote makes a call with +/// an unrecognized function id. +/// +/// This error is fatal because Orc RPC needs to know how to parse a function +/// call to know where the next call starts, and if it doesn't recognize the +/// function id it cannot parse the call. +template +class BadFunctionCall + : public ErrorInfo, RPCFatalError> { +public: + static char ID; + + BadFunctionCall(FnIdT FnId, SeqNoT SeqNo) + : FnId(std::move(FnId)), SeqNo(std::move(SeqNo)) {} + + std::error_code convertToErrorCode() const override { + return orcError(OrcErrorCode::UnexpectedRPCCall); + } + + void log(raw_ostream &OS) const override { + OS << "Call to invalid RPC function id '" << FnId << "' with " + "sequence number " << SeqNo; + } + +private: + FnIdT FnId; + SeqNoT SeqNo; +}; + +template +char BadFunctionCall::ID = 0; + +/// InvalidSequenceNumberForResponse is returned from handleOne when a response +/// call arrives with a sequence number that doesn't correspond to any in-flight +/// function call. +/// +/// This error is fatal because Orc RPC needs to know how to parse the rest of +/// the response call to know where the next call starts, and if it doesn't have +/// a result parser for this sequence number it can't do that. +template +class InvalidSequenceNumberForResponse + : public ErrorInfo, RPCFatalError> { +public: + static char ID; + + InvalidSequenceNumberForResponse(SeqNoT SeqNo) + : SeqNo(std::move(SeqNo)) {} + + std::error_code convertToErrorCode() const override { + return orcError(OrcErrorCode::UnexpectedRPCCall); + }; + + void log(raw_ostream &OS) const override { + OS << "Response has unknown sequence number " << SeqNo; + } +private: + SeqNoT SeqNo; +}; + +template +char InvalidSequenceNumberForResponse::ID = 0; + +/// This non-fatal error will be passed to asynchronous result handlers in place +/// of a result if the connection goes down before a result returns, or if the +/// function to be called cannot be negotiated with the remote. +class ResponseAbandoned : public ErrorInfo { +public: + static char ID; + + std::error_code convertToErrorCode() const override; + void log(raw_ostream &OS) const override; +}; + +/// This error is returned if the remote does not have a handler installed for +/// the given RPC function. +class CouldNotNegotiate : public ErrorInfo { +public: + static char ID; + + CouldNotNegotiate(std::string Signature); + std::error_code convertToErrorCode() const override; + void log(raw_ostream &OS) const override; + const std::string &getSignature() const { return Signature; } +private: + std::string Signature; +}; + template class Function; // RPC Function class. @@ -259,30 +362,122 @@ template <> class ResultTraits : public ResultTraits {}; template class ResultTraits> : public ResultTraits {}; +// Determines whether an RPC function's defined error return type supports +// error return value. +template +class SupportsErrorReturn { +public: + static const bool value = false; +}; + +template <> +class SupportsErrorReturn { +public: + static const bool value = true; +}; + +template +class SupportsErrorReturn> { +public: + static const bool value = true; +}; + +// RespondHelper packages return values based on whether or not the declared +// RPC function return type supports error returns. +template +class RespondHelper; + +// RespondHelper specialization for functions that support error returns. +template <> +class RespondHelper { +public: + + // Send Expected. + template + static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, + SequenceNumberT SeqNo, + Expected ResultOrErr) { + if (!ResultOrErr && ResultOrErr.template errorIsA()) + return ResultOrErr.takeError(); + + // Open the response message. + if (auto Err = C.startSendMessage(ResponseId, SeqNo)) + return Err; + + // Serialize the result. + if (auto Err = + SerializationTraits>::serialize( + C, std::move(ResultOrErr))) + return Err; + + // Close the response message. + return C.endSendMessage(); + } + + template + static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, + SequenceNumberT SeqNo, Error Err) { + if (Err && Err.isA()) + return Err; + if (auto Err2 = C.startSendMessage(ResponseId, SeqNo)) + return Err2; + if (auto Err2 = serializeSeq(C, std::move(Err))) + return Err2; + return C.endSendMessage(); + } + +}; + +// RespondHelper specialization for functions that do not support error returns. +template <> +class RespondHelper { +public: + + template + static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, + SequenceNumberT SeqNo, + Expected ResultOrErr) { + if (auto Err = ResultOrErr.takeError()) + return Err; + + // Open the response message. + if (auto Err = C.startSendMessage(ResponseId, SeqNo)) + return Err; + + // Serialize the result. + if (auto Err = + SerializationTraits::serialize( + C, *ResultOrErr)) + return Err; + + // Close the response message. + return C.endSendMessage(); + } + + template + static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, + SequenceNumberT SeqNo, Error Err) { + if (Err) + return Err; + if (auto Err2 = C.startSendMessage(ResponseId, SeqNo)) + return Err2; + return C.endSendMessage(); + } + +}; + + // Send a response of the given wire return type (WireRetT) over the // channel, with the given sequence number. template -static Error respond(ChannelT &C, const FunctionIdT &ResponseId, - SequenceNumberT SeqNo, Expected ResultOrErr) { - // If this was an error bail out. - // FIXME: Send an "error" message to the client if this is not a channel - // failure? - if (auto Err = ResultOrErr.takeError()) - return Err; - - // Open the response message. - if (auto Err = C.startSendMessage(ResponseId, SeqNo)) - return Err; - - // Serialize the result. - if (auto Err = - SerializationTraits::serialize( - C, *ResultOrErr)) - return Err; - - // Close the response message. - return C.endSendMessage(); +Error respond(ChannelT &C, const FunctionIdT &ResponseId, + SequenceNumberT SeqNo, Expected ResultOrErr) { + return RespondHelper::value>:: + template sendResult(C, ResponseId, SeqNo, std::move(ResultOrErr)); } // Send an empty response message on the given channel to indicate that @@ -291,11 +486,8 @@ template Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo, Error Err) { - if (Err) - return Err; - if (auto Err2 = C.startSendMessage(ResponseId, SeqNo)) - return Err2; - return C.endSendMessage(); + return RespondHelper::value>:: + sendResult(C, ResponseId, SeqNo, std::move(Err)); } // Converts a given type to the equivalent error return type. @@ -500,7 +692,7 @@ public: // Create an error instance representing an abandoned response. static Error createAbandonedResponseError() { - return errorCodeToError(orcError(OrcErrorCode::RPCResponseAbandoned)); + return make_error(); } }; @@ -567,6 +759,72 @@ private: HandlerT Handler; }; +template +class ResponseHandlerImpl, HandlerT> + : public ResponseHandler { +public: + ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} + + // Handle the result by deserializing it from the channel then passing it + // to the user defined handler. + Error handleResponse(ChannelT &C) override { + using HandlerArgType = typename ResponseHandlerArg< + typename HandlerTraits::Type>::ArgType; + HandlerArgType Result((typename HandlerArgType::value_type())); + + if (auto Err = + SerializationTraits, + HandlerArgType>::deserialize(C, Result)) + return Err; + if (auto Err = C.endReceiveMessage()) + return Err; + return Handler(std::move(Result)); + } + + // Abandon this response by calling the handler with an 'abandoned response' + // error. + void abandon() override { + if (auto Err = Handler(this->createAbandonedResponseError())) { + // Handlers should not fail when passed an abandoned response error. + report_fatal_error(std::move(Err)); + } + } + +private: + HandlerT Handler; +}; + +template +class ResponseHandlerImpl + : public ResponseHandler { +public: + ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} + + // Handle the result by deserializing it from the channel then passing it + // to the user defined handler. + Error handleResponse(ChannelT &C) override { + Error Result = Error::success(); + if (auto Err = + SerializationTraits::deserialize(C, Result)) + return Err; + if (auto Err = C.endReceiveMessage()) + return Err; + return Handler(std::move(Result)); + } + + // Abandon this response by calling the handler with an 'abandoned response' + // error. + void abandon() override { + if (auto Err = Handler(this->createAbandonedResponseError())) { + // Handlers should not fail when passed an abandoned response error. + report_fatal_error(std::move(Err)); + } + } + +private: + HandlerT Handler; +}; + // Create a ResponseHandler from a given user handler. template std::unique_ptr> createResponseHandler(HandlerT H) { @@ -814,12 +1072,9 @@ public: if (auto FnIdOrErr = getRemoteFunctionId(LazyAutoNegotiation, false)) FnId = *FnIdOrErr; else { - // This isn't a channel error so we don't want to abandon other pending - // responses, but we still need to run the user handler with an error to - // let them know the call failed. - if (auto Err = Handler(errorCodeToError( - orcError(OrcErrorCode::UnknownRPCFunction)))) - report_fatal_error(std::move(Err)); + // Negotiation failed. Notify the handler then return the negotiate-failed + // error. + cantFail(Handler(make_error())); return FnIdOrErr.takeError(); } @@ -885,7 +1140,8 @@ public: return I->second(C, SeqNo); // else: No handler found. Report error to client? - return errorCodeToError(orcError(OrcErrorCode::UnexpectedRPCCall)); + return make_error>(FnId, + SeqNo); } /// Helper for handling setter procedures - this method returns a functor that @@ -995,7 +1251,8 @@ protected: // Unlock the pending results map to prevent recursive lock. Lock.unlock(); abandonPendingResponses(); - return errorCodeToError(orcError(OrcErrorCode::UnexpectedRPCResponse)); + return make_error< + InvalidSequenceNumberForResponse>(SeqNo); } } @@ -1041,7 +1298,7 @@ protected: Impl.template callB(Func::getPrototype())) { RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr; if (*RemoteIdOrErr == getInvalidFunctionId()) - return make_error(Func::getPrototype()); + return make_error(Func::getPrototype()); return *RemoteIdOrErr; } else return RemoteIdOrErr.takeError(); @@ -1049,7 +1306,7 @@ protected: // No key was available in the map and we weren't allowed to try to // negotiate one, so return an unknown function error. - return make_error(Func::getPrototype()); + return make_error(Func::getPrototype()); } using WrappedHandlerFn = std::function; diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index babcc7f26aab5e4268b0b5cac4b5da8c58ddb080..aabb44eef99dc2b350459dffea186fa52a626cbe 100644 --- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -23,8 +23,8 @@ #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" -#include #include +#include #include #include #include @@ -120,6 +120,10 @@ private: buildInitialSymbolTable(PFC->Objects); } + ~ConcreteLinkedObjectSet() override { + MemMgr->deregisterEHFrames(); + } + void setHandle(ObjSetHandleT H) { PFC->Handle = H; } diff --git a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h index 39753edaefc5dd45e372b6533aa10f3c9d80c0fe..52a546f7c6eb9ce458a0f38c86d70d4e67a04df6 100644 --- a/include/llvm/ExecutionEngine/Orc/RawByteChannel.h +++ b/include/llvm/ExecutionEngine/Orc/RawByteChannel.h @@ -121,11 +121,19 @@ class SerializationTraits::value>::type> { public: static Error serialize(ChannelT &C, bool V) { - return C.appendBytes(reinterpret_cast(&V), 1); + uint8_t Tmp = V ? 1 : 0; + if (auto Err = + C.appendBytes(reinterpret_cast(&Tmp), 1)) + return Err; + return Error::success(); } static Error deserialize(ChannelT &C, bool &V) { - return C.readBytes(reinterpret_cast(&V), 1); + uint8_t Tmp = 0; + if (auto Err = C.readBytes(reinterpret_cast(&Tmp), 1)) + return Err; + V = Tmp != 0; + return Error::success(); } }; diff --git a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h index 5638717790bba215181c1ec7fa8749cc97b9d391..a9778514b9f1833de54993fbeade82130f3a38ce 100644 --- a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h +++ b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h @@ -14,10 +14,10 @@ #ifndef LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H #define LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H +#include "llvm-c/ExecutionEngine.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/Support/CBindingWrapping.h" -#include "llvm-c/ExecutionEngine.h" #include #include #include @@ -69,13 +69,8 @@ public: /// Deregister EH frames in the current proces. static void deregisterEHFramesInProcess(uint8_t *Addr, size_t Size); - void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override { - registerEHFramesInProcess(Addr, Size); - } - - void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override { - deregisterEHFramesInProcess(Addr, Size); - } + void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override; + void deregisterEHFrames() override; /// This method returns the address of the specified function or variable in /// the current process. @@ -139,6 +134,13 @@ public: /// MCJIT or RuntimeDyld. Use getSymbolAddress instead. virtual void *getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure = true); + +private: + struct EHFrame { + uint8_t *Addr; + size_t Size; + }; + std::vector EHFrames; }; // Create wrappers for C Binding types (see CBindingWrapping.h). diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h index 13a5f9922c517a7e43b1a8bb3d1d670b3aeee768..9470866dc0d6fe72028e08f57e449f7e4250d64e 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -150,8 +150,7 @@ public: /// be the case for local execution) these two values will be the same. virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) = 0; - virtual void deregisterEHFrames(uint8_t *addr, uint64_t LoadAddr, - size_t Size) = 0; + virtual void deregisterEHFrames() = 0; /// This method is called when object loading is complete and section page /// permissions can be applied. It is up to the memory manager implementation diff --git a/include/llvm/ExecutionEngine/RuntimeDyldChecker.h b/include/llvm/ExecutionEngine/RuntimeDyldChecker.h index f5f52b5d2f9226e9aa96208d43bc44090f92b1c7..de89f405af4c4b15d7e29525c6445861ff7385f2 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyldChecker.h +++ b/include/llvm/ExecutionEngine/RuntimeDyldChecker.h @@ -10,6 +10,8 @@ #ifndef LLVM_EXECUTIONENGINE_RUNTIMEDYLDCHECKER_H #define LLVM_EXECUTIONENGINE_RUNTIMEDYLDCHECKER_H +#include "llvm/ADT/Optional.h" + #include #include #include @@ -97,6 +99,10 @@ public: StringRef SectionName, bool LocalAddress); + /// \brief If there is a section at the given local address, return its load + /// address, otherwise return none. + Optional getSectionLoadAddress(void *LocalAddress) const; + private: std::unique_ptr Impl; }; diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h index 6fc1dd2f285a190316d39f2ba29eae5b1a097186..3efcc637b6edab75536409f2a55524606993c4a8 100644 --- a/include/llvm/IR/Argument.h +++ b/include/llvm/IR/Argument.h @@ -27,8 +27,7 @@ namespace llvm { /// for a specific function. When used in the body of said function, the /// argument of course represents the value of the actual argument that the /// function was called with. -class Argument : public Value { - virtual void anchor(); +class Argument final : public Value { Function *Parent; unsigned ArgNo; @@ -108,18 +107,14 @@ public: bool hasSExtAttr() const; /// Add attributes to an argument. - void addAttr(AttributeList AS); + void addAttrs(AttrBuilder &B); - void addAttr(Attribute::AttrKind Kind) { - addAttr(AttributeList::get(getContext(), getArgNo() + 1, Kind)); - } + void addAttr(Attribute::AttrKind Kind); - /// Remove attributes from an argument. - void removeAttr(AttributeList AS); + void addAttr(Attribute Attr); - void removeAttr(Attribute::AttrKind Kind) { - removeAttr(AttributeList::get(getContext(), getArgNo() + 1, Kind)); - } + /// Remove attributes from an argument. + void removeAttr(Attribute::AttrKind Kind); /// Check if an argument has a given attribute. bool hasAttribute(Attribute::AttrKind Kind) const; diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 457682b9b2e72facddeaee1842d8c644823942e8..0e8adda82cbe7c0b0495f5bad5f1ef5f0e43c4ce 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -16,13 +16,13 @@ #ifndef LLVM_IR_ATTRIBUTES_H #define LLVM_IR_ATTRIBUTES_H +#include "llvm-c/Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/PointerLikeTypeTraits.h" -#include "llvm-c/Types.h" #include #include #include @@ -35,6 +35,7 @@ namespace llvm { class AttrBuilder; class AttributeImpl; class AttributeListImpl; +class AttributeList; class AttributeSetNode; template struct DenseMapInfo; class Function; @@ -213,7 +214,7 @@ class AttributeSet { template friend struct DenseMapInfo; private: - AttributeSet(AttributeSetNode *ASN) : SetNode(ASN) {} + explicit AttributeSet(AttributeSetNode *ASN) : SetNode(ASN) {} public: /// AttributeSet is a trivially copyable value type. @@ -227,14 +228,48 @@ public: bool operator==(const AttributeSet &O) { return SetNode == O.SetNode; } bool operator!=(const AttributeSet &O) { return !(*this == O); } + /// Add an argument attribute. Returns a new set because attribute sets are + /// immutable. + AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const; + + /// Add a target-dependent attribute. Returns a new set because attribute sets + /// are immutable. + AttributeSet addAttribute(LLVMContext &C, StringRef Kind, + StringRef Value = StringRef()) const; + + /// Add attributes to the attribute set. Returns a new set because attribute + /// sets are immutable. + AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const; + + /// Remove the specified attribute from this set. Returns a new set because + /// attribute sets are immutable. + AttributeSet removeAttribute(LLVMContext &C, Attribute::AttrKind Kind) const; + + /// Remove the specified attribute from this set. Returns a new set because + /// attribute sets are immutable. + AttributeSet removeAttribute(LLVMContext &C, StringRef Kind) const; + + /// Remove the specified attributes from this set. Returns a new set because + /// attribute sets are immutable. + AttributeSet removeAttributes(LLVMContext &C, + const AttrBuilder &AttrsToRemove) const; + + /// Return the number of attributes in this set. unsigned getNumAttributes() const; + /// Return true if attributes exists in this set. bool hasAttributes() const { return SetNode != nullptr; } + /// Return true if the attribute exists in this set. bool hasAttribute(Attribute::AttrKind Kind) const; + + /// Return true if the attribute exists in this set. bool hasAttribute(StringRef Kind) const; + /// Return the attribute object. Attribute getAttribute(Attribute::AttrKind Kind) const; + + /// Return the target-dependent attribute object. Attribute getAttribute(StringRef Kind) const; unsigned getAlignment() const; @@ -242,11 +277,15 @@ public: uint64_t getDereferenceableBytes() const; uint64_t getDereferenceableOrNullBytes() const; std::pair> getAllocSizeArgs() const; - std::string getAsString(bool InAttrGrp) const; + std::string getAsString(bool InAttrGrp = false) const; + + using iterator = const Attribute *; - typedef const Attribute *iterator; iterator begin() const; iterator end() const; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() const; +#endif }; //===----------------------------------------------------------------------===// @@ -280,12 +319,13 @@ template <> struct DenseMapInfo { /// the AttributeList object. The function attributes are at index /// `AttributeList::FunctionIndex', the return value is at index /// `AttributeList::ReturnIndex', and the attributes for the parameters start at -/// index `1'. +/// index `AttributeList::FirstArgIndex'. class AttributeList { public: enum AttrIndex : unsigned { ReturnIndex = 0U, - FunctionIndex = ~0U + FunctionIndex = ~0U, + FirstArgIndex = 1, }; private: @@ -304,24 +344,20 @@ public: /// \brief Create an AttributeList with the specified parameters in it. static AttributeList get(LLVMContext &C, ArrayRef> Attrs); - static AttributeList - get(LLVMContext &C, ArrayRef> Attrs); - - /// \brief Create an AttributeList from a vector of AttributeSetNodes. The - /// index of each set is implied by its position in the array \p Attrs: - /// 0 : Return attributes - /// 1 to n-1 : Argument attributes - /// n : Function attributes - /// Any element that has no entries should be left null. - static AttributeList get(LLVMContext &C, ArrayRef Attrs); + static AttributeList get(LLVMContext &C, + ArrayRef> Attrs); - static AttributeList - getImpl(LLVMContext &C, - ArrayRef> Attrs); + /// \brief Create an AttributeList from attribute sets for a function, its + /// return value, and all of its arguments. + static AttributeList get(LLVMContext &C, AttributeSet FnAttrs, + AttributeSet RetAttrs, + ArrayRef ArgAttrs); private: explicit AttributeList(AttributeListImpl *LI) : pImpl(LI) {} + static AttributeList getImpl(LLVMContext &C, ArrayRef AttrSets); + public: AttributeList() = default; @@ -338,78 +374,138 @@ public: static AttributeList get(LLVMContext &C, unsigned Index, const AttrBuilder &B); - /// \brief Add an attribute to the attribute set at the given index. Because - /// attribute sets are immutable, this returns a new set. + /// \brief Add an attribute to the attribute set at the given index. + /// Returns a new list because attribute lists are immutable. AttributeList addAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const; - /// \brief Add an attribute to the attribute set at the given index. Because - /// attribute sets are immutable, this returns a new set. + /// \brief Add an attribute to the attribute set at the given index. + /// Returns a new list because attribute lists are immutable. AttributeList addAttribute(LLVMContext &C, unsigned Index, StringRef Kind, StringRef Value = StringRef()) const; - /// Add an attribute to the attribute set at the given indices. Because - /// attribute sets are immutable, this returns a new set. - AttributeList addAttribute(LLVMContext &C, ArrayRef Indices, - Attribute A) const; + /// Add an attribute to the attribute set at the given index. + /// Returns a new list because attribute lists are immutable. + AttributeList addAttribute(LLVMContext &C, unsigned Index, Attribute A) const; - /// \brief Add attributes to the attribute set at the given index. Because - /// attribute sets are immutable, this returns a new set. + /// \brief Add attributes to the attribute set at the given index. + /// Returns a new list because attribute lists are immutable. AttributeList addAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const; + const AttrBuilder &B) const; - AttributeList addAttributes(LLVMContext &C, unsigned Index, - AttributeSet AS) const; + /// Add an argument attribute to the list. Returns a new list because + /// attribute lists are immutable. + AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, + Attribute::AttrKind Kind) const { + return addAttribute(C, ArgNo + FirstArgIndex, Kind); + } - AttributeList addAttributes(LLVMContext &C, unsigned Index, - const AttrBuilder &B) const; + /// Add an argument attribute to the list. Returns a new list because + /// attribute lists are immutable. + AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, + StringRef Kind, + StringRef Value = StringRef()) const { + return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value); + } + + /// Add an attribute to the attribute list at the given arg indices. Returns a + /// new list because attribute lists are immutable. + AttributeList addParamAttribute(LLVMContext &C, ArrayRef ArgNos, + Attribute A) const; + + /// Add an argument attribute to the list. Returns a new list because + /// attribute lists are immutable. + AttributeList addParamAttributes(LLVMContext &C, unsigned ArgNo, + const AttrBuilder &B) const { + return addAttributes(C, ArgNo + FirstArgIndex, B); + } /// \brief Remove the specified attribute at the specified index from this - /// attribute list. Because attribute lists are immutable, this returns the - /// new list. + /// attribute list. Returns a new list because attribute lists are immutable. AttributeList removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const; /// \brief Remove the specified attribute at the specified index from this - /// attribute list. Because attribute lists are immutable, this returns the - /// new list. + /// attribute list. Returns a new list because attribute lists are immutable. AttributeList removeAttribute(LLVMContext &C, unsigned Index, StringRef Kind) const; /// \brief Remove the specified attributes at the specified index from this - /// attribute list. Because attribute lists are immutable, this returns the - /// new list. - AttributeList removeAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const; - - /// \brief Remove the specified attributes at the specified index from this - /// attribute list. Because attribute lists are immutable, this returns the - /// new list. + /// attribute list. Returns a new list because attribute lists are immutable. AttributeList removeAttributes(LLVMContext &C, unsigned Index, - const AttrBuilder &Attrs) const; + const AttrBuilder &AttrsToRemove) const; /// \brief Remove all attributes at the specified index from this - /// attribute list. Because attribute lists are immutable, this returns the - /// new list. + /// attribute list. Returns a new list because attribute lists are immutable. AttributeList removeAttributes(LLVMContext &C, unsigned Index) const; - /// \brief Add the dereferenceable attribute to the attribute set at the given - /// index. Because attribute sets are immutable, this returns a new set. + /// \brief Remove the specified attribute at the specified arg index from this + /// attribute list. Returns a new list because attribute lists are immutable. + AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo, + Attribute::AttrKind Kind) const { + return removeAttribute(C, ArgNo + FirstArgIndex, Kind); + } + + /// \brief Remove the specified attribute at the specified arg index from this + /// attribute list. Returns a new list because attribute lists are immutable. + AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo, + StringRef Kind) const { + return removeAttribute(C, ArgNo + FirstArgIndex, Kind); + } + + /// \brief Remove the specified attribute at the specified arg index from this + /// attribute list. Returns a new list because attribute lists are immutable. + AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo, + const AttrBuilder &AttrsToRemove) const { + return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove); + } + + /// \brief Remove all attributes at the specified arg index from this + /// attribute list. Returns a new list because attribute lists are immutable. + AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo) const { + return removeAttributes(C, ArgNo + FirstArgIndex); + } + + /// \Brief Add the dereferenceable attribute to the attribute set at the given + /// index. Returns a new list because attribute lists are immutable. AttributeList addDereferenceableAttr(LLVMContext &C, unsigned Index, uint64_t Bytes) const; + /// \Brief Add the dereferenceable attribute to the attribute set at the given + /// arg index. Returns a new list because attribute lists are immutable. + AttributeList addDereferenceableParamAttr(LLVMContext &C, unsigned ArgNo, + uint64_t Bytes) const { + return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes); + } + /// \brief Add the dereferenceable_or_null attribute to the attribute set at - /// the given index. Because attribute sets are immutable, this returns a new - /// set. + /// the given index. Returns a new list because attribute lists are immutable. AttributeList addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index, uint64_t Bytes) const; + /// \brief Add the dereferenceable_or_null attribute to the attribute set at + /// the given arg index. Returns a new list because attribute lists are + /// immutable. + AttributeList addDereferenceableOrNullParamAttr(LLVMContext &C, + unsigned ArgNo, + uint64_t Bytes) const { + return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes); + } + /// Add the allocsize attribute to the attribute set at the given index. - /// Because attribute sets are immutable, this returns a new set. + /// Returns a new list because attribute lists are immutable. AttributeList addAllocSizeAttr(LLVMContext &C, unsigned Index, unsigned ElemSizeArg, const Optional &NumElemsArg); + /// Add the allocsize attribute to the attribute set at the given arg index. + /// Returns a new list because attribute lists are immutable. + AttributeList addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, + unsigned ElemSizeArg, + const Optional &NumElemsArg) { + return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg); + } + //===--------------------------------------------------------------------===// // AttributeList Accessors //===--------------------------------------------------------------------===// @@ -420,8 +516,9 @@ public: /// \brief The attributes for the specified index are returned. AttributeSet getAttributes(unsigned Index) const; - /// \brief The attributes for the specified index are returned. - AttributeSet getParamAttributes(unsigned Index) const; + /// \brief The attributes for the argument or parameter at the given index are + /// returned. + AttributeSet getParamAttributes(unsigned ArgNo) const; /// \brief The attributes for the ret value are returned. AttributeSet getRetAttributes() const; @@ -438,6 +535,21 @@ public: /// \brief Return true if attribute exists at the given index. bool hasAttributes(unsigned Index) const; + /// \brief Return true if the attribute exists for the given argument + bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { + return hasAttribute(ArgNo + FirstArgIndex, Kind); + } + + /// \brief Return true if the attribute exists for the given argument + bool hasParamAttr(unsigned ArgNo, StringRef Kind) const { + return hasAttribute(ArgNo + FirstArgIndex, Kind); + } + + /// \brief Return true if attributes exists for the given argument + bool hasParamAttrs(unsigned ArgNo) const { + return hasAttributes(ArgNo + FirstArgIndex); + } + /// \brief Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but /// may be faster. bool hasFnAttribute(Attribute::AttrKind Kind) const; @@ -446,6 +558,9 @@ public: /// may be faster. bool hasFnAttribute(StringRef Kind) const; + /// \brief Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind). + bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const; + /// \brief Return true if the specified attribute is set for at least one /// parameter or for the return value. If Index is not nullptr, the index /// of a parameter with the specified attribute is provided. @@ -458,8 +573,21 @@ public: /// \brief Return the attribute object that exists at the given index. Attribute getAttribute(unsigned Index, StringRef Kind) const; + /// \brief Return the attribute object that exists at the arg index. + Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { + return getAttribute(ArgNo + FirstArgIndex, Kind); + } + + /// \brief Return the attribute object that exists at the given index. + Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const { + return getAttribute(ArgNo + FirstArgIndex, Kind); + } + + /// \brief Return the alignment of the return value. + unsigned getRetAlignment() const; + /// \brief Return the alignment for the specified function parameter. - unsigned getParamAlignment(unsigned Index) const; + unsigned getParamAlignment(unsigned ArgNo) const; /// \brief Get the stack alignment. unsigned getStackAlignment(unsigned Index) const; @@ -467,10 +595,22 @@ public: /// \brief Get the number of dereferenceable bytes (or zero if unknown). uint64_t getDereferenceableBytes(unsigned Index) const; + /// \brief Get the number of dereferenceable bytes (or zero if unknown) of an + /// arg. + uint64_t getParamDereferenceableBytes(unsigned ArgNo) const { + return getDereferenceableBytes(ArgNo + FirstArgIndex); + } + /// \brief Get the number of dereferenceable_or_null bytes (or zero if /// unknown). uint64_t getDereferenceableOrNullBytes(unsigned Index) const; + /// \brief Get the number of dereferenceable_or_null bytes (or zero if + /// unknown) of an arg. + uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const { + return getDereferenceableOrNullBytes(ArgNo + FirstArgIndex); + } + /// Get the allocsize argument numbers (or pair(0, 0) if unknown). std::pair> getAllocSizeArgs(unsigned Index) const; @@ -478,39 +618,31 @@ public: /// \brief Return the attributes at the index as a string. std::string getAsString(unsigned Index, bool InAttrGrp = false) const; - typedef ArrayRef::iterator iterator; + //===--------------------------------------------------------------------===// + // AttributeList Introspection + //===--------------------------------------------------------------------===// + + typedef const AttributeSet *iterator; + iterator begin() const; + iterator end() const; - iterator begin(unsigned Slot) const; - iterator end(unsigned Slot) const; + unsigned getNumAttrSets() const; + + /// Use these to iterate over the valid attribute indices. + unsigned index_begin() const { return AttributeList::FunctionIndex; } + unsigned index_end() const { return getNumAttrSets() - 1; } /// operator==/!= - Provide equality predicates. bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; } bool operator!=(const AttributeList &RHS) const { return pImpl != RHS.pImpl; } - //===--------------------------------------------------------------------===// - // AttributeList Introspection - //===--------------------------------------------------------------------===// - /// \brief Return a raw pointer that uniquely identifies this attribute list. void *getRawPointer() const { return pImpl; } /// \brief Return true if there are no attributes. - bool isEmpty() const { - return getNumSlots() == 0; - } - - /// \brief Return the number of slots used in this attribute list. This is - /// the number of arguments that have an attribute set on them (including the - /// function itself). - unsigned getNumSlots() const; - - /// \brief Return the index for the given slot. - unsigned getSlotIndex(unsigned Slot) const; - - /// \brief Return the attributes at the given slot. - AttributeList getSlotAttributes(unsigned Slot) const; + bool isEmpty() const { return pImpl == nullptr; } void dump() const; }; @@ -661,11 +793,11 @@ public: bool empty() const { return Attrs.none(); } // Iterators for target-dependent attributes. - typedef std::pair td_type; - typedef std::map::iterator td_iterator; - typedef std::map::const_iterator td_const_iterator; - typedef iterator_range td_range; - typedef iterator_range td_const_range; + using td_type = std::pair; + using td_iterator = std::map::iterator; + using td_const_iterator = std::map::const_iterator; + using td_range = iterator_range; + using td_const_range = iterator_range; td_iterator td_begin() { return TargetDepAttrs.begin(); } td_iterator td_end() { return TargetDepAttrs.end(); } diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td index 7b63638a3f6acd015df70770b6474512bfe10805..75867a6e583357fd334e39ccb35fb89f3e5fec83 100644 --- a/include/llvm/IR/Attributes.td +++ b/include/llvm/IR/Attributes.td @@ -137,6 +137,9 @@ def SExt : EnumAttr<"signext">; /// +1 bias 0 means unaligned (different from alignstack=(1)). def StackAlignment : EnumAttr<"alignstack">; +/// Function can be speculated. +def Speculatable : EnumAttr<"speculatable">; + /// Stack protection. def StackProtect : EnumAttr<"ssp">; diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h index bd210e1abf31dd476c3b85d3a67a415424df9555..23f838b640e0b033f42fe80b91d5b93a47382ec7 100644 --- a/include/llvm/IR/BasicBlock.h +++ b/include/llvm/IR/BasicBlock.h @@ -14,14 +14,15 @@ #ifndef LLVM_IR_BASICBLOCK_H #define LLVM_IR_BASICBLOCK_H +#include "llvm-c/Types.h" +#include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/Twine.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Value.h" #include "llvm/Support/CBindingWrapping.h" -#include "llvm-c/Types.h" +#include "llvm/Support/Compiler.h" #include #include @@ -31,7 +32,10 @@ class CallInst; class Function; class LandingPadInst; class LLVMContext; +class Module; +class PHINode; class TerminatorInst; +class ValueSymbolTable; /// \brief LLVM Basic Block Representation /// @@ -48,10 +52,10 @@ class TerminatorInst; /// occur because it may be useful in the intermediate stage of constructing or /// modifying a program. However, the verifier will ensure that basic blocks /// are "well formed". -class BasicBlock : public Value, // Basic blocks are data objects also - public ilist_node_with_parent { +class BasicBlock final : public Value, // Basic blocks are data objects also + public ilist_node_with_parent { public: - typedef SymbolTableList InstListType; + using InstListType = SymbolTableList; private: friend class BlockAddress; @@ -74,16 +78,16 @@ private: public: BasicBlock(const BasicBlock &) = delete; BasicBlock &operator=(const BasicBlock &) = delete; - ~BasicBlock() override; + ~BasicBlock(); /// \brief Get the context in which this basic block lives. LLVMContext &getContext() const; /// Instruction iterators... - typedef InstListType::iterator iterator; - typedef InstListType::const_iterator const_iterator; - typedef InstListType::reverse_iterator reverse_iterator; - typedef InstListType::const_reverse_iterator const_reverse_iterator; + using iterator = InstListType::iterator; + using const_iterator = InstListType::const_iterator; + using reverse_iterator = InstListType::reverse_iterator; + using const_reverse_iterator = InstListType::const_reverse_iterator; /// \brief Creates a new BasicBlock. /// @@ -258,6 +262,50 @@ public: inline const Instruction &back() const { return InstList.back(); } inline Instruction &back() { return InstList.back(); } + /// Iterator to walk just the phi nodes in the basic block. + template + class phi_iterator_impl + : public iterator_facade_base, + std::forward_iterator_tag, PHINodeT> { + friend BasicBlock; + + PHINodeT *PN; + + phi_iterator_impl(PHINodeT *PN) : PN(PN) {} + + public: + // Allow default construction to build variables, but this doesn't build + // a useful iterator. + phi_iterator_impl() = default; + + // Allow conversion between instantiations where valid. + template + phi_iterator_impl(const phi_iterator_impl &Arg) + : PN(Arg.PN) {} + + bool operator==(const phi_iterator_impl &Arg) const { return PN == Arg.PN; } + + PHINodeT &operator*() const { return *PN; } + + using phi_iterator_impl::iterator_facade_base::operator++; + phi_iterator_impl &operator++() { + assert(PN && "Cannot increment the end iterator!"); + PN = dyn_cast(std::next(BBIteratorT(PN))); + return *this; + } + }; + typedef phi_iterator_impl<> phi_iterator; + typedef phi_iterator_impl + const_phi_iterator; + + /// Returns a range that iterates over the phis in the basic block. + /// + /// Note that this cannot be used with basic blocks that have no terminator. + iterator_range phis() const { + return const_cast(this)->phis(); + } + iterator_range phis(); + /// \brief Return the underlying instruction list container. /// /// Currently you need to access the underlying instruction list container diff --git a/include/llvm/IR/CFG.h b/include/llvm/IR/CFG.h index 52de11a06bafb13ddfb23029c5fca80f7a1231c7..e259e42e1ce4c6348b449df2d939734fe74c98a5 100644 --- a/include/llvm/IR/CFG.h +++ b/include/llvm/IR/CFG.h @@ -37,9 +37,9 @@ namespace llvm { template // Predecessor Iterator class PredIterator : public std::iterator { - typedef std::iterator super; - typedef PredIterator Self; + using super = + std::iterator; + using Self = PredIterator; USE_iterator It; inline void advancePastNonTerminators() { @@ -49,8 +49,8 @@ class PredIterator : public std::iteratoruser_begin()) { @@ -90,11 +90,11 @@ public: } }; -typedef PredIterator pred_iterator; -typedef PredIterator const_pred_iterator; -typedef iterator_range pred_range; -typedef iterator_range pred_const_range; +using pred_iterator = PredIterator; +using const_pred_iterator = + PredIterator; +using pred_range = iterator_range; +using pred_const_range = iterator_range; inline pred_iterator pred_begin(BasicBlock *BB) { return pred_iterator(BB); } inline const_pred_iterator pred_begin(const BasicBlock *BB) { @@ -118,12 +118,12 @@ inline pred_const_range predecessors(const BasicBlock *BB) { // BasicBlock succ_iterator helpers //===----------------------------------------------------------------------===// -typedef TerminatorInst::SuccIterator - succ_iterator; -typedef TerminatorInst::SuccIterator - succ_const_iterator; -typedef iterator_range succ_range; -typedef iterator_range succ_const_range; +using succ_iterator = + TerminatorInst::SuccIterator; +using succ_const_iterator = + TerminatorInst::SuccIterator; +using succ_range = iterator_range; +using succ_const_range = iterator_range; inline succ_iterator succ_begin(BasicBlock *BB) { return succ_iterator(BB->getTerminator()); @@ -160,8 +160,8 @@ struct isPodLike> { // graph of basic blocks... template <> struct GraphTraits { - typedef BasicBlock *NodeRef; - typedef succ_iterator ChildIteratorType; + using NodeRef = BasicBlock *; + using ChildIteratorType = succ_iterator; static NodeRef getEntryNode(BasicBlock *BB) { return BB; } static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); } @@ -169,8 +169,8 @@ template <> struct GraphTraits { }; template <> struct GraphTraits { - typedef const BasicBlock *NodeRef; - typedef succ_const_iterator ChildIteratorType; + using NodeRef = const BasicBlock *; + using ChildIteratorType = succ_const_iterator; static NodeRef getEntryNode(const BasicBlock *BB) { return BB; } @@ -184,16 +184,18 @@ template <> struct GraphTraits { // instead of the successor edges. // template <> struct GraphTraits> { - typedef BasicBlock *NodeRef; - typedef pred_iterator ChildIteratorType; + using NodeRef = BasicBlock *; + using ChildIteratorType = pred_iterator; + static NodeRef getEntryNode(Inverse G) { return G.Graph; } static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); } static ChildIteratorType child_end(NodeRef N) { return pred_end(N); } }; template <> struct GraphTraits> { - typedef const BasicBlock *NodeRef; - typedef const_pred_iterator ChildIteratorType; + using NodeRef = const BasicBlock *; + using ChildIteratorType = const_pred_iterator; + static NodeRef getEntryNode(Inverse G) { return G.Graph; } static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); } static ChildIteratorType child_end(NodeRef N) { return pred_end(N); } @@ -211,7 +213,7 @@ template <> struct GraphTraits : public GraphTraits { static NodeRef getEntryNode(Function *F) { return &F->getEntryBlock(); } // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - typedef pointer_iterator nodes_iterator; + using nodes_iterator = pointer_iterator; static nodes_iterator nodes_begin(Function *F) { return nodes_iterator(F->begin()); @@ -228,7 +230,7 @@ template <> struct GraphTraits : static NodeRef getEntryNode(const Function *F) { return &F->getEntryBlock(); } // nodes_iterator/begin/end - Allow iteration over all nodes in the graph - typedef pointer_iterator nodes_iterator; + using nodes_iterator = pointer_iterator; static nodes_iterator nodes_begin(const Function *F) { return nodes_iterator(F->begin()); diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index 6a465709cc51d62877c6b33a60658bae2685419f..96fbebf42c38b043780443aa579d6b242593615c 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -26,9 +26,9 @@ #ifndef LLVM_IR_CALLSITE_H #define LLVM_IR_CALLSITE_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" @@ -36,10 +36,10 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/Support/Casting.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include #include #include @@ -207,7 +207,7 @@ public: /// The type of iterator to use when looping over actual arguments at this /// call site. - typedef IterTy arg_iterator; + using arg_iterator = IterTy; iterator_range args() const { return make_range(arg_begin(), arg_end()); @@ -231,7 +231,7 @@ public: /// Type of iterator to use when looping over data operands at this call site /// (see below). - typedef IterTy data_operand_iterator; + using data_operand_iterator = IterTy; /// data_operands_begin/data_operands_end - Return iterators iterating over /// the call / invoke argument list and bundle operands. For invokes, this is @@ -339,6 +339,10 @@ public: CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr)); } + void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + CALLSITE_DELEGATE_SETTER(addParamAttr(ArgNo, Kind)); + } + void removeAttribute(unsigned i, Attribute::AttrKind Kind) { CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); } @@ -347,6 +351,10 @@ public: CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind)); } + void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + CALLSITE_DELEGATE_SETTER(removeParamAttr(ArgNo, Kind)); + } + /// Return true if this function has the given attribute. bool hasFnAttr(Attribute::AttrKind Kind) const { CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind)); @@ -357,9 +365,14 @@ public: CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind)); } + /// Return true if this return value has the given attribute. + bool hasRetAttr(Attribute::AttrKind Kind) const { + CALLSITE_DELEGATE_GETTER(hasRetAttr(Kind)); + } + /// Return true if the call or the callee has the given attribute. - bool paramHasAttr(unsigned i, Attribute::AttrKind Kind) const { - CALLSITE_DELEGATE_GETTER(paramHasAttr(i, Kind)); + bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { + CALLSITE_DELEGATE_GETTER(paramHasAttr(ArgNo, Kind)); } Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { @@ -381,28 +394,31 @@ public: CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, Kind)); } + /// Extract the alignment of the return value. + unsigned getRetAlignment() const { + CALLSITE_DELEGATE_GETTER(getRetAlignment()); + } + /// Extract the alignment for a call or parameter (0=unknown). - uint16_t getParamAlignment(uint16_t i) const { - CALLSITE_DELEGATE_GETTER(getParamAlignment(i)); + unsigned getParamAlignment(unsigned ArgNo) const { + CALLSITE_DELEGATE_GETTER(getParamAlignment(ArgNo)); } /// Extract the number of dereferenceable bytes for a call or parameter /// (0=unknown). - uint64_t getDereferenceableBytes(uint16_t i) const { + uint64_t getDereferenceableBytes(unsigned i) const { CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i)); } /// Extract the number of dereferenceable_or_null bytes for a call or /// parameter (0=unknown). - uint64_t getDereferenceableOrNullBytes(uint16_t i) const { + uint64_t getDereferenceableOrNullBytes(unsigned i) const { CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i)); } - /// Determine if the parameter or return value is marked with NoAlias - /// attribute. - /// @param n The parameter to check. 1 is the first parameter, 0 is the return - bool doesNotAlias(unsigned n) const { - CALLSITE_DELEGATE_GETTER(doesNotAlias(n)); + /// Determine if the return value is marked with NoAlias attribute. + bool returnDoesNotAlias() const { + CALLSITE_DELEGATE_GETTER(returnDoesNotAlias()); } /// Return true if the call should not be treated as a call to a builtin. @@ -554,24 +570,24 @@ public: /// Determine whether this argument is passed by value. bool isByValArgument(unsigned ArgNo) const { - return paramHasAttr(ArgNo + 1, Attribute::ByVal); + return paramHasAttr(ArgNo, Attribute::ByVal); } /// Determine whether this argument is passed in an alloca. bool isInAllocaArgument(unsigned ArgNo) const { - return paramHasAttr(ArgNo + 1, Attribute::InAlloca); + return paramHasAttr(ArgNo, Attribute::InAlloca); } /// Determine whether this argument is passed by value or in an alloca. bool isByValOrInAllocaArgument(unsigned ArgNo) const { - return paramHasAttr(ArgNo + 1, Attribute::ByVal) || - paramHasAttr(ArgNo + 1, Attribute::InAlloca); + return paramHasAttr(ArgNo, Attribute::ByVal) || + paramHasAttr(ArgNo, Attribute::InAlloca); } /// Determine if there are is an inalloca argument. Only the last argument can /// have the inalloca attribute. bool hasInAllocaArgument() const { - return paramHasAttr(arg_size(), Attribute::InAlloca); + return !arg_empty() && paramHasAttr(arg_size() - 1, Attribute::InAlloca); } bool doesNotAccessMemory(unsigned OpNo) const { @@ -592,9 +608,9 @@ public: /// This may be because it has the nonnull attribute, or because at least /// one byte is dereferenceable and the pointer is in addrspace(0). bool isReturnNonNull() const { - if (paramHasAttr(0, Attribute::NonNull)) + if (hasRetAttr(Attribute::NonNull)) return true; - else if (getDereferenceableBytes(0) > 0 && + else if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 && getType()->getPointerAddressSpace() == 0) return true; diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index 9cfbda1f68575fe43f4a22e558beb45a14e77bf5..801e88aba4d1b41864cfcc7240ece7ac6247735d 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -1,4 +1,4 @@ -//===-- llvm/CallingConv.h - LLVM Calling Conventions -----------*- C++ -*-===// +//===- llvm/CallingConv.h - LLVM Calling Conventions ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,8 +20,9 @@ namespace llvm { /// the well-known calling conventions. /// namespace CallingConv { + /// LLVM IR allows to use arbitrary numbers as calling convention identifiers. - typedef unsigned ID; + using ID = unsigned; /// A set of enums which specify the assigned numeric values for known llvm /// calling conventions. @@ -196,11 +197,20 @@ namespace CallingConv { /// Register calling convention used for parameters transfer optimization X86_RegCall = 92, + /// Calling convention used for Mesa hull shaders. (= tessellation control + /// shaders) + AMDGPU_HS = 93, + + /// Calling convention used for special MSP430 rtlib functions + /// which have an "optimized" convention using additional registers. + MSP430_BUILTIN = 94, + /// The highest possible calling convention ID. Must be some 2^k - 1. MaxID = 1023 }; -} // End CallingConv namespace -} // End llvm namespace +} // end namespace CallingConv + +} // end namespace llvm -#endif +#endif // LLVM_IR_CALLINGCONV_H diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h index 3b3694e7e60d0c2ba96f5b203c77abf5de1d88a7..82afd9a2691f5a0a197c7ecb174a70dd70d48d02 100644 --- a/include/llvm/IR/Constant.h +++ b/include/llvm/IR/Constant.h @@ -40,8 +40,6 @@ class APInt; /// don't have to worry about the lifetime of the objects. /// @brief LLVM Constant Representation class Constant : public User { - void anchor() override; - protected: Constant(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps) : User(ty, vty, Ops, NumOps) {} diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h index 17c39a6ef9b564479b26983fdd0d4de623c201be..6a50a8801f86f5cf0158d76b440950835eb8016f 100644 --- a/include/llvm/IR/ConstantRange.h +++ b/include/llvm/IR/ConstantRange.h @@ -41,26 +41,20 @@ namespace llvm { class MDNode; /// This class represents a range of values. -/// -class ConstantRange { +class LLVM_NODISCARD ConstantRange { APInt Lower, Upper; - // If we have move semantics, pass APInts by value and move them into place. - typedef APInt APIntMoveTy; - public: /// Initialize a full (the default) or empty set for the specified bit width. - /// explicit ConstantRange(uint32_t BitWidth, bool isFullSet = true); /// Initialize a range to hold the single specified value. - /// - ConstantRange(APIntMoveTy Value); + ConstantRange(APInt Value); /// @brief Initialize a range of values explicitly. This will assert out if /// Lower==Upper and Lower != Min or Max value for its type. It will also /// assert out if the two APInt's are not the same bit width. - ConstantRange(APIntMoveTy Lower, APIntMoveTy Upper); + ConstantRange(APInt Lower, APInt Upper); /// Produce the smallest range such that all values that may satisfy the given /// predicate with any value contained within Other is contained in the @@ -99,7 +93,7 @@ public: /// /// NB! The returned set does *not* contain **all** possible values of X for /// which "X BinOpC Y" does not wrap -- some viable values of X may be - /// missing, so you cannot use this to contrain X's range. E.g. in the last + /// missing, so you cannot use this to constrain X's range. E.g. in the last /// example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2), but (-2) /// is not in the set returned. /// @@ -122,46 +116,36 @@ public: bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const; /// Return the lower value for this range. - /// const APInt &getLower() const { return Lower; } /// Return the upper value for this range. - /// const APInt &getUpper() const { return Upper; } /// Get the bit width of this ConstantRange. - /// uint32_t getBitWidth() const { return Lower.getBitWidth(); } /// Return true if this set contains all of the elements possible /// for this data-type. - /// bool isFullSet() const; /// Return true if this set contains no members. - /// bool isEmptySet() const; /// Return true if this set wraps around the top of the range. /// For example: [100, 8). - /// bool isWrappedSet() const; /// Return true if this set wraps around the INT_MIN of /// its bitwidth. For example: i8 [120, 140). - /// bool isSignWrappedSet() const; /// Return true if the specified value is in the set. - /// bool contains(const APInt &Val) const; /// Return true if the other range is a subset of this one. - /// bool contains(const ConstantRange &CR) const; /// If this set contains a single element, return it, otherwise return null. - /// const APInt *getSingleElement() const { if (Upper == Lower + 1) return &Lower; @@ -177,35 +161,30 @@ public: } /// Return true if this set contains exactly one member. - /// bool isSingleElement() const { return getSingleElement() != nullptr; } /// Return the number of elements in this set. - /// APInt getSetSize() const; /// Compare set size of this range with the range CR. - /// - bool isSizeStrictlySmallerThanOf(const ConstantRange &CR) const; + bool isSizeStrictlySmallerThan(const ConstantRange &CR) const; + + // Compare set size of this range with Value. + bool isSizeLargerThan(uint64_t MaxSize) const; /// Return the largest unsigned value contained in the ConstantRange. - /// APInt getUnsignedMax() const; /// Return the smallest unsigned value contained in the ConstantRange. - /// APInt getUnsignedMin() const; /// Return the largest signed value contained in the ConstantRange. - /// APInt getSignedMax() const; /// Return the smallest signed value contained in the ConstantRange. - /// APInt getSignedMin() const; /// Return true if this range is equal to another range. - /// bool operator==(const ConstantRange &CR) const { return Lower == CR.Lower && Upper == CR.Upper; } @@ -216,8 +195,8 @@ public: /// Subtract the specified constant from the endpoints of this constant range. ConstantRange subtract(const APInt &CI) const; - /// \brief Subtract the specified range from this range (aka relative - /// complement of the sets). + /// Subtract the specified range from this range (aka relative complement of + /// the sets). ConstantRange difference(const ConstantRange &CR) const; /// Return the range that results from the intersection of @@ -226,7 +205,6 @@ public: /// smallest possible set size that does so. Because there may be two /// intersections with the same set size, A.intersectWith(B) might not /// be equal to B.intersectWith(A). - /// ConstantRange intersectWith(const ConstantRange &CR) const; /// Return the range that results from the union of this range @@ -234,7 +212,6 @@ public: /// elements of both sets, but may contain more. For example, [3, 9) union /// [12,15) is [3, 15), which includes 9, 10, and 11, which were not included /// in either set before. - /// ConstantRange unionWith(const ConstantRange &CR) const; /// Return a new range representing the possible values resulting @@ -334,15 +311,12 @@ public: ConstantRange lshr(const ConstantRange &Other) const; /// Return a new range that is the logical not of the current set. - /// ConstantRange inverse() const; /// Print out the bounds to a stream. - /// void print(raw_ostream &OS) const; /// Allow printing from a debugger easily. - /// void dump() const; }; diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h index ad83b21c7bf3fb00961642e2645096e5b394859e..003a6d5d075dd3d1ddf1fb8fc0b4ce184bf3db02 100644 --- a/include/llvm/IR/Constants.h +++ b/include/llvm/IR/Constants.h @@ -26,6 +26,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" @@ -57,8 +58,6 @@ template struct ConstantAggrKeyType; class ConstantData : public Constant { friend class Constant; - void anchor() override; - Value *handleOperandChangeImpl(Value *From, Value *To) { llvm_unreachable("Constant data does not have operands!"); } @@ -69,11 +68,8 @@ protected: void *operator new(size_t s) { return User::operator new(s, 0); } public: - ConstantData() = delete; ConstantData(const ConstantData &) = delete; - void *operator new(size_t, unsigned) = delete; - /// Methods to support type inquiry through isa, cast, and dyn_cast. static bool classof(const Value *V) { return V->getValueID() >= ConstantDataFirstVal && @@ -92,7 +88,6 @@ class ConstantInt final : public ConstantData { ConstantInt(IntegerType *Ty, const APInt& V); - void anchor() override; void destroyConstantImpl(); public: @@ -196,7 +191,7 @@ public: /// common code. It also correctly performs the comparison without the /// potential for an assertion from getZExtValue(). bool isZero() const { - return Val == 0; + return Val.isNullValue(); } /// This is just a convenience method to make client code smaller for a @@ -204,7 +199,7 @@ public: /// potential for an assertion from getZExtValue(). /// @brief Determine if the value is one. bool isOne() const { - return Val == 1; + return Val.isOneValue(); } /// This function will return true iff every bit in this constant is set @@ -245,7 +240,7 @@ public: /// @returns true iff this constant is greater or equal to the given number. /// @brief Determine if the value is greater or equal to the given number. bool uge(uint64_t Num) const { - return Val.getActiveBits() > 64 || Val.getZExtValue() >= Num; + return Val.uge(Num); } /// getLimitedValue - If the value is smaller than the specified limit, @@ -273,7 +268,6 @@ class ConstantFP final : public ConstantData { ConstantFP(Type *Ty, const APFloat& V); - void anchor() override; void destroyConstantImpl(); public: @@ -452,7 +446,14 @@ class ConstantStruct final : public ConstantAggregate { public: // ConstantStruct accessors static Constant *get(StructType *T, ArrayRef V); - static Constant *get(StructType *T, ...) LLVM_END_WITH_NULL; + + template + static typename std::enable_if::value, + Constant *>::type + get(StructType *T, Csts *... Vs) { + SmallVector Values({Vs...}); + return get(T, Values); + } /// Return an anonymous struct that has the specified elements. /// If the struct is possibly empty, then you must specify a context. @@ -580,7 +581,7 @@ class ConstantDataSequential : public ConstantData { protected: explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data) : ConstantData(ty, VT), DataElements(Data), Next(nullptr) {} - ~ConstantDataSequential() override { delete Next; } + ~ConstantDataSequential() { delete Next; } static Constant *getImpl(StringRef Bytes, Type *Ty); @@ -630,8 +631,8 @@ public: /// The size of the elements is known to be a multiple of one byte. uint64_t getElementByteSize() const; - /// This method returns true if this is an array of i8. - bool isString() const; + /// This method returns true if this is an array of \p CharSize integers. + bool isString(unsigned CharSize = 8) const; /// This method returns true if the array "isString", ends with a null byte, /// and does not contains any other null bytes. @@ -684,13 +685,9 @@ class ConstantDataArray final : public ConstantDataSequential { return User::operator new(s, 0); } - void anchor() override; - public: ConstantDataArray(const ConstantDataArray &) = delete; - void *operator new(size_t, unsigned) = delete; - /// get() constructors - Return a constant with array type with an element /// count and element type matching the ArrayRef passed in. Note that this /// can return a ConstantAggregateZero object. @@ -747,13 +744,9 @@ class ConstantDataVector final : public ConstantDataSequential { return User::operator new(s, 0); } - void anchor() override; - public: ConstantDataVector(const ConstantDataVector &) = delete; - void *operator new(size_t, unsigned) = delete; - /// get() constructors - Return a constant with vector type with an element /// count and element type matching the ArrayRef passed in. Note that this /// can return a ConstantAggregateZero object. @@ -830,8 +823,6 @@ class BlockAddress final : public Constant { Value *handleOperandChangeImpl(Value *From, Value *To); public: - void *operator new(size_t, unsigned) = delete; - /// Return a BlockAddress for the specified function and basic block. static BlockAddress *get(Function *F, BasicBlock *BB); diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h index 69bd5c847a8d06e051ac840fc03201bfdd9b0d42..8e6bb4baccafb7b02c34efbab54a066637ab7e47 100644 --- a/include/llvm/IR/DIBuilder.h +++ b/include/llvm/IR/DIBuilder.h @@ -86,6 +86,10 @@ namespace llvm { /// Construct any deferred debug info descriptors. void finalize(); + /// Finalize a specific subprogram - no new variables may be added to this + /// subprogram afterwards. + void finalizeSubprogram(DISubprogram *SP); + /// A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. /// \param Lang Source programming language, eg. dwarf::DW_LANG_C99 @@ -577,15 +581,14 @@ namespace llvm { /// These flags are used to emit dwarf attributes. /// \param isOptimized True if optimization is ON. /// \param TParams Function template parameters. - DISubprogram *createFunction(DIScope *Scope, StringRef Name, - StringRef LinkageName, DIFile *File, - unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, - unsigned ScopeLine, - DINode::DIFlags Flags = DINode::FlagZero, - bool isOptimized = false, - DITemplateParameterArray TParams = nullptr, - DISubprogram *Decl = nullptr); + /// \param ThrownTypes Exception types this function may throw. + DISubprogram *createFunction( + DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, + unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, + bool isDefinition, unsigned ScopeLine, + DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false, + DITemplateParameterArray TParams = nullptr, + DISubprogram *Decl = nullptr, DITypeArray ThrownTypes = nullptr); /// Identical to createFunction, /// except that the resulting DbgNode is meant to be RAUWed. @@ -595,7 +598,7 @@ namespace llvm { bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false, DITemplateParameterArray TParams = nullptr, - DISubprogram *Decl = nullptr); + DISubprogram *Decl = nullptr, DITypeArray ThrownTypes = nullptr); /// Create a new descriptor for the specified C++ method. /// See comments in \a DISubprogram* for descriptions of these fields. @@ -619,23 +622,23 @@ namespace llvm { /// This flags are used to emit dwarf attributes. /// \param isOptimized True if optimization is ON. /// \param TParams Function template parameters. + /// \param ThrownTypes Exception types this function may throw. DISubprogram *createMethod( DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned Virtuality = 0, unsigned VTableIndex = 0, int ThisAdjustment = 0, DIType *VTableHolder = nullptr, DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false, - DITemplateParameterArray TParams = nullptr); + DITemplateParameterArray TParams = nullptr, + DITypeArray ThrownTypes = nullptr); /// This creates new descriptor for a namespace with the specified /// parent scope. /// \param Scope Namespace scope /// \param Name Name of this namespace - /// \param File Source file - /// \param LineNo Line number /// \param ExportSymbols True for C++ inline namespaces. - DINamespace *createNameSpace(DIScope *Scope, StringRef Name, DIFile *File, - unsigned LineNo, bool ExportSymbols); + DINamespace *createNameSpace(DIScope *Scope, StringRef Name, + bool ExportSymbols); /// This creates new descriptor for a module with the specified /// parent scope. @@ -778,6 +781,9 @@ namespace llvm { } }; + // Create wrappers for C Binding types (see CBindingWrapping.h). + DEFINE_ISA_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef) + } // end namespace llvm #endif // LLVM_IR_DIBUILDER_H diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h index 1930d48577d4fc9ece1bfabc1d2dda0e7264b78b..daf8f8da689d678ac696f515efb449b43a173dbe 100644 --- a/include/llvm/IR/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -1,4 +1,4 @@ -//===--------- llvm/DataLayout.h - Data size & alignment info ---*- C++ -*-===// +//===- llvm/DataLayout.h - Data size & alignment info -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,27 +20,32 @@ #ifndef LLVM_IR_DATALAYOUT_H #define LLVM_IR_DATALAYOUT_H -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include // This needs to be outside of the namespace, to avoid conflict with llvm-c // decl. -typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef; +using LLVMTargetDataRef = struct LLVMOpaqueTargetData *; namespace llvm { -class Value; -class StructType; -class StructLayout; -class Triple; class GlobalVariable; class LLVMContext; -template -class ArrayRef; +class Module; +class StructLayout; +class Triple; +class Value; /// Enum used to categorize the alignment types stored by LayoutAlignElem enum AlignTypeEnum { @@ -72,6 +77,7 @@ struct LayoutAlignElem { static LayoutAlignElem get(AlignTypeEnum align_type, unsigned abi_align, unsigned pref_align, uint32_t bit_width); + bool operator==(const LayoutAlignElem &rhs) const; }; @@ -90,6 +96,7 @@ struct PointerAlignElem { /// Initializer static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign, unsigned PrefAlign, uint32_t TypeByteWidth); + bool operator==(const PointerAlignElem &rhs) const; }; @@ -121,7 +128,7 @@ private: /// \brief Primitive type alignment data. This is sorted by type and bit /// width during construction. - typedef SmallVector AlignmentsTy; + using AlignmentsTy = SmallVector; AlignmentsTy Alignments; AlignmentsTy::const_iterator @@ -136,7 +143,7 @@ private: /// \brief The string representation used to create this DataLayout std::string StringRepresentation; - typedef SmallVector PointersTy; + using PointersTy = SmallVector; PointersTy Pointers; PointersTy::const_iterator @@ -147,7 +154,7 @@ private: PointersTy::iterator findPointerLowerBound(uint32_t AddressSpace); // The StructType -> StructLayout map. - mutable void *LayoutMap; + mutable void *LayoutMap = nullptr; /// Pointers in these address spaces are non-integral, and don't have a /// well-defined bitwise representation. @@ -172,16 +179,16 @@ private: public: /// Constructs a DataLayout from a specification string. See reset(). - explicit DataLayout(StringRef LayoutDescription) : LayoutMap(nullptr) { + explicit DataLayout(StringRef LayoutDescription) { reset(LayoutDescription); } /// Initialize target data from properties stored in the module. explicit DataLayout(const Module *M); - void init(const Module *M); + DataLayout(const DataLayout &DL) { *this = DL; } - DataLayout(const DataLayout &DL) : LayoutMap(nullptr) { *this = DL; } + ~DataLayout(); // Not virtual, do not subclass this class DataLayout &operator=(const DataLayout &DL) { clear(); @@ -200,7 +207,7 @@ public: bool operator==(const DataLayout &Other) const; bool operator!=(const DataLayout &Other) const { return !(*this == Other); } - ~DataLayout(); // Not virtual, do not subclass this class + void init(const Module *M); /// Parse a data layout string (with fallback to default values). void reset(StringRef LayoutDescription); @@ -489,6 +496,7 @@ class StructLayout { unsigned IsPadded : 1; unsigned NumElements : 31; uint64_t MemberOffsets[1]; // variable sized array! + public: uint64_t getSizeInBytes() const { return StructSize; } @@ -515,6 +523,7 @@ public: private: friend class DataLayout; // Only DataLayout can create this class + StructLayout(StructType *ST, const DataLayout &DL); }; @@ -560,6 +569,6 @@ inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { } } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_IR_DATALAYOUT_H diff --git a/include/llvm/IR/DebugInfo.h b/include/llvm/IR/DebugInfo.h index 04f46197b1c330c607fa3f855c7353b425d8c8fe..1d8e7e2855fd1b7a4302c10e454e41967714cb4c 100644 --- a/include/llvm/IR/DebugInfo.h +++ b/include/llvm/IR/DebugInfo.h @@ -21,17 +21,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ErrorHandling.h" -#include namespace llvm { -class Module; + class DbgDeclareInst; class DbgValueInst; -template -class DenseMap; +class Module; /// \brief Find subprogram that is enclosing this scope. DISubprogram *getDISubprogram(const MDNode *Scope); @@ -95,13 +90,13 @@ private: bool addScope(DIScope *Scope); public: - typedef SmallVectorImpl::const_iterator - compile_unit_iterator; - typedef SmallVectorImpl::const_iterator subprogram_iterator; - typedef SmallVectorImpl::const_iterator - global_variable_expression_iterator; - typedef SmallVectorImpl::const_iterator type_iterator; - typedef SmallVectorImpl::const_iterator scope_iterator; + using compile_unit_iterator = + SmallVectorImpl::const_iterator; + using subprogram_iterator = SmallVectorImpl::const_iterator; + using global_variable_expression_iterator = + SmallVectorImpl::const_iterator; + using type_iterator = SmallVectorImpl::const_iterator; + using scope_iterator = SmallVectorImpl::const_iterator; iterator_range compile_units() const { return make_range(CUs.begin(), CUs.end()); @@ -140,4 +135,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_IR_DEBUGINFO_H diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 8a924b40143aa732d65c6b33a05b5bcd82bd71ee..9374fe4fae7667c0b31de04bc2ad65e7e5468a72 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -17,11 +17,14 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Dwarf.h" #include #include #include @@ -56,8 +59,6 @@ namespace llvm { -template class Optional; - /// Holds a subclass of DINode. /// /// FIXME: This class doesn't currently make much sense. Previously it was a @@ -92,9 +93,9 @@ public: bool operator!=(const TypedDINodeRef &X) const { return MD != X.MD; } }; -typedef TypedDINodeRef DINodeRef; -typedef TypedDINodeRef DIScopeRef; -typedef TypedDINodeRef DITypeRef; +using DINodeRef = TypedDINodeRef; +using DIScopeRef = TypedDINodeRef; +using DITypeRef = TypedDINodeRef; class DITypeRefArray { const MDTuple *N = nullptr; @@ -147,7 +148,7 @@ public: /// Tagged DWARF-like metadata node. /// /// A metadata node with a DWARF tag (i.e., a constant named \c DW_TAG_*, -/// defined in llvm/Support/Dwarf.h). Called \a DINode because it's +/// defined in llvm/BinaryFormat/Dwarf.h). Called \a DINode because it's /// potentially used for non-DWARF output. class DINode : public MDNode { friend class LLVMContextImpl; @@ -238,7 +239,8 @@ public: }; template struct simplify_type> { - typedef Metadata *SimpleType; + using SimpleType = Metadata *; + static SimpleType getSimplifiedValue(const TypedDINodeRef &MD) { return MD; } @@ -433,7 +435,7 @@ public: /// Return the raw underlying file. /// - /// An \a DIFile is an \a DIScope, but it doesn't point at a separate file + /// A \a DIFile is a \a DIScope, but it doesn't point at a separate file /// (it\em is the file). If \c this is an \a DIFile, we need to return \c /// this. Otherwise, return the first operand, which is where all other /// subclasses store their file pointer. @@ -797,15 +799,18 @@ public: assert(getTag() == dwarf::DW_TAG_ptr_to_member_type); return DITypeRef(getExtraData()); } + DIObjCProperty *getObjCProperty() const { return dyn_cast_or_null(getExtraData()); } + Constant *getStorageOffsetInBits() const { assert(getTag() == dwarf::DW_TAG_member && isBitField()); if (auto *C = cast_or_null(getExtraData())) return C->getValue(); return nullptr; } + Constant *getConstant() const { assert(getTag() == dwarf::DW_TAG_member && isStaticMember()); if (auto *C = cast_or_null(getExtraData())) @@ -968,9 +973,11 @@ public: #endif replaceOperandWith(4, Elements.get()); } + void replaceVTableHolder(DITypeRef VTableHolder) { replaceOperandWith(5, VTableHolder); } + void replaceTemplateParams(DITemplateParameterArray TemplateParams) { replaceOperandWith(6, TemplateParams.get()); } @@ -1029,6 +1036,7 @@ public: DITypeRefArray getTypeArray() const { return cast_or_null(getRawTypeArray()); } + Metadata *getRawTypeArray() const { return getOperand(3); } static bool classof(const Metadata *MD) { @@ -1317,6 +1325,7 @@ public: unsigned getLine() const { return SubclassData32; } unsigned getColumn() const { return SubclassData16; } DILocalScope *getScope() const { return cast(getRawScope()); } + DILocation *getInlinedAt() const { return cast_or_null(getRawInlinedAt()); } @@ -1450,7 +1459,6 @@ public: static bool classof(const Metadata *MD) { return MD->getMetadataID() == DILocationKind; } - }; /// Subprogram description. @@ -1509,14 +1517,14 @@ class DISubprogram : public DILocalScope { unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags, bool IsOptimized, DICompileUnit *Unit, DITemplateParameterArray TemplateParams, DISubprogram *Declaration, - DILocalVariableArray Variables, StorageType Storage, - bool ShouldCreate = true) { + DILocalVariableArray Variables, DITypeArray ThrownTypes, + StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, Scope, getCanonicalMDString(Context, Name), getCanonicalMDString(Context, LinkageName), File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit, TemplateParams.get(), Declaration, Variables.get(), - Storage, ShouldCreate); + ThrownTypes.get(), Storage, ShouldCreate); } static DISubprogram * getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, @@ -1525,15 +1533,16 @@ class DISubprogram : public DILocalScope { Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit, Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables, - StorageType Storage, bool ShouldCreate = true); + Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate = true); TempDISubprogram cloneImpl() const { - return getTemporary( - getContext(), getScope(), getName(), getLinkageName(), getFile(), - getLine(), getType(), isLocalToUnit(), isDefinition(), getScopeLine(), - getContainingType(), getVirtuality(), getVirtualIndex(), - getThisAdjustment(), getFlags(), isOptimized(), getUnit(), - getTemplateParams(), getDeclaration(), getVariables()); + return getTemporary(getContext(), getScope(), getName(), getLinkageName(), + getFile(), getLine(), getType(), isLocalToUnit(), + isDefinition(), getScopeLine(), getContainingType(), + getVirtuality(), getVirtualIndex(), getThisAdjustment(), + getFlags(), isOptimized(), getUnit(), + getTemplateParams(), getDeclaration(), getVariables(), + getThrownTypes()); } public: @@ -1546,11 +1555,12 @@ public: bool IsOptimized, DICompileUnit *Unit, DITemplateParameterArray TemplateParams = nullptr, DISubprogram *Declaration = nullptr, - DILocalVariableArray Variables = nullptr), + DILocalVariableArray Variables = nullptr, + DITypeArray ThrownTypes = nullptr), (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit, - TemplateParams, Declaration, Variables)) + TemplateParams, Declaration, Variables, ThrownTypes)) DEFINE_MDNODE_GET( DISubprogram, (Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File, @@ -1558,10 +1568,12 @@ public: unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit, Metadata *TemplateParams = nullptr, - Metadata *Declaration = nullptr, Metadata *Variables = nullptr), + Metadata *Declaration = nullptr, Metadata *Variables = nullptr, + Metadata *ThrownTypes = nullptr), (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment, - Flags, IsOptimized, Unit, TemplateParams, Declaration, Variables)) + Flags, IsOptimized, Unit, TemplateParams, Declaration, Variables, + ThrownTypes)) TempDISubprogram clone() const { return cloneImpl(); } @@ -1610,11 +1622,7 @@ public: DIScopeRef getScope() const { return DIScopeRef(getRawScope()); } StringRef getName() const { return getStringOperand(2); } - StringRef getDisplayName() const { return getStringOperand(3); } - StringRef getLinkageName() const { return getStringOperand(4); } - - MDString *getRawName() const { return getOperandAs(2); } - MDString *getRawLinkageName() const { return getOperandAs(4); } + StringRef getLinkageName() const { return getStringOperand(3); } DISubroutineType *getType() const { return cast_or_null(getRawType()); @@ -1626,9 +1634,7 @@ public: DICompileUnit *getUnit() const { return cast_or_null(getRawUnit()); } - void replaceUnit(DICompileUnit *CU) { - replaceOperandWith(7, CU); - } + void replaceUnit(DICompileUnit *CU) { replaceOperandWith(5, CU); } DITemplateParameterArray getTemplateParams() const { return cast_or_null(getRawTemplateParams()); } @@ -1638,14 +1644,26 @@ public: DILocalVariableArray getVariables() const { return cast_or_null(getRawVariables()); } + DITypeArray getThrownTypes() const { + return cast_or_null(getRawThrownTypes()); + } Metadata *getRawScope() const { return getOperand(1); } - Metadata *getRawType() const { return getOperand(5); } - Metadata *getRawContainingType() const { return getOperand(6); } - Metadata *getRawUnit() const { return getOperand(7); } - Metadata *getRawTemplateParams() const { return getOperand(8); } - Metadata *getRawDeclaration() const { return getOperand(9); } - Metadata *getRawVariables() const { return getOperand(10); } + MDString *getRawName() const { return getOperandAs(2); } + MDString *getRawLinkageName() const { return getOperandAs(3); } + Metadata *getRawType() const { return getOperand(4); } + Metadata *getRawUnit() const { return getOperand(5); } + Metadata *getRawDeclaration() const { return getOperand(6); } + Metadata *getRawVariables() const { return getOperand(7); } + Metadata *getRawContainingType() const { + return getNumOperands() > 8 ? getOperandAs(8) : nullptr; + } + Metadata *getRawTemplateParams() const { + return getNumOperands() > 9 ? getOperandAs(9) : nullptr; + } + Metadata *getRawThrownTypes() const { + return getNumOperands() > 10 ? getOperandAs(10) : nullptr; + } /// Check if this subprogram describes the given function. /// @@ -1841,45 +1859,40 @@ class DINamespace : public DIScope { friend class LLVMContextImpl; friend class MDNode; - unsigned Line; unsigned ExportSymbols : 1; - DINamespace(LLVMContext &Context, StorageType Storage, unsigned Line, - bool ExportSymbols, ArrayRef Ops) + DINamespace(LLVMContext &Context, StorageType Storage, bool ExportSymbols, + ArrayRef Ops) : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops), - Line(Line), ExportSymbols(ExportSymbols) {} + ExportSymbols(ExportSymbols) {} ~DINamespace() = default; static DINamespace *getImpl(LLVMContext &Context, DIScope *Scope, - DIFile *File, StringRef Name, unsigned Line, - bool ExportSymbols, StorageType Storage, - bool ShouldCreate = true) { - return getImpl(Context, Scope, File, getCanonicalMDString(Context, Name), - Line, ExportSymbols, Storage, ShouldCreate); + StringRef Name, bool ExportSymbols, + StorageType Storage, bool ShouldCreate = true) { + return getImpl(Context, Scope, getCanonicalMDString(Context, Name), + ExportSymbols, Storage, ShouldCreate); } static DINamespace *getImpl(LLVMContext &Context, Metadata *Scope, - Metadata *File, MDString *Name, unsigned Line, - bool ExportSymbols, StorageType Storage, - bool ShouldCreate = true); + MDString *Name, bool ExportSymbols, + StorageType Storage, bool ShouldCreate = true); TempDINamespace cloneImpl() const { - return getTemporary(getContext(), getScope(), getFile(), getName(), - getLine(), getExportSymbols()); + return getTemporary(getContext(), getScope(), getName(), + getExportSymbols()); } public: - DEFINE_MDNODE_GET(DINamespace, (DIScope * Scope, DIFile *File, StringRef Name, - unsigned Line, bool ExportSymbols), - (Scope, File, Name, Line, ExportSymbols)) DEFINE_MDNODE_GET(DINamespace, - (Metadata * Scope, Metadata *File, MDString *Name, - unsigned Line, bool ExportSymbols), - (Scope, File, Name, Line, ExportSymbols)) + (DIScope *Scope, StringRef Name, bool ExportSymbols), + (Scope, Name, ExportSymbols)) + DEFINE_MDNODE_GET(DINamespace, + (Metadata *Scope, MDString *Name, bool ExportSymbols), + (Scope, Name, ExportSymbols)) TempDINamespace clone() const { return cloneImpl(); } - unsigned getLine() const { return Line; } bool getExportSymbols() const { return ExportSymbols; } DIScope *getScope() const { return cast_or_null(getRawScope()); } StringRef getName() const { return getStringOperand(2); } @@ -2080,6 +2093,7 @@ public: return F->getFilename(); return ""; } + StringRef getDirectory() const { if (auto *F = getFile()) return F->getDirectory(); @@ -2103,9 +2117,6 @@ public: /// variable, or the location of a single piece of a variable, or (when using /// DW_OP_stack_value) is the constant variable value. /// -/// FIXME: Instead of DW_OP_plus taking an argument, this should use DW_OP_const -/// and have DW_OP_plus consume the topmost elements on the stack. -/// /// TODO: Co-allocate the expression elements. /// TODO: Separate from MDNode, or otherwise drop Distinct and Temporary /// storage types. @@ -2136,6 +2147,7 @@ public: ArrayRef getElements() const { return Elements; } unsigned getNumElements() const { return Elements.size(); } + uint64_t getElement(unsigned I) const { assert(I < Elements.size() && "Index out of range"); return Elements[I]; @@ -2144,7 +2156,8 @@ public: /// Determine whether this represents a standalone constant value. bool isConstant() const; - typedef ArrayRef::iterator element_iterator; + using element_iterator = ArrayRef::iterator; + element_iterator elements_begin() const { return getElements().begin(); } element_iterator elements_end() const { return getElements().end(); } @@ -2232,6 +2245,9 @@ public: expr_op_iterator expr_op_end() const { return expr_op_iterator(elements_end()); } + iterator_range expr_ops() const { + return {expr_op_begin(), expr_op_end()}; + } /// @} bool isValid() const; @@ -2240,7 +2256,7 @@ public: return MD->getMetadataID() == DIExpressionKind; } - /// Is the first element a DW_OP_deref?. + /// Return whether the first element a DW_OP_deref. bool startsWithDeref() const { return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref; } @@ -2262,6 +2278,21 @@ public: /// Return whether this is a piece of an aggregate variable. bool isFragment() const { return getFragmentInfo().hasValue(); } + + /// Append \p Ops with operations to apply the \p Offset. + static void appendOffset(SmallVectorImpl &Ops, int64_t Offset); + + /// If this is a constant offset, extract it. If there is no expression, + /// return true with an offset of zero. + bool extractIfOffset(int64_t &Offset) const; + + /// Constants for DIExpression::prepend. + enum { NoDeref = false, WithDeref = true, WithStackValue = true }; + + /// Prepend \p DIExpr with a deref and offset operation and optionally turn it + /// into a stack value. + static DIExpression *prepend(const DIExpression *DIExpr, bool Deref, + int64_t Offset = 0, bool StackValue = false); }; /// Global variables. @@ -2488,6 +2519,7 @@ public: return F->getFilename(); return ""; } + StringRef getDirectory() const { if (auto *F = getFile()) return F->getDirectory(); @@ -2588,10 +2620,13 @@ public: TempDIGlobalVariableExpression clone() const { return cloneImpl(); } Metadata *getRawVariable() const { return getOperand(0); } + DIGlobalVariable *getVariable() const { return cast_or_null(getRawVariable()); } + Metadata *getRawExpression() const { return getOperand(1); } + DIExpression *getExpression() const { return cast_or_null(getRawExpression()); } @@ -2604,7 +2639,8 @@ public: /// Macro Info DWARF-like metadata node. /// /// A metadata node with a DWARF macro info (i.e., a constant named -/// \c DW_MACINFO_*, defined in llvm/Support/Dwarf.h). Called \a DIMacroNode +/// \c DW_MACINFO_*, defined in llvm/BinaryFormat/Dwarf.h). Called \a +/// DIMacroNode /// because it's potentially used for non-DWARF output. class DIMacroNode : public MDNode { friend class LLVMContextImpl; diff --git a/include/llvm/IR/DebugLoc.h b/include/llvm/IR/DebugLoc.h index 202be3da14da3830929ec276beb5bec1012cda64..eef1212abc4b7d97849a4161792dff205b312142 100644 --- a/include/llvm/IR/DebugLoc.h +++ b/include/llvm/IR/DebugLoc.h @@ -80,6 +80,16 @@ namespace llvm { static DebugLoc get(unsigned Line, unsigned Col, const MDNode *Scope, const MDNode *InlinedAt = nullptr); + enum { ReplaceLastInlinedAt = true }; + /// Rebuild the entire inlined-at chain for this instruction so that the top of + /// the chain now is inlined-at the new call site. + /// \param InlinedAt The new outermost inlined-at in the chain. + /// \param ReplaceLast Replace the last location in the inlined-at chain. + static DebugLoc appendInlinedAt(DebugLoc DL, DILocation *InlinedAt, + LLVMContext &Ctx, + DenseMap &Cache, + bool ReplaceLast = false); + unsigned getLine() const; unsigned getCol() const; MDNode *getScope() const; diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h index 05e99157b8dc7838df43a8bb02a1a4b00581167c..a92321a44511237554dc00b4d3ed4d9036b4746e 100644 --- a/include/llvm/IR/DerivedTypes.h +++ b/include/llvm/IR/DerivedTypes.h @@ -1,4 +1,4 @@ -//===-- llvm/DerivedTypes.h - Classes for handling data types ---*- C++ -*-===// +//===- llvm/DerivedTypes.h - Classes for handling data types ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,6 +19,7 @@ #define LLVM_IR_DERIVEDTYPES_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" @@ -122,7 +123,8 @@ public: bool isVarArg() const { return getSubclassData()!=0; } Type *getReturnType() const { return ContainedTys[0]; } - typedef Type::subtype_iterator param_iterator; + using param_iterator = Type::subtype_iterator; + param_iterator param_begin() const { return ContainedTys + 1; } param_iterator param_end() const { return &ContainedTys[NumContainedTys]; } ArrayRef params() const { @@ -197,8 +199,7 @@ public: /// generator for a target expects). /// class StructType : public CompositeType { - StructType(LLVMContext &C) - : CompositeType(C, StructTyID), SymbolTableEntry(nullptr) {} + StructType(LLVMContext &C) : CompositeType(C, StructTyID) {} enum { /// This is the contents of the SubClassData field. @@ -212,7 +213,7 @@ class StructType : public CompositeType { /// symbol table entry (maintained by LLVMContext) for the struct. /// This is null if the type is an literal struct or if it is a identified /// type that has an empty name. - void *SymbolTableEntry; + void *SymbolTableEntry = nullptr; public: StructType(const StructType &) = delete; @@ -228,7 +229,14 @@ public: static StructType *create(LLVMContext &Context, ArrayRef Elements, StringRef Name, bool isPacked = false); static StructType *create(LLVMContext &Context, ArrayRef Elements); - static StructType *create(StringRef Name, Type *elt1, ...) LLVM_END_WITH_NULL; + template + static typename std::enable_if::value, + StructType *>::type + create(StringRef Name, Type *elt1, Tys *... elts) { + assert(elt1 && "Cannot create a struct type with no elements with this"); + SmallVector StructFields({elt1, elts...}); + return create(StructFields, Name); + } /// This static method is the primary way to create a literal StructType. static StructType *get(LLVMContext &Context, ArrayRef Elements, @@ -240,7 +248,15 @@ public: /// This static method is a convenience method for creating structure types by /// specifying the elements as arguments. Note that this method always returns /// a non-packed struct, and requires at least one element type. - static StructType *get(Type *elt1, ...) LLVM_END_WITH_NULL; + template + static typename std::enable_if::value, + StructType *>::type + get(Type *elt1, Tys *... elts) { + assert(elt1 && "Cannot create a struct type with no elements with this"); + LLVMContext &Ctx = elt1->getContext(); + SmallVector StructFields({elt1, elts...}); + return llvm::StructType::get(Ctx, StructFields); + } bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; } @@ -269,13 +285,21 @@ public: /// Specify a body for an opaque identified type. void setBody(ArrayRef Elements, bool isPacked = false); - void setBody(Type *elt1, ...) LLVM_END_WITH_NULL; + + template + typename std::enable_if::value, void>::type + setBody(Type *elt1, Tys *... elts) { + assert(elt1 && "Cannot create a struct type with no elements with this"); + SmallVector StructFields({elt1, elts...}); + setBody(StructFields); + } /// Return true if the specified type is valid as a element type. static bool isValidElementType(Type *ElemTy); // Iterator access to the elements. - typedef Type::subtype_iterator element_iterator; + using element_iterator = Type::subtype_iterator; + element_iterator element_begin() const { return ContainedTys; } element_iterator element_end() const { return &ContainedTys[NumContainedTys];} ArrayRef const elements() const { diff --git a/include/llvm/IR/DerivedUser.h b/include/llvm/IR/DerivedUser.h new file mode 100644 index 0000000000000000000000000000000000000000..4d681e0db611181a9f7713f2bb4129cf0d941ca5 --- /dev/null +++ b/include/llvm/IR/DerivedUser.h @@ -0,0 +1,41 @@ +//===-- DerivedUser.h - Base for non-IR Users -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_DERIVEDUSER_H +#define LLVM_IR_DERIVEDUSER_H + +#include "llvm/IR/User.h" + +namespace llvm { + +/// Extension point for the Value hierarchy. All classes outside of lib/IR +/// that wish to inherit from User should instead inherit from DerivedUser +/// instead. Inheriting from this class is discouraged. +/// +/// Generally speaking, Value is the base of a closed class hierarchy +/// that can't be extended by code outside of lib/IR. This class creates a +/// loophole that allows classes outside of lib/IR to extend User to leverage +/// its use/def list machinery. +class DerivedUser : public User { +protected: + typedef void (*DeleteValueTy)(DerivedUser *); + +private: + friend Value; + DeleteValueTy DeleteValue; + +public: + DerivedUser(Type *Ty, unsigned VK, Use *U, unsigned NumOps, + DeleteValueTy DeleteValue) + : User(Ty, VK, U, NumOps), DeleteValue(DeleteValue) {} +}; + +} // namespace llvm + +#endif // LLVM_IR_DERIVEDUSER_H diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index 458c3cf29b0d1d027432e07e60c0f364cec2efab..15d3325771136f969a31ba84584d9e45101d6196 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -15,7 +15,7 @@ #ifndef LLVM_IR_DIAGNOSTICINFO_H #define LLVM_IR_DIAGNOSTICINFO_H -#include "llvm/ADT/None.h" +#include "llvm-c/Types.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -23,10 +23,9 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm-c/Types.h" -#include #include #include +#include #include #include @@ -120,18 +119,18 @@ public: virtual void print(DiagnosticPrinter &DP) const = 0; }; -typedef std::function DiagnosticHandlerFunction; +using DiagnosticHandlerFunction = std::function; /// Diagnostic information for inline asm reporting. /// This is basically a message and an optional location. class DiagnosticInfoInlineAsm : public DiagnosticInfo { private: /// Optional line information. 0 if not set. - unsigned LocCookie; + unsigned LocCookie = 0; /// Message to be reported. const Twine &MsgStr; /// Optional origin of the problem. - const Instruction *Instr; + const Instruction *Instr = nullptr; public: /// \p MsgStr is the message to be reported to the frontend. @@ -139,8 +138,7 @@ public: /// for the whole life time of the Diagnostic. DiagnosticInfoInlineAsm(const Twine &MsgStr, DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(0), MsgStr(MsgStr), - Instr(nullptr) {} + : DiagnosticInfo(DK_InlineAsm, Severity), MsgStr(MsgStr) {} /// \p LocCookie if non-zero gives the line number for this report. /// \p MsgStr gives the message. @@ -149,7 +147,7 @@ public: DiagnosticInfoInlineAsm(unsigned LocCookie, const Twine &MsgStr, DiagnosticSeverity Severity = DS_Error) : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(LocCookie), - MsgStr(MsgStr), Instr(nullptr) {} + MsgStr(MsgStr) {} /// \p Instr gives the original instruction that triggered the diagnostic. /// \p MsgStr gives the message. @@ -294,10 +292,10 @@ public: DiagnosticInfoSampleProfile(StringRef FileName, const Twine &Msg, DiagnosticSeverity Severity = DS_Error) : DiagnosticInfo(DK_SampleProfile, Severity), FileName(FileName), - LineNum(0), Msg(Msg) {} + Msg(Msg) {} DiagnosticInfoSampleProfile(const Twine &Msg, DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(DK_SampleProfile, Severity), LineNum(0), Msg(Msg) {} + : DiagnosticInfo(DK_SampleProfile, Severity), Msg(Msg) {} /// \see DiagnosticInfo::print. void print(DiagnosticPrinter &DP) const override; @@ -316,7 +314,7 @@ private: /// Line number where the diagnostic occurred. If 0, no line number will /// be emitted in the message. - unsigned LineNum; + unsigned LineNum = 0; /// Message to report. const Twine &Msg; @@ -351,8 +349,9 @@ class DiagnosticLocation { StringRef Filename; unsigned Line = 0; unsigned Column = 0; + public: - DiagnosticLocation() {} + DiagnosticLocation() = default; DiagnosticLocation(const DebugLoc &DL); DiagnosticLocation(const DISubprogram *SP); @@ -796,6 +795,7 @@ private: const Twine &Msg) : OptimizationRemarkAnalysis(DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, Loc, Msg) {} + friend void emitOptimizationRemarkAnalysisFPCommute( LLVMContext &Ctx, const char *PassName, const Function &Fn, const DiagnosticLocation &Loc, const Twine &Msg); @@ -1012,6 +1012,7 @@ public: void print(DiagnosticPrinter &DP) const override; }; + } // end namespace llvm #endif // LLVM_IR_DIAGNOSTICINFO_H diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h index cae03d33a7eedc02bbb68cd83bea14b18b7dfcdc..9be6acc33591979d4979fff8d110984c28ea7f89 100644 --- a/include/llvm/IR/Dominators.h +++ b/include/llvm/IR/Dominators.h @@ -42,7 +42,7 @@ extern template void Calculate>( DominatorTreeBaseByGraphTraits>> &DT, Function &F); -typedef DomTreeNodeBase DomTreeNode; +using DomTreeNode = DomTreeNodeBase; class BasicBlockEdge { const BasicBlock *Start; @@ -66,11 +66,12 @@ public: return End; } + /// Check if this is the only edge between Start and End. bool isSingleEdge() const; }; template <> struct DenseMapInfo { - typedef DenseMapInfo BBInfo; + using BBInfo = DenseMapInfo; static unsigned getHashValue(const BasicBlockEdge *V); @@ -113,7 +114,7 @@ template <> struct DenseMapInfo { /// preceding statements; this is stated only to assist human understanding. class DominatorTree : public DominatorTreeBase { public: - typedef DominatorTreeBase Base; + using Base = DominatorTreeBase; DominatorTree() : DominatorTreeBase(false) {} explicit DominatorTree(Function &F) : DominatorTreeBase(false) { @@ -143,6 +144,11 @@ public: bool dominates(const Instruction *Def, const Use &U) const; bool dominates(const Instruction *Def, const Instruction *User) const; bool dominates(const Instruction *Def, const BasicBlock *BB) const; + + /// Return true if an edge dominates a use. + /// + /// If BBE is not a unique edge between start and end of the edge, it can + /// never dominate the use. bool dominates(const BasicBlockEdge &BBE, const Use &U) const; bool dominates(const BasicBlockEdge &BBE, const BasicBlock *BB) const; @@ -157,6 +163,10 @@ public: /// This should only be used for debugging as it aborts the program if the /// verification fails. void verifyDomTree() const; + + // Pop up a GraphViz/gv window with the Dominator Tree rendered using `dot`. + void viewGraph(const Twine &Name, const Twine &Title); + void viewGraph(); }; //===------------------------------------- @@ -164,9 +174,9 @@ public: // iterable by generic graph iterators. template struct DomTreeGraphTraitsBase { - typedef Node *NodeRef; - typedef ChildIterator ChildIteratorType; - typedef df_iterator> nodes_iterator; + using NodeRef = Node *; + using ChildIteratorType = ChildIterator; + using nodes_iterator = df_iterator>; static NodeRef getEntryNode(NodeRef N) { return N; } static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } @@ -208,7 +218,7 @@ class DominatorTreeAnalysis : public AnalysisInfoMixin { public: /// \brief Provide the result typedef for this analysis pass. - typedef DominatorTree Result; + using Result = DominatorTree; /// \brief Run the analysis pass over a function and produce a dominator tree. DominatorTree run(Function &F, FunctionAnalysisManager &); diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 787361ffca039d488731395abecf44a3c6334562..3496806d936232422540a399e6404baf7f88f385 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -1,4 +1,4 @@ -//===-- llvm/Function.h - Class to represent a single function --*- C++ -*-===// +//===- llvm/Function.h - Class to represent a single function ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,18 +19,22 @@ #define LLVM_IR_FUNCTION_H #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/StringRef.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include #include @@ -40,27 +44,31 @@ namespace llvm { -template class Optional; class AssemblyAnnotationWriter; -class FunctionType; -class LLVMContext; +class Constant; class DISubprogram; +class LLVMContext; +class Module; +template class Optional; +class raw_ostream; +class Type; +class User; class Function : public GlobalObject, public ilist_node { public: - typedef SymbolTableList BasicBlockListType; + using BasicBlockListType = SymbolTableList; // BasicBlock iterators... - typedef BasicBlockListType::iterator iterator; - typedef BasicBlockListType::const_iterator const_iterator; + using iterator = BasicBlockListType::iterator; + using const_iterator = BasicBlockListType::const_iterator; - typedef Argument *arg_iterator; - typedef const Argument *const_arg_iterator; + using arg_iterator = Argument *; + using const_arg_iterator = const Argument *; private: // Important things that make up a function! - BasicBlockListType BasicBlocks; ///< The basic blocks - mutable Argument *Arguments; ///< The formal arguments + BasicBlockListType BasicBlocks; ///< The basic blocks + mutable Argument *Arguments = nullptr; ///< The formal arguments size_t NumArgs; std::unique_ptr SymTab; ///< Symbol table of args/instructions @@ -115,7 +123,7 @@ private: public: Function(const Function&) = delete; void operator=(const Function&) = delete; - ~Function() override; + ~Function(); static Function *Create(FunctionType *Ty, LinkageTypes Linkage, const Twine &N = "", Module *M = nullptr) { @@ -124,10 +132,12 @@ public: // Provide fast operand accessors. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + /// Returns the FunctionType for me. FunctionType *getFunctionType() const { return cast(getValueType()); } + /// Returns the type of the ret val. Type *getReturnType() const { return getFunctionType()->getReturnType(); } @@ -211,7 +221,7 @@ public: /// @brief Remove function attribute from this function. void removeFnAttr(StringRef Kind) { - setAttributes(AttributeSets.removeAttribute( + setAttributes(getAttributes().removeAttribute( getContext(), AttributeList::FunctionIndex, Kind)); } @@ -279,7 +289,16 @@ public: void addAttribute(unsigned i, Attribute Attr); /// @brief adds the attributes to the list of attributes. - void addAttributes(unsigned i, AttributeList Attrs); + void addAttributes(unsigned i, const AttrBuilder &Attrs); + + /// @brief adds the attribute to the list of attributes for the given arg. + void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + + /// @brief adds the attribute to the list of attributes for the given arg. + void addParamAttr(unsigned ArgNo, Attribute Attr); + + /// @brief adds the attributes to the list of attributes for the given arg. + void addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs); /// @brief removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attribute::AttrKind Kind); @@ -288,13 +307,27 @@ public: void removeAttribute(unsigned i, StringRef Kind); /// @brief removes the attributes from the list of attributes. - void removeAttributes(unsigned i, AttributeList Attrs); + void removeAttributes(unsigned i, const AttrBuilder &Attrs); + + /// @brief removes the attribute from the list of attributes. + void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + + /// @brief removes the attribute from the list of attributes. + void removeParamAttr(unsigned ArgNo, StringRef Kind); + + /// @brief removes the attribute from the list of attributes. + void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs); /// @brief check if an attributes is in the list of attributes. bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const { return getAttributes().hasAttribute(i, Kind); } + /// @brief check if an attributes is in the list of attributes. + bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const { + return getAttributes().hasParamAttribute(ArgNo, Kind); + } + Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { return AttributeSets.getAttribute(i, Kind); } @@ -306,27 +339,50 @@ public: /// @brief adds the dereferenceable attribute to the list of attributes. void addDereferenceableAttr(unsigned i, uint64_t Bytes); + /// @brief adds the dereferenceable attribute to the list of attributes for + /// the given arg. + void addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes); + /// @brief adds the dereferenceable_or_null attribute to the list of /// attributes. void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes); + /// @brief adds the dereferenceable_or_null attribute to the list of + /// attributes for the given arg. + void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes); + /// @brief Extract the alignment for a call or parameter (0=unknown). - unsigned getParamAlignment(unsigned i) const { - return AttributeSets.getParamAlignment(i); + unsigned getParamAlignment(unsigned ArgNo) const { + return AttributeSets.getParamAlignment(ArgNo); } /// @brief Extract the number of dereferenceable bytes for a call or /// parameter (0=unknown). + /// @param i AttributeList index, referring to a return value or argument. uint64_t getDereferenceableBytes(unsigned i) const { return AttributeSets.getDereferenceableBytes(i); } + /// @brief Extract the number of dereferenceable bytes for a parameter. + /// @param ArgNo Index of an argument, with 0 being the first function arg. + uint64_t getParamDereferenceableBytes(unsigned ArgNo) const { + return AttributeSets.getParamDereferenceableBytes(ArgNo); + } + /// @brief Extract the number of dereferenceable_or_null bytes for a call or /// parameter (0=unknown). + /// @param i AttributeList index, referring to a return value or argument. uint64_t getDereferenceableOrNullBytes(unsigned i) const { return AttributeSets.getDereferenceableOrNullBytes(i); } + /// @brief Extract the number of dereferenceable_or_null bytes for a + /// parameter. + /// @param ArgNo AttributeList ArgNo, referring to an argument. + uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const { + return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo); + } + /// @brief Determine if the function does not access memory. bool doesNotAccessMemory() const { return hasFnAttribute(Attribute::ReadNone); @@ -411,6 +467,14 @@ public: removeFnAttr(Attribute::Convergent); } + /// @brief Determine if the call has sideeffects. + bool isSpeculatable() const { + return hasFnAttribute(Attribute::Speculatable); + } + void setSpeculatable() { + addFnAttr(Attribute::Speculatable); + } + /// Determine if the function is known not to recurse, directly or /// indirectly. bool doesNotRecurse() const { @@ -435,44 +499,20 @@ public: } /// @brief Determine if the function returns a structure through first - /// pointer argument. + /// or second pointer argument. bool hasStructRetAttr() const { - return AttributeSets.hasAttribute(1, Attribute::StructRet) || - AttributeSets.hasAttribute(2, Attribute::StructRet); + return AttributeSets.hasParamAttribute(0, Attribute::StructRet) || + AttributeSets.hasParamAttribute(1, Attribute::StructRet); } /// @brief Determine if the parameter or return value is marked with NoAlias /// attribute. - /// @param n The parameter to check. 1 is the first parameter, 0 is the return - bool doesNotAlias(unsigned n) const { - return AttributeSets.hasAttribute(n, Attribute::NoAlias); - } - void setDoesNotAlias(unsigned n) { - addAttribute(n, Attribute::NoAlias); - } - - /// @brief Determine if the parameter can be captured. - /// @param n The parameter to check. 1 is the first parameter, 0 is the return - bool doesNotCapture(unsigned n) const { - return AttributeSets.hasAttribute(n, Attribute::NoCapture); - } - void setDoesNotCapture(unsigned n) { - addAttribute(n, Attribute::NoCapture); - } - - bool doesNotAccessMemory(unsigned n) const { - return AttributeSets.hasAttribute(n, Attribute::ReadNone); - } - void setDoesNotAccessMemory(unsigned n) { - addAttribute(n, Attribute::ReadNone); - } - - bool onlyReadsMemory(unsigned n) const { - return doesNotAccessMemory(n) || - AttributeSets.hasAttribute(n, Attribute::ReadOnly); + bool returnDoesNotAlias() const { + return AttributeSets.hasAttribute(AttributeList::ReturnIndex, + Attribute::NoAlias); } - void setOnlyReadsMemory(unsigned n) { - addAttribute(n, Attribute::ReadOnly); + void setReturnDoesNotAlias() { + addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); } /// Optimize this function for minimum size (-Oz). @@ -485,7 +525,7 @@ public: /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a Function) from the Function Src to this one. - void copyAttributesFrom(const GlobalValue *Src) override; + void copyAttributesFrom(const Function *Src); /// deleteBody - This method deletes the body of the function, and converts /// the linkage to external. @@ -498,12 +538,12 @@ public: /// removeFromParent - This method unlinks 'this' from the containing module, /// but does not delete it. /// - void removeFromParent() override; + void removeFromParent(); /// eraseFromParent - This method unlinks 'this' from the containing module /// and deletes it. /// - void eraseFromParent() override; + void eraseFromParent(); /// Steal arguments from another function. /// diff --git a/include/llvm/IR/GetElementPtrTypeIterator.h b/include/llvm/IR/GetElementPtrTypeIterator.h index 490bff29cf3890bdaf76b35ea9b611bb670bf9b4..3c143ea5f703eb7df62f6d88687b0c104f8bab21 100644 --- a/include/llvm/IR/GetElementPtrTypeIterator.h +++ b/include/llvm/IR/GetElementPtrTypeIterator.h @@ -21,7 +21,9 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" +#include #include +#include #include namespace llvm { @@ -29,13 +31,13 @@ namespace llvm { template class generic_gep_type_iterator : public std::iterator { - typedef std::iterator super; + using super = std::iterator; ItTy OpIt; PointerUnion CurTy; enum : uint64_t { Unbounded = -1ull }; uint64_t NumElements = Unbounded; + generic_gep_type_iterator() = default; public: @@ -121,7 +123,7 @@ namespace llvm { } }; - typedef generic_gep_type_iterator<> gep_type_iterator; + using gep_type_iterator = generic_gep_type_iterator<>; inline gep_type_iterator gep_type_begin(const User *GEP) { auto *GEPOp = cast(GEP); diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h index 37a291dfeb7a00fc29204c45e3cfb763c47dd87e..d4bf0d7e1ed41feaad0e1ea2e7ec46f9b2f15fc5 100644 --- a/include/llvm/IR/GlobalAlias.h +++ b/include/llvm/IR/GlobalAlias.h @@ -59,15 +59,19 @@ public: // Linkage, Type, Parent and AddressSpace taken from the Aliasee. static GlobalAlias *create(const Twine &Name, GlobalValue *Aliasee); + void copyAttributesFrom(const GlobalValue *Src) { + GlobalValue::copyAttributesFrom(Src); + } + /// removeFromParent - This method unlinks 'this' from the containing module, /// but does not delete it. /// - void removeFromParent() override; + void removeFromParent(); /// eraseFromParent - This method unlinks 'this' from the containing module /// and deletes it. /// - void eraseFromParent() override; + void eraseFromParent(); /// These methods retrieve and set alias target. void setAliasee(Constant *Aliasee); diff --git a/include/llvm/IR/GlobalIFunc.h b/include/llvm/IR/GlobalIFunc.h index bfaa9960cb13eca89315dc7fb98aa40fb1d326c5..d90c7c78ed268838718dd5f074de6db88bcec491 100644 --- a/include/llvm/IR/GlobalIFunc.h +++ b/include/llvm/IR/GlobalIFunc.h @@ -47,12 +47,16 @@ public: LinkageTypes Linkage, const Twine &Name, Constant *Resolver, Module *Parent); + void copyAttributesFrom(const GlobalIFunc *Src) { + GlobalValue::copyAttributesFrom(Src); + } + /// This method unlinks 'this' from the containing module, but does not /// delete it. - void removeFromParent() final; + void removeFromParent(); /// This method unlinks 'this' from the containing module and deletes it. - void eraseFromParent() final; + void eraseFromParent(); /// These methods retrieve and set ifunc resolver function. void setResolver(Constant *Resolver) { diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h index f3789bafefe3e8830cc2904d4b2fe9321fe2051c..fc38f698027b1d1cf3766d181ca86baf2479adf7 100644 --- a/include/llvm/IR/GlobalObject.h +++ b/include/llvm/IR/GlobalObject.h @@ -150,8 +150,10 @@ public: void addTypeMetadata(unsigned Offset, Metadata *TypeID); - void copyAttributesFrom(const GlobalValue *Src) override; +protected: + void copyAttributesFrom(const GlobalObject *Src); +public: // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Value *V) { return V->getValueID() == Value::FunctionVal || diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index bb30fa8be8674430690ba6b2a7e873f1e5d8bfe6..d65d43cc5957dda76a73627821b413d63fffd445 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -23,9 +23,9 @@ #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Value.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" #include #include #include @@ -161,6 +161,10 @@ protected: Parent = parent; } + ~GlobalValue() { + removeDeadConstantUsers(); // remove any dead constants using this. + } + public: enum ThreadLocalMode { NotThreadLocal = 0, @@ -172,10 +176,6 @@ public: GlobalValue(const GlobalValue &) = delete; - ~GlobalValue() override { - removeDeadConstantUsers(); // remove any dead constants using this. - } - unsigned getAlignment() const; enum class UnnamedAddr { @@ -435,14 +435,20 @@ public: bool isWeakForLinker() const { return isWeakForLinker(getLinkage()); } +protected: /// Copy all additional attributes (those not needed to create a GlobalValue) /// from the GlobalValue Src to this one. - virtual void copyAttributesFrom(const GlobalValue *Src); + void copyAttributesFrom(const GlobalValue *Src); - /// If special LLVM prefix that is used to inform the asm printer to not emit - /// usual symbol prefix before the symbol name is used then return linkage - /// name after skipping this special LLVM prefix. - static StringRef getRealLinkageName(StringRef Name) { +public: + /// If the given string begins with the GlobalValue name mangling escape + /// character '\1', drop it. + /// + /// This function applies a specific mangling that is used in PGO profiles, + /// among other things. If you're trying to get a symbol name for an + /// arbitrary GlobalValue, this is not the function you're looking for; see + /// Mangler.h. + static StringRef dropLLVMManglingEscape(StringRef Name) { if (!Name.empty() && Name[0] == '\1') return Name.substr(1); return Name; @@ -530,10 +536,10 @@ public: /// This method unlinks 'this' from the containing module, but does not delete /// it. - virtual void removeFromParent() = 0; + void removeFromParent(); /// This method unlinks 'this' from the containing module and deletes it. - virtual void eraseFromParent() = 0; + void eraseFromParent(); /// Get the module that this global value is contained inside of... Module *getParent() { return Parent; } diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h index 3b545d811d44bbae6605cf1b7e5678b35c06643a..8255a4f298c088dadaac66eba436125181a473f9 100644 --- a/include/llvm/IR/GlobalVariable.h +++ b/include/llvm/IR/GlobalVariable.h @@ -23,6 +23,7 @@ #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Value.h" @@ -41,6 +42,7 @@ class DIGlobalVariableExpression; class GlobalVariable : public GlobalObject, public ilist_node { friend class SymbolTableListTraits; + AttributeSet Attrs; bool isConstantGlobal : 1; // Is this a global constant? bool isExternallyInitializedConstant : 1; // Is this a global whose value // can change from its initial @@ -64,7 +66,7 @@ public: GlobalVariable(const GlobalVariable &) = delete; GlobalVariable &operator=(const GlobalVariable &) = delete; - ~GlobalVariable() override { + ~GlobalVariable() { dropAllReferences(); // FIXME: needed by operator delete @@ -76,8 +78,6 @@ public: return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - /// Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -156,17 +156,17 @@ public: /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a GlobalVariable) from the GlobalVariable Src to this one. - void copyAttributesFrom(const GlobalValue *Src) override; + void copyAttributesFrom(const GlobalVariable *Src); /// removeFromParent - This method unlinks 'this' from the containing module, /// but does not delete it. /// - void removeFromParent() override; + void removeFromParent(); /// eraseFromParent - This method unlinks 'this' from the containing module /// and deletes it. /// - void eraseFromParent() override; + void eraseFromParent(); /// Drop all references in preparation to destroy the GlobalVariable. This /// drops not only the reference to the initializer but also to any metadata. @@ -178,6 +178,68 @@ public: /// Fill the vector with all debug info attachements. void getDebugInfo(SmallVectorImpl &GVs) const; + /// Add attribute to this global. + void addAttribute(Attribute::AttrKind Kind) { + Attrs = Attrs.addAttribute(getContext(), Kind); + } + + /// Add attribute to this global. + void addAttribute(StringRef Kind, StringRef Val = StringRef()) { + Attrs = Attrs.addAttribute(getContext(), Kind, Val); + } + + /// Return true if the attribute exists. + bool hasAttribute(Attribute::AttrKind Kind) const { + return Attrs.hasAttribute(Kind); + } + + /// Return true if the attribute exists. + bool hasAttribute(StringRef Kind) const { + return Attrs.hasAttribute(Kind); + } + + /// Return true if any attributes exist. + bool hasAttributes() const { + return Attrs.hasAttributes(); + } + + /// Return the attribute object. + Attribute getAttribute(Attribute::AttrKind Kind) const { + return Attrs.getAttribute(Kind); + } + + /// Return the attribute object. + Attribute getAttribute(StringRef Kind) const { + return Attrs.getAttribute(Kind); + } + + /// Return the attribute set for this global + AttributeSet getAttributes() const { + return Attrs; + } + + /// Return attribute set as list with index. + /// FIXME: This may not be required once ValueEnumerators + /// in bitcode-writer can enumerate attribute-set. + AttributeList getAttributesAsList(unsigned index) const { + if (!hasAttributes()) + return AttributeList(); + std::pair AS[1] = {{index, Attrs}}; + return AttributeList::get(getContext(), AS); + } + + /// Set attribute list for this global + void setAttributes(AttributeSet A) { + Attrs = A; + } + + /// Check if section name is present + bool hasImplicitSection() const { + return getAttributes().hasAttribute("bss-section") || + getAttributes().hasAttribute("data-section") || + getAttributes().hasAttribute("rodata-section"); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Value *V) { return V->getValueID() == Value::GlobalVariableVal; diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index bc689f3b01d79c7ed9379cdc4dbc297788dc6286..ec33f82f70224ee9ff11a5094c6ef7dc15cc71d5 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_IRBUILDER_H #define LLVM_IR_IRBUILDER_H +#include "llvm-c/Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/StringRef.h" @@ -41,11 +42,10 @@ #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" -#include "llvm-c/Types.h" +#include #include #include #include -#include #include namespace llvm { @@ -435,6 +435,26 @@ public: MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + /// \brief Create and insert an element unordered-atomic memcpy between the + /// specified pointers. + /// + /// If the pointers aren't i8*, they will be converted. If a TBAA tag is + /// specified, it will be added to the instruction. Likewise with alias.scope + /// and noalias tags. + CallInst *CreateElementUnorderedAtomicMemCpy( + Value *Dst, Value *Src, uint64_t Size, uint32_t ElementSize, + MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { + return CreateElementUnorderedAtomicMemCpy( + Dst, Src, getInt64(Size), ElementSize, TBAATag, TBAAStructTag, ScopeTag, + NoAliasTag); + } + + CallInst *CreateElementUnorderedAtomicMemCpy( + Value *Dst, Value *Src, Value *Size, uint32_t ElementSize, + MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + /// \brief Create and insert a memmove between the specified /// pointers. /// @@ -454,6 +474,45 @@ public: MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + /// \brief Create a vector fadd reduction intrinsic of the source vector. + /// The first parameter is a scalar accumulator value for ordered reductions. + CallInst *CreateFAddReduce(Value *Acc, Value *Src); + + /// \brief Create a vector fmul reduction intrinsic of the source vector. + /// The first parameter is a scalar accumulator value for ordered reductions. + CallInst *CreateFMulReduce(Value *Acc, Value *Src); + + /// \brief Create a vector int add reduction intrinsic of the source vector. + CallInst *CreateAddReduce(Value *Src); + + /// \brief Create a vector int mul reduction intrinsic of the source vector. + CallInst *CreateMulReduce(Value *Src); + + /// \brief Create a vector int AND reduction intrinsic of the source vector. + CallInst *CreateAndReduce(Value *Src); + + /// \brief Create a vector int OR reduction intrinsic of the source vector. + CallInst *CreateOrReduce(Value *Src); + + /// \brief Create a vector int XOR reduction intrinsic of the source vector. + CallInst *CreateXorReduce(Value *Src); + + /// \brief Create a vector integer max reduction intrinsic of the source + /// vector. + CallInst *CreateIntMaxReduce(Value *Src, bool IsSigned = false); + + /// \brief Create a vector integer min reduction intrinsic of the source + /// vector. + CallInst *CreateIntMinReduce(Value *Src, bool IsSigned = false); + + /// \brief Create a vector float max reduction intrinsic of the source + /// vector. + CallInst *CreateFPMaxReduce(Value *Src, bool NoNaN = false); + + /// \brief Create a vector float min reduction intrinsic of the source + /// vector. + CallInst *CreateFPMinReduce(Value *Src, bool NoNaN = false); + /// \brief Create a lifetime.start intrinsic. /// /// If the pointer isn't i8* it will be converted. diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h index 5d2f72d211ff7e82e9d37d5acd4f3b32c7afba89..7f03fcd19b650df29611f86850812c17b8b442f3 100644 --- a/include/llvm/IR/InlineAsm.h +++ b/include/llvm/IR/InlineAsm.h @@ -28,7 +28,7 @@ class FunctionType; class PointerType; template class ConstantUniqueMap; -class InlineAsm : public Value { +class InlineAsm final : public Value { public: enum AsmDialect { AD_ATT, @@ -48,7 +48,6 @@ private: InlineAsm(FunctionType *Ty, const std::string &AsmString, const std::string &Constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect); - ~InlineAsm() override; /// When the ConstantUniqueMap merges two types and makes two InlineAsms /// identical, it destroys one of them with this method. @@ -95,7 +94,7 @@ public: isClobber // '~x' }; - typedef std::vector ConstraintCodeVector; + using ConstraintCodeVector = std::vector; struct SubConstraintInfo { /// MatchingInput - If this is not -1, this is an output constraint where an @@ -112,9 +111,9 @@ public: SubConstraintInfo() = default; }; - typedef std::vector SubConstraintInfoVector; + using SubConstraintInfoVector = std::vector; struct ConstraintInfo; - typedef std::vector ConstraintInfoVector; + using ConstraintInfoVector = std::vector; struct ConstraintInfo { /// Type - The basic type of the constraint: input/output/clobber diff --git a/include/llvm/IR/InstIterator.h b/include/llvm/IR/InstIterator.h index 28fc473f1490f8a8f2edca08517c240da124122b..2988fc935dd5d169aa523f6bf1ede8de9e81c6c8 100644 --- a/include/llvm/IR/InstIterator.h +++ b/include/llvm/IR/InstIterator.h @@ -31,20 +31,20 @@ namespace llvm { // inst_iterator and const_inst_iterator's. // template class InstIterator { - typedef BB_t BBty; - typedef BB_i_t BBIty; - typedef BI_t BIty; - typedef II_t IIty; + using BBty = BB_t; + using BBIty = BB_i_t; + using BIty = BI_t; + using IIty = II_t; BB_t *BBs; // BasicBlocksType BB_i_t BB; // BasicBlocksType::iterator BI_t BI; // BasicBlock::iterator public: - typedef std::bidirectional_iterator_tag iterator_category; - typedef IIty value_type; - typedef signed difference_type; - typedef IIty* pointer; - typedef IIty& reference; + using iterator_category = std::bidirectional_iterator_tag; + using value_type = IIty; + using difference_type = signed; + using pointer = IIty *; + using reference = IIty &; // Default constructor InstIterator() = default; @@ -119,13 +119,15 @@ private: } }; -typedef InstIterator, Function::iterator, - BasicBlock::iterator, Instruction> inst_iterator; -typedef InstIterator, - Function::const_iterator, BasicBlock::const_iterator, - const Instruction> const_inst_iterator; -typedef iterator_range inst_range; -typedef iterator_range const_inst_range; +using inst_iterator = + InstIterator, Function::iterator, + BasicBlock::iterator, Instruction>; +using const_inst_iterator = + InstIterator, + Function::const_iterator, BasicBlock::const_iterator, + const Instruction>; +using inst_range = iterator_range; +using const_inst_range = iterator_range; inline inst_iterator inst_begin(Function *F) { return inst_iterator(*F); } inline inst_iterator inst_end(Function *F) { return inst_iterator(*F, true); } diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index 518094735d72f71e10ac563fb3da556da6a4ce84..b3c6644c7e811ada358816a3066d7a113b6b5b70 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -1,4 +1,4 @@ -//===-- llvm/InstrTypes.h - Important Instruction subclasses ----*- C++ -*-===// +//===- llvm/InstrTypes.h - Important Instruction subclasses -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,19 +17,21 @@ #define LLVM_IR_INSTRTYPES_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include @@ -60,30 +62,15 @@ protected: Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd) : Instruction(Ty, iType, Ops, NumOps, InsertAtEnd) {} - // Out of line virtual method, so the vtable, etc has a home. - ~TerminatorInst() override; - - /// Virtual methods - Terminators should overload these and provide inline - /// overrides of non-V methods. - virtual BasicBlock *getSuccessorV(unsigned idx) const = 0; - virtual unsigned getNumSuccessorsV() const = 0; - virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0; - public: /// Return the number of successors that this terminator has. - unsigned getNumSuccessors() const { - return getNumSuccessorsV(); - } + unsigned getNumSuccessors() const; /// Return the specified successor. - BasicBlock *getSuccessor(unsigned idx) const { - return getSuccessorV(idx); - } + BasicBlock *getSuccessor(unsigned idx) const; /// Update the specified successor to point at the provided block. - void setSuccessor(unsigned idx, BasicBlock *B) { - setSuccessorV(idx, B); - } + void setSuccessor(unsigned idx, BasicBlock *B); // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { @@ -114,17 +101,17 @@ public: template // Successor Iterator class SuccIterator : public std::iterator { - typedef std::iterator - super; + using super = + std::iterator; public: - typedef typename super::pointer pointer; - typedef typename super::reference reference; + using pointer = typename super::pointer; + using reference = typename super::reference; private: Term TermInst; unsigned idx; - typedef SuccIterator Self; + using Self = SuccIterator; inline bool index_is_valid(unsigned idx) { return idx < TermInst->getNumSuccessors(); @@ -260,11 +247,11 @@ public: } }; - typedef SuccIterator succ_iterator; - typedef SuccIterator - succ_const_iterator; - typedef iterator_range succ_range; - typedef iterator_range succ_const_range; + using succ_iterator = SuccIterator; + using succ_const_iterator = + SuccIterator; + using succ_range = iterator_range; + using succ_const_range = iterator_range; private: inline succ_iterator succ_begin() { return succ_iterator(this); } @@ -307,11 +294,6 @@ public: return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - - // Out of line virtual method, so the vtable, etc has a home. - ~UnaryInstruction() override; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -341,14 +323,16 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value) class BinaryOperator : public Instruction { protected: - void init(BinaryOps iType); BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty, const Twine &Name, Instruction *InsertBefore); BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd); + void init(BinaryOps iType); + // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; + BinaryOperator *cloneImpl() const; public: @@ -357,8 +341,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -576,8 +558,6 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value) /// if (isa(Instr)) { ... } /// @brief Base class of casting instructions. class CastInst : public UnaryInstruction { - void anchor() override; - protected: /// @brief Constructor with insert-before-instruction semantics for subclasses CastInst(Type *Ty, unsigned iType, Value *S, @@ -922,18 +902,12 @@ protected: Value *LHS, Value *RHS, const Twine &Name, BasicBlock *InsertAtEnd); - void anchor() override; // Out of line virtual method. - public: - CmpInst() = delete; - // allocate space for exactly two operands void *operator new(size_t s) { return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Construct a compare instruction, given the opcode, the predicate and /// the two operands. Optionally (if InstBefore is specified) insert the /// instruction into a BasicBlock right before the specified instruction. @@ -1059,18 +1033,6 @@ public: return isFalseWhenEqual(getPredicate()); } - /// @brief Determine if Pred1 implies Pred2 is true when two compares have - /// matching operands. - bool isImpliedTrueByMatchingCmp(Predicate Pred2) const { - return isImpliedTrueByMatchingCmp(getPredicate(), Pred2); - } - - /// @brief Determine if Pred1 implies Pred2 is false when two compares have - /// matching operands. - bool isImpliedFalseByMatchingCmp(Predicate Pred2) const { - return isImpliedFalseByMatchingCmp(getPredicate(), Pred2); - } - /// @returns true if the predicate is unsigned, false otherwise. /// @brief Determine if the predicate is an unsigned operation. static bool isUnsigned(Predicate predicate); @@ -1137,8 +1099,6 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value) //===----------------------------------------------------------------------===// class FuncletPadInst : public Instruction { private: - void init(Value *ParentPad, ArrayRef Args, const Twine &NameStr); - FuncletPadInst(const FuncletPadInst &CPI); explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, @@ -1148,11 +1108,14 @@ private: ArrayRef Args, unsigned Values, const Twine &NameStr, BasicBlock *InsertAtEnd); + void init(Value *ParentPad, ArrayRef Args, const Twine &NameStr); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; friend class CatchPadInst; friend class CleanupPadInst; + FuncletPadInst *cloneImpl() const; public: @@ -1273,7 +1236,8 @@ public: ArrayRef inputs() const { return Inputs; } - typedef typename std::vector::const_iterator input_iterator; + using input_iterator = typename std::vector::const_iterator; + size_t input_size() const { return Inputs.size(); } input_iterator input_begin() const { return Inputs.begin(); } input_iterator input_end() const { return Inputs.end(); } @@ -1281,8 +1245,8 @@ public: StringRef getTag() const { return Tag; } }; -typedef OperandBundleDefT OperandBundleDef; -typedef OperandBundleDefT ConstOperandBundleDef; +using OperandBundleDef = OperandBundleDefT; +using ConstOperandBundleDef = OperandBundleDefT; /// \brief A mixin to add operand bundle functionality to llvm instruction /// classes. @@ -1565,8 +1529,8 @@ protected: return OperandBundleUse(BOI.Tag, Inputs); } - typedef BundleOpInfo *bundle_op_iterator; - typedef const BundleOpInfo *const_bundle_op_iterator; + using bundle_op_iterator = BundleOpInfo *; + using const_bundle_op_iterator = const BundleOpInfo *; /// \brief Return the start of the list of BundleOpInfo instances associated /// with this OperandBundleUser. @@ -1666,6 +1630,6 @@ protected: } }; -} // end llvm namespace +} // end namespace llvm #endif // LLVM_IR_INSTRTYPES_H diff --git a/include/llvm/IR/Instruction.def b/include/llvm/IR/Instruction.def index 18711abb8060d6f2a90486dd35d46fc826883f83..86617299c44ac6a2713a4fc8a4be7e325eace047 100644 --- a/include/llvm/IR/Instruction.def +++ b/include/llvm/IR/Instruction.def @@ -102,6 +102,10 @@ #define LAST_OTHER_INST(num) #endif +#ifndef HANDLE_USER_INST +#define HANDLE_USER_INST(num, opc, Class) HANDLE_OTHER_INST(num, opc, Class) +#endif + // Terminator Instructions - These instructions are used to terminate a basic // block of the program. Every basic block must end with one of these // instructions for it to be a well formed basic block. @@ -185,8 +189,8 @@ HANDLE_OTHER_INST(52, FCmp , FCmpInst ) // Floating point comparison instr. HANDLE_OTHER_INST(53, PHI , PHINode ) // PHI node instruction HANDLE_OTHER_INST(54, Call , CallInst ) // Call a function HANDLE_OTHER_INST(55, Select , SelectInst ) // select instruction -HANDLE_OTHER_INST(56, UserOp1, Instruction) // May be used internally in a pass -HANDLE_OTHER_INST(57, UserOp2, Instruction) // Internal to passes only +HANDLE_USER_INST (56, UserOp1, Instruction) // May be used internally in a pass +HANDLE_USER_INST (57, UserOp2, Instruction) // Internal to passes only HANDLE_OTHER_INST(58, VAArg , VAArgInst ) // vaarg instruction HANDLE_OTHER_INST(59, ExtractElement, ExtractElementInst)// extract from vector HANDLE_OTHER_INST(60, InsertElement, InsertElementInst) // insert into vector @@ -220,6 +224,8 @@ HANDLE_OTHER_INST(64, LandingPad, LandingPadInst) // Landing pad instruction. #undef HANDLE_OTHER_INST #undef LAST_OTHER_INST +#undef HANDLE_USER_INST + #ifdef HANDLE_INST #undef HANDLE_INST #endif diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index 90c3175122fd8f240d196bc6bec3b58e91efdc66..00c431834e31436516449078008d9629836fda7c 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -16,9 +16,9 @@ #define LLVM_IR_INSTRUCTION_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/None.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/IR/User.h" @@ -36,6 +36,10 @@ class FastMathFlags; class MDNode; struct AAMDNodes; +template <> struct ilist_alloc_traits { + static inline void deleteNode(Instruction *V); +}; + class Instruction : public User, public ilist_node_with_parent { BasicBlock *Parent; @@ -47,13 +51,13 @@ class Instruction : public User, HasMetadataBit = 1 << 15 }; +protected: + ~Instruction(); // Use deleteValue() to delete a generic Instruction. + public: Instruction(const Instruction &) = delete; Instruction &operator=(const Instruction &) = delete; - // Out of line virtual method, so the vtable, etc has a home. - ~Instruction() override; - /// Specialize the methods defined in Value, as we know that an instruction /// can only be used by other instructions. Instruction *user_back() { return cast(*user_begin());} @@ -356,9 +360,9 @@ public: /// Copy I's fast-math flags void copyFastMathFlags(const Instruction *I); - /// Convenience method to copy supported wrapping, exact, and fast-math flags - /// from V to this instruction. - void copyIRFlags(const Value *V); + /// Convenience method to copy supported exact, fast-math, and (optionally) + /// wrapping flags from V to this instruction. + void copyIRFlags(const Value *V, bool IncludeWrapFlags = true); /// Logical 'and' of any supported wrapping, exact, and fast-math flags of /// V and this instruction. @@ -456,6 +460,12 @@ public: /// higher. bool isAtomic() const; + /// Return true if this atomic instruction loads from memory. + bool hasAtomicLoad() const; + + /// Return true if this atomic instruction stores to memory. + bool hasAtomicStore() const; + /// Return true if this instruction may throw an exception. bool mayThrow() const; @@ -634,6 +644,10 @@ private: Instruction *cloneImpl() const; }; +inline void ilist_alloc_traits::deleteNode(Instruction *V) { + V->deleteValue(); +} + } // end namespace llvm #endif // LLVM_IR_INSTRUCTION_H diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 9e4b49925a691fe775689a23e58a34eee6167a66..b3032f54aa424caf819b3a904b653a0ba37a885f 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -1,4 +1,4 @@ -//===-- llvm/Instructions.h - Instruction subclass definitions --*- C++ -*-===// +//===- llvm/Instructions.h - Instruction subclass definitions ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,28 +17,33 @@ #define LLVM_IR_INSTRUCTIONS_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include #include #include +#include namespace llvm { @@ -84,9 +89,6 @@ public: AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, unsigned Align, const Twine &Name, BasicBlock *InsertAtEnd); - // Out of line virtual method, so the vtable, etc. has a home. - ~AllocaInst() override; - /// Return true if there is an allocation size parameter to the allocation /// instruction that is not 1. bool isArrayAllocation() const; @@ -264,6 +266,7 @@ public: } bool isSimple() const { return !isAtomic() && !isVolatile(); } + bool isUnordered() const { return (getOrdering() == AtomicOrdering::NotAtomic || getOrdering() == AtomicOrdering::Unordered) && @@ -273,10 +276,11 @@ public: Value *getPointerOperand() { return getOperand(0); } const Value *getPointerOperand() const { return getOperand(0); } static unsigned getPointerOperandIndex() { return 0U; } + Type *getPointerOperandType() const { return getPointerOperand()->getType(); } /// Returns the address space of the pointer operand. unsigned getPointerAddressSpace() const { - return getPointerOperand()->getType()->getPointerAddressSpace(); + return getPointerOperandType()->getPointerAddressSpace(); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -333,8 +337,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Return true if this is a store to a volatile memory location. bool isVolatile() const { return getSubclassDataFromInstruction() & 1; } @@ -385,6 +387,7 @@ public: } bool isSimple() const { return !isAtomic() && !isVolatile(); } + bool isUnordered() const { return (getOrdering() == AtomicOrdering::NotAtomic || getOrdering() == AtomicOrdering::Unordered) && @@ -397,10 +400,11 @@ public: Value *getPointerOperand() { return getOperand(1); } const Value *getPointerOperand() const { return getOperand(1); } static unsigned getPointerOperandIndex() { return 1U; } + Type *getPointerOperandType() const { return getPointerOperand()->getType(); } /// Returns the address space of the pointer operand. unsigned getPointerAddressSpace() const { - return getPointerOperand()->getType()->getPointerAddressSpace(); + return getPointerOperandType()->getPointerAddressSpace(); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -454,8 +458,6 @@ public: return User::operator new(s, 0); } - void *operator new(size_t, unsigned) = delete; - /// Returns the ordering effect of this fence. AtomicOrdering getOrdering() const { return AtomicOrdering(getSubclassDataFromInstruction() >> 1); @@ -532,8 +534,6 @@ public: return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Return true if this is a cmpxchg from a volatile memory /// location. /// @@ -722,8 +722,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - BinOp getOperation() const { return static_cast(getSubclassDataFromInstruction() >> 5); } @@ -834,10 +832,7 @@ class GetElementPtrInst : public Instruction { Type *SourceElementType; Type *ResultElementType; - void anchor() override; - GetElementPtrInst(const GetElementPtrInst &GEPI); - void init(Value *Ptr, ArrayRef IdxList, const Twine &NameStr); /// Constructors - Create a getelementptr instruction with a base pointer an /// list of indices. The first ctor can optionally insert before an existing @@ -850,6 +845,8 @@ class GetElementPtrInst : public Instruction { ArrayRef IdxList, unsigned Values, const Twine &NameStr, BasicBlock *InsertAtEnd); + void init(Value *Ptr, ArrayRef IdxList, const Twine &NameStr); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -1103,8 +1100,6 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value) /// must be identical types. /// Represent an integer comparison operator. class ICmpInst: public CmpInst { - void anchor() override; - void AssertOK() { assert(getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE && getPredicate() <= CmpInst::LAST_ICMP_PREDICATE && @@ -1417,8 +1412,6 @@ protected: CallInst *cloneImpl() const; public: - ~CallInst() override; - static CallInst *Create(Value *Func, ArrayRef Args, ArrayRef Bundles = None, const Twine &NameStr = "", @@ -1656,12 +1649,24 @@ public: /// adds the attribute to the list of attributes. void addAttribute(unsigned i, Attribute Attr); + /// Adds the attribute to the indicated argument + void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + + /// Adds the attribute to the indicated argument + void addParamAttr(unsigned ArgNo, Attribute Attr); + /// removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attribute::AttrKind Kind); /// removes the attribute from the list of attributes. void removeAttribute(unsigned i, StringRef Kind); + /// Removes the attribute from the given argument + void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + + /// Removes the attribute from the given argument + void removeParamAttr(unsigned ArgNo, StringRef Kind); + /// adds the dereferenceable attribute to the list of attributes. void addDereferenceableAttr(unsigned i, uint64_t Bytes); @@ -1681,8 +1686,11 @@ public: return hasFnAttrImpl(Kind); } - /// Determine whether the call or the callee has the given attributes. - bool paramHasAttr(unsigned i, Attribute::AttrKind Kind) const; + /// Determine whether the return value has the given attribute. + bool hasRetAttr(Attribute::AttrKind Kind) const; + + /// Determine whether the argument or parameter has the given attribute. + bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const; /// Get the attribute of a given kind at a position. Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { @@ -1694,6 +1702,18 @@ public: return getAttributes().getAttribute(i, Kind); } + /// Get the attribute of a given kind from a given arg + Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + return getAttributes().getParamAttr(ArgNo, Kind); + } + + /// Get the attribute of a given kind from a given arg + Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + return getAttributes().getParamAttr(ArgNo, Kind); + } + /// Return true if the data operand at index \p i has the attribute \p /// A. /// @@ -1709,9 +1729,12 @@ public: /// (\p i - 1) in the operand list. bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const; + /// Extract the alignment of the return value. + unsigned getRetAlignment() const { return Attrs.getRetAlignment(); } + /// Extract the alignment for a call or parameter (0=unknown). - unsigned getParamAlignment(unsigned i) const { - return Attrs.getParamAlignment(i); + unsigned getParamAlignment(unsigned ArgNo) const { + return Attrs.getParamAlignment(ArgNo); } /// Extract the number of dereferenceable bytes for a call or @@ -1726,11 +1749,9 @@ public: return Attrs.getDereferenceableOrNullBytes(i); } - /// @brief Determine if the parameter or return value is marked with NoAlias - /// attribute. - /// @param n The parameter to check. 1 is the first parameter, 0 is the return - bool doesNotAlias(unsigned n) const { - return Attrs.hasAttribute(n, Attribute::NoAlias); + /// @brief Determine if the return value is marked with NoAlias attribute. + bool returnDoesNotAlias() const { + return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); } /// Return true if the call should not be treated as a call to a @@ -1821,7 +1842,7 @@ public: return false; // Be friendly and also check the callee. - return paramHasAttr(1, Attribute::StructRet); + return paramHasAttr(0, Attribute::StructRet); } /// Determine if any call argument is an aggregate passed by value. @@ -2249,6 +2270,19 @@ public: return Mask; } + /// Change values in a shuffle permute mask assuming the two vector operands + /// of length InVecNumElts have swapped position. + static void commuteShuffleMask(MutableArrayRef Mask, + unsigned InVecNumElts) { + for (int &Idx : Mask) { + if (Idx == -1) + continue; + Idx = Idx < (int)InVecNumElts ? Idx + InVecNumElts : Idx - InVecNumElts; + assert(Idx >= 0 && Idx < (int)InVecNumElts * 2 && + "shufflevector mask index out of range"); + } + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ShuffleVector; @@ -2276,6 +2310,7 @@ class ExtractValueInst : public UnaryInstruction { SmallVector Indices; ExtractValueInst(const ExtractValueInst &EVI); + /// Constructors - Create a extractvalue instruction with a base aggregate /// value and a list of indices. The first ctor can optionally insert before /// an existing instruction, the second appends the new instruction to the @@ -2288,9 +2323,6 @@ class ExtractValueInst : public UnaryInstruction { ArrayRef Idxs, const Twine &NameStr, BasicBlock *InsertAtEnd); - // allocate space for exactly one operand - void *operator new(size_t s) { return User::operator new(s, 1); } - void init(ArrayRef Idxs, const Twine &NameStr); protected: @@ -2321,7 +2353,8 @@ public: /// Null is returned if the indices are invalid for the specified type. static Type *getIndexedType(Type *Agg, ArrayRef Idxs); - typedef const unsigned* idx_iterator; + using idx_iterator = const unsigned*; + inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { @@ -2424,8 +2457,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - static InsertValueInst *Create(Value *Agg, Value *Val, ArrayRef Idxs, const Twine &NameStr = "", @@ -2443,7 +2474,8 @@ public: /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - typedef const unsigned* idx_iterator; + using idx_iterator = const unsigned*; + inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { @@ -2534,7 +2566,6 @@ class PHINode : public Instruction { unsigned ReservedSpace; PHINode(const PHINode &PN); - // allocate space for exactly zero operands explicit PHINode(Type *Ty, unsigned NumReservedValues, const Twine &NameStr = "", @@ -2553,12 +2584,6 @@ class PHINode : public Instruction { allocHungoffUses(ReservedSpace); } - void *operator new(size_t s) { - return User::operator new(s); - } - - void anchor() override; - protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2573,8 +2598,6 @@ protected: } public: - void *operator new(size_t, unsigned) = delete; - /// Constructors - NumReservedValues is a hint for the number of incoming /// edges that this phi node will have (use 0 if you really have no idea). static PHINode *Create(Type *Ty, unsigned NumReservedValues, @@ -2594,8 +2617,8 @@ public: // Block iterator interface. This provides access to the list of incoming // basic blocks, which parallels the list of incoming values. - typedef BasicBlock **block_iterator; - typedef BasicBlock * const *const_block_iterator; + using block_iterator = BasicBlock **; + using const_block_iterator = BasicBlock * const *; block_iterator block_begin() { Use::UserRef *ref = @@ -2644,9 +2667,11 @@ public: "All operands to PHI node must be the same type as the PHI node!"); setOperand(i, V); } + static unsigned getOperandNumForIncomingValue(unsigned i) { return i; } + static unsigned getIncomingValueNumForOperand(unsigned i) { return i; } @@ -2790,8 +2815,6 @@ protected: LandingPadInst *cloneImpl() const; public: - void *operator new(size_t, unsigned) = delete; - /// Constructors - NumReservedClauses is a hint for the number of incoming /// clauses that this landingpad will have (use 0 if you really have no idea). static LandingPadInst *Create(Type *RetTy, unsigned NumReservedClauses, @@ -2890,8 +2913,6 @@ protected: ReturnInst *cloneImpl() const; public: - ~ReturnInst() override; - static ReturnInst* Create(LLVMContext &C, Value *retVal = nullptr, Instruction *InsertBefore = nullptr) { return new(!!retVal) ReturnInst(C, retVal, InsertBefore); @@ -2925,9 +2946,15 @@ public: } private: - BasicBlock *getSuccessorV(unsigned idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned idx, BasicBlock *B) override; + friend TerminatorInst; + + BasicBlock *getSuccessor(unsigned idx) const { + llvm_unreachable("ReturnInst has no successors!"); + } + + void setSuccessor(unsigned idx, BasicBlock *B) { + llvm_unreachable("ReturnInst has no successors!"); + } }; template <> @@ -3033,11 +3060,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - BasicBlock *getSuccessorV(unsigned idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned idx, BasicBlock *B) override; }; template <> @@ -3091,8 +3113,6 @@ protected: SwitchInst *cloneImpl() const; public: - void *operator new(size_t, unsigned) = delete; - // -2 static const unsigned DefaultPseudoIndex = static_cast(~0L-1); @@ -3111,7 +3131,7 @@ public: protected: // Expose the switch type we're parameterized with to the iterator. - typedef SwitchInstT SwitchInstType; + using SwitchInstType = SwitchInstT; SwitchInstT *SI; ptrdiff_t Index; @@ -3152,8 +3172,8 @@ public: } }; - typedef CaseHandleImpl - ConstCaseHandle; + using ConstCaseHandle = + CaseHandleImpl; class CaseHandle : public CaseHandleImpl { @@ -3180,7 +3200,7 @@ public: : public iterator_facade_base, std::random_access_iterator_tag, CaseHandleT> { - typedef typename CaseHandleT::SwitchInstType SwitchInstT; + using SwitchInstT = typename CaseHandleT::SwitchInstType; CaseHandleT Case; @@ -3242,8 +3262,8 @@ public: const CaseHandleT &operator*() const { return Case; } }; - typedef CaseIteratorImpl CaseIt; - typedef CaseIteratorImpl ConstCaseIt; + using CaseIt = CaseIteratorImpl; + using ConstCaseIt = CaseIteratorImpl; static SwitchInst *Create(Value *Value, BasicBlock *Default, unsigned NumCases, @@ -3397,11 +3417,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - BasicBlock *getSuccessorV(unsigned idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned idx, BasicBlock *B) override; }; template <> @@ -3451,8 +3466,6 @@ protected: IndirectBrInst *cloneImpl() const; public: - void *operator new(size_t, unsigned) = delete; - static IndirectBrInst *Create(Value *Address, unsigned NumDests, Instruction *InsertBefore = nullptr) { return new IndirectBrInst(Address, NumDests, InsertBefore); @@ -3502,11 +3515,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - BasicBlock *getSuccessorV(unsigned idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned idx, BasicBlock *B) override; }; template <> @@ -3627,6 +3635,7 @@ public: return new (Values) InvokeInst(Func, IfNormal, IfException, Args, None, Values, NameStr, InsertAtEnd); } + static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, ArrayRef Bundles, @@ -3742,12 +3751,18 @@ public: /// adds the attribute to the list of attributes. void addAttribute(unsigned i, Attribute Attr); + /// Adds the attribute to the indicated argument + void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + /// removes the attribute from the list of attributes. void removeAttribute(unsigned i, Attribute::AttrKind Kind); /// removes the attribute from the list of attributes. void removeAttribute(unsigned i, StringRef Kind); + /// Removes the attribute from the given argument + void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); + /// adds the dereferenceable attribute to the list of attributes. void addDereferenceableAttr(unsigned i, uint64_t Bytes); @@ -3767,8 +3782,11 @@ public: return hasFnAttrImpl(Kind); } - /// Determine whether the call or the callee has the given attributes. - bool paramHasAttr(unsigned i, Attribute::AttrKind Kind) const; + /// Determine whether the return value has the given attribute. + bool hasRetAttr(Attribute::AttrKind Kind) const; + + /// Determine whether the argument or parameter has the given attribute. + bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const; /// Get the attribute of a given kind at a position. Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { @@ -3796,9 +3814,12 @@ public: /// (\p i - 1) in the operand list. bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const; + /// Extract the alignment of the return value. + unsigned getRetAlignment() const { return Attrs.getRetAlignment(); } + /// Extract the alignment for a call or parameter (0=unknown). - unsigned getParamAlignment(unsigned i) const { - return Attrs.getParamAlignment(i); + unsigned getParamAlignment(unsigned ArgNo) const { + return Attrs.getParamAlignment(ArgNo); } /// Extract the number of dereferenceable bytes for a call or @@ -3813,11 +3834,9 @@ public: return Attrs.getDereferenceableOrNullBytes(i); } - /// @brief Determine if the parameter or return value is marked with NoAlias - /// attribute. - /// @param n The parameter to check. 1 is the first parameter, 0 is the return - bool doesNotAlias(unsigned n) const { - return Attrs.hasAttribute(n, Attribute::NoAlias); + /// @brief Determine if the return value is marked with NoAlias attribute. + bool returnDoesNotAlias() const { + return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); } /// Return true if the call should not be treated as a call to a @@ -3902,7 +3921,7 @@ public: return false; // Be friendly and also check the callee. - return paramHasAttr(1, Attribute::StructRet); + return paramHasAttr(0, Attribute::StructRet); } /// Determine if any call argument is an aggregate passed by value. @@ -3974,10 +3993,6 @@ public: } private: - BasicBlock *getSuccessorV(unsigned idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned idx, BasicBlock *B) override; - template bool hasFnAttrImpl(AttrKind Kind) const { if (Attrs.hasAttribute(AttributeList::FunctionIndex, Kind)) return true; @@ -4073,9 +4088,15 @@ public: } private: - BasicBlock *getSuccessorV(unsigned idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned idx, BasicBlock *B) override; + friend TerminatorInst; + + BasicBlock *getSuccessor(unsigned idx) const { + llvm_unreachable("ResumeInst has no successors!"); + } + + void setSuccessor(unsigned idx, BasicBlock *NewSucc) { + llvm_unreachable("ResumeInst has no successors!"); + } }; template <> @@ -4127,8 +4148,6 @@ protected: CatchSwitchInst *cloneImpl() const; public: - void *operator new(size_t, unsigned) = delete; - static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumHandlers, const Twine &NameStr = "", @@ -4180,13 +4199,14 @@ private: } public: - typedef std::pointer_to_unary_function DerefFnTy; - typedef mapped_iterator handler_iterator; - typedef iterator_range handler_range; - typedef std::pointer_to_unary_function - ConstDerefFnTy; - typedef mapped_iterator const_handler_iterator; - typedef iterator_range const_handler_range; + using DerefFnTy = std::pointer_to_unary_function; + using handler_iterator = mapped_iterator; + using handler_range = iterator_range; + using ConstDerefFnTy = + std::pointer_to_unary_function; + using const_handler_iterator = + mapped_iterator; + using const_handler_range = iterator_range; /// Returns an iterator that points to the first handler in CatchSwitchInst. handler_iterator handler_begin() { @@ -4254,11 +4274,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -private: - BasicBlock *getSuccessorV(unsigned Idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned Idx, BasicBlock *B) override; }; template <> @@ -4421,9 +4436,17 @@ public: } private: - BasicBlock *getSuccessorV(unsigned Idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned Idx, BasicBlock *B) override; + friend TerminatorInst; + + BasicBlock *getSuccessor(unsigned Idx) const { + assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); + return getSuccessor(); + } + + void setSuccessor(unsigned Idx, BasicBlock *B) { + assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); + setSuccessor(B); + } }; template <> @@ -4509,9 +4532,17 @@ public: } private: - BasicBlock *getSuccessorV(unsigned Idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned Idx, BasicBlock *B) override; + friend TerminatorInst; + + BasicBlock *getSuccessor(unsigned Idx) const { + assert(Idx == 0); + return getUnwindDest(); + } + + void setSuccessor(unsigned Idx, BasicBlock *B) { + assert(Idx == 0); + setUnwindDest(B); + } // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -4551,8 +4582,6 @@ public: return User::operator new(s, 0); } - void *operator new(size_t, unsigned) = delete; - unsigned getNumSuccessors() const { return 0; } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -4564,9 +4593,15 @@ public: } private: - BasicBlock *getSuccessorV(unsigned idx) const override; - unsigned getNumSuccessorsV() const override; - void setSuccessorV(unsigned idx, BasicBlock *B) override; + friend TerminatorInst; + + BasicBlock *getSuccessor(unsigned idx) const { + llvm_unreachable("UnreachableInst has no successors!"); + } + + void setSuccessor(unsigned idx, BasicBlock *B) { + llvm_unreachable("UnreachableInst has no successors!"); + } }; //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h index f69b5bfc0be2643a251232c882988fecaef78915..e0dd3ca7d01e39f7ea4fed33b49a259ac3bc4f3b 100644 --- a/include/llvm/IR/IntrinsicInst.h +++ b/include/llvm/IR/IntrinsicInst.h @@ -171,6 +171,7 @@ namespace llvm { ebStrict }; + bool isUnaryOp() const; RoundingMode getRoundingMode() const; ExceptionBehavior getExceptionBehavior() const; @@ -182,6 +183,18 @@ namespace llvm { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: return true; default: return false; } @@ -192,25 +205,91 @@ namespace llvm { }; /// This class represents atomic memcpy intrinsic - /// TODO: Integrate this class into MemIntrinsic hierarchy. - class ElementAtomicMemCpyInst : public IntrinsicInst { + /// TODO: Integrate this class into MemIntrinsic hierarchy; for now this is + /// C&P of all methods from that hierarchy + class ElementUnorderedAtomicMemCpyInst : public IntrinsicInst { + private: + enum { ARG_DEST = 0, ARG_SOURCE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 }; + public: - Value *getRawDest() const { return getArgOperand(0); } - Value *getRawSource() const { return getArgOperand(1); } + Value *getRawDest() const { + return const_cast(getArgOperand(ARG_DEST)); + } + const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); } + Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); } + + /// Return the arguments to the instruction. + Value *getRawSource() const { + return const_cast(getArgOperand(ARG_SOURCE)); + } + const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } + Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } + + Value *getLength() const { + return const_cast(getArgOperand(ARG_LENGTH)); + } + const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); } + Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); } + + bool isVolatile() const { return false; } + + Value *getRawElementSizeInBytes() const { + return const_cast(getArgOperand(ARG_ELEMENTSIZE)); + } + + ConstantInt *getElementSizeInBytesCst() const { + return cast(getRawElementSizeInBytes()); + } + + uint32_t getElementSizeInBytes() const { + return getElementSizeInBytesCst()->getZExtValue(); + } + + /// This is just like getRawDest, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getDest() const { return getRawDest()->stripPointerCasts(); } + + /// This is just like getRawSource, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getSource() const { return getRawSource()->stripPointerCasts(); } + + unsigned getDestAddressSpace() const { + return cast(getRawDest()->getType())->getAddressSpace(); + } - Value *getNumElements() const { return getArgOperand(2); } - void setNumElements(Value *V) { setArgOperand(2, V); } + unsigned getSourceAddressSpace() const { + return cast(getRawSource()->getType())->getAddressSpace(); + } - uint64_t getSrcAlignment() const { return getParamAlignment(1); } - uint64_t getDstAlignment() const { return getParamAlignment(2); } + /// Set the specified arguments of the instruction. + void setDest(Value *Ptr) { + assert(getRawDest()->getType() == Ptr->getType() && + "setDest called with pointer of wrong type!"); + setArgOperand(ARG_DEST, Ptr); + } + + void setSource(Value *Ptr) { + assert(getRawSource()->getType() == Ptr->getType() && + "setSource called with pointer of wrong type!"); + setArgOperand(ARG_SOURCE, Ptr); + } + + void setLength(Value *L) { + assert(getLength()->getType() == L->getType() && + "setLength called with value of wrong type!"); + setArgOperand(ARG_LENGTH, L); + } - uint64_t getElementSizeInBytes() const { - Value *Arg = getArgOperand(3); - return cast(Arg)->getZExtValue(); + void setElementSizeInBytes(Constant *V) { + assert(V->getType() == Type::getInt8Ty(getContext()) && + "setElementSizeInBytes called with value of wrong type!"); + setArgOperand(ARG_ELEMENTSIZE, V); } static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::memcpy_element_atomic; + return I->getIntrinsicID() == Intrinsic::memcpy_element_unordered_atomic; } static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h index 2f6bdf8ecf19faa8e672641aa9e72c0ffb0acc6f..fc79da7ae0e66b5acfd45b2dd085a523a87ce22d 100644 --- a/include/llvm/IR/Intrinsics.h +++ b/include/llvm/IR/Intrinsics.h @@ -100,7 +100,7 @@ namespace Intrinsic { Void, VarArg, MMX, Token, Metadata, Half, Float, Double, Integer, Vector, Pointer, Struct, Argument, ExtendArgument, TruncArgument, HalfVecArgument, - SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfPtrsToElt + SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt } Kind; union { @@ -119,25 +119,43 @@ namespace Intrinsic { AK_AnyVector, AK_AnyPointer }; + unsigned getArgumentNumber() const { assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == PtrToElt || Kind == VecOfPtrsToElt); + Kind == PtrToElt); return Argument_Info >> 3; } ArgKind getArgumentKind() const { assert(Kind == Argument || Kind == ExtendArgument || Kind == TruncArgument || Kind == HalfVecArgument || - Kind == SameVecWidthArgument || Kind == PtrToArgument || - Kind == VecOfPtrsToElt); + Kind == SameVecWidthArgument || Kind == PtrToArgument); return (ArgKind)(Argument_Info & 7); } + // VecOfAnyPtrsToElt uses both an overloaded argument (for address space) + // and a reference argument (for matching vector width and element types) + unsigned getOverloadArgNumber() const { + assert(Kind == VecOfAnyPtrsToElt); + return Argument_Info >> 16; + } + unsigned getRefArgNumber() const { + assert(Kind == VecOfAnyPtrsToElt); + return Argument_Info & 0xFFFF; + } + static IITDescriptor get(IITDescriptorKind K, unsigned Field) { IITDescriptor Result = { K, { Field } }; return Result; } + + static IITDescriptor get(IITDescriptorKind K, unsigned short Hi, + unsigned short Lo) { + unsigned Field = Hi << 16 | Lo; + IITDescriptor Result = {K, {Field}}; + return Result; + } }; /// Return the IIT table descriptor for the specified intrinsic into an array diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 5b796e7dfcbfa3d15b067010de108d5e710c507d..45936a6e9b66d5cf43d6eec07c4c8b991008b781 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -98,6 +98,18 @@ def IntrNoDuplicate : IntrinsicProperty; // Parallels the convergent attribute on LLVM IR functions. def IntrConvergent : IntrinsicProperty; +// This property indicates that the intrinsic is safe to speculate. +def IntrSpeculatable : IntrinsicProperty; + +// This property can be used to override the 'has no other side effects' +// language of the IntrNoMem, IntrReadMem, IntrWriteMem, and IntrArgMemOnly +// intrinsic properties. By default, intrinsics are assumed to have side +// effects, so this property is only necessary if you have defined one of +// the memory properties listed above. +// For this property, 'side effects' has the same meaning as 'side effects' +// defined by the hasSideEffects property of the TableGen Instruction class. +def IntrHasSideEffects : IntrinsicProperty; + //===----------------------------------------------------------------------===// // Types used by intrinsics. //===----------------------------------------------------------------------===// @@ -143,7 +155,7 @@ class LLVMVectorSameWidth } class LLVMPointerTo : LLVMMatchType; class LLVMPointerToElt : LLVMMatchType; -class LLVMVectorOfPointersToElt : LLVMMatchType; +class LLVMVectorOfAnyPointersToElt : LLVMMatchType; // Match the type of another intrinsic parameter that is expected to be a // vector type, but change the element count to be half as many @@ -392,7 +404,7 @@ def int_memset : Intrinsic<[], // FIXME: Add version of these floating point intrinsics which allow non-default // rounding modes and FP exception handling. -let IntrProperties = [IntrNoMem] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_fma : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; @@ -428,10 +440,12 @@ let IntrProperties = [IntrNoMem] in { } def int_minnum : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, Commutative] + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, Commutative] >; def int_maxnum : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, Commutative] + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, Commutative] >; // NOTE: these are internal interfaces. @@ -443,7 +457,7 @@ def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>; // Internal interface for object size checking def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty], - [IntrNoMem]>, + [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__builtin_object_size">; //===--------------- Constrained Floating Point Intrinsics ----------------===// @@ -475,8 +489,64 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + + // These intrinsics are sensitive to the rounding mode so we need constrained + // versions of each of them. When strict rounding and exception control are + // not required the non-constrained versions of these intrinsics should be + // used. + def int_experimental_constrained_sqrt : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_powi : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_i32_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_sin : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_cos : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_pow : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_log : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_log10: Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_log2 : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_exp2 : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_rint : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_nearbyint : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; } -// FIXME: Add intrinsic for fcmp, fptrunc, fpext, fptoui and fptosi. +// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi. +// FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round? //===------------------------- Expect Intrinsics --------------------------===// @@ -488,7 +558,7 @@ def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, // // None of these intrinsics accesses memory at all. -let IntrProperties = [IntrNoMem] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; @@ -499,10 +569,11 @@ let IntrProperties = [IntrNoMem] in { //===------------------------ Debugger Intrinsics -------------------------===// // -// None of these intrinsics accesses memory at all...but that doesn't mean the -// optimizers can change them aggressively. Special handling needed in a few -// places. -let IntrProperties = [IntrNoMem] in { +// None of these intrinsics accesses memory at all...but that doesn't +// mean the optimizers can change them aggressively. Special handling +// needed in a few places. These synthetic intrinsics have no +// side-effects and just mark information about their operands. +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_dbg_declare : Intrinsic<[], [llvm_metadata_ty, llvm_metadata_ty, @@ -580,24 +651,24 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], // Expose the carry flag from add operations on two integrals. def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; //===------------------------- Memory Use Markers -------------------------===// // @@ -615,9 +686,16 @@ def int_invariant_end : Intrinsic<[], llvm_anyptr_ty], [IntrArgMemOnly, NoCapture<2>]>; +// invariant.group.barrier can't be marked with 'readnone' (IntrNoMem), +// because it would cause CSE of two barriers with the same argument. +// Readonly and argmemonly says that barrier only reads its argument and +// it can be CSE only if memory didn't change between 2 barriers call, +// which is valid. +// The argument also can't be marked with 'returned' attribute, because +// it would remove barrier. def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], - [IntrNoMem]>; + [IntrReadMem, IntrArgMemOnly]>; //===------------------------ Stackmap Intrinsics -------------------------===// // @@ -739,14 +817,14 @@ def int_masked_load : Intrinsic<[llvm_anyvector_ty], [IntrReadMem, IntrArgMemOnly]>; def int_masked_gather: Intrinsic<[llvm_anyvector_ty], - [LLVMVectorOfPointersToElt<0>, llvm_i32_ty, + [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>], [IntrReadMem]>; def int_masked_scatter: Intrinsic<[], [llvm_anyvector_ty, - LLVMVectorOfPointersToElt<0>, llvm_i32_ty, + LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, LLVMVectorSameWidth<0, llvm_i1_ty>]>; def int_masked_expandload: Intrinsic<[llvm_anyvector_ty], @@ -773,14 +851,71 @@ def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty], def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty], [IntrReadMem, IntrArgMemOnly]>; +// Xray intrinsics +//===----------------------------------------------------------------------===// +// Custom event logging for x-ray. +// Takes a pointer to a string and the length of the string. +def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], + [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>; +//===----------------------------------------------------------------------===// + //===------ Memory intrinsics with element-wise atomicity guarantees ------===// // -def int_memcpy_element_atomic : Intrinsic<[], - [llvm_anyptr_ty, llvm_anyptr_ty, - llvm_i64_ty, llvm_i32_ty], - [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, - WriteOnly<0>, ReadOnly<1>]>; +// @llvm.memcpy.element.unordered.atomic.*(dest, src, length, elementsize) +def int_memcpy_element_unordered_atomic + : Intrinsic<[], + [ + llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty + ], + [ + IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, + ReadOnly<1> + ]>; + +//===------------------------ Reduction Intrinsics ------------------------===// +// +def int_experimental_vector_reduce_fadd : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyfloat_ty, + llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_fmul : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyfloat_ty, + llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_add : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_mul : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_and : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_or : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_xor : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_smax : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_smin : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_umax : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_umin : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_fmax : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_fmin : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; //===----- Intrinsics that are used to provide predicate information -----===// diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 5415c6b0d1518f0ea9ff8baff9b7a3279a51832c..8017223c4ab006696943cd916c7dcd9b7de4ae75 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// class AMDGPUReadPreloadRegisterIntrinsic - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; class AMDGPUReadPreloadRegisterIntrinsicNamed - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, GCCBuiltin; + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin; let TargetPrefix = "r600" in { @@ -47,7 +47,8 @@ def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">, // AS 7 is PARAM_I_ADDRESS, used for kernel arguments def int_r600_implicitarg_ptr : GCCBuiltin<"__builtin_r600_implicitarg_ptr">, - Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + Intrinsic<[LLVMQualPointerType], [], + [IntrNoMem, IntrSpeculatable]>; def int_r600_rat_store_typed : // 1st parameter: Data @@ -57,15 +58,15 @@ def int_r600_rat_store_typed : GCCBuiltin<"__builtin_r600_rat_store_typed">; def int_r600_recipsqrt_ieee : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_r600_recipsqrt_clamped : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_r600_cube : Intrinsic< - [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem] + [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] >; } // End TargetPrefix = "r600" @@ -82,31 +83,51 @@ defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named def int_amdgcn_dispatch_ptr : GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, - Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + Intrinsic<[LLVMQualPointerType], [], + [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_queue_ptr : GCCBuiltin<"__builtin_amdgcn_queue_ptr">, - Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + Intrinsic<[LLVMQualPointerType], [], + [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_kernarg_segment_ptr : GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">, - Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + Intrinsic<[LLVMQualPointerType], [], + [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_implicitarg_ptr : GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">, - Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + Intrinsic<[LLVMQualPointerType], [], + [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_groupstaticsize : GCCBuiltin<"__builtin_amdgcn_groupstaticsize">, - Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_dispatch_id : GCCBuiltin<"__builtin_amdgcn_dispatch_id">, - Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_implicit_buffer_ptr : GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">, - Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + Intrinsic<[LLVMQualPointerType], [], + [IntrNoMem, IntrSpeculatable]>; + +// Set EXEC to the 64-bit value given. +// This is always moved to the beginning of the basic block. +def int_amdgcn_init_exec : Intrinsic<[], + [llvm_i64_ty], // 64-bit literal constant + [IntrConvergent]>; + +// Set EXEC according to a thread count packed in an SGPR input: +// thread_count = (input >> bitoffset) & 0x7f; +// This is always moved to the beginning of the basic block. +def int_amdgcn_init_exec_from_input : Intrinsic<[], + [llvm_i32_ty, // 32-bit SGPR input + llvm_i32_ty], // bit offset of the thread count + [IntrConvergent]>; + //===----------------------------------------------------------------------===// // Instruction Intrinsics @@ -135,115 +156,129 @@ def int_amdgcn_div_scale : Intrinsic< // second. (0 = first, 1 = second). [llvm_anyfloat_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_div_fmas : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], - [IntrNoMem] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_div_fixup : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem] + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_trig_preop : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sin : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cos : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_log_clamp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rcp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] + Intrinsic<[llvm_float_ty], [llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rsq : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">, Intrinsic< - [llvm_float_ty], [llvm_float_ty], [IntrNoMem] + [llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_rsq_clamp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; def int_amdgcn_ldexp : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_frexp_mant : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_frexp_exp : Intrinsic< - [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem] + [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem, IntrSpeculatable] >; // v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0 // and always uses rtz, so is not suitable for implementing the OpenCL // fract function. It should be ok on VI. def int_amdgcn_fract : Intrinsic< - [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cvt_pkrtz : Intrinsic< - [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] + [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_class : Intrinsic< - [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem] + [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_fmed3 : GCCBuiltin<"__builtin_amdgcn_fmed3">, Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem] + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubeid : GCCBuiltin<"__builtin_amdgcn_cubeid">, Intrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] + [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubema : GCCBuiltin<"__builtin_amdgcn_cubema">, Intrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] + [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubesc : GCCBuiltin<"__builtin_amdgcn_cubesc">, Intrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] + [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">, Intrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] + [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; // v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz // should be used. def int_amdgcn_sffbh : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable] +>; // Fields should mirror atomicrmw @@ -527,7 +562,19 @@ def int_amdgcn_s_decperflevel : def int_amdgcn_s_getreg : GCCBuiltin<"__builtin_amdgcn_s_getreg">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], + [IntrReadMem, IntrSpeculatable] +>; + +// int_amdgcn_s_getpc is provided to allow a specific style of position +// independent code to determine the high part of its address when it is +// known (through convention) that the code and any data of interest does +// not cross a 4Gb address boundary. Use for any other purpose may not +// produce the desired results as optimizations may cause code movement, +// especially as we explicitly use IntrNoMem to allow optimizations. +def int_amdgcn_s_getpc : + GCCBuiltin<"__builtin_amdgcn_s_getpc">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>; // __builtin_amdgcn_interp_mov , , , // param values: 0 = P10, 1 = P20, 2 = P0 @@ -535,23 +582,24 @@ def int_amdgcn_interp_mov : GCCBuiltin<"__builtin_amdgcn_interp_mov">, Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; // __builtin_amdgcn_interp_p1 , , , +// This intrinsic reads from lds, but the memory values are constant, +// so it behaves like IntrNoMem. def int_amdgcn_interp_p1 : GCCBuiltin<"__builtin_amdgcn_interp_p1">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; // This intrinsic reads from lds, but the memory - // values are constant, so it behaves like IntrNoMem. + [IntrNoMem, IntrSpeculatable]>; // __builtin_amdgcn_interp_p2 , , , , def int_amdgcn_interp_p2 : GCCBuiltin<"__builtin_amdgcn_interp_p2">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; // See int_amdgcn_v_interp_p1 for why this is - // IntrNoMem. + [IntrNoMem, IntrSpeculatable]>; + // See int_amdgcn_v_interp_p1 for why this is IntrNoMem. // Pixel shaders only: whether the current pixel is live (i.e. not a helper // invocation for derivative computation). @@ -574,48 +622,68 @@ def int_amdgcn_ds_swizzle : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] + [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] + [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_lerp : GCCBuiltin<"__builtin_amdgcn_lerp">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; def int_amdgcn_sad_u8 : GCCBuiltin<"__builtin_amdgcn_sad_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; def int_amdgcn_msad_u8 : GCCBuiltin<"__builtin_amdgcn_msad_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; def int_amdgcn_sad_hi_u8 : GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; def int_amdgcn_sad_u16 : GCCBuiltin<"__builtin_amdgcn_sad_u16">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; def int_amdgcn_qsad_pk_u16_u8 : GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable] +>; def int_amdgcn_mqsad_pk_u16_u8 : GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable] +>; def int_amdgcn_mqsad_u32_u8 : GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">, - Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], + [IntrNoMem, IntrSpeculatable] +>; def int_amdgcn_cvt_pk_u8_f32 : GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; def int_amdgcn_icmp : Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty], @@ -629,10 +697,22 @@ def int_amdgcn_readfirstlane : GCCBuiltin<"__builtin_amdgcn_readfirstlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>; +// The lane argument must be uniform across the currently active threads of the +// current wave. Otherwise, the result is undefined. def int_amdgcn_readlane : GCCBuiltin<"__builtin_amdgcn_readlane">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>; +def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; + +def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] +>; + //===----------------------------------------------------------------------===// // CI+ Intrinsics //===----------------------------------------------------------------------===// @@ -714,6 +794,7 @@ def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>; // Emit 2.5 ulp, no denormal division. Should only be inserted by // pass based on !fpmath metadata. def int_amdgcn_fdiv_fast : Intrinsic< - [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] + [llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; } diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index 18ed24be56d4e59f126deec4734432e726dc46e4..fe3861301689a6850eb83e3a80b4f2b62671e5d0 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -22,12 +22,26 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". // and return value are essentially chains, used to force ordering during ISel. def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +// 16-bit multiplications +def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smulbt : GCCBuiltin<"__builtin_arm_smulbt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smultb : GCCBuiltin<"__builtin_arm_smultb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smultt : GCCBuiltin<"__builtin_arm_smultt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smulwb : GCCBuiltin<"__builtin_arm_smulwb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smulwt : GCCBuiltin<"__builtin_arm_smulwt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + //===----------------------------------------------------------------------===// // Saturating Arithmetic def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + [Commutative, IntrNoMem]>; def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">, @@ -35,6 +49,176 @@ def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">, def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +// Accumulating multiplications +def int_arm_smlabb : GCCBuiltin<"__builtin_arm_smlabb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_smlabt : GCCBuiltin<"__builtin_arm_smlabt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_smlatb : GCCBuiltin<"__builtin_arm_smlatb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_smlatt : GCCBuiltin<"__builtin_arm_smlatt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_smlawb : GCCBuiltin<"__builtin_arm_smlawb">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_smlawt : GCCBuiltin<"__builtin_arm_smlawt">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +// Parallel 16-bit saturation +def int_arm_ssat16 : GCCBuiltin<"__builtin_arm_ssat16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_usat16 : GCCBuiltin<"__builtin_arm_usat16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +// Packing and unpacking +def int_arm_sxtab16 : GCCBuiltin<"__builtin_arm_sxtab16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_sxtb16 : GCCBuiltin<"__builtin_arm_sxtb16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_arm_uxtab16 : GCCBuiltin<"__builtin_arm_uxtab16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uxtb16 : GCCBuiltin<"__builtin_arm_uxtb16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +// Parallel selection, reads the GE flags. +def int_arm_sel : GCCBuiltin<"__builtin_arm_sel">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; + +// Parallel 8-bit addition and subtraction +def int_arm_qadd8 : GCCBuiltin<"__builtin_arm_qadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_qsub8 : GCCBuiltin<"__builtin_arm_qsub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +// Writes to the GE bits. +def int_arm_sadd8 : GCCBuiltin<"__builtin_arm_sadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_shadd8 : GCCBuiltin<"__builtin_arm_shadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_shsub8 : GCCBuiltin<"__builtin_arm_shsub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +// Writes to the GE bits. +def int_arm_ssub8 : GCCBuiltin<"__builtin_arm_ssub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +// Writes to the GE bits. +def int_arm_uadd8 : GCCBuiltin<"__builtin_arm_uadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_uhadd8 : GCCBuiltin<"__builtin_arm_uhadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uhsub8 : GCCBuiltin<"__builtin_arm_uhsub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqadd8 : GCCBuiltin<"__builtin_arm_uqadd8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqsub8 : GCCBuiltin<"__builtin_arm_uqsub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +// Writes to the GE bits. +def int_arm_usub8 : GCCBuiltin<"__builtin_arm_usub8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +// Sum of 8-bit absolute differences +def int_arm_usad8 : GCCBuiltin<"__builtin_arm_usad8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_usada8 : GCCBuiltin<"__builtin_arm_usada8">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +// Parallel 16-bit addition and subtraction +def int_arm_qadd16 : GCCBuiltin<"__builtin_arm_qadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_qasx : GCCBuiltin<"__builtin_arm_qasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_qsax : GCCBuiltin<"__builtin_arm_qsax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_qsub16 : GCCBuiltin<"__builtin_arm_qsub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +// Writes to the GE bits. +def int_arm_sadd16 : GCCBuiltin<"__builtin_arm_sadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +// Writes to the GE bits. +def int_arm_sasx : GCCBuiltin<"__builtin_arm_sasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_shadd16 : GCCBuiltin<"__builtin_arm_shadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_shasx : GCCBuiltin<"__builtin_arm_shasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_shsax : GCCBuiltin<"__builtin_arm_shsax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_shsub16 : GCCBuiltin<"__builtin_arm_shsub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +// Writes to the GE bits. +def int_arm_ssax : GCCBuiltin<"__builtin_arm_ssax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +// Writes to the GE bits. +def int_arm_ssub16 : GCCBuiltin<"__builtin_arm_ssub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +// Writes to the GE bits. +def int_arm_uadd16 : GCCBuiltin<"__builtin_arm_uadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +// Writes to the GE bits. +def int_arm_uasx : GCCBuiltin<"__builtin_arm_uasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_uhadd16 : GCCBuiltin<"__builtin_arm_uhadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uhasx : GCCBuiltin<"__builtin_arm_uhasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uhsax : GCCBuiltin<"__builtin_arm_uhsax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uhsub16 : GCCBuiltin<"__builtin_arm_uhsub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqadd16 : GCCBuiltin<"__builtin_arm_uqadd16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqasx : GCCBuiltin<"__builtin_arm_uqasx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqsax : GCCBuiltin<"__builtin_arm_uqsax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_uqsub16 : GCCBuiltin<"__builtin_arm_uqsub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +// Writes to the GE bits. +def int_arm_usax : GCCBuiltin<"__builtin_arm_usax">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +// Writes to the GE bits. +def int_arm_usub16 : GCCBuiltin<"__builtin_arm_usub16">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + +// Parallel 16-bit multiplication +def int_arm_smlad : GCCBuiltin<"__builtin_arm_smlad">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_smladx : GCCBuiltin<"__builtin_arm_smladx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_smlald : GCCBuiltin<"__builtin_arm_smlald">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_arm_smlaldx : GCCBuiltin<"__builtin_arm_smlaldx">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_arm_smlsd : GCCBuiltin<"__builtin_arm_smlsd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_smlsdx : GCCBuiltin<"__builtin_arm_smlsdx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; +def int_arm_smlsld : GCCBuiltin<"__builtin_arm_smlsld">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_arm_smlsldx : GCCBuiltin<"__builtin_arm_smlsldx">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; +def int_arm_smuad : GCCBuiltin<"__builtin_arm_smuad">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smuadx : GCCBuiltin<"__builtin_arm_smuadx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smusd : GCCBuiltin<"__builtin_arm_smusd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_arm_smusdx : GCCBuiltin<"__builtin_arm_smusdx">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + + //===----------------------------------------------------------------------===// // Load, Store and Clear exclusive diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td index 64240a929782607eb3682785b108feb5eaeac39b..6321bb81b8cbc4e6c14c238dd7882a3c62a14aa6 100644 --- a/include/llvm/IR/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -1132,4 +1132,6 @@ def int_ppc_tsuspend : GCCBuiltin<"__builtin_tsuspend">, def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">, Intrinsic<[llvm_i64_ty], [], []>; + +def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>; } diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index d3cce634479893136a941e8609142e8ca2ab8770..1c466e73eb1bbeeae8ca14eca78e5d15210534be 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -785,12 +785,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrNoMem, Commutative]>; } -// Cacheability support ops -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse41_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa">, - Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>; -} - // Test instruction with bitwise comparison. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">, @@ -2346,8 +2340,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty], [IntrNoMem, Commutative]>; - def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">, - Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>; } //===----------------------------------------------------------------------===// @@ -3228,6 +3220,29 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [IntrNoMem]>; } +//===----------------------------------------------------------------------===// +// LWP +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_llwpcb : + GCCBuiltin<"__builtin_ia32_llwpcb">, + Intrinsic<[], [llvm_ptr_ty], []>; + def int_x86_slwpcb : + GCCBuiltin<"__builtin_ia32_slwpcb">, + Intrinsic<[llvm_ptr_ty], [], []>; + def int_x86_lwpins32 : + GCCBuiltin<"__builtin_ia32_lwpins32">, + Intrinsic<[llvm_i8_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_lwpins64 : + GCCBuiltin<"__builtin_ia32_lwpins64">, + Intrinsic<[llvm_i8_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_lwpval32 : + GCCBuiltin<"__builtin_ia32_lwpval32">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_lwpval64 : + GCCBuiltin<"__builtin_ia32_lwpval64">, + Intrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty], []>; +} + //===----------------------------------------------------------------------===// // MMX @@ -6345,10 +6360,6 @@ let TargetPrefix = "x86" in { GCCBuiltin<"__builtin_ia32_cmpsd_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_movntdqa : - GCCBuiltin<"__builtin_ia32_movntdqa512">, - Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>; } //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index d13d5ddaeb3c6665f7f5873896ee299a0a8f2b47..ad011fb72e6a1a05f2e68e5b5f47d5b74fc832f5 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -1,4 +1,4 @@ -//===-- llvm/LLVMContext.h - Class for managing "global" state --*- C++ -*-===// +//===- llvm/LLVMContext.h - Class for managing "global" state ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -37,7 +37,9 @@ class StringRef; class Twine; namespace yaml { + class Output; + } // end namespace yaml /// This is an important class for using LLVM in a threaded context. It @@ -134,17 +136,17 @@ public: void enableDebugTypeODRUniquing(); void disableDebugTypeODRUniquing(); - typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context, - unsigned LocCookie); + using InlineAsmDiagHandlerTy = void (*)(const SMDiagnostic&, void *Context, + unsigned LocCookie); /// Defines the type of a diagnostic handler. /// \see LLVMContext::setDiagnosticHandler. /// \see LLVMContext::diagnose. - typedef void (*DiagnosticHandlerTy)(const DiagnosticInfo &DI, void *Context); + using DiagnosticHandlerTy = void (*)(const DiagnosticInfo &DI, void *Context); /// Defines the type of a yield callback. /// \see LLVMContext::setYieldCallback. - typedef void (*YieldCallbackTy)(LLVMContext *Context, void *OpaqueHandle); + using YieldCallbackTy = void (*)(LLVMContext *Context, void *OpaqueHandle); /// setInlineAsmDiagnosticHandler - This method sets a handler that is invoked /// when problems with inline asm are detected by the backend. The first diff --git a/include/llvm/IR/LegacyPassManager.h b/include/llvm/IR/LegacyPassManager.h index 5257a0eed488cd9b1574a146e8e5d91a9f3bb8f6..9a376a151505e0770a663dd5b6fe5890c92e7f37 100644 --- a/include/llvm/IR/LegacyPassManager.h +++ b/include/llvm/IR/LegacyPassManager.h @@ -98,6 +98,9 @@ private: // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_STDCXX_CONVERSION_FUNCTIONS(legacy::PassManagerBase, LLVMPassManagerRef) +/// If -time-passes has been specified, report the timings immediately and then +/// reset the timers to zero. +void reportAndResetTimings(); } // End llvm namespace #endif diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 0647e4253d3cf63c086ab00d40797f5db4956b3e..d538c2595393e3dfd4c404b841d91d7991fc9e5a 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -19,16 +19,17 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" -#include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Constant.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" +#include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include @@ -78,7 +79,7 @@ public: protected: Metadata(unsigned ID, StorageType Storage) : SubclassID(ID), Storage(Storage), SubclassData16(0), SubclassData32(0) { - static_assert(sizeof(*this) == 8, "Metdata fields poorly packed"); + static_assert(sizeof(*this) == 8, "Metadata fields poorly packed"); } ~Metadata() = default; @@ -133,6 +134,14 @@ public: /// @} }; +// Create wrappers for C Binding types (see CBindingWrapping.h). +DEFINE_ISA_CONVERSION_FUNCTIONS(Metadata, LLVMMetadataRef) + +// Specialized opaque metadata conversions. +inline Metadata **unwrap(LLVMMetadataRef *MDs) { + return reinterpret_cast(MDs); +} + #define HANDLE_METADATA(CLASS) class CLASS; #include "llvm/IR/Metadata.def" @@ -165,12 +174,13 @@ class MetadataAsValue : public Value { Metadata *MD; MetadataAsValue(Type *Ty, Metadata *MD); - ~MetadataAsValue() override; /// \brief Drop use of metadata (during teardown). void dropUse() { MD = nullptr; } public: + ~MetadataAsValue(); + static MetadataAsValue *get(LLVMContext &Context, Metadata *MD); static MetadataAsValue *getIfExists(LLVMContext &Context, Metadata *MD); Metadata *getMetadata() const { return MD; } @@ -1213,6 +1223,7 @@ public: // FIXME: Fix callers and remove condition on N. unsigned size() const { return N ? N->getNumOperands() : 0u; } + bool empty() const { return N ? N->getNumOperands() == 0 : true; } T *operator[](unsigned I) const { return cast_or_null(N->getOperand(I)); } // FIXME: Fix callers and remove condition on N. diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index 70c57cf90addcb022df17c63e43b1a2a90bb8f93..d47d82a57bffb9c50ff30b7eefcd9ed1454f9670 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -1,4 +1,4 @@ -//===-- llvm/Module.h - C++ class to represent a VM module ------*- C++ -*-===// +//===- llvm/Module.h - C++ class to represent a VM module -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,7 +15,12 @@ #ifndef LLVM_IR_MODULE_H #define LLVM_IR_MODULE_H +#include "llvm-c/Types.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -23,20 +28,26 @@ #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/SymbolTableListTraits.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/DataTypes.h" +#include +#include +#include +#include +#include +#include namespace llvm { -template class Optional; + class Error; class FunctionType; class GVMaterializer; class LLVMContext; class MemoryBuffer; class RandomNumberGenerator; -class StructType; template class SmallPtrSetImpl; +class StructType; /// A Module instance is used to store all the information related to an /// LLVM module. Modules are the top level container of all other LLVM @@ -54,47 +65,47 @@ class Module { /// @{ public: /// The type for the list of global variables. - typedef SymbolTableList GlobalListType; + using GlobalListType = SymbolTableList; /// The type for the list of functions. - typedef SymbolTableList FunctionListType; + using FunctionListType = SymbolTableList; /// The type for the list of aliases. - typedef SymbolTableList AliasListType; + using AliasListType = SymbolTableList; /// The type for the list of ifuncs. - typedef SymbolTableList IFuncListType; + using IFuncListType = SymbolTableList; /// The type for the list of named metadata. - typedef ilist NamedMDListType; + using NamedMDListType = ilist; /// The type of the comdat "symbol" table. - typedef StringMap ComdatSymTabType; + using ComdatSymTabType = StringMap; /// The Global Variable iterator. - typedef GlobalListType::iterator global_iterator; + using global_iterator = GlobalListType::iterator; /// The Global Variable constant iterator. - typedef GlobalListType::const_iterator const_global_iterator; + using const_global_iterator = GlobalListType::const_iterator; /// The Function iterators. - typedef FunctionListType::iterator iterator; + using iterator = FunctionListType::iterator; /// The Function constant iterator - typedef FunctionListType::const_iterator const_iterator; + using const_iterator = FunctionListType::const_iterator; /// The Function reverse iterator. - typedef FunctionListType::reverse_iterator reverse_iterator; + using reverse_iterator = FunctionListType::reverse_iterator; /// The Function constant reverse iterator. - typedef FunctionListType::const_reverse_iterator const_reverse_iterator; + using const_reverse_iterator = FunctionListType::const_reverse_iterator; /// The Global Alias iterators. - typedef AliasListType::iterator alias_iterator; + using alias_iterator = AliasListType::iterator; /// The Global Alias constant iterator - typedef AliasListType::const_iterator const_alias_iterator; + using const_alias_iterator = AliasListType::const_iterator; /// The Global IFunc iterators. - typedef IFuncListType::iterator ifunc_iterator; + using ifunc_iterator = IFuncListType::iterator; /// The Global IFunc constant iterator - typedef IFuncListType::const_iterator const_ifunc_iterator; + using const_ifunc_iterator = IFuncListType::const_iterator; /// The named metadata iterators. - typedef NamedMDListType::iterator named_metadata_iterator; + using named_metadata_iterator = NamedMDListType::iterator; /// The named metadata constant iterators. - typedef NamedMDListType::const_iterator const_named_metadata_iterator; + using const_named_metadata_iterator = NamedMDListType::const_iterator; /// This enumeration defines the supported behaviors of module flags. enum ModFlagBehavior { @@ -128,9 +139,12 @@ public: /// during the append operation. AppendUnique = 6, + /// Takes the max of the two values, which are required to be integers. + Max = 7, + // Markers: ModFlagBehaviorFirstVal = Error, - ModFlagBehaviorLastVal = AppendUnique + ModFlagBehaviorLastVal = Max }; /// Checks if Metadata represents a valid ModFlagBehavior, and stores the @@ -141,6 +155,7 @@ public: ModFlagBehavior Behavior; MDString *Key; Metadata *Val; + ModuleFlagEntry(ModFlagBehavior B, MDString *K, Metadata *V) : Behavior(B), Key(K), Val(V) {} }; @@ -319,7 +334,7 @@ public: /// exist, add a prototype for the function and return it. This function /// guarantees to return a constant of pointer to the specified function type /// or a ConstantExpr BitCast of that type if the named function has a - /// different type. This version of the method takes a null terminated list of + /// different type. This version of the method takes a list of /// function arguments, which makes it easier for clients to use. template Constant *getOrInsertFunction(StringRef Name, @@ -483,9 +498,11 @@ public: const GlobalListType &getGlobalList() const { return GlobalList; } /// Get the Module's list of global variables. GlobalListType &getGlobalList() { return GlobalList; } + static GlobalListType Module::*getSublistAccess(GlobalVariable*) { return &Module::GlobalList; } + /// Get the Module's list of functions (constant). const FunctionListType &getFunctionList() const { return FunctionList; } /// Get the Module's list of functions. @@ -493,31 +510,39 @@ public: static FunctionListType Module::*getSublistAccess(Function*) { return &Module::FunctionList; } + /// Get the Module's list of aliases (constant). const AliasListType &getAliasList() const { return AliasList; } /// Get the Module's list of aliases. AliasListType &getAliasList() { return AliasList; } + static AliasListType Module::*getSublistAccess(GlobalAlias*) { return &Module::AliasList; } + /// Get the Module's list of ifuncs (constant). const IFuncListType &getIFuncList() const { return IFuncList; } /// Get the Module's list of ifuncs. IFuncListType &getIFuncList() { return IFuncList; } + static IFuncListType Module::*getSublistAccess(GlobalIFunc*) { return &Module::IFuncList; } + /// Get the Module's list of named metadata (constant). const NamedMDListType &getNamedMDList() const { return NamedMDList; } /// Get the Module's list of named metadata. NamedMDListType &getNamedMDList() { return NamedMDList; } + static NamedMDListType Module::*getSublistAccess(NamedMDNode*) { return &Module::NamedMDList; } + /// Get the symbol table of global variable and function identifiers const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; } /// Get the Module's symbol table of global variable and function identifiers. ValueSymbolTable &getValueSymbolTable() { return *ValSymTab; } + /// Get the Module's symbol table for COMDATs (constant). const ComdatSymTabType &getComdatSymbolTable() const { return ComdatSymTab; } /// Get the Module's symbol table for COMDATs. @@ -602,11 +627,11 @@ public: /// @name Convenience iterators /// @{ - typedef concat_iterator - global_object_iterator; - typedef concat_iterator - const_global_object_iterator; + using global_object_iterator = + concat_iterator; + using const_global_object_iterator = + concat_iterator; iterator_range global_objects() { return concat(functions(), globals()); @@ -627,13 +652,12 @@ public: return global_objects().end(); } - typedef concat_iterator - global_value_iterator; - typedef concat_iterator - const_global_value_iterator; + using global_value_iterator = + concat_iterator; + using const_global_value_iterator = + concat_iterator; iterator_range global_values() { return concat(functions(), globals(), aliases(), ifuncs()); @@ -682,28 +706,35 @@ public: : public std::iterator { NamedMDNode *CUs; unsigned Idx; + void SkipNoDebugCUs(); + public: explicit debug_compile_units_iterator(NamedMDNode *CUs, unsigned Idx) : CUs(CUs), Idx(Idx) { SkipNoDebugCUs(); } + debug_compile_units_iterator &operator++() { ++Idx; SkipNoDebugCUs(); return *this; } + debug_compile_units_iterator operator++(int) { debug_compile_units_iterator T(*this); ++Idx; return T; } + bool operator==(const debug_compile_units_iterator &I) const { return Idx == I.Idx; } + bool operator!=(const debug_compile_units_iterator &I) const { return Idx != I.Idx; } + DICompileUnit *operator*() const; DICompileUnit *operator->() const; }; @@ -833,6 +864,6 @@ inline Module *unwrap(LLVMModuleProviderRef MP) { return reinterpret_cast(MP); } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_IR_MODULE_H diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index 09f6c1897009529392c6bd6e114d09825e3c9a8a..b43d588658628bd24e83cfe4a95b3a9d905488f2 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -1,4 +1,4 @@ -//===-- llvm/ModuleSummaryIndex.h - Module Summary Index --------*- C++ -*-===// +//===- llvm/ModuleSummaryIndex.h - Module Summary Index ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,21 +16,34 @@ #ifndef LLVM_IR_MODULESUMMARYINDEX_H #define LLVM_IR_MODULESUMMARYINDEX_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" - +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace llvm { namespace yaml { + template struct MappingTraits; -} + +} // end namespace yaml /// \brief Class to accumulate and hold information about a callee. struct CalleeInfo { @@ -45,58 +58,59 @@ struct CalleeInfo { } }; -/// Struct to hold value either by GUID or GlobalValue*. Values in combined -/// indexes as well as indirect calls are GUIDs, all others are GlobalValues. +class GlobalValueSummary; + +using GlobalValueSummaryList = std::vector>; + +struct GlobalValueSummaryInfo { + /// The GlobalValue corresponding to this summary. This is only used in + /// per-module summaries. + const GlobalValue *GV = nullptr; + + /// List of global value summary structures for a particular value held + /// in the GlobalValueMap. Requires a vector in the case of multiple + /// COMDAT values of the same name. + GlobalValueSummaryList SummaryList; +}; + +/// Map from global value GUID to corresponding summary structures. Use a +/// std::map rather than a DenseMap so that pointers to the map's value_type +/// (which are used by ValueInfo) are not invalidated by insertion. Also it will +/// likely incur less overhead, as the value type is not very small and the size +/// of the map is unknown, resulting in inefficiencies due to repeated +/// insertions and resizing. +using GlobalValueSummaryMapTy = + std::map; + +/// Struct that holds a reference to a particular GUID in a global value +/// summary. struct ValueInfo { - /// The value representation used in this instance. - enum ValueInfoKind { - VI_GUID, - VI_Value, - }; + const GlobalValueSummaryMapTy::value_type *Ref = nullptr; - /// Union of the two possible value types. - union ValueUnion { - GlobalValue::GUID Id; - const GlobalValue *GV; - ValueUnion(GlobalValue::GUID Id) : Id(Id) {} - ValueUnion(const GlobalValue *GV) : GV(GV) {} - }; + ValueInfo() = default; + ValueInfo(const GlobalValueSummaryMapTy::value_type *Ref) : Ref(Ref) {} - /// The value being represented. - ValueUnion TheValue; - /// The value representation. - ValueInfoKind Kind; - /// Constructor for a GUID value - ValueInfo(GlobalValue::GUID Id = 0) : TheValue(Id), Kind(VI_GUID) {} - /// Constructor for a GlobalValue* value - ValueInfo(const GlobalValue *V) : TheValue(V), Kind(VI_Value) {} - /// Accessor for GUID value - GlobalValue::GUID getGUID() const { - assert(Kind == VI_GUID && "Not a GUID type"); - return TheValue.Id; - } - /// Accessor for GlobalValue* value - const GlobalValue *getValue() const { - assert(Kind == VI_Value && "Not a Value type"); - return TheValue.GV; - } - bool isGUID() const { return Kind == VI_GUID; } + operator bool() const { return Ref; } + + GlobalValue::GUID getGUID() const { return Ref->first; } + const GlobalValue *getValue() const { return Ref->second.GV; } + + ArrayRef> getSummaryList() const { + return Ref->second.SummaryList; + } }; template <> struct DenseMapInfo { - static inline ValueInfo getEmptyKey() { return ValueInfo((GlobalValue *)-1); } - static inline ValueInfo getTombstoneKey() { - return ValueInfo((GlobalValue *)-2); - } - static bool isEqual(ValueInfo L, ValueInfo R) { - if (L.isGUID() != R.isGUID()) - return false; - return L.isGUID() ? (L.getGUID() == R.getGUID()) - : (L.getValue() == R.getValue()); + static inline ValueInfo getEmptyKey() { + return ValueInfo((GlobalValueSummaryMapTy::value_type *)-1); } - static unsigned getHashValue(ValueInfo I) { - return I.isGUID() ? I.getGUID() : (uintptr_t)I.getValue(); + + static inline ValueInfo getTombstoneKey() { + return ValueInfo((GlobalValueSummaryMapTy::value_type *)-2); } + + static bool isEqual(ValueInfo L, ValueInfo R) { return L.Ref == R.Ref; } + static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.Ref; } }; /// \brief Function and variable summary information to aid decisions and @@ -121,16 +135,18 @@ public: /// be renamed or references something that can't be renamed). unsigned NotEligibleToImport : 1; - /// Indicate that the global value must be considered a live root for - /// index-based liveness analysis. Used for special LLVM values such as - /// llvm.global_ctors that the linker does not know about. - unsigned LiveRoot : 1; + /// In per-module summary, indicate that the global value must be considered + /// a live root for index-based liveness analysis. Used for special LLVM + /// values such as llvm.global_ctors that the linker does not know about. + /// + /// In combined summary, indicate that the global value is live. + unsigned Live : 1; /// Convenience Constructors explicit GVFlags(GlobalValue::LinkageTypes Linkage, - bool NotEligibleToImport, bool LiveRoot) + bool NotEligibleToImport, bool Live) : Linkage(Linkage), NotEligibleToImport(NotEligibleToImport), - LiveRoot(LiveRoot) {} + Live(Live) {} }; private: @@ -142,7 +158,7 @@ private: /// This is the hash of the name of the symbol in the original file. It is /// identical to the GUID for global symbols, but differs for local since the /// GUID includes the module level id in the hash. - GlobalValue::GUID OriginalName; + GlobalValue::GUID OriginalName = 0; /// \brief Path of module IR containing value's definition, used to locate /// module during importing. @@ -159,10 +175,11 @@ private: /// are listed in the derived FunctionSummary object. std::vector RefEdgeList; + bool isLive() const { return Flags.Live; } + protected: - /// GlobalValueSummary constructor. GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector Refs) - : Kind(K), Flags(Flags), OriginalName(0), RefEdgeList(std::move(Refs)) {} + : Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) {} public: virtual ~GlobalValueSummary() = default; @@ -201,19 +218,17 @@ public: /// Return true if this global value can't be imported. bool notEligibleToImport() const { return Flags.NotEligibleToImport; } - /// Return true if this global value must be considered a root for live - /// value analysis on the index. - bool liveRoot() const { return Flags.LiveRoot; } - - /// Flag that this global value must be considered a root for live - /// value analysis on the index. - void setLiveRoot() { Flags.LiveRoot = true; } + void setLive(bool Live) { Flags.Live = Live; } /// Flag that this global value cannot be imported. void setNotEligibleToImport() { Flags.NotEligibleToImport = true; } /// Return the list of values referenced by this global value definition. ArrayRef refs() const { return RefEdgeList; } + + friend class ModuleSummaryIndex; + friend void computeDeadSymbols(class ModuleSummaryIndex &, + const DenseSet &); }; /// \brief Alias summary information. @@ -221,7 +236,6 @@ class AliasSummary : public GlobalValueSummary { GlobalValueSummary *AliaseeSummary; public: - /// Summary constructors. AliasSummary(GVFlags Flags, std::vector Refs) : GlobalValueSummary(AliasKind, Flags, std::move(Refs)) {} @@ -248,7 +262,7 @@ public: class FunctionSummary : public GlobalValueSummary { public: /// call edge pair. - typedef std::pair EdgeTy; + using EdgeTy = std::pair; /// An "identifier" for a virtual function. This contains the type identifier /// represented as a GUID and the offset from the address point to the virtual @@ -297,7 +311,6 @@ private: std::unique_ptr TIdInfo; public: - /// Summary constructors. FunctionSummary(GVFlags Flags, unsigned NumInsts, std::vector Refs, std::vector CGEdges, std::vector TypeTests, @@ -383,12 +396,15 @@ public: template <> struct DenseMapInfo { static FunctionSummary::VFuncId getEmptyKey() { return {0, uint64_t(-1)}; } + static FunctionSummary::VFuncId getTombstoneKey() { return {0, uint64_t(-2)}; } + static bool isEqual(FunctionSummary::VFuncId L, FunctionSummary::VFuncId R) { return L.GUID == R.GUID && L.Offset == R.Offset; } + static unsigned getHashValue(FunctionSummary::VFuncId I) { return I.GUID; } }; @@ -396,14 +412,17 @@ template <> struct DenseMapInfo { static FunctionSummary::ConstVCall getEmptyKey() { return {{0, uint64_t(-1)}, {}}; } + static FunctionSummary::ConstVCall getTombstoneKey() { return {{0, uint64_t(-2)}, {}}; } + static bool isEqual(FunctionSummary::ConstVCall L, FunctionSummary::ConstVCall R) { return DenseMapInfo::isEqual(L.VFunc, R.VFunc) && L.Args == R.Args; } + static unsigned getHashValue(FunctionSummary::ConstVCall I) { return I.VFunc.GUID; } @@ -418,7 +437,6 @@ template <> struct DenseMapInfo { class GlobalVarSummary : public GlobalValueSummary { public: - /// Summary constructors. GlobalVarSummary(GVFlags Flags, std::vector Refs) : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)) {} @@ -485,33 +503,20 @@ struct TypeIdSummary { }; /// 160 bits SHA1 -typedef std::array ModuleHash; - -/// List of global value summary structures for a particular value held -/// in the GlobalValueMap. Requires a vector in the case of multiple -/// COMDAT values of the same name. -typedef std::vector> GlobalValueSummaryList; - -/// Map from global value GUID to corresponding summary structures. -/// Use a std::map rather than a DenseMap since it will likely incur -/// less overhead, as the value type is not very small and the size -/// of the map is unknown, resulting in inefficiencies due to repeated -/// insertions and resizing. -typedef std::map - GlobalValueSummaryMapTy; +using ModuleHash = std::array; /// Type used for iterating through the global value summary map. -typedef GlobalValueSummaryMapTy::const_iterator const_gvsummary_iterator; -typedef GlobalValueSummaryMapTy::iterator gvsummary_iterator; +using const_gvsummary_iterator = GlobalValueSummaryMapTy::const_iterator; +using gvsummary_iterator = GlobalValueSummaryMapTy::iterator; /// String table to hold/own module path strings, which additionally holds the /// module ID assigned to each module during the plugin step, as well as a hash /// of the module. The StringMap makes a copy of and owns inserted strings. -typedef StringMap> ModulePathStringTableTy; +using ModulePathStringTableTy = StringMap>; /// Map of global value GUID to its summary, used to identify values defined in /// a particular module, and provide efficient access to their summary. -typedef std::map GVSummaryMapTy; +using GVSummaryMapTy = std::map; /// Class to hold module path string table and global value map, /// and encapsulate methods for operating on them. @@ -533,9 +538,22 @@ private: /// GUIDs, it will be mapped to 0. std::map OidGuidMap; + /// Indicates that summary-based GlobalValue GC has run, and values with + /// GVFlags::Live==false are really dead. Otherwise, all values must be + /// considered live. + bool WithGlobalValueDeadStripping = false; + + std::set CfiFunctionDefs; + std::set CfiFunctionDecls; + // YAML I/O support. friend yaml::MappingTraits; + GlobalValueSummaryMapTy::value_type * + getOrInsertValuePtr(GlobalValue::GUID GUID) { + return &*GlobalValueMap.emplace(GUID, GlobalValueSummaryInfo{}).first; + } + public: gvsummary_iterator begin() { return GlobalValueMap.begin(); } const_gvsummary_iterator begin() const { return GlobalValueMap.begin(); } @@ -543,21 +561,34 @@ public: const_gvsummary_iterator end() const { return GlobalValueMap.end(); } size_t size() const { return GlobalValueMap.size(); } - /// Get the list of global value summary objects for a given value name. - const GlobalValueSummaryList &getGlobalValueSummaryList(StringRef ValueName) { - return GlobalValueMap[GlobalValue::getGUID(ValueName)]; + bool withGlobalValueDeadStripping() const { + return WithGlobalValueDeadStripping; + } + void setWithGlobalValueDeadStripping() { + WithGlobalValueDeadStripping = true; + } + + bool isGlobalValueLive(const GlobalValueSummary *GVS) const { + return !WithGlobalValueDeadStripping || GVS->isLive(); + } + bool isGUIDLive(GlobalValue::GUID GUID) const; + + /// Return a ValueInfo for GUID if it exists, otherwise return ValueInfo(). + ValueInfo getValueInfo(GlobalValue::GUID GUID) const { + auto I = GlobalValueMap.find(GUID); + return ValueInfo(I == GlobalValueMap.end() ? nullptr : &*I); } - /// Get the list of global value summary objects for a given value name. - const const_gvsummary_iterator - findGlobalValueSummaryList(StringRef ValueName) const { - return GlobalValueMap.find(GlobalValue::getGUID(ValueName)); + /// Return a ValueInfo for \p GUID. + ValueInfo getOrInsertValueInfo(GlobalValue::GUID GUID) { + return ValueInfo(getOrInsertValuePtr(GUID)); } - /// Get the list of global value summary objects for a given value GUID. - const const_gvsummary_iterator - findGlobalValueSummaryList(GlobalValue::GUID ValueGUID) const { - return GlobalValueMap.find(ValueGUID); + /// Return a ValueInfo for \p GV and mark it as belonging to GV. + ValueInfo getOrInsertValueInfo(const GlobalValue *GV) { + auto VP = getOrInsertValuePtr(GV->getGUID()); + VP->second.GV = GV; + return ValueInfo(VP); } /// Return the GUID for \p OriginalId in the OidGuidMap. @@ -566,20 +597,27 @@ public: return I == OidGuidMap.end() ? 0 : I->second; } + std::set &cfiFunctionDefs() { return CfiFunctionDefs; } + const std::set &cfiFunctionDefs() const { return CfiFunctionDefs; } + + std::set &cfiFunctionDecls() { return CfiFunctionDecls; } + const std::set &cfiFunctionDecls() const { return CfiFunctionDecls; } + /// Add a global value summary for a value of the given name. void addGlobalValueSummary(StringRef ValueName, std::unique_ptr Summary) { - addOriginalName(GlobalValue::getGUID(ValueName), - Summary->getOriginalName()); - GlobalValueMap[GlobalValue::getGUID(ValueName)].push_back( - std::move(Summary)); + addGlobalValueSummary(getOrInsertValueInfo(GlobalValue::getGUID(ValueName)), + std::move(Summary)); } - /// Add a global value summary for a value of the given GUID. - void addGlobalValueSummary(GlobalValue::GUID ValueGUID, + /// Add a global value summary for the given ValueInfo. + void addGlobalValueSummary(ValueInfo VI, std::unique_ptr Summary) { - addOriginalName(ValueGUID, Summary->getOriginalName()); - GlobalValueMap[ValueGUID].push_back(std::move(Summary)); + addOriginalName(VI.getGUID(), Summary->getOriginalName()); + // Here we have a notionally const VI, but the value it points to is owned + // by the non-const *this. + const_cast(VI.Ref) + ->second.SummaryList.push_back(std::move(Summary)); } /// Add an original name for the value of the given GUID. @@ -597,16 +635,16 @@ public: /// not found. GlobalValueSummary *findSummaryInModule(GlobalValue::GUID ValueGUID, StringRef ModuleId) const { - auto CalleeInfoList = findGlobalValueSummaryList(ValueGUID); - if (CalleeInfoList == end()) { + auto CalleeInfo = getValueInfo(ValueGUID); + if (!CalleeInfo) { return nullptr; // This function does not have a summary } auto Summary = - llvm::find_if(CalleeInfoList->second, + llvm::find_if(CalleeInfo.getSummaryList(), [&](const std::unique_ptr &Summary) { return Summary->modulePath() == ModuleId; }); - if (Summary == CalleeInfoList->second.end()) + if (Summary == CalleeInfo.getSummaryList().end()) return nullptr; return Summary->get(); } @@ -648,13 +686,6 @@ public: return It->second.second; } - /// Add the given per-module index into this module index/summary, - /// assigning it the given module ID. Each module merged in should have - /// a unique ID, necessary for consistent renaming of promoted - /// static (local) variables. - void mergeFrom(std::unique_ptr Other, - uint64_t NextModuleId); - /// Convenience method for creating a promoted global name /// for the given value name of a local, and its original module's ID. static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash) { @@ -671,14 +702,13 @@ public: return Pair.first; } - /// Add a new module path with the given \p Hash, mapped to the given \p - /// ModID, and return an iterator to the entry in the index. - ModulePathStringTableTy::iterator - addModulePath(StringRef ModPath, uint64_t ModId, - ModuleHash Hash = ModuleHash{{0}}) { - return ModulePathStringTable.insert(std::make_pair( - ModPath, - std::make_pair(ModId, Hash))).first; + typedef ModulePathStringTableTy::value_type ModuleInfo; + + /// Add a new module with the given \p Hash, mapped to the given \p + /// ModID, and return a reference to the module. + ModuleInfo *addModule(StringRef ModPath, uint64_t ModId, + ModuleHash Hash = ModuleHash{{0}}) { + return &*ModulePathStringTable.insert({ModPath, {ModId, Hash}}).first; } /// Check if the given Module has any functions available for exporting @@ -707,13 +737,6 @@ public: return &I->second; } - /// Remove entries in the GlobalValueMap that have empty summaries due to the - /// eager nature of map entry creation during VST parsing. These would - /// also be suppressed during combined index generation in mergeFrom(), - /// but if there was only one module or this was the first module we might - /// not invoke mergeFrom. - void removeEmptySummaryEntries(); - /// Collect for the given module the list of function it defines /// (GUID -> Summary). void collectDefinedFunctionsForModule(StringRef ModulePath, @@ -725,6 +748,6 @@ public: StringMap &ModuleToDefinedGVSummaries) const; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_IR_MODULESUMMARYINDEX_H diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h index 80719c696935ec4e941a29ac62bff10f8850027e..5d7b8b997d37b4e42cecb52a105e282cb086eac5 100644 --- a/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -128,6 +128,8 @@ template <> struct MappingTraits { }; struct FunctionSummaryYaml { + unsigned Linkage; + bool NotEligibleToImport, Live; std::vector TypeTests; std::vector TypeTestAssumeVCalls, TypeCheckedLoadVCalls; @@ -168,6 +170,9 @@ namespace yaml { template <> struct MappingTraits { static void mapping(IO &io, FunctionSummaryYaml& summary) { + io.mapOptional("Linkage", summary.Linkage); + io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport); + io.mapOptional("Live", summary.Live); io.mapOptional("TypeTests", summary.TypeTests); io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls); io.mapOptional("TypeCheckedLoadVCalls", summary.TypeCheckedLoadVCalls); @@ -183,6 +188,7 @@ template <> struct MappingTraits { LLVM_YAML_IS_STRING_MAP(TypeIdSummary) LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) namespace llvm { namespace yaml { @@ -199,12 +205,12 @@ template <> struct CustomMappingTraits { } auto &Elem = V[KeyInt]; for (auto &FSum : FSums) { - GlobalValueSummary::GVFlags GVFlags(GlobalValue::ExternalLinkage, false, - false); - Elem.push_back(llvm::make_unique( - GVFlags, 0, ArrayRef{}, - ArrayRef{}, std::move(FSum.TypeTests), - std::move(FSum.TypeTestAssumeVCalls), + Elem.SummaryList.push_back(llvm::make_unique( + GlobalValueSummary::GVFlags( + static_cast(FSum.Linkage), + FSum.NotEligibleToImport, FSum.Live), + 0, ArrayRef{}, ArrayRef{}, + std::move(FSum.TypeTests), std::move(FSum.TypeTestAssumeVCalls), std::move(FSum.TypeCheckedLoadVCalls), std::move(FSum.TypeTestAssumeConstVCalls), std::move(FSum.TypeCheckedLoadConstVCalls))); @@ -213,11 +219,13 @@ template <> struct CustomMappingTraits { static void output(IO &io, GlobalValueSummaryMapTy &V) { for (auto &P : V) { std::vector FSums; - for (auto &Sum : P.second) { + for (auto &Sum : P.second.SummaryList) { if (auto *FSum = dyn_cast(Sum.get())) FSums.push_back(FunctionSummaryYaml{ - FSum->type_tests(), FSum->type_test_assume_vcalls(), - FSum->type_checked_load_vcalls(), + FSum->flags().Linkage, + static_cast(FSum->flags().NotEligibleToImport), + static_cast(FSum->flags().Live), FSum->type_tests(), + FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(), FSum->type_test_assume_const_vcalls(), FSum->type_checked_load_const_vcalls()}); } @@ -231,6 +239,25 @@ template <> struct MappingTraits { static void mapping(IO &io, ModuleSummaryIndex& index) { io.mapOptional("GlobalValueMap", index.GlobalValueMap); io.mapOptional("TypeIdMap", index.TypeIdMap); + io.mapOptional("WithGlobalValueDeadStripping", + index.WithGlobalValueDeadStripping); + + if (io.outputting()) { + std::vector CfiFunctionDefs(index.CfiFunctionDefs.begin(), + index.CfiFunctionDefs.end()); + io.mapOptional("CfiFunctionDefs", CfiFunctionDefs); + std::vector CfiFunctionDecls(index.CfiFunctionDecls.begin(), + index.CfiFunctionDecls.end()); + io.mapOptional("CfiFunctionDecls", CfiFunctionDecls); + } else { + std::vector CfiFunctionDefs; + io.mapOptional("CfiFunctionDefs", CfiFunctionDefs); + index.CfiFunctionDefs = {CfiFunctionDefs.begin(), CfiFunctionDefs.end()}; + std::vector CfiFunctionDecls; + io.mapOptional("CfiFunctionDecls", CfiFunctionDecls); + index.CfiFunctionDecls = {CfiFunctionDecls.begin(), + CfiFunctionDecls.end()}; + } } }; diff --git a/include/llvm/IR/OperandTraits.h b/include/llvm/IR/OperandTraits.h index e97a8009ccc03cb406298c79c98927b49484f0bd..c618aff3df9aea354c7128bd52de62f62860cfcf 100644 --- a/include/llvm/IR/OperandTraits.h +++ b/include/llvm/IR/OperandTraits.h @@ -30,6 +30,9 @@ namespace llvm { template struct FixedNumOperandTraits { static Use *op_begin(SubClass* U) { + static_assert( + !std::is_polymorphic::value, + "adding virtual methods to subclasses of User breaks use lists"); return reinterpret_cast(U) - ARITY; } static Use *op_end(SubClass* U) { @@ -65,6 +68,9 @@ struct OptionalOperandTraits : public FixedNumOperandTraits { template struct VariadicOperandTraits { static Use *op_begin(SubClass* U) { + static_assert( + !std::is_polymorphic::value, + "adding virtual methods to subclasses of User breaks use lists"); return reinterpret_cast(U) - static_cast(U)->getNumOperands(); } static Use *op_end(SubClass* U) { @@ -82,9 +88,6 @@ struct VariadicOperandTraits { /// HungoffOperandTraits - determine the allocation regime of the Use array /// when it is not a prefix to the User object, but allocated at an unrelated /// heap address. -/// Assumes that the User subclass that is determined by this traits class -/// has an OperandList member of type User::op_iterator. [Note: this is now -/// trivially satisfied, because User has that member for historic reasons.] /// /// This is the traits class that is needed when the Use array must be /// resizable. diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h index 997a85340c2591e43df0dfe58f0a947c809bb77b..c7f4697e93e74090aebb0a3a4af56814b80132c0 100644 --- a/include/llvm/IR/Operator.h +++ b/include/llvm/IR/Operator.h @@ -29,18 +29,12 @@ namespace llvm { /// This is a utility class that provides an abstraction for the common /// functionality between Instructions and ConstantExprs. class Operator : public User { -protected: - // NOTE: Cannot use = delete because it's not legal to delete - // an overridden method that's not deleted in the base class. Cannot leave - // this unimplemented because that leads to an ODR-violation. - ~Operator() override; - public: // The Operator class is intended to be used as a utility, and is never itself // instantiated. Operator() = delete; + ~Operator() = delete; - void *operator new(size_t, unsigned) = delete; void *operator new(size_t s) = delete; /// Return the opcode for this Instruction or ConstantExpr. @@ -334,8 +328,15 @@ public: return I->getType()->isFPOrFPVectorTy() || I->getOpcode() == Instruction::FCmp; } + + static inline bool classof(const ConstantExpr *CE) { + return CE->getType()->isFPOrFPVectorTy() || + CE->getOpcode() == Instruction::FCmp; + } + static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); + return (isa(V) && classof(cast(V))) || + (isa(V) && classof(cast(V))); } }; diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h index c845112baa4531c588495327374d89ea1882a081..d03b7b65f81e2c6e683494a3ab957f285a412811 100644 --- a/include/llvm/IR/PassManager.h +++ b/include/llvm/IR/PassManager.h @@ -39,8 +39,8 @@ #define LLVM_IR_PASSMANAGER_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -48,9 +48,15 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/TypeName.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/type_traits.h" +#include +#include +#include +#include #include #include +#include +#include +#include #include namespace llvm { @@ -469,15 +475,16 @@ public: } template void addPass(PassT Pass) { - typedef detail::PassModel - PassModelT; + using PassModelT = + detail::PassModel; + Passes.emplace_back(new PassModelT(std::move(Pass))); } private: - typedef detail::PassConcept - PassConceptT; + using PassConceptT = + detail::PassConcept; std::vector> Passes; @@ -486,12 +493,14 @@ private: }; extern template class PassManager; + /// \brief Convenience typedef for a pass manager over modules. -typedef PassManager ModulePassManager; +using ModulePassManager = PassManager; extern template class PassManager; + /// \brief Convenience typedef for a pass manager over functions. -typedef PassManager FunctionPassManager; +using FunctionPassManager = PassManager; /// \brief A container for analyses that lazily runs them and caches their /// results. @@ -504,11 +513,11 @@ public: private: // Now that we've defined our invalidator, we can define the concept types. - typedef detail::AnalysisResultConcept - ResultConceptT; - typedef detail::AnalysisPassConcept - PassConceptT; + using ResultConceptT = + detail::AnalysisResultConcept; + using PassConceptT = + detail::AnalysisPassConcept; /// \brief List of analysis pass IDs and associated concept pointers. /// @@ -516,18 +525,18 @@ private: /// erases. Provides the analysis ID to enable finding iterators to a given /// entry in maps below, and provides the storage for the actual result /// concept. - typedef std::list>> - AnalysisResultListT; + using AnalysisResultListT = + std::list>>; /// \brief Map type from IRUnitT pointer to our custom list type. - typedef DenseMap AnalysisResultListMapT; + using AnalysisResultListMapT = DenseMap; /// \brief Map type from a pair of analysis ID and IRUnitT pointer to an /// iterator into a particular result list (which is where the actual analysis /// result is stored). - typedef DenseMap, - typename AnalysisResultListT::iterator> - AnalysisResultMapT; + using AnalysisResultMapT = + DenseMap, + typename AnalysisResultListT::iterator>; public: /// API to communicate dependencies between analyses during invalidation. @@ -558,10 +567,10 @@ public: /// dependecies on it will become invalid as a result. template bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA) { - typedef detail::AnalysisResultModel - ResultModelT; + using ResultModelT = + detail::AnalysisResultModel; + return invalidateImpl(PassT::ID(), IR, PA); } @@ -672,9 +681,11 @@ public: "This analysis pass was not registered prior to being queried"); ResultConceptT &ResultConcept = getResultImpl(PassT::ID(), IR, ExtraArgs...); - typedef detail::AnalysisResultModel - ResultModelT; + + using ResultModelT = + detail::AnalysisResultModel; + return static_cast(ResultConcept).Result; } @@ -692,9 +703,10 @@ public: if (!ResultConcept) return nullptr; - typedef detail::AnalysisResultModel - ResultModelT; + using ResultModelT = + detail::AnalysisResultModel; + return &static_cast(ResultConcept)->Result; } @@ -717,10 +729,10 @@ public: /// hashtable.) template bool registerPass(PassBuilderT &&PassBuilder) { - typedef decltype(PassBuilder()) PassT; - typedef detail::AnalysisPassModel - PassModelT; + using PassT = decltype(PassBuilder()); + using PassModelT = + detail::AnalysisPassModel; auto &PassPtr = AnalysisPasses[PassT::ID()]; if (PassPtr) @@ -876,7 +888,8 @@ private: } /// \brief Map type from module analysis pass ID to pass concept pointer. - typedef DenseMap> AnalysisPassMapT; + using AnalysisPassMapT = + DenseMap>; /// \brief Collection of module analysis passes, indexed by ID. AnalysisPassMapT AnalysisPasses; @@ -896,12 +909,14 @@ private: }; extern template class AnalysisManager; + /// \brief Convenience typedef for the Module analysis manager. -typedef AnalysisManager ModuleAnalysisManager; +using ModuleAnalysisManager = AnalysisManager; extern template class AnalysisManager; + /// \brief Convenience typedef for the Function analysis manager. -typedef AnalysisManager FunctionAnalysisManager; +using FunctionAnalysisManager = AnalysisManager; /// \brief An analysis over an "outer" IR unit that provides access to an /// analysis manager over an "inner" IR unit. The inner unit must be contained @@ -927,20 +942,14 @@ public: class Result { public: explicit Result(AnalysisManagerT &InnerAM) : InnerAM(&InnerAM) {} + Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)) { // We have to null out the analysis manager in the moved-from state // because we are taking ownership of the responsibilty to clear the // analysis state. Arg.InnerAM = nullptr; } - Result &operator=(Result &&RHS) { - InnerAM = RHS.InnerAM; - // We have to null out the analysis manager in the moved-from state - // because we are taking ownership of the responsibilty to clear the - // analysis state. - RHS.InnerAM = nullptr; - return *this; - } + ~Result() { // InnerAM is cleared in a moved from state where there is nothing to do. if (!InnerAM) @@ -951,6 +960,15 @@ public: InnerAM->clear(); } + Result &operator=(Result &&RHS) { + InnerAM = RHS.InnerAM; + // We have to null out the analysis manager in the moved-from state + // because we are taking ownership of the responsibilty to clear the + // analysis state. + RHS.InnerAM = nullptr; + return *this; + } + /// \brief Accessor for the analysis manager. AnalysisManagerT &getManager() { return *InnerAM; } @@ -988,6 +1006,7 @@ public: private: friend AnalysisInfoMixin< InnerAnalysisManagerProxy>; + static AnalysisKey Key; AnalysisManagerT *InnerAM; @@ -998,8 +1017,8 @@ AnalysisKey InnerAnalysisManagerProxy::Key; /// Provide the \c FunctionAnalysisManager to \c Module proxy. -typedef InnerAnalysisManagerProxy - FunctionAnalysisManagerModuleProxy; +using FunctionAnalysisManagerModuleProxy = + InnerAnalysisManagerProxy; /// Specialization of the invalidate method for the \c /// FunctionAnalysisManagerModuleProxy's result. @@ -1097,6 +1116,7 @@ public: private: friend AnalysisInfoMixin< OuterAnalysisManagerProxy>; + static AnalysisKey Key; const AnalysisManagerT *AM; @@ -1109,8 +1129,8 @@ AnalysisKey extern template class OuterAnalysisManagerProxy; /// Provide the \c ModuleAnalysisManager to \c Function proxy. -typedef OuterAnalysisManagerProxy - ModuleAnalysisManagerFunctionProxy; +using ModuleAnalysisManagerFunctionProxy = + OuterAnalysisManagerProxy; /// \brief Trivial adaptor that maps from a module to its functions. /// @@ -1274,6 +1294,6 @@ RepeatedPass createRepeatedPass(int Count, PassT P) { return RepeatedPass(Count, std::move(P)); } -} +} // end namespace llvm -#endif +#endif // LLVM_IR_PASSMANAGER_H diff --git a/include/llvm/IR/PassManagerInternal.h b/include/llvm/IR/PassManagerInternal.h index 387dc4c65c4331760dc676ee11348fbb1fd5d15c..9195d4dfa4287d26729ecca5cc54653e2a4f6b69 100644 --- a/include/llvm/IR/PassManagerInternal.h +++ b/include/llvm/IR/PassManagerInternal.h @@ -27,7 +27,6 @@ namespace llvm { template class AllAnalysesOn; template class AnalysisManager; -class Invalidator; class PreservedAnalyses; /// \brief Implementation details of the pass manager interfaces. @@ -116,7 +115,7 @@ struct AnalysisResultConcept { /// \brief SFINAE metafunction for computing whether \c ResultT provides an /// \c invalidate member function. template class ResultHasInvalidateMethod { - typedef char EnabledType; + using EnabledType = char; struct DisabledType { char a, b; }; @@ -124,7 +123,7 @@ template class ResultHasInvalidateMethod { // Purely to help out MSVC which fails to disable the below specialization, // explicitly enable using the result type's invalidate routine if we can // successfully call that routine. - template struct Nonce { typedef EnabledType Type; }; + template struct Nonce { using Type = EnabledType; }; template static typename Nonce().invalidate( std::declval(), std::declval()))>::Type @@ -280,9 +279,9 @@ struct AnalysisPassModel : AnalysisPassConcept - ResultModelT; + using ResultModelT = + AnalysisResultModel; /// \brief The model delegates to the \c PassT::run method. /// diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 40f9c21f646bcbaad041bed2a45ec3971c7dc294..015a17e8e7ca5c3a00d16c66f3c2f0f05c095c66 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -29,11 +29,19 @@ #ifndef LLVM_IR_PATTERNMATCH_H #define LLVM_IR_PATTERNMATCH_H +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include namespace llvm { namespace PatternMatch { @@ -172,7 +180,9 @@ inline match_nan m_NaN() { return match_nan(); } struct apint_match { const APInt *&Res; + apint_match(const APInt *&R) : Res(R) {} + template bool match(ITy *V) { if (auto *CI = dyn_cast(V)) { Res = &CI->getValue(); @@ -230,7 +240,9 @@ template struct cst_pred_ty : public Predicate { /// satisfy a specified predicate, and bind them to an APInt. template struct api_pred_ty : public Predicate { const APInt *&Res; + api_pred_ty(const APInt *&R) : Res(R) {} + template bool match(ITy *V) { if (const auto *CI = dyn_cast(V)) if (this->isValue(CI->getValue())) { @@ -250,7 +262,7 @@ template struct api_pred_ty : public Predicate { }; struct is_one { - bool isValue(const APInt &C) { return C == 1; } + bool isValue(const APInt &C) { return C.isOneValue(); } }; /// \brief Match an integer 1 or a vector with all elements equal to 1. @@ -267,15 +279,15 @@ inline cst_pred_ty m_AllOnes() { } inline api_pred_ty m_AllOnes(const APInt *&V) { return V; } -struct is_sign_bit { - bool isValue(const APInt &C) { return C.isSignBit(); } +struct is_sign_mask { + bool isValue(const APInt &C) { return C.isSignMask(); } }; /// \brief Match an integer or vector with only the sign bit(s) set. -inline cst_pred_ty m_SignBit() { - return cst_pred_ty(); +inline cst_pred_ty m_SignMask() { + return cst_pred_ty(); } -inline api_pred_ty m_SignBit(const APInt *&V) { return V; } +inline api_pred_ty m_SignMask(const APInt *&V) { return V; } struct is_power2 { bool isValue(const APInt &C) { return C.isPowerOf2(); } @@ -294,6 +306,7 @@ inline api_pred_ty m_MaxSignedValue(const APInt *&V) { return template struct bind_ty { Class *&VR; + bind_ty(Class *&V) : VR(V) {} template bool match(ITy *V) { @@ -326,6 +339,7 @@ inline bind_ty m_ConstantFP(ConstantFP *&C) { return C; } /// \brief Match a specified Value*. struct specificval_ty { const Value *Val; + specificval_ty(const Value *V) : Val(V) {} template bool match(ITy *V) { return V == Val; } @@ -338,6 +352,7 @@ inline specificval_ty m_Specific(const Value *V) { return V; } /// that value. struct specific_fpval { double Val; + specific_fpval(double V) : Val(V) {} template bool match(ITy *V) { @@ -360,6 +375,7 @@ inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); } struct bind_const_intval_ty { uint64_t &VR; + bind_const_intval_ty(uint64_t &V) : VR(V) {} template bool match(ITy *V) { @@ -376,6 +392,7 @@ struct bind_const_intval_ty { // value. struct specific_intval { uint64_t Val; + specific_intval(uint64_t V) : Val(V) {} template bool match(ITy *V) { @@ -869,17 +886,21 @@ template struct not_match { template bool match(OpTy *V) { if (auto *O = dyn_cast(V)) - if (O->getOpcode() == Instruction::Xor) - return matchIfNot(O->getOperand(0), O->getOperand(1)); + if (O->getOpcode() == Instruction::Xor) { + if (isAllOnes(O->getOperand(1))) + return L.match(O->getOperand(0)); + if (isAllOnes(O->getOperand(0))) + return L.match(O->getOperand(1)); + } return false; } private: - bool matchIfNot(Value *LHS, Value *RHS) { - return (isa(RHS) || isa(RHS) || + bool isAllOnes(Value *V) { + return (isa(V) || isa(V) || // FIXME: Remove CV. - isa(RHS)) && - cast(RHS)->isAllOnesValue() && L.match(LHS); + isa(V)) && + cast(V)->isAllOnesValue(); } }; @@ -939,6 +960,7 @@ template inline fneg_match m_FNeg(const LHS &L) { struct br_match { BasicBlock *&Succ; + br_match(BasicBlock *&Succ) : Succ(Succ) {} template bool match(OpTy *V) { @@ -956,6 +978,7 @@ inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); } template struct brc_match { Cond_t Cond; BasicBlock *&T, *&F; + brc_match(const Cond_t &C, BasicBlock *&t, BasicBlock *&f) : Cond(C), T(t), F(f) {} @@ -1004,7 +1027,7 @@ struct MaxMin_match { (TrueVal != RHS || FalseVal != LHS)) return false; typename CmpInst_t::Predicate Pred = - LHS == TrueVal ? Cmp->getPredicate() : Cmp->getSwappedPredicate(); + LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate(); // Does "(x pred y) ? x : y" represent the desired max/min operation? if (!Pred_t::match(Pred)) return false; @@ -1115,7 +1138,7 @@ inline MaxMin_match m_OrdFMax(const LHS &L, /// semantics. In the presence of 'NaN' we have to preserve the original /// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate. /// -/// max(L, R) iff L and R are not NaN +/// min(L, R) iff L and R are not NaN /// m_OrdFMin(L, R) = R iff L or R are NaN template inline MaxMin_match m_OrdFMin(const LHS &L, @@ -1131,13 +1154,28 @@ inline MaxMin_match m_OrdFMin(const LHS &L, /// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate. /// /// max(L, R) iff L and R are not NaN -/// m_UnordFMin(L, R) = L iff L or R are NaN +/// m_UnordFMax(L, R) = L iff L or R are NaN template inline MaxMin_match m_UnordFMax(const LHS &L, const RHS &R) { return MaxMin_match(L, R); } +/// \brief Match an 'unordered' floating point minimum function. +/// Floating point has one special value 'NaN'. Therefore, there is no total +/// order. However, if we can ignore the 'NaN' value (for example, because of a +/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' +/// semantics. In the presence of 'NaN' we have to preserve the original +/// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate. +/// +/// min(L, R) iff L and R are not NaN +/// m_UnordFMin(L, R) = L iff L or R are NaN +template +inline MaxMin_match +m_UnordFMin(const LHS &L, const RHS &R) { + return MaxMin_match(L, R); +} + //===----------------------------------------------------------------------===// // Matchers for overflow check patterns: e.g. (a + b) u< a // @@ -1184,24 +1222,10 @@ m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) { return UAddWithOverflow_match(L, R, S); } -/// \brief Match an 'unordered' floating point minimum function. -/// Floating point has one special value 'NaN'. Therefore, there is no total -/// order. However, if we can ignore the 'NaN' value (for example, because of a -/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum' -/// semantics. In the presence of 'NaN' we have to preserve the original -/// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate. -/// -/// max(L, R) iff L and R are not NaN -/// m_UnordFMin(L, R) = L iff L or R are NaN -template -inline MaxMin_match -m_UnordFMin(const LHS &L, const RHS &R) { - return MaxMin_match(L, R); -} - template struct Argument_match { unsigned OpI; Opnd_t Val; + Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {} template bool match(OpTy *V) { @@ -1219,6 +1243,7 @@ inline Argument_match m_Argument(const Opnd_t &Op) { /// \brief Intrinsic matchers. struct IntrinsicID_match { unsigned ID; + IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {} template bool match(OpTy *V) { @@ -1239,21 +1264,23 @@ template struct m_Intrinsic_Ty; template struct m_Intrinsic_Ty { - typedef match_combine_and> Ty; + using Ty = match_combine_and>; }; template struct m_Intrinsic_Ty { - typedef match_combine_and::Ty, Argument_match> - Ty; + using Ty = + match_combine_and::Ty, Argument_match>; }; template struct m_Intrinsic_Ty { - typedef match_combine_and::Ty, - Argument_match> Ty; + using Ty = + match_combine_and::Ty, + Argument_match>; }; template struct m_Intrinsic_Ty { - typedef match_combine_and::Ty, - Argument_match> Ty; + using Ty = + match_combine_and::Ty, + Argument_match>; }; /// \brief Match intrinsic calls like this: @@ -1437,4 +1464,4 @@ m_c_UMax(const LHS &L, const RHS &R) { } // end namespace PatternMatch } // end namespace llvm -#endif +#endif // LLVM_IR_PATTERNMATCH_H diff --git a/include/llvm/IR/ProfileSummary.h b/include/llvm/IR/ProfileSummary.h index f4248014c6e172edf3536e06b608e191e8bdb3ee..d85ce8c443ecbb084fe02da481010bc3a92dcbe2 100644 --- a/include/llvm/IR/ProfileSummary.h +++ b/include/llvm/IR/ProfileSummary.h @@ -1,4 +1,4 @@ -//===-- ProfileSummary.h - Profile summary data structure. ------*- C++ -*-===// +//===- ProfileSummary.h - Profile summary data structure. -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,21 +11,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_PROFILE_SUMMARY_H -#define LLVM_SUPPORT_PROFILE_SUMMARY_H +#ifndef LLVM_IR_PROFILESUMMARY_H +#define LLVM_IR_PROFILESUMMARY_H +#include #include -#include #include -#include "llvm/Support/Casting.h" - namespace llvm { class LLVMContext; class Metadata; -class MDTuple; -class MDNode; // The profile summary is one or more (Cutoff, MinCount, NumCounts) triplets. // The semantics of counts depend on the type of profile. For instrumentation @@ -37,12 +33,13 @@ struct ProfileSummaryEntry { uint32_t Cutoff; ///< The required percentile of counts. uint64_t MinCount; ///< The minimum count for this percentile. uint64_t NumCounts; ///< Number of counts >= the minimum count. + ProfileSummaryEntry(uint32_t TheCutoff, uint64_t TheMinCount, uint64_t TheNumCounts) : Cutoff(TheCutoff), MinCount(TheMinCount), NumCounts(TheNumCounts) {} }; -typedef std::vector SummaryEntryVector; +using SummaryEntryVector = std::vector; class ProfileSummary { public: @@ -59,6 +56,7 @@ private: public: static const int Scale = 1000000; + ProfileSummary(Kind K, SummaryEntryVector DetailedSummary, uint64_t TotalCount, uint64_t MaxCount, uint64_t MaxInternalCount, uint64_t MaxFunctionCount, @@ -67,6 +65,7 @@ public: TotalCount(TotalCount), MaxCount(MaxCount), MaxInternalCount(MaxInternalCount), MaxFunctionCount(MaxFunctionCount), NumCounts(NumCounts), NumFunctions(NumFunctions) {} + Kind getKind() const { return PSK; } /// \brief Return summary information as metadata. Metadata *getMD(LLVMContext &Context); @@ -82,4 +81,5 @@ public: }; } // end namespace llvm -#endif + +#endif // LLVM_IR_PROFILESUMMARY_H diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h index 03151cd7c8f70890d66d19ee99c2f332f794674a..265e7eb348bfb68fda5e67e34830c3fb58c5c51c 100644 --- a/include/llvm/IR/Statepoint.h +++ b/include/llvm/IR/Statepoint.h @@ -1,4 +1,4 @@ -//===-- llvm/IR/Statepoint.h - gc.statepoint utilities ----------*- C++ -*-===// +//===- llvm/IR/Statepoint.h - gc.statepoint utilities -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,17 +17,19 @@ #ifndef LLVM_IR_STATEPOINT_H #define LLVM_IR_STATEPOINT_H -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -87,7 +89,7 @@ protected: } public: - typedef typename CallSiteTy::arg_iterator arg_iterator; + using arg_iterator = typename CallSiteTy::arg_iterator; enum { IDPos = 0, @@ -226,24 +228,24 @@ public: return cast(NumVMSArgs)->getZExtValue(); } - typename CallSiteTy::arg_iterator vm_state_begin() const { + typename CallSiteTy::arg_iterator deopt_begin() const { auto I = gc_transition_args_end() + 1; assert((getCallSite().arg_end() - I) >= 0); return I; } - typename CallSiteTy::arg_iterator vm_state_end() const { - auto I = vm_state_begin() + getNumTotalVMSArgs(); + typename CallSiteTy::arg_iterator deopt_end() const { + auto I = deopt_begin() + getNumTotalVMSArgs(); assert((getCallSite().arg_end() - I) >= 0); return I; } /// range adapter for vm state arguments - iterator_range vm_state_args() const { - return make_range(vm_state_begin(), vm_state_end()); + iterator_range deopt_operands() const { + return make_range(deopt_begin(), deopt_end()); } typename CallSiteTy::arg_iterator gc_args_begin() const { - return vm_state_end(); + return deopt_end(); } typename CallSiteTy::arg_iterator gc_args_end() const { return getCallSite().arg_end(); @@ -287,8 +289,8 @@ public: (void)arg_end(); (void)gc_transition_args_begin(); (void)gc_transition_args_end(); - (void)vm_state_begin(); - (void)vm_state_end(); + (void)deopt_begin(); + (void)deopt_end(); (void)gc_args_begin(); (void)gc_args_end(); } @@ -300,8 +302,9 @@ public: class ImmutableStatepoint : public StatepointBase { - typedef StatepointBase Base; + using Base = + StatepointBase; public: explicit ImmutableStatepoint(const Instruction *I) : Base(I) {} @@ -312,7 +315,7 @@ public: /// to a gc.statepoint. class Statepoint : public StatepointBase { - typedef StatepointBase Base; + using Base = StatepointBase; public: explicit Statepoint(Instruction *I) : Base(I) {} @@ -327,6 +330,7 @@ public: return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate || I->getIntrinsicID() == Intrinsic::experimental_gc_result; } + static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -369,6 +373,7 @@ public: static inline bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::experimental_gc_relocate; } + static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } @@ -403,6 +408,7 @@ public: static inline bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::experimental_gc_result; } + static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } diff --git a/include/llvm/IR/SymbolTableListTraits.h b/include/llvm/IR/SymbolTableListTraits.h index 49a5fb21297d6577e8fece4c1fbbe84b89f98ab7..87ce902c2811b19c67b28fe4a047868c7a6d120d 100644 --- a/include/llvm/IR/SymbolTableListTraits.h +++ b/include/llvm/IR/SymbolTableListTraits.h @@ -48,7 +48,7 @@ class ValueSymbolTable; template struct SymbolTableListParentType {}; #define DEFINE_SYMBOL_TABLE_PARENT_TYPE(NODE, PARENT) \ - template <> struct SymbolTableListParentType { typedef PARENT type; }; + template <> struct SymbolTableListParentType { using type = PARENT; }; DEFINE_SYMBOL_TABLE_PARENT_TYPE(Instruction, BasicBlock) DEFINE_SYMBOL_TABLE_PARENT_TYPE(BasicBlock, Function) DEFINE_SYMBOL_TABLE_PARENT_TYPE(Argument, Function) @@ -65,10 +65,10 @@ template class SymbolTableList; // template class SymbolTableListTraits : public ilist_alloc_traits { - typedef SymbolTableList ListTy; - typedef typename simple_ilist::iterator iterator; - typedef - typename SymbolTableListParentType::type ItemParentClass; + using ListTy = SymbolTableList; + using iterator = typename simple_ilist::iterator; + using ItemParentClass = + typename SymbolTableListParentType::type; public: SymbolTableListTraits() = default; diff --git a/include/llvm/IR/TrackingMDRef.h b/include/llvm/IR/TrackingMDRef.h index 12b196432006d737cce4cbc7239478c62eee14a2..bdec904ad1e12d0f9de52e6724cbfb8a1ab5fbbf 100644 --- a/include/llvm/IR/TrackingMDRef.h +++ b/include/llvm/IR/TrackingMDRef.h @@ -139,31 +139,35 @@ public: bool hasTrivialDestructor() const { return Ref.hasTrivialDestructor(); } }; -typedef TypedTrackingMDRef TrackingMDNodeRef; -typedef TypedTrackingMDRef TrackingValueAsMetadataRef; +using TrackingMDNodeRef = TypedTrackingMDRef; +using TrackingValueAsMetadataRef = TypedTrackingMDRef; // Expose the underlying metadata to casting. template <> struct simplify_type { - typedef Metadata *SimpleType; + using SimpleType = Metadata *; + static SimpleType getSimplifiedValue(TrackingMDRef &MD) { return MD.get(); } }; template <> struct simplify_type { - typedef Metadata *SimpleType; + using SimpleType = Metadata *; + static SimpleType getSimplifiedValue(const TrackingMDRef &MD) { return MD.get(); } }; template struct simplify_type> { - typedef T *SimpleType; + using SimpleType = T *; + static SimpleType getSimplifiedValue(TypedTrackingMDRef &MD) { return MD.get(); } }; template struct simplify_type> { - typedef T *SimpleType; + using SimpleType = T *; + static SimpleType getSimplifiedValue(const TypedTrackingMDRef &MD) { return MD.get(); } diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h index e6a0df937e9bb67f49c6dc783c405bbf88899896..b37b59288e3f47e0818ba8f20e1b49c4436725ad 100644 --- a/include/llvm/IR/Type.h +++ b/include/llvm/IR/Type.h @@ -1,4 +1,4 @@ -//===-- llvm/Type.h - Classes for handling data types -----------*- C++ -*-===// +//===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,19 +20,20 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include +#include +#include namespace llvm { -class PointerType; +template struct GraphTraits; class IntegerType; -class raw_ostream; -class Module; class LLVMContext; -class LLVMContextImpl; +class PointerType; +class raw_ostream; class StringRef; -template struct GraphTraits; /// The instances of the Type class are immutable: once they are created, /// they are never changed. Also note that only one instance of a particular @@ -86,9 +87,9 @@ private: protected: friend class LLVMContextImpl; + explicit Type(LLVMContext &C, TypeID tid) - : Context(C), ID(tid), SubclassData(0), - NumContainedTys(0), ContainedTys(nullptr) {} + : Context(C), ID(tid), SubclassData(0) {} ~Type() = default; unsigned getSubclassData() const { return SubclassData; } @@ -100,14 +101,14 @@ protected: } /// Keeps track of how many Type*'s there are in the ContainedTys list. - unsigned NumContainedTys; + unsigned NumContainedTys = 0; /// A pointer to the array of Types contained by this Type. For example, this /// includes the arguments of a function type, the elements of a structure, /// the pointee of a pointer, the element type of an array, etc. This pointer /// may be 0 for types that don't contain other types (Integer, Double, /// Float). - Type * const *ContainedTys; + Type * const *ContainedTys = nullptr; static bool isSequentialType(TypeID TyID) { return TyID == ArrayTyID || TyID == VectorTyID; @@ -122,6 +123,7 @@ public: /// inlined with the operands when printing an instruction. void print(raw_ostream &O, bool IsForDebug = false, bool NoDetails = false) const; + void dump() const; /// Return the LLVMContext in which this type was uniqued. @@ -299,14 +301,16 @@ public: //===--------------------------------------------------------------------===// // Type Iteration support. // - typedef Type * const *subtype_iterator; + using subtype_iterator = Type * const *; + subtype_iterator subtype_begin() const { return ContainedTys; } subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];} ArrayRef subtypes() const { return makeArrayRef(subtype_begin(), subtype_end()); } - typedef std::reverse_iterator subtype_reverse_iterator; + using subtype_reverse_iterator = std::reverse_iterator; + subtype_reverse_iterator subtype_rbegin() const { return subtype_reverse_iterator(subtype_end()); } @@ -348,6 +352,7 @@ public: } inline uint64_t getArrayNumElements() const; + Type *getArrayElementType() const { assert(getTypeID() == ArrayTyID); return ContainedTys[0]; @@ -444,8 +449,8 @@ template <> struct isa_impl { // graph of sub types. template <> struct GraphTraits { - typedef Type *NodeRef; - typedef Type::subtype_iterator ChildIteratorType; + using NodeRef = Type *; + using ChildIteratorType = Type::subtype_iterator; static NodeRef getEntryNode(Type *T) { return T; } static ChildIteratorType child_begin(NodeRef N) { return N->subtype_begin(); } @@ -453,8 +458,8 @@ template <> struct GraphTraits { }; template <> struct GraphTraits { - typedef const Type *NodeRef; - typedef Type::subtype_iterator ChildIteratorType; + using NodeRef = const Type *; + using ChildIteratorType = Type::subtype_iterator; static NodeRef getEntryNode(NodeRef T) { return T; } static ChildIteratorType child_begin(NodeRef N) { return N->subtype_begin(); } @@ -474,6 +479,6 @@ inline LLVMTypeRef *wrap(Type **Tys) { return reinterpret_cast(const_cast(Tys)); } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_IR_TYPE_H diff --git a/include/llvm/IR/TypeFinder.h b/include/llvm/IR/TypeFinder.h index 48c4f1161aa1d39a0ecf72722afeb3cb33d3ec4a..c050c388d398f8c857c177543d43f801ba736a41 100644 --- a/include/llvm/IR/TypeFinder.h +++ b/include/llvm/IR/TypeFinder.h @@ -44,8 +44,8 @@ public: void run(const Module &M, bool onlyNamed); void clear(); - typedef std::vector::iterator iterator; - typedef std::vector::const_iterator const_iterator; + using iterator = std::vector::iterator; + using const_iterator = std::vector::const_iterator; iterator begin() { return StructTypes.begin(); } iterator end() { return StructTypes.end(); } diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h index 05b68ccbb38e801087e6ead57c6015de2bcdc8eb..0ac13935c7cea0e42d71e3c0e05c5703ece0a4db 100644 --- a/include/llvm/IR/Use.h +++ b/include/llvm/IR/Use.h @@ -1,4 +1,4 @@ -//===-- llvm/Use.h - Definition of the Use class ----------------*- C++ -*-===// +//===- llvm/Use.h - Definition of the Use class -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -25,16 +25,16 @@ #ifndef LLVM_IR_USE_H #define LLVM_IR_USE_H +#include "llvm-c/Types.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/CBindingWrapping.h" -#include "llvm-c/Types.h" +#include "llvm/Support/Compiler.h" namespace llvm { -class Value; -class User; -class Use; template struct simplify_type; +class User; +class Value; /// \brief A Use represents the edge between a Value definition and its users. /// @@ -61,9 +61,33 @@ public: /// that also works with less standard-compliant compilers void swap(Use &RHS); + /// Pointer traits for the UserRef PointerIntPair. This ensures we always + /// use the LSB regardless of pointer alignment on different targets. + struct UserRefPointerTraits { + static inline void *getAsVoidPointer(User *P) { return P; } + + static inline User *getFromVoidPointer(void *P) { + return (User *)P; + } + + enum { NumLowBitsAvailable = 1 }; + }; + // A type for the word following an array of hung-off Uses in memory, which is // a pointer back to their User with the bottom bit set. - typedef PointerIntPair UserRef; + using UserRef = PointerIntPair; + + /// Pointer traits for the Prev PointerIntPair. This ensures we always use + /// the two LSBs regardless of pointer alignment on different targets. + struct PrevPointerTraits { + static inline void *getAsVoidPointer(Use **P) { return P; } + + static inline Use **getFromVoidPointer(void *P) { + return (Use **)P; + } + + enum { NumLowBitsAvailable = 2 }; + }; private: /// Destructor - Only for zap() @@ -75,9 +99,11 @@ private: enum PrevPtrTag { zeroDigitTag, oneDigitTag, stopTag, fullStopTag }; /// Constructor - Use(PrevPtrTag tag) : Val(nullptr) { Prev.setInt(tag); } + Use(PrevPtrTag tag) { Prev.setInt(tag); } public: + friend class Value; + operator Value *() const { return Val; } Value *get() const { return Val; } @@ -113,9 +139,9 @@ public: private: const Use *getImpliedUser() const LLVM_READONLY; - Value *Val; + Value *Val = nullptr; Use *Next; - PointerIntPair Prev; + PointerIntPair Prev; void setPrev(Use **NewPrev) { Prev.setPointer(NewPrev); } @@ -133,18 +159,18 @@ private: if (Next) Next->setPrev(StrippedPrev); } - - friend class Value; }; /// \brief Allow clients to treat uses just like values when using /// casting operators. template <> struct simplify_type { - typedef Value *SimpleType; + using SimpleType = Value *; + static SimpleType getSimplifiedValue(Use &Val) { return Val.get(); } }; template <> struct simplify_type { - typedef /*const*/ Value *SimpleType; + using SimpleType = /*const*/ Value *; + static SimpleType getSimplifiedValue(const Use &Val) { return Val.get(); } }; diff --git a/include/llvm/IR/UseListOrder.h b/include/llvm/IR/UseListOrder.h index ebe99223facd0b28aa0fe2e345aba63ad843e034..a8b394fc6302e847dfd74a93a64da1273c415e02 100644 --- a/include/llvm/IR/UseListOrder.h +++ b/include/llvm/IR/UseListOrder.h @@ -37,7 +37,7 @@ struct UseListOrder { UseListOrder &operator=(UseListOrder &&) = default; }; -typedef std::vector UseListOrderStack; +using UseListOrderStack = std::vector; } // end namespace llvm diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h index 54758a9b6d6a8bed2626bd8cccec7ec60ec3d9fa..109a3d5e7be8d02d31dd38f14cbefb0f721b49fd 100644 --- a/include/llvm/IR/User.h +++ b/include/llvm/IR/User.h @@ -1,4 +1,4 @@ -//===-- llvm/User.h - User class definition ---------------------*- C++ -*-===// +//===- llvm/User.h - User class definition ----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -46,8 +46,6 @@ class User : public Value { template friend struct HungoffOperandTraits; - virtual void anchor(); - LLVM_ATTRIBUTE_ALWAYS_INLINE inline static void * allocateFixedOperandUser(size_t, unsigned, unsigned); @@ -93,9 +91,11 @@ protected: /// should be called if there are no uses. void growHungoffUses(unsigned N, bool IsPhi = false); +protected: + ~User() = default; // Use deleteValue() to delete a generic Instruction. + public: User(const User &) = delete; - ~User() override = default; /// \brief Free memory allocated for User and Use objects. void operator delete(void *Usr); @@ -114,6 +114,7 @@ protected: ? OperandTraits::op_end(const_cast(that))[Idx] : OperandTraits::op_begin(const_cast(that))[Idx]; } + template Use &Op() { return OpFrom(this); } @@ -205,10 +206,10 @@ public: // --------------------------------------------------------------------------- // Operand Iterator interface... // - typedef Use* op_iterator; - typedef const Use* const_op_iterator; - typedef iterator_range op_range; - typedef iterator_range const_op_range; + using op_iterator = Use*; + using const_op_iterator = const Use*; + using op_range = iterator_range; + using const_op_range = iterator_range; op_iterator op_begin() { return getOperandList(); } const_op_iterator op_begin() const { return getOperandList(); } @@ -252,6 +253,7 @@ public: ptrdiff_t, const Value *, const Value *> { explicit const_value_op_iterator(const Use *U = nullptr) : iterator_adaptor_base(U) {} + const Value *operator*() const { return *I; } const Value *operator->() const { return operator*(); } }; @@ -290,6 +292,7 @@ public: return isa(V) || isa(V); } }; + // Either Use objects, or a Use pointer can be prepended to User. static_assert(alignof(Use) >= alignof(User), "Alignment is insufficient after objects prepended to User"); @@ -297,13 +300,15 @@ static_assert(alignof(Use *) >= alignof(User), "Alignment is insufficient after objects prepended to User"); template<> struct simplify_type { - typedef Value* SimpleType; + using SimpleType = Value*; + static SimpleType getSimplifiedValue(User::op_iterator &Val) { return Val->get(); } }; template<> struct simplify_type { - typedef /*const*/ Value* SimpleType; + using SimpleType = /*const*/ Value*; + static SimpleType getSimplifiedValue(User::const_op_iterator &Val) { return Val->get(); } diff --git a/include/llvm/IR/Value.def b/include/llvm/IR/Value.def index 48842d7f9cd56421c31f6685e2f95003ecce4436..cebd7f7297ef3b9b40080d17eb117c31c47c4754 100644 --- a/include/llvm/IR/Value.def +++ b/include/llvm/IR/Value.def @@ -20,10 +20,14 @@ #if !(defined HANDLE_GLOBAL_VALUE || defined HANDLE_CONSTANT || \ defined HANDLE_INSTRUCTION || defined HANDLE_INLINE_ASM_VALUE || \ defined HANDLE_METADATA_VALUE || defined HANDLE_VALUE || \ - defined HANDLE_CONSTANT_MARKER) + defined HANDLE_CONSTANT_MARKER || defined HANDLE_MEMORY_VALUE) #error "Missing macro definition of HANDLE_VALUE*" #endif +#ifndef HANDLE_MEMORY_VALUE +#define HANDLE_MEMORY_VALUE(ValueName) HANDLE_VALUE(ValueName) +#endif + #ifndef HANDLE_GLOBAL_VALUE #define HANDLE_GLOBAL_VALUE(ValueName) HANDLE_CONSTANT(ValueName) #endif @@ -54,9 +58,13 @@ HANDLE_VALUE(Argument) HANDLE_VALUE(BasicBlock) -HANDLE_VALUE(MemoryUse) -HANDLE_VALUE(MemoryDef) -HANDLE_VALUE(MemoryPhi) + +// FIXME: It's awkward that Value.def knows about classes in Analysis. While +// this doesn't introduce a strict link or include dependency, we should remove +// the circular dependency eventually. +HANDLE_MEMORY_VALUE(MemoryUse) +HANDLE_MEMORY_VALUE(MemoryDef) +HANDLE_MEMORY_VALUE(MemoryPhi) HANDLE_GLOBAL_VALUE(Function) HANDLE_GLOBAL_VALUE(GlobalAlias) @@ -94,6 +102,7 @@ HANDLE_CONSTANT_MARKER(ConstantDataLastVal, ConstantTokenNone) HANDLE_CONSTANT_MARKER(ConstantAggregateFirstVal, ConstantArray) HANDLE_CONSTANT_MARKER(ConstantAggregateLastVal, ConstantVector) +#undef HANDLE_MEMORY_VALUE #undef HANDLE_GLOBAL_VALUE #undef HANDLE_CONSTANT #undef HANDLE_INSTRUCTION diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index a4b48d7f3539b1c186afad9ad6010fbe4f465860..ccd40e5765841fc3bfeef929b09244bef02c6bfa 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -1,4 +1,4 @@ -//===-- llvm/Value.h - Definition of the Value class ------------*- C++ -*-===// +//===- llvm/Value.h - Definition of the Value class -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,13 +14,14 @@ #ifndef LLVM_IR_VALUE_H #define LLVM_IR_VALUE_H +#include "llvm-c/Types.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Use.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Casting.h" -#include "llvm-c/Types.h" #include #include +#include namespace llvm { @@ -44,12 +45,12 @@ class LLVMContext; class Module; class ModuleSlotTracker; class raw_ostream; +template class StringMapEntry; class StringRef; class Twine; class Type; -template class StringMapEntry; -typedef StringMapEntry ValueName; +using ValueName = StringMapEntry; //===----------------------------------------------------------------------===// // Value Class @@ -69,6 +70,8 @@ typedef StringMapEntry ValueName; /// objects that watch it and listen to RAUW and Destroy events. See /// llvm/IR/ValueHandle.h for details. class Value { + // The least-significant bit of the first word of Value *must* be zero: + // http://www.llvm.org/docs/ProgrammersManual.html#the-waymarking-algorithm Type *VTy; Use *UseList; @@ -120,9 +123,11 @@ private: template // UseT == 'Use' or 'const Use' class use_iterator_impl : public std::iterator { + friend class Value; + UseT *U; + explicit use_iterator_impl(UseT *u) : U(u) {} - friend class Value; public: use_iterator_impl() : U() {} @@ -198,10 +203,19 @@ private: protected: Value(Type *Ty, unsigned scid); + /// Value's destructor should be virtual by design, but that would require + /// that Value and all of its subclasses have a vtable that effectively + /// duplicates the information in the value ID. As a size optimization, the + /// destructor has been protected, and the caller should manually call + /// deleteValue. + ~Value(); // Use deleteValue() to delete a generic Value. + public: Value(const Value &) = delete; void operator=(const Value &) = delete; - virtual ~Value(); + + /// Delete a pointer to a generic Value. + void deleteValue(); /// \brief Support for debugging, callable in GDB: V->dump() void dump() const; @@ -309,8 +323,9 @@ public: return UseList == nullptr; } - typedef use_iterator_impl use_iterator; - typedef use_iterator_impl const_use_iterator; + using use_iterator = use_iterator_impl; + using const_use_iterator = use_iterator_impl; + use_iterator materialized_use_begin() { return use_iterator(UseList); } const_use_iterator materialized_use_begin() const { return const_use_iterator(UseList); @@ -345,8 +360,9 @@ public: return UseList == nullptr; } - typedef user_iterator_impl user_iterator; - typedef user_iterator_impl const_user_iterator; + using user_iterator = user_iterator_impl; + using const_user_iterator = user_iterator_impl; + user_iterator materialized_user_begin() { return user_iterator(UseList); } const_user_iterator materialized_user_begin() const { return const_user_iterator(UseList); @@ -482,6 +498,17 @@ public: static_cast(this)->stripPointerCasts()); } + /// \brief Strip off pointer casts, all-zero GEPs, aliases and barriers. + /// + /// Returns the original uncasted value. If this is called on a non-pointer + /// value, it returns 'this'. This function should be used only in + /// Alias analysis. + const Value *stripPointerCastsAndBarriers() const; + Value *stripPointerCastsAndBarriers() { + return const_cast( + static_cast(this)->stripPointerCastsAndBarriers()); + } + /// \brief Strip off pointer casts and all-zero GEPs. /// /// Returns the original uncasted value. If this is called on a non-pointer @@ -549,7 +576,6 @@ public: /// block. const Value *DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const; - Value *DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) { return const_cast( static_cast(this)->DoPHITranslation(CurBB, PredBB)); @@ -595,7 +621,7 @@ private: Use *Merged; Use **Next = &Merged; - for (;;) { + while (true) { if (!L) { *Next = R; break; @@ -629,6 +655,13 @@ protected: void setValueSubclassData(unsigned short D) { SubclassData = D; } }; +struct ValueDeleter { void operator()(Value *V) { V->deleteValue(); } }; + +/// Use this instead of std::unique_ptr or std::unique_ptr. +/// Those don't work because Value and Instruction's destructors are protected, +/// aren't virtual, and won't destroy the complete object. +typedef std::unique_ptr unique_value; + inline raw_ostream &operator<<(raw_ostream &OS, const Value &V) { V.print(OS); return OS; diff --git a/include/llvm/IR/ValueHandle.h b/include/llvm/IR/ValueHandle.h index 4838bac9e0f75c56985c5a7fc296ce8968529794..b45cc7b6dc029bf3ecc8e8950478eed9834aa951 100644 --- a/include/llvm/IR/ValueHandle.h +++ b/include/llvm/IR/ValueHandle.h @@ -17,10 +17,10 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include namespace llvm { -class ValueHandleBase; -template struct simplify_type; /// \brief This is the common base class of value handles. /// @@ -29,68 +29,71 @@ template struct simplify_type; /// below for details. class ValueHandleBase { friend class Value; + protected: /// \brief This indicates what sub class the handle actually is. /// /// This is to avoid having a vtable for the light-weight handle pointers. The /// fully general Callback version does have a vtable. - enum HandleBaseKind { - Assert, - Callback, - Tracking, - Weak - }; + enum HandleBaseKind { Assert, Callback, Weak, WeakTracking }; ValueHandleBase(const ValueHandleBase &RHS) : ValueHandleBase(RHS.PrevPair.getInt(), RHS) {} ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS) - : PrevPair(nullptr, Kind), Next(nullptr), V(RHS.V) { - if (isValid(V)) + : PrevPair(nullptr, Kind), Val(RHS.getValPtr()) { + if (isValid(getValPtr())) AddToExistingUseList(RHS.getPrevPtr()); } private: PointerIntPair PrevPair; - ValueHandleBase *Next; + ValueHandleBase *Next = nullptr; + Value *Val = nullptr; - Value* V; + void setValPtr(Value *V) { Val = V; } public: explicit ValueHandleBase(HandleBaseKind Kind) - : PrevPair(nullptr, Kind), Next(nullptr), V(nullptr) {} + : PrevPair(nullptr, Kind) {} ValueHandleBase(HandleBaseKind Kind, Value *V) - : PrevPair(nullptr, Kind), Next(nullptr), V(V) { - if (isValid(V)) + : PrevPair(nullptr, Kind), Val(V) { + if (isValid(getValPtr())) AddToUseList(); } ~ValueHandleBase() { - if (isValid(V)) + if (isValid(getValPtr())) RemoveFromUseList(); } Value *operator=(Value *RHS) { - if (V == RHS) return RHS; - if (isValid(V)) RemoveFromUseList(); - V = RHS; - if (isValid(V)) AddToUseList(); + if (getValPtr() == RHS) + return RHS; + if (isValid(getValPtr())) + RemoveFromUseList(); + setValPtr(RHS); + if (isValid(getValPtr())) + AddToUseList(); return RHS; } Value *operator=(const ValueHandleBase &RHS) { - if (V == RHS.V) return RHS.V; - if (isValid(V)) RemoveFromUseList(); - V = RHS.V; - if (isValid(V)) AddToExistingUseList(RHS.getPrevPtr()); - return V; + if (getValPtr() == RHS.getValPtr()) + return RHS.getValPtr(); + if (isValid(getValPtr())) + RemoveFromUseList(); + setValPtr(RHS.getValPtr()); + if (isValid(getValPtr())) + AddToExistingUseList(RHS.getPrevPtr()); + return getValPtr(); } - Value *operator->() const { return V; } - Value &operator*() const { return *V; } + Value *operator->() const { return getValPtr(); } + Value &operator*() const { return *getValPtr(); } protected: - Value *getValPtr() const { return V; } + Value *getValPtr() const { return Val; } static bool isValid(Value *V) { return V && @@ -105,7 +108,7 @@ protected: /// /// This should only be used if a derived class has manually removed the /// handle from the use list. - void clearValPtr() { V = nullptr; } + void clearValPtr() { setValPtr(nullptr); } public: // Callbacks made from Value. @@ -131,19 +134,16 @@ private: void AddToUseList(); }; -/// \brief Value handle that is nullable, but tries to track the Value. +/// \brief A nullable Value handle that is nullable. /// -/// This is a value handle that tries hard to point to a Value, even across -/// RAUW operations, but will null itself out if the value is destroyed. this -/// is useful for advisory sorts of information, but should not be used as the -/// key of a map (since the map would have to rearrange itself when the pointer -/// changes). +/// This is a value handle that points to a value, and nulls itself +/// out if that value is deleted. class WeakVH : public ValueHandleBase { public: WeakVH() : ValueHandleBase(Weak) {} WeakVH(Value *P) : ValueHandleBase(Weak, P) {} WeakVH(const WeakVH &RHS) - : ValueHandleBase(Weak, RHS) {} + : ValueHandleBase(Weak, RHS) {} WeakVH &operator=(const WeakVH &RHS) = default; @@ -162,14 +162,63 @@ public: // Specialize simplify_type to allow WeakVH to participate in // dyn_cast, isa, etc. template <> struct simplify_type { - typedef Value *SimpleType; + using SimpleType = Value *; + static SimpleType getSimplifiedValue(WeakVH &WVH) { return WVH; } }; template <> struct simplify_type { - typedef Value *SimpleType; + using SimpleType = Value *; + static SimpleType getSimplifiedValue(const WeakVH &WVH) { return WVH; } }; +/// \brief Value handle that is nullable, but tries to track the Value. +/// +/// This is a value handle that tries hard to point to a Value, even across +/// RAUW operations, but will null itself out if the value is destroyed. this +/// is useful for advisory sorts of information, but should not be used as the +/// key of a map (since the map would have to rearrange itself when the pointer +/// changes). +class WeakTrackingVH : public ValueHandleBase { +public: + WeakTrackingVH() : ValueHandleBase(WeakTracking) {} + WeakTrackingVH(Value *P) : ValueHandleBase(WeakTracking, P) {} + WeakTrackingVH(const WeakTrackingVH &RHS) + : ValueHandleBase(WeakTracking, RHS) {} + + WeakTrackingVH &operator=(const WeakTrackingVH &RHS) = default; + + Value *operator=(Value *RHS) { + return ValueHandleBase::operator=(RHS); + } + Value *operator=(const ValueHandleBase &RHS) { + return ValueHandleBase::operator=(RHS); + } + + operator Value*() const { + return getValPtr(); + } + + bool pointsToAliveValue() const { + return ValueHandleBase::isValid(getValPtr()); + } +}; + +// Specialize simplify_type to allow WeakTrackingVH to participate in +// dyn_cast, isa, etc. +template <> struct simplify_type { + using SimpleType = Value *; + + static SimpleType getSimplifiedValue(WeakTrackingVH &WVH) { return WVH; } +}; +template <> struct simplify_type { + using SimpleType = Value *; + + static SimpleType getSimplifiedValue(const WeakTrackingVH &WVH) { + return WVH; + } +}; + /// \brief Value handle that asserts if the Value is deleted. /// /// This is a Value Handle that points to a value and asserts out if the value @@ -191,7 +240,7 @@ class AssertingVH : public ValueHandleBase #endif { - friend struct DenseMapInfo >; + friend struct DenseMapInfo>; #ifndef NDEBUG Value *getRawValPtr() const { return ValueHandleBase::getValPtr(); } @@ -237,20 +286,23 @@ public: // Specialize DenseMapInfo to allow AssertingVH to participate in DenseMap. template -struct DenseMapInfo > { +struct DenseMapInfo> { static inline AssertingVH getEmptyKey() { AssertingVH Res; Res.setRawValPtr(DenseMapInfo::getEmptyKey()); return Res; } + static inline AssertingVH getTombstoneKey() { AssertingVH Res; Res.setRawValPtr(DenseMapInfo::getTombstoneKey()); return Res; } + static unsigned getHashValue(const AssertingVH &Val) { return DenseMapInfo::getHashValue(Val.getRawValPtr()); } + static bool isEqual(const AssertingVH &LHS, const AssertingVH &RHS) { return DenseMapInfo::isEqual(LHS.getRawValPtr(), RHS.getRawValPtr()); @@ -258,7 +310,7 @@ struct DenseMapInfo > { }; template -struct isPodLike > { +struct isPodLike> { #ifdef NDEBUG static const bool value = true; #else @@ -272,39 +324,37 @@ struct isPodLike > { /// to a Value (or subclass) across some operations which may move that value, /// but should never destroy it or replace it with some unacceptable type. /// -/// It is an error to do anything with a TrackingVH whose value has been -/// destroyed, except to destruct it. -/// /// It is an error to attempt to replace a value with one of a type which is /// incompatible with any of its outstanding TrackingVHs. -template -class TrackingVH : public ValueHandleBase { - void CheckValidity() const { - Value *VP = ValueHandleBase::getValPtr(); - - // Null is always ok. - if (!VP) return; +/// +/// It is an error to read from a TrackingVH that does not point to a valid +/// value. A TrackingVH is said to not point to a valid value if either it +/// hasn't yet been assigned a value yet or because the value it was tracking +/// has since been deleted. +/// +/// Assigning a value to a TrackingVH is always allowed, even if said TrackingVH +/// no longer points to a valid value. +template class TrackingVH { + WeakTrackingVH InnerHandle; - // Check that this value is valid (i.e., it hasn't been deleted). We - // explicitly delay this check until access to avoid requiring clients to be - // unnecessarily careful w.r.t. destruction. - assert(ValueHandleBase::isValid(VP) && "Tracked Value was deleted!"); +public: + ValueTy *getValPtr() const { + assert(InnerHandle.pointsToAliveValue() && + "TrackingVH must be non-null and valid on dereference!"); // Check that the value is a member of the correct subclass. We would like // to check this property on assignment for better debugging, but we don't // want to require a virtual interface on this VH. Instead we allow RAUW to // replace this value with a value of an invalid type, and check it here. - assert(isa(VP) && + assert(isa(InnerHandle) && "Tracked Value was replaced by one with an invalid type!"); + return cast(InnerHandle); } - ValueTy *getValPtr() const { - CheckValidity(); - return (ValueTy*)ValueHandleBase::getValPtr(); - } void setValPtr(ValueTy *P) { - CheckValidity(); - ValueHandleBase::operator=(GetAsValue(P)); + // Assigning to non-valid TrackingVH's are fine so we just unconditionally + // assign here. + InnerHandle = GetAsValue(P); } // Convert a ValueTy*, which may be const, to the type the base @@ -313,8 +363,8 @@ class TrackingVH : public ValueHandleBase { static Value *GetAsValue(const Value *V) { return const_cast(V); } public: - TrackingVH() : ValueHandleBase(Tracking) {} - TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, GetAsValue(P)) {} + TrackingVH() = default; + TrackingVH(ValueTy *P) { setValPtr(P); } operator ValueTy*() const { return getValPtr(); @@ -359,7 +409,8 @@ public: /// /// Called when this->getValPtr() is destroyed, inside ~Value(), so you /// may call any non-virtual Value method on getValPtr(), but no subclass - /// methods. If WeakVH were implemented as a CallbackVH, it would use this + /// methods. If WeakTrackingVH were implemented as a CallbackVH, it would use + /// this /// method to call setValPtr(NULL). AssertingVH would use this method to /// cause an assertion failure. /// @@ -370,7 +421,8 @@ public: /// \brief Callback for Value RAUW. /// /// Called when this->getValPtr()->replaceAllUsesWith(new_value) is called, - /// _before_ any of the uses have actually been replaced. If WeakVH were + /// _before_ any of the uses have actually been replaced. If WeakTrackingVH + /// were /// implemented as a CallbackVH, it would use this method to call /// setValPtr(new_value). AssertingVH would do nothing in this method. virtual void allUsesReplacedWith(Value *) {} @@ -450,10 +502,12 @@ public: PoisoningVH(ValueTy *P) : CallbackVH(GetAsValue(P)) {} PoisoningVH(const PoisoningVH &RHS) : CallbackVH(RHS), Poisoned(RHS.Poisoned) {} + ~PoisoningVH() { if (Poisoned) clearValPtr(); } + PoisoningVH &operator=(const PoisoningVH &RHS) { if (Poisoned) clearValPtr(); @@ -478,14 +532,17 @@ template struct DenseMapInfo> { Res.setRawValPtr(DenseMapInfo::getEmptyKey()); return Res; } + static inline PoisoningVH getTombstoneKey() { PoisoningVH Res; Res.setRawValPtr(DenseMapInfo::getTombstoneKey()); return Res; } + static unsigned getHashValue(const PoisoningVH &Val) { return DenseMapInfo::getHashValue(Val.getRawValPtr()); } + static bool isEqual(const PoisoningVH &LHS, const PoisoningVH &RHS) { return DenseMapInfo::isEqual(LHS.getRawValPtr(), RHS.getRawValPtr()); @@ -500,6 +557,6 @@ template struct isPodLike> { #endif }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_IR_VALUEHANDLE_H diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index 9648e1989f94835baeaccb8cd8a142254d4bd0ad..11d5823ee479b591a808dd1990a15719767b358b 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -46,7 +46,6 @@ namespace llvm { template class ValueMapCallbackVH; - template class ValueMapIterator; template @@ -57,7 +56,7 @@ class ValueMapConstIterator; /// as possible with future versions of ValueMap. template struct ValueMapConfig { - typedef MutexT mutex_type; + using mutex_type = MutexT; /// If FollowRAUW is true, the ValueMap will update mappings on RAUW. If it's /// false, the ValueMap will leave the original mapping in place. @@ -87,21 +86,21 @@ template> class ValueMap { friend class ValueMapCallbackVH; - typedef ValueMapCallbackVH ValueMapCVH; - typedef DenseMap> MapT; - typedef DenseMap MDMapT; - typedef typename Config::ExtraData ExtraData; + using ValueMapCVH = ValueMapCallbackVH; + using MapT = DenseMap>; + using MDMapT = DenseMap; + using ExtraData = typename Config::ExtraData; + MapT Map; Optional MDMap; ExtraData Data; - bool MayMapMetadata = true; public: - typedef KeyT key_type; - typedef ValueT mapped_type; - typedef std::pair value_type; - typedef unsigned size_type; + using key_type = KeyT; + using mapped_type = ValueT; + using value_type = std::pair; + using size_type = unsigned; explicit ValueMap(unsigned NumInitBuckets = 64) : Map(NumInitBuckets), Data() {} @@ -132,8 +131,9 @@ public: return Where->second.get(); } - typedef ValueMapIterator iterator; - typedef ValueMapConstIterator const_iterator; + using iterator = ValueMapIterator; + using const_iterator = ValueMapConstIterator; + inline iterator begin() { return iterator(Map.begin()); } inline iterator end() { return iterator(Map.end()); } inline const_iterator begin() const { return const_iterator(Map.begin()); } @@ -244,8 +244,8 @@ class ValueMapCallbackVH final : public CallbackVH { friend class ValueMap; friend struct DenseMapInfo; - typedef ValueMap ValueMapT; - typedef typename std::remove_pointer::type KeySansPointerT; + using ValueMapT = ValueMap; + using KeySansPointerT = typename std::remove_pointer::type; ValueMapT *Map; @@ -298,7 +298,7 @@ public: template struct DenseMapInfo> { - typedef ValueMapCallbackVH VH; + using VH = ValueMapCallbackVH; static inline VH getEmptyKey() { return VH(DenseMapInfo::getEmptyKey()); @@ -330,8 +330,8 @@ class ValueMapIterator : public std::iterator, ptrdiff_t> { - typedef typename DenseMapT::iterator BaseT; - typedef typename DenseMapT::mapped_type ValueT; + using BaseT = typename DenseMapT::iterator; + using ValueT = typename DenseMapT::mapped_type; BaseT I; @@ -344,7 +344,9 @@ public: struct ValueTypeProxy { const KeyT first; ValueT& second; + ValueTypeProxy *operator->() { return this; } + operator std::pair() const { return std::make_pair(first, second); } @@ -380,8 +382,8 @@ class ValueMapConstIterator : public std::iterator, ptrdiff_t> { - typedef typename DenseMapT::const_iterator BaseT; - typedef typename DenseMapT::mapped_type ValueT; + using BaseT = typename DenseMapT::const_iterator; + using ValueT = typename DenseMapT::mapped_type; BaseT I; diff --git a/include/llvm/IR/ValueSymbolTable.h b/include/llvm/IR/ValueSymbolTable.h index 9e86751dae6f7652593811bb0d5f2b07aaeddacc..26cbbfabfc0c5f72885377326518d2d7a15f6c69 100644 --- a/include/llvm/IR/ValueSymbolTable.h +++ b/include/llvm/IR/ValueSymbolTable.h @@ -49,13 +49,13 @@ class ValueSymbolTable { /// @{ public: /// @brief A mapping of names to values. - typedef StringMap ValueMap; + using ValueMap = StringMap; /// @brief An iterator over a ValueMap. - typedef ValueMap::iterator iterator; + using iterator = ValueMap::iterator; /// @brief A const_iterator over a ValueMap. - typedef ValueMap::const_iterator const_iterator; + using const_iterator = ValueMap::const_iterator; /// @} /// @name Constructors diff --git a/include/llvm/IR/Verifier.h b/include/llvm/IR/Verifier.h index 71f727c3d4fc98a90d6a9afc34fc4769a8c6adbc..15e52d9e074252235ae447a11208454bf2b4cc68 100644 --- a/include/llvm/IR/Verifier.h +++ b/include/llvm/IR/Verifier.h @@ -21,13 +21,17 @@ #ifndef LLVM_IR_VERIFIER_H #define LLVM_IR_VERIFIER_H +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/PassManager.h" +#include namespace llvm { +class APInt; class Function; class FunctionPass; -class ModulePass; +class Instruction; +class MDNode; class Module; class raw_ostream; struct VerifierSupport; @@ -47,7 +51,7 @@ class TBAAVerifier { /// the offset of the access. If zero, only a zero offset is allowed. /// /// \c BitWidth has no meaning if \c IsInvalid is true. - typedef std::pair TBAABaseNodeSummary; + using TBAABaseNodeSummary = std::pair; DenseMap TBAABaseNodes; /// Maps an alleged scalar TBAA node to a boolean that is true if the said @@ -101,12 +105,14 @@ FunctionPass *createVerifierPass(bool FatalErrors = true); /// and debug info errors. class VerifierAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; + static AnalysisKey Key; public: struct Result { bool IRBroken, DebugInfoBroken; }; + Result run(Module &M, ModuleAnalysisManager &); Result run(Function &F, FunctionAnalysisManager &); }; @@ -136,7 +142,6 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +} // end namespace llvm -} // End llvm namespace - -#endif +#endif // LLVM_IR_VERIFIER_H diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 15c8ff6d04def344cee7b792ec89455d05da5a8c..abb0aa3e3caf5f164a0ee6820843f8f1f902b00e 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -86,7 +86,6 @@ void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); void initializeCFGPrinterLegacyPassPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&); -void initializeLateCFGSimplifyPassPass(PassRegistry&); void initializeCFGViewerLegacyPassPass(PassRegistry&); void initializeCFLAndersAAWrapperPassPass(PassRegistry&); void initializeCFLSteensAAWrapperPassPass(PassRegistry&); @@ -130,6 +129,7 @@ void initializeEfficiencySanitizerPass(PassRegistry&); void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&); void initializeExpandISelPseudosPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); +void initializeExpandReductionsPass(PassRegistry&); void initializeExternalAAWrapperPassPass(PassRegistry&); void initializeFEntryInserterPass(PassRegistry&); void initializeFinalizeMachineBundlesPass(PassRegistry&); @@ -144,6 +144,7 @@ void initializeGCModuleInfoPass(PassRegistry&); void initializeGCOVProfilerLegacyPassPass(PassRegistry&); void initializeGVNHoistLegacyPassPass(PassRegistry&); void initializeGVNLegacyPassPass(PassRegistry&); +void initializeGVNSinkLegacyPassPass(PassRegistry&); void initializeGlobalDCELegacyPassPass(PassRegistry&); void initializeGlobalMergePass(PassRegistry&); void initializeGlobalOptLegacyPassPass(PassRegistry&); @@ -173,19 +174,21 @@ void initializeIntervalPartitionPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAVerificationPassPass(PassRegistry&); void initializeLCSSAWrapperPassPass(PassRegistry&); +void initializeLateCFGSimplifyPassPass(PassRegistry&); void initializeLazyBlockFrequencyInfoPassPass(PassRegistry&); void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&); void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&); +void initializeLazyValueInfoPrinterPass(PassRegistry&); void initializeLazyValueInfoWrapperPassPass(PassRegistry&); void initializeLegacyLICMPassPass(PassRegistry&); void initializeLegacyLoopSinkPassPass(PassRegistry&); -void initializeLazyValueInfoPrinterPass(PassRegistry&); void initializeLegalizerPass(PassRegistry&); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); void initializeLintPass(PassRegistry&); void initializeLiveDebugValuesPass(PassRegistry&); void initializeLiveDebugVariablesPass(PassRegistry&); void initializeLiveIntervalsPass(PassRegistry&); +void initializeLiveRangeShrinkPass(PassRegistry&); void initializeLiveRegMatrixPass(PassRegistry&); void initializeLiveStacksPass(PassRegistry&); void initializeLiveVariablesPass(PassRegistry&); @@ -193,6 +196,7 @@ void initializeLoadCombinePass(PassRegistry&); void initializeLoadStoreVectorizerPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); void initializeLocalStackSlotPassPass(PassRegistry&); +void initializeLocalizerPass(PassRegistry&); void initializeLoopAccessLegacyAnalysisPass(PassRegistry&); void initializeLoopDataPrefetchLegacyPassPass(PassRegistry&); void initializeLoopDeletionLegacyPassPass(PassRegistry&); @@ -300,6 +304,7 @@ void initializeProcessImplicitDefsPass(PassRegistry&); void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&); void initializePromoteLegacyPassPass(PassRegistry&); void initializePruneEHPass(PassRegistry&); +void initializeRABasicPass(PassRegistry&); void initializeRAGreedyPass(PassRegistry&); void initializeReassociateLegacyPassPass(PassRegistry&); void initializeRegBankSelectPass(PassRegistry&); @@ -319,16 +324,19 @@ void initializeSCCPLegacyPassPass(PassRegistry&); void initializeSCEVAAWrapperPassPass(PassRegistry&); void initializeSLPVectorizerPass(PassRegistry&); void initializeSROALegacyPassPass(PassRegistry&); -void initializeSafeStackPass(PassRegistry&); +void initializeSafeStackLegacyPassPass(PassRegistry&); void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); void initializeSanitizerCoverageModulePass(PassRegistry&); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); +void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); void initializeScalarizerPass(PassRegistry&); +void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); void initializeSeparateConstOffsetFromGEPPass(PassRegistry&); void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry&); void initializeSimpleInlinerPass(PassRegistry&); +void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry&); void initializeSingleLoopExtractorPass(PassRegistry&); void initializeSinkingLegacyPassPass(PassRegistry&); void initializeSjLjEHPreparePass(PassRegistry&); diff --git a/include/llvm/LTO/Config.h b/include/llvm/LTO/Config.h index ede6637dfa4dd61f29ca653c5a9c738148123283..73106f77ca55ca5be32299800d110911b6850e76 100644 --- a/include/llvm/LTO/Config.h +++ b/include/llvm/LTO/Config.h @@ -39,13 +39,16 @@ struct Config { std::string CPU; TargetOptions Options; std::vector MAttrs; - Reloc::Model RelocModel = Reloc::PIC_; + Optional RelocModel = Reloc::PIC_; CodeModel::Model CodeModel = CodeModel::Default; CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default; TargetMachine::CodeGenFileType CGFileType = TargetMachine::CGFT_ObjectFile; unsigned OptLevel = 2; bool DisableVerify = false; + /// Use the new pass manager + bool UseNewPM = false; + /// Disable entirely the optimizer, including importing for ThinLTO bool CodeGenOnly = false; diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h index 693568f5b9a968b393ab02b8289019c5a4afc4fe..d678a68ed8605e955d386e9d3198d10e66a6448e 100644 --- a/include/llvm/LTO/LTO.h +++ b/include/llvm/LTO/LTO.h @@ -97,7 +97,7 @@ private: // [begin, end) for each module std::vector> ModuleSymIndices; - StringRef SourceFileName, COFFLinkerOpts; + StringRef TargetTriple, SourceFileName, COFFLinkerOpts; std::vector ComdatTable; public: @@ -126,6 +126,7 @@ public: using irsymtab::Symbol::getCommonSize; using irsymtab::Symbol::getCommonAlignment; using irsymtab::Symbol::getCOFFWeakExternalFallback; + using irsymtab::Symbol::isExecutable; }; /// A range over the symbols in this InputFile. @@ -137,6 +138,9 @@ public: /// Returns the path to the InputFile. StringRef getName() const; + /// Returns the input file's target triple. + StringRef getTargetTriple() const { return TargetTriple; } + /// Returns the source file path specified at compile time. StringRef getSourceFileName() const { return SourceFileName; } @@ -277,6 +281,16 @@ private: bool HasModule = false; std::unique_ptr CombinedModule; std::unique_ptr Mover; + + // This stores the information about a regular LTO module that we have added + // to the link. It will either be linked immediately (for modules without + // summaries) or after summary-based dead stripping (for modules with + // summaries). + struct AddedModule { + std::unique_ptr M; + std::vector Keep; + }; + std::vector ModsWithSummaries; } RegularLTO; struct ThinLTOState { @@ -299,9 +313,10 @@ private: /// The unmangled name of the global. std::string IRName; - /// Keep track if the symbol is visible outside of ThinLTO (i.e. in - /// either a regular object or the regular LTO partition). - bool VisibleOutsideThinLTO = false; + /// Keep track if the symbol is visible outside of a module with a summary + /// (i.e. in either a regular object or a regular LTO module without a + /// summary). + bool VisibleOutsideSummary = false; bool UnnamedAddr = true; @@ -335,8 +350,9 @@ private: // Global mapping from mangled symbol names to resolutions. StringMap GlobalResolutions; - void addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res, - unsigned Partition); + void addModuleToGlobalRes(ArrayRef Syms, + ArrayRef Res, unsigned Partition, + bool InSummary); // These functions take a range of symbol resolutions [ResI, ResE) and consume // the resolutions used by a single input module by incrementing ResI. After @@ -344,10 +360,13 @@ private: // the remaining modules in the InputFile. Error addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE); - Error addRegularLTO(BitcodeModule BM, - ArrayRef Syms, - const SymbolResolution *&ResI, - const SymbolResolution *ResE); + + Expected + addRegularLTO(BitcodeModule BM, ArrayRef Syms, + const SymbolResolution *&ResI, const SymbolResolution *ResE); + Error linkRegularLTO(RegularLTOState::AddedModule Mod, + bool LivenessFromIndex); + Error addThinLTO(BitcodeModule BM, ArrayRef Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE); @@ -362,8 +381,9 @@ private: /// each global symbol based on its internal resolution of that symbol. struct SymbolResolution { SymbolResolution() - : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0) { - } + : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0), + LinkerRedefined(0) {} + /// The linker has chosen this definition of the symbol. unsigned Prevailing : 1; @@ -373,6 +393,10 @@ struct SymbolResolution { /// The definition of this symbol is visible outside of the LTO unit. unsigned VisibleToRegularObj : 1; + + /// Linker redefined version of the symbol which appeared in -wrap or -defsym + /// linker option. + unsigned LinkerRedefined : 1; }; } // namespace lto diff --git a/include/llvm/LTO/legacy/LTOModule.h b/include/llvm/LTO/legacy/LTOModule.h index 2a8758587a112c642a79788c1659b6faf3c5d847..017e223ed8a616520719ff4a12b019562e04a4f1 100644 --- a/include/llvm/LTO/legacy/LTOModule.h +++ b/include/llvm/LTO/legacy/LTOModule.h @@ -158,7 +158,7 @@ public: private: /// Parse metadata from the module - // FIXME: it only parses "Linker Options" metadata at the moment + // FIXME: it only parses "llvm.linker.options" metadata at the moment void parseMetadata(); /// Parse the symbols from the module and model-level ASM and add them to diff --git a/include/llvm/LinkAllIR.h b/include/llvm/LinkAllIR.h index f078c73f979ea5b94a08d9fe69963638f38aa753..de1d305f8e7724d5dca8c378bf6b83ea1566e442 100644 --- a/include/llvm/LinkAllIR.h +++ b/include/llvm/LinkAllIR.h @@ -16,13 +16,13 @@ #ifndef LLVM_LINKALLIR_H #define LLVM_LINKALLIR_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Memory.h" diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 39a86e838bde3bb2e7c3857bce9203f6543d141a..c309ddbe2f0279fe4d5c33624f409a5f1590b16b 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -16,8 +16,8 @@ #define LLVM_LINKALLPASSES_H #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysisEvaluator.h" +#include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" @@ -38,6 +38,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/Support/Valgrind.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" @@ -48,7 +49,6 @@ #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Vectorize.h" -#include "llvm/Support/Valgrind.h" #include namespace { @@ -206,6 +206,7 @@ namespace { (void) llvm::createMemDerefPrinter(); (void) llvm::createFloat2IntPass(); (void) llvm::createEliminateAvailableExternallyPass(); + (void) llvm::createScalarizeMaskedMemIntrinPass(); (void)new llvm::IntervalPartition(); (void)new llvm::ScalarEvolutionWrapperPass(); diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h index 643902377dd31350e50a5e3aa4ad1c003631059b..ef33250204ecff8d52bf2e4326b83bd7e9a4dff5 100644 --- a/include/llvm/MC/ConstantPools.h +++ b/include/llvm/MC/ConstantPools.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/SMLoc.h" #include +#include namespace llvm { @@ -42,9 +43,9 @@ struct ConstantPoolEntry { // A class to keep track of assembler-generated constant pools that are use to // implement the ldr-pseudo. class ConstantPool { - typedef SmallVector EntryVecTy; + using EntryVecTy = SmallVector; EntryVecTy Entries; - DenseMap CachedEntries; + std::map CachedEntries; public: // Initialize a new empty constant pool @@ -63,6 +64,8 @@ public: // Return true if the constant pool is empty bool empty(); + + void clearCache(); }; class AssemblerConstantPools { @@ -80,12 +83,13 @@ class AssemblerConstantPools { // sections in a stable order to ensure that we have print the // constant pools in a deterministic order when printing an assembly // file. - typedef MapVector ConstantPoolMapTy; + using ConstantPoolMapTy = MapVector; ConstantPoolMapTy ConstantPools; public: void emitAll(MCStreamer &Streamer); void emitForCurrentSection(MCStreamer &Streamer); + void clearCacheForCurrentSection(MCStreamer &Streamer); const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr, unsigned Size, SMLoc Loc); diff --git a/include/llvm/MC/LaneBitmask.h b/include/llvm/MC/LaneBitmask.h index 89e60928405dfd60b2ba1889bc59b6e1d9f4492d..5ca06d1148e2864cc2769d698bfb65fac7dd8b7b 100644 --- a/include/llvm/MC/LaneBitmask.h +++ b/include/llvm/MC/LaneBitmask.h @@ -1,4 +1,4 @@ -//===-- llvm/MC/LaneBitmask.h -----------------------------------*- C++ -*-===// +//===- llvm/MC/LaneBitmask.h ------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -30,14 +30,16 @@ #ifndef LLVM_MC_LANEBITMASK_H #define LLVM_MC_LANEBITMASK_H +#include "llvm/Support/Compiler.h" #include "llvm/Support/Format.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" namespace llvm { + struct LaneBitmask { // When changing the underlying type, change the format string as well. - typedef unsigned Type; + using Type = unsigned; enum : unsigned { BitWidth = 8*sizeof(Type) }; constexpr static const char *const FormatStr = "%08X"; @@ -84,6 +86,7 @@ namespace llvm { OS << format(LaneBitmask::FormatStr, LaneMask.getAsInteger()); }); } -} + +} // end namespace llvm #endif // LLVM_MC_LANEBITMASK_H diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index bd2717de9960bdf0517f8840711eb54860f0d42e..234762f36dd47eebfccb77237cc58d895bbf40fc 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -51,12 +51,6 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment }; } // end namespace LCOMM -enum class DebugCompressionType { - DCT_None, // no compression - DCT_Zlib, // zlib style complession - DCT_ZlibGnu // zlib-gnu style compression -}; - /// This class is intended to be used as a base class for asm /// properties and features specific to the target. class MCAsmInfo { @@ -65,8 +59,8 @@ protected: // Properties to be set by the target writer, used to configure asm printer. // - /// Pointer size in bytes. Default is 4. - unsigned PointerSize = 4; + /// Code pointer size in bytes. Default is 4. + unsigned CodePointerSize = 4; /// Size of the stack slot reserved for callee-saved registers, in bytes. /// Default is same as pointer size. @@ -366,7 +360,7 @@ protected: bool PreserveAsmComments; /// Compress DWARF debug sections. Defaults to no compression. - DebugCompressionType CompressDebugSections = DebugCompressionType::DCT_None; + DebugCompressionType CompressDebugSections = DebugCompressionType::None; /// True if the integrated assembler should interpret 'a >> b' constant /// expressions as logical rather than arithmetic. @@ -384,8 +378,8 @@ public: explicit MCAsmInfo(); virtual ~MCAsmInfo(); - /// Get the pointer size in bytes. - unsigned getPointerSize() const { return PointerSize; } + /// Get the code pointer size in bytes. + unsigned getCodePointerSize() const { return CodePointerSize; } /// Get the callee-saved register stack slot /// size in bytes. diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index c29abaa03a6de7ff9b6fbe2275293864ae216250..63f7057a7076f263115c3dd40808bf8b4971f794 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -11,11 +11,11 @@ #define LLVM_MC_MCASSEMBLER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCFixup.h" @@ -60,36 +60,36 @@ class MCAssembler { friend class MCAsmLayout; public: - typedef std::vector SectionListType; - typedef std::vector SymbolDataListType; + using SectionListType = std::vector; + using SymbolDataListType = std::vector; - typedef pointee_iterator const_iterator; - typedef pointee_iterator iterator; + using const_iterator = pointee_iterator; + using iterator = pointee_iterator; - typedef pointee_iterator - const_symbol_iterator; - typedef pointee_iterator symbol_iterator; + using const_symbol_iterator = + pointee_iterator; + using symbol_iterator = pointee_iterator; - typedef iterator_range symbol_range; - typedef iterator_range const_symbol_range; + using symbol_range = iterator_range; + using const_symbol_range = iterator_range; - typedef std::vector::const_iterator - const_indirect_symbol_iterator; - typedef std::vector::iterator indirect_symbol_iterator; + using const_indirect_symbol_iterator = + std::vector::const_iterator; + using indirect_symbol_iterator = std::vector::iterator; - typedef std::vector::const_iterator - const_data_region_iterator; - typedef std::vector::iterator data_region_iterator; + using const_data_region_iterator = + std::vector::const_iterator; + using data_region_iterator = std::vector::iterator; /// MachO specific deployment target version info. // A Major version of 0 indicates that no version information was supplied // and so the corresponding load command should not be emitted. - typedef struct { + using VersionMinInfoType = struct { MCVersionMinType Kind; unsigned Major; unsigned Minor; unsigned Update; - } VersionMinInfoType; + }; private: MCContext &Context; diff --git a/include/llvm/MC/MCCodeView.h b/include/llvm/MC/MCCodeView.h index 41521a6549b86d33648b35dc0742fd371e0ce437..c3f1cecc97f48753577ca6911c31b700726c347a 100644 --- a/include/llvm/MC/MCCodeView.h +++ b/include/llvm/MC/MCCodeView.h @@ -14,10 +14,10 @@ #ifndef LLVM_MC_MCCODEVIEW_H #define LLVM_MC_MCCODEVIEW_H -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringMap.h" -#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCFragment.h" +#include "llvm/MC/MCObjectStreamer.h" #include #include diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index b3106936e27f3656afecff4d68b377434344735f..2c60014adf2396b67d27dd3dd0a3cb4df1a06cdc 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -17,12 +17,12 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -46,17 +46,19 @@ namespace llvm { class MCSectionELF; class MCSectionMachO; class MCSectionWasm; + class MCStreamer; class MCSymbol; class MCSymbolELF; class MCSymbolWasm; class SMLoc; + class SourceMgr; /// Context object for machine code objects. This class owns all of the /// sections that it creates. /// class MCContext { public: - typedef StringMap SymbolTable; + using SymbolTable = StringMap; private: /// The SourceMgr for this object, if any. @@ -223,10 +225,12 @@ namespace llvm { std::string SectionName; StringRef GroupName; unsigned UniqueID; + WasmSectionKey(StringRef SectionName, StringRef GroupName, unsigned UniqueID) : SectionName(SectionName), GroupName(GroupName), UniqueID(UniqueID) { } + bool operator<(const WasmSectionKey &Other) const { if (SectionName != Other.SectionName) return SectionName < Other.SectionName; diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index 0d69c2005cb43218655b10f64c9f6585037764dc..79f1b9525019c82b4fb98c2f991ceef24d105893 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -168,10 +168,10 @@ public: MCLineDivisions[Sec].push_back(LineEntry); } - typedef std::vector MCDwarfLineEntryCollection; - typedef MCDwarfLineEntryCollection::iterator iterator; - typedef MCDwarfLineEntryCollection::const_iterator const_iterator; - typedef MapVector MCLineDivisionMap; + using MCDwarfLineEntryCollection = std::vector; + using iterator = MCDwarfLineEntryCollection::iterator; + using const_iterator = MCDwarfLineEntryCollection::const_iterator; + using MCLineDivisionMap = MapVector; private: // A collection of MCDwarfLineEntry for each section. diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index f22fc11f9b0734ad39f58c2c76d82ea7beae0c67..2efd37924e2efabb91c6f2d2699245a01bc2a2c3 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -11,8 +11,8 @@ #define LLVM_MC_MCELFOBJECTWRITER_H #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index c850abf42e2c6fad9d34c72dc2af7fcced08a7cb..a91a31414bdb17df84b057f33368a03276faf0e4 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -28,7 +28,8 @@ class MCSymbol; class MCValue; class raw_ostream; class StringRef; -typedef DenseMap SectionAddrMap; + +using SectionAddrMap = DenseMap; /// \brief Base class for the full range of assembler expressions which are /// needed for parsing. diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h index fc8257f90a9f7abfbac134de1eb55a31584761e5..0aca922e3cf5823b939af8db78336e55cb981a81 100644 --- a/include/llvm/MC/MCFragment.h +++ b/include/llvm/MC/MCFragment.h @@ -11,10 +11,10 @@ #define LLVM_MC_MCFRAGMENT_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/SMLoc.h" @@ -200,8 +200,8 @@ protected: Sec) {} public: - typedef SmallVectorImpl::const_iterator const_fixup_iterator; - typedef SmallVectorImpl::iterator fixup_iterator; + using const_fixup_iterator = SmallVectorImpl::const_iterator; + using fixup_iterator = SmallVectorImpl::iterator; SmallVectorImpl &getFixups() { return Fixups; } const SmallVectorImpl &getFixups() const { return Fixups; } diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h index 702279659371714314fc2fd15a4a74780195a332..9bf440ea96d21d1ec95da674dba1ac7af7c6697f 100644 --- a/include/llvm/MC/MCInst.h +++ b/include/llvm/MC/MCInst.h @@ -176,8 +176,9 @@ public: void addOperand(const MCOperand &Op) { Operands.push_back(Op); } - typedef SmallVectorImpl::iterator iterator; - typedef SmallVectorImpl::const_iterator const_iterator; + using iterator = SmallVectorImpl::iterator; + using const_iterator = SmallVectorImpl::const_iterator; + void clear() { Operands.clear(); } void erase(iterator I) { Operands.erase(I); } size_t size() const { return Operands.size(); } diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h index 0c3525bbeda65ca7120e2978f926585a00826854..f0fd07f43cf33ac567eef97cb14ae6c7d2e1c62b 100644 --- a/include/llvm/MC/MCLinkerOptimizationHint.h +++ b/include/llvm/MC/MCLinkerOptimizationHint.h @@ -111,7 +111,7 @@ class MCLOHDirective { const MCAsmLayout &Layout) const; public: - typedef SmallVectorImpl LOHArgs; + using LOHArgs = SmallVectorImpl; MCLOHDirective(MCLOHType Kind, const LOHArgs &Args) : Kind(Kind), Args(Args.begin(), Args.end()) { @@ -140,7 +140,7 @@ class MCLOHContainer { SmallVector Directives; public: - typedef SmallVectorImpl LOHDirectives; + using LOHDirectives = SmallVectorImpl; MCLOHContainer() = default; @@ -179,8 +179,8 @@ public: }; // Add types for specialized template using MCSymbol. -typedef MCLOHDirective::LOHArgs MCLOHArgs; -typedef MCLOHContainer::LOHDirectives MCLOHDirectives; +using MCLOHArgs = MCLOHDirective::LOHArgs; +using MCLOHDirectives = MCLOHContainer::LOHDirectives; } // end namespace llvm diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h index b93638f86408476d459aa31181a133c40181d13b..2d2480a27223cd294c6acaed20cc28212eef952a 100644 --- a/include/llvm/MC/MCMachObjectWriter.h +++ b/include/llvm/MC/MCMachObjectWriter.h @@ -12,11 +12,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/StringTableBuilder.h" -#include "llvm/Support/MachO.h" #include #include #include diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index 8b2a1261b220442a7f42edd4bfcd52a08b09f933..4d634447987b26d94bb3ff3342fe5b0ebaec4112 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -109,6 +109,9 @@ protected: MCSection *DwarfLineDWOSection; MCSection *DwarfLocDWOSection; MCSection *DwarfStrOffDWOSection; + + /// The DWARF v5 string offset and address table sections. + MCSection *DwarfStrOffSection; MCSection *DwarfAddrSection; // These are for Fission DWP files. @@ -260,6 +263,7 @@ public: MCSection *getDwarfLineDWOSection() const { return DwarfLineDWOSection; } MCSection *getDwarfLocDWOSection() const { return DwarfLocDWOSection; } MCSection *getDwarfStrOffDWOSection() const { return DwarfStrOffDWOSection; } + MCSection *getDwarfStrOffSection() const { return DwarfStrOffSection; } MCSection *getDwarfAddrSection() const { return DwarfAddrSection; } MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; } MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; } diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index 11f8dfa24484d5cfbd357bbdc3c5de22ffc7d1bd..7c1189e46ab237aec24f8898d0c5cf9d6414d7be 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -98,7 +98,8 @@ public: void EmitSLEB128Value(const MCExpr *Value) override; void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override; void ChangeSection(MCSection *Section, const MCExpr *Subsection) override; - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo& STI) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool = false) override; /// \brief Emit an instruction to a special fragment, because this instruction /// can change its size during relaxation. diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h index 7ddc7722e512beb8e6dd56690e992a41951a1ee7..7836ece2d6885d4796444737d7414a555071af1a 100644 --- a/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/include/llvm/MC/MCParser/MCAsmLexer.h @@ -161,6 +161,7 @@ protected: // Can only create subclasses. bool IsAtStartOfStatement = true; AsmCommentConsumer *CommentConsumer = nullptr; + bool AltMacroMode; MCAsmLexer(); virtual AsmToken LexToken() = 0; @@ -175,6 +176,14 @@ public: MCAsmLexer &operator=(const MCAsmLexer &) = delete; virtual ~MCAsmLexer(); + bool IsaAltMacroMode() { + return AltMacroMode; + } + + void SetAltMacroMode(bool AltMacroSet) { + AltMacroMode = AltMacroSet; + } + /// Consume the next token from the input stream and return it. /// /// The lexer will continuosly return the end-of-file token once the end of diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index 6763374185ec169ea73c0829b7df9151cffdccf3..3a659f048ccf6f41a8158e84a0ab72c9f78c9282 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -11,9 +11,9 @@ #define LLVM_MC_MCPARSER_MCASMPARSER_H #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" @@ -67,9 +67,9 @@ public: /// assembly parsers. class MCAsmParser { public: - typedef bool (*DirectiveHandler)(MCAsmParserExtension*, StringRef, SMLoc); - typedef std::pair - ExtensionDirectiveHandler; + using DirectiveHandler = bool (*)(MCAsmParserExtension*, StringRef, SMLoc); + using ExtensionDirectiveHandler = + std::pair; struct MCPendingError { SMLoc Loc; diff --git a/include/llvm/MC/MCParser/MCTargetAsmParser.h b/include/llvm/MC/MCParser/MCTargetAsmParser.h index c81a7624011fa096c9a8e300ba6f131aa0a5fe87..b8d3180cd49c97ca1a4c042a625428728d7e0341 100644 --- a/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -27,7 +27,7 @@ class MCStreamer; class MCSubtargetInfo; template class SmallVectorImpl; -typedef SmallVectorImpl> OperandVector; +using OperandVector = SmallVectorImpl>; enum AsmRewriteKind { AOK_Delete = 0, // Rewrite should be ignored. diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index 015d0b96d9f27b4cecaff12cc46ff4a06d9c6163..de98abe0dc46cc6f8b8985398d57ff709f0d0901 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -27,13 +27,13 @@ namespace llvm { /// An unsigned integer type large enough to represent all physical registers, /// but not necessarily virtual registers. -typedef uint16_t MCPhysReg; +using MCPhysReg = uint16_t; /// MCRegisterClass - Base class of TargetRegisterClass. class MCRegisterClass { public: - typedef const MCPhysReg* iterator; - typedef const MCPhysReg* const_iterator; + using iterator = const MCPhysReg*; + using const_iterator = const MCPhysReg*; const iterator RegsBegin; const uint8_t *const RegSet; @@ -134,7 +134,7 @@ struct MCRegisterDesc { /// class MCRegisterInfo { public: - typedef const MCRegisterClass *regclass_iterator; + using regclass_iterator = const MCRegisterClass *; /// DwarfLLVMRegPair - Emitted by tablegen so Dwarf<->LLVM reg mappings can be /// performed with a binary search. diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 2974d8f1b80b029d705f050588acef79ed2dc418..cc306d47250d4fa2afaaaa1867c8e0995a591b18 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -14,8 +14,8 @@ #ifndef LLVM_MC_MCSECTION_H #define LLVM_MC_MCSECTION_H -#include "llvm/ADT/ilist.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/SectionKind.h" #include @@ -47,13 +47,13 @@ public: BundleLockedAlignToEnd }; - typedef iplist FragmentListType; + using FragmentListType = iplist; - typedef FragmentListType::const_iterator const_iterator; - typedef FragmentListType::iterator iterator; + using const_iterator = FragmentListType::const_iterator; + using iterator = FragmentListType::iterator; - typedef FragmentListType::const_reverse_iterator const_reverse_iterator; - typedef FragmentListType::reverse_iterator reverse_iterator; + using const_reverse_iterator = FragmentListType::const_reverse_iterator; + using reverse_iterator = FragmentListType::reverse_iterator; private: MCSymbol *Begin; diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h index 3bc5408a4f75e204f004928f9153137acf0b7cdd..89db09cbdbdc175aec55a89d47308228f96dcdfe 100644 --- a/include/llvm/MC/MCSectionMachO.h +++ b/include/llvm/MC/MCSectionMachO.h @@ -15,8 +15,8 @@ #define LLVM_MC_MCSECTIONMACHO_H #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCSection.h" -#include "llvm/Support/MachO.h" namespace llvm { diff --git a/include/llvm/MC/MCSectionWasm.h b/include/llvm/MC/MCSectionWasm.h index 4e19196175c074703df2656bf6123c763f17435e..29d62a7a6f82b29483ee41361a30e0725cdb2977 100644 --- a/include/llvm/MC/MCSectionWasm.h +++ b/include/llvm/MC/MCSectionWasm.h @@ -26,6 +26,7 @@ class MCSymbol; /// This represents a section on wasm. class MCSectionWasm final : public MCSection { +private: /// This is the name of the section. The referenced memory is owned by /// TargetLoweringObjectFileWasm's WasmUniqueMap. StringRef SectionName; @@ -40,10 +41,11 @@ class MCSectionWasm final : public MCSection { const MCSymbolWasm *Group; - // The offset of the MC function section in the wasm code section. + // The offset of the MC function/data section in the wasm code/data section. + // For data relocations the offset is relative to start of the data payload + // itself and does not include the size of the section header. uint64_t SectionOffset; -private: friend class MCContext; MCSectionWasm(StringRef Section, unsigned type, unsigned flags, SectionKind K, const MCSymbolWasm *group, unsigned UniqueID, MCSymbol *Begin) diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index c0d322e3ed3acf92ed96c7b8fba1a3028d80ad4a..5390e79424248c560f6cf7e91b9cd96ad73bdd5a 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -44,12 +44,11 @@ class MCInstPrinter; class MCSection; class MCStreamer; class MCSymbolRefExpr; -class MCSymbolWasm; class MCSubtargetInfo; class raw_ostream; class Twine; -typedef std::pair MCSectionSubPair; +using MCSectionSubPair = std::pair; /// Target specific streamer interface. This is used so that targets can /// implement support for target specific assembly directives. @@ -128,6 +127,7 @@ public: virtual void emitArch(unsigned Arch); virtual void emitArchExtension(unsigned ArchExt); virtual void emitObjectArch(unsigned Arch); + void emitTargetAttributes(const MCSubtargetInfo &STI); virtual void finishAttributeSection(); virtual void emitInst(uint32_t Inst, char Suffix = '\0'); @@ -836,7 +836,9 @@ public: } /// \brief Emit the given \p Instruction into the current section. - virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI); + /// PrintSchedInfo == true then schedul comment should be added to output + virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool PrintSchedInfo = false); /// \brief Set the bundle alignment mode from now on in the section. /// The argument is the power of 2 to which the alignment is set. The diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h index bbdac8fad5f56044c1b11d59f95c23f0e53e6c71..d1d5d070bf5bb6ded81451e8f66aa5938c0d04da 100644 --- a/include/llvm/MC/MCSubtargetInfo.h +++ b/include/llvm/MC/MCSubtargetInfo.h @@ -27,6 +27,9 @@ namespace llvm { +class MachineInstr; +class MCInst; + //===----------------------------------------------------------------------===// /// /// MCSubtargetInfo - Generic base class for all target subtargets. @@ -61,6 +64,7 @@ public: MCSubtargetInfo() = delete; MCSubtargetInfo &operator=(const MCSubtargetInfo &) = delete; MCSubtargetInfo &operator=(MCSubtargetInfo &&) = delete; + virtual ~MCSubtargetInfo() = default; /// getTargetTriple - Return the target triple string. const Triple &getTargetTriple() const { return TargetTriple; } @@ -82,6 +86,10 @@ public: FeatureBits = FeatureBits_; } + bool hasFeature(unsigned Feature) const { + return FeatureBits[Feature]; + } + protected: /// Initialize the scheduling model and feature bits. /// @@ -167,6 +175,15 @@ public: auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU); return Found != ProcDesc.end() && StringRef(Found->Key) == CPU; } + + /// Returns string representation of scheduler comment + virtual std::string getSchedInfoStr(const MachineInstr &MI) const { + return {}; + } + + virtual std::string getSchedInfoStr(MCInst const &MCI) const { + return {}; + } }; } // end namespace llvm diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index e8432afd8627fb1165bb62b63b0c92b4ce9b8d67..9b1cc6e7d7e8c3caf6a00e50c7c780398398694f 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -145,10 +145,10 @@ protected: /// MCSymbol contains a uint64_t so is probably aligned to 8. On a 32-bit /// system, the name is a pointer so isn't going to satisfy the 8 byte /// alignment of uint64_t. Account for that here. - typedef union { + using NameEntryStorageTy = union { const StringMapEntry *NameEntry; uint64_t AlignmentPadding; - } NameEntryStorageTy; + }; MCSymbol(SymbolKind Kind, const StringMapEntry *Name, bool isTemporary) : IsTemporary(isTemporary), IsRedefinable(false), IsUsed(false), diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h index 4445be006eb0d268ee630d55d4d21aaf7fe02727..1b87095552d6be4b2ddabed66f7d2ed1d1a9dc70 100644 --- a/include/llvm/MC/MCSymbolWasm.h +++ b/include/llvm/MC/MCSymbolWasm.h @@ -9,10 +9,11 @@ #ifndef LLVM_MC_MCSYMBOLWASM_H #define LLVM_MC_MCSYMBOLWASM_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Wasm.h" namespace llvm { + class MCSymbolWasm : public MCSymbol { private: bool IsFunction = false; @@ -52,6 +53,7 @@ public: Params = std::move(Pars); } }; -} -#endif +} // end namespace llvm + +#endif // LLVM_MC_MCSYMBOLWASM_H diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h index 06f58d49803081414653a40f4a4b01407b5e83b0..5509bb3bdc7c7260e527b1478a377a132f419366 100644 --- a/include/llvm/MC/MCTargetOptions.h +++ b/include/llvm/MC/MCTargetOptions.h @@ -23,6 +23,12 @@ enum class ExceptionHandling { WinEH, /// Windows Exception Handling }; +enum class DebugCompressionType { + None, /// No compression + GNU, /// zlib-gnu style compression + Z, /// zlib style complession +}; + class StringRef; class MCTargetOptions { @@ -54,6 +60,7 @@ public: int DwarfVersion = 0; std::string ABIName; + std::string SplitDwarfFile; /// Additional paths to search for `.include` directives when using the /// integrated assembler. diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h index 6e458eaac9c83d5e2ac03196106b44662a41f90e..bebc0a8258100f1418a5732cea597d755c2c7ac2 100644 --- a/include/llvm/MC/MCWasmObjectWriter.h +++ b/include/llvm/MC/MCWasmObjectWriter.h @@ -11,44 +11,16 @@ #define LLVM_MC_MCWASMOBJECTWRITER_H #include "llvm/ADT/Triple.h" -#include "llvm/MC/MCValue.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/raw_ostream.h" -#include namespace llvm { -class MCAssembler; -class MCContext; + class MCFixup; -class MCFragment; class MCObjectWriter; -class MCSectionWasm; -class MCSymbol; -class MCSymbolWasm; class MCValue; class raw_pwrite_stream; -// Information about a single relocation. -struct WasmRelocationEntry { - uint64_t Offset; // Where is the relocation. - const MCSymbolWasm *Symbol; // The symbol to relocate with. - uint64_t Addend; // A value to add to the symbol. - unsigned Type; // The type of the relocation. - MCSectionWasm *FixupSection;// The section the relocation is targeting. - - WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol, - uint64_t Addend, unsigned Type, - MCSectionWasm *FixupSection) - : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type), - FixupSection(FixupSection) {} - - void print(raw_ostream &Out) const { - Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend - << ", Type=" << Type << ", FixupSection=" << FixupSection; - } - void dump() const { print(errs()); } -}; - class MCWasmObjectTargetWriter { const unsigned Is64Bit : 1; @@ -56,16 +28,10 @@ protected: explicit MCWasmObjectTargetWriter(bool Is64Bit_); public: - virtual ~MCWasmObjectTargetWriter() {} - - virtual unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup &Fixup, bool IsPCRel) const = 0; + virtual ~MCWasmObjectTargetWriter(); - virtual bool needsRelocateWithSymbol(const MCSymbol &Sym, - unsigned Type) const; - - virtual void sortRelocs(const MCAssembler &Asm, - std::vector &Relocs); + virtual unsigned getRelocType(const MCValue &Target, + const MCFixup &Fixup) const = 0; /// \name Accessors /// @{ @@ -80,6 +46,7 @@ public: /// \returns The constructed object writer. MCObjectWriter *createWasmObjectWriter(MCWasmObjectTargetWriter *MOTW, raw_pwrite_stream &OS); + } // End llvm namespace #endif diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index d423957d9b79d344726e04922387888aa1c5afb3..6c5fb9d5c92be770bdecdba685c599d8674a30b1 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -20,9 +20,14 @@ #include "llvm/Object/Binary.h" #include "llvm/Support/Chrono.h" #include "llvm/Support/Error.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include +#include +#include +#include +#include +#include namespace llvm { namespace object { @@ -32,25 +37,28 @@ class Archive; class ArchiveMemberHeader { public: friend class Archive; + ArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr, uint64_t Size, Error *Err); // ArchiveMemberHeader() = default; /// Get the name without looking up long names. - Expected getRawName() const; + Expected getRawName() const; /// Get the name looking up long names. - Expected getName(uint64_t Size) const; + Expected getName(uint64_t Size) const; /// Members are not larger than 4GB. Expected getSize() const; Expected getAccessMode() const; Expected> getLastModified() const; - llvm::StringRef getRawLastModified() const { + + StringRef getRawLastModified() const { return StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified)).rtrim(' '); } + Expected getUID() const; Expected getGID() const; @@ -75,11 +83,13 @@ private: class Archive : public Binary { virtual void anchor(); + public: class Child { friend Archive; - const Archive *Parent; friend ArchiveMemberHeader; + + const Archive *Parent; ArchiveMemberHeader Header; /// \brief Includes header but not padding byte. StringRef Data; @@ -103,17 +113,22 @@ public: Expected getName() const; Expected getFullName() const; Expected getRawName() const { return Header.getRawName(); } + Expected> getLastModified() const { return Header.getLastModified(); } + StringRef getRawLastModified() const { return Header.getRawLastModified(); } + Expected getUID() const { return Header.getUID(); } Expected getGID() const { return Header.getGID(); } + Expected getAccessMode() const { return Header.getAccessMode(); } + /// \return the size of the archive member without the header or padding. Expected getSize() const; /// \return the size in the archive header for this member. @@ -130,11 +145,12 @@ public: class child_iterator { Child C; - Error *E; + Error *E = nullptr; public: - child_iterator() : C(Child(nullptr, nullptr, nullptr)), E(nullptr) {} + child_iterator() : C(Child(nullptr, nullptr, nullptr)) {} child_iterator(const Child &C, Error *E) : C(C), E(E) {} + const Child *operator->() const { return &C; } const Child &operator*() const { return C; } @@ -171,14 +187,15 @@ public: uint32_t StringIndex; // Extra index to the string. public: - bool operator ==(const Symbol &other) const { - return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex); - } - Symbol(const Archive *p, uint32_t symi, uint32_t stri) : Parent(p) , SymbolIndex(symi) , StringIndex(stri) {} + + bool operator ==(const Symbol &other) const { + return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex); + } + StringRef getName() const; Expected getMember() const; Symbol getNext() const; @@ -186,8 +203,10 @@ public: class symbol_iterator { Symbol symbol; + public: symbol_iterator(const Symbol &s) : symbol(s) {} + const Symbol *operator->() const { return &symbol; } const Symbol &operator*() const { return symbol; } @@ -264,7 +283,7 @@ private: mutable std::vector> ThinBuffers; }; -} -} +} // end namespace object +} // end namespace llvm -#endif +#endif // LLVM_OBJECT_ARCHIVE_H diff --git a/include/llvm/Object/ArchiveWriter.h b/include/llvm/Object/ArchiveWriter.h index 3e84a5814d796403b247c41b1e15f499940ddcf2..1ed758d40df2e4d38e989511e04e8860faae11db 100644 --- a/include/llvm/Object/ArchiveWriter.h +++ b/include/llvm/Object/ArchiveWriter.h @@ -22,6 +22,7 @@ namespace llvm { struct NewArchiveMember { std::unique_ptr Buf; + StringRef MemberName; sys::TimePoint ModTime; unsigned UID = 0, GID = 0, Perms = 0644; diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h index 00d06e3c7437ac3f4fb3fc85c494a7dd63a3232a..3f5a233c1ee185d8ba357e067b7cc0d54771e78b 100644 --- a/include/llvm/Object/Binary.h +++ b/include/llvm/Object/Binary.h @@ -14,10 +14,12 @@ #ifndef LLVM_OBJECT_BINARY_H #define LLVM_OBJECT_BINARY_H -#include "llvm/Object/Error.h" -#include "llvm/Support/ErrorOr.h" -#include "llvm/Support/FileSystem.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" +#include +#include +#include namespace llvm { @@ -28,9 +30,6 @@ namespace object { class Binary { private: - Binary() = delete; - Binary(const Binary &other) = delete; - unsigned int TypeID; protected: @@ -43,7 +42,6 @@ protected: ID_MachOUniversalBinary, ID_COFFImportFile, ID_IR, // LLVM IR - ID_ModuleSummaryIndex, // Module summary index // Object and children. ID_StartObjects, @@ -59,6 +57,8 @@ protected: ID_MachO64L, // MachO 64-bit, little endian ID_MachO64B, // MachO 64-bit, big endian + ID_WinRes, // Windows resource (.res) file. + ID_Wasm, ID_EndObjects @@ -79,6 +79,8 @@ protected: } public: + Binary() = delete; + Binary(const Binary &other) = delete; virtual ~Binary(); StringRef getData() const; @@ -93,9 +95,7 @@ public: return TypeID > ID_StartObjects && TypeID < ID_EndObjects; } - bool isSymbolic() const { - return isIR() || isObject(); - } + bool isSymbolic() const { return isIR() || isObject() || isCOFFImportFile(); } bool isArchive() const { return TypeID == ID_Archive; @@ -127,12 +127,22 @@ public: return TypeID == ID_IR; } - bool isModuleSummaryIndex() const { return TypeID == ID_ModuleSummaryIndex; } - bool isLittleEndian() const { return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B || TypeID == ID_MachO32B || TypeID == ID_MachO64B); } + + bool isWinRes() const { return TypeID == ID_WinRes; } + + Triple::ObjectFormatType getTripleObjectFormat() const { + if (isCOFF()) + return Triple::COFF; + if (isMachO()) + return Triple::MachO; + if (isELF()) + return Triple::ELF; + return Triple::UnknownObjectFormat; + } }; /// @brief Create a Binary from Source, autodetecting the file type. @@ -162,7 +172,7 @@ OwningBinary::OwningBinary(std::unique_ptr Bin, std::unique_ptr Buf) : Bin(std::move(Bin)), Buf(std::move(Buf)) {} -template OwningBinary::OwningBinary() {} +template OwningBinary::OwningBinary() = default; template OwningBinary::OwningBinary(OwningBinary &&Other) @@ -190,7 +200,9 @@ template const T* OwningBinary::getBinary() const { } Expected> createBinary(StringRef Path); -} -} -#endif +} // end namespace object + +} // end namespace llvm + +#endif // LLVM_OBJECT_BINARY_H diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index 696042d29dabdedd169099e924d054e2839c9b63..ae695a529597bc782d749759de76fa7484ba9c7e 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -14,28 +14,42 @@ #ifndef LLVM_OBJECT_COFF_H #define LLVM_OBJECT_COFF_H -#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CVDebugRecord.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/COFF.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" +#include +#include +#include +#include namespace llvm { + template class ArrayRef; namespace object { -class ImportDirectoryEntryRef; + +class BaseRelocRef; class DelayImportDirectoryEntryRef; class ExportDirectoryEntryRef; +class ImportDirectoryEntryRef; class ImportedSymbolRef; -class BaseRelocRef; -typedef content_iterator import_directory_iterator; -typedef content_iterator - delay_import_directory_iterator; -typedef content_iterator export_directory_iterator; -typedef content_iterator imported_symbol_iterator; -typedef content_iterator base_reloc_iterator; +class ResourceSectionRef; + +using import_directory_iterator = content_iterator; +using delay_import_directory_iterator = + content_iterator; +using export_directory_iterator = content_iterator; +using imported_symbol_iterator = content_iterator; +using base_reloc_iterator = content_iterator; /// The DOS compatible header at the front of all PE/COFF executables. struct dos_header { @@ -190,10 +204,10 @@ struct import_lookup_table_entry { } }; -typedef import_lookup_table_entry - import_lookup_table_entry32; -typedef import_lookup_table_entry - import_lookup_table_entry64; +using import_lookup_table_entry32 = + import_lookup_table_entry; +using import_lookup_table_entry64 = + import_lookup_table_entry; struct delay_import_directory_table_entry { // dumpbin reports this field as "Characteristics" instead of "Attributes". @@ -226,8 +240,8 @@ union export_address_table_entry { support::ulittle32_t ForwarderRVA; }; -typedef support::ulittle32_t export_name_pointer_table_entry; -typedef support::ulittle16_t export_ordinal_table_entry; +using export_name_pointer_table_entry = support::ulittle32_t; +using export_ordinal_table_entry = support::ulittle16_t; struct StringTableOffset { support::ulittle32_t Zeroes; @@ -250,8 +264,8 @@ struct coff_symbol { uint8_t NumberOfAuxSymbols; }; -typedef coff_symbol coff_symbol16; -typedef coff_symbol coff_symbol32; +using coff_symbol16 = coff_symbol; +using coff_symbol32 = coff_symbol; // Contains only common parts of coff_symbol16 and coff_symbol32. struct coff_symbol_generic { @@ -264,9 +278,9 @@ struct coff_symbol_generic { class COFFSymbolRef { public: - COFFSymbolRef(const coff_symbol16 *CS) : CS16(CS), CS32(nullptr) {} - COFFSymbolRef(const coff_symbol32 *CS) : CS16(nullptr), CS32(CS) {} - COFFSymbolRef() : CS16(nullptr), CS32(nullptr) {} + COFFSymbolRef() = default; + COFFSymbolRef(const coff_symbol16 *CS) : CS16(CS) {} + COFFSymbolRef(const coff_symbol32 *CS) : CS32(CS) {} const void *getRawPtr() const { return CS16 ? static_cast(CS16) : CS32; @@ -396,8 +410,8 @@ public: private: bool isSet() const { return CS16 || CS32; } - const coff_symbol16 *CS16; - const coff_symbol32 *CS32; + const coff_symbol16 *CS16 = nullptr; + const coff_symbol32 *CS32 = nullptr; }; struct coff_section { @@ -418,6 +432,7 @@ struct coff_section { return (Characteristics & COFF::IMAGE_SCN_LNK_NRELOC_OVFL) && NumberOfRelocations == UINT16_MAX; } + uint32_t getAlignment() const { // The IMAGE_SCN_TYPE_NO_PAD bit is a legacy way of getting to // IMAGE_SCN_ALIGN_1BYTES. @@ -508,6 +523,7 @@ struct coff_import_header { support::ulittle32_t SizeOfData; support::ulittle16_t OrdinalHint; support::ulittle16_t TypeInfo; + int getType() const { return TypeInfo & 0x3; } int getNameType() const { return (TypeInfo >> 2) & 0x7; } }; @@ -518,6 +534,7 @@ struct coff_import_directory_table_entry { support::ulittle32_t ForwarderChain; support::ulittle32_t NameRVA; support::ulittle32_t ImportAddressTableRVA; + bool isNull() const { return ImportLookupTableRVA == 0 && TimeDateStamp == 0 && ForwarderChain == 0 && NameRVA == 0 && ImportAddressTableRVA == 0; @@ -532,6 +549,7 @@ struct coff_tls_directory { IntTy AddressOfCallBacks; support::ulittle32_t SizeOfZeroFill; support::ulittle32_t Characteristics; + uint32_t getAlignment() const { // Bit [20:24] contains section alignment. uint32_t Shift = (Characteristics & 0x00F00000) >> 20; @@ -541,8 +559,8 @@ struct coff_tls_directory { } }; -typedef coff_tls_directory coff_tls_directory32; -typedef coff_tls_directory coff_tls_directory64; +using coff_tls_directory32 = coff_tls_directory; +using coff_tls_directory64 = coff_tls_directory; struct coff_load_configuration32 { support::ulittle32_t Characteristics; @@ -603,10 +621,47 @@ struct coff_base_reloc_block_header { struct coff_base_reloc_block_entry { support::ulittle16_t Data; + int getType() const { return Data >> 12; } int getOffset() const { return Data & ((1 << 12) - 1); } }; +struct coff_resource_dir_entry { + union { + support::ulittle32_t NameOffset; + support::ulittle32_t ID; + uint32_t getNameOffset() const { + return maskTrailingOnes(31) & NameOffset; + } + } Identifier; + union { + support::ulittle32_t DataEntryOffset; + support::ulittle32_t SubdirOffset; + + bool isSubDir() const { return SubdirOffset >> 31; } + uint32_t value() const { + return maskTrailingOnes(31) & SubdirOffset; + } + + } Offset; +}; + +struct coff_resource_data_entry { + support::ulittle32_t DataRVA; + support::ulittle32_t DataSize; + support::ulittle32_t Codepage; + support::ulittle32_t Reserved; +}; + +struct coff_resource_dir_table { + support::ulittle32_t Characteristics; + support::ulittle32_t TimeDateStamp; + support::ulittle16_t MajorVersion; + support::ulittle16_t MinorVersion; + support::ulittle16_t NumberOfNameEntries; + support::ulittle16_t NumberOfIDEntries; +}; + class COFFObjectFile : public ObjectFile { private: friend class ImportDirectoryEntryRef; @@ -652,6 +707,7 @@ public: return reinterpret_cast(SymbolTable32); return uintptr_t(0); } + uint16_t getMachine() const { if (COFFHeader) return COFFHeader->Machine; @@ -659,6 +715,7 @@ public: return COFFBigObjHeader->Machine; llvm_unreachable("no COFF header!"); } + uint16_t getSizeOfOptionalHeader() const { if (COFFHeader) return COFFHeader->isImportLibrary() ? 0 @@ -668,6 +725,7 @@ public: return 0; llvm_unreachable("no COFF header!"); } + uint16_t getCharacteristics() const { if (COFFHeader) return COFFHeader->isImportLibrary() ? 0 : COFFHeader->Characteristics; @@ -677,6 +735,7 @@ public: return 0; llvm_unreachable("no COFF header!"); } + uint32_t getTimeDateStamp() const { if (COFFHeader) return COFFHeader->TimeDateStamp; @@ -684,6 +743,7 @@ public: return COFFBigObjHeader->TimeDateStamp; llvm_unreachable("no COFF header!"); } + uint32_t getNumberOfSections() const { if (COFFHeader) return COFFHeader->isImportLibrary() ? 0 : COFFHeader->NumberOfSections; @@ -691,6 +751,7 @@ public: return COFFBigObjHeader->NumberOfSections; llvm_unreachable("no COFF header!"); } + uint32_t getPointerToSymbolTable() const { if (COFFHeader) return COFFHeader->isImportLibrary() ? 0 @@ -699,6 +760,7 @@ public: return COFFBigObjHeader->PointerToSymbolTable; llvm_unreachable("no COFF header!"); } + uint32_t getRawNumberOfSymbols() const { if (COFFHeader) return COFFHeader->isImportLibrary() ? 0 : COFFHeader->NumberOfSymbols; @@ -706,11 +768,13 @@ public: return COFFBigObjHeader->NumberOfSymbols; llvm_unreachable("no COFF header!"); } + uint32_t getNumberOfSymbols() const { if (!SymbolTable16 && !SymbolTable32) return 0; return getRawNumberOfSymbols(); } + protected: void moveSymbolNext(DataRefImpl &Symb) const override; Expected getSymbolName(DataRefImpl Symb) const override; @@ -725,6 +789,7 @@ protected: std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; @@ -746,6 +811,7 @@ protected: public: COFFObjectFile(MemoryBufferRef Object, std::error_code &EC); + basic_symbol_iterator symbol_begin() const override; basic_symbol_iterator symbol_end() const override; section_iterator section_begin() const override; @@ -797,6 +863,7 @@ public: std::error_code getDataDirectory(uint32_t index, const data_directory *&Res) const; std::error_code getSection(int32_t index, const coff_section *&Res) const; + template std::error_code getSymbol(uint32_t Index, const coff_symbol_type *&Res) const { @@ -821,6 +888,7 @@ public: } return object_error::parse_failed; } + template std::error_code getAuxSymbol(uint32_t index, const T *&Res) const { ErrorOr s = getSymbol(index); @@ -829,6 +897,7 @@ public: Res = reinterpret_cast(s->getRawPtr()); return std::error_code(); } + std::error_code getSymbolName(COFFSymbolRef Symbol, StringRef &Res) const; std::error_code getSymbolName(const coff_symbol_generic *Symbol, StringRef &Res) const; @@ -885,7 +954,7 @@ public: // The iterator for the import directory table. class ImportDirectoryEntryRef { public: - ImportDirectoryEntryRef() : OwningObject(nullptr) {} + ImportDirectoryEntryRef() = default; ImportDirectoryEntryRef(const coff_import_directory_table_entry *Table, uint32_t I, const COFFObjectFile *Owner) : ImportTable(Table), Index(I), OwningObject(Owner) {} @@ -911,12 +980,12 @@ public: private: const coff_import_directory_table_entry *ImportTable; uint32_t Index; - const COFFObjectFile *OwningObject; + const COFFObjectFile *OwningObject = nullptr; }; class DelayImportDirectoryEntryRef { public: - DelayImportDirectoryEntryRef() : OwningObject(nullptr) {} + DelayImportDirectoryEntryRef() = default; DelayImportDirectoryEntryRef(const delay_import_directory_table_entry *T, uint32_t I, const COFFObjectFile *Owner) : Table(T), Index(I), OwningObject(Owner) {} @@ -936,13 +1005,13 @@ public: private: const delay_import_directory_table_entry *Table; uint32_t Index; - const COFFObjectFile *OwningObject; + const COFFObjectFile *OwningObject = nullptr; }; // The iterator for the export directory table entry. class ExportDirectoryEntryRef { public: - ExportDirectoryEntryRef() : OwningObject(nullptr) {} + ExportDirectoryEntryRef() = default; ExportDirectoryEntryRef(const export_directory_table_entry *Table, uint32_t I, const COFFObjectFile *Owner) : ExportTable(Table), Index(I), OwningObject(Owner) {} @@ -962,12 +1031,12 @@ public: private: const export_directory_table_entry *ExportTable; uint32_t Index; - const COFFObjectFile *OwningObject; + const COFFObjectFile *OwningObject = nullptr; }; class ImportedSymbolRef { public: - ImportedSymbolRef() : OwningObject(nullptr) {} + ImportedSymbolRef() = default; ImportedSymbolRef(const import_lookup_table_entry32 *Entry, uint32_t I, const COFFObjectFile *Owner) : Entry32(Entry), Entry64(nullptr), Index(I), OwningObject(Owner) {} @@ -987,12 +1056,12 @@ private: const import_lookup_table_entry32 *Entry32; const import_lookup_table_entry64 *Entry64; uint32_t Index; - const COFFObjectFile *OwningObject; + const COFFObjectFile *OwningObject = nullptr; }; class BaseRelocRef { public: - BaseRelocRef() : OwningObject(nullptr) {} + BaseRelocRef() = default; BaseRelocRef(const coff_base_reloc_block_header *Header, const COFFObjectFile *Owner) : Header(Header), Index(0), OwningObject(Owner) {} @@ -1006,7 +1075,24 @@ public: private: const coff_base_reloc_block_header *Header; uint32_t Index; - const COFFObjectFile *OwningObject; + const COFFObjectFile *OwningObject = nullptr; +}; + +class ResourceSectionRef { +public: + ResourceSectionRef() = default; + explicit ResourceSectionRef(StringRef Ref) : BBS(Ref, support::little) {} + + ErrorOr> getEntryNameString(const coff_resource_dir_entry &Entry); + ErrorOr + getEntrySubDir(const coff_resource_dir_entry &Entry); + ErrorOr getBaseTable(); + +private: + BinaryByteStream BBS; + + ErrorOr getTableAtOffset(uint32_t Offset); + ErrorOr> getDirStringAtOffset(uint32_t Offset); }; // Corresponds to `_FPO_DATA` structure in the PE/COFF spec. @@ -1034,6 +1120,7 @@ struct FpoData { }; } // end namespace object + } // end namespace llvm -#endif +#endif // LLVM_OBJECT_COFF_H diff --git a/include/llvm/Object/COFFImportFile.h b/include/llvm/Object/COFFImportFile.h index 4192fe7e5c90a593522c61298b1f91227fe219b8..78044a2832faf7f5c52d234a9eedea70be92675a 100644 --- a/include/llvm/Object/COFFImportFile.h +++ b/include/llvm/Object/COFFImportFile.h @@ -9,13 +9,15 @@ // // COFF short import file is a special kind of file which contains // only symbol names for DLL-exported symbols. This class implements -// SymbolicFile interface for the file. +// exporting of Symbols to create libraries and a SymbolicFile +// interface for the file type. // //===----------------------------------------------------------------------===// #ifndef LLVM_OBJECT_COFF_IMPORT_FILE_H #define LLVM_OBJECT_COFF_IMPORT_FILE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/Object/COFF.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" @@ -53,7 +55,7 @@ public: basic_symbol_iterator symbol_end() const override { DataRefImpl Symb; - Symb.p = isCode() ? 2 : 1; + Symb.p = isData() ? 1 : 2; return BasicSymbolRef(Symb, this); } @@ -63,11 +65,41 @@ public: } private: - bool isCode() const { - return getCOFFImportHeader()->getType() == COFF::IMPORT_CODE; + bool isData() const { + return getCOFFImportHeader()->getType() == COFF::IMPORT_DATA; } }; +struct COFFShortExport { + std::string Name; + std::string ExtName; + + uint16_t Ordinal = 0; + bool Noname = false; + bool Data = false; + bool Private = false; + bool Constant = false; + + bool isWeak() { + return ExtName.size() && ExtName != Name; + } + + friend bool operator==(const COFFShortExport &L, const COFFShortExport &R) { + return L.Name == R.Name && L.ExtName == R.ExtName && + L.Ordinal == R.Ordinal && L.Noname == R.Noname && + L.Data == R.Data && L.Private == R.Private; + } + + friend bool operator!=(const COFFShortExport &L, const COFFShortExport &R) { + return !(L == R); + } +}; + +std::error_code writeImportLibrary(StringRef DLLName, + StringRef Path, + ArrayRef Exports, + COFF::MachineTypes Machine); + } // namespace object } // namespace llvm diff --git a/include/llvm/Object/COFFModuleDefinition.h b/include/llvm/Object/COFFModuleDefinition.h new file mode 100644 index 0000000000000000000000000000000000000000..a0e8eacdb7a3a79b2ceb3c88fc4fd63bce6bd04e --- /dev/null +++ b/include/llvm/Object/COFFModuleDefinition.h @@ -0,0 +1,49 @@ +//===--- COFFModuleDefinition.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Windows-specific. +// A parser for the module-definition file (.def file). +// Parsed results are directly written to Config global variable. +// +// The format of module-definition files are described in this document: +// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_OBJECT_COFF_MODULE_DEFINITION_H +#define LLVM_OBJECT_COFF_MODULE_DEFINITION_H + +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" + +namespace llvm { +namespace object { + +struct COFFModuleDefinition { + std::vector Exports; + std::string OutputFile; + uint64_t ImageBase = 0; + uint64_t StackReserve = 0; + uint64_t StackCommit = 0; + uint64_t HeapReserve = 0; + uint64_t HeapCommit = 0; + uint32_t MajorImageVersion = 0; + uint32_t MinorImageVersion = 0; + uint32_t MajorOSVersion = 0; + uint32_t MinorOSVersion = 0; +}; + +Expected +parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine); + +} // End namespace object. +} // End namespace llvm. + +#endif diff --git a/include/llvm/Object/Decompressor.h b/include/llvm/Object/Decompressor.h index a11857d546aaeb64fc392b891039e215fdfddacf..c8e888d285e4a820166a737f9881ff80920e5cc6 100644 --- a/include/llvm/Object/Decompressor.h +++ b/include/llvm/Object/Decompressor.h @@ -10,8 +10,8 @@ #ifndef LLVM_OBJECT_DECOMPRESSOR_H #define LLVM_OBJECT_DECOMPRESSOR_H -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Object/ObjectFile.h" namespace llvm { @@ -30,7 +30,10 @@ public: /// @brief Resize the buffer and uncompress section data into it. /// @param Out Destination buffer. - Error decompress(SmallString<32> &Out); + template Error resizeAndDecompress(T &Out) { + Out.resize(DecompressedSize); + return decompress({Out.data(), (size_t)DecompressedSize}); + } /// @brief Uncompress section data to raw buffer provided. /// @param Buffer Destination buffer. diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index 7a3155b3953ecd8fe55c5b85baf3f158eb2530ef..670c0bbce3ac6e94b526e374f4c67a183ad1cb1b 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -14,14 +14,25 @@ #ifndef LLVM_OBJECT_ELF_H #define LLVM_OBJECT_ELF_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELFTypes.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include +#include namespace llvm { namespace object { StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type); +StringRef getELFSectionTypeName(uint32_t Machine, uint32_t Type); // Subclasses of ELFFile may need this for template instantiation inline std::pair @@ -41,27 +52,27 @@ template class ELFFile { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - typedef typename ELFT::uint uintX_t; - typedef typename ELFT::Ehdr Elf_Ehdr; - typedef typename ELFT::Shdr Elf_Shdr; - typedef typename ELFT::Sym Elf_Sym; - typedef typename ELFT::Dyn Elf_Dyn; - typedef typename ELFT::Phdr Elf_Phdr; - typedef typename ELFT::Rel Elf_Rel; - typedef typename ELFT::Rela Elf_Rela; - typedef typename ELFT::Verdef Elf_Verdef; - typedef typename ELFT::Verdaux Elf_Verdaux; - typedef typename ELFT::Verneed Elf_Verneed; - typedef typename ELFT::Vernaux Elf_Vernaux; - typedef typename ELFT::Versym Elf_Versym; - typedef typename ELFT::Hash Elf_Hash; - typedef typename ELFT::GnuHash Elf_GnuHash; - typedef typename ELFT::DynRange Elf_Dyn_Range; - typedef typename ELFT::ShdrRange Elf_Shdr_Range; - typedef typename ELFT::SymRange Elf_Sym_Range; - typedef typename ELFT::RelRange Elf_Rel_Range; - typedef typename ELFT::RelaRange Elf_Rela_Range; - typedef typename ELFT::PhdrRange Elf_Phdr_Range; + using uintX_t = typename ELFT::uint; + using Elf_Ehdr = typename ELFT::Ehdr; + using Elf_Shdr = typename ELFT::Shdr; + using Elf_Sym = typename ELFT::Sym; + using Elf_Dyn = typename ELFT::Dyn; + using Elf_Phdr = typename ELFT::Phdr; + using Elf_Rel = typename ELFT::Rel; + using Elf_Rela = typename ELFT::Rela; + using Elf_Verdef = typename ELFT::Verdef; + using Elf_Verdaux = typename ELFT::Verdaux; + using Elf_Verneed = typename ELFT::Verneed; + using Elf_Vernaux = typename ELFT::Vernaux; + using Elf_Versym = typename ELFT::Versym; + using Elf_Hash = typename ELFT::Hash; + using Elf_GnuHash = typename ELFT::GnuHash; + using Elf_Dyn_Range = typename ELFT::DynRange; + using Elf_Shdr_Range = typename ELFT::ShdrRange; + using Elf_Sym_Range = typename ELFT::SymRange; + using Elf_Rel_Range = typename ELFT::RelRange; + using Elf_Rela_Range = typename ELFT::RelaRange; + using Elf_Phdr_Range = typename ELFT::PhdrRange; const uint8_t *base() const { return reinterpret_cast(Buf.data()); @@ -70,7 +81,6 @@ public: size_t getBufSize() const { return Buf.size(); } private: - StringRef Buf; public: @@ -161,10 +171,10 @@ public: Expected> getSectionContents(const Elf_Shdr *Sec) const; }; -typedef ELFFile> ELF32LEFile; -typedef ELFFile> ELF64LEFile; -typedef ELFFile> ELF32BEFile; -typedef ELFFile> ELF64BEFile; +using ELF32LEFile = ELFFile>; +using ELF64LEFile = ELFFile>; +using ELF32BEFile = ELFFile>; +using ELF64BEFile = ELFFile>; template inline Expected @@ -194,7 +204,7 @@ ELFFile::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, ArrayRef ShndxTable) const { uint32_t Index = Sym->st_shndx; if (Index == ELF::SHN_XINDEX) { - auto ErrorOrIndex = object::getExtendedSymbolTableIndex( + auto ErrorOrIndex = getExtendedSymbolTableIndex( Sym, Syms.begin(), ShndxTable); if (!ErrorOrIndex) return ErrorOrIndex.takeError(); @@ -225,10 +235,7 @@ ELFFile::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols, uint32_t Index = *IndexOrErr; if (Index == 0) return nullptr; - auto SectionsOrErr = sections(); - if (!SectionsOrErr) - return SectionsOrErr.takeError(); - return object::getSection(*SectionsOrErr, Index); + return getSection(Index); } template @@ -519,7 +526,8 @@ inline unsigned hashSysV(StringRef SymbolName) { } return h; } + } // end namespace object } // end namespace llvm -#endif +#endif // LLVM_OBJECT_ELF_H diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h index 9e95f2958aa4fe14ecc673fc7c6a05163e820eae..2ba3b13f49da861c2ba9857af7924fb81d83756e 100644 --- a/include/llvm/Object/ELFObjectFile.h +++ b/include/llvm/Object/ELFObjectFile.h @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ELF.h" @@ -27,8 +28,8 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/ARMAttributeParser.h" +#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -42,13 +43,11 @@ namespace llvm { namespace object { class elf_symbol_iterator; -class ELFSymbolRef; -class ELFRelocationRef; class ELFObjectFileBase : public ObjectFile { - friend class ELFSymbolRef; - friend class ELFSectionRef; friend class ELFRelocationRef; + friend class ELFSectionRef; + friend class ELFSymbolRef; protected: ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source); @@ -65,7 +64,8 @@ protected: virtual ErrorOr getRelocationAddend(DataRefImpl Rel) const = 0; public: - typedef iterator_range elf_symbol_iterator_range; + using elf_symbol_iterator_range = iterator_range; + virtual elf_symbol_iterator_range getDynamicSymbolIterators() const = 0; elf_symbol_iterator_range symbols() const; @@ -201,14 +201,14 @@ template class ELFObjectFile : public ELFObjectFileBase { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - typedef typename ELFFile::uintX_t uintX_t; + using uintX_t = typename ELFFile::uintX_t; - typedef typename ELFFile::Elf_Sym Elf_Sym; - typedef typename ELFFile::Elf_Shdr Elf_Shdr; - typedef typename ELFFile::Elf_Ehdr Elf_Ehdr; - typedef typename ELFFile::Elf_Rel Elf_Rel; - typedef typename ELFFile::Elf_Rela Elf_Rela; - typedef typename ELFFile::Elf_Dyn Elf_Dyn; + using Elf_Sym = typename ELFFile::Elf_Sym; + using Elf_Shdr = typename ELFFile::Elf_Shdr; + using Elf_Ehdr = typename ELFFile::Elf_Ehdr; + using Elf_Rel = typename ELFFile::Elf_Rel; + using Elf_Rela = typename ELFFile::Elf_Rela; + using Elf_Dyn = typename ELFFile::Elf_Dyn; protected: ELFFile EF; @@ -235,6 +235,7 @@ protected: std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; @@ -398,10 +399,10 @@ public: bool isRelocatableObject() const override; }; -typedef ELFObjectFile> ELF32LEObjectFile; -typedef ELFObjectFile> ELF64LEObjectFile; -typedef ELFObjectFile> ELF32BEObjectFile; -typedef ELFObjectFile> ELF64BEObjectFile; +using ELF32LEObjectFile = ELFObjectFile>; +using ELF64LEObjectFile = ELFObjectFile>; +using ELF32BEObjectFile = ELFObjectFile>; +using ELF64BEObjectFile = ELFObjectFile>; template void ELFObjectFile::moveSymbolNext(DataRefImpl &Sym) const { @@ -645,6 +646,17 @@ uint64_t ELFObjectFile::getSectionAddress(DataRefImpl Sec) const { return getSection(Sec)->sh_addr; } +template +uint64_t ELFObjectFile::getSectionIndex(DataRefImpl Sec) const { + auto SectionsOrErr = EF.sections(); + handleAllErrors(std::move(SectionsOrErr.takeError()), + [](const ErrorInfoBase &) { + llvm_unreachable("unable to get section index"); + }); + const Elf_Shdr *First = SectionsOrErr->begin(); + return getSection(Sec) - First; +} + template uint64_t ELFObjectFile::getSectionSize(DataRefImpl Sec) const { return getSection(Sec)->sh_size; diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h index 3e03fd8b980e72d58ada5a51ae8b1b421afd7671..808144694acb8fe4a25c5b95e5cb330dc136ce94 100644 --- a/include/llvm/Object/ELFTypes.h +++ b/include/llvm/Object/ELFTypes.h @@ -11,10 +11,15 @@ #define LLVM_OBJECT_ELFTYPES_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/Error.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Error.h" +#include +#include +#include +#include namespace llvm { namespace object { @@ -45,58 +50,58 @@ public: static const endianness TargetEndianness = E; static const bool Is64Bits = Is64; - typedef typename std::conditional::type uint; - typedef Elf_Ehdr_Impl> Ehdr; - typedef Elf_Shdr_Impl> Shdr; - typedef Elf_Sym_Impl> Sym; - typedef Elf_Dyn_Impl> Dyn; - typedef Elf_Phdr_Impl> Phdr; - typedef Elf_Rel_Impl, false> Rel; - typedef Elf_Rel_Impl, true> Rela; - typedef Elf_Verdef_Impl> Verdef; - typedef Elf_Verdaux_Impl> Verdaux; - typedef Elf_Verneed_Impl> Verneed; - typedef Elf_Vernaux_Impl> Vernaux; - typedef Elf_Versym_Impl> Versym; - typedef Elf_Hash_Impl> Hash; - typedef Elf_GnuHash_Impl> GnuHash; - typedef Elf_Chdr_Impl> Chdr; - typedef ArrayRef DynRange; - typedef ArrayRef ShdrRange; - typedef ArrayRef SymRange; - typedef ArrayRef RelRange; - typedef ArrayRef RelaRange; - typedef ArrayRef PhdrRange; - - typedef packed Half; - typedef packed Word; - typedef packed Sword; - typedef packed Xword; - typedef packed Sxword; - typedef packed Addr; - typedef packed Off; -}; - -typedef ELFType ELF32LE; -typedef ELFType ELF32BE; -typedef ELFType ELF64LE; -typedef ELFType ELF64BE; + using uint = typename std::conditional::type; + using Ehdr = Elf_Ehdr_Impl>; + using Shdr = Elf_Shdr_Impl>; + using Sym = Elf_Sym_Impl>; + using Dyn = Elf_Dyn_Impl>; + using Phdr = Elf_Phdr_Impl>; + using Rel = Elf_Rel_Impl, false>; + using Rela = Elf_Rel_Impl, true>; + using Verdef = Elf_Verdef_Impl>; + using Verdaux = Elf_Verdaux_Impl>; + using Verneed = Elf_Verneed_Impl>; + using Vernaux = Elf_Vernaux_Impl>; + using Versym = Elf_Versym_Impl>; + using Hash = Elf_Hash_Impl>; + using GnuHash = Elf_GnuHash_Impl>; + using Chdr = Elf_Chdr_Impl>; + using DynRange = ArrayRef; + using ShdrRange = ArrayRef; + using SymRange = ArrayRef; + using RelRange = ArrayRef; + using RelaRange = ArrayRef; + using PhdrRange = ArrayRef; + + using Half = packed; + using Word = packed; + using Sword = packed; + using Xword = packed; + using Sxword = packed; + using Addr = packed; + using Off = packed; +}; + +using ELF32LE = ELFType; +using ELF32BE = ELFType; +using ELF64LE = ELFType; +using ELF64BE = ELFType; // Use an alignment of 2 for the typedefs since that is the worst case for // ELF files in archives. // Templates to choose Elf_Addr and Elf_Off depending on is64Bits. template struct ELFDataTypeTypedefHelperCommon { - typedef support::detail::packed_endian_specific_integral< - uint16_t, target_endianness, 2> Elf_Half; - typedef support::detail::packed_endian_specific_integral< - uint32_t, target_endianness, 2> Elf_Word; - typedef support::detail::packed_endian_specific_integral< - int32_t, target_endianness, 2> Elf_Sword; - typedef support::detail::packed_endian_specific_integral< - uint64_t, target_endianness, 2> Elf_Xword; - typedef support::detail::packed_endian_specific_integral< - int64_t, target_endianness, 2> Elf_Sxword; + using Elf_Half = support::detail::packed_endian_specific_integral< + uint16_t, target_endianness, 2>; + using Elf_Word = support::detail::packed_endian_specific_integral< + uint32_t, target_endianness, 2>; + using Elf_Sword = support::detail::packed_endian_specific_integral< + int32_t, target_endianness, 2>; + using Elf_Xword = support::detail::packed_endian_specific_integral< + uint64_t, target_endianness, 2>; + using Elf_Sxword = support::detail::packed_endian_specific_integral< + int64_t, target_endianness, 2>; }; template struct ELFDataTypeTypedefHelper; @@ -105,34 +110,34 @@ template struct ELFDataTypeTypedefHelper; template struct ELFDataTypeTypedefHelper> : ELFDataTypeTypedefHelperCommon { - typedef uint32_t value_type; - typedef support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2> Elf_Addr; - typedef support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2> Elf_Off; + using value_type = uint32_t; + using Elf_Addr = support::detail::packed_endian_specific_integral< + value_type, TargetEndianness, 2>; + using Elf_Off = support::detail::packed_endian_specific_integral< + value_type, TargetEndianness, 2>; }; /// ELF 64bit types. template struct ELFDataTypeTypedefHelper> : ELFDataTypeTypedefHelperCommon { - typedef uint64_t value_type; - typedef support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2> Elf_Addr; - typedef support::detail::packed_endian_specific_integral< - value_type, TargetEndianness, 2> Elf_Off; + using value_type = uint64_t; + using Elf_Addr = support::detail::packed_endian_specific_integral< + value_type, TargetEndianness, 2>; + using Elf_Off = support::detail::packed_endian_specific_integral< + value_type, TargetEndianness, 2>; }; // I really don't like doing this, but the alternative is copypasta. #define LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) \ - typedef typename ELFT::Addr Elf_Addr; \ - typedef typename ELFT::Off Elf_Off; \ - typedef typename ELFT::Half Elf_Half; \ - typedef typename ELFT::Word Elf_Word; \ - typedef typename ELFT::Sword Elf_Sword; \ - typedef typename ELFT::Xword Elf_Xword; \ - typedef typename ELFT::Sxword Elf_Sxword; + using Elf_Addr = typename ELFT::Addr; \ + using Elf_Off = typename ELFT::Off; \ + using Elf_Half = typename ELFT::Half; \ + using Elf_Word = typename ELFT::Word; \ + using Elf_Sword = typename ELFT::Sword; \ + using Elf_Xword = typename ELFT::Xword; \ + using Elf_Sxword = typename ELFT::Sxword; #define LLD_ELF_COMMA , #define LLVM_ELF_IMPORT_TYPES(E, W) \ @@ -222,6 +227,7 @@ struct Elf_Sym_Impl : Elf_Sym_Base { uint64_t getValue() const { return st_value; } void setBinding(unsigned char b) { setBindingAndType(b, getType()); } void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { st_info = (b << 4) + (t & 0x0f); } @@ -238,22 +244,29 @@ struct Elf_Sym_Impl : Elf_Sym_Base { } bool isAbsolute() const { return st_shndx == ELF::SHN_ABS; } + bool isCommon() const { return getType() == ELF::STT_COMMON || st_shndx == ELF::SHN_COMMON; } + bool isDefined() const { return !isUndefined(); } + bool isProcessorSpecific() const { return st_shndx >= ELF::SHN_LOPROC && st_shndx <= ELF::SHN_HIPROC; } + bool isOSSpecific() const { return st_shndx >= ELF::SHN_LOOS && st_shndx <= ELF::SHN_HIOS; } + bool isReserved() const { // ELF::SHN_HIRESERVE is 0xffff so st_shndx <= ELF::SHN_HIRESERVE is always // true and some compilers warn about it. return st_shndx >= ELF::SHN_LORESERVE; } + bool isUndefined() const { return st_shndx == ELF::SHN_UNDEF; } + bool isExternal() const { return getBinding() != ELF::STB_LOCAL; } @@ -277,14 +290,12 @@ struct Elf_Versym_Impl { Elf_Half vs_index; // Version index with flags (e.g. VERSYM_HIDDEN) }; -template struct Elf_Verdaux_Impl; - /// Elf_Verdef: This is the structure of entries in the SHT_GNU_verdef section /// (.gnu.version_d). This structure is identical for ELF32 and ELF64. template struct Elf_Verdef_Impl { LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - typedef Elf_Verdaux_Impl Elf_Verdaux; + using Elf_Verdaux = Elf_Verdaux_Impl; Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT) Elf_Half vd_flags; // Bitwise flags (VER_DEF_*) Elf_Half vd_ndx; // Version index, used in .gnu.version entries @@ -361,10 +372,10 @@ template struct Elf_Dyn_Impl : Elf_Dyn_Base { using Elf_Dyn_Base::d_tag; using Elf_Dyn_Base::d_un; - typedef typename std::conditional::type intX_t; - typedef typename std::conditional::type uintX_t; + using intX_t = typename std::conditional::type; + using uintX_t = typename std::conditional::type; intX_t getTag() const { return d_tag; } uintX_t getVal() const { return d_un.d_val; } uintX_t getPtr() const { return d_un.d_ptr; } @@ -430,6 +441,7 @@ struct Elf_Rel_Impl, false> { return (t << 32) | ((t >> 8) & 0xff000000) | ((t >> 24) & 0x00ff0000) | ((t >> 40) & 0x0000ff00) | ((t >> 56) & 0x000000ff); } + void setRInfo(uint64_t R, bool IsMips64EL) { if (IsMips64EL) r_info = (R >> 32) | ((R & 0xff000000) << 8) | ((R & 0x00ff0000) << 24) | @@ -483,15 +495,15 @@ struct Elf_Ehdr_Impl { Elf_Half e_shnum; // Number of entries in the section header table Elf_Half e_shstrndx; // Section header table index of section name // string table + bool checkMagic() const { return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0; } + unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; } unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; } }; -template struct Elf_Phdr_Impl; - template struct Elf_Phdr_Impl> { LLVM_ELF_IMPORT_TYPES(TargetEndianness, false) @@ -582,7 +594,7 @@ struct Elf_Chdr_Impl> { template struct Elf_Mips_RegInfo; -template +template struct Elf_Mips_RegInfo> { LLVM_ELF_IMPORT_TYPES(TargetEndianness, false) Elf_Word ri_gprmask; // bit-mask of used general registers @@ -590,7 +602,7 @@ struct Elf_Mips_RegInfo> { Elf_Addr ri_gp_value; // gp register value }; -template +template struct Elf_Mips_RegInfo> { LLVM_ELF_IMPORT_TYPES(TargetEndianness, true) Elf_Word ri_gprmask; // bit-mask of used general registers @@ -609,7 +621,7 @@ template struct Elf_Mips_Options { Elf_Word info; // Kind-specific information Elf_Mips_RegInfo &getRegInfo() { - assert(kind == llvm::ELF::ODK_REGINFO); + assert(kind == ELF::ODK_REGINFO); return *reinterpret_cast *>( (uint8_t *)this + sizeof(Elf_Mips_Options)); } @@ -637,4 +649,4 @@ template struct Elf_Mips_ABIFlags { } // end namespace object. } // end namespace llvm. -#endif +#endif // LLVM_OBJECT_ELFTYPES_H diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h index 0ea89011e8836f58289e1d2fe7f9dd5e48f3307b..3bce7813ee930baa8298ffc12ee8f5705fd9b4d6 100644 --- a/include/llvm/Object/IRObjectFile.h +++ b/include/llvm/Object/IRObjectFile.h @@ -15,10 +15,12 @@ #define LLVM_OBJECT_IROBJECTFILE_H #include "llvm/ADT/PointerUnion.h" +#include "llvm/Object/IRSymtab.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Object/SymbolicFile.h" namespace llvm { +class BitcodeModule; class Mangler; class Module; class GlobalValue; @@ -61,7 +63,20 @@ public: static Expected> create(MemoryBufferRef Object, LLVMContext &Context); }; + +/// The contents of a bitcode file and its irsymtab. Any underlying data +/// for the irsymtab are owned by Symtab and Strtab. +struct IRSymtabFile { + std::vector Mods; + SmallVector Symtab, Strtab; + irsymtab::Reader TheReader; +}; + +/// Reads a bitcode file, creating its irsymtab if necessary. +Expected readIRSymtab(MemoryBufferRef MBRef); + } + } #endif diff --git a/include/llvm/Object/IRSymtab.h b/include/llvm/Object/IRSymtab.h index 7ce6fa50e6da32ab521f7fac039510b7d1989106..5b832141a8651b16b7ead053321f892abcf35c93 100644 --- a/include/llvm/Object/IRSymtab.h +++ b/include/llvm/Object/IRSymtab.h @@ -25,31 +25,43 @@ #define LLVM_OBJECT_IRSYMTAB_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include +#include +#include namespace llvm { + +struct BitcodeFileContents; + namespace irsymtab { + namespace storage { // The data structures in this namespace define the low-level serialization // format. Clients that just want to read a symbol table should use the // irsymtab::Reader class. -typedef support::ulittle32_t Word; +using Word = support::ulittle32_t; /// A reference to a string in the string table. struct Str { - Word Offset; + Word Offset, Size; + StringRef get(StringRef Strtab) const { - return Strtab.data() + Offset; + return {Strtab.data() + Offset, Size}; } }; /// A reference to a range of objects in the symbol table. template struct Range { Word Offset, Size; + ArrayRef get(StringRef Symtab) const { return {reinterpret_cast(Symtab.data() + Offset), Size}; } @@ -59,6 +71,9 @@ template struct Range { /// table. struct Module { Word Begin, End; + + /// The index of the first Uncommon for this Module. + Word UncBegin; }; /// This is equivalent to an IR comdat. @@ -82,7 +97,8 @@ struct Symbol { Word Flags; enum FlagBits { FB_visibility, // 2 bits - FB_undefined = FB_visibility + 2, + FB_has_uncommon = FB_visibility + 2, + FB_undefined, FB_weak, FB_common, FB_indirect, @@ -92,11 +108,8 @@ struct Symbol { FB_global, FB_format_specific, FB_unnamed_addr, + FB_executable, }; - - /// The index into the Uncommon table, or -1 if this symbol does not have an - /// Uncommon. - Word UncommonIndex; }; /// This data structure contains rarely used symbol fields and is optionally @@ -115,13 +128,13 @@ struct Header { Range Symbols; Range Uncommons; - Str SourceFileName; + Str TargetTriple, SourceFileName; /// COFF-specific: linker directives. Str COFFLinkerOpts; }; -} +} // end namespace storage /// Fills in Symtab and Strtab with a valid symbol and string table for Mods. Error build(ArrayRef Mods, SmallVector &Symtab, @@ -151,26 +164,32 @@ struct Symbol { int getComdatIndex() const { return ComdatIndex; } using S = storage::Symbol; + GlobalValue::VisibilityTypes getVisibility() const { return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); } + bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } bool isWeak() const { return (Flags >> S::FB_weak) & 1; } bool isCommon() const { return (Flags >> S::FB_common) & 1; } bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } bool isUsed() const { return (Flags >> S::FB_used) & 1; } bool isTLS() const { return (Flags >> S::FB_tls) & 1; } + bool canBeOmittedFromSymbolTable() const { return (Flags >> S::FB_may_omit) & 1; } + bool isGlobal() const { return (Flags >> S::FB_global) & 1; } bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } + bool isExecutable() const { return (Flags >> S::FB_executable) & 1; } uint64_t getCommonSize() const { assert(isCommon()); return CommonSize; } + uint32_t getCommonAlignment() const { assert(isCommon()); return CommonAlign; @@ -195,9 +214,11 @@ class Reader { ArrayRef Uncommons; StringRef str(storage::Str S) const { return S.get(Strtab); } + template ArrayRef range(storage::Range R) const { return R.get(Symtab); } + const storage::Header &header() const { return *reinterpret_cast(Symtab.data()); } @@ -213,7 +234,7 @@ public: Uncommons = range(header().Uncommons); } - typedef iterator_range> symbol_range; + using symbol_range = iterator_range>; /// Returns the symbol table for the entire bitcode file. /// The symbols enumerated by this method are ephemeral, but they can be @@ -225,6 +246,8 @@ public: /// copied into an irsymtab::Symbol object. symbol_range module_symbols(unsigned I) const; + StringRef getTargetTriple() const { return str(header().TargetTriple); } + /// Returns the source file path specified at compile time. StringRef getSourceFileName() const { return str(header().SourceFileName); } @@ -245,15 +268,9 @@ public: /// Reader::module_symbols(). class Reader::SymbolRef : public Symbol { const storage::Symbol *SymI, *SymE; + const storage::Uncommon *UncI; const Reader *R; -public: - SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, - const Reader *R) - : SymI(SymI), SymE(SymE), R(R) { - read(); - } - void read() { if (SymI == SymE) return; @@ -263,16 +280,24 @@ public: ComdatIndex = SymI->ComdatIndex; Flags = SymI->Flags; - uint32_t UncI = SymI->UncommonIndex; - if (UncI != -1u) { - const storage::Uncommon &Unc = R->Uncommons[UncI]; - CommonSize = Unc.CommonSize; - CommonAlign = Unc.CommonAlign; - COFFWeakExternFallbackName = R->str(Unc.COFFWeakExternFallbackName); + if (Flags & (1 << storage::Symbol::FB_has_uncommon)) { + CommonSize = UncI->CommonSize; + CommonAlign = UncI->CommonAlign; + COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName); } } + +public: + SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, + const storage::Uncommon *UncI, const Reader *R) + : SymI(SymI), SymE(SymE), UncI(UncI), R(R) { + read(); + } + void moveNext() { ++SymI; + if (Flags & (1 << storage::Symbol::FB_has_uncommon)) + ++UncI; read(); } @@ -280,19 +305,29 @@ public: }; inline Reader::symbol_range Reader::symbols() const { - return {SymbolRef(Symbols.begin(), Symbols.end(), this), - SymbolRef(Symbols.end(), Symbols.end(), this)}; + return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this), + SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)}; } inline Reader::symbol_range Reader::module_symbols(unsigned I) const { const storage::Module &M = Modules[I]; const storage::Symbol *MBegin = Symbols.begin() + M.Begin, *MEnd = Symbols.begin() + M.End; - return {SymbolRef(MBegin, MEnd, this), SymbolRef(MEnd, MEnd, this)}; + return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this), + SymbolRef(MEnd, MEnd, nullptr, this)}; } -} +/// The contents of the irsymtab in a bitcode file. Any underlying data for the +/// irsymtab are owned by Symtab and Strtab. +struct FileContents { + SmallVector Symtab, Strtab; + Reader TheReader; +}; -} +/// Reads the contents of a bitcode file, creating its irsymtab if necessary. +Expected readBitcode(const BitcodeFileContents &BFC); + +} // end namespace irsymtab +} // end namespace llvm -#endif +#endif // LLVM_OBJECT_IRSYMTAB_H diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index 1ee571cce738eca551c6fa511e8b806a4b3c6bde..3fc726f4ccb8e4e489aaf6fcedf848abd35faf91 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -16,10 +16,25 @@ #define LLVM_OBJECT_MACHO_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/MachO.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include namespace llvm { namespace object { @@ -28,11 +43,10 @@ namespace object { /// data in code entry in the table in a Mach-O object file. class DiceRef { DataRefImpl DicePimpl; - const ObjectFile *OwningObject; + const ObjectFile *OwningObject = nullptr; public: - DiceRef() : OwningObject(nullptr) { } - + DiceRef() = default; DiceRef(DataRefImpl DiceP, const ObjectFile *Owner); bool operator==(const DiceRef &Other) const; @@ -47,7 +61,7 @@ public: DataRefImpl getRawDataRefImpl() const; const ObjectFile *getObjectFile() const; }; -typedef content_iterator dice_iterator; +using dice_iterator = content_iterator; /// ExportEntry encapsulates the current-state-of-the-walk used when doing a /// non-recursive walk of the trie data structure. This allows you to iterate @@ -71,6 +85,7 @@ public: private: friend class MachOObjectFile; + void moveToFirst(); void moveToEnd(); uint64_t readULEB128(const uint8_t *&p); @@ -80,25 +95,26 @@ private: // Represents a node in the mach-o exports trie. struct NodeState { NodeState(const uint8_t *Ptr); + const uint8_t *Start; const uint8_t *Current; - uint64_t Flags; - uint64_t Address; - uint64_t Other; - const char *ImportName; - unsigned ChildCount; - unsigned NextChildIndex; - unsigned ParentStringLength; - bool IsExportNode; + uint64_t Flags = 0; + uint64_t Address = 0; + uint64_t Other = 0; + const char *ImportName = nullptr; + unsigned ChildCount = 0; + unsigned NextChildIndex = 0; + unsigned ParentStringLength = 0; + bool IsExportNode = false; }; ArrayRef Trie; SmallString<256> CumulativeString; SmallVector Stack; - bool Malformed; - bool Done; + bool Malformed = false; + bool Done = false; }; -typedef content_iterator export_iterator; +using export_iterator = content_iterator; // Segment info so SegIndex/SegOffset pairs in a Mach-O Bind or Rebase entry // can be checked and translated. Only the SegIndex/SegOffset pairs from @@ -106,7 +122,7 @@ typedef content_iterator export_iterator; // address() methods below. class BindRebaseSegInfo { public: - BindRebaseSegInfo(const object::MachOObjectFile *Obj); + BindRebaseSegInfo(const MachOObjectFile *Obj); // Used to check a Mach-O Bind or Rebase entry for errors when iterating. const char *checkSegAndOffset(int32_t SegIndex, uint64_t SegOffset, @@ -130,6 +146,7 @@ private: int32_t SegmentIndex; }; const SectionInfo &findSection(int32_t SegIndex, uint64_t SegOffset); + SmallVector Sections; int32_t MaxSegIndex; }; @@ -159,6 +176,7 @@ public: private: friend class MachOObjectFile; + void moveToFirst(); void moveToEnd(); uint64_t readULEB128(const char **error); @@ -167,15 +185,15 @@ private: const MachOObjectFile *O; ArrayRef Opcodes; const uint8_t *Ptr; - uint64_t SegmentOffset; - int32_t SegmentIndex; - uint64_t RemainingLoopCount; - uint64_t AdvanceAmount; - uint8_t RebaseType; + uint64_t SegmentOffset = 0; + int32_t SegmentIndex = -1; + uint64_t RemainingLoopCount = 0; + uint64_t AdvanceAmount = 0; + uint8_t RebaseType = 0; uint8_t PointerSize; - bool Done; + bool Done = false; }; -typedef content_iterator rebase_iterator; +using rebase_iterator = content_iterator; /// MachOBindEntry encapsulates the current state in the decompression of /// binding opcodes. This allows you to iterate through the compressed table of @@ -209,6 +227,7 @@ public: private: friend class MachOObjectFile; + void moveToFirst(); void moveToEnd(); uint64_t readULEB128(const char **error); @@ -218,21 +237,21 @@ private: const MachOObjectFile *O; ArrayRef Opcodes; const uint8_t *Ptr; - uint64_t SegmentOffset; - int32_t SegmentIndex; + uint64_t SegmentOffset = 0; + int32_t SegmentIndex = -1; StringRef SymbolName; - bool LibraryOrdinalSet; - int Ordinal; - uint32_t Flags; - int64_t Addend; - uint64_t RemainingLoopCount; - uint64_t AdvanceAmount; - uint8_t BindType; + bool LibraryOrdinalSet = false; + int Ordinal = 0; + uint32_t Flags = 0; + int64_t Addend = 0; + uint64_t RemainingLoopCount = 0; + uint64_t AdvanceAmount = 0; + uint8_t BindType = 0; uint8_t PointerSize; Kind TableKind; - bool Done; + bool Done = false; }; -typedef content_iterator bind_iterator; +using bind_iterator = content_iterator; class MachOObjectFile : public ObjectFile { public: @@ -240,8 +259,8 @@ public: const char *Ptr; // Where in memory the load command is. MachO::load_command C; // The command itself. }; - typedef SmallVector LoadCommandList; - typedef LoadCommandList::const_iterator load_command_iterator; + using LoadCommandList = SmallVector; + using load_command_iterator = LoadCommandList::const_iterator; static Expected> create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, @@ -271,6 +290,7 @@ public: std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; @@ -530,6 +550,8 @@ public: bool isRelocatableObject() const override; + StringRef mapDebugSectionName(StringRef Name) const override; + bool hasPageZeroSegment() const { return HasPageZeroSegment; } static bool classof(const Binary *v) { @@ -563,7 +585,7 @@ public: case MachO::PLATFORM_BRIDGEOS: return "bridgeos"; default: std::string ret; - llvm::raw_string_ostream ss(ret); + raw_string_ostream ss(ret); ss << format_hex(platform, 8, true); return ss.str(); } @@ -576,7 +598,7 @@ public: case MachO::TOOL_LD: return "ld"; default: std::string ret; - llvm::raw_string_ostream ss(ret); + raw_string_ostream ss(ret); ss << format_hex(tools, 8, true); return ss.str(); } @@ -595,7 +617,6 @@ public: } private: - MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, Error &Err, uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0); @@ -606,23 +627,23 @@ private: MachO::mach_header_64 Header64; MachO::mach_header Header; }; - typedef SmallVector SectionList; + using SectionList = SmallVector; SectionList Sections; - typedef SmallVector LibraryList; + using LibraryList = SmallVector; LibraryList Libraries; LoadCommandList LoadCommands; - typedef SmallVector LibraryShortName; + using LibraryShortName = SmallVector; using BuildToolList = SmallVector; BuildToolList BuildTools; mutable LibraryShortName LibrariesShortNames; std::unique_ptr BindRebaseSectionTable; - const char *SymtabLoadCmd; - const char *DysymtabLoadCmd; - const char *DataInCodeLoadCmd; - const char *LinkOptHintsLoadCmd; - const char *DyldInfoLoadCmd; - const char *UuidLoadCmd; - bool HasPageZeroSegment; + const char *SymtabLoadCmd = nullptr; + const char *DysymtabLoadCmd = nullptr; + const char *DataInCodeLoadCmd = nullptr; + const char *LinkOptHintsLoadCmd = nullptr; + const char *DyldInfoLoadCmd = nullptr; + const char *UuidLoadCmd = nullptr; + bool HasPageZeroSegment = false; }; /// DiceRef @@ -679,7 +700,7 @@ inline const ObjectFile *DiceRef::getObjectFile() const { return OwningObject; } -} -} +} // end namespace object +} // end namespace llvm -#endif +#endif // LLVM_OBJECT_MACHO_H diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h index a14c4ca0122376b92420118070efc76f56740a4e..8a6f0fc56971533ed5fed355c1d06bbc677df07f 100644 --- a/include/llvm/Object/MachOUniversal.h +++ b/include/llvm/Object/MachOUniversal.h @@ -16,10 +16,10 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Binary.h" #include "llvm/Object/MachO.h" -#include "llvm/Support/MachO.h" namespace llvm { class StringRef; diff --git a/include/llvm/Object/ModuleSummaryIndexObjectFile.h b/include/llvm/Object/ModuleSummaryIndexObjectFile.h deleted file mode 100644 index 713022264ea7ad08ef5aa2d1fb1126ad176dba86..0000000000000000000000000000000000000000 --- a/include/llvm/Object/ModuleSummaryIndexObjectFile.h +++ /dev/null @@ -1,99 +0,0 @@ -//===- ModuleSummaryIndexObjectFile.h - Summary index file implementation -=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the ModuleSummaryIndexObjectFile template class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_OBJECT_MODULESUMMARYINDEXOBJECTFILE_H -#define LLVM_OBJECT_MODULESUMMARYINDEXOBJECTFILE_H - -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/Object/SymbolicFile.h" - -namespace llvm { -class ModuleSummaryIndex; -class Module; - -namespace object { -class ObjectFile; - -/// This class is used to read just the module summary index related -/// sections out of the given object (which may contain a single module's -/// bitcode or be a combined index bitcode file). It builds a ModuleSummaryIndex -/// object. -class ModuleSummaryIndexObjectFile : public SymbolicFile { - std::unique_ptr Index; - -public: - ModuleSummaryIndexObjectFile(MemoryBufferRef Object, - std::unique_ptr I); - ~ModuleSummaryIndexObjectFile() override; - - // TODO: Walk through GlobalValueMap entries for symbols. - // However, currently these interfaces are not used by any consumers. - void moveSymbolNext(DataRefImpl &Symb) const override { - llvm_unreachable("not implemented"); - } - std::error_code printSymbolName(raw_ostream &OS, - DataRefImpl Symb) const override { - llvm_unreachable("not implemented"); - return std::error_code(); - } - uint32_t getSymbolFlags(DataRefImpl Symb) const override { - llvm_unreachable("not implemented"); - return 0; - } - basic_symbol_iterator symbol_begin() const override { - llvm_unreachable("not implemented"); - return basic_symbol_iterator(BasicSymbolRef()); - } - basic_symbol_iterator symbol_end() const override { - llvm_unreachable("not implemented"); - return basic_symbol_iterator(BasicSymbolRef()); - } - - const ModuleSummaryIndex &getIndex() const { - return const_cast(this)->getIndex(); - } - ModuleSummaryIndex &getIndex() { return *Index; } - std::unique_ptr takeIndex(); - - static inline bool classof(const Binary *v) { - return v->isModuleSummaryIndex(); - } - - /// \brief Finds and returns bitcode embedded in the given object file, or an - /// error code if not found. - static ErrorOr findBitcodeInObject(const ObjectFile &Obj); - - /// \brief Finds and returns bitcode in the given memory buffer (which may - /// be either a bitcode file or a native object file with embedded bitcode), - /// or an error code if not found. - static ErrorOr - findBitcodeInMemBuffer(MemoryBufferRef Object); - - /// \brief Parse module summary index in the given memory buffer. - /// Return new ModuleSummaryIndexObjectFile instance containing parsed module - /// summary/index. - static Expected> - create(MemoryBufferRef Object); -}; -} - -/// Parse the module summary index out of an IR file and return the module -/// summary index object if found, or nullptr if not. If Identifier is -/// non-empty, it is used as the module ID (module path) in the resulting -/// index. This can be used when the index is being read from a file -/// containing minimized bitcode just for the thin link. -Expected> -getModuleSummaryIndexForFile(StringRef Path, StringRef Identifier = ""); -} - -#endif diff --git a/include/llvm/Object/ModuleSymbolTable.h b/include/llvm/Object/ModuleSymbolTable.h index 333301d5b456c02366044a7d0797d06e8f6172eb..9e9322885388490fa97499a57e8d5d50256dcece 100644 --- a/include/llvm/Object/ModuleSymbolTable.h +++ b/include/llvm/Object/ModuleSymbolTable.h @@ -1,4 +1,4 @@ -//===- ModuleSymbolTable.h - symbol table for in-memory IR ----------------===// +//===- ModuleSymbolTable.h - symbol table for in-memory IR ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,22 +16,24 @@ #ifndef LLVM_OBJECT_MODULESYMBOLTABLE_H #define LLVM_OBJECT_MODULESYMBOLTABLE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/ADT/Triple.h" #include "llvm/IR/Mangler.h" #include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Allocator.h" +#include #include #include +#include namespace llvm { class GlobalValue; -class RecordStreamer; class ModuleSymbolTable { public: - typedef std::pair AsmSymbol; - typedef PointerUnion Symbol; + using AsmSymbol = std::pair; + using Symbol = PointerUnion; private: Module *FirstMod = nullptr; @@ -57,6 +59,6 @@ public: function_ref AsmSymbol); }; -} +} // end namespace llvm -#endif +#endif // LLVM_OBJECT_MODULESYMBOLTABLE_H diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index b689dc2ac03ac3a2830e97039342fabdcbaad85a..6b5b9d95fcf3760d9726fc9b48264de78155f087 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -15,38 +15,46 @@ #define LLVM_OBJECT_OBJECTFILE_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" #include "llvm/Object/SymbolicFile.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include +#include +#include +#include +#include namespace llvm { + class ARMAttributeParser; namespace object { -class ObjectFile; class COFFObjectFile; class MachOObjectFile; -class WasmObjectFile; - +class ObjectFile; +class SectionRef; class SymbolRef; class symbol_iterator; -class SectionRef; -typedef content_iterator section_iterator; +class WasmObjectFile; + +using section_iterator = content_iterator; /// This is a value type class that represents a single relocation in the list /// of relocations in the object file. class RelocationRef { DataRefImpl RelocationPimpl; - const ObjectFile *OwningObject; + const ObjectFile *OwningObject = nullptr; public: - RelocationRef() : OwningObject(nullptr) { } - + RelocationRef() = default; RelocationRef(DataRefImpl RelocationP, const ObjectFile *Owner); bool operator==(const RelocationRef &Other) const; @@ -65,18 +73,19 @@ public: DataRefImpl getRawDataRefImpl() const; const ObjectFile *getObject() const; }; -typedef content_iterator relocation_iterator; + +using relocation_iterator = content_iterator; /// This is a value type class that represents a single section in the list of /// sections in the object file. class SectionRef { friend class SymbolRef; + DataRefImpl SectionPimpl; - const ObjectFile *OwningObject; + const ObjectFile *OwningObject = nullptr; public: - SectionRef() : OwningObject(nullptr) { } - + SectionRef() = default; SectionRef(DataRefImpl SectionP, const ObjectFile *Owner); bool operator==(const SectionRef &Other) const; @@ -87,6 +96,7 @@ public: std::error_code getName(StringRef &Result) const; uint64_t getAddress() const; + uint64_t getIndex() const; uint64_t getSize() const; std::error_code getContents(StringRef &Result) const; @@ -119,8 +129,6 @@ class SymbolRef : public BasicSymbolRef { friend class SectionRef; public: - SymbolRef() : BasicSymbolRef() {} - enum Type { ST_Unknown, // Type not specified ST_Data, @@ -130,6 +138,7 @@ public: ST_Other }; + SymbolRef() = default; SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner); SymbolRef(const BasicSymbolRef &B) : BasicSymbolRef(B) { assert(isa(BasicSymbolRef::getObject())); @@ -179,8 +188,6 @@ public: /// to create. class ObjectFile : public SymbolicFile { virtual void anchor(); - ObjectFile() = delete; - ObjectFile(const ObjectFile &other) = delete; protected: ObjectFile(unsigned int Type, MemoryBufferRef Source); @@ -198,6 +205,7 @@ protected: // Implementations assume that the DataRefImpl is valid and has not been // modified externally. It's UB otherwise. friend class SymbolRef; + virtual Expected getSymbolName(DataRefImpl Symb) const = 0; std::error_code printSymbolName(raw_ostream &OS, DataRefImpl Symb) const override; @@ -211,10 +219,12 @@ protected: // Same as above for SectionRef. friend class SectionRef; + virtual void moveSectionNext(DataRefImpl &Sec) const = 0; virtual std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const = 0; virtual uint64_t getSectionAddress(DataRefImpl Sec) const = 0; + virtual uint64_t getSectionIndex(DataRefImpl Sec) const = 0; virtual uint64_t getSectionSize(DataRefImpl Sec) const = 0; virtual std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const = 0; @@ -242,12 +252,15 @@ protected: uint64_t getSymbolValue(DataRefImpl Symb) const; public: + ObjectFile() = delete; + ObjectFile(const ObjectFile &other) = delete; + uint64_t getCommonSymbolSize(DataRefImpl Symb) const { assert(getSymbolFlags(Symb) & SymbolRef::SF_Common); return getCommonSymbolSizeImpl(Symb); } - typedef iterator_range symbol_iterator_range; + using symbol_iterator_range = iterator_range; symbol_iterator_range symbols() const { return symbol_iterator_range(symbol_begin(), symbol_end()); } @@ -255,7 +268,7 @@ public: virtual section_iterator section_begin() const = 0; virtual section_iterator section_end() const = 0; - typedef iterator_range section_iterator_range; + using section_iterator_range = iterator_range; section_iterator_range sections() const { return section_iterator_range(section_begin(), section_end()); } @@ -280,6 +293,9 @@ public: return std::error_code(); } + /// Maps a debug section name to a standard DWARF section name. + virtual StringRef mapDebugSectionName(StringRef Name) const { return Name; } + /// True if this is a relocatable object (.o/.obj). virtual bool isRelocatableObject() const = 0; @@ -291,13 +307,12 @@ public: createObjectFile(StringRef ObjectPath); static Expected> - createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type); + createObjectFile(MemoryBufferRef Object, llvm::file_magic Type); static Expected> createObjectFile(MemoryBufferRef Object) { - return createObjectFile(Object, sys::fs::file_magic::unknown); + return createObjectFile(Object, llvm::file_magic::unknown); } - static inline bool classof(const Binary *v) { return v->isObject(); } @@ -354,7 +369,6 @@ inline const ObjectFile *SymbolRef::getObject() const { return cast(O); } - /// SectionRef inline SectionRef::SectionRef(DataRefImpl SectionP, const ObjectFile *Owner) @@ -385,6 +399,10 @@ inline uint64_t SectionRef::getAddress() const { return OwningObject->getSectionAddress(SectionPimpl); } +inline uint64_t SectionRef::getIndex() const { + return OwningObject->getSectionIndex(SectionPimpl); +} + inline uint64_t SectionRef::getSize() const { return OwningObject->getSectionSize(SectionPimpl); } @@ -479,8 +497,8 @@ inline const ObjectFile *RelocationRef::getObject() const { return OwningObject; } - } // end namespace object + } // end namespace llvm -#endif +#endif // LLVM_OBJECT_OBJECTFILE_H diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h index 3a0a62d9283b3b3fb049b1de6c29d4aac05914cd..c358d399643525e3f2fd819cca9495574c7267b8 100644 --- a/include/llvm/Object/RelocVisitor.h +++ b/include/llvm/Object/RelocVisitor.h @@ -1,4 +1,4 @@ -//===-- RelocVisitor.h - Visitor for object file relocations -*- C++ -*-===// +//===- RelocVisitor.h - Visitor for object file relocations -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,264 +16,102 @@ #ifndef LLVM_OBJECT_RELOCVISITOR_H #define LLVM_OBJECT_RELOCVISITOR_H +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/MachO.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include +#include namespace llvm { namespace object { -struct RelocToApply { - // The computed value after applying the relevant relocations. - int64_t Value; - - // The width of the value; how many bytes to touch when applying the - // relocation. - char Width; - RelocToApply(int64_t Value, char Width) : Value(Value), Width(Width) {} - RelocToApply() : Value(0), Width(0) {} -}; - /// @brief Base class for object file relocation visitors. class RelocVisitor { public: - explicit RelocVisitor(const ObjectFile &Obj) - : ObjToVisit(Obj), HasError(false) {} + explicit RelocVisitor(const ObjectFile &Obj) : ObjToVisit(Obj) {} // TODO: Should handle multiple applied relocations via either passing in the // previously computed value or just count paired relocations as a single // visit. - RelocToApply visit(uint32_t RelocType, RelocationRef R, uint64_t Value = 0) { + uint64_t visit(uint32_t Rel, RelocationRef R, uint64_t Value = 0) { if (isa(ObjToVisit)) - return visitELF(RelocType, R, Value); + return visitELF(Rel, R, Value); if (isa(ObjToVisit)) - return visitCOFF(RelocType, R, Value); + return visitCOFF(Rel, R, Value); if (isa(ObjToVisit)) - return visitMachO(RelocType, R, Value); + return visitMachO(Rel, R, Value); HasError = true; - return RelocToApply(); + return 0; } bool error() { return HasError; } private: const ObjectFile &ObjToVisit; - bool HasError; + bool HasError = false; - RelocToApply visitELF(uint32_t RelocType, RelocationRef R, uint64_t Value) { + uint64_t visitELF(uint32_t Rel, RelocationRef R, uint64_t Value) { if (ObjToVisit.getBytesInAddress() == 8) { // 64-bit object file switch (ObjToVisit.getArch()) { case Triple::x86_64: - switch (RelocType) { - case llvm::ELF::R_X86_64_NONE: - return visitELF_X86_64_NONE(R); - case llvm::ELF::R_X86_64_64: - return visitELF_X86_64_64(R, Value); - case llvm::ELF::R_X86_64_PC32: - return visitELF_X86_64_PC32(R, Value); - case llvm::ELF::R_X86_64_32: - return visitELF_X86_64_32(R, Value); - case llvm::ELF::R_X86_64_32S: - return visitELF_X86_64_32S(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitX86_64(Rel, R, Value); case Triple::aarch64: case Triple::aarch64_be: - switch (RelocType) { - case llvm::ELF::R_AARCH64_ABS32: - return visitELF_AARCH64_ABS32(R, Value); - case llvm::ELF::R_AARCH64_ABS64: - return visitELF_AARCH64_ABS64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitAarch64(Rel, R, Value); case Triple::bpfel: case Triple::bpfeb: - switch (RelocType) { - case llvm::ELF::R_BPF_64_64: - return visitELF_BPF_64_64(R, Value); - case llvm::ELF::R_BPF_64_32: - return visitELF_BPF_64_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitBpf(Rel, R, Value); case Triple::mips64el: case Triple::mips64: - switch (RelocType) { - case llvm::ELF::R_MIPS_32: - return visitELF_MIPS64_32(R, Value); - case llvm::ELF::R_MIPS_64: - return visitELF_MIPS64_64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitMips64(Rel, R, Value); case Triple::ppc64le: case Triple::ppc64: - switch (RelocType) { - case llvm::ELF::R_PPC64_ADDR32: - return visitELF_PPC64_ADDR32(R, Value); - case llvm::ELF::R_PPC64_ADDR64: - return visitELF_PPC64_ADDR64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitPPC64(Rel, R, Value); case Triple::systemz: - switch (RelocType) { - case llvm::ELF::R_390_32: - return visitELF_390_32(R, Value); - case llvm::ELF::R_390_64: - return visitELF_390_64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitSystemz(Rel, R, Value); case Triple::sparcv9: - switch (RelocType) { - case llvm::ELF::R_SPARC_32: - case llvm::ELF::R_SPARC_UA32: - return visitELF_SPARCV9_32(R, Value); - case llvm::ELF::R_SPARC_64: - case llvm::ELF::R_SPARC_UA64: - return visitELF_SPARCV9_64(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitSparc64(Rel, R, Value); case Triple::amdgcn: - switch (RelocType) { - case llvm::ELF::R_AMDGPU_ABS32: - return visitELF_AMDGPU_ABS32(R, Value); - case llvm::ELF::R_AMDGPU_ABS64: - return visitELF_AMDGPU_ABS64(R, Value); - default: - HasError = true; - return RelocToApply(); - } - default: - HasError = true; - return RelocToApply(); - } - } else if (ObjToVisit.getBytesInAddress() == 4) { // 32-bit object file - switch (ObjToVisit.getArch()) { - case Triple::x86: - switch (RelocType) { - case llvm::ELF::R_386_NONE: - return visitELF_386_NONE(R); - case llvm::ELF::R_386_32: - return visitELF_386_32(R, Value); - case llvm::ELF::R_386_PC32: - return visitELF_386_PC32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::ppc: - switch (RelocType) { - case llvm::ELF::R_PPC_ADDR32: - return visitELF_PPC_ADDR32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::arm: - case Triple::armeb: - switch (RelocType) { - default: - HasError = true; - return RelocToApply(); - case llvm::ELF::R_ARM_ABS32: - return visitELF_ARM_ABS32(R, Value); - } - case Triple::lanai: - switch (RelocType) { - case llvm::ELF::R_LANAI_32: - return visitELF_Lanai_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::mipsel: - case Triple::mips: - switch (RelocType) { - case llvm::ELF::R_MIPS_32: - return visitELF_MIPS_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::sparc: - switch (RelocType) { - case llvm::ELF::R_SPARC_32: - case llvm::ELF::R_SPARC_UA32: - return visitELF_SPARC_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } - case Triple::hexagon: - switch (RelocType) { - case llvm::ELF::R_HEX_32: - return visitELF_HEX_32(R, Value); - default: - HasError = true; - return RelocToApply(); - } + return visitAmdgpu(Rel, R, Value); default: HasError = true; - return RelocToApply(); + return 0; } - } else { - report_fatal_error("Invalid word size in object file"); } - } - RelocToApply visitCOFF(uint32_t RelocType, RelocationRef R, uint64_t Value) { - switch (ObjToVisit.getArch()) { - case Triple::x86: - switch (RelocType) { - case COFF::IMAGE_REL_I386_SECREL: - return visitCOFF_I386_SECREL(R, Value); - case COFF::IMAGE_REL_I386_DIR32: - return visitCOFF_I386_DIR32(R, Value); - } - break; - case Triple::x86_64: - switch (RelocType) { - case COFF::IMAGE_REL_AMD64_SECREL: - return visitCOFF_AMD64_SECREL(R, Value); - case COFF::IMAGE_REL_AMD64_ADDR64: - return visitCOFF_AMD64_ADDR64(R, Value); - } - break; - } - HasError = true; - return RelocToApply(); - } + // 32-bit object file + assert(ObjToVisit.getBytesInAddress() == 4 && + "Invalid word size in object file"); - RelocToApply visitMachO(uint32_t RelocType, RelocationRef R, uint64_t Value) { switch (ObjToVisit.getArch()) { - default: break; - case Triple::x86_64: - switch (RelocType) { - default: break; - case MachO::X86_64_RELOC_UNSIGNED: - return visitMACHO_X86_64_UNSIGNED(R, Value); - } + case Triple::x86: + return visitX86(Rel, R, Value); + case Triple::ppc: + return visitPPC32(Rel, R, Value); + case Triple::arm: + case Triple::armeb: + return visitARM(Rel, R, Value); + case Triple::lanai: + return visitLanai(Rel, R, Value); + case Triple::mipsel: + case Triple::mips: + return visitMips32(Rel, R, Value); + case Triple::sparc: + return visitSparc32(Rel, R, Value); + case Triple::hexagon: + return visitHexagon(Rel, R, Value); + default: + HasError = true; + return 0; } - HasError = true; - return RelocToApply(); } int64_t getELFAddend(RelocationRef R) { @@ -283,206 +121,197 @@ private: return *AddendOrErr; } - uint8_t getLengthMachO64(RelocationRef R) { - const MachOObjectFile *Obj = cast(R.getObject()); - return Obj->getRelocationLength(R.getRawDataRefImpl()); - } - - /// Operations - - /// 386-ELF - RelocToApply visitELF_386_NONE(RelocationRef R) { - return RelocToApply(0, 0); - } - - // Ideally the Addend here will be the addend in the data for - // the relocation. It's not actually the case for Rel relocations. - RelocToApply visitELF_386_32(RelocationRef R, uint64_t Value) { - return RelocToApply(Value, 4); - } - - RelocToApply visitELF_386_PC32(RelocationRef R, uint64_t Value) { - uint64_t Address = R.getOffset(); - return RelocToApply(Value - Address, 4); - } - - /// X86-64 ELF - RelocToApply visitELF_X86_64_NONE(RelocationRef R) { - return RelocToApply(0, 0); - } - RelocToApply visitELF_X86_64_64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); - } - RelocToApply visitELF_X86_64_PC32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint64_t Address = R.getOffset(); - return RelocToApply(Value + Addend - Address, 4); - } - RelocToApply visitELF_X86_64_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - RelocToApply visitELF_X86_64_32S(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - int32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - /// BPF ELF - RelocToApply visitELF_BPF_64_32(RelocationRef R, uint64_t Value) { - uint32_t Res = Value & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - RelocToApply visitELF_BPF_64_64(RelocationRef R, uint64_t Value) { - return RelocToApply(Value, 8); - } - - /// PPC64 ELF - RelocToApply visitELF_PPC64_ADDR32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - RelocToApply visitELF_PPC64_ADDR64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); - } - - /// PPC32 ELF - RelocToApply visitELF_PPC_ADDR32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - /// Lanai ELF - RelocToApply visitELF_Lanai_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - /// MIPS ELF - RelocToApply visitELF_MIPS_32(RelocationRef R, uint64_t Value) { - uint32_t Res = Value & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - /// MIPS64 ELF - RelocToApply visitELF_MIPS64_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint32_t Res = (Value + Addend) & 0xFFFFFFFF; - return RelocToApply(Res, 4); - } - - RelocToApply visitELF_MIPS64_64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - uint64_t Res = (Value + Addend); - return RelocToApply(Res, 8); + uint64_t visitX86_64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_X86_64_NONE: + return 0; + case ELF::R_X86_64_64: + return Value + getELFAddend(R); + case ELF::R_X86_64_PC32: + return Value + getELFAddend(R) - R.getOffset(); + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + } + HasError = true; + return 0; } - // AArch64 ELF - RelocToApply visitELF_AARCH64_ABS32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - int64_t Res = Value + Addend; - - // Overflow check allows for both signed and unsigned interpretation. - if (Res < INT32_MIN || Res > UINT32_MAX) - HasError = true; - - return RelocToApply(static_cast(Res), 4); + uint64_t visitAarch64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_AARCH64_ABS32: { + int64_t Res = Value + getELFAddend(R); + if (Res < INT32_MIN || Res > UINT32_MAX) + HasError = true; + return static_cast(Res); + } + case ELF::R_AARCH64_ABS64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_AARCH64_ABS64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); + uint64_t visitBpf(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_BPF_64_32: + return Value & 0xFFFFFFFF; + case ELF::R_BPF_64_64: + return Value; + } + HasError = true; + return 0; } - // SystemZ ELF - RelocToApply visitELF_390_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - int64_t Res = Value + Addend; - - // Overflow check allows for both signed and unsigned interpretation. - if (Res < INT32_MIN || Res > UINT32_MAX) - HasError = true; - - return RelocToApply(static_cast(Res), 4); + uint64_t visitMips64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_MIPS_32: + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + case ELF::R_MIPS_64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_390_64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); + uint64_t visitPPC64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_PPC64_ADDR32: + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + case ELF::R_PPC64_ADDR64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_SPARC_32(RelocationRef R, uint32_t Value) { - int32_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 4); + uint64_t visitSystemz(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_390_32: { + int64_t Res = Value + getELFAddend(R); + if (Res < INT32_MIN || Res > UINT32_MAX) + HasError = true; + return static_cast(Res); + } + case ELF::R_390_64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_SPARCV9_32(RelocationRef R, uint64_t Value) { - int32_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 4); + uint64_t visitSparc64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_SPARC_32: + case ELF::R_SPARC_64: + case ELF::R_SPARC_UA32: + case ELF::R_SPARC_UA64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_SPARCV9_64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); + uint64_t visitAmdgpu(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_AMDGPU_ABS32: + case ELF::R_AMDGPU_ABS64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - RelocToApply visitELF_ARM_ABS32(RelocationRef R, uint64_t Value) { - int64_t Res = Value; - - // Overflow check allows for both signed and unsigned interpretation. - if (Res < INT32_MIN || Res > UINT32_MAX) - HasError = true; - - return RelocToApply(static_cast(Res), 4); + uint64_t visitX86(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_386_NONE: + return 0; + case ELF::R_386_32: + return Value; + case ELF::R_386_PC32: + return Value - R.getOffset(); + } + HasError = true; + return 0; } - RelocToApply visitELF_HEX_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 4); + uint64_t visitPPC32(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_PPC_ADDR32) + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + HasError = true; + return 0; } - RelocToApply visitELF_AMDGPU_ABS32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 4); + uint64_t visitARM(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_ARM_ABS32) { + if ((int64_t)Value < INT32_MIN || (int64_t)Value > UINT32_MAX) + HasError = true; + return static_cast(Value); + } + HasError = true; + return 0; } - RelocToApply visitELF_AMDGPU_ABS64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return RelocToApply(Value + Addend, 8); + uint64_t visitLanai(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_LANAI_32) + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + HasError = true; + return 0; } - /// I386 COFF - RelocToApply visitCOFF_I386_SECREL(RelocationRef R, uint64_t Value) { - return RelocToApply(static_cast(Value), /*Width=*/4); + uint64_t visitMips32(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_MIPS_32) + return Value & 0xFFFFFFFF; + HasError = true; + return 0; } - RelocToApply visitCOFF_I386_DIR32(RelocationRef R, uint64_t Value) { - return RelocToApply(static_cast(Value), /*Width=*/4); + uint64_t visitSparc32(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_SPARC_32 || Rel == ELF::R_SPARC_UA32) + return Value + getELFAddend(R); + HasError = true; + return 0; } - /// AMD64 COFF - RelocToApply visitCOFF_AMD64_SECREL(RelocationRef R, uint64_t Value) { - return RelocToApply(static_cast(Value), /*Width=*/4); + uint64_t visitHexagon(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_HEX_32) + return Value + getELFAddend(R); + HasError = true; + return 0; } - RelocToApply visitCOFF_AMD64_ADDR64(RelocationRef R, uint64_t Value) { - return RelocToApply(Value, /*Width=*/8); + uint64_t visitCOFF(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (ObjToVisit.getArch()) { + case Triple::x86: + switch (Rel) { + case COFF::IMAGE_REL_I386_SECREL: + case COFF::IMAGE_REL_I386_DIR32: + return static_cast(Value); + } + break; + case Triple::x86_64: + switch (Rel) { + case COFF::IMAGE_REL_AMD64_SECREL: + return static_cast(Value); + case COFF::IMAGE_REL_AMD64_ADDR64: + return Value; + } + break; + } + HasError = true; + return 0; } - // X86_64 MachO - RelocToApply visitMACHO_X86_64_UNSIGNED(RelocationRef R, uint64_t Value) { - uint8_t Length = getLengthMachO64(R); - Length = 1< +#include +#include #include namespace llvm { @@ -19,12 +23,11 @@ namespace llvm { template class StackMapV2Parser { public: - template class AccessorIterator { public: - AccessorIterator(AccessorT A) : A(A) {} + AccessorIterator& operator++() { A = A.next(); return *this; } AccessorIterator operator++(int) { auto tmp = *this; @@ -48,8 +51,8 @@ public: /// Accessor for function records. class FunctionAccessor { friend class StackMapV2Parser; - public: + public: /// Get the function address. uint64_t getFunctionAddress() const { return read(P); @@ -80,13 +83,12 @@ public: /// Accessor for constants. class ConstantAccessor { friend class StackMapV2Parser; - public: + public: /// Return the value of this constant. uint64_t getValue() const { return read(P); } private: - ConstantAccessor(const uint8_t *P) : P(P) {} const static int ConstantAccessorSize = sizeof(uint64_t); @@ -98,20 +100,16 @@ public: const uint8_t *P; }; - // Forward-declare RecordAccessor so we can friend it below. - class RecordAccessor; - enum class LocationKind : uint8_t { Register = 1, Direct = 2, Indirect = 3, Constant = 4, ConstantIndex = 5 }; - /// Accessor for location records. class LocationAccessor { friend class StackMapV2Parser; friend class RecordAccessor; - public: + public: /// Get the Kind for this location. LocationKind getKind() const { return LocationKind(P[KindOffset]); @@ -144,7 +142,6 @@ public: } private: - LocationAccessor(const uint8_t *P) : P(P) {} LocationAccessor next() const { @@ -163,8 +160,8 @@ public: class LiveOutAccessor { friend class StackMapV2Parser; friend class RecordAccessor; - public: + public: /// Get the Dwarf register number for this live-out. uint16_t getDwarfRegNum() const { return read(P + DwarfRegNumOffset); @@ -176,7 +173,6 @@ public: } private: - LiveOutAccessor(const uint8_t *P) : P(P) {} LiveOutAccessor next() const { @@ -194,10 +190,10 @@ public: /// Accessor for stackmap records. class RecordAccessor { friend class StackMapV2Parser; - public: - typedef AccessorIterator location_iterator; - typedef AccessorIterator liveout_iterator; + public: + using location_iterator = AccessorIterator; + using liveout_iterator = AccessorIterator; /// Get the patchpoint/stackmap ID for this record. uint64_t getID() const { @@ -254,7 +250,6 @@ public: return liveout_iterator(getLiveOut(0)); } - /// End iterator for live-outs. liveout_iterator liveouts_end() const { return liveout_iterator(getLiveOut(getNumLiveOuts())); @@ -266,7 +261,6 @@ public: } private: - RecordAccessor(const uint8_t *P) : P(P) {} unsigned getNumLiveOutsOffset() const { @@ -316,9 +310,9 @@ public: } } - typedef AccessorIterator function_iterator; - typedef AccessorIterator constant_iterator; - typedef AccessorIterator record_iterator; + using function_iterator = AccessorIterator; + using constant_iterator = AccessorIterator; + using record_iterator = AccessorIterator; /// Get the version number of this stackmap. (Always returns 2). unsigned getVersion() const { return 2; } @@ -413,7 +407,6 @@ public: } private: - template static T read(const uint8_t *P) { return support::endian::read(P); @@ -441,6 +434,6 @@ private: std::vector StackMapRecordOffsets; }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_STACKMAPPARSER_H diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h index ef0f96f7834abd11085196ef0e3a19ccdee9120c..97eeba6611a2ac67c641d6a4a1bd362a59bda1aa 100644 --- a/include/llvm/Object/SymbolicFile.h +++ b/include/llvm/Object/SymbolicFile.h @@ -14,10 +14,20 @@ #ifndef LLVM_OBJECT_SYMBOLICFILE_H #define LLVM_OBJECT_SYMBOLICFILE_H +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Binary.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" #include -#include +#include +#include +#include +#include +#include namespace llvm { namespace object { @@ -29,6 +39,7 @@ union DataRefImpl { uint32_t a, b; } d; uintptr_t p; + DataRefImpl() { std::memset(this, 0, sizeof(DataRefImpl)); } }; @@ -87,7 +98,7 @@ class SymbolicFile; /// symbols in the object file. class BasicSymbolRef { DataRefImpl SymbolPimpl; - const SymbolicFile *OwningObject; + const SymbolicFile *OwningObject = nullptr; public: enum Flags : unsigned { @@ -108,7 +119,7 @@ public: // (IR only) }; - BasicSymbolRef() : OwningObject(nullptr) { } + BasicSymbolRef() = default; BasicSymbolRef(DataRefImpl SymbolP, const SymbolicFile *Owner); bool operator==(const BasicSymbolRef &Other) const; @@ -125,12 +136,12 @@ public: const SymbolicFile *getObject() const; }; -typedef content_iterator basic_symbol_iterator; +using basic_symbol_iterator = content_iterator; class SymbolicFile : public Binary { public: - ~SymbolicFile() override; SymbolicFile(unsigned int Type, MemoryBufferRef Source); + ~SymbolicFile() override; // virtual interface. virtual void moveSymbolNext(DataRefImpl &Symb) const = 0; @@ -145,19 +156,19 @@ public: virtual basic_symbol_iterator symbol_end() const = 0; // convenience wrappers. - typedef iterator_range basic_symbol_iterator_range; + using basic_symbol_iterator_range = iterator_range; basic_symbol_iterator_range symbols() const { return basic_symbol_iterator_range(symbol_begin(), symbol_end()); } // construction aux. static Expected> - createSymbolicFile(MemoryBufferRef Object, sys::fs::file_magic Type, + createSymbolicFile(MemoryBufferRef Object, llvm::file_magic Type, LLVMContext *Context); static Expected> createSymbolicFile(MemoryBufferRef Object) { - return createSymbolicFile(Object, sys::fs::file_magic::unknown, nullptr); + return createSymbolicFile(Object, llvm::file_magic::unknown, nullptr); } static Expected> createSymbolicFile(StringRef ObjectPath); @@ -199,7 +210,7 @@ inline const SymbolicFile *BasicSymbolRef::getObject() const { return OwningObject; } -} -} +} // end namespace object +} // end namespace llvm -#endif +#endif // LLVM_OBJECT_SYMBOLICFILE_H diff --git a/include/llvm/Object/Wasm.h b/include/llvm/Object/Wasm.h index 4833db0f2e50471a2674d81f1e2cf3e4f690a514..10edc461b9e9c8616c5eb35ae045bdb9713d14af 100644 --- a/include/llvm/Object/Wasm.h +++ b/include/llvm/Object/Wasm.h @@ -17,11 +17,13 @@ #ifndef LLVM_OBJECT_WASM_H #define LLVM_OBJECT_WASM_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Wasm.h" #include #include #include @@ -39,18 +41,22 @@ public: DEBUG_FUNCTION_NAME, }; - WasmSymbol(StringRef Name, SymbolType Type) : Name(Name), Type(Type) {} + WasmSymbol(StringRef Name, SymbolType Type, uint32_t Section, + uint32_t ElementIndex) + : Name(Name), Type(Type), Section(Section), ElementIndex(ElementIndex) {} StringRef Name; SymbolType Type; + uint32_t Section; + uint32_t ElementIndex; }; class WasmSection { public: - WasmSection() : Type(0), Offset(0) {} + WasmSection() = default; - uint32_t Type; // Section type (See below) - uint32_t Offset; // Offset with in the file + uint32_t Type = 0; // Section type (See below) + uint32_t Offset = 0; // Offset with in the file StringRef Name; // Section name (User-defined sections only) ArrayRef Content; // Section content std::vector Relocations; // Relocations for this section @@ -61,7 +67,8 @@ public: WasmObjectFile(MemoryBufferRef Object, Error &Err); const wasm::WasmObjectHeader &getHeader() const; - const WasmSymbol &getWasmSymbol(DataRefImpl Symb) const; + const WasmSymbol &getWasmSymbol(const DataRefImpl &Symb) const; + const WasmSymbol &getWasmSymbol(const SymbolRef &Symbol) const; const WasmSection &getWasmSection(const SectionRef &Section) const; const wasm::WasmRelocation &getWasmRelocation(const RelocationRef& Ref) const; @@ -74,17 +81,23 @@ public: const std::vector& memories() const { return Memories; } const std::vector& globals() const { return Globals; } const std::vector& exports() const { return Exports; } + + uint32_t getNumberOfSymbols() const { + return Symbols.size(); + } + const std::vector& elements() const { return ElemSegments; } + const std::vector& dataSegments() const { return DataSegments; } + const std::vector& functions() const { return Functions; } const ArrayRef& code() const { return CodeSection; } uint32_t startFunction() const { return StartFunction; } -protected: void moveSymbolNext(DataRefImpl &Symb) const override; uint32_t getSymbolFlags(DataRefImpl Symb) const override; @@ -106,6 +119,7 @@ protected: std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; @@ -179,7 +193,7 @@ private: std::vector Symbols; std::vector Functions; ArrayRef CodeSection; - uint32_t StartFunction; + uint32_t StartFunction = -1; }; } // end namespace object diff --git a/include/llvm/Object/WindowsResource.h b/include/llvm/Object/WindowsResource.h new file mode 100644 index 0000000000000000000000000000000000000000..21fe37dd1e0a50de52919ecdd496e4112f5a3bed --- /dev/null +++ b/include/llvm/Object/WindowsResource.h @@ -0,0 +1,189 @@ +//===-- WindowsResource.h ---------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file declares the .res file class. .res files are intermediate +// products of the typical resource-compilation process on Windows. This +// process is as follows: +// +// .rc file(s) ---(rc.exe)---> .res file(s) ---(cvtres.exe)---> COFF file +// +// .rc files are human-readable scripts that list all resources a program uses. +// +// They are compiled into .res files, which are a list of the resources in +// binary form. +// +// Finally the data stored in the .res is compiled into a COFF file, where it +// is organized in a directory tree structure for optimized access by the +// program during runtime. +// +// Ref: msdn.microsoft.com/en-us/library/windows/desktop/ms648007(v=vs.85).aspx +// +//===---------------------------------------------------------------------===// + +#ifndef LLVM_INCLUDE_LLVM_OBJECT_RESFILE_H +#define LLVM_INCLUDE_LLVM_OBJECT_RESFILE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ScopedPrinter.h" + +#include + +namespace llvm { +namespace object { + +class WindowsResource; + +class ResourceEntryRef { +public: + Error moveNext(bool &End); + bool checkTypeString() const { return IsStringType; } + ArrayRef getTypeString() const { return Type; } + uint16_t getTypeID() const { return TypeID; } + bool checkNameString() const { return IsStringName; } + ArrayRef getNameString() const { return Name; } + uint16_t getNameID() const { return NameID; } + uint16_t getLanguage() const { return Suffix->Language; } + uint16_t getMajorVersion() const { return Suffix->Version >> 16; } + uint16_t getMinorVersion() const { return Suffix->Version; } + uint32_t getCharacteristics() const { return Suffix->Characteristics; } + ArrayRef getData() const { return Data; } + +private: + friend class WindowsResource; + + ResourceEntryRef(BinaryStreamRef Ref, const WindowsResource *Owner, + Error &Err); + + Error loadNext(); + + struct HeaderSuffix { + support::ulittle32_t DataVersion; + support::ulittle16_t MemoryFlags; + support::ulittle16_t Language; + support::ulittle32_t Version; + support::ulittle32_t Characteristics; + }; + + BinaryStreamReader Reader; + bool IsStringType; + ArrayRef Type; + uint16_t TypeID; + bool IsStringName; + ArrayRef Name; + uint16_t NameID; + const HeaderSuffix *Suffix = nullptr; + ArrayRef Data; + const WindowsResource *OwningRes = nullptr; +}; + +class WindowsResource : public Binary { +public: + Expected getHeadEntry(); + + static bool classof(const Binary *V) { return V->isWinRes(); } + + static Expected> + createWindowsResource(MemoryBufferRef Source); + +private: + friend class ResourceEntryRef; + + WindowsResource(MemoryBufferRef Source); + + BinaryByteStream BBS; +}; + +class WindowsResourceParser { +public: + class TreeNode; + WindowsResourceParser(); + Error parse(WindowsResource *WR); + void printTree(raw_ostream &OS) const; + const TreeNode &getTree() const { return Root; } + const ArrayRef> getData() const { return Data; } + const ArrayRef> getStringTable() const { + return StringTable; + } + + class TreeNode { + public: + template + using Children = std::map>; + + void print(ScopedPrinter &Writer, StringRef Name) const; + uint32_t getTreeSize() const; + uint32_t getStringIndex() const { return StringIndex; } + uint32_t getDataIndex() const { return DataIndex; } + uint16_t getMajorVersion() const { return MajorVersion; } + uint16_t getMinorVersion() const { return MinorVersion; } + uint32_t getCharacteristics() const { return Characteristics; } + bool checkIsDataNode() const { return IsDataNode; } + const Children &getIDChildren() const { return IDChildren; } + const Children &getStringChildren() const { + return StringChildren; + } + + private: + friend class WindowsResourceParser; + + static uint32_t StringCount; + static uint32_t DataCount; + + static std::unique_ptr createStringNode(); + static std::unique_ptr createIDNode(); + static std::unique_ptr createDataNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics); + + explicit TreeNode(bool IsStringNode); + TreeNode(uint16_t MajorVersion, uint16_t MinorVersion, + uint32_t Characteristics); + + void addEntry(const ResourceEntryRef &Entry, bool &IsNewTypeString, + bool &IsNewNameString); + TreeNode &addTypeNode(const ResourceEntryRef &Entry, bool &IsNewTypeString); + TreeNode &addNameNode(const ResourceEntryRef &Entry, bool &IsNewNameString); + TreeNode &addLanguageNode(const ResourceEntryRef &Entry); + TreeNode &addChild(uint32_t ID, bool IsDataNode = false, + uint16_t MajorVersion = 0, uint16_t MinorVersion = 0, + uint32_t Characteristics = 0); + TreeNode &addChild(ArrayRef NameRef, bool &IsNewString); + + bool IsDataNode = false; + uint32_t StringIndex; + uint32_t DataIndex; + Children IDChildren; + Children StringChildren; + uint16_t MajorVersion = 0; + uint16_t MinorVersion = 0; + uint32_t Characteristics = 0; + }; + +private: + TreeNode Root; + std::vector> Data; + std::vector> StringTable; +}; + +Error writeWindowsResourceCOFF(std::unique_ptr &OutputBuffer, + llvm::COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser); + +} // namespace object +} // namespace llvm + +#endif diff --git a/include/llvm/ObjectYAML/COFFYAML.h b/include/llvm/ObjectYAML/COFFYAML.h index 65ad1dde67f52ef844a27435d18c8b70760d5bb4..719cb1acf6efe4724c77e5b99af05884c71d24f5 100644 --- a/include/llvm/ObjectYAML/COFFYAML.h +++ b/include/llvm/ObjectYAML/COFFYAML.h @@ -15,8 +15,10 @@ #define LLVM_OBJECTYAML_COFFYAML_H #include "llvm/ADT/Optional.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h" +#include "llvm/ObjectYAML/CodeViewYAMLTypes.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/COFF.h" namespace llvm { @@ -56,6 +58,8 @@ namespace COFFYAML { COFF::section Header; unsigned Alignment = 0; yaml::BinaryRef SectionData; + std::vector DebugS; + std::vector DebugT; std::vector Relocations; StringRef Name; Section(); diff --git a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h new file mode 100644 index 0000000000000000000000000000000000000000..8180e0fc83f4c579b3aac706700ce5a730a746e9 --- /dev/null +++ b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h @@ -0,0 +1,134 @@ +//===- CodeViewYAMLDebugSections.h - CodeView YAMLIO debug sections -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_CODEVIEWYAMLDEBUGSECTIONS_H +#define LLVM_OBJECTYAML_CODEVIEWYAMLDEBUGSECTIONS_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/ObjectYAML/YAML.h" + +namespace llvm { + +namespace codeview { +class DebugStringTableSubsection; +class DebugStringTableSubsectionRef; +class DebugChecksumsSubsectionRef; +class DebugStringTableSubsection; +class DebugChecksumsSubsection; +class StringsAndChecksums; +class StringsAndChecksumsRef; +} +namespace CodeViewYAML { + +namespace detail { +struct YAMLSubsectionBase; +} + +struct YAMLFrameData { + uint32_t RvaStart; + uint32_t CodeSize; + uint32_t LocalSize; + uint32_t ParamsSize; + uint32_t MaxStackSize; + StringRef FrameFunc; + uint32_t PrologSize; + uint32_t SavedRegsSize; + uint32_t Flags; +}; + +struct YAMLCrossModuleImport { + StringRef ModuleName; + std::vector ImportIds; +}; + +struct SourceLineEntry { + uint32_t Offset; + uint32_t LineStart; + uint32_t EndDelta; + bool IsStatement; +}; + +struct SourceColumnEntry { + uint16_t StartColumn; + uint16_t EndColumn; +}; + +struct SourceLineBlock { + StringRef FileName; + std::vector Lines; + std::vector Columns; +}; + +struct HexFormattedString { + std::vector Bytes; +}; + +struct SourceFileChecksumEntry { + StringRef FileName; + codeview::FileChecksumKind Kind; + HexFormattedString ChecksumBytes; +}; + +struct SourceLineInfo { + uint32_t RelocOffset; + uint32_t RelocSegment; + codeview::LineFlags Flags; + uint32_t CodeSize; + + std::vector Blocks; +}; + +struct InlineeSite { + uint32_t Inlinee; + StringRef FileName; + uint32_t SourceLineNum; + std::vector ExtraFiles; +}; + +struct InlineeInfo { + bool HasExtraFiles; + std::vector Sites; +}; + +struct YAMLDebugSubsection { + static Expected + fromCodeViewSubection(const codeview::StringsAndChecksumsRef &SC, + const codeview::DebugSubsectionRecord &SS); + + std::shared_ptr Subsection; +}; + +struct DebugSubsectionState {}; + +Expected>> +toCodeViewSubsectionList(BumpPtrAllocator &Allocator, + ArrayRef Subsections, + const codeview::StringsAndChecksums &SC); + +std::vector +fromDebugS(ArrayRef Data, const codeview::StringsAndChecksumsRef &SC); + +void initializeStringsAndChecksums(ArrayRef Sections, + codeview::StringsAndChecksums &SC); + +} // namespace CodeViewYAML +} // namespace llvm + +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::YAMLDebugSubsection) + +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::YAMLDebugSubsection) + +#endif diff --git a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h new file mode 100644 index 0000000000000000000000000000000000000000..9b411e8b074fcd241cf5aca3f073a2322f282ec5 --- /dev/null +++ b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h @@ -0,0 +1,43 @@ +//===- CodeViewYAMLSymbols.h - CodeView YAMLIO Symbol implementation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_CODEVIEWYAMLSYMBOLS_H +#define LLVM_OBJECTYAML_CODEVIEWYAMLSYMBOLS_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/ObjectYAML/YAML.h" + +namespace llvm { +namespace CodeViewYAML { +namespace detail { +struct SymbolRecordBase; +} + +struct SymbolRecord { + std::shared_ptr Symbol; + + codeview::CVSymbol + toCodeViewSymbol(BumpPtrAllocator &Allocator, + codeview::CodeViewContainer Container) const; + static Expected fromCodeViewSymbol(codeview::CVSymbol Symbol); +}; + +} // namespace CodeViewYAML +} // namespace llvm + +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SymbolRecord) +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::SymbolRecord) + +#endif diff --git a/include/llvm/ObjectYAML/CodeViewYAMLTypes.h b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h new file mode 100644 index 0000000000000000000000000000000000000000..e97d5f92bf7fe72a5a583d19ff73d7312c9d5388 --- /dev/null +++ b/include/llvm/ObjectYAML/CodeViewYAMLTypes.h @@ -0,0 +1,56 @@ +//===- CodeViewYAMLTypes.h - CodeView YAMLIO Type Record implementation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H +#define LLVM_OBJECTYAML_CODEVIEWYAMLTYPES_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/ObjectYAML/YAML.h" +#include "llvm/Support/Allocator.h" + +namespace llvm { +namespace codeview { +class TypeTableBuilder; +} +namespace CodeViewYAML { +namespace detail { +struct LeafRecordBase; +struct MemberRecordBase; +} + +struct MemberRecord { + std::shared_ptr Member; +}; + +struct LeafRecord { + std::shared_ptr Leaf; + + codeview::CVType toCodeViewRecord(BumpPtrAllocator &Allocator) const; + codeview::CVType toCodeViewRecord(codeview::TypeTableBuilder &TS) const; + static Expected fromCodeViewRecord(codeview::CVType Type); +}; + +std::vector fromDebugT(ArrayRef DebugT); +ArrayRef toDebugT(ArrayRef, BumpPtrAllocator &Alloc); +} // namespace CodeViewYAML +} // namespace llvm + +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::LeafRecord) +LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::MemberRecord) + +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::LeafRecord) +LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::MemberRecord) + +#endif diff --git a/include/llvm/ObjectYAML/DWARFYAML.h b/include/llvm/ObjectYAML/DWARFYAML.h index ec34de1f08814335e826959f38f2259a2c69d975..75e9112e121afd5cb9d93046b63d3d9b9b012ae3 100644 --- a/include/llvm/ObjectYAML/DWARFYAML.h +++ b/include/llvm/ObjectYAML/DWARFYAML.h @@ -16,8 +16,8 @@ #ifndef LLVM_OBJECTYAML_DWARFYAML_H #define LLVM_OBJECTYAML_DWARFYAML_H +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/Dwarf.h" namespace llvm { namespace DWARFYAML { @@ -236,12 +236,12 @@ template <> struct MappingTraits { static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF); }; -#define HANDLE_DW_TAG(unused, name) \ +#define HANDLE_DW_TAG(unused, name, unused2, unused3) \ io.enumCase(value, "DW_TAG_" #name, dwarf::DW_TAG_##name); template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::Tag &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -251,7 +251,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::LineNumberOps &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -261,27 +261,27 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::LineNumberExtendedOps &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; -#define HANDLE_DW_AT(unused, name) \ +#define HANDLE_DW_AT(unused, name, unused2, unused3) \ io.enumCase(value, "DW_AT_" #name, dwarf::DW_AT_##name); template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::Attribute &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; -#define HANDLE_DW_FORM(unused, name) \ +#define HANDLE_DW_FORM(unused, name, unused2, unused3) \ io.enumCase(value, "DW_FORM_" #name, dwarf::DW_FORM_##name); template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::Form &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; @@ -291,7 +291,7 @@ template <> struct ScalarEnumerationTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, dwarf::UnitType &value) { -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" io.enumFallback(value); } }; diff --git a/include/llvm/ObjectYAML/ELFYAML.h b/include/llvm/ObjectYAML/ELFYAML.h index 81a4ec28c94fa9e5239e0c54770891bb7843c6b3..9d62ec27ad31a7546dc475806971f3da63934020 100644 --- a/include/llvm/ObjectYAML/ELFYAML.h +++ b/include/llvm/ObjectYAML/ELFYAML.h @@ -16,8 +16,8 @@ #ifndef LLVM_OBJECTYAML_ELFYAML_H #define LLVM_OBJECTYAML_ELFYAML_H +#include "llvm/BinaryFormat/ELF.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/ELF.h" namespace llvm { namespace ELFYAML { diff --git a/include/llvm/ObjectYAML/MachOYAML.h b/include/llvm/ObjectYAML/MachOYAML.h index ae858c8f4aafd0c7841cb0221c69c9f9893d7851..59aca9a1ddf2c1f99737f4d489e28804e5640b55 100644 --- a/include/llvm/ObjectYAML/MachOYAML.h +++ b/include/llvm/ObjectYAML/MachOYAML.h @@ -16,9 +16,9 @@ #ifndef LLVM_OBJECTYAML_MACHOYAML_H #define LLVM_OBJECTYAML_MACHOYAML_H -#include "llvm/ObjectYAML/YAML.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/ObjectYAML/DWARFYAML.h" -#include "llvm/Support/MachO.h" +#include "llvm/ObjectYAML/YAML.h" namespace llvm { namespace MachOYAML { @@ -209,7 +209,7 @@ template <> struct MappingTraits { template <> struct ScalarEnumerationTraits { static void enumeration(IO &io, MachO::LoadCommandType &value) { -#include "llvm/Support/MachO.def" +#include "llvm/BinaryFormat/MachO.def" io.enumFallback(value); } }; @@ -278,7 +278,7 @@ template <> struct ScalarTraits { static void mapping(IO &IO, MachO::LCStruct &LoadCommand); \ }; -#include "llvm/Support/MachO.def" +#include "llvm/BinaryFormat/MachO.def" // Extra structures used by load commands template <> struct MappingTraits { diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h index b1af8bbdfa6e97c9b79b52b066973aed9c688c8e..447dbd7a603dad6db0f751d0388b72ab9462b529 100644 --- a/include/llvm/ObjectYAML/WasmYAML.h +++ b/include/llvm/ObjectYAML/WasmYAML.h @@ -16,8 +16,8 @@ #ifndef LLVM_OBJECTYAML_WASMYAML_H #define LLVM_OBJECTYAML_WASMYAML_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/ObjectYAML/YAML.h" -#include "llvm/Support/Wasm.h" namespace llvm { namespace WasmYAML { @@ -34,17 +34,6 @@ struct FileHeader { yaml::Hex32 Version; }; -struct Import { - StringRef Module; - StringRef Field; - ExportKind Kind; - union { - uint32_t SigIndex; - ValueType GlobalType; - }; - bool GlobalMutable; -}; - struct Limits { yaml::Hex32 Flags; yaml::Hex32 Initial; @@ -74,6 +63,18 @@ struct Global { wasm::WasmInitExpr InitExpr; }; +struct Import { + StringRef Module; + StringRef Field; + ExportKind Kind; + union { + uint32_t SigIndex; + Global GlobalImport; + Table TableImport; + Limits Memory; + }; +}; + struct LocalDecl { ValueType Type; uint32_t Count; @@ -88,7 +89,7 @@ struct Relocation { RelocType Type; uint32_t Index; yaml::Hex32 Offset; - yaml::Hex32 Addend; + int32_t Addend; }; struct DataSegment { @@ -97,6 +98,11 @@ struct DataSegment { yaml::BinaryRef Content; }; +struct NameEntry { + uint32_t Index; + StringRef Name; +}; + struct Signature { Signature() : Form(wasm::WASM_TYPE_FUNC) {} @@ -122,6 +128,11 @@ struct CustomSection : Section { StringRef Name; yaml::BinaryRef Payload; + + // The follow is used by the "name" custom section. + // TODO(sbc): Add support for more then just functions names. The wasm + // name section can support multiple sub-sections. + std::vector FunctionNames; }; struct TypeSection : Section { @@ -244,6 +255,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Global) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Function) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::LocalDecl) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Relocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::NameEntry) LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) namespace llvm { @@ -297,6 +309,10 @@ template <> struct MappingTraits { static void mapping(IO &IO, WasmYAML::Relocation &Relocation); }; +template <> struct MappingTraits { + static void mapping(IO &IO, WasmYAML::NameEntry &NameEntry); +}; + template <> struct MappingTraits { static void mapping(IO &IO, WasmYAML::LocalDecl &LocalDecl); }; diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h index 99d329693de2e7888bcd979d5055f8b0a26660ff..c519a4a824c5166db5b91797eaf77c793a78a77c 100644 --- a/include/llvm/Option/Arg.h +++ b/include/llvm/Option/Arg.h @@ -1,4 +1,4 @@ -//===--- Arg.h - Parsed Argument Classes ------------------------*- C++ -*-===// +//===- Arg.h - Parsed Argument Classes --------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,7 +21,11 @@ #include namespace llvm { + +class raw_ostream; + namespace opt { + class ArgList; /// \brief A concrete instance of a particular driver option. @@ -29,9 +33,6 @@ class ArgList; /// The Arg class encodes just enough information to be able to /// derive the argument values efficiently. class Arg { - Arg(const Arg &) = delete; - void operator=(const Arg &) = delete; - private: /// \brief The option this argument is an instance of. const Option Opt; @@ -65,6 +66,8 @@ public: const char *Value0, const Arg *BaseArg = nullptr); Arg(const Option Opt, StringRef Spelling, unsigned Index, const char *Value0, const char *Value1, const Arg *BaseArg = nullptr); + Arg(const Arg &) = delete; + Arg &operator=(const Arg &) = delete; ~Arg(); const Option &getOption() const { return Opt; } @@ -89,6 +92,7 @@ public: void claim() const { getBaseArg().Claimed = true; } unsigned getNumValues() const { return Values.size(); } + const char *getValue(unsigned N = 0) const { return Values[N]; } @@ -122,6 +126,7 @@ public: }; } // end namespace opt + } // end namespace llvm -#endif +#endif // LLVM_OPTION_ARG_H diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index 53cb0d8dec4d4356926dddb8543768e9bacab15e..aaea68bf8e2784a247ce9fddfa8586798bb1f464 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -1,4 +1,4 @@ -//===--- ArgList.h - Argument List Management -------------------*- C++ -*-===// +//===- ArgList.h - Argument List Management ---------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,58 +10,84 @@ #ifndef LLVM_OPTION_ARGLIST_H #define LLVM_OPTION_ARGLIST_H -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/OptSpecifier.h" #include "llvm/Option/Option.h" +#include +#include +#include +#include #include #include #include +#include #include namespace llvm { + +class raw_ostream; + namespace opt { -class ArgList; -class Option; /// arg_iterator - Iterates through arguments stored inside an ArgList. +template class arg_iterator { - /// The current argument. - SmallVectorImpl::const_iterator Current; - - /// The argument list we are iterating over. - const ArgList &Args; - - /// Optional filters on the arguments which will be match. Most clients - /// should never want to iterate over arguments without filters, so we won't - /// bother to factor this into two separate iterator implementations. - // - // FIXME: Make efficient; the idea is to provide efficient iteration over - // all arguments which match a particular id and then just provide an - // iterator combinator which takes multiple iterators which can be - // efficiently compared and returns them in order. - OptSpecifier Id0, Id1, Id2; + /// The current argument and the end of the sequence we're iterating. + BaseIter Current, End; + + /// Optional filters on the arguments which will be match. To avoid a + /// zero-sized array, we store one specifier even if we're asked for none. + OptSpecifier Ids[NumOptSpecifiers ? NumOptSpecifiers : 1]; + + void SkipToNextArg() { + for (; Current != End; ++Current) { + // Skip erased elements. + if (!*Current) + continue; + + // Done if there are no filters. + if (!NumOptSpecifiers) + return; + + // Otherwise require a match. + const Option &O = (*Current)->getOption(); + for (auto Id : Ids) { + if (!Id.isValid()) + break; + if (O.matches(Id)) + return; + } + } + } - void SkipToNextArg(); + using Traits = std::iterator_traits; public: - typedef Arg * const * value_type; - typedef Arg * const & reference; - typedef Arg * const * pointer; - typedef std::forward_iterator_tag iterator_category; - typedef std::ptrdiff_t difference_type; - - arg_iterator(SmallVectorImpl::const_iterator it, const ArgList &Args, - OptSpecifier Id0 = 0U, OptSpecifier Id1 = 0U, - OptSpecifier Id2 = 0U) - : Current(it), Args(Args), Id0(Id0), Id1(Id1), Id2(Id2) { + using value_type = typename Traits::value_type; + using reference = typename Traits::reference; + using pointer = typename Traits::pointer; + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + + arg_iterator( + BaseIter Current, BaseIter End, + const OptSpecifier (&Ids)[NumOptSpecifiers ? NumOptSpecifiers : 1] = {}) + : Current(Current), End(End) { + for (unsigned I = 0; I != NumOptSpecifiers; ++I) + this->Ids[I] = Ids[I]; SkipToNextArg(); } + // FIXME: This conversion function makes no sense. operator const Arg*() { return *Current; } + reference operator*() const { return *Current; } pointer operator->() const { return Current; } @@ -93,44 +119,71 @@ public: /// and to iterate over groups of arguments. class ArgList { public: - typedef SmallVector arglist_type; - typedef arglist_type::iterator iterator; - typedef arglist_type::const_iterator const_iterator; - typedef arglist_type::reverse_iterator reverse_iterator; - typedef arglist_type::const_reverse_iterator const_reverse_iterator; + using arglist_type = SmallVector; + using iterator = arg_iterator; + using const_iterator = arg_iterator; + using reverse_iterator = arg_iterator; + using const_reverse_iterator = + arg_iterator; + + template using filtered_iterator = + arg_iterator; + template using filtered_reverse_iterator = + arg_iterator; private: /// The internal list of arguments. arglist_type Args; + using OptRange = std::pair; + static OptRange emptyRange() { return {-1u, 0u}; } + + /// The first and last index of each different OptSpecifier ID. + DenseMap OptRanges; + + /// Get the range of indexes in which options with the specified IDs might + /// reside, or (0, 0) if there are no such options. + OptRange getRange(std::initializer_list Ids) const; + protected: // Make the default special members protected so they won't be used to slice // derived objects, but can still be used by derived objects to implement // their own special members. ArgList() = default; + // Explicit move operations to ensure the container is cleared post-move // otherwise it could lead to a double-delete in the case of moving of an // InputArgList which deletes the contents of the container. If we could fix // up the ownership here (delegate storage/ownership to the derived class so // it can be a container of unique_ptr) this would be simpler. - ArgList(ArgList &&RHS) : Args(std::move(RHS.Args)) { RHS.Args.clear(); } + ArgList(ArgList &&RHS) + : Args(std::move(RHS.Args)), OptRanges(std::move(RHS.OptRanges)) { + RHS.Args.clear(); + RHS.OptRanges.clear(); + } + ArgList &operator=(ArgList &&RHS) { Args = std::move(RHS.Args); RHS.Args.clear(); + OptRanges = std::move(RHS.OptRanges); + RHS.OptRanges.clear(); return *this; } + // Protect the dtor to ensure this type is never destroyed polymorphically. ~ArgList() = default; -public: + // Implicitly convert a value to an OptSpecifier. Used to work around a bug + // in MSVC's implementation of narrowing conversion checking. + static OptSpecifier toOptSpecifier(OptSpecifier S) { return S; } +public: /// @name Arg Access /// @{ /// append - Append \p A to the arg list. void append(Arg *A); - arglist_type &getArgs() { return Args; } const arglist_type &getArgs() const { return Args; } unsigned size() const { return Args.size(); } @@ -139,30 +192,38 @@ public: /// @name Arg Iteration /// @{ - iterator begin() { return Args.begin(); } - iterator end() { return Args.end(); } + iterator begin() { return {Args.begin(), Args.end()}; } + iterator end() { return {Args.end(), Args.end()}; } - reverse_iterator rbegin() { return Args.rbegin(); } - reverse_iterator rend() { return Args.rend(); } + reverse_iterator rbegin() { return {Args.rbegin(), Args.rend()}; } + reverse_iterator rend() { return {Args.rend(), Args.rend()}; } - const_iterator begin() const { return Args.begin(); } - const_iterator end() const { return Args.end(); } + const_iterator begin() const { return {Args.begin(), Args.end()}; } + const_iterator end() const { return {Args.end(), Args.end()}; } - const_reverse_iterator rbegin() const { return Args.rbegin(); } - const_reverse_iterator rend() const { return Args.rend(); } + const_reverse_iterator rbegin() const { return {Args.rbegin(), Args.rend()}; } + const_reverse_iterator rend() const { return {Args.rend(), Args.rend()}; } - arg_iterator filtered_begin(OptSpecifier Id0 = 0U, OptSpecifier Id1 = 0U, - OptSpecifier Id2 = 0U) const { - return arg_iterator(Args.begin(), *this, Id0, Id1, Id2); - } - arg_iterator filtered_end() const { - return arg_iterator(Args.end(), *this); + template + iterator_range> + filtered(OptSpecifiers ...Ids) const { + OptRange Range = getRange({toOptSpecifier(Ids)...}); + auto B = Args.begin() + Range.first; + auto E = Args.begin() + Range.second; + using Iterator = filtered_iterator; + return make_range(Iterator(B, E, {toOptSpecifier(Ids)...}), + Iterator(E, E, {toOptSpecifier(Ids)...})); } - iterator_range filtered(OptSpecifier Id0 = 0U, - OptSpecifier Id1 = 0U, - OptSpecifier Id2 = 0U) const { - return make_range(filtered_begin(Id0, Id1, Id2), filtered_end()); + template + iterator_range> + filtered_reverse(OptSpecifiers ...Ids) const { + OptRange Range = getRange({toOptSpecifier(Ids)...}); + auto B = Args.rend() - Range.second; + auto E = Args.rend() - Range.first; + using Iterator = filtered_reverse_iterator; + return make_range(Iterator(B, E, {toOptSpecifier(Ids)...}), + Iterator(E, E, {toOptSpecifier(Ids)...})); } /// @} @@ -179,43 +240,34 @@ public: /// hasArg - Does the arg list contain any option matching \p Id. /// /// \p Claim Whether the argument should be claimed, if it exists. - bool hasArgNoClaim(OptSpecifier Id) const { - return getLastArgNoClaim(Id) != nullptr; - } - bool hasArg(OptSpecifier Id) const { - return getLastArg(Id) != nullptr; + template + bool hasArgNoClaim(OptSpecifiers ...Ids) const { + return getLastArgNoClaim(Ids...) != nullptr; } - bool hasArg(OptSpecifier Id0, OptSpecifier Id1) const { - return getLastArg(Id0, Id1) != nullptr; + template + bool hasArg(OptSpecifiers ...Ids) const { + return getLastArg(Ids...) != nullptr; } - bool hasArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const { - return getLastArg(Id0, Id1, Id2) != nullptr; + + /// Return the last argument matching \p Id, or null. + template + Arg *getLastArg(OptSpecifiers ...Ids) const { + Arg *Res = nullptr; + for (Arg *A : filtered(Ids...)) { + Res = A; + Res->claim(); + } + return Res; } - /// getLastArg - Return the last argument matching \p Id, or null. - /// - /// \p Claim Whether the argument should be claimed, if it exists. - Arg *getLastArgNoClaim(OptSpecifier Id) const; - Arg *getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1) const; - Arg *getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1, - OptSpecifier Id2) const; - Arg *getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, - OptSpecifier Id3) const; - Arg *getLastArg(OptSpecifier Id) const; - Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1) const; - Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const; - Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, - OptSpecifier Id3) const; - Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, - OptSpecifier Id3, OptSpecifier Id4) const; - Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, - OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5) const; - Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, - OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5, - OptSpecifier Id6) const; - Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2, - OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5, - OptSpecifier Id6, OptSpecifier Id7) const; + /// Return the last argument matching \p Id, or null. Do not "claim" the + /// option (don't mark it as having been used). + template + Arg *getLastArgNoClaim(OptSpecifiers ...Ids) const { + for (Arg *A : filtered_reverse(Ids...)) + return A; + return nullptr; + } /// getArgString - Return the input argument string at \p Index. virtual const char *getArgString(unsigned Index) const = 0; @@ -230,8 +282,7 @@ public: /// @{ /// getLastArgValue - Return the value of the last argument, or a default. - StringRef getLastArgValue(OptSpecifier Id, - StringRef Default = "") const; + StringRef getLastArgValue(OptSpecifier Id, StringRef Default = "") const; /// getAllArgValues - Get the values of all instances of the given argument /// as strings. @@ -273,7 +324,7 @@ public: /// AddAllArgValues - Render the argument values of all arguments /// matching the given ids. void AddAllArgValues(ArgStringList &Output, OptSpecifier Id0, - OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const; + OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const; /// AddAllArgsTranslated - Render all the arguments matching the /// given ids, but forced to separate args and using the provided @@ -340,10 +391,12 @@ private: public: InputArgList(const char* const *ArgBegin, const char* const *ArgEnd); + InputArgList(InputArgList &&RHS) : ArgList(std::move(RHS)), ArgStrings(std::move(RHS.ArgStrings)), SynthesizedStrings(std::move(RHS.SynthesizedStrings)), NumInputArgStrings(RHS.NumInputArgStrings) {} + InputArgList &operator=(InputArgList &&RHS) { releaseMemory(); ArgList::operator=(std::move(RHS)); @@ -352,6 +405,7 @@ public: NumInputArgStrings = RHS.NumInputArgStrings; return *this; } + ~InputArgList() { releaseMemory(); } const char *getArgString(unsigned Index) const override { @@ -424,7 +478,6 @@ public: append(MakePositionalArg(BaseArg, Opt, Value)); } - /// AddSeparateArg - Construct a new Positional arg for the given option /// \p Id, with the provided \p Value and append it to the argument /// list. @@ -433,7 +486,6 @@ public: append(MakeSeparateArg(BaseArg, Opt, Value)); } - /// AddJoinedArg - Construct a new Positional arg for the given option /// \p Id, with the provided \p Value and append it to the argument list. void AddJoinedArg(const Arg *BaseArg, const Option Opt, @@ -441,7 +493,6 @@ public: append(MakeJoinedArg(BaseArg, Opt, Value)); } - /// MakeFlagArg - Construct a new FlagArg for the given option \p Id. Arg *MakeFlagArg(const Arg *BaseArg, const Option Opt) const; @@ -464,6 +515,7 @@ public: }; } // end namespace opt + } // end namespace llvm -#endif +#endif // LLVM_OPTION_ARGLIST_H diff --git a/include/llvm/Option/OptSpecifier.h b/include/llvm/Option/OptSpecifier.h index 0b2aaaec3afc8dc7a21f4a465e12030abe49c0d5..84c3cf8ad534d202eb9c64fbe45f335194c8dd34 100644 --- a/include/llvm/Option/OptSpecifier.h +++ b/include/llvm/Option/OptSpecifier.h @@ -1,4 +1,4 @@ -//===--- OptSpecifier.h - Option Specifiers ---------------------*- C++ -*-===// +//===- OptSpecifier.h - Option Specifiers -----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,32 +10,30 @@ #ifndef LLVM_OPTION_OPTSPECIFIER_H #define LLVM_OPTION_OPTSPECIFIER_H -#include "llvm/Support/Compiler.h" - namespace llvm { namespace opt { - class Option; - /// OptSpecifier - Wrapper class for abstracting references to option IDs. - class OptSpecifier { - unsigned ID; +class Option; + +/// OptSpecifier - Wrapper class for abstracting references to option IDs. +class OptSpecifier { + unsigned ID = 0; - private: - explicit OptSpecifier(bool) = delete; +public: + OptSpecifier() = default; + explicit OptSpecifier(bool) = delete; + /*implicit*/ OptSpecifier(unsigned ID) : ID(ID) {} + /*implicit*/ OptSpecifier(const Option *Opt); - public: - OptSpecifier() : ID(0) {} - /*implicit*/ OptSpecifier(unsigned ID) : ID(ID) {} - /*implicit*/ OptSpecifier(const Option *Opt); + bool isValid() const { return ID != 0; } - bool isValid() const { return ID != 0; } + unsigned getID() const { return ID; } - unsigned getID() const { return ID; } + bool operator==(OptSpecifier Opt) const { return ID == Opt.getID(); } + bool operator!=(OptSpecifier Opt) const { return !(*this == Opt); } +}; - bool operator==(OptSpecifier Opt) const { return ID == Opt.getID(); } - bool operator!=(OptSpecifier Opt) const { return !(*this == Opt); } - }; -} -} +} // end namespace opt +} // end namespace llvm -#endif +#endif // LLVM_OPTION_OPTSPECIFIER_H diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h index 390e52774fea567ce97fbcc37aef16c838eec7d6..e0169b927319f9f92f891b5a0073af8be209ec29 100644 --- a/include/llvm/Option/OptTable.h +++ b/include/llvm/Option/OptTable.h @@ -1,4 +1,4 @@ -//===--- OptTable.h - Option Table ------------------------------*- C++ -*-===// +//===- OptTable.h - Option Table --------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,19 @@ #define LLVM_OPTION_OPTTABLE_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Option/OptSpecifier.h" +#include +#include +#include namespace llvm { + class raw_ostream; + namespace opt { + class Arg; class ArgList; class InputArgList; @@ -53,12 +60,12 @@ private: ArrayRef OptionInfos; bool IgnoreCase; - unsigned TheInputOptionID; - unsigned TheUnknownOptionID; + unsigned TheInputOptionID = 0; + unsigned TheUnknownOptionID = 0; /// The index of the first option which can be parsed (i.e., is not a /// special option like 'input' or 'unknown', and is not an option group). - unsigned FirstSearchableIndex; + unsigned FirstSearchableIndex = 0; /// The union of all option prefixes. If an argument does not begin with /// one of these, it is an input. @@ -113,6 +120,14 @@ public: return getInfo(id).MetaVar; } + /// Find flags from OptTable which starts with Cur. + /// + /// \param [in] Cur - String prefix that all returned flags need + // to start with. + /// + /// \return The vector of flags which start with Cur. + std::vector findByPrefix(StringRef Cur) const; + /// \brief Parse a single argument; returning the new argument and /// updating Index. /// @@ -168,7 +183,9 @@ public: void PrintHelp(raw_ostream &OS, const char *Name, const char *Title, bool ShowHidden = false) const; }; + } // end namespace opt + } // end namespace llvm -#endif +#endif // LLVM_OPTION_OPTTABLE_H diff --git a/include/llvm/Option/Option.h b/include/llvm/Option/Option.h index 139f281b3c4ce97c385bcd475e5237dbcf02f7fd..c08834f90598add2483db14bdb8554aef7330b1e 100644 --- a/include/llvm/Option/Option.h +++ b/include/llvm/Option/Option.h @@ -1,4 +1,4 @@ -//===--- Option.h - Abstract Driver Options ---------------------*- C++ -*-===// +//===- Option.h - Abstract Driver Options -----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,15 +12,23 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Option/OptSpecifier.h" #include "llvm/Option/OptTable.h" #include "llvm/Support/ErrorHandling.h" +#include +#include namespace llvm { + +class raw_ostream; + namespace opt { + class Arg; class ArgList; + /// ArgStringList - Type used for constructing argv lists for subprocesses. -typedef SmallVector ArgStringList; +using ArgStringList = SmallVector; /// Base flags for all options. Custom flags may be added after. enum DriverFlag { @@ -202,6 +210,7 @@ public: }; } // end namespace opt + } // end namespace llvm -#endif +#endif // LLVM_OPTION_OPTION_H diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index e9c8ca3072c71b0f708efa4f00dc557d550711dd..2dd6935cf01c6d74b593d3c593751bd378cf39fb 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -384,7 +384,7 @@ extern bool isFunctionInPrintList(StringRef FunctionName); // Include support files that contain important APIs commonly used by Passes, // but that we want to separate out to make it easier to read the header files. // -#include "llvm/PassSupport.h" #include "llvm/PassAnalysisSupport.h" +#include "llvm/PassSupport.h" #endif diff --git a/include/llvm/PassInfo.h b/include/llvm/PassInfo.h index 21ade85b682fb82c07db4e49b2b691b800043b06..81dface3c9a05386c7a91375cdd0c3272354713f 100644 --- a/include/llvm/PassInfo.h +++ b/include/llvm/PassInfo.h @@ -32,7 +32,6 @@ class TargetMachine; class PassInfo { public: typedef Pass* (*NormalCtor_t)(); - typedef Pass *(*TargetMachineCtor_t)(TargetMachine *); private: StringRef PassName; // Nice name for Pass @@ -44,24 +43,20 @@ private: std::vector ItfImpl; // Interfaces implemented by this pass NormalCtor_t NormalCtor; - TargetMachineCtor_t TargetMachineCtor; public: /// PassInfo ctor - Do not call this directly, this should only be invoked /// through RegisterPass. PassInfo(StringRef name, StringRef arg, const void *pi, NormalCtor_t normal, - bool isCFGOnly, bool is_analysis, - TargetMachineCtor_t machine = nullptr) + bool isCFGOnly, bool is_analysis) : PassName(name), PassArgument(arg), PassID(pi), IsCFGOnlyPass(isCFGOnly), - IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal), - TargetMachineCtor(machine) {} + IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal) {} /// PassInfo ctor - Do not call this directly, this should only be invoked /// through RegisterPass. This version is for use by analysis groups; it /// does not auto-register the pass. PassInfo(StringRef name, const void *pi) : PassName(name), PassArgument(""), PassID(pi), IsCFGOnlyPass(false), - IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr), - TargetMachineCtor(nullptr) {} + IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr) {} /// getPassName - Return the friendly name for the pass, never returns null /// @@ -101,16 +96,6 @@ public: NormalCtor = Ctor; } - /// getTargetMachineCtor - Return a pointer to a function, that when called - /// with a TargetMachine, creates an instance of the pass and returns it. - /// This pointer may be null if there is no constructor with a TargetMachine - /// for the pass. - /// - TargetMachineCtor_t getTargetMachineCtor() const { return TargetMachineCtor; } - void setTargetMachineCtor(TargetMachineCtor_t Ctor) { - TargetMachineCtor = Ctor; - } - /// createPass() - Use this method to create an instance of this pass. Pass *createPass() const { assert((!isAnalysisGroup() || NormalCtor) && diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h index 852d79fbd443593c907f8c9ae2f822229c526bf3..602f45ac5178747de7ec5349065ca438d77636cc 100644 --- a/include/llvm/PassSupport.h +++ b/include/llvm/PassSupport.h @@ -31,8 +31,6 @@ namespace llvm { -class TargetMachine; - #define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \ static void *initialize##passName##PassOnce(PassRegistry &Registry) { \ PassInfo *PI = new PassInfo( \ @@ -78,10 +76,6 @@ class TargetMachine; template Pass *callDefaultCtor() { return new PassName(); } -template Pass *callTargetMachineCtor(TargetMachine *TM) { - return new PassName(TM); -} - //===--------------------------------------------------------------------------- /// RegisterPass template - This template class is used to notify the system /// that a Pass is available for use, and registers it into the internal @@ -93,11 +87,7 @@ template Pass *callTargetMachineCtor(TargetMachine *TM) { /// static RegisterPass tmp("passopt", "My Pass Name"); /// /// This statement will cause your pass to be created by calling the default -/// constructor exposed by the pass. If you have a different constructor that -/// must be called, create a global constructor function (which takes the -/// arguments you need and returns a Pass*) and register your pass like this: -/// -/// static RegisterPass tmp("passopt", "My Name"); +/// constructor exposed by the pass. /// template struct RegisterPass : public PassInfo { // Register Pass using default constructor... diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h index efa36d957fbd69be288ab806a46e69d8b75dd996..12b05e4ff0c5b9ebd2cefbe55f2a897691e28089 100644 --- a/include/llvm/Passes/PassBuilder.h +++ b/include/llvm/Passes/PassBuilder.h @@ -192,6 +192,39 @@ public: buildFunctionSimplificationPipeline(OptimizationLevel Level, bool DebugLogging = false); + /// Construct the core LLVM module canonicalization and simplification + /// pipeline. + /// + /// This pipeline focuses on canonicalizing and simplifying the entire module + /// of IR. Much like the function simplification pipeline above, it is + /// suitable to run repeatedly over the IR and is not expected to destroy + /// important information. It does, however, perform inlining and other + /// heuristic based simplifications that are not strictly reversible. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager + buildModuleSimplificationPipeline(OptimizationLevel Level, + bool DebugLogging = false); + + /// Construct the core LLVM module optimization pipeline. + /// + /// This pipeline focuses on optimizing the execution speed of the IR. It + /// uses cost modeling and thresholds to balance code growth against runtime + /// improvements. It includes vectorization and other information destroying + /// transformations. It also cannot generally be run repeatedly on a module + /// without potentially seriously regressing either runtime performance of + /// the code or serious code size growth. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, + bool DebugLogging = false); + /// Build a per-module default optimization pipeline. /// /// This provides a good default optimization pipeline for per-module @@ -206,6 +239,36 @@ public: ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool DebugLogging = false); + /// Build a pre-link, ThinLTO-targeting default optimization pipeline to + /// a pass manager. + /// + /// This adds the pre-link optimizations tuned to prepare a module for + /// a ThinLTO run. It works to minimize the IR which needs to be analyzed + /// without making irreversible decisions which could be made better during + /// the LTO run. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager + buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, + bool DebugLogging = false); + + /// Build an ThinLTO default optimization pipeline to a pass manager. + /// + /// This provides a good default optimization pipeline for link-time + /// optimization and code generation. It is particularly tuned to fit well + /// when IR coming into the LTO phase was first run through \c + /// addPreLinkLTODefaultPipeline, and the two coordinate closely. + /// + /// Note that \p Level cannot be `O0` here. The pipelines produced are + /// only intended for use when attempting to optimize code. If frontends + /// require some transformations for semantic reasons, they should explicitly + /// build them. + ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, + bool DebugLogging = false); + /// Build a pre-link, LTO-targeting default optimization pipeline to a pass /// manager. /// diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h index b9a9f53776984366dd7dd03784c5e16d29b975da..b2f73fda2bae65d4a7a83b33b1168a1538677402 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -18,11 +18,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index f97bbfd9e0d8ee7b95d21b05ca5a14fdb81eca5e..0dbb2cf9f2696d649661837e1b260effbdcf13d5 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -20,6 +20,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProfData.inc" @@ -53,40 +54,19 @@ class Instruction; class MDNode; class Module; -/// Return the name of data section containing profile counter variables. -inline StringRef getInstrProfCountersSectionName(bool AddSegment) { - return AddSegment ? "__DATA," INSTR_PROF_CNTS_SECT_NAME_STR - : INSTR_PROF_CNTS_SECT_NAME_STR; -} - -/// Return the name of data section containing names of instrumented -/// functions. -inline StringRef getInstrProfNameSectionName(bool AddSegment) { - return AddSegment ? "__DATA," INSTR_PROF_NAME_SECT_NAME_STR - : INSTR_PROF_NAME_SECT_NAME_STR; -} - -/// Return the name of the data section containing per-function control -/// data. -inline StringRef getInstrProfDataSectionName(bool AddSegment) { - return AddSegment ? "__DATA," INSTR_PROF_DATA_SECT_NAME_STR - ",regular,live_support" - : INSTR_PROF_DATA_SECT_NAME_STR; -} - -/// Return the name of data section containing pointers to value profile -/// counters/nodes. -inline StringRef getInstrProfValuesSectionName(bool AddSegment) { - return AddSegment ? "__DATA," INSTR_PROF_VALS_SECT_NAME_STR - : INSTR_PROF_VALS_SECT_NAME_STR; -} +enum InstrProfSectKind { +#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind, +#include "llvm/ProfileData/InstrProfData.inc" +}; -/// Return the name of data section containing nodes holdling value -/// profiling data. -inline StringRef getInstrProfVNodesSectionName(bool AddSegment) { - return AddSegment ? "__DATA," INSTR_PROF_VNODES_SECT_NAME_STR - : INSTR_PROF_VNODES_SECT_NAME_STR; -} +/// Return the name of the profile section corresponding to \p IPSK. +/// +/// The name of the section depends on the object format type \p OF. If +/// \p AddSegmentInfo is true, a segment prefix and additional linker hints may +/// be added to the section name (this is the default). +std::string getInstrProfSectionName(InstrProfSectKind IPSK, + Triple::ObjectFormatType OF, + bool AddSegmentInfo = true); /// Return the name profile runtime entry point to do value profiling /// for a given site. @@ -99,13 +79,6 @@ inline StringRef getInstrProfValueRangeProfFuncName() { return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; } -/// Return the name of the section containing function coverage mapping -/// data. -inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { - return AddSegment ? "__LLVM_COV," INSTR_PROF_COVMAP_SECT_NAME_STR - : INSTR_PROF_COVMAP_SECT_NAME_STR; -} - /// Return the name prefix of variables containing instrumented function names. inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } @@ -239,12 +212,12 @@ StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, /// third field is the uncompressed strings; otherwise it is the /// compressed string. When the string compression is off, the /// second field will have value zero. -Error collectPGOFuncNameStrings(const std::vector &NameStrs, +Error collectPGOFuncNameStrings(ArrayRef NameStrs, bool doCompression, std::string &Result); /// Produce \c Result string with the same format described above. The input /// is vector of PGO function name variables that are referenced. -Error collectPGOFuncNameStrings(const std::vector &NameVars, +Error collectPGOFuncNameStrings(ArrayRef NameVars, std::string &Result, bool doCompression = true); /// \c NameStrings is a string composed of one of more sub-strings encoded in @@ -994,7 +967,7 @@ struct Header { } // end namespace RawInstrProf // Parse MemOP Size range option. -void getMemOPSizeRangeFromOption(std::string Str, int64_t &RangeStart, +void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); } // end namespace llvm diff --git a/include/llvm/ProfileData/InstrProfData.inc b/include/llvm/ProfileData/InstrProfData.inc index 6ef1625d81c42d094e1456adb81a872498a56bad..be0dd4ad04bf611b3922fa6dec0112a0d72c5ea3 100644 --- a/include/llvm/ProfileData/InstrProfData.inc +++ b/include/llvm/ProfileData/InstrProfData.inc @@ -246,6 +246,31 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \ /* COVMAP_HEADER end. */ +#ifdef INSTR_PROF_SECT_ENTRY +#define INSTR_PROF_DATA_DEFINED +INSTR_PROF_SECT_ENTRY(IPSK_data, \ + INSTR_PROF_QUOTE(INSTR_PROF_DATA_COMMON), \ + INSTR_PROF_QUOTE(INSTR_PROF_DATA_COFF), "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_cnts, \ + INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COMMON), \ + INSTR_PROF_QUOTE(INSTR_PROF_CNTS_COFF), "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_name, \ + INSTR_PROF_QUOTE(INSTR_PROF_NAME_COMMON), \ + INSTR_PROF_QUOTE(INSTR_PROF_NAME_COFF), "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vals, \ + INSTR_PROF_QUOTE(INSTR_PROF_VALS_COMMON), \ + INSTR_PROF_QUOTE(INSTR_PROF_VALS_COFF), "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_vnodes, \ + INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COMMON), \ + INSTR_PROF_QUOTE(INSTR_PROF_VNODES_COFF), "__DATA,") +INSTR_PROF_SECT_ENTRY(IPSK_covmap, \ + INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COMMON), \ + INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_COFF), "__LLVM_COV,") + +#undef INSTR_PROF_SECT_ENTRY +#endif + + #ifdef INSTR_PROF_VALUE_PROF_DATA #define INSTR_PROF_DATA_DEFINED @@ -622,17 +647,47 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, * specified via command line. */ #define INSTR_PROF_PROFILE_NAME_VAR __llvm_profile_filename +/* section name strings common to all targets other + than WIN32 */ +#define INSTR_PROF_DATA_COMMON __llvm_prf_data +#define INSTR_PROF_NAME_COMMON __llvm_prf_names +#define INSTR_PROF_CNTS_COMMON __llvm_prf_cnts +#define INSTR_PROF_VALS_COMMON __llvm_prf_vals +#define INSTR_PROF_VNODES_COMMON __llvm_prf_vnds +#define INSTR_PROF_COVMAP_COMMON __llvm_covmap +/* Win32 */ +#define INSTR_PROF_DATA_COFF .lprfd +#define INSTR_PROF_NAME_COFF .lprfn +#define INSTR_PROF_CNTS_COFF .lprfc +#define INSTR_PROF_VALS_COFF .lprfv +#define INSTR_PROF_VNODES_COFF .lprfnd +#define INSTR_PROF_COVMAP_COFF .lcovmap + +#ifdef _WIN32 /* Runtime section names and name strings. */ -#define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data -#define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names -#define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts +#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COFF +#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COFF +#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COFF /* Array of pointers. Each pointer points to a list * of value nodes associated with one value site. */ -#define INSTR_PROF_VALS_SECT_NAME __llvm_prf_vals +#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_VALS_COFF /* Value profile nodes section. */ -#define INSTR_PROF_VNODES_SECT_NAME __llvm_prf_vnds -#define INSTR_PROF_COVMAP_SECT_NAME __llvm_covmap +#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COFF +#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COFF +#else +/* Runtime section names and name strings. */ +#define INSTR_PROF_DATA_SECT_NAME INSTR_PROF_DATA_COMMON +#define INSTR_PROF_NAME_SECT_NAME INSTR_PROF_NAME_COMMON +#define INSTR_PROF_CNTS_SECT_NAME INSTR_PROF_CNTS_COMMON +/* Array of pointers. Each pointer points to a list + * of value nodes associated with one value site. + */ +#define INSTR_PROF_VALS_SECT_NAME INSTR_PROF_VALS_COMMON +/* Value profile nodes section. */ +#define INSTR_PROF_VNODES_SECT_NAME INSTR_PROF_VNODES_COMMON +#define INSTR_PROF_COVMAP_SECT_NAME INSTR_PROF_COVMAP_COMMON +#endif #define INSTR_PROF_DATA_SECT_NAME_STR \ INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME) diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index a35ae4f92bd140996e0308fce5e924e4ef40c8ec..7a705ca5416da707cf2bf34ba25bbcf69e012137 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -184,7 +184,8 @@ raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); typedef std::map BodySampleMap; class FunctionSamples; -typedef std::map CallsiteSampleMap; +typedef StringMap FunctionSamplesMap; +typedef std::map CallsiteSampleMap; /// Representation of the samples collected for a function. /// @@ -252,18 +253,41 @@ public: } /// Return the function samples at the given callsite location. - FunctionSamples &functionSamplesAt(const LineLocation &Loc) { + FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { return CallsiteSamples[Loc]; } - /// Return a pointer to function samples at the given callsite location. - const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc) const { + /// Returns the FunctionSamplesMap at the given \p Loc. + const FunctionSamplesMap * + findFunctionSamplesMapAt(const LineLocation &Loc) const { auto iter = CallsiteSamples.find(Loc); - if (iter == CallsiteSamples.end()) { + if (iter == CallsiteSamples.end()) return nullptr; - } else { - return &iter->second; - } + return &iter->second; + } + + /// Returns a pointer to FunctionSamples at the given callsite location \p Loc + /// with callee \p CalleeName. If no callsite can be found, relax the + /// restriction to return the FunctionSamples at callsite location \p Loc + /// with the maximum total sample count. + const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc, + StringRef CalleeName) const { + auto iter = CallsiteSamples.find(Loc); + if (iter == CallsiteSamples.end()) + return nullptr; + auto FS = iter->second.find(CalleeName); + if (FS != iter->second.end()) + return &FS->getValue(); + // If we cannot find exact match of the callee name, return the FS with + // the max total count. + uint64_t MaxTotalSamples = 0; + const FunctionSamples *R = nullptr; + for (const auto &NameFS : iter->second) + if (NameFS.second.getTotalSamples() >= MaxTotalSamples) { + MaxTotalSamples = NameFS.second.getTotalSamples(); + R = &NameFS.second; + } + return R; } bool empty() const { return TotalSamples == 0; } @@ -297,8 +321,9 @@ public: } for (const auto &I : Other.getCallsiteSamples()) { const LineLocation &Loc = I.first; - const FunctionSamples &Rec = I.second; - MergeResult(Result, functionSamplesAt(Loc).merge(Rec, Weight)); + FunctionSamplesMap &FSMap = functionSamplesAt(Loc); + for (const auto &Rec : I.second) + MergeResult(Result, FSMap[Rec.first()].merge(Rec.second, Weight)); } return Result; } @@ -314,7 +339,8 @@ public: if (!F || !F->getSubprogram()) S.insert(Function::getGUID(Name)); for (auto CS : CallsiteSamples) - CS.second.findImportedFunctions(S, M, Threshold); + for (const auto &NameFS : CS.second) + NameFS.second.findImportedFunctions(S, M, Threshold); } /// Set the name of the function. diff --git a/include/llvm/ProfileData/SampleProfWriter.h b/include/llvm/ProfileData/SampleProfWriter.h index 9d69af32dd46a52bd397d231021f6dafb72ff22f..86af1038d74e42c2a95d7b62824dc5c1a4e1f2e3 100644 --- a/include/llvm/ProfileData/SampleProfWriter.h +++ b/include/llvm/ProfileData/SampleProfWriter.h @@ -43,16 +43,7 @@ public: /// Write all the sample profiles in the given map of samples. /// /// \returns status code of the file update operation. - std::error_code write(const StringMap &ProfileMap) { - if (std::error_code EC = writeHeader(ProfileMap)) - return EC; - for (const auto &I : ProfileMap) { - const FunctionSamples &Profile = I.second; - if (std::error_code EC = write(Profile)) - return EC; - } - return sampleprof_error::success; - } + std::error_code write(const StringMap &ProfileMap); raw_ostream &getOutputStream() { return *OutputStream; } diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def index 46d253bf0ec772a563e3d96d7a047424d238f194..8eccebcd932a04e82d749cd55808cf276e7ae293 100644 --- a/include/llvm/Support/AArch64TargetParser.def +++ b/include/llvm/Support/AArch64TargetParser.def @@ -20,8 +20,7 @@ AARCH64_ARCH("invalid", AK_INVALID, nullptr, nullptr, ARMBuildAttrs::CPUArch::v8_A, FK_NONE, AArch64::AEK_NONE) AARCH64_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, - (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | - AArch64::AEK_SIMD | AArch64::AEK_LSE)) + (AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD)) AARCH64_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, (AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | @@ -52,38 +51,37 @@ AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") #define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) #endif AARCH64_CPU_NAME("cortex-a35", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, - ( AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cortex-a73", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRYPTO)) + (AArch64::AEK_NONE)) AARCH64_CPU_NAME("exynos-m1", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("exynos-m2", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("exynos-m3", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("falkor", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("kryo", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO)) + (AArch64::AEK_CRC)) AARCH64_CPU_NAME("thunderx2t99", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_LSE | AArch64::AEK_CRC | - AArch64::AEK_CRYPTO)) + (AArch64::AEK_NONE)) AARCH64_CPU_NAME("thunderx", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_PROFILE)) + (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("thunderxt88", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_PROFILE)) + (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("thunderxt81", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_PROFILE)) + (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) AARCH64_CPU_NAME("thunderxt83", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, - (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_PROFILE)) + (AArch64::AEK_CRC | AArch64::AEK_PROFILE)) // Invalid CPU AARCH64_CPU_NAME("invalid", AK_INVALID, FK_INVALID, true, AArch64::AEK_INVALID) #undef AARCH64_CPU_NAME diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/include/llvm/Support/AMDGPUCodeObjectMetadata.h similarity index 98% rename from lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h rename to include/llvm/Support/AMDGPUCodeObjectMetadata.h index 816e8c744b27d1ba9a5c71b1ec0f36cb7ae02141..d274c5ee9184269aae881a9c7f78f3ff4851793b 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h +++ b/include/llvm/Support/AMDGPUCodeObjectMetadata.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H -#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H +#ifndef LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H +#define LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H #include #include @@ -419,4 +419,4 @@ struct Metadata final { } // end namespace AMDGPU } // end namespace llvm -#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H +#endif // LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H diff --git a/include/llvm/Support/ARMTargetParser.def b/include/llvm/Support/ARMTargetParser.def index 18bf9af43226268b9c73e53958abe839d38d8394..65cb2715a6a5ea2a310a50f5f82ec67833d05efd 100644 --- a/include/llvm/Support/ARMTargetParser.def +++ b/include/llvm/Support/ARMTargetParser.def @@ -78,33 +78,33 @@ ARM_ARCH("armv7-a", AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7, FK_NEON, ARM::AEK_DSP) ARM_ARCH("armv7ve", AK_ARMV7VE, "7VE", "v7ve", ARMBuildAttrs::CPUArch::v7, FK_NEON, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | - ARM::AEK_HWDIVARM | ARM::AEK_HWDIV | ARM::AEK_DSP)) + ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP)) ARM_ARCH("armv7-r", AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7, - FK_NONE, (ARM::AEK_HWDIV | ARM::AEK_DSP)) + FK_NONE, (ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP)) ARM_ARCH("armv7-m", AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7, - FK_NONE, ARM::AEK_HWDIV) + FK_NONE, ARM::AEK_HWDIVTHUMB) ARM_ARCH("armv7e-m", AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M, - FK_NONE, (ARM::AEK_HWDIV | ARM::AEK_DSP)) + FK_NONE, (ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP)) ARM_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC)) + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC)) ARM_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC)) + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC)) ARM_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a", ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS)) + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS)) ARM_ARCH("armv8-r", AK_ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R, FK_NEON_FP_ARMV8, - (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIV | + (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC)) ARM_ARCH("armv8-m.base", AK_ARMV8MBaseline, "8-M.Baseline", "v8m.base", - ARMBuildAttrs::CPUArch::v8_M_Base, FK_NONE, ARM::AEK_HWDIV) + ARMBuildAttrs::CPUArch::v8_M_Base, FK_NONE, ARM::AEK_HWDIVTHUMB) ARM_ARCH("armv8-m.main", AK_ARMV8MMainline, "8-M.Mainline", "v8m.main", - ARMBuildAttrs::CPUArch::v8_M_Main, FK_FPV5_D16, ARM::AEK_HWDIV) + ARMBuildAttrs::CPUArch::v8_M_Main, FK_FPV5_D16, ARM::AEK_HWDIVTHUMB) // Non-standard Arch names. ARM_ARCH("iwmmxt", AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE, FK_NONE, ARM::AEK_NONE) @@ -128,7 +128,7 @@ ARM_ARCH_EXT_NAME("crc", ARM::AEK_CRC, "+crc", "-crc") ARM_ARCH_EXT_NAME("crypto", ARM::AEK_CRYPTO, "+crypto","-crypto") ARM_ARCH_EXT_NAME("dsp", ARM::AEK_DSP, "+dsp", "-dsp") ARM_ARCH_EXT_NAME("fp", ARM::AEK_FP, nullptr, nullptr) -ARM_ARCH_EXT_NAME("idiv", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIV), nullptr, nullptr) +ARM_ARCH_EXT_NAME("idiv", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB), nullptr, nullptr) ARM_ARCH_EXT_NAME("mp", ARM::AEK_MP, nullptr, nullptr) ARM_ARCH_EXT_NAME("simd", ARM::AEK_SIMD, nullptr, nullptr) ARM_ARCH_EXT_NAME("sec", ARM::AEK_SEC, nullptr, nullptr) @@ -147,9 +147,9 @@ ARM_ARCH_EXT_NAME("xscale", ARM::AEK_XSCALE, nullptr, nullptr) #endif ARM_HW_DIV_NAME("invalid", ARM::AEK_INVALID) ARM_HW_DIV_NAME("none", ARM::AEK_NONE) -ARM_HW_DIV_NAME("thumb", ARM::AEK_HWDIV) +ARM_HW_DIV_NAME("thumb", ARM::AEK_HWDIVTHUMB) ARM_HW_DIV_NAME("arm", ARM::AEK_HWDIVARM) -ARM_HW_DIV_NAME("arm,thumb", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIV)) +ARM_HW_DIV_NAME("arm,thumb", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB)) #undef ARM_HW_DIV_NAME #ifndef ARM_CPU_NAME @@ -205,20 +205,20 @@ ARM_CPU_NAME("cortex-a5", AK_ARMV7A, FK_NEON_VFPV4, false, (ARM::AEK_SEC | ARM::AEK_MP)) ARM_CPU_NAME("cortex-a7", AK_ARMV7A, FK_NEON_VFPV4, false, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIV)) -ARM_CPU_NAME("cortex-a8", AK_ARMV7A, FK_NEON, true, ARM::AEK_SEC) + ARM::AEK_HWDIVTHUMB)) +ARM_CPU_NAME("cortex-a8", AK_ARMV7A, FK_NEON, false, ARM::AEK_SEC) ARM_CPU_NAME("cortex-a9", AK_ARMV7A, FK_NEON_FP16, false, (ARM::AEK_SEC | ARM::AEK_MP)) ARM_CPU_NAME("cortex-a12", AK_ARMV7A, FK_NEON_VFPV4, false, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIV)) + ARM::AEK_HWDIVTHUMB)) ARM_CPU_NAME("cortex-a15", AK_ARMV7A, FK_NEON_VFPV4, false, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIV)) + ARM::AEK_HWDIVTHUMB)) ARM_CPU_NAME("cortex-a17", AK_ARMV7A, FK_NEON_VFPV4, false, (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | - ARM::AEK_HWDIV)) + ARM::AEK_HWDIVTHUMB)) ARM_CPU_NAME("krait", AK_ARMV7A, FK_NEON_VFPV4, false, - (ARM::AEK_HWDIVARM | ARM::AEK_HWDIV)) + (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB)) ARM_CPU_NAME("cortex-r4", AK_ARMV7R, FK_NONE, true, ARM::AEK_NONE) ARM_CPU_NAME("cortex-r4f", AK_ARMV7R, FK_VFPV3_D16, false, ARM::AEK_NONE) ARM_CPU_NAME("cortex-r5", AK_ARMV7R, FK_VFPV3_D16, false, @@ -236,7 +236,7 @@ ARM_CPU_NAME("cortex-m23", AK_ARMV8MBaseline, FK_NONE, false, ARM::AEK_NONE) ARM_CPU_NAME("cortex-m33", AK_ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP) ARM_CPU_NAME("cortex-a32", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a35", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) -ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, ARM::AEK_CRC) +ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("cortex-a73", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) @@ -249,7 +249,7 @@ ARM_CPU_NAME("kryo", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, ARM::AEK_NONE) ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, ARM::AEK_NONE) ARM_CPU_NAME("swift", AK_ARMV7S, FK_NEON_VFPV4, true, - (ARM::AEK_HWDIVARM | ARM::AEK_HWDIV)) + (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB)) // Invalid CPU ARM_CPU_NAME("invalid", AK_INVALID, FK_INVALID, true, ARM::AEK_INVALID) #undef ARM_CPU_NAME diff --git a/include/llvm/Support/ArrayRecycler.h b/include/llvm/Support/ArrayRecycler.h index 4698f12b3bbc81ce01f83ad850c546756ee2b811..68696be6bf3d13e695552bd5ded42eff8d303c2e 100644 --- a/include/llvm/Support/ArrayRecycler.h +++ b/include/llvm/Support/ArrayRecycler.h @@ -47,7 +47,9 @@ template class ArrayRecycler { FreeList *Entry = Bucket[Idx]; if (!Entry) return nullptr; + __asan_unpoison_memory_region(Entry, Capacity::get(Idx).getSize()); Bucket[Idx] = Entry->Next; + __msan_allocated_memory(Entry, Capacity::get(Idx).getSize()); return reinterpret_cast(Entry); } @@ -59,6 +61,7 @@ template class ArrayRecycler { Bucket.resize(size_t(Idx) + 1); Entry->Next = Bucket[Idx]; Bucket[Idx] = Entry; + __asan_poison_memory_region(Ptr, Capacity::get(Idx).getSize()); } public: diff --git a/include/llvm/Support/BinaryStreamArray.h b/include/llvm/Support/BinaryStreamArray.h index 3b1301d3cc0bdd604e44b364f25eb10473e4a0c5..3f5562ba751952494adad861571bb27bb11e165d 100644 --- a/include/llvm/Support/BinaryStreamArray.h +++ b/include/llvm/Support/BinaryStreamArray.h @@ -81,11 +81,11 @@ template struct VarStreamArrayExtractor { /// MyExtractor E(SomeContext); /// VarStreamArray MyTypeArray3(E); /// + template class VarStreamArrayIterator; template > - class VarStreamArray { friend class VarStreamArrayIterator; @@ -93,24 +93,37 @@ public: typedef VarStreamArrayIterator Iterator; VarStreamArray() = default; + explicit VarStreamArray(const Extractor &E) : E(E) {} explicit VarStreamArray(BinaryStreamRef Stream) : Stream(Stream) {} + VarStreamArray(BinaryStreamRef Stream, const Extractor &E) : Stream(Stream), E(E) {} - VarStreamArray(const VarStreamArray &Other) - : Stream(Other.Stream), E(Other.E) {} - Iterator begin(bool *HadError = nullptr) const { return Iterator(*this, E, HadError); } + bool valid() const { return Stream.valid(); } + Iterator end() const { return Iterator(E); } + bool empty() const { return Stream.getLength() == 0; } + + /// \brief given an offset into the array's underlying stream, return an + /// iterator to the record at that offset. This is considered unsafe + /// since the behavior is undefined if \p Offset does not refer to the + /// beginning of a valid record. + Iterator at(uint32_t Offset) const { + return Iterator(*this, E, Offset, nullptr); + } + const Extractor &getExtractor() const { return E; } + Extractor &getExtractor() { return E; } BinaryStreamRef getUnderlyingStream() const { return Stream; } + void setUnderlyingStream(BinaryStreamRef S) { Stream = S; } private: BinaryStreamRef Stream; @@ -126,8 +139,13 @@ class VarStreamArrayIterator public: VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, - bool *HadError = nullptr) - : IterRef(Array.Stream), Array(&Array), HadError(HadError), Extract(E) { + bool *HadError) + : VarStreamArrayIterator(Array, E, 0, HadError) {} + + VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, + uint32_t Offset, bool *HadError) + : IterRef(Array.Stream.drop_front(Offset)), Extract(E), + Array(&Array), AbsOffset(Offset), HadError(HadError) { if (IterRef.getLength() == 0) moveToEnd(); else { @@ -138,6 +156,7 @@ public: } } } + VarStreamArrayIterator() = default; explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} ~VarStreamArrayIterator() = default; @@ -162,10 +181,16 @@ public: return ThisValue; } + ValueType &operator*() { + assert(Array && !HasError); + return ThisValue; + } + IterType &operator+=(unsigned N) { for (unsigned I = 0; I < N; ++I) { // We are done with the current record, discard it so that we are // positioned at the next record. + AbsOffset += ThisLen; IterRef = IterRef.drop_front(ThisLen); if (IterRef.getLength() == 0) { // There is nothing after the current record, we must make this an end @@ -186,6 +211,9 @@ public: return *this; } + uint32_t offset() const { return AbsOffset; } + uint32_t getRecordLength() const { return ThisLen; } + private: void moveToEnd() { Array = nullptr; @@ -200,11 +228,12 @@ private: ValueType ThisValue; BinaryStreamRef IterRef; + Extractor Extract; const ArrayType *Array{nullptr}; uint32_t ThisLen{0}; + uint32_t AbsOffset{0}; bool HasError{false}; bool *HadError{nullptr}; - Extractor Extract; }; template class FixedStreamArrayIterator; @@ -218,6 +247,8 @@ template class FixedStreamArray { friend class FixedStreamArrayIterator; public: + typedef FixedStreamArrayIterator Iterator; + FixedStreamArray() = default; explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { assert(Stream.getLength() % sizeof(T) == 0); @@ -259,6 +290,12 @@ public: return FixedStreamArrayIterator(*this, size()); } + const T &front() const { return *begin(); } + const T &back() const { + FixedStreamArrayIterator I = end(); + return *(--I); + } + BinaryStreamRef getUnderlyingStream() const { return Stream; } private: @@ -268,7 +305,7 @@ private: template class FixedStreamArrayIterator : public iterator_facade_base, - std::random_access_iterator_tag, T> { + std::random_access_iterator_tag, const T> { public: FixedStreamArrayIterator(const FixedStreamArray &Array, uint32_t Index) @@ -282,6 +319,7 @@ public: } const T &operator*() const { return Array[Index]; } + const T &operator*() { return Array[Index]; } bool operator==(const FixedStreamArrayIterator &R) const { assert(Array == R.Array); @@ -294,7 +332,7 @@ public: } FixedStreamArrayIterator &operator-=(std::ptrdiff_t N) { - assert(Index >= N); + assert(std::ptrdiff_t(Index) >= N); Index -= N; return *this; } diff --git a/include/llvm/Support/BinaryStreamReader.h b/include/llvm/Support/BinaryStreamReader.h index d994fa0f49d0bad4c9c869fe60386283b14e84e0..738c042add3e3db5097c092c18faacdd4c68ea5c 100644 --- a/include/llvm/Support/BinaryStreamReader.h +++ b/include/llvm/Support/BinaryStreamReader.h @@ -14,9 +14,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/type_traits.h" #include @@ -31,7 +31,22 @@ namespace llvm { /// are overridable. class BinaryStreamReader { public: - explicit BinaryStreamReader(BinaryStreamRef Stream); + BinaryStreamReader() = default; + explicit BinaryStreamReader(BinaryStreamRef Ref); + explicit BinaryStreamReader(BinaryStream &Stream); + explicit BinaryStreamReader(ArrayRef Data, + llvm::support::endianness Endian); + explicit BinaryStreamReader(StringRef Data, llvm::support::endianness Endian); + + BinaryStreamReader(const BinaryStreamReader &Other) + : Stream(Other.Stream), Offset(Other.Offset) {} + + BinaryStreamReader &operator=(const BinaryStreamReader &Other) { + Stream = Other.Stream; + Offset = Other.Offset; + return *this; + } + virtual ~BinaryStreamReader() {} /// Read as much as possible from the underlying string at the current offset @@ -90,6 +105,13 @@ public: /// returns an appropriate error code. Error readCString(StringRef &Dest); + /// Similar to readCString, however read a null-terminated UTF16 string + /// instead. + /// + /// \returns a success error code if the data was successfully read, otherwise + /// returns an appropriate error code. + Error readWideString(ArrayRef &Dest); + /// Read a \p Length byte string into \p Dest. Whether a copy occurs depends /// on the implementation of the underlying stream. Updates the stream's /// offset to point after the newly read data. @@ -176,7 +198,7 @@ public: BinaryStreamRef S; if (auto EC = readStreamRef(S, Size)) return EC; - Array = VarStreamArray(S, Array.getExtractor()); + Array.setUnderlyingStream(S); return Error::success(); } @@ -225,9 +247,14 @@ public: /// \returns the next byte in the stream. uint8_t peek() const; + Error padToAlignment(uint32_t Align); + + std::pair + split(uint32_t Offset) const; + private: BinaryStreamRef Stream; - uint32_t Offset; + uint32_t Offset = 0; }; } // namespace llvm diff --git a/include/llvm/Support/BinaryStreamRef.h b/include/llvm/Support/BinaryStreamRef.h index 23ce02fd7ca41f5f1f83051e79dfecd161724285..e3bd4bf0860e771316b37d7083f514098158361d 100644 --- a/include/llvm/Support/BinaryStreamRef.h +++ b/include/llvm/Support/BinaryStreamRef.h @@ -16,36 +16,74 @@ #include "llvm/Support/Error.h" #include #include +#include namespace llvm { /// Common stuff for mutable and immutable StreamRefs. -template class BinaryStreamRefBase { -public: - BinaryStreamRefBase() : Stream(nullptr), ViewOffset(0), Length(0) {} - BinaryStreamRefBase(StreamType &Stream, uint32_t Offset, uint32_t Length) - : Stream(&Stream), ViewOffset(Offset), Length(Length) {} +template class BinaryStreamRefBase { +protected: + BinaryStreamRefBase() = default; + BinaryStreamRefBase(std::shared_ptr SharedImpl, uint32_t Offset, + uint32_t Length) + : SharedImpl(SharedImpl), BorrowedImpl(SharedImpl.get()), + ViewOffset(Offset), Length(Length) {} + BinaryStreamRefBase(StreamType &BorrowedImpl, uint32_t Offset, + uint32_t Length) + : BorrowedImpl(&BorrowedImpl), ViewOffset(Offset), Length(Length) {} + BinaryStreamRefBase(const BinaryStreamRefBase &Other) { + SharedImpl = Other.SharedImpl; + BorrowedImpl = Other.BorrowedImpl; + ViewOffset = Other.ViewOffset; + Length = Other.Length; + } - llvm::support::endianness getEndian() const { return Stream->getEndian(); } +public: + llvm::support::endianness getEndian() const { + return BorrowedImpl->getEndian(); + } uint32_t getLength() const { return Length; } - const StreamType *getStream() const { return Stream; } /// Return a new BinaryStreamRef with the first \p N elements removed. RefType drop_front(uint32_t N) const { - if (!Stream) + if (!BorrowedImpl) return RefType(); N = std::min(N, Length); - return RefType(*Stream, ViewOffset + N, Length - N); + RefType Result(static_cast(*this)); + Result.ViewOffset += N; + Result.Length -= N; + return Result; } - /// Return a new BinaryStreamRef with only the first \p N elements remaining. - RefType keep_front(uint32_t N) const { - if (!Stream) + /// Return a new BinaryStreamRef with the first \p N elements removed. + RefType drop_back(uint32_t N) const { + if (!BorrowedImpl) return RefType(); + N = std::min(N, Length); - return RefType(*Stream, ViewOffset, N); + RefType Result(static_cast(*this)); + Result.Length -= N; + return Result; + } + + /// Return a new BinaryStreamRef with only the first \p N elements remaining. + RefType keep_front(uint32_t N) const { + assert(N <= getLength()); + return drop_back(getLength() - N); + } + + /// Return a new BinaryStreamRef with only the last \p N elements remaining. + RefType keep_back(uint32_t N) const { + assert(N <= getLength()); + return drop_front(getLength() - N); + } + + /// Return a new BinaryStreamRef with the first and last \p N elements + /// removed. + RefType drop_symmetric(uint32_t N) const { + return drop_front(N).drop_back(N); } /// Return a new BinaryStreamRef with the first \p Offset elements removed, @@ -54,8 +92,10 @@ public: return drop_front(Offset).keep_front(Len); } + bool valid() const { return BorrowedImpl != nullptr; } + bool operator==(const RefType &Other) const { - if (Stream != Other.Stream) + if (BorrowedImpl != Other.BorrowedImpl) return false; if (ViewOffset != Other.ViewOffset) return false; @@ -73,9 +113,10 @@ protected: return Error::success(); } - StreamType *Stream; - uint32_t ViewOffset; - uint32_t Length; + std::shared_ptr SharedImpl; + StreamType *BorrowedImpl = nullptr; + uint32_t ViewOffset = 0; + uint32_t Length = 0; }; /// \brief BinaryStreamRef is to BinaryStream what ArrayRef is to an Array. It @@ -86,13 +127,22 @@ protected: /// and use inheritance to achieve polymorphism. Instead, you should pass /// around BinaryStreamRefs by value and achieve polymorphism that way. class BinaryStreamRef - : public BinaryStreamRefBase { + : public BinaryStreamRefBase { + friend BinaryStreamRefBase; + friend class WritableBinaryStreamRef; + BinaryStreamRef(std::shared_ptr Impl, uint32_t ViewOffset, + uint32_t Length) + : BinaryStreamRefBase(Impl, ViewOffset, Length) {} + public: BinaryStreamRef() = default; - BinaryStreamRef(BinaryStream &Stream) - : BinaryStreamRefBase(Stream, 0, Stream.getLength()) {} - BinaryStreamRef(BinaryStream &Stream, uint32_t Offset, uint32_t Length) - : BinaryStreamRefBase(Stream, Offset, Length) {} + BinaryStreamRef(BinaryStream &Stream); + BinaryStreamRef(BinaryStream &Stream, uint32_t Offset, uint32_t Length); + explicit BinaryStreamRef(ArrayRef Data, + llvm::support::endianness Endian); + explicit BinaryStreamRef(StringRef Data, llvm::support::endianness Endian); + + BinaryStreamRef(const BinaryStreamRef &Other); // Use BinaryStreamRef.slice() instead. BinaryStreamRef(BinaryStreamRef &S, uint32_t Offset, @@ -105,12 +155,7 @@ public: /// bounds of this BinaryStreamRef's view and the implementation could read /// the data, and an appropriate error code otherwise. Error readBytes(uint32_t Offset, uint32_t Size, - ArrayRef &Buffer) const { - if (auto EC = checkOffset(Offset, Size)) - return EC; - - return Stream->readBytes(ViewOffset + Offset, Size, Buffer); - } + ArrayRef &Buffer) const; /// Given an Offset into this BinaryStreamRef, return a reference to the /// largest buffer the stream could support without necessitating a copy. @@ -118,33 +163,25 @@ public: /// \returns a success error code if implementation could read the data, /// and an appropriate error code otherwise. Error readLongestContiguousChunk(uint32_t Offset, - ArrayRef &Buffer) const { - if (auto EC = checkOffset(Offset, 1)) - return EC; - - if (auto EC = - Stream->readLongestContiguousChunk(ViewOffset + Offset, Buffer)) - return EC; - // This StreamRef might refer to a smaller window over a larger stream. In - // that case we will have read out more bytes than we should return, because - // we should not read past the end of the current view. - uint32_t MaxLength = Length - Offset; - if (Buffer.size() > MaxLength) - Buffer = Buffer.slice(0, MaxLength); - return Error::success(); - } + ArrayRef &Buffer) const; }; class WritableBinaryStreamRef - : public BinaryStreamRefBase { + : public BinaryStreamRefBase { + friend BinaryStreamRefBase; + WritableBinaryStreamRef(std::shared_ptr Impl, + uint32_t ViewOffset, uint32_t Length) + : BinaryStreamRefBase(Impl, ViewOffset, Length) {} + public: WritableBinaryStreamRef() = default; - WritableBinaryStreamRef(WritableBinaryStream &Stream) - : BinaryStreamRefBase(Stream, 0, Stream.getLength()) {} + WritableBinaryStreamRef(WritableBinaryStream &Stream); WritableBinaryStreamRef(WritableBinaryStream &Stream, uint32_t Offset, - uint32_t Length) - : BinaryStreamRefBase(Stream, Offset, Length) {} + uint32_t Length); + explicit WritableBinaryStreamRef(MutableArrayRef Data, + llvm::support::endianness Endian); + WritableBinaryStreamRef(const WritableBinaryStreamRef &Other); // Use WritableBinaryStreamRef.slice() instead. WritableBinaryStreamRef(WritableBinaryStreamRef &S, uint32_t Offset, @@ -156,17 +193,13 @@ public: /// \returns a success error code if the data could fit within the underlying /// stream at the specified location and the implementation could write the /// data, and an appropriate error code otherwise. - Error writeBytes(uint32_t Offset, ArrayRef Data) const { - if (auto EC = checkOffset(Offset, Data.size())) - return EC; - - return Stream->writeBytes(ViewOffset + Offset, Data); - } + Error writeBytes(uint32_t Offset, ArrayRef Data) const; - operator BinaryStreamRef() { return BinaryStreamRef(*Stream); } + /// Conver this WritableBinaryStreamRef to a read-only BinaryStreamRef. + operator BinaryStreamRef() const; /// \brief For buffered streams, commits changes to the backing store. - Error commit() { return Stream->commit(); } + Error commit(); }; } // end namespace llvm diff --git a/include/llvm/Support/BinaryStreamWriter.h b/include/llvm/Support/BinaryStreamWriter.h index 64f26b24543df3a545991993e1b8a9b8af54e176..a4495a1ce27d4ca98f9ffbc8bf8825eed42bc561 100644 --- a/include/llvm/Support/BinaryStreamWriter.h +++ b/include/llvm/Support/BinaryStreamWriter.h @@ -20,6 +20,7 @@ #include "llvm/Support/Error.h" #include #include +#include namespace llvm { @@ -31,7 +32,20 @@ namespace llvm { class BinaryStreamWriter { public: BinaryStreamWriter() = default; - explicit BinaryStreamWriter(WritableBinaryStreamRef Stream); + explicit BinaryStreamWriter(WritableBinaryStreamRef Ref); + explicit BinaryStreamWriter(WritableBinaryStream &Stream); + explicit BinaryStreamWriter(MutableArrayRef Data, + llvm::support::endianness Endian); + + BinaryStreamWriter(const BinaryStreamWriter &Other) + : Stream(Other.Stream), Offset(Other.Offset) {} + + BinaryStreamWriter &operator=(const BinaryStreamWriter &Other) { + Stream = Other.Stream; + Offset = Other.Offset; + return *this; + } + virtual ~BinaryStreamWriter() {} /// Write the bytes specified in \p Buffer to the underlying stream. @@ -150,6 +164,9 @@ public: return writeStreamRef(Array.getUnderlyingStream()); } + /// Splits the Writer into two Writers at a given offset. + std::pair split(uint32_t Off) const; + void setOffset(uint32_t Off) { Offset = Off; } uint32_t getOffset() const { return Offset; } uint32_t getLength() const { return Stream.getLength(); } diff --git a/include/llvm/Support/BranchProbability.h b/include/llvm/Support/BranchProbability.h index e8eb50d53eb605ab3ab0de071538b5f7df184683..b403d7fbf117dbef47bd89993ade8cd8c0000704 100644 --- a/include/llvm/Support/BranchProbability.h +++ b/include/llvm/Support/BranchProbability.h @@ -112,6 +112,13 @@ public: return *this; } + BranchProbability &operator*=(uint32_t RHS) { + assert(N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + N = (uint64_t(N) * RHS > D) ? D : N * RHS; + return *this; + } + BranchProbability &operator/=(uint32_t RHS) { assert(N != UnknownN && "Unknown probability cannot participate in arithmetics."); @@ -135,6 +142,11 @@ public: return Prob *= RHS; } + BranchProbability operator*(uint32_t RHS) const { + BranchProbability Prob(*this); + return Prob *= RHS; + } + BranchProbability operator/(uint32_t RHS) const { BranchProbability Prob(*this); return Prob /= RHS; diff --git a/include/llvm/Support/CBindingWrapping.h b/include/llvm/Support/CBindingWrapping.h index d4633aa7d3c6cebc1431616b282374978a339ce3..f60f99d376ad433163ed8904d62bb1d421e8bc36 100644 --- a/include/llvm/Support/CBindingWrapping.h +++ b/include/llvm/Support/CBindingWrapping.h @@ -14,8 +14,8 @@ #ifndef LLVM_SUPPORT_CBINDINGWRAPPING_H #define LLVM_SUPPORT_CBINDINGWRAPPING_H -#include "llvm/Support/Casting.h" #include "llvm-c/Types.h" +#include "llvm/Support/Casting.h" #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref) \ inline ty *unwrap(ref P) { \ diff --git a/include/llvm/Support/CMakeLists.txt b/include/llvm/Support/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c58ccf216303c16ef0cdf1f5554cb4ff7d95ac29 --- /dev/null +++ b/include/llvm/Support/CMakeLists.txt @@ -0,0 +1,59 @@ +# Figure out if we can track VC revisions. +function(find_first_existing_file out_var) + foreach(file ${ARGN}) + if(EXISTS "${file}") + set(${out_var} "${file}" PARENT_SCOPE) + return() + endif() + endforeach() +endfunction() + +macro(find_first_existing_vc_file out_var path) + find_program(git_executable NAMES git git.exe git.cmd) + # Run from a subdirectory to force git to print an absolute path. + execute_process(COMMAND ${git_executable} rev-parse --git-dir + WORKING_DIRECTORY ${path}/cmake + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_dir + ERROR_QUIET) + if(git_result EQUAL 0) + string(STRIP "${git_dir}" git_dir) + set(${out_var} "${git_dir}/logs/HEAD") + # some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD + if (NOT EXISTS "${git_dir}/logs/HEAD") + file(WRITE "${git_dir}/logs/HEAD" "") + endif() + else() + find_first_existing_file(${out_var} + "${path}/.svn/wc.db" # SVN 1.7 + "${path}/.svn/entries" # SVN 1.6 + ) + endif() +endmacro() + +find_first_existing_vc_file(llvm_vc "${LLVM_MAIN_SRC_DIR}") + +# The VC revision include that we want to generate. +set(version_inc "${CMAKE_CURRENT_BINARY_DIR}/VCSRevision.h") + +set(get_svn_script "${LLVM_CMAKE_PATH}/GenerateVersionFromCVS.cmake") + +if(DEFINED llvm_vc) + # Create custom target to generate the VC revision include. + add_custom_command(OUTPUT "${version_inc}" + DEPENDS "${llvm_vc}" "${get_svn_script}" + COMMAND + ${CMAKE_COMMAND} "-DSOURCE_DIR=${LLVM_MAIN_SRC_DIR}" + "-DNAME=LLVM_REVISION" + "-DHEADER_FILE=${version_inc}" + -P "${get_svn_script}") + + # Mark the generated header as being generated. + set_source_files_properties("${version_inc}" + PROPERTIES GENERATED TRUE + HEADER_FILE_ONLY TRUE) +else() + file(WRITE "${version_inc}" "") +endif() + +add_custom_target(llvm_vcsrevision_h DEPENDS "${version_inc}") diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h deleted file mode 100644 index 19223306bd07ec7687a2f6bc27c6150f0491841b..0000000000000000000000000000000000000000 --- a/include/llvm/Support/COFF.h +++ /dev/null @@ -1,680 +0,0 @@ -//===-- llvm/Support/COFF.h -------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains an definitions used in Windows COFF Files. -// -// Structures and enums defined within this file where created using -// information from Microsoft's publicly available PE/COFF format document: -// -// Microsoft Portable Executable and Common Object File Format Specification -// Revision 8.1 - February 15, 2008 -// -// As of 5/2/2010, hosted by Microsoft at: -// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_COFF_H -#define LLVM_SUPPORT_COFF_H - -#include "llvm/Support/DataTypes.h" -#include -#include - -namespace llvm { -namespace COFF { - - // The maximum number of sections that a COFF object can have (inclusive). - const int32_t MaxNumberOfSections16 = 65279; - - // The PE signature bytes that follows the DOS stub header. - static const char PEMagic[] = { 'P', 'E', '\0', '\0' }; - - static const char BigObjMagic[] = { - '\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b', - '\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8', - }; - - static const char ClGlObjMagic[] = { - '\x38', '\xfe', '\xb3', '\x0c', '\xa5', '\xd9', '\xab', '\x4d', - '\xac', '\x9b', '\xd6', '\xb6', '\x22', '\x26', '\x53', '\xc2', - }; - - // Sizes in bytes of various things in the COFF format. - enum { - Header16Size = 20, - Header32Size = 56, - NameSize = 8, - Symbol16Size = 18, - Symbol32Size = 20, - SectionSize = 40, - RelocationSize = 10 - }; - - struct header { - uint16_t Machine; - int32_t NumberOfSections; - uint32_t TimeDateStamp; - uint32_t PointerToSymbolTable; - uint32_t NumberOfSymbols; - uint16_t SizeOfOptionalHeader; - uint16_t Characteristics; - }; - - struct BigObjHeader { - enum : uint16_t { MinBigObjectVersion = 2 }; - - uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). - uint16_t Sig2; ///< Must be 0xFFFF. - uint16_t Version; - uint16_t Machine; - uint32_t TimeDateStamp; - uint8_t UUID[16]; - uint32_t unused1; - uint32_t unused2; - uint32_t unused3; - uint32_t unused4; - uint32_t NumberOfSections; - uint32_t PointerToSymbolTable; - uint32_t NumberOfSymbols; - }; - - enum MachineTypes { - MT_Invalid = 0xffff, - - IMAGE_FILE_MACHINE_UNKNOWN = 0x0, - IMAGE_FILE_MACHINE_AM33 = 0x13, - IMAGE_FILE_MACHINE_AMD64 = 0x8664, - IMAGE_FILE_MACHINE_ARM = 0x1C0, - IMAGE_FILE_MACHINE_ARMNT = 0x1C4, - IMAGE_FILE_MACHINE_ARM64 = 0xAA64, - IMAGE_FILE_MACHINE_EBC = 0xEBC, - IMAGE_FILE_MACHINE_I386 = 0x14C, - IMAGE_FILE_MACHINE_IA64 = 0x200, - IMAGE_FILE_MACHINE_M32R = 0x9041, - IMAGE_FILE_MACHINE_MIPS16 = 0x266, - IMAGE_FILE_MACHINE_MIPSFPU = 0x366, - IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466, - IMAGE_FILE_MACHINE_POWERPC = 0x1F0, - IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1, - IMAGE_FILE_MACHINE_R4000 = 0x166, - IMAGE_FILE_MACHINE_SH3 = 0x1A2, - IMAGE_FILE_MACHINE_SH3DSP = 0x1A3, - IMAGE_FILE_MACHINE_SH4 = 0x1A6, - IMAGE_FILE_MACHINE_SH5 = 0x1A8, - IMAGE_FILE_MACHINE_THUMB = 0x1C2, - IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169 - }; - - enum Characteristics { - C_Invalid = 0, - - /// The file does not contain base relocations and must be loaded at its - /// preferred base. If this cannot be done, the loader will error. - IMAGE_FILE_RELOCS_STRIPPED = 0x0001, - /// The file is valid and can be run. - IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002, - /// COFF line numbers have been stripped. This is deprecated and should be - /// 0. - IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004, - /// COFF symbol table entries for local symbols have been removed. This is - /// deprecated and should be 0. - IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008, - /// Aggressively trim working set. This is deprecated and must be 0. - IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010, - /// Image can handle > 2GiB addresses. - IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020, - /// Little endian: the LSB precedes the MSB in memory. This is deprecated - /// and should be 0. - IMAGE_FILE_BYTES_REVERSED_LO = 0x0080, - /// Machine is based on a 32bit word architecture. - IMAGE_FILE_32BIT_MACHINE = 0x0100, - /// Debugging info has been removed. - IMAGE_FILE_DEBUG_STRIPPED = 0x0200, - /// If the image is on removable media, fully load it and copy it to swap. - IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400, - /// If the image is on network media, fully load it and copy it to swap. - IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800, - /// The image file is a system file, not a user program. - IMAGE_FILE_SYSTEM = 0x1000, - /// The image file is a DLL. - IMAGE_FILE_DLL = 0x2000, - /// This file should only be run on a uniprocessor machine. - IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000, - /// Big endian: the MSB precedes the LSB in memory. This is deprecated - /// and should be 0. - IMAGE_FILE_BYTES_REVERSED_HI = 0x8000 - }; - - struct symbol { - char Name[NameSize]; - uint32_t Value; - int32_t SectionNumber; - uint16_t Type; - uint8_t StorageClass; - uint8_t NumberOfAuxSymbols; - }; - - enum SymbolSectionNumber : int32_t { - IMAGE_SYM_DEBUG = -2, - IMAGE_SYM_ABSOLUTE = -1, - IMAGE_SYM_UNDEFINED = 0 - }; - - /// Storage class tells where and what the symbol represents - enum SymbolStorageClass { - SSC_Invalid = 0xff, - - IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function - IMAGE_SYM_CLASS_NULL = 0, ///< No symbol - IMAGE_SYM_CLASS_AUTOMATIC = 1, ///< Stack variable - IMAGE_SYM_CLASS_EXTERNAL = 2, ///< External symbol - IMAGE_SYM_CLASS_STATIC = 3, ///< Static - IMAGE_SYM_CLASS_REGISTER = 4, ///< Register variable - IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, ///< External definition - IMAGE_SYM_CLASS_LABEL = 6, ///< Label - IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, ///< Undefined label - IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, ///< Member of structure - IMAGE_SYM_CLASS_ARGUMENT = 9, ///< Function argument - IMAGE_SYM_CLASS_STRUCT_TAG = 10, ///< Structure tag - IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, ///< Member of union - IMAGE_SYM_CLASS_UNION_TAG = 12, ///< Union tag - IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, ///< Type definition - IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, ///< Undefined static - IMAGE_SYM_CLASS_ENUM_TAG = 15, ///< Enumeration tag - IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, ///< Member of enumeration - IMAGE_SYM_CLASS_REGISTER_PARAM = 17, ///< Register parameter - IMAGE_SYM_CLASS_BIT_FIELD = 18, ///< Bit field - /// ".bb" or ".eb" - beginning or end of block - IMAGE_SYM_CLASS_BLOCK = 100, - /// ".bf" or ".ef" - beginning or end of function - IMAGE_SYM_CLASS_FUNCTION = 101, - IMAGE_SYM_CLASS_END_OF_STRUCT = 102, ///< End of structure - IMAGE_SYM_CLASS_FILE = 103, ///< File name - /// Line number, reformatted as symbol - IMAGE_SYM_CLASS_SECTION = 104, - IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, ///< Duplicate tag - /// External symbol in dmert public lib - IMAGE_SYM_CLASS_CLR_TOKEN = 107 - }; - - enum SymbolBaseType { - IMAGE_SYM_TYPE_NULL = 0, ///< No type information or unknown base type. - IMAGE_SYM_TYPE_VOID = 1, ///< Used with void pointers and functions. - IMAGE_SYM_TYPE_CHAR = 2, ///< A character (signed byte). - IMAGE_SYM_TYPE_SHORT = 3, ///< A 2-byte signed integer. - IMAGE_SYM_TYPE_INT = 4, ///< A natural integer type on the target. - IMAGE_SYM_TYPE_LONG = 5, ///< A 4-byte signed integer. - IMAGE_SYM_TYPE_FLOAT = 6, ///< A 4-byte floating-point number. - IMAGE_SYM_TYPE_DOUBLE = 7, ///< An 8-byte floating-point number. - IMAGE_SYM_TYPE_STRUCT = 8, ///< A structure. - IMAGE_SYM_TYPE_UNION = 9, ///< An union. - IMAGE_SYM_TYPE_ENUM = 10, ///< An enumerated type. - IMAGE_SYM_TYPE_MOE = 11, ///< A member of enumeration (a specific value). - IMAGE_SYM_TYPE_BYTE = 12, ///< A byte; unsigned 1-byte integer. - IMAGE_SYM_TYPE_WORD = 13, ///< A word; unsigned 2-byte integer. - IMAGE_SYM_TYPE_UINT = 14, ///< An unsigned integer of natural size. - IMAGE_SYM_TYPE_DWORD = 15 ///< An unsigned 4-byte integer. - }; - - enum SymbolComplexType { - IMAGE_SYM_DTYPE_NULL = 0, ///< No complex type; simple scalar variable. - IMAGE_SYM_DTYPE_POINTER = 1, ///< A pointer to base type. - IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type. - IMAGE_SYM_DTYPE_ARRAY = 3, ///< An array of base type. - - /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT)) - SCT_COMPLEX_TYPE_SHIFT = 4 - }; - - enum AuxSymbolType { - IMAGE_AUX_SYMBOL_TYPE_TOKEN_DEF = 1 - }; - - struct section { - char Name[NameSize]; - uint32_t VirtualSize; - uint32_t VirtualAddress; - uint32_t SizeOfRawData; - uint32_t PointerToRawData; - uint32_t PointerToRelocations; - uint32_t PointerToLineNumbers; - uint16_t NumberOfRelocations; - uint16_t NumberOfLineNumbers; - uint32_t Characteristics; - }; - - enum SectionCharacteristics : uint32_t { - SC_Invalid = 0xffffffff, - - IMAGE_SCN_TYPE_NOLOAD = 0x00000002, - IMAGE_SCN_TYPE_NO_PAD = 0x00000008, - IMAGE_SCN_CNT_CODE = 0x00000020, - IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, - IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080, - IMAGE_SCN_LNK_OTHER = 0x00000100, - IMAGE_SCN_LNK_INFO = 0x00000200, - IMAGE_SCN_LNK_REMOVE = 0x00000800, - IMAGE_SCN_LNK_COMDAT = 0x00001000, - IMAGE_SCN_GPREL = 0x00008000, - IMAGE_SCN_MEM_PURGEABLE = 0x00020000, - IMAGE_SCN_MEM_16BIT = 0x00020000, - IMAGE_SCN_MEM_LOCKED = 0x00040000, - IMAGE_SCN_MEM_PRELOAD = 0x00080000, - IMAGE_SCN_ALIGN_1BYTES = 0x00100000, - IMAGE_SCN_ALIGN_2BYTES = 0x00200000, - IMAGE_SCN_ALIGN_4BYTES = 0x00300000, - IMAGE_SCN_ALIGN_8BYTES = 0x00400000, - IMAGE_SCN_ALIGN_16BYTES = 0x00500000, - IMAGE_SCN_ALIGN_32BYTES = 0x00600000, - IMAGE_SCN_ALIGN_64BYTES = 0x00700000, - IMAGE_SCN_ALIGN_128BYTES = 0x00800000, - IMAGE_SCN_ALIGN_256BYTES = 0x00900000, - IMAGE_SCN_ALIGN_512BYTES = 0x00A00000, - IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000, - IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, - IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, - IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, - IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, - IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, - IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, - IMAGE_SCN_MEM_NOT_PAGED = 0x08000000, - IMAGE_SCN_MEM_SHARED = 0x10000000, - IMAGE_SCN_MEM_EXECUTE = 0x20000000, - IMAGE_SCN_MEM_READ = 0x40000000, - IMAGE_SCN_MEM_WRITE = 0x80000000 - }; - - struct relocation { - uint32_t VirtualAddress; - uint32_t SymbolTableIndex; - uint16_t Type; - }; - - enum RelocationTypeI386 { - IMAGE_REL_I386_ABSOLUTE = 0x0000, - IMAGE_REL_I386_DIR16 = 0x0001, - IMAGE_REL_I386_REL16 = 0x0002, - IMAGE_REL_I386_DIR32 = 0x0006, - IMAGE_REL_I386_DIR32NB = 0x0007, - IMAGE_REL_I386_SEG12 = 0x0009, - IMAGE_REL_I386_SECTION = 0x000A, - IMAGE_REL_I386_SECREL = 0x000B, - IMAGE_REL_I386_TOKEN = 0x000C, - IMAGE_REL_I386_SECREL7 = 0x000D, - IMAGE_REL_I386_REL32 = 0x0014 - }; - - enum RelocationTypeAMD64 { - IMAGE_REL_AMD64_ABSOLUTE = 0x0000, - IMAGE_REL_AMD64_ADDR64 = 0x0001, - IMAGE_REL_AMD64_ADDR32 = 0x0002, - IMAGE_REL_AMD64_ADDR32NB = 0x0003, - IMAGE_REL_AMD64_REL32 = 0x0004, - IMAGE_REL_AMD64_REL32_1 = 0x0005, - IMAGE_REL_AMD64_REL32_2 = 0x0006, - IMAGE_REL_AMD64_REL32_3 = 0x0007, - IMAGE_REL_AMD64_REL32_4 = 0x0008, - IMAGE_REL_AMD64_REL32_5 = 0x0009, - IMAGE_REL_AMD64_SECTION = 0x000A, - IMAGE_REL_AMD64_SECREL = 0x000B, - IMAGE_REL_AMD64_SECREL7 = 0x000C, - IMAGE_REL_AMD64_TOKEN = 0x000D, - IMAGE_REL_AMD64_SREL32 = 0x000E, - IMAGE_REL_AMD64_PAIR = 0x000F, - IMAGE_REL_AMD64_SSPAN32 = 0x0010 - }; - - enum RelocationTypesARM { - IMAGE_REL_ARM_ABSOLUTE = 0x0000, - IMAGE_REL_ARM_ADDR32 = 0x0001, - IMAGE_REL_ARM_ADDR32NB = 0x0002, - IMAGE_REL_ARM_BRANCH24 = 0x0003, - IMAGE_REL_ARM_BRANCH11 = 0x0004, - IMAGE_REL_ARM_TOKEN = 0x0005, - IMAGE_REL_ARM_BLX24 = 0x0008, - IMAGE_REL_ARM_BLX11 = 0x0009, - IMAGE_REL_ARM_SECTION = 0x000E, - IMAGE_REL_ARM_SECREL = 0x000F, - IMAGE_REL_ARM_MOV32A = 0x0010, - IMAGE_REL_ARM_MOV32T = 0x0011, - IMAGE_REL_ARM_BRANCH20T = 0x0012, - IMAGE_REL_ARM_BRANCH24T = 0x0014, - IMAGE_REL_ARM_BLX23T = 0x0015 - }; - - enum COMDATType { - IMAGE_COMDAT_SELECT_NODUPLICATES = 1, - IMAGE_COMDAT_SELECT_ANY, - IMAGE_COMDAT_SELECT_SAME_SIZE, - IMAGE_COMDAT_SELECT_EXACT_MATCH, - IMAGE_COMDAT_SELECT_ASSOCIATIVE, - IMAGE_COMDAT_SELECT_LARGEST, - IMAGE_COMDAT_SELECT_NEWEST - }; - - // Auxiliary Symbol Formats - struct AuxiliaryFunctionDefinition { - uint32_t TagIndex; - uint32_t TotalSize; - uint32_t PointerToLinenumber; - uint32_t PointerToNextFunction; - char unused[2]; - }; - - struct AuxiliarybfAndefSymbol { - uint8_t unused1[4]; - uint16_t Linenumber; - uint8_t unused2[6]; - uint32_t PointerToNextFunction; - uint8_t unused3[2]; - }; - - struct AuxiliaryWeakExternal { - uint32_t TagIndex; - uint32_t Characteristics; - uint8_t unused[10]; - }; - - enum WeakExternalCharacteristics { - IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1, - IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2, - IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3 - }; - - struct AuxiliarySectionDefinition { - uint32_t Length; - uint16_t NumberOfRelocations; - uint16_t NumberOfLinenumbers; - uint32_t CheckSum; - uint32_t Number; - uint8_t Selection; - char unused; - }; - - struct AuxiliaryCLRToken { - uint8_t AuxType; - uint8_t unused1; - uint32_t SymbolTableIndex; - char unused2[12]; - }; - - union Auxiliary { - AuxiliaryFunctionDefinition FunctionDefinition; - AuxiliarybfAndefSymbol bfAndefSymbol; - AuxiliaryWeakExternal WeakExternal; - AuxiliarySectionDefinition SectionDefinition; - }; - - /// @brief The Import Directory Table. - /// - /// There is a single array of these and one entry per imported DLL. - struct ImportDirectoryTableEntry { - uint32_t ImportLookupTableRVA; - uint32_t TimeDateStamp; - uint32_t ForwarderChain; - uint32_t NameRVA; - uint32_t ImportAddressTableRVA; - }; - - /// @brief The PE32 Import Lookup Table. - /// - /// There is an array of these for each imported DLL. It represents either - /// the ordinal to import from the target DLL, or a name to lookup and import - /// from the target DLL. - /// - /// This also happens to be the same format used by the Import Address Table - /// when it is initially written out to the image. - struct ImportLookupTableEntry32 { - uint32_t data; - - /// @brief Is this entry specified by ordinal, or name? - bool isOrdinal() const { return data & 0x80000000; } - - /// @brief Get the ordinal value of this entry. isOrdinal must be true. - uint16_t getOrdinal() const { - assert(isOrdinal() && "ILT entry is not an ordinal!"); - return data & 0xFFFF; - } - - /// @brief Set the ordinal value and set isOrdinal to true. - void setOrdinal(uint16_t o) { - data = o; - data |= 0x80000000; - } - - /// @brief Get the Hint/Name entry RVA. isOrdinal must be false. - uint32_t getHintNameRVA() const { - assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!"); - return data; - } - - /// @brief Set the Hint/Name entry RVA and set isOrdinal to false. - void setHintNameRVA(uint32_t rva) { data = rva; } - }; - - /// @brief The DOS compatible header at the front of all PEs. - struct DOSHeader { - uint16_t Magic; - uint16_t UsedBytesInTheLastPage; - uint16_t FileSizeInPages; - uint16_t NumberOfRelocationItems; - uint16_t HeaderSizeInParagraphs; - uint16_t MinimumExtraParagraphs; - uint16_t MaximumExtraParagraphs; - uint16_t InitialRelativeSS; - uint16_t InitialSP; - uint16_t Checksum; - uint16_t InitialIP; - uint16_t InitialRelativeCS; - uint16_t AddressOfRelocationTable; - uint16_t OverlayNumber; - uint16_t Reserved[4]; - uint16_t OEMid; - uint16_t OEMinfo; - uint16_t Reserved2[10]; - uint32_t AddressOfNewExeHeader; - }; - - struct PE32Header { - enum { - PE32 = 0x10b, - PE32_PLUS = 0x20b - }; - - uint16_t Magic; - uint8_t MajorLinkerVersion; - uint8_t MinorLinkerVersion; - uint32_t SizeOfCode; - uint32_t SizeOfInitializedData; - uint32_t SizeOfUninitializedData; - uint32_t AddressOfEntryPoint; // RVA - uint32_t BaseOfCode; // RVA - uint32_t BaseOfData; // RVA - uint32_t ImageBase; - uint32_t SectionAlignment; - uint32_t FileAlignment; - uint16_t MajorOperatingSystemVersion; - uint16_t MinorOperatingSystemVersion; - uint16_t MajorImageVersion; - uint16_t MinorImageVersion; - uint16_t MajorSubsystemVersion; - uint16_t MinorSubsystemVersion; - uint32_t Win32VersionValue; - uint32_t SizeOfImage; - uint32_t SizeOfHeaders; - uint32_t CheckSum; - uint16_t Subsystem; - // FIXME: This should be DllCharacteristics to match the COFF spec. - uint16_t DLLCharacteristics; - uint32_t SizeOfStackReserve; - uint32_t SizeOfStackCommit; - uint32_t SizeOfHeapReserve; - uint32_t SizeOfHeapCommit; - uint32_t LoaderFlags; - // FIXME: This should be NumberOfRvaAndSizes to match the COFF spec. - uint32_t NumberOfRvaAndSize; - }; - - struct DataDirectory { - uint32_t RelativeVirtualAddress; - uint32_t Size; - }; - - enum DataDirectoryIndex { - EXPORT_TABLE = 0, - IMPORT_TABLE, - RESOURCE_TABLE, - EXCEPTION_TABLE, - CERTIFICATE_TABLE, - BASE_RELOCATION_TABLE, - DEBUG_DIRECTORY, - ARCHITECTURE, - GLOBAL_PTR, - TLS_TABLE, - LOAD_CONFIG_TABLE, - BOUND_IMPORT, - IAT, - DELAY_IMPORT_DESCRIPTOR, - CLR_RUNTIME_HEADER, - - NUM_DATA_DIRECTORIES - }; - - enum WindowsSubsystem { - IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem. - IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes - IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem. - IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, ///< The Windows character subsystem. - IMAGE_SUBSYSTEM_OS2_CUI = 5, ///< The OS/2 character subsytem. - IMAGE_SUBSYSTEM_POSIX_CUI = 7, ///< The POSIX character subsystem. - IMAGE_SUBSYSTEM_NATIVE_WINDOWS = 8, ///< Native Windows 9x driver. - IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, ///< Windows CE. - IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application. - IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot - /// services. - IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, ///< An EFI driver with run-time - /// services. - IMAGE_SUBSYSTEM_EFI_ROM = 13, ///< An EFI ROM image. - IMAGE_SUBSYSTEM_XBOX = 14, ///< XBOX. - IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 ///< A BCD application. - }; - - enum DLLCharacteristics { - /// ASLR with 64 bit address space. - IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020, - /// DLL can be relocated at load time. - IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040, - /// Code integrity checks are enforced. - IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080, - ///< Image is NX compatible. - IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100, - /// Isolation aware, but do not isolate the image. - IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200, - /// Does not use structured exception handling (SEH). No SEH handler may be - /// called in this image. - IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400, - /// Do not bind the image. - IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800, - ///< Image should execute in an AppContainer. - IMAGE_DLL_CHARACTERISTICS_APPCONTAINER = 0x1000, - ///< A WDM driver. - IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000, - ///< Image supports Control Flow Guard. - IMAGE_DLL_CHARACTERISTICS_GUARD_CF = 0x4000, - /// Terminal Server aware. - IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000 - }; - - enum DebugType { - IMAGE_DEBUG_TYPE_UNKNOWN = 0, - IMAGE_DEBUG_TYPE_COFF = 1, - IMAGE_DEBUG_TYPE_CODEVIEW = 2, - IMAGE_DEBUG_TYPE_FPO = 3, - IMAGE_DEBUG_TYPE_MISC = 4, - IMAGE_DEBUG_TYPE_EXCEPTION = 5, - IMAGE_DEBUG_TYPE_FIXUP = 6, - IMAGE_DEBUG_TYPE_OMAP_TO_SRC = 7, - IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8, - IMAGE_DEBUG_TYPE_BORLAND = 9, - IMAGE_DEBUG_TYPE_RESERVED10 = 10, - IMAGE_DEBUG_TYPE_CLSID = 11, - IMAGE_DEBUG_TYPE_VC_FEATURE = 12, - IMAGE_DEBUG_TYPE_POGO = 13, - IMAGE_DEBUG_TYPE_ILTCG = 14, - IMAGE_DEBUG_TYPE_MPX = 15, - IMAGE_DEBUG_TYPE_REPRO = 16, - }; - - enum BaseRelocationType { - IMAGE_REL_BASED_ABSOLUTE = 0, - IMAGE_REL_BASED_HIGH = 1, - IMAGE_REL_BASED_LOW = 2, - IMAGE_REL_BASED_HIGHLOW = 3, - IMAGE_REL_BASED_HIGHADJ = 4, - IMAGE_REL_BASED_MIPS_JMPADDR = 5, - IMAGE_REL_BASED_ARM_MOV32A = 5, - IMAGE_REL_BASED_ARM_MOV32T = 7, - IMAGE_REL_BASED_MIPS_JMPADDR16 = 9, - IMAGE_REL_BASED_DIR64 = 10 - }; - - enum ImportType { - IMPORT_CODE = 0, - IMPORT_DATA = 1, - IMPORT_CONST = 2 - }; - - enum ImportNameType { - /// Import is by ordinal. This indicates that the value in the Ordinal/Hint - /// field of the import header is the import's ordinal. If this constant is - /// not specified, then the Ordinal/Hint field should always be interpreted - /// as the import's hint. - IMPORT_ORDINAL = 0, - /// The import name is identical to the public symbol name - IMPORT_NAME = 1, - /// The import name is the public symbol name, but skipping the leading ?, - /// @, or optionally _. - IMPORT_NAME_NOPREFIX = 2, - /// The import name is the public symbol name, but skipping the leading ?, - /// @, or optionally _, and truncating at the first @. - IMPORT_NAME_UNDECORATE = 3 - }; - - struct ImportHeader { - uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0). - uint16_t Sig2; ///< Must be 0xFFFF. - uint16_t Version; - uint16_t Machine; - uint32_t TimeDateStamp; - uint32_t SizeOfData; - uint16_t OrdinalHint; - uint16_t TypeInfo; - - ImportType getType() const { - return static_cast(TypeInfo & 0x3); - } - - ImportNameType getNameType() const { - return static_cast((TypeInfo & 0x1C) >> 2); - } - }; - - enum CodeViewIdentifiers { - DEBUG_SECTION_MAGIC = 0x4, - }; - - inline bool isReservedSectionNumber(int32_t SectionNumber) { - return SectionNumber <= 0; - } - -} // End namespace COFF. -} // End namespace llvm. - -#endif diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h index a73047b2b557fdf6381bcb457f468d6d76fe38ed..baa2a814e9a16b68038b1edc8178ca234f7f39ff 100644 --- a/include/llvm/Support/Casting.h +++ b/include/llvm/Support/Casting.h @@ -1,4 +1,4 @@ -//===-- llvm/Support/Casting.h - Allow flexible, checked, casts -*- C++ -*-===// +//===- llvm/Support/Casting.h - Allow flexible, checked, casts --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,6 +18,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/type_traits.h" #include +#include +#include namespace llvm { @@ -30,18 +32,19 @@ namespace llvm { // template selection process... the default implementation is a noop. // template struct simplify_type { - typedef From SimpleType; // The real type this represents... + using SimpleType = From; // The real type this represents... // An accessor to get the real value... static SimpleType &getSimplifiedValue(From &Val) { return Val; } }; template struct simplify_type { - typedef typename simplify_type::SimpleType NonConstSimpleType; - typedef typename add_const_past_pointer::type - SimpleType; - typedef typename add_lvalue_reference_if_not_pointer::type - RetType; + using NonConstSimpleType = typename simplify_type::SimpleType; + using SimpleType = + typename add_const_past_pointer::type; + using RetType = + typename add_lvalue_reference_if_not_pointer::type; + static RetType getSimplifiedValue(const From& Val) { return simplify_type::getSimplifiedValue(const_cast(Val)); } @@ -76,6 +79,14 @@ template struct isa_impl_cl { } }; +template +struct isa_impl_cl> { + static inline bool doit(const std::unique_ptr &Val) { + assert(Val && "isa<> used on a null pointer"); + return isa_impl_cl::doit(*Val); + } +}; + template struct isa_impl_cl { static inline bool doit(const From *Val) { assert(Val && "isa<> used on a null pointer"); @@ -139,47 +150,55 @@ template LLVM_NODISCARD inline bool isa(const Y &Val) { template struct cast_retty; - // Calculate what type the 'cast' function should return, based on a requested // type of To and a source type of From. template struct cast_retty_impl { - typedef To& ret_type; // Normal case, return Ty& + using ret_type = To &; // Normal case, return Ty& }; template struct cast_retty_impl { - typedef const To &ret_type; // Normal case, return Ty& + using ret_type = const To &; // Normal case, return Ty& }; template struct cast_retty_impl { - typedef To* ret_type; // Pointer arg case, return Ty* + using ret_type = To *; // Pointer arg case, return Ty* }; template struct cast_retty_impl { - typedef const To* ret_type; // Constant pointer arg case, return const Ty* + using ret_type = const To *; // Constant pointer arg case, return const Ty* }; template struct cast_retty_impl { - typedef const To* ret_type; // Constant pointer arg case, return const Ty* + using ret_type = const To *; // Constant pointer arg case, return const Ty* }; +template +struct cast_retty_impl> { +private: + using PointerType = typename cast_retty_impl::ret_type; + using ResultType = typename std::remove_pointer::type; + +public: + using ret_type = std::unique_ptr; +}; template struct cast_retty_wrap { // When the simplified type and the from type are not the same, use the type // simplifier to reduce the type, then reuse cast_retty_impl to get the // resultant type. - typedef typename cast_retty::ret_type ret_type; + using ret_type = typename cast_retty::ret_type; }; template struct cast_retty_wrap { // When the simplified type is equal to the from type, use it directly. - typedef typename cast_retty_impl::ret_type ret_type; + using ret_type = typename cast_retty_impl::ret_type; }; template struct cast_retty { - typedef typename cast_retty_wrap::SimpleType>::ret_type ret_type; + using ret_type = typename cast_retty_wrap< + To, From, typename simplify_type::SimpleType>::ret_type; }; // Ensure the non-simple values are converted using the simplify_type template @@ -238,6 +257,16 @@ inline typename cast_retty::ret_type cast(Y *Val) { typename simplify_type::SimpleType>::doit(Val); } +template +inline typename cast_retty>::ret_type +cast(std::unique_ptr &&Val) { + assert(isa(Val.get()) && "cast() argument of incompatible type!"); + using ret_type = typename cast_retty>::ret_type; + return ret_type( + cast_convert_val::SimpleType>::doit( + Val.release())); +} + // cast_or_null - Functionally identical to cast, except that a null value is // accepted. // @@ -271,6 +300,13 @@ cast_or_null(Y *Val) { return cast(Val); } +template +inline typename cast_retty>::ret_type +cast_or_null(std::unique_ptr &&Val) { + if (!Val) + return nullptr; + return cast(std::move(Val)); +} // dyn_cast - Return the argument parameter cast to the specified type. This // casting operator returns null if the argument is of the wrong type, so it can @@ -323,6 +359,41 @@ dyn_cast_or_null(Y *Val) { return (Val && isa(Val)) ? cast(Val) : nullptr; } -} // End llvm namespace +// unique_dyn_cast - Given a unique_ptr, try to return a unique_ptr, +// taking ownership of the input pointer iff isa(Val) is true. If the +// cast is successful, From refers to nullptr on exit and the casted value +// is returned. If the cast is unsuccessful, the function returns nullptr +// and From is unchanged. +template +LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr &Val) + -> decltype(cast(Val)) { + if (!isa(Val)) + return nullptr; + return cast(std::move(Val)); +} + +template +LLVM_NODISCARD inline auto unique_dyn_cast(std::unique_ptr &&Val) + -> decltype(cast(Val)) { + return unique_dyn_cast(Val); +} + +// dyn_cast_or_null - Functionally identical to unique_dyn_cast, except that +// a null value is accepted. +template +LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr &Val) + -> decltype(cast(Val)) { + if (!Val) + return nullptr; + return unique_dyn_cast(Val); +} + +template +LLVM_NODISCARD inline auto unique_dyn_cast_or_null(std::unique_ptr &&Val) + -> decltype(cast(Val)) { + return unique_dyn_cast_or_null(Val); +} + +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_CASTING_H diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index ae32e20d6daba932545aaa3d2742ff24bef7696f..771b0a8c26a988697b9924d204f393a5c0a482a3 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -21,18 +21,19 @@ #define LLVM_SUPPORT_COMMANDLINE_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include #include #include +#include #include #include #include @@ -41,6 +42,7 @@ namespace llvm { class StringSaver; +class raw_ostream; /// cl Namespace - This namespace contains all of the command line option /// processing machinery. It is intentionally a short name to make qualified @@ -64,12 +66,15 @@ bool ParseCommandLineOptions(int argc, const char *const *argv, void ParseEnvironmentOptions(const char *progName, const char *envvar, const char *Overview = ""); +// Function pointer type for printing version information. +using VersionPrinterTy = std::function; + ///===---------------------------------------------------------------------===// /// SetVersionPrinter - Override the default (LLVM specific) version printer /// used to print out the version when --version is given /// on the command line. This allows other systems using the /// CommandLine utilities to print their own version string. -void SetVersionPrinter(void (*func)()); +void SetVersionPrinter(VersionPrinterTy func); ///===---------------------------------------------------------------------===// /// AddExtraVersionPrinter - Add an extra printer to use in addition to the @@ -78,7 +83,7 @@ void SetVersionPrinter(void (*func)()); /// which will be called after the basic LLVM version /// printing is complete. Each can then add additional /// information specific to the tool. -void AddExtraVersionPrinter(void (*func)()); +void AddExtraVersionPrinter(VersionPrinterTy func); // PrintOptionValues - Print option values. // With -print-options print the difference between option values and defaults. @@ -242,7 +247,7 @@ class Option { // Out of line virtual function to provide home for the class. virtual void anchor(); - int NumOccurrences; // The number of times specified + int NumOccurrences = 0; // The number of times specified // Occurrences, HiddenFlag, and Formatting are all enum types but to avoid // problems with signed enums in bitfields. unsigned Occurrences : 3; // enum NumOccurrencesFlag @@ -252,8 +257,8 @@ class Option { unsigned HiddenFlag : 2; // enum OptionHidden unsigned Formatting : 2; // enum FormattingFlags unsigned Misc : 3; - unsigned Position; // Position of last occurrence of the option - unsigned AdditionalVals; // Greater than 0 for multi-valued option. + unsigned Position = 0; // Position of last occurrence of the option + unsigned AdditionalVals = 0; // Greater than 0 for multi-valued option. public: StringRef ArgStr; // The argument string itself (ex: "help", "o") @@ -261,7 +266,7 @@ public: StringRef ValueStr; // String describing what the value of this option is OptionCategory *Category; // The Category this option belongs to SmallPtrSet Subs; // The subcommands this option belongs to. - bool FullyInitialized; // Has addArguemnt been called? + bool FullyInitialized = false; // Has addArguemnt been called? inline enum NumOccurrencesFlag getNumOccurrencesFlag() const { return (enum NumOccurrencesFlag)Occurrences; @@ -316,10 +321,8 @@ public: protected: explicit Option(enum NumOccurrencesFlag OccurrencesFlag, enum OptionHidden Hidden) - : NumOccurrences(0), Occurrences(OccurrencesFlag), Value(0), - HiddenFlag(Hidden), Formatting(NormalFormatting), Misc(0), Position(0), - AdditionalVals(0), Category(&GeneralCategory), FullyInitialized(false) { - } + : Occurrences(OccurrencesFlag), Value(0), HiddenFlag(Hidden), + Formatting(NormalFormatting), Misc(0), Category(&GeneralCategory) {} inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; } @@ -447,8 +450,8 @@ struct GenericOptionValue { protected: GenericOptionValue() = default; GenericOptionValue(const GenericOptionValue&) = default; - ~GenericOptionValue() = default; GenericOptionValue &operator=(const GenericOptionValue &) = default; + ~GenericOptionValue() = default; private: virtual void anchor(); @@ -461,7 +464,7 @@ template struct OptionValue; template struct OptionValueBase : public GenericOptionValue { // Temporary storage for argument passing. - typedef OptionValue WrapperType; + using WrapperType = OptionValue; bool hasValue() const { return false; } @@ -487,8 +490,8 @@ template class OptionValueCopy : public GenericOptionValue { protected: OptionValueCopy(const OptionValueCopy&) = default; + OptionValueCopy &operator=(const OptionValueCopy &) = default; ~OptionValueCopy() = default; - OptionValueCopy &operator=(const OptionValueCopy&) = default; public: OptionValueCopy() = default; @@ -519,13 +522,13 @@ public: // Non-class option values. template struct OptionValueBase : OptionValueCopy { - typedef DataType WrapperType; + using WrapperType = DataType; protected: OptionValueBase() = default; OptionValueBase(const OptionValueBase&) = default; + OptionValueBase &operator=(const OptionValueBase &) = default; ~OptionValueBase() = default; - OptionValueBase &operator=(const OptionValueBase&) = default; }; // Top-level option class. @@ -548,7 +551,7 @@ enum boolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE }; template <> struct OptionValue final : OptionValueCopy { - typedef cl::boolOrDefault WrapperType; + using WrapperType = cl::boolOrDefault; OptionValue() = default; @@ -565,7 +568,7 @@ private: template <> struct OptionValue final : OptionValueCopy { - typedef StringRef WrapperType; + using WrapperType = StringRef; OptionValue() = default; @@ -736,13 +739,15 @@ protected: public: OptionInfo(StringRef name, DataType v, StringRef helpStr) : GenericOptionInfo(name, helpStr), V(v) {} + OptionValue V; }; SmallVector Values; public: parser(Option &O) : generic_parser_base(O) {} - typedef DataType parser_data_type; + + using parser_data_type = DataType; // Implement virtual functions needed by generic_parser_base unsigned getNumOptions() const override { return unsigned(Values.size()); } @@ -837,10 +842,10 @@ protected: // template class basic_parser : public basic_parser_impl { public: - basic_parser(Option &O) : basic_parser_impl(O) {} + using parser_data_type = DataType; + using OptVal = OptionValue; - typedef DataType parser_data_type; - typedef OptionValue OptVal; + basic_parser(Option &O) : basic_parser_impl(O) {} protected: ~basic_parser() = default; @@ -1292,6 +1297,7 @@ class opt : public Option, enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1300,6 +1306,7 @@ class opt : public Option, size_t getOptionWidth() const override { return Parser.getOptionWidth(*this); } + void printOptionInfo(size_t GlobalWidth) const override { Parser.printOptionInfo(*this, GlobalWidth); } @@ -1384,16 +1391,18 @@ template class list_storage { std::vector Storage; public: - typedef typename std::vector::iterator iterator; + using iterator = typename std::vector::iterator; iterator begin() { return Storage.begin(); } iterator end() { return Storage.end(); } - typedef typename std::vector::const_iterator const_iterator; + using const_iterator = typename std::vector::const_iterator; + const_iterator begin() const { return Storage.begin(); } const_iterator end() const { return Storage.end(); } - typedef typename std::vector::size_type size_type; + using size_type = typename std::vector::size_type; + size_type size() const { return Storage.size(); } bool empty() const { return Storage.empty(); } @@ -1401,8 +1410,9 @@ public: void push_back(const DataType &value) { Storage.push_back(value); } void push_back(DataType &&value) { Storage.push_back(value); } - typedef typename std::vector::reference reference; - typedef typename std::vector::const_reference const_reference; + using reference = typename std::vector::reference; + using const_reference = typename std::vector::const_reference; + reference operator[](size_type pos) { return Storage[pos]; } const_reference operator[](size_type pos) const { return Storage[pos]; } @@ -1453,6 +1463,7 @@ class list : public Option, public list_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1473,6 +1484,7 @@ class list : public Option, public list_storage { size_t getOptionWidth() const override { return Parser.getOptionWidth(*this); } + void printOptionInfo(size_t GlobalWidth) const override { Parser.printOptionInfo(*this, GlobalWidth); } @@ -1592,6 +1604,7 @@ class bits : public Option, public bits_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1612,6 +1625,7 @@ class bits : public Option, public bits_storage { size_t getOptionWidth() const override { return Parser.getOptionWidth(*this); } + void printOptionInfo(size_t GlobalWidth) const override { Parser.printOptionInfo(*this, GlobalWidth); } @@ -1824,9 +1838,9 @@ void TokenizeWindowsCommandLine(StringRef Source, StringSaver &Saver, /// \brief String tokenization function type. Should be compatible with either /// Windows or Unix command line tokenizers. -typedef void (*TokenizerCallback)(StringRef Source, StringSaver &Saver, - SmallVectorImpl &NewArgv, - bool MarkEOLs); +using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver, + SmallVectorImpl &NewArgv, + bool MarkEOLs); /// \brief Expand response files on a command line recursively using the given /// StringSaver and tokenization strategy. Argv should contain the command line @@ -1880,6 +1894,7 @@ void ResetAllOptionOccurrences(); void ResetCommandLineParser(); } // end namespace cl + } // end namespace llvm #endif // LLVM_SUPPORT_COMMANDLINE_H diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index a56bc93e111b6e24c801f35f67f1fd3b085aef7c..be9e465400165a2a9f67518c9626badefb946f6d 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -111,12 +111,6 @@ #define LLVM_PREFETCH(addr, rw, locality) #endif -#if __has_attribute(sentinel) || LLVM_GNUC_PREREQ(3, 0, 0) -#define LLVM_END_WITH_NULL __attribute__((sentinel)) -#else -#define LLVM_END_WITH_NULL -#endif - #if __has_attribute(used) || LLVM_GNUC_PREREQ(3, 1, 0) #define LLVM_ATTRIBUTE_USED __attribute__((__used__)) #else @@ -233,6 +227,8 @@ /// LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements. #if __cplusplus > 201402L && __has_cpp_attribute(fallthrough) #define LLVM_FALLTHROUGH [[fallthrough]] +#elif __has_cpp_attribute(gnu::fallthrough) +#define LLVM_FALLTHROUGH [[gnu::fallthrough]] #elif !__cplusplus // Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious // error when __has_cpp_attribute is given a scoped attribute in C mode. diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h index f714c0ed997ed87367e8b8a4f0aeb568f56fef0a..bd439f360216973d7b75c142e100c81032a405c0 100644 --- a/include/llvm/Support/ConvertUTF.h +++ b/include/llvm/Support/ConvertUTF.h @@ -90,8 +90,8 @@ #ifndef LLVM_SUPPORT_CONVERTUTF_H #define LLVM_SUPPORT_CONVERTUTF_H -#include #include +#include // Wrap everything in namespace llvm so that programs can link with llvm and // their own version of the unicode libraries. diff --git a/include/llvm/Support/DataExtractor.h b/include/llvm/Support/DataExtractor.h index 2d1180c228e31e1a825c544d68af37f09019e552..380b628fd95ff643377f2cdf677b1fb4d8913e36 100644 --- a/include/llvm/Support/DataExtractor.h +++ b/include/llvm/Support/DataExtractor.h @@ -58,6 +58,28 @@ public: /// NULL will be returned. const char *getCStr(uint32_t *offset_ptr) const; + /// Extract a C string from \a *OffsetPtr. + /// + /// Returns a StringRef for the C String from the data at the offset + /// pointed to by \a OffsetPtr. A variable length NULL terminated C + /// string will be extracted and the \a OffsetPtr will be + /// updated with the offset of the byte that follows the NULL + /// terminator byte. + /// + /// \param[in,out] OffsetPtr + /// A pointer to an offset within the data that will be advanced + /// by the appropriate number of bytes if the value is extracted + /// correctly. If the offset is out of bounds or there are not + /// enough bytes to extract this value, the offset will be left + /// unmodified. + /// + /// \return + /// A StringRef for the C string value in the data. If the offset + /// pointed to by \a OffsetPtr is out of bounds, or if the + /// offset plus the length of the C string is out of bounds, + /// a default-initialized StringRef will be returned. + StringRef getCStrRef(uint32_t *OffsetPtr) const; + /// Extract an unsigned integer of size \a byte_size from \a /// *offset_ptr. /// diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake index 541dbc3d635d7abcb61465e1ab42baba658f37ba..a58e2e454b7d1749d3b7a3cdaa41b74af32ddb42 100644 --- a/include/llvm/Support/DataTypes.h.cmake +++ b/include/llvm/Support/DataTypes.h.cmake @@ -85,11 +85,11 @@ typedef u_int64_t uint64_t; #else /* _MSC_VER */ #ifdef __cplusplus -#include #include +#include #else -#include #include +#include #endif #include diff --git a/include/llvm/Support/DebugCounter.h b/include/llvm/Support/DebugCounter.h index 9687cb7b9d95fa835b55e7a0c9991f04e94e4695..a533feae7fa38e1a1d98345520374b5b57654d23 100644 --- a/include/llvm/Support/DebugCounter.h +++ b/include/llvm/Support/DebugCounter.h @@ -121,10 +121,10 @@ public: Us.Counters[ID] = Val; } - // Dump or print the current counter set. - LLVM_DUMP_METHOD void dump() { print(dbgs()); } + // Dump or print the current counter set into llvm::dbgs(). + LLVM_DUMP_METHOD void dump() const; - void print(raw_ostream &OS); + void print(raw_ostream &OS) const; // Get the counter ID for a given named counter, or return 0 if none is found. unsigned getCounterId(const std::string &Name) const { diff --git a/include/llvm/Support/Dwarf.def b/include/llvm/Support/Dwarf.def deleted file mode 100644 index fdbd8ea7011669ac1cfd19123abd4e0d66d9a515..0000000000000000000000000000000000000000 --- a/include/llvm/Support/Dwarf.def +++ /dev/null @@ -1,838 +0,0 @@ -//===- llvm/Support/Dwarf.def - Dwarf definitions ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Macros for running through Dwarf enumerators. -// -//===----------------------------------------------------------------------===// - -// TODO: Add other DW-based macros. -#if !(defined HANDLE_DW_TAG || defined HANDLE_DW_AT || \ - defined HANDLE_DW_FORM || defined HANDLE_DW_OP || \ - defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \ - defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \ - defined HANDLE_DW_CC || defined HANDLE_DW_LNS || \ - defined HANDLE_DW_LNE || defined HANDLE_DW_LNCT || \ - defined HANDLE_DW_MACRO || defined HANDLE_DW_RLE || \ - defined HANDLE_DW_CFA || defined HANDLE_DW_APPLE_PROPERTY || \ - defined HANDLE_DW_UT) -#error "Missing macro definition of HANDLE_DW*" -#endif - -#ifndef HANDLE_DW_TAG -#define HANDLE_DW_TAG(ID, NAME) -#endif - -#ifndef HANDLE_DW_AT -#define HANDLE_DW_AT(ID, NAME) -#endif - -#ifndef HANDLE_DW_FORM -#define HANDLE_DW_FORM(ID, NAME) -#endif - -#ifndef HANDLE_DW_OP -#define HANDLE_DW_OP(ID, NAME) -#endif - -#ifndef HANDLE_DW_LANG -#define HANDLE_DW_LANG(ID, NAME) -#endif - -#ifndef HANDLE_DW_ATE -#define HANDLE_DW_ATE(ID, NAME) -#endif - -#ifndef HANDLE_DW_VIRTUALITY -#define HANDLE_DW_VIRTUALITY(ID, NAME) -#endif - -#ifndef HANDLE_DW_DEFAULTED -#define HANDLE_DW_DEFAULTED(ID, NAME) -#endif - -#ifndef HANDLE_DW_CC -#define HANDLE_DW_CC(ID, NAME) -#endif - -#ifndef HANDLE_DW_LNS -#define HANDLE_DW_LNS(ID, NAME) -#endif - -#ifndef HANDLE_DW_LNE -#define HANDLE_DW_LNE(ID, NAME) -#endif - -#ifndef HANDLE_DW_LNCT -#define HANDLE_DW_LNCT(ID, NAME) -#endif - -#ifndef HANDLE_DW_MACRO -#define HANDLE_DW_MACRO(ID, NAME) -#endif - -#ifndef HANDLE_DW_RLE -#define HANDLE_DW_RLE(ID, NAME) -#endif - -#ifndef HANDLE_DW_CFA -#define HANDLE_DW_CFA(ID, NAME) -#endif - -#ifndef HANDLE_DW_APPLE_PROPERTY -#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) -#endif - -#ifndef HANDLE_DW_UT -#define HANDLE_DW_UT(ID, NAME) -#endif - -HANDLE_DW_TAG(0x0000, null) -HANDLE_DW_TAG(0x0001, array_type) -HANDLE_DW_TAG(0x0002, class_type) -HANDLE_DW_TAG(0x0003, entry_point) -HANDLE_DW_TAG(0x0004, enumeration_type) -HANDLE_DW_TAG(0x0005, formal_parameter) -HANDLE_DW_TAG(0x0008, imported_declaration) -HANDLE_DW_TAG(0x000a, label) -HANDLE_DW_TAG(0x000b, lexical_block) -HANDLE_DW_TAG(0x000d, member) -HANDLE_DW_TAG(0x000f, pointer_type) -HANDLE_DW_TAG(0x0010, reference_type) -HANDLE_DW_TAG(0x0011, compile_unit) -HANDLE_DW_TAG(0x0012, string_type) -HANDLE_DW_TAG(0x0013, structure_type) -HANDLE_DW_TAG(0x0015, subroutine_type) -HANDLE_DW_TAG(0x0016, typedef) -HANDLE_DW_TAG(0x0017, union_type) -HANDLE_DW_TAG(0x0018, unspecified_parameters) -HANDLE_DW_TAG(0x0019, variant) -HANDLE_DW_TAG(0x001a, common_block) -HANDLE_DW_TAG(0x001b, common_inclusion) -HANDLE_DW_TAG(0x001c, inheritance) -HANDLE_DW_TAG(0x001d, inlined_subroutine) -HANDLE_DW_TAG(0x001e, module) -HANDLE_DW_TAG(0x001f, ptr_to_member_type) -HANDLE_DW_TAG(0x0020, set_type) -HANDLE_DW_TAG(0x0021, subrange_type) -HANDLE_DW_TAG(0x0022, with_stmt) -HANDLE_DW_TAG(0x0023, access_declaration) -HANDLE_DW_TAG(0x0024, base_type) -HANDLE_DW_TAG(0x0025, catch_block) -HANDLE_DW_TAG(0x0026, const_type) -HANDLE_DW_TAG(0x0027, constant) -HANDLE_DW_TAG(0x0028, enumerator) -HANDLE_DW_TAG(0x0029, file_type) -HANDLE_DW_TAG(0x002a, friend) -HANDLE_DW_TAG(0x002b, namelist) -HANDLE_DW_TAG(0x002c, namelist_item) -HANDLE_DW_TAG(0x002d, packed_type) -HANDLE_DW_TAG(0x002e, subprogram) -HANDLE_DW_TAG(0x002f, template_type_parameter) -HANDLE_DW_TAG(0x0030, template_value_parameter) -HANDLE_DW_TAG(0x0031, thrown_type) -HANDLE_DW_TAG(0x0032, try_block) -HANDLE_DW_TAG(0x0033, variant_part) -HANDLE_DW_TAG(0x0034, variable) -HANDLE_DW_TAG(0x0035, volatile_type) -// New in DWARF v3: -HANDLE_DW_TAG(0x0036, dwarf_procedure) -HANDLE_DW_TAG(0x0037, restrict_type) -HANDLE_DW_TAG(0x0038, interface_type) -HANDLE_DW_TAG(0x0039, namespace) -HANDLE_DW_TAG(0x003a, imported_module) -HANDLE_DW_TAG(0x003b, unspecified_type) -HANDLE_DW_TAG(0x003c, partial_unit) -HANDLE_DW_TAG(0x003d, imported_unit) -HANDLE_DW_TAG(0x003f, condition) -HANDLE_DW_TAG(0x0040, shared_type) -// New in DWARF v4: -HANDLE_DW_TAG(0x0041, type_unit) -HANDLE_DW_TAG(0x0042, rvalue_reference_type) -HANDLE_DW_TAG(0x0043, template_alias) -// New in DWARF v5: -HANDLE_DW_TAG(0x0044, coarray_type) -HANDLE_DW_TAG(0x0045, generic_subrange) -HANDLE_DW_TAG(0x0046, dynamic_type) -HANDLE_DW_TAG(0x0047, atomic_type) -HANDLE_DW_TAG(0x0048, call_site) -HANDLE_DW_TAG(0x0049, call_site_parameter) -HANDLE_DW_TAG(0x004a, skeleton_unit) -HANDLE_DW_TAG(0x004b, immutable_type) -// Vendor extensions: -HANDLE_DW_TAG(0x4081, MIPS_loop) -HANDLE_DW_TAG(0x4101, format_label) -HANDLE_DW_TAG(0x4102, function_template) -HANDLE_DW_TAG(0x4103, class_template) -HANDLE_DW_TAG(0x4106, GNU_template_template_param) -HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack) -HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack) -HANDLE_DW_TAG(0x4200, APPLE_property) -HANDLE_DW_TAG(0xb000, BORLAND_property) -HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string) -HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array) -HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set) -HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant) - -// Attributes. -HANDLE_DW_AT(0x01, sibling) -HANDLE_DW_AT(0x02, location) -HANDLE_DW_AT(0x03, name) -HANDLE_DW_AT(0x09, ordering) -HANDLE_DW_AT(0x0b, byte_size) -HANDLE_DW_AT(0x0c, bit_offset) -HANDLE_DW_AT(0x0d, bit_size) -HANDLE_DW_AT(0x10, stmt_list) -HANDLE_DW_AT(0x11, low_pc) -HANDLE_DW_AT(0x12, high_pc) -HANDLE_DW_AT(0x13, language) -HANDLE_DW_AT(0x15, discr) -HANDLE_DW_AT(0x16, discr_value) -HANDLE_DW_AT(0x17, visibility) -HANDLE_DW_AT(0x18, import) -HANDLE_DW_AT(0x19, string_length) -HANDLE_DW_AT(0x1a, common_reference) -HANDLE_DW_AT(0x1b, comp_dir) -HANDLE_DW_AT(0x1c, const_value) -HANDLE_DW_AT(0x1d, containing_type) -HANDLE_DW_AT(0x1e, default_value) -HANDLE_DW_AT(0x20, inline) -HANDLE_DW_AT(0x21, is_optional) -HANDLE_DW_AT(0x22, lower_bound) -HANDLE_DW_AT(0x25, producer) -HANDLE_DW_AT(0x27, prototyped) -HANDLE_DW_AT(0x2a, return_addr) -HANDLE_DW_AT(0x2c, start_scope) -HANDLE_DW_AT(0x2e, bit_stride) -HANDLE_DW_AT(0x2f, upper_bound) -HANDLE_DW_AT(0x31, abstract_origin) -HANDLE_DW_AT(0x32, accessibility) -HANDLE_DW_AT(0x33, address_class) -HANDLE_DW_AT(0x34, artificial) -HANDLE_DW_AT(0x35, base_types) -HANDLE_DW_AT(0x36, calling_convention) -HANDLE_DW_AT(0x37, count) -HANDLE_DW_AT(0x38, data_member_location) -HANDLE_DW_AT(0x39, decl_column) -HANDLE_DW_AT(0x3a, decl_file) -HANDLE_DW_AT(0x3b, decl_line) -HANDLE_DW_AT(0x3c, declaration) -HANDLE_DW_AT(0x3d, discr_list) -HANDLE_DW_AT(0x3e, encoding) -HANDLE_DW_AT(0x3f, external) -HANDLE_DW_AT(0x40, frame_base) -HANDLE_DW_AT(0x41, friend) -HANDLE_DW_AT(0x42, identifier_case) -HANDLE_DW_AT(0x43, macro_info) -HANDLE_DW_AT(0x44, namelist_item) -HANDLE_DW_AT(0x45, priority) -HANDLE_DW_AT(0x46, segment) -HANDLE_DW_AT(0x47, specification) -HANDLE_DW_AT(0x48, static_link) -HANDLE_DW_AT(0x49, type) -HANDLE_DW_AT(0x4a, use_location) -HANDLE_DW_AT(0x4b, variable_parameter) -HANDLE_DW_AT(0x4c, virtuality) -HANDLE_DW_AT(0x4d, vtable_elem_location) -// New in DWARF v3: -HANDLE_DW_AT(0x4e, allocated) -HANDLE_DW_AT(0x4f, associated) -HANDLE_DW_AT(0x50, data_location) -HANDLE_DW_AT(0x51, byte_stride) -HANDLE_DW_AT(0x52, entry_pc) -HANDLE_DW_AT(0x53, use_UTF8) -HANDLE_DW_AT(0x54, extension) -HANDLE_DW_AT(0x55, ranges) -HANDLE_DW_AT(0x56, trampoline) -HANDLE_DW_AT(0x57, call_column) -HANDLE_DW_AT(0x58, call_file) -HANDLE_DW_AT(0x59, call_line) -HANDLE_DW_AT(0x5a, description) -HANDLE_DW_AT(0x5b, binary_scale) -HANDLE_DW_AT(0x5c, decimal_scale) -HANDLE_DW_AT(0x5d, small) -HANDLE_DW_AT(0x5e, decimal_sign) -HANDLE_DW_AT(0x5f, digit_count) -HANDLE_DW_AT(0x60, picture_string) -HANDLE_DW_AT(0x61, mutable) -HANDLE_DW_AT(0x62, threads_scaled) -HANDLE_DW_AT(0x63, explicit) -HANDLE_DW_AT(0x64, object_pointer) -HANDLE_DW_AT(0x65, endianity) -HANDLE_DW_AT(0x66, elemental) -HANDLE_DW_AT(0x67, pure) -HANDLE_DW_AT(0x68, recursive) -// New in DWARF v4: -HANDLE_DW_AT(0x69, signature) -HANDLE_DW_AT(0x6a, main_subprogram) -HANDLE_DW_AT(0x6b, data_bit_offset) -HANDLE_DW_AT(0x6c, const_expr) -HANDLE_DW_AT(0x6d, enum_class) -HANDLE_DW_AT(0x6e, linkage_name) -// New in DWARF v5: -HANDLE_DW_AT(0x6f, string_length_bit_size) -HANDLE_DW_AT(0x70, string_length_byte_size) -HANDLE_DW_AT(0x71, rank) -HANDLE_DW_AT(0x72, str_offsets_base) -HANDLE_DW_AT(0x73, addr_base) -HANDLE_DW_AT(0x74, rnglists_base) -HANDLE_DW_AT(0x75, dwo_id) ///< Retracted from DWARF 5. -HANDLE_DW_AT(0x76, dwo_name) -HANDLE_DW_AT(0x77, reference) -HANDLE_DW_AT(0x78, rvalue_reference) -HANDLE_DW_AT(0x79, macros) -HANDLE_DW_AT(0x7a, call_all_calls) -HANDLE_DW_AT(0x7b, call_all_source_calls) -HANDLE_DW_AT(0x7c, call_all_tail_calls) -HANDLE_DW_AT(0x7d, call_return_pc) -HANDLE_DW_AT(0x7e, call_value) -HANDLE_DW_AT(0x7f, call_origin) -HANDLE_DW_AT(0x80, call_parameter) -HANDLE_DW_AT(0x81, call_pc) -HANDLE_DW_AT(0x82, call_tail_call) -HANDLE_DW_AT(0x83, call_target) -HANDLE_DW_AT(0x84, call_target_clobbered) -HANDLE_DW_AT(0x85, call_data_location) -HANDLE_DW_AT(0x86, call_data_value) -HANDLE_DW_AT(0x87, noreturn) -HANDLE_DW_AT(0x88, alignment) -HANDLE_DW_AT(0x89, export_symbols) -HANDLE_DW_AT(0x8a, deleted) -HANDLE_DW_AT(0x8b, defaulted) -HANDLE_DW_AT(0x8c, loclists_base) -// Vendor extensions: -HANDLE_DW_AT(0x2002, MIPS_loop_begin) -HANDLE_DW_AT(0x2003, MIPS_tail_loop_begin) -HANDLE_DW_AT(0x2004, MIPS_epilog_begin) -HANDLE_DW_AT(0x2005, MIPS_loop_unroll_factor) -HANDLE_DW_AT(0x2006, MIPS_software_pipeline_depth) -HANDLE_DW_AT(0x2007, MIPS_linkage_name) -HANDLE_DW_AT(0x2008, MIPS_stride) -HANDLE_DW_AT(0x2009, MIPS_abstract_name) -HANDLE_DW_AT(0x200a, MIPS_clone_origin) -HANDLE_DW_AT(0x200b, MIPS_has_inlines) -HANDLE_DW_AT(0x200c, MIPS_stride_byte) -HANDLE_DW_AT(0x200d, MIPS_stride_elem) -HANDLE_DW_AT(0x200e, MIPS_ptr_dopetype) -HANDLE_DW_AT(0x200f, MIPS_allocatable_dopetype) -HANDLE_DW_AT(0x2010, MIPS_assumed_shape_dopetype) -// This one appears to have only been implemented by Open64 for -// fortran and may conflict with other extensions. -HANDLE_DW_AT(0x2011, MIPS_assumed_size) -// GNU extensions -HANDLE_DW_AT(0x2101, sf_names) -HANDLE_DW_AT(0x2102, src_info) -HANDLE_DW_AT(0x2103, mac_info) -HANDLE_DW_AT(0x2104, src_coords) -HANDLE_DW_AT(0x2105, body_begin) -HANDLE_DW_AT(0x2106, body_end) -HANDLE_DW_AT(0x2107, GNU_vector) -HANDLE_DW_AT(0x2110, GNU_template_name) -HANDLE_DW_AT(0x210f, GNU_odr_signature) -HANDLE_DW_AT(0x2119, GNU_macros) -// Extensions for Fission proposal. -HANDLE_DW_AT(0x2130, GNU_dwo_name) -HANDLE_DW_AT(0x2131, GNU_dwo_id) -HANDLE_DW_AT(0x2132, GNU_ranges_base) -HANDLE_DW_AT(0x2133, GNU_addr_base) -HANDLE_DW_AT(0x2134, GNU_pubnames) -HANDLE_DW_AT(0x2135, GNU_pubtypes) -HANDLE_DW_AT(0x2136, GNU_discriminator) -// Borland extensions. -HANDLE_DW_AT(0x3b11, BORLAND_property_read) -HANDLE_DW_AT(0x3b12, BORLAND_property_write) -HANDLE_DW_AT(0x3b13, BORLAND_property_implements) -HANDLE_DW_AT(0x3b14, BORLAND_property_index) -HANDLE_DW_AT(0x3b15, BORLAND_property_default) -HANDLE_DW_AT(0x3b20, BORLAND_Delphi_unit) -HANDLE_DW_AT(0x3b21, BORLAND_Delphi_class) -HANDLE_DW_AT(0x3b22, BORLAND_Delphi_record) -HANDLE_DW_AT(0x3b23, BORLAND_Delphi_metaclass) -HANDLE_DW_AT(0x3b24, BORLAND_Delphi_constructor) -HANDLE_DW_AT(0x3b25, BORLAND_Delphi_destructor) -HANDLE_DW_AT(0x3b26, BORLAND_Delphi_anonymous_method) -HANDLE_DW_AT(0x3b27, BORLAND_Delphi_interface) -HANDLE_DW_AT(0x3b28, BORLAND_Delphi_ABI) -HANDLE_DW_AT(0x3b29, BORLAND_Delphi_return) -HANDLE_DW_AT(0x3b30, BORLAND_Delphi_frameptr) -HANDLE_DW_AT(0x3b31, BORLAND_closure) -// LLVM project extensions. -HANDLE_DW_AT(0x3e00, LLVM_include_path) -HANDLE_DW_AT(0x3e01, LLVM_config_macros) -HANDLE_DW_AT(0x3e02, LLVM_isysroot) -// Apple extensions. -HANDLE_DW_AT(0x3fe1, APPLE_optimized) -HANDLE_DW_AT(0x3fe2, APPLE_flags) -HANDLE_DW_AT(0x3fe3, APPLE_isa) -HANDLE_DW_AT(0x3fe4, APPLE_block) -HANDLE_DW_AT(0x3fe5, APPLE_major_runtime_vers) -HANDLE_DW_AT(0x3fe6, APPLE_runtime_class) -HANDLE_DW_AT(0x3fe7, APPLE_omit_frame_ptr) -HANDLE_DW_AT(0x3fe8, APPLE_property_name) -HANDLE_DW_AT(0x3fe9, APPLE_property_getter) -HANDLE_DW_AT(0x3fea, APPLE_property_setter) -HANDLE_DW_AT(0x3feb, APPLE_property_attribute) -HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type) -HANDLE_DW_AT(0x3fed, APPLE_property) - -// Attribute form encodings. -HANDLE_DW_FORM(0x01, addr) -HANDLE_DW_FORM(0x03, block2) -HANDLE_DW_FORM(0x04, block4) -HANDLE_DW_FORM(0x05, data2) -HANDLE_DW_FORM(0x06, data4) -HANDLE_DW_FORM(0x07, data8) -HANDLE_DW_FORM(0x08, string) -HANDLE_DW_FORM(0x09, block) -HANDLE_DW_FORM(0x0a, block1) -HANDLE_DW_FORM(0x0b, data1) -HANDLE_DW_FORM(0x0c, flag) -HANDLE_DW_FORM(0x0d, sdata) -HANDLE_DW_FORM(0x0e, strp) -HANDLE_DW_FORM(0x0f, udata) -HANDLE_DW_FORM(0x10, ref_addr) -HANDLE_DW_FORM(0x11, ref1) -HANDLE_DW_FORM(0x12, ref2) -HANDLE_DW_FORM(0x13, ref4) -HANDLE_DW_FORM(0x14, ref8) -HANDLE_DW_FORM(0x15, ref_udata) -HANDLE_DW_FORM(0x16, indirect) -// New in DWARF v4: -HANDLE_DW_FORM(0x17, sec_offset) -HANDLE_DW_FORM(0x18, exprloc) -HANDLE_DW_FORM(0x19, flag_present) -// This was defined out of sequence. -HANDLE_DW_FORM(0x20, ref_sig8) -// New in DWARF v5: -HANDLE_DW_FORM(0x1a, strx) -HANDLE_DW_FORM(0x1b, addrx) -HANDLE_DW_FORM(0x1c, ref_sup4) -HANDLE_DW_FORM(0x1d, strp_sup) -HANDLE_DW_FORM(0x1e, data16) -HANDLE_DW_FORM(0x1f, line_strp) -HANDLE_DW_FORM(0x21, implicit_const) -HANDLE_DW_FORM(0x22, loclistx) -HANDLE_DW_FORM(0x23, rnglistx) -HANDLE_DW_FORM(0x24, ref_sup8) -HANDLE_DW_FORM(0x25, strx1) -HANDLE_DW_FORM(0x26, strx2) -HANDLE_DW_FORM(0x27, strx3) -HANDLE_DW_FORM(0x28, strx4) -HANDLE_DW_FORM(0x29, addrx1) -HANDLE_DW_FORM(0x2a, addrx2) -HANDLE_DW_FORM(0x2b, addrx3) -HANDLE_DW_FORM(0x2c, addrx4) -// Extensions for Fission proposal -HANDLE_DW_FORM(0x1f01, GNU_addr_index) -HANDLE_DW_FORM(0x1f02, GNU_str_index) -// Alternate debug sections proposal (output of "dwz" tool). -HANDLE_DW_FORM(0x1f20, GNU_ref_alt) -HANDLE_DW_FORM(0x1f21, GNU_strp_alt) - -// DWARF Expression operators. -HANDLE_DW_OP(0x03, addr) -HANDLE_DW_OP(0x06, deref) -HANDLE_DW_OP(0x08, const1u) -HANDLE_DW_OP(0x09, const1s) -HANDLE_DW_OP(0x0a, const2u) -HANDLE_DW_OP(0x0b, const2s) -HANDLE_DW_OP(0x0c, const4u) -HANDLE_DW_OP(0x0d, const4s) -HANDLE_DW_OP(0x0e, const8u) -HANDLE_DW_OP(0x0f, const8s) -HANDLE_DW_OP(0x10, constu) -HANDLE_DW_OP(0x11, consts) -HANDLE_DW_OP(0x12, dup) -HANDLE_DW_OP(0x13, drop) -HANDLE_DW_OP(0x14, over) -HANDLE_DW_OP(0x15, pick) -HANDLE_DW_OP(0x16, swap) -HANDLE_DW_OP(0x17, rot) -HANDLE_DW_OP(0x18, xderef) -HANDLE_DW_OP(0x19, abs) -HANDLE_DW_OP(0x1a, and) -HANDLE_DW_OP(0x1b, div) -HANDLE_DW_OP(0x1c, minus) -HANDLE_DW_OP(0x1d, mod) -HANDLE_DW_OP(0x1e, mul) -HANDLE_DW_OP(0x1f, neg) -HANDLE_DW_OP(0x20, not) -HANDLE_DW_OP(0x21, or) -HANDLE_DW_OP(0x22, plus) -HANDLE_DW_OP(0x23, plus_uconst) -HANDLE_DW_OP(0x24, shl) -HANDLE_DW_OP(0x25, shr) -HANDLE_DW_OP(0x26, shra) -HANDLE_DW_OP(0x27, xor) -HANDLE_DW_OP(0x28, bra) -HANDLE_DW_OP(0x29, eq) -HANDLE_DW_OP(0x2a, ge) -HANDLE_DW_OP(0x2b, gt) -HANDLE_DW_OP(0x2c, le) -HANDLE_DW_OP(0x2d, lt) -HANDLE_DW_OP(0x2e, ne) -HANDLE_DW_OP(0x2f, skip) -HANDLE_DW_OP(0x30, lit0) -HANDLE_DW_OP(0x31, lit1) -HANDLE_DW_OP(0x32, lit2) -HANDLE_DW_OP(0x33, lit3) -HANDLE_DW_OP(0x34, lit4) -HANDLE_DW_OP(0x35, lit5) -HANDLE_DW_OP(0x36, lit6) -HANDLE_DW_OP(0x37, lit7) -HANDLE_DW_OP(0x38, lit8) -HANDLE_DW_OP(0x39, lit9) -HANDLE_DW_OP(0x3a, lit10) -HANDLE_DW_OP(0x3b, lit11) -HANDLE_DW_OP(0x3c, lit12) -HANDLE_DW_OP(0x3d, lit13) -HANDLE_DW_OP(0x3e, lit14) -HANDLE_DW_OP(0x3f, lit15) -HANDLE_DW_OP(0x40, lit16) -HANDLE_DW_OP(0x41, lit17) -HANDLE_DW_OP(0x42, lit18) -HANDLE_DW_OP(0x43, lit19) -HANDLE_DW_OP(0x44, lit20) -HANDLE_DW_OP(0x45, lit21) -HANDLE_DW_OP(0x46, lit22) -HANDLE_DW_OP(0x47, lit23) -HANDLE_DW_OP(0x48, lit24) -HANDLE_DW_OP(0x49, lit25) -HANDLE_DW_OP(0x4a, lit26) -HANDLE_DW_OP(0x4b, lit27) -HANDLE_DW_OP(0x4c, lit28) -HANDLE_DW_OP(0x4d, lit29) -HANDLE_DW_OP(0x4e, lit30) -HANDLE_DW_OP(0x4f, lit31) -HANDLE_DW_OP(0x50, reg0) -HANDLE_DW_OP(0x51, reg1) -HANDLE_DW_OP(0x52, reg2) -HANDLE_DW_OP(0x53, reg3) -HANDLE_DW_OP(0x54, reg4) -HANDLE_DW_OP(0x55, reg5) -HANDLE_DW_OP(0x56, reg6) -HANDLE_DW_OP(0x57, reg7) -HANDLE_DW_OP(0x58, reg8) -HANDLE_DW_OP(0x59, reg9) -HANDLE_DW_OP(0x5a, reg10) -HANDLE_DW_OP(0x5b, reg11) -HANDLE_DW_OP(0x5c, reg12) -HANDLE_DW_OP(0x5d, reg13) -HANDLE_DW_OP(0x5e, reg14) -HANDLE_DW_OP(0x5f, reg15) -HANDLE_DW_OP(0x60, reg16) -HANDLE_DW_OP(0x61, reg17) -HANDLE_DW_OP(0x62, reg18) -HANDLE_DW_OP(0x63, reg19) -HANDLE_DW_OP(0x64, reg20) -HANDLE_DW_OP(0x65, reg21) -HANDLE_DW_OP(0x66, reg22) -HANDLE_DW_OP(0x67, reg23) -HANDLE_DW_OP(0x68, reg24) -HANDLE_DW_OP(0x69, reg25) -HANDLE_DW_OP(0x6a, reg26) -HANDLE_DW_OP(0x6b, reg27) -HANDLE_DW_OP(0x6c, reg28) -HANDLE_DW_OP(0x6d, reg29) -HANDLE_DW_OP(0x6e, reg30) -HANDLE_DW_OP(0x6f, reg31) -HANDLE_DW_OP(0x70, breg0) -HANDLE_DW_OP(0x71, breg1) -HANDLE_DW_OP(0x72, breg2) -HANDLE_DW_OP(0x73, breg3) -HANDLE_DW_OP(0x74, breg4) -HANDLE_DW_OP(0x75, breg5) -HANDLE_DW_OP(0x76, breg6) -HANDLE_DW_OP(0x77, breg7) -HANDLE_DW_OP(0x78, breg8) -HANDLE_DW_OP(0x79, breg9) -HANDLE_DW_OP(0x7a, breg10) -HANDLE_DW_OP(0x7b, breg11) -HANDLE_DW_OP(0x7c, breg12) -HANDLE_DW_OP(0x7d, breg13) -HANDLE_DW_OP(0x7e, breg14) -HANDLE_DW_OP(0x7f, breg15) -HANDLE_DW_OP(0x80, breg16) -HANDLE_DW_OP(0x81, breg17) -HANDLE_DW_OP(0x82, breg18) -HANDLE_DW_OP(0x83, breg19) -HANDLE_DW_OP(0x84, breg20) -HANDLE_DW_OP(0x85, breg21) -HANDLE_DW_OP(0x86, breg22) -HANDLE_DW_OP(0x87, breg23) -HANDLE_DW_OP(0x88, breg24) -HANDLE_DW_OP(0x89, breg25) -HANDLE_DW_OP(0x8a, breg26) -HANDLE_DW_OP(0x8b, breg27) -HANDLE_DW_OP(0x8c, breg28) -HANDLE_DW_OP(0x8d, breg29) -HANDLE_DW_OP(0x8e, breg30) -HANDLE_DW_OP(0x8f, breg31) -HANDLE_DW_OP(0x90, regx) -HANDLE_DW_OP(0x91, fbreg) -HANDLE_DW_OP(0x92, bregx) -HANDLE_DW_OP(0x93, piece) -HANDLE_DW_OP(0x94, deref_size) -HANDLE_DW_OP(0x95, xderef_size) -HANDLE_DW_OP(0x96, nop) -// New in DWARF v3: -HANDLE_DW_OP(0x97, push_object_address) -HANDLE_DW_OP(0x98, call2) -HANDLE_DW_OP(0x99, call4) -HANDLE_DW_OP(0x9a, call_ref) -HANDLE_DW_OP(0x9b, form_tls_address) -HANDLE_DW_OP(0x9c, call_frame_cfa) -HANDLE_DW_OP(0x9d, bit_piece) -// New in DWARF v4: -HANDLE_DW_OP(0x9e, implicit_value) -HANDLE_DW_OP(0x9f, stack_value) -// New in DWARF v5: -HANDLE_DW_OP(0xa0, implicit_pointer) -HANDLE_DW_OP(0xa1, addrx) -HANDLE_DW_OP(0xa2, constx) -HANDLE_DW_OP(0xa3, entry_value) -HANDLE_DW_OP(0xa4, const_type) -HANDLE_DW_OP(0xa5, regval_type) -HANDLE_DW_OP(0xa6, deref_type) -HANDLE_DW_OP(0xa7, xderef_type) -HANDLE_DW_OP(0xa8, convert) -HANDLE_DW_OP(0xa9, reinterpret) -// Vendor extensions: -// Extensions for GNU-style thread-local storage. -HANDLE_DW_OP(0xe0, GNU_push_tls_address) -// Extensions for Fission proposal. -HANDLE_DW_OP(0xfb, GNU_addr_index) -HANDLE_DW_OP(0xfc, GNU_const_index) - -// DWARF languages. -HANDLE_DW_LANG(0x0001, C89) -HANDLE_DW_LANG(0x0002, C) -HANDLE_DW_LANG(0x0003, Ada83) -HANDLE_DW_LANG(0x0004, C_plus_plus) -HANDLE_DW_LANG(0x0005, Cobol74) -HANDLE_DW_LANG(0x0006, Cobol85) -HANDLE_DW_LANG(0x0007, Fortran77) -HANDLE_DW_LANG(0x0008, Fortran90) -HANDLE_DW_LANG(0x0009, Pascal83) -HANDLE_DW_LANG(0x000a, Modula2) -// New in DWARF v3: -HANDLE_DW_LANG(0x000b, Java) -HANDLE_DW_LANG(0x000c, C99) -HANDLE_DW_LANG(0x000d, Ada95) -HANDLE_DW_LANG(0x000e, Fortran95) -HANDLE_DW_LANG(0x000f, PLI) -HANDLE_DW_LANG(0x0010, ObjC) -HANDLE_DW_LANG(0x0011, ObjC_plus_plus) -HANDLE_DW_LANG(0x0012, UPC) -HANDLE_DW_LANG(0x0013, D) -// New in DWARF v4: -HANDLE_DW_LANG(0x0014, Python) -// New in DWARF v5: -HANDLE_DW_LANG(0x0015, OpenCL) -HANDLE_DW_LANG(0x0016, Go) -HANDLE_DW_LANG(0x0017, Modula3) -HANDLE_DW_LANG(0x0018, Haskell) -HANDLE_DW_LANG(0x0019, C_plus_plus_03) -HANDLE_DW_LANG(0x001a, C_plus_plus_11) -HANDLE_DW_LANG(0x001b, OCaml) -HANDLE_DW_LANG(0x001c, Rust) -HANDLE_DW_LANG(0x001d, C11) -HANDLE_DW_LANG(0x001e, Swift) -HANDLE_DW_LANG(0x001f, Julia) -HANDLE_DW_LANG(0x0020, Dylan) -HANDLE_DW_LANG(0x0021, C_plus_plus_14) -HANDLE_DW_LANG(0x0022, Fortran03) -HANDLE_DW_LANG(0x0023, Fortran08) -HANDLE_DW_LANG(0x0024, RenderScript) -HANDLE_DW_LANG(0x0025, BLISS) -// Vendor extensions: -HANDLE_DW_LANG(0x8001, Mips_Assembler) -HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript) -HANDLE_DW_LANG(0xb000, BORLAND_Delphi) - -// DWARF attribute type encodings. -HANDLE_DW_ATE(0x01, address) -HANDLE_DW_ATE(0x02, boolean) -HANDLE_DW_ATE(0x03, complex_float) -HANDLE_DW_ATE(0x04, float) -HANDLE_DW_ATE(0x05, signed) -HANDLE_DW_ATE(0x06, signed_char) -HANDLE_DW_ATE(0x07, unsigned) -HANDLE_DW_ATE(0x08, unsigned_char) -// New in DWARF v3: -HANDLE_DW_ATE(0x09, imaginary_float) -HANDLE_DW_ATE(0x0a, packed_decimal) -HANDLE_DW_ATE(0x0b, numeric_string) -HANDLE_DW_ATE(0x0c, edited) -HANDLE_DW_ATE(0x0d, signed_fixed) -HANDLE_DW_ATE(0x0e, unsigned_fixed) -HANDLE_DW_ATE(0x0f, decimal_float) -// New in DWARF v4: -HANDLE_DW_ATE(0x10, UTF) -// New in DWARF v5: -HANDLE_DW_ATE(0x11, UCS) -HANDLE_DW_ATE(0x12, ASCII) - -// DWARF virtuality codes. -HANDLE_DW_VIRTUALITY(0x00, none) -HANDLE_DW_VIRTUALITY(0x01, virtual) -HANDLE_DW_VIRTUALITY(0x02, pure_virtual) - -// DWARF v5 Defaulted Member Encodings. -HANDLE_DW_DEFAULTED(0x00, no) -HANDLE_DW_DEFAULTED(0x01, in_class) -HANDLE_DW_DEFAULTED(0x02, out_of_class) - -// DWARF calling convention codes. -HANDLE_DW_CC(0x01, normal) -HANDLE_DW_CC(0x02, program) -HANDLE_DW_CC(0x03, nocall) -// New in DWARF v5: -HANDLE_DW_CC(0x04, pass_by_reference) -HANDLE_DW_CC(0x05, pass_by_value) -// Vendor extensions: -HANDLE_DW_CC(0x41, GNU_borland_fastcall_i386) -HANDLE_DW_CC(0xb0, BORLAND_safecall) -HANDLE_DW_CC(0xb1, BORLAND_stdcall) -HANDLE_DW_CC(0xb2, BORLAND_pascal) -HANDLE_DW_CC(0xb3, BORLAND_msfastcall) -HANDLE_DW_CC(0xb4, BORLAND_msreturn) -HANDLE_DW_CC(0xb5, BORLAND_thiscall) -HANDLE_DW_CC(0xb6, BORLAND_fastcall) -HANDLE_DW_CC(0xc0, LLVM_vectorcall) - -// Line Number Extended Opcode Encodings -HANDLE_DW_LNE(0x01, end_sequence) -HANDLE_DW_LNE(0x02, set_address) -HANDLE_DW_LNE(0x03, define_file) -// New in DWARF v4: -HANDLE_DW_LNE(0x04, set_discriminator) - -// Line Number Standard Opcode Encodings. -HANDLE_DW_LNS(0x00, extended_op) -HANDLE_DW_LNS(0x01, copy) -HANDLE_DW_LNS(0x02, advance_pc) -HANDLE_DW_LNS(0x03, advance_line) -HANDLE_DW_LNS(0x04, set_file) -HANDLE_DW_LNS(0x05, set_column) -HANDLE_DW_LNS(0x06, negate_stmt) -HANDLE_DW_LNS(0x07, set_basic_block) -HANDLE_DW_LNS(0x08, const_add_pc) -HANDLE_DW_LNS(0x09, fixed_advance_pc) -// New in DWARF v3: -HANDLE_DW_LNS(0x0a, set_prologue_end) -HANDLE_DW_LNS(0x0b, set_epilogue_begin) -HANDLE_DW_LNS(0x0c, set_isa) - -// DWARF v5 Line number header entry format. -HANDLE_DW_LNCT(0x01, path) -HANDLE_DW_LNCT(0x02, directory_index) -HANDLE_DW_LNCT(0x03, timestamp) -HANDLE_DW_LNCT(0x04, size) -HANDLE_DW_LNCT(0x05, MD5) - -// DWARF v5 Macro information. -HANDLE_DW_MACRO(0x01, define) -HANDLE_DW_MACRO(0x02, undef) -HANDLE_DW_MACRO(0x03, start_file) -HANDLE_DW_MACRO(0x04, end_file) -HANDLE_DW_MACRO(0x05, define_strp) -HANDLE_DW_MACRO(0x06, undef_strp) -HANDLE_DW_MACRO(0x07, import) -HANDLE_DW_MACRO(0x08, define_sup) -HANDLE_DW_MACRO(0x09, undef_sup) -HANDLE_DW_MACRO(0x0a, import_sup) -HANDLE_DW_MACRO(0x0b, define_strx) -HANDLE_DW_MACRO(0x0c, undef_strx) - -// DWARF v5 Range List Entry encoding values. -HANDLE_DW_RLE(0x00, end_of_list) -HANDLE_DW_RLE(0x01, base_addressx) -HANDLE_DW_RLE(0x02, startx_endx) -HANDLE_DW_RLE(0x03, startx_length) -HANDLE_DW_RLE(0x04, offset_pair) -HANDLE_DW_RLE(0x05, base_address) -HANDLE_DW_RLE(0x06, start_end) -HANDLE_DW_RLE(0x07, start_length) - -// Call frame instruction encodings. -HANDLE_DW_CFA(0x00, nop) -HANDLE_DW_CFA(0x40, advance_loc) -HANDLE_DW_CFA(0x80, offset) -HANDLE_DW_CFA(0xc0, restore) -HANDLE_DW_CFA(0x01, set_loc) -HANDLE_DW_CFA(0x02, advance_loc1) -HANDLE_DW_CFA(0x03, advance_loc2) -HANDLE_DW_CFA(0x04, advance_loc4) -HANDLE_DW_CFA(0x05, offset_extended) -HANDLE_DW_CFA(0x06, restore_extended) -HANDLE_DW_CFA(0x07, undefined) -HANDLE_DW_CFA(0x08, same_value) -HANDLE_DW_CFA(0x09, register) -HANDLE_DW_CFA(0x0a, remember_state) -HANDLE_DW_CFA(0x0b, restore_state) -HANDLE_DW_CFA(0x0c, def_cfa) -HANDLE_DW_CFA(0x0d, def_cfa_register) -HANDLE_DW_CFA(0x0e, def_cfa_offset) -// New in DWARF v3: -HANDLE_DW_CFA(0x0f, def_cfa_expression) -HANDLE_DW_CFA(0x10, expression) -HANDLE_DW_CFA(0x11, offset_extended_sf) -HANDLE_DW_CFA(0x12, def_cfa_sf) -HANDLE_DW_CFA(0x13, def_cfa_offset_sf) -HANDLE_DW_CFA(0x14, val_offset) -HANDLE_DW_CFA(0x15, val_offset_sf) -HANDLE_DW_CFA(0x16, val_expression) -// Vendor extensions: -HANDLE_DW_CFA(0x1d, MIPS_advance_loc8) -HANDLE_DW_CFA(0x2d, GNU_window_save) -HANDLE_DW_CFA(0x2e, GNU_args_size) - -// Apple Objective-C Property Attributes. -// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind! -HANDLE_DW_APPLE_PROPERTY(0x01, readonly) -HANDLE_DW_APPLE_PROPERTY(0x02, getter) -HANDLE_DW_APPLE_PROPERTY(0x04, assign) -HANDLE_DW_APPLE_PROPERTY(0x08, readwrite) -HANDLE_DW_APPLE_PROPERTY(0x10, retain) -HANDLE_DW_APPLE_PROPERTY(0x20, copy) -HANDLE_DW_APPLE_PROPERTY(0x40, nonatomic) -HANDLE_DW_APPLE_PROPERTY(0x80, setter) -HANDLE_DW_APPLE_PROPERTY(0x100, atomic) -HANDLE_DW_APPLE_PROPERTY(0x200, weak) -HANDLE_DW_APPLE_PROPERTY(0x400, strong) -HANDLE_DW_APPLE_PROPERTY(0x800, unsafe_unretained) -HANDLE_DW_APPLE_PROPERTY(0x1000, nullability) -HANDLE_DW_APPLE_PROPERTY(0x2000, null_resettable) -HANDLE_DW_APPLE_PROPERTY(0x4000, class) - -// DWARF v5 Unit Types. -HANDLE_DW_UT(0x01, compile) -HANDLE_DW_UT(0x02, type) -HANDLE_DW_UT(0x03, partial) -HANDLE_DW_UT(0x04, skeleton) -HANDLE_DW_UT(0x05, split_compile) -HANDLE_DW_UT(0x06, split_type) - -#undef HANDLE_DW_TAG -#undef HANDLE_DW_AT -#undef HANDLE_DW_FORM -#undef HANDLE_DW_OP -#undef HANDLE_DW_LANG -#undef HANDLE_DW_ATE -#undef HANDLE_DW_VIRTUALITY -#undef HANDLE_DW_DEFAULTED -#undef HANDLE_DW_CC -#undef HANDLE_DW_LNS -#undef HANDLE_DW_LNE -#undef HANDLE_DW_LNCT -#undef HANDLE_DW_MACRO -#undef HANDLE_DW_RLE -#undef HANDLE_DW_CFA -#undef HANDLE_DW_APPLE_PROPERTY -#undef HANDLE_DW_UT diff --git a/include/llvm/Support/DynamicLibrary.h b/include/llvm/Support/DynamicLibrary.h index aa9bb8938ad3bf2f9359a1e5b123b4bd6dfe556e..a8874a10d461a4c6e8d2fcc88e661690419d1163 100644 --- a/include/llvm/Support/DynamicLibrary.h +++ b/include/llvm/Support/DynamicLibrary.h @@ -58,7 +58,7 @@ namespace sys { void *getAddressOfSymbol(const char *symbolName); /// This function permanently loads the dynamic library at the given path. - /// The library will only be unloaded when the program terminates. + /// The library will only be unloaded when llvm_shutdown() is called. /// This returns a valid DynamicLibrary instance on success and an invalid /// instance on failure (see isValid()). \p *errMsg will only be modified /// if the library fails to load. @@ -71,7 +71,8 @@ namespace sys { /// Registers an externally loaded library. The library will be unloaded /// when the program terminates. /// - /// It is safe to call this function multiple times for the same library. + /// It is safe to call this function multiple times for the same library, + /// though ownership is only taken if there was no error. /// /// \returns An empty \p DynamicLibrary if the library was already loaded. static DynamicLibrary addPermanentLibrary(void *handle, @@ -106,6 +107,8 @@ namespace sys { /// libraries. /// @brief Add searchable symbol/value pair. static void AddSymbol(StringRef symbolName, void *symbolValue); + + class HandleSet; }; } // End sys namespace diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h index 06e089ffa166f6efd03ee0ae473cbead75464aa4..f50d9b502dafb1e01fecab421ca8870054a5f4fd 100644 --- a/include/llvm/Support/Endian.h +++ b/include/llvm/Support/Endian.h @@ -14,27 +14,36 @@ #ifndef LLVM_SUPPORT_ENDIAN_H #define LLVM_SUPPORT_ENDIAN_H +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" - -#include +#include +#include +#include +#include +#include namespace llvm { namespace support { + enum endianness {big, little, native}; // These are named values for common alignments. enum {aligned = 0, unaligned = 1}; namespace detail { - /// \brief ::value is either alignment, or alignof(T) if alignment is 0. - template - struct PickAlignment { - enum { value = alignment == 0 ? alignof(T) : alignment }; - }; + +/// \brief ::value is either alignment, or alignof(T) if alignment is 0. +template +struct PickAlignment { + enum { value = alignment == 0 ? alignof(T) : alignment }; +}; + } // end namespace detail namespace endian { + constexpr endianness system_endianness() { return sys::IsBigEndianHost ? big : little; } @@ -190,9 +199,11 @@ inline void writeAtBitAlignment(void *memory, value_type value, &val[0], sizeof(value_type) * 2); } } + } // end namespace endian namespace detail { + template @@ -254,77 +265,78 @@ public: } // end namespace detail -typedef detail::packed_endian_specific_integral - ulittle16_t; -typedef detail::packed_endian_specific_integral - ulittle32_t; -typedef detail::packed_endian_specific_integral - ulittle64_t; - -typedef detail::packed_endian_specific_integral - little16_t; -typedef detail::packed_endian_specific_integral - little32_t; -typedef detail::packed_endian_specific_integral - little64_t; - -typedef detail::packed_endian_specific_integral - aligned_ulittle16_t; -typedef detail::packed_endian_specific_integral - aligned_ulittle32_t; -typedef detail::packed_endian_specific_integral - aligned_ulittle64_t; - -typedef detail::packed_endian_specific_integral - aligned_little16_t; -typedef detail::packed_endian_specific_integral - aligned_little32_t; -typedef detail::packed_endian_specific_integral - aligned_little64_t; - -typedef detail::packed_endian_specific_integral - ubig16_t; -typedef detail::packed_endian_specific_integral - ubig32_t; -typedef detail::packed_endian_specific_integral - ubig64_t; - -typedef detail::packed_endian_specific_integral - big16_t; -typedef detail::packed_endian_specific_integral - big32_t; -typedef detail::packed_endian_specific_integral - big64_t; - -typedef detail::packed_endian_specific_integral - aligned_ubig16_t; -typedef detail::packed_endian_specific_integral - aligned_ubig32_t; -typedef detail::packed_endian_specific_integral - aligned_ubig64_t; - -typedef detail::packed_endian_specific_integral - aligned_big16_t; -typedef detail::packed_endian_specific_integral - aligned_big32_t; -typedef detail::packed_endian_specific_integral - aligned_big64_t; - -typedef detail::packed_endian_specific_integral - unaligned_uint16_t; -typedef detail::packed_endian_specific_integral - unaligned_uint32_t; -typedef detail::packed_endian_specific_integral - unaligned_uint64_t; - -typedef detail::packed_endian_specific_integral - unaligned_int16_t; -typedef detail::packed_endian_specific_integral - unaligned_int32_t; -typedef detail::packed_endian_specific_integral - unaligned_int64_t; +using ulittle16_t = + detail::packed_endian_specific_integral; +using ulittle32_t = + detail::packed_endian_specific_integral; +using ulittle64_t = + detail::packed_endian_specific_integral; + +using little16_t = + detail::packed_endian_specific_integral; +using little32_t = + detail::packed_endian_specific_integral; +using little64_t = + detail::packed_endian_specific_integral; + +using aligned_ulittle16_t = + detail::packed_endian_specific_integral; +using aligned_ulittle32_t = + detail::packed_endian_specific_integral; +using aligned_ulittle64_t = + detail::packed_endian_specific_integral; + +using aligned_little16_t = + detail::packed_endian_specific_integral; +using aligned_little32_t = + detail::packed_endian_specific_integral; +using aligned_little64_t = + detail::packed_endian_specific_integral; + +using ubig16_t = + detail::packed_endian_specific_integral; +using ubig32_t = + detail::packed_endian_specific_integral; +using ubig64_t = + detail::packed_endian_specific_integral; + +using big16_t = + detail::packed_endian_specific_integral; +using big32_t = + detail::packed_endian_specific_integral; +using big64_t = + detail::packed_endian_specific_integral; + +using aligned_ubig16_t = + detail::packed_endian_specific_integral; +using aligned_ubig32_t = + detail::packed_endian_specific_integral; +using aligned_ubig64_t = + detail::packed_endian_specific_integral; + +using aligned_big16_t = + detail::packed_endian_specific_integral; +using aligned_big32_t = + detail::packed_endian_specific_integral; +using aligned_big64_t = + detail::packed_endian_specific_integral; + +using unaligned_uint16_t = + detail::packed_endian_specific_integral; +using unaligned_uint32_t = + detail::packed_endian_specific_integral; +using unaligned_uint64_t = + detail::packed_endian_specific_integral; + +using unaligned_int16_t = + detail::packed_endian_specific_integral; +using unaligned_int32_t = + detail::packed_endian_specific_integral; +using unaligned_int64_t = + detail::packed_endian_specific_integral; namespace endian { + template inline T read(const void *P, endianness E) { return read(P, E); } @@ -394,8 +406,10 @@ inline void write64le(void *P, uint64_t V) { write64(P, V); } inline void write16be(void *P, uint16_t V) { write16(P, V); } inline void write32be(void *P, uint32_t V) { write32(P, V); } inline void write64be(void *P, uint64_t V) { write64(P, V); } + } // end namespace endian + } // end namespace support } // end namespace llvm -#endif +#endif // LLVM_SUPPORT_ENDIAN_H diff --git a/include/llvm/Support/Error.h b/include/llvm/Support/Error.h index 21664d4b71557cf6c5319565c0d7da5db77d90d5..1e27e0b821f055237e9104f83a971e4446b29ba7 100644 --- a/include/llvm/Support/Error.h +++ b/include/llvm/Support/Error.h @@ -1,4 +1,4 @@ -//===----- llvm/Support/Error.h - Recoverable error handling ----*- C++ -*-===// +//===- llvm/Support/Error.h - Recoverable error handling --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -22,6 +22,7 @@ #include "llvm/Support/AlignOf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" #include @@ -64,6 +65,12 @@ public: /// using std::error_code. It will be removed in the future. virtual std::error_code convertToErrorCode() const = 0; + // Returns the class ID for this type. + static const void *classID() { return &ID; } + + // Returns the class ID for the dynamic type of this ErrorInfoBase instance. + virtual const void *dynamicClassID() const = 0; + // Check whether this instance is a subclass of the class identified by // ClassID. virtual bool isA(const void *const ClassID) const { @@ -75,9 +82,6 @@ public: return isA(ErrorInfoT::classID()); } - // Returns the class ID for this type. - static const void *classID() { return &ID; } - private: virtual void anchor(); @@ -164,7 +168,7 @@ class LLVM_NODISCARD Error { protected: /// Create a success value. Prefer using 'Error::success()' for readability - Error() : Payload(nullptr) { + Error() { setPtr(nullptr); setChecked(false); } @@ -179,7 +183,7 @@ public: /// Move-construct an error value. The newly constructed error is considered /// unchecked, even if the source error had been checked. The original error /// becomes a checked Success value, regardless of its original state. - Error(Error &&Other) : Payload(nullptr) { + Error(Error &&Other) { setChecked(true); *this = std::move(Other); } @@ -233,6 +237,14 @@ public: return getPtr() && getPtr()->isA(ErrT::classID()); } + /// Returns the dynamic class id of this error, or null if this is a success + /// value. + const void* dynamicClassID() const { + if (!getPtr()) + return nullptr; + return getPtr()->dynamicClassID(); + } + private: void assertIsChecked() { #if LLVM_ENABLE_ABI_BREAKING_CHECKS @@ -288,7 +300,7 @@ private: return Tmp; } - ErrorInfoBase *Payload; + ErrorInfoBase *Payload = nullptr; }; /// Subclass of Error for the sole purpose of identifying the success path in @@ -316,11 +328,13 @@ template Error make_error(ArgTs &&... Args) { template class ErrorInfo : public ParentErrT { public: + static const void *classID() { return &ThisErrT::ID; } + + const void *dynamicClassID() const override { return &ThisErrT::ID; } + bool isA(const void *const ClassID) const override { return ClassID == classID() || ParentErrT::isA(ClassID); } - - static const void *classID() { return &ThisErrT::ID; } }; /// Special ErrorInfo subclass representing a list of ErrorInfos. @@ -629,21 +643,24 @@ private: /// takeError(). It also adds an bool errorIsA() method for testing the /// error class type. template class LLVM_NODISCARD Expected { + template friend class ExpectedAsOutParameter; template friend class Expected; + static const bool isRef = std::is_reference::value; - typedef ReferenceStorage::type> wrap; - typedef std::unique_ptr error_type; + using wrap = ReferenceStorage::type>; + + using error_type = std::unique_ptr; public: - typedef typename std::conditional::type storage_type; - typedef T value_type; + using storage_type = typename std::conditional::type; + using value_type = T; private: - typedef typename std::remove_reference::type &reference; - typedef const typename std::remove_reference::type &const_reference; - typedef typename std::remove_reference::type *pointer; - typedef const typename std::remove_reference::type *const_pointer; + using reference = typename std::remove_reference::type &; + using const_reference = const typename std::remove_reference::type &; + using pointer = typename std::remove_reference::type *; + using const_pointer = const typename std::remove_reference::type *; public: /// Create an Expected error value from the given Error. @@ -737,7 +754,7 @@ public: /// \brief Check that this Expected is an error of type ErrT. template bool errorIsA() const { - return HasError && getErrorStorage()->template isA(); + return HasError && (*getErrorStorage())->template isA(); } /// \brief Take ownership of the stored error. @@ -832,6 +849,18 @@ private: return reinterpret_cast(ErrorStorage.buffer); } + const error_type *getErrorStorage() const { + assert(HasError && "Cannot get error when a value exists!"); + return reinterpret_cast(ErrorStorage.buffer); + } + + // Used by ExpectedAsOutParameter to reset the checked flag. + void setUnchecked() { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + Unchecked = true; +#endif + } + void assertIsChecked() { #if LLVM_ENABLE_ABI_BREAKING_CHECKS if (Unchecked) { @@ -858,6 +887,27 @@ private: #endif }; +/// Helper for Expecteds used as out-parameters. +/// +/// See ErrorAsOutParameter. +template +class ExpectedAsOutParameter { +public: + ExpectedAsOutParameter(Expected *ValOrErr) + : ValOrErr(ValOrErr) { + if (ValOrErr) + (void)!!*ValOrErr; + } + + ~ExpectedAsOutParameter() { + if (ValOrErr) + ValOrErr->setUnchecked(); + } + +private: + Expected *ValOrErr; +}; + /// This class wraps a std::error_code in a Error. /// /// This is useful if you're writing an interface that returns a Error @@ -926,6 +976,8 @@ public: void log(raw_ostream &OS) const override; std::error_code convertToErrorCode() const override; + const std::string &getMessage() const { return Msg; } + private: std::string Msg; std::error_code EC; diff --git a/include/llvm/Support/ErrorOr.h b/include/llvm/Support/ErrorOr.h index 877f4063cd23277c555436e6ae542bd673370ea7..061fb65db465cb7594afe317304cacc37214a9d2 100644 --- a/include/llvm/Support/ErrorOr.h +++ b/include/llvm/Support/ErrorOr.h @@ -16,13 +16,14 @@ #ifndef LLVM_SUPPORT_ERROROR_H #define LLVM_SUPPORT_ERROROR_H -#include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/AlignOf.h" #include #include #include +#include namespace llvm { + /// \brief Stores a reference that can be changed. template class ReferenceStorage { @@ -67,17 +68,19 @@ public: template class ErrorOr { template friend class ErrorOr; + static const bool isRef = std::is_reference::value; - typedef ReferenceStorage::type> wrap; + + using wrap = ReferenceStorage::type>; public: - typedef typename std::conditional::type storage_type; + using storage_type = typename std::conditional::type; private: - typedef typename std::remove_reference::type &reference; - typedef const typename std::remove_reference::type &const_reference; - typedef typename std::remove_reference::type *pointer; - typedef const typename std::remove_reference::type *const_pointer; + using reference = typename std::remove_reference::type &; + using const_reference = const typename std::remove_reference::type &; + using pointer = typename std::remove_reference::type *; + using const_pointer = const typename std::remove_reference::type *; public: template @@ -282,6 +285,7 @@ typename std::enable_if::value || operator==(const ErrorOr &Err, E Code) { return Err.getError() == Code; } + } // end namespace llvm #endif // LLVM_SUPPORT_ERROROR_H diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 29515c231bc46f989de37a7a1bdece459c12acb8..21c5fcdb7145057534a70ed33e6f76e062e2685f 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -116,7 +116,9 @@ inline perms &operator&=(perms &l, perms r) { return l; } inline perms operator~(perms x) { - return static_cast(~static_cast(x)); + // Avoid UB by explicitly truncating the (unsigned) ~ result. + return static_cast( + static_cast(~static_cast(x))); } class UniqueID { @@ -231,50 +233,6 @@ public: void permissions(perms p) { Perms = p; } }; -/// file_magic - An "enum class" enumeration of file types based on magic (the first -/// N bytes of the file). -struct file_magic { - enum Impl { - unknown = 0, ///< Unrecognized file - bitcode, ///< Bitcode file - archive, ///< ar style archive file - elf, ///< ELF Unknown type - elf_relocatable, ///< ELF Relocatable object file - elf_executable, ///< ELF Executable image - elf_shared_object, ///< ELF dynamically linked shared lib - elf_core, ///< ELF core image - macho_object, ///< Mach-O Object file - macho_executable, ///< Mach-O Executable - macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM - macho_core, ///< Mach-O Core File - macho_preload_executable, ///< Mach-O Preloaded Executable - macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib - macho_dynamic_linker, ///< The Mach-O dynamic linker - macho_bundle, ///< Mach-O Bundle file - macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub - macho_dsym_companion, ///< Mach-O dSYM companion file - macho_kext_bundle, ///< Mach-O kext bundle file - macho_universal_binary, ///< Mach-O universal binary - coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file - coff_object, ///< COFF object file - coff_import_library, ///< COFF import library - pecoff_executable, ///< PECOFF executable file - windows_resource, ///< Windows compiled resource file (.rc) - wasm_object ///< WebAssembly Object file - }; - - bool is_object() const { - return V != unknown; - } - - file_magic() = default; - file_magic(Impl V) : V(V) {} - operator Impl() const { return V; } - -private: - Impl V = unknown; -}; - /// @} /// @name Physical Operators /// @{ @@ -768,17 +726,6 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD, std::error_code openFileForRead(const Twine &Name, int &ResultFD, SmallVectorImpl *RealPath = nullptr); -/// @brief Identify the type of a binary file based on how magical it is. -file_magic identify_magic(StringRef magic); - -/// @brief Get and identify \a path's type based on its content. -/// -/// @param path Input path. -/// @param result Set to the type of file, or file_magic::unknown. -/// @returns errc::success if result has been successfully set, otherwise a -/// platform-specific error_code. -std::error_code identify_magic(const Twine &path, file_magic &result); - std::error_code getUniqueID(const Twine Path, UniqueID &Result); /// @brief Get disk space usage information. diff --git a/include/llvm/Support/FormatAdapters.h b/include/llvm/Support/FormatAdapters.h index 698e134b328deaba09e1fc791467f786b602bd27..197beb7363dfcf3a5085c5103f35b3a4f656bfaa 100644 --- a/include/llvm/Support/FormatAdapters.h +++ b/include/llvm/Support/FormatAdapters.h @@ -28,14 +28,16 @@ namespace detail { template class AlignAdapter final : public FormatAdapter { AlignStyle Where; size_t Amount; + char Fill; public: - AlignAdapter(T &&Item, AlignStyle Where, size_t Amount) - : FormatAdapter(std::forward(Item)), Where(Where), Amount(Amount) {} + AlignAdapter(T &&Item, AlignStyle Where, size_t Amount, char Fill) + : FormatAdapter(std::forward(Item)), Where(Where), Amount(Amount), + Fill(Fill) {} void format(llvm::raw_ostream &Stream, StringRef Style) { auto Adapter = detail::build_format_adapter(std::forward(this->Item)); - FmtAlign(Adapter, Where, Amount).format(Stream, Style); + FmtAlign(Adapter, Where, Amount, Fill).format(Stream, Style); } }; @@ -72,8 +74,9 @@ public: } template -detail::AlignAdapter fmt_align(T &&Item, AlignStyle Where, size_t Amount) { - return detail::AlignAdapter(std::forward(Item), Where, Amount); +detail::AlignAdapter fmt_align(T &&Item, AlignStyle Where, size_t Amount, + char Fill = ' ') { + return detail::AlignAdapter(std::forward(Item), Where, Amount, Fill); } template diff --git a/include/llvm/Support/FormatCommon.h b/include/llvm/Support/FormatCommon.h index a8c5fdeb6bffaa57a8f21be7e1757f040a71bb8d..36fbad296c3f2bd1b83ff6a9fdd2724027d077b9 100644 --- a/include/llvm/Support/FormatCommon.h +++ b/include/llvm/Support/FormatCommon.h @@ -21,9 +21,11 @@ struct FmtAlign { detail::format_adapter &Adapter; AlignStyle Where; size_t Amount; + char Fill; - FmtAlign(detail::format_adapter &Adapter, AlignStyle Where, size_t Amount) - : Adapter(Adapter), Where(Where), Amount(Amount) {} + FmtAlign(detail::format_adapter &Adapter, AlignStyle Where, size_t Amount, + char Fill = ' ') + : Adapter(Adapter), Where(Where), Amount(Amount), Fill(Fill) {} void format(raw_ostream &S, StringRef Options) { // If we don't need to align, we can format straight into the underlying @@ -48,21 +50,27 @@ struct FmtAlign { switch (Where) { case AlignStyle::Left: S << Item; - S.indent(PadAmount); + fill(S, PadAmount); break; case AlignStyle::Center: { size_t X = PadAmount / 2; - S.indent(X); + fill(S, X); S << Item; - S.indent(PadAmount - X); + fill(S, PadAmount - X); break; } default: - S.indent(PadAmount); + fill(S, PadAmount); S << Item; break; } } + +private: + void fill(llvm::raw_ostream &S, uint32_t Count) { + for (uint32_t I = 0; I < Count; ++I) + S << Fill; + } }; } diff --git a/include/llvm/Support/FormatVariadic.h b/include/llvm/Support/FormatVariadic.h index 3a4668687cc94b47cc9e5d2019faf22cdc027792..c1153e84dfb569d5f52f69d94382a476b84d6b5d 100644 --- a/include/llvm/Support/FormatVariadic.h +++ b/include/llvm/Support/FormatVariadic.h @@ -27,8 +27,8 @@ #define LLVM_SUPPORT_FORMATVARIADIC_H #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatCommon.h" #include "llvm/Support/FormatProviders.h" diff --git a/include/llvm/Support/GCOV.h b/include/llvm/Support/GCOV.h index 73fddca8e35bbbccf5f3718bf0bfbf7e3e47af86..268c53c50252fbb4eb4cc82652d8f5c448a74dc0 100644 --- a/include/llvm/Support/GCOV.h +++ b/include/llvm/Support/GCOV.h @@ -16,12 +16,12 @@ #define LLVM_SUPPORT_GCOV_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h index 20f3ffdf3aab79a85b6f5ebdd93be1a25d5e844d..80a2dfcbad889ac1eab81d5601ea6caa5dcf4547 100644 --- a/include/llvm/Support/GenericDomTree.h +++ b/include/llvm/Support/GenericDomTree.h @@ -26,9 +26,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -276,32 +276,25 @@ protected: // NewBB is split and now it has one successor. Update dominator tree to // reflect this change. - template - void Split(DominatorTreeBaseByGraphTraits &DT, - typename GraphT::NodeRef NewBB) { + template + void Split(typename GraphTraits::NodeRef NewBB) { + using GraphT = GraphTraits; + using NodeRef = typename GraphT::NodeRef; assert(std::distance(GraphT::child_begin(NewBB), GraphT::child_end(NewBB)) == 1 && "NewBB should have a single successor!"); - typename GraphT::NodeRef NewBBSucc = *GraphT::child_begin(NewBB); + NodeRef NewBBSucc = *GraphT::child_begin(NewBB); - std::vector PredBlocks; - typedef GraphTraits> InvTraits; - for (typename InvTraits::ChildIteratorType - PI = InvTraits::child_begin(NewBB), - PE = InvTraits::child_end(NewBB); - PI != PE; ++PI) - PredBlocks.push_back(*PI); + std::vector PredBlocks; + for (const auto &Pred : children>(NewBB)) + PredBlocks.push_back(Pred); assert(!PredBlocks.empty() && "No predblocks?"); bool NewBBDominatesNewBBSucc = true; - for (typename InvTraits::ChildIteratorType - PI = InvTraits::child_begin(NewBBSucc), - E = InvTraits::child_end(NewBBSucc); - PI != E; ++PI) { - typename InvTraits::NodeRef ND = *PI; - if (ND != NewBB && !DT.dominates(NewBBSucc, ND) && - DT.isReachableFromEntry(ND)) { + for (const auto &Pred : children>(NewBBSucc)) { + if (Pred != NewBB && !dominates(NewBBSucc, Pred) && + isReachableFromEntry(Pred)) { NewBBDominatesNewBBSucc = false; break; } @@ -312,7 +305,7 @@ protected: NodeT *NewBBIDom = nullptr; unsigned i = 0; for (i = 0; i < PredBlocks.size(); ++i) - if (DT.isReachableFromEntry(PredBlocks[i])) { + if (isReachableFromEntry(PredBlocks[i])) { NewBBIDom = PredBlocks[i]; break; } @@ -324,18 +317,18 @@ protected: return; for (i = i + 1; i < PredBlocks.size(); ++i) { - if (DT.isReachableFromEntry(PredBlocks[i])) - NewBBIDom = DT.findNearestCommonDominator(NewBBIDom, PredBlocks[i]); + if (isReachableFromEntry(PredBlocks[i])) + NewBBIDom = findNearestCommonDominator(NewBBIDom, PredBlocks[i]); } // Create the new dominator tree node... and set the idom of NewBB. - DomTreeNodeBase *NewBBNode = DT.addNewBlock(NewBB, NewBBIDom); + DomTreeNodeBase *NewBBNode = addNewBlock(NewBB, NewBBIDom); // If NewBB strictly dominates other blocks, then it is now the immediate // dominator of NewBBSucc. Update the dominator tree as appropriate. if (NewBBDominatesNewBBSucc) { - DomTreeNodeBase *NewBBSuccNode = DT.getNode(NewBBSucc); - DT.changeImmediateDominator(NewBBSuccNode, NewBBNode); + DomTreeNodeBase *NewBBSuccNode = getNode(NewBBSucc); + changeImmediateDominator(NewBBSuccNode, NewBBNode); } } @@ -379,7 +372,7 @@ public: if (DomTreeNodes.size() != OtherDomTreeNodes.size()) return true; - for (const auto &DomTreeNode : this->DomTreeNodes) { + for (const auto &DomTreeNode : DomTreeNodes) { NodeT *BB = DomTreeNode.first; typename DomTreeNodeMapType::const_iterator OI = OtherDomTreeNodes.find(BB); @@ -663,10 +656,9 @@ public: /// tree to reflect this change. void splitBlock(NodeT *NewBB) { if (this->IsPostDominators) - this->Split, GraphTraits>>(*this, - NewBB); + Split>(NewBB); else - this->Split>(*this, NewBB); + Split(NewBB); } /// print - Convert to human readable form @@ -677,7 +669,7 @@ public: o << "Inorder PostDominator Tree: "; else o << "Inorder Dominator Tree: "; - if (!this->DFSInfoValid) + if (!DFSInfoValid) o << "DFSNumbers invalid: " << SlowQueries << " slow queries."; o << "\n"; @@ -712,12 +704,12 @@ protected: // immediate dominator. NodeT *IDom = getIDom(BB); - assert(IDom || this->DomTreeNodes[nullptr]); + assert(IDom || DomTreeNodes[nullptr]); DomTreeNodeBase *IDomNode = getNodeForBlock(IDom); // Add a new tree node for this NodeT, and link it as a child of // IDomNode - return (this->DomTreeNodes[BB] = IDomNode->addChild( + return (DomTreeNodes[BB] = IDomNode->addChild( llvm::make_unique>(BB, IDomNode))).get(); } @@ -780,7 +772,7 @@ public: template void recalculate(FT &F) { typedef GraphTraits TraitsTy; reset(); - this->Vertex.push_back(nullptr); + Vertex.push_back(nullptr); if (!this->IsPostDominators) { // Initialize root diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h index 7555d5b31a8d6a3571c9ebee5da8572ce46fcba0..c318fea5365119ec80ec48a614eeda6e9a5002e4 100644 --- a/include/llvm/Support/GraphWriter.h +++ b/include/llvm/Support/GraphWriter.h @@ -143,10 +143,9 @@ public: void writeNodes() { // Loop over the graph, printing it out... - for (node_iterator I = GTraits::nodes_begin(G), E = GTraits::nodes_end(G); - I != E; ++I) - if (!isNodeHidden(*I)) - writeNode(*I); + for (const auto Node : nodes(G)) + if (!isNodeHidden(Node)) + writeNode(Node); } bool isNodeHidden(NodeRef Node) { diff --git a/include/llvm/Support/KnownBits.h b/include/llvm/Support/KnownBits.h new file mode 100644 index 0000000000000000000000000000000000000000..2c77d40559b9eb02a3844074ef6f90c5808ff4e1 --- /dev/null +++ b/include/llvm/Support/KnownBits.h @@ -0,0 +1,200 @@ +//===- llvm/Support/KnownBits.h - Stores known zeros/ones -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a class for representing known zeros and ones used by +// computeKnownBits. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_KNOWNBITS_H +#define LLVM_SUPPORT_KNOWNBITS_H + +#include "llvm/ADT/APInt.h" + +namespace llvm { + +// Struct for tracking the known zeros and ones of a value. +struct KnownBits { + APInt Zero; + APInt One; + +private: + // Internal constructor for creating a ConstantRange from two APInts. + KnownBits(APInt Zero, APInt One) + : Zero(std::move(Zero)), One(std::move(One)) {} + +public: + // Default construct Zero and One. + KnownBits() {} + + /// Create a known bits object of BitWidth bits initialized to unknown. + KnownBits(unsigned BitWidth) : Zero(BitWidth, 0), One(BitWidth, 0) {} + + /// Get the bit width of this value. + unsigned getBitWidth() const { + assert(Zero.getBitWidth() == One.getBitWidth() && + "Zero and One should have the same width!"); + return Zero.getBitWidth(); + } + + /// Returns true if there is conflicting information. + bool hasConflict() const { return Zero.intersects(One); } + + /// Returns true if we know the value of all bits. + bool isConstant() const { + assert(!hasConflict() && "KnownBits conflict!"); + return Zero.countPopulation() + One.countPopulation() == getBitWidth(); + } + + /// Returns the value when all bits have a known value. This just returns One + /// with a protective assertion. + const APInt &getConstant() const { + assert(isConstant() && "Can only get value when all bits are known"); + return One; + } + + /// Returns true if we don't know any bits. + bool isUnknown() const { return Zero.isNullValue() && One.isNullValue(); } + + /// Resets the known state of all bits. + void resetAll() { + Zero.clearAllBits(); + One.clearAllBits(); + } + + /// Returns true if value is all zero. + bool isZero() const { + assert(!hasConflict() && "KnownBits conflict!"); + return Zero.isAllOnesValue(); + } + + /// Returns true if value is all one bits. + bool isAllOnes() const { + assert(!hasConflict() && "KnownBits conflict!"); + return One.isAllOnesValue(); + } + + /// Make all bits known to be zero and discard any previous information. + void setAllZero() { + Zero.setAllBits(); + One.clearAllBits(); + } + + /// Make all bits known to be one and discard any previous information. + void setAllOnes() { + Zero.clearAllBits(); + One.setAllBits(); + } + + /// Returns true if this value is known to be negative. + bool isNegative() const { return One.isSignBitSet(); } + + /// Returns true if this value is known to be non-negative. + bool isNonNegative() const { return Zero.isSignBitSet(); } + + /// Make this value negative. + void makeNegative() { + assert(!isNonNegative() && "Can't make a non-negative value negative"); + One.setSignBit(); + } + + /// Make this value negative. + void makeNonNegative() { + assert(!isNegative() && "Can't make a negative value non-negative"); + Zero.setSignBit(); + } + + /// Truncate the underlying known Zero and One bits. This is equivalent + /// to truncating the value we're tracking. + KnownBits trunc(unsigned BitWidth) { + return KnownBits(Zero.trunc(BitWidth), One.trunc(BitWidth)); + } + + /// Zero extends the underlying known Zero and One bits. This is equivalent + /// to zero extending the value we're tracking. + KnownBits zext(unsigned BitWidth) { + return KnownBits(Zero.zext(BitWidth), One.zext(BitWidth)); + } + + /// Sign extends the underlying known Zero and One bits. This is equivalent + /// to sign extending the value we're tracking. + KnownBits sext(unsigned BitWidth) { + return KnownBits(Zero.sext(BitWidth), One.sext(BitWidth)); + } + + /// Zero extends or truncates the underlying known Zero and One bits. This is + /// equivalent to zero extending or truncating the value we're tracking. + KnownBits zextOrTrunc(unsigned BitWidth) { + return KnownBits(Zero.zextOrTrunc(BitWidth), One.zextOrTrunc(BitWidth)); + } + + /// Returns the minimum number of trailing zero bits. + unsigned countMinTrailingZeros() const { + return Zero.countTrailingOnes(); + } + + /// Returns the minimum number of trailing one bits. + unsigned countMinTrailingOnes() const { + return One.countTrailingOnes(); + } + + /// Returns the minimum number of leading zero bits. + unsigned countMinLeadingZeros() const { + return Zero.countLeadingOnes(); + } + + /// Returns the minimum number of leading one bits. + unsigned countMinLeadingOnes() const { + return One.countLeadingOnes(); + } + + /// Returns the number of times the sign bit is replicated into the other + /// bits. + unsigned countMinSignBits() const { + if (isNonNegative()) + return countMinLeadingZeros(); + if (isNegative()) + return countMinLeadingOnes(); + return 0; + } + + /// Returns the maximum number of trailing zero bits possible. + unsigned countMaxTrailingZeros() const { + return One.countTrailingZeros(); + } + + /// Returns the maximum number of trailing one bits possible. + unsigned countMaxTrailingOnes() const { + return Zero.countTrailingZeros(); + } + + /// Returns the maximum number of leading zero bits possible. + unsigned countMaxLeadingZeros() const { + return One.countLeadingZeros(); + } + + /// Returns the maximum number of leading one bits possible. + unsigned countMaxLeadingOnes() const { + return Zero.countLeadingZeros(); + } + + /// Returns the number of bits known to be one. + unsigned countMinPopulation() const { + return One.countPopulation(); + } + + /// Returns the maximum number of bits that could be one. + unsigned countMaxPopulation() const { + return getBitWidth() - Zero.countPopulation(); + } +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h index ff775f3b7b364d8415c3dc94a0fa87efe4d3e994..29640db692181950c39024b5cbf72efe11cc41b1 100644 --- a/include/llvm/Support/LEB128.h +++ b/include/llvm/Support/LEB128.h @@ -45,8 +45,7 @@ inline void encodeSLEB128(int64_t Value, raw_ostream &OS, /// Utility function to encode a SLEB128 value to a buffer. Returns /// the length in bytes of the encoded value. -inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, - unsigned Padding = 0) { +inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned Padding = 0) { uint8_t *orig_p = p; bool More; do { @@ -111,7 +110,6 @@ inline unsigned encodeULEB128(uint64_t Value, uint8_t *p, return (unsigned)(p - orig_p); } - /// Utility function to decode a ULEB128 value. inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr, const uint8_t *end = nullptr, @@ -119,19 +117,19 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr, const uint8_t *orig_p = p; uint64_t Value = 0; unsigned Shift = 0; - if(error) + if (error) *error = nullptr; do { - if(end && p == end){ - if(error) + if (end && p == end) { + if (error) *error = "malformed uleb128, extends past end"; if (n) *n = (unsigned)(p - orig_p); return 0; } uint64_t Slice = *p & 0x7f; - if(Shift >= 64 || Slice << Shift >> Shift != Slice){ - if(error) + if (Shift >= 64 || Slice << Shift >> Shift != Slice) { + if (error) *error = "uleb128 too big for uint64"; if (n) *n = (unsigned)(p - orig_p); @@ -154,15 +152,15 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr, unsigned Shift = 0; uint8_t Byte; do { - if(end && p == end){ - if(error) + if (end && p == end) { + if (error) *error = "malformed sleb128, extends past end"; if (n) *n = (unsigned)(p - orig_p); return 0; } Byte = *p++; - Value |= ((Byte & 0x7f) << Shift); + Value |= (int64_t(Byte & 0x7f) << Shift); Shift += 7; } while (Byte >= 128); // Sign extend negative numbers. @@ -173,13 +171,12 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr, return Value; } - /// Utility function to get the size of the ULEB128-encoded value. extern unsigned getULEB128Size(uint64_t Value); /// Utility function to get the size of the SLEB128-encoded value. extern unsigned getSLEB128Size(int64_t Value); -} // namespace llvm +} // namespace llvm -#endif // LLVM_SYSTEM_LEB128_H +#endif // LLVM_SYSTEM_LEB128_H diff --git a/include/llvm/Support/LowLevelTypeImpl.h b/include/llvm/Support/LowLevelTypeImpl.h index 02df4d806f13bfd163cf85f16b3d88925db56359..c79dd0c295079cd36dc1dced4687313c28b8c8a2 100644 --- a/include/llvm/Support/LowLevelTypeImpl.h +++ b/include/llvm/Support/LowLevelTypeImpl.h @@ -27,9 +27,9 @@ #ifndef LLVM_SUPPORT_LOWLEVELTYPEIMPL_H #define LLVM_SUPPORT_LOWLEVELTYPEIMPL_H -#include #include "llvm/ADT/DenseMapInfo.h" #include "llvm/CodeGen/MachineValueType.h" +#include namespace llvm { @@ -39,100 +39,123 @@ class raw_ostream; class LLT { public: - enum TypeKind : uint16_t { - Invalid, - Scalar, - Pointer, - Vector, - }; - /// Get a low-level scalar or aggregate "bag of bits". static LLT scalar(unsigned SizeInBits) { assert(SizeInBits > 0 && "invalid scalar size"); - return LLT{Scalar, 1, SizeInBits}; + return LLT{/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0, + SizeInBits, /*AddressSpace=*/0}; } /// Get a low-level pointer in the given address space (defaulting to 0). static LLT pointer(uint16_t AddressSpace, unsigned SizeInBits) { - return LLT{Pointer, AddressSpace, SizeInBits}; + assert(SizeInBits > 0 && "invalid pointer size"); + return LLT{/*isPointer=*/true, /*isVector=*/false, /*NumElements=*/0, + SizeInBits, AddressSpace}; } /// Get a low-level vector of some number of elements and element width. /// \p NumElements must be at least 2. static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits) { assert(NumElements > 1 && "invalid number of vector elements"); - return LLT{Vector, NumElements, ScalarSizeInBits}; + assert(ScalarSizeInBits > 0 && "invalid vector element size"); + return LLT{/*isPointer=*/false, /*isVector=*/true, NumElements, + ScalarSizeInBits, /*AddressSpace=*/0}; } /// Get a low-level vector of some number of elements and element type. static LLT vector(uint16_t NumElements, LLT ScalarTy) { assert(NumElements > 1 && "invalid number of vector elements"); - assert(ScalarTy.isScalar() && "invalid vector element type"); - return LLT{Vector, NumElements, ScalarTy.getSizeInBits()}; + assert(!ScalarTy.isVector() && "invalid vector element type"); + return LLT{ScalarTy.isPointer(), /*isVector=*/true, NumElements, + ScalarTy.getSizeInBits(), + ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0}; } - explicit LLT(TypeKind Kind, uint16_t NumElements, unsigned SizeInBits) - : SizeInBits(SizeInBits), ElementsOrAddrSpace(NumElements), Kind(Kind) { - assert((Kind != Vector || ElementsOrAddrSpace > 1) && - "invalid number of vector elements"); + explicit LLT(bool isPointer, bool isVector, uint16_t NumElements, + unsigned SizeInBits, unsigned AddressSpace) { + init(isPointer, isVector, NumElements, SizeInBits, AddressSpace); } - - explicit LLT() : SizeInBits(0), ElementsOrAddrSpace(0), Kind(Invalid) {} + explicit LLT() : IsPointer(false), IsVector(false), RawData(0) {} explicit LLT(MVT VT); - bool isValid() const { return Kind != Invalid; } + bool isValid() const { return RawData != 0; } - bool isScalar() const { return Kind == Scalar; } + bool isScalar() const { return isValid() && !IsPointer && !IsVector; } - bool isPointer() const { return Kind == Pointer; } + bool isPointer() const { return isValid() && IsPointer && !IsVector; } - bool isVector() const { return Kind == Vector; } + bool isVector() const { return isValid() && IsVector; } /// Returns the number of elements in a vector LLT. Must only be called on /// vector types. uint16_t getNumElements() const { - assert(isVector() && "cannot get number of elements on scalar/aggregate"); - return ElementsOrAddrSpace; + assert(IsVector && "cannot get number of elements on scalar/aggregate"); + if (!IsPointer) + return getFieldValue(VectorElementsFieldInfo); + else + return getFieldValue(PointerVectorElementsFieldInfo); } /// Returns the total size of the type. Must only be called on sized types. unsigned getSizeInBits() const { if (isPointer() || isScalar()) - return SizeInBits; - return SizeInBits * ElementsOrAddrSpace; + return getScalarSizeInBits(); + return getScalarSizeInBits() * getNumElements(); } unsigned getScalarSizeInBits() const { - return SizeInBits; + assert(RawData != 0 && "Invalid Type"); + if (!IsVector) { + if (!IsPointer) + return getFieldValue(ScalarSizeFieldInfo); + else + return getFieldValue(PointerSizeFieldInfo); + } else { + if (!IsPointer) + return getFieldValue(VectorSizeFieldInfo); + else + return getFieldValue(PointerVectorSizeFieldInfo); + } } unsigned getAddressSpace() const { - assert(isPointer() && "cannot get address space of non-pointer type"); - return ElementsOrAddrSpace; + assert(RawData != 0 && "Invalid Type"); + assert(IsPointer && "cannot get address space of non-pointer type"); + if (!IsVector) + return getFieldValue(PointerAddressSpaceFieldInfo); + else + return getFieldValue(PointerVectorAddressSpaceFieldInfo); } /// Returns the vector's element type. Only valid for vector types. LLT getElementType() const { assert(isVector() && "cannot get element type of scalar/aggregate"); - return scalar(SizeInBits); + if (IsPointer) + return pointer(getAddressSpace(), getScalarSizeInBits()); + else + return scalar(getScalarSizeInBits()); } /// Get a low-level type with half the size of the original, by halving the /// size of the scalar type involved. For example `s32` will become `s16`, /// `<2 x s32>` will become `<2 x s16>`. LLT halfScalarSize() const { - assert(!isPointer() && getScalarSizeInBits() > 1 && + assert(!IsPointer && getScalarSizeInBits() > 1 && getScalarSizeInBits() % 2 == 0 && "cannot half size of this type"); - return LLT{Kind, ElementsOrAddrSpace, SizeInBits / 2}; + return LLT{/*isPointer=*/false, IsVector ? true : false, + IsVector ? getNumElements() : (uint16_t)0, + getScalarSizeInBits() / 2, /*AddressSpace=*/0}; } /// Get a low-level type with twice the size of the original, by doubling the /// size of the scalar type involved. For example `s32` will become `s64`, /// `<2 x s32>` will become `<2 x s64>`. LLT doubleScalarSize() const { - assert(!isPointer() && "cannot change size of this type"); - return LLT{Kind, ElementsOrAddrSpace, SizeInBits * 2}; + assert(!IsPointer && "cannot change size of this type"); + return LLT{/*isPointer=*/false, IsVector ? true : false, + IsVector ? getNumElements() : (uint16_t)0, + getScalarSizeInBits() * 2, /*AddressSpace=*/0}; } /// Get a low-level type with half the size of the original, by halving the @@ -140,13 +163,13 @@ public: /// a vector type with an even number of elements. For example `<4 x s32>` /// will become `<2 x s32>`, `<2 x s32>` will become `s32`. LLT halfElements() const { - assert(isVector() && ElementsOrAddrSpace % 2 == 0 && - "cannot half odd vector"); - if (ElementsOrAddrSpace == 2) - return scalar(SizeInBits); + assert(isVector() && getNumElements() % 2 == 0 && "cannot half odd vector"); + if (getNumElements() == 2) + return scalar(getScalarSizeInBits()); - return LLT{Vector, static_cast(ElementsOrAddrSpace / 2), - SizeInBits}; + return LLT{/*isPointer=*/false, /*isVector=*/true, + (uint16_t)(getNumElements() / 2), getScalarSizeInBits(), + /*AddressSpace=*/0}; } /// Get a low-level type with twice the size of the original, by doubling the @@ -154,25 +177,105 @@ public: /// a vector type. For example `<2 x s32>` will become `<4 x s32>`. Doubling /// the number of elements in sN produces <2 x sN>. LLT doubleElements() const { - assert(!isPointer() && "cannot double elements in pointer"); - return LLT{Vector, static_cast(ElementsOrAddrSpace * 2), - SizeInBits}; + return LLT{IsPointer ? true : false, /*isVector=*/true, + (uint16_t)(getNumElements() * 2), getScalarSizeInBits(), + IsPointer ? getAddressSpace() : 0}; } void print(raw_ostream &OS) const; bool operator==(const LLT &RHS) const { - return Kind == RHS.Kind && SizeInBits == RHS.SizeInBits && - ElementsOrAddrSpace == RHS.ElementsOrAddrSpace; + return IsPointer == RHS.IsPointer && IsVector == RHS.IsVector && + RHS.RawData == RawData; } bool operator!=(const LLT &RHS) const { return !(*this == RHS); } friend struct DenseMapInfo; + private: - unsigned SizeInBits; - uint16_t ElementsOrAddrSpace; - TypeKind Kind; + /// LLT is packed into 64 bits as follows: + /// isPointer : 1 + /// isVector : 1 + /// with 62 bits remaining for Kind-specific data, packed in bitfields + /// as described below. As there isn't a simple portable way to pack bits + /// into bitfields, here the different fields in the packed structure is + /// described in static const *Field variables. Each of these variables + /// is a 2-element array, with the first element describing the bitfield size + /// and the second element describing the bitfield offset. + typedef int BitFieldInfo[2]; + /// + /// This is how the bitfields are packed per Kind: + /// * Invalid: + /// gets encoded as RawData == 0, as that is an invalid encoding, since for + /// valid encodings, SizeInBits/SizeOfElement must be larger than 0. + /// * Non-pointer scalar (isPointer == 0 && isVector == 0): + /// SizeInBits: 32; + static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 0}; + /// * Pointer (isPointer == 1 && isVector == 0): + /// SizeInBits: 16; + /// AddressSpace: 23; + static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 0}; + static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{ + 23, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]}; + /// * Vector-of-non-pointer (isPointer == 0 && isVector == 1): + /// NumElements: 16; + /// SizeOfElement: 32; + static const constexpr BitFieldInfo VectorElementsFieldInfo{16, 0}; + static const constexpr BitFieldInfo VectorSizeFieldInfo{ + 32, VectorElementsFieldInfo[0] + VectorElementsFieldInfo[1]}; + /// * Vector-of-pointer (isPointer == 1 && isVector == 1): + /// NumElements: 16; + /// SizeOfElement: 16; + /// AddressSpace: 23; + static const constexpr BitFieldInfo PointerVectorElementsFieldInfo{16, 0}; + static const constexpr BitFieldInfo PointerVectorSizeFieldInfo{ + 16, + PointerVectorElementsFieldInfo[1] + PointerVectorElementsFieldInfo[0]}; + static const constexpr BitFieldInfo PointerVectorAddressSpaceFieldInfo{ + 23, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]}; + + uint64_t IsPointer : 1; + uint64_t IsVector : 1; + uint64_t RawData : 62; + + static uint64_t getMask(const BitFieldInfo FieldInfo) { + const int FieldSizeInBits = FieldInfo[0]; + return (((uint64_t)1) << FieldSizeInBits) - 1; + } + static uint64_t maskAndShift(uint64_t Val, uint64_t Mask, uint8_t Shift) { + assert(Val <= Mask && "Value too large for field"); + return (Val & Mask) << Shift; + } + static uint64_t maskAndShift(uint64_t Val, const BitFieldInfo FieldInfo) { + return maskAndShift(Val, getMask(FieldInfo), FieldInfo[1]); + } + uint64_t getFieldValue(const BitFieldInfo FieldInfo) const { + return getMask(FieldInfo) & (RawData >> FieldInfo[1]); + } + + void init(bool IsPointer, bool IsVector, uint16_t NumElements, + unsigned SizeInBits, unsigned AddressSpace) { + this->IsPointer = IsPointer; + this->IsVector = IsVector; + if (!IsVector) { + if (!IsPointer) + RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo); + else + RawData = maskAndShift(SizeInBits, PointerSizeFieldInfo) | + maskAndShift(AddressSpace, PointerAddressSpaceFieldInfo); + } else { + assert(NumElements > 1 && "invalid number of vector elements"); + if (!IsPointer) + RawData = maskAndShift(NumElements, VectorElementsFieldInfo) | + maskAndShift(SizeInBits, VectorSizeFieldInfo); + else + RawData = + maskAndShift(NumElements, PointerVectorElementsFieldInfo) | + maskAndShift(SizeInBits, PointerVectorSizeFieldInfo) | + maskAndShift(AddressSpace, PointerVectorAddressSpaceFieldInfo); + } + } }; inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) { @@ -182,14 +285,18 @@ inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) { template<> struct DenseMapInfo { static inline LLT getEmptyKey() { - return LLT{LLT::Invalid, 0, -1u}; + LLT Invalid; + Invalid.IsPointer = true; + return Invalid; } static inline LLT getTombstoneKey() { - return LLT{LLT::Invalid, 0, -2u}; + LLT Invalid; + Invalid.IsVector = true; + return Invalid; } static inline unsigned getHashValue(const LLT &Ty) { - uint64_t Val = ((uint64_t)Ty.SizeInBits << 32) | - ((uint64_t)Ty.ElementsOrAddrSpace << 16) | (uint64_t)Ty.Kind; + uint64_t Val = ((uint64_t)Ty.RawData) << 2 | ((uint64_t)Ty.IsPointer) << 1 | + ((uint64_t)Ty.IsVector); return DenseMapInfo::getHashValue(Val); } static bool isEqual(const LLT &LHS, const LLT &RHS) { diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h deleted file mode 100644 index 3d704292c260a13335abb11971e3b23da95a9e75..0000000000000000000000000000000000000000 --- a/include/llvm/Support/MachO.h +++ /dev/null @@ -1,2038 +0,0 @@ -//===-- llvm/Support/MachO.h - The MachO file format ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines manifest constants for the MachO object file format. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_MACHO_H -#define LLVM_SUPPORT_MACHO_H - -#include "llvm/Support/Compiler.h" -#include "llvm/Support/DataTypes.h" -#include "llvm/Support/Host.h" - -namespace llvm { - namespace MachO { - // Enums from - enum : uint32_t { - // Constants for the "magic" field in llvm::MachO::mach_header and - // llvm::MachO::mach_header_64 - MH_MAGIC = 0xFEEDFACEu, - MH_CIGAM = 0xCEFAEDFEu, - MH_MAGIC_64 = 0xFEEDFACFu, - MH_CIGAM_64 = 0xCFFAEDFEu, - FAT_MAGIC = 0xCAFEBABEu, - FAT_CIGAM = 0xBEBAFECAu, - FAT_MAGIC_64 = 0xCAFEBABFu, - FAT_CIGAM_64 = 0xBFBAFECAu - }; - - enum HeaderFileType { - // Constants for the "filetype" field in llvm::MachO::mach_header and - // llvm::MachO::mach_header_64 - MH_OBJECT = 0x1u, - MH_EXECUTE = 0x2u, - MH_FVMLIB = 0x3u, - MH_CORE = 0x4u, - MH_PRELOAD = 0x5u, - MH_DYLIB = 0x6u, - MH_DYLINKER = 0x7u, - MH_BUNDLE = 0x8u, - MH_DYLIB_STUB = 0x9u, - MH_DSYM = 0xAu, - MH_KEXT_BUNDLE = 0xBu - }; - - enum { - // Constant bits for the "flags" field in llvm::MachO::mach_header and - // llvm::MachO::mach_header_64 - MH_NOUNDEFS = 0x00000001u, - MH_INCRLINK = 0x00000002u, - MH_DYLDLINK = 0x00000004u, - MH_BINDATLOAD = 0x00000008u, - MH_PREBOUND = 0x00000010u, - MH_SPLIT_SEGS = 0x00000020u, - MH_LAZY_INIT = 0x00000040u, - MH_TWOLEVEL = 0x00000080u, - MH_FORCE_FLAT = 0x00000100u, - MH_NOMULTIDEFS = 0x00000200u, - MH_NOFIXPREBINDING = 0x00000400u, - MH_PREBINDABLE = 0x00000800u, - MH_ALLMODSBOUND = 0x00001000u, - MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000u, - MH_CANONICAL = 0x00004000u, - MH_WEAK_DEFINES = 0x00008000u, - MH_BINDS_TO_WEAK = 0x00010000u, - MH_ALLOW_STACK_EXECUTION = 0x00020000u, - MH_ROOT_SAFE = 0x00040000u, - MH_SETUID_SAFE = 0x00080000u, - MH_NO_REEXPORTED_DYLIBS = 0x00100000u, - MH_PIE = 0x00200000u, - MH_DEAD_STRIPPABLE_DYLIB = 0x00400000u, - MH_HAS_TLV_DESCRIPTORS = 0x00800000u, - MH_NO_HEAP_EXECUTION = 0x01000000u, - MH_APP_EXTENSION_SAFE = 0x02000000u - }; - - enum : uint32_t { - // Flags for the "cmd" field in llvm::MachO::load_command - LC_REQ_DYLD = 0x80000000u - }; - -#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \ - LCName = LCValue, - - enum LoadCommandType : uint32_t { - #include "llvm/Support/MachO.def" - }; - -#undef HANDLE_LOAD_COMMAND - - enum : uint32_t { - // Constant bits for the "flags" field in llvm::MachO::segment_command - SG_HIGHVM = 0x1u, - SG_FVMLIB = 0x2u, - SG_NORELOC = 0x4u, - SG_PROTECTED_VERSION_1 = 0x8u, - - // Constant masks for the "flags" field in llvm::MachO::section and - // llvm::MachO::section_64 - SECTION_TYPE = 0x000000ffu, // SECTION_TYPE - SECTION_ATTRIBUTES = 0xffffff00u, // SECTION_ATTRIBUTES - SECTION_ATTRIBUTES_USR = 0xff000000u, // SECTION_ATTRIBUTES_USR - SECTION_ATTRIBUTES_SYS = 0x00ffff00u // SECTION_ATTRIBUTES_SYS - }; - - /// These are the section type and attributes fields. A MachO section can - /// have only one Type, but can have any of the attributes specified. - enum SectionType : uint32_t { - // Constant masks for the "flags[7:0]" field in llvm::MachO::section and - // llvm::MachO::section_64 (mask "flags" with SECTION_TYPE) - - /// S_REGULAR - Regular section. - S_REGULAR = 0x00u, - /// S_ZEROFILL - Zero fill on demand section. - S_ZEROFILL = 0x01u, - /// S_CSTRING_LITERALS - Section with literal C strings. - S_CSTRING_LITERALS = 0x02u, - /// S_4BYTE_LITERALS - Section with 4 byte literals. - S_4BYTE_LITERALS = 0x03u, - /// S_8BYTE_LITERALS - Section with 8 byte literals. - S_8BYTE_LITERALS = 0x04u, - /// S_LITERAL_POINTERS - Section with pointers to literals. - S_LITERAL_POINTERS = 0x05u, - /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers. - S_NON_LAZY_SYMBOL_POINTERS = 0x06u, - /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers. - S_LAZY_SYMBOL_POINTERS = 0x07u, - /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in - /// the Reserved2 field. - S_SYMBOL_STUBS = 0x08u, - /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for - /// initialization. - S_MOD_INIT_FUNC_POINTERS = 0x09u, - /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for - /// termination. - S_MOD_TERM_FUNC_POINTERS = 0x0au, - /// S_COALESCED - Section contains symbols that are to be coalesced. - S_COALESCED = 0x0bu, - /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4 - /// gigabytes). - S_GB_ZEROFILL = 0x0cu, - /// S_INTERPOSING - Section with only pairs of function pointers for - /// interposing. - S_INTERPOSING = 0x0du, - /// S_16BYTE_LITERALS - Section with only 16 byte literals. - S_16BYTE_LITERALS = 0x0eu, - /// S_DTRACE_DOF - Section contains DTrace Object Format. - S_DTRACE_DOF = 0x0fu, - /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to - /// lazy loaded dylibs. - S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10u, - /// S_THREAD_LOCAL_REGULAR - Thread local data section. - S_THREAD_LOCAL_REGULAR = 0x11u, - /// S_THREAD_LOCAL_ZEROFILL - Thread local zerofill section. - S_THREAD_LOCAL_ZEROFILL = 0x12u, - /// S_THREAD_LOCAL_VARIABLES - Section with thread local variable - /// structure data. - S_THREAD_LOCAL_VARIABLES = 0x13u, - /// S_THREAD_LOCAL_VARIABLE_POINTERS - Section with pointers to thread - /// local structures. - S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14u, - /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local - /// variable initialization pointers to functions. - S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u, - - LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - }; - - enum : uint32_t { - // Constant masks for the "flags[31:24]" field in llvm::MachO::section and - // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_USR) - - /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine - /// instructions. - S_ATTR_PURE_INSTRUCTIONS = 0x80000000u, - /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be - /// in a ranlib table of contents. - S_ATTR_NO_TOC = 0x40000000u, - /// S_ATTR_STRIP_STATIC_SYMS - Ok to strip static symbols in this section - /// in files with the MY_DYLDLINK flag. - S_ATTR_STRIP_STATIC_SYMS = 0x20000000u, - /// S_ATTR_NO_DEAD_STRIP - No dead stripping. - S_ATTR_NO_DEAD_STRIP = 0x10000000u, - /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks. - S_ATTR_LIVE_SUPPORT = 0x08000000u, - /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by - /// dyld. - S_ATTR_SELF_MODIFYING_CODE = 0x04000000u, - /// S_ATTR_DEBUG - A debug section. - S_ATTR_DEBUG = 0x02000000u, - - // Constant masks for the "flags[23:8]" field in llvm::MachO::section and - // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_SYS) - - /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions. - S_ATTR_SOME_INSTRUCTIONS = 0x00000400u, - /// S_ATTR_EXT_RELOC - Section has external relocation entries. - S_ATTR_EXT_RELOC = 0x00000200u, - /// S_ATTR_LOC_RELOC - Section has local relocation entries. - S_ATTR_LOC_RELOC = 0x00000100u, - - // Constant masks for the value of an indirect symbol in an indirect - // symbol table - INDIRECT_SYMBOL_LOCAL = 0x80000000u, - INDIRECT_SYMBOL_ABS = 0x40000000u - }; - - enum DataRegionType { - // Constants for the "kind" field in a data_in_code_entry structure - DICE_KIND_DATA = 1u, - DICE_KIND_JUMP_TABLE8 = 2u, - DICE_KIND_JUMP_TABLE16 = 3u, - DICE_KIND_JUMP_TABLE32 = 4u, - DICE_KIND_ABS_JUMP_TABLE32 = 5u - }; - - enum RebaseType { - REBASE_TYPE_POINTER = 1u, - REBASE_TYPE_TEXT_ABSOLUTE32 = 2u, - REBASE_TYPE_TEXT_PCREL32 = 3u - }; - - enum { - REBASE_OPCODE_MASK = 0xF0u, - REBASE_IMMEDIATE_MASK = 0x0Fu - }; - - enum RebaseOpcode { - REBASE_OPCODE_DONE = 0x00u, - REBASE_OPCODE_SET_TYPE_IMM = 0x10u, - REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x20u, - REBASE_OPCODE_ADD_ADDR_ULEB = 0x30u, - REBASE_OPCODE_ADD_ADDR_IMM_SCALED = 0x40u, - REBASE_OPCODE_DO_REBASE_IMM_TIMES = 0x50u, - REBASE_OPCODE_DO_REBASE_ULEB_TIMES = 0x60u, - REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB = 0x70u, - REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB = 0x80u - }; - - enum BindType { - BIND_TYPE_POINTER = 1u, - BIND_TYPE_TEXT_ABSOLUTE32 = 2u, - BIND_TYPE_TEXT_PCREL32 = 3u - }; - - enum BindSpecialDylib { - BIND_SPECIAL_DYLIB_SELF = 0, - BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE = -1, - BIND_SPECIAL_DYLIB_FLAT_LOOKUP = -2 - }; - - enum { - BIND_SYMBOL_FLAGS_WEAK_IMPORT = 0x1u, - BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION = 0x8u, - - BIND_OPCODE_MASK = 0xF0u, - BIND_IMMEDIATE_MASK = 0x0Fu - }; - - enum BindOpcode { - BIND_OPCODE_DONE = 0x00u, - BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10u, - BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20u, - BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30u, - BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40u, - BIND_OPCODE_SET_TYPE_IMM = 0x50u, - BIND_OPCODE_SET_ADDEND_SLEB = 0x60u, - BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70u, - BIND_OPCODE_ADD_ADDR_ULEB = 0x80u, - BIND_OPCODE_DO_BIND = 0x90u, - BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0u, - BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0u, - BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0u - }; - - enum { - EXPORT_SYMBOL_FLAGS_KIND_MASK = 0x03u, - EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION = 0x04u, - EXPORT_SYMBOL_FLAGS_REEXPORT = 0x08u, - EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER = 0x10u - }; - - enum ExportSymbolKind { - EXPORT_SYMBOL_FLAGS_KIND_REGULAR = 0x00u, - EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL = 0x01u, - EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u - }; - - enum { - // Constant masks for the "n_type" field in llvm::MachO::nlist and - // llvm::MachO::nlist_64 - N_STAB = 0xe0, - N_PEXT = 0x10, - N_TYPE = 0x0e, - N_EXT = 0x01 - }; - - enum NListType : uint8_t { - // Constants for the "n_type & N_TYPE" llvm::MachO::nlist and - // llvm::MachO::nlist_64 - N_UNDF = 0x0u, - N_ABS = 0x2u, - N_SECT = 0xeu, - N_PBUD = 0xcu, - N_INDR = 0xau - }; - - enum SectionOrdinal { - // Constants for the "n_sect" field in llvm::MachO::nlist and - // llvm::MachO::nlist_64 - NO_SECT = 0u, - MAX_SECT = 0xffu - }; - - enum { - // Constant masks for the "n_desc" field in llvm::MachO::nlist and - // llvm::MachO::nlist_64 - // The low 3 bits are the for the REFERENCE_TYPE. - REFERENCE_TYPE = 0x7, - REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, - REFERENCE_FLAG_UNDEFINED_LAZY = 1, - REFERENCE_FLAG_DEFINED = 2, - REFERENCE_FLAG_PRIVATE_DEFINED = 3, - REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, - REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5, - // Flag bits (some overlap with the library ordinal bits). - N_ARM_THUMB_DEF = 0x0008u, - REFERENCED_DYNAMICALLY = 0x0010u, - N_NO_DEAD_STRIP = 0x0020u, - N_WEAK_REF = 0x0040u, - N_WEAK_DEF = 0x0080u, - N_SYMBOL_RESOLVER = 0x0100u, - N_ALT_ENTRY = 0x0200u, - // For undefined symbols coming from libraries, see GET_LIBRARY_ORDINAL() - // as these are in the top 8 bits. - SELF_LIBRARY_ORDINAL = 0x0, - MAX_LIBRARY_ORDINAL = 0xfd, - DYNAMIC_LOOKUP_ORDINAL = 0xfe, - EXECUTABLE_ORDINAL = 0xff - }; - - enum StabType { - // Constant values for the "n_type" field in llvm::MachO::nlist and - // llvm::MachO::nlist_64 when "(n_type & N_STAB) != 0" - N_GSYM = 0x20u, - N_FNAME = 0x22u, - N_FUN = 0x24u, - N_STSYM = 0x26u, - N_LCSYM = 0x28u, - N_BNSYM = 0x2Eu, - N_PC = 0x30u, - N_AST = 0x32u, - N_OPT = 0x3Cu, - N_RSYM = 0x40u, - N_SLINE = 0x44u, - N_ENSYM = 0x4Eu, - N_SSYM = 0x60u, - N_SO = 0x64u, - N_OSO = 0x66u, - N_LSYM = 0x80u, - N_BINCL = 0x82u, - N_SOL = 0x84u, - N_PARAMS = 0x86u, - N_VERSION = 0x88u, - N_OLEVEL = 0x8Au, - N_PSYM = 0xA0u, - N_EINCL = 0xA2u, - N_ENTRY = 0xA4u, - N_LBRAC = 0xC0u, - N_EXCL = 0xC2u, - N_RBRAC = 0xE0u, - N_BCOMM = 0xE2u, - N_ECOMM = 0xE4u, - N_ECOML = 0xE8u, - N_LENG = 0xFEu - }; - - enum : uint32_t { - // Constant values for the r_symbolnum field in an - // llvm::MachO::relocation_info structure when r_extern is 0. - R_ABS = 0, - - // Constant bits for the r_address field in an - // llvm::MachO::relocation_info structure. - R_SCATTERED = 0x80000000 - }; - - enum RelocationInfoType { - // Constant values for the r_type field in an - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure. - GENERIC_RELOC_VANILLA = 0, - GENERIC_RELOC_PAIR = 1, - GENERIC_RELOC_SECTDIFF = 2, - GENERIC_RELOC_PB_LA_PTR = 3, - GENERIC_RELOC_LOCAL_SECTDIFF = 4, - GENERIC_RELOC_TLV = 5, - - // Constant values for the r_type field in a PowerPC architecture - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure. - PPC_RELOC_VANILLA = GENERIC_RELOC_VANILLA, - PPC_RELOC_PAIR = GENERIC_RELOC_PAIR, - PPC_RELOC_BR14 = 2, - PPC_RELOC_BR24 = 3, - PPC_RELOC_HI16 = 4, - PPC_RELOC_LO16 = 5, - PPC_RELOC_HA16 = 6, - PPC_RELOC_LO14 = 7, - PPC_RELOC_SECTDIFF = 8, - PPC_RELOC_PB_LA_PTR = 9, - PPC_RELOC_HI16_SECTDIFF = 10, - PPC_RELOC_LO16_SECTDIFF = 11, - PPC_RELOC_HA16_SECTDIFF = 12, - PPC_RELOC_JBSR = 13, - PPC_RELOC_LO14_SECTDIFF = 14, - PPC_RELOC_LOCAL_SECTDIFF = 15, - - // Constant values for the r_type field in an ARM architecture - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure. - ARM_RELOC_VANILLA = GENERIC_RELOC_VANILLA, - ARM_RELOC_PAIR = GENERIC_RELOC_PAIR, - ARM_RELOC_SECTDIFF = GENERIC_RELOC_SECTDIFF, - ARM_RELOC_LOCAL_SECTDIFF = 3, - ARM_RELOC_PB_LA_PTR = 4, - ARM_RELOC_BR24 = 5, - ARM_THUMB_RELOC_BR22 = 6, - ARM_THUMB_32BIT_BRANCH = 7, // obsolete - ARM_RELOC_HALF = 8, - ARM_RELOC_HALF_SECTDIFF = 9, - - // Constant values for the r_type field in an ARM64 architecture - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure. - - // For pointers. - ARM64_RELOC_UNSIGNED = 0, - // Must be followed by an ARM64_RELOC_UNSIGNED - ARM64_RELOC_SUBTRACTOR = 1, - // A B/BL instruction with 26-bit displacement. - ARM64_RELOC_BRANCH26 = 2, - // PC-rel distance to page of target. - ARM64_RELOC_PAGE21 = 3, - // Offset within page, scaled by r_length. - ARM64_RELOC_PAGEOFF12 = 4, - // PC-rel distance to page of GOT slot. - ARM64_RELOC_GOT_LOAD_PAGE21 = 5, - // Offset within page of GOT slot, scaled by r_length. - ARM64_RELOC_GOT_LOAD_PAGEOFF12 = 6, - // For pointers to GOT slots. - ARM64_RELOC_POINTER_TO_GOT = 7, - // PC-rel distance to page of TLVP slot. - ARM64_RELOC_TLVP_LOAD_PAGE21 = 8, - // Offset within page of TLVP slot, scaled by r_length. - ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9, - // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. - ARM64_RELOC_ADDEND = 10, - - // Constant values for the r_type field in an x86_64 architecture - // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info - // structure - X86_64_RELOC_UNSIGNED = 0, - X86_64_RELOC_SIGNED = 1, - X86_64_RELOC_BRANCH = 2, - X86_64_RELOC_GOT_LOAD = 3, - X86_64_RELOC_GOT = 4, - X86_64_RELOC_SUBTRACTOR = 5, - X86_64_RELOC_SIGNED_1 = 6, - X86_64_RELOC_SIGNED_2 = 7, - X86_64_RELOC_SIGNED_4 = 8, - X86_64_RELOC_TLV = 9 - }; - - // Values for segment_command.initprot. - // From - enum { - VM_PROT_READ = 0x1, - VM_PROT_WRITE = 0x2, - VM_PROT_EXECUTE = 0x4 - }; - - // Values for platform field in build_version_command. - enum { - PLATFORM_MACOS = 1, - PLATFORM_IOS = 2, - PLATFORM_TVOS = 3, - PLATFORM_WATCHOS = 4, - PLATFORM_BRIDGEOS = 5 - }; - - // Values for tools enum in build_tool_version. - enum { - TOOL_CLANG = 1, - TOOL_SWIFT = 2, - TOOL_LD = 3 - }; - - // Structs from - - struct mach_header { - uint32_t magic; - uint32_t cputype; - uint32_t cpusubtype; - uint32_t filetype; - uint32_t ncmds; - uint32_t sizeofcmds; - uint32_t flags; - }; - - struct mach_header_64 { - uint32_t magic; - uint32_t cputype; - uint32_t cpusubtype; - uint32_t filetype; - uint32_t ncmds; - uint32_t sizeofcmds; - uint32_t flags; - uint32_t reserved; - }; - - struct load_command { - uint32_t cmd; - uint32_t cmdsize; - }; - - struct segment_command { - uint32_t cmd; - uint32_t cmdsize; - char segname[16]; - uint32_t vmaddr; - uint32_t vmsize; - uint32_t fileoff; - uint32_t filesize; - uint32_t maxprot; - uint32_t initprot; - uint32_t nsects; - uint32_t flags; - }; - - struct segment_command_64 { - uint32_t cmd; - uint32_t cmdsize; - char segname[16]; - uint64_t vmaddr; - uint64_t vmsize; - uint64_t fileoff; - uint64_t filesize; - uint32_t maxprot; - uint32_t initprot; - uint32_t nsects; - uint32_t flags; - }; - - struct section { - char sectname[16]; - char segname[16]; - uint32_t addr; - uint32_t size; - uint32_t offset; - uint32_t align; - uint32_t reloff; - uint32_t nreloc; - uint32_t flags; - uint32_t reserved1; - uint32_t reserved2; - }; - - struct section_64 { - char sectname[16]; - char segname[16]; - uint64_t addr; - uint64_t size; - uint32_t offset; - uint32_t align; - uint32_t reloff; - uint32_t nreloc; - uint32_t flags; - uint32_t reserved1; - uint32_t reserved2; - uint32_t reserved3; - }; - - struct fvmlib { - uint32_t name; - uint32_t minor_version; - uint32_t header_addr; - }; - - // The fvmlib_command is obsolete and no longer supported. - struct fvmlib_command { - uint32_t cmd; - uint32_t cmdsize; - struct fvmlib fvmlib; - }; - - struct dylib { - uint32_t name; - uint32_t timestamp; - uint32_t current_version; - uint32_t compatibility_version; - }; - - struct dylib_command { - uint32_t cmd; - uint32_t cmdsize; - struct dylib dylib; - }; - - struct sub_framework_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t umbrella; - }; - - struct sub_client_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t client; - }; - - struct sub_umbrella_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t sub_umbrella; - }; - - struct sub_library_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t sub_library; - }; - - // The prebound_dylib_command is obsolete and no longer supported. - struct prebound_dylib_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t name; - uint32_t nmodules; - uint32_t linked_modules; - }; - - struct dylinker_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t name; - }; - - struct thread_command { - uint32_t cmd; - uint32_t cmdsize; - }; - - struct routines_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t init_address; - uint32_t init_module; - uint32_t reserved1; - uint32_t reserved2; - uint32_t reserved3; - uint32_t reserved4; - uint32_t reserved5; - uint32_t reserved6; - }; - - struct routines_command_64 { - uint32_t cmd; - uint32_t cmdsize; - uint64_t init_address; - uint64_t init_module; - uint64_t reserved1; - uint64_t reserved2; - uint64_t reserved3; - uint64_t reserved4; - uint64_t reserved5; - uint64_t reserved6; - }; - - struct symtab_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t symoff; - uint32_t nsyms; - uint32_t stroff; - uint32_t strsize; - }; - - struct dysymtab_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t ilocalsym; - uint32_t nlocalsym; - uint32_t iextdefsym; - uint32_t nextdefsym; - uint32_t iundefsym; - uint32_t nundefsym; - uint32_t tocoff; - uint32_t ntoc; - uint32_t modtaboff; - uint32_t nmodtab; - uint32_t extrefsymoff; - uint32_t nextrefsyms; - uint32_t indirectsymoff; - uint32_t nindirectsyms; - uint32_t extreloff; - uint32_t nextrel; - uint32_t locreloff; - uint32_t nlocrel; - }; - - struct dylib_table_of_contents { - uint32_t symbol_index; - uint32_t module_index; - }; - - struct dylib_module { - uint32_t module_name; - uint32_t iextdefsym; - uint32_t nextdefsym; - uint32_t irefsym; - uint32_t nrefsym; - uint32_t ilocalsym; - uint32_t nlocalsym; - uint32_t iextrel; - uint32_t nextrel; - uint32_t iinit_iterm; - uint32_t ninit_nterm; - uint32_t objc_module_info_addr; - uint32_t objc_module_info_size; - }; - - struct dylib_module_64 { - uint32_t module_name; - uint32_t iextdefsym; - uint32_t nextdefsym; - uint32_t irefsym; - uint32_t nrefsym; - uint32_t ilocalsym; - uint32_t nlocalsym; - uint32_t iextrel; - uint32_t nextrel; - uint32_t iinit_iterm; - uint32_t ninit_nterm; - uint32_t objc_module_info_size; - uint64_t objc_module_info_addr; - }; - - struct dylib_reference { - uint32_t isym:24, - flags:8; - }; - - // The twolevel_hints_command is obsolete and no longer supported. - struct twolevel_hints_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t offset; - uint32_t nhints; - }; - - // The twolevel_hints_command is obsolete and no longer supported. - struct twolevel_hint { - uint32_t isub_image:8, - itoc:24; - }; - - // The prebind_cksum_command is obsolete and no longer supported. - struct prebind_cksum_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t cksum; - }; - - struct uuid_command { - uint32_t cmd; - uint32_t cmdsize; - uint8_t uuid[16]; - }; - - struct rpath_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t path; - }; - - struct linkedit_data_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t dataoff; - uint32_t datasize; - }; - - struct data_in_code_entry { - uint32_t offset; - uint16_t length; - uint16_t kind; - }; - - struct source_version_command { - uint32_t cmd; - uint32_t cmdsize; - uint64_t version; - }; - - struct encryption_info_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t cryptoff; - uint32_t cryptsize; - uint32_t cryptid; - }; - - struct encryption_info_command_64 { - uint32_t cmd; - uint32_t cmdsize; - uint32_t cryptoff; - uint32_t cryptsize; - uint32_t cryptid; - uint32_t pad; - }; - - struct version_min_command { - uint32_t cmd; // LC_VERSION_MIN_MACOSX or - // LC_VERSION_MIN_IPHONEOS - uint32_t cmdsize; // sizeof(struct version_min_command) - uint32_t version; // X.Y.Z is encoded in nibbles xxxx.yy.zz - uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz - }; - - struct note_command { - uint32_t cmd; // LC_NOTE - uint32_t cmdsize; // sizeof(struct note_command) - char data_owner[16]; // owner name for this LC_NOTE - uint64_t offset; // file offset of this data - uint64_t size; // length of data region - }; - - struct build_tool_version { - uint32_t tool; // enum for the tool - uint32_t version; // version of the tool - }; - - struct build_version_command { - uint32_t cmd; // LC_BUILD_VERSION - uint32_t cmdsize; // sizeof(struct build_version_command) + - // ntools * sizeof(struct build_tool_version) - uint32_t platform; // platform - uint32_t minos; // X.Y.Z is encoded in nibbles xxxx.yy.zz - uint32_t sdk; // X.Y.Z is encoded in nibbles xxxx.yy.zz - uint32_t ntools; // number of tool entries following this - }; - - struct dyld_info_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t rebase_off; - uint32_t rebase_size; - uint32_t bind_off; - uint32_t bind_size; - uint32_t weak_bind_off; - uint32_t weak_bind_size; - uint32_t lazy_bind_off; - uint32_t lazy_bind_size; - uint32_t export_off; - uint32_t export_size; - }; - - struct linker_option_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t count; - }; - - // The symseg_command is obsolete and no longer supported. - struct symseg_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t offset; - uint32_t size; - }; - - // The ident_command is obsolete and no longer supported. - struct ident_command { - uint32_t cmd; - uint32_t cmdsize; - }; - - // The fvmfile_command is obsolete and no longer supported. - struct fvmfile_command { - uint32_t cmd; - uint32_t cmdsize; - uint32_t name; - uint32_t header_addr; - }; - - struct tlv_descriptor_32 { - uint32_t thunk; - uint32_t key; - uint32_t offset; - }; - - struct tlv_descriptor_64 { - uint64_t thunk; - uint64_t key; - uint64_t offset; - }; - - struct tlv_descriptor { - uintptr_t thunk; - uintptr_t key; - uintptr_t offset; - }; - - struct entry_point_command { - uint32_t cmd; - uint32_t cmdsize; - uint64_t entryoff; - uint64_t stacksize; - }; - - // Structs from - struct fat_header { - uint32_t magic; - uint32_t nfat_arch; - }; - - struct fat_arch { - uint32_t cputype; - uint32_t cpusubtype; - uint32_t offset; - uint32_t size; - uint32_t align; - }; - - struct fat_arch_64 { - uint32_t cputype; - uint32_t cpusubtype; - uint64_t offset; - uint64_t size; - uint32_t align; - uint32_t reserved; - }; - - // Structs from - struct relocation_info { - int32_t r_address; - uint32_t r_symbolnum:24, - r_pcrel:1, - r_length:2, - r_extern:1, - r_type:4; - }; - - struct scattered_relocation_info { -#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) - uint32_t r_scattered:1, - r_pcrel:1, - r_length:2, - r_type:4, - r_address:24; -#else - uint32_t r_address:24, - r_type:4, - r_length:2, - r_pcrel:1, - r_scattered:1; -#endif - int32_t r_value; - }; - - // Structs NOT from , but that make LLVM's life easier - struct any_relocation_info { - uint32_t r_word0, r_word1; - }; - - // Structs from - struct nlist_base { - uint32_t n_strx; - uint8_t n_type; - uint8_t n_sect; - uint16_t n_desc; - }; - - struct nlist { - uint32_t n_strx; - uint8_t n_type; - uint8_t n_sect; - int16_t n_desc; - uint32_t n_value; - }; - - struct nlist_64 { - uint32_t n_strx; - uint8_t n_type; - uint8_t n_sect; - uint16_t n_desc; - uint64_t n_value; - }; - - // Byte order swapping functions for MachO structs - - inline void swapStruct(fat_header &mh) { - sys::swapByteOrder(mh.magic); - sys::swapByteOrder(mh.nfat_arch); - } - - inline void swapStruct(fat_arch &mh) { - sys::swapByteOrder(mh.cputype); - sys::swapByteOrder(mh.cpusubtype); - sys::swapByteOrder(mh.offset); - sys::swapByteOrder(mh.size); - sys::swapByteOrder(mh.align); - } - - inline void swapStruct(fat_arch_64 &mh) { - sys::swapByteOrder(mh.cputype); - sys::swapByteOrder(mh.cpusubtype); - sys::swapByteOrder(mh.offset); - sys::swapByteOrder(mh.size); - sys::swapByteOrder(mh.align); - sys::swapByteOrder(mh.reserved); - } - - inline void swapStruct(mach_header &mh) { - sys::swapByteOrder(mh.magic); - sys::swapByteOrder(mh.cputype); - sys::swapByteOrder(mh.cpusubtype); - sys::swapByteOrder(mh.filetype); - sys::swapByteOrder(mh.ncmds); - sys::swapByteOrder(mh.sizeofcmds); - sys::swapByteOrder(mh.flags); - } - - inline void swapStruct(mach_header_64 &H) { - sys::swapByteOrder(H.magic); - sys::swapByteOrder(H.cputype); - sys::swapByteOrder(H.cpusubtype); - sys::swapByteOrder(H.filetype); - sys::swapByteOrder(H.ncmds); - sys::swapByteOrder(H.sizeofcmds); - sys::swapByteOrder(H.flags); - sys::swapByteOrder(H.reserved); - } - - inline void swapStruct(load_command &lc) { - sys::swapByteOrder(lc.cmd); - sys::swapByteOrder(lc.cmdsize); - } - - inline void swapStruct(symtab_command &lc) { - sys::swapByteOrder(lc.cmd); - sys::swapByteOrder(lc.cmdsize); - sys::swapByteOrder(lc.symoff); - sys::swapByteOrder(lc.nsyms); - sys::swapByteOrder(lc.stroff); - sys::swapByteOrder(lc.strsize); - } - - inline void swapStruct(segment_command_64 &seg) { - sys::swapByteOrder(seg.cmd); - sys::swapByteOrder(seg.cmdsize); - sys::swapByteOrder(seg.vmaddr); - sys::swapByteOrder(seg.vmsize); - sys::swapByteOrder(seg.fileoff); - sys::swapByteOrder(seg.filesize); - sys::swapByteOrder(seg.maxprot); - sys::swapByteOrder(seg.initprot); - sys::swapByteOrder(seg.nsects); - sys::swapByteOrder(seg.flags); - } - - inline void swapStruct(segment_command &seg) { - sys::swapByteOrder(seg.cmd); - sys::swapByteOrder(seg.cmdsize); - sys::swapByteOrder(seg.vmaddr); - sys::swapByteOrder(seg.vmsize); - sys::swapByteOrder(seg.fileoff); - sys::swapByteOrder(seg.filesize); - sys::swapByteOrder(seg.maxprot); - sys::swapByteOrder(seg.initprot); - sys::swapByteOrder(seg.nsects); - sys::swapByteOrder(seg.flags); - } - - inline void swapStruct(section_64 §) { - sys::swapByteOrder(sect.addr); - sys::swapByteOrder(sect.size); - sys::swapByteOrder(sect.offset); - sys::swapByteOrder(sect.align); - sys::swapByteOrder(sect.reloff); - sys::swapByteOrder(sect.nreloc); - sys::swapByteOrder(sect.flags); - sys::swapByteOrder(sect.reserved1); - sys::swapByteOrder(sect.reserved2); - } - - inline void swapStruct(section §) { - sys::swapByteOrder(sect.addr); - sys::swapByteOrder(sect.size); - sys::swapByteOrder(sect.offset); - sys::swapByteOrder(sect.align); - sys::swapByteOrder(sect.reloff); - sys::swapByteOrder(sect.nreloc); - sys::swapByteOrder(sect.flags); - sys::swapByteOrder(sect.reserved1); - sys::swapByteOrder(sect.reserved2); - } - - inline void swapStruct(dyld_info_command &info) { - sys::swapByteOrder(info.cmd); - sys::swapByteOrder(info.cmdsize); - sys::swapByteOrder(info.rebase_off); - sys::swapByteOrder(info.rebase_size); - sys::swapByteOrder(info.bind_off); - sys::swapByteOrder(info.bind_size); - sys::swapByteOrder(info.weak_bind_off); - sys::swapByteOrder(info.weak_bind_size); - sys::swapByteOrder(info.lazy_bind_off); - sys::swapByteOrder(info.lazy_bind_size); - sys::swapByteOrder(info.export_off); - sys::swapByteOrder(info.export_size); - } - - inline void swapStruct(dylib_command &d) { - sys::swapByteOrder(d.cmd); - sys::swapByteOrder(d.cmdsize); - sys::swapByteOrder(d.dylib.name); - sys::swapByteOrder(d.dylib.timestamp); - sys::swapByteOrder(d.dylib.current_version); - sys::swapByteOrder(d.dylib.compatibility_version); - } - - inline void swapStruct(sub_framework_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.umbrella); - } - - inline void swapStruct(sub_umbrella_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.sub_umbrella); - } - - inline void swapStruct(sub_library_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.sub_library); - } - - inline void swapStruct(sub_client_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.client); - } - - inline void swapStruct(routines_command &r) { - sys::swapByteOrder(r.cmd); - sys::swapByteOrder(r.cmdsize); - sys::swapByteOrder(r.init_address); - sys::swapByteOrder(r.init_module); - sys::swapByteOrder(r.reserved1); - sys::swapByteOrder(r.reserved2); - sys::swapByteOrder(r.reserved3); - sys::swapByteOrder(r.reserved4); - sys::swapByteOrder(r.reserved5); - sys::swapByteOrder(r.reserved6); - } - - inline void swapStruct(routines_command_64 &r) { - sys::swapByteOrder(r.cmd); - sys::swapByteOrder(r.cmdsize); - sys::swapByteOrder(r.init_address); - sys::swapByteOrder(r.init_module); - sys::swapByteOrder(r.reserved1); - sys::swapByteOrder(r.reserved2); - sys::swapByteOrder(r.reserved3); - sys::swapByteOrder(r.reserved4); - sys::swapByteOrder(r.reserved5); - sys::swapByteOrder(r.reserved6); - } - - inline void swapStruct(thread_command &t) { - sys::swapByteOrder(t.cmd); - sys::swapByteOrder(t.cmdsize); - } - - inline void swapStruct(dylinker_command &d) { - sys::swapByteOrder(d.cmd); - sys::swapByteOrder(d.cmdsize); - sys::swapByteOrder(d.name); - } - - inline void swapStruct(uuid_command &u) { - sys::swapByteOrder(u.cmd); - sys::swapByteOrder(u.cmdsize); - } - - inline void swapStruct(rpath_command &r) { - sys::swapByteOrder(r.cmd); - sys::swapByteOrder(r.cmdsize); - sys::swapByteOrder(r.path); - } - - inline void swapStruct(source_version_command &s) { - sys::swapByteOrder(s.cmd); - sys::swapByteOrder(s.cmdsize); - sys::swapByteOrder(s.version); - } - - inline void swapStruct(entry_point_command &e) { - sys::swapByteOrder(e.cmd); - sys::swapByteOrder(e.cmdsize); - sys::swapByteOrder(e.entryoff); - sys::swapByteOrder(e.stacksize); - } - - inline void swapStruct(encryption_info_command &e) { - sys::swapByteOrder(e.cmd); - sys::swapByteOrder(e.cmdsize); - sys::swapByteOrder(e.cryptoff); - sys::swapByteOrder(e.cryptsize); - sys::swapByteOrder(e.cryptid); - } - - inline void swapStruct(encryption_info_command_64 &e) { - sys::swapByteOrder(e.cmd); - sys::swapByteOrder(e.cmdsize); - sys::swapByteOrder(e.cryptoff); - sys::swapByteOrder(e.cryptsize); - sys::swapByteOrder(e.cryptid); - sys::swapByteOrder(e.pad); - } - - inline void swapStruct(dysymtab_command &dst) { - sys::swapByteOrder(dst.cmd); - sys::swapByteOrder(dst.cmdsize); - sys::swapByteOrder(dst.ilocalsym); - sys::swapByteOrder(dst.nlocalsym); - sys::swapByteOrder(dst.iextdefsym); - sys::swapByteOrder(dst.nextdefsym); - sys::swapByteOrder(dst.iundefsym); - sys::swapByteOrder(dst.nundefsym); - sys::swapByteOrder(dst.tocoff); - sys::swapByteOrder(dst.ntoc); - sys::swapByteOrder(dst.modtaboff); - sys::swapByteOrder(dst.nmodtab); - sys::swapByteOrder(dst.extrefsymoff); - sys::swapByteOrder(dst.nextrefsyms); - sys::swapByteOrder(dst.indirectsymoff); - sys::swapByteOrder(dst.nindirectsyms); - sys::swapByteOrder(dst.extreloff); - sys::swapByteOrder(dst.nextrel); - sys::swapByteOrder(dst.locreloff); - sys::swapByteOrder(dst.nlocrel); - } - - inline void swapStruct(any_relocation_info &reloc) { - sys::swapByteOrder(reloc.r_word0); - sys::swapByteOrder(reloc.r_word1); - } - - inline void swapStruct(nlist_base &S) { - sys::swapByteOrder(S.n_strx); - sys::swapByteOrder(S.n_desc); - } - - inline void swapStruct(nlist &sym) { - sys::swapByteOrder(sym.n_strx); - sys::swapByteOrder(sym.n_desc); - sys::swapByteOrder(sym.n_value); - } - - inline void swapStruct(nlist_64 &sym) { - sys::swapByteOrder(sym.n_strx); - sys::swapByteOrder(sym.n_desc); - sys::swapByteOrder(sym.n_value); - } - - inline void swapStruct(linkedit_data_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.dataoff); - sys::swapByteOrder(C.datasize); - } - - inline void swapStruct(linker_option_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.count); - } - - inline void swapStruct(version_min_command&C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.version); - sys::swapByteOrder(C.sdk); - } - - inline void swapStruct(note_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.offset); - sys::swapByteOrder(C.size); - } - - inline void swapStruct(build_version_command&C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.platform); - sys::swapByteOrder(C.minos); - sys::swapByteOrder(C.sdk); - sys::swapByteOrder(C.ntools); - } - - inline void swapStruct(build_tool_version&C) { - sys::swapByteOrder(C.tool); - sys::swapByteOrder(C.version); - } - - inline void swapStruct(data_in_code_entry &C) { - sys::swapByteOrder(C.offset); - sys::swapByteOrder(C.length); - sys::swapByteOrder(C.kind); - } - - inline void swapStruct(uint32_t &C) { - sys::swapByteOrder(C); - } - - // The prebind_cksum_command is obsolete and no longer supported. - inline void swapStruct(prebind_cksum_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.cksum); - } - - // The twolevel_hints_command is obsolete and no longer supported. - inline void swapStruct(twolevel_hints_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.offset); - sys::swapByteOrder(C.nhints); - } - - // The prebound_dylib_command is obsolete and no longer supported. - inline void swapStruct(prebound_dylib_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.name); - sys::swapByteOrder(C.nmodules); - sys::swapByteOrder(C.linked_modules); - } - - // The fvmfile_command is obsolete and no longer supported. - inline void swapStruct(fvmfile_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.name); - sys::swapByteOrder(C.header_addr); - } - - // The symseg_command is obsolete and no longer supported. - inline void swapStruct(symseg_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - sys::swapByteOrder(C.offset); - sys::swapByteOrder(C.size); - } - - // The ident_command is obsolete and no longer supported. - inline void swapStruct(ident_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - } - - inline void swapStruct(fvmlib &C) { - sys::swapByteOrder(C.name); - sys::swapByteOrder(C.minor_version); - sys::swapByteOrder(C.header_addr); - } - - // The fvmlib_command is obsolete and no longer supported. - inline void swapStruct(fvmlib_command &C) { - sys::swapByteOrder(C.cmd); - sys::swapByteOrder(C.cmdsize); - swapStruct(C.fvmlib); - } - - // Get/Set functions from - - static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) { - return (((n_desc) >> 8u) & 0xffu); - } - - static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) { - n_desc = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8)); - } - - static inline uint8_t GET_COMM_ALIGN (uint16_t n_desc) { - return (n_desc >> 8u) & 0x0fu; - } - - static inline void SET_COMM_ALIGN (uint16_t &n_desc, uint8_t align) { - n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u)); - } - - // Enums from - enum : uint32_t { - // Capability bits used in the definition of cpu_type. - CPU_ARCH_MASK = 0xff000000, // Mask for architecture bits - CPU_ARCH_ABI64 = 0x01000000 // 64 bit ABI - }; - - // Constants for the cputype field. - enum CPUType { - CPU_TYPE_ANY = -1, - CPU_TYPE_X86 = 7, - CPU_TYPE_I386 = CPU_TYPE_X86, - CPU_TYPE_X86_64 = CPU_TYPE_X86 | CPU_ARCH_ABI64, - /* CPU_TYPE_MIPS = 8, */ - CPU_TYPE_MC98000 = 10, // Old Motorola PowerPC - CPU_TYPE_ARM = 12, - CPU_TYPE_ARM64 = CPU_TYPE_ARM | CPU_ARCH_ABI64, - CPU_TYPE_SPARC = 14, - CPU_TYPE_POWERPC = 18, - CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64 - }; - - enum : uint32_t { - // Capability bits used in the definition of cpusubtype. - CPU_SUBTYPE_MASK = 0xff000000, // Mask for architecture bits - CPU_SUBTYPE_LIB64 = 0x80000000, // 64 bit libraries - - // Special CPU subtype constants. - CPU_SUBTYPE_MULTIPLE = ~0u - }; - - // Constants for the cpusubtype field. - enum CPUSubTypeX86 { - CPU_SUBTYPE_I386_ALL = 3, - CPU_SUBTYPE_386 = 3, - CPU_SUBTYPE_486 = 4, - CPU_SUBTYPE_486SX = 0x84, - CPU_SUBTYPE_586 = 5, - CPU_SUBTYPE_PENT = CPU_SUBTYPE_586, - CPU_SUBTYPE_PENTPRO = 0x16, - CPU_SUBTYPE_PENTII_M3 = 0x36, - CPU_SUBTYPE_PENTII_M5 = 0x56, - CPU_SUBTYPE_CELERON = 0x67, - CPU_SUBTYPE_CELERON_MOBILE = 0x77, - CPU_SUBTYPE_PENTIUM_3 = 0x08, - CPU_SUBTYPE_PENTIUM_3_M = 0x18, - CPU_SUBTYPE_PENTIUM_3_XEON = 0x28, - CPU_SUBTYPE_PENTIUM_M = 0x09, - CPU_SUBTYPE_PENTIUM_4 = 0x0a, - CPU_SUBTYPE_PENTIUM_4_M = 0x1a, - CPU_SUBTYPE_ITANIUM = 0x0b, - CPU_SUBTYPE_ITANIUM_2 = 0x1b, - CPU_SUBTYPE_XEON = 0x0c, - CPU_SUBTYPE_XEON_MP = 0x1c, - - CPU_SUBTYPE_X86_ALL = 3, - CPU_SUBTYPE_X86_64_ALL = 3, - CPU_SUBTYPE_X86_ARCH1 = 4, - CPU_SUBTYPE_X86_64_H = 8 - }; - static inline int CPU_SUBTYPE_INTEL(int Family, int Model) { - return Family | (Model << 4); - } - static inline int CPU_SUBTYPE_INTEL_FAMILY(CPUSubTypeX86 ST) { - return ((int)ST) & 0x0f; - } - static inline int CPU_SUBTYPE_INTEL_MODEL(CPUSubTypeX86 ST) { - return ((int)ST) >> 4; - } - enum { - CPU_SUBTYPE_INTEL_FAMILY_MAX = 15, - CPU_SUBTYPE_INTEL_MODEL_ALL = 0 - }; - - enum CPUSubTypeARM { - CPU_SUBTYPE_ARM_ALL = 0, - CPU_SUBTYPE_ARM_V4T = 5, - CPU_SUBTYPE_ARM_V6 = 6, - CPU_SUBTYPE_ARM_V5 = 7, - CPU_SUBTYPE_ARM_V5TEJ = 7, - CPU_SUBTYPE_ARM_XSCALE = 8, - CPU_SUBTYPE_ARM_V7 = 9, - // unused ARM_V7F = 10, - CPU_SUBTYPE_ARM_V7S = 11, - CPU_SUBTYPE_ARM_V7K = 12, - CPU_SUBTYPE_ARM_V6M = 14, - CPU_SUBTYPE_ARM_V7M = 15, - CPU_SUBTYPE_ARM_V7EM = 16 - }; - - enum CPUSubTypeARM64 { - CPU_SUBTYPE_ARM64_ALL = 0 - }; - - enum CPUSubTypeSPARC { - CPU_SUBTYPE_SPARC_ALL = 0 - }; - - enum CPUSubTypePowerPC { - CPU_SUBTYPE_POWERPC_ALL = 0, - CPU_SUBTYPE_POWERPC_601 = 1, - CPU_SUBTYPE_POWERPC_602 = 2, - CPU_SUBTYPE_POWERPC_603 = 3, - CPU_SUBTYPE_POWERPC_603e = 4, - CPU_SUBTYPE_POWERPC_603ev = 5, - CPU_SUBTYPE_POWERPC_604 = 6, - CPU_SUBTYPE_POWERPC_604e = 7, - CPU_SUBTYPE_POWERPC_620 = 8, - CPU_SUBTYPE_POWERPC_750 = 9, - CPU_SUBTYPE_POWERPC_7400 = 10, - CPU_SUBTYPE_POWERPC_7450 = 11, - CPU_SUBTYPE_POWERPC_970 = 100, - - CPU_SUBTYPE_MC980000_ALL = CPU_SUBTYPE_POWERPC_ALL, - CPU_SUBTYPE_MC98601 = CPU_SUBTYPE_POWERPC_601 - }; - - struct x86_thread_state32_t { - uint32_t eax; - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - uint32_t edi; - uint32_t esi; - uint32_t ebp; - uint32_t esp; - uint32_t ss; - uint32_t eflags; - uint32_t eip; - uint32_t cs; - uint32_t ds; - uint32_t es; - uint32_t fs; - uint32_t gs; - }; - - struct x86_thread_state64_t { - uint64_t rax; - uint64_t rbx; - uint64_t rcx; - uint64_t rdx; - uint64_t rdi; - uint64_t rsi; - uint64_t rbp; - uint64_t rsp; - uint64_t r8; - uint64_t r9; - uint64_t r10; - uint64_t r11; - uint64_t r12; - uint64_t r13; - uint64_t r14; - uint64_t r15; - uint64_t rip; - uint64_t rflags; - uint64_t cs; - uint64_t fs; - uint64_t gs; - }; - - enum x86_fp_control_precis { - x86_FP_PREC_24B = 0, - x86_FP_PREC_53B = 2, - x86_FP_PREC_64B = 3 - }; - - enum x86_fp_control_rc { - x86_FP_RND_NEAR = 0, - x86_FP_RND_DOWN = 1, - x86_FP_RND_UP = 2, - x86_FP_CHOP = 3 - }; - - struct fp_control_t { - unsigned short - invalid :1, - denorm :1, - zdiv :1, - ovrfl :1, - undfl :1, - precis :1, - :2, - pc :2, - rc :2, - :1, - :3; - }; - - struct fp_status_t { - unsigned short - invalid :1, - denorm :1, - zdiv :1, - ovrfl :1, - undfl :1, - precis :1, - stkflt :1, - errsumm :1, - c0 :1, - c1 :1, - c2 :1, - tos :3, - c3 :1, - busy :1; - }; - - struct mmst_reg_t { - char mmst_reg[10]; - char mmst_rsrv[6]; - }; - - struct xmm_reg_t { - char xmm_reg[16]; - }; - - struct x86_float_state64_t { - int32_t fpu_reserved[2]; - fp_control_t fpu_fcw; - fp_status_t fpu_fsw; - uint8_t fpu_ftw; - uint8_t fpu_rsrv1; - uint16_t fpu_fop; - uint32_t fpu_ip; - uint16_t fpu_cs; - uint16_t fpu_rsrv2; - uint32_t fpu_dp; - uint16_t fpu_ds; - uint16_t fpu_rsrv3; - uint32_t fpu_mxcsr; - uint32_t fpu_mxcsrmask; - mmst_reg_t fpu_stmm0; - mmst_reg_t fpu_stmm1; - mmst_reg_t fpu_stmm2; - mmst_reg_t fpu_stmm3; - mmst_reg_t fpu_stmm4; - mmst_reg_t fpu_stmm5; - mmst_reg_t fpu_stmm6; - mmst_reg_t fpu_stmm7; - xmm_reg_t fpu_xmm0; - xmm_reg_t fpu_xmm1; - xmm_reg_t fpu_xmm2; - xmm_reg_t fpu_xmm3; - xmm_reg_t fpu_xmm4; - xmm_reg_t fpu_xmm5; - xmm_reg_t fpu_xmm6; - xmm_reg_t fpu_xmm7; - xmm_reg_t fpu_xmm8; - xmm_reg_t fpu_xmm9; - xmm_reg_t fpu_xmm10; - xmm_reg_t fpu_xmm11; - xmm_reg_t fpu_xmm12; - xmm_reg_t fpu_xmm13; - xmm_reg_t fpu_xmm14; - xmm_reg_t fpu_xmm15; - char fpu_rsrv4[6*16]; - uint32_t fpu_reserved1; - }; - - struct x86_exception_state64_t { - uint16_t trapno; - uint16_t cpu; - uint32_t err; - uint64_t faultvaddr; - }; - - inline void swapStruct(x86_thread_state32_t &x) { - sys::swapByteOrder(x.eax); - sys::swapByteOrder(x.ebx); - sys::swapByteOrder(x.ecx); - sys::swapByteOrder(x.edx); - sys::swapByteOrder(x.edi); - sys::swapByteOrder(x.esi); - sys::swapByteOrder(x.ebp); - sys::swapByteOrder(x.esp); - sys::swapByteOrder(x.ss); - sys::swapByteOrder(x.eflags); - sys::swapByteOrder(x.eip); - sys::swapByteOrder(x.cs); - sys::swapByteOrder(x.ds); - sys::swapByteOrder(x.es); - sys::swapByteOrder(x.fs); - sys::swapByteOrder(x.gs); - } - - inline void swapStruct(x86_thread_state64_t &x) { - sys::swapByteOrder(x.rax); - sys::swapByteOrder(x.rbx); - sys::swapByteOrder(x.rcx); - sys::swapByteOrder(x.rdx); - sys::swapByteOrder(x.rdi); - sys::swapByteOrder(x.rsi); - sys::swapByteOrder(x.rbp); - sys::swapByteOrder(x.rsp); - sys::swapByteOrder(x.r8); - sys::swapByteOrder(x.r9); - sys::swapByteOrder(x.r10); - sys::swapByteOrder(x.r11); - sys::swapByteOrder(x.r12); - sys::swapByteOrder(x.r13); - sys::swapByteOrder(x.r14); - sys::swapByteOrder(x.r15); - sys::swapByteOrder(x.rip); - sys::swapByteOrder(x.rflags); - sys::swapByteOrder(x.cs); - sys::swapByteOrder(x.fs); - sys::swapByteOrder(x.gs); - } - - inline void swapStruct(x86_float_state64_t &x) { - sys::swapByteOrder(x.fpu_reserved[0]); - sys::swapByteOrder(x.fpu_reserved[1]); - // TODO swap: fp_control_t fpu_fcw; - // TODO swap: fp_status_t fpu_fsw; - sys::swapByteOrder(x.fpu_fop); - sys::swapByteOrder(x.fpu_ip); - sys::swapByteOrder(x.fpu_cs); - sys::swapByteOrder(x.fpu_rsrv2); - sys::swapByteOrder(x.fpu_dp); - sys::swapByteOrder(x.fpu_ds); - sys::swapByteOrder(x.fpu_rsrv3); - sys::swapByteOrder(x.fpu_mxcsr); - sys::swapByteOrder(x.fpu_mxcsrmask); - sys::swapByteOrder(x.fpu_reserved1); - } - - inline void swapStruct(x86_exception_state64_t &x) { - sys::swapByteOrder(x.trapno); - sys::swapByteOrder(x.cpu); - sys::swapByteOrder(x.err); - sys::swapByteOrder(x.faultvaddr); - } - - struct x86_state_hdr_t { - uint32_t flavor; - uint32_t count; - }; - - struct x86_thread_state_t { - x86_state_hdr_t tsh; - union { - x86_thread_state64_t ts64; - x86_thread_state32_t ts32; - } uts; - }; - - struct x86_float_state_t { - x86_state_hdr_t fsh; - union { - x86_float_state64_t fs64; - } ufs; - }; - - struct x86_exception_state_t { - x86_state_hdr_t esh; - union { - x86_exception_state64_t es64; - } ues; - }; - - inline void swapStruct(x86_state_hdr_t &x) { - sys::swapByteOrder(x.flavor); - sys::swapByteOrder(x.count); - } - - enum X86ThreadFlavors { - x86_THREAD_STATE32 = 1, - x86_FLOAT_STATE32 = 2, - x86_EXCEPTION_STATE32 = 3, - x86_THREAD_STATE64 = 4, - x86_FLOAT_STATE64 = 5, - x86_EXCEPTION_STATE64 = 6, - x86_THREAD_STATE = 7, - x86_FLOAT_STATE = 8, - x86_EXCEPTION_STATE = 9, - x86_DEBUG_STATE32 = 10, - x86_DEBUG_STATE64 = 11, - x86_DEBUG_STATE = 12 - }; - - inline void swapStruct(x86_thread_state_t &x) { - swapStruct(x.tsh); - if (x.tsh.flavor == x86_THREAD_STATE64) - swapStruct(x.uts.ts64); - } - - inline void swapStruct(x86_float_state_t &x) { - swapStruct(x.fsh); - if (x.fsh.flavor == x86_FLOAT_STATE64) - swapStruct(x.ufs.fs64); - } - - inline void swapStruct(x86_exception_state_t &x) { - swapStruct(x.esh); - if (x.esh.flavor == x86_EXCEPTION_STATE64) - swapStruct(x.ues.es64); - } - - const uint32_t x86_THREAD_STATE32_COUNT = - sizeof(x86_thread_state32_t) / sizeof(uint32_t); - - const uint32_t x86_THREAD_STATE64_COUNT = - sizeof(x86_thread_state64_t) / sizeof(uint32_t); - const uint32_t x86_FLOAT_STATE64_COUNT = - sizeof(x86_float_state64_t) / sizeof(uint32_t); - const uint32_t x86_EXCEPTION_STATE64_COUNT = - sizeof(x86_exception_state64_t) / sizeof(uint32_t); - - const uint32_t x86_THREAD_STATE_COUNT = - sizeof(x86_thread_state_t) / sizeof(uint32_t); - const uint32_t x86_FLOAT_STATE_COUNT = - sizeof(x86_float_state_t) / sizeof(uint32_t); - const uint32_t x86_EXCEPTION_STATE_COUNT = - sizeof(x86_exception_state_t) / sizeof(uint32_t); - - struct arm_thread_state32_t { - uint32_t r[13]; - uint32_t sp; - uint32_t lr; - uint32_t pc; - uint32_t cpsr; - }; - - inline void swapStruct(arm_thread_state32_t &x) { - for (int i = 0; i < 13; i++) - sys::swapByteOrder(x.r[i]); - sys::swapByteOrder(x.sp); - sys::swapByteOrder(x.lr); - sys::swapByteOrder(x.pc); - sys::swapByteOrder(x.cpsr); - } - - struct arm_thread_state64_t { - uint64_t x[29]; - uint64_t fp; - uint64_t lr; - uint64_t sp; - uint64_t pc; - uint32_t cpsr; - uint32_t pad; - }; - - inline void swapStruct(arm_thread_state64_t &x) { - for (int i = 0; i < 29; i++) - sys::swapByteOrder(x.x[i]); - sys::swapByteOrder(x.fp); - sys::swapByteOrder(x.lr); - sys::swapByteOrder(x.sp); - sys::swapByteOrder(x.pc); - sys::swapByteOrder(x.cpsr); - } - - struct arm_state_hdr_t { - uint32_t flavor; - uint32_t count; - }; - - struct arm_thread_state_t { - arm_state_hdr_t tsh; - union { - arm_thread_state32_t ts32; - } uts; - }; - - inline void swapStruct(arm_state_hdr_t &x) { - sys::swapByteOrder(x.flavor); - sys::swapByteOrder(x.count); - } - - enum ARMThreadFlavors { - ARM_THREAD_STATE = 1, - ARM_VFP_STATE = 2, - ARM_EXCEPTION_STATE = 3, - ARM_DEBUG_STATE = 4, - ARN_THREAD_STATE_NONE = 5, - ARM_THREAD_STATE64 = 6, - ARM_EXCEPTION_STATE64 = 7 - }; - - inline void swapStruct(arm_thread_state_t &x) { - swapStruct(x.tsh); - if (x.tsh.flavor == ARM_THREAD_STATE) - swapStruct(x.uts.ts32); - } - - const uint32_t ARM_THREAD_STATE_COUNT = - sizeof(arm_thread_state32_t) / sizeof(uint32_t); - - const uint32_t ARM_THREAD_STATE64_COUNT = - sizeof(arm_thread_state64_t) / sizeof(uint32_t); - - struct ppc_thread_state32_t { - uint32_t srr0; - uint32_t srr1; - uint32_t r0; - uint32_t r1; - uint32_t r2; - uint32_t r3; - uint32_t r4; - uint32_t r5; - uint32_t r6; - uint32_t r7; - uint32_t r8; - uint32_t r9; - uint32_t r10; - uint32_t r11; - uint32_t r12; - uint32_t r13; - uint32_t r14; - uint32_t r15; - uint32_t r16; - uint32_t r17; - uint32_t r18; - uint32_t r19; - uint32_t r20; - uint32_t r21; - uint32_t r22; - uint32_t r23; - uint32_t r24; - uint32_t r25; - uint32_t r26; - uint32_t r27; - uint32_t r28; - uint32_t r29; - uint32_t r30; - uint32_t r31; - uint32_t ct; - uint32_t xer; - uint32_t lr; - uint32_t ctr; - uint32_t mq; - uint32_t vrsave; - }; - - inline void swapStruct(ppc_thread_state32_t &x) { - sys::swapByteOrder(x.srr0); - sys::swapByteOrder(x.srr1); - sys::swapByteOrder(x.r0); - sys::swapByteOrder(x.r1); - sys::swapByteOrder(x.r2); - sys::swapByteOrder(x.r3); - sys::swapByteOrder(x.r4); - sys::swapByteOrder(x.r5); - sys::swapByteOrder(x.r6); - sys::swapByteOrder(x.r7); - sys::swapByteOrder(x.r8); - sys::swapByteOrder(x.r9); - sys::swapByteOrder(x.r10); - sys::swapByteOrder(x.r11); - sys::swapByteOrder(x.r12); - sys::swapByteOrder(x.r13); - sys::swapByteOrder(x.r14); - sys::swapByteOrder(x.r15); - sys::swapByteOrder(x.r16); - sys::swapByteOrder(x.r17); - sys::swapByteOrder(x.r18); - sys::swapByteOrder(x.r19); - sys::swapByteOrder(x.r20); - sys::swapByteOrder(x.r21); - sys::swapByteOrder(x.r22); - sys::swapByteOrder(x.r23); - sys::swapByteOrder(x.r24); - sys::swapByteOrder(x.r25); - sys::swapByteOrder(x.r26); - sys::swapByteOrder(x.r27); - sys::swapByteOrder(x.r28); - sys::swapByteOrder(x.r29); - sys::swapByteOrder(x.r30); - sys::swapByteOrder(x.r31); - sys::swapByteOrder(x.ct); - sys::swapByteOrder(x.xer); - sys::swapByteOrder(x.lr); - sys::swapByteOrder(x.ctr); - sys::swapByteOrder(x.mq); - sys::swapByteOrder(x.vrsave); - } - - struct ppc_state_hdr_t { - uint32_t flavor; - uint32_t count; - }; - - struct ppc_thread_state_t { - ppc_state_hdr_t tsh; - union { - ppc_thread_state32_t ts32; - } uts; - }; - - inline void swapStruct(ppc_state_hdr_t &x) { - sys::swapByteOrder(x.flavor); - sys::swapByteOrder(x.count); - } - - enum PPCThreadFlavors { - PPC_THREAD_STATE = 1, - PPC_FLOAT_STATE = 2, - PPC_EXCEPTION_STATE = 3, - PPC_VECTOR_STATE = 4, - PPC_THREAD_STATE64 = 5, - PPC_EXCEPTION_STATE64 = 6, - PPC_THREAD_STATE_NONE = 7 - }; - - inline void swapStruct(ppc_thread_state_t &x) { - swapStruct(x.tsh); - if (x.tsh.flavor == PPC_THREAD_STATE) - swapStruct(x.uts.ts32); - } - - const uint32_t PPC_THREAD_STATE_COUNT = - sizeof(ppc_thread_state32_t) / sizeof(uint32_t); - - // Define a union of all load command structs - #define LOAD_COMMAND_STRUCT(LCStruct) LCStruct LCStruct##_data; - - union macho_load_command { - #include "llvm/Support/MachO.def" - }; - - } // end namespace MachO -} // end namespace llvm - -#endif diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h index 7ce86eee95d24673cf0ad65cb9b908e1528e84ee..b4bf3210cc7384423749ec8e5379871cfabc3491 100644 --- a/include/llvm/Support/ManagedStatic.h +++ b/include/llvm/Support/ManagedStatic.h @@ -14,25 +14,22 @@ #ifndef LLVM_SUPPORT_MANAGEDSTATIC_H #define LLVM_SUPPORT_MANAGEDSTATIC_H -#include "llvm/Support/Compiler.h" #include #include namespace llvm { /// object_creator - Helper method for ManagedStatic. -template -LLVM_LIBRARY_VISIBILITY void* object_creator() { - return new C(); -} +template struct object_creator { + static void *call() { return new C(); } +}; /// object_deleter - Helper method for ManagedStatic. /// -template struct LLVM_LIBRARY_VISIBILITY object_deleter { +template struct object_deleter { static void call(void *Ptr) { delete (T *)Ptr; } }; -template -struct LLVM_LIBRARY_VISIBILITY object_deleter { +template struct object_deleter { static void call(void *Ptr) { delete[](T *)Ptr; } }; @@ -59,14 +56,15 @@ public: /// libraries that link in LLVM components) and for making destruction be /// explicit through the llvm_shutdown() function call. /// -template +template , + class Deleter = object_deleter> class ManagedStatic : public ManagedStaticBase { public: // Accessors. C &operator*() { void *Tmp = Ptr.load(std::memory_order_acquire); if (!Tmp) - RegisterManagedStatic(object_creator, object_deleter::call); + RegisterManagedStatic(Creator::call, Deleter::call); return *static_cast(Ptr.load(std::memory_order_relaxed)); } @@ -76,7 +74,7 @@ public: const C &operator*() const { void *Tmp = Ptr.load(std::memory_order_acquire); if (!Tmp) - RegisterManagedStatic(object_creator, object_deleter::call); + RegisterManagedStatic(Creator::call, Deleter::call); return *static_cast(Ptr.load(std::memory_order_relaxed)); } diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index 19380b23d9d24768b8312a8fa56fb525c28ef41e..fd29865c8475e15aa3a8172d9718a7e64b7b3e5e 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -18,9 +18,10 @@ #include "llvm/Support/SwapByteOrder.h" #include #include +#include #include -#include #include +#include #ifdef _MSC_VER #include @@ -198,6 +199,33 @@ template T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { return countTrailingZeros(Val, ZB_Undefined); } +/// \brief Create a bitmask with the N right-most bits set to 1, and all other +/// bits set to 0. Only unsigned types are allowed. +template T maskTrailingOnes(unsigned N) { + static_assert(std::is_unsigned::value, "Invalid type!"); + const unsigned Bits = CHAR_BIT * sizeof(T); + assert(N <= Bits && "Invalid bit index"); + return N == 0 ? 0 : (T(-1) >> (Bits - N)); +} + +/// \brief Create a bitmask with the N left-most bits set to 1, and all other +/// bits set to 0. Only unsigned types are allowed. +template T maskLeadingOnes(unsigned N) { + return ~maskTrailingOnes(CHAR_BIT * sizeof(T) - N); +} + +/// \brief Create a bitmask with the N right-most bits set to 0, and all other +/// bits set to 1. Only unsigned types are allowed. +template T maskTrailingZeros(unsigned N) { + return maskLeadingOnes(CHAR_BIT * sizeof(T) - N); +} + +/// \brief Create a bitmask with the N left-most bits set to 0, and all other +/// bits set to 1. Only unsigned types are allowed. +template T maskLeadingZeros(unsigned N) { + return maskTrailingOnes(CHAR_BIT * sizeof(T) - N); +} + /// \brief Get the index of the last set bit starting from the least /// significant bit. /// @@ -244,23 +272,22 @@ T reverseBits(T Val) { // type overloading so that signed and unsigned integers can be used without // ambiguity. -/// Hi_32 - This function returns the high 32 bits of a 64 bit value. +/// Return the high 32 bits of a 64 bit value. constexpr inline uint32_t Hi_32(uint64_t Value) { return static_cast(Value >> 32); } -/// Lo_32 - This function returns the low 32 bits of a 64 bit value. +/// Return the low 32 bits of a 64 bit value. constexpr inline uint32_t Lo_32(uint64_t Value) { return static_cast(Value); } -/// Make_64 - This functions makes a 64-bit integer from a high / low pair of -/// 32-bit integers. +/// Make a 64-bit integer from a high / low pair of 32-bit integers. constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { return ((uint64_t)High << 32) | (uint64_t)Low; } -/// isInt - Checks if an integer fits into the given bit width. +/// Checks if an integer fits into the given bit width. template constexpr inline bool isInt(int64_t x) { return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1))); } @@ -275,8 +302,7 @@ template <> constexpr inline bool isInt<32>(int64_t x) { return static_cast(x) == x; } -/// isShiftedInt - Checks if a signed integer is an N bit number shifted -/// left by S. +/// Checks if a signed integer is an N bit number shifted left by S. template constexpr inline bool isShiftedInt(int64_t x) { static_assert( @@ -285,7 +311,7 @@ constexpr inline bool isShiftedInt(int64_t x) { return isInt(x) && (x % (UINT64_C(1) << S) == 0); } -/// isUInt - Checks if an unsigned integer fits into the given bit width. +/// Checks if an unsigned integer fits into the given bit width. /// /// This is written as two functions rather than as simply /// @@ -355,71 +381,63 @@ inline int64_t maxIntN(int64_t N) { return (UINT64_C(1) << (N - 1)) - 1; } -/// isUIntN - Checks if an unsigned integer fits into the given (dynamic) -/// bit width. +/// Checks if an unsigned integer fits into the given (dynamic) bit width. inline bool isUIntN(unsigned N, uint64_t x) { return N >= 64 || x <= maxUIntN(N); } -/// isIntN - Checks if an signed integer fits into the given (dynamic) -/// bit width. +/// Checks if an signed integer fits into the given (dynamic) bit width. inline bool isIntN(unsigned N, int64_t x) { return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); } -/// isMask_32 - This function returns true if the argument is a non-empty -/// sequence of ones starting at the least significant bit with the remainder -/// zero (32 bit version). Ex. isMask_32(0x0000FFFFU) == true. +/// Return true if the argument is a non-empty sequence of ones starting at the +/// least significant bit with the remainder zero (32 bit version). +/// Ex. isMask_32(0x0000FFFFU) == true. constexpr inline bool isMask_32(uint32_t Value) { return Value && ((Value + 1) & Value) == 0; } -/// isMask_64 - This function returns true if the argument is a non-empty -/// sequence of ones starting at the least significant bit with the remainder -/// zero (64 bit version). +/// Return true if the argument is a non-empty sequence of ones starting at the +/// least significant bit with the remainder zero (64 bit version). constexpr inline bool isMask_64(uint64_t Value) { return Value && ((Value + 1) & Value) == 0; } -/// isShiftedMask_32 - This function returns true if the argument contains a -/// non-empty sequence of ones with the remainder zero (32 bit version.) -/// Ex. isShiftedMask_32(0x0000FF00U) == true. +/// Return true if the argument contains a non-empty sequence of ones with the +/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. constexpr inline bool isShiftedMask_32(uint32_t Value) { return Value && isMask_32((Value - 1) | Value); } -/// isShiftedMask_64 - This function returns true if the argument contains a -/// non-empty sequence of ones with the remainder zero (64 bit version.) +/// Return true if the argument contains a non-empty sequence of ones with the +/// remainder zero (64 bit version.) constexpr inline bool isShiftedMask_64(uint64_t Value) { return Value && isMask_64((Value - 1) | Value); } -/// isPowerOf2_32 - This function returns true if the argument is a power of -/// two > 0. Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) +/// Return true if the argument is a power of two > 0. +/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) constexpr inline bool isPowerOf2_32(uint32_t Value) { return Value && !(Value & (Value - 1)); } -/// isPowerOf2_64 - This function returns true if the argument is a power of two -/// > 0 (64 bit edition.) +/// Return true if the argument is a power of two > 0 (64 bit edition.) constexpr inline bool isPowerOf2_64(uint64_t Value) { return Value && !(Value & (Value - int64_t(1L))); } -/// ByteSwap_16 - This function returns a byte-swapped representation of the -/// 16-bit argument, Value. +/// Return a byte-swapped representation of the 16-bit argument. inline uint16_t ByteSwap_16(uint16_t Value) { return sys::SwapByteOrder_16(Value); } -/// ByteSwap_32 - This function returns a byte-swapped representation of the -/// 32-bit argument, Value. +/// Return a byte-swapped representation of the 32-bit argument. inline uint32_t ByteSwap_32(uint32_t Value) { return sys::SwapByteOrder_32(Value); } -/// ByteSwap_64 - This function returns a byte-swapped representation of the -/// 64-bit argument, Value. +/// Return a byte-swapped representation of the 64-bit argument. inline uint64_t ByteSwap_64(uint64_t Value) { return sys::SwapByteOrder_64(Value); } @@ -427,7 +445,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) { /// \brief Count the number of ones from the most significant bit to the first /// zero bit. /// -/// Ex. CountLeadingOnes(0xFF0FFF00) == 8. +/// Ex. countLeadingOnes(0xFF0FFF00) == 8. /// Only unsigned integral types are allowed. /// /// \param ZB the behavior on an input of all ones. Only ZB_Width and @@ -498,7 +516,7 @@ inline unsigned countPopulation(T Value) { return detail::PopulationCounter::count(Value); } -/// Log2 - This function returns the log base 2 of the specified value +/// Return the log base 2 of the specified value. inline double Log2(double Value) { #if defined(__ANDROID_API__) && __ANDROID_API__ < 18 return __builtin_log(Value) / __builtin_log(2.0); @@ -507,34 +525,33 @@ inline double Log2(double Value) { #endif } -/// Log2_32 - This function returns the floor log base 2 of the specified value, -/// -1 if the value is zero. (32 bit edition.) +/// Return the floor log base 2 of the specified value, -1 if the value is zero. +/// (32 bit edition.) /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 inline unsigned Log2_32(uint32_t Value) { return 31 - countLeadingZeros(Value); } -/// Log2_64 - This function returns the floor log base 2 of the specified value, -/// -1 if the value is zero. (64 bit edition.) +/// Return the floor log base 2 of the specified value, -1 if the value is zero. +/// (64 bit edition.) inline unsigned Log2_64(uint64_t Value) { return 63 - countLeadingZeros(Value); } -/// Log2_32_Ceil - This function returns the ceil log base 2 of the specified -/// value, 32 if the value is zero. (32 bit edition). +/// Return the ceil log base 2 of the specified value, 32 if the value is zero. +/// (32 bit edition). /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 inline unsigned Log2_32_Ceil(uint32_t Value) { return 32 - countLeadingZeros(Value - 1); } -/// Log2_64_Ceil - This function returns the ceil log base 2 of the specified -/// value, 64 if the value is zero. (64 bit edition.) +/// Return the ceil log base 2 of the specified value, 64 if the value is zero. +/// (64 bit edition.) inline unsigned Log2_64_Ceil(uint64_t Value) { return 64 - countLeadingZeros(Value - 1); } -/// GreatestCommonDivisor64 - Return the greatest common divisor of the two -/// values using Euclid's algorithm. +/// Return the greatest common divisor of the values using Euclid's algorithm. inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { while (B) { uint64_t T = B; @@ -544,8 +561,7 @@ inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { return A; } -/// BitsToDouble - This function takes a 64-bit integer and returns the bit -/// equivalent double. +/// This function takes a 64-bit integer and returns the bit equivalent double. inline double BitsToDouble(uint64_t Bits) { double D; static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); @@ -553,8 +569,7 @@ inline double BitsToDouble(uint64_t Bits) { return D; } -/// BitsToFloat - This function takes a 32-bit integer and returns the bit -/// equivalent float. +/// This function takes a 32-bit integer and returns the bit equivalent float. inline float BitsToFloat(uint32_t Bits) { float F; static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); @@ -562,10 +577,9 @@ inline float BitsToFloat(uint32_t Bits) { return F; } -/// DoubleToBits - This function takes a double and returns the bit -/// equivalent 64-bit integer. Note that copying doubles around -/// changes the bits of NaNs on some hosts, notably x86, so this -/// routine cannot be used if these bits are needed. +/// This function takes a double and returns the bit equivalent 64-bit integer. +/// Note that copying doubles around changes the bits of NaNs on some hosts, +/// notably x86, so this routine cannot be used if these bits are needed. inline uint64_t DoubleToBits(double Double) { uint64_t Bits; static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); @@ -573,10 +587,9 @@ inline uint64_t DoubleToBits(double Double) { return Bits; } -/// FloatToBits - This function takes a float and returns the bit -/// equivalent 32-bit integer. Note that copying floats around -/// changes the bits of NaNs on some hosts, notably x86, so this -/// routine cannot be used if these bits are needed. +/// This function takes a float and returns the bit equivalent 32-bit integer. +/// Note that copying floats around changes the bits of NaNs on some hosts, +/// notably x86, so this routine cannot be used if these bits are needed. inline uint32_t FloatToBits(float Float) { uint32_t Bits; static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); @@ -584,8 +597,8 @@ inline uint32_t FloatToBits(float Float) { return Bits; } -/// MinAlign - A and B are either alignments or offsets. Return the minimum -/// alignment that may be assumed after adding the two together. +/// A and B are either alignments or offsets. Return the minimum alignment that +/// may be assumed after adding the two together. constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { // The largest power of 2 that divides both A and B. // @@ -614,8 +627,8 @@ inline size_t alignmentAdjustment(const void *Ptr, size_t Alignment) { return alignAddr(Ptr, Alignment) - (uintptr_t)Ptr; } -/// NextPowerOf2 - Returns the next power of two (in 64-bits) -/// that is strictly greater than A. Returns zero on overflow. +/// Returns the next power of two (in 64-bits) that is strictly greater than A. +/// Returns zero on overflow. inline uint64_t NextPowerOf2(uint64_t A) { A |= (A >> 1); A |= (A >> 2); diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h index e8bdc3e89fa7cd9d0a7882b1e99e1726614ac527..73f0251a6b6e3176323865ce2f4d851145675ae8 100644 --- a/include/llvm/Support/MemoryBuffer.h +++ b/include/llvm/Support/MemoryBuffer.h @@ -14,14 +14,14 @@ #ifndef LLVM_SUPPORT_MEMORYBUFFER_H #define LLVM_SUPPORT_MEMORYBUFFER_H +#include "llvm-c/Types.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/ErrorOr.h" -#include "llvm-c/Types.h" -#include #include #include +#include namespace llvm { diff --git a/include/llvm/Support/Parallel.h b/include/llvm/Support/Parallel.h new file mode 100644 index 0000000000000000000000000000000000000000..e36e0cc29e149fbbf5ba0492d66f7e0d82af2a2b --- /dev/null +++ b/include/llvm/Support/Parallel.h @@ -0,0 +1,249 @@ +//===- llvm/Support/Parallel.h - Parallel algorithms ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PARALLEL_H +#define LLVM_SUPPORT_PARALLEL_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/MathExtras.h" + +#include +#include +#include +#include + +#if defined(_MSC_VER) && LLVM_ENABLE_THREADS +#pragma warning(push) +#pragma warning(disable : 4530) +#include +#include +#pragma warning(pop) +#endif + +namespace llvm { + +namespace parallel { +struct sequential_execution_policy {}; +struct parallel_execution_policy {}; + +template +struct is_execution_policy + : public std::integral_constant< + bool, llvm::is_one_of::value> {}; + +constexpr sequential_execution_policy seq{}; +constexpr parallel_execution_policy par{}; + +namespace detail { + +#if LLVM_ENABLE_THREADS + +class Latch { + uint32_t Count; + mutable std::mutex Mutex; + mutable std::condition_variable Cond; + +public: + explicit Latch(uint32_t Count = 0) : Count(Count) {} + ~Latch() { sync(); } + + void inc() { + std::unique_lock lock(Mutex); + ++Count; + } + + void dec() { + std::unique_lock lock(Mutex); + if (--Count == 0) + Cond.notify_all(); + } + + void sync() const { + std::unique_lock lock(Mutex); + Cond.wait(lock, [&] { return Count == 0; }); + } +}; + +class TaskGroup { + Latch L; + +public: + void spawn(std::function f); + + void sync() const { L.sync(); } +}; + +#if defined(_MSC_VER) +template +void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, + const Comparator &Comp) { + concurrency::parallel_sort(Start, End, Comp); +} +template +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + concurrency::parallel_for_each(Begin, End, Fn); +} + +template +void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { + concurrency::parallel_for(Begin, End, Fn); +} + +#else +const ptrdiff_t MinParallelSize = 1024; + +/// \brief Inclusive median. +template +RandomAccessIterator medianOf3(RandomAccessIterator Start, + RandomAccessIterator End, + const Comparator &Comp) { + RandomAccessIterator Mid = Start + (std::distance(Start, End) / 2); + return Comp(*Start, *(End - 1)) + ? (Comp(*Mid, *(End - 1)) ? (Comp(*Start, *Mid) ? Mid : Start) + : End - 1) + : (Comp(*Mid, *Start) ? (Comp(*(End - 1), *Mid) ? Mid : End - 1) + : Start); +} + +template +void parallel_quick_sort(RandomAccessIterator Start, RandomAccessIterator End, + const Comparator &Comp, TaskGroup &TG, size_t Depth) { + // Do a sequential sort for small inputs. + if (std::distance(Start, End) < detail::MinParallelSize || Depth == 0) { + std::sort(Start, End, Comp); + return; + } + + // Partition. + auto Pivot = medianOf3(Start, End, Comp); + // Move Pivot to End. + std::swap(*(End - 1), *Pivot); + Pivot = std::partition(Start, End - 1, [&Comp, End](decltype(*Start) V) { + return Comp(V, *(End - 1)); + }); + // Move Pivot to middle of partition. + std::swap(*Pivot, *(End - 1)); + + // Recurse. + TG.spawn([=, &Comp, &TG] { + parallel_quick_sort(Start, Pivot, Comp, TG, Depth - 1); + }); + parallel_quick_sort(Pivot + 1, End, Comp, TG, Depth - 1); +} + +template +void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, + const Comparator &Comp) { + TaskGroup TG; + parallel_quick_sort(Start, End, Comp, TG, + llvm::Log2_64(std::distance(Start, End)) + 1); +} + +template +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + // TaskGroup has a relatively high overhead, so we want to reduce + // the number of spawn() calls. We'll create up to 1024 tasks here. + // (Note that 1024 is an arbitrary number. This code probably needs + // improving to take the number of available cores into account.) + ptrdiff_t TaskSize = std::distance(Begin, End) / 1024; + if (TaskSize == 0) + TaskSize = 1; + + TaskGroup TG; + while (TaskSize <= std::distance(Begin, End)) { + TG.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); }); + Begin += TaskSize; + } + TG.spawn([=, &Fn] { std::for_each(Begin, End, Fn); }); +} + +template +void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { + ptrdiff_t TaskSize = (End - Begin) / 1024; + if (TaskSize == 0) + TaskSize = 1; + + TaskGroup TG; + IndexTy I = Begin; + for (; I + TaskSize < End; I += TaskSize) { + TG.spawn([=, &Fn] { + for (IndexTy J = I, E = I + TaskSize; J != E; ++J) + Fn(J); + }); + } + TG.spawn([=, &Fn] { + for (IndexTy J = I; J < End; ++J) + Fn(J); + }); +} + +#endif + +#endif + +template +using DefComparator = + std::less::value_type>; + +} // namespace detail + +// sequential algorithm implementations. +template > +void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, + const Comparator &Comp = Comparator()) { + static_assert(is_execution_policy::value, + "Invalid execution policy!"); + std::sort(Start, End, Comp); +} + +template +void for_each(Policy policy, IterTy Begin, IterTy End, FuncTy Fn) { + static_assert(is_execution_policy::value, + "Invalid execution policy!"); + std::for_each(Begin, End, Fn); +} + +template +void for_each_n(Policy policy, IndexTy Begin, IndexTy End, FuncTy Fn) { + static_assert(is_execution_policy::value, + "Invalid execution policy!"); + for (IndexTy I = Begin; I != End; ++I) + Fn(I); +} + +// Parallel algorithm implementations, only available when LLVM_ENABLE_THREADS +// is true. +#if LLVM_ENABLE_THREADS +template > +void sort(parallel_execution_policy policy, RandomAccessIterator Start, + RandomAccessIterator End, const Comparator &Comp = Comparator()) { + detail::parallel_sort(Start, End, Comp); +} + +template +void for_each(parallel_execution_policy policy, IterTy Begin, IterTy End, + FuncTy Fn) { + detail::parallel_for_each(Begin, End, Fn); +} + +template +void for_each_n(parallel_execution_policy policy, IndexTy Begin, IndexTy End, + FuncTy Fn) { + detail::parallel_for_each_n(Begin, End, Fn); +} +#endif + +} // namespace parallel +} // namespace llvm + +#endif // LLVM_SUPPORT_PARALLEL_H diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h index 1523aad38d46fbfd356a95780da993ed1ee17f2a..53db2e86d12d3da1489433fd9074b0b667240007 100644 --- a/include/llvm/Support/Recycler.h +++ b/include/llvm/Support/Recycler.h @@ -42,13 +42,16 @@ class Recycler { FreeNode *pop_val() { auto *Val = FreeList; + __asan_unpoison_memory_region(Val, Size); FreeList = FreeList->Next; + __msan_allocated_memory(Val, Size); return Val; } void push(FreeNode *N) { N->Next = FreeList; FreeList = N; + __asan_poison_memory_region(N, Size); } public: diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h index 83db80359ee21a3b58df863b2a65069be936c6a2..f498835bcb582d238866b14964fac5782f9dbdad 100644 --- a/include/llvm/Support/Regex.h +++ b/include/llvm/Support/Regex.h @@ -57,7 +57,7 @@ namespace llvm { /// isValid - returns the error encountered during regex compilation, or /// matching, if any. - bool isValid(std::string &Error); + bool isValid(std::string &Error) const; /// getNumMatches - In a valid regex, return the number of parenthesized /// matches it contains. The number filled in by match will include this diff --git a/include/llvm/Support/ScopedPrinter.h b/include/llvm/Support/ScopedPrinter.h index a2f2e0985431427ee3df1193d88d4d27d337bb4c..1b66519322129fe62de95a818966d549b71dccce 100644 --- a/include/llvm/Support/ScopedPrinter.h +++ b/include/llvm/Support/ScopedPrinter.h @@ -295,6 +295,11 @@ public: printBinaryImpl(Label, StringRef(), V, false); } + void printBinaryBlock(StringRef Label, ArrayRef Value, + uint32_t StartOffset) { + printBinaryImpl(Label, StringRef(), Value, true, StartOffset); + } + void printBinaryBlock(StringRef Label, ArrayRef Value) { printBinaryImpl(Label, StringRef(), Value, true); } @@ -333,7 +338,7 @@ private: } void printBinaryImpl(StringRef Label, StringRef Str, ArrayRef Value, - bool Block); + bool Block, uint32_t StartOffset = 0); raw_ostream &OS; int IndentLevel; diff --git a/include/llvm/Support/Solaris.h b/include/llvm/Support/Solaris.h index b0822853248956d83348d28725590189b181a178..88d83014c468bae84e93ff7c68d1857607d71392 100644 --- a/include/llvm/Support/Solaris.h +++ b/include/llvm/Support/Solaris.h @@ -14,8 +14,8 @@ #ifndef LLVM_SUPPORT_SOLARIS_H #define LLVM_SUPPORT_SOLARIS_H -#include #include +#include /* Solaris doesn't have endian.h. SPARC is the only supported big-endian ISA. */ #define BIG_ENDIAN 4321 diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h index cb90d968c44c51e878fa7a49d1d2ffc0a705266e..399f8dcd76fca6002cfa990e8b664e8b73c492a7 100644 --- a/include/llvm/Support/SourceMgr.h +++ b/include/llvm/Support/SourceMgr.h @@ -49,7 +49,7 @@ public: /// Clients that want to handle their own diagnostics in a custom way can /// register a function pointer+context as a diagnostic handler. /// It gets called each time PrintMessage is invoked. - typedef void (*DiagHandlerTy)(const SMDiagnostic &, void *Context); + using DiagHandlerTy = void (*)(const SMDiagnostic &, void *Context); private: struct SrcBuffer { diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h index 2ec0c3b76c11f13557ee5f810baa46a3bf7d512d..bb5fd07f0d00954e0c68b5756c0b81d60e4655fa 100644 --- a/include/llvm/Support/StringPool.h +++ b/include/llvm/Support/StringPool.h @@ -1,4 +1,4 @@ -//===-- StringPool.h - Interned string pool ---------------------*- C++ -*-===// +//===- StringPool.h - Interned string pool ----------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -30,6 +30,7 @@ #define LLVM_SUPPORT_STRINGPOOL_H #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include namespace llvm { @@ -43,17 +44,17 @@ namespace llvm { /// PooledString - This is the value of an entry in the pool's interning /// table. struct PooledString { - StringPool *Pool; ///< So the string can remove itself. - unsigned Refcount; ///< Number of referencing PooledStringPtrs. + StringPool *Pool = nullptr; ///< So the string can remove itself. + unsigned Refcount = 0; ///< Number of referencing PooledStringPtrs. public: - PooledString() : Pool(nullptr), Refcount(0) { } + PooledString() = default; }; friend class PooledStringPtr; - typedef StringMap table_t; - typedef StringMapEntry entry_t; + using table_t = StringMap; + using entry_t = StringMapEntry; table_t InternTable; public: @@ -76,11 +77,12 @@ namespace llvm { /// a single pointer, but it does have reference-counting overhead when /// copied. class PooledStringPtr { - typedef StringPool::entry_t entry_t; - entry_t *S; + using entry_t = StringPool::entry_t; + + entry_t *S = nullptr; public: - PooledStringPtr() : S(nullptr) {} + PooledStringPtr() = default; explicit PooledStringPtr(entry_t *E) : S(E) { if (S) ++S->getValue().Refcount; @@ -133,6 +135,6 @@ namespace llvm { inline bool operator!=(const PooledStringPtr &That) const { return S != That.S; } }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_STRINGPOOL_H diff --git a/include/llvm/Support/StringSaver.h b/include/llvm/Support/StringSaver.h index fcddd4cde5b60e97ced7484a0abde4fa4a1e6d52..e85b2895ce5191ca92ee7ffd717065519e31843c 100644 --- a/include/llvm/Support/StringSaver.h +++ b/include/llvm/Support/StringSaver.h @@ -26,7 +26,7 @@ public: StringRef save(const char *S) { return save(StringRef(S)); } StringRef save(StringRef S); StringRef save(const Twine &S) { return save(StringRef(S.str())); } - StringRef save(std::string &S) { return save(StringRef(S)); } + StringRef save(const std::string &S) { return save(StringRef(S)); } }; } #endif diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h index 68e6b276581029fb18f6306cbb35342abd162faa..f29cc40ffdd5537922ef5da146fc8ea5ef473a4a 100644 --- a/include/llvm/Support/TargetParser.h +++ b/include/llvm/Support/TargetParser.h @@ -75,7 +75,7 @@ enum ArchExtKind : unsigned { AEK_CRC = 0x2, AEK_CRYPTO = 0x4, AEK_FP = 0x8, - AEK_HWDIV = 0x10, + AEK_HWDIVTHUMB = 0x10, AEK_HWDIVARM = 0x20, AEK_MP = 0x40, AEK_SIMD = 0x80, diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index bd68d241448754a305461e5c3931fcde9be6e040..9e9a91b0abdab85648d253de09f66119bf32d7f6 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -20,10 +20,10 @@ #define LLVM_SUPPORT_TARGETREGISTRY_H #include "llvm-c/Disassembler.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -598,7 +598,7 @@ struct TargetRegistry { /// printRegisteredTargetsForVersion - Print the registered targets /// appropriately for inclusion in a tool's version output. - static void printRegisteredTargetsForVersion(); + static void printRegisteredTargetsForVersion(raw_ostream &OS); /// @name Registry Access /// @{ diff --git a/include/llvm/Support/ThreadPool.h b/include/llvm/Support/ThreadPool.h index f0e3ffa0999c262b7a0d8fa38c2c04c71d9301f8..9ada946c6dae3c49e9a330e0cc62c7cf0dace537 100644 --- a/include/llvm/Support/ThreadPool.h +++ b/include/llvm/Support/ThreadPool.h @@ -35,17 +35,8 @@ namespace llvm { /// for some work to become available. class ThreadPool { public: -#ifndef _MSC_VER - using VoidTy = void; using TaskTy = std::function; using PackagedTaskTy = std::packaged_task; -#else - // MSVC 2013 has a bug and can't use std::packaged_task; - // We force it to use bool(bool) instead. - using VoidTy = bool; - using TaskTy = std::function; - using PackagedTaskTy = std::packaged_task; -#endif /// Construct a pool with the number of core available on the system (or /// whatever the value returned by std::thread::hardware_concurrency() is). @@ -60,30 +51,17 @@ public: /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. template - inline std::shared_future async(Function &&F, Args &&... ArgList) { + inline std::shared_future async(Function &&F, Args &&... ArgList) { auto Task = std::bind(std::forward(F), std::forward(ArgList)...); -#ifndef _MSC_VER return asyncImpl(std::move(Task)); -#else - // This lambda has to be marked mutable because MSVC 2013's std::bind call - // operator isn't const qualified. - return asyncImpl([Task](VoidTy) mutable -> VoidTy { - Task(); - return VoidTy(); - }); -#endif } /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. template - inline std::shared_future async(Function &&F) { -#ifndef _MSC_VER + inline std::shared_future async(Function &&F) { return asyncImpl(std::forward(F)); -#else - return asyncImpl([F] (VoidTy) -> VoidTy { F(); return VoidTy(); }); -#endif } /// Blocking wait for all the threads to complete and the queue to be empty. @@ -93,7 +71,7 @@ public: private: /// Asynchronous submission of a task to the pool. The returned future can be /// used to wait for the task to finish and is *non-blocking* on destruction. - std::shared_future asyncImpl(TaskTy F); + std::shared_future asyncImpl(TaskTy F); /// Threads in flight std::vector Threads; diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h index 6d02e4aba48a7b94a3d8a2744f6a0420b4a4cbb2..8949d69ce72451b39b61dd1fb0c67c3a69540116 100644 --- a/include/llvm/Support/YAMLTraits.h +++ b/include/llvm/Support/YAMLTraits.h @@ -606,7 +606,7 @@ public: template void bitSetCase(T &Val, const char* Str, const T ConstVal) { if ( bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal) ) { - Val = Val | ConstVal; + Val = static_cast(Val | ConstVal); } } @@ -614,7 +614,7 @@ public: template void bitSetCase(T &Val, const char* Str, const uint32_t ConstVal) { if ( bitSetMatch(Str, outputting() && (Val & ConstVal) == ConstVal) ) { - Val = Val | ConstVal; + Val = static_cast(Val | ConstVal); } } @@ -1606,6 +1606,44 @@ template struct StdMapStringCustomMappingTraitsImpl { } \ } +#define LLVM_YAML_DECLARE_MAPPING_TRAITS(Type) \ + namespace llvm { \ + namespace yaml { \ + template <> struct MappingTraits { \ + static void mapping(IO &IO, Type &Obj); \ + }; \ + } \ + } + +#define LLVM_YAML_DECLARE_ENUM_TRAITS(Type) \ + namespace llvm { \ + namespace yaml { \ + template <> struct ScalarEnumerationTraits { \ + static void enumeration(IO &io, Type &Value); \ + }; \ + } \ + } + +#define LLVM_YAML_DECLARE_BITSET_TRAITS(Type) \ + namespace llvm { \ + namespace yaml { \ + template <> struct ScalarBitSetTraits { \ + static void bitset(IO &IO, Type &Options); \ + }; \ + } \ + } + +#define LLVM_YAML_DECLARE_SCALAR_TRAITS(Type, MustQuote) \ + namespace llvm { \ + namespace yaml { \ + template <> struct ScalarTraits { \ + static void output(const Type &Value, void *ctx, llvm::raw_ostream &Out); \ + static StringRef input(StringRef Scalar, void *ctxt, Type &Value); \ + static bool mustQuote(StringRef) { return MustQuote; } \ + }; \ + } \ + } + /// Utility for declaring that a std::vector of a particular type /// should be considered a YAML document list. #define LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(_type) \ diff --git a/include/llvm/Support/raw_sha1_ostream.h b/include/llvm/Support/raw_sha1_ostream.h index 329ef9fd069bc2012130c2f680e1c0711f3d469a..bd55d98b7c1d6b70eab36f5796f64737a2c1f4e1 100644 --- a/include/llvm/Support/raw_sha1_ostream.h +++ b/include/llvm/Support/raw_sha1_ostream.h @@ -14,9 +14,9 @@ #ifndef LLVM_SUPPORT_RAW_SHA1_OSTREAM_H #define LLVM_SUPPORT_RAW_SHA1_OSTREAM_H -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/SHA1.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/SHA1.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h index ce4bbf8cb2cc5c6603fabb16506443d6af878da7..cc08783588009698eff5f8acabc3d5c96301abd6 100644 --- a/include/llvm/Support/type_traits.h +++ b/include/llvm/Support/type_traits.h @@ -14,11 +14,10 @@ #ifndef LLVM_SUPPORT_TYPE_TRAITS_H #define LLVM_SUPPORT_TYPE_TRAITS_H +#include "llvm/Support/Compiler.h" #include #include -#include "llvm/Support/Compiler.h" - #ifndef __has_feature #define LLVM_DEFINED_HAS_FEATURE #define __has_feature(x) 0 @@ -51,7 +50,7 @@ struct isPodLike { // std::pair's are pod-like if their elements are. template -struct isPodLike > { +struct isPodLike> { static const bool value = isPodLike::value && isPodLike::value; }; @@ -63,7 +62,7 @@ struct isPodLike > { /// Also note that enum classes aren't implicitly convertible to integral types, /// the value may therefore need to be explicitly converted before being used. template class is_integral_or_enum { - typedef typename std::remove_reference::type UnderlyingT; + using UnderlyingT = typename std::remove_reference::type; public: static const bool value = @@ -76,23 +75,23 @@ public: /// \brief If T is a pointer, just return it. If it is not, return T&. template -struct add_lvalue_reference_if_not_pointer { typedef T &type; }; +struct add_lvalue_reference_if_not_pointer { using type = T &; }; template struct add_lvalue_reference_if_not_pointer< T, typename std::enable_if::value>::type> { - typedef T type; + using type = T; }; /// \brief If T is a pointer to X, return a pointer to const X. If it is not, /// return const T. template -struct add_const_past_pointer { typedef const T type; }; +struct add_const_past_pointer { using type = const T; }; template struct add_const_past_pointer< T, typename std::enable_if::value>::type> { - typedef const typename std::remove_pointer::type *type; + using type = const typename std::remove_pointer::type *; }; template @@ -104,7 +103,8 @@ struct const_pointer_or_const_ref< T, typename std::enable_if::value>::type> { using type = typename add_const_past_pointer::type; }; -} + +} // end namespace llvm // If the compiler supports detecting whether a class is final, define // an LLVM_IS_FINAL macro. If it cannot be defined properly, this @@ -119,4 +119,4 @@ struct const_pointer_or_const_ref< #undef __has_feature #endif -#endif +#endif // LLVM_SUPPORT_TYPE_TRAITS_H diff --git a/include/llvm/TableGen/Main.h b/include/llvm/TableGen/Main.h index 866b9868deb557a7a364d738a0fff7192993f8ce..ca8c95cb6da2c7d113a48ba2cf8c4761d9d87636 100644 --- a/include/llvm/TableGen/Main.h +++ b/include/llvm/TableGen/Main.h @@ -16,13 +16,15 @@ namespace llvm { -class RecordKeeper; class raw_ostream; +class RecordKeeper; + /// \brief Perform the action using Records, and write output to OS. /// \returns true on error, false otherwise -typedef bool TableGenMainFn(raw_ostream &OS, RecordKeeper &Records); +using TableGenMainFn = bool (raw_ostream &OS, RecordKeeper &Records); int TableGenMain(char *argv0, TableGenMainFn *MainFn); -} -#endif +} // end namespace llvm + +#endif // LLVM_TABLEGEN_MAIN_H diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index fef5bf304566677fa4c126f33646b128177da8bd..fa9ca285bcde9cdd377e8efc8182633b362f286a 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -38,11 +38,11 @@ namespace llvm { class ListRecTy; +struct MultiClass; class Record; class RecordKeeper; class RecordVal; class StringInit; -struct MultiClass; //===----------------------------------------------------------------------===// // Type Classes @@ -90,7 +90,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) { } /// 'bit' - Represent a single bit -/// class BitRecTy : public RecTy { static BitRecTy Shared; @@ -109,7 +108,6 @@ public: }; /// 'bits' - Represent a fixed number of bits -/// class BitsRecTy : public RecTy { unsigned Size; @@ -130,7 +128,6 @@ public: }; /// 'code' - Represent a code fragment -/// class CodeRecTy : public RecTy { static CodeRecTy Shared; @@ -147,7 +144,6 @@ public: }; /// 'int' - Represent an integer value of no particular size -/// class IntRecTy : public RecTy { static IntRecTy Shared; @@ -166,7 +162,6 @@ public: }; /// 'string' - Represent an string value -/// class StringRecTy : public RecTy { static StringRecTy Shared; @@ -185,14 +180,13 @@ public: /// 'list' - Represent a list of values, all of which must be of /// the specified type. -/// class ListRecTy : public RecTy { + friend ListRecTy *RecTy::getListTy(); + RecTy *Ty; explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), Ty(T) {} - friend ListRecTy *RecTy::getListTy(); - public: static bool classof(const RecTy *RT) { return RT->getRecTyKind() == ListRecTyKind; @@ -207,7 +201,6 @@ public: }; /// 'dag' - Represent a dag fragment -/// class DagRecTy : public RecTy { static DagRecTy Shared; @@ -225,14 +218,13 @@ public: /// '[classname]' - Represent an instance of a class, such as: /// (R32 X = EAX). -/// class RecordRecTy : public RecTy { + friend class Record; + Record *Rec; explicit RecordRecTy(Record *R) : RecTy(RecordRecTyKind), Rec(R) {} - friend class Record; - public: static bool classof(const RecTy *RT) { return RT->getRecTyKind() == RecordRecTyKind; @@ -249,7 +241,6 @@ public: /// Find a common type that T1 and T2 convert to. /// Return 0 if no such type exists. -/// RecTy *resolveTypes(RecTy *T1, RecTy *T2); //===----------------------------------------------------------------------===// @@ -341,7 +332,6 @@ public: /// selection operator. Given an initializer, it selects the specified bits /// out, returning them as a new init of bits type. If it is not legal to use /// the bit subscript operator on this initializer, return null. - /// virtual Init *convertInitializerBitRange(ArrayRef Bits) const { return nullptr; } @@ -350,7 +340,6 @@ public: /// selection operator. Given an initializer, it selects the specified list /// elements, returning them as a new init of list type. If it is not legal /// to take a slice of this, return null. - /// virtual Init *convertInitListSlice(ArrayRef Elements) const { return nullptr; } @@ -358,7 +347,6 @@ public: /// This method is used to implement the FieldInit class. /// Implementors of this method should return the type of the named field if /// they are of record type. - /// virtual RecTy *getFieldType(StringInit *FieldName) const { return nullptr; } @@ -366,7 +354,6 @@ public: /// This method complements getFieldType to return the /// initializer for the specified field. If getFieldType returns non-null /// this method should return non-null, otherwise it returns null. - /// virtual Init *getFieldInit(Record &R, const RecordVal *RV, StringInit *FieldName) const { return nullptr; @@ -376,7 +363,6 @@ public: /// variables which may not be defined at the time the expression is formed. /// If a value is set for the variable later, this method will be called on /// users of the value to allow the value to propagate out. - /// virtual Init *resolveReferences(Record &R, const RecordVal *RV) const { return const_cast(this); } @@ -400,7 +386,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Init &I) { /// This is the common super-class of types that have a specific, /// explicit, type. -/// class TypedInit : public Init { RecTy *Ty; @@ -409,8 +394,8 @@ protected: : Init(K, Opc), Ty(T) {} public: - TypedInit(const TypedInit &Other) = delete; - TypedInit &operator=(const TypedInit &Other) = delete; + TypedInit(const TypedInit &) = delete; + TypedInit &operator=(const TypedInit &) = delete; static bool classof(const Init *I) { return I->getKind() >= IK_FirstTypedInit && @@ -438,13 +423,12 @@ public: }; /// '?' - Represents an uninitialized value -/// class UnsetInit : public Init { UnsetInit() : Init(IK_UnsetInit) {} public: UnsetInit(const UnsetInit &) = delete; - UnsetInit &operator=(const UnsetInit &Other) = delete; + UnsetInit &operator=(const UnsetInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_UnsetInit; @@ -463,15 +447,14 @@ public: }; /// 'true'/'false' - Represent a concrete initializer for a bit. -/// class BitInit : public Init { bool Value; explicit BitInit(bool V) : Init(IK_BitInit), Value(V) {} public: - BitInit(const BitInit &Other) = delete; - BitInit &operator=(BitInit &Other) = delete; + BitInit(const BitInit &) = delete; + BitInit &operator=(BitInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_BitInit; @@ -493,7 +476,6 @@ public: /// '{ a, b, c }' - Represents an initializer for a BitsRecTy value. /// It contains a vector of bits, whose size is determined by the type. -/// class BitsInit final : public TypedInit, public FoldingSetNode, public TrailingObjects { unsigned NumBits; @@ -502,8 +484,8 @@ class BitsInit final : public TypedInit, public FoldingSetNode, : TypedInit(IK_BitsInit, BitsRecTy::get(N)), NumBits(N) {} public: - BitsInit(const BitsInit &Other) = delete; - BitsInit &operator=(const BitsInit &Other) = delete; + BitsInit(const BitsInit &) = delete; + BitsInit &operator=(const BitsInit &) = delete; // Do not use sized deallocation due to trailing objects. void operator delete(void *p) { ::operator delete(p); } @@ -552,7 +534,6 @@ public: }; /// '7' - Represent an initialization by a literal integer value. -/// class IntInit : public TypedInit { int64_t Value; @@ -560,8 +541,8 @@ class IntInit : public TypedInit { : TypedInit(IK_IntInit, IntRecTy::get()), Value(V) {} public: - IntInit(const IntInit &Other) = delete; - IntInit &operator=(const IntInit &Other) = delete; + IntInit(const IntInit &) = delete; + IntInit &operator=(const IntInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_IntInit; @@ -590,7 +571,6 @@ public: }; /// "foo" - Represent an initialization by a string value. -/// class StringInit : public TypedInit { StringRef Value; @@ -598,8 +578,8 @@ class StringInit : public TypedInit { : TypedInit(IK_StringInit, StringRecTy::get()), Value(V) {} public: - StringInit(const StringInit &Other) = delete; - StringInit &operator=(const StringInit &Other) = delete; + StringInit(const StringInit &) = delete; + StringInit &operator=(const StringInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_StringInit; @@ -636,8 +616,8 @@ class CodeInit : public TypedInit { Value(V) {} public: - CodeInit(const StringInit &Other) = delete; - CodeInit &operator=(const StringInit &Other) = delete; + CodeInit(const StringInit &) = delete; + CodeInit &operator=(const StringInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_CodeInit; @@ -671,19 +651,19 @@ public: /// [AL, AH, CL] - Represent a list of defs /// class ListInit final : public TypedInit, public FoldingSetNode, - public TrailingObjects { + public TrailingObjects { unsigned NumValues; public: - typedef Init *const *const_iterator; + using const_iterator = Init *const *; private: explicit ListInit(unsigned N, RecTy *EltTy) : TypedInit(IK_ListInit, ListRecTy::get(EltTy)), NumValues(N) {} public: - ListInit(const ListInit &Other) = delete; - ListInit &operator=(const ListInit &Other) = delete; + ListInit(const ListInit &) = delete; + ListInit &operator=(const ListInit &) = delete; // Do not use sized deallocation due to trailing objects. void operator delete(void *p) { ::operator delete(p); } @@ -744,8 +724,8 @@ protected: : TypedInit(K, Type, Opc) {} public: - OpInit(const OpInit &Other) = delete; - OpInit &operator=(OpInit &Other) = delete; + OpInit(const OpInit &) = delete; + OpInit &operator=(OpInit &) = delete; static bool classof(const Init *I) { return I->getKind() >= IK_FirstOpInit && @@ -781,8 +761,8 @@ private: : OpInit(IK_UnOpInit, Type, opc), LHS(lhs) {} public: - UnOpInit(const UnOpInit &Other) = delete; - UnOpInit &operator=(const UnOpInit &Other) = delete; + UnOpInit(const UnOpInit &) = delete; + UnOpInit &operator=(const UnOpInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_UnOpInit; @@ -819,7 +799,6 @@ public: }; /// !op (X, Y) - Combine two inits. -/// class BinOpInit : public OpInit, public FoldingSetNode { public: enum BinaryOp : uint8_t { ADD, AND, OR, SHL, SRA, SRL, LISTCONCAT, @@ -832,8 +811,8 @@ private: OpInit(IK_BinOpInit, Type, opc), LHS(lhs), RHS(rhs) {} public: - BinOpInit(const BinOpInit &Other) = delete; - BinOpInit &operator=(const BinOpInit &Other) = delete; + BinOpInit(const BinOpInit &) = delete; + BinOpInit &operator=(const BinOpInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_BinOpInit; @@ -874,7 +853,6 @@ public: }; /// !op (X, Y, Z) - Combine two inits. -/// class TernOpInit : public OpInit, public FoldingSetNode { public: enum TernaryOp : uint8_t { SUBST, FOREACH, IF }; @@ -887,8 +865,8 @@ private: OpInit(IK_TernOpInit, Type, opc), LHS(lhs), MHS(mhs), RHS(rhs) {} public: - TernOpInit(const TernOpInit &Other) = delete; - TernOpInit &operator=(const TernOpInit &Other) = delete; + TernOpInit(const TernOpInit &) = delete; + TernOpInit &operator=(const TernOpInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_TernOpInit; @@ -935,7 +913,6 @@ public: }; /// 'Opcode' - Represent a reference to an entire variable object. -/// class VarInit : public TypedInit { Init *VarName; @@ -943,8 +920,8 @@ class VarInit : public TypedInit { : TypedInit(IK_VarInit, T), VarName(VN) {} public: - VarInit(const VarInit &Other) = delete; - VarInit &operator=(const VarInit &Other) = delete; + VarInit(const VarInit &) = delete; + VarInit &operator=(const VarInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_VarInit; @@ -980,7 +957,6 @@ public: }; /// Opcode{0} - Represent access to one bit of a variable or field. -/// class VarBitInit : public Init { TypedInit *TI; unsigned Bit; @@ -994,8 +970,8 @@ class VarBitInit : public Init { } public: - VarBitInit(const VarBitInit &Other) = delete; - VarBitInit &operator=(const VarBitInit &Other) = delete; + VarBitInit(const VarBitInit &) = delete; + VarBitInit &operator=(const VarBitInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_VarBitInit; @@ -1032,8 +1008,8 @@ class VarListElementInit : public TypedInit { } public: - VarListElementInit(const VarListElementInit &Other) = delete; - void operator=(const VarListElementInit &Other) = delete; + VarListElementInit(const VarListElementInit &) = delete; + VarListElementInit &operator=(const VarListElementInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_VarListElementInit; @@ -1057,17 +1033,16 @@ public: }; /// AL - Represent a reference to a 'def' in the description -/// class DefInit : public TypedInit { + friend class Record; + Record *Def; DefInit(Record *D, RecordRecTy *T) : TypedInit(IK_DefInit, T), Def(D) {} - friend class Record; - public: - DefInit(const DefInit &Other) = delete; - DefInit &operator=(const DefInit &Other) = delete; + DefInit(const DefInit &) = delete; + DefInit &operator=(const DefInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_DefInit; @@ -1101,7 +1076,6 @@ public: }; /// X.Y - Represent a reference to a subfield of a variable -/// class FieldInit : public TypedInit { Init *Rec; // Record we are referring to StringInit *FieldName; // Field we are accessing @@ -1112,8 +1086,8 @@ class FieldInit : public TypedInit { } public: - FieldInit(const FieldInit &Other) = delete; - FieldInit &operator=(const FieldInit &Other) = delete; + FieldInit(const FieldInit &) = delete; + FieldInit &operator=(const FieldInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_FieldInit; @@ -1136,22 +1110,24 @@ public: /// (v a, b) - Represent a DAG tree value. DAG inits are required /// to have at least one value then a (possibly empty) list of arguments. Each /// argument can have a name associated with it. -/// -class DagInit : public TypedInit, public FoldingSetNode { +class DagInit final : public TypedInit, public FoldingSetNode, + public TrailingObjects { + friend TrailingObjects; + Init *Val; StringInit *ValName; - SmallVector Args; - SmallVector ArgNames; + unsigned NumArgs; + unsigned NumArgNames; - DagInit(Init *V, StringInit *VN, ArrayRef ArgRange, - ArrayRef NameRange) + DagInit(Init *V, StringInit *VN, unsigned NumArgs, unsigned NumArgNames) : TypedInit(IK_DagInit, DagRecTy::get()), Val(V), ValName(VN), - Args(ArgRange.begin(), ArgRange.end()), - ArgNames(NameRange.begin(), NameRange.end()) {} + NumArgs(NumArgs), NumArgNames(NumArgNames) {} + + size_t numTrailingObjects(OverloadToken) const { return NumArgs; } public: - DagInit(const DagInit &Other) = delete; - DagInit &operator=(const DagInit &Other) = delete; + DagInit(const DagInit &) = delete; + DagInit &operator=(const DagInit &) = delete; static bool classof(const Init *I) { return I->getKind() == IK_DagInit; @@ -1169,45 +1145,54 @@ public: Init *getOperator() const { return Val; } StringInit *getName() const { return ValName; } + StringRef getNameStr() const { return ValName ? ValName->getValue() : StringRef(); } - unsigned getNumArgs() const { return Args.size(); } + unsigned getNumArgs() const { return NumArgs; } + Init *getArg(unsigned Num) const { - assert(Num < Args.size() && "Arg number out of range!"); - return Args[Num]; + assert(Num < NumArgs && "Arg number out of range!"); + return getTrailingObjects()[Num]; } + StringInit *getArgName(unsigned Num) const { - assert(Num < ArgNames.size() && "Arg number out of range!"); - return ArgNames[Num]; + assert(Num < NumArgNames && "Arg number out of range!"); + return getTrailingObjects()[Num]; } + StringRef getArgNameStr(unsigned Num) const { StringInit *Init = getArgName(Num); return Init ? Init->getValue() : StringRef(); } + ArrayRef getArgs() const { + return makeArrayRef(getTrailingObjects(), NumArgs); + } + + ArrayRef getArgNames() const { + return makeArrayRef(getTrailingObjects(), NumArgNames); + } + Init *resolveReferences(Record &R, const RecordVal *RV) const override; std::string getAsString() const override; - typedef SmallVectorImpl::const_iterator const_arg_iterator; - typedef SmallVectorImpl::const_iterator const_name_iterator; + using const_arg_iterator = SmallVectorImpl::const_iterator; + using const_name_iterator = SmallVectorImpl::const_iterator; - inline const_arg_iterator arg_begin() const { return Args.begin(); } - inline const_arg_iterator arg_end () const { return Args.end(); } - inline iterator_range args() const { - return llvm::make_range(arg_begin(), arg_end()); - } + inline const_arg_iterator arg_begin() const { return getArgs().begin(); } + inline const_arg_iterator arg_end () const { return getArgs().end(); } - inline size_t arg_size () const { return Args.size(); } - inline bool arg_empty() const { return Args.empty(); } + inline size_t arg_size () const { return NumArgs; } + inline bool arg_empty() const { return NumArgs == 0; } - inline const_name_iterator name_begin() const { return ArgNames.begin(); } - inline const_name_iterator name_end () const { return ArgNames.end(); } + inline const_name_iterator name_begin() const { return getArgNames().begin();} + inline const_name_iterator name_end () const { return getArgNames().end(); } - inline size_t name_size () const { return ArgNames.size(); } - inline bool name_empty() const { return ArgNames.empty(); } + inline size_t name_size () const { return NumArgNames; } + inline bool name_empty() const { return NumArgNames == 0; } Init *getBit(unsigned Bit) const override { llvm_unreachable("Illegal bit reference off dag"); @@ -1225,13 +1210,13 @@ public: class RecordVal { friend class Record; + Init *Name; PointerIntPair TyAndPrefix; Init *Value; public: RecordVal(Init *N, RecTy *T, bool P); - RecordVal(StringRef N, RecTy *T, bool P); StringRef getName() const; Init *getNameInit() const { return Name; } @@ -1293,7 +1278,7 @@ class Record { // definitions that use them (e.g. Def). However, inside a multiclass they // can't be immediately resolved so we mark them ResolveFirst to fully // resolve them later as soon as the multiclass is instantiated. - bool ResolveFirst; + bool ResolveFirst = false; void init(); void checkName(); @@ -1303,7 +1288,7 @@ public: explicit Record(Init *N, ArrayRef locs, RecordKeeper &records, bool Anonymous = false) : Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records), - ID(LastID++), IsAnonymous(Anonymous), ResolveFirst(false) { + ID(LastID++), IsAnonymous(Anonymous) { init(); } @@ -1325,6 +1310,7 @@ public: unsigned getID() const { return ID; } StringRef getName() const; + Init *getNameInit() const { return Name; } @@ -1334,7 +1320,6 @@ public: } void setName(Init *Name); // Also updates RecordKeeper. - void setName(StringRef Name); // Also updates RecordKeeper. ArrayRef getLoc() const { return Locs; } @@ -1357,10 +1342,6 @@ public: return false; } - bool isTemplateArg(StringRef Name) const { - return isTemplateArg(StringInit::get(Name)); - } - const RecordVal *getValue(const Init *Name) const { for (const RecordVal &Val : Values) if (Val.Name == Name) return &Val; @@ -1372,13 +1353,11 @@ public: } RecordVal *getValue(const Init *Name) { - for (RecordVal &Val : Values) - if (Val.Name == Name) return &Val; - return nullptr; + return const_cast(static_cast(this)->getValue(Name)); } RecordVal *getValue(StringRef Name) { - return getValue(StringInit::get(Name)); + return const_cast(static_cast(this)->getValue(Name)); } void addTemplateArg(Init *Name) { @@ -1386,10 +1365,6 @@ public: TemplateArgs.push_back(Name); } - void addTemplateArg(StringRef Name) { - addTemplateArg(StringInit::get(Name)); - } - void addValue(const RecordVal &RV) { assert(getValue(RV.getNameInit()) == nullptr && "Value already added!"); Values.push_back(RV); @@ -1441,7 +1416,6 @@ public: /// If there are any field references that refer to fields /// that have been filled in, we can propagate the values now. - /// void resolveReferences() { resolveReferencesTo(nullptr); } /// If anything in this record refers to RV, replace the @@ -1474,7 +1448,6 @@ public: /// Return the initializer for a value with the specified name, /// or throw an exception if the field does not exist. - /// Init *getValueInit(StringRef FieldName) const; /// Return true if the named field is unset. @@ -1485,67 +1458,56 @@ public: /// This method looks up the specified field and returns /// its value as a string, throwing an exception if the field does not exist /// or if the value is not a string. - /// - std::string getValueAsString(StringRef FieldName) const; + StringRef getValueAsString(StringRef FieldName) const; /// This method looks up the specified field and returns /// its value as a BitsInit, throwing an exception if the field does not exist /// or if the value is not the right type. - /// BitsInit *getValueAsBitsInit(StringRef FieldName) const; /// This method looks up the specified field and returns /// its value as a ListInit, throwing an exception if the field does not exist /// or if the value is not the right type. - /// ListInit *getValueAsListInit(StringRef FieldName) const; /// This method looks up the specified field and /// returns its value as a vector of records, throwing an exception if the /// field does not exist or if the value is not the right type. - /// std::vector getValueAsListOfDefs(StringRef FieldName) const; /// This method looks up the specified field and /// returns its value as a vector of integers, throwing an exception if the /// field does not exist or if the value is not the right type. - /// std::vector getValueAsListOfInts(StringRef FieldName) const; /// This method looks up the specified field and /// returns its value as a vector of strings, throwing an exception if the /// field does not exist or if the value is not the right type. - /// - std::vector getValueAsListOfStrings(StringRef FieldName) const; + std::vector getValueAsListOfStrings(StringRef FieldName) const; /// This method looks up the specified field and returns its /// value as a Record, throwing an exception if the field does not exist or if /// the value is not the right type. - /// Record *getValueAsDef(StringRef FieldName) const; /// This method looks up the specified field and returns its /// value as a bit, throwing an exception if the field does not exist or if /// the value is not the right type. - /// bool getValueAsBit(StringRef FieldName) const; /// This method looks up the specified field and /// returns its value as a bit. If the field is unset, sets Unset to true and /// returns false. - /// bool getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const; /// This method looks up the specified field and returns its /// value as an int64_t, throwing an exception if the field does not exist or /// if the value is not the right type. - /// int64_t getValueAsInt(StringRef FieldName) const; /// This method looks up the specified field and returns its /// value as an Dag, throwing an exception if the field does not exist or if /// the value is not the right type. - /// DagInit *getValueAsDag(StringRef FieldName) const; }; @@ -1553,7 +1515,7 @@ raw_ostream &operator<<(raw_ostream &OS, const Record &R); struct MultiClass { Record Rec; // Placeholder for template args and Name. - typedef std::vector> RecordVector; + using RecordVector = std::vector>; RecordVector DefPrototypes; void dump() const; @@ -1563,7 +1525,7 @@ struct MultiClass { }; class RecordKeeper { - typedef std::map> RecordMap; + using RecordMap = std::map>; RecordMap Classes, Defs; public: @@ -1606,7 +1568,6 @@ public: }; /// Sorting predicate to sort record pointers by name. -/// struct LessRecord { bool operator()(const Record *Rec1, const Record *Rec2) const { return StringRef(Rec1->getName()).compare_numeric(Rec2->getName()) < 0; @@ -1625,7 +1586,6 @@ struct LessRecordByID { /// Sorting predicate to sort record pointers by their /// name field. -/// struct LessRecordFieldName { bool operator()(const Record *Rec1, const Record *Rec2) const { return Rec1->getValueAsString("Name") < Rec2->getValueAsString("Name"); diff --git a/include/llvm/TableGen/SetTheory.h b/include/llvm/TableGen/SetTheory.h index 818b0549b66a8eccd38868653e19cb84fb37dd1b..4b32f9e3da8fb8e83545d7c06414d6263364c243 100644 --- a/include/llvm/TableGen/SetTheory.h +++ b/include/llvm/TableGen/SetTheory.h @@ -64,8 +64,8 @@ class Record; class SetTheory { public: - typedef std::vector RecVec; - typedef SmallSetVector RecSet; + using RecVec = std::vector; + using RecSet = SmallSetVector; /// Operator - A callback representing a DAG operator. class Operator { @@ -95,7 +95,7 @@ public: private: // Map set defs to their fully expanded contents. This serves as a memoization // cache and it makes it possible to return const references on queries. - typedef std::map ExpandMap; + using ExpandMap = std::map; ExpandMap Expansions; // Known DAG operators by name. diff --git a/include/llvm/TableGen/StringMatcher.h b/include/llvm/TableGen/StringMatcher.h index 11a8ad8183aad823b0d1185fd8392b1a07ce2259..7c919ffec7b611147cad1bd90bffde46fa31f643 100644 --- a/include/llvm/TableGen/StringMatcher.h +++ b/include/llvm/TableGen/StringMatcher.h @@ -20,7 +20,8 @@ #include namespace llvm { - class raw_ostream; + +class raw_ostream; /// StringMatcher - Given a list of strings and code to execute when they match, /// output a simple switch tree to classify the input string. @@ -30,7 +31,7 @@ namespace llvm { /// class StringMatcher { public: - typedef std::pair StringPair; + using StringPair = std::pair; private: StringRef StrVariableName; @@ -49,6 +50,6 @@ private: unsigned CharNo, unsigned IndentCount) const; }; -} // end llvm namespace. +} // end namespace llvm -#endif +#endif // LLVM_TABLEGEN_STRINGMATCHER_H diff --git a/include/llvm/TableGen/StringToOffsetTable.h b/include/llvm/TableGen/StringToOffsetTable.h index aaf2a356ffab6f6cd4a83e3c2a63b56ea374cf2c..4b11e889ea6c7e980ad91898f76665885e25e29c 100644 --- a/include/llvm/TableGen/StringToOffsetTable.h +++ b/include/llvm/TableGen/StringToOffsetTable.h @@ -27,6 +27,8 @@ class StringToOffsetTable { std::string AggregateString; public: + bool Empty() const { return StringOffset.empty(); } + unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) { auto IterBool = StringOffset.insert(std::make_pair(Str, AggregateString.size())); diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 9f034220815f21c887b8035335b80fdc15fabc09..a06c67fe814c813111b578862370cea4d4c5fc24 100644 --- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -25,25 +25,43 @@ class GINodeEquiv { SDNode Node = node; } -def : GINodeEquiv; +// These are defined in the same order as the G_* instructions. +def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +// G_INTTOPTR - SelectionDAG has no equivalent. +// G_PTRTOINT - SelectionDAG has no equivalent. +// G_CONSTANT - Not needed since constants aren't operators. +// G_FCONSTANT - Not needed since constants aren't operators. def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; - +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; -def : GINodeEquiv; - def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; - -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; -def : GINodeEquiv; - +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. diff --git a/include/llvm/Target/GlobalISel/Target.td b/include/llvm/Target/GlobalISel/Target.td index fa1a424b589542e8a090a0c6eca0bae51fca7004..fd2ebca86d607356c10eebd2ed3384baaacf5e92 100644 --- a/include/llvm/Target/GlobalISel/Target.td +++ b/include/llvm/Target/GlobalISel/Target.td @@ -30,21 +30,13 @@ def s64 : LLT; // Definitions that inherit from this may also inherit from // GIComplexPatternEquiv to enable the import of SelectionDAG patterns involving // those ComplexPatterns. -class GIComplexOperandMatcher { +class GIComplexOperandMatcher { // The expected type of the root of the match. // // TODO: We should probably support, any-type, any-scalar, and multiple types // in the future. LLT Type = type; - // The operands that result from a successful match - // Should be of the form '(ops ty1, ty2, ...)' where ty1/ty2 are definitions - // that inherit from Operand. - // - // FIXME: Which definition is used for ty1/ty2 doesn't actually matter at the - // moment. Only the number of operands is used. - dag Operands = operands; - // The function that determines whether the operand matches. It should be of // the form: // bool select(const MatchOperand &Root, MatchOperand &Result1) diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index b21689e0e1346e9bcac5b02fc626f5b73bd02e1a..6f44292c47ed0eda7fbaf992064a7e74776a6e2b 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -530,6 +530,12 @@ class Predicate { /// PredicateName - User-level name to use for the predicate. Mainly for use /// in diagnostics such as missing feature errors in the asm matcher. string PredicateName = ""; + + /// Setting this to '1' indicates that the predicate must be recomputed on + /// every function change. Most predicates can leave this at '0'. + /// + /// Ignored by SelectionDAG, it always recomputes the predicate on every use. + bit RecomputePerFunction = 0; } /// NoHonorSignDependentRounding - This predicate is true if support for @@ -674,6 +680,11 @@ class RegisterOperand // this type. The method normally will just use an alt-name index to look // up the name to print. Default to the generic printOperand(). string PrintMethod = pm; + + // EncoderMethod - The target method name to call to encode this register + // operand. + string EncoderMethod = ""; + // ParserMatchClass - The "match class" that operands of this type fit // in. Match classes are used to define the order in which instructions are // match, to ensure that which instructions gets matched is deterministic. @@ -996,6 +1007,16 @@ def PATCHABLE_TAIL_CALL : Instruction { let hasSideEffects = 1; let isReturn = 1; } +def PATCHABLE_EVENT_CALL : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins ptr_rc:$event, i8imm:$size); + let AsmString = "# XRay Custom Event Log."; + let usesCustomInserter = 1; + let isCall = 1; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 1; +} def FENTRY_CALL : Instruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 0beb6cddf5bc3ff04b903226997424243ef5ed81..7595d43398109b6423a35fd3c4bb916dea9f696c 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -16,13 +16,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" namespace llvm { @@ -152,6 +152,42 @@ public: unsigned getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; } unsigned getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; } + /// Returns true if the argument is a frame pseudo instruction. + bool isFrameInstr(const MachineInstr &I) const { + return I.getOpcode() == getCallFrameSetupOpcode() || + I.getOpcode() == getCallFrameDestroyOpcode(); + } + + /// Returns true if the argument is a frame setup pseudo instruction. + bool isFrameSetup(const MachineInstr &I) const { + return I.getOpcode() == getCallFrameSetupOpcode(); + } + + /// Returns size of the frame associated with the given frame instruction. + /// For frame setup instruction this is frame that is set up space set up + /// after the instruction. For frame destroy instruction this is the frame + /// freed by the caller. + /// Note, in some cases a call frame (or a part of it) may be prepared prior + /// to the frame setup instruction. It occurs in the calls that involve + /// inalloca arguments. This function reports only the size of the frame part + /// that is set up between the frame setup and destroy pseudo instructions. + int64_t getFrameSize(const MachineInstr &I) const { + assert(isFrameInstr(I) && "Not a frame instruction"); + assert(I.getOperand(0).getImm() >= 0); + return I.getOperand(0).getImm(); + } + + /// Returns the total frame size, which is made up of the space set up inside + /// the pair of frame start-stop instructions and the space that is set up + /// prior to the pair. + int64_t getFrameTotalSize(const MachineInstr &I) const { + if (isFrameSetup(I)) { + assert(I.getOperand(1).getImm() >= 0 && "Frame size must not be negative"); + return getFrameSize(I) + I.getOperand(1).getImm(); + } + return getFrameSize(I); + } + unsigned getCatchReturnOpcode() const { return CatchRetOpcode; } unsigned getReturnOpcode() const { return ReturnOpcode; } @@ -1083,7 +1119,7 @@ public: /// Return the noop instruction to use for a noop. - virtual void getNoopForMachoTarget(MCInst &NopInst) const; + virtual void getNoop(MCInst &NopInst) const; /// Return true for post-incremented instructions. virtual bool isPostIncrement(const MachineInstr &MI) const { diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 240896a538f1974d45947bf399ba63ce8040cf2b..a9d67228d205117d32c18a514bdce2ab70a0a5f4 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -69,6 +69,7 @@ class CCValAssign; class FastISel; class FunctionLoweringInfo; class IntrinsicInst; +struct KnownBits; class MachineBasicBlock; class MachineFunction; class MachineInstr; @@ -186,7 +187,7 @@ public: IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false), IsSwiftSelf(false), IsSwiftError(false) {} - void setAttributes(ImmutableCallSite *CS, unsigned AttrIdx); + void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx); }; typedef std::vector ArgListTy; @@ -230,6 +231,18 @@ public: return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); } + /// Return the type for frame index, which is determined by + /// the alloca address space specified through the data layout. + MVT getFrameIndexTy(const DataLayout &DL) const { + return getPointerTy(DL, DL.getAllocaAddrSpace()); + } + + /// Return the type for operands of fence. + /// TODO: Let fence operands be of i32 type and remove this. + virtual MVT getFenceOperandTy(const DataLayout &DL) const { + return getPointerTy(DL); + } + /// EVT is not used in-tree, but is used by out-of-tree target. /// A documentation for this function would be nice... virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; @@ -392,7 +405,9 @@ public: } /// Returns if it's reasonable to merge stores to MemVT size. - virtual bool canMergeStoresTo(EVT MemVT) const { return true; } + virtual bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const { + return true; + } /// \brief Return true if it is cheap to speculate a call to intrinsic cttz. virtual bool isCheapToSpeculateCttz() const { @@ -662,6 +677,16 @@ public: unsigned &NumIntermediates, MVT &RegisterVT) const; + /// Certain targets such as MIPS require that some types such as vectors are + /// always broken down into scalars in some contexts. This occurs even if the + /// vector type is legal. + virtual unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, + RegisterVT); + } + struct IntrinsicInfo { unsigned opc = 0; // target opcode EVT memVT; // memory VT @@ -723,7 +748,7 @@ public: if (VT.isExtended()) return Expand; // If a target-specific SDNode requires legalization, require the target // to provide custom legalization for it. - if (Op > array_lengthof(OpActions[0])) return Custom; + if (Op >= array_lengthof(OpActions[0])) return Custom; return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; } @@ -762,6 +787,74 @@ public: return (!isTypeLegal(VT) && getOperationAction(Op, VT) == Custom); } + /// Return true if lowering to a jump table is allowed. + bool areJTsAllowed(const Function *Fn) const { + if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") + return false; + + return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + isOperationLegalOrCustom(ISD::BRIND, MVT::Other); + } + + /// Check whether the range [Low,High] fits in a machine word. + bool rangeFitsInWord(const APInt &Low, const APInt &High, + const DataLayout &DL) const { + // FIXME: Using the pointer type doesn't seem ideal. + uint64_t BW = DL.getPointerSizeInBits(); + uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; + return Range <= BW; + } + + /// Return true if lowering to a jump table is suitable for a set of case + /// clusters which may contain \p NumCases cases, \p Range range of values. + /// FIXME: This function check the maximum table size and density, but the + /// minimum size is not checked. It would be nice if the the minimum size is + /// also combined within this function. Currently, the minimum size check is + /// performed in findJumpTable() in SelectionDAGBuiler and + /// getEstimatedNumberOfCaseClusters() in BasicTTIImpl. + bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, + uint64_t Range) const { + const bool OptForSize = SI->getParent()->getParent()->optForSize(); + const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); + const unsigned MaxJumpTableSize = + OptForSize || getMaximumJumpTableSize() == 0 + ? UINT_MAX + : getMaximumJumpTableSize(); + // Check whether a range of clusters is dense enough for a jump table. + if (Range <= MaxJumpTableSize && + (NumCases * 100 >= Range * MinDensity)) { + return true; + } + return false; + } + + /// Return true if lowering to a bit test is suitable for a set of case + /// clusters which contains \p NumDests unique destinations, \p Low and + /// \p High as its lowest and highest case values, and expects \p NumCmps + /// case value comparisons. Check if the number of destinations, comparison + /// metric, and range are all suitable. + bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, + const APInt &Low, const APInt &High, + const DataLayout &DL) const { + // FIXME: I don't think NumCmps is the correct metric: a single case and a + // range of cases both require only one branch to lower. Just looking at the + // number of clusters and destinations should be enough to decide whether to + // build bit tests. + + // To lower a range with bit tests, the range must fit the bitwidth of a + // machine word. + if (!rangeFitsInWord(Low, High, DL)) + return false; + + // Decide whether it's profitable to lower this range with bit tests. Each + // destination requires a bit test and branch, and there is an overall range + // check branch. For a small number of clusters, separate comparisons might + // be cheaper, and for many destinations, splitting the range might be + // better. + return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || + (NumDests == 3 && NumCmps >= 6); + } + /// Return true if the specified operation is illegal on this target or /// unlikely to be made legal with custom lowering. This is used to help guide /// high-level lowering decisions. @@ -1002,6 +1095,33 @@ public: llvm_unreachable("Unsupported extended type!"); } + /// Certain combinations of ABIs, Targets and features require that types + /// are legal for some operations and not for other operations. + /// For MIPS all vector types must be passed through the integer register set. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const { + return getRegisterType(VT); + } + + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + return getRegisterType(Context, VT); + } + + /// Certain targets require unusual breakdowns of certain types. For MIPS, + /// this occurs when a vector type is used, as vector are passed through the + /// integer register set. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + return getNumRegisters(Context, VT); + } + + /// Certain targets have context senstive alignment requirements, where one + /// type has the alignment requirement of another type. + virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, + DataLayout DL) const { + return DL.getABITypeAlignment(ArgTy); + } + /// If true, then instruction selection should seek to shrink the FP constant /// of the specified type to a smaller type in order to save space and / or /// reduce runtime. @@ -1060,6 +1180,16 @@ public: return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; } + /// Get maximum # of load operations permitted for memcmp + /// + /// This function returns the maximum number of load operations permitted + /// to replace a call to memcmp. The value is set by the target at the + /// performance threshold for such a replacement. If OptSize is true, + /// return the limit for functions that have OptSize attribute. + unsigned getMaxExpandSizeMemcmp(bool OptSize) const { + return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; + } + /// \brief Get maximum # of store operations permitted for llvm.memmove /// /// This function returns the maximum number of store operations permitted @@ -1136,6 +1266,9 @@ public: /// Return lower limit for number of blocks in a jump table. unsigned getMinimumJumpTableEntries() const; + /// Return lower limit of the density in a jump table. + unsigned getMinimumJumpTableDensity(bool OptForSize) const; + /// Return upper limit for number of entries in a jump table. /// Zero if no limit. unsigned getMaximumJumpTableSize() const; @@ -1312,7 +1445,10 @@ public: /// It is called by AtomicExpandPass before expanding an /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad /// if shouldInsertFencesForAtomic returns true. - /// RMW and CmpXchg set both IsStore and IsLoad to true. + /// + /// Inst is the original atomic instruction, prior to other expansions that + /// may be performed. + /// /// This function should either return a nullptr, or a pointer to an IR-level /// Instruction*. Even complex fence sequences can be represented by a /// single Instruction* through an intrinsic to be lowered later. @@ -1338,18 +1474,17 @@ public: /// seq_cst. But if they are lowered to monotonic accesses, no amount of /// IR-level fences can prevent it. /// @{ - virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, - AtomicOrdering Ord, bool IsStore, - bool IsLoad) const { - if (isReleaseOrStronger(Ord) && IsStore) + virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const { + if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore()) return Builder.CreateFence(Ord); else return nullptr; } virtual Instruction *emitTrailingFence(IRBuilder<> &Builder, - AtomicOrdering Ord, bool IsStore, - bool IsLoad) const { + Instruction *Inst, + AtomicOrdering Ord) const { if (isAcquireOrStronger(Ord)) return Builder.CreateFence(Ord); else @@ -1778,6 +1913,38 @@ public: return false; } + /// Returns true if the opcode is a commutative binary operation. + virtual bool isCommutativeBinOp(unsigned Opcode) const { + // FIXME: This should get its info from the td file. + switch (Opcode) { + case ISD::ADD: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + case ISD::MUL: + case ISD::MULHU: + case ISD::MULHS: + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: + case ISD::FADD: + case ISD::FMUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SADDO: + case ISD::UADDO: + case ISD::ADDC: + case ISD::ADDE: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + return true; + default: return false; + } + } + /// Return true if it's free to truncate a value of type FromTy to type /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 /// by referencing its sub-register AX. @@ -2013,6 +2180,12 @@ public: return LibcallCallingConvs[Call]; } + /// Execute target specific actions to finalize target lowering. + /// This is used to set extra flags in MachineFrameInformation and freezing + /// the set of reserved registers. + /// The default implementation just freezes the set of reserved registers. + virtual void finalizeLowering(MachineFunction &MF) const; + private: const TargetMachine &TM; @@ -2236,6 +2409,8 @@ protected: /// Maximum number of store operations that may be substituted for a call to /// memcpy, used for functions with OptSize attribute. unsigned MaxStoresPerMemcpyOptSize; + unsigned MaxLoadsPerMemcmp; + unsigned MaxLoadsPerMemcmpOptSize; /// \brief Specify maximum bytes of store instructions per memmove call. /// @@ -2262,7 +2437,8 @@ protected: /// Return true if the value types that can be represented by the specified /// register class are all legal. - bool isLegalRC(const TargetRegisterClass *RC) const; + bool isLegalRC(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC) const; /// Replace/modify any TargetFrameIndex operands with a targte-dependent /// sequence of memory operands that is recognized by PrologEpilogInserter. @@ -2382,30 +2558,39 @@ public: New = N; return true; } - - /// Check to see if the specified operand of the specified instruction is a - /// constant integer. If so, check to see if there are any bits set in the - /// constant that are not demanded. If so, shrink the constant and return - /// true. - bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded); - - /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This - /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be - /// generalized for targets with other types of implicit widening casts. - bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, - const SDLoc &dl); - - /// Helper for SimplifyDemandedBits that can simplify an operation with - /// multiple uses. This function uses TLI.SimplifyDemandedBits to - /// simplify Operand \p OpIdx of \p User and then updated \p User with - /// the simplified version. No other uses of \p OpIdx are updated. - /// If \p User is the only user of \p OpIdx, this function behaves exactly - /// like TLI.SimplifyDemandedBits except that it also updates the DAG by - /// calling DCI.CommitTargetLoweringOpt. - bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, - const APInt &Demanded, DAGCombinerInfo &DCI); }; + /// Check to see if the specified operand of the specified instruction is a + /// constant integer. If so, check to see if there are any bits set in the + /// constant that are not demanded. If so, shrink the constant and return + /// true. + bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + // Target hook to do target-specific const optimization, which is called by + // ShrinkDemandedConstant. This function should return true if the target + // doesn't want ShrinkDemandedConstant to further optimize the constant. + virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + return false; + } + + /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This + /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be + /// generalized for targets with other types of implicit widening casts. + bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + /// Helper for SimplifyDemandedBits that can simplify an operation with + /// multiple uses. This function simplifies operand \p OpIdx of \p User and + /// then updates \p User with the simplified version. No other uses of + /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this + /// function behaves exactly like function SimplifyDemandedBits declared + /// below except that it also updates the DAG by calling + /// DCI.CommitTargetLoweringOpt. + bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded, + DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const; + /// Look at Op. At this point, we know that only the DemandedMask bits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning @@ -2420,7 +2605,7 @@ public: /// with TLO.New will be incorrect when this parameter is true and TLO.Old /// has multiple uses. bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, - APInt &KnownZero, APInt &KnownOne, + KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth = 0, bool AssumeSingleUse = false) const; @@ -2434,8 +2619,7 @@ public: /// argument allows us to only collect the known bits that are shared by the /// requested vector elements. virtual void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const; @@ -2562,12 +2746,6 @@ public: return false; } - /// Return true if the MachineFunction contains a COPY which would imply - /// HasCopyImplyingStackAdjustment. - virtual bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const { - return false; - } - /// Perform necessary initialization to handle a subset of CSRs explicitly /// via copies. This function is called at the beginning of instruction /// selection. @@ -2680,15 +2858,15 @@ public: ImmutableCallSite &Call) { RetTy = ResultType; - IsInReg = Call.paramHasAttr(0, Attribute::InReg); + IsInReg = Call.hasRetAttr(Attribute::InReg); DoesNotReturn = Call.doesNotReturn() || (!Call.isInvoke() && isa(Call.getInstruction()->getNextNode())); IsVarArg = FTy->isVarArg(); IsReturnValueUsed = !Call.getInstruction()->use_empty(); - RetSExt = Call.paramHasAttr(0, Attribute::SExt); - RetZExt = Call.paramHasAttr(0, Attribute::ZExt); + RetSExt = Call.hasRetAttr(Attribute::SExt); + RetZExt = Call.hasRetAttr(Attribute::ZExt); Callee = Target; @@ -2807,7 +2985,7 @@ public: /// Return true if the target may be able emit the call instruction as a tail /// call. This is used by optimization passes to determine if it's profitable /// to duplicate return instructions to enable tailcall optimization. - virtual bool mayBeEmittedAsTailCall(CallInst *) const { + virtual bool mayBeEmittedAsTailCall(const CallInst *) const { return false; } diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h index 0ffd4b7f8c7863d6eea0bc15d632e8995932f69d..80d4d8e42e519bfbd91f9389582d229050f8ff52 100644 --- a/include/llvm/Target/TargetLoweringObjectFile.h +++ b/include/llvm/Target/TargetLoweringObjectFile.h @@ -70,10 +70,9 @@ public: virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, const MCSymbol *Sym) const; - /// Emit the module flags that the platform cares about. - virtual void emitModuleFlags(MCStreamer &Streamer, - ArrayRef Flags, - const TargetMachine &TM) const {} + /// Emit the module-level metadata that the platform cares about. + virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M, + const TargetMachine &TM) const {} /// Given a constant with the SectionKind, return a section that it should be /// placed in. diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 73ae2ad129881cfefc755ac665d3400af7761d48..933c6c87b0beaf4c6bd5030dcdcde83672403dae 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -25,7 +25,6 @@ namespace llvm { class GlobalValue; -class MachineFunctionInitializer; class Mangler; class MCAsmInfo; class MCContext; @@ -227,8 +226,7 @@ public: PassManagerBase &, raw_pwrite_stream &, CodeGenFileType, bool /*DisableVerify*/ = true, AnalysisID /*StartBefore*/ = nullptr, AnalysisID /*StartAfter*/ = nullptr, AnalysisID /*StopBefore*/ = nullptr, - AnalysisID /*StopAfter*/ = nullptr, - MachineFunctionInitializer * /*MFInitializer*/ = nullptr) { + AnalysisID /*StopAfter*/ = nullptr) { return true; } @@ -289,8 +287,7 @@ public: PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, bool DisableVerify = true, AnalysisID StartBefore = nullptr, AnalysisID StartAfter = nullptr, AnalysisID StopBefore = nullptr, - AnalysisID StopAfter = nullptr, - MachineFunctionInitializer *MFInitializer = nullptr) override; + AnalysisID StopAfter = nullptr) override; /// Add passes to the specified pass manager to get machine code emitted with /// the MCJIT. This method returns true if machine code is not supported. It @@ -299,6 +296,17 @@ public: bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_pwrite_stream &OS, bool DisableVerify = true) override; + + /// Returns true if the target is expected to pass all machine verifier + /// checks. This is a stopgap measure to fix targets one by one. We will + /// remove this at some point and always enable the verifier when + /// EXPENSIVE_CHECKS is enabled. + virtual bool isMachineVerifierClean() const { return true; } + + /// \brief Adds an AsmPrinter pass to the pipeline that prints assembly or + /// machine code from the MI representation. + bool addAsmPrinter(PassManagerBase &PM, raw_pwrite_stream &Out, + CodeGenFileType FileTYpe, MCContext &Context); }; } // end namespace llvm diff --git a/include/llvm/Target/TargetOpcodes.def b/include/llvm/Target/TargetOpcodes.def index 96db6e0a97698dd142464abd784f24ef014873ce..36764249632da801c93c3ea601b7d45981349c21 100644 --- a/include/llvm/Target/TargetOpcodes.def +++ b/include/llvm/Target/TargetOpcodes.def @@ -182,6 +182,10 @@ HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT) /// PATCHABLE_RET which specifically only works for return instructions. HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL) +/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be +/// patched to insert instrumentation instructions. +HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL) + /// The following generic opcodes are not supposed to appear after ISel. /// This is something we might want to relax, but for now, this is convenient /// to produce diagnostics. diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 7cc33f2fdccbbd655e5639afb99bd3b19a73690e..5c2063880f8be89a2cbe5bd797b8ebf092a8acd8 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -105,10 +105,10 @@ namespace llvm { HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), EnableFastISel(false), UseInitArray(false), - DisableIntegratedAS(false), CompressDebugSections(false), - RelaxELFRelocations(false), FunctionSections(false), - DataSections(false), UniqueSectionNames(true), TrapUnreachable(false), - EmulatedTLS(false), EnableIPRA(false) {} + DisableIntegratedAS(false), RelaxELFRelocations(false), + FunctionSections(false), DataSections(false), + UniqueSectionNames(true), TrapUnreachable(false), EmulatedTLS(false), + EnableIPRA(false) {} /// PrintMachineCode - This flag is enabled when the -print-machineinstrs /// option is specified on the command line, and should enable debugging @@ -194,7 +194,7 @@ namespace llvm { unsigned DisableIntegratedAS : 1; /// Compress DWARF debug sections. - unsigned CompressDebugSections : 1; + DebugCompressionType CompressDebugSections = DebugCompressionType::None; unsigned RelaxELFRelocations : 1; diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 3f5daea63ab591270d08c1a550d12e6afc2b2616..86ad8ad530527fcbf97be69af5b79ed8f0694084 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -40,13 +40,12 @@ class TargetRegisterClass { public: typedef const MCPhysReg* iterator; typedef const MCPhysReg* const_iterator; - typedef const MVT::SimpleValueType* vt_iterator; typedef const TargetRegisterClass* const * sc_iterator; // Instance variables filled by tablegen, do not use! const MCRegisterClass *MC; const uint16_t SpillSize, SpillAlignment; - const vt_iterator VTs; + const MVT::SimpleValueType *VTs; const uint32_t *SubClassMask; const uint16_t *SuperRegIndices; const LaneBitmask LaneMask; @@ -93,13 +92,6 @@ public: return MC->contains(Reg1, Reg2); } - /// Return the size of the register in bytes, which is also the size - /// of a stack slot allocated to hold a spilled copy of this register. - unsigned getSize() const { return SpillSize; } - - /// Return the minimum required alignment for a register of this class. - unsigned getAlignment() const { return SpillAlignment; } - /// Return the cost of copying a value between two registers in this class. /// A negative number means the register class is very expensive /// to copy e.g. status flag register classes. @@ -109,26 +101,6 @@ public: /// registers. bool isAllocatable() const { return MC->isAllocatable(); } - /// Return true if this TargetRegisterClass has the ValueType vt. - bool hasType(MVT vt) const { - for(int i = 0; VTs[i] != MVT::Other; ++i) - if (MVT(VTs[i]) == vt) - return true; - return false; - } - - /// vt_begin / vt_end - Loop over all of the value types that can be - /// represented by values in this register class. - vt_iterator vt_begin() const { - return VTs; - } - - vt_iterator vt_end() const { - vt_iterator I = VTs; - while (*I != MVT::Other) ++I; - return I; - } - /// Return true if the specified TargetRegisterClass /// is a proper sub-class of this TargetRegisterClass. bool hasSubClass(const TargetRegisterClass *RC) const { @@ -246,6 +218,7 @@ struct RegClassWeight { class TargetRegisterInfo : public MCRegisterInfo { public: typedef const TargetRegisterClass * const * regclass_iterator; + typedef const MVT::SimpleValueType* vt_iterator; private: const TargetRegisterInfoDesc *InfoDesc; // Extra desc array for codegen const char *const *SubRegIndexNames; // Names of subreg indexes. @@ -327,6 +300,44 @@ public: return Index | (1u << 31); } + /// Return the size in bits of a register from class RC. + unsigned getRegSizeInBits(const TargetRegisterClass &RC) const { + return RC.SpillSize * 8; + } + + /// Return the size in bytes of the stack slot allocated to hold a spilled + /// copy of a register from class RC. + unsigned getSpillSize(const TargetRegisterClass &RC) const { + return RC.SpillSize; + } + + /// Return the minimum required alignment for a spill slot for a register + /// of this class. + unsigned getSpillAlignment(const TargetRegisterClass &RC) const { + return RC.SpillAlignment; + } + + /// Return true if the given TargetRegisterClass has the ValueType T. + bool isTypeLegalForClass(const TargetRegisterClass &RC, MVT T) const { + for (int i = 0; RC.VTs[i] != MVT::Other; ++i) + if (MVT(RC.VTs[i]) == T) + return true; + return false; + } + + /// Loop over all of the value types that can be represented by values + // in the given register class. + vt_iterator legalclasstypes_begin(const TargetRegisterClass &RC) const { + return RC.VTs; + } + + vt_iterator legalclasstypes_end(const TargetRegisterClass &RC) const { + vt_iterator I = RC.VTs; + while (*I != MVT::Other) + ++I; + return I; + } + /// Returns the Register Class of a physical register of the given type, /// picking the most sub register class of the right type that contains this /// physreg. @@ -486,6 +497,16 @@ public: /// function. Used by MachineRegisterInfo::isConstantPhysReg(). virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; } + /// Physical registers that may be modified within a function but are + /// guaranteed to be restored before any uses. This is useful for targets that + /// have call sequences where a GOT register may be updated by the caller + /// prior to a call and is guaranteed to be restored (also by the caller) + /// after the call. + virtual bool isCallerPreservedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + return false; + } + /// Prior to adding the live-out mask to a stackmap or patchpoint /// instruction, provide the target the opportunity to adjust it (mainly to /// remove pseudo-registers that should be ignored). diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index d342e4fe2613556d5aff434c440f4400bc66a63f..7b00c9420e3532c13d98309fc33e2fe55096667a 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -334,7 +334,7 @@ class ReadAdvance writes = []> } // Directly associate a new SchedRead type with a delay and optional -// pipeline bypess. For use with InstRW or ItinRW. +// pipeline bypass. For use with InstRW or ItinRW. class SchedReadAdvance writes = []> : SchedRead, ProcReadAdvance; diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 45a842f77a21a513cb9f5e3ac79b71b8642930c0..9ed614ccee17029395f2f0791454c5d1f807c883 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -281,7 +281,7 @@ def SDTConvertOp : SDTypeProfile<1, 5, [ //cvtss, su, us, uu, ff, fs, fu, sf, su ]>; class SDCallSeqStart constraints> : - SDTypeProfile<0, 1, constraints>; + SDTypeProfile<0, 2, constraints>; class SDCallSeqEnd constraints> : SDTypeProfile<0, 2, constraints>; diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index 0b43515960216cc2a02a09505497a6f0e0d5e1aa..9cb07a5c6daea13be0e2e793858379a16d239df6 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -18,8 +18,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/PBQPRAConstraint.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CodeGen.h" #include @@ -143,6 +144,9 @@ public: /// TargetLowering preference). It does not yet disable the postRA scheduler. virtual bool enableMachineScheduler() const; + /// \brief Support printing of [latency:throughput] comment in output .S file. + virtual bool supportPrintSchedInfo() const { return false; } + /// \brief True if the machine scheduler should disable the TLI preference /// for preRA scheduling with the source level scheduler. virtual bool enableMachineSchedDefaultSched() const { return true; } @@ -227,6 +231,10 @@ public: /// Please use MachineRegisterInfo::subRegLivenessEnabled() instead where /// possible. virtual bool enableSubRegLiveness() const { return false; } + + /// Returns string representation of scheduler comment + std::string getSchedInfoStr(const MachineInstr &MI) const override; + std::string getSchedInfoStr(MCInst const &MCI) const override; }; } // end namespace llvm diff --git a/include/llvm/Testing/Support/Error.h b/include/llvm/Testing/Support/Error.h new file mode 100644 index 0000000000000000000000000000000000000000..d527529015933cb13fbe408556607ce3233aa7fd --- /dev/null +++ b/include/llvm/Testing/Support/Error.h @@ -0,0 +1,69 @@ +//===- llvm/Testing/Support/Error.h ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TESTING_SUPPORT_ERROR_H +#define LLVM_TESTING_SUPPORT_ERROR_H + +#include "llvm/ADT/Optional.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" + +#include "gmock/gmock.h" +#include + +namespace llvm { +namespace detail { +ErrorHolder TakeError(Error Err); + +template ExpectedHolder TakeExpected(Expected &Exp) { + llvm::detail::ExpectedHolder Result; + auto &EH = static_cast(Result); + EH = TakeError(Exp.takeError()); + if (Result.Success) + Result.Value = &(*Exp); + return Result; +} + +template ExpectedHolder TakeExpected(const Expected &Exp) { + return TakeExpected(const_cast &>(Exp)); +} +} // namespace detail + +#define EXPECT_THAT_ERROR(Err, Matcher) \ + EXPECT_THAT(llvm::detail::TakeError(Err), Matcher) +#define ASSERT_THAT_ERROR(Err, Matcher) \ + ASSERT_THAT(llvm::detail::TakeError(Err), Matcher) + +#define EXPECT_THAT_EXPECTED(Err, Matcher) \ + EXPECT_THAT(llvm::detail::TakeExpected(Err), Matcher) +#define ASSERT_THAT_EXPECTED(Err, Matcher) \ + ASSERT_THAT(llvm::detail::TakeExpected(Err), Matcher) + +MATCHER(Succeeded, "") { return arg.Success; } +MATCHER(Failed, "") { return !arg.Success; } + +MATCHER_P(HasValue, value, + "succeeded with value " + testing::PrintToString(value)) { + if (!arg.Success) { + *result_listener << "operation failed"; + return false; + } + + assert(arg.Value.hasValue()); + if (**arg.Value != value) { + *result_listener << "but \"" + testing::PrintToString(**arg.Value) + + "\" != " + testing::PrintToString(value); + return false; + } + + return true; +} +} // namespace llvm + +#endif diff --git a/include/llvm/Testing/Support/SupportHelpers.h b/include/llvm/Testing/Support/SupportHelpers.h new file mode 100644 index 0000000000000000000000000000000000000000..c4dd414b80dbc28a1e1ace054e1f9c6cf6a29469 --- /dev/null +++ b/include/llvm/Testing/Support/SupportHelpers.h @@ -0,0 +1,47 @@ +//===- Testing/Support/SupportHelpers.h -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H +#define LLVM_TESTING_SUPPORT_SUPPORTHELPERS_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "gtest/gtest-printers.h" + +namespace llvm { +namespace detail { +struct ErrorHolder { + bool Success; + std::string Message; +}; + +template struct ExpectedHolder : public ErrorHolder { + Optional Value; +}; + +inline void PrintTo(const ErrorHolder &Err, std::ostream *Out) { + *Out << (Err.Success ? "succeeded" : "failed"); + if (!Err.Success) { + *Out << " (" << StringRef(Err.Message).trim().str() << ")"; + } +} + +template +void PrintTo(const ExpectedHolder &Item, std::ostream *Out) { + if (Item.Success) { + *Out << "succeeded with value \"" << ::testing::PrintToString(**Item.Value) + << "\""; + } else { + PrintTo(static_cast(Item), Out); + } +} +} // namespace detail +} // namespace llvm + +#endif diff --git a/include/llvm/LibDriver/LibDriver.h b/include/llvm/ToolDrivers/llvm-lib/LibDriver.h similarity index 79% rename from include/llvm/LibDriver/LibDriver.h rename to include/llvm/ToolDrivers/llvm-lib/LibDriver.h index 95feb60be40378bcc8f1a43653a144b1635bc2e2..a4806ac4ad69849be41492a8de70d92093d288f6 100644 --- a/include/llvm/LibDriver/LibDriver.h +++ b/include/llvm/ToolDrivers/llvm-lib/LibDriver.h @@ -1,4 +1,4 @@ -//===- llvm/LibDriver/LibDriver.h - lib.exe-compatible driver ---*- C++ -*-===// +//===- llvm-lib/LibDriver.h - lib.exe-compatible driver ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBDRIVER_LIBDRIVER_H -#define LLVM_LIBDRIVER_LIBDRIVER_H +#ifndef LLVM_TOOLDRIVERS_LLVM_LIB_LIBDRIVER_H +#define LLVM_TOOLDRIVERS_LLVM_LIB_LIBDRIVER_H namespace llvm { template class ArrayRef; diff --git a/include/llvm/Transforms/IPO/FunctionAttrs.h b/include/llvm/Transforms/IPO/FunctionAttrs.h index 85d6364c8bbc98d27e321d46fbd086d440dd0225..36dd06b85b417d0f6960f5e3451ca7145dba4ce8 100644 --- a/include/llvm/Transforms/IPO/FunctionAttrs.h +++ b/include/llvm/Transforms/IPO/FunctionAttrs.h @@ -14,8 +14,8 @@ #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H #define LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H -#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h index ed5742ab8b564f527445457b04bf6c19d7c6949c..de35cdf052e1f523020afd5f7de6eed55020fd8d 100644 --- a/include/llvm/Transforms/IPO/FunctionImport.h +++ b/include/llvm/Transforms/IPO/FunctionImport.h @@ -53,8 +53,7 @@ public: : Index(Index), ModuleLoader(std::move(ModuleLoader)) {} /// Import functions in Module \p M based on the supplied import list. - Expected - importFunctions(Module &M, const ImportMapTy &ImportList); + Expected importFunctions(Module &M, const ImportMapTy &ImportList); private: /// The summaries index used to trigger importing. @@ -82,15 +81,11 @@ public: /// \p ExportLists contains for each Module the set of globals (GUID) that will /// be imported by another module, or referenced by such a function. I.e. this /// is the set of globals that need to be promoted/renamed appropriately. -/// -/// \p DeadSymbols (optional) contains a list of GUID that are deemed "dead" and -/// will be ignored for the purpose of importing. void ComputeCrossModuleImport( const ModuleSummaryIndex &Index, const StringMap &ModuleToDefinedGVSummaries, StringMap &ImportLists, - StringMap &ExportLists, - const DenseSet *DeadSymbols = nullptr); + StringMap &ExportLists); /// Compute all the imports for the given module using the Index. /// @@ -103,9 +98,9 @@ void ComputeCrossModuleImportForModule( /// Compute all the symbols that are "dead": i.e these that can't be reached /// in the graph from any of the given symbols listed in /// \p GUIDPreservedSymbols. -DenseSet -computeDeadSymbols(const ModuleSummaryIndex &Index, - const DenseSet &GUIDPreservedSymbols); +void computeDeadSymbols( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols); /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. diff --git a/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h b/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h new file mode 100644 index 0000000000000000000000000000000000000000..bf04bbfe92d81c5412fc087b5a63a02929778241 --- /dev/null +++ b/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h @@ -0,0 +1,41 @@ +//===- ThinLTOBitcodeWriter.h - Bitcode writing pass for ThinLTO ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass prepares a module containing type metadata for ThinLTO by splitting +// it into regular and thin LTO parts if possible, and writing both parts to +// a multi-module bitcode file. Modules that do not contain type metadata are +// written unmodified as a single module. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_THINLTOBITCODEWRITER_H +#define LLVM_TRANSFORMS_IPO_THINLTOBITCODEWRITER_H + +#include +#include + +namespace llvm { + +class ThinLTOBitcodeWriterPass + : public PassInfoMixin { + raw_ostream &OS; + raw_ostream *ThinLinkOS; + +public: + // Writes bitcode to OS. Also write thin link file to ThinLinkOS, if + // it's not nullptr. + ThinLTOBitcodeWriterPass(raw_ostream &OS, raw_ostream *ThinLinkOS) + : OS(OS), ThinLinkOS(ThinLinkOS) {} + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // namespace llvm + +#endif diff --git a/include/llvm/Transforms/InstrProfiling.h b/include/llvm/Transforms/InstrProfiling.h index e303dcf7acfef1794d4fd4bd82690f875d432ac2..65e69761baddd932ab0d179052a5a358cdf82d05 100644 --- a/include/llvm/Transforms/InstrProfiling.h +++ b/include/llvm/Transforms/InstrProfiling.h @@ -43,6 +43,7 @@ public: private: InstrProfOptions Options; Module *M; + Triple TT; const TargetLibraryInfo *TLI; struct PerFunctionProfileData { uint32_t NumValueSites[IPVK_Last + 1]; @@ -64,20 +65,6 @@ private: // The end value of precise value profile range for memory intrinsic sizes. int64_t MemOPSizeRangeLast; - bool isMachO() const; - - /// Get the section name for the counter variables. - StringRef getCountersSection() const; - - /// Get the section name for the name variables. - StringRef getNameSection() const; - - /// Get the section name for the profile data variables. - StringRef getDataSection() const; - - /// Get the section name for the coverage mapping data. - StringRef getCoverageSection() const; - /// Count the number of instrumented value sites for the function. void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index 01a3975a4f2cc20107e1c5c06001987b3ca6f5ae..b6c6c091631d54376ecd64a026a674e0a15d134b 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -131,7 +131,8 @@ FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false); ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false, - bool Recover = false); + bool Recover = false, + bool UseGlobalsGC = true); // Insert MemorySanitizer instrumentation (detection of uninitialized reads) FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0, @@ -176,6 +177,8 @@ struct SanitizerCoverageOptions { bool Use8bitCounters = false; bool TracePC = false; bool TracePCGuard = false; + bool Inline8bitCounters = false; + bool NoPrune = false; SanitizerCoverageOptions() = default; }; diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index ba0a3ee1287a4b6c22f5900bfabcd2e4f7e6f8b1..856c288a071f3fb966fc62d2b95845208828c563 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -354,6 +354,13 @@ FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false); // FunctionPass *createGVNHoistPass(); +//===----------------------------------------------------------------------===// +// +// GVNSink - This pass uses an "inverted" value numbering to decide the +// similarity of expressions and sinks similar expressions into successors. +// +FunctionPass *createGVNSinkPass(); + //===----------------------------------------------------------------------===// // // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads diff --git a/include/llvm/Transforms/Scalar/ConstantHoisting.h b/include/llvm/Transforms/Scalar/ConstantHoisting.h index 3e2b3327a9fe6bef08b4e304b26a745ee78975a7..edc91add7a7370c6775546a14270a3f697da1e8e 100644 --- a/include/llvm/Transforms/Scalar/ConstantHoisting.h +++ b/include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -36,6 +36,7 @@ #ifndef LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H #define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" @@ -98,7 +99,7 @@ public: // Glue for old PM. bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - BasicBlock &Entry); + BlockFrequencyInfo *BFI, BasicBlock &Entry); void releaseMemory() { ConstantVec.clear(); @@ -112,6 +113,7 @@ private: const TargetTransformInfo *TTI; DominatorTree *DT; + BlockFrequencyInfo *BFI; BasicBlock *Entry; /// Keeps track of constant candidates found in the function. @@ -124,8 +126,8 @@ private: SmallVector ConstantVec; Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const; - Instruction *findConstantInsertionPoint( - const consthoist::ConstantInfo &ConstInfo) const; + SmallPtrSet + findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const; void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx, ConstantInt *ConstInt); diff --git a/include/llvm/Transforms/Scalar/Float2Int.h b/include/llvm/Transforms/Scalar/Float2Int.h index a8042399fb08acc89020b4a35a06a994e01594d9..206ee980109b4fdbb20f73ee851dc409740b7a96 100644 --- a/include/llvm/Transforms/Scalar/Float2Int.h +++ b/include/llvm/Transforms/Scalar/Float2Int.h @@ -31,7 +31,7 @@ public: private: void findRoots(Function &F, SmallPtrSet &Roots); - ConstantRange seen(Instruction *I, ConstantRange R); + void seen(Instruction *I, ConstantRange R); ConstantRange badRange(); ConstantRange unknownRange(); ConstantRange validateRange(ConstantRange R); diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h index 8f05e8cdb2336634127230aadcf79d7e4e178b67..589aaaca02fe17585308c39adffa5847de2d8cb3 100644 --- a/include/llvm/Transforms/Scalar/GVN.h +++ b/include/llvm/Transforms/Scalar/GVN.h @@ -238,7 +238,12 @@ struct GVNHoistPass : PassInfoMixin { /// \brief Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; - +/// \brief Uses an "inverted" value numbering to decide the similarity of +/// expressions and sinks similar expressions into successors. +struct GVNSinkPass : PassInfoMixin { + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; } #endif diff --git a/include/llvm/Transforms/Scalar/GVNExpression.h b/include/llvm/Transforms/Scalar/GVNExpression.h index 2670a0c1a5339b7cf4b157ce2f65519b9f0b7850..f603ebcbca7cc924ab9f9692e763e22bdfd8370f 100644 --- a/include/llvm/Transforms/Scalar/GVNExpression.h +++ b/include/llvm/Transforms/Scalar/GVNExpression.h @@ -40,6 +40,7 @@ enum ExpressionType { ET_Base, ET_Constant, ET_Variable, + ET_Dead, ET_Unknown, ET_BasicStart, ET_Basic, @@ -57,10 +58,11 @@ class Expression { private: ExpressionType EType; unsigned Opcode; + mutable hash_code HashVal; public: Expression(ExpressionType ET = ET_Base, unsigned O = ~2U) - : EType(ET), Opcode(O) {} + : EType(ET), Opcode(O), HashVal(0) {} Expression(const Expression &) = delete; Expression &operator=(const Expression &) = delete; virtual ~Expression(); @@ -81,8 +83,21 @@ public: return equals(Other); } + hash_code getComputedHash() const { + // It's theoretically possible for a thing to hash to zero. In that case, + // we will just compute the hash a few extra times, which is no worse that + // we did before, which was to compute it always. + if (static_cast(HashVal) == 0) + HashVal = getHashValue(); + return HashVal; + } virtual bool equals(const Expression &Other) const { return true; } + // Return true if the two expressions are exactly the same, including the + // normally ignored fields. + virtual bool exactlyEquals(const Expression &Other) const { + return getExpressionType() == Other.getExpressionType() && equals(Other); + } unsigned getOpcode() const { return Opcode; } void setOpcode(unsigned opcode) { Opcode = opcode; } @@ -106,10 +121,7 @@ public: OS << "}"; } - LLVM_DUMP_METHOD void dump() const { - print(dbgs()); - dbgs() << "\n"; - } + LLVM_DUMP_METHOD void dump() const; }; inline raw_ostream &operator<<(raw_ostream &OS, const Expression &E) { @@ -335,6 +347,10 @@ public: void setAlignment(unsigned Align) { Alignment = Align; } bool equals(const Expression &Other) const override; + bool exactlyEquals(const Expression &Other) const override { + return Expression::exactlyEquals(Other) && + cast(Other).getLoadInst() == getLoadInst(); + } // // Debugging support @@ -372,6 +388,10 @@ public: Value *getStoredValue() const { return StoredValue; } bool equals(const Expression &Other) const override; + bool exactlyEquals(const Expression &Other) const override { + return Expression::exactlyEquals(Other) && + cast(Other).getStoreInst() == getStoreInst(); + } // Debugging support // @@ -380,7 +400,9 @@ public: OS << "ExpressionTypeStore, "; this->BasicExpression::printInternal(OS, false); OS << " represents Store " << *Store; - OS << " with MemoryLeader " << *getMemoryLeader(); + OS << " with StoredValue "; + StoredValue->printAsOperand(OS); + OS << " and MemoryLeader " << *getMemoryLeader(); } }; @@ -513,6 +535,17 @@ public: } }; +class DeadExpression final : public Expression { +public: + DeadExpression() : Expression(ET_Dead) {} + DeadExpression(const DeadExpression &) = delete; + DeadExpression &operator=(const DeadExpression &) = delete; + + static bool classof(const Expression *E) { + return E->getExpressionType() == ET_Dead; + } +}; + class VariableExpression final : public Expression { private: Value *VariableValue; diff --git a/include/llvm/Transforms/Scalar/NaryReassociate.h b/include/llvm/Transforms/Scalar/NaryReassociate.h index a74bb6cc4194b8b959a0f38f48a1d8bd73990c97..f35707eeb3f04de08d99997d03c6612e2eba66d9 100644 --- a/include/llvm/Transforms/Scalar/NaryReassociate.h +++ b/include/llvm/Transforms/Scalar/NaryReassociate.h @@ -167,7 +167,7 @@ private: // foo(a + b); // if (p2) // bar(a + b); - DenseMap> SeenExprs; + DenseMap> SeenExprs; }; } // namespace llvm diff --git a/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h new file mode 100644 index 0000000000000000000000000000000000000000..d7282ac6a7813960e7e53a1692471e16ab41a295 --- /dev/null +++ b/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h @@ -0,0 +1,53 @@ +//===- SimpleLoopUnswitch.h - Hoist loop-invariant control flow -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H +#define LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +/// This pass transforms loops that contain branches on loop-invariant +/// conditions to have multiple loops. For example, it turns the left into the +/// right code: +/// +/// for (...) if (lic) +/// A for (...) +/// if (lic) A; B; C +/// B else +/// C for (...) +/// A; C +/// +/// This can increase the size of the code exponentially (doubling it every time +/// a loop is unswitched) so we only unswitch if the resultant code will be +/// smaller than a threshold. +/// +/// This pass expects LICM to be run before it to hoist invariant conditions out +/// of the loop, to make the unswitching opportunity obvious. +/// +class SimpleLoopUnswitchPass : public PassInfoMixin { +public: + SimpleLoopUnswitchPass() = default; + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +/// Create the legacy pass object for the simple loop unswitcher. +/// +/// See the documentaion for `SimpleLoopUnswitchPass` for details. +Pass *createSimpleLoopUnswitchLegacyPass(); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_SIMPLELOOPUNSWITCH_H diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index 337305a0a82ce065f00e4b9e6eb12b2193b6c031..2a8b89d862821e9a3fcbaa4b49e31b005ff375bc 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -36,6 +36,7 @@ class BasicBlock; class BlockFrequencyInfo; class CallInst; class CallGraph; +class DebugInfoFinder; class DominatorTree; class Function; class Instruction; @@ -43,6 +44,7 @@ class InvokeInst; class Loop; class LoopInfo; class Module; +class ProfileSummaryInfo; class ReturnInst; /// Return an exact copy of the specified module @@ -74,7 +76,7 @@ struct ClonedCodeInfo { /// All cloned call sites that have operand bundles attached are appended to /// this vector. This vector may contain nulls or undefs if some of the /// originally inserted callsites were DCE'ed after they were cloned. - std::vector OperandBundleCallSites; + std::vector OperandBundleCallSites; ClonedCodeInfo() = default; }; @@ -109,7 +111,8 @@ struct ClonedCodeInfo { /// BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix = "", Function *F = nullptr, - ClonedCodeInfo *CodeInfo = nullptr); + ClonedCodeInfo *CodeInfo = nullptr, + DebugInfoFinder *DIFinder = nullptr); /// CloneFunction - Return a copy of the specified function and add it to that /// function's module. Also, any references specified in the VMap are changed @@ -175,15 +178,17 @@ public: explicit InlineFunctionInfo(CallGraph *cg = nullptr, std::function *GetAssumptionCache = nullptr, + ProfileSummaryInfo *PSI = nullptr, BlockFrequencyInfo *CallerBFI = nullptr, BlockFrequencyInfo *CalleeBFI = nullptr) - : CG(cg), GetAssumptionCache(GetAssumptionCache), CallerBFI(CallerBFI), - CalleeBFI(CalleeBFI) {} + : CG(cg), GetAssumptionCache(GetAssumptionCache), PSI(PSI), + CallerBFI(CallerBFI), CalleeBFI(CalleeBFI) {} /// CG - If non-null, InlineFunction will update the callgraph to reflect the /// changes it makes. CallGraph *CG; std::function *GetAssumptionCache; + ProfileSummaryInfo *PSI; BlockFrequencyInfo *CallerBFI, *CalleeBFI; /// StaticAllocas - InlineFunction fills this in with all static allocas that @@ -192,7 +197,7 @@ public: /// InlinedCalls - InlineFunction fills this in with callsites that were /// inlined from the callee. This is only filled in if CG is non-null. - SmallVector InlinedCalls; + SmallVector InlinedCalls; /// All of the new call sites inlined into the caller. /// diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h index a2978663a4d14f82554fea07712de099b7a2a5fe..682b353ab5ae84fa0e637c6c92f2e8d31f174f83 100644 --- a/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/include/llvm/Transforms/Utils/CodeExtractor.h @@ -25,6 +25,7 @@ template class ArrayRef; class BranchProbabilityInfo; class DominatorTree; class Function; + class Instruction; class Loop; class Module; class RegionNode; @@ -65,14 +66,6 @@ template class ArrayRef; /// Blocks containing EHPads, allocas, invokes, or vastarts are not valid. static bool isBlockValidForExtraction(const BasicBlock &BB); - /// \brief Create a code extractor for a single basic block. - /// - /// In this formation, we don't require a dominator tree. The given basic - /// block is set up for extraction. - CodeExtractor(BasicBlock *BB, bool AggregateArgs = false, - BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr); - /// \brief Create a code extractor for a sequence of blocks. /// /// Given a sequence of basic blocks where the first block in the sequence @@ -91,14 +84,6 @@ template class ArrayRef; BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr); - /// \brief Create a code extractor for a region node. - /// - /// Behaves just like the generic code sequence constructor, but uses the - /// block sequence of the region node passed in. - CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr); - /// \brief Perform the extraction, returning the new function. /// /// Returns zero when called on a CodeExtractor instance where isEligible @@ -119,7 +104,34 @@ template class ArrayRef; /// a code sequence, that sequence is modified, including changing these /// sets, before extraction occurs. These modifications won't have any /// significant impact on the cost however. - void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const; + void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, + const ValueSet &Allocas) const; + + /// Check if life time marker nodes can be hoisted/sunk into the outline + /// region. + /// + /// Returns true if it is safe to do the code motion. + bool isLegalToShrinkwrapLifetimeMarkers(Instruction *AllocaAddr) const; + /// Find the set of allocas whose life ranges are contained within the + /// outlined region. + /// + /// Allocas which have life_time markers contained in the outlined region + /// should be pushed to the outlined function. The address bitcasts that + /// are used by the lifetime markers are also candidates for shrink- + /// wrapping. The instructions that need to be sunk are collected in + /// 'Allocas'. + void findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, + BasicBlock *&ExitBlock) const; + + /// Find or create a block within the outline region for placing hoisted + /// code. + /// + /// CommonExitBlock is block outside the outline region. It is the common + /// successor of blocks inside the region. If there exists a single block + /// inside the region that is the predecessor of CommonExitBlock, that block + /// will be returned. Otherwise CommonExitBlock will be split and the + /// original block will be added to the outline region. + BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock); private: void severSplitPHINodes(BasicBlock *&Header); diff --git a/include/llvm/Transforms/Utils/EscapeEnumerator.h b/include/llvm/Transforms/Utils/EscapeEnumerator.h index 80d16ed4cf5bffca445c125aec5ce109bc354455..1256dfdaca172d01226c6e59fec6a102dd41fe7b 100644 --- a/include/llvm/Transforms/Utils/EscapeEnumerator.h +++ b/include/llvm/Transforms/Utils/EscapeEnumerator.h @@ -15,8 +15,8 @@ #ifndef LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H #define LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" namespace llvm { diff --git a/include/llvm/Transforms/Utils/FunctionComparator.h b/include/llvm/Transforms/Utils/FunctionComparator.h index ee58d1d138f74c77a86d80b8dd6b4997990815c6..b0f10eafaa95f75358210ca9731036efb804d8d2 100644 --- a/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/include/llvm/Transforms/Utils/FunctionComparator.h @@ -19,8 +19,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" -#include "llvm/IR/ValueMap.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include diff --git a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h index bb7fa523cb1982103fa1256785422979a983eb67..b7a3d130aa11ed8e12918e5d51c3cae20f6218e9 100644 --- a/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h +++ b/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h @@ -14,8 +14,8 @@ #define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include #include diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 4933712fb8adc370d27669957728304a687136e7..8fed292e77a37a28414df4d02ff871a59bde7636 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -15,13 +15,13 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H #define LLVM_TRANSFORMS_UTILS_LOCAL_H +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Operator.h" -#include "llvm/ADT/SmallPtrSet.h" namespace llvm { @@ -286,9 +286,6 @@ DbgDeclareInst *FindAllocaDbgDeclare(Value *V); /// Finds the llvm.dbg.value intrinsics describing a value. void findDbgValues(SmallVectorImpl &DbgValues, Value *V); -/// Constants for \p replaceDbgDeclare and friends. -enum { NoDeref = false, WithDeref = true }; - /// Replaces llvm.dbg.declare instruction when the address it describes /// is replaced with a new value. If Deref is true, an additional DW_OP_deref is /// prepended to the expression. If Offset is non-zero, a constant displacement @@ -359,6 +356,10 @@ void combineMetadata(Instruction *K, const Instruction *J, ArrayRef Kn /// Unknown metadata is removed. void combineMetadataForCSE(Instruction *K, const Instruction *J); +// Replace each use of 'From' with 'To', if that use does not belong to basic +// block where 'From' is defined. Returns the number of replacements made. +unsigned replaceNonLocalUsesWith(Instruction *From, Value *To); + /// Replace each use of 'From' with 'To' if that use is dominated by /// the given edge. Returns the number of replacements made. unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, @@ -409,6 +410,14 @@ bool recognizeBSwapOrBitReverseIdiom( void maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI, const TargetLibraryInfo *TLI); +//===----------------------------------------------------------------------===// +// Transform predicates +// + +/// Given an instruction, is it legal to set operand OpIdx to a non-constant +/// value? +bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index a1cf41d6f931a087625500e78db486dd6923808f..561f9488062406b383cbd860d617878738f43417 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" @@ -42,6 +43,7 @@ class PredIteratorCache; class ScalarEvolution; class SCEV; class TargetLibraryInfo; +class TargetTransformInfo; /// \brief Captures loop safety information. /// It keep information for loop & its header may throw exception. @@ -489,6 +491,36 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE = nullptr); +/// Generates a vector reduction using shufflevectors to reduce the value. +Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, + RecurrenceDescriptor::MinMaxRecurrenceKind + MinMaxKind = RecurrenceDescriptor::MRK_Invalid, + ArrayRef RedOps = ArrayRef()); + +/// Create a target reduction of the given vector. The reduction operation +/// is described by the \p Opcode parameter. min/max reductions require +/// additional information supplied in \p Flags. +/// The target is queried to determine if intrinsics or shuffle sequences are +/// required to implement the reduction. +Value * +createSimpleTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, + unsigned Opcode, Value *Src, + TargetTransformInfo::ReductionFlags Flags = + TargetTransformInfo::ReductionFlags(), + ArrayRef RedOps = ArrayRef()); + +/// Create a generic target reduction using a recurrence descriptor \p Desc +/// The target is queried to determine if intrinsics or shuffle sequences are +/// required to implement the reduction. +Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, + RecurrenceDescriptor &Desc, Value *Src, + bool NoNaN = false); + +/// Get the intersection (logical and) of all of the potential IR flags +/// of each scalar operation (VL) that will be converted into a vector (I). +/// Flag set: NSW, NUW, exact, and all of fast-math. +void propagateIRFlags(Value *I, ArrayRef VL); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H diff --git a/include/llvm/Transforms/Utils/LoopVersioning.h b/include/llvm/Transforms/Utils/LoopVersioning.h index 0d345a972e103c180adc0ec1b79172cca95ab97d..fa5d7845d08083d9da7b95c0c7200aba5b0aaa61 100644 --- a/include/llvm/Transforms/Utils/LoopVersioning.h +++ b/include/llvm/Transforms/Utils/LoopVersioning.h @@ -18,8 +18,8 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" namespace llvm { diff --git a/include/llvm/Transforms/Utils/Mem2Reg.h b/include/llvm/Transforms/Utils/Mem2Reg.h index 456876b520b0f3fbe43a2eca910b8cc4bcfe7ae6..1fe186d6c3ad98ae9f3e2117e13a81b823fd7bb5 100644 --- a/include/llvm/Transforms/Utils/Mem2Reg.h +++ b/include/llvm/Transforms/Utils/Mem2Reg.h @@ -25,4 +25,4 @@ public: }; } -#endif // LLVM_TRANSFORMS_UTILS_MEM2REG_H \ No newline at end of file +#endif // LLVM_TRANSFORMS_UTILS_MEM2REG_H diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h index f5e843e2e8b55688e939b1f0667ca68a3c73ca5d..e9793fe4b66668cbaaa556caebf6d63798305762 100644 --- a/include/llvm/Transforms/Utils/ModuleUtils.h +++ b/include/llvm/Transforms/Utils/ModuleUtils.h @@ -84,6 +84,17 @@ void appendToCompilerUsed(Module &M, ArrayRef Values); void filterDeadComdatFunctions( Module &M, SmallVectorImpl &DeadComdatFunctions); +/// \brief Produce a unique identifier for this module by taking the MD5 sum of +/// the names of the module's strong external symbols. +/// +/// This identifier is normally guaranteed to be unique, or the program would +/// fail to link due to multiply defined symbols. +/// +/// If the module has no strong external symbols (such a module may still have a +/// semantic effect if it performs global initialization), we cannot produce a +/// unique identifier for this module, so we return the empty string. +std::string getUniqueModuleId(Module *M); + } // End llvm namespace #endif // LLVM_TRANSFORMS_UTILS_MODULEUTILS_H diff --git a/include/llvm/Transforms/Utils/OrderedInstructions.h b/include/llvm/Transforms/Utils/OrderedInstructions.h new file mode 100644 index 0000000000000000000000000000000000000000..e043ff39a998b0f89526c7709ae7119525cb000f --- /dev/null +++ b/include/llvm/Transforms/Utils/OrderedInstructions.h @@ -0,0 +1,51 @@ +//===- llvm/Transforms/Utils/OrderedInstructions.h -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an efficient way to check for dominance relation between 2 +// instructions. +// +// This interface dispatches to appropriate dominance check given 2 +// instructions, i.e. in case the instructions are in the same basic block, +// OrderedBasicBlock (with instruction numbering and caching) are used. +// Otherwise, dominator tree is used. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H +#define LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Operator.h" + +namespace llvm { + +class OrderedInstructions { + /// Used to check dominance for instructions in same basic block. + mutable DenseMap> + OBBMap; + + /// The dominator tree of the parent function. + DominatorTree *DT; + +public: + /// Constructor. + OrderedInstructions(DominatorTree *DT) : DT(DT) {} + + /// Return true if first instruction dominates the second. + bool dominates(const Instruction *, const Instruction *) const; + + /// Invalidate the OrderedBasicBlock cache when its basic block changes. + void invalidateBlock(BasicBlock *BB) { OBBMap.erase(BB); } +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_ORDEREDINSTRUCTIONS_H diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h index 6cdeeeb60a65b4e974e37b8632bd131d651f509a..8d50aeb10d6ebd26fec1b8f58dffe7a5c3812fc3 100644 --- a/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -46,13 +46,13 @@ public: /// simplifyUsersOfIV - Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead, + LoopInfo *LI, SmallVectorImpl &Dead, IVVisitor *V = nullptr); /// SimplifyLoopIVs - Simplify users of induction variables within this /// loop. This does not actually change or add IVs. bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead); + LoopInfo *LI, SmallVectorImpl &Dead); } // end namespace llvm diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 665dd6f4b257939eca7b31c73465de2ebbc28f8d..6aba9b2298b100be45d9ec62d258d2ae8db0f21e 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -121,6 +121,7 @@ private: Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B); Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B); + Value *optimizeWcslen(CallInst *CI, IRBuilder<> &B); // Wrapper for all String/Memory Library Call Optimizations Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B); @@ -165,6 +166,9 @@ private: /// hasFloatVersion - Checks if there is a float version of the specified /// function by checking for an existing function with name FuncName + f bool hasFloatVersion(StringRef FuncName); + + /// Shared code to optimize strlen+wcslen. + Value *optimizeStringLength(CallInst *CI, IRBuilder<> &B, unsigned CharSize); }; } // End llvm namespace diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h index 950ad92afcd74b213e5905eda12bd465d4c53365..0cc6b34d45934358a956d90507a9c08324c0ab15 100644 --- a/include/llvm/Transforms/Utils/ValueMapper.h +++ b/include/llvm/Transforms/Utils/ValueMapper.h @@ -16,14 +16,14 @@ #define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/IR/ValueMap.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" namespace llvm { class Value; class Instruction; -typedef ValueMap ValueToValueMapTy; +typedef ValueMap ValueToValueMapTy; /// This is a class that can be implemented by clients to remap types when /// cloning constants and instructions. diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index d669a8e5b615c9d5368cc05678e85d07c62fad61..6f258191e89ebab6d88987aa8f21044c6a6662ee 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -24,6 +24,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" @@ -40,8 +41,8 @@ class BoUpSLP; struct SLPVectorizerPass : public PassInfoMixin { typedef SmallVector StoreList; typedef MapVector StoreListMap; - typedef SmallVector WeakVHList; - typedef MapVector WeakVHListMap; + typedef SmallVector WeakTrackingVHList; + typedef MapVector WeakTrackingVHListMap; ScalarEvolution *SE = nullptr; TargetTransformInfo *TTI = nullptr; @@ -59,7 +60,8 @@ public: // Glue for old PM. bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_, - DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_); + DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_, + OptimizationRemarkEmitter *ORE_); private: /// \brief Collect store and getelementptr instructions and organize them @@ -82,7 +84,7 @@ private: ArrayRef BuildVector = None, bool AllowReorder = false); - /// \brief Try to vectorize a chain that may start at the operands of \V; + /// \brief Try to vectorize a chain that may start at the operands of \p V. bool tryToVectorize(BinaryOperator *V, slpvectorizer::BoUpSLP &R); /// \brief Vectorize the store instructions collected in Stores. @@ -111,7 +113,7 @@ private: StoreListMap Stores; /// The getelementptr instructions in a basic block organized by base pointer. - WeakVHListMap GEPs; + WeakTrackingVHListMap GEPs; }; } diff --git a/include/llvm/XRay/InstrumentationMap.h b/include/llvm/XRay/InstrumentationMap.h index f7286c52ff42e219d1adf7a2db1c65a39ececb89..0342da0a2f0fa61f97984d198a40c85cf6860b4f 100644 --- a/include/llvm/XRay/InstrumentationMap.h +++ b/include/llvm/XRay/InstrumentationMap.h @@ -59,6 +59,7 @@ struct YAMLXRaySledEntry { yaml::Hex64 Function; SledEntry::FunctionKinds Kind; bool AlwaysInstrument; + std::string FunctionName; }; /// The InstrumentationMap represents the computed function id's and indicated @@ -115,6 +116,7 @@ template <> struct MappingTraits { IO.mapRequired("function", Entry.Function); IO.mapRequired("kind", Entry.Kind); IO.mapRequired("always-instrument", Entry.AlwaysInstrument); + IO.mapOptional("function-name", Entry.FunctionName); } static constexpr bool flow = true; diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index 59b1f1621039f91ecdf2b82afc3ec2ee7ba0025c..d906b05f7aaa100c83db5b0526a2225583cabd06 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -38,6 +38,31 @@ module LLVM_Backend { } module LLVM_Bitcode { requires cplusplus umbrella "Bitcode" module * { export * } } + +module LLVM_BinaryFormat { + requires cplusplus + umbrella "BinaryFormat" module * { export * } + textual header "BinaryFormat/Dwarf.def" + textual header "BinaryFormat/MachO.def" + textual header "BinaryFormat/ELFRelocs/AArch64.def" + textual header "BinaryFormat/ELFRelocs/AMDGPU.def" + textual header "BinaryFormat/ELFRelocs/ARM.def" + textual header "BinaryFormat/ELFRelocs/AVR.def" + textual header "BinaryFormat/ELFRelocs/BPF.def" + textual header "BinaryFormat/ELFRelocs/Hexagon.def" + textual header "BinaryFormat/ELFRelocs/i386.def" + textual header "BinaryFormat/ELFRelocs/Lanai.def" + textual header "BinaryFormat/ELFRelocs/Mips.def" + textual header "BinaryFormat/ELFRelocs/PowerPC64.def" + textual header "BinaryFormat/ELFRelocs/PowerPC.def" + textual header "BinaryFormat/ELFRelocs/RISCV.def" + textual header "BinaryFormat/ELFRelocs/Sparc.def" + textual header "BinaryFormat/ELFRelocs/SystemZ.def" + textual header "BinaryFormat/ELFRelocs/x86_64.def" + textual header "BinaryFormat/ELFRelocs/WebAssembly.def" + textual header "BinaryFormat/WasmRelocs/WebAssembly.def" +} + module LLVM_Config { requires cplusplus umbrella "Config" module * { export * } } module LLVM_DebugInfo { @@ -95,8 +120,8 @@ module LLVM_DebugInfo_CodeView { module * { export * } // These are intended for (repeated) textual inclusion. - textual header "DebugInfo/CodeView/TypeRecords.def" - textual header "DebugInfo/CodeView/CVSymbolTypes.def" + textual header "DebugInfo/CodeView/CodeViewTypes.def" + textual header "DebugInfo/CodeView/CodeViewSymbols.def" } module LLVM_ExecutionEngine { @@ -148,6 +173,7 @@ module LLVM_intrinsic_gen { module IR_Attributes { header "IR/Attributes.h" export * } module IR_CallSite { header "IR/CallSite.h" export * } module IR_ConstantFolder { header "IR/ConstantFolder.h" export * } + module IR_GlobalVariable { header "IR/GlobalVariable.h" export * } module IR_NoFolder { header "IR/NoFolder.h" export * } module IR_Module { header "IR/Module.h" export * } module IR_ModuleSummaryIndex { header "IR/ModuleSummaryIndex.h" export * } @@ -258,25 +284,6 @@ module LLVM_Utils { // These are intended for textual inclusion. textual header "Support/ARMTargetParser.def" textual header "Support/AArch64TargetParser.def" - textual header "Support/Dwarf.def" - textual header "Support/MachO.def" - textual header "Support/ELFRelocs/AArch64.def" - textual header "Support/ELFRelocs/AMDGPU.def" - textual header "Support/ELFRelocs/ARM.def" - textual header "Support/ELFRelocs/AVR.def" - textual header "Support/ELFRelocs/BPF.def" - textual header "Support/ELFRelocs/Hexagon.def" - textual header "Support/ELFRelocs/i386.def" - textual header "Support/ELFRelocs/Lanai.def" - textual header "Support/ELFRelocs/Mips.def" - textual header "Support/ELFRelocs/PowerPC64.def" - textual header "Support/ELFRelocs/PowerPC.def" - textual header "Support/ELFRelocs/RISCV.def" - textual header "Support/ELFRelocs/Sparc.def" - textual header "Support/ELFRelocs/SystemZ.def" - textual header "Support/ELFRelocs/x86_64.def" - textual header "Support/ELFRelocs/WebAssembly.def" - textual header "Support/WasmRelocs/WebAssembly.def" } // This part of the module is usable from both C and C++ code. diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 4c6423d5c17dd770ec53284328e3159cec381580..4c29aeaa622f9ebd0e5f10fb38404830bf1ea45d 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -697,7 +697,7 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, bool llvm::isNoAliasCall(const Value *V) { if (auto CS = ImmutableCallSite(V)) - return CS.paramHasAttr(0, Attribute::NoAlias); + return CS.hasRetAttr(Attribute::NoAlias); return false; } diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 4d6a6c9a30aa9d64f112842d8a02ab9209e4712a..435c782d97a57d84c8ae83ab0a0382d018ecab6f 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -14,9 +14,9 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 16b711a69ec390f539a06ab90fef19c6450eae5e..ee17ad3ba58635483340d3143198e5e2f6a13d4a 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -17,8 +17,8 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp index 1fae947244878daed5da51cf799f2144c2d601df..0468c794e81dd368a24dec0345ceba328b743ef7 100644 --- a/lib/Analysis/AssumptionCache.cpp +++ b/lib/Analysis/AssumptionCache.cpp @@ -29,15 +29,16 @@ static cl::opt cl::desc("Enable verification of assumption cache"), cl::init(false)); -SmallVector &AssumptionCache::getOrInsertAffectedValues(Value *V) { +SmallVector & +AssumptionCache::getOrInsertAffectedValues(Value *V) { // Try using find_as first to avoid creating extra value handles just for the // purpose of doing the lookup. auto AVI = AffectedValues.find_as(V); if (AVI != AffectedValues.end()) return AVI->second; - auto AVIP = AffectedValues.insert({ - AffectedValueCallbackVH(V, this), SmallVector()}); + auto AVIP = AffectedValues.insert( + {AffectedValueCallbackVH(V, this), SmallVector()}); return AVIP.first->second; } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 0fa884ae30ab5b61bb3883449dc6f16e4f069dd7..dbb1b01b94ac28883efb5802db446f1a2683a465 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -17,13 +17,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -36,6 +36,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include #define DEBUG_TYPE "basicaa" @@ -637,7 +638,7 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) { /// Returns true if this is a writeonly (i.e Mod only) parameter. static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx, const TargetLibraryInfo &TLI) { - if (CS.paramHasAttr(ArgIdx + 1, Attribute::WriteOnly)) + if (CS.paramHasAttr(ArgIdx, Attribute::WriteOnly)) return true; // We can bound the aliasing properties of memset_pattern16 just as we can @@ -666,10 +667,10 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, if (isWriteOnlyParam(CS, ArgIdx, TLI)) return MRI_Mod; - if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly)) + if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly)) return MRI_Ref; - if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone)) + if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone)) return MRI_NoModRef; return AAResultBase::getArgModRefInfo(CS, ArgIdx); @@ -682,8 +683,11 @@ static bool isIntrinsicCall(ImmutableCallSite CS, Intrinsic::ID IID) { #ifndef NDEBUG static const Function *getParent(const Value *V) { - if (const Instruction *inst = dyn_cast(V)) + if (const Instruction *inst = dyn_cast(V)) { + if (!inst->getParent()) + return nullptr; return inst->getParent()->getParent(); + } if (const Argument *arg = dyn_cast(V)) return arg->getParent(); @@ -808,7 +812,7 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, // well. Or alternatively, replace all of this with inaccessiblememonly once // that's implemented fully. auto *Inst = CS.getInstruction(); - if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI)) { + if (isMallocOrCallocLikeFn(Inst, &TLI)) { // Be conservative if the accessed pointer may alias the allocation - // fallback to the generic handling below. if (getBestAAResults().alias(MemoryLocation(Inst), Loc) == NoAlias) @@ -924,10 +928,9 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, uint64_t V2Size, const DataLayout &DL) { - assert(GEP1->getPointerOperand()->stripPointerCasts() == - GEP2->getPointerOperand()->stripPointerCasts() && - GEP1->getPointerOperand()->getType() == - GEP2->getPointerOperand()->getType() && + assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == + GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && + GEP1->getPointerOperandType() == GEP2->getPointerOperandType() && "Expected GEPs with the same pointer operand"); // Try to determine whether GEP1 and GEP2 index through arrays, into structs, @@ -1008,10 +1011,24 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, // equal each other so we can exit early. if (C1 && C2) return NoAlias; - if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1), - GEP2->getOperand(GEP2->getNumOperands() - 1), - DL)) - return NoAlias; + { + Value *GEP1LastIdx = GEP1->getOperand(GEP1->getNumOperands() - 1); + Value *GEP2LastIdx = GEP2->getOperand(GEP2->getNumOperands() - 1); + if (isa(GEP1LastIdx) || isa(GEP2LastIdx)) { + // If one of the indices is a PHI node, be safe and only use + // computeKnownBits so we don't make any assumptions about the + // relationships between the two indices. This is important if we're + // asking about values from different loop iterations. See PR32314. + // TODO: We may be able to change the check so we only do this when + // we definitely looked through a PHINode. + KnownBits Known1 = computeKnownBits(GEP1LastIdx, DL); + KnownBits Known2 = computeKnownBits(GEP2LastIdx, DL); + if (Known1.Zero.intersects(Known2.One) || + Known1.One.intersects(Known2.Zero)) + return NoAlias; + } else if (isKnownNonEqual(GEP1LastIdx, GEP2LastIdx, DL)) + return NoAlias; + } return MayAlias; } else if (!LastIndexedStruct || !C1 || !C2) { return MayAlias; @@ -1185,10 +1202,9 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // If we know the two GEPs are based off of the exact same pointer (and not // just the same underlying object), see if that tells us anything about // the resulting pointers. - if (GEP1->getPointerOperand()->stripPointerCasts() == - GEP2->getPointerOperand()->stripPointerCasts() && - GEP1->getPointerOperand()->getType() == - GEP2->getPointerOperand()->getType()) { + if (GEP1->getPointerOperand()->stripPointerCastsAndBarriers() == + GEP2->getPointerOperand()->stripPointerCastsAndBarriers() && + GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) { AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL); // If we couldn't find anything interesting, don't abandon just yet. if (R != MayAlias) @@ -1285,9 +1301,9 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // give up if we can't determine conditions that hold for every cycle: const Value *V = DecompGEP1.VarIndices[i].V; - bool SignKnownZero, SignKnownOne; - ComputeSignBit(const_cast(V), SignKnownZero, SignKnownOne, DL, - 0, &AC, nullptr, DT); + KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, DT); + bool SignKnownZero = Known.isNonNegative(); + bool SignKnownOne = Known.isNegative(); // Zero-extension widens the variable, and so forces the sign // bit to zero. @@ -1502,8 +1518,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, return NoAlias; // Strip off any casts if they exist. - V1 = V1->stripPointerCasts(); - V2 = V2->stripPointerCasts(); + V1 = V1->stripPointerCastsAndBarriers(); + V2 = V2->stripPointerCastsAndBarriers(); // If V1 or V2 is undef, the result is NoAlias because we can always pick a // value for undef that aliases nothing in the program. diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 5935dec15c7019aa07fb3aeca1610bc99c111236..23d5a887c34af7aed76633b77c1fb9bf23284696 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -30,6 +31,7 @@ using namespace llvm; INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) @@ -58,19 +60,12 @@ char BranchProbabilityInfoWrapperPass::ID = 0; static const uint32_t LBH_TAKEN_WEIGHT = 124; static const uint32_t LBH_NONTAKEN_WEIGHT = 4; -/// \brief Unreachable-terminating branch taken weight. +/// \brief Unreachable-terminating branch taken probability. /// -/// This is the weight for a branch being taken to a block that terminates +/// This is the probability for a branch being taken to a block that terminates /// (eventually) in unreachable. These are predicted as unlikely as possible. -static const uint32_t UR_TAKEN_WEIGHT = 1; - -/// \brief Unreachable-terminating branch not-taken weight. -/// -/// This is the weight for a branch not being taken toward a block that -/// terminates (eventually) in unreachable. Such a branch is essentially never -/// taken. Set the weight to an absurdly high value so that nested loops don't -/// easily subsume it. -static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1; +/// All reachable probability will equally share the remaining part. +static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1); /// \brief Weight for a branch taken going into a cold block. /// @@ -179,7 +174,11 @@ BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { /// unreachable-terminated block as extremely unlikely. bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) + assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); + + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). + if (isa(TI)) return false; SmallVector UnreachableEdges; @@ -191,14 +190,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { else ReachableEdges.push_back(I.getSuccessorIndex()); - // Skip probabilities if this block has a single successor or if all were - // reachable. - if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) - return false; - - // Return false here so that edge weights for InvokeInst could be decided - // in calcInvokeHeuristics(). - if (isa(TI)) + // Skip probabilities if all were reachable. + if (UnreachableEdges.empty()) return false; if (ReachableEdges.empty()) { @@ -208,12 +201,10 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { return true; } - auto UnreachableProb = BranchProbability::getBranchProbability( - UR_TAKEN_WEIGHT, (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * - uint64_t(UnreachableEdges.size())); - auto ReachableProb = BranchProbability::getBranchProbability( - UR_NONTAKEN_WEIGHT, - (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * uint64_t(ReachableEdges.size())); + auto UnreachableProb = UR_TAKEN_PROB; + auto ReachableProb = + (BranchProbability::getOne() - UR_TAKEN_PROB * UnreachableEdges.size()) / + ReachableEdges.size(); for (unsigned SuccIdx : UnreachableEdges) setEdgeProbability(BB, SuccIdx, UnreachableProb); @@ -224,11 +215,12 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) { } // Propagate existing explicit probabilities from either profile data or -// 'expect' intrinsic processing. +// 'expect' intrinsic processing. Examine metadata against unreachable +// heuristic. The probability of the edge coming to unreachable block is +// set to min of metadata and unreachable heuristic. bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 1) - return false; + assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); if (!isa(TI) && !isa(TI)) return false; @@ -249,6 +241,8 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { // be scaled to fit in 32 bits. uint64_t WeightSum = 0; SmallVector Weights; + SmallVector UnreachableIdxs; + SmallVector ReachableIdxs; Weights.reserve(TI->getNumSuccessors()); for (unsigned i = 1, e = WeightsNode->getNumOperands(); i != e; ++i) { ConstantInt *Weight = @@ -259,6 +253,10 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { "Too many bits for uint32_t"); Weights.push_back(Weight->getZExtValue()); WeightSum += Weights.back(); + if (PostDominatedByUnreachable.count(TI->getSuccessor(i - 1))) + UnreachableIdxs.push_back(i - 1); + else + ReachableIdxs.push_back(i - 1); } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); @@ -267,22 +265,49 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { uint64_t ScalingFactor = (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1; - WeightSum = 0; - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - Weights[i] /= ScalingFactor; - WeightSum += Weights[i]; + if (ScalingFactor > 1) { + WeightSum = 0; + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + Weights[i] /= ScalingFactor; + WeightSum += Weights[i]; + } } + assert(WeightSum <= UINT32_MAX && + "Expected weights to scale down to 32 bits"); - if (WeightSum == 0) { - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - setEdgeProbability(BB, i, {1, e}); - } else { + if (WeightSum == 0 || ReachableIdxs.size() == 0) { for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - setEdgeProbability(BB, i, {Weights[i], static_cast(WeightSum)}); + Weights[i] = 1; + WeightSum = TI->getNumSuccessors(); } - assert(WeightSum <= UINT32_MAX && - "Expected weights to scale down to 32 bits"); + // Set the probability. + SmallVector BP; + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + BP.push_back({ Weights[i], static_cast(WeightSum) }); + + // Examine the metadata against unreachable heuristic. + // If the unreachable heuristic is more strong then we use it for this edge. + if (UnreachableIdxs.size() > 0 && ReachableIdxs.size() > 0) { + auto ToDistribute = BranchProbability::getZero(); + auto UnreachableProb = UR_TAKEN_PROB; + for (auto i : UnreachableIdxs) + if (UnreachableProb < BP[i]) { + ToDistribute += BP[i] - UnreachableProb; + BP[i] = UnreachableProb; + } + + // If we modified the probability of some edges then we must distribute + // the difference between reachable blocks. + if (ToDistribute > BranchProbability::getZero()) { + BranchProbability PerEdge = ToDistribute / ReachableIdxs.size(); + for (auto i : ReachableIdxs) + BP[i] += PerEdge; + } + } + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + setEdgeProbability(BB, i, BP[i]); return true; } @@ -297,7 +322,11 @@ bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { /// Return false, otherwise. bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { const TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) + assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); + + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). + if (isa(TI)) return false; // Determine which successors are post-dominated by a cold block. @@ -309,13 +338,8 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) { else NormalEdges.push_back(I.getSuccessorIndex()); - // Return false here so that edge weights for InvokeInst could be decided - // in calcInvokeHeuristics(). - if (isa(TI)) - return false; - - // Skip probabilities if this block has a single successor. - if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) + // Skip probabilities if no cold edges. + if (ColdEdges.empty()) return false; if (NormalEdges.empty()) { @@ -435,7 +459,8 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(const BasicBlock *BB, return true; } -bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) { +bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, + const TargetLibraryInfo *TLI) { const BranchInst *BI = dyn_cast(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -458,8 +483,37 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB) { if (AndRHS->getUniqueInteger().isPowerOf2()) return false; + // Check if the LHS is the return value of a library function + LibFunc Func = NumLibFuncs; + if (TLI) + if (CallInst *Call = dyn_cast(CI->getOperand(0))) + if (Function *CalledFn = Call->getCalledFunction()) + TLI->getLibFunc(*CalledFn, Func); + bool isProb; - if (CV->isZero()) { + if (Func == LibFunc_strcasecmp || + Func == LibFunc_strcmp || + Func == LibFunc_strncasecmp || + Func == LibFunc_strncmp || + Func == LibFunc_memcmp) { + // strcmp and similar functions return zero, negative, or positive, if the + // first string is equal, less, or greater than the second. We consider it + // likely that the strings are not equal, so a comparison with zero is + // probably false, but also a comparison with any other number is also + // probably false given that what exactly is returned for nonzero values is + // not specified. Any kind of comparison other than equality we know + // nothing about. + switch (CI->getPredicate()) { + case CmpInst::ICMP_EQ: + isProb = false; + break; + case CmpInst::ICMP_NE: + isProb = true; + break; + default: + return false; + } + } else if (CV->isZero()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == 0 -> Unlikely @@ -685,7 +739,8 @@ void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { } } -void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { +void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, + const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() << " ----\n\n"); LastF = &F; // Store the last function we ran on for printing. @@ -698,17 +753,20 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); updatePostDominatedByUnreachable(BB); updatePostDominatedByColdCall(BB); - if (calcUnreachableHeuristics(BB)) + // If there is no at least two successors, no sense to set probability. + if (BB->getTerminator()->getNumSuccessors() < 2) continue; if (calcMetadataWeights(BB)) continue; + if (calcUnreachableHeuristics(BB)) + continue; if (calcColdCallHeuristics(BB)) continue; if (calcLoopBranchHeuristics(BB, LI)) continue; if (calcPointerHeuristics(BB)) continue; - if (calcZeroHeuristics(BB)) + if (calcZeroHeuristics(BB, TLI)) continue; if (calcFloatingPointHeuristics(BB)) continue; @@ -722,12 +780,14 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) { void BranchProbabilityInfoWrapperPass::getAnalysisUsage( AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { const LoopInfo &LI = getAnalysis().getLoopInfo(); - BPI.calculate(F, LI); + const TargetLibraryInfo &TLI = getAnalysis().getTLI(); + BPI.calculate(F, LI, &TLI); return false; } @@ -742,7 +802,7 @@ AnalysisKey BranchProbabilityAnalysis::Key; BranchProbabilityInfo BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { BranchProbabilityInfo BPI; - BPI.calculate(F, AM.getResult(F)); + BPI.calculate(F, AM.getResult(F), &AM.getResult(F)); return BPI; } diff --git a/lib/Analysis/CFLGraph.h b/lib/Analysis/CFLGraph.h index e526e0e16aa7bb232ea21ca329b19ee630672e1f..95874b88244b179e359d0e26b5adad98810488ff 100644 --- a/lib/Analysis/CFLGraph.h +++ b/lib/Analysis/CFLGraph.h @@ -16,7 +16,6 @@ #define LLVM_ANALYSIS_CFLGRAPH_H #include "AliasAnalysisSummary.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" @@ -210,6 +209,11 @@ template class CFLGraphBuilder { void addDerefEdge(Value *From, Value *To, bool IsRead) { assert(From != nullptr && To != nullptr); + // FIXME: This is subtly broken, due to how we model some instructions + // (e.g. extractvalue, extractelement) as loads. Since those take + // non-pointer operands, we'll entirely skip adding edges for those. + // + // addAssignEdge seems to have a similar issue with insertvalue, etc. if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy()) return; addNode(From); @@ -400,8 +404,7 @@ template class CFLGraphBuilder { // TODO: address other common library functions such as realloc(), // strdup(), // etc. - if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) || - isFreeCall(Inst, &TLI)) + if (isMallocOrCallocLikeFn(Inst, &TLI) || isFreeCall(Inst, &TLI)) return; // TODO: Add support for noalias args/all the other fun function @@ -430,7 +433,7 @@ template class CFLGraphBuilder { if (Inst->getType()->isPointerTy()) { auto *Fn = CS.getCalledFunction(); - if (Fn == nullptr || !Fn->doesNotAlias(0)) + if (Fn == nullptr || !Fn->returnDoesNotAlias()) // No need to call addNode() since we've added Inst at the // beginning of this function and we know it is not a global. Graph.addAttr(InstantiatedValue{Inst, 0}, getAttrUnknown()); @@ -541,6 +544,7 @@ template class CFLGraphBuilder { case Instruction::ExtractValue: { auto *Ptr = CE->getOperand(0); addLoadEdge(Ptr, CE); + break; } case Instruction::ShuffleVector: { auto *From1 = CE->getOperand(0); diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp index 6942176ae6ae1f73919cb642c9af13b5eae84f43..ff5242f69a1b12294327361d531e37d5cd12d822 100644 --- a/lib/Analysis/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -21,23 +21,18 @@ using namespace llvm; // CallGraph::CallGraph(Module &M) - : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)), + : M(M), ExternalCallingNode(getOrInsertFunction(nullptr)), CallsExternalNode(llvm::make_unique(nullptr)) { // Add every function to the call graph. for (Function &F : M) addToCallGraph(&F); - - // If we didn't find a main function, use the external call graph node - if (!Root) - Root = ExternalCallingNode; } CallGraph::CallGraph(CallGraph &&Arg) - : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root), + : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), ExternalCallingNode(Arg.ExternalCallingNode), CallsExternalNode(std::move(Arg.CallsExternalNode)) { Arg.FunctionMap.clear(); - Arg.Root = nullptr; Arg.ExternalCallingNode = nullptr; } @@ -57,21 +52,9 @@ CallGraph::~CallGraph() { void CallGraph::addToCallGraph(Function *F) { CallGraphNode *Node = getOrInsertFunction(F); - // If this function has external linkage, anything could call it. - if (!F->hasLocalLinkage()) { - ExternalCallingNode->addCalledFunction(CallSite(), Node); - - // Found the entry point? - if (F->getName() == "main") { - if (Root) // Found multiple external mains? Don't pick one. - Root = ExternalCallingNode; - else - Root = Node; // Found a main, keep track of it! - } - } - - // If this function has its address taken, anything could call it. - if (F->hasAddressTaken()) + // If this function has external linkage or has its address taken, anything + // could call it. + if (!F->hasLocalLinkage() || F->hasAddressTaken()) ExternalCallingNode->addCalledFunction(CallSite(), Node); // If this function is not defined in this translation unit, it could call @@ -96,13 +79,6 @@ void CallGraph::addToCallGraph(Function *F) { } void CallGraph::print(raw_ostream &OS) const { - OS << "CallGraph Root is: "; - if (Function *F = Root->getFunction()) - OS << F->getName() << "\n"; - else { - OS << "<>\n"; - } - // Print in a deterministic order by sorting CallGraphNodes by name. We do // this here to avoid slowing down the non-printing fast path. diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp index ea70f5752c613f3fc959296365e06d028020cabf..facda246936dcf8e34d7e94007f539d73c5a141b 100644 --- a/lib/Analysis/CallGraphSCCPass.cpp +++ b/lib/Analysis/CallGraphSCCPass.cpp @@ -204,7 +204,7 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, // Get the set of call sites currently in the function. for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { // If this call site is null, then the function pass deleted the call - // entirely and the WeakVH nulled it out. + // entirely and the WeakTrackingVH nulled it out. if (!I->first || // If we've already seen this call site, then the FunctionPass RAUW'd // one call with another, which resulted in two "uses" in the edge @@ -347,7 +347,8 @@ bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG, DevirtualizedCall = true; // After scanning this function, if we still have entries in callsites, then - // they are dangling pointers. WeakVH should save us for this, so abort if + // they are dangling pointers. WeakTrackingVH should save us for this, so + // abort if // this happens. assert(CallSites.empty() && "Dangling pointers found in call sites map"); @@ -476,10 +477,8 @@ bool CGPassManager::runOnModule(Module &M) { if (DevirtualizedCall) DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration << " times, due to -max-cg-scc-iterations\n"); - - if (Iteration > MaxSCCIterations) - MaxSCCIterations = Iteration; - + + MaxSCCIterations.updateMax(Iteration); } Changed |= doFinalization(CG); return Changed; @@ -609,18 +608,18 @@ namespace { } bool runOnSCC(CallGraphSCC &SCC) override { + bool BannerPrinted = false; auto PrintBannerOnce = [&] () { - static bool BannerPrinted = false; if (BannerPrinted) return; Out << Banner; BannerPrinted = true; }; for (CallGraphNode *CGN : SCC) { - if (CGN->getFunction()) { - if (isFunctionInPrintList(CGN->getFunction()->getName())) { + if (Function *F = CGN->getFunction()) { + if (!F->isDeclaration() && isFunctionInPrintList(F->getName())) { PrintBannerOnce(); - CGN->getFunction()->print(Out); + F->print(Out); } } else if (llvm::isFunctionInPrintList("*")) { PrintBannerOnce(); diff --git a/lib/Analysis/CallPrinter.cpp b/lib/Analysis/CallPrinter.cpp index af942e9ed3e975f77479feff11c9b7a0c2ef1239..e7017e77652af7c176eb0143b0056576badac36d 100644 --- a/lib/Analysis/CallPrinter.cpp +++ b/lib/Analysis/CallPrinter.cpp @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" using namespace llvm; diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 9862c3c9c270aaeceef63d347b4b67ac05afe4d2..2093f0fdec1236f6ddd7312316c836540c7da147 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index bdffdd8eb270b1e9f4787ae57f545b345c6869a3..e4d9292db92d71040fed9aafa3107c8c626d4738 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 14176dac2104c5a1d242ea1d2997b432d6a527d2..0f5ec3f5626ef0301c6dde6e095fa97a52180e07 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -22,8 +22,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" @@ -42,6 +42,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include #include @@ -686,25 +687,21 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, // bits. if (Opc == Instruction::And) { - unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType()); - APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); - APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); - computeKnownBits(Op0, KnownZero0, KnownOne0, DL); - computeKnownBits(Op1, KnownZero1, KnownOne1, DL); - if ((KnownOne1 | KnownZero0).isAllOnesValue()) { + KnownBits Known0 = computeKnownBits(Op0, DL); + KnownBits Known1 = computeKnownBits(Op1, DL); + if ((Known1.One | Known0.Zero).isAllOnesValue()) { // All the bits of Op0 that the 'and' could be masking are already zero. return Op0; } - if ((KnownOne0 | KnownZero1).isAllOnesValue()) { + if ((Known0.One | Known1.Zero).isAllOnesValue()) { // All the bits of Op1 that the 'and' could be masking are already zero. return Op1; } - APInt KnownZero = KnownZero0 | KnownZero1; - APInt KnownOne = KnownOne0 & KnownOne1; - if ((KnownZero | KnownOne).isAllOnesValue()) { - return ConstantInt::get(Op0->getType(), KnownOne); - } + Known0.Zero |= Known1.Zero; + Known0.One &= Known1.One; + if (Known0.isConstant()) + return ConstantInt::get(Op0->getType(), Known0.getConstant()); } // If the constant expr is something like &A[123] - &A[4].f, fold this into a @@ -1018,9 +1015,11 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, case Instruction::ICmp: case Instruction::FCmp: llvm_unreachable("Invalid for compares"); case Instruction::Call: - if (auto *F = dyn_cast(Ops.back())) - if (canConstantFoldCallTo(F)) - return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI); + if (auto *F = dyn_cast(Ops.back())) { + ImmutableCallSite CS(cast(InstOrCE)); + if (canConstantFoldCallTo(CS, F)) + return ConstantFoldCall(CS, F, Ops.slice(0, Ops.size() - 1), TLI); + } return nullptr; case Instruction::Select: return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); @@ -1173,7 +1172,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, const DataLayout &DL, const TargetLibraryInfo *TLI) { // fold: icmp (inttoptr x), null -> icmp x, 0 + // fold: icmp null, (inttoptr x) -> icmp 0, x // fold: icmp (ptrtoint x), 0 -> icmp x, null + // fold: icmp 0, (ptrtoint x) -> icmp null, x // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y // @@ -1243,6 +1244,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL); } + } else if (isa(Ops1)) { + // If RHS is a constant expression, but the left side isn't, swap the + // operands and try again. + Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate); + return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI); } return ConstantExpr::getCompare(Predicate, Ops0, Ops1); @@ -1352,7 +1358,9 @@ llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // Constant Folding for Calls // -bool llvm::canConstantFoldCallTo(const Function *F) { +bool llvm::canConstantFoldCallTo(ImmutableCallSite CS, const Function *F) { + if (CS.isNoBuiltin()) + return false; switch (F->getIntrinsicID()) { case Intrinsic::fabs: case Intrinsic::minnum: @@ -1438,6 +1446,36 @@ bool llvm::canConstantFoldCallTo(const Function *F) { Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; case 't': return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf"; + case '_': + + // Check for various function names that get used for the math functions + // when the header files are preprocessed with the macro + // __FINITE_MATH_ONLY__ enabled. + // The '12' here is the length of the shortest name that can match. + // We need to check the size before looking at Name[1] and Name[2] + // so we may as well check a limit that will eliminate mismatches. + if (Name.size() < 12 || Name[1] != '_') + return false; + switch (Name[2]) { + default: + return false; + case 'a': + return Name == "__acos_finite" || Name == "__acosf_finite" || + Name == "__asin_finite" || Name == "__asinf_finite" || + Name == "__atan2_finite" || Name == "__atan2f_finite"; + case 'c': + return Name == "__cosh_finite" || Name == "__coshf_finite"; + case 'e': + return Name == "__exp_finite" || Name == "__expf_finite" || + Name == "__exp2_finite" || Name == "__exp2f_finite"; + case 'l': + return Name == "__log_finite" || Name == "__logf_finite" || + Name == "__log10_finite" || Name == "__log10f_finite"; + case 'p': + return Name == "__pow_finite" || Name == "__powf_finite"; + case 's': + return Name == "__sinh_finite" || Name == "__sinhf_finite"; + } } } @@ -1550,6 +1588,9 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN if (IntrinsicID == Intrinsic::cos) return Constant::getNullValue(Ty); + if (IntrinsicID == Intrinsic::bswap || + IntrinsicID == Intrinsic::bitreverse) + return Operands[0]; } if (auto *Op = dyn_cast(Operands[0])) { if (IntrinsicID == Intrinsic::convert_to_fp16) { @@ -1637,13 +1678,21 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, if (!TLI) return nullptr; - switch (Name[0]) { + char NameKeyChar = Name[0]; + if (Name[0] == '_' && Name.size() > 2 && Name[1] == '_') + NameKeyChar = Name[2]; + + switch (NameKeyChar) { case 'a': if ((Name == "acos" && TLI->has(LibFunc_acos)) || - (Name == "acosf" && TLI->has(LibFunc_acosf))) + (Name == "acosf" && TLI->has(LibFunc_acosf)) || + (Name == "__acos_finite" && TLI->has(LibFunc_acos_finite)) || + (Name == "__acosf_finite" && TLI->has(LibFunc_acosf_finite))) return ConstantFoldFP(acos, V, Ty); else if ((Name == "asin" && TLI->has(LibFunc_asin)) || - (Name == "asinf" && TLI->has(LibFunc_asinf))) + (Name == "asinf" && TLI->has(LibFunc_asinf)) || + (Name == "__asin_finite" && TLI->has(LibFunc_asin_finite)) || + (Name == "__asinf_finite" && TLI->has(LibFunc_asinf_finite))) return ConstantFoldFP(asin, V, Ty); else if ((Name == "atan" && TLI->has(LibFunc_atan)) || (Name == "atanf" && TLI->has(LibFunc_atanf))) @@ -1657,15 +1706,21 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, (Name == "cosf" && TLI->has(LibFunc_cosf))) return ConstantFoldFP(cos, V, Ty); else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) || - (Name == "coshf" && TLI->has(LibFunc_coshf))) + (Name == "coshf" && TLI->has(LibFunc_coshf)) || + (Name == "__cosh_finite" && TLI->has(LibFunc_cosh_finite)) || + (Name == "__coshf_finite" && TLI->has(LibFunc_coshf_finite))) return ConstantFoldFP(cosh, V, Ty); break; case 'e': if ((Name == "exp" && TLI->has(LibFunc_exp)) || - (Name == "expf" && TLI->has(LibFunc_expf))) + (Name == "expf" && TLI->has(LibFunc_expf)) || + (Name == "__exp_finite" && TLI->has(LibFunc_exp_finite)) || + (Name == "__expf_finite" && TLI->has(LibFunc_expf_finite))) return ConstantFoldFP(exp, V, Ty); if ((Name == "exp2" && TLI->has(LibFunc_exp2)) || - (Name == "exp2f" && TLI->has(LibFunc_exp2f))) + (Name == "exp2f" && TLI->has(LibFunc_exp2f)) || + (Name == "__exp2_finite" && TLI->has(LibFunc_exp2_finite)) || + (Name == "__exp2f_finite" && TLI->has(LibFunc_exp2f_finite))) // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. return ConstantFoldBinaryFP(pow, 2.0, V, Ty); @@ -1680,22 +1735,33 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, break; case 'l': if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) || - (Name == "logf" && V > 0 && TLI->has(LibFunc_logf))) + (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)) || + (Name == "__log_finite" && V > 0 && + TLI->has(LibFunc_log_finite)) || + (Name == "__logf_finite" && V > 0 && + TLI->has(LibFunc_logf_finite))) return ConstantFoldFP(log, V, Ty); else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) || - (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f))) + (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)) || + (Name == "__log10_finite" && V > 0 && + TLI->has(LibFunc_log10_finite)) || + (Name == "__log10f_finite" && V > 0 && + TLI->has(LibFunc_log10f_finite))) return ConstantFoldFP(log10, V, Ty); break; case 'r': if ((Name == "round" && TLI->has(LibFunc_round)) || (Name == "roundf" && TLI->has(LibFunc_roundf))) return ConstantFoldFP(round, V, Ty); + break; case 's': if ((Name == "sin" && TLI->has(LibFunc_sin)) || (Name == "sinf" && TLI->has(LibFunc_sinf))) return ConstantFoldFP(sin, V, Ty); else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) || - (Name == "sinhf" && TLI->has(LibFunc_sinhf))) + (Name == "sinhf" && TLI->has(LibFunc_sinhf)) || + (Name == "__sinh_finite" && TLI->has(LibFunc_sinh_finite)) || + (Name == "__sinhf_finite" && TLI->has(LibFunc_sinhf_finite))) return ConstantFoldFP(sinh, V, Ty); else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) || (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf))) @@ -1756,6 +1822,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/false, Ty); + break; case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: case Intrinsic::x86_sse2_cvttsd2si: @@ -1764,16 +1831,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/true, Ty); + break; } } - if (isa(Operands[0])) { - if (IntrinsicID == Intrinsic::bswap || - IntrinsicID == Intrinsic::bitreverse) - return Operands[0]; - return nullptr; - } - return nullptr; } @@ -1813,13 +1874,17 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty, if (!TLI) return nullptr; if ((Name == "pow" && TLI->has(LibFunc_pow)) || - (Name == "powf" && TLI->has(LibFunc_powf))) + (Name == "powf" && TLI->has(LibFunc_powf)) || + (Name == "__pow_finite" && TLI->has(LibFunc_pow_finite)) || + (Name == "__powf_finite" && TLI->has(LibFunc_powf_finite))) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); if ((Name == "fmod" && TLI->has(LibFunc_fmod)) || (Name == "fmodf" && TLI->has(LibFunc_fmodf))) return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); if ((Name == "atan2" && TLI->has(LibFunc_atan2)) || - (Name == "atan2f" && TLI->has(LibFunc_atan2f))) + (Name == "atan2f" && TLI->has(LibFunc_atan2f)) || + (Name == "__atan2_finite" && TLI->has(LibFunc_atan2_finite)) || + (Name == "__atan2f_finite" && TLI->has(LibFunc_atan2f_finite))) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (auto *Op2C = dyn_cast(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) @@ -1970,6 +2035,14 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) { // Gather a column of constants. for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { + // These intrinsics use a scalar type for their second argument. + if (J == 1 && + (IntrinsicID == Intrinsic::cttz || IntrinsicID == Intrinsic::ctlz || + IntrinsicID == Intrinsic::powi)) { + Lane[J] = Operands[J]; + continue; + } + Constant *Agg = Operands[J]->getAggregateElement(I); if (!Agg) return nullptr; @@ -1990,8 +2063,11 @@ Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, } // end anonymous namespace Constant * -llvm::ConstantFoldCall(Function *F, ArrayRef Operands, +llvm::ConstantFoldCall(ImmutableCallSite CS, Function *F, + ArrayRef Operands, const TargetLibraryInfo *TLI) { + if (CS.isNoBuiltin()) + return nullptr; if (!F->hasName()) return nullptr; StringRef Name = F->getName(); @@ -2008,6 +2084,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) { // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap // (and to some extent ConstantFoldScalarCall). + if (CS.isNoBuiltin()) + return false; Function *F = CS.getCalledFunction(); if (!F) return false; diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 688c1db534c1e3a50dad7afa87bd2d768fb22296..8f808f3e78719b34bc157574ec05339d11ae7900 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -72,8 +73,7 @@ static bool isAlwaysLive(Instruction *I) { void DemandedBits::determineLiveOperandBits( const Instruction *UserI, const Instruction *I, unsigned OperandNo, - const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2) { + const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2) { unsigned BitWidth = AB.getBitWidth(); // We're called once per operand, but for some instructions, we need to @@ -85,16 +85,12 @@ void DemandedBits::determineLiveOperandBits( auto ComputeKnownBits = [&](unsigned BitWidth, const Value *V1, const Value *V2) { const DataLayout &DL = I->getModule()->getDataLayout(); - KnownZero = APInt(BitWidth, 0); - KnownOne = APInt(BitWidth, 0); - computeKnownBits(const_cast(V1), KnownZero, KnownOne, DL, 0, - &AC, UserI, &DT); + Known = KnownBits(BitWidth); + computeKnownBits(V1, Known, DL, 0, &AC, UserI, &DT); if (V2) { - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); - computeKnownBits(const_cast(V2), KnownZero2, KnownOne2, DL, - 0, &AC, UserI, &DT); + Known2 = KnownBits(BitWidth); + computeKnownBits(V2, Known2, DL, 0, &AC, UserI, &DT); } }; @@ -110,6 +106,9 @@ void DemandedBits::determineLiveOperandBits( // the output. AB = AOut.byteSwap(); break; + case Intrinsic::bitreverse: + AB = AOut.reverseBits(); + break; case Intrinsic::ctlz: if (OperandNo == 0) { // We need some output bits, so we need all bits of the @@ -117,7 +116,7 @@ void DemandedBits::determineLiveOperandBits( // known to be one. ComputeKnownBits(BitWidth, I, nullptr); AB = APInt::getHighBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countLeadingZeros()+1)); + std::min(BitWidth, Known.countMaxLeadingZeros()+1)); } break; case Intrinsic::cttz: @@ -127,7 +126,7 @@ void DemandedBits::determineLiveOperandBits( // known to be one. ComputeKnownBits(BitWidth, I, nullptr); AB = APInt::getLowBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countTrailingZeros()+1)); + std::min(BitWidth, Known.countMaxTrailingZeros()+1)); } break; } @@ -180,7 +179,7 @@ void DemandedBits::determineLiveOperandBits( // bits, then we must keep the highest input bit. if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt)) .getBoolValue()) - AB.setBit(BitWidth-1); + AB.setSignBit(); // If the shift is exact, then the low bits are not dead // (they must be zero). @@ -197,11 +196,11 @@ void DemandedBits::determineLiveOperandBits( // dead). if (OperandNo == 0) { ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownZero2; + AB &= ~Known2.Zero; } else { if (!isa(UserI->getOperand(0))) ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownZero & ~KnownZero2); + AB &= ~(Known.Zero & ~Known2.Zero); } break; case Instruction::Or: @@ -213,11 +212,11 @@ void DemandedBits::determineLiveOperandBits( // dead). if (OperandNo == 0) { ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownOne2; + AB &= ~Known2.One; } else { if (!isa(UserI->getOperand(0))) ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownOne & ~KnownOne2); + AB &= ~(Known.One & ~Known2.One); } break; case Instruction::Xor: @@ -238,7 +237,7 @@ void DemandedBits::determineLiveOperandBits( if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(), AOut.getBitWidth() - BitWidth)) .getBoolValue()) - AB.setBit(BitWidth-1); + AB.setSignBit(); break; case Instruction::Select: if (OperandNo != 0) @@ -315,7 +314,7 @@ void DemandedBits::performAnalysis() { if (!UserI->getType()->isIntegerTy()) Visited.insert(UserI); - APInt KnownZero, KnownOne, KnownZero2, KnownOne2; + KnownBits Known, Known2; // Compute the set of alive bits for each operand. These are anded into the // existing set, if any, and if that changes the set of alive bits, the // operand is added to the work-list. @@ -332,8 +331,7 @@ void DemandedBits::performAnalysis() { // Bits of each operand that are used to compute alive bits of the // output are alive, all others are dead. determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, - KnownZero, KnownOne, - KnownZero2, KnownOne2); + Known, Known2); } // If we've added to the set of alive bits (or the operand has not diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index a4672efeedd69635a8beba8b295fb70e4b0122e2..e4d58bf1b4eb18b488e656a77f3f2b9125d5f917 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -2984,7 +2984,7 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst, SmallVectorImpl &Constraints, bool &Consistent) { bool Result = false; - for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { + for (unsigned LI : Loops.set_bits()) { DEBUG(dbgs() << "\t Constraint[" << LI << "] is"); DEBUG(Constraints[LI].dump(dbgs())); if (Constraints[LI].isDistance()) @@ -3266,7 +3266,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, // For debugging purposes, dump a small bit vector to dbgs(). static void dumpSmallBitVector(SmallBitVector &BV) { dbgs() << "{"; - for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) { + for (unsigned VI : BV.set_bits()) { dbgs() << VI; if (BV.find_next(VI) >= 0) dbgs() << ' '; @@ -3506,7 +3506,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, NewConstraint.setAny(SE); // test separable subscripts - for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { + for (unsigned SI : Separable.set_bits()) { DEBUG(dbgs() << "testing subscript " << SI); switch (Pair[SI].Classification) { case Subscript::ZIV: @@ -3545,14 +3545,14 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, SmallVector Constraints(MaxLevels + 1); for (unsigned II = 0; II <= MaxLevels; ++II) Constraints[II].setAny(SE); - for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { + for (unsigned SI : Coupled.set_bits()) { DEBUG(dbgs() << "testing subscript group " << SI << " { "); SmallBitVector Group(Pair[SI].Group); SmallBitVector Sivs(Pairs); SmallBitVector Mivs(Pairs); SmallBitVector ConstrainedLevels(MaxLevels + 1); SmallVector PairsInGroup; - for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { + for (unsigned SJ : Group.set_bits()) { DEBUG(dbgs() << SJ << " "); if (Pair[SJ].Classification == Subscript::SIV) Sivs.set(SJ); @@ -3564,7 +3564,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, DEBUG(dbgs() << "}\n"); while (Sivs.any()) { bool Changed = false; - for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { + for (unsigned SJ : Sivs.set_bits()) { DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); // SJ is an SIV subscript that's part of the current coupled group unsigned Level; @@ -3588,7 +3588,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, DEBUG(dbgs() << " propagating\n"); DEBUG(dbgs() << "\tMivs = "); DEBUG(dumpSmallBitVector(Mivs)); - for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + for (unsigned SJ : Mivs.set_bits()) { // SJ is an MIV subscript that's part of the current coupled group DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, @@ -3622,7 +3622,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, } // test & propagate remaining RDIVs - for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + for (unsigned SJ : Mivs.set_bits()) { if (Pair[SJ].Classification == Subscript::RDIV) { DEBUG(dbgs() << "RDIV test\n"); if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) @@ -3635,7 +3635,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, // test remaining MIVs // This code is temporary. // Better to somehow test all remaining subscripts simultaneously. - for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + for (unsigned SJ : Mivs.set_bits()) { if (Pair[SJ].Classification == Subscript::MIV) { DEBUG(dbgs() << "MIV test\n"); if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) @@ -3647,9 +3647,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, // update Result.DV from constraint vector DEBUG(dbgs() << " updating\n"); - for (int SJ = ConstrainedLevels.find_first(); SJ >= 0; - SJ = ConstrainedLevels.find_next(SJ)) { - if (SJ > (int)CommonLevels) + for (unsigned SJ : ConstrainedLevels.set_bits()) { + if (SJ > CommonLevels) break; updateDirection(Result.DV[SJ - 1], Constraints[SJ]); if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) @@ -3859,7 +3858,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, NewConstraint.setAny(SE); // test separable subscripts - for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) { + for (unsigned SI : Separable.set_bits()) { switch (Pair[SI].Classification) { case Subscript::SIV: { unsigned Level; @@ -3886,12 +3885,12 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, SmallVector Constraints(MaxLevels + 1); for (unsigned II = 0; II <= MaxLevels; ++II) Constraints[II].setAny(SE); - for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) { + for (unsigned SI : Coupled.set_bits()) { SmallBitVector Group(Pair[SI].Group); SmallBitVector Sivs(Pairs); SmallBitVector Mivs(Pairs); SmallBitVector ConstrainedLevels(MaxLevels + 1); - for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) { + for (unsigned SJ : Group.set_bits()) { if (Pair[SJ].Classification == Subscript::SIV) Sivs.set(SJ); else @@ -3899,7 +3898,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, } while (Sivs.any()) { bool Changed = false; - for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { + for (unsigned SJ : Sivs.set_bits()) { // SJ is an SIV subscript that's part of the current coupled group unsigned Level; const SCEV *SplitIter = nullptr; @@ -3914,7 +3913,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, } if (Changed) { // propagate, possibly creating new SIVs and ZIVs - for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) { + for (unsigned SJ : Mivs.set_bits()) { // SJ is an MIV subscript that's part of the current coupled group if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Constraints, Result.Consistent)) { diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index 1b36569f7a07c9075a08aabf4299d04d53b5ae7c..2d39a0b021500abe0f7b1ba469a1e3cdd68bd191 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -241,7 +241,7 @@ void DivergencePropagator::exploreDataDependency(Value *V) { // Follow def-use chains of V. for (User *U : V->users()) { Instruction *UserInst = cast(U); - if (DV.insert(UserInst).second) + if (!TTI.isAlwaysUniform(U) && DV.insert(UserInst).second) Worklist.push_back(UserInst); } } diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index 7acfb41500d4ece12593f90401c77df694b84a6d..8abc0e7d0df957fbc74375f109023645a60ce2d0 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -80,6 +80,22 @@ struct DOTGraphTraits }; } +void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) { +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "DomTree dump not available, build with DEBUG\n"; +#endif // NDEBUG +} + +void DominatorTree::viewGraph() { +#ifndef NDEBUG + this->viewGraph("domtree", "Dominator Tree for function"); +#else + errs() << "DomTree dump not available, build with DEBUG\n"; +#endif // NDEBUG +} + namespace { struct DominatorTreeWrapperPassAnalysisGraphTraits { static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) { diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp index ebf0a370b0b05c99887f9c5a091d3144193641db..b12ae9884e3d623a62e2754c208ca9b35ca29c5a 100644 --- a/lib/Analysis/EHPersonalities.cpp +++ b/lib/Analysis/EHPersonalities.cpp @@ -27,8 +27,10 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) { return StringSwitch(F->getName()) .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gxx_personality_seh0",EHPersonality::GNU_CXX) .Case("__gxx_personality_sj0", EHPersonality::GNU_CXX_SjLj) .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__gcc_personality_seh0",EHPersonality::GNU_C) .Case("__gcc_personality_sj0", EHPersonality::GNU_C_SjLj) .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) .Case("_except_handler3", EHPersonality::MSVC_X86SEH) diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp index 33f00cb19b269687b61d84318a69d8d3d7a45440..4ef023379bb6705af7c2e600849e52b4d082e44b 100644 --- a/lib/Analysis/GlobalsModRef.cpp +++ b/lib/Analysis/GlobalsModRef.cpp @@ -475,7 +475,9 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { const std::vector &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); - if (!SCC[0]->getFunction() || !SCC[0]->getFunction()->isDefinitionExact()) { + Function *F = SCC[0]->getFunction(); + + if (!F || !F->isDefinitionExact()) { // Calls externally or not exact - can't say anything useful. Remove any // existing function records (may have been created when scanning // globals). @@ -484,19 +486,18 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { continue; } - FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()]; + FunctionInfo &FI = FunctionInfos[F]; bool KnowNothing = false; // Collect the mod/ref properties due to called functions. We only compute // one mod-ref set. for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { - Function *F = SCC[i]->getFunction(); if (!F) { KnowNothing = true; break; } - if (F->isDeclaration()) { + if (F->isDeclaration() || F->hasFnAttribute(Attribute::OptimizeNone)) { // Try to get mod/ref behaviour from function attributes. if (F->doesNotAccessMemory()) { // Can't do better than that! @@ -545,6 +546,13 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { for (auto *Node : SCC) { if (FI.getModRefInfo() == MRI_ModRef) break; // The mod/ref lattice saturates here. + + // Don't prove any properties based on the implementation of an optnone + // function. Function attributes were already used as a best approximation + // above. + if (Node->getFunction()->hasFnAttribute(Attribute::OptimizeNone)) + continue; + for (Instruction &I : instructions(Node->getFunction())) { if (FI.getModRefInfo() == MRI_ModRef) break; // The mod/ref lattice saturates here. diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index a661b0101e6a6530edd5158bbd53b9601e8e5c2c..c30feb973e60da52d5e85b0efacbe37512c3b634 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -76,9 +76,8 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, // An add is interesting if exactly one of its operands is interesting. if (const SCEVAddExpr *Add = dyn_cast(S)) { bool AnyInterestingYet = false; - for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); - OI != OE; ++OI) - if (isInteresting(*OI, I, L, SE, LI)) { + for (const auto *Op : Add->operands()) + if (isInteresting(Op, I, L, SE, LI)) { if (AnyInterestingYet) return false; AnyInterestingYet = true; @@ -118,6 +117,50 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, return true; } +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, + const Loop *L, DominatorTree *DT) { + // If the user is in the loop, use the preinc value. + if (L->contains(User)) + return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast(User); + if (!PN || !Operand) + return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of Operand by the PHI node. If any use corresponds + // to a block that is not dominated by the latch block, give up and use the + // preincremented value. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Operand && + !DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + + // Okay, all uses of Operand by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. + return true; +} + /// AddUsersImpl - Inspect the specified instruction. If it is a /// reducible SCEV, recursively add its users to the IVUsesByStride set and /// return true. Otherwise, return false. @@ -208,10 +251,16 @@ bool IVUsers::AddUsersImpl(Instruction *I, // The regular return value here is discarded; instead of recording // it, we just recompute it when we need it. const SCEV *OriginalISE = ISE; - ISE = TransformForPostIncUse(NormalizeAutodetect, - ISE, User, I, - NewUse.PostIncLoops, - *SE, *DT); + + auto NormalizePred = [&](const SCEVAddRecExpr *AR) { + auto *L = AR->getLoop(); + bool Result = IVUseShouldUsePostIncValue(User, I, L, DT); + if (Result) + NewUse.PostIncLoops.insert(L); + return Result; + }; + + ISE = normalizeForPostIncUseIf(ISE, NormalizePred, *SE); // PostIncNormalization effectively simplifies the expression under // pre-increment assumptions. Those assumptions (no wrapping) might not @@ -219,8 +268,7 @@ bool IVUsers::AddUsersImpl(Instruction *I, // transformation is invertible. if (OriginalISE != ISE) { const SCEV *DenormalizedISE = - TransformForPostIncUse(Denormalize, ISE, User, I, - NewUse.PostIncLoops, *SE, *DT); + denormalizeForPostIncUse(ISE, NewUse.PostIncLoops, *SE); // If we normalized the expression, but denormalization doesn't give the // original one, discard this user. @@ -338,11 +386,8 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const { /// getExpr - Return the expression for the use. const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const { - return - TransformForPostIncUse(Normalize, getReplacementExpr(IU), - IU.getUser(), IU.getOperandValToReplace(), - const_cast(IU.getPostIncLoops()), - *SE, *DT); + return normalizeForPostIncUse(getReplacementExpr(IU), IU.getPostIncLoops(), + *SE); } static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { @@ -353,9 +398,8 @@ static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { } if (const SCEVAddExpr *Add = dyn_cast(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) - if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) + for (const auto *Op : Add->operands()) + if (const SCEVAddRecExpr *AR = findAddRecForLoop(Op, L)) return AR; return nullptr; } diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp index 3da33ac71421e2bfb2cc029096b6f4c0fedbec84..ed233d201537f61d80cf7163d7509a0feece6961 100644 --- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -43,7 +43,7 @@ static cl::opt // The percent threshold for the direct-call target (this call site vs the // total call count) for it to be considered as the promotion target. static cl::opt - ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden, + ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden, cl::ZeroOrMore, cl::desc("The percentage threshold for the promotion")); diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index cd85ef3703f53bbd3ed1123e37b0e1767a70614d..6ff5938a3175abea49afb572a9ec33e0d86aa293 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -58,7 +58,7 @@ static cl::opt // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. static cl::opt ColdThreshold( - "inlinecold-threshold", cl::Hidden, cl::init(225), + "inlinecold-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining functions with cold attribute")); static cl::opt @@ -87,6 +87,9 @@ class CallAnalyzer : public InstVisitor { /// The called function. Function &F; + // Cache the DataLayout since we use it a lot. + const DataLayout &DL; + /// The candidate callsite being analyzed. Please do not use this to do /// analysis in the caller function; we want the inline cost query to be /// easily cacheable. Instead, use the cover function paramHasAttr. @@ -217,17 +220,17 @@ public: ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg, const InlineParams &Params) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), - PSI(PSI), F(Callee), CandidateCS(CSArg), Params(Params), - Threshold(Params.DefaultThreshold), Cost(0), IsCallerRecursive(false), - IsRecursiveCall(false), ExposesReturnsTwice(false), - HasDynamicAlloca(false), ContainsNoDuplicateCall(false), - HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), - AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), + CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), + Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), + HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -304,7 +307,6 @@ void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - const DataLayout &DL = F.getParent()->getDataLayout(); unsigned IntPtrWidth = DL.getPointerSizeInBits(); assert(IntPtrWidth == Offset.getBitWidth()); @@ -354,7 +356,6 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { if (I.isArrayAllocation()) { Constant *Size = SimplifiedValues.lookup(I.getArraySize()); if (auto *AllocSize = dyn_cast_or_null(Size)) { - const DataLayout &DL = F.getParent()->getDataLayout(); Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize); @@ -364,7 +365,6 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) { // Accumulate the allocated size. if (I.isStaticAlloca()) { - const DataLayout &DL = F.getParent()->getDataLayout(); Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize); } @@ -505,7 +505,6 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - const DataLayout &DL = F.getParent()->getDataLayout(); if (IntegerSize >= DL.getPointerSizeInBits()) { std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); @@ -539,7 +538,6 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // modifications provided the integer is not too large. Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - const DataLayout &DL = F.getParent()->getDataLayout(); if (IntegerSize <= DL.getPointerSizeInBits()) { std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) @@ -571,7 +569,6 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { Value *Operand = I.getOperand(0); if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { - const DataLayout &DL = F.getParent()->getDataLayout(); return ConstantFoldInstOperands(&I, COps[0], DL); })) return true; @@ -583,8 +580,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { } bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) { - unsigned ArgNo = A->getArgNo(); - return CandidateCS.paramHasAttr(ArgNo + 1, Attr); + return CandidateCS.paramHasAttr(A->getArgNo(), Attr); } bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { @@ -668,21 +664,33 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { Threshold = MaxIfValid(Threshold, Params.HintThreshold); if (PSI) { BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr; - if (PSI->isHotCallSite(CS, CallerBFI)) { - DEBUG(dbgs() << "Hot callsite.\n"); - Threshold = Params.HotCallSiteThreshold.getValue(); - } else if (PSI->isFunctionEntryHot(&Callee)) { - DEBUG(dbgs() << "Hot callee.\n"); - // If callsite hotness can not be determined, we may still know - // that the callee is hot and treat it as a weaker hint for threshold - // increase. - Threshold = MaxIfValid(Threshold, Params.HintThreshold); - } else if (PSI->isColdCallSite(CS, CallerBFI)) { - DEBUG(dbgs() << "Cold callsite.\n"); - Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); - } else if (PSI->isFunctionEntryCold(&Callee)) { - DEBUG(dbgs() << "Cold callee.\n"); - Threshold = MinIfValid(Threshold, Params.ColdThreshold); + // FIXME: After switching to the new passmanager, simplify the logic below + // by checking only the callsite hotness/coldness. The check for CallerBFI + // exists only because we do not have BFI available with the old PM. + // + // Use callee's hotness information only if we have no way of determining + // callsite's hotness information. Callsite hotness can be determined if + // sample profile is used (which adds hotness metadata to calls) or if + // caller's BlockFrequencyInfo is available. + if (CallerBFI || PSI->hasSampleProfile()) { + if (PSI->isHotCallSite(CS, CallerBFI)) { + DEBUG(dbgs() << "Hot callsite.\n"); + Threshold = Params.HotCallSiteThreshold.getValue(); + } else if (PSI->isColdCallSite(CS, CallerBFI)) { + DEBUG(dbgs() << "Cold callsite.\n"); + Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); + } + } else { + if (PSI->isFunctionEntryHot(&Callee)) { + DEBUG(dbgs() << "Hot callee.\n"); + // If callsite hotness can not be determined, we may still know + // that the callee is hot and treat it as a weaker hint for threshold + // increase. + Threshold = MaxIfValid(Threshold, Params.HintThreshold); + } else if (PSI->isFunctionEntryCold(&Callee)) { + DEBUG(dbgs() << "Cold callee.\n"); + Threshold = MinIfValid(Threshold, Params.ColdThreshold); + } } } } @@ -778,7 +786,6 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); auto Evaluate = [&](SmallVectorImpl &COps) { Value *SimpleV = nullptr; - const DataLayout &DL = F.getParent()->getDataLayout(); if (auto FI = dyn_cast(&I)) SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1], FI->getFastMathFlags(), DL); @@ -862,7 +869,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { // because we have to continually rebuild the argument list even when no // simplifications can be performed. Until that is fixed with remapping // inside of instsimplify, directly constant fold calls here. - if (!canConstantFoldCallTo(F)) + if (!canConstantFoldCallTo(CS, F)) return false; // Try to re-map the arguments to constants. @@ -878,7 +885,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { ConstantArgs.push_back(C); } - if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + if (Constant *C = ConstantFoldCall(CS, F, ConstantArgs)) { SimplifiedValues[CS.getInstruction()] = C; return true; } @@ -1003,22 +1010,68 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { if (isa(V)) return true; - // Otherwise, we need to accumulate a cost proportional to the number of - // distinct successor blocks. This fan-out in the CFG cannot be represented - // for free even if we can represent the core switch as a jumptable that - // takes a single instruction. + // Assume the most general case where the swith is lowered into + // either a jump table, bit test, or a balanced binary tree consisting of + // case clusters without merging adjacent clusters with the same + // destination. We do not consider the switches that are lowered with a mix + // of jump table/bit test/binary search tree. The cost of the switch is + // proportional to the size of the tree or the size of jump table range. // // NB: We convert large switches which are just used to initialize large phi // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent // inlining those. It will prevent inlining in cases where the optimization // does not (yet) fire. - SmallPtrSet SuccessorBlocks; - SuccessorBlocks.insert(SI.getDefaultDest()); - for (auto Case : SI.cases()) - SuccessorBlocks.insert(Case.getCaseSuccessor()); - // Add cost corresponding to the number of distinct destinations. The first - // we model as free because of fallthrough. - Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; + + // Exit early for a large switch, assuming one case needs at least one + // instruction. + // FIXME: This is not true for a bit test, but ignore such case for now to + // save compile-time. + int64_t CostLowerBound = + std::min((int64_t)INT_MAX, + (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); + + if (CostLowerBound > Threshold) { + Cost = CostLowerBound; + return false; + } + + unsigned JumpTableSize = 0; + unsigned NumCaseCluster = + TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize); + + // If suitable for a jump table, consider the cost for the table size and + // branch to destination. + if (JumpTableSize) { + int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost + + 4 * InlineConstants::InstrCost; + Cost = std::min((int64_t)INT_MAX, JTCost + Cost); + return false; + } + + // Considering forming a binary search, we should find the number of nodes + // which is same as the number of comparisons when lowered. For a given + // number of clusters, n, we can define a recursive function, f(n), to find + // the number of nodes in the tree. The recursion is : + // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, + // and f(n) = n, when n <= 3. + // This will lead a binary tree where the leaf should be either f(2) or f(3) + // when n > 3. So, the number of comparisons from leaves should be n, while + // the number of non-leaf should be : + // 2^(log2(n) - 1) - 1 + // = 2^log2(n) * 2^-1 - 1 + // = n / 2 - 1. + // Considering comparisons from leaf and non-leaf nodes, we can estimate the + // number of comparisons in a simple closed form : + // n + n / 2 - 1 = n * 3 / 2 - 1 + if (NumCaseCluster <= 3) { + // Suppose a comparison includes one compare and one conditional branch. + Cost += NumCaseCluster * 2 * InlineConstants::InstrCost; + return false; + } + int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1; + uint64_t SwitchCost = + ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; + Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost); return false; } @@ -1106,19 +1159,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, // is expensive or the function has the "use-soft-float" attribute, this may // eventually become a library call. Treat the cost as such. if (I->getType()->isFloatingPointTy()) { - bool hasSoftFloatAttr = false; - // If the function has the "use-soft-float" attribute, mark it as // expensive. - if (F.hasFnAttribute("use-soft-float")) { - Attribute Attr = F.getFnAttribute("use-soft-float"); - StringRef Val = Attr.getValueAsString(); - if (Val == "true") - hasSoftFloatAttr = true; - } - if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive || - hasSoftFloatAttr) + (F.getFnAttribute("use-soft-float").getValueAsString() == "true")) Cost += InlineConstants::CallPenalty; } @@ -1163,7 +1207,6 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!V->getType()->isPointerTy()) return nullptr; - const DataLayout &DL = F.getParent()->getDataLayout(); unsigned IntPtrWidth = DL.getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); @@ -1220,7 +1263,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { FiftyPercentVectorBonus = 3 * Threshold / 2; TenPercentVectorBonus = 3 * Threshold / 4; - const DataLayout &DL = F.getParent()->getDataLayout(); // Track whether the post-inlining function would have more than one basic // block. A single basic block is often intended for inlining. Balloon the @@ -1233,36 +1275,10 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // the rest of the function body. Threshold += (SingleBBBonus + FiftyPercentVectorBonus); - // Give out bonuses per argument, as the instructions setting them up will - // be gone after inlining. - for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (CS.isByValArgument(I)) { - // We approximate the number of loads and stores needed by dividing the - // size of the byval type by the target's pointer size. - PointerType *PTy = cast(CS.getArgument(I)->getType()); - unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = DL.getPointerSizeInBits(); - // Ceiling division. - unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; - - // If it generates more than 8 stores it is likely to be expanded as an - // inline memcpy so we take that as an upper bound. Otherwise we assume - // one load and one store per word copied. - // FIXME: The maxStoresPerMemcpy setting from the target should be used - // here instead of a magic number of 8, but it's not available via - // DataLayout. - NumStores = std::min(NumStores, 8U); + // Give out bonuses for the callsite, as the instructions setting them up + // will be gone after inlining. + Cost -= getCallsiteCost(CS, DL); - Cost -= 2 * NumStores * InlineConstants::InstrCost; - } else { - // For non-byval arguments subtract off one instruction per call - // argument. - Cost -= InlineConstants::InstrCost; - } - } - // The call instruction also disappears after inlining. - Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty; - // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. bool OnlyOneCallAndLocalLinkage = @@ -1447,6 +1463,38 @@ static bool functionsHaveCompatibleAttributes(Function *Caller, AttributeFuncs::areInlineCompatible(*Caller, *Callee); } +int llvm::getCallsiteCost(CallSite CS, const DataLayout &DL) { + int Cost = 0; + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast(CS.getArgument(I)->getType()); + unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = DL.getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. + // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // DataLayout. + NumStores = std::min(NumStores, 8U); + + Cost += 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost += InlineConstants::InstrCost; + } + } + // The call instruction also disappears after inlining. + Cost += InlineConstants::InstrCost + InlineConstants::CallPenalty; + return Cost; +} + InlineCost llvm::getInlineCost( CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function &GetAssumptionCache, @@ -1572,7 +1620,6 @@ InlineParams llvm::getInlineParams(int Threshold) { // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold. Params.ColdCallSiteThreshold = ColdCallSiteThreshold; - // Set the OptMinSizeThreshold and OptSizeThreshold params only if the // Set the OptMinSizeThreshold and OptSizeThreshold params only if the // -inlinehint-threshold commandline option is not explicitly given. If that // option is present, then its value applies even for callees with size and diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index de2b9c0c56dba4573838e7ddbca9e9dc7c4dc29d..27c6b580e7ac83267e614f577e6dac6a07d2f2fb 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" #include "llvm/Pass.h" @@ -33,7 +33,6 @@ STATISTIC(TotalMemInst, "Number of memory instructions"); #include "llvm/IR/Instruction.def" - namespace { class InstCount : public FunctionPass, public InstVisitor { friend class InstVisitor; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 0e522cb4e495c6e3f1ed2cb3d0b29f61d4b2bbea..a975be79619b7b04c0c2b6fca4471a771ae1b8ee 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -21,8 +21,10 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -35,6 +37,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/KnownBits.h" #include using namespace llvm; using namespace llvm::PatternMatch; @@ -46,49 +49,30 @@ enum { RecursionLimit = 3 }; STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumReassoc, "Number of reassociations"); -namespace { -struct Query { - const DataLayout &DL; - const TargetLibraryInfo *TLI; - const DominatorTree *DT; - AssumptionCache *AC; - const Instruction *CxtI; - - Query(const DataLayout &DL, const TargetLibraryInfo *tli, - const DominatorTree *dt, AssumptionCache *ac = nullptr, - const Instruction *cxti = nullptr) - : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {} -}; -} // end anonymous namespace - -static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); -static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, +static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &, - const Query &, unsigned); -static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, + const SimplifyQuery &, unsigned); +static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse); -static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); -static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); + const SimplifyQuery &Q, unsigned MaxRecurse); +static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned); +static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned); static Value *SimplifyCastInst(unsigned, Value *, Type *, - const Query &, unsigned); + const SimplifyQuery &, unsigned); -/// For a boolean type, or a vector of boolean type, return false, or -/// a vector with every element false, as appropriate for the type. +/// For a boolean type or a vector of boolean type, return false or a vector +/// with every element false. static Constant *getFalse(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && - "Expected i1 type or a vector of i1!"); - return Constant::getNullValue(Ty); + return ConstantInt::getFalse(Ty); } -/// For a boolean type, or a vector of boolean type, return true, or -/// a vector with every element true, as appropriate for the type. +/// For a boolean type or a vector of boolean type, return true or a vector +/// with every element true. static Constant *getTrue(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && - "Expected i1 type or a vector of i1!"); - return Constant::getAllOnesValue(Ty); + return ConstantInt::getTrue(Ty); } /// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? @@ -119,13 +103,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { return false; // If we have a DominatorTree then do a precise test. - if (DT) { - if (!DT->isReachableFromEntry(P->getParent())) - return true; - if (!DT->isReachableFromEntry(I->getParent())) - return false; + if (DT) return DT->dominates(I, P); - } // Otherwise, if the instruction is in the entry block and is not an invoke, // then it obviously dominates all phi nodes. @@ -142,8 +121,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, - Instruction::BinaryOps OpcodeToExpand, const Query &Q, - unsigned MaxRecurse) { + Instruction::BinaryOps OpcodeToExpand, + const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; @@ -200,7 +179,8 @@ static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, /// Generic simplifications for associative binary operations. /// Returns the simpler value, or null if none was found. static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, - Value *LHS, Value *RHS, const Query &Q, + Value *LHS, Value *RHS, + const SimplifyQuery &Q, unsigned MaxRecurse) { assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); @@ -299,7 +279,7 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode, /// of the select results in the same value. Returns the common value if so, /// otherwise returns null. static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -371,7 +351,7 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS, /// comparison by seeing whether both branches of the select result in the same /// value. Returns the common value if so, otherwise returns null. static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -453,7 +433,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, /// phi values yields the same result for every value. If so returns the common /// value, otherwise returns null. static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -496,7 +476,7 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS, /// yields the same result every time. If so returns the common result, /// otherwise returns null. static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return nullptr; @@ -531,7 +511,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, Value *&Op0, Value *&Op1, - const Query &Q) { + const SimplifyQuery &Q) { if (auto *CLHS = dyn_cast(Op0)) { if (auto *CRHS = dyn_cast(Op1)) return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL); @@ -546,7 +526,7 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode, /// Given operands for an Add, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q)) return C; @@ -572,11 +552,11 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Ty); - // add nsw/nuw (xor Y, signbit), signbit --> Y + // add nsw/nuw (xor Y, signmask), signmask --> Y // The no-wrapping add guarantees that the top bit will be set by the add. // Therefore, the xor must be clearing the already set sign bit of Y. - if ((isNSW || isNUW) && match(Op1, m_SignBit()) && - match(Op0, m_Xor(m_Value(Y), m_SignBit()))) + if ((isNSW || isNUW) && match(Op1, m_SignMask()) && + match(Op0, m_Xor(m_Value(Y), m_SignMask()))) return Y; /// i1 add -> xor. @@ -602,11 +582,8 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Query) { + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit); } /// \brief Compute the base pointer and cumulative constant offsets for V. @@ -683,7 +660,7 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS, /// Given operands for a Sub, see if we can fold the result. /// If not, this returns null. static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q)) return C; @@ -706,11 +683,8 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (isNUW) return Op0; - unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (KnownZero.isMaxSignedValue()) { + KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (Known.Zero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. if (isNSW) @@ -814,17 +788,14 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); } /// Given operands for an FAdd, see if we can fold the result. If not, this /// returns null. static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q)) return C; @@ -858,7 +829,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given operands for an FSub, see if we can fold the result. If not, this /// returns null. static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q)) return C; @@ -890,7 +861,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given the operands for an FMul, see if we can fold the result static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q)) return C; @@ -907,7 +878,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, /// Given operands for a Mul, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q)) return C; @@ -963,38 +934,23 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, } Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit); } + Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit); } Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit); } -Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit); } /// Check for common or similar folds of integer division or integer remainder. @@ -1051,7 +1007,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { /// Given operands for an SDiv or UDiv, see if we can fold the result. /// If not, this returns null. static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1085,7 +1041,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (!isSigned && match(Op0, m_UDiv(m_Value(X), m_ConstantInt(C1))) && match(Op1, m_ConstantInt(C2))) { bool Overflow; - C1->getValue().umul_ov(C2->getValue(), Overflow); + (void)C1->getValue().umul_ov(C2->getValue(), Overflow); if (Overflow) return Constant::getNullValue(Op0->getType()); } @@ -1107,7 +1063,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// Given operands for an SDiv, see if we can fold the result. /// If not, this returns null. -static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) return V; @@ -1115,17 +1071,13 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a UDiv, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; @@ -1143,16 +1095,12 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit); } static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned) { + const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q)) return C; @@ -1193,18 +1141,14 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit); } /// Given operands for an SRem or URem, see if we can fold the result. /// If not, this returns null. static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1235,7 +1179,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// Given operands for an SRem, see if we can fold the result. /// If not, this returns null. -static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) return V; @@ -1243,17 +1187,13 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a URem, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; @@ -1271,16 +1211,12 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit); } static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const Query &Q, unsigned) { + const SimplifyQuery &Q, unsigned) { if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q)) return C; @@ -1302,12 +1238,8 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, } Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit); } /// Returns true if a shift by \c Amount always yields undef. @@ -1340,7 +1272,7 @@ static bool isUndefShift(Value *Amount) { /// Given operands for an Shl, LShr or AShr, see if we can fold the result. /// If not, this returns null. static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, - Value *Op1, const Query &Q, unsigned MaxRecurse) { + Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q)) return C; @@ -1370,18 +1302,14 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, // If any bits in the shift amount make that value greater than or equal to // the number of bits in the type, the shift is undefined. - unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (KnownOne.getLimitedValue() >= BitWidth) + KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (Known.One.getLimitedValue() >= Known.getBitWidth()) return UndefValue::get(Op0->getType()); // If all valid bits in the shift amount are known zero, the first operand is // unchanged. - unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth); - APInt ShiftAmountMask = APInt::getLowBitsSet(BitWidth, NumValidShiftBits); - if ((KnownZero & ShiftAmountMask) == ShiftAmountMask) + unsigned NumValidShiftBits = Log2_32_Ceil(Known.getBitWidth()); + if (Known.countMinTrailingZeros() >= NumValidShiftBits) return Op0; return nullptr; @@ -1390,7 +1318,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, /// \brief Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, - Value *Op1, bool isExact, const Query &Q, + Value *Op1, bool isExact, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse)) return V; @@ -1406,12 +1334,8 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, // The low bit cannot be shifted out of an exact shift if it is set. if (isExact) { - unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); - APInt Op0KnownZero(BitWidth, 0); - APInt Op0KnownOne(BitWidth, 0); - computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC, - Q.CxtI, Q.DT); - if (Op0KnownOne[0]) + KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + if (Op0Known.One[0]) return Op0; } @@ -1421,7 +1345,7 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, /// Given operands for an Shl, see if we can fold the result. /// If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) return V; @@ -1438,17 +1362,14 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit); } /// Given operands for an LShr, see if we can fold the result. /// If not, this returns null. static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q, MaxRecurse)) return V; @@ -1462,18 +1383,14 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit); } /// Given operands for an AShr, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q, MaxRecurse)) return V; @@ -1496,14 +1413,12 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); } +/// Commuted variants are assumed to be handled by calling this function again +/// with the parameters swapped. static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, ICmpInst *UnsignedICmp, bool IsAnd) { Value *X, *Y; @@ -1572,29 +1487,75 @@ static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { /// Commuted variants are assumed to be handled by calling this function again /// with the parameters swapped. -static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) - return X; +static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { + ICmpInst::Predicate Pred0, Pred1; + Value *A ,*B; + if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) || + !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B)))) + return nullptr; - if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) - return X; + // We have (icmp Pred0, A, B) | (icmp Pred1, A, B). + // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we + // can eliminate Op0 from this 'or'. + if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1)) + return Op1; + + // Check for any combination of predicates that cover the entire range of + // possibilities. + if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) || + (Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) || + (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) || + (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE)) + return getTrue(Op0->getType()); + + return nullptr; +} + +/// Test if a pair of compares with a shared operand and 2 constants has an +/// empty set intersection, full set union, or if one compare is a superset of +/// the other. +static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1, + bool IsAnd) { + // Look for this pattern: {and/or} (icmp X, C0), (icmp X, C1)). + if (Cmp0->getOperand(0) != Cmp1->getOperand(0)) + return nullptr; - // Look for this pattern: (icmp V, C0) & (icmp V, C1)). - Type *ITy = Op0->getType(); - ICmpInst::Predicate Pred0, Pred1; const APInt *C0, *C1; - Value *V; - if (match(Op0, m_ICmp(Pred0, m_Value(V), m_APInt(C0))) && - match(Op1, m_ICmp(Pred1, m_Specific(V), m_APInt(C1)))) { - // Make a constant range that's the intersection of the two icmp ranges. - // If the intersection is empty, we know that the result is false. - auto Range0 = ConstantRange::makeAllowedICmpRegion(Pred0, *C0); - auto Range1 = ConstantRange::makeAllowedICmpRegion(Pred1, *C1); - if (Range0.intersectWith(Range1).isEmptySet()) - return getFalse(ITy); - } + if (!match(Cmp0->getOperand(1), m_APInt(C0)) || + !match(Cmp1->getOperand(1), m_APInt(C1))) + return nullptr; + auto Range0 = ConstantRange::makeExactICmpRegion(Cmp0->getPredicate(), *C0); + auto Range1 = ConstantRange::makeExactICmpRegion(Cmp1->getPredicate(), *C1); + + // For and-of-compares, check if the intersection is empty: + // (icmp X, C0) && (icmp X, C1) --> empty set --> false + if (IsAnd && Range0.intersectWith(Range1).isEmptySet()) + return getFalse(Cmp0->getType()); + + // For or-of-compares, check if the union is full: + // (icmp X, C0) || (icmp X, C1) --> full set --> true + if (!IsAnd && Range0.unionWith(Range1).isFullSet()) + return getTrue(Cmp0->getType()); + + // Is one range a superset of the other? + // If this is and-of-compares, take the smaller set: + // (icmp sgt X, 4) && (icmp sgt X, 42) --> icmp sgt X, 42 + // If this is or-of-compares, take the larger set: + // (icmp sgt X, 4) || (icmp sgt X, 42) --> icmp sgt X, 4 + if (Range0.contains(Range1)) + return IsAnd ? Cmp1 : Cmp0; + if (Range1.contains(Range0)) + return IsAnd ? Cmp0 : Cmp1; + + return nullptr; +} + +static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { // (icmp (add V, C0), C1) & (icmp V, C0) + ICmpInst::Predicate Pred0, Pred1; + const APInt *C0, *C1; + Value *V; if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1)))) return nullptr; @@ -1605,6 +1566,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (AddInst->getOperand(1) != Op1->getOperand(1)) return nullptr; + Type *ITy = Op0->getType(); bool isNSW = AddInst->hasNoSignedWrap(); bool isNUW = AddInst->hasNoUnsignedWrap(); @@ -1635,9 +1597,129 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } +static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) + return X; + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true)) + return X; + + if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) + return X; + if (Value *X = simplifyAndOfICmpsWithSameOperands(Op1, Op0)) + return X; + + if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true)) + return X; + + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1)) + return X; + if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0)) + return X; + + return nullptr; +} + +static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { + // (icmp (add V, C0), C1) | (icmp V, C0) + ICmpInst::Predicate Pred0, Pred1; + const APInt *C0, *C1; + Value *V; + if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1)))) + return nullptr; + + if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value()))) + return nullptr; + + auto *AddInst = cast(Op0->getOperand(0)); + if (AddInst->getOperand(1) != Op1->getOperand(1)) + return nullptr; + + Type *ITy = Op0->getType(); + bool isNSW = AddInst->hasNoSignedWrap(); + bool isNUW = AddInst->hasNoUnsignedWrap(); + + const APInt Delta = *C1 - *C0; + if (C0->isStrictlyPositive()) { + if (Delta == 2) { + if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE) + return getTrue(ITy); + if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW) + return getTrue(ITy); + } + if (Delta == 1) { + if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE) + return getTrue(ITy); + if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW) + return getTrue(ITy); + } + } + if (C0->getBoolValue() && isNUW) { + if (Delta == 2) + if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE) + return getTrue(ITy); + if (Delta == 1) + if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + + return nullptr; +} + +static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) + return X; + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false)) + return X; + + if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) + return X; + if (Value *X = simplifyOrOfICmpsWithSameOperands(Op1, Op0)) + return X; + + if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false)) + return X; + + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1)) + return X; + if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0)) + return X; + + return nullptr; +} + +static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) { + // Look through casts of the 'and' operands to find compares. + auto *Cast0 = dyn_cast(Op0); + auto *Cast1 = dyn_cast(Op1); + if (Cast0 && Cast1 && Cast0->getOpcode() == Cast1->getOpcode() && + Cast0->getSrcTy() == Cast1->getSrcTy()) { + Op0 = Cast0->getOperand(0); + Op1 = Cast1->getOperand(0); + } + + auto *Cmp0 = dyn_cast(Op0); + auto *Cmp1 = dyn_cast(Op1); + if (!Cmp0 || !Cmp1) + return nullptr; + + Value *V = + IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1); + if (!V) + return nullptr; + if (!Cast0) + return V; + + // If we looked through casts, we can only handle a constant simplification + // because we are not allowed to create a cast instruction here. + if (auto *C = dyn_cast(V)) + return ConstantExpr::getCast(Cast0->getOpcode(), C, Cast0->getType()); + + return nullptr; +} + /// Given operands for an And, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q)) return C; @@ -1674,6 +1756,24 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, (A == Op0 || B == Op0)) return Op0; + // A mask that only clears known zeros of a shifted value is a no-op. + Value *X; + const APInt *Mask; + const APInt *ShAmt; + if (match(Op1, m_APInt(Mask))) { + // If all bits in the inverted and shifted mask are clear: + // and (shl X, ShAmt), Mask --> shl X, ShAmt + if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) && + (~(*Mask)).lshr(*ShAmt).isNullValue()) + return Op0; + + // If all bits in the inverted and shifted mask are clear: + // and (lshr X, ShAmt), Mask --> lshr X, ShAmt + if (match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) && + (~(*Mask)).shl(*ShAmt).isNullValue()) + return Op0; + } + // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { @@ -1685,32 +1785,8 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, return Op1; } - if (auto *ICILHS = dyn_cast(Op0)) { - if (auto *ICIRHS = dyn_cast(Op1)) { - if (Value *V = SimplifyAndOfICmps(ICILHS, ICIRHS)) - return V; - if (Value *V = SimplifyAndOfICmps(ICIRHS, ICILHS)) - return V; - } - } - - // The compares may be hidden behind casts. Look through those and try the - // same folds as above. - auto *Cast0 = dyn_cast(Op0); - auto *Cast1 = dyn_cast(Op1); - if (Cast0 && Cast1 && Cast0->getOpcode() == Cast1->getOpcode() && - Cast0->getSrcTy() == Cast1->getSrcTy()) { - auto *Cmp0 = dyn_cast(Cast0->getOperand(0)); - auto *Cmp1 = dyn_cast(Cast1->getOperand(0)); - if (Cmp0 && Cmp1) { - Instruction::CastOps CastOpc = Cast0->getOpcode(); - Type *ResultType = Cast0->getType(); - if (auto *V = dyn_cast_or_null(SimplifyAndOfICmps(Cmp0, Cmp1))) - return ConstantExpr::getCast(CastOpc, V, ResultType); - if (auto *V = dyn_cast_or_null(SimplifyAndOfICmps(Cmp1, Cmp0))) - return ConstantExpr::getCast(CastOpc, V, ResultType); - } - } + if (Value *V = simplifyAndOrOfICmps(Op0, Op1, true)) + return V; // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, @@ -1744,97 +1820,13 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); -} - -/// Commuted variants are assumed to be handled by calling this function again -/// with the parameters swapped. -static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) { - ICmpInst::Predicate Pred0, Pred1; - Value *A ,*B; - if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) || - !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B)))) - return nullptr; - - // We have (icmp Pred0, A, B) | (icmp Pred1, A, B). - // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we - // can eliminate Op0 from this 'or'. - if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1)) - return Op1; - - // Check for any combination of predicates that cover the entire range of - // possibilities. - if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) || - (Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) || - (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) || - (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE)) - return getTrue(Op0->getType()); - - return nullptr; -} - -/// Commuted variants are assumed to be handled by calling this function again -/// with the parameters swapped. -static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) - return X; - - if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) - return X; - - // (icmp (add V, C0), C1) | (icmp V, C0) - ICmpInst::Predicate Pred0, Pred1; - const APInt *C0, *C1; - Value *V; - if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1)))) - return nullptr; - - if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value()))) - return nullptr; - - auto *AddInst = cast(Op0->getOperand(0)); - if (AddInst->getOperand(1) != Op1->getOperand(1)) - return nullptr; - - Type *ITy = Op0->getType(); - bool isNSW = AddInst->hasNoSignedWrap(); - bool isNUW = AddInst->hasNoUnsignedWrap(); - - const APInt Delta = *C1 - *C0; - if (C0->isStrictlyPositive()) { - if (Delta == 2) { - if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE) - return getTrue(ITy); - if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW) - return getTrue(ITy); - } - if (Delta == 1) { - if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE) - return getTrue(ITy); - if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW) - return getTrue(ITy); - } - } - if (C0->getBoolValue() && isNUW) { - if (Delta == 2) - if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE) - return getTrue(ITy); - if (Delta == 1) - if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE) - return getTrue(ITy); - } - - return nullptr; +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for an Or, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q)) return C; @@ -1881,14 +1873,45 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, (A == Op0 || B == Op0)) return Constant::getAllOnesValue(Op0->getType()); - if (auto *ICILHS = dyn_cast(Op0)) { - if (auto *ICIRHS = dyn_cast(Op1)) { - if (Value *V = SimplifyOrOfICmps(ICILHS, ICIRHS)) - return V; - if (Value *V = SimplifyOrOfICmps(ICIRHS, ICILHS)) - return V; - } - } + // (A & ~B) | (A ^ B) -> (A ^ B) + // (~B & A) | (A ^ B) -> (A ^ B) + // (A & ~B) | (B ^ A) -> (B ^ A) + // (~B & A) | (B ^ A) -> (B ^ A) + if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && + (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) + return Op1; + + // Commute the 'or' operands. + // (A ^ B) | (A & ~B) -> (A ^ B) + // (A ^ B) | (~B & A) -> (A ^ B) + // (B ^ A) | (A & ~B) -> (B ^ A) + // (B ^ A) | (~B & A) -> (B ^ A) + if (match(Op0, m_Xor(m_Value(A), m_Value(B))) && + (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))) + return Op0; + + // (A & B) | (~A ^ B) -> (~A ^ B) + // (B & A) | (~A ^ B) -> (~A ^ B) + // (A & B) | (B ^ ~A) -> (B ^ ~A) + // (B & A) | (B ^ ~A) -> (B ^ ~A) + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) + return Op1; + + // (~A ^ B) | (A & B) -> (~A ^ B) + // (~A ^ B) | (B & A) -> (~A ^ B) + // (B ^ ~A) | (A & B) -> (B ^ ~A) + // (B ^ ~A) | (B & A) -> (B ^ ~A) + if (match(Op1, m_And(m_Value(A), m_Value(B))) && + (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) || + match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B))))) + return Op0; + + if (Value *V = simplifyAndOrOfICmps(Op0, Op1, false)) + return V; // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, @@ -1907,37 +1930,27 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; - // (A & C)|(B & D) - Value *C = nullptr, *D = nullptr; - if (match(Op0, m_And(m_Value(A), m_Value(C))) && - match(Op1, m_And(m_Value(B), m_Value(D)))) { - ConstantInt *C1 = dyn_cast(C); - ConstantInt *C2 = dyn_cast(D); - if (C1 && C2 && (C1->getValue() == ~C2->getValue())) { + // (A & C1)|(B & C2) + const APInt *C1, *C2; + if (match(Op0, m_And(m_Value(A), m_APInt(C1))) && + match(Op1, m_And(m_Value(B), m_APInt(C2)))) { + if (*C1 == ~*C2) { // (A & C1)|(B & C2) // If we have: ((V + N) & C1) | (V & C2) // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 // replace with V+N. - Value *V1, *V2; - if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { + Value *N; + if (C2->isMask() && // C2 == 0+1+ + match(A, m_c_Add(m_Specific(B), m_Value(N)))) { // Add commutes, try both ways. - if (V1 == B && - MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return A; - if (V2 == B && - MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (MaskedValueIsZero(N, *C2, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return A; } // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue() + 1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { + if (C1->isMask() && + match(B, m_c_Add(m_Specific(A), m_Value(N)))) { // Add commutes, try both ways. - if (V1 == A && - MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return B; - if (V2 == A && - MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return B; } } @@ -1952,17 +1965,13 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit); } /// Given operands for a Xor, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, +static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) { if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q)) return C; @@ -2001,14 +2010,11 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, return nullptr; } -Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { + return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit); } + static Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } @@ -2242,34 +2248,55 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI, /// Fold an icmp when its operands have i1 scalar type. static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q) { + Value *RHS, const SimplifyQuery &Q) { Type *ITy = GetCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type. if (!OpTy->getScalarType()->isIntegerTy(1)) return nullptr; - switch (Pred) { - default: - break; - case ICmpInst::ICMP_EQ: - // X == 1 -> X - if (match(RHS, m_One())) - return LHS; - break; - case ICmpInst::ICMP_NE: - // X != 0 -> X - if (match(RHS, m_Zero())) + // A boolean compared to true/false can be simplified in 14 out of the 20 + // (10 predicates * 2 constants) possible combinations. Cases not handled here + // require a 'not' of the LHS, so those must be transformed in InstCombine. + if (match(RHS, m_Zero())) { + switch (Pred) { + case CmpInst::ICMP_NE: // X != 0 -> X + case CmpInst::ICMP_UGT: // X >u 0 -> X + case CmpInst::ICMP_SLT: // X X return LHS; - break; - case ICmpInst::ICMP_UGT: - // X >u 0 -> X - if (match(RHS, m_Zero())) + + case CmpInst::ICMP_ULT: // X false + case CmpInst::ICMP_SGT: // X >s 0 -> false + return getFalse(ITy); + + case CmpInst::ICMP_UGE: // X >=u 0 -> true + case CmpInst::ICMP_SLE: // X <=s 0 -> true + return getTrue(ITy); + + default: break; + } + } else if (match(RHS, m_One())) { + switch (Pred) { + case CmpInst::ICMP_EQ: // X == 1 -> X + case CmpInst::ICMP_UGE: // X >=u 1 -> X + case CmpInst::ICMP_SLE: // X <=s -1 -> X return LHS; + + case CmpInst::ICMP_UGT: // X >u 1 -> false + case CmpInst::ICMP_SLT: // X false + return getFalse(ITy); + + case CmpInst::ICMP_ULE: // X <=u 1 -> true + case CmpInst::ICMP_SGE: // X >=s -1 -> true + return getTrue(ITy); + + default: break; + } + } + + switch (Pred) { + default: break; case ICmpInst::ICMP_UGE: - // X >=u 1 -> X - if (match(RHS, m_One())) - return LHS; if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false)) return getTrue(ITy); break; @@ -2284,16 +2311,6 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) return getTrue(ITy); break; - case ICmpInst::ICMP_SLT: - // X X - if (match(RHS, m_Zero())) - return LHS; - break; - case ICmpInst::ICMP_SLE: - // X <=s -1 -> X - if (match(RHS, m_One())) - return LHS; - break; case ICmpInst::ICMP_ULE: if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false)) return getTrue(ITy); @@ -2305,12 +2322,11 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, /// Try hard to fold icmp with zero RHS because this is a common case. static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q) { + Value *RHS, const SimplifyQuery &Q) { if (!match(RHS, m_Zero())) return nullptr; Type *ITy = GetCompareTy(LHS); // The return type. - bool LHSKnownNonNegative, LHSKnownNegative; switch (Pred) { default: llvm_unreachable("Unknown ICmp predicate!"); @@ -2328,39 +2344,41 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS, if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; - case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + case ICmpInst::ICMP_SLT: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getTrue(ITy); - if (LHSKnownNonNegative) + if (LHSKnown.isNonNegative()) return getFalse(ITy); break; - case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + } + case ICmpInst::ICMP_SLE: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (LHSKnown.isNonNegative() && + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getFalse(ITy); break; - case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + } + case ICmpInst::ICMP_SGE: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getFalse(ITy); - if (LHSKnownNonNegative) + if (LHSKnown.isNonNegative()) return getTrue(ITy); break; - case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNegative) + } + case ICmpInst::ICMP_SGT: { + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNegative()) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (LHSKnown.isNonNegative() && + isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return getTrue(ITy); break; } + } return nullptr; } @@ -2373,7 +2391,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { const APInt *C; switch (BO.getOpcode()) { case Instruction::Add: - if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) { + if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { // FIXME: If we have both nuw and nsw, we should reduce the range further. if (BO.hasNoUnsignedWrap()) { // 'add nuw x, C' produces [C, UINT_MAX]. @@ -2411,7 +2429,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1; } else if (match(BO.getOperand(0), m_APInt(C))) { unsigned ShiftAmount = Width - 1; - if (*C != 0 && BO.isExact()) + if (!C->isNullValue() && BO.isExact()) ShiftAmount = C->countTrailingZeros(); if (C->isNegative()) { // 'ashr C, x' produces [C, C >> (Width-1)] @@ -2432,7 +2450,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { } else if (match(BO.getOperand(0), m_APInt(C))) { // 'lshr C, x' produces [C >> (Width-1), C]. unsigned ShiftAmount = Width - 1; - if (*C != 0 && BO.isExact()) + if (!C->isNullValue() && BO.isExact()) ShiftAmount = C->countTrailingZeros(); Lower = C->lshr(ShiftAmount); Upper = *C + 1; @@ -2494,7 +2512,7 @@ static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) { break; case Instruction::UDiv: - if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) { + if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) { // 'udiv x, C' produces [0, UINT_MAX / C]. Upper = APInt::getMaxValue(Width).udiv(*C) + 1; } else if (match(BO.getOperand(0), m_APInt(C))) { @@ -2559,8 +2577,11 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS, return nullptr; } +/// TODO: A large part of this logic is duplicated in InstCombine's +/// foldICmpBinOp(). We should be able to share that and avoid the code +/// duplication. static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = GetCompareTy(LHS); // The return type. @@ -2640,15 +2661,11 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return getTrue(ITy); if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) { - bool RHSKnownNonNegative, RHSKnownNegative; - bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, Q.DL, 0, - Q.AC, Q.CxtI, Q.DT); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (RHSKnownNonNegative && YKnownNegative) + KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (RHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy); - if (RHSKnownNegative || YKnownNonNegative) + if (RHSKnown.isNegative() || YKnown.isNonNegative()) return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy); } } @@ -2660,15 +2677,11 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, return getFalse(ITy); if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE) { - bool LHSKnownNonNegative, LHSKnownNegative; - bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, - Q.AC, Q.CxtI, Q.DT); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (LHSKnownNonNegative && YKnownNegative) + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (LHSKnown.isNonNegative() && YKnown.isNegative()) return Pred == ICmpInst::ICMP_SGT ? getTrue(ITy) : getFalse(ITy); - if (LHSKnownNegative || YKnownNonNegative) + if (LHSKnown.isNegative() || YKnown.isNonNegative()) return Pred == ICmpInst::ICMP_SGT ? getFalse(ITy) : getTrue(ITy); } } @@ -2715,28 +2728,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // icmp pred (urem X, Y), Y if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { - bool KnownNonNegative, KnownNegative; switch (Pred) { default: break; case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SGE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return getFalse(ITy); case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SLE: { + KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2746,28 +2758,27 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // icmp pred X, (urem Y, X) if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { - bool KnownNonNegative, KnownNegative; switch (Pred) { default: break; case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SGE: { + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return getTrue(ITy); case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL, 0, Q.AC, - Q.CxtI, Q.DT); - if (!KnownNonNegative) + case ICmpInst::ICMP_SLE: { + KnownBits Known = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (!Known.isNonNegative()) break; LLVM_FALLTHROUGH; + } case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: @@ -2816,14 +2827,14 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, // - CI2 is one // - CI isn't zero if (LBO->hasNoSignedWrap() || LBO->hasNoUnsignedWrap() || - *CI2Val == 1 || !CI->isZero()) { + CI2Val->isOneValue() || !CI->isZero()) { if (Pred == ICmpInst::ICMP_EQ) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_NE) return ConstantInt::getTrue(RHS->getContext()); } } - if (CIVal->isSignBit() && *CI2Val == 1) { + if (CIVal->isSignMask() && CI2Val->isOneValue()) { if (Pred == ICmpInst::ICMP_UGT) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_ULE) @@ -2839,10 +2850,19 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, break; case Instruction::UDiv: case Instruction::LShr: - if (ICmpInst::isSigned(Pred)) + if (ICmpInst::isSigned(Pred) || !LBO->isExact() || !RBO->isExact()) break; - LLVM_FALLTHROUGH; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), Q, MaxRecurse - 1)) + return V; + break; case Instruction::SDiv: + if (!ICmpInst::isEquality(Pred) || !LBO->isExact() || !RBO->isExact()) + break; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), Q, MaxRecurse - 1)) + return V; + break; case Instruction::AShr: if (!LBO->isExact() || !RBO->isExact()) break; @@ -2870,7 +2890,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS, /// Simplify integer comparisons where at least one operand of the compare /// matches an integer min/max idiom. static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const Query &Q, + Value *RHS, const SimplifyQuery &Q, unsigned MaxRecurse) { Type *ITy = GetCompareTy(LHS); // The return type. Value *A, *B; @@ -3074,7 +3094,7 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS, /// Given operands for an ICmpInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); @@ -3288,11 +3308,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } // icmp eq|ne X, Y -> false|true if X != Y - if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + if (ICmpInst::isEquality(Pred) && isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { - LLVMContext &Ctx = LHS->getType()->getContext(); - return Pred == ICmpInst::ICMP_NE ? - ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + return Pred == ICmpInst::ICMP_NE ? getTrue(ITy) : getFalse(ITy); } if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse)) @@ -3340,22 +3358,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // If a bit is known to be zero for A and known to be one for B, - // then A and B cannot be equal. - if (ICmpInst::isEquality(Pred)) { - const APInt *RHSVal; - if (match(RHS, m_APInt(RHSVal))) { - unsigned BitWidth = RHSVal->getBitWidth(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC, - Q.CxtI, Q.DT); - if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0)) - return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy) - : ConstantInt::getTrue(ITy); - } - } - // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) @@ -3372,18 +3374,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } /// Given operands for an FCmpInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - FastMathFlags FMF, const Query &Q, + FastMathFlags FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); @@ -3505,22 +3503,22 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - FastMathFlags FMF, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit); } /// See if V simplifies when its operand Op is replaced with RepOp. static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const Query &Q, + const SimplifyQuery &Q, unsigned MaxRecurse) { // Trivial replacement. if (V == Op) return RepOp; + // We cannot replace a constant, and shouldn't even try. + if (isa(Op)) + return nullptr; + auto *I = dyn_cast(V); if (!I) return nullptr; @@ -3663,7 +3661,7 @@ static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal, /// Try to simplify a select instruction when its condition operand is an /// integer comparison. static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, - Value *FalseVal, const Query &Q, + Value *FalseVal, const SimplifyQuery &Q, unsigned MaxRecurse) { ICmpInst::Predicate Pred; Value *CmpLHS, *CmpRHS; @@ -3742,7 +3740,7 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, /// Given operands for a SelectInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, - Value *FalseVal, const Query &Q, + Value *FalseVal, const SimplifyQuery &Q, unsigned MaxRecurse) { // select true, X, Y -> X // select false, X, Y -> Y @@ -3758,9 +3756,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, return TrueVal; if (isa(CondVal)) { // select undef, X, Y -> X or Y - if (isa(TrueVal)) - return TrueVal; - return FalseVal; + if (isa(FalseVal)) + return FalseVal; + return TrueVal; } if (isa(TrueVal)) // select C, undef, X -> X return FalseVal; @@ -3775,18 +3773,14 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, } Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifySelectInst(Cond, TrueVal, FalseVal, - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit); } /// Given operands for an GetElementPtrInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, - const Query &Q, unsigned) { + const SimplifyQuery &Q, unsigned) { // The type of the GEP pointer operand. unsigned AS = cast(Ops[0]->getType()->getScalarType())->getAddressSpace(); @@ -3800,6 +3794,8 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, Type *GEPTy = PointerType::get(LastType, AS); if (VectorType *VT = dyn_cast(Ops[0]->getType())) GEPTy = VectorType::get(GEPTy, VT->getNumElements()); + else if (VectorType *VT = dyn_cast(Ops[1]->getType())) + GEPTy = VectorType::get(GEPTy, VT->getNumElements()); if (isa(Ops[0])) return UndefValue::get(GEPTy); @@ -3885,27 +3881,25 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, } // Check to see if this is constant foldable. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (!isa(Ops[i])) - return nullptr; + if (!all_of(Ops, [](Value *V) { return isa(V); })) + return nullptr; - return ConstantExpr::getGetElementPtr(SrcTy, cast(Ops[0]), - Ops.slice(1)); + auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast(Ops[0]), + Ops.slice(1)); + if (auto *CEFolded = ConstantFoldConstant(CE, Q.DL)) + return CEFolded; + return CE; } Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyGEPInst(SrcTy, Ops, - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit); } /// Given operands for an InsertValueInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef Idxs, const Query &Q, + ArrayRef Idxs, const SimplifyQuery &Q, unsigned) { if (Constant *CAgg = dyn_cast(Agg)) if (Constant *CVal = dyn_cast(Val)) @@ -3931,18 +3925,16 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, return nullptr; } -Value *llvm::SimplifyInsertValueInst( - Value *Agg, Value *Val, ArrayRef Idxs, const DataLayout &DL, - const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef Idxs, + const SimplifyQuery &Q) { + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit); } /// Given operands for an ExtractValueInst, see if we can fold the result. /// If not, this returns null. static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, - const Query &, unsigned) { + const SimplifyQuery &, unsigned) { if (auto *CAgg = dyn_cast(Agg)) return ConstantFoldExtractValueInstruction(CAgg, Idxs); @@ -3965,18 +3957,13 @@ static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, } Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit); } /// Given operands for an ExtractElementInst, see if we can fold the result. /// If not, this returns null. -static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, +static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &, unsigned) { if (auto *CVec = dyn_cast(Vec)) { if (auto *CIdx = dyn_cast(Idx)) @@ -3999,15 +3986,13 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, return nullptr; } -Value *llvm::SimplifyExtractElementInst( - Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx, + const SimplifyQuery &Q) { + return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit); } /// See if we can fold the given phi. If not, returns null. -static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { +static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. Value *CommonValue = nullptr; @@ -4040,7 +4025,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { } static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, - Type *Ty, const Query &Q, unsigned MaxRecurse) { + Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) { if (auto *C = dyn_cast(Op)) return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL); @@ -4074,68 +4059,141 @@ static Value *SimplifyCastInst(unsigned CastOpc, Value *Op, } Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, - const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit); +} + +/// For the given destination element of a shuffle, peek through shuffles to +/// match a root vector source operand that contains that element in the same +/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s). +static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, + int MaskVal, Value *RootVec, + unsigned MaxRecurse) { + if (!MaxRecurse--) + return nullptr; + + // Bail out if any mask value is undefined. That kind of shuffle may be + // simplified further based on demanded bits or other folds. + if (MaskVal == -1) + return nullptr; + + // The mask value chooses which source operand we need to look at next. + int InVecNumElts = Op0->getType()->getVectorNumElements(); + int RootElt = MaskVal; + Value *SourceOp = Op0; + if (MaskVal >= InVecNumElts) { + RootElt = MaskVal - InVecNumElts; + SourceOp = Op1; + } + + // If the source operand is a shuffle itself, look through it to find the + // matching root vector. + if (auto *SourceShuf = dyn_cast(SourceOp)) { + return foldIdentityShuffles( + DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1), + SourceShuf->getMaskValue(RootElt), RootVec, MaxRecurse); + } + + // TODO: Look through bitcasts? What if the bitcast changes the vector element + // size? + + // The source operand is not a shuffle. Initialize the root vector value for + // this shuffle if that has not been done yet. + if (!RootVec) + RootVec = SourceOp; + + // Give up as soon as a source operand does not match the existing root value. + if (RootVec != SourceOp) + return nullptr; + + // The element must be coming from the same lane in the source vector + // (although it may have crossed lanes in intermediate shuffles). + if (RootElt != DestElt) + return nullptr; + + return RootVec; } static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, - Type *RetTy, const Query &Q, + Type *RetTy, const SimplifyQuery &Q, unsigned MaxRecurse) { + if (isa(Mask)) + return UndefValue::get(RetTy); + Type *InVecTy = Op0->getType(); unsigned MaskNumElts = Mask->getType()->getVectorNumElements(); unsigned InVecNumElts = InVecTy->getVectorNumElements(); - auto *Op0Const = dyn_cast(Op0); - auto *Op1Const = dyn_cast(Op1); - - // If all operands are constant, constant fold the shuffle. - if (Op0Const && Op1Const) - return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); + SmallVector Indices; + ShuffleVectorInst::getShuffleMask(Mask, Indices); + assert(MaskNumElts == Indices.size() && + "Size of Indices not same as number of mask elements?"); - // If only one of the operands is constant, constant fold the shuffle if the - // mask does not select elements from the variable operand. + // Canonicalization: If mask does not select elements from an input vector, + // replace that input vector with undef. bool MaskSelects0 = false, MaskSelects1 = false; for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = ShuffleVectorInst::getMaskValue(Mask, i); - if (Idx == -1) + if (Indices[i] == -1) continue; - if ((unsigned)Idx < InVecNumElts) + if ((unsigned)Indices[i] < InVecNumElts) MaskSelects0 = true; else MaskSelects1 = true; } - if (!MaskSelects0 && Op1Const) - return ConstantFoldShuffleVectorInstruction(UndefValue::get(InVecTy), - Op1Const, Mask); - if (!MaskSelects1 && Op0Const) - return ConstantFoldShuffleVectorInstruction(Op0Const, - UndefValue::get(InVecTy), Mask); + if (!MaskSelects0) + Op0 = UndefValue::get(InVecTy); + if (!MaskSelects1) + Op1 = UndefValue::get(InVecTy); + + auto *Op0Const = dyn_cast(Op0); + auto *Op1Const = dyn_cast(Op1); + + // If all operands are constant, constant fold the shuffle. + if (Op0Const && Op1Const) + return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask); + + // Canonicalization: if only one input vector is constant, it shall be the + // second one. + if (Op0Const && !Op1Const) { + std::swap(Op0, Op1); + ShuffleVectorInst::commuteShuffleMask(Indices, InVecNumElts); + } // A shuffle of a splat is always the splat itself. Legal if the shuffle's // value type is same as the input vectors' type. if (auto *OpShuf = dyn_cast(Op0)) - if (!MaskSelects1 && RetTy == InVecTy && + if (isa(Op1) && RetTy == InVecTy && OpShuf->getMask()->getSplatValue()) return Op0; - if (auto *OpShuf = dyn_cast(Op1)) - if (!MaskSelects0 && RetTy == InVecTy && - OpShuf->getMask()->getSplatValue()) - return Op1; - return nullptr; + // Don't fold a shuffle with undef mask elements. This may get folded in a + // better way using demanded bits or other analysis. + // TODO: Should we allow this? + if (find(Indices, -1) != Indices.end()) + return nullptr; + + // Check if every element of this shuffle can be mapped back to the + // corresponding element of a single root vector. If so, we don't need this + // shuffle. This handles simple identity shuffles as well as chains of + // shuffles that may widen/narrow and/or move elements across lanes and back. + Value *RootVec = nullptr; + for (unsigned i = 0; i != MaskNumElts; ++i) { + // Note that recursion is limited for each vector element, so if any element + // exceeds the limit, this will fail to simplify. + RootVec = + foldIdentityShuffles(i, Op0, Op1, Indices[i], RootVec, MaxRecurse); + + // We can't replace a widening/narrowing shuffle with one of its operands. + if (!RootVec || RootVec->getType() != RetTy) + return nullptr; + } + return RootVec; } /// Given operands for a ShuffleVectorInst, fold the result or return null. -Value *llvm::SimplifyShuffleVectorInst( - Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, - const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyShuffleVectorInst( - Op0, Op1, Mask, RetTy, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); +Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const SimplifyQuery &Q) { + return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit); } //=== Helper functions for higher up the class hierarchy. @@ -4143,7 +4201,7 @@ Value *llvm::SimplifyShuffleVectorInst( /// Given operands for a BinaryOperator, see if we can fold the result. /// If not, this returns null. static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::Add: return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse); @@ -4191,7 +4249,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const Query &Q, + const FastMathFlags &FMF, const SimplifyQuery &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::FAdd: @@ -4208,36 +4266,26 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, } Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit); } Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const FastMathFlags &FMF, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + FastMathFlags FMF, const SimplifyQuery &Q) { + return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit); } /// Given operands for a CmpInst, see if we can fold the result. static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + const SimplifyQuery &Q) { + return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit); } static bool IsIdempotent(Intrinsic::ID ID) { @@ -4330,7 +4378,7 @@ static bool maskIsAllZeroOrUndef(Value *Mask) { template static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, - const Query &Q, unsigned MaxRecurse) { + const SimplifyQuery &Q, unsigned MaxRecurse) { Intrinsic::ID IID = F->getIntrinsicID(); unsigned NumOperands = std::distance(ArgBegin, ArgEnd); @@ -4378,19 +4426,21 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: { // X + undef -> undef - if (isa(RHS)) + if (isa(LHS) || isa(RHS)) return UndefValue::get(ReturnType); return nullptr; } case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: { + // 0 * X -> { 0, false } // X * 0 -> { 0, false } - if (match(RHS, m_Zero())) + if (match(LHS, m_Zero()) || match(RHS, m_Zero())) return Constant::getNullValue(ReturnType); + // undef * X -> { 0, false } // X * undef -> { 0, false } - if (match(RHS, m_Undef())) + if (match(LHS, m_Undef()) || match(RHS, m_Undef())) return Constant::getNullValue(ReturnType); return nullptr; @@ -4423,8 +4473,9 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, } template -static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, - const Query &Q, unsigned MaxRecurse) { +static Value *SimplifyCall(ImmutableCallSite CS, Value *V, IterTy ArgBegin, + IterTy ArgEnd, const SimplifyQuery &Q, + unsigned MaxRecurse) { Type *Ty = V->getType(); if (PointerType *PTy = dyn_cast(Ty)) Ty = PTy->getElementType(); @@ -4443,7 +4494,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, if (Value *Ret = SimplifyIntrinsic(F, ArgBegin, ArgEnd, Q, MaxRecurse)) return Ret; - if (!canConstantFoldCallTo(F)) + if (!canConstantFoldCallTo(CS, F)) return nullptr; SmallVector ConstantArgs; @@ -4455,189 +4506,170 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, ConstantArgs.push_back(C); } - return ConstantFoldCall(F, ConstantArgs, Q.TLI); + return ConstantFoldCall(CS, F, ConstantArgs, Q.TLI); } -Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const DataLayout &DL, - const TargetLibraryInfo *TLI, const DominatorTree *DT, - AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); +Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, + User::op_iterator ArgBegin, User::op_iterator ArgEnd, + const SimplifyQuery &Q) { + return ::SimplifyCall(CS, V, ArgBegin, ArgEnd, Q, RecursionLimit); } -Value *llvm::SimplifyCall(Value *V, ArrayRef Args, - const DataLayout &DL, const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, - const Instruction *CxtI) { - return ::SimplifyCall(V, Args.begin(), Args.end(), - Query(DL, TLI, DT, AC, CxtI), RecursionLimit); +Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V, + ArrayRef Args, const SimplifyQuery &Q) { + return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit); } /// See if we can compute a simplified version of this instruction. /// If not, this returns null. -Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, AssumptionCache *AC, + +Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, OptimizationRemarkEmitter *ORE) { + const SimplifyQuery Q = SQ.CxtI ? SQ : SQ.getWithInstruction(I); Value *Result; switch (I->getOpcode()) { default: - Result = ConstantFoldInstruction(I, DL, TLI); + Result = ConstantFoldInstruction(I, Q.DL, Q.TLI); break; case Instruction::FAdd: Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), - cast(I)->hasNoUnsignedWrap(), DL, - TLI, DT, AC, I); + cast(I)->hasNoUnsignedWrap(), Q); break; case Instruction::FSub: Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), - cast(I)->hasNoUnsignedWrap(), DL, - TLI, DT, AC, I); + cast(I)->hasNoUnsignedWrap(), Q); break; case Instruction::FMul: Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Mul: - Result = - SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::SDiv: - Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::UDiv: - Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::SRem: - Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::URem: - Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, - AC, I); + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FRem: Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + I->getFastMathFlags(), Q); break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), - cast(I)->hasNoUnsignedWrap(), DL, - TLI, DT, AC, I); + cast(I)->hasNoUnsignedWrap(), Q); break; case Instruction::LShr: Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1), - cast(I)->isExact(), DL, TLI, DT, - AC, I); + cast(I)->isExact(), Q); break; case Instruction::AShr: Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1), - cast(I)->isExact(), DL, TLI, DT, - AC, I); + cast(I)->isExact(), Q); break; case Instruction::And: - Result = - SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::Or: - Result = - SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::Xor: - Result = - SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), Q); break; case Instruction::ICmp: - Result = - SimplifyICmpInst(cast(I)->getPredicate(), I->getOperand(0), - I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyICmpInst(cast(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), Q); break; case Instruction::FCmp: - Result = SimplifyFCmpInst(cast(I)->getPredicate(), - I->getOperand(0), I->getOperand(1), - I->getFastMathFlags(), DL, TLI, DT, AC, I); + Result = + SimplifyFCmpInst(cast(I)->getPredicate(), I->getOperand(0), + I->getOperand(1), I->getFastMathFlags(), Q); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), DL, TLI, DT, AC, I); + I->getOperand(2), Q); break; case Instruction::GetElementPtr: { - SmallVector Ops(I->op_begin(), I->op_end()); + SmallVector Ops(I->op_begin(), I->op_end()); Result = SimplifyGEPInst(cast(I)->getSourceElementType(), - Ops, DL, TLI, DT, AC, I); + Ops, Q); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), DL, TLI, DT, AC, I); + IV->getIndices(), Q); break; } case Instruction::ExtractValue: { auto *EVI = cast(I); Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), - EVI->getIndices(), DL, TLI, DT, AC, I); + EVI->getIndices(), Q); break; } case Instruction::ExtractElement: { auto *EEI = cast(I); - Result = SimplifyExtractElementInst( - EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I); + Result = SimplifyExtractElementInst(EEI->getVectorOperand(), + EEI->getIndexOperand(), Q); break; } case Instruction::ShuffleVector: { auto *SVI = cast(I); Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), - SVI->getMask(), SVI->getType(), DL, TLI, - DT, AC, I); + SVI->getMask(), SVI->getType(), Q); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast(I), Query(DL, TLI, DT, AC, I)); + Result = SimplifyPHINode(cast(I), Q); break; case Instruction::Call: { CallSite CS(cast(I)); - Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL, - TLI, DT, AC, I); + Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + Q); break; } #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: #include "llvm/IR/Instruction.def" #undef HANDLE_CAST_INST - Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), - DL, TLI, DT, AC, I); + Result = + SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), Q); + break; + case Instruction::Alloca: + // No simplifications for Alloca and it can't be constant folded. + Result = nullptr; break; } // In general, it is possible for computeKnownBits to determine all bits in a // value even when the operands are not all constants. if (!Result && I->getType()->isIntOrIntVectorTy()) { - unsigned BitWidth = I->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT, ORE); - if ((KnownZero | KnownOne).isAllOnesValue()) - Result = ConstantInt::get(I->getType(), KnownOne); + KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); + if (Known.isConstant()) + Result = ConstantInt::get(I->getType(), Known.getConstant()); } /// If called on unreachable code, the above logic may report that the @@ -4689,7 +4721,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV, I = Worklist[Idx]; // See if this instruction simplifies. - SimpleV = SimplifyInstruction(I, DL, TLI, DT, AC); + SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC}); if (!SimpleV) continue; @@ -4728,3 +4760,31 @@ bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, assert(SimpleV && "Must provide a simplified value."); return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC); } + +namespace llvm { +const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) { + auto *DTWP = P.getAnalysisIfAvailable(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *TLIWP = P.getAnalysisIfAvailable(); + auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; + auto *ACWP = P.getAnalysisIfAvailable(); + auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr; + return {F.getParent()->getDataLayout(), TLI, DT, AC}; +} + +const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &AR, + const DataLayout &DL) { + return {DL, &AR.TLI, &AR.DT, &AR.AC}; +} + +template +const SimplifyQuery getBestSimplifyQuery(AnalysisManager &AM, + Function &F) { + auto *DT = AM.template getCachedResult(F); + auto *TLI = AM.template getCachedResult(F); + auto *AC = AM.template getCachedResult(F); + return {F.getParent()->getDataLayout(), TLI, DT, AC}; +} +template const SimplifyQuery getBestSimplifyQuery(AnalysisManager &, + Function &); +} diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt index 15c757b48f76c8a6e0452e009a8cbea5d5600681..8a87b980b0a8964024e1a537116a7118b88808b2 100644 --- a/lib/Analysis/LLVMBuild.txt +++ b/lib/Analysis/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Analysis parent = Libraries -required_libraries = Core Support ProfileData Object +required_libraries = BinaryFormat Core Object ProfileData Support diff --git a/lib/Analysis/LazyBranchProbabilityInfo.cpp b/lib/Analysis/LazyBranchProbabilityInfo.cpp index b51c6beb795928277a674f70d50fd7b109fecb50..e2884d0a45646c295744f571b0a98b91318a4194 100644 --- a/lib/Analysis/LazyBranchProbabilityInfo.cpp +++ b/lib/Analysis/LazyBranchProbabilityInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/LazyBranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -24,6 +25,7 @@ using namespace llvm; INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE, "Lazy Branch Probability Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE, "Lazy Branch Probability Analysis", true, true) @@ -41,6 +43,7 @@ void LazyBranchProbabilityInfoPass::print(raw_ostream &OS, void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } @@ -48,16 +51,19 @@ void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); } bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) { LoopInfo &LI = getAnalysis().getLoopInfo(); - LBPI = llvm::make_unique(&F, &LI); + TargetLibraryInfo &TLI = getAnalysis().getTLI(); + LBPI = llvm::make_unique(&F, &LI, &TLI); return false; } void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) { AU.addRequired(); AU.addRequired(); + AU.addRequired(); } void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass); INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); + INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass); } diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index eef56815f2e07f515b201a76da8bd9683effdade..b6a9436cc1ec3f8079d47e544f961536200ff1e4 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -8,10 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/Sequence.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/Sequence.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index ad01f7f2f2158e07b9dd12cf314f1de3b38e94f0..3ed61a79478ad48c6d4399418874d1292fe87c41 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -142,7 +142,7 @@ public: return Val; } - ConstantRange getConstantRange() const { + const ConstantRange &getConstantRange() const { assert(isConstantRange() && "Cannot get the constant-range of a non-constant-range!"); return Range; @@ -250,7 +250,7 @@ public: if (NewR.isFullSet()) markOverdefined(); else - markConstantRange(NewR); + markConstantRange(std::move(NewR)); } }; @@ -302,7 +302,7 @@ static bool hasSingleValue(const LVILatticeVal &Val) { /// contradictory. If this happens, we return some valid lattice value so as /// not confuse the rest of LVI. Ideally, we'd always return Undefined, but /// we do not make this guarantee. TODO: This would be a useful enhancement. -static LVILatticeVal intersect(LVILatticeVal A, LVILatticeVal B) { +static LVILatticeVal intersect(const LVILatticeVal &A, const LVILatticeVal &B) { // Undefined is the strongest state. It means the value is known to be along // an unreachable path. if (A.isUndefined()) @@ -364,7 +364,6 @@ namespace { /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { - friend class LazyValueInfoAnnotatedWriter; /// This is all of the cached block information for exactly one Value*. /// The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. @@ -384,7 +383,6 @@ namespace { /// don't spend time removing unused blocks from our caches. DenseSet > SeenBlocks; - protected: /// This is all of the cached information for all values, /// mapped from Value* to key information. DenseMap> ValueCache; @@ -443,7 +441,6 @@ namespace { return BBI->second; } - void printCache(Function &F, raw_ostream &OS); /// clear - Empty the cache. void clear() { SeenBlocks.clear(); @@ -467,61 +464,6 @@ namespace { }; } - -namespace { - - /// An assembly annotator class to print LazyValueCache information in - /// comments. - class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter { - const LazyValueInfoCache* LVICache; - - public: - LazyValueInfoAnnotatedWriter(const LazyValueInfoCache *L) : LVICache(L) {} - - virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, - formatted_raw_ostream &OS) { - auto ODI = LVICache->OverDefinedCache.find(const_cast(BB)); - if (ODI == LVICache->OverDefinedCache.end()) - return; - OS << "; OverDefined values for block are: \n"; - for (auto *V : ODI->second) - OS << ";" << *V << "\n"; - - // Find if there are latticevalues defined for arguments of the function. - auto *F = const_cast(BB->getParent()); - for (auto &Arg : F->args()) { - auto VI = LVICache->ValueCache.find_as(&Arg); - if (VI == LVICache->ValueCache.end()) - continue; - auto BBI = VI->second->BlockVals.find(const_cast(BB)); - if (BBI != VI->second->BlockVals.end()) - OS << "; CachedLatticeValue for: '" << *VI->first << "' is: '" - << BBI->second << "'\n"; - } - } - - virtual void emitInstructionAnnot(const Instruction *I, - formatted_raw_ostream &OS) { - - auto VI = LVICache->ValueCache.find_as(const_cast(I)); - if (VI == LVICache->ValueCache.end()) - return; - OS << "; CachedLatticeValues for: '" << *VI->first << "'\n"; - for (auto &BV : VI->second->BlockVals) { - OS << "; at beginning of BasicBlock: '"; - BV.first->printAsOperand(OS, false); - OS << "' LatticeVal: '" << BV.second << "' \n"; - } - } -}; -} - -void LazyValueInfoCache::printCache(Function &F, raw_ostream &OS) { - LazyValueInfoAnnotatedWriter Writer(this); - F.print(OS, &Writer); - -} - void LazyValueInfoCache::eraseValue(Value *V) { for (auto I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E;) { // Copy and increment the iterator immediately so we can erase behind @@ -615,6 +557,30 @@ void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc, } } + +namespace { +/// An assembly annotator class to print LazyValueCache information in +/// comments. +class LazyValueInfoImpl; +class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter { + LazyValueInfoImpl *LVIImpl; + // While analyzing which blocks we can solve values for, we need the dominator + // information. Since this is an optional parameter in LVI, we require this + // DomTreeAnalysis pass in the printer pass, and pass the dominator + // tree to the LazyValueInfoAnnotatedWriter. + DominatorTree &DT; + +public: + LazyValueInfoAnnotatedWriter(LazyValueInfoImpl *L, DominatorTree &DTree) + : LVIImpl(L), DT(DTree) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS); + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS); +}; +} namespace { // The actual implementation of the lazy analysis and update. Note that the // inheritance from LazyValueInfoCache is intended to be temporary while @@ -662,13 +628,13 @@ namespace { bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB); bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S, BasicBlock *BB); - bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, Instruction *BBI, + bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, BinaryOperator *BBI, BasicBlock *BB); - bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI, + bool solveBlockValueCast(LVILatticeVal &BBLV, CastInst *CI, BasicBlock *BB); void intersectAssumeOrGuardBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV, - Instruction *BBI); + Instruction *BBI); void solve(); @@ -693,9 +659,10 @@ namespace { TheCache.clear(); } - /// Printing the LazyValueInfoCache. - void printCache(Function &F, raw_ostream &OS) { - TheCache.printCache(F, OS); + /// Printing the LazyValueInfo Analysis. + void printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { + LazyValueInfoAnnotatedWriter Writer(this, DTree); + F.print(OS, &Writer); } /// This is part of the update interface to inform the cache @@ -714,6 +681,7 @@ namespace { }; } // end anonymous namespace + void LazyValueInfoImpl::solve() { SmallVector, 8> StartingStack( BlockValueStack.begin(), BlockValueStack.end()); @@ -838,7 +806,7 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res, // that for all other pointer typed values, we terminate the search at the // definition. We could easily extend this to look through geps, bitcasts, // and the like to prove non-nullness, but it's not clear that's worth it - // compile time wise. The context-insensative value walk done inside + // compile time wise. The context-insensitive value walk done inside // isKnownNonNull gets most of the profitable cases at much less expense. // This does mean that we have a sensativity to where the defining // instruction is placed, even if it could legally be hoisted much higher. @@ -849,12 +817,12 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res, return true; } if (BBI->getType()->isIntegerTy()) { - if (isa(BBI)) - return solveBlockValueCast(Res, BBI, BB); - + if (auto *CI = dyn_cast(BBI)) + return solveBlockValueCast(Res, CI, BB); + BinaryOperator *BO = dyn_cast(BBI); if (BO && isa(BO->getOperand(1))) - return solveBlockValueBinaryOp(Res, BBI, BB); + return solveBlockValueBinaryOp(Res, BO, BB); } DEBUG(dbgs() << " compute BB '" << BB->getName() @@ -920,7 +888,7 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV, // value is overdefined. if (BB == &BB->getParent()->getEntryBlock()) { assert(isa(Val) && "Unknown live-in to the entry block"); - // Bofore giving up, see if we can prove the pointer non-null local to + // Before giving up, see if we can prove the pointer non-null local to // this particular block. if (Val->getType()->isPointerTy() && (isKnownNonNull(Val) || isObjectDereferencedInBlock(Val, BB))) { @@ -1079,8 +1047,8 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV, } if (TrueVal.isConstantRange() && FalseVal.isConstantRange()) { - ConstantRange TrueCR = TrueVal.getConstantRange(); - ConstantRange FalseCR = FalseVal.getConstantRange(); + const ConstantRange &TrueCR = TrueVal.getConstantRange(); + const ConstantRange &FalseCR = FalseVal.getConstantRange(); Value *LHS = nullptr; Value *RHS = nullptr; SelectPatternResult SPR = matchSelectPattern(SI, LHS, RHS); @@ -1168,9 +1136,9 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV, } bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, - Instruction *BBI, - BasicBlock *BB) { - if (!BBI->getOperand(0)->getType()->isSized()) { + CastInst *CI, + BasicBlock *BB) { + if (!CI->getOperand(0)->getType()->isSized()) { // Without knowing how wide the input is, we can't analyze it in any useful // way. BBLV = LVILatticeVal::getOverdefined(); @@ -1180,7 +1148,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, // Filter out casts we don't know how to reason about before attempting to // recurse on our operand. This can cut a long search short if we know we're // not going to be able to get any useful information anways. - switch (BBI->getOpcode()) { + switch (CI->getOpcode()) { case Instruction::Trunc: case Instruction::SExt: case Instruction::ZExt: @@ -1197,44 +1165,43 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV, // Figure out the range of the LHS. If that fails, we still apply the // transfer rule on the full set since we may be able to locally infer // interesting facts. - if (!hasBlockValue(BBI->getOperand(0), BB)) - if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0)))) + if (!hasBlockValue(CI->getOperand(0), BB)) + if (pushBlockValue(std::make_pair(BB, CI->getOperand(0)))) // More work to do before applying this transfer rule. return false; const unsigned OperandBitWidth = - DL.getTypeSizeInBits(BBI->getOperand(0)->getType()); + DL.getTypeSizeInBits(CI->getOperand(0)->getType()); ConstantRange LHSRange = ConstantRange(OperandBitWidth); - if (hasBlockValue(BBI->getOperand(0), BB)) { - LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); - intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal, - BBI); + if (hasBlockValue(CI->getOperand(0), BB)) { + LVILatticeVal LHSVal = getBlockValue(CI->getOperand(0), BB); + intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal, + CI); if (LHSVal.isConstantRange()) LHSRange = LHSVal.getConstantRange(); } - const unsigned ResultBitWidth = - cast(BBI->getType())->getBitWidth(); + const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth(); // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. - auto CastOp = (Instruction::CastOps) BBI->getOpcode(); - BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth)); + BBLV = LVILatticeVal::getRange(LHSRange.castOp(CI->getOpcode(), + ResultBitWidth)); return true; } bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, - Instruction *BBI, + BinaryOperator *BO, BasicBlock *BB) { - assert(BBI->getOperand(0)->getType()->isSized() && + assert(BO->getOperand(0)->getType()->isSized() && "all operands to binary operators are sized"); // Filter out operators we don't know how to reason about before attempting to // recurse on our operand(s). This can cut a long search short if we know - // we're not going to be able to get any useful information anways. - switch (BBI->getOpcode()) { + // we're not going to be able to get any useful information anyways. + switch (BO->getOpcode()) { case Instruction::Add: case Instruction::Sub: case Instruction::Mul: @@ -1256,29 +1223,29 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV, // Figure out the range of the LHS. If that fails, use a conservative range, // but apply the transfer rule anyways. This lets us pick up facts from // expressions like "and i32 (call i32 @foo()), 32" - if (!hasBlockValue(BBI->getOperand(0), BB)) - if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0)))) + if (!hasBlockValue(BO->getOperand(0), BB)) + if (pushBlockValue(std::make_pair(BB, BO->getOperand(0)))) // More work to do before applying this transfer rule. return false; const unsigned OperandBitWidth = - DL.getTypeSizeInBits(BBI->getOperand(0)->getType()); + DL.getTypeSizeInBits(BO->getOperand(0)->getType()); ConstantRange LHSRange = ConstantRange(OperandBitWidth); - if (hasBlockValue(BBI->getOperand(0), BB)) { - LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB); - intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal, - BBI); + if (hasBlockValue(BO->getOperand(0), BB)) { + LVILatticeVal LHSVal = getBlockValue(BO->getOperand(0), BB); + intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal, + BO); if (LHSVal.isConstantRange()) LHSRange = LHSVal.getConstantRange(); } - ConstantInt *RHS = cast(BBI->getOperand(1)); + ConstantInt *RHS = cast(BO->getOperand(1)); ConstantRange RHSRange = ConstantRange(RHS->getValue()); // NOTE: We're currently limited by the set of operations that ConstantRange // can evaluate symbolically. Enhancing that set will allows us to analyze // more definitions. - auto BinOp = (Instruction::BinaryOps) BBI->getOpcode(); + Instruction::BinaryOps BinOp = BO->getOpcode(); BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange)); return true; } @@ -1649,7 +1616,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, if (Result.isConstant()) return Result.getConstant(); if (Result.isConstantRange()) { - ConstantRange CR = Result.getConstantRange(); + const ConstantRange &CR = Result.getConstantRange(); if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); } @@ -1686,7 +1653,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, if (Result.isConstant()) return Result.getConstant(); if (Result.isConstantRange()) { - ConstantRange CR = Result.getConstantRange(); + const ConstantRange &CR = Result.getConstantRange(); if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); } @@ -1694,63 +1661,62 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, } static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, - LVILatticeVal &Result, + const LVILatticeVal &Val, const DataLayout &DL, TargetLibraryInfo *TLI) { // If we know the value is a constant, evaluate the conditional. Constant *Res = nullptr; - if (Result.isConstant()) { - Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL, - TLI); + if (Val.isConstant()) { + Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL, TLI); if (ConstantInt *ResCI = dyn_cast(Res)) return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True; return LazyValueInfo::Unknown; } - if (Result.isConstantRange()) { + if (Val.isConstantRange()) { ConstantInt *CI = dyn_cast(C); if (!CI) return LazyValueInfo::Unknown; - ConstantRange CR = Result.getConstantRange(); + const ConstantRange &CR = Val.getConstantRange(); if (Pred == ICmpInst::ICMP_EQ) { if (!CR.contains(CI->getValue())) return LazyValueInfo::False; - if (CR.isSingleElement() && CR.contains(CI->getValue())) + if (CR.isSingleElement()) return LazyValueInfo::True; } else if (Pred == ICmpInst::ICMP_NE) { if (!CR.contains(CI->getValue())) return LazyValueInfo::True; - if (CR.isSingleElement() && CR.contains(CI->getValue())) + if (CR.isSingleElement()) + return LazyValueInfo::False; + } else { + // Handle more complex predicates. + ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( + (ICmpInst::Predicate)Pred, CI->getValue()); + if (TrueValues.contains(CR)) + return LazyValueInfo::True; + if (TrueValues.inverse().contains(CR)) return LazyValueInfo::False; } - - // Handle more complex predicates. - ConstantRange TrueValues = ConstantRange::makeExactICmpRegion( - (ICmpInst::Predicate)Pred, CI->getValue()); - if (TrueValues.contains(CR)) - return LazyValueInfo::True; - if (TrueValues.inverse().contains(CR)) - return LazyValueInfo::False; return LazyValueInfo::Unknown; } - if (Result.isNotConstant()) { + if (Val.isNotConstant()) { // If this is an equality comparison, we can try to fold it knowing that // "V != C1". if (Pred == ICmpInst::ICMP_EQ) { // !C1 == C -> false iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, DL, + Val.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return LazyValueInfo::False; } else if (Pred == ICmpInst::ICMP_NE) { // !C1 != C -> true iff C1 == C. Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE, - Result.getNotConstant(), C, DL, + Val.getNotConstant(), C, DL, TLI); if (Res->isNullValue()) return LazyValueInfo::True; @@ -1891,12 +1857,65 @@ void LazyValueInfo::eraseBlock(BasicBlock *BB) { } -void LazyValueInfo::printCache(Function &F, raw_ostream &OS) { +void LazyValueInfo::printLVI(Function &F, DominatorTree &DTree, raw_ostream &OS) { if (PImpl) { - getImpl(PImpl, AC, DL, DT).printCache(F, OS); + getImpl(PImpl, AC, DL, DT).printLVI(F, DTree, OS); } } +// Print the LVI for the function arguments at the start of each basic block. +void LazyValueInfoAnnotatedWriter::emitBasicBlockStartAnnot( + const BasicBlock *BB, formatted_raw_ostream &OS) { + // Find if there are latticevalues defined for arguments of the function. + auto *F = BB->getParent(); + for (auto &Arg : F->args()) { + LVILatticeVal Result = LVIImpl->getValueInBlock( + const_cast(&Arg), const_cast(BB)); + if (Result.isUndefined()) + continue; + OS << "; LatticeVal for: '" << Arg << "' is: " << Result << "\n"; + } +} + +// This function prints the LVI analysis for the instruction I at the beginning +// of various basic blocks. It relies on calculated values that are stored in +// the LazyValueInfoCache, and in the absence of cached values, recalculte the +// LazyValueInfo for `I`, and print that info. +void LazyValueInfoAnnotatedWriter::emitInstructionAnnot( + const Instruction *I, formatted_raw_ostream &OS) { + + auto *ParentBB = I->getParent(); + SmallPtrSet BlocksContainingLVI; + // We can generate (solve) LVI values only for blocks that are dominated by + // the I's parent. However, to avoid generating LVI for all dominating blocks, + // that contain redundant/uninteresting information, we print LVI for + // blocks that may use this LVI information (such as immediate successor + // blocks, and blocks that contain uses of `I`). + auto printResult = [&](const BasicBlock *BB) { + if (!BlocksContainingLVI.insert(BB).second) + return; + LVILatticeVal Result = LVIImpl->getValueInBlock( + const_cast(I), const_cast(BB)); + OS << "; LatticeVal for: '" << *I << "' in BB: '"; + BB->printAsOperand(OS, false); + OS << "' is: " << Result << "\n"; + }; + + printResult(ParentBB); + // Print the LVI analysis results for the the immediate successor blocks, that + // are dominated by `ParentBB`. + for (auto *BBSucc : successors(ParentBB)) + if (DT.dominates(ParentBB, BBSucc)) + printResult(BBSucc); + + // Print LVI in blocks where `I` is used. + for (auto *U : I->users()) + if (auto *UseI = dyn_cast(U)) + if (!isa(UseI) || DT.dominates(ParentBB, UseI->getParent())) + printResult(UseI->getParent()); + +} + namespace { // Printer class for LazyValueInfo results. class LazyValueInfoPrinter : public FunctionPass { @@ -1909,12 +1928,16 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); AU.addRequired(); + AU.addRequired(); } + // Get the mandatory dominator tree analysis and pass this in to the + // LVIPrinter. We cannot rely on the LVI's DT, since it's optional. bool runOnFunction(Function &F) override { dbgs() << "LVI for function '" << F.getName() << "':\n"; auto &LVI = getAnalysis().getLVI(); - LVI.printCache(F, dbgs()); + auto &DTree = getAnalysis().getDomTree(); + LVI.printLVI(F, DTree, dbgs()); return false; } }; diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 2ca46b1fe872f1b4dc76c7a6fcf2ca44e414f60e..9713588537b3909b0c19bb106d009ff9678c83a7 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -58,18 +58,19 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -533,11 +534,8 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast(V->getType()); if (!VecTy) { - unsigned BitWidth = V->getType()->getIntegerBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, - dyn_cast(V), DT); - return KnownZero.isAllOnesValue(); + KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); + return Known.isZero(); } // Per-component check doesn't work with zeroinitializer @@ -550,15 +548,13 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, // For a vector, KnownZero will only be true if all values are zero, so check // this per component - unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth(); for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { Constant *Elem = C->getAggregateElement(I); if (isa(Elem)) return true; - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Elem, KnownZero, KnownOne, DL); - if (KnownZero.isAllOnesValue()) + KnownBits Known = computeKnownBits(Elem, DL); + if (Known.isZero()) return true; } @@ -699,7 +695,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, // As a last resort, try SimplifyInstruction or constant folding. if (Instruction *Inst = dyn_cast(V)) { - if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC)) + if (Value *W = SimplifyInstruction(Inst, {*DL, TLI, DT, AC})) return findValueImpl(W, OffsetOk, Visited); } else if (auto *C = dyn_cast(V)) { if (Value *W = ConstantFoldConstant(C, *DL, TLI)) diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 0b5f6266e3737a5aa9d1497b1caaae7e92b2151d..e988f6444a58d08a94f31377ccb23d8e2fcd3e41 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -73,30 +73,23 @@ LPPassManager::LPPassManager() CurrentLoop = nullptr; } -// Inset loop into loop nest (LoopInfo) and loop queue (LQ). -Loop &LPPassManager::addLoop(Loop *ParentLoop) { - // Create a new loop. LI will take ownership. - Loop *L = new Loop(); - - // Insert into the loop nest and the loop queue. - if (!ParentLoop) { +// Insert loop into loop nest (LoopInfo) and loop queue (LQ). +void LPPassManager::addLoop(Loop &L) { + if (!L.getParentLoop()) { // This is the top level loop. - LI->addTopLevelLoop(L); - LQ.push_front(L); - return *L; + LQ.push_front(&L); + return; } - ParentLoop->addChildLoop(L); // Insert L into the loop queue after the parent loop. for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) { - if (*I == L->getParentLoop()) { + if (*I == L.getParentLoop()) { // deque does not support insert after. ++I; - LQ.insert(I, 1, L); - break; + LQ.insert(I, 1, &L); + return; } } - return *L; } /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index e7a85ae06e68151c0bdd651be5a205fdab1dc669..5c0cbb26484c1b74b78d48a302f20fb34a168cbb 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/LLVMContext.h" diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp index fa0cc5a46c2b864d19b291d59071c0044c089d84..4231a78352ce5d43a83f6c1e6f7534ba28e0ae6e 100644 --- a/lib/Analysis/MemDerefPrinter.cpp +++ b/lib/Analysis/MemDerefPrinter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstIterator.h" diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index f99d3b3fbda324da88029db521dacc41c9c2ffc7..7983d62c2f7aac8eaca65068b4374377e342a99a 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -37,6 +37,7 @@ enum AllocType : uint8_t { CallocLike = 1<<2, // allocates + bzero ReallocLike = 1<<3, // reallocates StrDupLike = 1<<4, + MallocOrCallocLike = MallocLike | CallocLike, AllocLike = MallocLike | CallocLike | StrDupLike, AnyAlloc = AllocLike | ReallocLike }; @@ -77,8 +78,8 @@ static const std::pair AllocationFnData[] = { // TODO: Handle "int posix_memalign(void **, size_t, size_t)" }; -static Function *getCalledFunction(const Value *V, bool LookThroughBitCast, - bool &IsNoBuiltin) { +static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast, + bool &IsNoBuiltin) { // Don't care about intrinsics in this case. if (isa(V)) return nullptr; @@ -86,13 +87,13 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast, if (LookThroughBitCast) V = V->stripPointerCasts(); - CallSite CS(const_cast(V)); + ImmutableCallSite CS(V); if (!CS.getInstruction()) return nullptr; IsNoBuiltin = CS.isNoBuiltin(); - Function *Callee = CS.getCalledFunction(); + const Function *Callee = CS.getCalledFunction(); if (!Callee || !Callee->isDeclaration()) return nullptr; return Callee; @@ -183,7 +184,7 @@ static Optional getAllocationSize(const Value *V, static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V); - return CS && CS.paramHasAttr(AttributeList::ReturnIndex, Attribute::NoAlias); + return CS && CS.hasRetAttr(Attribute::NoAlias); } @@ -219,6 +220,14 @@ bool llvm::isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, return getAllocationData(V, CallocLike, TLI, LookThroughBitCast).hasValue(); } +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory similiar to malloc or calloc. +bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, MallocOrCallocLike, TLI, + LookThroughBitCast).hasValue(); +} + /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory (either malloc, calloc, or strdup like). bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 66a0d145dcd853e8b5b4d5271427de00e9f3c21a..3fdedbb0ab3c293a18daee4b9286f508ed966877 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -15,17 +15,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/OrderedBasicBlock.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -691,6 +691,7 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( // load query, we can safely ignore it (scan past it). if (isLoad) continue; + LLVM_FALLTHROUGH; default: // Otherwise, there is a potential dependence. Return a clobber. return MemDepResult::getClobber(Inst); diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp index 910170561abf6cc5d72eea27a124bc16b6f4d7f3..86d0d92799f22ed6ef6f3bc7cdbfad06a9314458 100644 --- a/lib/Analysis/MemorySSA.cpp +++ b/lib/Analysis/MemorySSA.cpp @@ -1291,7 +1291,6 @@ void MemorySSA::buildMemorySSA() { // could just look up the memory access for every possible instruction in the // stream. SmallPtrSet DefiningBlocks; - SmallPtrSet DefUseBlocks; // Go through each block, figure out where defs occur, and chain together all // the accesses. for (BasicBlock &B : F) { @@ -1316,8 +1315,6 @@ void MemorySSA::buildMemorySSA() { } if (InsertIntoDef) DefiningBlocks.insert(&B); - if (Accesses) - DefUseBlocks.insert(&B); } placePHINodes(DefiningBlocks, BBNumbers); @@ -1802,6 +1799,15 @@ bool MemorySSA::dominates(const MemoryAccess *Dominator, const static char LiveOnEntryStr[] = "liveOnEntry"; +void MemoryAccess::print(raw_ostream &OS) const { + switch (getValueID()) { + case MemoryPhiVal: return static_cast(this)->print(OS); + case MemoryDefVal: return static_cast(this)->print(OS); + case MemoryUseVal: return static_cast(this)->print(OS); + } + llvm_unreachable("invalid value id"); +} + void MemoryDef::print(raw_ostream &OS) const { MemoryAccess *UO = getDefiningAccess(); @@ -1839,8 +1845,6 @@ void MemoryPhi::print(raw_ostream &OS) const { OS << ')'; } -MemoryAccess::~MemoryAccess() {} - void MemoryUse::print(raw_ostream &OS) const { MemoryAccess *UO = getDefiningAccess(); OS << "MemoryUse("; @@ -1868,7 +1872,6 @@ MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); - AU.addPreserved(); } bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { @@ -1953,6 +1956,7 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess( #ifdef EXPENSIVE_CHECKS MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q); assert(NewNoCache == New && "Cache made us hand back a different result?"); + (void)NewNoCache; #endif if (AutoResetWalker) resetClobberWalker(); @@ -2057,3 +2061,15 @@ MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( return StartingAccess; } } // namespace llvm + +void MemoryPhi::deleteMe(DerivedUser *Self) { + delete static_cast(Self); +} + +void MemoryDef::deleteMe(DerivedUser *Self) { + delete static_cast(Self); +} + +void MemoryUse::deleteMe(DerivedUser *Self) { + delete static_cast(Self); +} diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp index c63677fe5502a893d0730d37bd4565a3f4224a18..1ff84471c094675cfbeea698715faf9b0560d6ef 100644 --- a/lib/Analysis/MemorySSAUpdater.cpp +++ b/lib/Analysis/MemorySSAUpdater.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalVariable.h" @@ -24,12 +25,11 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Analysis/MemorySSA.h" #include #define DEBUG_TYPE "memoryssa" using namespace llvm; -namespace llvm { + // This is the marker algorithm from "Simple and Efficient Construction of // Static Single Assignment Form" // The simple, non-marker algorithm places phi nodes at any join @@ -124,17 +124,12 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) { return &*Iter; } else { // Otherwise, have to walk the all access iterator. - auto Iter = MA->getReverseIterator(); - ++Iter; - while (&*Iter != &*Defs->begin()) { - if (!isa(*Iter)) - return &*Iter; - --Iter; - } - // At this point it must be pointing at firstdef - assert(&*Iter == &*Defs->begin() && - "Should have hit first def walking backwards"); - return &*Iter; + auto End = MSSA->getWritableBlockAccesses(MA->getBlock())->rend(); + for (auto &U : make_range(++MA->getReverseIterator(), End)) + if (!isa(U)) + return cast(&U); + // Note that if MA comes before Defs->begin(), we won't hit a def. + return nullptr; } } return nullptr; @@ -211,8 +206,8 @@ void MemorySSAUpdater::insertUse(MemoryUse *MU) { } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. -void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, - MemoryAccess *NewDef) { +static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, + MemoryAccess *NewDef) { // Replace any operand with us an incoming block with the new defining // access. int i = MP->getBasicBlockIndex(BB); @@ -415,6 +410,7 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) { } return MA; } + void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { assert(!MSSA->isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); @@ -490,5 +486,3 @@ MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessAfter( ++InsertPt->getIterator()); return NewAccess; } - -} // namespace llvm diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index f675830aa67d9a429a07d2fbb6407f99cb7fb4fd..e12cdf9182c7439df71ed4a607ddcbb5143e79b5 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Passes.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index f6d9a73e4e9a5bf00b468133e570fc7bcc18efc7..095647e1bd20b233507508139db1554f0e03513a 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -37,7 +37,8 @@ using namespace llvm; // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). -static void findRefEdges(const User *CurUser, SetVector &RefEdges, +static void findRefEdges(ModuleSummaryIndex &Index, const User *CurUser, + SetVector &RefEdges, SmallPtrSet &Visited) { SmallVector Worklist; Worklist.push_back(CurUser); @@ -61,7 +62,7 @@ static void findRefEdges(const User *CurUser, SetVector &RefEdges, // the reference set unless it is a callee. Callees are handled // specially by WriteFunction and are added to a separate list. if (!(CS && CS.isCallee(&OI))) - RefEdges.insert(GV); + RefEdges.insert(Index.getOrInsertValueInfo(GV)); continue; } Worklist.push_back(Operand); @@ -198,7 +199,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, if (isa(I)) continue; ++NumInsts; - findRefEdges(&I, RefEdges, Visited); + findRefEdges(Index, &I, RefEdges, Visited); auto CS = ImmutableCallSite(&I); if (!CS) continue; @@ -231,7 +232,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, } // We should have named any anonymous globals assert(CalledFunction->hasName()); - auto ScaledCount = ProfileSummaryInfo::getProfileCount(&I, BFI); + auto ScaledCount = PSI->getProfileCount(&I, BFI); auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI) : CalleeInfo::HotnessType::Unknown; @@ -239,7 +240,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // to record the call edge to the alias in that case. Eventually // an alias summary will be created to associate the alias and // aliasee. - CallGraphEdges[cast(CalledValue)].updateHotness(Hotness); + CallGraphEdges[Index.getOrInsertValueInfo( + cast(CalledValue))] + .updateHotness(Hotness); } else { // Skip inline assembly calls. if (CI && CI->isInlineAsm()) @@ -254,15 +257,16 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, ICallAnalysis.getPromotionCandidatesForInstruction( &I, NumVals, TotalCount, NumCandidates); for (auto &Candidate : CandidateProfileData) - CallGraphEdges[Candidate.Value].updateHotness( - getHotness(Candidate.Count, PSI)); + CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)] + .updateHotness(getHotness(Candidate.Count, PSI)); } } // Explicit add hot edges to enforce importing for designated GUIDs for // sample PGO, to enable the same inlines as the profiled optimized binary. for (auto &I : F.getImportGUIDs()) - CallGraphEdges[I].updateHotness(CalleeInfo::HotnessType::Hot); + CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness( + CalleeInfo::HotnessType::Hot); bool NonRenamableLocal = isNonRenamableLocal(F); bool NotEligibleForImport = @@ -271,7 +275,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // FIXME: refactor this to use the same code that inliner is using. F.isVarArg(); GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, - /* LiveRoot = */ false); + /* Live = */ false); auto FuncSummary = llvm::make_unique( Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), @@ -288,10 +292,10 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V, DenseSet &CantBePromoted) { SetVector RefEdges; SmallPtrSet Visited; - findRefEdges(&V, RefEdges, Visited); + findRefEdges(Index, &V, RefEdges, Visited); bool NonRenamableLocal = isNonRenamableLocal(V); GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal, - /* LiveRoot = */ false); + /* Live = */ false); auto GVarSummary = llvm::make_unique(Flags, RefEdges.takeVector()); if (NonRenamableLocal) @@ -304,7 +308,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, DenseSet &CantBePromoted) { bool NonRenamableLocal = isNonRenamableLocal(A); GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal, - /* LiveRoot = */ false); + /* Live = */ false); auto AS = llvm::make_unique(Flags, ArrayRef{}); auto *Aliasee = A.getBaseObject(); auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee); @@ -317,18 +321,16 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A, // Set LiveRoot flag on entries matching the given value name. static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) { - auto SummaryList = - Index.findGlobalValueSummaryList(GlobalValue::getGUID(Name)); - if (SummaryList == Index.end()) - return; - for (auto &Summary : SummaryList->second) - Summary->setLiveRoot(); + if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name))) + for (auto &Summary : VI.getSummaryList()) + Summary->setLive(true); } ModuleSummaryIndex llvm::buildModuleSummaryIndex( const Module &M, std::function GetBFICallback, ProfileSummaryInfo *PSI) { + assert(PSI); ModuleSummaryIndex Index; // Identify the local values in the llvm.used and llvm.compiler.used sets, @@ -421,8 +423,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( return; assert(GV->isDeclaration() && "Def in module asm already has definition"); GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage, - /* NotEligibleToImport */ true, - /* LiveRoot */ true); + /* NotEligibleToImport = */ true, + /* Live = */ true); CantBePromoted.insert(GlobalValue::getGUID(Name)); // Create the appropriate summary type. if (isa(GV)) { @@ -445,19 +447,27 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( }); } + bool IsThinLTO = true; + if (auto *MD = + mdconst::extract_or_null(M.getModuleFlag("ThinLTO"))) + IsThinLTO = MD->getZExtValue(); + for (auto &GlobalList : Index) { - assert(GlobalList.second.size() == 1 && + // Ignore entries for references that are undefined in the current module. + if (GlobalList.second.SummaryList.empty()) + continue; + + assert(GlobalList.second.SummaryList.size() == 1 && "Expected module's index to have one summary per GUID"); - auto &Summary = GlobalList.second[0]; + auto &Summary = GlobalList.second.SummaryList[0]; + if (!IsThinLTO) { + Summary->setNotEligibleToImport(); + continue; + } + bool AllRefsCanBeExternallyReferenced = llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) { - // If a global value definition references an unnamed global, - // be conservative. They're valid IR so we don't want to crash - // when we encounter any of them but they're infrequent enough - // that we don't bother optimizing them. - if (!VI.getValue()->hasName()) - return false; - return !CantBePromoted.count(VI.getValue()->getGUID()); + return !CantBePromoted.count(VI.getGUID()); }); if (!AllRefsCanBeExternallyReferenced) { Summary->setNotEligibleToImport(); @@ -467,9 +477,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( if (auto *FuncSummary = dyn_cast(Summary.get())) { bool AllCallsCanBeExternallyReferenced = llvm::all_of( FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) { - auto GUID = Edge.first.isGUID() ? Edge.first.getGUID() - : Edge.first.getValue()->getGUID(); - return !CantBePromoted.count(GUID); + return !CantBePromoted.count(Edge.first.getGUID()); }); if (!AllCallsCanBeExternallyReferenced) Summary->setNotEligibleToImport(); diff --git a/lib/Analysis/ObjCARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp index 1e75c0824d031e6a70dcbd773f4e617478da35e2..f374dd33f86f6bc3a91cbb98d3bb1fcfa9fc1170 100644 --- a/lib/Analysis/ObjCARCInstKind.cpp +++ b/lib/Analysis/ObjCARCInstKind.cpp @@ -20,8 +20,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ObjCARCInstKind.h" -#include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/IR/Intrinsics.h" using namespace llvm; diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationDiagnosticInfo.cpp index 73245981b0228a10828cc4bf4bf36f9260b7cd8d..e38e530c052d2e450bb652e93ddc88d5a2aa9d13 100644 --- a/lib/Analysis/OptimizationDiagnosticInfo.cpp +++ b/lib/Analysis/OptimizationDiagnosticInfo.cpp @@ -101,7 +101,7 @@ void MappingTraits::mapping( // These are read-only for now. DiagnosticLocation DL = OptDiag->getLocation(); StringRef FN = - GlobalValue::getRealLinkageName(OptDiag->getFunction().getName()); + GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName()); StringRef PassName(OptDiag->PassName); io.mapRequired("Pass", PassName); diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp index 0f0016f22cc0a79849f4c443109604ba5280fd66..a04c0aef04beaa7cc74a82f629db0d13f461b17a 100644 --- a/lib/Analysis/OrderedBasicBlock.cpp +++ b/lib/Analysis/OrderedBasicBlock.cpp @@ -55,7 +55,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A, assert(II != IE && "Instruction not found?"); assert((Inst == A || Inst == B) && "Should find A or B"); LastInstFound = II; - return Inst == A; + return Inst != B; } /// \brief Find out whether \p A dominates \p B, meaning whether \p A diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 84ecd4ab980976342666019666615952224f3fee..682af4dc708e5aad102f14f440d8f808d868f180 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -227,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // Simplify the GEP to handle 'gep x, 0' -> x etc. if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), - GEPOps, DL, TLI, DT, AC)) { + GEPOps, {DL, TLI, DT, AC})) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); @@ -276,7 +276,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, } // See if the add simplifies away. - if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT, AC)) { + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, {DL, TLI, DT, AC})) { // If we simplified the operands, the LHS is no longer an input, but Res // is. RemoveInstInputs(LHS, InstInputs); diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp index 1a53a8ed428377f3067459ee03ee06cd1063da11..12b86daa602be02df4475075edbb38e93279abd2 100644 --- a/lib/Analysis/ProfileSummaryInfo.cpp +++ b/lib/Analysis/ProfileSummaryInfo.cpp @@ -75,11 +75,14 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst, return None; assert((isa(Inst) || isa(Inst)) && "We can only get profile count for call/invoke instruction."); - // Check if there is a profile metadata on the instruction. If it is present, - // determine hotness solely based on that. - uint64_t TotalCount; - if (Inst->extractProfTotalWeight(TotalCount)) - return TotalCount; + if (hasSampleProfile()) { + // In sample PGO mode, check if there is a profile metadata on the + // instruction. If it is present, determine hotness solely based on that, + // since the sampled entry count may not be accurate. + uint64_t TotalCount; + if (Inst->extractProfTotalWeight(TotalCount)) + return TotalCount; + } if (BFI) return BFI->getBlockProfileCount(Inst->getParent()); return None; diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 82107cb1802518383c7b2edfab87f37322899194..b38e6225c8403376a3cc420a3e724a62741a9f86 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionPass.h" #include "llvm/Analysis/RegionIterator.h" +#include "llvm/IR/OptBisect.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" @@ -280,3 +281,18 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O, const std::string &Banner) const { return new PrintRegionPass(Banner, O); } + +bool RegionPass::skipRegion(Region &R) const { + Function &F = *R.getEntry()->getParent(); + if (!F.getContext().getOptBisect().shouldRunPass(this, R)) + return true; + + if (F.hasFnAttribute(Attribute::OptimizeNone)) { + // Report this only once per function. + if (R.getEntry() == &F.getEntryBlock()) + DEBUG(dbgs() << "Skipping pass '" << getPassName() + << "' on function " << F.getName() << "\n"); + return true; + } + return false; +} diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index 30a4e011060e9bbf9560a1fca28494f57f7bd01f..5986b8c4e0c30aa3181cf11e678d92edb5fbc8ca 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -9,14 +9,14 @@ // Print out the region tree of a function using dotty/graphviz. //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/RegionPrinter.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" -#include "llvm/Analysis/RegionPrinter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index ca32cf3c7c34292d9d4b7429e832b64f862ff83b..aebc80a0a88512b48aec3f0f9877bb24cb2e09c0 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -89,9 +89,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -148,9 +149,9 @@ static cl::opt MaxValueCompareDepth( cl::init(2)); static cl::opt - MaxAddExprDepth("scalar-evolution-max-addexpr-depth", cl::Hidden, - cl::desc("Maximum depth of recursive AddExpr"), - cl::init(32)); + MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden, + cl::desc("Maximum depth of recursive arithmetics"), + cl::init(32)); static cl::opt MaxConstantEvolvingDepth( "scalar-evolution-max-constant-evolving-depth", cl::Hidden, @@ -583,7 +584,7 @@ CompareValueComplexity(SmallSet, 8> &EqCache, static int CompareSCEVComplexity( SmallSet, 8> &EqCacheSCEV, const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, - unsigned Depth = 0) { + DominatorTree &DT, unsigned Depth = 0) { // Fast-path: SCEVs are uniqued so we can do a quick equality check. if (LHS == RHS) return 0; @@ -628,12 +629,19 @@ static int CompareSCEVComplexity( const SCEVAddRecExpr *LA = cast(LHS); const SCEVAddRecExpr *RA = cast(RHS); - // Compare addrec loop depths. + // There is always a dominance between two recs that are used by one SCEV, + // so we can safely sort recs by loop header dominance. We require such + // order in getAddExpr. const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); if (LLoop != RLoop) { - unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth(); - if (LDepth != RDepth) - return (int)LDepth - (int)RDepth; + const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader(); + assert(LHead != RHead && "Two loops share the same header?"); + if (DT.dominates(LHead, RHead)) + return 1; + else + assert(DT.dominates(RHead, LHead) && + "No dominance between recurrences used by one SCEV?"); + return -1; } // Addrec complexity grows with operand count. @@ -644,7 +652,7 @@ static int CompareSCEVComplexity( // Lexicographically compare. for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i), - RA->getOperand(i), Depth + 1); + RA->getOperand(i), DT, Depth + 1); if (X != 0) return X; } @@ -668,7 +676,7 @@ static int CompareSCEVComplexity( if (i >= RNumOps) return 1; int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i), - RC->getOperand(i), Depth + 1); + RC->getOperand(i), DT, Depth + 1); if (X != 0) return X; } @@ -682,10 +690,10 @@ static int CompareSCEVComplexity( // Lexicographically compare udiv expressions. int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(), - Depth + 1); + DT, Depth + 1); if (X != 0) return X; - X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), + X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(), DT, Depth + 1); if (X == 0) EqCacheSCEV.insert({LHS, RHS}); @@ -700,7 +708,7 @@ static int CompareSCEVComplexity( // Compare cast expressions by operand. int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(), - RC->getOperand(), Depth + 1); + RC->getOperand(), DT, Depth + 1); if (X == 0) EqCacheSCEV.insert({LHS, RHS}); return X; @@ -723,7 +731,7 @@ static int CompareSCEVComplexity( /// land in memory. /// static void GroupByComplexity(SmallVectorImpl &Ops, - LoopInfo *LI) { + LoopInfo *LI, DominatorTree &DT) { if (Ops.size() < 2) return; // Noop SmallSet, 8> EqCache; @@ -731,15 +739,16 @@ static void GroupByComplexity(SmallVectorImpl &Ops, // This is the common case, which also happens to be trivially simple. // Special case it. const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; - if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0) + if (CompareSCEVComplexity(EqCache, LI, RHS, LHS, DT) < 0) std::swap(LHS, RHS); return; } // Do the rough sort by complexity. std::stable_sort(Ops.begin(), Ops.end(), - [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) { - return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0; + [&EqCache, LI, &DT](const SCEV *LHS, const SCEV *RHS) { + return + CompareSCEVComplexity(EqCache, LI, LHS, RHS, DT) < 0; }); // Now that we are sorted by complexity, group elements of the same @@ -1093,7 +1102,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, APInt Mult(W, i); unsigned TwoFactors = Mult.countTrailingZeros(); T += TwoFactors; - Mult = Mult.lshr(TwoFactors); + Mult.lshrInPlace(TwoFactors); OddFactorial *= Mult; } @@ -1276,7 +1285,8 @@ static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step, namespace { struct ExtendOpTraitsBase { - typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *); + typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)( + const SCEV *, Type *, ScalarEvolution::ExtendCacheTy &Cache); }; // Used to make code generic over signed and unsigned overflow. @@ -1305,8 +1315,9 @@ struct ExtendOpTraits : public ExtendOpTraitsBase { } }; -const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< - SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr; +const ExtendOpTraitsBase::GetExtendExprTy + ExtendOpTraits::GetExtendExpr = + &ScalarEvolution::getSignExtendExprCached; template <> struct ExtendOpTraits : public ExtendOpTraitsBase { @@ -1321,8 +1332,9 @@ struct ExtendOpTraits : public ExtendOpTraitsBase { } }; -const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< - SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr; +const ExtendOpTraitsBase::GetExtendExprTy + ExtendOpTraits::GetExtendExpr = + &ScalarEvolution::getZeroExtendExprCached; } // The recurrence AR has been shown to have no signed/unsigned wrap or something @@ -1334,7 +1346,8 @@ const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits< // "sext/zext(PostIncAR)" template static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, - ScalarEvolution *SE) { + ScalarEvolution *SE, + ScalarEvolution::ExtendCacheTy &Cache) { auto WrapType = ExtendOpTraits::WrapType; auto GetExtendExpr = ExtendOpTraits::GetExtendExpr; @@ -1381,9 +1394,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); const SCEV *OperandExtendedStart = - SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy), - (SE->*GetExtendExpr)(Step, WideTy)); - if ((SE->*GetExtendExpr)(Start, WideTy) == OperandExtendedStart) { + SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Cache), + (SE->*GetExtendExpr)(Step, WideTy, Cache)); + if ((SE->*GetExtendExpr)(Start, WideTy, Cache) == OperandExtendedStart) { if (PreAR && AR->getNoWrapFlags(WrapType)) { // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then @@ -1408,15 +1421,17 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, // Get the normalized zero or sign extended expression for this AddRec's Start. template static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty, - ScalarEvolution *SE) { + ScalarEvolution *SE, + ScalarEvolution::ExtendCacheTy &Cache) { auto GetExtendExpr = ExtendOpTraits::GetExtendExpr; - const SCEV *PreStart = getPreStartForExtend(AR, Ty, SE); + const SCEV *PreStart = getPreStartForExtend(AR, Ty, SE, Cache); if (!PreStart) - return (SE->*GetExtendExpr)(AR->getStart(), Ty); + return (SE->*GetExtendExpr)(AR->getStart(), Ty, Cache); - return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty), - (SE->*GetExtendExpr)(PreStart, Ty)); + return SE->getAddExpr( + (SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty, Cache), + (SE->*GetExtendExpr)(PreStart, Ty, Cache)); } // Try to prove away overflow by looking at "nearby" add recurrences. A @@ -1496,8 +1511,31 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, return false; } -const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, - Type *Ty) { +const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty) { + // Use the local cache to prevent exponential behavior of + // getZeroExtendExprImpl. + ExtendCacheTy Cache; + return getZeroExtendExprCached(Op, Ty, Cache); +} + +/// Query \p Cache before calling getZeroExtendExprImpl. If there is no +/// related entry in the \p Cache, call getZeroExtendExprImpl and save +/// the result in the \p Cache. +const SCEV *ScalarEvolution::getZeroExtendExprCached(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache) { + auto It = Cache.find({Op, Ty}); + if (It != Cache.end()) + return It->second; + const SCEV *ZExt = getZeroExtendExprImpl(Op, Ty, Cache); + auto InsertResult = Cache.insert({{Op, Ty}, ZExt}); + assert(InsertResult.second && "Expect the key was not in the cache"); + (void)InsertResult; + return ZExt; +} + +/// The real implementation of getZeroExtendExpr. +const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1507,11 +1545,11 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getZExt(SC->getValue(), Ty))); + cast(ConstantExpr::getZExt(SC->getValue(), Ty))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) - return getZeroExtendExpr(SZ->getOperand(), Ty); + return getZeroExtendExprCached(SZ->getOperand(), Ty, Cache); // Before doing any expensive analysis, check to see if we've already // computed a SCEV for this Op and Ty. @@ -1555,8 +1593,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // we don't need to do any further analysis. if (AR->hasNoUnsignedWrap()) return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), - getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Cache), + getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags()); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1581,21 +1619,22 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy); - const SCEV *WideStart = getZeroExtendExpr(Start, WideTy); + const SCEV *ZAdd = + getZeroExtendExprCached(getAddExpr(Start, ZMul), WideTy, Cache); + const SCEV *WideStart = getZeroExtendExprCached(Start, WideTy, Cache); const SCEV *WideMaxBECount = - getZeroExtendExpr(CastedMaxBECount, WideTy); - const SCEV *OperandExtendedAdd = - getAddExpr(WideStart, - getMulExpr(WideMaxBECount, - getZeroExtendExpr(Step, WideTy))); + getZeroExtendExprCached(CastedMaxBECount, WideTy, Cache); + const SCEV *OperandExtendedAdd = getAddExpr( + WideStart, getMulExpr(WideMaxBECount, getZeroExtendExprCached( + Step, WideTy, Cache))); if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), - getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Cache), + getZeroExtendExprCached(Step, Ty, Cache), L, + AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. @@ -1609,7 +1648,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), + getExtendAddRecStart(AR, Ty, this, Cache), getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1641,8 +1680,9 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), - getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Cache), + getZeroExtendExprCached(Step, Ty, Cache), L, + AR->getNoWrapFlags()); } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - @@ -1657,7 +1697,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), + getExtendAddRecStart(AR, Ty, this, Cache), getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1666,8 +1706,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, if (proveNoWrapByVaryingStart(Start, Step, L)) { const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), - getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Cache), + getZeroExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags()); } } @@ -1678,7 +1718,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // commute the zero extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) - Ops.push_back(getZeroExtendExpr(Op, Ty)); + Ops.push_back(getZeroExtendExprCached(Op, Ty, Cache)); return getAddExpr(Ops, SCEV::FlagNUW); } } @@ -1692,8 +1732,31 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, return S; } -const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, - Type *Ty) { +const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty) { + // Use the local cache to prevent exponential behavior of + // getSignExtendExprImpl. + ExtendCacheTy Cache; + return getSignExtendExprCached(Op, Ty, Cache); +} + +/// Query \p Cache before calling getSignExtendExprImpl. If there is no +/// related entry in the \p Cache, call getSignExtendExprImpl and save +/// the result in the \p Cache. +const SCEV *ScalarEvolution::getSignExtendExprCached(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache) { + auto It = Cache.find({Op, Ty}); + if (It != Cache.end()) + return It->second; + const SCEV *SExt = getSignExtendExprImpl(Op, Ty, Cache); + auto InsertResult = Cache.insert({{Op, Ty}, SExt}); + assert(InsertResult.second && "Expect the key was not in the cache"); + (void)InsertResult; + return SExt; +} + +/// The real implementation of getSignExtendExpr. +const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty, + ExtendCacheTy &Cache) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -1703,11 +1766,11 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getSExt(SC->getValue(), Ty))); + cast(ConstantExpr::getSExt(SC->getValue(), Ty))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) - return getSignExtendExpr(SS->getOperand(), Ty); + return getSignExtendExprCached(SS->getOperand(), Ty, Cache); // sext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) @@ -1746,8 +1809,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) - return getAddExpr(getSignExtendExpr(SC1, Ty), - getSignExtendExpr(SMul, Ty)); + return getAddExpr(getSignExtendExprCached(SC1, Ty, Cache), + getSignExtendExprCached(SMul, Ty, Cache)); } } } @@ -1758,7 +1821,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // commute the sign extension with the addition operation. SmallVector Ops; for (const auto *Op : SA->operands()) - Ops.push_back(getSignExtendExpr(Op, Ty)); + Ops.push_back(getSignExtendExprCached(Op, Ty, Cache)); return getAddExpr(Ops, SCEV::FlagNSW); } } @@ -1782,8 +1845,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // we don't need to do any further analysis. if (AR->hasNoSignedWrap()) return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW); + getExtendAddRecStart(AR, Ty, this, Cache), + getSignExtendExprCached(Step, Ty, Cache), L, SCEV::FlagNSW); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1808,21 +1871,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy); - const SCEV *WideStart = getSignExtendExpr(Start, WideTy); + const SCEV *SAdd = + getSignExtendExprCached(getAddExpr(Start, SMul), WideTy, Cache); + const SCEV *WideStart = getSignExtendExprCached(Start, WideTy, Cache); const SCEV *WideMaxBECount = - getZeroExtendExpr(CastedMaxBECount, WideTy); - const SCEV *OperandExtendedAdd = - getAddExpr(WideStart, - getMulExpr(WideMaxBECount, - getSignExtendExpr(Step, WideTy))); + getZeroExtendExpr(CastedMaxBECount, WideTy); + const SCEV *OperandExtendedAdd = getAddExpr( + WideStart, getMulExpr(WideMaxBECount, getSignExtendExprCached( + Step, WideTy, Cache))); if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Cache), + getSignExtendExprCached(Step, Ty, Cache), L, + AR->getNoWrapFlags()); } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. @@ -1843,7 +1907,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Return the expression with the addrec on the outside. return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), + getExtendAddRecStart(AR, Ty, this, Cache), getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } } @@ -1875,8 +1939,9 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Cache), + getSignExtendExprCached(Step, Ty, Cache), L, + AR->getNoWrapFlags()); } } @@ -1890,18 +1955,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) { - Start = getSignExtendExpr(Start, Ty); + Start = getSignExtendExprCached(Start, Ty, Cache); const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, AR->getNoWrapFlags()); - return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); + return getAddExpr(Start, getSignExtendExprCached(NewAR, Ty, Cache)); } } if (proveNoWrapByVaryingStart(Start, Step, L)) { const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); return getAddRecExpr( - getExtendAddRecStart(AR, Ty, this), - getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); + getExtendAddRecStart(AR, Ty, this, Cache), + getSignExtendExprCached(Step, Ty, Cache), L, AR->getNoWrapFlags()); } } @@ -2113,6 +2178,62 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, return Flags; } +bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { + if (!isLoopInvariant(S, L)) + return false; + // If a value depends on a SCEVUnknown which is defined after the loop, we + // conservatively assume that we cannot calculate it at the loop's entry. + struct FindDominatedSCEVUnknown { + bool Found = false; + const Loop *L; + DominatorTree &DT; + LoopInfo &LI; + + FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI) + : L(L), DT(DT), LI(LI) {} + + bool checkSCEVUnknown(const SCEVUnknown *SU) { + if (auto *I = dyn_cast(SU->getValue())) { + if (DT.dominates(L->getHeader(), I->getParent())) + Found = true; + else + assert(DT.dominates(I->getParent(), L->getHeader()) && + "No dominance relationship between SCEV and loop?"); + } + return false; + } + + bool follow(const SCEV *S) { + switch (static_cast(S->getSCEVType())) { + case scConstant: + return false; + case scAddRecExpr: + case scTruncate: + case scZeroExtend: + case scSignExtend: + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + case scUDivExpr: + return true; + case scUnknown: + return checkSCEVUnknown(cast(S)); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + return false; + } + + bool isDone() { return Found; } + }; + + FindDominatedSCEVUnknown FSU(L, DT, LI); + SCEVTraversal ST(FSU); + ST.visitAll(S); + return !FSU.Found; +} + /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, @@ -2129,7 +2250,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); @@ -2155,8 +2276,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, if (Ops.size() == 1) return Ops[0]; } - // Limit recursion calls depth - if (Depth > MaxAddExprDepth) + // Limit recursion calls depth. + if (Depth > MaxArithDepth) return getOrCreateAddExpr(Ops, Flags); // Okay, check to see if the same value occurs in the operand list more than @@ -2172,7 +2293,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, ++Count; // Merge the values into a multiply. const SCEV *Scale = getConstant(Ty, Count); - const SCEV *Mul = getMulExpr(Scale, Ops[i]); + const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == Count) return Mul; Ops[i] = Mul; @@ -2222,7 +2343,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, } } if (Ok) - LargeOps.push_back(getMulExpr(LargeMulOps)); + LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1)); } else { Ok = false; break; @@ -2296,7 +2417,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, if (MulOp.first != 0) Ops.push_back(getMulExpr( getConstant(MulOp.first), - getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1))); + getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1)); if (Ops.empty()) return getZero(Ty); if (Ops.size() == 1) @@ -2324,11 +2446,12 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SmallVector MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); - InnerMul = getMulExpr(MulOps); + InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {getOne(Ty), InnerMul}; const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); - const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); + const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV, + SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); @@ -2357,19 +2480,20 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SmallVector MulOps(Mul->op_begin(), Mul->op_begin()+MulOp); MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); - InnerMul1 = getMulExpr(MulOps); + InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { SmallVector MulOps(OtherMul->op_begin(), OtherMul->op_begin()+OMulOp); MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); - InnerMul2 = getMulExpr(MulOps); + InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); } SmallVector TwoOps = {InnerMul1, InnerMul2}; const SCEV *InnerMulSum = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); - const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum, + SCEV::FlagAnyWrap, Depth + 1); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); @@ -2394,7 +2518,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { + if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -2435,32 +2559,40 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // added together. If so, we can fold them. for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa(Ops[OtherIdx]); - ++OtherIdx) + ++OtherIdx) { + // We expect the AddRecExpr's to be sorted in reverse dominance order, + // so that the 1st found AddRecExpr is dominated by all others. + assert(DT.dominates( + cast(Ops[OtherIdx])->getLoop()->getHeader(), + AddRec->getLoop()->getHeader()) && + "AddRecExprs are not sorted in reverse dominance order?"); if (AddRecLoop == cast(Ops[OtherIdx])->getLoop()) { // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} SmallVector AddRecOps(AddRec->op_begin(), AddRec->op_end()); for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); - ++OtherIdx) - if (const auto *OtherAddRec = dyn_cast(Ops[OtherIdx])) - if (OtherAddRec->getLoop() == AddRecLoop) { - for (unsigned i = 0, e = OtherAddRec->getNumOperands(); - i != e; ++i) { - if (i >= AddRecOps.size()) { - AddRecOps.append(OtherAddRec->op_begin()+i, - OtherAddRec->op_end()); - break; - } - SmallVector TwoOps = { - AddRecOps[i], OtherAddRec->getOperand(i)}; - AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); + ++OtherIdx) { + const auto *OtherAddRec = cast(Ops[OtherIdx]); + if (OtherAddRec->getLoop() == AddRecLoop) { + for (unsigned i = 0, e = OtherAddRec->getNumOperands(); + i != e; ++i) { + if (i >= AddRecOps.size()) { + AddRecOps.append(OtherAddRec->op_begin()+i, + OtherAddRec->op_end()); + break; } - Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + SmallVector TwoOps = { + AddRecOps[i], OtherAddRec->getOperand(i)}; + AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); } + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + } // Step size has changed, so we cannot guarantee no self-wraparound. Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } + } // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. @@ -2492,6 +2624,27 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl &Ops, return S; } +const SCEV * +ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl &Ops, + SCEV::NoWrapFlags Flags) { + FoldingSetNodeID ID; + ID.AddInteger(scMulExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = nullptr; + SCEVMulExpr *S = + static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + S->setNoWrapFlags(Flags); + return S; +} + static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { uint64_t k = i*j; if (j > 1 && k / j != i) Overflow = true; @@ -2544,7 +2697,8 @@ static bool containsConstantSomewhere(const SCEV *StartExpr) { /// Get a canonical multiply expression, or something simpler if possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, - SCEV::NoWrapFlags Flags) { + SCEV::NoWrapFlags Flags, + unsigned Depth) { assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty mul!"); @@ -2557,10 +2711,14 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); + // Limit recursion calls depth. + if (Depth > MaxArithDepth) + return getOrCreateMulExpr(Ops, Flags); + // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { @@ -2572,8 +2730,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // apply this transformation as well. if (Add->getNumOperands() == 2) if (containsConstantSomewhere(Add)) - return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)), - getMulExpr(LHSC, Add->getOperand(1))); + return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), + SCEV::FlagAnyWrap, Depth + 1), + getMulExpr(LHSC, Add->getOperand(1), + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); ++Idx; while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { @@ -2601,17 +2762,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, SmallVector NewOps; bool AnyFolded = false; for (const SCEV *AddOp : Add->operands()) { - const SCEV *Mul = getMulExpr(Ops[0], AddOp); + const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap, + Depth + 1); if (!isa(Mul)) AnyFolded = true; NewOps.push_back(Mul); } if (AnyFolded) - return getAddExpr(NewOps); + return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1); } else if (const auto *AddRec = dyn_cast(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. SmallVector Operands; for (const SCEV *AddRecOp : AddRec->operands()) - Operands.push_back(getMulExpr(Ops[0], AddRecOp)); + Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap, + Depth + 1)); return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); @@ -2633,18 +2796,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, while (const SCEVMulExpr *Mul = dyn_cast(Ops[Idx])) { if (Ops.size() > MulOpsInlineThreshold) break; - // If we have an mul, expand the mul operands onto the end of the operands - // list. + // If we have an mul, expand the mul operands onto the end of the + // operands list. Ops.erase(Ops.begin()+Idx); Ops.append(Mul->op_begin(), Mul->op_end()); DeletedMul = true; } - // If we deleted at least one mul, we added operands to the end of the list, - // and they are not necessarily sorted. Recurse to resort and resimplify - // any operands we just acquired. + // If we deleted at least one mul, we added operands to the end of the + // list, and they are not necessarily sorted. Recurse to resort and + // resimplify any operands we just acquired. if (DeletedMul) - return getMulExpr(Ops); + return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } // If there are any add recurrences in the operands list, see if any other @@ -2655,13 +2818,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // Scan over all recurrences, trying to fold loop invariants into them. for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { - // Scan all of the other operands to this mul and add them to the vector if - // they are loop invariant w.r.t. the recurrence. + // Scan all of the other operands to this mul and add them to the vector + // if they are loop invariant w.r.t. the recurrence. SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { + if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -2672,9 +2835,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} SmallVector NewOps; NewOps.reserve(AddRec->getNumOperands()); - const SCEV *Scale = getMulExpr(LIOps); + const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1); for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) - NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); + NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i), + SCEV::FlagAnyWrap, Depth + 1)); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer mul and the inner addrec are guaranteed to have no overflow. @@ -2693,12 +2857,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, Ops[i] = NewRec; break; } - return getMulExpr(Ops); + return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); } - // Okay, if there weren't any loop invariants to be folded, check to see if - // there are multiple AddRec's with the same loop induction variable being - // multiplied together. If so, we can fold them. + // Okay, if there weren't any loop invariants to be folded, check to see + // if there are multiple AddRec's with the same loop induction variable + // being multiplied together. If so, we can fold them. // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ @@ -2740,7 +2904,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, const SCEV *CoeffTerm = getConstant(Ty, Coeff); const SCEV *Term1 = AddRec->getOperand(y-z); const SCEV *Term2 = OtherAddRec->getOperand(z); - Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); + Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2, + SCEV::FlagAnyWrap, Depth + 1), + SCEV::FlagAnyWrap, Depth + 1); } } AddRecOps.push_back(Term); @@ -2758,7 +2924,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, } } if (OpsModified) - return getMulExpr(Ops); + return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); // Otherwise couldn't fold anything into this recurrence. Move onto the // next one. @@ -2766,22 +2932,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // Okay, it looks like we really DO need an mul expr. Check to see if we // already have one, otherwise create a new one. - FoldingSetNodeID ID; - ID.AddInteger(scMulExpr); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); - void *IP = nullptr; - SCEVMulExpr *S = - static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); - if (!S) { - const SCEV **O = SCEVAllocator.Allocate(Ops.size()); - std::uninitialized_copy(Ops.begin(), Ops.end(), O); - S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), - O, Ops.size()); - UniqueSCEVs.InsertNode(S, IP); - } - S->setNoWrapFlags(Flags); - return S; + return getOrCreateMulExpr(Ops, Flags); } /// Get a canonical unsigned division expression, or something simpler if @@ -2913,7 +3064,7 @@ static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { else if (ABW < BBW) A = A.zext(BBW); - return APIntOps::GreatestCommonDivisor(A, B); + return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B)); } /// Get a canonical unsigned division expression, or something simpler if @@ -3154,7 +3305,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3255,7 +3406,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI); + GroupByComplexity(Ops, &LI, DT); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3584,7 +3735,8 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { } const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags) { + SCEV::NoWrapFlags Flags, + unsigned Depth) { // Fast path: X - X --> 0. if (LHS == RHS) return getZero(LHS->getType()); @@ -3618,7 +3770,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, // larger scope than intended. auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags); + return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth); } const SCEV * @@ -3812,7 +3964,7 @@ public: : SCEVRewriteVisitor(SE), L(L), Valid(true) {} const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + if (!SE.isLoopInvariant(Expr, L)) Valid = false; return Expr; } @@ -3846,7 +3998,7 @@ public: const SCEV *visitUnknown(const SCEVUnknown *Expr) { // Only allow AddRecExprs for this loop. - if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + if (!SE.isLoopInvariant(Expr, L)) Valid = false; return Expr; } @@ -3951,9 +4103,9 @@ static Optional MatchBinaryOp(Value *V, DominatorTree &DT) { case Instruction::Xor: if (auto *RHSC = dyn_cast(Op->getOperand(1))) - // If the RHS of the xor is a signbit, then this is just an add. - // Instcombine turns add of signbit into xor as a strength reduction step. - if (RHSC->getValue().isSignBit()) + // If the RHS of the xor is a signmask, then this is just an add. + // Instcombine turns add of signmask into xor as a strength reduction step. + if (RHSC->getValue().isSignMask()) return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1)); return BinaryOp(Op); @@ -4026,6 +4178,56 @@ static Optional MatchBinaryOp(Value *V, DominatorTree &DT) { return None; } +/// A helper function for createAddRecFromPHI to handle simple cases. +/// +/// This function tries to find an AddRec expression for the simplest (yet most +/// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)). +/// If it fails, createAddRecFromPHI will use a more general, but slow, +/// technique for finding the AddRec expression. +const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, + Value *BEValueV, + Value *StartValueV) { + const Loop *L = LI.getLoopFor(PN->getParent()); + assert(L && L->getHeader() == PN->getParent()); + assert(BEValueV && StartValueV); + + auto BO = MatchBinaryOp(BEValueV, DT); + if (!BO) + return nullptr; + + if (BO->Opcode != Instruction::Add) + return nullptr; + + const SCEV *Accum = nullptr; + if (BO->LHS == PN && L->isLoopInvariant(BO->RHS)) + Accum = getSCEV(BO->RHS); + else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS)) + Accum = getSCEV(BO->LHS); + + if (!Accum) + return nullptr; + + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + if (BO->IsNUW) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (BO->IsNSW) + Flags = setFlags(Flags, SCEV::FlagNSW); + + const SCEV *StartVal = getSCEV(StartValueV); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); + + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + + // We can add Flags to the post-inc expression only if we + // know that it is *undefined behavior* for BEValueV to + // overflow. + if (auto *BEInst = dyn_cast(BEValueV)) + if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + + return PHISCEV; +} + const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { const Loop *L = LI.getLoopFor(PN->getParent()); if (!L || L->getHeader() != PN->getParent()) @@ -4051,127 +4253,134 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { break; } } - if (BEValueV && StartValueV) { - // While we are analyzing this PHI node, handle its value symbolically. - const SCEV *SymbolicName = getUnknown(PN); - assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && - "PHI node already processed?"); - ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName}); + if (!BEValueV || !StartValueV) + return nullptr; - // Using this symbolic name for the PHI, analyze the value coming around - // the back-edge. - const SCEV *BEValue = getSCEV(BEValueV); + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && + "PHI node already processed?"); - // NOTE: If BEValue is loop invariant, we know that the PHI node just - // has a special value for the first iteration of the loop. + // First, try to find AddRec expression without creating a fictituos symbolic + // value for PN. + if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV)) + return S; + + // Handle PHI node value symbolically. + const SCEV *SymbolicName = getUnknown(PN); + ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName}); + + // Using this symbolic name for the PHI, analyze the value coming around + // the back-edge. + const SCEV *BEValue = getSCEV(BEValueV); + + // NOTE: If BEValue is loop invariant, we know that the PHI node just + // has a special value for the first iteration of the loop. + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, then we found a simple induction variable! + if (const SCEVAddExpr *Add = dyn_cast(BEValue)) { + // If there is a single occurrence of the symbolic value, replace it + // with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (Add->getOperand(i) == SymbolicName) + if (FoundIndex == e) { + FoundIndex = i; + break; + } - // If the value coming around the backedge is an add with the symbolic - // value we just inserted, then we found a simple induction variable! - if (const SCEVAddExpr *Add = dyn_cast(BEValue)) { - // If there is a single occurrence of the symbolic value, replace it - // with a recurrence. - unsigned FoundIndex = Add->getNumOperands(); + if (FoundIndex != Add->getNumOperands()) { + // Create an add with everything but the specified operand. + SmallVector Ops; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (Add->getOperand(i) == SymbolicName) - if (FoundIndex == e) { - FoundIndex = i; - break; + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // This is not a valid addrec if the step amount is varying each + // loop iteration, but is not itself an addrec in this loop. + if (isLoopInvariant(Accum, L) || + (isa(Accum) && + cast(Accum)->getLoop() == L)) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + + if (auto BO = MatchBinaryOp(BEValueV, DT)) { + if (BO->Opcode == Instruction::Add && BO->LHS == PN) { + if (BO->IsNUW) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (BO->IsNSW) + Flags = setFlags(Flags, SCEV::FlagNSW); } - - if (FoundIndex != Add->getNumOperands()) { - // Create an add with everything but the specified operand. - SmallVector Ops; - for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (i != FoundIndex) - Ops.push_back(Add->getOperand(i)); - const SCEV *Accum = getAddExpr(Ops); - - // This is not a valid addrec if the step amount is varying each - // loop iteration, but is not itself an addrec in this loop. - if (isLoopInvariant(Accum, L) || - (isa(Accum) && - cast(Accum)->getLoop() == L)) { - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; - - if (auto BO = MatchBinaryOp(BEValueV, DT)) { - if (BO->Opcode == Instruction::Add && BO->LHS == PN) { - if (BO->IsNUW) - Flags = setFlags(Flags, SCEV::FlagNUW); - if (BO->IsNSW) - Flags = setFlags(Flags, SCEV::FlagNSW); - } - } else if (GEPOperator *GEP = dyn_cast(BEValueV)) { - // If the increment is an inbounds GEP, then we know the address - // space cannot be wrapped around. We cannot make any guarantee - // about signed or unsigned overflow because pointers are - // unsigned but we may have a negative index from the base - // pointer. We can guarantee that no unsigned wrap occurs if the - // indices form a positive value. - if (GEP->isInBounds() && GEP->getOperand(0) == PN) { - Flags = setFlags(Flags, SCEV::FlagNW); - - const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); - if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) - Flags = setFlags(Flags, SCEV::FlagNUW); - } - - // We cannot transfer nuw and nsw flags from subtraction - // operations -- sub nuw X, Y is not the same as add nuw X, -Y - // for instance. + } else if (GEPOperator *GEP = dyn_cast(BEValueV)) { + // If the increment is an inbounds GEP, then we know the address + // space cannot be wrapped around. We cannot make any guarantee + // about signed or unsigned overflow because pointers are + // unsigned but we may have a negative index from the base + // pointer. We can guarantee that no unsigned wrap occurs if the + // indices form a positive value. + if (GEP->isInBounds() && GEP->getOperand(0) == PN) { + Flags = setFlags(Flags, SCEV::FlagNW); + + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); + if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) + Flags = setFlags(Flags, SCEV::FlagNUW); } - const SCEV *StartVal = getSCEV(StartValueV); - const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); + // We cannot transfer nuw and nsw flags from subtraction + // operations -- sub nuw X, Y is not the same as add nuw X, -Y + // for instance. + } - // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - forgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + const SCEV *StartVal = getSCEV(StartValueV); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); - // We can add Flags to the post-inc expression only if we - // know that it us *undefined behavior* for BEValueV to - // overflow. - if (auto *BEInst = dyn_cast(BEValueV)) - if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + forgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - return PHISCEV; - } + // We can add Flags to the post-inc expression only if we + // know that it is *undefined behavior* for BEValueV to + // overflow. + if (auto *BEInst = dyn_cast(BEValueV)) + if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L)) + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + + return PHISCEV; } - } else { - // Otherwise, this could be a loop like this: - // i = 0; for (j = 1; ..; ++j) { .... i = j; } - // In this case, j = {1,+,1} and BEValue is j. - // Because the other in-value of i (0) fits the evolution of BEValue - // i really is an addrec evolution. - // - // We can generalize this saying that i is the shifted value of BEValue - // by one iteration: - // PHI(f(0), f({1,+,1})) --> f({0,+,1}) - const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); - const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this); - if (Shifted != getCouldNotCompute() && - Start != getCouldNotCompute()) { - const SCEV *StartVal = getSCEV(StartValueV); - if (Start == StartVal) { - // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - forgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; - return Shifted; - } + } + } else { + // Otherwise, this could be a loop like this: + // i = 0; for (j = 1; ..; ++j) { .... i = j; } + // In this case, j = {1,+,1} and BEValue is j. + // Because the other in-value of i (0) fits the evolution of BEValue + // i really is an addrec evolution. + // + // We can generalize this saying that i is the shifted value of BEValue + // by one iteration: + // PHI(f(0), f({1,+,1})) --> f({0,+,1}) + const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); + const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this); + if (Shifted != getCouldNotCompute() && + Start != getCouldNotCompute()) { + const SCEV *StartVal = getSCEV(StartValueV); + if (Start == StartVal) { + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + forgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; + return Shifted; } } - - // Remove the temporary PHI node SCEV that has been inserted while intending - // to create an AddRecExpr for this PHI node. We can not keep this temporary - // as it will prevent later (possibly simpler) SCEV expressions to be added - // to the ValueExprMap. - eraseValueFromMap(PN); } + // Remove the temporary PHI node SCEV that has been inserted while intending + // to create an AddRecExpr for this PHI node. We can not keep this temporary + // as it will prevent later (possibly simpler) SCEV expressions to be added + // to the ValueExprMap. + eraseValueFromMap(PN); + return nullptr; } @@ -4331,7 +4540,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC)) + if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC})) if (LI.replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); @@ -4518,11 +4727,8 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. - unsigned BitWidth = getTypeSizeInBits(U->getType()); - APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC, - nullptr, &DT); - return Zeros.countTrailingOnes(); + KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT); + return Known.countMinTrailingZeros(); } // SCEVUDivExpr @@ -4686,7 +4892,7 @@ ScalarEvolution::getRange(const SCEV *S, } } - return setRange(AddRec, SignHint, ConservativeResult); + return setRange(AddRec, SignHint, std::move(ConservativeResult)); } if (const SCEVUnknown *U = dyn_cast(S)) { @@ -4701,11 +4907,11 @@ ScalarEvolution::getRange(const SCEV *S, const DataLayout &DL = getDataLayout(); if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. - APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT); - if (Ones != ~Zeros + 1) + KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); + if (Known.One != ~Known.Zero + 1) ConservativeResult = - ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); + ConservativeResult.intersectWith(ConstantRange(Known.One, + ~Known.Zero + 1)); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); @@ -4716,10 +4922,10 @@ ScalarEvolution::getRange(const SCEV *S, APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1)); } - return setRange(U, SignHint, ConservativeResult); + return setRange(U, SignHint, std::move(ConservativeResult)); } - return setRange(S, SignHint, ConservativeResult); + return setRange(S, SignHint, std::move(ConservativeResult)); } // Given a StartRange, Step and MaxBECount for an expression compute a range of @@ -4727,8 +4933,8 @@ ScalarEvolution::getRange(const SCEV *S, // from StartRange and then is changed by Step up to MaxBECount times. Signed // argument defines if we treat Step as signed or unsigned. static ConstantRange getRangeForAffineARHelper(APInt Step, - ConstantRange StartRange, - APInt MaxBECount, + const ConstantRange &StartRange, + const APInt &MaxBECount, unsigned BitWidth, bool Signed) { // If either Step or MaxBECount is 0, then the expression won't change, and we // just need to return the initial range. @@ -4767,8 +4973,8 @@ static ConstantRange getRangeForAffineARHelper(APInt Step, // if the expression is decreasing and will be increased by Offset otherwise. APInt StartLower = StartRange.getLower(); APInt StartUpper = StartRange.getUpper() - 1; - APInt MovedBoundary = - Descending ? (StartLower - Offset) : (StartUpper + Offset); + APInt MovedBoundary = Descending ? (StartLower - std::move(Offset)) + : (StartUpper + std::move(Offset)); // It's possible that the new minimum/maximum value will fall into the initial // range (due to wrap around). This means that the expression can take any @@ -4776,21 +4982,18 @@ static ConstantRange getRangeForAffineARHelper(APInt Step, if (StartRange.contains(MovedBoundary)) return ConstantRange(BitWidth, /* isFullSet = */ true); - APInt NewLower, NewUpper; - if (Descending) { - NewLower = MovedBoundary; - NewUpper = StartUpper; - } else { - NewLower = StartLower; - NewUpper = MovedBoundary; - } + APInt NewLower = + Descending ? std::move(MovedBoundary) : std::move(StartLower); + APInt NewUpper = + Descending ? std::move(StartUpper) : std::move(MovedBoundary); + NewUpper += 1; // If we end up with full range, return a proper full range. - if (NewLower == NewUpper + 1) + if (NewLower == NewUpper) return ConstantRange(BitWidth, /* isFullSet = */ true); // No overflow detected, return [StartLower, StartUpper + Offset + 1) range. - return ConstantRange(NewLower, NewUpper + 1); + return ConstantRange(std::move(NewLower), std::move(NewUpper)); } ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, @@ -4970,7 +5173,8 @@ bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { return false; // Only proceed if we can prove that I does not yield poison. - if (!isKnownNotFullPoison(I)) return false; + if (!programUndefinedIfFullPoison(I)) + return false; // At this point we know that if I is executed, then it does not wrap // according to at least one of NSW or NUW. If I is not executed, then we do @@ -5236,13 +5440,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned LZ = A.countLeadingZeros(); unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(BO->LHS, KnownZero, KnownOne, getDataLayout(), + KnownBits Known(BitWidth); + computeKnownBits(BO->LHS, Known, getDataLayout(), 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); - if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) { + if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) { const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); const SCEV *LHS = getSCEV(BO->LHS); const SCEV *ShiftedLHS = nullptr; @@ -5329,7 +5533,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // using an add, which is equivalent, and re-apply the zext. APInt Trunc = CI->getValue().trunc(Z0TySize); if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && - Trunc.isSignBit()) + Trunc.isSignMask()) return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), UTy); } @@ -5818,6 +6022,8 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax()) return SE->getCouldNotCompute(); + assert((isa(getMax()) || isa(getMax())) && + "No point in having a non-constant max backedge taken count!"); return getMax(); } @@ -5842,6 +6048,45 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, return false; } +ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E) + : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) { + assert((isa(MaxNotTaken) || + isa(MaxNotTaken)) && + "No point in having a non-constant max backedge taken count!"); +} + +ScalarEvolution::ExitLimit::ExitLimit( + const SCEV *E, const SCEV *M, bool MaxOrZero, + ArrayRef *> PredSetList) + : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) { + assert((isa(ExactNotTaken) || + !isa(MaxNotTaken)) && + "Exact is not allowed to be less precise than Max"); + assert((isa(MaxNotTaken) || + isa(MaxNotTaken)) && + "No point in having a non-constant max backedge taken count!"); + for (auto *PredSet : PredSetList) + for (auto *P : *PredSet) + addPredicate(P); +} + +ScalarEvolution::ExitLimit::ExitLimit( + const SCEV *E, const SCEV *M, bool MaxOrZero, + const SmallPtrSetImpl &PredSet) + : ExitLimit(E, M, MaxOrZero, {&PredSet}) { + assert((isa(MaxNotTaken) || + isa(MaxNotTaken)) && + "No point in having a non-constant max backedge taken count!"); +} + +ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M, + bool MaxOrZero) + : ExitLimit(E, M, MaxOrZero, None) { + assert((isa(MaxNotTaken) || + isa(MaxNotTaken)) && + "No point in having a non-constant max backedge taken count!"); +} + /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( @@ -5865,6 +6110,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate)); }); + assert((isa(MaxCount) || isa(MaxCount)) && + "No point in having a non-constant max backedge taken count!"); } /// Invalidate this result and free the ExitNotTakenInfo array. @@ -6023,24 +6270,74 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock, return getCouldNotCompute(); } -ScalarEvolution::ExitLimit -ScalarEvolution::computeExitLimitFromCond(const Loop *L, - Value *ExitCond, - BasicBlock *TBB, - BasicBlock *FBB, - bool ControlsExit, - bool AllowPredicates) { +ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond( + const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, bool AllowPredicates) { + ScalarEvolution::ExitLimitCacheTy Cache(L, TBB, FBB, AllowPredicates); + return computeExitLimitFromCondCached(Cache, L, ExitCond, TBB, FBB, + ControlsExit, AllowPredicates); +} + +Optional +ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond, + BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, bool AllowPredicates) { + (void)this->L; + (void)this->TBB; + (void)this->FBB; + (void)this->AllowPredicates; + + assert(this->L == L && this->TBB == TBB && this->FBB == FBB && + this->AllowPredicates == AllowPredicates && + "Variance in assumed invariant key components!"); + auto Itr = TripCountMap.find({ExitCond, ControlsExit}); + if (Itr == TripCountMap.end()) + return None; + return Itr->second; +} + +void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond, + BasicBlock *TBB, BasicBlock *FBB, + bool ControlsExit, + bool AllowPredicates, + const ExitLimit &EL) { + assert(this->L == L && this->TBB == TBB && this->FBB == FBB && + this->AllowPredicates == AllowPredicates && + "Variance in assumed invariant key components!"); + + auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL}); + assert(InsertResult.second && "Expected successful insertion!"); + (void)InsertResult; +} + +ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached( + ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { + + if (auto MaybeEL = + Cache.find(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates)) + return *MaybeEL; + + ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, TBB, FBB, + ControlsExit, AllowPredicates); + Cache.insert(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates, EL); + return EL; +} + +ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl( + ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB, + BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) { // Check if the controlling expression for this loop is an And or Or. if (BinaryOperator *BO = dyn_cast(ExitCond)) { if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. bool EitherMayExit = L->contains(TBB); - ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); - ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); + ExitLimit EL0 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); + ExitLimit EL1 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -6076,7 +6373,7 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, // to not. if (isa(MaxBECount) && !isa(BECount)) - MaxBECount = BECount; + MaxBECount = getConstant(getUnsignedRange(BECount).getUnsignedMax()); return ExitLimit(BECount, MaxBECount, false, {&EL0.Predicates, &EL1.Predicates}); @@ -6084,12 +6381,12 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L, if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); - ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); - ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, - ControlsExit && !EitherMayExit, - AllowPredicates); + ExitLimit EL0 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); + ExitLimit EL1 = computeExitLimitFromCondCached( + Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit, + AllowPredicates); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); if (EitherMayExit) { @@ -6474,13 +6771,12 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( // {K,ashr,} stabilizes to signum(K) in at most // bitwidth(K) iterations. Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); - bool KnownZero, KnownOne; - ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr, - Predecessor->getTerminator(), &DT); + KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr, + Predecessor->getTerminator(), &DT); auto *Ty = cast(RHS->getType()); - if (KnownZero) + if (Known.isNonNegative()) StableValue = ConstantInt::get(Ty, 0); - else if (KnownOne) + else if (Known.isNegative()) StableValue = ConstantInt::get(Ty, -1, true); else return getCouldNotCompute(); @@ -6520,7 +6816,7 @@ static bool CanConstantFold(const Instruction *I) { if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) - return canConstantFoldCallTo(F); + return canConstantFoldCallTo(CI, F); return false; } @@ -7213,50 +7509,50 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { const APInt &M = MC->getAPInt(); const APInt &N = NC->getAPInt(); APInt Two(BitWidth, 2); - APInt Four(BitWidth, 4); - - { - using namespace APIntOps; - const APInt& C = L; - // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C - // The B coefficient is M-N/2 - APInt B(M); - B -= N.sdiv(Two); - - // The A coefficient is N/2 - APInt A(N.sdiv(Two)); - - // Compute the B^2-4ac term. - APInt SqrtTerm(B); - SqrtTerm *= B; - SqrtTerm -= Four * (A * C); - - if (SqrtTerm.isNegative()) { - // The loop is provably infinite. - return None; - } - // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest - // integer value or else APInt::sqrt() will assert. - APInt SqrtVal(SqrtTerm.sqrt()); + // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C - // Compute the two solutions for the quadratic formula. - // The divisions must be performed as signed divisions. - APInt NegB(-B); - APInt TwoA(A << 1); - if (TwoA.isMinValue()) - return None; + // The A coefficient is N/2 + APInt A = N.sdiv(Two); + + // The B coefficient is M-N/2 + APInt B = M; + B -= A; // A is the same as N/2. + + // The C coefficient is L. + const APInt& C = L; - LLVMContext &Context = SE.getContext(); + // Compute the B^2-4ac term. + APInt SqrtTerm = B; + SqrtTerm *= B; + SqrtTerm -= 4 * (A * C); - ConstantInt *Solution1 = - ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); - ConstantInt *Solution2 = - ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); + if (SqrtTerm.isNegative()) { + // The loop is provably infinite. + return None; + } + + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest + // integer value or else APInt::sqrt() will assert. + APInt SqrtVal = SqrtTerm.sqrt(); + + // Compute the two solutions for the quadratic formula. + // The divisions must be performed as signed divisions. + APInt NegB = -std::move(B); + APInt TwoA = std::move(A); + TwoA <<= 1; + if (TwoA.isNullValue()) + return None; + + LLVMContext &Context = SE.getContext(); + + ConstantInt *Solution1 = + ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); + ConstantInt *Solution2 = + ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); - return std::make_pair(cast(SE.getConstant(Solution1)), - cast(SE.getConstant(Solution2))); - } // end APIntOps namespace + return std::make_pair(cast(SE.getConstant(Solution1)), + cast(SE.getConstant(Solution2))); } ScalarEvolution::ExitLimit @@ -7381,13 +7677,20 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, loopHasNoAbnormalExits(AddRec->getLoop())) { const SCEV *Exact = getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - return ExitLimit(Exact, Exact, false, Predicates); + const SCEV *Max = + Exact == getCouldNotCompute() + ? Exact + : getConstant(getUnsignedRange(Exact).getUnsignedMax()); + return ExitLimit(Exact, Max, false, Predicates); } // Solve the general equation. - const SCEV *E = SolveLinEquationWithOverflow( - StepC->getAPInt(), getNegativeSCEV(Start), *this); - return ExitLimit(E, E, false, Predicates); + const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(), + getNegativeSCEV(Start), *this); + const SCEV *M = E == getCouldNotCompute() + ? E + : getConstant(getUnsignedRange(E).getUnsignedMax()); + return ExitLimit(E, M, false, Predicates); } ScalarEvolution::ExitLimit @@ -7902,6 +8205,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, case ICmpInst::ICMP_SGE: std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLE: // X s<= (X + C) if C >= 0 if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative()) @@ -7915,6 +8219,7 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, case ICmpInst::ICMP_SGT: std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SLT: // X s< (X + C) if C > 0 if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && @@ -8272,6 +8577,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin))) return true; + LLVM_FALLTHROUGH; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_UGT: @@ -8286,6 +8592,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min))) return true; + LLVM_FALLTHROUGH; default: // No change @@ -8777,7 +9084,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, if (!Addend) return false; - APInt ConstFoundRHS = cast(FoundRHS)->getAPInt(); + const APInt &ConstFoundRHS = cast(FoundRHS)->getAPInt(); // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the // antecedent "`FoundLHS` `Pred` `FoundRHS`". @@ -8789,7 +9096,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, // We can also compute the range of values for `LHS` that satisfy the // consequent, "`LHS` `Pred` `RHS`": - APInt ConstRHS = cast(RHS)->getAPInt(); + const APInt &ConstRHS = cast(RHS)->getAPInt(); ConstantRange SatisfyingLHSRange = ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS); @@ -8814,7 +9121,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, .getSignedMax(); // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! - return (MaxValue - MaxStrideMinusOne).slt(MaxRHS); + return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS); } APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax(); @@ -8823,7 +9130,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, .getUnsignedMax(); // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! - return (MaxValue - MaxStrideMinusOne).ult(MaxRHS); + return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS); } bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, @@ -8840,7 +9147,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, .getSignedMax(); // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! - return (MinValue + MaxStrideMinusOne).sgt(MinRHS); + return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS); } APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin(); @@ -8849,7 +9156,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, .getUnsignedMax(); // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! - return (MinValue + MaxStrideMinusOne).ugt(MinRHS); + return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS); } const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, @@ -9016,8 +9323,9 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, getConstant(StrideForMaxBECount), false); } - if (isa(MaxBECount)) - MaxBECount = BECount; + if (isa(MaxBECount) && + !isa(BECount)) + MaxBECount = getConstant(getUnsignedRange(BECount).getUnsignedMax()); return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates); } @@ -9140,9 +9448,8 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, // the upper value of the range must be the first possible exit value. // If A is negative then the lower of the range is the last possible loop // value. Also note that we already checked for a full range. - APInt One(BitWidth,1); APInt A = cast(getOperand(1))->getAPInt(); - APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower(); + APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower(); // The exit value should be (End+A)/A. APInt ExitVal = (End + A).udiv(A); @@ -9158,7 +9465,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, // Ensure that the previous value is in the range. This is a sanity check. assert(Range.contains( EvaluateConstantChrecAtConstant(this, - ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && + ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) && "Linear scev computation is off in a bad way!"); return SE.getConstant(ExitValue); } else if (isQuadratic()) { @@ -9309,8 +9616,11 @@ struct SCEVCollectAddRecMultiplies { bool HasAddRec = false; SmallVector Operands; for (auto Op : Mul->operands()) { - if (isa(Op)) { + const SCEVUnknown *Unknown = dyn_cast(Op); + if (Unknown && !isa(Unknown->getValue())) { Operands.push_back(Op); + } else if (Unknown) { + HasAddRec = true; } else { bool ContainsAddRec; SCEVHasAddRec ContiansAddRec(ContainsAddRec); @@ -9464,7 +9774,7 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, SmallVectorImpl &Sizes, - const SCEV *ElementSize) const { + const SCEV *ElementSize) { if (Terms.size() < 1 || !ElementSize) return; @@ -9480,7 +9790,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, }); // Remove duplicates. - std::sort(Terms.begin(), Terms.end()); + array_pod_sort(Terms.begin(), Terms.end()); Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end()); // Put larger terms first. @@ -9488,13 +9798,11 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, return numberOfTerms(LHS) > numberOfTerms(RHS); }); - ScalarEvolution &SE = *const_cast(this); - // Try to divide all terms by the element size. If term is not divisible by // element size, proceed with the original term. for (const SCEV *&Term : Terms) { const SCEV *Q, *R; - SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); + SCEVDivision::divide(*this, Term, ElementSize, &Q, &R); if (!Q->isZero()) Term = Q; } @@ -9503,7 +9811,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, // Remove constant factors. for (const SCEV *T : Terms) - if (const SCEV *NewT = removeConstantFactors(SE, T)) + if (const SCEV *NewT = removeConstantFactors(*this, T)) NewTerms.push_back(NewT); DEBUG({ @@ -9512,8 +9820,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl &Terms, dbgs() << *T << "\n"; }); - if (NewTerms.empty() || - !findArrayDimensionsRec(SE, NewTerms, Sizes)) { + if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) { Sizes.clear(); return; } @@ -10181,84 +10488,75 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts); } -typedef DenseMap VerifyMap; +void ScalarEvolution::verify() const { + ScalarEvolution &SE = *const_cast(this); + ScalarEvolution SE2(F, TLI, AC, DT, LI); -/// replaceSubString - Replaces all occurrences of From in Str with To. -static void replaceSubString(std::string &Str, StringRef From, StringRef To) { - size_t Pos = 0; - while ((Pos = Str.find(From, Pos)) != std::string::npos) { - Str.replace(Pos, From.size(), To.data(), To.size()); - Pos += To.size(); - } -} + SmallVector LoopStack(LI.begin(), LI.end()); -/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis. -static void -getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) { - std::string &S = Map[L]; - if (S.empty()) { - raw_string_ostream OS(S); - SE.getBackedgeTakenCount(L)->print(OS); + // Map's SCEV expressions from one ScalarEvolution "universe" to another. + struct SCEVMapper : public SCEVRewriteVisitor { + const SCEV *visitConstant(const SCEVConstant *Constant) { + return SE.getConstant(Constant->getAPInt()); + } + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + return SE.getUnknown(Expr->getValue()); + } - // false and 0 are semantically equivalent. This can happen in dead loops. - replaceSubString(OS.str(), "false", "0"); - // Remove wrap flags, their use in SCEV is highly fragile. - // FIXME: Remove this when SCEV gets smarter about them. - replaceSubString(OS.str(), "", ""); - replaceSubString(OS.str(), "", ""); - replaceSubString(OS.str(), "", ""); - } + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return SE.getCouldNotCompute(); + } + SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor(SE) {} + }; - for (auto *R : reverse(*L)) - getLoopBackedgeTakenCounts(R, Map, SE); // recurse. -} + SCEVMapper SCM(SE2); -void ScalarEvolution::verify() const { - ScalarEvolution &SE = *const_cast(this); + while (!LoopStack.empty()) { + auto *L = LoopStack.pop_back_val(); + LoopStack.insert(LoopStack.end(), L->begin(), L->end()); - // Gather stringified backedge taken counts for all loops using SCEV's caches. - // FIXME: It would be much better to store actual values instead of strings, - // but SCEV pointers will change if we drop the caches. - VerifyMap BackedgeDumpsOld, BackedgeDumpsNew; - for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) - getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE); + auto *CurBECount = SCM.visit( + const_cast(this)->getBackedgeTakenCount(L)); + auto *NewBECount = SE2.getBackedgeTakenCount(L); - // Gather stringified backedge taken counts for all loops using a fresh - // ScalarEvolution object. - ScalarEvolution SE2(F, TLI, AC, DT, LI); - for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) - getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2); - - // Now compare whether they're the same with and without caches. This allows - // verifying that no pass changed the cache. - assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && - "New loops suddenly appeared!"); - - for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(), - OldE = BackedgeDumpsOld.end(), - NewI = BackedgeDumpsNew.begin(); - OldI != OldE; ++OldI, ++NewI) { - assert(OldI->first == NewI->first && "Loop order changed!"); - - // Compare the stringified SCEVs. We don't care if undef backedgetaken count - // changes. - // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This - // means that a pass is buggy or SCEV has to learn a new pattern but is - // usually not harmful. - if (OldI->second != NewI->second && - OldI->second.find("undef") == std::string::npos && - NewI->second.find("undef") == std::string::npos && - OldI->second != "***COULDNOTCOMPUTE***" && - NewI->second != "***COULDNOTCOMPUTE***") { - dbgs() << "SCEVValidator: SCEV for loop '" - << OldI->first->getHeader()->getName() - << "' changed from '" << OldI->second - << "' to '" << NewI->second << "'!\n"; + if (CurBECount == SE2.getCouldNotCompute() || + NewBECount == SE2.getCouldNotCompute()) { + // NB! This situation is legal, but is very suspicious -- whatever pass + // change the loop to make a trip count go from could not compute to + // computable or vice-versa *should have* invalidated SCEV. However, we + // choose not to assert here (for now) since we don't want false + // positives. + continue; + } + + if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) { + // SCEV treats "undef" as an unknown but consistent value (i.e. it does + // not propagate undef aggressively). This means we can (and do) fail + // verification in cases where a transform makes the trip count of a loop + // go from "undef" to "undef+1" (say). The transform is fine, since in + // both cases the loop iterates "undef" times, but SCEV thinks we + // increased the trip count of the loop by 1 incorrectly. + continue; + } + + if (SE.getTypeSizeInBits(CurBECount->getType()) > + SE.getTypeSizeInBits(NewBECount->getType())) + NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType()); + else if (SE.getTypeSizeInBits(CurBECount->getType()) < + SE.getTypeSizeInBits(NewBECount->getType())) + CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType()); + + auto *ConstantDelta = + dyn_cast(SE2.getMinusSCEV(CurBECount, NewBECount)); + + if (ConstantDelta && ConstantDelta->getAPInt() != 0) { + dbgs() << "Trip Count Changed!\n"; + dbgs() << "Old: " << *CurBECount << "\n"; + dbgs() << "New: " << *NewBECount << "\n"; + dbgs() << "Delta: " << *ConstantDelta << "\n"; std::abort(); } } - - // TODO: Verify more things. } bool ScalarEvolution::invalidate( diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index d15a7dbd20e60b79b940e93d8e58fc5418f03dab..f9b9df2bc707db8705dc60f2eefa4edd97d8d4e4 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1268,8 +1268,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (PostIncLoops.count(L)) { PostIncLoopSet Loops; Loops.insert(L); - Normalized = cast(TransformForPostIncUse( - Normalize, S, nullptr, nullptr, Loops, SE, SE.DT)); + Normalized = cast(normalizeForPostIncUse(S, Loops, SE)); } // Strip off any non-loop-dominating component from the addrec start. @@ -1306,12 +1305,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Expand the core addrec. If we need post-loop scaling, force it to // expand to an integer type to avoid the need for additional casting. Type *ExpandTy = PostLoopScale ? IntTy : STy; + // We can't use a pointer type for the addrec if the pointer type is + // non-integral. + Type *AddRecPHIExpandTy = + DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy; + // In some cases, we decide to reuse an existing phi node but need to truncate // it and/or invert the step. Type *TruncTy = nullptr; bool InvertStep = false; - PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy, - TruncTy, InvertStep); + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy, + IntTy, TruncTy, InvertStep); // Accommodate post-inc mode, if necessary. Value *Result; @@ -1384,8 +1388,15 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating offset. if (PostLoopOffset) { if (PointerType *PTy = dyn_cast(ExpandTy)) { - const SCEV *const OffsetArray[1] = { PostLoopOffset }; - Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); + if (Result->getType()->isIntegerTy()) { + Value *Base = expandCodeFor(PostLoopOffset, ExpandTy); + const SCEV *const OffsetArray[1] = {SE.getUnknown(Result)}; + Result = expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Base); + } else { + const SCEV *const OffsetArray[1] = {PostLoopOffset}; + Result = + expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Result); + } } else { Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateAdd(Result, @@ -1773,9 +1784,10 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, /// /// This does not depend on any SCEVExpander state but should be used in /// the same context that SCEVExpander is used. -unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl &DeadInsts, - const TargetTransformInfo *TTI) { +unsigned +SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, + SmallVectorImpl &DeadInsts, + const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. SmallVector Phis; for (auto &I : *L->getHeader()) { @@ -1800,7 +1812,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, // so narrow phis can reuse them. for (PHINode *Phi : Phis) { auto SimplifyPHINode = [&](PHINode *PN) -> Value * { - if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC)) + if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC})) return V; if (!SE.isSCEVable(PN->getType())) return nullptr; diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index c1f9503816eea98abc81da47b01380288263d05e..3740039b8f8674d253a65b6ba371863837cd8fb7 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -12,243 +12,107 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Dominators.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ScalarEvolutionNormalization.h" using namespace llvm; -/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression -/// and now we need to decide whether the user should use the preinc or post-inc -/// value. If this user should use the post-inc version of the IV, return true. -/// -/// Choosing wrong here can break dominance properties (if we choose to use the -/// post-inc value when we cannot) or it can end up adding extra live-ranges to -/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we -/// should use the post-inc value). -static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, - const Loop *L, DominatorTree *DT) { - // If the user is in the loop, use the preinc value. - if (L->contains(User)) return false; - - BasicBlock *LatchBlock = L->getLoopLatch(); - if (!LatchBlock) - return false; - - // Ok, the user is outside of the loop. If it is dominated by the latch - // block, use the post-inc value. - if (DT->dominates(LatchBlock, User->getParent())) - return true; - - // There is one case we have to be careful of: PHI nodes. These little guys - // can live in blocks that are not dominated by the latch block, but (since - // their uses occur in the predecessor block, not the block the PHI lives in) - // should still use the post-inc value. Check for this case now. - PHINode *PN = dyn_cast(User); - if (!PN || !Operand) return false; // not a phi, not dominated by latch block. - - // Look at all of the uses of Operand by the PHI node. If any use corresponds - // to a block that is not dominated by the latch block, give up and use the - // preincremented value. - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Operand && - !DT->dominates(LatchBlock, PN->getIncomingBlock(i))) - return false; - - // Okay, all uses of Operand by PN are in predecessor blocks that really are - // dominated by the latch block. Use the post-incremented value. - return true; -} +/// TransformKind - Different types of transformations that +/// TransformForPostIncUse can do. +enum TransformKind { + /// Normalize - Normalize according to the given loops. + Normalize, + /// Denormalize - Perform the inverse transform on the expression with the + /// given loop set. + Denormalize +}; namespace { - -/// Hold the state used during post-inc expression transformation, including a -/// map of transformed expressions. -class PostIncTransform { - TransformKind Kind; - PostIncLoopSet &Loops; - ScalarEvolution &SE; - DominatorTree &DT; - - DenseMap Transformed; - -public: - PostIncTransform(TransformKind kind, PostIncLoopSet &loops, - ScalarEvolution &se, DominatorTree &dt): - Kind(kind), Loops(loops), SE(se), DT(dt) {} - - const SCEV *TransformSubExpr(const SCEV *S, Instruction *User, - Value *OperandValToReplace); - -protected: - const SCEV *TransformImpl(const SCEV *S, Instruction *User, - Value *OperandValToReplace); +struct NormalizeDenormalizeRewriter + : public SCEVRewriteVisitor { + const TransformKind Kind; + + // NB! Pred is a function_ref. Storing it here is okay only because + // we're careful about the lifetime of NormalizeDenormalizeRewriter. + const NormalizePredTy Pred; + + NormalizeDenormalizeRewriter(TransformKind Kind, NormalizePredTy Pred, + ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), Kind(Kind), + Pred(Pred) {} + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr); }; - } // namespace -/// Implement post-inc transformation for all valid expression types. -const SCEV *PostIncTransform:: -TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { - - if (const SCEVCastExpr *X = dyn_cast(S)) { - const SCEV *O = X->getOperand(); - const SCEV *N = TransformSubExpr(O, User, OperandValToReplace); - if (O != N) - switch (S->getSCEVType()) { - case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); - case scSignExtend: return SE.getSignExtendExpr(N, S->getType()); - case scTruncate: return SE.getTruncateExpr(N, S->getType()); - default: llvm_unreachable("Unexpected SCEVCastExpr kind!"); - } - return S; +const SCEV * +NormalizeDenormalizeRewriter::visitAddRecExpr(const SCEVAddRecExpr *AR) { + SmallVector Operands; + + transform(AR->operands(), std::back_inserter(Operands), + [&](const SCEV *Op) { return visit(Op); }); + + if (!Pred(AR)) + return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap); + + // Normalization and denormalization are fancy names for decrementing and + // incrementing a SCEV expression with respect to a set of loops. Since + // Pred(AR) has returned true, we know we need to normalize or denormalize AR + // with respect to its loop. + + if (Kind == Denormalize) { + // Denormalization / "partial increment" is essentially the same as \c + // SCEVAddRecExpr::getPostIncExpr. Here we use an explicit loop to make the + // symmetry with Normalization clear. + for (int i = 0, e = Operands.size() - 1; i < e; i++) + Operands[i] = SE.getAddExpr(Operands[i], Operands[i + 1]); + } else { + assert(Kind == Normalize && "Only two possibilities!"); + + // Normalization / "partial decrement" is a bit more subtle. Since + // incrementing a SCEV expression (in general) changes the step of the SCEV + // expression as well, we cannot use the step of the current expression. + // Instead, we have to use the step of the very expression we're trying to + // compute! + // + // We solve the issue by recursively building up the result, starting from + // the "least significant" operand in the add recurrence: + // + // Base case: + // Single operand add recurrence. It's its own normalization. + // + // N-operand case: + // {S_{N-1},+,S_{N-2},+,...,+,S_0} = S + // + // Since the step recurrence of S is {S_{N-2},+,...,+,S_0}, we know its + // normalization by induction. We subtract the normalized step + // recurrence from S_{N-1} to get the normalization of S. + + for (int i = Operands.size() - 2; i >= 0; i--) + Operands[i] = SE.getMinusSCEV(Operands[i], Operands[i + 1]); } - if (const SCEVAddRecExpr *AR = dyn_cast(S)) { - // An addrec. This is the interesting part. - SmallVector Operands; - const Loop *L = AR->getLoop(); - // The addrec conceptually uses its operands at loop entry. - Instruction *LUser = &L->getHeader()->front(); - // Transform each operand. - for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); - I != E; ++I) { - Operands.push_back(TransformSubExpr(*I, LUser, nullptr)); - } - // Conservatively use AnyWrap until/unless we need FlagNW. - const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); - switch (Kind) { - case NormalizeAutodetect: - // Normalize this SCEV by subtracting the expression for the final step. - // We only allow affine AddRecs to be normalized, otherwise we would not - // be able to correctly denormalize. - // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2} - // Normalized form: {-2,+,1,+,2} - // Denormalized form: {1,+,3,+,2} - // - // However, denormalization would use a different step expression than - // normalization (see getPostIncExpr), generating the wrong final - // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2} - if (AR->isAffine() && - IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { - const SCEV *TransformedStep = - TransformSubExpr(AR->getStepRecurrence(SE), - User, OperandValToReplace); - Result = SE.getMinusSCEV(Result, TransformedStep); - Loops.insert(L); - } -#if 0 - // This assert is conceptually correct, but ScalarEvolution currently - // sometimes fails to canonicalize two equal SCEVs to exactly the same - // form. It's possibly a pessimization when this happens, but it isn't a - // correctness problem, so disable this assert for now. - assert(S == TransformSubExpr(Result, User, OperandValToReplace) && - "SCEV normalization is not invertible!"); -#endif - break; - case Normalize: - // We want to normalize step expression, because otherwise we might not be - // able to denormalize to the original expression. - // - // Here is an example what will happen if we don't normalize step: - // ORIGINAL ISE: - // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25> - // NORMALIZED ISE: - // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+, - // (100 /u {0,+,1}<%bb16>)}<%bb25> - // DENORMALIZED BACK ISE: - // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+, - // (100 /u {1,+,1}<%bb16>)}<%bb25> - // Note that the initial value changes after normalization + - // denormalization, which isn't correct. - if (Loops.count(L)) { - const SCEV *TransformedStep = - TransformSubExpr(AR->getStepRecurrence(SE), - User, OperandValToReplace); - Result = SE.getMinusSCEV(Result, TransformedStep); - } -#if 0 - // See the comment on the assert above. - assert(S == TransformSubExpr(Result, User, OperandValToReplace) && - "SCEV normalization is not invertible!"); -#endif - break; - case Denormalize: - // Here we want to normalize step expressions for the same reasons, as - // stated above. - if (Loops.count(L)) { - const SCEV *TransformedStep = - TransformSubExpr(AR->getStepRecurrence(SE), - User, OperandValToReplace); - Result = SE.getAddExpr(Result, TransformedStep); - } - break; - } - return Result; - } - - if (const SCEVNAryExpr *X = dyn_cast(S)) { - SmallVector Operands; - bool Changed = false; - // Transform each operand. - for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); - I != E; ++I) { - const SCEV *O = *I; - const SCEV *N = TransformSubExpr(O, User, OperandValToReplace); - Changed |= N != O; - Operands.push_back(N); - } - // If any operand actually changed, return a transformed result. - if (Changed) - switch (S->getSCEVType()) { - case scAddExpr: return SE.getAddExpr(Operands); - case scMulExpr: return SE.getMulExpr(Operands); - case scSMaxExpr: return SE.getSMaxExpr(Operands); - case scUMaxExpr: return SE.getUMaxExpr(Operands); - default: llvm_unreachable("Unexpected SCEVNAryExpr kind!"); - } - return S; - } - - if (const SCEVUDivExpr *X = dyn_cast(S)) { - const SCEV *LO = X->getLHS(); - const SCEV *RO = X->getRHS(); - const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace); - const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace); - if (LO != LN || RO != RN) - return SE.getUDivExpr(LN, RN); - return S; - } - - llvm_unreachable("Unexpected SCEV kind!"); + return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap); } -/// Manage recursive transformation across an expression DAG. Revisiting -/// expressions would lead to exponential recursion. -const SCEV *PostIncTransform:: -TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) { - - if (isa(S) || isa(S)) - return S; - - const SCEV *Result = Transformed.lookup(S); - if (Result) - return Result; +const SCEV *llvm::normalizeForPostIncUse(const SCEV *S, + const PostIncLoopSet &Loops, + ScalarEvolution &SE) { + auto Pred = [&](const SCEVAddRecExpr *AR) { + return Loops.count(AR->getLoop()); + }; + return NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S); +} - Result = TransformImpl(S, User, OperandValToReplace); - Transformed[S] = Result; - return Result; +const SCEV *llvm::normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred, + ScalarEvolution &SE) { + return NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S); } -/// Top level driver for transforming an expression DAG into its requested -/// post-inc form (either "Normalized" or "Denormalized"). -const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, - const SCEV *S, - Instruction *User, - Value *OperandValToReplace, - PostIncLoopSet &Loops, - ScalarEvolution &SE, - DominatorTree &DT) { - PostIncTransform Transform(Kind, Loops, SE, DT); - return Transform.TransformSubExpr(S, User, OperandValToReplace); +const SCEV *llvm::denormalizeForPostIncUse(const SCEV *S, + const PostIncLoopSet &Loops, + ScalarEvolution &SE) { + auto Pred = [&](const SCEVAddRecExpr *AR) { + return Loops.count(AR->getLoop()); + }; + return NormalizeDenormalizeRewriter(Denormalize, Pred, SE).visit(S); } diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index be734fa91425d2653543740a67053eea566eb72e..2be5d5caf7c2156e6859f329a8e036c4e2e48543 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/Constants.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -241,6 +242,50 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_tanhf); } + // These definitions are due to math-finite.h header on Linux + TLI.setUnavailable(LibFunc_acos_finite); + TLI.setUnavailable(LibFunc_acosf_finite); + TLI.setUnavailable(LibFunc_acosl_finite); + TLI.setUnavailable(LibFunc_acosh_finite); + TLI.setUnavailable(LibFunc_acoshf_finite); + TLI.setUnavailable(LibFunc_acoshl_finite); + TLI.setUnavailable(LibFunc_asin_finite); + TLI.setUnavailable(LibFunc_asinf_finite); + TLI.setUnavailable(LibFunc_asinl_finite); + TLI.setUnavailable(LibFunc_atan2_finite); + TLI.setUnavailable(LibFunc_atan2f_finite); + TLI.setUnavailable(LibFunc_atan2l_finite); + TLI.setUnavailable(LibFunc_atanh_finite); + TLI.setUnavailable(LibFunc_atanhf_finite); + TLI.setUnavailable(LibFunc_atanhl_finite); + TLI.setUnavailable(LibFunc_cosh_finite); + TLI.setUnavailable(LibFunc_coshf_finite); + TLI.setUnavailable(LibFunc_coshl_finite); + TLI.setUnavailable(LibFunc_exp10_finite); + TLI.setUnavailable(LibFunc_exp10f_finite); + TLI.setUnavailable(LibFunc_exp10l_finite); + TLI.setUnavailable(LibFunc_exp2_finite); + TLI.setUnavailable(LibFunc_exp2f_finite); + TLI.setUnavailable(LibFunc_exp2l_finite); + TLI.setUnavailable(LibFunc_exp_finite); + TLI.setUnavailable(LibFunc_expf_finite); + TLI.setUnavailable(LibFunc_expl_finite); + TLI.setUnavailable(LibFunc_log10_finite); + TLI.setUnavailable(LibFunc_log10f_finite); + TLI.setUnavailable(LibFunc_log10l_finite); + TLI.setUnavailable(LibFunc_log2_finite); + TLI.setUnavailable(LibFunc_log2f_finite); + TLI.setUnavailable(LibFunc_log2l_finite); + TLI.setUnavailable(LibFunc_log_finite); + TLI.setUnavailable(LibFunc_logf_finite); + TLI.setUnavailable(LibFunc_logl_finite); + TLI.setUnavailable(LibFunc_pow_finite); + TLI.setUnavailable(LibFunc_powf_finite); + TLI.setUnavailable(LibFunc_powl_finite); + TLI.setUnavailable(LibFunc_sinh_finite); + TLI.setUnavailable(LibFunc_sinhf_finite); + TLI.setUnavailable(LibFunc_sinhl_finite); + // Win32 does *not* provide provide these functions, but they are // generally available on POSIX-compliant systems: TLI.setUnavailable(LibFunc_access); @@ -496,7 +541,7 @@ static StringRef sanitizeFunctionName(StringRef funcName) { // Check for \01 prefix that is used to mangle __asm declarations and // strip it if present. - return GlobalValue::getRealLinkageName(funcName); + return GlobalValue::dropLLVMManglingEscape(funcName); } bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, @@ -1004,22 +1049,34 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, return (NumParams == 1 && FTy.getParamType(0)->isFloatingPointTy()); case LibFunc_acos: + case LibFunc_acos_finite: case LibFunc_acosf: + case LibFunc_acosf_finite: case LibFunc_acosh: + case LibFunc_acosh_finite: case LibFunc_acoshf: + case LibFunc_acoshf_finite: case LibFunc_acoshl: + case LibFunc_acoshl_finite: case LibFunc_acosl: + case LibFunc_acosl_finite: case LibFunc_asin: + case LibFunc_asin_finite: case LibFunc_asinf: + case LibFunc_asinf_finite: case LibFunc_asinh: case LibFunc_asinhf: case LibFunc_asinhl: case LibFunc_asinl: + case LibFunc_asinl_finite: case LibFunc_atan: case LibFunc_atanf: case LibFunc_atanh: + case LibFunc_atanh_finite: case LibFunc_atanhf: + case LibFunc_atanhf_finite: case LibFunc_atanhl: + case LibFunc_atanhl_finite: case LibFunc_atanl: case LibFunc_cbrt: case LibFunc_cbrtf: @@ -1030,18 +1087,30 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosh: + case LibFunc_cosh_finite: case LibFunc_coshf: + case LibFunc_coshf_finite: case LibFunc_coshl: + case LibFunc_coshl_finite: case LibFunc_cosl: case LibFunc_exp10: + case LibFunc_exp10_finite: case LibFunc_exp10f: + case LibFunc_exp10f_finite: case LibFunc_exp10l: + case LibFunc_exp10l_finite: case LibFunc_exp2: + case LibFunc_exp2_finite: case LibFunc_exp2f: + case LibFunc_exp2f_finite: case LibFunc_exp2l: + case LibFunc_exp2l_finite: case LibFunc_exp: + case LibFunc_exp_finite: case LibFunc_expf: + case LibFunc_expf_finite: case LibFunc_expl: + case LibFunc_expl_finite: case LibFunc_expm1: case LibFunc_expm1f: case LibFunc_expm1l: @@ -1052,20 +1121,29 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_floorf: case LibFunc_floorl: case LibFunc_log10: + case LibFunc_log10_finite: case LibFunc_log10f: + case LibFunc_log10f_finite: case LibFunc_log10l: + case LibFunc_log10l_finite: case LibFunc_log1p: case LibFunc_log1pf: case LibFunc_log1pl: case LibFunc_log2: + case LibFunc_log2_finite: case LibFunc_log2f: + case LibFunc_log2f_finite: case LibFunc_log2l: + case LibFunc_log2l_finite: case LibFunc_log: + case LibFunc_log_finite: case LibFunc_logb: case LibFunc_logbf: case LibFunc_logbl: case LibFunc_logf: + case LibFunc_logf_finite: case LibFunc_logl: + case LibFunc_logl_finite: case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl: @@ -1078,8 +1156,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinh: + case LibFunc_sinh_finite: case LibFunc_sinhf: + case LibFunc_sinhf_finite: case LibFunc_sinhl: + case LibFunc_sinhl_finite: case LibFunc_sinl: case LibFunc_sqrt: case LibFunc_sqrt_finite: @@ -1100,8 +1181,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getReturnType() == FTy.getParamType(0)); case LibFunc_atan2: + case LibFunc_atan2_finite: case LibFunc_atan2f: + case LibFunc_atan2f_finite: case LibFunc_atan2l: + case LibFunc_atan2l_finite: case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: @@ -1115,8 +1199,11 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_copysignf: case LibFunc_copysignl: case LibFunc_pow: + case LibFunc_pow_finite: case LibFunc_powf: + case LibFunc_powf_finite: case LibFunc_powl: + case LibFunc_powl_finite: return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() && FTy.getReturnType() == FTy.getParamType(0) && FTy.getReturnType() == FTy.getParamType(1)); @@ -1176,6 +1263,10 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, FTy.getParamType(0)->isPointerTy() && FTy.getParamType(1) == SizeTTy && FTy.getParamType(2) == SizeTTy); + case LibFunc_wcslen: + return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() && + FTy.getReturnType()->isIntegerTy()); + case LibFunc::NumLibFuncs: break; } @@ -1290,6 +1381,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( {"powf", "__svml_powf8", 8}, {"powf", "__svml_powf16", 16}, + { "__pow_finite", "__svml_pow2", 2 }, + { "__pow_finite", "__svml_pow4", 4 }, + { "__pow_finite", "__svml_pow8", 8 }, + + { "__powf_finite", "__svml_powf4", 4 }, + { "__powf_finite", "__svml_powf8", 8 }, + { "__powf_finite", "__svml_powf16", 16 }, + {"llvm.pow.f64", "__svml_pow2", 2}, {"llvm.pow.f64", "__svml_pow4", 4}, {"llvm.pow.f64", "__svml_pow8", 8}, @@ -1306,6 +1405,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( {"expf", "__svml_expf8", 8}, {"expf", "__svml_expf16", 16}, + { "__exp_finite", "__svml_exp2", 2 }, + { "__exp_finite", "__svml_exp4", 4 }, + { "__exp_finite", "__svml_exp8", 8 }, + + { "__expf_finite", "__svml_expf4", 4 }, + { "__expf_finite", "__svml_expf8", 8 }, + { "__expf_finite", "__svml_expf16", 16 }, + {"llvm.exp.f64", "__svml_exp2", 2}, {"llvm.exp.f64", "__svml_exp4", 4}, {"llvm.exp.f64", "__svml_exp8", 8}, @@ -1322,6 +1429,14 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( {"logf", "__svml_logf8", 8}, {"logf", "__svml_logf16", 16}, + { "__log_finite", "__svml_log2", 2 }, + { "__log_finite", "__svml_log4", 4 }, + { "__log_finite", "__svml_log8", 8 }, + + { "__logf_finite", "__svml_logf4", 4 }, + { "__logf_finite", "__svml_logf8", 8 }, + { "__logf_finite", "__svml_logf16", 16 }, + {"llvm.log.f64", "__svml_log2", 2}, {"llvm.log.f64", "__svml_log4", 4}, {"llvm.log.f64", "__svml_log8", 8}, @@ -1404,6 +1519,21 @@ TargetLibraryInfoImpl &TargetLibraryAnalysis::lookupInfoImpl(const Triple &T) { return *Impl; } +unsigned TargetLibraryInfoImpl::getTargetWCharSize(const Triple &T) { + // See also clang/lib/Basic/Targets.cpp. + if (T.isPS4() || T.isOSWindows() || T.isArch16Bit()) + return 2; + if (T.getArch() == Triple::xcore) + return 1; + return 4; +} + +unsigned TargetLibraryInfoImpl::getWCharSize(const Module &M) const { + if (auto *ShortWChar = cast_or_null( + M.getModuleFlag("wchar_size"))) + return cast(ShortWChar->getValue())->getZExtValue(); + return getTargetWCharSize(Triple(M.getTargetTriple())); +} TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass() : ImmutablePass(ID), TLIImpl(), TLI(TLIImpl) { diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index d73b1a12803187114c55d34904957cdf580f086c..92328f6e5efd58db4d17c389e38fdada793d32b3 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -83,6 +83,12 @@ int TargetTransformInfo::getIntrinsicCost( return Cost; } +unsigned +TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI, + unsigned &JTSize) const { + return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize); +} + int TargetTransformInfo::getUserCost(const User *U) const { int Cost = TTIImpl->getUserCost(U); assert(Cost >= 0 && "TTI should not produce negative costs!"); @@ -97,6 +103,10 @@ bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const { return TTIImpl->isSourceOfDivergence(V); } +bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const { + return TTIImpl->isAlwaysUniform(V); +} + unsigned TargetTransformInfo::getFlatAddressSpace() const { return TTIImpl->getFlatAddressSpace(); } @@ -127,6 +137,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale, AddrSpace); } +bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const { + return TTIImpl->isLSRCostLess(C1, C2); +} + bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { return TTIImpl->isLegalMaskedStore(DataType); } @@ -143,6 +157,10 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { return TTIImpl->isLegalMaskedGather(DataType); } +bool TargetTransformInfo::prefersVectorizedAddressing() const { + return TTIImpl->prefersVectorizedAddressing(); +} + int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, @@ -205,6 +223,10 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } +bool TargetTransformInfo::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) const { + return TTIImpl->expandMemCmp(I, MaxLoadSize); +} + bool TargetTransformInfo::enableInterleavedAccessVectorization() const { return TTIImpl->enableInterleavedAccessVectorization(); } @@ -273,6 +295,10 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { return TTIImpl->getRegisterBitWidth(Vector); } +unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const { + return TTIImpl->getMinVectorRegisterBitWidth(); +} + bool TargetTransformInfo::shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { return TTIImpl->shouldConsiderAddressTypePromotion( @@ -446,6 +472,10 @@ bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst, return TTIImpl->getTgtMemIntrinsic(Inst, Info); } +unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const { + return TTIImpl->getAtomicMemIntrinsicMaxElementSize(); +} + Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( IntrinsicInst *Inst, Type *ExpectedType) const { return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); @@ -494,6 +524,15 @@ unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF, return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } +bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode, + Type *Ty, ReductionFlags Flags) const { + return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags); +} + +bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { + return TTIImpl->shouldExpandReduction(II); +} + TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index d4c0e7092eaa146cad0504514baf52d60c71e0c7..b065f427b06cba146282bbcf75bc6f4ff51a12da 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -17,15 +17,16 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" @@ -38,6 +39,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include #include @@ -58,8 +60,8 @@ static cl::opt DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits", cl::Hidden, cl::init(true)); -/// Returns the bitwidth of the given scalar or pointer type (if unknown returns -/// 0). For vector types, returns the element type's bitwidth. +/// Returns the bitwidth of the given scalar or pointer type. For vector types, +/// returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; @@ -87,9 +89,8 @@ struct Query { /// classic case of this is assume(x = y), which will attempt to determine /// bits in x from bits in y, which will attempt to determine bits in y from /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call - /// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and - /// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so - /// on. + /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo + /// (all of which can call computeKnownBits), and so on. std::array Excluded; unsigned NumExcluded; @@ -130,18 +131,30 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) { return nullptr; } -static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, +static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Query &Q); -void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, +void llvm::computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE) { - ::computeKnownBits(V, KnownZero, KnownOne, Depth, + ::computeKnownBits(V, Known, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); } +static KnownBits computeKnownBits(const Value *V, unsigned Depth, + const Query &Q); + +KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT, + OptimizationRemarkEmitter *ORE) { + return ::computeKnownBits(V, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); +} + bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, @@ -151,22 +164,24 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, assert(LHS->getType()->isIntOrIntVectorTy() && "LHS and RHS should be integers"); IntegerType *IT = cast(LHS->getType()->getScalarType()); - APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0); - APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT); - return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); + KnownBits LHSKnown(IT->getBitWidth()); + KnownBits RHSKnown(IT->getBitWidth()); + computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT); + return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } -static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, - unsigned Depth, const Query &Q); -void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - ::ComputeSignBit(V, KnownZero, KnownOne, Depth, - Query(DL, AC, safeCxtI(V, CxtI), DT)); +bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) { + for (const User *U : CxtI->users()) { + if (const ICmpInst *IC = dyn_cast(U)) + if (IC->isEquality()) + if (Constant *C = dyn_cast(IC->getOperand(1))) + if (C->isNullValue()) + continue; + return false; + } + return true; } static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, @@ -193,9 +208,8 @@ bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - bool NonNegative, Negative; - ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); - return NonNegative; + KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); + return Known.isNonNegative(); } bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, @@ -213,9 +227,8 @@ bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth, bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - bool NonNegative, Negative; - ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); - return Negative; + KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT); + return Known.isNegative(); } static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q); @@ -252,67 +265,65 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, + KnownBits &KnownOut, KnownBits &Known2, unsigned Depth, const Query &Q) { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = KnownOut.getBitWidth(); // If an initial sequence of bits in the result is not needed, the // corresponding bits in the operands are not needed. - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, Depth + 1, Q); - computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q); + KnownBits LHSKnown(BitWidth); + computeKnownBits(Op0, LHSKnown, Depth + 1, Q); + computeKnownBits(Op1, Known2, Depth + 1, Q); // Carry in a 1 for a subtract, rather than a 0. uint64_t CarryIn = 0; if (!Add) { // Sum = LHS + ~RHS + 1 - std::swap(KnownZero2, KnownOne2); + std::swap(Known2.Zero, Known2.One); CarryIn = 1; } - APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn; - APInt PossibleSumOne = LHSKnownOne + KnownOne2 + CarryIn; + APInt PossibleSumZero = ~LHSKnown.Zero + ~Known2.Zero + CarryIn; + APInt PossibleSumOne = LHSKnown.One + Known2.One + CarryIn; // Compute known bits of the carry. - APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnownZero ^ KnownZero2); - APInt CarryKnownOne = PossibleSumOne ^ LHSKnownOne ^ KnownOne2; + APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnown.Zero ^ Known2.Zero); + APInt CarryKnownOne = PossibleSumOne ^ LHSKnown.One ^ Known2.One; // Compute set of known bits (where all three relevant bits are known). - APInt LHSKnown = LHSKnownZero | LHSKnownOne; - APInt RHSKnown = KnownZero2 | KnownOne2; - APInt CarryKnown = CarryKnownZero | CarryKnownOne; - APInt Known = LHSKnown & RHSKnown & CarryKnown; + APInt LHSKnownUnion = LHSKnown.Zero | LHSKnown.One; + APInt RHSKnownUnion = Known2.Zero | Known2.One; + APInt CarryKnownUnion = CarryKnownZero | CarryKnownOne; + APInt Known = LHSKnownUnion & RHSKnownUnion & CarryKnownUnion; assert((PossibleSumZero & Known) == (PossibleSumOne & Known) && "known bits of sum differ"); // Compute known bits of the result. - KnownZero = ~PossibleSumOne & Known; - KnownOne = PossibleSumOne & Known; + KnownOut.Zero = ~PossibleSumOne & Known; + KnownOut.One = PossibleSumOne & Known; // Are we still trying to solve for the sign bit? - if (!Known.isNegative()) { + if (!Known.isSignBitSet()) { if (NSW) { // Adding two non-negative numbers, or subtracting a negative number from // a non-negative one, can't wrap into negative. - if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) - KnownZero.setSignBit(); + if (LHSKnown.isNonNegative() && Known2.isNonNegative()) + KnownOut.makeNonNegative(); // Adding two negative numbers, or subtracting a non-negative number from // a negative one, can't wrap into non-negative. - else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) - KnownOne.setSignBit(); + else if (LHSKnown.isNegative() && Known2.isNegative()) + KnownOut.makeNegative(); } } } static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, + KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q) { - unsigned BitWidth = KnownZero.getBitWidth(); - computeKnownBits(Op1, KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(Op0, KnownZero2, KnownOne2, Depth + 1, Q); + unsigned BitWidth = Known.getBitWidth(); + computeKnownBits(Op1, Known, Depth + 1, Q); + computeKnownBits(Op0, Known2, Depth + 1, Q); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -322,10 +333,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // The product of a number with itself is non-negative. isKnownNonNegative = true; } else { - bool isKnownNonNegativeOp1 = KnownZero.isNegative(); - bool isKnownNonNegativeOp0 = KnownZero2.isNegative(); - bool isKnownNegativeOp1 = KnownOne.isNegative(); - bool isKnownNegativeOp0 = KnownOne2.isNegative(); + bool isKnownNonNegativeOp1 = Known.isNonNegative(); + bool isKnownNonNegativeOp0 = Known2.isNonNegative(); + bool isKnownNegativeOp1 = Known.isNegative(); + bool isKnownNegativeOp0 = Known2.isNegative(); // The product of two numbers with the same sign is non-negative. isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); @@ -343,39 +354,37 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW, // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - KnownOne.clearAllBits(); - unsigned TrailZ = KnownZero.countTrailingOnes() + - KnownZero2.countTrailingOnes(); - unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + - KnownZero2.countLeadingOnes(), + unsigned TrailZ = Known.countMinTrailingZeros() + + Known2.countMinTrailingZeros(); + unsigned LeadZ = std::max(Known.countMinLeadingZeros() + + Known2.countMinLeadingZeros(), BitWidth) - BitWidth; TrailZ = std::min(TrailZ, BitWidth); LeadZ = std::min(LeadZ, BitWidth); - KnownZero.clearAllBits(); - KnownZero.setLowBits(TrailZ); - KnownZero.setHighBits(LeadZ); + Known.resetAll(); + Known.Zero.setLowBits(TrailZ); + Known.Zero.setHighBits(LeadZ); // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in // which case we prefer to follow the result of the direct computation, // though as the program is invoking undefined behaviour we can choose // whatever we like here. - if (isKnownNonNegative && !KnownOne.isNegative()) - KnownZero.setSignBit(); - else if (isKnownNegative && !KnownZero.isNegative()) - KnownOne.setSignBit(); + if (isKnownNonNegative && !Known.isNegative()) + Known.makeNonNegative(); + else if (isKnownNegative && !Known.isNonNegative()) + Known.makeNegative(); } void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - APInt &KnownZero, - APInt &KnownOne) { - unsigned BitWidth = KnownZero.getBitWidth(); + KnownBits &Known) { + unsigned BitWidth = Known.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); - KnownZero.setAllBits(); - KnownOne.setAllBits(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (unsigned i = 0; i < NumRanges; ++i) { ConstantInt *Lower = @@ -389,8 +398,8 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros(); APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits); - KnownOne &= Range.getUnsignedMax() & Mask; - KnownZero &= ~Range.getUnsignedMax() & Mask; + Known.One &= Range.getUnsignedMax() & Mask; + Known.Zero &= ~Range.getUnsignedMax() & Mask; } } @@ -499,15 +508,14 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv, return !isEphemeralValueOf(Inv, CxtI); } -static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - const Query &Q) { +static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, + unsigned Depth, const Query &Q) { // Use of assumptions is context-sensitive. If we don't have a context, we // cannot use them! if (!Q.AC || !Q.CxtI) return; - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); // Note that the patterns below need to be kept in sync with the code // in AssumptionCache::updateAffectedValues. @@ -532,15 +540,13 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); - KnownZero.clearAllBits(); - KnownOne.setAllBits(); + Known.setAllOnes(); return; } if (match(Arg, m_Not(m_Specific(V))) && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { assert(BitWidth == 1 && "assume operand is not i1?"); - KnownZero.setAllBits(); - KnownOne.clearAllBits(); + Known.setAllZero(); return; } @@ -558,122 +564,126 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // assume(v = a) if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - KnownZero |= RHSKnownZero; - KnownOne |= RHSKnownOne; + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + Known.Zero |= RHSKnown.Zero; + Known.One |= RHSKnown.One; // assume(v & b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits MaskKnown(BitWidth); + computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // known bits from the RHS to V. - KnownZero |= RHSKnownZero & MaskKnownOne; - KnownOne |= RHSKnownOne & MaskKnownOne; + Known.Zero |= RHSKnown.Zero & MaskKnown.One; + Known.One |= RHSKnown.One & MaskKnown.One; // assume(~(v & b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0); - computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits MaskKnown(BitWidth); + computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I)); // For those bits in the mask that are known to be one, we can propagate // inverted known bits from the RHS to V. - KnownZero |= RHSKnownOne & MaskKnownOne; - KnownOne |= RHSKnownZero & MaskKnownOne; + Known.Zero |= RHSKnown.One & MaskKnown.One; + Known.One |= RHSKnown.Zero & MaskKnown.One; // assume(v | b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. - KnownZero |= RHSKnownZero & BKnownZero; - KnownOne |= RHSKnownOne & BKnownZero; + Known.Zero |= RHSKnown.Zero & BKnown.Zero; + Known.One |= RHSKnown.One & BKnown.Zero; // assume(~(v | b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. - KnownZero |= RHSKnownOne & BKnownZero; - KnownOne |= RHSKnownZero & BKnownZero; + Known.Zero |= RHSKnown.One & BKnown.Zero; + Known.One |= RHSKnown.Zero & BKnown.Zero; // assume(v ^ b = a) } else if (match(Arg, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate known // bits from the RHS to V. For those bits in B that are known to be one, // we can propagate inverted known bits from the RHS to V. - KnownZero |= RHSKnownZero & BKnownZero; - KnownOne |= RHSKnownOne & BKnownZero; - KnownZero |= RHSKnownOne & BKnownOne; - KnownOne |= RHSKnownZero & BKnownOne; + Known.Zero |= RHSKnown.Zero & BKnown.Zero; + Known.One |= RHSKnown.One & BKnown.Zero; + Known.Zero |= RHSKnown.One & BKnown.One; + Known.One |= RHSKnown.Zero & BKnown.One; // assume(~(v ^ b) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); - APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0); - computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); + KnownBits BKnown(BitWidth); + computeKnownBits(B, BKnown, Depth+1, Query(Q, I)); // For those bits in B that are known to be zero, we can propagate // inverted known bits from the RHS to V. For those bits in B that are // known to be one, we can propagate known bits from the RHS to V. - KnownZero |= RHSKnownOne & BKnownZero; - KnownOne |= RHSKnownZero & BKnownZero; - KnownZero |= RHSKnownZero & BKnownOne; - KnownOne |= RHSKnownOne & BKnownOne; + Known.Zero |= RHSKnown.One & BKnown.Zero; + Known.One |= RHSKnown.Zero & BKnown.Zero; + Known.Zero |= RHSKnown.Zero & BKnown.One; + Known.One |= RHSKnown.One & BKnown.One; // assume(v << c = a) } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - KnownZero |= RHSKnownZero.lshr(C->getZExtValue()); - KnownOne |= RHSKnownOne.lshr(C->getZExtValue()); + RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + Known.Zero |= RHSKnown.Zero; + RHSKnown.One.lshrInPlace(C->getZExtValue()); + Known.One |= RHSKnown.One; // assume(~(v << c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. - KnownZero |= RHSKnownOne.lshr(C->getZExtValue()); - KnownOne |= RHSKnownZero.lshr(C->getZExtValue()); + RHSKnown.One.lshrInPlace(C->getZExtValue()); + Known.Zero |= RHSKnown.One; + RHSKnown.Zero.lshrInPlace(C->getZExtValue()); + Known.One |= RHSKnown.Zero; // assume(v >> c = a) } else if (match(Arg, m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)), @@ -681,12 +691,12 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them to known // bits in V shifted to the right by C. - KnownZero |= RHSKnownZero << C->getZExtValue(); - KnownOne |= RHSKnownOne << C->getZExtValue(); + Known.Zero |= RHSKnown.Zero << C->getZExtValue(); + Known.One |= RHSKnown.One << C->getZExtValue(); // assume(~(v >> c) = a) } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr( m_LShr(m_V, m_ConstantInt(C)), @@ -694,78 +704,78 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, m_Value(A))) && Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // For those bits in RHS that are known, we can propagate them inverted // to known bits in V shifted to the right by C. - KnownZero |= RHSKnownOne << C->getZExtValue(); - KnownOne |= RHSKnownZero << C->getZExtValue(); + Known.Zero |= RHSKnown.One << C->getZExtValue(); + Known.One |= RHSKnown.Zero << C->getZExtValue(); // assume(v >=_s c) where c is non-negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownZero.isNegative()) { + if (RHSKnown.isNonNegative()) { // We know that the sign bit is zero. - KnownZero.setSignBit(); + Known.makeNonNegative(); } // assume(v >_s c) where c is at least -1. } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SGT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) { + if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) { // We know that the sign bit is zero. - KnownZero.setSignBit(); + Known.makeNonNegative(); } // assume(v <=_s c) where c is negative } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownOne.isNegative()) { + if (RHSKnown.isNegative()) { // We know that the sign bit is one. - KnownOne.setSignBit(); + Known.makeNegative(); } // assume(v <_s c) where c is non-positive } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_SLT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); - if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) { + if (RHSKnown.isZero() || RHSKnown.isNegative()) { // We know that the sign bit is one. - KnownOne.setSignBit(); + Known.makeNegative(); } // assume(v <=_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULE && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero. - KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()); - // assume(v <_u c) + Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); + // assume(v <_u c) } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) && Pred == ICmpInst::ICMP_ULT && isValidAssumeForContext(I, Q.CxtI, Q.DT)) { - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I)); + KnownBits RHSKnown(BitWidth); + computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I)); // Whatever high bits in c are zero are known to be zero (if c is a power // of 2, then one more). if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I))) - KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()+1); + Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1); else - KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()); + Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros()); } } @@ -774,9 +784,8 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, // so this isn't a real bug. On the other hand, the program may have undefined // behavior, or we might have a bug in the compiler. We can't assert/crash, so // clear out the known bits, try to warn the user, and hope for the best. - if ((KnownZero & KnownOne) != 0) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + if (Known.Zero.intersects(Known.One)) { + Known.resetAll(); if (Q.ORE) { auto *CxtI = const_cast(Q.CxtI); @@ -789,57 +798,53 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero, } // Compute known bits from a shift operator, including those with a -// non-constant shift amount. KnownZero and KnownOne are the outputs of this -// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the -// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific -// functors that, given the known-zero or known-one bits respectively, and a -// shift amount, compute the implied known-zero or known-one bits of the shift -// operator's result respectively for that shift amount. The results from calling -// KZF and KOF are conservatively combined for all permitted shift amounts. +// non-constant shift amount. Known is the outputs of this function. Known2 is a +// pre-allocated temporary with the/ same bit width as Known. KZF and KOF are +// operator-specific functors that, given the known-zero or known-one bits +// respectively, and a shift amount, compute the implied known-zero or known-one +// bits of the shift operator's result respectively for that shift amount. The +// results from calling KZF and KOF are conservatively combined for all +// permitted shift amounts. static void computeKnownBitsFromShiftOperator( - const Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, - APInt &KnownOne2, unsigned Depth, const Query &Q, + const Operator *I, KnownBits &Known, KnownBits &Known2, + unsigned Depth, const Query &Q, function_ref KZF, function_ref KOF) { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); if (auto *SA = dyn_cast(I->getOperand(1))) { unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero = KZF(KnownZero, ShiftAmt); - KnownOne = KOF(KnownOne, ShiftAmt); - // If there is conflict between KnownZero and KnownOne, this must be an - // overflowing left shift, so the shift result is undefined. Clear KnownZero - // and KnownOne bits so that other code could propagate this undef. - if ((KnownZero & KnownOne) != 0) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - } + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + Known.Zero = KZF(Known.Zero, ShiftAmt); + Known.One = KOF(Known.One, ShiftAmt); + // If there is conflict between Known.Zero and Known.One, this must be an + // overflowing left shift, so the shift result is undefined. Clear Known + // bits so that other code could propagate this undef. + if ((Known.Zero & Known.One) != 0) + Known.resetAll(); return; } - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); // If the shift amount could be greater than or equal to the bit-width of the LHS, the // value could be undef, so we don't know anything about it. - if ((~KnownZero).uge(BitWidth)) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + if ((~Known.Zero).uge(BitWidth)) { + Known.resetAll(); return; } - // Note: We cannot use KnownZero.getLimitedValue() here, because if + // Note: We cannot use Known.Zero.getLimitedValue() here, because if // BitWidth > 64 and any upper bits are known, we'll end up returning the // limit value (which implies all bits are known). - uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue(); - uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue(); + uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue(); // It would be more-clearly correct to use the two temporaries for this // calculation. Reusing the APInts here to prevent unnecessary allocations. - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + Known.resetAll(); // If we know the shifter operand is nonzero, we can sometimes infer more // known bits. However this is expensive to compute, so be lazy about it and @@ -847,16 +852,18 @@ static void computeKnownBitsFromShiftOperator( Optional ShifterOperandIsNonZero; // Early exit if we can't constrain any well-defined shift amount. - if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) { + if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) && + !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) { ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q); if (!*ShifterOperandIsNonZero) return; } - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { // Combine the shifted known input bits only for those shift amounts // compatible with its known constraints. @@ -875,8 +882,8 @@ static void computeKnownBitsFromShiftOperator( continue; } - KnownZero &= KZF(KnownZero2, ShiftAmt); - KnownOne &= KOF(KnownOne2, ShiftAmt); + Known.Zero &= KZF(Known2.Zero, ShiftAmt); + Known.One &= KOF(Known2.One, ShiftAmt); } // If there are no compatible shift amounts, then we've proven that the shift @@ -884,33 +891,30 @@ static void computeKnownBitsFromShiftOperator( // return anything we'd like, but we need to make sure the sets of known bits // stay disjoint (it should be better for some other code to actually // propagate the undef than to pick a value here using known bits). - if ((KnownZero & KnownOne) != 0) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - } + if (Known.Zero.intersects(Known.One)) + Known.resetAll(); } -static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - const Query &Q) { - unsigned BitWidth = KnownZero.getBitWidth(); +static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, + unsigned Depth, const Query &Q) { + unsigned BitWidth = Known.getBitWidth(); - APInt KnownZero2(KnownZero), KnownOne2(KnownOne); + KnownBits Known2(Known); switch (I->getOpcode()) { default: break; case Instruction::Load: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); + computeKnownBitsFromRangeMetadata(*MD, Known); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + Known.One &= Known2.One; // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; + Known.Zero |= Known2.Zero; // and(x, add (x, -1)) is a common idiom that always clears the low bit; // here we handle the more general case of adding any odd number by @@ -918,118 +922,113 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // TODO: This could be generalized to clearing any bit set in y where the // following bit is known to be unset in y. Value *Y = nullptr; - if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), - m_Value(Y))) || - match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), - m_Value(Y)))) { - APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); - computeKnownBits(Y, KnownZero3, KnownOne3, Depth + 1, Q); - if (KnownOne3.countTrailingOnes() > 0) - KnownZero.setBit(0); + if (!Known.Zero[0] && !Known.One[0] && + (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), + m_Value(Y))) || + match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), + m_Value(Y))))) { + Known2.resetAll(); + computeKnownBits(Y, Known2, Depth + 1, Q); + if (Known2.countMinTrailingOnes() > 0) + Known.Zero.setBit(0); } break; } case Instruction::Or: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + Known.Zero &= Known2.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; + Known.One |= Known2.One; break; } case Instruction::Xor: { - computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = KnownZeroOut; + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = std::move(KnownZeroOut); break; } case Instruction::Mul: { bool NSW = cast(I)->hasNoSignedWrap(); - computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero, - KnownOne, KnownZero2, KnownOne2, Depth, Q); + computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known, + Known2, Depth, Q); break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - unsigned LeadZ = KnownZero2.countLeadingOnes(); - - KnownOne2.clearAllBits(); - KnownZero2.clearAllBits(); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); - unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); - if (RHSUnknownLeadingOnes != BitWidth) - LeadZ = std::min(BitWidth, - LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - - KnownZero.setHighBits(LeadZ); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + unsigned LeadZ = Known2.countMinLeadingZeros(); + + Known2.resetAll(); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros(); + if (RHSMaxLeadingZeros != BitWidth) + LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); + + Known.Zero.setHighBits(LeadZ); break; } case Instruction::Select: { - computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); - - const Value *LHS; - const Value *RHS; + const Value *LHS, *RHS; SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; if (SelectPatternResult::isMinOrMax(SPF)) { - computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(RHS, Known, Depth + 1, Q); + computeKnownBits(LHS, Known2, Depth + 1, Q); } else { - computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(2), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); } unsigned MaxHighOnes = 0; unsigned MaxHighZeros = 0; if (SPF == SPF_SMAX) { // If both sides are negative, the result is negative. - if (KnownOne.isNegative() && KnownOne2.isNegative()) + if (Known.isNegative() && Known2.isNegative()) // We can derive a lower bound on the result by taking the max of the // leading one bits. MaxHighOnes = - std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes()); // If either side is non-negative, the result is non-negative. - else if (KnownZero.isNegative() || KnownZero2.isNegative()) + else if (Known.isNonNegative() || Known2.isNonNegative()) MaxHighZeros = 1; } else if (SPF == SPF_SMIN) { // If both sides are non-negative, the result is non-negative. - if (KnownZero.isNegative() && KnownZero2.isNegative()) + if (Known.isNonNegative() && Known2.isNonNegative()) // We can derive an upper bound on the result by taking the max of the // leading zero bits. - MaxHighZeros = std::max(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); + MaxHighZeros = std::max(Known.countMinLeadingZeros(), + Known2.countMinLeadingZeros()); // If either side is negative, the result is negative. - else if (KnownOne[BitWidth - 1] || KnownOne2[BitWidth - 1]) + else if (Known.isNegative() || Known2.isNegative()) MaxHighOnes = 1; } else if (SPF == SPF_UMAX) { // We can derive a lower bound on the result by taking the max of the // leading one bits. MaxHighOnes = - std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes()); } else if (SPF == SPF_UMIN) { // We can derive an upper bound on the result by taking the max of the // leading zero bits. MaxHighZeros = - std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); + std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); } // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; if (MaxHighOnes > 0) - KnownOne.setHighBits(MaxHighOnes); + Known.One.setHighBits(MaxHighOnes); if (MaxHighZeros > 0) - KnownZero.setHighBits(MaxHighZeros); + Known.Zero.setHighBits(MaxHighZeros); break; } case Instruction::FPTrunc: @@ -1053,14 +1052,12 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType()); assert(SrcBitWidth && "SrcBitWidth can't be zero"); - KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); - KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero = KnownZero.zextOrTrunc(BitWidth); - KnownOne = KnownOne.zextOrTrunc(BitWidth); + Known = Known.zextOrTrunc(SrcBitWidth); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + Known = Known.zextOrTrunc(BitWidth); // Any top bits are known to be zero. if (BitWidth > SrcBitWidth) - KnownZero.setBitsFrom(SrcBitWidth); + Known.Zero.setBitsFrom(SrcBitWidth); break; } case Instruction::BitCast: { @@ -1069,7 +1066,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); break; } break; @@ -1078,18 +1075,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - KnownZero = KnownZero.trunc(SrcBitWidth); - KnownOne = KnownOne.trunc(SrcBitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - + Known = Known.trunc(SrcBitWidth); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero - KnownZero.setBitsFrom(SrcBitWidth); - else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set - KnownOne.setBitsFrom(SrcBitWidth); + Known = Known.sext(BitWidth); break; } case Instruction::Shl: { @@ -1100,38 +1090,35 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, KZResult.setLowBits(ShiftAmt); // Low bits known 0. // If this shift has "nsw" keyword, then the result is either a poison // value or has the same sign bit as the first operand. - if (NSW && KnownZero.isNegative()) + if (NSW && KnownZero.isSignBitSet()) KZResult.setSignBit(); return KZResult; }; auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) { APInt KOResult = KnownOne << ShiftAmt; - if (NSW && KnownOne.isNegative()) + if (NSW && KnownOne.isSignBitSet()) KOResult.setSignBit(); return KOResult; }; - computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, Depth, Q, KZF, - KOF); + computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); break; } case Instruction::LShr: { // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { - return KnownZero.lshr(ShiftAmt) | - // High bits known zero. - APInt::getHighBitsSet(BitWidth, ShiftAmt); + auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) { + APInt KZResult = KnownZero.lshr(ShiftAmt); + // High bits known zero. + KZResult.setHighBits(ShiftAmt); + return KZResult; }; auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) { return KnownOne.lshr(ShiftAmt); }; - computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, Depth, Q, KZF, - KOF); + computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); break; } case Instruction::AShr: { @@ -1144,23 +1131,19 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, return KnownOne.ashr(ShiftAmt); }; - computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, - KnownZero2, KnownOne2, Depth, Q, KZF, - KOF); + computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF); break; } case Instruction::Sub: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, - Q); + Known, Known2, Depth, Q); break; } case Instruction::Add: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, - KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, - Q); + Known, Known2, Depth, Q); break; } case Instruction::SRem: @@ -1168,37 +1151,33 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, - Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // The low bits of the first operand are unchanged by the srem. - KnownZero = KnownZero2 & LowBits; - KnownOne = KnownOne2 & LowBits; + Known.Zero = Known2.Zero & LowBits; + Known.One = Known2.One & LowBits; // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (KnownZero2.isNegative() || ((KnownZero2 & LowBits) == LowBits)) - KnownZero |= ~LowBits; + if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) + Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (KnownOne2.isNegative() && ((KnownOne2 & LowBits) != 0)) - KnownOne |= ~LowBits; + if (Known2.isNegative() && LowBits.intersects(Known2.One)) + Known.One |= ~LowBits; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + break; } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - if (KnownZero.isNonNegative()) { - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - Q); - // If it's known zero, our sign bit is also zero. - if (LHSKnownZero.isNegative()) - KnownZero.setSignBit(); - } + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + // If it's known zero, our sign bit is also zero. + if (Known2.isNonNegative()) + Known.makeNonNegative(); break; case Instruction::URem: { @@ -1206,23 +1185,22 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, const APInt &RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - KnownZero |= ~LowBits; - KnownOne &= LowBits; + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + Known.Zero |= ~LowBits; + Known.One &= LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); - computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); - - unsigned Leaders = std::max(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); - KnownOne.clearAllBits(); - KnownZero.clearAllBits(); - KnownZero.setHighBits(Leaders); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + + unsigned Leaders = + std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); + Known.resetAll(); + Known.Zero.setHighBits(Leaders); break; } @@ -1233,16 +1211,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, Align = Q.DL.getABITypeAlignment(AI->getAllocatedType()); if (Align > 0) - KnownZero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align)); break; } case Instruction::GetElementPtr: { // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. - APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, Depth + 1, - Q); - unsigned TrailZ = LocalKnownZero.countTrailingOnes(); + KnownBits LocalKnown(BitWidth); + computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q); + unsigned TrailZ = LocalKnown.countMinTrailingZeros(); gep_type_iterator GTI = gep_type_begin(I); for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) { @@ -1272,15 +1249,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy); - LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - computeKnownBits(Index, LocalKnownZero, LocalKnownOne, Depth + 1, Q); + LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0); + computeKnownBits(Index, LocalKnown, Depth + 1, Q); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + - LocalKnownZero.countTrailingOnes())); + LocalKnown.countMinTrailingZeros())); } } - KnownZero.setLowBits(TrailZ); + Known.Zero.setLowBits(TrailZ); break; } case Instruction::PHI: { @@ -1315,14 +1292,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - computeKnownBits(R, KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(R, Known2, Depth + 1, Q); // We need to take the minimum number of known bits - APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q); + KnownBits Known3(Known); + computeKnownBits(L, Known3, Depth + 1, Q); - KnownZero.setLowBits(std::min(KnownZero2.countTrailingOnes(), - KnownZero3.countTrailingOnes())); + Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(), + Known3.countMinTrailingZeros())); if (DontImproveNonNegativePhiBits) break; @@ -1339,25 +1316,25 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // (add non-negative, non-negative) --> non-negative // (add negative, negative) --> negative if (Opcode == Instruction::Add) { - if (KnownZero2.isNegative() && KnownZero3.isNegative()) - KnownZero.setSignBit(); - else if (KnownOne2.isNegative() && KnownOne3.isNegative()) - KnownOne.setSignBit(); + if (Known2.isNonNegative() && Known3.isNonNegative()) + Known.makeNonNegative(); + else if (Known2.isNegative() && Known3.isNegative()) + Known.makeNegative(); } // (sub nsw non-negative, negative) --> non-negative // (sub nsw negative, non-negative) --> negative else if (Opcode == Instruction::Sub && LL == I) { - if (KnownZero2.isNegative() && KnownOne3.isNegative()) - KnownZero.setSignBit(); - else if (KnownOne2.isNegative() && KnownZero3.isNegative()) - KnownOne.setSignBit(); + if (Known2.isNonNegative() && Known3.isNegative()) + Known.makeNonNegative(); + else if (Known2.isNegative() && Known3.isNonNegative()) + Known.makeNegative(); } // (mul nsw non-negative, non-negative) --> non-negative - else if (Opcode == Instruction::Mul && KnownZero2.isNegative() && - KnownZero3.isNegative()) - KnownZero.setSignBit(); + else if (Opcode == Instruction::Mul && Known2.isNonNegative() && + Known3.isNonNegative()) + Known.makeNonNegative(); } break; @@ -1371,27 +1348,26 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. - if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { + if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) { // Skip if every incoming value references to ourself. if (dyn_cast_or_null(P->hasConstantValue())) break; - KnownZero.setAllBits(); - KnownOne.setAllBits(); + Known.Zero.setAllBits(); + Known.One.setAllBits(); for (Value *IncValue : P->incoming_values()) { // Skip direct self references. if (IncValue == P) continue; - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); + Known2 = KnownBits(BitWidth); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - computeKnownBits(IncValue, KnownZero2, KnownOne2, MaxDepth - 1, Q); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; + computeKnownBits(IncValue, Known2, MaxDepth - 1, Q); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; // If all bits have been ruled out, there's no need to check // more operands. - if (!KnownZero && !KnownOne) + if (!Known.Zero && !Known.One) break; } } @@ -1403,50 +1379,60 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // and then intersect with known bits based on other properties of the // function. if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); + computeKnownBitsFromRangeMetadata(*MD, Known); if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) { - computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero |= KnownZero2; - KnownOne |= KnownOne2; + computeKnownBits(RV, Known2, Depth + 1, Q); + Known.Zero |= Known2.Zero; + Known.One |= Known2.One; } if (const IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::bitreverse: - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero |= KnownZero2.reverseBits(); - KnownOne |= KnownOne2.reverseBits(); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + Known.Zero |= Known2.Zero.reverseBits(); + Known.One |= Known2.One.reverseBits(); break; case Intrinsic::bswap: - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero |= KnownZero2.byteSwap(); - KnownOne |= KnownOne2.byteSwap(); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + Known.Zero |= Known2.Zero.byteSwap(); + Known.One |= Known2.One.byteSwap(); + break; + case Intrinsic::ctlz: { + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + // If we have a known 1, its position is our upper bound. + unsigned PossibleLZ = Known2.One.countLeadingZeros(); + // If this call is undefined for 0, the result will be less than 2^n. + if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) + PossibleLZ = std::min(PossibleLZ, BitWidth - 1); + unsigned LowBits = Log2_32(PossibleLZ)+1; + Known.Zero.setBitsFrom(LowBits); break; - case Intrinsic::ctlz: + } case Intrinsic::cttz: { - unsigned LowBits = Log2_32(BitWidth)+1; + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); + // If we have a known 1, its position is our upper bound. + unsigned PossibleTZ = Known2.One.countTrailingZeros(); // If this call is undefined for 0, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) - LowBits -= 1; - KnownZero.setBitsFrom(LowBits); + PossibleTZ = std::min(PossibleTZ, BitWidth - 1); + unsigned LowBits = Log2_32(PossibleTZ)+1; + Known.Zero.setBitsFrom(LowBits); break; } case Intrinsic::ctpop: { - computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); // We can bound the space the count needs. Also, bits known to be zero // can't contribute to the population. - unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); - unsigned LeadingZeros = - APInt(BitWidth, BitsPossiblySet).countLeadingZeros(); - assert(LeadingZeros <= BitWidth); - KnownZero.setHighBits(LeadingZeros); - KnownOne &= ~KnownZero; + unsigned BitsPossiblySet = Known2.countMaxPopulation(); + unsigned LowBits = Log2_32(BitsPossiblySet)+1; + Known.Zero.setBitsFrom(LowBits); // TODO: we could bound KnownOne using the lower bound on the number // of bits which might be set provided by popcnt KnownOne2. break; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero.setBitsFrom(32); + Known.Zero.setBitsFrom(32); break; } } @@ -1456,7 +1442,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, // tracking the specific element. But at least we might find information // valid for all elements of the vector (for example if vector is sign // extended, shifted, etc). - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); break; case Instruction::ExtractValue: if (IntrinsicInst *II = dyn_cast(I->getOperand(0))) { @@ -1468,20 +1454,19 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: computeKnownBitsAddSub(true, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, Depth, Q); + II->getArgOperand(1), false, Known, Known2, + Depth, Q); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: computeKnownBitsAddSub(false, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, Depth, Q); + II->getArgOperand(1), false, Known, Known2, + Depth, Q); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false, - KnownZero, KnownOne, KnownZero2, KnownOne2, Depth, - Q); + Known, Known2, Depth, Q); break; } } @@ -1490,7 +1475,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, } /// Determine which bits of V are known to be either zero or one and return -/// them in the KnownZero/KnownOne bit sets. +/// them. +KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) { + KnownBits Known(getBitWidth(V->getType(), Q.DL)); + computeKnownBits(V, Known, Depth, Q); + return Known; +} + +/// Determine which bits of V are known to be either zero or one and return +/// them in the Known bit set. /// /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that /// we cannot optimize based on the assumption that it is zero without changing @@ -1504,11 +1497,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero, /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. -void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, - unsigned Depth, const Query &Q) { +void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, + const Query &Q) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); assert((V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarType()->isPointerTy()) && @@ -1516,22 +1509,19 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "V, KnownOne and KnownZero should have same BitWidth"); + "V and Known should have same BitWidth"); (void)BitWidth; const APInt *C; if (match(V, m_APInt(C))) { // We know all of the bits for a scalar constant or a splat vector constant! - KnownOne = *C; - KnownZero = ~KnownOne; + Known.One = *C; + Known.Zero = ~Known.One; return; } // Null and aggregate-zero are all-zeros. if (isa(V) || isa(V)) { - KnownOne.clearAllBits(); - KnownZero.setAllBits(); + Known.setAllZero(); return; } // Handle a constant vector by taking the intersection of the known bits of @@ -1539,12 +1529,12 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, if (const ConstantDataSequential *CDS = dyn_cast(V)) { // We know that CDS must be a vector of integers. Take the intersection of // each element. - KnownZero.setAllBits(); KnownOne.setAllBits(); - APInt Elt(KnownZero.getBitWidth(), 0); + Known.Zero.setAllBits(); Known.One.setAllBits(); + APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { Elt = CDS->getElementAsInteger(i); - KnownZero &= ~Elt; - KnownOne &= Elt; + Known.Zero &= ~Elt; + Known.One &= Elt; } return; } @@ -1552,25 +1542,24 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, if (const auto *CV = dyn_cast(V)) { // We know that CV must be a vector of integers. Take the intersection of // each element. - KnownZero.setAllBits(); KnownOne.setAllBits(); - APInt Elt(KnownZero.getBitWidth(), 0); + Known.Zero.setAllBits(); Known.One.setAllBits(); + APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { Constant *Element = CV->getAggregateElement(i); auto *ElementCI = dyn_cast_or_null(Element); if (!ElementCI) { - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); + Known.resetAll(); return; } Elt = ElementCI->getValue(); - KnownZero &= ~Elt; - KnownOne &= Elt; + Known.Zero &= ~Elt; + Known.One &= Elt; } return; } // Start out not knowing anything. - KnownZero.clearAllBits(); KnownOne.clearAllBits(); + Known.resetAll(); // We can't imply anything about undefs. if (isa(V)) @@ -1589,44 +1578,27 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne, // the bits of its aliasee. if (const GlobalAlias *GA = dyn_cast(V)) { if (!GA->isInterposable()) - computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q); return; } if (const Operator *I = dyn_cast(V)) - computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q); + computeKnownBitsFromOperator(I, Known, Depth, Q); - // Aligned pointers have trailing zeros - refine KnownZero set + // Aligned pointers have trailing zeros - refine Known.Zero set if (V->getType()->isPointerTy()) { unsigned Align = V->getPointerAlignment(Q.DL); if (Align) - KnownZero.setLowBits(countTrailingZeros(Align)); + Known.Zero.setLowBits(countTrailingZeros(Align)); } - // computeKnownBitsFromAssume strictly refines KnownZero and - // KnownOne. Therefore, we run them after computeKnownBitsFromOperator. + // computeKnownBitsFromAssume strictly refines Known. + // Therefore, we run them after computeKnownBitsFromOperator. // Check whether a nearby assume intrinsic can determine some known bits. - computeKnownBitsFromAssume(V, KnownZero, KnownOne, Depth, Q); + computeKnownBitsFromAssume(V, Known, Depth, Q); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); -} - -/// Determine whether the sign bit is known to be zero or one. -/// Convenience wrapper around computeKnownBits. -void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne, - unsigned Depth, const Query &Q) { - unsigned BitWidth = getBitWidth(V->getType(), Q.DL); - if (!BitWidth) { - KnownZero = false; - KnownOne = false; - return; - } - APInt ZeroBits(BitWidth, 0); - APInt OneBits(BitWidth, 0); - computeKnownBits(V, ZeroBits, OneBits, Depth, Q); - KnownOne = OneBits.isNegative(); - KnownZero = ZeroBits.isNegative(); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } /// Return true if the given value is known to have exactly one @@ -1649,9 +1621,9 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, if (match(V, m_Shl(m_One(), m_Value()))) return true; - // (signbit) >>l X is clearly a power of two if the one is not shifted off the - // bottom. If it is shifted off the bottom then the result is undefined. - if (match(V, m_LShr(m_SignBit(), m_Value()))) + // (signmask) >>l X is clearly a power of two if the one is not shifted off + // the bottom. If it is shifted off the bottom then the result is undefined. + if (match(V, m_LShr(m_SignMask(), m_Value()))) return true; // The remaining tests are all recursive, so bail out if we hit the limit. @@ -1698,18 +1670,18 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth, return true; unsigned BitWidth = V->getType()->getScalarSizeInBits(); - APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - computeKnownBits(X, LHSZeroBits, LHSOneBits, Depth, Q); + KnownBits LHSBits(BitWidth); + computeKnownBits(X, LHSBits, Depth, Q); - APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - computeKnownBits(Y, RHSZeroBits, RHSOneBits, Depth, Q); + KnownBits RHSBits(BitWidth); + computeKnownBits(Y, RHSBits, Depth, Q); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 - if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2()) + if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2()) // If OrZero isn't set, we cannot give back a zero result. // Make sure either the LHS or RHS has a bit set. - if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue()) + if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue()) return true; } } @@ -1874,16 +1846,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined // if the lowest bit is shifted off the end. - if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { + if (match(V, m_Shl(m_Value(X), m_Value(Y)))) { // shl nuw can't remove any non-zero bits. const OverflowingBinaryOperator *BO = cast(V); if (BO->hasNoUnsignedWrap()) return isKnownNonZero(X, Depth, Q); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, Depth, Q); - if (KnownOne[0]) + KnownBits Known(BitWidth); + computeKnownBits(X, Known, Depth, Q); + if (Known.One[0]) return true; } // shr X, Y != 0 if X is negative. Note that the value of the shift is not @@ -1894,25 +1865,20 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { if (BO->isExact()) return isKnownNonZero(X, Depth, Q); - bool XKnownNonNegative, XKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q); - if (XKnownNegative) + KnownBits Known = computeKnownBits(X, Depth, Q); + if (Known.isNegative()) return true; // If the shifter operand is a constant, and all of the bits shifted // out are known to be zero, and X is known non-zero then at least one // non-zero bit must remain. if (ConstantInt *Shift = dyn_cast(Y)) { - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, Depth, Q); - auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); // Is there a known one in the portion not shifted out? - if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) + if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal) return true; // Are all the bits to be shifted out known zero? - if (KnownZero.countTrailingOnes() >= ShiftVal) + if (Known.countMinTrailingZeros() >= ShiftVal) return isKnownNonZero(X, Depth, Q); } } @@ -1922,40 +1888,34 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { - bool XKnownNonNegative, XKnownNegative; - bool YKnownNonNegative, YKnownNegative; - ComputeSignBit(X, XKnownNonNegative, XKnownNegative, Depth, Q); - ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, Depth, Q); + KnownBits XKnown = computeKnownBits(X, Depth, Q); + KnownBits YKnown = computeKnownBits(Y, Depth, Q); // If X and Y are both non-negative (as signed values) then their sum is not // zero unless both X and Y are zero. - if (XKnownNonNegative && YKnownNonNegative) + if (XKnown.isNonNegative() && YKnown.isNonNegative()) if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q)) return true; // If X and Y are both negative (as signed values) then their sum is not // zero unless both X and Y equal INT_MIN. - if (BitWidth && XKnownNegative && YKnownNegative) { - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); + if (XKnown.isNegative() && YKnown.isNegative()) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - computeKnownBits(X, KnownZero, KnownOne, Depth, Q); - if ((KnownOne & Mask) != 0) + if (XKnown.One.intersects(Mask)) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - computeKnownBits(Y, KnownZero, KnownOne, Depth, Q); - if ((KnownOne & Mask) != 0) + if (YKnown.One.intersects(Mask)) return true; } // The sum of a non-negative number and a power of two is not zero. - if (XKnownNonNegative && + if (XKnown.isNonNegative() && isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q)) return true; - if (YKnownNonNegative && + if (YKnown.isNonNegative() && isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q)) return true; } @@ -2001,11 +1961,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { return true; } - if (!BitWidth) return false; - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, Depth, Q); - return KnownOne != 0; + KnownBits Known(BitWidth); + computeKnownBits(V, Known, Depth, Q); + return Known.One != 0; } /// Return true if V2 == V1 + X, where X is known non-zero. @@ -2025,7 +1983,7 @@ static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) { /// Return true if it is known that V1 != V2. static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { - if (V1->getType()->isVectorTy() || V1 == V2) + if (V1 == V2) return false; if (V1->getType() != V2->getType()) // We can't look through casts yet. @@ -2033,19 +1991,14 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q)) return true; - if (IntegerType *Ty = dyn_cast(V1->getType())) { + if (V1->getType()->isIntOrIntVectorTy()) { // Are any known bits in V1 contradictory to known bits in V2? If V1 // has a known zero where V2 has a known one, they must not be equal. - auto BitWidth = Ty->getBitWidth(); - APInt KnownZero1(BitWidth, 0); - APInt KnownOne1(BitWidth, 0); - computeKnownBits(V1, KnownZero1, KnownOne1, 0, Q); - APInt KnownZero2(BitWidth, 0); - APInt KnownOne2(BitWidth, 0); - computeKnownBits(V2, KnownZero2, KnownOne2, 0, Q); - - auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1); - if (OppositeBits.getBoolValue()) + KnownBits Known1 = computeKnownBits(V1, 0, Q); + KnownBits Known2 = computeKnownBits(V2, 0, Q); + + if (Known1.Zero.intersects(Known2.One) || + Known2.Zero.intersects(Known1.One)) return true; } return false; @@ -2062,9 +2015,9 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) { /// for all of the elements in the vector. bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth, const Query &Q) { - APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - computeKnownBits(V, KnownZero, KnownOne, Depth, Q); - return (KnownZero & Mask) == Mask; + KnownBits Known(Mask.getBitWidth()); + computeKnownBits(V, Known, Depth, Q); + return Mask.isSubsetOf(Known.Zero); } /// For vector constants, loop over the elements and find the constant with the @@ -2242,17 +2195,17 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // Special case decrementing a value (ADD X, -1): if (const auto *CRHS = dyn_cast(U->getOperand(1))) if (CRHS->isAllOnesValue()) { - APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, Depth + 1, Q); + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(0), Known, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return TyBits; // If we are subtracting one from a positive number, there is no carry // out of the result. - if (KnownZero.isNegative()) + if (Known.isNonNegative()) return Tmp; } @@ -2267,16 +2220,16 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, // Handle NEG. if (const auto *CLHS = dyn_cast(U->getOperand(0))) if (CLHS->isNullValue()) { - APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(U->getOperand(1), KnownZero, KnownOne, Depth + 1, Q); + KnownBits Known(TyBits); + computeKnownBits(U->getOperand(1), Known, Depth + 1, Q); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return TyBits; // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. - if (KnownZero.isNegative()) + if (Known.isNonNegative()) return Tmp2; // Otherwise, we treat this like a SUB. @@ -2328,19 +2281,12 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth, if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits)) return VecSignBits; - APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - computeKnownBits(V, KnownZero, KnownOne, Depth, Q); + KnownBits Known(TyBits); + computeKnownBits(V, Known, Depth, Q); // If we know that the sign bit is either zero or one, determine the number of // identical bits in the top of the input value. - if (KnownZero.isNegative()) - return std::max(FirstAnswer, KnownZero.countLeadingOnes()); - - if (KnownOne.isNegative()) - return std::max(FirstAnswer, KnownOne.countLeadingOnes()); - - // computeKnownBits gave us no extra information about the top bits. - return FirstAnswer; + return std::max(FirstAnswer, Known.countMinSignBits()); } /// This function computes the integer multiple of Base that equals V. @@ -2390,6 +2336,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, case Instruction::SExt: if (!LookThroughSExt) return false; // otherwise fall through to ZExt + LLVM_FALLTHROUGH; case Instruction::ZExt: return ComputeMultiple(I->getOperand(0), Base, Multiple, LookThroughSExt, Depth+1); @@ -3019,14 +2966,16 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, return Ptr; } -bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) { +bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP, + unsigned CharSize) { // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return false; - // Make sure the index-ee is a pointer to array of i8. + // Make sure the index-ee is a pointer to array of \p CharSize integers. + // CharSize. ArrayType *AT = dyn_cast(GEP->getSourceElementType()); - if (!AT || !AT->getElementType()->isIntegerTy(8)) + if (!AT || !AT->getElementType()->isIntegerTy(CharSize)) return false; // Check to make sure that the first operand of the GEP is an integer and @@ -3038,11 +2987,9 @@ bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP) { return true; } -/// This function computes the length of a null-terminated C string pointed to -/// by V. If successful, it returns true and returns the string in Str. -/// If unsuccessful, it returns false. -bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, - uint64_t Offset, bool TrimAtNul) { +bool llvm::getConstantDataArrayInfo(const Value *V, + ConstantDataArraySlice &Slice, + unsigned ElementSize, uint64_t Offset) { assert(V); // Look through bitcast instructions and geps. @@ -3053,7 +3000,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, if (const GEPOperator *GEP = dyn_cast(V)) { // The GEP operator should be based on a pointer to string constant, and is // indexing into the string constant. - if (!isGEPBasedOnPointerToString(GEP)) + if (!isGEPBasedOnPointerToString(GEP, ElementSize)) return false; // If the second index isn't a ConstantInt, then this is a variable index @@ -3064,8 +3011,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, StartIdx = CI->getZExtValue(); else return false; - return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx + Offset, - TrimAtNul); + return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize, + StartIdx + Offset); } // The GEP instruction, constant or instruction, must reference a global @@ -3075,30 +3022,72 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; - // Handle the all-zeros case. + const ConstantDataArray *Array; + ArrayType *ArrayTy; if (GV->getInitializer()->isNullValue()) { - // This is a degenerate case. The initializer is constant zero so the - // length of the string must be zero. - Str = ""; - return true; + Type *GVTy = GV->getValueType(); + if ( (ArrayTy = dyn_cast(GVTy)) ) { + // A zeroinitializer for the array; there is no ConstantDataArray. + Array = nullptr; + } else { + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy); + uint64_t Length = SizeInBytes / (ElementSize / 8); + if (Length <= Offset) + return false; + + Slice.Array = nullptr; + Slice.Offset = 0; + Slice.Length = Length - Offset; + return true; + } + } else { + // This must be a ConstantDataArray. + Array = dyn_cast(GV->getInitializer()); + if (!Array) + return false; + ArrayTy = Array->getType(); } + if (!ArrayTy->getElementType()->isIntegerTy(ElementSize)) + return false; - // This must be a ConstantDataArray. - const auto *Array = dyn_cast(GV->getInitializer()); - if (!Array || !Array->isString()) + uint64_t NumElts = ArrayTy->getArrayNumElements(); + if (Offset > NumElts) return false; - // Get the number of elements in the array. - uint64_t NumElts = Array->getType()->getArrayNumElements(); + Slice.Array = Array; + Slice.Offset = Offset; + Slice.Length = NumElts - Offset; + return true; +} - // Start out with the entire array in the StringRef. - Str = Array->getAsString(); +/// This function computes the length of a null-terminated C string pointed to +/// by V. If successful, it returns true and returns the string in Str. +/// If unsuccessful, it returns false. +bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, + uint64_t Offset, bool TrimAtNul) { + ConstantDataArraySlice Slice; + if (!getConstantDataArrayInfo(V, Slice, 8, Offset)) + return false; - if (Offset > NumElts) + if (Slice.Array == nullptr) { + if (TrimAtNul) { + Str = StringRef(); + return true; + } + if (Slice.Length == 1) { + Str = StringRef("", 1); + return true; + } + // We cannot instantiate a StringRef as we do not have an appropriate string + // of 0s at hand. return false; + } + // Start out with the entire array in the StringRef. + Str = Slice.Array->getAsString(); // Skip over 'offset' bytes. - Str = Str.substr(Offset); + Str = Str.substr(Slice.Offset); if (TrimAtNul) { // Trim off the \0 and anything after it. If the array is not nul @@ -3116,7 +3105,8 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, /// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. static uint64_t GetStringLengthH(const Value *V, - SmallPtrSetImpl &PHIs) { + SmallPtrSetImpl &PHIs, + unsigned CharSize) { // Look through noop bitcast instructions. V = V->stripPointerCasts(); @@ -3129,7 +3119,7 @@ static uint64_t GetStringLengthH(const Value *V, // If it was new, see if all the input strings are the same length. uint64_t LenSoFar = ~0ULL; for (Value *IncValue : PN->incoming_values()) { - uint64_t Len = GetStringLengthH(IncValue, PHIs); + uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize); if (Len == 0) return 0; // Unknown length -> unknown. if (Len == ~0ULL) continue; @@ -3145,9 +3135,9 @@ static uint64_t GetStringLengthH(const Value *V, // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) if (const SelectInst *SI = dyn_cast(V)) { - uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); + uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize); if (Len1 == 0) return 0; - uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); + uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize); if (Len2 == 0) return 0; if (Len1 == ~0ULL) return Len2; if (Len2 == ~0ULL) return Len1; @@ -3156,20 +3146,30 @@ static uint64_t GetStringLengthH(const Value *V, } // Otherwise, see if we can read the string. - StringRef StrData; - if (!getConstantStringInfo(V, StrData)) + ConstantDataArraySlice Slice; + if (!getConstantDataArrayInfo(V, Slice, CharSize)) return 0; - return StrData.size()+1; + if (Slice.Array == nullptr) + return 1; + + // Search for nul characters + unsigned NullIndex = 0; + for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) { + if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0) + break; + } + + return NullIndex + 1; } /// If we can compute the length of the string pointed to by /// the specified pointer, return 'len+1'. If we can't, return 0. -uint64_t llvm::GetStringLength(const Value *V) { +uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) { if (!V->getType()->isPointerTy()) return 0; SmallPtrSet PHIs; - uint64_t Len = GetStringLengthH(V, PHIs); + uint64_t Len = GetStringLengthH(V, PHIs, CharSize); // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return // an empty string as a length. return Len == ~0ULL ? 1 : Len; @@ -3216,6 +3216,9 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, if (GA->isInterposable()) return V; V = GA->getAliasee(); + } else if (isa(V)) { + // An alloca can't be further simplified. + return V; } else { if (auto CS = CallSite(V)) if (Value *RV = CS.getReturnedArgOperand()) { @@ -3226,7 +3229,7 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast(V)) // TODO: Acquire a DominatorTree and AssumptionCache and use them. - if (Value *Simplified = SimplifyInstruction(I, DL, nullptr)) { + if (Value *Simplified = SimplifyInstruction(I, {DL, I})) { V = Simplified; continue; } @@ -3347,63 +3350,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, LI->getAlignment(), DL, CtxI, DT); } case Instruction::Call: { - if (const IntrinsicInst *II = dyn_cast(Inst)) { - switch (II->getIntrinsicID()) { - // These synthetic intrinsics have no side-effects and just mark - // information about their operands. - // FIXME: There are other no-op synthetic instructions that potentially - // should be considered at least *safe* to speculate... - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - return true; + auto *CI = cast(Inst); + const Function *Callee = CI->getCalledFunction(); - case Intrinsic::bitreverse: - case Intrinsic::bswap: - case Intrinsic::ctlz: - case Intrinsic::ctpop: - case Intrinsic::cttz: - case Intrinsic::objectsize: - case Intrinsic::sadd_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::umul_with_overflow: - case Intrinsic::usub_with_overflow: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions except for setting errno. - case Intrinsic::sqrt: - case Intrinsic::fma: - case Intrinsic::fmuladd: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions, and the corresponding libm functions never set errno. - case Intrinsic::trunc: - case Intrinsic::copysign: - case Intrinsic::fabs: - case Intrinsic::minnum: - case Intrinsic::maxnum: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions, which never overflow when operating on the IEEE754 types - // that we support, and never set errno otherwise. - case Intrinsic::ceil: - case Intrinsic::floor: - case Intrinsic::nearbyint: - case Intrinsic::rint: - case Intrinsic::round: - return true; - // These intrinsics do not correspond to any libm function, and - // do not set errno. - case Intrinsic::powi: - return true; - // TODO: are convert_{from,to}_fp16 safe? - // TODO: can we list target-specific intrinsics here? - default: break; - } - } - return false; // The called function could have undefined behavior or - // side-effects, even if marked readnone nounwind. + // The called function could have undefined behavior or side-effects, even + // if marked readnone nounwind. + return Callee && Callee->isSpeculatable(); } case Instruction::VAArg: case Instruction::Alloca: @@ -3540,38 +3492,34 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS, // we can guarantee that the result does not overflow. // Ref: "Hacker's Delight" by Henry Warren unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI, - DT); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI, - DT); + KnownBits LHSKnown(BitWidth); + KnownBits RHSKnown(BitWidth); + computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); + computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT); // Note that underestimating the number of zero bits gives a more // conservative answer. - unsigned ZeroBits = LHSKnownZero.countLeadingOnes() + - RHSKnownZero.countLeadingOnes(); + unsigned ZeroBits = LHSKnown.countMinLeadingZeros() + + RHSKnown.countMinLeadingZeros(); // First handle the easy case: if we have enough zero bits there's // definitely no overflow. if (ZeroBits >= BitWidth) return OverflowResult::NeverOverflows; // Get the largest possible values for each operand. - APInt LHSMax = ~LHSKnownZero; - APInt RHSMax = ~RHSKnownZero; + APInt LHSMax = ~LHSKnown.Zero; + APInt RHSMax = ~RHSKnown.Zero; // We know the multiply operation doesn't overflow if the maximum values for // each operand will not overflow after we multiply them together. bool MaxOverflow; - LHSMax.umul_ov(RHSMax, MaxOverflow); + (void)LHSMax.umul_ov(RHSMax, MaxOverflow); if (!MaxOverflow) return OverflowResult::NeverOverflows; // We know it always overflows if multiplying the smallest possible values for // the operands also results in overflow. bool MinOverflow; - LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow); + (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow); if (MinOverflow) return OverflowResult::AlwaysOverflows; @@ -3584,21 +3532,17 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - bool LHSKnownNonNegative, LHSKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, - AC, CxtI, DT); - if (LHSKnownNonNegative || LHSKnownNegative) { - bool RHSKnownNonNegative, RHSKnownNegative; - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, - AC, CxtI, DT); - - if (LHSKnownNegative && RHSKnownNegative) { + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); + if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) { + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); + + if (LHSKnown.isNegative() && RHSKnown.isNegative()) { // The sign bit is set in both cases: this MUST overflow. // Create a simple add instruction, and insert it into the struct. return OverflowResult::AlwaysOverflows; } - if (LHSKnownNonNegative && RHSKnownNonNegative) { + if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) { // The sign bit is clear in both cases: this CANNOT overflow. // Create a simple add instruction, and insert it into the struct. return OverflowResult::NeverOverflows; @@ -3608,6 +3552,51 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS, return OverflowResult::MayOverflow; } +/// \brief Return true if we can prove that adding the two values of the +/// knownbits will not overflow. +/// Otherwise return false. +static bool checkRippleForSignedAdd(const KnownBits &LHSKnown, + const KnownBits &RHSKnown) { + // Addition of two 2's complement numbers having opposite signs will never + // overflow. + if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) || + (LHSKnown.isNonNegative() && RHSKnown.isNegative())) + return true; + + // If either of the values is known to be non-negative, adding them can only + // overflow if the second is also non-negative, so we can assume that. + // Two non-negative numbers will only overflow if there is a carry to the + // sign bit, so we can check if even when the values are as big as possible + // there is no overflow to the sign bit. + if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) { + APInt MaxLHS = ~LHSKnown.Zero; + MaxLHS.clearSignBit(); + APInt MaxRHS = ~RHSKnown.Zero; + MaxRHS.clearSignBit(); + APInt Result = std::move(MaxLHS) + std::move(MaxRHS); + return Result.isSignBitClear(); + } + + // If either of the values is known to be negative, adding them can only + // overflow if the second is also negative, so we can assume that. + // Two negative number will only overflow if there is no carry to the sign + // bit, so we can check if even when the values are as small as possible + // there is overflow to the sign bit. + if (LHSKnown.isNegative() || RHSKnown.isNegative()) { + APInt MinLHS = LHSKnown.One; + MinLHS.clearSignBit(); + APInt MinRHS = RHSKnown.One; + MinRHS.clearSignBit(); + APInt Result = std::move(MinLHS) + std::move(MinRHS); + return Result.isSignBitSet(); + } + + // If we reached here it means that we know nothing about the sign bits. + // In this case we can't know if there will be an overflow, since by + // changing the sign bits any two values can be made to overflow. + return false; +} + static OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const AddOperator *Add, @@ -3619,18 +3608,29 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, return OverflowResult::NeverOverflows; } - bool LHSKnownNonNegative, LHSKnownNegative; - bool RHSKnownNonNegative, RHSKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, - AC, CxtI, DT); - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, - AC, CxtI, DT); + // If LHS and RHS each have at least two sign bits, the addition will look + // like + // + // XX..... + + // YY..... + // + // If the carry into the most significant position is 0, X and Y can't both + // be 1 and therefore the carry out of the addition is also 0. + // + // If the carry into the most significant position is 1, X and Y can't both + // be 0 and therefore the carry out of the addition is also 1. + // + // Since the carry into the most significant position is always equal to + // the carry out of the addition, there is no signed overflow. + if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 && + ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1) + return OverflowResult::NeverOverflows; + + KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); + KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); - if ((LHSKnownNonNegative && RHSKnownNegative) || - (LHSKnownNegative && RHSKnownNonNegative)) { - // The sign bits are opposite: this CANNOT overflow. + if (checkRippleForSignedAdd(LHSKnown, RHSKnown)) return OverflowResult::NeverOverflows; - } // The remaining code needs Add to be available. Early returns if not so. if (!Add) @@ -3641,14 +3641,13 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS, // @llvm.assume'ed non-negative rather than proved so from analyzing its // operands. bool LHSOrRHSKnownNonNegative = - (LHSKnownNonNegative || RHSKnownNonNegative); - bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative); + (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()); + bool LHSOrRHSKnownNegative = + (LHSKnown.isNegative() || RHSKnown.isNegative()); if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { - bool AddKnownNonNegative, AddKnownNegative; - ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL, - /*Depth=*/0, AC, CxtI, DT); - if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) || - (AddKnownNegative && LHSOrRHSKnownNegative)) { + KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT); + if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) || + (AddKnown.isNegative() && LHSOrRHSKnownNegative)) { return OverflowResult::NeverOverflows; } } @@ -3868,7 +3867,7 @@ const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { } } -bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) { +bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) { // We currently only look for uses of poison values within the same basic // block, as that makes it easier to guarantee that the uses will be // executed given that PoisonI is executed. @@ -4290,11 +4289,10 @@ static bool isTruePredicate(CmpInst::Predicate Pred, // If X & C == 0 then (X | C) == X +_{nuw} C if (match(A, m_Or(m_Value(X), m_APInt(CA))) && match(B, m_Or(m_Specific(X), m_APInt(CB)))) { - unsigned BitWidth = CA->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT); + KnownBits Known(CA->getBitWidth()); + computeKnownBits(X, Known, DL, Depth + 1, AC, CxtI, DT); - if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB) + if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) return true; } diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 722f17a8067eeef9c4bd84cc393b13520860e736..0ace8fa382bc7655a7cdd90e151f0b63266ebab8 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -11,18 +11,19 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/VectorUtils.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Constants.h" using namespace llvm; using namespace llvm::PatternMatch; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 49a8ce4bed0b5f0fcf75ce09cc60f80447b871a1..a49276099f194b9de3c5479157155cae21fa0e13 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -601,6 +601,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(hhvm_ccc); KEYWORD(cxx_fast_tlscc); KEYWORD(amdgpu_vs); + KEYWORD(amdgpu_hs); KEYWORD(amdgpu_gs); KEYWORD(amdgpu_ps); KEYWORD(amdgpu_cs); @@ -648,6 +649,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(returned); KEYWORD(returns_twice); KEYWORD(signext); + KEYWORD(speculatable); KEYWORD(sret); KEYWORD(ssp); KEYWORD(sspreq); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 68d448ed7e066de4e76fce230dc496c9235965f6..9ad31125f4b8c3dd6ff72b83facb2a7fcfa19108 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -15,9 +15,10 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/AsmParser/SlotMapping.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" @@ -41,7 +42,6 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SaveAndRestore.h" @@ -143,28 +143,29 @@ bool LLParser::ValidateEndOfModule() { FnAttrs.removeAttribute(Attribute::Alignment); } - AS = AS.addAttributes( - Context, AttributeList::FunctionIndex, - AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs)); + AS = AS.addAttributes(Context, AttributeList::FunctionIndex, + AttributeSet::get(Context, FnAttrs)); Fn->setAttributes(AS); } else if (CallInst *CI = dyn_cast(V)) { AttributeList AS = CI->getAttributes(); AttrBuilder FnAttrs(AS.getFnAttributes()); AS = AS.removeAttributes(Context, AttributeList::FunctionIndex); FnAttrs.merge(B); - AS = AS.addAttributes( - Context, AttributeList::FunctionIndex, - AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs)); + AS = AS.addAttributes(Context, AttributeList::FunctionIndex, + AttributeSet::get(Context, FnAttrs)); CI->setAttributes(AS); } else if (InvokeInst *II = dyn_cast(V)) { AttributeList AS = II->getAttributes(); AttrBuilder FnAttrs(AS.getFnAttributes()); AS = AS.removeAttributes(Context, AttributeList::FunctionIndex); FnAttrs.merge(B); - AS = AS.addAttributes( - Context, AttributeList::FunctionIndex, - AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs)); + AS = AS.addAttributes(Context, AttributeList::FunctionIndex, + AttributeSet::get(Context, FnAttrs)); II->setAttributes(AS); + } else if (auto *GV = dyn_cast(V)) { + AttrBuilder Attrs(GV->getAttributes()); + Attrs.merge(B); + GV->setAttributes(AttributeSet::get(Context,Attrs)); } else { llvm_unreachable("invalid object with forward attribute group reference"); } @@ -835,10 +836,10 @@ bool LLParser::parseIndirectSymbol( /// ParseGlobal /// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalDLLStorageClass /// OptionalThreadLocal OptionalUnnamedAddr OptionalAddrSpace -/// OptionalExternallyInitialized GlobalType Type Const +/// OptionalExternallyInitialized GlobalType Type Const OptionalAttrs /// ::= OptionalLinkage OptionalVisibility OptionalDLLStorageClass /// OptionalThreadLocal OptionalUnnamedAddr OptionalAddrSpace -/// OptionalExternallyInitialized GlobalType Type Const +/// OptionalExternallyInitialized GlobalType Type Const OptionalAttrs /// /// Everything up to and including OptionalUnnamedAddr has been parsed /// already. @@ -953,6 +954,16 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, } } + AttrBuilder Attrs; + LocTy BuiltinLoc; + std::vector FwdRefAttrGrps; + if (ParseFnAttributeValuePairs(Attrs, FwdRefAttrGrps, false, BuiltinLoc)) + return true; + if (Attrs.hasAttributes() || !FwdRefAttrGrps.empty()) { + GV->setAttributes(AttributeSet::get(Context, Attrs)); + ForwardRefAttrGroups[GV] = FwdRefAttrGrps; + } + return false; } @@ -1098,6 +1109,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; + case lltok::kw_speculatable: B.addAttribute(Attribute::Speculatable); break; case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; case lltok::kw_sspstrong: @@ -1670,8 +1682,7 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'hhvm_ccc' /// ::= 'cxx_fast_tlscc' /// ::= 'amdgpu_vs' -/// ::= 'amdgpu_tcs' -/// ::= 'amdgpu_tes' +/// ::= 'amdgpu_hs' /// ::= 'amdgpu_gs' /// ::= 'amdgpu_ps' /// ::= 'amdgpu_cs' @@ -1713,6 +1724,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break; case lltok::kw_amdgpu_vs: CC = CallingConv::AMDGPU_VS; break; + case lltok::kw_amdgpu_hs: CC = CallingConv::AMDGPU_HS; break; case lltok::kw_amdgpu_gs: CC = CallingConv::AMDGPU_GS; break; case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break; case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break; @@ -2490,7 +2502,7 @@ LLParser::PerFunctionState::~PerFunctionState() { continue; P.second.first->replaceAllUsesWith( UndefValue::get(P.second.first->getType())); - delete P.second.first; + P.second.first->deleteValue(); } for (const auto &P : ForwardRefValIDs) { @@ -2498,7 +2510,7 @@ LLParser::PerFunctionState::~PerFunctionState() { continue; P.second.first->replaceAllUsesWith( UndefValue::get(P.second.first->getType())); - delete P.second.first; + P.second.first->deleteValue(); } } @@ -2630,7 +2642,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, getTypeString(FI->second.first->getType()) + "'"); Sentinel->replaceAllUsesWith(Inst); - delete Sentinel; + Sentinel->deleteValue(); ForwardRefValIDs.erase(FI); } @@ -2647,7 +2659,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, getTypeString(FI->second.first->getType()) + "'"); Sentinel->replaceAllUsesWith(Inst); - delete Sentinel; + Sentinel->deleteValue(); ForwardRefVals.erase(FI); } @@ -4074,7 +4086,7 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { /// virtuality: DW_VIRTUALTIY_pure_virtual, /// virtualIndex: 10, thisAdjustment: 4, flags: 11, /// isOptimized: false, templateParams: !4, declaration: !5, -/// variables: !6) +/// variables: !6, thrownTypes: !7) bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { auto Loc = Lex.getLoc(); #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ @@ -4096,7 +4108,8 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { OPTIONAL(unit, MDField, ); \ OPTIONAL(templateParams, MDField, ); \ OPTIONAL(declaration, MDField, ); \ - OPTIONAL(variables, MDField, ); + OPTIONAL(variables, MDField, ); \ + OPTIONAL(thrownTypes, MDField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS @@ -4106,12 +4119,12 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { "missing 'distinct', required for !DISubprogram when 'isDefinition'"); Result = GET_OR_DISTINCT( - DISubprogram, (Context, scope.Val, name.Val, linkageName.Val, file.Val, - line.Val, type.Val, isLocal.Val, isDefinition.Val, - scopeLine.Val, containingType.Val, virtuality.Val, - virtualIndex.Val, thisAdjustment.Val, flags.Val, - isOptimized.Val, unit.Val, templateParams.Val, - declaration.Val, variables.Val)); + DISubprogram, + (Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val, + type.Val, isLocal.Val, isDefinition.Val, scopeLine.Val, + containingType.Val, virtuality.Val, virtualIndex.Val, thisAdjustment.Val, + flags.Val, isOptimized.Val, unit.Val, templateParams.Val, + declaration.Val, variables.Val, thrownTypes.Val)); return false; } @@ -4151,15 +4164,13 @@ bool LLParser::ParseDILexicalBlockFile(MDNode *&Result, bool IsDistinct) { bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(scope, MDField, ); \ - OPTIONAL(file, MDField, ); \ OPTIONAL(name, MDStringField, ); \ - OPTIONAL(line, LineField, ); \ OPTIONAL(exportSymbols, MDBoolField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS Result = GET_OR_DISTINCT(DINamespace, - (Context, scope.Val, file.Val, name.Val, line.Val, exportSymbols.Val)); + (Context, scope.Val, name.Val, exportSymbols.Val)); return false; } @@ -4764,16 +4775,14 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { std::vector ParamTypeList; SmallVector Attrs; - Attrs.push_back(AttributeSet::get(Context, RetAttrs)); - for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { ParamTypeList.push_back(ArgList[i].Ty); Attrs.push_back(ArgList[i].Attrs); } - Attrs.push_back(AttributeSet::get(Context, FuncAttrs)); - - AttributeList PAL = AttributeList::get(Context, Attrs); + AttributeList PAL = + AttributeList::get(Context, AttributeSet::get(Context, FuncAttrs), + AttributeSet::get(Context, RetAttrs), Attrs); if (PAL.hasAttribute(1, Attribute::StructRet) && !RetType->isVoidTy()) return Error(RetTypeLoc, "functions with 'sret' argument must return void"); @@ -5383,10 +5392,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { return true; // Set up the Attribute for the function. - SmallVector Attrs; - Attrs.push_back(AttributeSet::get(Context, RetAttrs)); - - SmallVector Args; + SmallVector Args; + SmallVector ArgAttrs; // Loop through FunctionType's arguments and ensure they are specified // correctly. Also, gather any parameter attributes. @@ -5404,7 +5411,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { return Error(ArgList[i].Loc, "argument is not of expected type '" + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); - Attrs.push_back(ArgList[i].Attrs); + ArgAttrs.push_back(ArgList[i].Attrs); } if (I != E) @@ -5413,10 +5420,10 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { if (FnAttrs.hasAlignmentAttr()) return Error(CallLoc, "invoke instructions may not have an alignment"); - Attrs.push_back(AttributeSet::get(Context, FnAttrs)); - // Finish off the Attribute and check them - AttributeList PAL = AttributeList::get(Context, Attrs); + AttributeList PAL = + AttributeList::get(Context, AttributeSet::get(Context, FnAttrs), + AttributeSet::get(Context, RetAttrs), ArgAttrs); InvokeInst *II = InvokeInst::Create(Ty, Callee, NormalBB, UnwindBB, Args, BundleList); @@ -5978,7 +5985,6 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, // Set up the Attribute for the function. SmallVector Attrs; - Attrs.push_back(AttributeSet::get(Context, RetAttrs)); SmallVector Args; @@ -6007,10 +6013,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, if (FnAttrs.hasAlignmentAttr()) return Error(CallLoc, "call instructions may not have an alignment"); - Attrs.push_back(AttributeSet::get(Context, FnAttrs)); - // Finish off the Attribute and check them - AttributeList PAL = AttributeList::get(Context, Attrs); + AttributeList PAL = + AttributeList::get(Context, AttributeSet::get(Context, FnAttrs), + AttributeSet::get(Context, RetAttrs), Attrs); CallInst *CI = CallInst::Create(Ty, Callee, Args, BundleList); CI->setTailCallKind(TCK); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 33f8e63daa059749f8d99ede31dbe1ea14068a7a..6c8ed7da495d131779cba277319b7a32d7c23e14 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -153,6 +153,7 @@ enum Kind { kw_hhvm_ccc, kw_cxx_fast_tlscc, kw_amdgpu_vs, + kw_amdgpu_hs, kw_amdgpu_gs, kw_amdgpu_ps, kw_amdgpu_cs, @@ -198,6 +199,7 @@ enum Kind { kw_returned, kw_returns_twice, kw_signext, + kw_speculatable, kw_ssp, kw_sspreq, kw_sspstrong, diff --git a/lib/AsmParser/LLVMBuild.txt b/lib/AsmParser/LLVMBuild.txt index 3bc31ed910a791e88421f6f67b8f0fe736fda7e5..82dba8c15bb8d1c2da98fe9e76e3221a49445342 100644 --- a/lib/AsmParser/LLVMBuild.txt +++ b/lib/AsmParser/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = AsmParser parent = Libraries -required_libraries = Core Support +required_libraries = BinaryFormat Core Support diff --git a/lib/BinaryFormat/CMakeLists.txt b/lib/BinaryFormat/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..cb78ea6fdf927a08587d5ae105a7388940225e74 --- /dev/null +++ b/lib/BinaryFormat/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_library(LLVMBinaryFormat + Dwarf.cpp + Magic.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/BinaryFormat + ) + \ No newline at end of file diff --git a/lib/Support/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp similarity index 50% rename from lib/Support/Dwarf.cpp rename to lib/BinaryFormat/Dwarf.cpp index f13da62e4a87cdbb6ea9fa33dfdc6617388d256d..37c4579ef0f89b42d255015f6327c42ccaf5dc99 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/BinaryFormat/Dwarf.cpp @@ -1,4 +1,4 @@ -//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===// +//===-- llvm/BinaryFormat/Dwarf.cpp - Dwarf Framework ------------*- C++-*-===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" @@ -22,24 +22,49 @@ StringRef llvm::dwarf::TagString(unsigned Tag) { switch (Tag) { default: return StringRef(); -#define HANDLE_DW_TAG(ID, NAME) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return "DW_TAG_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getTag(StringRef TagString) { return StringSwitch(TagString) -#define HANDLE_DW_TAG(ID, NAME) .Case("DW_TAG_" #NAME, DW_TAG_##NAME) -#include "llvm/Support/Dwarf.def" +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + .Case("DW_TAG_" #NAME, DW_TAG_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Default(DW_TAG_invalid); } +unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) { + switch (Tag) { + default: + return 0; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + case DW_TAG_##NAME: \ + return VERSION; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + +unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) { + switch (Tag) { + default: + return 0; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + case DW_TAG_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + StringRef llvm::dwarf::ChildrenString(unsigned Children) { switch (Children) { - case DW_CHILDREN_no: return "DW_CHILDREN_no"; - case DW_CHILDREN_yes: return "DW_CHILDREN_yes"; + case DW_CHILDREN_no: + return "DW_CHILDREN_no"; + case DW_CHILDREN_yes: + return "DW_CHILDREN_yes"; } return StringRef(); } @@ -48,10 +73,32 @@ StringRef llvm::dwarf::AttributeString(unsigned Attribute) { switch (Attribute) { default: return StringRef(); -#define HANDLE_DW_AT(ID, NAME) \ - case DW_AT_##NAME: \ +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ return "DW_AT_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" + } +} + +unsigned llvm::dwarf::AttributeVersion(dwarf::Attribute Attribute) { + switch (Attribute) { + default: + return 0; +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ + return VERSION; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + +unsigned llvm::dwarf::AttributeVendor(dwarf::Attribute Attribute) { + switch (Attribute) { + default: + return 0; +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -59,10 +106,32 @@ StringRef llvm::dwarf::FormEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_FORM(ID, NAME) \ - case DW_FORM_##NAME: \ +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ return "DW_FORM_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" + } +} + +unsigned llvm::dwarf::FormVersion(dwarf::Form Form) { + switch (Form) { + default: + return 0; +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ + return VERSION; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + +unsigned llvm::dwarf::FormVendor(dwarf::Form Form) { + switch (Form) { + default: + return 0; +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -70,10 +139,10 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_OP(ID, NAME) \ +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return "DW_OP_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" case DW_OP_LLVM_fragment: return "DW_OP_LLVM_fragment"; } @@ -81,48 +150,104 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) { return StringSwitch(OperationEncodingString) -#define HANDLE_DW_OP(ID, NAME) .Case("DW_OP_" #NAME, DW_OP_##NAME) -#include "llvm/Support/Dwarf.def" +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + .Case("DW_OP_" #NAME, DW_OP_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment) .Default(0); } +unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) { + switch (Op) { + default: + return 0; +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + case DW_OP_##NAME: \ + return VERSION; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + +unsigned llvm::dwarf::OperationVendor(dwarf::LocationAtom Op) { + switch (Op) { + default: + return 0; +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + case DW_OP_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_ATE(ID, NAME) \ +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return "DW_ATE_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) { return StringSwitch(EncodingString) -#define HANDLE_DW_ATE(ID, NAME) .Case("DW_ATE_" #NAME, DW_ATE_##NAME) -#include "llvm/Support/Dwarf.def" +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + .Case("DW_ATE_" #NAME, DW_ATE_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Default(0); } +unsigned llvm::dwarf::AttributeEncodingVersion(dwarf::TypeKind ATE) { + switch (ATE) { + default: + return 0; +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + case DW_ATE_##NAME: \ + return VERSION; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + +unsigned llvm::dwarf::AttributeEncodingVendor(dwarf::TypeKind ATE) { + switch (ATE) { + default: + return 0; +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + case DW_ATE_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + StringRef llvm::dwarf::DecimalSignString(unsigned Sign) { switch (Sign) { - case DW_DS_unsigned: return "DW_DS_unsigned"; - case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch"; - case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch"; - case DW_DS_leading_separate: return "DW_DS_leading_separate"; - case DW_DS_trailing_separate: return "DW_DS_trailing_separate"; + case DW_DS_unsigned: + return "DW_DS_unsigned"; + case DW_DS_leading_overpunch: + return "DW_DS_leading_overpunch"; + case DW_DS_trailing_overpunch: + return "DW_DS_trailing_overpunch"; + case DW_DS_leading_separate: + return "DW_DS_leading_separate"; + case DW_DS_trailing_separate: + return "DW_DS_trailing_separate"; } return StringRef(); } StringRef llvm::dwarf::EndianityString(unsigned Endian) { switch (Endian) { - case DW_END_default: return "DW_END_default"; - case DW_END_big: return "DW_END_big"; - case DW_END_little: return "DW_END_little"; - case DW_END_lo_user: return "DW_END_lo_user"; - case DW_END_hi_user: return "DW_END_hi_user"; + case DW_END_default: + return "DW_END_default"; + case DW_END_big: + return "DW_END_big"; + case DW_END_little: + return "DW_END_little"; + case DW_END_lo_user: + return "DW_END_lo_user"; + case DW_END_hi_user: + return "DW_END_hi_user"; } return StringRef(); } @@ -130,18 +255,24 @@ StringRef llvm::dwarf::EndianityString(unsigned Endian) { StringRef llvm::dwarf::AccessibilityString(unsigned Access) { switch (Access) { // Accessibility codes - case DW_ACCESS_public: return "DW_ACCESS_public"; - case DW_ACCESS_protected: return "DW_ACCESS_protected"; - case DW_ACCESS_private: return "DW_ACCESS_private"; + case DW_ACCESS_public: + return "DW_ACCESS_public"; + case DW_ACCESS_protected: + return "DW_ACCESS_protected"; + case DW_ACCESS_private: + return "DW_ACCESS_private"; } return StringRef(); } StringRef llvm::dwarf::VisibilityString(unsigned Visibility) { switch (Visibility) { - case DW_VIS_local: return "DW_VIS_local"; - case DW_VIS_exported: return "DW_VIS_exported"; - case DW_VIS_qualified: return "DW_VIS_qualified"; + case DW_VIS_local: + return "DW_VIS_local"; + case DW_VIS_exported: + return "DW_VIS_exported"; + case DW_VIS_qualified: + return "DW_VIS_qualified"; } return StringRef(); } @@ -153,7 +284,7 @@ StringRef llvm::dwarf::VirtualityString(unsigned Virtuality) { #define HANDLE_DW_VIRTUALITY(ID, NAME) \ case DW_VIRTUALITY_##NAME: \ return "DW_VIRTUALITY_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -161,7 +292,7 @@ unsigned llvm::dwarf::getVirtuality(StringRef VirtualityString) { return StringSwitch(VirtualityString) #define HANDLE_DW_VIRTUALITY(ID, NAME) \ .Case("DW_VIRTUALITY_" #NAME, DW_VIRTUALITY_##NAME) -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" .Default(DW_VIRTUALITY_invalid); } @@ -169,26 +300,53 @@ StringRef llvm::dwarf::LanguageString(unsigned Language) { switch (Language) { default: return StringRef(); -#define HANDLE_DW_LANG(ID, NAME) \ +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return "DW_LANG_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getLanguage(StringRef LanguageString) { return StringSwitch(LanguageString) -#define HANDLE_DW_LANG(ID, NAME) .Case("DW_LANG_" #NAME, DW_LANG_##NAME) -#include "llvm/Support/Dwarf.def" +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + .Case("DW_LANG_" #NAME, DW_LANG_##NAME) +#include "llvm/BinaryFormat/Dwarf.def" .Default(0); } +unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) { + switch (Lang) { + default: + return 0; +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + case DW_LANG_##NAME: \ + return VERSION; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + +unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) { + switch (Lang) { + default: + return 0; +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + case DW_LANG_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/BinaryFormat/Dwarf.def" + } +} + StringRef llvm::dwarf::CaseString(unsigned Case) { switch (Case) { - case DW_ID_case_sensitive: return "DW_ID_case_sensitive"; - case DW_ID_up_case: return "DW_ID_up_case"; - case DW_ID_down_case: return "DW_ID_down_case"; - case DW_ID_case_insensitive: return "DW_ID_case_insensitive"; + case DW_ID_case_sensitive: + return "DW_ID_case_sensitive"; + case DW_ID_up_case: + return "DW_ID_up_case"; + case DW_ID_down_case: + return "DW_ID_down_case"; + case DW_ID_case_insensitive: + return "DW_ID_case_insensitive"; } return StringRef(); } @@ -197,42 +355,50 @@ StringRef llvm::dwarf::ConventionString(unsigned CC) { switch (CC) { default: return StringRef(); -#define HANDLE_DW_CC(ID, NAME) \ - case DW_CC_##NAME: \ +#define HANDLE_DW_CC(ID, NAME) \ + case DW_CC_##NAME: \ return "DW_CC_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } unsigned llvm::dwarf::getCallingConvention(StringRef CCString) { return StringSwitch(CCString) #define HANDLE_DW_CC(ID, NAME) .Case("DW_CC_" #NAME, DW_CC_##NAME) -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" .Default(0); } StringRef llvm::dwarf::InlineCodeString(unsigned Code) { switch (Code) { - case DW_INL_not_inlined: return "DW_INL_not_inlined"; - case DW_INL_inlined: return "DW_INL_inlined"; - case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined"; - case DW_INL_declared_inlined: return "DW_INL_declared_inlined"; + case DW_INL_not_inlined: + return "DW_INL_not_inlined"; + case DW_INL_inlined: + return "DW_INL_inlined"; + case DW_INL_declared_not_inlined: + return "DW_INL_declared_not_inlined"; + case DW_INL_declared_inlined: + return "DW_INL_declared_inlined"; } return StringRef(); } StringRef llvm::dwarf::ArrayOrderString(unsigned Order) { switch (Order) { - case DW_ORD_row_major: return "DW_ORD_row_major"; - case DW_ORD_col_major: return "DW_ORD_col_major"; + case DW_ORD_row_major: + return "DW_ORD_row_major"; + case DW_ORD_col_major: + return "DW_ORD_col_major"; } return StringRef(); } StringRef llvm::dwarf::DiscriminantString(unsigned Discriminant) { switch (Discriminant) { - case DW_DSC_label: return "DW_DSC_label"; - case DW_DSC_range: return "DW_DSC_range"; + case DW_DSC_label: + return "DW_DSC_label"; + case DW_DSC_range: + return "DW_DSC_range"; } return StringRef(); } @@ -241,10 +407,10 @@ StringRef llvm::dwarf::LNStandardString(unsigned Standard) { switch (Standard) { default: return StringRef(); -#define HANDLE_DW_LNS(ID, NAME) \ - case DW_LNS_##NAME: \ +#define HANDLE_DW_LNS(ID, NAME) \ + case DW_LNS_##NAME: \ return "DW_LNS_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -252,22 +418,28 @@ StringRef llvm::dwarf::LNExtendedString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_LNE(ID, NAME) \ - case DW_LNE_##NAME: \ +#define HANDLE_DW_LNE(ID, NAME) \ + case DW_LNE_##NAME: \ return "DW_LNE_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } StringRef llvm::dwarf::MacinfoString(unsigned Encoding) { switch (Encoding) { // Macinfo Type Encodings - case DW_MACINFO_define: return "DW_MACINFO_define"; - case DW_MACINFO_undef: return "DW_MACINFO_undef"; - case DW_MACINFO_start_file: return "DW_MACINFO_start_file"; - case DW_MACINFO_end_file: return "DW_MACINFO_end_file"; - case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext"; - case DW_MACINFO_invalid: return "DW_MACINFO_invalid"; + case DW_MACINFO_define: + return "DW_MACINFO_define"; + case DW_MACINFO_undef: + return "DW_MACINFO_undef"; + case DW_MACINFO_start_file: + return "DW_MACINFO_start_file"; + case DW_MACINFO_end_file: + return "DW_MACINFO_end_file"; + case DW_MACINFO_vendor_ext: + return "DW_MACINFO_vendor_ext"; + case DW_MACINFO_invalid: + return "DW_MACINFO_invalid"; } return StringRef(); } @@ -286,10 +458,10 @@ StringRef llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_CFA(ID, NAME) \ - case DW_CFA_##NAME: \ +#define HANDLE_DW_CFA(ID, NAME) \ + case DW_CFA_##NAME: \ return "DW_CFA_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -297,10 +469,10 @@ StringRef llvm::dwarf::ApplePropertyString(unsigned Prop) { switch (Prop) { default: return StringRef(); -#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) \ - case DW_APPLE_PROPERTY_##NAME: \ +#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) \ + case DW_APPLE_PROPERTY_##NAME: \ return "DW_APPLE_PROPERTY_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -311,7 +483,7 @@ StringRef llvm::dwarf::UnitTypeString(unsigned UT) { #define HANDLE_DW_UT(ID, NAME) \ case DW_UT_##NAME: \ return "DW_UT_" #NAME; -#include "llvm/Support/Dwarf.def" +#include "llvm/BinaryFormat/Dwarf.def" } } @@ -394,3 +566,12 @@ StringRef llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) { return StringRef(); } + +bool llvm::dwarf::isValidFormForVersion(Form F, unsigned Version, + bool ExtensionsOk) { + if (FormVendor(F) == DWARF_VENDOR_DWARF) { + unsigned FV = FormVersion(F); + return FV > 0 && FV <= Version; + } + return ExtensionsOk; +} diff --git a/lib/BinaryFormat/LLVMBuild.txt b/lib/BinaryFormat/LLVMBuild.txt new file mode 100644 index 0000000000000000000000000000000000000000..d7d4dcb5f23d38d7cb2e2bd2ce4ab6637bb8a649 --- /dev/null +++ b/lib/BinaryFormat/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/BinaryFormat/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = BinaryFormat +parent = Libraries +required_libraries = Support diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ca4d93f99d92db9f97cc61a0b4b961d2abcc72c5 --- /dev/null +++ b/lib/BinaryFormat/Magic.cpp @@ -0,0 +1,216 @@ +//===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/Magic.h" + +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileSystem.h" + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include +#else +#include +#endif + +using namespace llvm; +using namespace llvm::support::endian; +using namespace llvm::sys::fs; + +template +static bool startswith(StringRef Magic, const char (&S)[N]) { + return Magic.startswith(StringRef(S, N - 1)); +} + +/// @brief Identify the magic in magic. +file_magic llvm::identify_magic(StringRef Magic) { + if (Magic.size() < 4) + return file_magic::unknown; + switch ((unsigned char)Magic[0]) { + case 0x00: { + // COFF bigobj, CL.exe's LTO object file, or short import library file + if (startswith(Magic, "\0\0\xFF\xFF")) { + size_t MinSize = + offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic); + if (Magic.size() < MinSize) + return file_magic::coff_import_library; + + const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID); + if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0) + return file_magic::coff_object; + if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0) + return file_magic::coff_cl_gl_object; + return file_magic::coff_import_library; + } + // Windows resource file + if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF")) + return file_magic::windows_resource; + // 0x0000 = COFF unknown machine type + if (Magic[1] == 0) + return file_magic::coff_object; + if (startswith(Magic, "\0asm")) + return file_magic::wasm_object; + break; + } + case 0xDE: // 0x0B17C0DE = BC wraper + if (startswith(Magic, "\xDE\xC0\x17\x0B")) + return file_magic::bitcode; + break; + case 'B': + if (startswith(Magic, "BC\xC0\xDE")) + return file_magic::bitcode; + break; + case '!': + if (startswith(Magic, "!\n") || startswith(Magic, "!\n")) + return file_magic::archive; + break; + + case '\177': + if (startswith(Magic, "\177ELF") && Magic.size() >= 18) { + bool Data2MSB = Magic[5] == 2; + unsigned high = Data2MSB ? 16 : 17; + unsigned low = Data2MSB ? 17 : 16; + if (Magic[high] == 0) { + switch (Magic[low]) { + default: + return file_magic::elf; + case 1: + return file_magic::elf_relocatable; + case 2: + return file_magic::elf_executable; + case 3: + return file_magic::elf_shared_object; + case 4: + return file_magic::elf_core; + } + } + // It's still some type of ELF file. + return file_magic::elf; + } + break; + + case 0xCA: + if (startswith(Magic, "\xCA\xFE\xBA\xBE") || + startswith(Magic, "\xCA\xFE\xBA\xBF")) { + // This is complicated by an overlap with Java class files. + // See the Mach-O section in /usr/share/file/magic for details. + if (Magic.size() >= 8 && Magic[7] < 43) + return file_magic::macho_universal_binary; + } + break; + + // The two magic numbers for mach-o are: + // 0xfeedface - 32-bit mach-o + // 0xfeedfacf - 64-bit mach-o + case 0xFE: + case 0xCE: + case 0xCF: { + uint16_t type = 0; + if (startswith(Magic, "\xFE\xED\xFA\xCE") || + startswith(Magic, "\xFE\xED\xFA\xCF")) { + /* Native endian */ + size_t MinSize; + if (Magic[3] == char(0xCE)) + MinSize = sizeof(MachO::mach_header); + else + MinSize = sizeof(MachO::mach_header_64); + if (Magic.size() >= MinSize) + type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15]; + } else if (startswith(Magic, "\xCE\xFA\xED\xFE") || + startswith(Magic, "\xCF\xFA\xED\xFE")) { + /* Reverse endian */ + size_t MinSize; + if (Magic[0] == char(0xCE)) + MinSize = sizeof(MachO::mach_header); + else + MinSize = sizeof(MachO::mach_header_64); + if (Magic.size() >= MinSize) + type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12]; + } + switch (type) { + default: + break; + case 1: + return file_magic::macho_object; + case 2: + return file_magic::macho_executable; + case 3: + return file_magic::macho_fixed_virtual_memory_shared_lib; + case 4: + return file_magic::macho_core; + case 5: + return file_magic::macho_preload_executable; + case 6: + return file_magic::macho_dynamically_linked_shared_lib; + case 7: + return file_magic::macho_dynamic_linker; + case 8: + return file_magic::macho_bundle; + case 9: + return file_magic::macho_dynamically_linked_shared_lib_stub; + case 10: + return file_magic::macho_dsym_companion; + case 11: + return file_magic::macho_kext_bundle; + } + break; + } + case 0xF0: // PowerPC Windows + case 0x83: // Alpha 32-bit + case 0x84: // Alpha 64-bit + case 0x66: // MPS R4000 Windows + case 0x50: // mc68K + case 0x4c: // 80386 Windows + case 0xc4: // ARMNT Windows + if (Magic[1] == 0x01) + return file_magic::coff_object; + LLVM_FALLTHROUGH; + + case 0x90: // PA-RISC Windows + case 0x68: // mc68K Windows + if (Magic[1] == 0x02) + return file_magic::coff_object; + break; + + case 'M': // Possible MS-DOS stub on Windows PE file + if (startswith(Magic, "MZ")) { + uint32_t off = read32le(Magic.data() + 0x3c); + // PE/COFF file, either EXE or DLL. + if (off < Magic.size() && + memcmp(Magic.data() + off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0) + return file_magic::pecoff_executable; + } + break; + + case 0x64: // x86-64 Windows. + if (Magic[1] == char(0x86)) + return file_magic::coff_object; + break; + + default: + break; + } + return file_magic::unknown; +} + +std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) { + int FD; + if (std::error_code EC = openFileForRead(Path, FD)) + return EC; + + char Buffer[32]; + int Length = read(FD, Buffer, sizeof(Buffer)); + if (close(FD) != 0 || Length < 0) + return std::error_code(errno, std::generic_category()); + + Result = identify_magic(StringRef(Buffer, Length)); + return std::error_code(); +} diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index fdd8024a9b05425b9893d358186d946f10bf6550..0629c2d326ae99979680db3b6120aa1928adfb02 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -28,8 +28,8 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -40,13 +40,13 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" @@ -372,12 +372,26 @@ Expected readTriple(BitstreamCursor &Stream) { class BitcodeReaderBase { protected: - BitcodeReaderBase(BitstreamCursor Stream) : Stream(std::move(Stream)) { + BitcodeReaderBase(BitstreamCursor Stream, StringRef Strtab) + : Stream(std::move(Stream)), Strtab(Strtab) { this->Stream.setBlockInfo(&BlockInfo); } BitstreamBlockInfo BlockInfo; BitstreamCursor Stream; + StringRef Strtab; + + /// In version 2 of the bitcode we store names of global values and comdats in + /// a string table rather than in the VST. + bool UseStrtab = false; + + Expected parseVersionRecord(ArrayRef Record); + + /// If this module uses a string table, pop the reference to the string table + /// and return the referenced string and the rest of the record. Otherwise + /// just return the record itself. + std::pair> + readNameFromStrtab(ArrayRef Record); bool readBlockInfo(); @@ -395,6 +409,27 @@ Error BitcodeReaderBase::error(const Twine &Message) { return ::error(FullMsg); } +Expected +BitcodeReaderBase::parseVersionRecord(ArrayRef Record) { + if (Record.size() < 1) + return error("Invalid record"); + unsigned ModuleVersion = Record[0]; + if (ModuleVersion > 2) + return error("Invalid value"); + UseStrtab = ModuleVersion >= 2; + return ModuleVersion; +} + +std::pair> +BitcodeReaderBase::readNameFromStrtab(ArrayRef Record) { + if (!UseStrtab) + return {"", Record}; + // Invalid reference. Let the caller complain about the record being empty. + if (Record[0] + Record[1] > Strtab.size()) + return {"", {}}; + return {StringRef(Strtab.data() + Record[0], Record[1]), Record.slice(2)}; +} + class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { LLVMContext &Context; Module *TheModule = nullptr; @@ -405,6 +440,9 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { bool SeenValueSymbolTable = false; uint64_t VSTOffset = 0; + std::vector SectionTable; + std::vector GCTable; + std::vector TypeList; BitcodeReaderValueList ValueList; Optional MDLoader; @@ -477,8 +515,8 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { std::vector BundleTags; public: - BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification, - LLVMContext &Context); + BitcodeReader(BitstreamCursor Stream, StringRef Strtab, + StringRef ProducerIdentification, LLVMContext &Context); Error materializeForwardReferencedFunctions(); @@ -598,6 +636,13 @@ private: Error parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); Error parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false); + + Error parseComdatRecord(ArrayRef Record); + Error parseGlobalVarRecord(ArrayRef Record); + Error parseFunctionRecord(ArrayRef Record); + Error parseGlobalIndirectSymbolRecord(unsigned BitCode, + ArrayRef Record); + Error parseAttributeBlock(); Error parseAttributeGroupBlock(); Error parseTypeTable(); @@ -606,7 +651,10 @@ private: Expected recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT); + void setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F, + ArrayRef Record); Error parseValueSymbolTable(uint64_t Offset = 0); + Error parseGlobalValueSymbolTable(); Error parseConstants(); Error rememberAndSkipFunctionBodies(); Error rememberAndSkipFunctionBody(); @@ -639,15 +687,16 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { /// Used to enable on-demand parsing of the VST. uint64_t VSTOffset = 0; - // Map to save ValueId to GUID association that was recorded in the + // Map to save ValueId to ValueInfo association that was recorded in the // ValueSymbolTable. It is used after the VST is parsed to convert // call graph edges read from the function summary from referencing - // callees by their ValueId to using the GUID instead, which is how + // callees by their ValueId to using the ValueInfo instead, which is how // they are recorded in the summary index being built. - // We save a second GUID which is the same as the first one, but ignoring the - // linkage, i.e. for value other than local linkage they are identical. - DenseMap> - ValueIdToCallGraphGUIDMap; + // We save a GUID which refers to the same global as the ValueInfo, but + // ignoring the linkage, i.e. for values other than local linkage they are + // identical. + DenseMap> + ValueIdToValueInfoMap; /// Map populated during module path string table parsing, from the /// module ID to a string reference owned by the index's module @@ -658,13 +707,25 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { /// Original source file name recorded in a bitcode record. std::string SourceFileName; + /// The string identifier given to this module by the client, normally the + /// path to the bitcode file. + StringRef ModulePath; + + /// For per-module summary indexes, the unique numerical identifier given to + /// this module by the client. + unsigned ModuleId; + public: - ModuleSummaryIndexBitcodeReader( - BitstreamCursor Stream, ModuleSummaryIndex &TheIndex); + ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab, + ModuleSummaryIndex &TheIndex, + StringRef ModulePath, unsigned ModuleId); - Error parseModule(StringRef ModulePath); + Error parseModule(); private: + void setValueGUID(uint64_t ValueID, StringRef ValueName, + GlobalValue::LinkageTypes Linkage, + StringRef SourceFileName); Error parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap); @@ -672,11 +733,13 @@ private: std::vector makeCallList(ArrayRef Record, bool IsOldProfileFormat, bool HasProfile); - Error parseEntireSummary(StringRef ModulePath); + Error parseEntireSummary(unsigned ID); Error parseModuleStringTable(); - std::pair - getGUIDFromValueId(unsigned ValueId); + std::pair + getValueInfoFromValueId(unsigned ValueId); + + ModuleSummaryIndex::ModuleInfo *addThisModule(); }; } // end anonymous namespace @@ -694,10 +757,10 @@ std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, return std::error_code(); } -BitcodeReader::BitcodeReader(BitstreamCursor Stream, +BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context) - : BitcodeReaderBase(std::move(Stream)), Context(Context), + : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context), ValueList(Context) { this->ProducerIdentification = ProducerIdentification; } @@ -802,11 +865,11 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags, auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits RawFlags = RawFlags >> 4; bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3; - // The LiveRoot flag wasn't introduced until version 3. For dead stripping + // The Live flag wasn't introduced until version 3. For dead stripping // to work correctly on earlier versions, we must conservatively treat all // values as live. - bool LiveRoot = (RawFlags & 0x2) || Version < 3; - return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, LiveRoot); + bool Live = (RawFlags & 0x2) || Version < 3; + return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live); } static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) { @@ -1068,6 +1131,7 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { case Attribute::SwiftSelf: return 1ULL << 51; case Attribute::SwiftError: return 1ULL << 52; case Attribute::WriteOnly: return 1ULL << 53; + case Attribute::Speculatable: return 1ULL << 54; case Attribute::Dereferenceable: llvm_unreachable("dereferenceable attribute not supported in raw format"); break; @@ -1264,6 +1328,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::ReturnsTwice; case bitc::ATTR_KIND_S_EXT: return Attribute::SExt; + case bitc::ATTR_KIND_SPECULATABLE: + return Attribute::Speculatable; case bitc::ATTR_KIND_STACK_ALIGNMENT: return Attribute::StackAlignment; case bitc::ATTR_KIND_STACK_PROTECT: @@ -1727,6 +1793,54 @@ static uint64_t jumpToValueSymbolTable(uint64_t Offset, return CurrentBit; } +void BitcodeReader::setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, + Function *F, + ArrayRef Record) { + // Note that we subtract 1 here because the offset is relative to one word + // before the start of the identification or module block, which was + // historically always the start of the regular bitcode header. + uint64_t FuncWordOffset = Record[1] - 1; + uint64_t FuncBitOffset = FuncWordOffset * 32; + DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; + // Set the LastFunctionBlockBit to point to the last function block. + // Later when parsing is resumed after function materialization, + // we can simply skip that last function block. + if (FuncBitOffset > LastFunctionBlockBit) + LastFunctionBlockBit = FuncBitOffset; +} + +/// Read a new-style GlobalValue symbol table. +Error BitcodeReader::parseGlobalValueSymbolTable() { + unsigned FuncBitcodeOffsetDelta = + Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; + + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + while (true) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return Error::success(); + case BitstreamEntry::Record: + break; + } + + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + case bitc::VST_CODE_FNENTRY: // [valueid, offset] + setDeferredFunctionInfo(FuncBitcodeOffsetDelta, + cast(ValueList[Record[0]]), Record); + break; + } + } +} + /// Parse the value symbol table at either the current parsing location or /// at the given bit offset if provided. Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { @@ -1734,8 +1848,18 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { // Pass in the Offset to distinguish between calling for the module-level // VST (where we want to jump to the VST offset) and the function-level // VST (where we don't). - if (Offset > 0) + if (Offset > 0) { CurrentBit = jumpToValueSymbolTable(Offset, Stream); + // If this module uses a string table, read this as a module-level VST. + if (UseStrtab) { + if (Error Err = parseGlobalValueSymbolTable()) + return Err; + Stream.JumpToBit(CurrentBit); + return Error::success(); + } + // Otherwise, the VST will be in a similar format to a function-level VST, + // and will contain symbol names. + } // Compute the delta between the bitcode indices in the VST (the word offset // to the word-aligned ENTER_SUBBLOCK for the function block, and that @@ -1796,23 +1920,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { return Err; Value *V = ValOrErr.get(); - auto *F = dyn_cast(V); // Ignore function offsets emitted for aliases of functions in older // versions of LLVM. - if (!F) - break; - - // Note that we subtract 1 here because the offset is relative to one word - // before the start of the identification or module block, which was - // historically always the start of the regular bitcode header. - uint64_t FuncWordOffset = Record[1] - 1; - uint64_t FuncBitOffset = FuncWordOffset * 32; - DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; - // Set the LastFunctionBlockBit to point to the last function block. - // Later when parsing is resumed after function materialization, - // we can simply skip that last function block. - if (FuncBitOffset > LastFunctionBlockBit) - LastFunctionBlockBit = FuncBitOffset; + if (auto *F = dyn_cast(V)) + setDeferredFunctionInfo(FuncBitcodeOffsetDelta, F, Record); break; } case bitc::VST_CODE_BBENTRY: { @@ -2497,6 +2608,16 @@ Error BitcodeReader::materializeMetadata() { if (Error Err = MDLoader->parseModuleMetadata()) return Err; } + + // Upgrade "Linker Options" module flag to "llvm.linker.options" module-level + // metadata. + if (Metadata *Val = TheModule->getModuleFlag("Linker Options")) { + NamedMDNode *LinkerOpts = + TheModule->getOrInsertNamedMetadata("llvm.linker.options"); + for (const MDOperand &MDOptions : cast(Val)->operands()) + LinkerOpts->addOperand(cast(MDOptions)); + } + DeferredMetadataInfo.clear(); return Error::success(); } @@ -2535,6 +2656,7 @@ Error BitcodeReader::globalCleanup() { // Look for intrinsic functions which need to be upgraded at some point for (Function &F : *TheModule) { + MDLoader->upgradeDebugIntrinsics(F); Function *NewFn; if (UpgradeIntrinsicFunction(&F, NewFn)) UpgradedIntrinsics[&F] = NewFn; @@ -2603,6 +2725,272 @@ bool BitcodeReaderBase::readBlockInfo() { return false; } +Error BitcodeReader::parseComdatRecord(ArrayRef Record) { + // v1: [selection_kind, name] + // v2: [strtab_offset, strtab_size, selection_kind] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + + if (Record.size() < 1) + return error("Invalid record"); + Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); + std::string OldFormatName; + if (!UseStrtab) { + if (Record.size() < 2) + return error("Invalid record"); + unsigned ComdatNameSize = Record[1]; + OldFormatName.reserve(ComdatNameSize); + for (unsigned i = 0; i != ComdatNameSize; ++i) + OldFormatName += (char)Record[2 + i]; + Name = OldFormatName; + } + Comdat *C = TheModule->getOrInsertComdat(Name); + C->setSelectionKind(SK); + ComdatList.push_back(C); + return Error::success(); +} + +Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { + // v1: [pointer type, isconst, initid, linkage, alignment, section, + // visibility, threadlocal, unnamed_addr, externally_initialized, + // dllstorageclass, comdat, attributes] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + + if (Record.size() < 6) + return error("Invalid record"); + Type *Ty = getTypeByID(Record[0]); + if (!Ty) + return error("Invalid record"); + bool isConstant = Record[1] & 1; + bool explicitType = Record[1] & 2; + unsigned AddressSpace; + if (explicitType) { + AddressSpace = Record[1] >> 2; + } else { + if (!Ty->isPointerTy()) + return error("Invalid type for value"); + AddressSpace = cast(Ty)->getAddressSpace(); + Ty = cast(Ty)->getElementType(); + } + + uint64_t RawLinkage = Record[3]; + GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); + unsigned Alignment; + if (Error Err = parseAlignmentValue(Record[4], Alignment)) + return Err; + std::string Section; + if (Record[5]) { + if (Record[5] - 1 >= SectionTable.size()) + return error("Invalid ID"); + Section = SectionTable[Record[5] - 1]; + } + GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; + // Local linkage must have default visibility. + if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage)) + // FIXME: Change to an error if non-default in 4.0. + Visibility = getDecodedVisibility(Record[6]); + + GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal; + if (Record.size() > 7) + TLM = getDecodedThreadLocalMode(Record[7]); + + GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; + if (Record.size() > 8) + UnnamedAddr = getDecodedUnnamedAddrType(Record[8]); + + bool ExternallyInitialized = false; + if (Record.size() > 9) + ExternallyInitialized = Record[9]; + + GlobalVariable *NewGV = + new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, Name, + nullptr, TLM, AddressSpace, ExternallyInitialized); + NewGV->setAlignment(Alignment); + if (!Section.empty()) + NewGV->setSection(Section); + NewGV->setVisibility(Visibility); + NewGV->setUnnamedAddr(UnnamedAddr); + + if (Record.size() > 10) + NewGV->setDLLStorageClass(getDecodedDLLStorageClass(Record[10])); + else + upgradeDLLImportExportLinkage(NewGV, RawLinkage); + + ValueList.push_back(NewGV); + + // Remember which value to use for the global initializer. + if (unsigned InitID = Record[2]) + GlobalInits.push_back(std::make_pair(NewGV, InitID - 1)); + + if (Record.size() > 11) { + if (unsigned ComdatID = Record[11]) { + if (ComdatID > ComdatList.size()) + return error("Invalid global variable comdat ID"); + NewGV->setComdat(ComdatList[ComdatID - 1]); + } + } else if (hasImplicitComdat(RawLinkage)) { + NewGV->setComdat(reinterpret_cast(1)); + } + + if (Record.size() > 12) { + auto AS = getAttributes(Record[12]).getFnAttributes(); + NewGV->setAttributes(AS); + } + return Error::success(); +} + +Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { + // v1: [type, callingconv, isproto, linkage, paramattr, alignment, section, + // visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, + // prefixdata] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + + if (Record.size() < 8) + return error("Invalid record"); + Type *Ty = getTypeByID(Record[0]); + if (!Ty) + return error("Invalid record"); + if (auto *PTy = dyn_cast(Ty)) + Ty = PTy->getElementType(); + auto *FTy = dyn_cast(Ty); + if (!FTy) + return error("Invalid type for value"); + auto CC = static_cast(Record[1]); + if (CC & ~CallingConv::MaxID) + return error("Invalid calling convention ID"); + + Function *Func = + Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule); + + Func->setCallingConv(CC); + bool isProto = Record[2]; + uint64_t RawLinkage = Record[3]; + Func->setLinkage(getDecodedLinkage(RawLinkage)); + Func->setAttributes(getAttributes(Record[4])); + + unsigned Alignment; + if (Error Err = parseAlignmentValue(Record[5], Alignment)) + return Err; + Func->setAlignment(Alignment); + if (Record[6]) { + if (Record[6] - 1 >= SectionTable.size()) + return error("Invalid ID"); + Func->setSection(SectionTable[Record[6] - 1]); + } + // Local linkage must have default visibility. + if (!Func->hasLocalLinkage()) + // FIXME: Change to an error if non-default in 4.0. + Func->setVisibility(getDecodedVisibility(Record[7])); + if (Record.size() > 8 && Record[8]) { + if (Record[8] - 1 >= GCTable.size()) + return error("Invalid ID"); + Func->setGC(GCTable[Record[8] - 1]); + } + GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; + if (Record.size() > 9) + UnnamedAddr = getDecodedUnnamedAddrType(Record[9]); + Func->setUnnamedAddr(UnnamedAddr); + if (Record.size() > 10 && Record[10] != 0) + FunctionPrologues.push_back(std::make_pair(Func, Record[10] - 1)); + + if (Record.size() > 11) + Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11])); + else + upgradeDLLImportExportLinkage(Func, RawLinkage); + + if (Record.size() > 12) { + if (unsigned ComdatID = Record[12]) { + if (ComdatID > ComdatList.size()) + return error("Invalid function comdat ID"); + Func->setComdat(ComdatList[ComdatID - 1]); + } + } else if (hasImplicitComdat(RawLinkage)) { + Func->setComdat(reinterpret_cast(1)); + } + + if (Record.size() > 13 && Record[13] != 0) + FunctionPrefixes.push_back(std::make_pair(Func, Record[13] - 1)); + + if (Record.size() > 14 && Record[14] != 0) + FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1)); + + ValueList.push_back(Func); + + // If this is a function with a body, remember the prototype we are + // creating now, so that we can match up the body with them later. + if (!isProto) { + Func->setIsMaterializable(true); + FunctionsWithBodies.push_back(Func); + DeferredFunctionInfo[Func] = 0; + } + return Error::success(); +} + +Error BitcodeReader::parseGlobalIndirectSymbolRecord( + unsigned BitCode, ArrayRef Record) { + // v1 ALIAS_OLD: [alias type, aliasee val#, linkage] (name in VST) + // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, + // dllstorageclass] (name in VST) + // v1 IFUNC: [alias type, addrspace, aliasee val#, linkage, + // visibility, dllstorageclass] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + + bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD; + if (Record.size() < (3 + (unsigned)NewRecord)) + return error("Invalid record"); + unsigned OpNum = 0; + Type *Ty = getTypeByID(Record[OpNum++]); + if (!Ty) + return error("Invalid record"); + + unsigned AddrSpace; + if (!NewRecord) { + auto *PTy = dyn_cast(Ty); + if (!PTy) + return error("Invalid type for value"); + Ty = PTy->getElementType(); + AddrSpace = PTy->getAddressSpace(); + } else { + AddrSpace = Record[OpNum++]; + } + + auto Val = Record[OpNum++]; + auto Linkage = Record[OpNum++]; + GlobalIndirectSymbol *NewGA; + if (BitCode == bitc::MODULE_CODE_ALIAS || + BitCode == bitc::MODULE_CODE_ALIAS_OLD) + NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, + TheModule); + else + NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, + nullptr, TheModule); + // Old bitcode files didn't have visibility field. + // Local linkage must have default visibility. + if (OpNum != Record.size()) { + auto VisInd = OpNum++; + if (!NewGA->hasLocalLinkage()) + // FIXME: Change to an error if non-default in 4.0. + NewGA->setVisibility(getDecodedVisibility(Record[VisInd])); + } + if (OpNum != Record.size()) + NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++])); + else + upgradeDLLImportExportLinkage(NewGA, Linkage); + if (OpNum != Record.size()) + NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++])); + if (OpNum != Record.size()) + NewGA->setUnnamedAddr(getDecodedUnnamedAddrType(Record[OpNum++])); + ValueList.push_back(NewGA); + IndirectSymbolInits.push_back(std::make_pair(NewGA, Val)); + return Error::success(); +} + Error BitcodeReader::parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata) { if (ResumeBit) @@ -2611,8 +2999,6 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, return error("Invalid record"); SmallVector Record; - std::vector SectionTable; - std::vector GCTable; // Read all the records for this module. while (true) { @@ -2758,21 +3144,11 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: break; // Default behavior, ignore unknown content. - case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] - if (Record.size() < 1) - return error("Invalid record"); - // Only version #0 and #1 are supported so far. - unsigned module_version = Record[0]; - switch (module_version) { - default: - return error("Invalid value"); - case 0: - UseRelativeIDs = false; - break; - case 1: - UseRelativeIDs = true; - break; - } + case bitc::MODULE_CODE_VERSION: { + Expected VersionOrErr = parseVersionRecord(Record); + if (!VersionOrErr) + return VersionOrErr.takeError(); + UseRelativeIDs = *VersionOrErr >= 1; break; } case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] @@ -2818,240 +3194,26 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, GCTable.push_back(S); break; } - case bitc::MODULE_CODE_COMDAT: { // COMDAT: [selection_kind, name] - if (Record.size() < 2) - return error("Invalid record"); - Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); - unsigned ComdatNameSize = Record[1]; - std::string ComdatName; - ComdatName.reserve(ComdatNameSize); - for (unsigned i = 0; i != ComdatNameSize; ++i) - ComdatName += (char)Record[2 + i]; - Comdat *C = TheModule->getOrInsertComdat(ComdatName); - C->setSelectionKind(SK); - ComdatList.push_back(C); - break; - } - // GLOBALVAR: [pointer type, isconst, initid, - // linkage, alignment, section, visibility, threadlocal, - // unnamed_addr, externally_initialized, dllstorageclass, - // comdat] + case bitc::MODULE_CODE_COMDAT: { + if (Error Err = parseComdatRecord(Record)) + return Err; + break; + } case bitc::MODULE_CODE_GLOBALVAR: { - if (Record.size() < 6) - return error("Invalid record"); - Type *Ty = getTypeByID(Record[0]); - if (!Ty) - return error("Invalid record"); - bool isConstant = Record[1] & 1; - bool explicitType = Record[1] & 2; - unsigned AddressSpace; - if (explicitType) { - AddressSpace = Record[1] >> 2; - } else { - if (!Ty->isPointerTy()) - return error("Invalid type for value"); - AddressSpace = cast(Ty)->getAddressSpace(); - Ty = cast(Ty)->getElementType(); - } - - uint64_t RawLinkage = Record[3]; - GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); - unsigned Alignment; - if (Error Err = parseAlignmentValue(Record[4], Alignment)) + if (Error Err = parseGlobalVarRecord(Record)) return Err; - std::string Section; - if (Record[5]) { - if (Record[5]-1 >= SectionTable.size()) - return error("Invalid ID"); - Section = SectionTable[Record[5]-1]; - } - GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; - // Local linkage must have default visibility. - if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage)) - // FIXME: Change to an error if non-default in 4.0. - Visibility = getDecodedVisibility(Record[6]); - - GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal; - if (Record.size() > 7) - TLM = getDecodedThreadLocalMode(Record[7]); - - GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; - if (Record.size() > 8) - UnnamedAddr = getDecodedUnnamedAddrType(Record[8]); - - bool ExternallyInitialized = false; - if (Record.size() > 9) - ExternallyInitialized = Record[9]; - - GlobalVariable *NewGV = - new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "", nullptr, - TLM, AddressSpace, ExternallyInitialized); - NewGV->setAlignment(Alignment); - if (!Section.empty()) - NewGV->setSection(Section); - NewGV->setVisibility(Visibility); - NewGV->setUnnamedAddr(UnnamedAddr); - - if (Record.size() > 10) - NewGV->setDLLStorageClass(getDecodedDLLStorageClass(Record[10])); - else - upgradeDLLImportExportLinkage(NewGV, RawLinkage); - - ValueList.push_back(NewGV); - - // Remember which value to use for the global initializer. - if (unsigned InitID = Record[2]) - GlobalInits.push_back(std::make_pair(NewGV, InitID-1)); - - if (Record.size() > 11) { - if (unsigned ComdatID = Record[11]) { - if (ComdatID > ComdatList.size()) - return error("Invalid global variable comdat ID"); - NewGV->setComdat(ComdatList[ComdatID - 1]); - } - } else if (hasImplicitComdat(RawLinkage)) { - NewGV->setComdat(reinterpret_cast(1)); - } - break; } - // FUNCTION: [type, callingconv, isproto, linkage, paramattr, - // alignment, section, visibility, gc, unnamed_addr, - // prologuedata, dllstorageclass, comdat, prefixdata] case bitc::MODULE_CODE_FUNCTION: { - if (Record.size() < 8) - return error("Invalid record"); - Type *Ty = getTypeByID(Record[0]); - if (!Ty) - return error("Invalid record"); - if (auto *PTy = dyn_cast(Ty)) - Ty = PTy->getElementType(); - auto *FTy = dyn_cast(Ty); - if (!FTy) - return error("Invalid type for value"); - auto CC = static_cast(Record[1]); - if (CC & ~CallingConv::MaxID) - return error("Invalid calling convention ID"); - - Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, - "", TheModule); - - Func->setCallingConv(CC); - bool isProto = Record[2]; - uint64_t RawLinkage = Record[3]; - Func->setLinkage(getDecodedLinkage(RawLinkage)); - Func->setAttributes(getAttributes(Record[4])); - - unsigned Alignment; - if (Error Err = parseAlignmentValue(Record[5], Alignment)) + if (Error Err = parseFunctionRecord(Record)) return Err; - Func->setAlignment(Alignment); - if (Record[6]) { - if (Record[6]-1 >= SectionTable.size()) - return error("Invalid ID"); - Func->setSection(SectionTable[Record[6]-1]); - } - // Local linkage must have default visibility. - if (!Func->hasLocalLinkage()) - // FIXME: Change to an error if non-default in 4.0. - Func->setVisibility(getDecodedVisibility(Record[7])); - if (Record.size() > 8 && Record[8]) { - if (Record[8]-1 >= GCTable.size()) - return error("Invalid ID"); - Func->setGC(GCTable[Record[8] - 1]); - } - GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; - if (Record.size() > 9) - UnnamedAddr = getDecodedUnnamedAddrType(Record[9]); - Func->setUnnamedAddr(UnnamedAddr); - if (Record.size() > 10 && Record[10] != 0) - FunctionPrologues.push_back(std::make_pair(Func, Record[10]-1)); - - if (Record.size() > 11) - Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11])); - else - upgradeDLLImportExportLinkage(Func, RawLinkage); - - if (Record.size() > 12) { - if (unsigned ComdatID = Record[12]) { - if (ComdatID > ComdatList.size()) - return error("Invalid function comdat ID"); - Func->setComdat(ComdatList[ComdatID - 1]); - } - } else if (hasImplicitComdat(RawLinkage)) { - Func->setComdat(reinterpret_cast(1)); - } - - if (Record.size() > 13 && Record[13] != 0) - FunctionPrefixes.push_back(std::make_pair(Func, Record[13]-1)); - - if (Record.size() > 14 && Record[14] != 0) - FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1)); - - ValueList.push_back(Func); - - // If this is a function with a body, remember the prototype we are - // creating now, so that we can match up the body with them later. - if (!isProto) { - Func->setIsMaterializable(true); - FunctionsWithBodies.push_back(Func); - DeferredFunctionInfo[Func] = 0; - } break; } - // ALIAS: [alias type, addrspace, aliasee val#, linkage] - // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, dllstorageclass] - // IFUNC: [alias type, addrspace, aliasee val#, linkage, visibility, dllstorageclass] case bitc::MODULE_CODE_IFUNC: case bitc::MODULE_CODE_ALIAS: case bitc::MODULE_CODE_ALIAS_OLD: { - bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD; - if (Record.size() < (3 + (unsigned)NewRecord)) - return error("Invalid record"); - unsigned OpNum = 0; - Type *Ty = getTypeByID(Record[OpNum++]); - if (!Ty) - return error("Invalid record"); - - unsigned AddrSpace; - if (!NewRecord) { - auto *PTy = dyn_cast(Ty); - if (!PTy) - return error("Invalid type for value"); - Ty = PTy->getElementType(); - AddrSpace = PTy->getAddressSpace(); - } else { - AddrSpace = Record[OpNum++]; - } - - auto Val = Record[OpNum++]; - auto Linkage = Record[OpNum++]; - GlobalIndirectSymbol *NewGA; - if (BitCode == bitc::MODULE_CODE_ALIAS || - BitCode == bitc::MODULE_CODE_ALIAS_OLD) - NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), - "", TheModule); - else - NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), - "", nullptr, TheModule); - // Old bitcode files didn't have visibility field. - // Local linkage must have default visibility. - if (OpNum != Record.size()) { - auto VisInd = OpNum++; - if (!NewGA->hasLocalLinkage()) - // FIXME: Change to an error if non-default in 4.0. - NewGA->setVisibility(getDecodedVisibility(Record[VisInd])); - } - if (OpNum != Record.size()) - NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++])); - else - upgradeDLLImportExportLinkage(NewGA, Linkage); - if (OpNum != Record.size()) - NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++])); - if (OpNum != Record.size()) - NewGA->setUnnamedAddr(getDecodedUnnamedAddrType(Record[OpNum++])); - ValueList.push_back(NewGA); - IndirectSymbolInits.push_back(std::make_pair(NewGA, Val)); + if (Error Err = parseGlobalIndirectSymbolRecord(BitCode, Record)) + return Err; break; } /// MODULE_CODE_VSTOFFSET: [offset] @@ -4337,11 +4499,11 @@ Error BitcodeReader::parseFunctionBody(Function *F) { // Add instruction to end of current BB. If there is no current BB, reject // this file. if (!CurBB) { - delete I; + I->deleteValue(); return error("Invalid instruction with no BB"); } if (!OperandBundles.empty()) { - delete I; + I->deleteValue(); return error("Operand bundles found with no consumer"); } CurBB->getInstList().push_back(I); @@ -4534,14 +4696,37 @@ std::vector BitcodeReader::getIdentifiedStructTypes() const { } ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( - BitstreamCursor Cursor, ModuleSummaryIndex &TheIndex) - : BitcodeReaderBase(std::move(Cursor)), TheIndex(TheIndex) {} - -std::pair -ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) { - auto VGI = ValueIdToCallGraphGUIDMap.find(ValueId); - assert(VGI != ValueIdToCallGraphGUIDMap.end()); - return VGI->second; + BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex, + StringRef ModulePath, unsigned ModuleId) + : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex), + ModulePath(ModulePath), ModuleId(ModuleId) {} + +ModuleSummaryIndex::ModuleInfo * +ModuleSummaryIndexBitcodeReader::addThisModule() { + return TheIndex.addModule(ModulePath, ModuleId); +} + +std::pair +ModuleSummaryIndexBitcodeReader::getValueInfoFromValueId(unsigned ValueId) { + auto VGI = ValueIdToValueInfoMap[ValueId]; + assert(VGI.first); + return VGI; +} + +void ModuleSummaryIndexBitcodeReader::setValueGUID( + uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage, + StringRef SourceFileName) { + std::string GlobalId = + GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName); + auto ValueGUID = GlobalValue::getGUID(GlobalId); + auto OriginalNameID = ValueGUID; + if (GlobalValue::isLocalLinkage(Linkage)) + OriginalNameID = GlobalValue::getGUID(ValueName); + if (PrintSummaryGUIDs) + dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is " + << ValueName << "\n"; + ValueIdToValueInfoMap[ValueID] = + std::make_pair(TheIndex.getOrInsertValueInfo(ValueGUID), OriginalNameID); } // Specialized value symbol table parser used when reading module index @@ -4550,6 +4735,10 @@ ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) { Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap) { + // With a strtab the VST is not required to parse the summary. + if (UseStrtab) + return Error::success(); + assert(Offset > 0 && "Expected non-zero VST offset"); uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream); @@ -4591,17 +4780,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; - std::string GlobalId = - GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName); - auto ValueGUID = GlobalValue::getGUID(GlobalId); - auto OriginalNameID = ValueGUID; - if (GlobalValue::isLocalLinkage(Linkage)) - OriginalNameID = GlobalValue::getGUID(ValueName); - if (PrintSummaryGUIDs) - dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is " - << ValueName << "\n"; - ValueIdToCallGraphGUIDMap[ValueID] = - std::make_pair(ValueGUID, OriginalNameID); + setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } @@ -4615,18 +4794,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; - std::string FunctionGlobalId = GlobalValue::getGlobalIdentifier( - ValueName, VLI->second, SourceFileName); - auto FunctionGUID = GlobalValue::getGUID(FunctionGlobalId); - auto OriginalNameID = FunctionGUID; - if (GlobalValue::isLocalLinkage(Linkage)) - OriginalNameID = GlobalValue::getGUID(ValueName); - if (PrintSummaryGUIDs) - dbgs() << "GUID " << FunctionGUID << "(" << OriginalNameID << ") is " - << ValueName << "\n"; - ValueIdToCallGraphGUIDMap[ValueID] = - std::make_pair(FunctionGUID, OriginalNameID); - + setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } @@ -4636,7 +4804,8 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( GlobalValue::GUID RefGUID = Record[1]; // The "original name", which is the second value of the pair will be // overriden later by a FS_COMBINED_ORIGINAL_NAME in the combined index. - ValueIdToCallGraphGUIDMap[ValueID] = std::make_pair(RefGUID, RefGUID); + ValueIdToValueInfoMap[ValueID] = + std::make_pair(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID); break; } } @@ -4646,7 +4815,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( // Parse just the blocks needed for building the index out of the module. // At the end of this routine the module Index is populated with a map // from global value id to GlobalValueSummary objects. -Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { +Error ModuleSummaryIndexBitcodeReader::parseModule() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); @@ -4685,6 +4854,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { return error("Invalid record"); break; case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: + case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: assert(!SeenValueSymbolTable && "Already read VST when parsing summary block?"); // We might not have a VST if there were no values in the @@ -4697,7 +4867,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { SeenValueSymbolTable = true; } SeenGlobalValSummary = true; - if (Error Err = parseEntireSummary(ModulePath)) + if (Error Err = parseEntireSummary(Entry.ID)) return Err; break; case bitc::MODULE_STRTAB_BLOCK_ID: @@ -4713,6 +4883,11 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { switch (BitCode) { default: break; // Default behavior, ignore unknown content. + case bitc::MODULE_CODE_VERSION: { + if (Error Err = parseVersionRecord(Record).takeError()) + return Err; + break; + } /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: { SmallString<128> ValueName; @@ -4725,12 +4900,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { case bitc::MODULE_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); - if (TheIndex.modulePaths().empty()) - // We always seed the index with the module. - TheIndex.addModulePath(ModulePath, 0); - if (TheIndex.modulePaths().size() != 1) - return error("Don't expect multiple modules defined?"); - auto &Hash = TheIndex.modulePaths().begin()->second.second; + auto &Hash = addThisModule()->second.second; int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); @@ -4747,17 +4917,26 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { // was historically always the start of the regular bitcode header. VSTOffset = Record[0] - 1; break; - // GLOBALVAR: [pointer type, isconst, initid, linkage, ...] - // FUNCTION: [type, callingconv, isproto, linkage, ...] - // ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] + // v1 GLOBALVAR: [pointer type, isconst, initid, linkage, ...] + // v1 FUNCTION: [type, callingconv, isproto, linkage, ...] + // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] + // v2: [strtab offset, strtab size, v1] case bitc::MODULE_CODE_GLOBALVAR: case bitc::MODULE_CODE_FUNCTION: case bitc::MODULE_CODE_ALIAS: { - if (Record.size() <= 3) + StringRef Name; + ArrayRef GVRecord; + std::tie(Name, GVRecord) = readNameFromStrtab(Record); + if (GVRecord.size() <= 3) return error("Invalid record"); - uint64_t RawLinkage = Record[3]; + uint64_t RawLinkage = GVRecord[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); - ValueIdToLinkageMap[ValueId++] = Linkage; + if (!UseStrtab) { + ValueIdToLinkageMap[ValueId++] = Linkage; + break; + } + + setValueGUID(ValueId++, Name, Linkage, SourceFileName); break; } } @@ -4772,7 +4951,7 @@ ModuleSummaryIndexBitcodeReader::makeRefList(ArrayRef Record) { std::vector Ret; Ret.reserve(Record.size()); for (uint64_t RefValueId : Record) - Ret.push_back(getGUIDFromValueId(RefValueId).first); + Ret.push_back(getValueInfoFromValueId(RefValueId).first); return Ret; } @@ -4782,23 +4961,22 @@ std::vector ModuleSummaryIndexBitcodeReader::makeCallLi Ret.reserve(Record.size()); for (unsigned I = 0, E = Record.size(); I != E; ++I) { CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown; - GlobalValue::GUID CalleeGUID = getGUIDFromValueId(Record[I]).first; + ValueInfo Callee = getValueInfoFromValueId(Record[I]).first; if (IsOldProfileFormat) { I += 1; // Skip old callsitecount field if (HasProfile) I += 1; // Skip old profilecount field } else if (HasProfile) Hotness = static_cast(Record[++I]); - Ret.push_back(FunctionSummary::EdgeTy{CalleeGUID, CalleeInfo{Hotness}}); + Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo{Hotness}}); } return Ret; } // Eagerly parse the entire summary block. This populates the GlobalValueSummary // objects in the index. -Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( - StringRef ModulePath) { - if (Stream.EnterSubBlock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID)) +Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { + if (Stream.EnterSubBlock(ID)) return error("Invalid record"); SmallVector Record; @@ -4821,7 +4999,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( // "OriginalName" attachement. GlobalValueSummary *LastSeenSummary = nullptr; GlobalValue::GUID LastSeenGUID = 0; - bool Combined = false; // We can expect to see any number of type ID information records before // each function summary records; these variables store the information @@ -4840,16 +5017,6 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - // For a per-module index, remove any entries that still have empty - // summaries. The VST parsing creates entries eagerly for all symbols, - // but not all have associated summaries (e.g. it doesn't know how to - // distinguish between VST_CODE_ENTRY for function declarations vs global - // variables with initializers that end up with a summary). Remove those - // entries now so that we don't need to rely on the combined index merger - // to clean them up (especially since that may not run for the first - // module's index if we merge into that). - if (!Combined) - TheIndex.removeEmptySummaryEntries(); return Error::success(); case BitstreamEntry::Record: // The interesting case. @@ -4868,6 +5035,13 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( switch (BitCode) { default: // Default behavior: ignore. break; + case bitc::FS_VALUE_GUID: { // [valueid, refguid] + uint64_t ValueID = Record[0]; + GlobalValue::GUID RefGUID = Record[1]; + ValueIdToValueInfoMap[ValueID] = + std::make_pair(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID); + break; + } // FS_PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, // n x (valueid)] // FS_PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, @@ -4906,10 +5080,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( PendingTypeCheckedLoadVCalls.clear(); PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); - auto GUID = getGUIDFromValueId(ValueID); - FS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first()); - FS->setOriginalName(GUID.second); - TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); + auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); + FS->setModulePath(addThisModule()->first()); + FS->setOriginalName(VIAndOriginalGUID.second); + TheIndex.addGlobalValueSummary(VIAndOriginalGUID.first, std::move(FS)); break; } // FS_ALIAS: [valueid, flags, valueid] @@ -4927,15 +5101,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. - AS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first()); + AS->setModulePath(addThisModule()->first()); - GlobalValue::GUID AliaseeGUID = getGUIDFromValueId(AliaseeID).first; - auto *AliaseeSummary = TheIndex.getGlobalValueSummary(AliaseeGUID); - if (!AliaseeSummary) + GlobalValue::GUID AliaseeGUID = + getValueInfoFromValueId(AliaseeID).first.getGUID(); + auto AliaseeInModule = + TheIndex.findSummaryInModule(AliaseeGUID, ModulePath); + if (!AliaseeInModule) return error("Alias expects aliasee summary to be parsed"); - AS->setAliasee(AliaseeSummary); + AS->setAliasee(AliaseeInModule); - auto GUID = getGUIDFromValueId(ValueID); + auto GUID = getValueInfoFromValueId(ValueID); AS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(AS)); break; @@ -4948,8 +5124,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( std::vector Refs = makeRefList(ArrayRef(Record).slice(2)); auto FS = llvm::make_unique(Flags, std::move(Refs)); - FS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first()); - auto GUID = getGUIDFromValueId(ValueID); + FS->setModulePath(addThisModule()->first()); + auto GUID = getValueInfoFromValueId(ValueID); FS->setOriginalName(GUID.second); TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); break; @@ -4976,7 +5152,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( std::vector Edges = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile); - GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first; + ValueInfo VI = getValueInfoFromValueId(ValueID).first; auto FS = llvm::make_unique( Flags, InstCount, std::move(Refs), std::move(Edges), std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls), @@ -4989,10 +5165,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); LastSeenSummary = FS.get(); - LastSeenGUID = GUID; + LastSeenGUID = VI.getGUID(); FS->setModulePath(ModuleIdMap[ModuleId]); - TheIndex.addGlobalValueSummary(GUID, std::move(FS)); - Combined = true; + TheIndex.addGlobalValueSummary(VI, std::move(FS)); break; } // FS_COMBINED_ALIAS: [valueid, modid, flags, valueid] @@ -5008,17 +5183,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( LastSeenSummary = AS.get(); AS->setModulePath(ModuleIdMap[ModuleId]); - auto AliaseeGUID = getGUIDFromValueId(AliaseeValueId).first; + auto AliaseeGUID = + getValueInfoFromValueId(AliaseeValueId).first.getGUID(); auto AliaseeInModule = TheIndex.findSummaryInModule(AliaseeGUID, AS->modulePath()); if (!AliaseeInModule) return error("Alias expects aliasee summary to be parsed"); AS->setAliasee(AliaseeInModule); - GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first; - LastSeenGUID = GUID; - TheIndex.addGlobalValueSummary(GUID, std::move(AS)); - Combined = true; + ValueInfo VI = getValueInfoFromValueId(ValueID).first; + LastSeenGUID = VI.getGUID(); + TheIndex.addGlobalValueSummary(VI, std::move(AS)); break; } // FS_COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid] @@ -5032,10 +5207,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( auto FS = llvm::make_unique(Flags, std::move(Refs)); LastSeenSummary = FS.get(); FS->setModulePath(ModuleIdMap[ModuleId]); - GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first; - LastSeenGUID = GUID; - TheIndex.addGlobalValueSummary(GUID, std::move(FS)); - Combined = true; + ValueInfo VI = getValueInfoFromValueId(ValueID).first; + LastSeenGUID = VI.getGUID(); + TheIndex.addGlobalValueSummary(VI, std::move(FS)); break; } // FS_COMBINED_ORIGINAL_NAME: [original_name] @@ -5078,6 +5252,20 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( {{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}}); break; } + case bitc::FS_CFI_FUNCTION_DEFS: { + std::set &CfiFunctionDefs = TheIndex.cfiFunctionDefs(); + for (unsigned I = 0; I != Record.size(); I += 2) + CfiFunctionDefs.insert( + {Strtab.data() + Record[I], static_cast(Record[I + 1])}); + break; + } + case bitc::FS_CFI_FUNCTION_DECLS: { + std::set &CfiFunctionDecls = TheIndex.cfiFunctionDecls(); + for (unsigned I = 0; I != Record.size(); I += 2) + CfiFunctionDecls.insert( + {Strtab.data() + Record[I], static_cast(Record[I + 1])}); + break; + } } } llvm_unreachable("Exit infinite loop"); @@ -5092,7 +5280,7 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { SmallVector Record; SmallString<128> ModulePath; - ModulePathStringTableTy::iterator LastSeenModulePath; + ModuleSummaryIndex::ModuleInfo *LastSeenModule = nullptr; while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); @@ -5119,8 +5307,8 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { if (convertToString(Record, 1, ModulePath)) return error("Invalid record"); - LastSeenModulePath = TheIndex.addModulePath(ModulePath, ModuleId); - ModuleIdMap[ModuleId] = LastSeenModulePath->first(); + LastSeenModule = TheIndex.addModule(ModulePath, ModuleId); + ModuleIdMap[ModuleId] = LastSeenModule->first(); ModulePath.clear(); break; @@ -5129,15 +5317,15 @@ Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { case bitc::MST_CODE_HASH: { if (Record.size() != 5) return error("Invalid hash length " + Twine(Record.size()).str()); - if (LastSeenModulePath == TheIndex.modulePaths().end()) + if (!LastSeenModule) return error("Invalid hash that does not follow a module path"); int Pos = 0; for (auto &Val : Record) { assert(!(Val >> 32) && "Unexpected high bits set"); - LastSeenModulePath->second.second[Pos++] = Val; + LastSeenModule->second.second[Pos++] = Val; } - // Reset LastSeenModulePath to avoid overriding the hash unexpectedly. - LastSeenModulePath = TheIndex.modulePaths().end(); + // Reset LastSeenModule to avoid overriding the hash unexpectedly. + LastSeenModule = nullptr; break; } } @@ -5172,18 +5360,55 @@ const std::error_category &llvm::BitcodeErrorCategory() { return *ErrorCategory; } +static Expected readStrtab(BitstreamCursor &Stream) { + if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID)) + return error("Invalid record"); + + StringRef Strtab; + while (1) { + BitstreamEntry Entry = Stream.advance(); + switch (Entry.Kind) { + case BitstreamEntry::EndBlock: + return Strtab; + + case BitstreamEntry::Error: + return error("Malformed block"); + + case BitstreamEntry::SubBlock: + if (Stream.SkipBlock()) + return error("Malformed block"); + break; + + case BitstreamEntry::Record: + StringRef Blob; + SmallVector Record; + if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB) + Strtab = Blob; + break; + } + } +} + //===----------------------------------------------------------------------===// // External interface //===----------------------------------------------------------------------===// Expected> llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { + auto FOrErr = getBitcodeFileContents(Buffer); + if (!FOrErr) + return FOrErr.takeError(); + return std::move(FOrErr->Mods); +} + +Expected +llvm::getBitcodeFileContents(MemoryBufferRef Buffer) { Expected StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); BitstreamCursor &Stream = *StreamOrErr; - std::vector Modules; + BitcodeFileContents F; while (true) { uint64_t BCBegin = Stream.getCurrentByteNo(); @@ -5191,7 +5416,7 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to // the end that there cannot possibly be another module, stop looking. if (BCBegin + 8 >= Stream.getBitcodeBytes().size()) - return Modules; + return F; BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { @@ -5217,10 +5442,26 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { if (Stream.SkipBlock()) return error("Malformed block"); - Modules.push_back({Stream.getBitcodeBytes().slice( - BCBegin, Stream.getCurrentByteNo() - BCBegin), - Buffer.getBufferIdentifier(), IdentificationBit, - ModuleBit}); + F.Mods.push_back({Stream.getBitcodeBytes().slice( + BCBegin, Stream.getCurrentByteNo() - BCBegin), + Buffer.getBufferIdentifier(), IdentificationBit, + ModuleBit}); + continue; + } + + if (Entry.ID == bitc::STRTAB_BLOCK_ID) { + Expected Strtab = readStrtab(Stream); + if (!Strtab) + return Strtab.takeError(); + // This string table is used by every preceding bitcode module that does + // not have its own string table. A bitcode file may have multiple + // string tables if it was created by binary concatenation, for example + // with "llvm-cat -b". + for (auto I = F.Mods.rbegin(), E = F.Mods.rend(); I != E; ++I) { + if (!I->Strtab.empty()) + break; + I->Strtab = *Strtab; + } continue; } @@ -5260,8 +5501,8 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll, } Stream.JumpToBit(ModuleBit); - auto *R = - new BitcodeReader(std::move(Stream), ProducerIdentification, Context); + auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification, + Context); std::unique_ptr M = llvm::make_unique(ModuleIdentifier, Context); @@ -5290,22 +5531,37 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting); } +// Parse the specified bitcode buffer and merge the index into CombinedIndex. +// We don't use ModuleIdentifier here because the client may need to control the +// module path used in the combined summary (e.g. when reading summaries for +// regular LTO modules). +Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex, + StringRef ModulePath, uint64_t ModuleId) { + BitstreamCursor Stream(Buffer); + Stream.JumpToBit(ModuleBit); + + ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex, + ModulePath, ModuleId); + return R.parseModule(); +} + // Parse the specified bitcode buffer, returning the function info index. Expected> BitcodeModule::getSummary() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); auto Index = llvm::make_unique(); - ModuleSummaryIndexBitcodeReader R(std::move(Stream), *Index); + ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index, + ModuleIdentifier, 0); - if (Error Err = R.parseModule(ModuleIdentifier)) + if (Error Err = R.parseModule()) return std::move(Err); return std::move(Index); } // Check if the given bitcode buffer contains a global value summary block. -Expected BitcodeModule::hasSummary() { +Expected BitcodeModule::getLTOInfo() { BitstreamCursor Stream(Buffer); Stream.JumpToBit(ModuleBit); @@ -5319,11 +5575,14 @@ Expected BitcodeModule::hasSummary() { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - return false; + return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false}; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) - return true; + return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true}; + + if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) + return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true}; // Ignore other sub-blocks. if (Stream.SkipBlock()) @@ -5408,6 +5667,16 @@ Expected llvm::getBitcodeProducerString(MemoryBufferRef Buffer) { return readIdentificationCode(*StreamOrErr); } +Error llvm::readModuleSummaryIndex(MemoryBufferRef Buffer, + ModuleSummaryIndex &CombinedIndex, + uint64_t ModuleId) { + Expected BM = getSingleModule(Buffer); + if (!BM) + return BM.takeError(); + + return BM->readSummary(CombinedIndex, BM->getModuleIdentifier(), ModuleId); +} + Expected> llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) { Expected BM = getSingleModule(Buffer); @@ -5417,10 +5686,22 @@ llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) { return BM->getSummary(); } -Expected llvm::hasGlobalValueSummary(MemoryBufferRef Buffer) { +Expected llvm::getBitcodeLTOInfo(MemoryBufferRef Buffer) { Expected BM = getSingleModule(Buffer); if (!BM) return BM.takeError(); - return BM->hasSummary(); + return BM->getLTOInfo(); +} + +Expected> +llvm::getModuleSummaryIndexForFile(StringRef Path, + bool IgnoreEmptyThinLTOIndexFile) { + ErrorOr> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + if (!FileOrErr) + return errorCodeToError(FileOrErr.getError()); + if (IgnoreEmptyThinLTOIndexFile && !(*FileOrErr)->getBufferSize()) + return nullptr; + return getModuleSummaryIndex(**FileOrErr); } diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp index 274dfe89cce544b5ce58d2ac298c2b8cb06b529f..b1504a8034e0c02d2f299f1424a3e1fe57794bb2 100644 --- a/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/lib/Bitcode/Reader/MetadataLoader.cpp @@ -53,6 +53,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -406,6 +407,11 @@ void PlaceholderQueue::flush(BitcodeReaderMetadataList &MetadataList) { } // anonynous namespace +static Error error(const Twine &Message) { + return make_error( + Message, make_error_code(BitcodeError::CorruptedBitcode)); +} + class MetadataLoader::MetadataLoaderImpl { BitcodeReaderMetadataList MetadataList; BitcodeReaderValueList &ValueList; @@ -452,6 +458,7 @@ class MetadataLoader::MetadataLoaderImpl { bool StripTBAA = false; bool HasSeenOldLoopTags = false; bool NeedUpgradeToDIGlobalVariableExpression = false; + bool NeedDeclareExpressionUpgrade = false; /// True if metadata is being parsed for a module being ThinLTO imported. bool IsImporting = false; @@ -472,8 +479,8 @@ class MetadataLoader::MetadataLoaderImpl { for (auto CU_SP : CUSubprograms) if (auto *SPs = dyn_cast_or_null(CU_SP.second)) for (auto &Op : SPs->operands()) - if (auto *SP = dyn_cast_or_null(Op)) - SP->replaceOperandWith(7, CU_SP.first); + if (auto *SP = dyn_cast_or_null(Op)) + SP->replaceUnit(CU_SP.first); CUSubprograms.clear(); } @@ -498,7 +505,7 @@ class MetadataLoader::MetadataLoaderImpl { // Upgrade variables attached to globals. for (auto &GV : TheModule.globals()) { - SmallVector MDs, NewMDs; + SmallVector MDs; GV.getMetadata(LLVMContext::MD_dbg, MDs); GV.eraseMetadata(LLVMContext::MD_dbg); for (auto *MD : MDs) @@ -511,6 +518,108 @@ class MetadataLoader::MetadataLoaderImpl { } } + /// Remove a leading DW_OP_deref from DIExpressions in a dbg.declare that + /// describes a function argument. + void upgradeDeclareExpressions(Function &F) { + if (!NeedDeclareExpressionUpgrade) + return; + + for (auto &BB : F) + for (auto &I : BB) + if (auto *DDI = dyn_cast(&I)) + if (auto *DIExpr = DDI->getExpression()) + if (DIExpr->startsWithDeref() && + dyn_cast_or_null(DDI->getAddress())) { + SmallVector Ops; + Ops.append(std::next(DIExpr->elements_begin()), + DIExpr->elements_end()); + auto *E = DIExpression::get(Context, Ops); + DDI->setOperand(2, MetadataAsValue::get(Context, E)); + } + } + + /// Upgrade the expression from previous versions. + Error upgradeDIExpression(uint64_t FromVersion, + MutableArrayRef &Expr, + SmallVectorImpl &Buffer) { + auto N = Expr.size(); + switch (FromVersion) { + default: + return error("Invalid record"); + case 0: + if (N >= 3 && Expr[N - 3] == dwarf::DW_OP_bit_piece) + Expr[N - 3] = dwarf::DW_OP_LLVM_fragment; + LLVM_FALLTHROUGH; + case 1: + // Move DW_OP_deref to the end. + if (N && Expr[0] == dwarf::DW_OP_deref) { + auto End = Expr.end(); + if (Expr.size() >= 3 && + *std::prev(End, 3) == dwarf::DW_OP_LLVM_fragment) + End = std::prev(End, 3); + std::move(std::next(Expr.begin()), End, Expr.begin()); + *std::prev(End) = dwarf::DW_OP_deref; + } + NeedDeclareExpressionUpgrade = true; + LLVM_FALLTHROUGH; + case 2: { + // Change DW_OP_plus to DW_OP_plus_uconst. + // Change DW_OP_minus to DW_OP_uconst, DW_OP_minus + auto SubExpr = ArrayRef(Expr); + while (!SubExpr.empty()) { + // Skip past other operators with their operands + // for this version of the IR, obtained from + // from historic DIExpression::ExprOperand::getSize(). + size_t HistoricSize; + switch (SubExpr.front()) { + default: + HistoricSize = 1; + break; + case dwarf::DW_OP_constu: + case dwarf::DW_OP_minus: + case dwarf::DW_OP_plus: + HistoricSize = 2; + break; + case dwarf::DW_OP_LLVM_fragment: + HistoricSize = 3; + break; + } + + // If the expression is malformed, make sure we don't + // copy more elements than we should. + HistoricSize = std::min(SubExpr.size(), HistoricSize); + ArrayRef Args = SubExpr.slice(1, HistoricSize-1); + + switch (SubExpr.front()) { + case dwarf::DW_OP_plus: + Buffer.push_back(dwarf::DW_OP_plus_uconst); + Buffer.append(Args.begin(), Args.end()); + break; + case dwarf::DW_OP_minus: + Buffer.push_back(dwarf::DW_OP_constu); + Buffer.append(Args.begin(), Args.end()); + Buffer.push_back(dwarf::DW_OP_minus); + break; + default: + Buffer.push_back(*SubExpr.begin()); + Buffer.append(Args.begin(), Args.end()); + break; + } + + // Continue with remaining elements. + SubExpr = SubExpr.slice(HistoricSize); + } + Expr = MutableArrayRef(Buffer); + LLVM_FALLTHROUGH; + } + case 3: + // Up-to-date! + break; + } + + return Error::success(); + } + void upgradeDebugInfo() { upgradeCUSubprograms(); upgradeCUVariables(); @@ -565,13 +674,9 @@ public: unsigned size() const { return MetadataList.size(); } void shrinkTo(unsigned N) { MetadataList.shrinkTo(N); } + void upgradeDebugIntrinsics(Function &F) { upgradeDeclareExpressions(F); } }; -static Error error(const Twine &Message) { - return make_error( - Message, make_error_code(BitcodeError::CorruptedBitcode)); -} - Expected MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() { IndexCursor = Stream; @@ -1275,7 +1380,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } case bitc::METADATA_SUBPROGRAM: { - if (Record.size() < 18 || Record.size() > 20) + if (Record.size() < 18 || Record.size() > 21) return error("Invalid record"); IsDistinct = @@ -1291,29 +1396,31 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( unsigned Offset = Record.size() >= 19 ? 1 : 0; bool HasFn = Offset && !HasUnit; bool HasThisAdj = Record.size() >= 20; + bool HasThrownTypes = Record.size() >= 21; DISubprogram *SP = GET_OR_DISTINCT( - DISubprogram, (Context, - getDITypeRefOrNull(Record[1]), // scope - getMDString(Record[2]), // name - getMDString(Record[3]), // linkageName - getMDOrNull(Record[4]), // file - Record[5], // line - getMDOrNull(Record[6]), // type - Record[7], // isLocal - Record[8], // isDefinition - Record[9], // scopeLine - getDITypeRefOrNull(Record[10]), // containingType - Record[11], // virtuality - Record[12], // virtualIndex - HasThisAdj ? Record[19] : 0, // thisAdjustment - static_cast(Record[13] // flags - ), - Record[14], // isOptimized - HasUnit ? CUorFn : nullptr, // unit - getMDOrNull(Record[15 + Offset]), // templateParams - getMDOrNull(Record[16 + Offset]), // declaration - getMDOrNull(Record[17 + Offset]) // variables - )); + DISubprogram, + (Context, + getDITypeRefOrNull(Record[1]), // scope + getMDString(Record[2]), // name + getMDString(Record[3]), // linkageName + getMDOrNull(Record[4]), // file + Record[5], // line + getMDOrNull(Record[6]), // type + Record[7], // isLocal + Record[8], // isDefinition + Record[9], // scopeLine + getDITypeRefOrNull(Record[10]), // containingType + Record[11], // virtuality + Record[12], // virtualIndex + HasThisAdj ? Record[19] : 0, // thisAdjustment + static_cast(Record[13]), // flags + Record[14], // isOptimized + HasUnit ? CUorFn : nullptr, // unit + getMDOrNull(Record[15 + Offset]), // templateParams + getMDOrNull(Record[16 + Offset]), // declaration + getMDOrNull(Record[17 + Offset]), // variables + HasThrownTypes ? getMDOrNull(Record[20]) : nullptr // thrownTypes + )); MetadataList.assignValue(SP, NextMetadataNo); NextMetadataNo++; @@ -1358,16 +1465,20 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } case bitc::METADATA_NAMESPACE: { - if (Record.size() != 5) + // Newer versions of DINamespace dropped file and line. + MDString *Name; + if (Record.size() == 3) + Name = getMDString(Record[2]); + else if (Record.size() == 5) + Name = getMDString(Record[3]); + else return error("Invalid record"); IsDistinct = Record[0] & 1; bool ExportSymbols = Record[0] & 2; MetadataList.assignValue( GET_OR_DISTINCT(DINamespace, - (Context, getMDOrNull(Record[1]), - getMDOrNull(Record[2]), getMDString(Record[3]), - Record[4], ExportSymbols)), + (Context, getMDOrNull(Record[1]), Name, ExportSymbols)), NextMetadataNo); NextMetadataNo++; break; @@ -1520,16 +1631,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( return error("Invalid record"); IsDistinct = Record[0] & 1; - bool HasOpFragment = Record[0] & 2; + uint64_t Version = Record[0] >> 1; auto Elts = MutableArrayRef(Record).slice(1); - if (!HasOpFragment) - if (unsigned N = Elts.size()) - if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece) - Elts[N - 3] = dwarf::DW_OP_LLVM_fragment; + + SmallVector Buffer; + if (Error Err = upgradeDIExpression(Version, Elts, Buffer)) + return Err; MetadataList.assignValue( - GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))), - NextMetadataNo); + GET_OR_DISTINCT(DIExpression, (Context, Elts)), NextMetadataNo); NextMetadataNo++; break; } @@ -1858,3 +1968,7 @@ bool MetadataLoader::isStrippingTBAA() { return Pimpl->isStrippingTBAA(); } unsigned MetadataLoader::size() const { return Pimpl->size(); } void MetadataLoader::shrinkTo(unsigned N) { return Pimpl->shrinkTo(N); } + +void MetadataLoader::upgradeDebugIntrinsics(Function &F) { + return Pimpl->upgradeDebugIntrinsics(F); +} diff --git a/lib/Bitcode/Reader/MetadataLoader.h b/lib/Bitcode/Reader/MetadataLoader.h index 442dfc94e4e195e2c3b04fc6afbba42cbc797884..f23dcc06cc949df105dc1cbfaddb6e4f9a9da6c9 100644 --- a/lib/Bitcode/Reader/MetadataLoader.h +++ b/lib/Bitcode/Reader/MetadataLoader.h @@ -79,6 +79,9 @@ public: unsigned size() const; void shrinkTo(unsigned N); + + /// Perform bitcode upgrades on llvm.dbg.* calls. + void upgradeDebugIntrinsics(Function &F); }; } diff --git a/lib/Bitcode/Reader/ValueList.cpp b/lib/Bitcode/Reader/ValueList.cpp index 7152a51cea6e77e9119648d6ed60daa31905e505..f2a3439a87be6ea5a99b909370cfa7b8e56e624d 100644 --- a/lib/Bitcode/Reader/ValueList.cpp +++ b/lib/Bitcode/Reader/ValueList.cpp @@ -58,7 +58,7 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) { if (Idx >= size()) resize(Idx + 1); - WeakVH &OldV = ValuePtrs[Idx]; + WeakTrackingVH &OldV = ValuePtrs[Idx]; if (!OldV) { OldV = V; return; @@ -73,7 +73,7 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) { // If there was a forward reference to this value, replace it. Value *PrevVal = OldV; OldV->replaceAllUsesWith(V); - delete PrevVal; + PrevVal->deleteValue(); } } @@ -194,6 +194,6 @@ void BitcodeReaderValueList::resolveConstantForwardRefs() { // Update all ValueHandles, they should be the only users at this point. Placeholder->replaceAllUsesWith(RealVal); - delete Placeholder; + Placeholder->deleteValue(); } } diff --git a/lib/Bitcode/Reader/ValueList.h b/lib/Bitcode/Reader/ValueList.h index 3119d7735e221b00fd664255874636c7758659f9..72775a3cf3bc74cf6eed6ee77a3e6516e907bf58 100644 --- a/lib/Bitcode/Reader/ValueList.h +++ b/lib/Bitcode/Reader/ValueList.h @@ -20,7 +20,7 @@ namespace llvm { class Constant; class BitcodeReaderValueList { - std::vector ValuePtrs; + std::vector ValuePtrs; /// As we resolve forward-referenced constants, we add information about them /// to this vector. This allows us to resolve them in bulk instead of diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 043441bac4dead056ccbb51d272f9d10b545d6dd..feeba31908ae5b75725b3ff432bf92bb108eee89 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Program.h" @@ -76,21 +77,24 @@ protected: /// The stream created and owned by the client. BitstreamWriter &Stream; - /// Saves the offset of the VSTOffset record that must eventually be - /// backpatched with the offset of the actual VST. - uint64_t VSTOffsetPlaceholder = 0; + StringTableBuilder &StrtabBuilder; public: /// Constructs a BitcodeWriterBase object that writes to the provided /// \p Stream. - BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {} + BitcodeWriterBase(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder) + : Stream(Stream), StrtabBuilder(StrtabBuilder) {} protected: - bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; } - void writeValueSymbolTableForwardDecl(); void writeBitcodeHeader(); + void writeModuleVersion(); }; +void BitcodeWriterBase::writeModuleVersion() { + // VERSION: [version#] + Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef{2}); +} + /// Class to manage the bitcode writing for a module. class ModuleBitcodeWriter : public BitcodeWriterBase { /// Pointer to the buffer allocated by caller for bitcode writing. @@ -127,14 +131,19 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { /// Tracks the last value id recorded in the GUIDToValueMap. unsigned GlobalValueId; + /// Saves the offset of the VSTOffset record that must eventually be + /// backpatched with the offset of the actual VST. + uint64_t VSTOffsetPlaceholder = 0; + public: /// Constructs a ModuleBitcodeWriter object for the given Module, /// writing to the provided \p Buffer. ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, + StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash = nullptr) - : BitcodeWriterBase(Stream), Buffer(Buffer), M(*M), + : BitcodeWriterBase(Stream, StrtabBuilder), Buffer(Buffer), M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index), GenerateHash(GenerateHash), ModHash(ModHash), BitcodeStartBit(Stream.GetCurrentBitNo()) { @@ -148,14 +157,14 @@ public: return; for (const auto &GUIDSummaryLists : *Index) // Examine all summaries for this GUID. - for (auto &Summary : GUIDSummaryLists.second) + for (auto &Summary : GUIDSummaryLists.second.SummaryList) if (auto FS = dyn_cast(Summary.get())) // For each call in the function summary, see if the call // is to a GUID (which means it is for an indirect call, // otherwise we would have a Value for it). If so, synthesize // a value id. for (auto &CallEdge : FS->calls()) - if (CallEdge.first.isGUID()) + if (!CallEdge.first.getValue()) assignValueId(CallEdge.first.getGUID()); } @@ -169,6 +178,7 @@ private: void writeAttributeTable(); void writeTypeTable(); void writeComdats(); + void writeValueSymbolTableForwardDecl(); void writeModuleInfo(); void writeValueAsMetadata(const ValueAsMetadata *MD, SmallVectorImpl &Record); @@ -261,9 +271,9 @@ private: SmallVectorImpl &Vals); void writeInstruction(const Instruction &I, unsigned InstID, SmallVectorImpl &Vals); - void writeValueSymbolTable( - const ValueSymbolTable &VST, bool IsModuleLevel = false, - DenseMap *FunctionToBitcodeIndex = nullptr); + void writeFunctionLevelValueSymbolTable(const ValueSymbolTable &VST); + void writeGlobalValueSymbolTable( + DenseMap &FunctionToBitcodeIndex); void writeUseList(UseListOrder &&Order); void writeUseListBlock(const Function *F); void @@ -295,7 +305,7 @@ private: } // Helper to get the valueId for the type of value recorded in VI. unsigned getValueId(ValueInfo VI) { - if (VI.isGUID()) + if (!VI.getValue()) return getValueId(VI.getGUID()); return VE.getValueID(VI.getValue()); } @@ -322,185 +332,75 @@ public: /// Constructs a IndexBitcodeWriter object for the given combined index, /// writing to the provided \p Buffer. When writing a subset of the index /// for a distributed backend, provide a \p ModuleToSummariesForIndex map. - IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index, + IndexBitcodeWriter(BitstreamWriter &Stream, StringTableBuilder &StrtabBuilder, + const ModuleSummaryIndex &Index, const std::map *ModuleToSummariesForIndex = nullptr) - : BitcodeWriterBase(Stream), Index(Index), + : BitcodeWriterBase(Stream, StrtabBuilder), Index(Index), ModuleToSummariesForIndex(ModuleToSummariesForIndex) { // Assign unique value ids to all summaries to be written, for use // in writing out the call graph edges. Save the mapping from GUID // to the new global value id to use when writing those edges, which // are currently saved in the index in terms of GUID. - for (const auto &I : *this) + forEachSummary([&](GVInfo I) { GUIDToValueIdMap[I.first] = ++GlobalValueId; + }); } /// The below iterator returns the GUID and associated summary. typedef std::pair GVInfo; - /// Iterator over the value GUID and summaries to be written to bitcode, - /// hides the details of whether they are being pulled from the entire - /// index or just those in a provided ModuleToSummariesForIndex map. - class iterator - : public llvm::iterator_facade_base { - /// Enables access to parent class. - const IndexBitcodeWriter &Writer; - - // Iterators used when writing only those summaries in a provided - // ModuleToSummariesForIndex map: - - /// Points to the last element in outer ModuleToSummariesForIndex map. - std::map::const_iterator ModuleSummariesBack; - /// Iterator on outer ModuleToSummariesForIndex map. - std::map::const_iterator ModuleSummariesIter; - /// Iterator on an inner global variable summary map. - GVSummaryMapTy::const_iterator ModuleGVSummariesIter; - - // Iterators used when writing all summaries in the index: - - /// Points to the last element in the Index outer GlobalValueMap. - const_gvsummary_iterator IndexSummariesBack; - /// Iterator on outer GlobalValueMap. - const_gvsummary_iterator IndexSummariesIter; - /// Iterator on an inner GlobalValueSummaryList. - GlobalValueSummaryList::const_iterator IndexGVSummariesIter; - - public: - /// Construct iterator from parent \p Writer and indicate if we are - /// constructing the end iterator. - iterator(const IndexBitcodeWriter &Writer, bool IsAtEnd) : Writer(Writer) { - // Set up the appropriate set of iterators given whether we are writing - // the full index or just a subset. - // Can't setup the Back or inner iterators if the corresponding map - // is empty. This will be handled specially in operator== as well. - if (Writer.ModuleToSummariesForIndex && - !Writer.ModuleToSummariesForIndex->empty()) { - for (ModuleSummariesBack = Writer.ModuleToSummariesForIndex->begin(); - std::next(ModuleSummariesBack) != - Writer.ModuleToSummariesForIndex->end(); - ModuleSummariesBack++) - ; - ModuleSummariesIter = !IsAtEnd - ? Writer.ModuleToSummariesForIndex->begin() - : ModuleSummariesBack; - ModuleGVSummariesIter = !IsAtEnd ? ModuleSummariesIter->second.begin() - : ModuleSummariesBack->second.end(); - } else if (!Writer.ModuleToSummariesForIndex && - Writer.Index.begin() != Writer.Index.end()) { - for (IndexSummariesBack = Writer.Index.begin(); - std::next(IndexSummariesBack) != Writer.Index.end(); - IndexSummariesBack++) - ; - IndexSummariesIter = - !IsAtEnd ? Writer.Index.begin() : IndexSummariesBack; - IndexGVSummariesIter = !IsAtEnd ? IndexSummariesIter->second.begin() - : IndexSummariesBack->second.end(); - } + /// Calls the callback for each value GUID and summary to be written to + /// bitcode. This hides the details of whether they are being pulled from the + /// entire index or just those in a provided ModuleToSummariesForIndex map. + template + void forEachSummary(Functor Callback) { + if (ModuleToSummariesForIndex) { + for (auto &M : *ModuleToSummariesForIndex) + for (auto &Summary : M.second) + Callback(Summary); + } else { + for (auto &Summaries : Index) + for (auto &Summary : Summaries.second.SummaryList) + Callback({Summaries.first, Summary.get()}); } + } - /// Increment the appropriate set of iterators. - iterator &operator++() { - // First the inner iterator is incremented, then if it is at the end - // and there are more outer iterations to go, the inner is reset to - // the start of the next inner list. - if (Writer.ModuleToSummariesForIndex) { - ++ModuleGVSummariesIter; - if (ModuleGVSummariesIter == ModuleSummariesIter->second.end() && - ModuleSummariesIter != ModuleSummariesBack) { - ++ModuleSummariesIter; - ModuleGVSummariesIter = ModuleSummariesIter->second.begin(); - } - } else { - ++IndexGVSummariesIter; - if (IndexGVSummariesIter == IndexSummariesIter->second.end() && - IndexSummariesIter != IndexSummariesBack) { - ++IndexSummariesIter; - IndexGVSummariesIter = IndexSummariesIter->second.begin(); + /// Calls the callback for each entry in the modulePaths StringMap that + /// should be written to the module path string table. This hides the details + /// of whether they are being pulled from the entire index or just those in a + /// provided ModuleToSummariesForIndex map. + template void forEachModule(Functor Callback) { + if (ModuleToSummariesForIndex) { + for (const auto &M : *ModuleToSummariesForIndex) { + const auto &MPI = Index.modulePaths().find(M.first); + if (MPI == Index.modulePaths().end()) { + // This should only happen if the bitcode file was empty, in which + // case we shouldn't be importing (the ModuleToSummariesForIndex + // would only include the module we are writing and index for). + assert(ModuleToSummariesForIndex->size() == 1); + continue; } + Callback(*MPI); } - return *this; - } - - /// Access the pair corresponding to the current - /// outer and inner iterator positions. - GVInfo operator*() { - if (Writer.ModuleToSummariesForIndex) - return std::make_pair(ModuleGVSummariesIter->first, - ModuleGVSummariesIter->second); - return std::make_pair(IndexSummariesIter->first, - IndexGVSummariesIter->get()); - } - - /// Checks if the iterators are equal, with special handling for empty - /// indexes. - bool operator==(const iterator &RHS) const { - if (Writer.ModuleToSummariesForIndex) { - // First ensure that both are writing the same subset. - if (Writer.ModuleToSummariesForIndex != - RHS.Writer.ModuleToSummariesForIndex) - return false; - // Already determined above that maps are the same, so if one is - // empty, they both are. - if (Writer.ModuleToSummariesForIndex->empty()) - return true; - // Ensure the ModuleGVSummariesIter are iterating over the same - // container before checking them below. - if (ModuleSummariesIter != RHS.ModuleSummariesIter) - return false; - return ModuleGVSummariesIter == RHS.ModuleGVSummariesIter; - } - // First ensure RHS also writing the full index, and that both are - // writing the same full index. - if (RHS.Writer.ModuleToSummariesForIndex || - &Writer.Index != &RHS.Writer.Index) - return false; - // Already determined above that maps are the same, so if one is - // empty, they both are. - if (Writer.Index.begin() == Writer.Index.end()) - return true; - // Ensure the IndexGVSummariesIter are iterating over the same - // container before checking them below. - if (IndexSummariesIter != RHS.IndexSummariesIter) - return false; - return IndexGVSummariesIter == RHS.IndexGVSummariesIter; + } else { + for (const auto &MPSE : Index.modulePaths()) + Callback(MPSE); } - }; - - /// Obtain the start iterator over the summaries to be written. - iterator begin() { return iterator(*this, /*IsAtEnd=*/false); } - /// Obtain the end iterator over the summaries to be written. - iterator end() { return iterator(*this, /*IsAtEnd=*/true); } + } /// Main entry point for writing a combined index to bitcode. void write(); private: void writeModStrings(); - void writeCombinedValueSymbolTable(); void writeCombinedGlobalValueSummary(); - /// Indicates whether the provided \p ModulePath should be written into - /// the module string table, e.g. if full index written or if it is in - /// the provided subset. - bool doIncludeModule(StringRef ModulePath) { - return !ModuleToSummariesForIndex || - ModuleToSummariesForIndex->count(ModulePath); - } - - bool hasValueId(GlobalValue::GUID ValGUID) { - const auto &VMI = GUIDToValueIdMap.find(ValGUID); - return VMI != GUIDToValueIdMap.end(); - } - unsigned getValueId(GlobalValue::GUID ValGUID) { - const auto &VMI = GUIDToValueIdMap.find(ValGUID); - // If this GUID doesn't have an entry, assign one. - if (VMI == GUIDToValueIdMap.end()) { - GUIDToValueIdMap[ValGUID] = ++GlobalValueId; - return GlobalValueId; - } else { - return VMI->second; - } + Optional getValueId(GlobalValue::GUID ValGUID) { + auto VMI = GUIDToValueIdMap.find(ValGUID); + if (VMI == GUIDToValueIdMap.end()) + return None; + return VMI->second; } std::map &valueIds() { return GUIDToValueIdMap; } }; @@ -680,6 +580,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_RETURNS_TWICE; case Attribute::SExt: return bitc::ATTR_KIND_S_EXT; + case Attribute::Speculatable: + return bitc::ATTR_KIND_SPECULATABLE; case Attribute::StackAlignment: return bitc::ATTR_KIND_STACK_ALIGNMENT; case Attribute::StackProtect: @@ -718,63 +620,62 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { } void ModuleBitcodeWriter::writeAttributeGroupTable() { - const std::vector &AttrGrps = VE.getAttributeGroups(); + const std::vector &AttrGrps = + VE.getAttributeGroups(); if (AttrGrps.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3); SmallVector Record; - for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) { - AttributeList AS = AttrGrps[i]; - for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) { - AttributeList A = AS.getSlotAttributes(i); - - Record.push_back(VE.getAttributeGroupID(A)); - Record.push_back(AS.getSlotIndex(i)); - - for (AttributeList::iterator I = AS.begin(0), E = AS.end(0); I != E; - ++I) { - Attribute Attr = *I; - if (Attr.isEnumAttribute()) { - Record.push_back(0); - Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); - } else if (Attr.isIntAttribute()) { - Record.push_back(1); - Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); - Record.push_back(Attr.getValueAsInt()); - } else { - StringRef Kind = Attr.getKindAsString(); - StringRef Val = Attr.getValueAsString(); - - Record.push_back(Val.empty() ? 3 : 4); - Record.append(Kind.begin(), Kind.end()); + for (ValueEnumerator::IndexAndAttrSet Pair : AttrGrps) { + unsigned AttrListIndex = Pair.first; + AttributeSet AS = Pair.second; + Record.push_back(VE.getAttributeGroupID(Pair)); + Record.push_back(AttrListIndex); + + for (Attribute Attr : AS) { + if (Attr.isEnumAttribute()) { + Record.push_back(0); + Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); + } else if (Attr.isIntAttribute()) { + Record.push_back(1); + Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); + Record.push_back(Attr.getValueAsInt()); + } else { + StringRef Kind = Attr.getKindAsString(); + StringRef Val = Attr.getValueAsString(); + + Record.push_back(Val.empty() ? 3 : 4); + Record.append(Kind.begin(), Kind.end()); + Record.push_back(0); + if (!Val.empty()) { + Record.append(Val.begin(), Val.end()); Record.push_back(0); - if (!Val.empty()) { - Record.append(Val.begin(), Val.end()); - Record.push_back(0); - } } } - - Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); - Record.clear(); } + + Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); + Record.clear(); } Stream.ExitBlock(); } void ModuleBitcodeWriter::writeAttributeTable() { - const std::vector &Attrs = VE.getAttributes(); + const std::vector &Attrs = VE.getAttributeLists(); if (Attrs.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); SmallVector Record; for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - const AttributeList &A = Attrs[i]; - for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) - Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i))); + AttributeList AL = Attrs[i]; + for (unsigned i = AL.index_begin(), e = AL.index_end(); i != e; ++i) { + AttributeSet AS = AL.getAttributes(i); + if (AS.hasAttributes()) + Record.push_back(VE.getAttributeGroupID({i, AS})); + } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); @@ -981,7 +882,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) { uint64_t RawFlags = 0; RawFlags |= Flags.NotEligibleToImport; // bool - RawFlags |= (Flags.LiveRoot << 1); + RawFlags |= (Flags.Live << 1); // Linkage don't need to be remapped at that time for the summary. Any future // change to the getEncodedLinkage() function will need to be taken into // account here as well. @@ -1047,13 +948,10 @@ static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) { void ModuleBitcodeWriter::writeComdats() { SmallVector Vals; for (const Comdat *C : VE.getComdats()) { - // COMDAT: [selection_kind, name] + // COMDAT: [strtab offset, strtab size, selection_kind] + Vals.push_back(StrtabBuilder.add(C->getName())); + Vals.push_back(C->getName().size()); Vals.push_back(getEncodedComdatSelectionKind(*C)); - size_t Size = C->getName().size(); - assert(isUInt<32>(Size)); - Vals.push_back(Size); - for (char Chr : C->getName()) - Vals.push_back((unsigned char)Chr); Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); Vals.clear(); } @@ -1062,7 +960,7 @@ void ModuleBitcodeWriter::writeComdats() { /// Write a record that will eventually hold the word offset of the /// module-level VST. For now the offset is 0, which will be backpatched /// after the real VST is written. Saves the bit offset to backpatch. -void BitcodeWriterBase::writeValueSymbolTableForwardDecl() { +void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() { // Write a placeholder value in for the offset of the real VST, // which is written after the function blocks so that it can include // the offset of each function. The placeholder offset will be @@ -1088,19 +986,18 @@ void BitcodeWriterBase::writeValueSymbolTableForwardDecl() { enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 }; /// Determine the encoding to use for the given string name and length. -static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) { +static StringEncoding getStringEncoding(StringRef Str) { bool isChar6 = true; - for (const char *C = Str, *E = C + StrLen; C != E; ++C) { + for (char C : Str) { if (isChar6) - isChar6 = BitCodeAbbrevOp::isChar6(*C); - if ((unsigned char)*C & 128) + isChar6 = BitCodeAbbrevOp::isChar6(C); + if ((unsigned char)C & 128) // don't bother scanning the rest. return SE_Fixed8; } if (isChar6) return SE_Char6; - else - return SE_Fixed7; + return SE_Fixed7; } /// Emit top-level description of module, including target triple, inline asm, @@ -1165,6 +1062,8 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Add an abbrev for common globals with no visibility or thread localness. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(MaxGlobalType+1))); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2 @@ -1188,15 +1087,41 @@ void ModuleBitcodeWriter::writeModuleInfo() { SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv)); } - // Emit the global variable information. SmallVector Vals; + // Emit the module's source file name. + { + StringEncoding Bits = getStringEncoding(M.getSourceFileName()); + BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); + if (Bits == SE_Char6) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); + else if (Bits == SE_Fixed7) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); + + // MODULE_CODE_SOURCE_FILENAME: [namechar x N] + auto Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(AbbrevOpToUse); + unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + for (const auto P : M.getSourceFileName()) + Vals.push_back((unsigned char)P); + + // Emit the finished record. + Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); + Vals.clear(); + } + + // Emit the global variable information. for (const GlobalVariable &GV : M.globals()) { unsigned AbbrevToUse = 0; - // GLOBALVAR: [type, isconst, initid, + // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, - // comdat] + // comdat, attributes] + Vals.push_back(StrtabBuilder.add(GV.getName())); + Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant()); Vals.push_back(GV.isDeclaration() ? 0 : @@ -1209,13 +1134,17 @@ void ModuleBitcodeWriter::writeModuleInfo() { GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None || GV.isExternallyInitialized() || GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass || - GV.hasComdat()) { + GV.hasComdat() || + GV.hasAttributes()) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(getEncodedUnnamedAddr(GV)); Vals.push_back(GV.isExternallyInitialized()); Vals.push_back(getEncodedDLLStorageClass(GV)); Vals.push_back(GV.hasComdat() ? VE.getComdatID(GV.getComdat()) : 0); + + auto AL = GV.getAttributesAsList(AttributeList::FunctionIndex); + Vals.push_back(VE.getAttributeListID(AL)); } else { AbbrevToUse = SimpleGVarAbbrev; } @@ -1226,14 +1155,17 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Emit the function proto information. for (const Function &F : M) { - // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, - // section, visibility, gc, unnamed_addr, prologuedata, - // dllstorageclass, comdat, prefixdata, personalityfn] + // FUNCTION: [strtab offset, strtab size, type, callingconv, isproto, + // linkage, paramattrs, alignment, section, visibility, gc, + // unnamed_addr, prologuedata, dllstorageclass, comdat, + // prefixdata, personalityfn] + Vals.push_back(StrtabBuilder.add(F.getName())); + Vals.push_back(F.getName().size()); Vals.push_back(VE.getTypeID(F.getFunctionType())); Vals.push_back(F.getCallingConv()); Vals.push_back(F.isDeclaration()); Vals.push_back(getEncodedLinkage(F)); - Vals.push_back(VE.getAttributeID(F.getAttributes())); + Vals.push_back(VE.getAttributeListID(F.getAttributes())); Vals.push_back(Log2_32(F.getAlignment())+1); Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0); Vals.push_back(getEncodedVisibility(F)); @@ -1255,8 +1187,10 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Emit the alias information. for (const GlobalAlias &A : M.aliases()) { - // ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass, - // threadlocal, unnamed_addr] + // ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage, + // visibility, dllstorageclass, threadlocal, unnamed_addr] + Vals.push_back(StrtabBuilder.add(A.getName())); + Vals.push_back(A.getName().size()); Vals.push_back(VE.getTypeID(A.getValueType())); Vals.push_back(A.getType()->getAddressSpace()); Vals.push_back(VE.getValueID(A.getAliasee())); @@ -1272,7 +1206,10 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Emit the ifunc information. for (const GlobalIFunc &I : M.ifuncs()) { - // IFUNC: [ifunc type, address space, resolver val#, linkage, visibility] + // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver + // val#, linkage, visibility] + Vals.push_back(StrtabBuilder.add(I.getName())); + Vals.push_back(I.getName().size()); Vals.push_back(VE.getTypeID(I.getValueType())); Vals.push_back(I.getType()->getAddressSpace()); Vals.push_back(VE.getValueID(I.getResolver())); @@ -1282,34 +1219,6 @@ void ModuleBitcodeWriter::writeModuleInfo() { Vals.clear(); } - // Emit the module's source file name. - { - StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(), - M.getSourceFileName().size()); - BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); - if (Bits == SE_Char6) - AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); - else if (Bits == SE_Fixed7) - AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); - - // MODULE_CODE_SOURCE_FILENAME: [namechar x N] - auto Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(AbbrevOpToUse); - unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - - for (const auto P : M.getSourceFileName()) - Vals.push_back((unsigned char)P); - - // Emit the finished record. - Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); - Vals.clear(); - } - - // If we have a VST, write the VSTOFFSET record placeholder. - if (M.getValueSymbolTable().empty()) - return; writeValueSymbolTableForwardDecl(); } @@ -1597,6 +1506,7 @@ void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N, Record.push_back(VE.getMetadataOrNullID(N->getDeclaration())); Record.push_back(VE.getMetadataOrNullID(N->getVariables().get())); Record.push_back(N->getThisAdjustment()); + Record.push_back(VE.getMetadataOrNullID(N->getThrownTypes().get())); Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev); Record.clear(); @@ -1632,9 +1542,7 @@ void ModuleBitcodeWriter::writeDINamespace(const DINamespace *N, unsigned Abbrev) { Record.push_back(N->isDistinct() | N->getExportSymbols() << 1); Record.push_back(VE.getMetadataOrNullID(N->getScope())); - Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); - Record.push_back(N->getLine()); Stream.EmitRecord(bitc::METADATA_NAMESPACE, Record, Abbrev); Record.clear(); @@ -1757,9 +1665,8 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.reserve(N->getElements().size() + 1); - - const uint64_t HasOpFragmentFlag = 1 << 1; - Record.push_back((uint64_t)N->isDistinct() | HasOpFragmentFlag); + const uint64_t Version = 3 << 1; + Record.push_back((uint64_t)N->isDistinct() | Version); Record.append(N->elements_begin(), N->elements_end()); Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev); @@ -2603,7 +2510,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Code = bitc::FUNC_CODE_INST_INVOKE; - Vals.push_back(VE.getAttributeID(II->getAttributes())); + Vals.push_back(VE.getAttributeListID(II->getAttributes())); Vals.push_back(II->getCallingConv() | 1 << 13); Vals.push_back(VE.getValueID(II->getNormalDest())); Vals.push_back(VE.getValueID(II->getUnwindDest())); @@ -2795,7 +2702,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Code = bitc::FUNC_CODE_INST_CALL; - Vals.push_back(VE.getAttributeID(CI.getAttributes())); + Vals.push_back(VE.getAttributeListID(CI.getAttributes())); unsigned Flags = getOptimizationFlags(&I); Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV | @@ -2839,77 +2746,59 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.clear(); } -/// Emit names for globals/functions etc. \p IsModuleLevel is true when -/// we are writing the module-level VST, where we are including a function -/// bitcode index and need to backpatch the VST forward declaration record. -void ModuleBitcodeWriter::writeValueSymbolTable( - const ValueSymbolTable &VST, bool IsModuleLevel, - DenseMap *FunctionToBitcodeIndex) { - if (VST.empty()) { - // writeValueSymbolTableForwardDecl should have returned early as - // well. Ensure this handling remains in sync by asserting that - // the placeholder offset is not set. - assert(!IsModuleLevel || !hasVSTOffsetPlaceholder()); - return; - } +/// Write a GlobalValue VST to the module. The purpose of this data structure is +/// to allow clients to efficiently find the function body. +void ModuleBitcodeWriter::writeGlobalValueSymbolTable( + DenseMap &FunctionToBitcodeIndex) { + // Get the offset of the VST we are writing, and backpatch it into + // the VST forward declaration record. + uint64_t VSTOffset = Stream.GetCurrentBitNo(); + // The BitcodeStartBit was the stream offset of the identification block. + VSTOffset -= bitcodeStartBit(); + assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); + // Note that we add 1 here because the offset is relative to one word + // before the start of the identification block, which was historically + // always the start of the regular bitcode header. + Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1); - if (IsModuleLevel && hasVSTOffsetPlaceholder()) { - // Get the offset of the VST we are writing, and backpatch it into - // the VST forward declaration record. - uint64_t VSTOffset = Stream.GetCurrentBitNo(); - // The BitcodeStartBit was the stream offset of the identification block. - VSTOffset -= bitcodeStartBit(); - assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); + + auto Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + unsigned FnEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + for (const Function &F : M) { + uint64_t Record[2]; + + if (F.isDeclaration()) + continue; + + Record[0] = VE.getValueID(&F); + + // Save the word offset of the function (from the start of the + // actual bitcode written to the stream). + uint64_t BitcodeIndex = FunctionToBitcodeIndex[&F] - bitcodeStartBit(); + assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); // Note that we add 1 here because the offset is relative to one word // before the start of the identification block, which was historically // always the start of the regular bitcode header. - Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1); - } + Record[1] = BitcodeIndex / 32 + 1; - Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); - - // For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY - // records, which are not used in the per-function VSTs. - unsigned FnEntry8BitAbbrev; - unsigned FnEntry7BitAbbrev; - unsigned FnEntry6BitAbbrev; - unsigned GUIDEntryAbbrev; - if (IsModuleLevel && hasVSTOffsetPlaceholder()) { - // 8-bit fixed-width VST_CODE_FNENTRY function strings. - auto Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); - FnEntry8BitAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + Stream.EmitRecord(bitc::VST_CODE_FNENTRY, Record, FnEntryAbbrev); + } - // 7-bit fixed width VST_CODE_FNENTRY function strings. - Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); - FnEntry7BitAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + Stream.ExitBlock(); +} - // 6-bit char6 VST_CODE_FNENTRY function strings. - Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); - FnEntry6BitAbbrev = Stream.EmitAbbrev(std::move(Abbv)); +/// Emit names for arguments, instructions and basic blocks in a function. +void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable( + const ValueSymbolTable &VST) { + if (VST.empty()) + return; - // FIXME: Change the name of this record as it is now used by - // the per-module index as well. - Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid - GUIDEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - } + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. @@ -2917,44 +2806,18 @@ void ModuleBitcodeWriter::writeValueSymbolTable( for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. - StringEncoding Bits = - getStringEncoding(Name.getKeyData(), Name.getKeyLength()); + StringEncoding Bits = getStringEncoding(Name.getKey()); unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; NameVals.push_back(VE.getValueID(Name.getValue())); - Function *F = dyn_cast(Name.getValue()); - // VST_CODE_ENTRY: [valueid, namechar x N] - // VST_CODE_FNENTRY: [valueid, funcoffset, namechar x N] // VST_CODE_BBENTRY: [bbid, namechar x N] unsigned Code; if (isa(Name.getValue())) { Code = bitc::VST_CODE_BBENTRY; if (Bits == SE_Char6) AbbrevToUse = VST_BBENTRY_6_ABBREV; - } else if (F && !F->isDeclaration()) { - // Must be the module-level VST, where we pass in the Index and - // have a VSTOffsetPlaceholder. The function-level VST should not - // contain any Function symbols. - assert(FunctionToBitcodeIndex); - assert(hasVSTOffsetPlaceholder()); - - // Save the word offset of the function (from the start of the - // actual bitcode written to the stream). - uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit(); - assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); - // Note that we add 1 here because the offset is relative to one word - // before the start of the identification block, which was historically - // always the start of the regular bitcode header. - NameVals.push_back(BitcodeIndex / 32 + 1); - - Code = bitc::VST_CODE_FNENTRY; - AbbrevToUse = FnEntry8BitAbbrev; - if (Bits == SE_Char6) - AbbrevToUse = FnEntry6BitAbbrev; - else if (Bits == SE_Fixed7) - AbbrevToUse = FnEntry7BitAbbrev; } else { Code = bitc::VST_CODE_ENTRY; if (Bits == SE_Char6) @@ -2970,47 +2833,7 @@ void ModuleBitcodeWriter::writeValueSymbolTable( Stream.EmitRecord(Code, NameVals, AbbrevToUse); NameVals.clear(); } - // Emit any GUID valueIDs created for indirect call edges into the - // module-level VST. - if (IsModuleLevel && hasVSTOffsetPlaceholder()) - for (const auto &GI : valueIds()) { - NameVals.push_back(GI.second); - NameVals.push_back(GI.first); - Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, - GUIDEntryAbbrev); - NameVals.clear(); - } - Stream.ExitBlock(); -} - -/// Emit function names and summary offsets for the combined index -/// used by ThinLTO. -void IndexBitcodeWriter::writeCombinedValueSymbolTable() { - assert(hasVSTOffsetPlaceholder() && "Expected non-zero VSTOffsetPlaceholder"); - // Get the offset of the VST we are writing, and backpatch it into - // the VST forward declaration record. - uint64_t VSTOffset = Stream.GetCurrentBitNo(); - assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); - Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32); - - Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); - - auto Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid - unsigned EntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - SmallVector NameVals; - for (const auto &GVI : valueIds()) { - // VST_CODE_COMBINED_ENTRY: [valueid, refguid] - NameVals.push_back(GVI.second); - NameVals.push_back(GVI.first); - - // Emit the finished record. - Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, EntryAbbrev); - NameVals.clear(); - } Stream.ExitBlock(); } @@ -3114,7 +2937,7 @@ void ModuleBitcodeWriter::writeFunction( // Emit names for all the instructions etc. if (auto *Symtab = F.getValueSymbolTable()) - writeValueSymbolTable(*Symtab); + writeFunctionLevelValueSymbolTable(*Symtab); if (NeedsMetadataAttachment) writeFunctionMetadataAttachment(F); @@ -3341,41 +3164,33 @@ void IndexBitcodeWriter::writeModStrings() { unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv)); SmallVector Vals; - for (const auto &MPSE : Index.modulePaths()) { - if (!doIncludeModule(MPSE.getKey())) - continue; - StringEncoding Bits = - getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); - unsigned AbbrevToUse = Abbrev8Bit; - if (Bits == SE_Char6) - AbbrevToUse = Abbrev6Bit; - else if (Bits == SE_Fixed7) - AbbrevToUse = Abbrev7Bit; - - Vals.push_back(MPSE.getValue().first); - - for (const auto P : MPSE.getKey()) - Vals.push_back((unsigned char)P); - - // Emit the finished record. - Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); - - Vals.clear(); - // Emit an optional hash for the module now - auto &Hash = MPSE.getValue().second; - bool AllZero = true; // Detect if the hash is empty, and do not generate it - for (auto Val : Hash) { - if (Val) - AllZero = false; - Vals.push_back(Val); - } - if (!AllZero) { - // Emit the hash record. - Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); - } + forEachModule( + [&](const StringMapEntry> &MPSE) { + StringRef Key = MPSE.getKey(); + const auto &Value = MPSE.getValue(); + StringEncoding Bits = getStringEncoding(Key); + unsigned AbbrevToUse = Abbrev8Bit; + if (Bits == SE_Char6) + AbbrevToUse = Abbrev6Bit; + else if (Bits == SE_Fixed7) + AbbrevToUse = Abbrev7Bit; + + Vals.push_back(Value.first); + Vals.append(Key.begin(), Key.end()); + + // Emit the finished record. + Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); + + // Emit an optional hash for the module now + const auto &Hash = Value.second; + if (llvm::any_of(Hash, [](uint32_t H) { return H; })) { + Vals.assign(Hash.begin(), Hash.end()); + // Emit the hash record. + Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); + } - Vals.clear(); - } + Vals.clear(); + }); Stream.ExitBlock(); } @@ -3460,15 +3275,14 @@ void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord( void ModuleBitcodeWriter::writeModuleLevelReferences( const GlobalVariable &V, SmallVector &NameVals, unsigned FSModRefsAbbrev) { - auto Summaries = - Index->findGlobalValueSummaryList(GlobalValue::getGUID(V.getName())); - if (Summaries == Index->end()) { + auto VI = Index->getValueInfo(GlobalValue::getGUID(V.getName())); + if (!VI || VI.getSummaryList().empty()) { // Only declarations should not have a summary (a declaration might however // have a summary if the def was in module level asm). assert(V.isDeclaration()); return; } - auto *Summary = Summaries->second.front().get(); + auto *Summary = VI.getSummaryList()[0].get(); NameVals.push_back(VE.getValueID(&V)); GlobalVarSummary *VS = cast(Summary); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); @@ -3493,7 +3307,15 @@ static const uint64_t INDEX_VERSION = 3; /// Emit the per-module summary section alongside the rest of /// the module's bitcode. void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { - Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4); + // By default we compile with ThinLTO if the module has a summary, but the + // client can request full LTO with a module flag. + bool IsThinLTO = true; + if (auto *MD = + mdconst::extract_or_null(M.getModuleFlag("ThinLTO"))) + IsThinLTO = MD->getZExtValue(); + Stream.EnterSubblock(IsThinLTO ? bitc::GLOBALVAL_SUMMARY_BLOCK_ID + : bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, + 4); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef{INDEX_VERSION}); @@ -3502,6 +3324,11 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { return; } + for (const auto &GVI : valueIds()) { + Stream.EmitRecord(bitc::FS_VALUE_GUID, + ArrayRef{GVI.second, GVI.first}); + } + // Abbrev for FS_PERMODULE. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); @@ -3552,15 +3379,14 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { if (!F.hasName()) report_fatal_error("Unexpected anonymous function when writing summary"); - auto Summaries = - Index->findGlobalValueSummaryList(GlobalValue::getGUID(F.getName())); - if (Summaries == Index->end()) { + ValueInfo VI = Index->getValueInfo(GlobalValue::getGUID(F.getName())); + if (!VI || VI.getSummaryList().empty()) { // Only declarations should not have a summary (a declaration might // however have a summary if the def was in module level asm). assert(F.isDeclaration()); continue; } - auto *Summary = Summaries->second.front().get(); + auto *Summary = VI.getSummaryList()[0].get(); writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F), FSCallsAbbrev, FSCallsProfileAbbrev, F); } @@ -3594,6 +3420,11 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef{INDEX_VERSION}); + for (const auto &GVI : valueIds()) { + Stream.EmitRecord(bitc::FS_VALUE_GUID, + ArrayRef{GVI.second, GVI.first}); + } + // Abbrev for FS_COMBINED. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED)); @@ -3658,27 +3489,30 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { NameVals.clear(); }; - for (const auto &I : *this) { + forEachSummary([&](GVInfo I) { GlobalValueSummary *S = I.second; assert(S); - assert(hasValueId(I.first)); - unsigned ValueId = getValueId(I.first); - SummaryToValueIdMap[S] = ValueId; + auto ValueId = getValueId(I.first); + assert(ValueId); + SummaryToValueIdMap[S] = *ValueId; if (auto *AS = dyn_cast(S)) { // Will process aliases as a post-pass because the reader wants all // global to be loaded first. Aliases.push_back(AS); - continue; + return; } if (auto *VS = dyn_cast(S)) { - NameVals.push_back(ValueId); + NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(VS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); for (auto &RI : VS->refs()) { - NameVals.push_back(getValueId(RI.getGUID())); + auto RefValueId = getValueId(RI.getGUID()); + if (!RefValueId) + continue; + NameVals.push_back(*RefValueId); } // Emit the finished record. @@ -3686,21 +3520,28 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { FSModRefsAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); - continue; + return; } auto *FS = cast(S); writeFunctionTypeMetadataRecords(Stream, FS); - NameVals.push_back(ValueId); + NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(FS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); - NameVals.push_back(FS->refs().size()); + // Fill in below + NameVals.push_back(0); + unsigned Count = 0; for (auto &RI : FS->refs()) { - NameVals.push_back(getValueId(RI.getGUID())); + auto RefValueId = getValueId(RI.getGUID()); + if (!RefValueId) + continue; + NameVals.push_back(*RefValueId); + Count++; } + NameVals[4] = Count; bool HasProfileData = false; for (auto &EI : FS->calls()) { @@ -3713,15 +3554,19 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { // If this GUID doesn't have a value id, it doesn't have a function // summary and we don't need to record any calls to it. GlobalValue::GUID GUID = EI.first.getGUID(); - if (!hasValueId(GUID)) { + auto CallValueId = getValueId(GUID); + if (!CallValueId) { // For SamplePGO, the indirect call targets for local functions will // have its original name annotated in profile. We try to find the // corresponding PGOFuncName as the GUID. GUID = Index.getGUIDFromOriginalID(GUID); - if (GUID == 0 || !hasValueId(GUID)) + if (GUID == 0) + continue; + CallValueId = getValueId(GUID); + if (!CallValueId) continue; } - NameVals.push_back(getValueId(GUID)); + NameVals.push_back(*CallValueId); if (HasProfileData) NameVals.push_back(static_cast(EI.second.Hotness)); } @@ -3734,7 +3579,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); MaybeEmitOriginalName(*S); - } + }); for (auto *AS : Aliases) { auto AliasValueId = SummaryToValueIdMap[AS]; @@ -3752,6 +3597,24 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { MaybeEmitOriginalName(*AS); } + if (!Index.cfiFunctionDefs().empty()) { + for (auto &S : Index.cfiFunctionDefs()) { + NameVals.push_back(StrtabBuilder.add(S)); + NameVals.push_back(S.size()); + } + Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals); + NameVals.clear(); + } + + if (!Index.cfiFunctionDecls().empty()) { + for (auto &S : Index.cfiFunctionDecls()) { + NameVals.push_back(StrtabBuilder.add(S)); + NameVals.push_back(S.size()); + } + Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals); + NameVals.clear(); + } + Stream.ExitBlock(); } @@ -3808,10 +3671,7 @@ void ModuleBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); size_t BlockStartPos = Buffer.size(); - SmallVector Vals; - unsigned CurVersion = 1; - Vals.push_back(CurVersion); - Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); + writeModuleVersion(); // Emit blockinfo, which defines the standard abbreviations etc. writeBlockInfo(); @@ -3857,8 +3717,7 @@ void ModuleBitcodeWriter::write() { if (Index) writePerModuleGlobalValueSummary(); - writeValueSymbolTable(M.getValueSymbolTable(), - /* IsModuleLevel */ true, &FunctionToBitcodeIndex); + writeGlobalValueSymbolTable(FunctionToBitcodeIndex); writeModuleHash(BlockStartPos); @@ -3946,18 +3805,58 @@ BitcodeWriter::BitcodeWriter(SmallVectorImpl &Buffer) writeBitcodeHeader(*Stream); } -BitcodeWriter::~BitcodeWriter() = default; +BitcodeWriter::~BitcodeWriter() { assert(WroteStrtab); } + +void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) { + Stream->EnterSubblock(Block, 3); + + auto Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(Record)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + auto AbbrevNo = Stream->EmitAbbrev(std::move(Abbv)); + + Stream->EmitRecordWithBlob(AbbrevNo, ArrayRef{Record}, Blob); + + Stream->ExitBlock(); +} + +void BitcodeWriter::writeStrtab() { + assert(!WroteStrtab); + + std::vector Strtab; + StrtabBuilder.finalizeInOrder(); + Strtab.resize(StrtabBuilder.getSize()); + StrtabBuilder.write((uint8_t *)Strtab.data()); + + writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, + {Strtab.data(), Strtab.size()}); + + WroteStrtab = true; +} + +void BitcodeWriter::copyStrtab(StringRef Strtab) { + writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, Strtab); + WroteStrtab = true; +} void BitcodeWriter::writeModule(const Module *M, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { - ModuleBitcodeWriter ModuleWriter(M, Buffer, *Stream, + ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); ModuleWriter.write(); } +void BitcodeWriter::writeIndex( + const ModuleSummaryIndex *Index, + const std::map *ModuleToSummariesForIndex) { + IndexBitcodeWriter IndexWriter(*Stream, StrtabBuilder, *Index, + ModuleToSummariesForIndex); + IndexWriter.write(); +} + /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, @@ -3976,6 +3875,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, BitcodeWriter Writer(Buffer); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); + Writer.writeStrtab(); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) emitDarwinBCHeaderAndTrailer(Buffer, TT); @@ -3987,13 +3887,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, void IndexBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); - SmallVector Vals; - unsigned CurVersion = 1; - Vals.push_back(CurVersion); - Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); - - // If we have a VST, write the VSTOFFSET record placeholder. - writeValueSymbolTableForwardDecl(); + writeModuleVersion(); // Write the module paths in the combined index. writeModStrings(); @@ -4001,10 +3895,6 @@ void IndexBitcodeWriter::write() { // Write the summary combined index records. writeCombinedGlobalValueSummary(); - // Need a special VST writer for the combined index (we don't have a - // real VST and real values when this is invoked). - writeCombinedValueSymbolTable(); - Stream.ExitBlock(); } @@ -4018,11 +3908,9 @@ void llvm::WriteIndexToFile( SmallVector Buffer; Buffer.reserve(256 * 1024); - BitstreamWriter Stream(Buffer); - writeBitcodeHeader(Stream); - - IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex); - IndexWriter.write(); + BitcodeWriter Writer(Buffer); + Writer.writeIndex(&Index, ModuleToSummariesForIndex); + Writer.writeStrtab(); Out.write((char *)&Buffer.front(), Buffer.size()); } diff --git a/lib/Bitcode/Writer/LLVMBuild.txt b/lib/Bitcode/Writer/LLVMBuild.txt index a450b38fba2c16ef8ec5702fd5f09c7766a821a9..a07c280fa9e3f51491e0338bead7681561408bbf 100644 --- a/lib/Bitcode/Writer/LLVMBuild.txt +++ b/lib/Bitcode/Writer/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = BitWriter parent = Bitcode -required_libraries = Analysis Core Support +required_libraries = Analysis Core MC Support diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 3800d9abd429ab0eb4d53a983f85c458d00c2370..bb626baabd129e7640caf72b4f0b4c0591d8ae3b 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -314,10 +314,13 @@ ValueEnumerator::ValueEnumerator(const Module &M, // Remember what is the cutoff between globalvalue's and other constants. unsigned FirstConstant = Values.size(); - // Enumerate the global variable initializers. - for (const GlobalVariable &GV : M.globals()) + // Enumerate the global variable initializers and attributes. + for (const GlobalVariable &GV : M.globals()) { if (GV.hasInitializer()) EnumerateValue(GV.getInitializer()); + if (GV.hasAttributes()) + EnumerateAttributes(GV.getAttributesAsList(AttributeList::FunctionIndex)); + } // Enumerate the aliasees. for (const GlobalAlias &GA : M.aliases()) @@ -891,19 +894,22 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) { if (PAL.isEmpty()) return; // null is always 0. // Do a lookup. - unsigned &Entry = AttributeMap[PAL]; + unsigned &Entry = AttributeListMap[PAL]; if (Entry == 0) { // Never saw this before, add it. - Attribute.push_back(PAL); - Entry = Attribute.size(); + AttributeLists.push_back(PAL); + Entry = AttributeLists.size(); } // Do lookups for all attribute groups. - for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) { - AttributeList AS = PAL.getSlotAttributes(i); - unsigned &Entry = AttributeGroupMap[AS]; + for (unsigned i = PAL.index_begin(), e = PAL.index_end(); i != e; ++i) { + AttributeSet AS = PAL.getAttributes(i); + if (!AS.hasAttributes()) + continue; + IndexAndAttrSet Pair = {i, AS}; + unsigned &Entry = AttributeGroupMap[Pair]; if (Entry == 0) { - AttributeGroups.push_back(AS); + AttributeGroups.push_back(Pair); Entry = AttributeGroups.size(); } } diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 8a82aab2983637623cc5920be82a66042f391207..e7ccc8df1e5f8998a77bd1a17bee3bcb3852b8a6 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -48,6 +48,10 @@ public: // For each value, we remember its Value* and occurrence frequency. typedef std::vector > ValueList; + /// Attribute groups as encoded in bitcode are almost AttributeSets, but they + /// include the AttributeList index, so we have to track that in our map. + typedef std::pair IndexAndAttrSet; + UseListOrderStack UseListOrders; private: @@ -102,13 +106,13 @@ private: bool ShouldPreserveUseListOrder; - typedef DenseMap AttributeGroupMapType; + typedef DenseMap AttributeGroupMapType; AttributeGroupMapType AttributeGroupMap; - std::vector AttributeGroups; + std::vector AttributeGroups; - typedef DenseMap AttributeMapType; - AttributeMapType AttributeMap; - std::vector Attribute; + typedef DenseMap AttributeListMapType; + AttributeListMapType AttributeListMap; + std::vector AttributeLists; /// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by /// the "getGlobalBasicBlockID" method. @@ -166,16 +170,17 @@ public: unsigned getInstructionID(const Instruction *I) const; void setInstructionID(const Instruction *I); - unsigned getAttributeID(AttributeList PAL) const { + unsigned getAttributeListID(AttributeList PAL) const { if (PAL.isEmpty()) return 0; // Null maps to zero. - AttributeMapType::const_iterator I = AttributeMap.find(PAL); - assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!"); + AttributeListMapType::const_iterator I = AttributeListMap.find(PAL); + assert(I != AttributeListMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } - unsigned getAttributeGroupID(AttributeList PAL) const { - if (PAL.isEmpty()) return 0; // Null maps to zero. - AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL); + unsigned getAttributeGroupID(IndexAndAttrSet Group) const { + if (!Group.second.hasAttributes()) + return 0; // Null maps to zero. + AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(Group); assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } @@ -206,8 +211,8 @@ public: const std::vector &getBasicBlocks() const { return BasicBlocks; } - const std::vector &getAttributes() const { return Attribute; } - const std::vector &getAttributeGroups() const { + const std::vector &getAttributeLists() const { return AttributeLists; } + const std::vector &getAttributeGroups() const { return AttributeGroups; } diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 76549540ce0f762e507e815537fc10f7a7e995d6..946067e6358f3293798ce1dcf2fe32dfd5376a76 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -4,6 +4,7 @@ add_subdirectory(IR) add_subdirectory(IRReader) add_subdirectory(CodeGen) +add_subdirectory(BinaryFormat) add_subdirectory(Bitcode) add_subdirectory(Transforms) add_subdirectory(Linker) @@ -21,5 +22,6 @@ add_subdirectory(LineEditor) add_subdirectory(ProfileData) add_subdirectory(Fuzzer) add_subdirectory(Passes) -add_subdirectory(LibDriver) +add_subdirectory(ToolDrivers) add_subdirectory(XRay) +add_subdirectory(Testing) diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 955524c2a676e784eec8699b827cfe1aa89c17f4..5abf50e5bd10c3a0c7ee64d2b2deca70b3cbf658 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -128,8 +128,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( } DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); - DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; - r = CriticalPathSet.find_next(r)) + DEBUG(for (unsigned r : CriticalPathSet.set_bits()) dbgs() << " " << TRI->getName(r)); DEBUG(dbgs() << '\n'); } @@ -166,7 +165,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; - if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg))) + if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; @@ -571,7 +570,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( DEBUG({ dbgs() << " ::"; - for (int r = BV.find_first(); r != -1; r = BV.find_next(r)) + for (unsigned r : BV.set_bits()) dbgs() << " " << TRI->getName(r); dbgs() << "\n"; }); @@ -964,10 +963,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // sure to update that as well. const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()]; if (!SU) continue; - for (DbgValueVector::iterator DVI = DbgValues.begin(), - DVE = DbgValues.end(); DVI != DVE; ++DVI) - if (DVI->second == Q.second.Operand->getParent()) - UpdateDbgValue(*DVI->first, AntiDepReg, NewReg); + UpdateDbgValues(DbgValues, Q.second.Operand->getParent(), + AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 09a37a77e9fbc91d404161e9e65468932d41e4fa..c2aecc651b792a67d86ab415f4472ccd3b3cb50b 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -24,8 +24,8 @@ #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index 04f7f419f5eacfbb1a136cbf3835591ce6b19ee1..d14d93100adbf0389ba533d930c0cd128becd464 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -60,6 +60,25 @@ public: if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg) MI.getOperand(0).setReg(NewReg); } + + /// Update all DBG_VALUE instructions that may be affected by the dependency + /// breaker's update of ParentMI to use NewReg. + void UpdateDbgValues(const DbgValueVector &DbgValues, MachineInstr *ParentMI, + unsigned OldReg, unsigned NewReg) { + // The following code is dependent on the order in which the DbgValues are + // constructed in ScheduleDAGInstrs::buildSchedGraph. + MachineInstr *PrevDbgMI = nullptr; + for (const auto &DV : make_range(DbgValues.crbegin(), DbgValues.crend())) { + MachineInstr *PrevMI = DV.second; + if ((PrevMI == ParentMI) || (PrevMI == PrevDbgMI)) { + MachineInstr *DbgMI = DV.first; + UpdateDbgValue(*DbgMI, OldReg, NewReg); + PrevDbgMI = DbgMI; + } else if (PrevDbgMI) { + break; // If no match and already found a DBG_VALUE, we're done. + } + } + } }; } diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 61149d9229b77d6118de19482d767424d82a5a09..8b1376ab363d7f1ae3b384fa5e81844890241159 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -14,6 +14,7 @@ #include "DwarfException.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,7 +28,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 834a59a12a6a9a0b26ec7b924dad2a7b3475c495..ad348d723bae000e4588e04ebd1edc56c3c5aadd 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/AsmPrinter.h" #include "AsmPrinterHandler.h" #include "CodeViewDebug.h" #include "DwarfDebug.h" @@ -19,18 +20,19 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ObjectUtils.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" @@ -82,14 +84,12 @@ #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" @@ -123,6 +123,10 @@ static const char *const CodeViewLineTablesGroupDescription = STATISTIC(EmittedInsts, "Number of machine instrs printed"); +static cl::opt + PrintSchedule("print-schedule", cl::Hidden, cl::init(false), + cl::desc("Print 'sched: [latency:throughput]' in .s output")); + char AsmPrinter::ID = 0; typedef DenseMap> gcp_map_type; @@ -624,12 +628,15 @@ void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value, /// EmitFunctionHeader - This method emits the header for the current /// function. void AsmPrinter::EmitFunctionHeader() { + const Function *F = MF->getFunction(); + + if (isVerbose()) + OutStreamer->GetCommentOS() << "-- Begin function " << F->getName() << '\n'; + // Print out constants referenced by the function EmitConstantPool(); // Print the 'header' of function. - const Function *F = MF->getFunction(); - OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); @@ -720,7 +727,8 @@ void AsmPrinter::EmitFunctionEntryLabel() { } /// emitComments - Pretty-print comments for instructions. -static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { +static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS, + AsmPrinter *AP) { const MachineFunction *MF = MI.getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -728,6 +736,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { int FI; const MachineFrameInfo &MFI = MF->getFrameInfo(); + bool Commented = false; // We assume a single instruction only has a spill or reload, not // both. @@ -735,24 +744,39 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { if (TII->isLoadFromStackSlotPostFE(MI, FI)) { if (MFI.isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); - CommentOS << MMO->getSize() << "-byte Reload\n"; + CommentOS << MMO->getSize() << "-byte Reload"; + Commented = true; } } else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) { - if (MFI.isSpillSlotObjectIndex(FI)) - CommentOS << MMO->getSize() << "-byte Folded Reload\n"; + if (MFI.isSpillSlotObjectIndex(FI)) { + CommentOS << MMO->getSize() << "-byte Folded Reload"; + Commented = true; + } } else if (TII->isStoreToStackSlotPostFE(MI, FI)) { if (MFI.isSpillSlotObjectIndex(FI)) { MMO = *MI.memoperands_begin(); - CommentOS << MMO->getSize() << "-byte Spill\n"; + CommentOS << MMO->getSize() << "-byte Spill"; + Commented = true; } } else if (TII->hasStoreToStackSlot(MI, MMO, FI)) { - if (MFI.isSpillSlotObjectIndex(FI)) - CommentOS << MMO->getSize() << "-byte Folded Spill\n"; + if (MFI.isSpillSlotObjectIndex(FI)) { + CommentOS << MMO->getSize() << "-byte Folded Spill"; + Commented = true; + } } // Check for spill-induced copies - if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) - CommentOS << " Reload Reuse\n"; + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) { + Commented = true; + CommentOS << " Reload Reuse"; + } + + if (Commented && AP->EnablePrintSchedInfo) + // If any comment was added above and we need sched info comment then + // add this new comment just after the above comment w/o "\n" between them. + CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n"; + else if (Commented) + CommentOS << "\n"; } /// emitImplicitDef - This method emits the specified machine instruction @@ -799,46 +823,30 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { const DILocalVariable *V = MI->getDebugVariable(); if (auto *SP = dyn_cast(V->getScope())) { - StringRef Name = SP->getDisplayName(); + StringRef Name = SP->getName(); if (!Name.empty()) OS << Name << ":"; } OS << V->getName(); - - const DIExpression *Expr = MI->getDebugExpression(); - auto Fragment = Expr->getFragmentInfo(); - if (Fragment) - OS << " [fragment offset=" << Fragment->OffsetInBits - << " size=" << Fragment->SizeInBits << "]"; OS << " <- "; // The second operand is only an offset if it's an immediate. - bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); - int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; - - for (unsigned i = 0; i < Expr->getNumElements(); ++i) { - uint64_t Op = Expr->getElement(i); - if (Op == dwarf::DW_OP_LLVM_fragment) { - // There can't be any operands after this in a valid expression - break; - } else if (Deref) { - // We currently don't support extra Offsets or derefs after the first - // one. Bail out early instead of emitting an incorrect comment - OS << " [complex expression]"; - AP.OutStreamer->emitRawComment(OS.str()); - return true; - } else if (Op == dwarf::DW_OP_deref) { - Deref = true; - continue; - } - - uint64_t ExtraOffset = Expr->getElement(i++); - if (Op == dwarf::DW_OP_plus) - Offset += ExtraOffset; - else { - assert(Op == dwarf::DW_OP_minus); - Offset -= ExtraOffset; + bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); + int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0; + const DIExpression *Expr = MI->getDebugExpression(); + if (Expr->getNumElements()) { + OS << '['; + bool NeedSep = false; + for (auto Op : Expr->expr_ops()) { + if (NeedSep) + OS << ", "; + else + NeedSep = true; + OS << dwarf::OperationEncodingString(Op.getOp()); + for (unsigned I = 0; I < Op.getNumArgs(); ++I) + OS << ' ' << Op.getArg(I); } + OS << "] "; } // Register or immediate value. Register 0 means undef. @@ -869,7 +877,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); Offset += TFI->getFrameIndexReference(*AP.MF, MI->getOperand(0).getIndex(), Reg); - Deref = true; + MemLoc = true; } if (Reg == 0) { // Suppress offset, it is not meaningful here. @@ -878,12 +886,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { AP.OutStreamer->emitRawComment(OS.str()); return true; } - if (Deref) + if (MemLoc) OS << '['; OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); } - if (Deref) + if (MemLoc) OS << '+' << Offset << ']'; // NOTE: Want this comment at start of line, don't emit with AddComment. @@ -915,6 +923,16 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { if (needsCFIMoves() == CFI_M_None) return; + // If there is no "real" instruction following this CFI instruction, skip + // emitting it; it would be beyond the end of the function's FDE range. + auto *MBB = MI.getParent(); + auto I = std::next(MI.getIterator()); + while (I != MBB->end() && I->isTransient()) + ++I; + if (I == MBB->instr_end() && + MBB->getReverseIterator() == MBB->getParent()->rbegin()) + return; + const std::vector &Instrs = MF->getFrameInstructions(); unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); const MCCFIInstruction &CFI = Instrs[CFIIndex]; @@ -931,6 +949,19 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { MCConstantExpr::create(FrameOffset, OutContext)); } +static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, + MachineModuleInfo *MMI) { + if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo()) + return true; + + // We might emit an EH table that uses function begin and end labels even if + // we don't have any landingpads. + if (!MF.getFunction()->hasPersonalityFn()) + return false; + return !isNoOpWithoutInvoke( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())); +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. void AsmPrinter::EmitFunctionBody() { @@ -966,7 +997,7 @@ void AsmPrinter::EmitFunctionBody() { } if (isVerbose()) - emitComments(MI, OutStreamer->GetCommentOS()); + emitComments(MI, OutStreamer->GetCommentOS(), this); switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: @@ -1025,15 +1056,23 @@ void AsmPrinter::EmitFunctionBody() { // If the function is empty and the object file uses .subsections_via_symbols, // then we need to emit *something* to the function body to prevent the // labels from collapsing together. Just emit a noop. - if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) { + // Similarly, don't emit empty functions on Windows either. It can lead to + // duplicate entries (two functions with the same RVA) in the Guard CF Table + // after linking, causing the kernel not to load the binary: + // https://developercommunity.visualstudio.com/content/problem/45366/vc-linker-creates-invalid-dll-with-clang-cl.html + // FIXME: Hide this behind some API in e.g. MCAsmInfo or MCTargetStreamer. + const Triple &TT = TM.getTargetTriple(); + if (!HasAnyRealCode && (MAI->hasSubsectionsViaSymbols() || + (TT.isOSWindows() && TT.isOSBinFormatCOFF()))) { MCInst Noop; - MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop); - OutStreamer->AddComment("avoids zero-length function"); + MF->getSubtarget().getInstrInfo()->getNoop(Noop); // Targets can opt-out of emitting the noop here by leaving the opcode // unspecified. - if (Noop.getOpcode()) + if (Noop.getOpcode()) { + OutStreamer->AddComment("avoids zero-length function"); OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); + } } const Function *F = MF->getFunction(); @@ -1050,8 +1089,8 @@ void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); - if (!MF->getLandingPads().empty() || MMI->hasDebugInfo() || - MF->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { + if (needFuncLabelsForEHOrDebugInfo(*MF, MMI) || + MAI->hasDotTypeDotSizeDirective()) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->EmitLabel(CurrentFnEnd); @@ -1084,6 +1123,9 @@ void AsmPrinter::EmitFunctionBody() { HI.Handler->endFunction(MF); } + if (isVerbose()) + OutStreamer->GetCommentOS() << "-- End function\n"; + OutStreamer->AddBlankLine(); } @@ -1244,11 +1286,7 @@ bool AsmPrinter::doFinalization(Module &M) { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - // Emit module flags. - SmallVector ModuleFlags; - M.getModuleFlagsMetadata(ModuleFlags); - if (!ModuleFlags.empty()) - TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM); + TLOF.emitModuleMetadata(*OutStreamer, M, TM); if (TM.getTargetTriple().isOSBinFormatELF()) { MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo(); @@ -1335,7 +1373,7 @@ bool AsmPrinter::doFinalization(Module &M) { OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); OutStreamer->EmitLabel(AddrSymbol); - unsigned PtrSize = M.getDataLayout().getPointerSize(0); + unsigned PtrSize = MAI->getCodePointerSize(); OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), PtrSize); } @@ -1373,8 +1411,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); - if (!MF.getLandingPads().empty() || MMI->hasDebugInfo() || - MF.hasEHFunclets() || NeedsLocalForSize) { + if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -1383,6 +1420,11 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { ORE = &getAnalysis().getORE(); if (isVerbose()) LI = &getAnalysis(); + + const TargetSubtargetInfo &STI = MF.getSubtarget(); + EnablePrintSchedInfo = PrintSchedule.getNumOccurrences() + ? PrintSchedule + : STI.supportPrintSchedInfo(); } namespace { @@ -2220,7 +2262,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN] ExtraBits = Realigned.getRawData()[0] & (((uint64_t)-1) >> (64 - ExtraBitsSize)); - Realigned = Realigned.lshr(ExtraBitsSize); + Realigned.lshrInPlace(ExtraBitsSize); } else ExtraBits = Realigned.getRawData()[BitWidth / 64]; } @@ -2733,37 +2775,63 @@ void AsmPrinter::emitXRayTable() { auto PrevSection = OutStreamer->getCurrentSectionOnly(); auto Fn = MF->getFunction(); - MCSection *Section = nullptr; + MCSection *InstMap = nullptr; + MCSection *FnSledIndex = nullptr; if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { if (Fn->hasComdat()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, Fn->getComdat()->getName()); + FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, + Fn->getComdat()->getName()); } else { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, + InstMap = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + FnSledIndex = OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC); } } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, + InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, SectionKind::getReadOnlyWithRel()); + FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx", 0, + SectionKind::getReadOnlyWithRel()); } else { llvm_unreachable("Unsupported target"); } // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. Since this function is always called just - // before the function's end, we assume that this is happening after - // the last return instruction. - - auto WordSizeBytes = TM.getPointerSize(); - MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); + // xray_instr_map and xray_fn_idx sections. Since this function is always + // called just before the function's end, we assume that this is happening + // after the last return instruction. We also use the synthetic label in the + // xray_inster_map as a delimeter for the range of sleds for this function in + // the index. + auto WordSizeBytes = MAI->getCodePointerSize(); + MCSymbol *SledsStart = OutContext.createTempSymbol("xray_synthetic_", true); + MCSymbol *IdxRef = OutContext.createTempSymbol("xray_fn_idx_synth_", true); OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, WordSizeBytes, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); + OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false); + OutStreamer->EmitSymbolValue(IdxRef, WordSizeBytes, false); + + // Now we switch to the instrumentation map section. Because this is done + // per-function, we are able to create an index entry that will represent the + // range of sleds associated with a function. + OutStreamer->SwitchSection(InstMap); + OutStreamer->EmitLabel(SledsStart); for (const auto &Sled : Sleds) Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); - + MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_synthetic_end", true); + OutStreamer->EmitLabel(SledsEnd); + + // We then emit a single entry in the index per function. We use the symbols + // that bound the instrumentation map as the range for a specific function. + // Each entry here will be 2 * word size aligned, as we're writing down two + // pointers. This should work for both 32-bit and 64-bit platforms. + OutStreamer->SwitchSection(FnSledIndex); + OutStreamer->EmitCodeAlignment(2 * WordSizeBytes); + OutStreamer->EmitLabel(IdxRef); + OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes); + OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes); OutStreamer->SwitchSection(PrevSection); Sleds.clear(); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 0185c380cc394fe047f477da0593e059549cc9aa..0edf9051d342e010cf9ee78b5dd492e48ef4b78e 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -15,6 +15,7 @@ #include "DwarfDebug.h" #include "DwarfExpression.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,7 +27,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 683e622e3d5379fb884324194f6133c24e221901..eae79ad101d380725e3113422c2bfad45833b258 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -144,6 +144,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, " we don't have an asm parser for this target\n"); Parser->setAssemblerDialect(Dialect); Parser->setTargetParser(*TAP.get()); + if (Dialect == InlineAsm::AD_Intel) + // We need this flag to be able to parse numbers like "0bH" + Parser->setParsingInlineAsm(true); if (MF) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 383b8cddb1a06384fafe131ebf077f9b7302f7cd..e94616fd59006da4547e821037e641c49fae77fe 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1,4 +1,4 @@ -//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp --*- C++ -*--===// +//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===// // // The LLVM Compiler Infrastructure // @@ -12,36 +12,82 @@ //===----------------------------------------------------------------------===// #include "CodeViewDebug.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/Config/llvm-config.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/DebugInfo/CodeView/TypeTableCollection.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace llvm::codeview; CodeViewDebug::CodeViewDebug(AsmPrinter *AP) - : DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(), - TypeTable(Allocator), CurFn(nullptr) { + : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip any debug info related stuff. if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || @@ -177,7 +223,8 @@ static const DISubprogram *getQualifiedNameComponents( static std::string getQualifiedName(ArrayRef QualifiedNameComponents, StringRef TypeName) { std::string FullyQualifiedName; - for (StringRef QualifiedNameComponent : reverse(QualifiedNameComponents)) { + for (StringRef QualifiedNameComponent : + llvm::reverse(QualifiedNameComponents)) { FullyQualifiedName.append(QualifiedNameComponent); FullyQualifiedName.append("::"); } @@ -237,7 +284,7 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { // The display name includes function template arguments. Drop them to match // MSVC. - StringRef DisplayName = SP->getDisplayName().split('<').first; + StringRef DisplayName = SP->getName().split('<').first; const DIScope *Scope = SP->getScope().resolve(); TypeIndex TI; @@ -392,7 +439,7 @@ void CodeViewDebug::endModule() { // subprograms. switchToDebugSectionForSymbol(nullptr); - MCSymbol *CompilerInfo = beginCVSubsection(ModuleSubstreamKind::Symbols); + MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols); emitCompilerInformation(); endCVSubsection(CompilerInfo); @@ -416,7 +463,7 @@ void CodeViewDebug::endModule() { // Emit UDT records for any types used by global variables. if (!GlobalUDTs.empty()) { - MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols); + MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); emitDebugInfoForUDTs(GlobalUDTs); endCVSubsection(SymbolsEnd); } @@ -468,17 +515,21 @@ void CodeViewDebug::emitTypeInformation() { CommentPrefix += ' '; } - TypeDatabase TypeDB; - CVTypeDumper CVTD(TypeDB); - TypeTable.ForEachRecord([&](TypeIndex Index, ArrayRef Record) { + TypeTableCollection Table(TypeTable.records()); + Optional B = Table.getFirst(); + while (B) { + // This will fail if the record data is invalid. + CVType Record = Table.getType(*B); + if (OS.isVerboseAsm()) { // Emit a block comment describing the type record for readability. SmallString<512> CommentBlock; raw_svector_ostream CommentOS(CommentBlock); ScopedPrinter SP(CommentOS); SP.setPrefix(CommentPrefix); - TypeDumpVisitor TDV(TypeDB, &SP, false); - Error E = CVTD.dump(Record, TDV); + TypeDumpVisitor TDV(Table, &SP, false); + + Error E = codeview::visitTypeRecord(Record, *B, TDV); if (E) { logAllUnhandledErrors(std::move(E), errs(), "error: "); llvm_unreachable("produced malformed type record"); @@ -488,29 +539,10 @@ void CodeViewDebug::emitTypeInformation() { // newline. OS.emitRawComment( CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim()); - } else { -#ifndef NDEBUG - // Assert that the type data is valid even if we aren't dumping - // comments. The MSVC linker doesn't do much type record validation, - // so the first link of an invalid type record can succeed while - // subsequent links will fail with LNK1285. - BinaryByteStream Stream(Record, llvm::support::little); - CVTypeArray Types; - BinaryStreamReader Reader(Stream); - Error E = Reader.readArray(Types, Reader.getLength()); - if (!E) { - TypeVisitorCallbacks C; - E = CVTypeVisitor(C).visitTypeStream(Types); - } - if (E) { - logAllUnhandledErrors(std::move(E), errs(), "error: "); - llvm_unreachable("produced malformed type record"); - } -#endif } - StringRef S(reinterpret_cast(Record.data()), Record.size()); - OS.EmitBinaryData(S); - }); + OS.EmitBinaryData(Record.str_data()); + B = Table.getNext(*B); + } } namespace { @@ -585,7 +617,7 @@ static CPUType mapArchToCVCPUType(Triple::ArchType Type) { } } -} // anonymous namespace +} // end anonymous namespace void CodeViewDebug::emitCompilerInformation() { MCContext &Context = MMI->getContext(); @@ -644,7 +676,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() { return; OS.AddComment("Inlinee lines subsection"); - MCSymbol *InlineEnd = beginCVSubsection(ModuleSubstreamKind::InlineeLines); + MCSymbol *InlineEnd = beginCVSubsection(DebugSubsectionKind::InlineeLines); // We don't provide any extra file info. // FIXME: Find out if debuggers use this info. @@ -657,7 +689,7 @@ void CodeViewDebug::emitInlineeLinesSubsection() { OS.AddBlankLine(); unsigned FileId = maybeRecordFile(SP->getFile()); - OS.AddComment("Inlined function " + SP->getDisplayName() + " starts at " + + OS.AddComment("Inlined function " + SP->getName() + " starts at " + SP->getFilename() + Twine(':') + Twine(SP->getLine())); OS.AddBlankLine(); // The filechecksum table uses 8 byte entries for now, and file ids start at @@ -759,17 +791,17 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, // If we have a display name, build the fully qualified name by walking the // chain of scopes. - if (!SP->getDisplayName().empty()) + if (!SP->getName().empty()) FuncName = - getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName()); + getFullyQualifiedName(SP->getScope().resolve(), SP->getName()); // If our DISubprogram name is empty, use the mangled name. if (FuncName.empty()) - FuncName = GlobalValue::getRealLinkageName(GV->getName()); + FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); // Emit a symbol subsection, required by VS2012+ to find function boundaries. OS.AddComment("Symbol subsection for " + Twine(FuncName)); - MCSymbol *SymbolsEnd = beginCVSubsection(ModuleSubstreamKind::Symbols); + MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); { MCSymbol *ProcRecordBegin = MMI->getContext().createTempSymbol(), *ProcRecordEnd = MMI->getContext().createTempSymbol(); @@ -886,13 +918,21 @@ void CodeViewDebug::collectVariableInfoFromMFTable( if (!Scope) continue; + // If the variable has an attached offset expression, extract it. + // FIXME: Try to handle DW_OP_deref as well. + int64_t ExprOffset = 0; + if (VI.Expr) + if (!VI.Expr->extractIfOffset(ExprOffset)) + continue; + // Get the frame register used and the offset. unsigned FrameReg = 0; int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg); uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg); // Calculate the label ranges. - LocalVarDefRange DefRange = createDefRangeMem(CVReg, FrameOffset); + LocalVarDefRange DefRange = + createDefRangeMem(CVReg, FrameOffset + ExprOffset); for (const InsnRange &Range : Scope->getRanges()) { const MCSymbol *Begin = getLabelBeforeInsn(Range.first); const MCSymbol *End = getLabelAfterInsn(Range.second); @@ -1030,11 +1070,11 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { bool EmptyPrologue = true; for (const auto &MBB : *MF) { for (const auto &MI : MBB) { - if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { PrologEndLoc = MI.getDebugLoc(); break; - } else if (!MI.isDebugValue()) { + } else if (!MI.isMetaInstruction()) { EmptyPrologue = false; } } @@ -1136,7 +1176,7 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { DITypeRef ElementTypeRef = Ty->getBaseType(); TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef); // IndexType is size_t, which depends on the bitness of the target. - TypeIndex IndexType = Asm->MAI->getPointerSize() == 8 + TypeIndex IndexType = Asm->TM.getPointerSize() == 8 ? TypeIndex(SimpleTypeKind::UInt64Quad) : TypeIndex(SimpleTypeKind::UInt32Long); @@ -1342,8 +1382,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) { assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type); TypeIndex ClassTI = getTypeIndex(Ty->getClassType()); TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType()); - PointerKind PK = Asm->MAI->getPointerSize() == 8 ? PointerKind::Near64 - : PointerKind::Near32; + PointerKind PK = Asm->TM.getPointerSize() == 8 ? PointerKind::Near64 + : PointerKind::Near32; bool IsPMF = isa(Ty->getBaseType()); PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction : PointerMode::PointerToDataMember; @@ -1458,7 +1498,8 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, } TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) { - unsigned VSlotCount = Ty->getSizeInBits() / (8 * Asm->MAI->getPointerSize()); + unsigned VSlotCount = + Ty->getSizeInBits() / (8 * Asm->MAI->getCodePointerSize()); SmallVector Slots(VSlotCount, VFTableSlotKind::Near); VFTableShapeRecord VFTSR(Slots); @@ -1566,7 +1607,7 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { EnumeratorCount++; } } - FTI = FLRB.end(); + FTI = FLRB.end(true); } std::string FullName = getFullyQualifiedName(Ty); @@ -1586,11 +1627,11 @@ struct llvm::ClassInfo { uint64_t BaseOffset; }; // [MemberInfo] - typedef std::vector MemberList; + using MemberList = std::vector; - typedef TinyPtrVector MethodsList; + using MethodsList = TinyPtrVector; // MethodName -> MethodsList - typedef MapVector MethodsMap; + using MethodsMap = MapVector; /// Base classes. std::vector Inheritance; @@ -1702,10 +1743,12 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { SizeInBytes, FullName, Ty->getIdentifier()); TypeIndex ClassTI = TypeTable.writeKnownType(CR); - StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(Ty->getFile())); - TypeIndex SIDI = TypeTable.writeKnownType(SIDR); - UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); - TypeTable.writeKnownType(USLR); + if (const auto *File = Ty->getFile()) { + StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File)); + TypeIndex SIDI = TypeTable.writeKnownType(SIDR); + UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine()); + TypeTable.writeKnownType(USLR); + } addToUDTs(Ty, ClassTI); @@ -1853,7 +1896,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { translateMethodOptionFlags(SP), VFTableOffset, Name)); MemberCount++; } - assert(Methods.size() > 0 && "Empty methods map entry"); + assert(!Methods.empty() && "Empty methods map entry"); if (Methods.size() == 1) FLBR.writeMemberType(Methods[0]); else { @@ -1871,7 +1914,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { MemberCount++; } - TypeIndex FieldTI = FLBR.end(); + TypeIndex FieldTI = FLBR.end(true); return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount, !Info.NestedClasses.empty()); } @@ -2113,7 +2156,7 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) { maybeRecordLocation(DL, Asm->MF); } -MCSymbol *CodeViewDebug::beginCVSubsection(ModuleSubstreamKind Kind) { +MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) { MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(), *EndLabel = MMI->getContext().createTempSymbol(); OS.EmitIntValue(unsigned(Kind), 4); @@ -2173,7 +2216,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() { if (!GV->hasComdat() && !GV->isDeclarationForLinker()) { if (!EndLabel) { OS.AddComment("Symbol subsection for globals"); - EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); + EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); } // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV)); @@ -2189,9 +2232,9 @@ void CodeViewDebug::emitDebugInfoForGlobals() { if (GV->hasComdat()) { MCSymbol *GVSym = Asm->getSymbol(GV); OS.AddComment("Symbol subsection for " + - Twine(GlobalValue::getRealLinkageName(GV->getName()))); + Twine(GlobalValue::dropLLVMManglingEscape(GV->getName()))); switchToDebugSectionForSymbol(GVSym); - EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols); + EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym); endCVSubsection(EndLabel); diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 343384c5177285d790a66a7ade988641c49d8b3d..2cd495aec6dc4fec3350ce6d4b38e366e282a8cd 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -1,4 +1,4 @@ -//===-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h ----*- C++ -*--===// +//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,29 +14,44 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H #define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H +#include "DbgValueHistoryCalculator.h" #include "DebugHandlerBase.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" +#include +#include +#include +#include +#include +#include +#include namespace llvm { -class StringRef; -class LexicalScope; struct ClassInfo; +class StringRef; +class AsmPrinter; +class Function; +class GlobalVariable; +class MCSectionCOFF; +class MCStreamer; +class MCSymbol; +class MachineFunction; /// \brief Collects and handles line tables information in a CodeView format. class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { MCStreamer &OS; - llvm::BumpPtrAllocator Allocator; + BumpPtrAllocator Allocator; codeview::TypeTableBuilder TypeTable; /// Represents the most general definition range. @@ -110,7 +125,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { unsigned LastFileId = 0; bool HaveLineInfo = false; }; - FunctionInfo *CurFn; + FunctionInfo *CurFn = nullptr; /// The set of comdat .debug$S sections that we've seen so far. Each section /// must start with a magic version number that must only be emitted once. @@ -176,8 +191,9 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { std::vector> LocalUDTs, GlobalUDTs; - typedef std::map FileToFilepathMapTy; + using FileToFilepathMapTy = std::map; FileToFilepathMapTy FileToFilepathMap; + StringRef getFullFilepath(const DIFile *S); unsigned maybeRecordFile(const DIFile *F); @@ -216,14 +232,14 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Opens a subsection of the given kind in a .debug$S codeview section. /// Returns an end label for use with endCVSubsection when the subsection is /// finished. - MCSymbol *beginCVSubsection(codeview::ModuleSubstreamKind Kind); + MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind); void endCVSubsection(MCSymbol *EndLabel); void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt, const InlineSite &Site); - typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + using InlinedVariable = DbgValueHistoryMap::InlinedVariable; void collectVariableInfo(const DISubprogram *SP); @@ -309,7 +325,7 @@ protected: public: CodeViewDebug(AsmPrinter *Asm); - void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} + void setSymbolSize(const MCSymbol *, uint64_t) override {} /// \brief Emit the COFF section that holds the line table information. void endModule() override; @@ -317,6 +333,7 @@ public: /// \brief Process beginning of an instruction. void beginInstruction(const MachineInstr *MI) override; }; -} // End of namespace llvm -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index b510e0ef36ac63e383e7491fbf4657410fb4ecb6..30bfd7c94e68b91ff0cca047e7bc4402838fbade 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -31,6 +31,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + //===----------------------------------------------------------------------===// // DIEAbbrevData Implementation //===----------------------------------------------------------------------===// @@ -79,15 +81,22 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const { dwarf::AttributeString(AttrData.getAttribute()).data()); // Emit form type. +#ifndef NDEBUG + // Could be an assertion, but this way we can see the failing form code + // easily, which helps track down where it came from. + if (!dwarf::isValidFormForVersion(AttrData.getForm(), + AP->getDwarfVersion())) { + DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm()) + << " for DWARF version " << AP->getDwarfVersion() << "\n"); + llvm_unreachable("Invalid form for specified DWARF version"); + } +#endif AP->EmitULEB128(AttrData.getForm(), dwarf::FormEncodingString(AttrData.getForm()).data()); // Emit value for DW_FORM_implicit_const. - if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) { - assert(AP->getDwarfVersion() >= 5 && - "DW_FORM_implicit_const is supported starting from DWARFv5"); + if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) AP->EmitSLEB128(AttrData.getValue()); - } } // Mark end of abbreviation. @@ -518,7 +527,7 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getPointerSize(); + return AP->MAI->getCodePointerSize(); } LLVM_DUMP_METHOD @@ -540,7 +549,7 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; - return AP->getPointerSize(); + return AP->MAI->getCodePointerSize(); } LLVM_DUMP_METHOD @@ -646,20 +655,12 @@ void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { case dwarf::DW_FORM_ref_addr: { // Get the absolute offset for this DIE within the debug info/types section. unsigned Addr = Entry->getDebugSectionOffset(); - if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { - const DwarfDebug *DD = AP->getDwarfDebug(); - if (DD) - assert(!DD->useSplitDwarf() && - "TODO: dwo files can't have relocations."); - const DIEUnit *Unit = Entry->getUnit(); - assert(Unit && "CUDie should belong to a CU."); - MCSection *Section = Unit->getSection(); - if (Section) { - const MCSymbol *SectionSym = Section->getBeginSymbol(); - AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); - return; - } + if (const MCSymbol *SectionSym = + Entry->getUnit()->getCrossSectionRelativeBaseAddress()) { + AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); + return; } + AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form)); return; } @@ -682,7 +683,7 @@ unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { return getULEB128Size(Entry->getOffset()); case dwarf::DW_FORM_ref_addr: if (AP->getDwarfVersion() == 2) - return AP->getPointerSize(); + return AP->MAI->getCodePointerSize(); switch (AP->OutStreamer->getContext().getDwarfFormat()) { case dwarf::DWARF32: return 4; @@ -808,7 +809,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { return 4; if (Form == dwarf::DW_FORM_sec_offset) return 4; - return AP->getPointerSize(); + return AP->MAI->getCodePointerSize(); } /// EmitValue - Emit label value. diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index 8e3b88d0af0e5b9a07c61e2e970ea45cc107099a..15ade3c96dfea2ae1e5927901231f394c1717659 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "ByteStreamer.h" #include "DIEHash.h" +#include "ByteStreamer.h" #include "DwarfDebug.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DIE.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" @@ -116,65 +116,17 @@ void DIEHash::addParentContext(const DIE &Parent) { // Collect all of the attributes for a particular DIE in single structure. void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { -#define COLLECT_ATTR(NAME) \ - case dwarf::NAME: \ - Attrs.NAME = V; \ - break for (const auto &V : Die.values()) { DEBUG(dbgs() << "Attribute: " << dwarf::AttributeString(V.getAttribute()) << " added.\n"); switch (V.getAttribute()) { - COLLECT_ATTR(DW_AT_name); - COLLECT_ATTR(DW_AT_accessibility); - COLLECT_ATTR(DW_AT_address_class); - COLLECT_ATTR(DW_AT_allocated); - COLLECT_ATTR(DW_AT_artificial); - COLLECT_ATTR(DW_AT_associated); - COLLECT_ATTR(DW_AT_binary_scale); - COLLECT_ATTR(DW_AT_bit_offset); - COLLECT_ATTR(DW_AT_bit_size); - COLLECT_ATTR(DW_AT_bit_stride); - COLLECT_ATTR(DW_AT_byte_size); - COLLECT_ATTR(DW_AT_byte_stride); - COLLECT_ATTR(DW_AT_const_expr); - COLLECT_ATTR(DW_AT_const_value); - COLLECT_ATTR(DW_AT_containing_type); - COLLECT_ATTR(DW_AT_count); - COLLECT_ATTR(DW_AT_data_bit_offset); - COLLECT_ATTR(DW_AT_data_location); - COLLECT_ATTR(DW_AT_data_member_location); - COLLECT_ATTR(DW_AT_decimal_scale); - COLLECT_ATTR(DW_AT_decimal_sign); - COLLECT_ATTR(DW_AT_default_value); - COLLECT_ATTR(DW_AT_digit_count); - COLLECT_ATTR(DW_AT_discr); - COLLECT_ATTR(DW_AT_discr_list); - COLLECT_ATTR(DW_AT_discr_value); - COLLECT_ATTR(DW_AT_encoding); - COLLECT_ATTR(DW_AT_enum_class); - COLLECT_ATTR(DW_AT_endianity); - COLLECT_ATTR(DW_AT_explicit); - COLLECT_ATTR(DW_AT_is_optional); - COLLECT_ATTR(DW_AT_location); - COLLECT_ATTR(DW_AT_lower_bound); - COLLECT_ATTR(DW_AT_mutable); - COLLECT_ATTR(DW_AT_ordering); - COLLECT_ATTR(DW_AT_picture_string); - COLLECT_ATTR(DW_AT_prototyped); - COLLECT_ATTR(DW_AT_small); - COLLECT_ATTR(DW_AT_segment); - COLLECT_ATTR(DW_AT_string_length); - COLLECT_ATTR(DW_AT_threads_scaled); - COLLECT_ATTR(DW_AT_upper_bound); - COLLECT_ATTR(DW_AT_use_location); - COLLECT_ATTR(DW_AT_use_UTF8); - COLLECT_ATTR(DW_AT_variable_parameter); - COLLECT_ATTR(DW_AT_virtuality); - COLLECT_ATTR(DW_AT_visibility); - COLLECT_ATTR(DW_AT_vtable_elem_location); - COLLECT_ATTR(DW_AT_type); +#define HANDLE_DIE_HASH_ATTR(NAME) \ + case dwarf::NAME: \ + Attrs.NAME = V; \ + break; +#include "DIEHashAttributes.def" default: break; } @@ -366,62 +318,12 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { // Go through the attributes from \param Attrs in the order specified in 7.27.4 // and hash them. void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) { -#define ADD_ATTR(ATTR) \ +#define HANDLE_DIE_HASH_ATTR(NAME) \ { \ - if (ATTR) \ - hashAttribute(ATTR, Tag); \ + if (Attrs.NAME) \ + hashAttribute(Attrs.NAME, Tag); \ } - - ADD_ATTR(Attrs.DW_AT_name); - ADD_ATTR(Attrs.DW_AT_accessibility); - ADD_ATTR(Attrs.DW_AT_address_class); - ADD_ATTR(Attrs.DW_AT_allocated); - ADD_ATTR(Attrs.DW_AT_artificial); - ADD_ATTR(Attrs.DW_AT_associated); - ADD_ATTR(Attrs.DW_AT_binary_scale); - ADD_ATTR(Attrs.DW_AT_bit_offset); - ADD_ATTR(Attrs.DW_AT_bit_size); - ADD_ATTR(Attrs.DW_AT_bit_stride); - ADD_ATTR(Attrs.DW_AT_byte_size); - ADD_ATTR(Attrs.DW_AT_byte_stride); - ADD_ATTR(Attrs.DW_AT_const_expr); - ADD_ATTR(Attrs.DW_AT_const_value); - ADD_ATTR(Attrs.DW_AT_containing_type); - ADD_ATTR(Attrs.DW_AT_count); - ADD_ATTR(Attrs.DW_AT_data_bit_offset); - ADD_ATTR(Attrs.DW_AT_data_location); - ADD_ATTR(Attrs.DW_AT_data_member_location); - ADD_ATTR(Attrs.DW_AT_decimal_scale); - ADD_ATTR(Attrs.DW_AT_decimal_sign); - ADD_ATTR(Attrs.DW_AT_default_value); - ADD_ATTR(Attrs.DW_AT_digit_count); - ADD_ATTR(Attrs.DW_AT_discr); - ADD_ATTR(Attrs.DW_AT_discr_list); - ADD_ATTR(Attrs.DW_AT_discr_value); - ADD_ATTR(Attrs.DW_AT_encoding); - ADD_ATTR(Attrs.DW_AT_enum_class); - ADD_ATTR(Attrs.DW_AT_endianity); - ADD_ATTR(Attrs.DW_AT_explicit); - ADD_ATTR(Attrs.DW_AT_is_optional); - ADD_ATTR(Attrs.DW_AT_location); - ADD_ATTR(Attrs.DW_AT_lower_bound); - ADD_ATTR(Attrs.DW_AT_mutable); - ADD_ATTR(Attrs.DW_AT_ordering); - ADD_ATTR(Attrs.DW_AT_picture_string); - ADD_ATTR(Attrs.DW_AT_prototyped); - ADD_ATTR(Attrs.DW_AT_small); - ADD_ATTR(Attrs.DW_AT_segment); - ADD_ATTR(Attrs.DW_AT_string_length); - ADD_ATTR(Attrs.DW_AT_threads_scaled); - ADD_ATTR(Attrs.DW_AT_upper_bound); - ADD_ATTR(Attrs.DW_AT_use_location); - ADD_ATTR(Attrs.DW_AT_use_UTF8); - ADD_ATTR(Attrs.DW_AT_variable_parameter); - ADD_ATTR(Attrs.DW_AT_virtuality); - ADD_ATTR(Attrs.DW_AT_visibility); - ADD_ATTR(Attrs.DW_AT_vtable_elem_location); - ADD_ATTR(Attrs.DW_AT_type); - +#include "DIEHashAttributes.def" // FIXME: Add the extended attributes. } @@ -478,10 +380,12 @@ void DIEHash::computeHash(const DIE &Die) { /// DWARF4 standard. It is an md5 hash of the flattened description of the DIE /// with the inclusion of the full CU and all top level CU entities. // TODO: Initialize the type chain at 0 instead of 1 for CU signatures. -uint64_t DIEHash::computeCUSignature(const DIE &Die) { +uint64_t DIEHash::computeCUSignature(StringRef DWOName, const DIE &Die) { Numbering.clear(); Numbering[&Die] = 1; + if (!DWOName.empty()) + Hash.update(DWOName); // Hash the DIE. computeHash(Die); diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h index 996cd7ef3d2e56bd84818ae9f810d1b5cf16dcf9..29337ae38a996b699fd50c3b2236690b87fbbc8a 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/lib/CodeGen/AsmPrinter/DIEHash.h @@ -28,64 +28,15 @@ class CompileUnit; class DIEHash { // Collection of all attributes used in hashing a particular DIE. struct DIEAttrs { - DIEValue DW_AT_name; - DIEValue DW_AT_accessibility; - DIEValue DW_AT_address_class; - DIEValue DW_AT_allocated; - DIEValue DW_AT_artificial; - DIEValue DW_AT_associated; - DIEValue DW_AT_binary_scale; - DIEValue DW_AT_bit_offset; - DIEValue DW_AT_bit_size; - DIEValue DW_AT_bit_stride; - DIEValue DW_AT_byte_size; - DIEValue DW_AT_byte_stride; - DIEValue DW_AT_const_expr; - DIEValue DW_AT_const_value; - DIEValue DW_AT_containing_type; - DIEValue DW_AT_count; - DIEValue DW_AT_data_bit_offset; - DIEValue DW_AT_data_location; - DIEValue DW_AT_data_member_location; - DIEValue DW_AT_decimal_scale; - DIEValue DW_AT_decimal_sign; - DIEValue DW_AT_default_value; - DIEValue DW_AT_digit_count; - DIEValue DW_AT_discr; - DIEValue DW_AT_discr_list; - DIEValue DW_AT_discr_value; - DIEValue DW_AT_encoding; - DIEValue DW_AT_enum_class; - DIEValue DW_AT_endianity; - DIEValue DW_AT_explicit; - DIEValue DW_AT_is_optional; - DIEValue DW_AT_location; - DIEValue DW_AT_lower_bound; - DIEValue DW_AT_mutable; - DIEValue DW_AT_ordering; - DIEValue DW_AT_picture_string; - DIEValue DW_AT_prototyped; - DIEValue DW_AT_small; - DIEValue DW_AT_segment; - DIEValue DW_AT_string_length; - DIEValue DW_AT_threads_scaled; - DIEValue DW_AT_upper_bound; - DIEValue DW_AT_use_location; - DIEValue DW_AT_use_UTF8; - DIEValue DW_AT_variable_parameter; - DIEValue DW_AT_virtuality; - DIEValue DW_AT_visibility; - DIEValue DW_AT_vtable_elem_location; - DIEValue DW_AT_type; - - // Insert any additional ones here... +#define HANDLE_DIE_HASH_ATTR(NAME) DIEValue NAME; +#include "DIEHashAttributes.def" }; public: DIEHash(AsmPrinter *A = nullptr) : AP(A) {} /// \brief Computes the CU signature. - uint64_t computeCUSignature(const DIE &Die); + uint64_t computeCUSignature(StringRef DWOName, const DIE &Die); /// \brief Computes the type signature. uint64_t computeTypeSignature(const DIE &Die); diff --git a/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/lib/CodeGen/AsmPrinter/DIEHashAttributes.def new file mode 100644 index 0000000000000000000000000000000000000000..28a02390fccb6f719166809de61e876a5736bd25 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DIEHashAttributes.def @@ -0,0 +1,55 @@ +#ifndef HANDLE_DIE_HASH_ATTR +#error "Missing macro definition of HANDLE_DIE_HASH_ATTR" +#endif + +HANDLE_DIE_HASH_ATTR(DW_AT_name) +HANDLE_DIE_HASH_ATTR(DW_AT_accessibility) +HANDLE_DIE_HASH_ATTR(DW_AT_address_class) +HANDLE_DIE_HASH_ATTR(DW_AT_allocated) +HANDLE_DIE_HASH_ATTR(DW_AT_artificial) +HANDLE_DIE_HASH_ATTR(DW_AT_associated) +HANDLE_DIE_HASH_ATTR(DW_AT_binary_scale) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_offset) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_size) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_stride) +HANDLE_DIE_HASH_ATTR(DW_AT_byte_size) +HANDLE_DIE_HASH_ATTR(DW_AT_byte_stride) +HANDLE_DIE_HASH_ATTR(DW_AT_const_expr) +HANDLE_DIE_HASH_ATTR(DW_AT_const_value) +HANDLE_DIE_HASH_ATTR(DW_AT_containing_type) +HANDLE_DIE_HASH_ATTR(DW_AT_count) +HANDLE_DIE_HASH_ATTR(DW_AT_data_bit_offset) +HANDLE_DIE_HASH_ATTR(DW_AT_data_location) +HANDLE_DIE_HASH_ATTR(DW_AT_data_member_location) +HANDLE_DIE_HASH_ATTR(DW_AT_decimal_scale) +HANDLE_DIE_HASH_ATTR(DW_AT_decimal_sign) +HANDLE_DIE_HASH_ATTR(DW_AT_default_value) +HANDLE_DIE_HASH_ATTR(DW_AT_digit_count) +HANDLE_DIE_HASH_ATTR(DW_AT_discr) +HANDLE_DIE_HASH_ATTR(DW_AT_discr_list) +HANDLE_DIE_HASH_ATTR(DW_AT_discr_value) +HANDLE_DIE_HASH_ATTR(DW_AT_encoding) +HANDLE_DIE_HASH_ATTR(DW_AT_enum_class) +HANDLE_DIE_HASH_ATTR(DW_AT_endianity) +HANDLE_DIE_HASH_ATTR(DW_AT_explicit) +HANDLE_DIE_HASH_ATTR(DW_AT_is_optional) +HANDLE_DIE_HASH_ATTR(DW_AT_location) +HANDLE_DIE_HASH_ATTR(DW_AT_lower_bound) +HANDLE_DIE_HASH_ATTR(DW_AT_mutable) +HANDLE_DIE_HASH_ATTR(DW_AT_ordering) +HANDLE_DIE_HASH_ATTR(DW_AT_picture_string) +HANDLE_DIE_HASH_ATTR(DW_AT_prototyped) +HANDLE_DIE_HASH_ATTR(DW_AT_small) +HANDLE_DIE_HASH_ATTR(DW_AT_segment) +HANDLE_DIE_HASH_ATTR(DW_AT_string_length) +HANDLE_DIE_HASH_ATTR(DW_AT_threads_scaled) +HANDLE_DIE_HASH_ATTR(DW_AT_upper_bound) +HANDLE_DIE_HASH_ATTR(DW_AT_use_location) +HANDLE_DIE_HASH_ATTR(DW_AT_use_UTF8) +HANDLE_DIE_HASH_ATTR(DW_AT_variable_parameter) +HANDLE_DIE_HASH_ATTR(DW_AT_virtuality) +HANDLE_DIE_HASH_ATTR(DW_AT_visibility) +HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location) +HANDLE_DIE_HASH_ATTR(DW_AT_type) + +#undef HANDLE_DIE_HASH_ATTR diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 22fd7bb46056039f6ff2f92f5c201c85d71e00b4..c2ad9db81cfd974fb166524aaad6404242f1f2bc 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -194,6 +194,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, // some variables. for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isDef() && MO.getReg()) { + // Ignore call instructions that claim to clobber SP. The AArch64 + // backend does this for aggregate function arguments. + if (MI.isCall() && MO.getReg() == SP) + continue; // If this is a virtual register, only clobber it since it doesn't // have aliases. if (TRI->isVirtualRegister(MO.getReg())) @@ -209,8 +213,7 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF, } else if (MO.isRegMask()) { // If this is a register mask operand, clobber all debug values in // non-CSRs. - for (int I = ChangingRegs.find_first(); I != -1; - I = ChangingRegs.find_next(I)) { + for (unsigned I : ChangingRegs.set_bits()) { // Don't consider SP to be clobbered by register masks. if (unsigned(I) != SP && TRI->isPhysicalRegister(I) && MO.clobbersPhysReg(I)) { diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 1d63e33a4d33af585e98617aa1df3676916a1e40..0971c5942203c002bdd5de3ed9cab3473328b5b5 100644 --- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -115,7 +115,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { return getBaseTypeSize(BaseType); } -bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) { +static bool hasDebugInfo(const MachineModuleInfo *MMI, + const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return false; auto *SP = MF->getFunction()->getSubprogram(); @@ -129,10 +130,9 @@ bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) { } void DebugHandlerBase::beginFunction(const MachineFunction *MF) { - assert(Asm); PrevInstBB = nullptr; - if (!hasDebugInfo(MMI, MF)) { + if (!Asm || !hasDebugInfo(MMI, MF)) { skippedNonDebugFunction(); return; } @@ -224,9 +224,9 @@ void DebugHandlerBase::endInstruction() { return; assert(CurMI != nullptr); - // Don't create a new label after DBG_VALUE instructions. - // They don't generate code. - if (!CurMI->isDebugValue()) { + // Don't create a new label after DBG_VALUE and other instructions that don't + // generate code. + if (!CurMI->isMetaInstruction()) { PrevLabel = nullptr; PrevInstBB = CurMI->getParent(); } diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h index 3656e9d950992a6339779a14fcbd5e7ba9a9d0a1..0c551dfff9ccbe67b20bf0ddb1fb9452031e2227 100644 --- a/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -10,9 +10,9 @@ #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H +#include "ByteStreamer.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "ByteStreamer.h" namespace llvm { diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 05ac1cb02f7606cfb85f1a64fb72becbfa3bbdcf..b1ef8cfe989d0cc05ec1effb4acee111a87202c0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -16,12 +16,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringMap.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DIE.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index e08306b001fbfe88059f19820eca09eac7cf337d..dd7f7931b06b82f4a9a9cfdfae46fc5ed513d6e9 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -14,6 +14,7 @@ #include "DwarfException.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -28,7 +29,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index a550ff2fb90f32a3a50a1db841746168d4341b71..dc39d1e6cb52505eab61cd43ada8edd5be707460 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -440,7 +440,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { auto *InlinedSP = getDISubprogram(DS); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP]; + DIE *OriginDIE = getAbstractSPDies()[InlinedSP]; assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine); @@ -547,18 +547,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); for (auto &Fragment : DV.getFrameIndexExprs()) { unsigned FrameReg = 0; + const DIExpression *Expr = Fragment.Expr; const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); - DwarfExpr.addFragmentOffset(Fragment.Expr); + DwarfExpr.addFragmentOffset(Expr); SmallVector Ops; - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Offset); - Ops.push_back(dwarf::DW_OP_deref); - Ops.append(Fragment.Expr->elements_begin(), Fragment.Expr->elements_end()); - DIExpressionCursor Expr(Ops); + Ops.append(Expr->elements_begin(), Expr->elements_end()); + DIExpressionCursor Cursor(Ops); + DwarfExpr.setMemoryLocationKind(); DwarfExpr.addMachineRegExpression( - *Asm->MF->getSubtarget().getRegisterInfo(), Expr, FrameReg); - DwarfExpr.addExpression(std::move(Expr)); + *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg); + DwarfExpr.addExpression(std::move(Cursor)); } addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); @@ -633,7 +634,7 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( LexicalScope *Scope) { - DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()]; + DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()]; if (AbsDef) return; @@ -695,7 +696,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE( void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { DIE *D = getDIE(SP); - if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) { + if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) { if (D) // If this subprogram has an abstract definition, reference that addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); @@ -707,6 +708,42 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { } } +void DwarfCompileUnit::finishVariableDefinition(const DbgVariable &Var) { + DbgVariable *AbsVar = getExistingAbstractVariable( + InlinedVariable(Var.getVariable(), Var.getInlinedAt())); + auto *VariableDie = Var.getDIE(); + if (AbsVar && AbsVar->getDIE()) { + addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, + *AbsVar->getDIE()); + } else + applyVariableAttributes(Var, *VariableDie); +} + +DbgVariable *DwarfCompileUnit::getExistingAbstractVariable(InlinedVariable IV) { + const DILocalVariable *Cleansed; + return getExistingAbstractVariable(IV, Cleansed); +} + +// Find abstract variable, if any, associated with Var. +DbgVariable *DwarfCompileUnit::getExistingAbstractVariable( + InlinedVariable IV, const DILocalVariable *&Cleansed) { + // More then one inlined variable corresponds to one abstract variable. + Cleansed = IV.first; + auto &AbstractVariables = getAbstractVariables(); + auto I = AbstractVariables.find(Cleansed); + if (I != AbstractVariables.end()) + return I->second.get(); + return nullptr; +} + +void DwarfCompileUnit::createAbstractVariable(const DILocalVariable *Var, + LexicalScope *Scope) { + assert(Scope && Scope->isAbstractScope()); + auto AbsDbgVariable = make_unique(Var, /* IA */ nullptr); + DU->addScopeVariable(Scope, AbsDbgVariable.get()); + getAbstractVariables()[Var] = std::move(AbsDbgVariable); +} + void DwarfCompileUnit::emitHeader(bool UseOffsets) { // Don't bother labeling the .dwo unit, as its offset isn't used. if (!Skeleton) { @@ -723,7 +760,7 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) { /// addGlobalName - Add a new global name to the compile unit. void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Name.str(); GlobalNames[FullName] = &Die; @@ -731,7 +768,7 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die, void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Name.str(); // Insert, allowing the entry to remain as-is if it's already present @@ -744,7 +781,7 @@ void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, /// Add a new global type to the unit. void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Ty->getName().str(); GlobalTypes[FullName] = &Die; @@ -752,7 +789,7 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Ty->getName().str(); // Insert, allowing the entry to remain as-is if it's already present @@ -779,12 +816,13 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); SmallVector Ops; - if (Location.isIndirect()) { - Ops.push_back(dwarf::DW_OP_plus); + if (Location.isIndirect() && Location.getOffset()) { + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); - Ops.push_back(dwarf::DW_OP_deref); } DIExpressionCursor Cursor(Ops); const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); @@ -807,12 +845,13 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); const DIExpression *DIExpr = DV.getSingleExpression(); DwarfExpr.addFragmentOffset(DIExpr); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); SmallVector Ops; - if (Location.isIndirect()) { - Ops.push_back(dwarf::DW_OP_plus); + if (Location.isIndirect() && Location.getOffset()) { + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); - Ops.push_back(dwarf::DW_OP_deref); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIExpressionCursor Cursor(Ops); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 9a64b4b76b06ec4449e7e7e59b5c71affcd20827..3c2fb8d99db75142fa6525b74af5fa7fc402907d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,8 +15,8 @@ #define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DwarfUnit.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/Support/Dwarf.h" namespace llvm { @@ -28,7 +28,7 @@ class DwarfFile; class MCSymbol; class LexicalScope; -class DwarfCompileUnit : public DwarfUnit { +class DwarfCompileUnit final : public DwarfUnit { /// A numeric ID unique among all CUs in the module unsigned UniqueID; @@ -68,13 +68,26 @@ class DwarfCompileUnit : public DwarfUnit { // ranges/locs. const MCSymbol *BaseAddress; + DenseMap AbstractSPDies; + DenseMap> AbstractVariables; + /// \brief Construct a DIE for the given DbgVariable without initializing the /// DbgVariable's DIE reference. DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract); bool isDwoUnit() const override; - bool includeMinimalInlineScopes() const; + DenseMap &getAbstractSPDies() { + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return AbstractSPDies; + return DU->getAbstractSPDies(); + } + + DenseMap> &getAbstractVariables() { + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return AbstractVariables; + return DU->getAbstractVariables(); + } public: DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, @@ -86,6 +99,8 @@ public: return Skeleton; } + bool includeMinimalInlineScopes() const; + void initStmtList(); /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. @@ -189,6 +204,13 @@ public: DIE *constructImportedEntityDIE(const DIImportedEntity *Module); void finishSubprogramDefinition(const DISubprogram *SP); + void finishVariableDefinition(const DbgVariable &Var); + /// Find abstract variable associated with Var. + typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + DbgVariable *getExistingAbstractVariable(InlinedVariable IV, + const DILocalVariable *&Cleansed); + DbgVariable *getExistingAbstractVariable(InlinedVariable IV); + void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); /// Set the skeleton unit associated with this unit. void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5ce11130920885ed49f1e17e33229f336cfbc7f2..d392e372863693bea041448b23fb5e63bfa198f3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/DIE.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -38,7 +39,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" @@ -71,6 +71,10 @@ static cl::opt GenerateARangeSection("generate-arange-section", cl::desc("Generate dwarf aranges"), cl::init(false)); +static cl::opt SplitDwarfCrossCuReferences( + "split-dwarf-cross-cu-references", cl::Hidden, + cl::desc("Enable cross-cu references in DWO files"), cl::init(false)); + namespace { enum DefaultOnOff { Default, Enable, Disable }; } @@ -90,14 +94,6 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden, clEnumVal(Disable, "Disabled")), cl::init(Default)); -static cl::opt -SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output DWARF5 split debug info."), - cl::values(clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled")), - cl::init(Default)); - static cl::opt DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, cl::desc("Generate DWARF pubnames and pubtypes sections"), @@ -253,17 +249,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) HasAppleExtensionAttributes = tuneForLLDB(); - // Handle split DWARF. Off by default for now. - if (SplitDwarf == Default) - HasSplitDwarf = false; - else - HasSplitDwarf = SplitDwarf == Enable; - - // Pubnames/pubtypes on by default for GDB. - if (DwarfPubSections == Default) - HasDwarfPubSections = tuneForGDB(); - else - HasDwarfPubSections = DwarfPubSections == Enable; + // Handle split DWARF. + HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty(); // SCE defaults to linkage names only for abstract subprograms. if (DwarfLinkageNames == DefaultLinkageNames) @@ -373,25 +360,49 @@ template static void forBothCUs(DwarfCompileUnit &CU, Func F) { F(*SkelCU); } -void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { +bool DwarfDebug::shareAcrossDWOCUs() const { + return SplitDwarfCrossCuReferences; +} + +void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, + LexicalScope *Scope) { assert(Scope && Scope->getScopeNode()); assert(Scope->isAbstractScope()); assert(!Scope->getInlinedAt()); auto *SP = cast(Scope->getScopeNode()); - ProcessedSPNodes.insert(SP); - // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - auto &CU = *CUMap.lookup(SP->getUnit()); - forBothCUs(CU, [&](DwarfCompileUnit &CU) { - CU.constructAbstractSubprogramScopeDIE(Scope); - }); + if (useSplitDwarf() && !shareAcrossDWOCUs() && !SP->getUnit()->getSplitDebugInlining()) + // Avoid building the original CU if it won't be used + SrcCU.constructAbstractSubprogramScopeDIE(Scope); + else { + auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); + if (auto *SkelCU = CU.getSkeleton()) { + (shareAcrossDWOCUs() ? CU : SrcCU) + .constructAbstractSubprogramScopeDIE(Scope); + if (CU.getCUNode()->getSplitDebugInlining()) + SkelCU->constructAbstractSubprogramScopeDIE(Scope); + } else + CU.constructAbstractSubprogramScopeDIE(Scope); + } } -void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { - if (!GenerateGnuPubSections) +bool DwarfDebug::hasDwarfPubSections(bool includeMinimalInlineScopes) const { + // Opting in to GNU Pubnames/types overrides the default to ensure these are + // generated for things like Gold's gdb_index generation. + if (GenerateGnuPubSections) + return true; + + if (DwarfPubSections == Default) + return tuneForGDB() && !includeMinimalInlineScopes; + + return DwarfPubSections == Enable; +} + +void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const { + if (!hasDwarfPubSections(U.includeMinimalInlineScopes())) return; U.addFlag(D, dwarf::DW_AT_GNU_pubnames); @@ -400,7 +411,9 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. DwarfCompileUnit & -DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { +DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { + if (auto *CU = CUMap.lookup(DIUnit)) + return *CU; StringRef FN = DIUnit->getFilename(); CompilationDir = DIUnit->getDirectory(); @@ -412,7 +425,7 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { if (useSplitDwarf()) { NewCU.setSkeleton(constructSkeletonCU(NewCU)); NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, - DIUnit->getSplitDebugFilename()); + Asm->TM.Options.MCOptions.SplitDwarfFile); } // LTO with assembly output shares a single line table amongst multiple CUs. @@ -533,7 +546,12 @@ void DwarfDebug::beginModule() { } for (DICompileUnit *CUNode : M->debug_compile_units()) { - DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); + if (CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() && + CUNode->getGlobalVariables().empty() && + CUNode->getImportedEntities().empty() && CUNode->getMacros().empty()) + continue; + + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode); for (auto *IE : CUNode->getImportedEntities()) CU.addImportedEntity(IE); @@ -575,22 +593,17 @@ void DwarfDebug::finishVariableDefinitions() { // DIE::getUnit isn't simple - it walks parent pointers, etc. DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie()); assert(Unit); - DbgVariable *AbsVar = getExistingAbstractVariable( - InlinedVariable(Var->getVariable(), Var->getInlinedAt())); - if (AbsVar && AbsVar->getDIE()) { - Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, - *AbsVar->getDIE()); - } else - Unit->applyVariableAttributes(*Var, *VariableDie); + Unit->finishVariableDefinition(*Var); } } void DwarfDebug::finishSubprogramDefinitions() { - for (const DISubprogram *SP : ProcessedSPNodes) - if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug) - forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(SP); - }); + for (const DISubprogram *SP : ProcessedSPNodes) { + assert(SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug); + forBothCUs( + getOrCreateDwarfCompileUnit(SP->getUnit()), + [&](DwarfCompileUnit &CU) { CU.finishSubprogramDefinition(SP); }); + } } void DwarfDebug::finalizeModuleInfo() { @@ -600,6 +613,13 @@ void DwarfDebug::finalizeModuleInfo() { finishVariableDefinitions(); + // Include the DWO file name in the hash if there's more than one CU. + // This handles ThinLTO's situation where imported CUs may very easily be + // duplicate with the same CU partially imported into another ThinLTO unit. + StringRef DWOName; + if (CUMap.size() > 1) + DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile; + // Handle anything that needs to be done on a per-unit basis after // all other generation. for (const auto &P : CUMap) { @@ -614,7 +634,8 @@ void DwarfDebug::finalizeModuleInfo() { auto *SkCU = TheCU.getSkeleton(); if (useSplitDwarf()) { // Emit a unique identifier for this CU. - uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie()); + uint64_t ID = + DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie()); TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, ID); SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, @@ -723,64 +744,40 @@ void DwarfDebug::endModule() { } // Emit the pubnames and pubtypes sections if requested. - if (HasDwarfPubSections) { + // The condition is optimistically correct - any CU not using GMLT (& + // implicit/default pubnames state) might still have pubnames. + if (hasDwarfPubSections(/* gmlt */ false)) { emitDebugPubNames(GenerateGnuPubSections); emitDebugPubTypes(GenerateGnuPubSections); } // clean up. - AbstractVariables.clear(); -} - -// Find abstract variable, if any, associated with Var. -DbgVariable * -DwarfDebug::getExistingAbstractVariable(InlinedVariable IV, - const DILocalVariable *&Cleansed) { - // More then one inlined variable corresponds to one abstract variable. - Cleansed = IV.first; - auto I = AbstractVariables.find(Cleansed); - if (I != AbstractVariables.end()) - return I->second.get(); - return nullptr; -} - -DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) { - const DILocalVariable *Cleansed; - return getExistingAbstractVariable(IV, Cleansed); + // FIXME: AbstractVariables.clear(); } -void DwarfDebug::createAbstractVariable(const DILocalVariable *Var, - LexicalScope *Scope) { - assert(Scope && Scope->isAbstractScope()); - auto AbsDbgVariable = make_unique(Var, /* IA */ nullptr); - InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); - AbstractVariables[Var] = std::move(AbsDbgVariable); -} - -void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV, +void DwarfDebug::ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable IV, const MDNode *ScopeNode) { const DILocalVariable *Cleansed = nullptr; - if (getExistingAbstractVariable(IV, Cleansed)) + if (CU.getExistingAbstractVariable(IV, Cleansed)) return; - createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( + CU.createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( cast(ScopeNode))); } -void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped( +void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable IV, const MDNode *ScopeNode) { const DILocalVariable *Cleansed = nullptr; - if (getExistingAbstractVariable(IV, Cleansed)) + if (CU.getExistingAbstractVariable(IV, Cleansed)) return; if (LexicalScope *Scope = LScopes.findAbstractScope(cast_or_null(ScopeNode))) - createAbstractVariable(Cleansed, Scope); + CU.createAbstractVariable(Cleansed, Scope); } - // Collect variable information from side table maintained by MF. void DwarfDebug::collectVariableInfoFromMFTable( - DenseSet &Processed) { + DwarfCompileUnit &TheCU, DenseSet &Processed) { for (const auto &VI : Asm->MF->getVariableDbgInfo()) { if (!VI.Var) continue; @@ -795,7 +792,7 @@ void DwarfDebug::collectVariableInfoFromMFTable( if (!Scope) continue; - ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode()); + ensureAbstractVariableIsCreatedIfScoped(TheCU, Var, Scope->getScopeNode()); auto RegVar = make_unique(Var.first, Var.second); RegVar->initializeMMI(VI.Expr, VI.Slot); if (InfoHolder.addScopeVariable(Scope, RegVar.get())) @@ -966,24 +963,69 @@ DwarfDebug::buildLocationList(SmallVectorImpl &DebugLoc, } } -DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope, +DbgVariable *DwarfDebug::createConcreteVariable(DwarfCompileUnit &TheCU, + LexicalScope &Scope, InlinedVariable IV) { - ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode()); + ensureAbstractVariableIsCreatedIfScoped(TheCU, IV, Scope.getScopeNode()); ConcreteVariables.push_back(make_unique(IV.first, IV.second)); InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get()); return ConcreteVariables.back().get(); } -// Determine whether this DBG_VALUE is valid at the beginning of the function. -static bool validAtEntry(const MachineInstr *MInsn) { - auto MBB = MInsn->getParent(); - // Is it in the entry basic block? - if (!MBB->pred_empty()) +/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its +/// enclosing lexical scope. The check ensures there are no other instructions +/// in the same lexical scope preceding the DBG_VALUE and that its range is +/// either open or otherwise rolls off the end of the scope. +static bool validThroughout(LexicalScopes &LScopes, + const MachineInstr *DbgValue, + const MachineInstr *RangeEnd) { + assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location"); + auto MBB = DbgValue->getParent(); + auto DL = DbgValue->getDebugLoc(); + auto *LScope = LScopes.findLexicalScope(DL); + // Scope doesn't exist; this is a dead DBG_VALUE. + if (!LScope) + return false; + auto &LSRange = LScope->getRanges(); + if (LSRange.size() == 0) + return false; + + // Determine if the DBG_VALUE is valid at the beginning of its lexical block. + const MachineInstr *LScopeBegin = LSRange.front().first; + // Early exit if the lexical scope begins outside of the current block. + if (LScopeBegin->getParent() != MBB) return false; - for (MachineBasicBlock::const_reverse_iterator I(MInsn); I != MBB->rend(); ++I) - if (!(I->isDebugValue() || I->getFlag(MachineInstr::FrameSetup))) + MachineBasicBlock::const_reverse_iterator Pred(DbgValue); + for (++Pred; Pred != MBB->rend(); ++Pred) { + if (Pred->getFlag(MachineInstr::FrameSetup)) + break; + auto PredDL = Pred->getDebugLoc(); + if (!PredDL || Pred->isDebugValue()) + continue; + // Check whether the instruction preceding the DBG_VALUE is in the same + // (sub)scope as the DBG_VALUE. + if (DL->getScope() == PredDL->getScope() || + LScope->dominates(LScopes.findLexicalScope(PredDL))) return false; - return true; + } + + // If the range of the DBG_VALUE is open-ended, report success. + if (!RangeEnd) + return true; + + // Fail if there are instructions belonging to our scope in another block. + const MachineInstr *LScopeEnd = LSRange.back().second; + if (LScopeEnd->getParent() != MBB) + return false; + + // Single, constant DBG_VALUEs in the prologue are promoted to be live + // throughout the function. This is a hack, presumably for DWARF v2 and not + // necessarily correct. It would be much better to use a dbg.declare instead + // if we know the constant is live throughout the scope. + if (DbgValue->getOperand(0).isImm() && MBB->pred_empty()) + return true; + + return false; } // Find variables for each lexical scope. @@ -991,7 +1033,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, DenseSet &Processed) { // Grab the variable info that was squirreled away in the MMI side-table. - collectVariableInfoFromMFTable(Processed); + collectVariableInfoFromMFTable(TheCU, Processed); for (const auto &I : DbgValues) { InlinedVariable IV = I.first; @@ -1013,16 +1055,14 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, continue; Processed.insert(IV); - DbgVariable *RegVar = createConcreteVariable(*Scope, IV); + DbgVariable *RegVar = createConcreteVariable(TheCU, *Scope, IV); const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - // Check if there is a single DBG_VALUE, valid throughout the function. - // A single constant is also considered valid for the entire function. + // Check if there is a single DBG_VALUE, valid throughout the var's scope. if (Ranges.size() == 1 && - (MInsn->getOperand(0).isImm() || - (validAtEntry(MInsn) && Ranges.front().second == nullptr))) { + validThroughout(LScopes, MInsn, Ranges.front().second)) { RegVar->initializeDbgValue(MInsn); continue; } @@ -1049,7 +1089,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, for (const DILocalVariable *DV : SP->getVariables()) { if (Processed.insert(InlinedVariable(DV, nullptr)).second) if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) - createConcreteVariable(*Scope, InlinedVariable(DV, nullptr)); + createConcreteVariable(TheCU, *Scope, InlinedVariable(DV, nullptr)); } } @@ -1058,8 +1098,12 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); assert(CurMI); + const auto *SP = MI->getParent()->getParent()->getFunction()->getSubprogram(); + if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) + return; + // Check if source location changes, but ignore DBG_VALUE and CFI locations. - if (MI->isDebugValue() || MI->isCFIInstruction()) + if (MI->isMetaInstruction()) return; const DebugLoc &DL = MI->getDebugLoc(); // When we emit a line-0 record, we don't update PrevInstLoc; so look at @@ -1141,7 +1185,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { // the beginning of the function body. for (const auto &MBB : *MF) for (const auto &MI : MBB) - if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) return MI.getDebugLoc(); return DebugLoc(); @@ -1152,40 +1196,28 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { CurFn = MF; - if (LScopes.empty()) + auto *SP = MF->getFunction()->getSubprogram(); + assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode()); + if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); + // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function // belongs to so that we add to the correct per-cu line table in the // non-asm case. - LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - // FnScope->getScopeNode() and DI->second should represent the same function, - // though they may not be the same MDNode due to inline functions merged in - // LTO where the debug info metadata still differs (either due to distinct - // written differences - two versions of a linkonce_odr function - // written/copied into two separate files, or some sub-optimal metadata that - // isn't structurally identical (see: file path/name info from clang, which - // includes the directory of the cpp file being built, even when the file name - // is absolute (such as an <> lookup header))) - auto *SP = cast(FnScope->getScopeNode()); - DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit()); - if (!TheCU) { - assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug && - "DICompileUnit missing from llvm.dbg.cu?"); - return; - } if (Asm->OutStreamer->hasRawTextSupport()) // Use a single line table if we are generating assembly. Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); else - Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); + Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID()); // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); - if (DILocation *L = PrologEndLoc) { + if (PrologEndLoc) { // We'd like to list the prologue as "not statements" but GDB behaves // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - auto *SP = L->getInlinedAtScope()->getSubprogram(); + auto *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram(); recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); } } @@ -1240,12 +1272,12 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { for (const DILocalVariable *DV : SP->getVariables()) { if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) continue; - ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr), + ensureAbstractVariableIsCreated(TheCU, InlinedVariable(DV, nullptr), DV->getScope()); assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes && "ensureAbstractVariableIsCreated inserted abstract scopes"); } - constructAbstractSubprogramScopeDIE(AScope); + constructAbstractSubprogramScopeDIE(TheCU, AScope); } ProcessedSPNodes.insert(SP); @@ -1425,7 +1457,7 @@ void DwarfDebug::emitDebugPubSection( const auto &Globals = (TheU->*Accessor)(); - if (Globals.empty()) + if (!hasDwarfPubSections(TheU->includeMinimalInlineScopes())) continue; if (auto *Skeleton = TheU->getSkeleton()) @@ -1517,13 +1549,12 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.addUnsignedConstant(Value.getInt()); } else if (Value.isLocation()) { MachineLocation Location = Value.getLoc(); - + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); SmallVector Ops; - // FIXME: Should this condition be Location.isIndirect() instead? - if (Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + if (Location.isIndirect() && Location.getOffset()) { + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); - Ops.push_back(dwarf::DW_OP_deref); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIExpressionCursor Cursor(Ops); @@ -1575,10 +1606,13 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { // Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { + if (DebugLocs.getLists().empty()) + return; + // Start the dwarf loc section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); - unsigned char Size = Asm->getDataLayout().getPointerSize(); + unsigned char Size = Asm->MAI->getCodePointerSize(); for (const auto &List : DebugLocs.getLists()) { Asm->OutStreamer->EmitLabel(List.Label); const DwarfCompileUnit *CU = List.CU; @@ -1708,7 +1742,7 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfARangesSection()); - unsigned PtrSize = Asm->getDataLayout().getPointerSize(); + unsigned PtrSize = Asm->MAI->getCodePointerSize(); // Build a list of CUs used. std::vector CUs; @@ -1786,12 +1820,15 @@ void DwarfDebug::emitDebugARanges() { /// Emit address ranges into a debug ranges section. void DwarfDebug::emitDebugRanges() { + if (CUMap.empty()) + return; + // Start the dwarf ranges section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); // Size for our labels. - unsigned char Size = Asm->getDataLayout().getPointerSize(); + unsigned char Size = Asm->MAI->getCodePointerSize(); // Grab the specific ranges for the compile units in the module. for (const auto &I : CUMap) { @@ -1865,6 +1902,9 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { /// Emit macros into a debug macinfo section. void DwarfDebug::emitDebugMacinfo() { + if (CUMap.empty()) + return; + // Start the dwarf macinfo section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfMacinfoSection()); @@ -1886,7 +1926,7 @@ void DwarfDebug::emitDebugMacinfo() { void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr NewU) { NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, - U.getCUNode()->getSplitDebugFilename()); + Asm->TM.Options.MCOptions.SplitDwarfFile); if (!CompilationDir.empty()) NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 8a96e7867b6e3da02a28ad9c30c56482a865c422..5dfe06c64ec22b6134e7498012f16bc6c999dd2d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -134,6 +134,13 @@ public: assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); + if (FrameIndexExprs.size()) { + auto *Expr = FrameIndexExprs.back().Expr; + // Get rid of duplicate non-fragment entries. More than one non-fragment + // dbg.declare makes no sense so ignore all but the first. + if (!Expr || !Expr->isFragment()) + return; + } FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); assert(all_of(FrameIndexExprs, [](FrameIndexExpr &FIE) { @@ -210,7 +217,6 @@ class DwarfDebug : public DebugHandlerBase { DenseMap SymSize; /// Collection of abstract variables. - DenseMap> AbstractVariables; SmallVector, 64> ConcreteVariables; /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists @@ -247,9 +253,6 @@ class DwarfDebug : public DebugHandlerBase { std::pair, const DICompositeType *>, 1> TypeUnitsUnderConstruction; - /// Whether to emit the pubnames/pubtypes sections. - bool HasDwarfPubSections; - /// Whether to use the GNU TLS opcode (instead of the standard opcode). bool UseGNUTLSOpcode; @@ -313,20 +316,16 @@ class DwarfDebug : public DebugHandlerBase { typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; - /// Find abstract variable associated with Var. - DbgVariable *getExistingAbstractVariable(InlinedVariable IV, - const DILocalVariable *&Cleansed); - DbgVariable *getExistingAbstractVariable(InlinedVariable IV); - void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); - void ensureAbstractVariableIsCreated(InlinedVariable Var, + void ensureAbstractVariableIsCreated(DwarfCompileUnit &CU, InlinedVariable Var, const MDNode *Scope); - void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var, + void ensureAbstractVariableIsCreatedIfScoped(DwarfCompileUnit &CU, InlinedVariable Var, const MDNode *Scope); - DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV); + DbgVariable *createConcreteVariable(DwarfCompileUnit &TheCU, + LexicalScope &Scope, InlinedVariable IV); /// Construct a DIE for this abstract scope. - void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); void finishVariableDefinitions(); @@ -420,11 +419,11 @@ class DwarfDebug : public DebugHandlerBase { /// Flags to let the linker know we have emitted new style pubnames. Only /// emit it here if we don't have a skeleton CU for split dwarf. - void addGnuPubAttributes(DwarfUnit &U, DIE &D) const; + void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const; /// Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit); + DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit); /// Construct imported_module or imported_declaration DIE. void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, @@ -446,7 +445,8 @@ class DwarfDebug : public DebugHandlerBase { const DbgValueHistoryMap::InstrRanges &Ranges); /// Collect variable information from the side table maintained by MF. - void collectVariableInfoFromMFTable(DenseSet &P); + void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU, + DenseSet &P); protected: /// Gather pre-function debug information. @@ -518,6 +518,8 @@ public: /// split dwarf proposal support. bool useSplitDwarf() const { return HasSplitDwarf; } + bool shareAcrossDWOCUs() const; + /// Returns the Dwarf Version. uint16_t getDwarfVersion() const; @@ -558,6 +560,8 @@ public: /// A helper function to check whether the DIE for a given Scope is /// going to be null. bool isLexicalScopeDIENull(LexicalScope *Scope); + + bool hasDwarfPubSections(bool includeMinimalInlineScopes) const; }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index debe88f3b1ee168f3316138515f4b96303ea9f81..fe38ee805682c5f147cd57da88dce940b03efc4f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -14,8 +14,8 @@ #include "DwarfExpression.h" #include "DwarfDebug.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -23,9 +23,12 @@ using namespace llvm; void DwarfExpression::addReg(int DwarfReg, const char *Comment) { - assert(DwarfReg >= 0 && "invalid negative dwarf register number"); - if (DwarfReg < 32) { - emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + assert((LocationKind == Unknown || LocationKind == Register) && + "location description already locked down"); + LocationKind = Register; + if (DwarfReg < 32) { + emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); } else { emitOp(dwarf::DW_OP_regx, Comment); emitUnsigned(DwarfReg); @@ -34,6 +37,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) { void DwarfExpression::addBReg(int DwarfReg, int Offset) { assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + assert(LocationKind != Register && "location description already locked down"); if (DwarfReg < 32) { emitOp(dwarf::DW_OP_breg0 + DwarfReg); } else { @@ -113,8 +117,9 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, // Otherwise, attempt to find a covering set of sub-register numbers. // For example, Q0 on ARM is a composition of D0+D1. unsigned CurPos = 0; - // The size of the register in bits, assuming 8 bits per byte. - unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; + // The size of the register in bits. + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg); + unsigned RegSize = TRI.getRegSizeInBits(*RC); // Keep track of the bits in the register we already emitted, so we // can avoid emitting redundant aliasing subregs. SmallBitVector Coverage(RegSize, false); @@ -156,18 +161,23 @@ void DwarfExpression::addStackValue() { } void DwarfExpression::addSignedConstant(int64_t Value) { + assert(LocationKind == Implicit || LocationKind == Unknown); + LocationKind = Implicit; emitOp(dwarf::DW_OP_consts); emitSigned(Value); - addStackValue(); } void DwarfExpression::addUnsignedConstant(uint64_t Value) { + assert(LocationKind == Implicit || LocationKind == Unknown); + LocationKind = Implicit; emitOp(dwarf::DW_OP_constu); emitUnsigned(Value); - addStackValue(); } void DwarfExpression::addUnsignedConstant(const APInt &Value) { + assert(LocationKind == Implicit || LocationKind == Unknown); + LocationKind = Implicit; + unsigned Size = Value.getBitWidth(); const uint64_t *Data = Value.getRawData(); @@ -178,7 +188,8 @@ void DwarfExpression::addUnsignedConstant(const APInt &Value) { addUnsignedConstant(*Data++); if (Offset == 0 && Size <= 64) break; - addOpPiece(std::min(Size-Offset, 64u), Offset); + addStackValue(); + addOpPiece(std::min(Size - Offset, 64u), Offset); Offset += 64; } } @@ -188,8 +199,10 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, unsigned MachineReg, unsigned FragmentOffsetInBits) { auto Fragment = ExprCursor.getFragmentInfo(); - if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) + if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) { + LocationKind = Unknown; return false; + } bool HasComplexExpression = false; auto Op = ExprCursor.peek(); @@ -202,11 +215,12 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // operation to multiple DW_OP_pieces. if (HasComplexExpression && DwarfRegs.size() > 1) { DwarfRegs.clear(); + LocationKind = Unknown; return false; } // Handle simple register locations. - if (!HasComplexExpression) { + if (LocationKind != Memory && !HasComplexExpression) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); @@ -216,62 +230,76 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, return true; } + // Don't emit locations that cannot be expressed without DW_OP_stack_value. + if (DwarfVersion < 4) + if (std::any_of(ExprCursor.begin(), ExprCursor.end(), + [](DIExpression::ExprOperand Op) -> bool { + return Op.getOp() == dwarf::DW_OP_stack_value; + })) { + DwarfRegs.clear(); + LocationKind = Unknown; + return false; + } + assert(DwarfRegs.size() == 1); auto Reg = DwarfRegs[0]; - bool FBReg = isFrameRegister(TRI, MachineReg); + bool FBReg = isFrameRegister(TRI, MachineReg); + int SignedOffset = 0; assert(Reg.Size == 0 && "subregister has same size as superregister"); // Pattern-match combinations for which more efficient representations exist. - switch (Op->getOp()) { - default: { - if (FBReg) - addFBReg(0); - else - addReg(Reg.DwarfRegNo, 0); - break; + // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset]. + if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) { + SignedOffset = Op->getArg(0); + ExprCursor.take(); } - case dwarf::DW_OP_plus: - case dwarf::DW_OP_minus: { - // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset]. - // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset]. + + // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset] + // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset] + // If Reg is a subregister we need to mask it out before subtracting. + if (Op && Op->getOp() == dwarf::DW_OP_constu) { auto N = ExprCursor.peekNext(); - if (N && N->getOp() == dwarf::DW_OP_deref) { + if (N && (N->getOp() == dwarf::DW_OP_plus || + (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { int Offset = Op->getArg(0); - int SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset; - if (FBReg) - addFBReg(SignedOffset); - else - addBReg(Reg.DwarfRegNo, SignedOffset); - + SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? -Offset : Offset; ExprCursor.consume(2); - break; } - addReg(Reg.DwarfRegNo, 0); - break; - } - case dwarf::DW_OP_deref: - // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. - if (FBReg) - addFBReg(0); - else - addBReg(Reg.DwarfRegNo, 0); - ExprCursor.take(); - break; } + + if (FBReg) + addFBReg(SignedOffset); + else + addBReg(Reg.DwarfRegNo, SignedOffset); DwarfRegs.clear(); return true; } -void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, - unsigned FragmentOffsetInBits) { +/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?". +static bool isMemoryLocation(DIExpressionCursor ExprCursor) { while (ExprCursor) { auto Op = ExprCursor.take(); + switch (Op->getOp()) { + case dwarf::DW_OP_deref: + case dwarf::DW_OP_LLVM_fragment: + break; + default: + return false; + } + } + return true; +} - // If we need to mask out a subregister, do it now, unless the next - // operation would emit an OpPiece anyway. - if (SubRegisterSizeInBits && Op->getOp() != dwarf::DW_OP_LLVM_fragment) - maskSubRegister(); +void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, + unsigned FragmentOffsetInBits) { + // If we need to mask out a subregister, do it now, unless the next + // operation would emit an OpPiece anyway. + auto N = ExprCursor.peek(); + if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment)) + maskSubRegister(); + while (ExprCursor) { + auto Op = ExprCursor.take(); switch (Op->getOp()) { case dwarf::DW_OP_LLVM_fragment: { unsigned SizeInBits = Op->getArg(1); @@ -281,50 +309,70 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, // location. assert(OffsetInBits >= FragmentOffset && "fragment offset not added?"); - // If \a addMachineReg already emitted DW_OP_piece operations to represent + // If addMachineReg already emitted DW_OP_piece operations to represent // a super-register by splicing together sub-registers, subtract the size // of the pieces that was already emitted. SizeInBits -= OffsetInBits - FragmentOffset; - // If \a addMachineReg requested a DW_OP_bit_piece to stencil out a + // If addMachineReg requested a DW_OP_bit_piece to stencil out a // sub-register that is smaller than the current fragment's size, use it. if (SubRegisterSizeInBits) SizeInBits = std::min(SizeInBits, SubRegisterSizeInBits); - + + // Emit a DW_OP_stack_value for implicit location descriptions. + if (LocationKind == Implicit) + addStackValue(); + + // Emit the DW_OP_piece. addOpPiece(SizeInBits, SubRegisterOffsetInBits); setSubRegisterPiece(0, 0); - break; + // Reset the location description kind. + LocationKind = Unknown; + return; } - case dwarf::DW_OP_plus: + case dwarf::DW_OP_plus_uconst: + assert(LocationKind != Register); emitOp(dwarf::DW_OP_plus_uconst); emitUnsigned(Op->getArg(0)); break; + case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: - // There is no OP_minus_uconst. - emitOp(dwarf::DW_OP_constu); - emitUnsigned(Op->getArg(0)); - emitOp(dwarf::DW_OP_minus); + emitOp(Op->getOp()); break; - case dwarf::DW_OP_deref: - emitOp(dwarf::DW_OP_deref); + case dwarf::DW_OP_deref: { + assert(LocationKind != Register); + if (LocationKind != Memory && isMemoryLocation(ExprCursor)) + // Turning this into a memory location description makes the deref + // implicit. + LocationKind = Memory; + else + emitOp(dwarf::DW_OP_deref); break; + } case dwarf::DW_OP_constu: + assert(LocationKind != Register); emitOp(dwarf::DW_OP_constu); emitUnsigned(Op->getArg(0)); break; case dwarf::DW_OP_stack_value: - addStackValue(); + LocationKind = Implicit; break; case dwarf::DW_OP_swap: + assert(LocationKind != Register); emitOp(dwarf::DW_OP_swap); break; case dwarf::DW_OP_xderef: + assert(LocationKind != Register); emitOp(dwarf::DW_OP_xderef); break; default: llvm_unreachable("unhandled opcode found in expression"); } } + + if (LocationKind == Implicit) + // Turn this into an implicit location description. + addStackValue(); } /// add masking operations to stencil out a subregister. diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index e8dc211eb3c22778739ab0f7157804eb708d457b..728f8ad9225bc20e0c2b9457c9c3a36da42c4d3a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -42,6 +42,9 @@ public: DIExpressionCursor(ArrayRef Expr) : Start(Expr.begin()), End(Expr.end()) {} + DIExpressionCursor(const DIExpressionCursor &C) + : Start(C.Start), End(C.End) {} + /// Consume one operation. Optional take() { if (Start == End) @@ -72,6 +75,8 @@ public: } /// Determine whether there are any operations left in this expression. operator bool() const { return Start != End; } + DIExpression::expr_op_iterator begin() const { return Start; } + DIExpression::expr_op_iterator end() const { return End; } /// Retrieve the fragment information, if any. Optional getFragmentInfo() const { @@ -102,6 +107,9 @@ protected: unsigned SubRegisterSizeInBits = 0; unsigned SubRegisterOffsetInBits = 0; + /// The kind of location description being produced. + enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown; + /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed /// to represent a subregister. void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) { @@ -122,7 +130,8 @@ protected: /// current function. virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0; - /// Emit a DW_OP_reg operation. + /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF + /// register location description. void addReg(int DwarfReg, const char *Comment = nullptr); /// Emit a DW_OP_breg operation. void addBReg(int DwarfReg, int Offset); @@ -185,11 +194,18 @@ public: /// Emit an unsigned constant. void addUnsignedConstant(const APInt &Value); + /// Lock this down to become a memory location description. + void setMemoryLocationKind() { + assert(LocationKind == Unknown); + LocationKind = Memory; + } + /// Emit a machine register location. As an optimization this may also consume /// the prefix of a DwarfExpression if a more efficient representation for /// combining the register location and the first operation exists. /// - /// \param FragmentOffsetInBits If this is one fragment out of a fragmented + /// \param FragmentOffsetInBits If this is one fragment out of a + /// fragmented /// location, this is the offset of the /// fragment inside the entire variable. /// \return false if no DWARF register exists diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h index d4d2ed27727485acd9640eaa0e22aae9e02d868c..54924e9806ed1565059cce430040d010e7cba90d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -53,6 +53,7 @@ class DwarfFile { // Collection of abstract subprogram DIEs. DenseMap AbstractSPDies; + DenseMap> AbstractVariables; /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can /// be shared across CUs, that is why we keep the map here instead @@ -105,6 +106,9 @@ public: DenseMap &getAbstractSPDies() { return AbstractSPDies; } + DenseMap> &getAbstractVariables() { + return AbstractVariables; + } void insertDIE(const MDNode *TypeMD, DIE *Die) { DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index bad5b09553cdc123fc48b9952482bbf5ad3b09a7..708f5f7536ff1a1dcfc80cfd16e247fe37f55a59 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -18,18 +18,19 @@ #include "DwarfExpression.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Metadata.h" -#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -73,8 +74,8 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) - : DIEUnit(A->getDwarfVersion(), A->getPointerSize(), UnitTag), CUNode(Node), - Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) { + : DIEUnit(A->getDwarfVersion(), A->MAI->getCodePointerSize(), UnitTag), + CUNode(Node), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) { } DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, @@ -172,7 +173,7 @@ int64_t DwarfUnit::getDefaultLowerBound() const { } /// Check whether the DIE for this MDNode can be shared across CUs. -static bool isShareableAcrossCUs(const DINode *D) { +bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { // When the MDNode can be part of the type system, the DIE can be shared // across CUs. // Combining type units and cross-CU DIE sharing is lower value (since @@ -180,6 +181,8 @@ static bool isShareableAcrossCUs(const DINode *D) { // level already) but may be implementable for some value in projects // building multiple independent libraries with LTO and then linking those // together. + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return false; return (isa(D) || (isa(D) && !cast(D)->isDefinition())) && !GenerateDwarfTypeUnits; @@ -374,10 +377,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); } -void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) { - addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); -} - /* Byref variables, in Blocks, are declared by the programmer as "SomeType VarName;", but the compiler creates a __Block_byref_x_VarName struct, and gives the variable VarName either the struct, or a pointer to the struct, as @@ -471,12 +470,13 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // variable's location. DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); SmallVector Ops; - if (Location.isIndirect()) { - Ops.push_back(dwarf::DW_OP_plus); + if (Location.isIndirect() && Location.getOffset()) { + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); - Ops.push_back(dwarf::DW_OP_deref); } // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). @@ -487,7 +487,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(forwardingFieldOffset); } @@ -499,7 +499,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(varFieldOffset); } @@ -647,7 +647,7 @@ void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { addString(Die, DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name : dwarf::DW_AT_MIPS_linkage_name, - GlobalValue::getRealLinkageName(LinkageName)); + GlobalValue::dropLLVMManglingEscape(LinkageName)); } void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { @@ -660,6 +660,14 @@ void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { } } +/// Add thrown types. +void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) { + for (const auto *Ty : ThrownTypes) { + DIE &TT = createAndAddDIE(dwarf::DW_TAG_thrown_type, Die); + addType(TT, cast(Ty)); + } +} + DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { if (!Context || isa(Context)) return &getUnitDie(); @@ -1075,7 +1083,6 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { Name = "(anonymous namespace)"; DD->addAccelNamespace(Name, NDie); addGlobalName(Name, NDie, NS->getScope()); - addSourceLine(NDie, NS); if (NS->getExportSymbols()) addFlag(NDie, dwarf::DW_AT_export_symbols); return &NDie; @@ -1247,6 +1254,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, constructSubprogramArguments(SPDie, Args); } + addThrownTypes(SPDie, SP->getThrownTypes()); + if (SP->isArtificial()) addFlag(SPDie, dwarf::DW_AT_artificial); @@ -1546,7 +1555,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { Asm->OutStreamer->AddComment("DWARF Unit Type"); Asm->EmitInt8(UT); Asm->OutStreamer->AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + Asm->EmitInt8(Asm->MAI->getCodePointerSize()); } // We share one abbreviations table across all units so it's always at the @@ -1562,7 +1571,7 @@ void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { if (Version <= 4) { Asm->OutStreamer->AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + Asm->EmitInt8(Asm->MAI->getCodePointerSize()); } } @@ -1593,3 +1602,11 @@ void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) { getCU().addGlobalTypeUnitType(Ty, Context); } + +const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const { + if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + return nullptr; + if (isDwoUnit()) + return nullptr; + return getSection()->getBeginSymbol(); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index d626ef920f956a9440a12bba0a2fc31487b83f51..7acad2cbd89fc1ddcc268529dc6ccb822175a364 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -65,7 +65,7 @@ public: //===----------------------------------------------------------------------===// /// This dwarf writer support class manages information associated with a /// source file. - class DwarfUnit : public DIEUnit { +class DwarfUnit : public DIEUnit { protected: /// MDNode for the compile unit. const DICompileUnit *CUNode; @@ -103,9 +103,10 @@ protected: bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); -public: - virtual ~DwarfUnit(); + bool shareAcrossDWOCUs() const; + bool isShareableAcrossCUs(const DINode *D) const; +public: // Accessors. AsmPrinter* getAsmPrinter() const { return Asm; } uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } @@ -212,7 +213,6 @@ public: void addSourceLine(DIE &Die, const DIGlobalVariable *G); void addSourceLine(DIE &Die, const DISubprogram *SP); void addSourceLine(DIE &Die, const DIType *Ty); - void addSourceLine(DIE &Die, const DINamespace *NS); void addSourceLine(DIE &Die, const DIObjCProperty *Ty); /// Add constant value entry in variable DIE. @@ -232,6 +232,9 @@ public: /// Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DINodeArray TParams); + /// Add thrown types. + void addThrownTypes(DIE &Die, DINodeArray ThrownTypes); + // FIXME: Should be reformulated in terms of addComplexAddress. /// Start with the address based on the location provided, and generate the /// DWARF information necessary to find the actual Block variable (navigating @@ -289,6 +292,8 @@ public: void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); protected: + ~DwarfUnit(); + /// Create new static data member DIE. DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); @@ -335,9 +340,10 @@ private: void setIndexTyDie(DIE *D) { IndexTyDie = D; } virtual bool isDwoUnit() const = 0; + const MCSymbol *getCrossSectionRelativeBaseAddress() const override; }; -class DwarfTypeUnit : public DwarfUnit { +class DwarfTypeUnit final : public DwarfUnit { uint64_t TypeSignature; const DIE *Ty; DwarfCompileUnit &CU; diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 0a4a7a06cb2e79f1d89de39bdae564e30c3b6404..e14d5be1177a6e915a2f344691a642ac4e43c98c 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -309,7 +309,7 @@ computeCallSiteTable(SmallVectorImpl &CallSites, // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. - if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) { + if (SawPotentiallyThrowing && !IsSJLJ) { CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; CallSites.push_back(Site); } diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp index 342efc3611c784ff0af28a40b5c5a0f26e604438..c5795559fb7d62e7d0e74687e33b27a347b51de1 100644 --- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" @@ -25,8 +26,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Support/ELF.h" +#include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt index 2bb66d12f376166ce0a9543f46d4de0d046b71a5..bde8148d259bc1e6a8e76fd85d438e6a9c81601a 100644 --- a/lib/CodeGen/AsmPrinter/LLVMBuild.txt +++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = AsmPrinter parent = Libraries -required_libraries = Analysis CodeGen Core DebugInfoCodeView DebugInfoMSF MC MCParser Support Target +required_libraries = Analysis BinaryFormat CodeGen Core DebugInfoCodeView DebugInfoMSF MC MCParser Support Target diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 8baee4db772e8d6d1736f683c5185c8a9813efc2..035f1a0063aae8772a31347faed560b8347df211 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -1,4 +1,4 @@ -//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===// +//===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===// // // The LLVM Compiler Infrastructure // @@ -11,23 +11,27 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GCs.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include +#include +#include +#include + using namespace llvm; namespace { @@ -37,7 +41,8 @@ public: void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; }; -} + +} // end anonymous namespace static GCMetadataPrinterRegistry::Add Y("ocaml", "ocaml 3.10-compatible collector"); @@ -50,7 +55,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { std::string SymName; SymName += "caml"; size_t Letter = SymName.size(); - SymName.append(MId.begin(), find(MId, '.')); + SymName.append(MId.begin(), llvm::find(MId, '.')); SymName += "__"; SymName += Id; diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 704f0ac2f191951a99b94b4fb5de6119de7acebe..5d485f213573d1fe525803731c92f5719c92cf23 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -14,6 +14,8 @@ #include "WinException.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,8 +31,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetFrameLowering.h" @@ -101,7 +101,7 @@ void WinException::beginFunction(const MachineFunction *MF) { // functions may still refer to it. const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); StringRef FLinkageName = - GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); } shouldEmitLSDA = hasEHFunclets; @@ -174,7 +174,7 @@ static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm, // their funclet entry block's number. const MachineFunction *MF = MBB->getParent(); const Function *F = MF->getFunction(); - StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); MCContext &Ctx = MF->getContext(); StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch"; return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" + @@ -252,7 +252,7 @@ void WinException::endFunclet() { !CurrentFuncletEntry->isCleanupFuncletEntry()) { // If this is a C++ catch funclet (or the parent function), // emit a reference to the LSDA for the parent function. - StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( Twine("$cppxdata$", FuncLinkageName)); Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); @@ -536,7 +536,7 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { // Emit a label assignment with the SEH frame offset so we can use it for // llvm.x86.seh.recoverfp. StringRef FLinkageName = - GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + GlobalValue::dropLLVMManglingEscape(MF->getFunction()->getName()); MCSymbol *ParentFrameOffset = Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); const MCExpr *MCOffset = @@ -635,7 +635,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { auto &OS = *Asm->OutStreamer; const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); - StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); SmallVector, 4> IPToStateTable; MCSymbol *FuncInfoXData = nullptr; @@ -942,7 +942,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, void WinException::emitExceptHandlerTable(const MachineFunction *MF) { MCStreamer &OS = *Asm->OutStreamer; const Function *F = MF->getFunction(); - StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName()); + StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F->getName()); bool VerboseAsm = OS.isVerboseAsm(); auto AddComment = [&](const Twine &Comment) { diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 9c19a4fd3c3e0044c56415d35d973a4550af52ae..344136b1f19565cbd022e2658f1f2d56410b0f40 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" @@ -35,20 +36,17 @@ using namespace llvm; namespace { class AtomicExpand: public FunctionPass { - const TargetMachine *TM; const TargetLowering *TLI; public: static char ID; // Pass identification, replacement for typeid - explicit AtomicExpand(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM), TLI(nullptr) { + AtomicExpand() : FunctionPass(ID), TLI(nullptr) { initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; private: - bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, - bool IsStore, bool IsLoad); + bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); bool tryExpandAtomicLoad(LoadInst *LI); @@ -98,12 +96,10 @@ namespace { char AtomicExpand::ID = 0; char &llvm::AtomicExpandID = AtomicExpand::ID; -INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions", - false, false) +INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", + false, false) -FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { - return new AtomicExpand(TM); -} +FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } namespace { // Helper functions to retrieve the size of atomic instructions. @@ -173,9 +169,14 @@ bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) { } // end anonymous namespace bool AtomicExpand::runOnFunction(Function &F) { - if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand()) + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + auto &TM = TPC->getTM(); + if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) return false; - TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + TLI = TM.getSubtargetImpl(F)->getTargetLowering(); SmallVector AtomicInsts; @@ -224,22 +225,16 @@ bool AtomicExpand::runOnFunction(Function &F) { if (TLI->shouldInsertFencesForAtomic(I)) { auto FenceOrdering = AtomicOrdering::Monotonic; - bool IsStore, IsLoad; if (LI && isAcquireOrStronger(LI->getOrdering())) { FenceOrdering = LI->getOrdering(); LI->setOrdering(AtomicOrdering::Monotonic); - IsStore = false; - IsLoad = true; } else if (SI && isReleaseOrStronger(SI->getOrdering())) { FenceOrdering = SI->getOrdering(); SI->setOrdering(AtomicOrdering::Monotonic); - IsStore = true; - IsLoad = false; } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) || isAcquireOrStronger(RMWI->getOrdering()))) { FenceOrdering = RMWI->getOrdering(); RMWI->setOrdering(AtomicOrdering::Monotonic); - IsStore = IsLoad = true; } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) && (isReleaseOrStronger(CASI->getSuccessOrdering()) || isAcquireOrStronger(CASI->getSuccessOrdering()))) { @@ -250,11 +245,10 @@ bool AtomicExpand::runOnFunction(Function &F) { FenceOrdering = CASI->getSuccessOrdering(); CASI->setSuccessOrdering(AtomicOrdering::Monotonic); CASI->setFailureOrdering(AtomicOrdering::Monotonic); - IsStore = IsLoad = true; } if (FenceOrdering != AtomicOrdering::Monotonic) { - MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad); + MadeChange |= bracketInstWithFences(I, FenceOrdering); } } @@ -320,13 +314,12 @@ bool AtomicExpand::runOnFunction(Function &F) { return MadeChange; } -bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, - bool IsStore, bool IsLoad) { +bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) { IRBuilder<> Builder(I); - auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad); + auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order); - auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad); + auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order); // The trailing fence is emitted before the instruction instead of after // because there is no easy way of setting Builder insertion point after // an instruction. So we must erase it from the BB, and insert it back @@ -1048,8 +1041,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) - TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); + TLI->emitLeadingFence(Builder, CI, SuccessOrder); Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. @@ -1064,8 +1056,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(ReleasingStoreBB); if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) - TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); + TLI->emitLeadingFence(Builder, CI, SuccessOrder); Builder.CreateBr(TryStoreBB); Builder.SetInsertPoint(TryStoreBB); @@ -1094,8 +1085,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // necessary. Builder.SetInsertPoint(SuccessBB); if (ShouldInsertFencesForAtomic) - TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); + TLI->emitTrailingFence(Builder, CI, SuccessOrder); Builder.CreateBr(ExitBB); Builder.SetInsertPoint(NoStoreBB); @@ -1107,8 +1097,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.SetInsertPoint(FailureBB); if (ShouldInsertFencesForAtomic) - TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, - /*IsLoad=*/true); + TLI->emitTrailingFence(Builder, CI, FailureOrder); Builder.CreateBr(ExitBB); // Finally, we have control-flow based knowledge of whether the cmpxchg diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index a67e194356d828c24e1c1207499e2fd26f7a8ce2..be93ff0dad29d2ec2e0cf75cfff7a7096316e576 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,17 +15,15 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include using namespace llvm; -#define DEBUG_TYPE "basictti" - // This flag is used by the template base class for BasicTTIImpl, and here to // provide a definition. cl::opt diff --git a/lib/CodeGen/BranchCoalescing.cpp b/lib/CodeGen/BranchCoalescing.cpp index efdf300df85063cef2de9d31c9a466b64d896f6e..2c41b597843c985b0e153af3263c1832f645e2f0 100644 --- a/lib/CodeGen/BranchCoalescing.cpp +++ b/lib/CodeGen/BranchCoalescing.cpp @@ -27,7 +27,7 @@ using namespace llvm; -#define DEBUG_TYPE "coal-branch" +#define DEBUG_TYPE "branch-coalescing" static cl::opt EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden, @@ -193,11 +193,11 @@ public: char BranchCoalescing::ID = 0; char &llvm::BranchCoalescingID = BranchCoalescing::ID; -INITIALIZE_PASS_BEGIN(BranchCoalescing, "branch-coalescing", +INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_END(BranchCoalescing, "branch-coalescing", "Branch Coalescing", +INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo() diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 2d01301402f04370a81d87be965d7ec2793711b0..53095497629205bdf245c7ae8436b7636ba1be28 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1,4 +1,4 @@ -//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===// +//===- BranchFolding.cpp - Fold machine code branch instructions ----------===// // // The LLVM Compiler Infrastructure // @@ -18,33 +18,49 @@ //===----------------------------------------------------------------------===// #include "BranchFolding.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include +#include +#include +#include +#include +#include + using namespace llvm; -#define DEBUG_TYPE "branchfolding" +#define DEBUG_TYPE "branch-folder" STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); @@ -69,10 +85,12 @@ TailMergeSize("tail-merge-size", cl::init(3), cl::Hidden); namespace { + /// BranchFolderPass - Wrap branch folder in a machine function pass. class BranchFolderPass : public MachineFunctionPass { public: static char ID; + explicit BranchFolderPass(): MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -84,12 +102,13 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char BranchFolderPass::ID = 0; char &llvm::BranchFolderPassID = BranchFolderPass::ID; -INITIALIZE_PASS(BranchFolderPass, "branch-folder", +INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE, "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { @@ -153,13 +172,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TriedMerging.clear(); + MachineRegisterInfo &MRI = MF.getRegInfo(); AfterBlockPlacement = AfterPlacement; TII = tii; TRI = tri; MMI = mmi; MLI = mli; + this->MRI = &MRI; - MachineRegisterInfo &MRI = MF.getRegInfo(); UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF); if (!UpdateLiveIns) MRI.invalidateLiveness(); @@ -351,7 +371,7 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, if (UpdateLiveIns) { NewDest->clearLiveIns(); - computeLiveIns(LiveRegs, *TRI, *NewDest); + computeLiveIns(LiveRegs, *MRI, *NewDest); } ++NumTailMerge; @@ -367,7 +387,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Create the fall-through block. MachineFunction::iterator MBBI = CurMBB.getIterator(); - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); + MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -388,7 +408,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); if (UpdateLiveIns) - computeLiveIns(LiveRegs, *TRI, *NewMBB); + computeLiveIns(LiveRegs, *MRI, *NewMBB); // Add the new block to the funclet. const auto &FuncletI = FuncletMembership.find(&CurMBB); @@ -505,7 +525,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, MachineBasicBlock::iterator &I) { I = MBB->end(); unsigned NumTerms = 0; - for (;;) { + while (true) { if (I == MBB->begin()) { I = MBB->end(); break; @@ -1600,7 +1620,6 @@ ReoptimizeBlock: // block doesn't fall through into some other block, see if we can find a // place to move this block where a fall-through will happen. if (!PrevBB.canFallThrough()) { - // Now we know that there was no fall-through into this block, check to // see if it has a fall-through into its successor. bool CurFallsThru = MBB->canFallThrough(); @@ -1850,8 +1869,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { return false; bool HasDups = false; - SmallVector LocalDefs; - SmallSet LocalDefsSet; + SmallVector LocalDefs, LocalKills; + SmallSet ActiveDefsSet, AllDefsSet; MachineBasicBlock::iterator TIB = TBB->begin(); MachineBasicBlock::iterator FIB = FBB->begin(); MachineBasicBlock::iterator TIE = TBB->end(); @@ -1905,7 +1924,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { IsSafe = false; break; } - } else if (!LocalDefsSet.count(Reg)) { + } else if (!ActiveDefsSet.count(Reg)) { if (Defs.count(Reg)) { // Use is defined by the instruction at the point of insertion. IsSafe = false; @@ -1925,18 +1944,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore)) break; - // Remove kills from LocalDefsSet, these registers had short live ranges. + // Remove kills from ActiveDefsSet, these registers had short live ranges. for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; unsigned Reg = MO.getReg(); - if (!Reg || !LocalDefsSet.count(Reg)) + if (!Reg) continue; + if (!AllDefsSet.count(Reg)) { + LocalKills.push_back(Reg); + continue; + } if (TargetRegisterInfo::isPhysicalRegister(Reg)) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.erase(*AI); + ActiveDefsSet.erase(*AI); } else { - LocalDefsSet.erase(Reg); + ActiveDefsSet.erase(Reg); } } @@ -1948,7 +1971,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg)) continue; LocalDefs.push_back(Reg); - addRegAndItsAliases(Reg, TRI, LocalDefsSet); + addRegAndItsAliases(Reg, TRI, ActiveDefsSet); + addRegAndItsAliases(Reg, TRI, AllDefsSet); } HasDups = true; @@ -1963,17 +1987,22 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { FBB->erase(FBB->begin(), FIB); // Update livein's. - bool AddedLiveIns = false; + bool ChangedLiveIns = false; for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { unsigned Def = LocalDefs[i]; - if (LocalDefsSet.count(Def)) { + if (ActiveDefsSet.count(Def)) { TBB->addLiveIn(Def); FBB->addLiveIn(Def); - AddedLiveIns = true; + ChangedLiveIns = true; } } + for (unsigned K : LocalKills) { + TBB->removeLiveIn(K); + FBB->removeLiveIn(K); + ChangedLiveIns = true; + } - if (AddedLiveIns) { + if (ChangedLiveIns) { TBB->sortUniqueLiveIns(); FBB->sortUniqueLiveIns(); } diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 4852721eea10247f434c6d2aa6cfea826ed1b09e..92681137e4c63fd024163e7298691542940a4c78 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -108,6 +108,7 @@ namespace llvm { bool UpdateLiveIns; unsigned MinCommonTailLength; const TargetInstrInfo *TII; + const MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; MachineLoopInfo *MLI; diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp index 7af1369416615b447e667b5c6478936c27c04953..27ee12c4c5ff2a0f67cba22f2dd36104aa75c485 100644 --- a/lib/CodeGen/BranchRelaxation.cpp +++ b/lib/CodeGen/BranchRelaxation.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -259,7 +259,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, // Need to fix live-in lists if we track liveness. if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeLiveIns(LiveRegs, *TRI, *NewBB); + computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB); ++NumSplit; @@ -345,6 +345,10 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { // Do it here since if there's no split, no update is needed. MBB->replaceSuccessor(FBB, &NewBB); NewBB.addSuccessor(FBB); + + // Need to fix live-in lists if we track liveness. + if (TRI->trackLivenessAfterRegAlloc(*MF)) + computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB); } // We now have an appropriate fall-through block in place (either naturally or diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp index e4eab8c513d991fd5e1fc0f0f31e5a37a2d7e4d9..abac555d6602552104cc2be931324208adc9c4da 100644 --- a/lib/CodeGen/BuiltinGCs.cpp +++ b/lib/CodeGen/BuiltinGCs.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GCs.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/Casting.h" diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 0912d9f68aff20b8c923fa83dc54fb7c8fc148c2..55a27e2fb79e5a1fb9561235ae4be9c0d6bde50d 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp + ExpandReductions.cpp FaultMaps.cpp FEntryInserter.cpp FuncletLayout.cpp @@ -48,6 +49,7 @@ add_llvm_library(LLVMCodeGen LivePhysRegs.cpp LiveRangeCalc.cpp LiveRangeEdit.cpp + LiveRangeShrink.cpp LiveRegMatrix.cpp LiveRegUnits.cpp LiveStackAnalysis.cpp @@ -65,6 +67,7 @@ add_llvm_library(LLVMCodeGen MachineCSE.cpp MachineDominanceFrontier.cpp MachineDominators.cpp + MachineFrameInfo.cpp MachineFunction.cpp MachineFunctionPass.cpp MachineFunctionPrinterPass.cpp @@ -117,6 +120,7 @@ add_llvm_library(LLVMCodeGen SafeStack.cpp SafeStackColoring.cpp SafeStackLayout.cpp + ScalarizeMaskedMemIntrin.cpp ScheduleDAG.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index dc2d38a95f998aa3ee331cc46e0790b363379781..c2ced19458ed60559b77a9d626d9ac006725e355 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 3fc12ccc3b60c8bc944a4beac2e93e9c0e72d022..faa5f139cf7b33898e23d5c0abb677236693efd9 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" #include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; @@ -43,6 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveDebugValuesPass(Registry); initializeLiveDebugVariablesPass(Registry); initializeLiveIntervalsPass(Registry); + initializeLiveRangeShrinkPass(Registry); initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeLocalStackSlotPassPass(Registry); @@ -76,10 +77,12 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePostRASchedulerPass(Registry); initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); + initializeRABasicPass(Registry); initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); - initializeSafeStackPass(Registry); + initializeSafeStackLegacyPassPass(Registry); + initializeScalarizeMaskedMemIntrinPass(Registry); initializeShrinkWrapPass(Registry); initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 2bdd189557b40dc6cc5f8af4e3c717a3aa3ffec7..37e176099ea7acaac35a8970fdd4a55fc25df24d 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -23,12 +22,14 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -59,6 +60,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/Transforms/Utils/ValueMapper.h" + using namespace llvm; using namespace llvm::PatternMatch; @@ -83,6 +85,12 @@ STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); +STATISTIC(NumMemCmpCalls, "Number of memcmp calls"); +STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size"); +STATISTIC(NumMemCmpGreaterThanMax, + "Number of memcmp calls with size greater than max size"); +STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls"); + static cl::opt DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); @@ -126,7 +134,7 @@ static cl::opt DisablePreheaderProtect( cl::desc("Disable protection against removing loop preheaders")); static cl::opt ProfileGuidedSectionPrefix( - "profile-guided-section-prefix", cl::Hidden, cl::init(true), + "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Use profile info to add section prefix for hot/cold functions")); static cl::opt FreqRatioToSkipMerge( @@ -143,6 +151,11 @@ EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true)); +static cl::opt MemCmpNumLoadsPerBlock( + "memcmp-num-loads-per-block", cl::Hidden, cl::init(1), + cl::desc("The number of loads per basic block for inline expansion of " + "memcmp that is only being compared against zero.")); + namespace { typedef SmallPtrSet SetOfInstrs; typedef PointerIntPair TypeIsSExt; @@ -197,10 +210,11 @@ class TypePromotionTransaction; public: static char ID; // Pass identification, replacement for typeid - explicit CodeGenPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) { - initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); - } + CodeGenPrepare() + : FunctionPass(ID), TM(nullptr), TLI(nullptr), TTI(nullptr), + DL(nullptr) { + initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &F) override; StringRef getPassName() const override { return "CodeGen Prepare"; } @@ -221,12 +235,12 @@ class TypePromotionTransaction; void eliminateMostlyEmptyBlock(BasicBlock *BB); bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB, bool isPreheader); - bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); - bool optimizeInst(Instruction *I, bool& ModifiedDT); + bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); + bool optimizeInst(Instruction *I, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy, unsigned AS); bool optimizeInlineAsmInst(CallInst *CS); - bool optimizeCallInst(CallInst *CI, bool& ModifiedDT); + bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); bool optimizeExt(Instruction *&I); bool optimizeExtUses(Instruction *I); bool optimizeLoadExt(LoadInst *I); @@ -255,15 +269,13 @@ class TypePromotionTransaction; } char CodeGenPrepare::ID = 0; -INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare", - "Optimize for code generation", false, false) +INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, + "Optimize for code generation", false, false) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare", - "Optimize for code generation", false, false) +INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, + "Optimize for code generation", false, false) -FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { - return new CodeGenPrepare(TM); -} +FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } bool CodeGenPrepare::runOnFunction(Function &F) { if (skipFunction(F)) @@ -279,7 +291,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { BPI.reset(); ModifiedDT = false; - if (TM) { + if (auto *TPC = getAnalysisIfAvailable()) { + TM = &TPC->getTM(); SubtargetInfo = TM->getSubtargetImpl(F); TLI = SubtargetInfo->getTargetLowering(); TRI = SubtargetInfo->getRegisterInfo(); @@ -295,7 +308,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (PSI->isFunctionHotInCallGraph(&F)) F.setSectionPrefix(".hot"); else if (PSI->isFunctionColdInCallGraph(&F)) - F.setSectionPrefix(".cold"); + F.setSectionPrefix(".unlikely"); } /// This optimization identifies DIV instructions that can be @@ -349,7 +362,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Really free removed instructions during promotion. for (Instruction *I : RemovedInsts) - delete I; + I->deleteValue(); EverMadeChange |= MadeChange; } @@ -570,8 +583,14 @@ bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) { ValueToValueMapTy VMap; BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F); - for (BasicBlock *Pred : OtherPreds) - Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + for (BasicBlock *Pred : OtherPreds) { + // If the target is a loop to itself, then the terminator of the split + // block needs to be updated. + if (Pred == Target) + BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + else + Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc); + } // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that // they are clones, so the number of PHIs are the same. @@ -1543,519 +1562,6 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, return MadeChange; } -// Translate a masked load intrinsic like -// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, -// <16 x i1> %mask, <16 x i32> %passthru) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// %3 = icmp eq i1 %2, true -// br i1 %3, label %cond.load, label %else -// -//cond.load: ; preds = %0 -// %4 = getelementptr i32* %1, i32 0 -// %5 = load i32* %4 -// %6 = insertelement <16 x i32> undef, i32 %5, i32 0 -// br label %else -// -//else: ; preds = %0, %cond.load -// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ] -// %7 = extractelement <16 x i1> %mask, i32 1 -// %8 = icmp eq i1 %7, true -// br i1 %8, label %cond.load1, label %else2 -// -//cond.load1: ; preds = %else -// %9 = getelementptr i32* %1, i32 1 -// %10 = load i32* %9 -// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1 -// br label %else2 -// -//else2: ; preds = %else, %cond.load1 -// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] -// %12 = extractelement <16 x i1> %mask, i32 2 -// %13 = icmp eq i1 %12, true -// br i1 %13, label %cond.load4, label %else5 -// -static void scalarizeMaskedLoad(CallInst *CI) { - Value *Ptr = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - unsigned AlignVal = cast(Alignment)->getZExtValue(); - VectorType *VecType = dyn_cast(CI->getType()); - assert(VecType && "Unexpected return type of masked load intrinsic"); - - Type *EltTy = CI->getType()->getVectorElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - BasicBlock *CondBlock = nullptr; - BasicBlock *PrevIfBlock = CI->getParent(); - - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. - bool IsAllOnesMask = isa(Mask) && - cast(Mask)->isAllOnesValue(); - - if (IsAllOnesMask) { - Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. - AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8); - // Bitcast %addr fron i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(cast(Ptr->getType())->getAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); - - Value *UndefVal = UndefValue::get(VecType); - - // The result vector - Value *VResult = UndefVal; - - if (isa(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal); - VResult = Builder.CreateInsertElement(VResult, Load, - Builder.getInt32(Idx)); - } - Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - PHINode *Phi = nullptr; - Value *PrevPhi = UndefVal; - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - - // Fill the "else" block, created in the previous iteration - // - // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // %to_load = icmp eq i1 %mask_1, true - // br i1 %to_load, label %cond.load, label %else - // - if (Idx > 0) { - Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - PrevPhi = Phi; - VResult = Phi; - } - - Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1)); - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); - VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - } - - Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); -} - -// Translate a masked store intrinsic, like -// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, -// <16 x i1> %mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set -// -// %1 = bitcast i8* %addr to i32* -// %2 = extractelement <16 x i1> %mask, i32 0 -// %3 = icmp eq i1 %2, true -// br i1 %3, label %cond.store, label %else -// -// cond.store: ; preds = %0 -// %4 = extractelement <16 x i32> %val, i32 0 -// %5 = getelementptr i32* %1, i32 0 -// store i32 %4, i32* %5 -// br label %else -// -// else: ; preds = %0, %cond.store -// %6 = extractelement <16 x i1> %mask, i32 1 -// %7 = icmp eq i1 %6, true -// br i1 %7, label %cond.store1, label %else2 -// -// cond.store1: ; preds = %else -// %8 = extractelement <16 x i32> %val, i32 1 -// %9 = getelementptr i32* %1, i32 1 -// store i32 %8, i32* %9 -// br label %else2 -// . . . -static void scalarizeMaskedStore(CallInst *CI) { - Value *Src = CI->getArgOperand(0); - Value *Ptr = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - unsigned AlignVal = cast(Alignment)->getZExtValue(); - VectorType *VecType = dyn_cast(Src->getType()); - assert(VecType && "Unexpected data type in masked store intrinsic"); - - Type *EltTy = VecType->getElementType(); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - // Short-cut if the mask is all-true. - bool IsAllOnesMask = isa(Mask) && - cast(Mask)->isAllOnesValue(); - - if (IsAllOnesMask) { - Builder.CreateAlignedStore(Src, Ptr, AlignVal); - CI->eraseFromParent(); - return; - } - - // Adjust alignment for the scalar instruction. - AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8); - // Bitcast %addr fron i8* to EltTy* - Type *NewPtrType = - EltTy->getPointerTo(cast(Ptr->getType())->getAddressSpace()); - Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); - - if (isa(Mask)) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - Builder.CreateAlignedStore(OneElt, Gep, AlignVal); - } - CI->eraseFromParent(); - return; - } - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - - // Fill the "else" block, created in the previous iteration - // - // %mask_1 = extractelement <16 x i1> %mask, i32 Idx - // %to_store = icmp eq i1 %mask_1, true - // br i1 %to_store, label %cond.store, label %else - // - Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1)); - - // Create "cond" block - // - // %OneElt = extractelement <16 x i32> %Src, i32 Idx - // %EltAddr = getelementptr i32* %1, i32 0 - // %store i32 %OneElt, i32* %EltAddr - // - BasicBlock *CondBlock = - IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); - Value *Gep = - Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - Builder.CreateAlignedStore(OneElt, Gep, AlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = - CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); -} - -// Translate a masked gather intrinsic like -// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, -// <16 x i1> %Mask, <16 x i32> %Src) -// to a chain of basic blocks, with loading element one-by-one if -// the appropriate mask bit is set -// -// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind -// % Mask0 = extractelement <16 x i1> %Mask, i32 0 -// % ToLoad0 = icmp eq i1 % Mask0, true -// br i1 % ToLoad0, label %cond.load, label %else -// -// cond.load: -// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// % Load0 = load i32, i32* % Ptr0, align 4 -// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 -// br label %else -// -// else: -// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] -// % Mask1 = extractelement <16 x i1> %Mask, i32 1 -// % ToLoad1 = icmp eq i1 % Mask1, true -// br i1 % ToLoad1, label %cond.load1, label %else2 -// -// cond.load1: -// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// % Load1 = load i32, i32* % Ptr1, align 4 -// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 -// br label %else2 -// . . . -// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src -// ret <16 x i32> %Result -static void scalarizeMaskedGather(CallInst *CI) { - Value *Ptrs = CI->getArgOperand(0); - Value *Alignment = CI->getArgOperand(1); - Value *Mask = CI->getArgOperand(2); - Value *Src0 = CI->getArgOperand(3); - - VectorType *VecType = dyn_cast(CI->getType()); - - assert(VecType && "Unexpected return type of masked load intrinsic"); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - BasicBlock *CondBlock = nullptr; - BasicBlock *PrevIfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - unsigned AlignVal = cast(Alignment)->getZExtValue(); - - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - Value *UndefVal = UndefValue::get(VecType); - - // The result vector - Value *VResult = UndefVal; - unsigned VectorWidth = VecType->getNumElements(); - - // Shorten the way if the mask is a vector of constants. - bool IsConstMask = isa(Mask); - - if (IsConstMask) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, - "Load" + Twine(Idx)); - VResult = Builder.CreateInsertElement(VResult, Load, - Builder.getInt32(Idx), - "Res" + Twine(Idx)); - } - Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); - return; - } - - PHINode *Phi = nullptr; - Value *PrevPhi = UndefVal; - - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - - // Fill the "else" block, created in the previous iteration - // - // %Mask1 = extractelement <16 x i1> %Mask, i32 1 - // %ToLoad1 = icmp eq i1 %Mask1, true - // br i1 %ToLoad1, label %cond.load, label %else - // - if (Idx > 0) { - Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - PrevPhi = Phi; - VResult = Phi; - } - - Value *Predicate = Builder.CreateExtractElement(Mask, - Builder.getInt32(Idx), - "Mask" + Twine(Idx)); - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1), - "ToLoad" + Twine(Idx)); - - // Create "cond" block - // - // %EltAddr = getelementptr i32* %1, i32 0 - // %Elt = load i32* %EltAddr - // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx - // - CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); - Builder.SetInsertPoint(InsertPt); - - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, - "Load" + Twine(Idx)); - VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), - "Res" + Twine(Idx)); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - PrevIfBlock = IfBlock; - IfBlock = NewIfBlock; - } - - Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); - Phi->addIncoming(VResult, CondBlock); - Phi->addIncoming(PrevPhi, PrevIfBlock); - Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); - CI->replaceAllUsesWith(NewI); - CI->eraseFromParent(); -} - -// Translate a masked scatter intrinsic, like -// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, -// <16 x i1> %Mask) -// to a chain of basic blocks, that stores element one-by-one if -// the appropriate mask bit is set. -// -// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind -// % Mask0 = extractelement <16 x i1> % Mask, i32 0 -// % ToStore0 = icmp eq i1 % Mask0, true -// br i1 %ToStore0, label %cond.store, label %else -// -// cond.store: -// % Elt0 = extractelement <16 x i32> %Src, i32 0 -// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 -// store i32 %Elt0, i32* % Ptr0, align 4 -// br label %else -// -// else: -// % Mask1 = extractelement <16 x i1> % Mask, i32 1 -// % ToStore1 = icmp eq i1 % Mask1, true -// br i1 % ToStore1, label %cond.store1, label %else2 -// -// cond.store1: -// % Elt1 = extractelement <16 x i32> %Src, i32 1 -// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 -// store i32 % Elt1, i32* % Ptr1, align 4 -// br label %else2 -// . . . -static void scalarizeMaskedScatter(CallInst *CI) { - Value *Src = CI->getArgOperand(0); - Value *Ptrs = CI->getArgOperand(1); - Value *Alignment = CI->getArgOperand(2); - Value *Mask = CI->getArgOperand(3); - - assert(isa(Src->getType()) && - "Unexpected data type in masked scatter intrinsic"); - assert(isa(Ptrs->getType()) && - isa(Ptrs->getType()->getVectorElementType()) && - "Vector of pointers is expected in masked scatter intrinsic"); - - IRBuilder<> Builder(CI->getContext()); - Instruction *InsertPt = CI; - BasicBlock *IfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); - - unsigned AlignVal = cast(Alignment)->getZExtValue(); - unsigned VectorWidth = Src->getType()->getVectorNumElements(); - - // Shorten the way if the mask is a vector of constants. - bool IsConstMask = isa(Mask); - - if (IsConstMask) { - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - if (cast(Mask)->getOperand(Idx)->isNullValue()) - continue; - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), - "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - } - CI->eraseFromParent(); - return; - } - for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { - // Fill the "else" block, created in the previous iteration - // - // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx - // % ToStore = icmp eq i1 % Mask1, true - // br i1 % ToStore, label %cond.store, label %else - // - Value *Predicate = Builder.CreateExtractElement(Mask, - Builder.getInt32(Idx), - "Mask" + Twine(Idx)); - Value *Cmp = - Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, - ConstantInt::get(Predicate->getType(), 1), - "ToStore" + Twine(Idx)); - - // Create "cond" block - // - // % Elt1 = extractelement <16 x i32> %Src, i32 1 - // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 - // %store i32 % Elt1, i32* % Ptr1 - // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); - Builder.SetInsertPoint(InsertPt); - - Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), - "Elt" + Twine(Idx)); - Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), - "Ptr" + Twine(Idx)); - Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); - - // Create "else" block, fill it in the next iteration - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - Builder.SetInsertPoint(InsertPt); - Instruction *OldBr = IfBlock->getTerminator(); - BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); - OldBr->eraseFromParent(); - IfBlock = NewIfBlock; - } - CI->eraseFromParent(); -} - /// If counting leading or trailing zeros is an expensive operation and a zero /// input is defined, add a check for zero to avoid calling the intrinsic. /// @@ -2135,7 +1641,621 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, return true; } -bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { +// This class provides helper functions to expand a memcmp library call into an +// inline expansion. +class MemCmpExpansion { + struct ResultBlock { + BasicBlock *BB; + PHINode *PhiSrc1; + PHINode *PhiSrc2; + ResultBlock(); + }; + + CallInst *CI; + ResultBlock ResBlock; + unsigned MaxLoadSize; + unsigned NumBlocks; + unsigned NumBlocksNonOneByte; + unsigned NumLoadsPerBlock; + std::vector LoadCmpBlocks; + BasicBlock *EndBlock; + PHINode *PhiRes; + bool IsUsedForZeroCmp; + const DataLayout &DL; + + int calculateNumBlocks(unsigned Size); + void createLoadCmpBlocks(); + void createResultBlock(); + void setupResultBlockPHINodes(); + void setupEndBlockPHINodes(); + void emitLoadCompareBlock(unsigned Index, int LoadSize, int GEPIndex); + Value *getCompareLoadPairs(unsigned Index, unsigned Size, + unsigned &NumBytesProcessed, IRBuilder<> &Builder); + void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, + unsigned &NumBytesProcessed); + void emitLoadCompareByteBlock(unsigned Index, int GEPIndex); + void emitMemCmpResultBlock(); + Value *getMemCmpExpansionZeroCase(unsigned Size); + Value *getMemCmpEqZeroOneBlock(unsigned Size); + unsigned getLoadSize(unsigned Size); + unsigned getNumLoads(unsigned Size); + +public: + MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize, + unsigned NumLoadsPerBlock, const DataLayout &DL); + Value *getMemCmpExpansion(uint64_t Size); +}; + +MemCmpExpansion::ResultBlock::ResultBlock() + : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {} + +// Initialize the basic block structure required for expansion of memcmp call +// with given maximum load size and memcmp size parameter. +// This structure includes: +// 1. A list of load compare blocks - LoadCmpBlocks. +// 2. An EndBlock, split from original instruction point, which is the block to +// return from. +// 3. ResultBlock, block to branch to for early exit when a +// LoadCmpBlock finds a difference. +MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, + unsigned MaxLoadSize, unsigned LoadsPerBlock, + const DataLayout &TheDataLayout) + : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock), + DL(TheDataLayout) { + + // A memcmp with zero-comparison with only one block of load and compare does + // not need to set up any extra blocks. This case could be handled in the DAG, + // but since we have all of the machinery to flexibly expand any memcpy here, + // we choose to handle this case too to avoid fragmented lowering. + IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); + NumBlocks = calculateNumBlocks(Size); + if (!IsUsedForZeroCmp || NumBlocks != 1) { + BasicBlock *StartBlock = CI->getParent(); + EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); + setupEndBlockPHINodes(); + createResultBlock(); + + // If return value of memcmp is not used in a zero equality, we need to + // calculate which source was larger. The calculation requires the + // two loaded source values of each load compare block. + // These will be saved in the phi nodes created by setupResultBlockPHINodes. + if (!IsUsedForZeroCmp) + setupResultBlockPHINodes(); + + // Create the number of required load compare basic blocks. + createLoadCmpBlocks(); + + // Update the terminator added by splitBasicBlock to branch to the first + // LoadCmpBlock. + StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); + } + + IRBuilder<> Builder(CI->getContext()); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); +} + +void MemCmpExpansion::createLoadCmpBlocks() { + for (unsigned i = 0; i < NumBlocks; i++) { + BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", + EndBlock->getParent(), EndBlock); + LoadCmpBlocks.push_back(BB); + } +} + +void MemCmpExpansion::createResultBlock() { + ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", + EndBlock->getParent(), EndBlock); +} + +// This function creates the IR instructions for loading and comparing 1 byte. +// It loads 1 byte from each source of the memcmp parameters with the given +// GEPIndex. It then subtracts the two loaded values and adds this result to the +// final phi node for selecting the memcmp result. +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, int GEPIndex) { + IRBuilder<> Builder(CI->getContext()); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex. + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); + LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); + Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + + PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]); + + if (Index < (LoadCmpBlocks.size() - 1)) { + // Early exit branch if difference found to EndBlock. Otherwise, continue to + // next LoadCmpBlock, + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, + ConstantInt::get(Diff->getType(), 0)); + BranchInst *CmpBr = + BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp); + Builder.Insert(CmpBr); + } else { + // The last block has an unconditional branch to EndBlock. + BranchInst *CmpBr = BranchInst::Create(EndBlock); + Builder.Insert(CmpBr); + } +} + +unsigned MemCmpExpansion::getNumLoads(unsigned Size) { + return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize); +} + +unsigned MemCmpExpansion::getLoadSize(unsigned Size) { + return MinAlign(PowerOf2Floor(Size), MaxLoadSize); +} + +/// Generate an equality comparison for one or more pairs of loaded values. +/// This is used in the case where the memcmp() call is compared equal or not +/// equal to zero. +Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, + unsigned &NumBytesProcessed, + IRBuilder<> &Builder) { + std::vector XorList, OrList; + Value *Diff; + + unsigned RemainingBytes = Size - NumBytesProcessed; + unsigned NumLoadsRemaining = getNumLoads(RemainingBytes); + unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock); + + // For a single-block expansion, start inserting before the memcmp call. + if (LoadCmpBlocks.empty()) + Builder.SetInsertPoint(CI); + else + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + + Value *Cmp = nullptr; + for (unsigned i = 0; i < NumLoads; ++i) { + unsigned LoadSize = getLoadSize(RemainingBytes); + unsigned GEPIndex = NumBytesProcessed / LoadSize; + NumBytesProcessed += LoadSize; + RemainingBytes -= LoadSize; + + Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex. + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + if (NumLoads != 1) { + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + } + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. + Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType); + XorList.push_back(Diff); + } else { + // If there's only one load per block, we just compare the loaded values. + Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + } + } + + auto pairWiseOr = [&](std::vector &InList) -> std::vector { + std::vector OutList; + for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { + Value *Or = Builder.CreateOr(InList[i], InList[i + 1]); + OutList.push_back(Or); + } + if (InList.size() % 2 != 0) + OutList.push_back(InList.back()); + return OutList; + }; + + if (!Cmp) { + // Pairwise OR the XOR results. + OrList = pairWiseOr(XorList); + + // Pairwise OR the OR results until one result left. + while (OrList.size() != 1) { + OrList = pairWiseOr(OrList); + } + Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); + } + + return Cmp; +} + +void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( + unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { + IRBuilder<> Builder(CI->getContext()); + Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed, Builder); + + BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[Index + 1]; + // Early exit branch if difference found to ResultBlock. Otherwise, + // continue to next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes). + if (Index == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); + } +} + +// This function creates the IR intructions for loading and comparing using the +// given LoadSize. It loads the number of bytes specified by LoadSize from each +// source of the memcmp parameters. It then does a subtract to see if there was +// a difference in the loaded values. If a difference is found, it branches +// with an early exit to the ResultBlock for calculating which source was +// larger. Otherwise, it falls through to the either the next LoadCmpBlock or +// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with +// a special case through emitLoadCompareByteBlock. The special handling can +// simply subtract the loaded values and add it to the result phi node. +void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, int LoadSize, + int GEPIndex) { + if (LoadSize == 1) { + MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); + return; + } + + IRBuilder<> Builder(CI->getContext()); + + Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + + Value *Source1 = CI->getArgOperand(0); + Value *Source2 = CI->getArgOperand(1); + + Builder.SetInsertPoint(LoadCmpBlocks[Index]); + // Cast source to LoadSizeType*. + if (Source1->getType() != LoadSizeType) + Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); + if (Source2->getType() != LoadSizeType) + Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); + + // Get the base address using the GEPIndex. + if (GEPIndex != 0) { + Source1 = Builder.CreateGEP(LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, GEPIndex)); + Source2 = Builder.CreateGEP(LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, GEPIndex)); + } + + // Load LoadSizeType from the base address. + Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); + Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + + if (DL.isLittleEndian()) { + Function *F = LoadCmpBlocks[Index]->getParent(); + + Function *Bswap = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::bswap, LoadSizeType); + LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); + LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); + } + + if (LoadSizeType != MaxLoadType) { + LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); + LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); + } + + // Add the loaded values to the phi nodes for calculating memcmp result only + // if result is not used in a zero equality. + if (!IsUsedForZeroCmp) { + ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]); + ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]); + } + + Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, + ConstantInt::get(Diff->getType(), 0)); + BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) + ? EndBlock + : LoadCmpBlocks[Index + 1]; + // Early exit branch if difference found to ResultBlock. Otherwise, continue + // to next LoadCmpBlock or EndBlock. + BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + Builder.Insert(CmpBr); + + // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 + // since early exit to ResultBlock was not taken (no difference was found in + // any of the bytes). + if (Index == LoadCmpBlocks.size() - 1) { + Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); + PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); + } +} + +// This function populates the ResultBlock with a sequence to calculate the +// memcmp result. It compares the two loaded source values and returns -1 if +// src1 < src2 and 1 if src1 > src2. +void MemCmpExpansion::emitMemCmpResultBlock() { + IRBuilder<> Builder(CI->getContext()); + + // Special case: if memcmp result is used in a zero equality, result does not + // need to be calculated and can simply return 1. + if (IsUsedForZeroCmp) { + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); + PhiRes->addIncoming(Res, ResBlock.BB); + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + return; + } + BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); + Builder.SetInsertPoint(ResBlock.BB, InsertPt); + + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1, + ResBlock.PhiSrc2); + + Value *Res = + Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), + ConstantInt::get(Builder.getInt32Ty(), 1)); + + BranchInst *NewBr = BranchInst::Create(EndBlock); + Builder.Insert(NewBr); + PhiRes->addIncoming(Res, ResBlock.BB); +} + +int MemCmpExpansion::calculateNumBlocks(unsigned Size) { + int NumBlocks = 0; + bool HaveOneByteLoad = false; + unsigned RemainingSize = Size; + unsigned LoadSize = MaxLoadSize; + while (RemainingSize) { + if (LoadSize == 1) + HaveOneByteLoad = true; + NumBlocks += RemainingSize / LoadSize; + RemainingSize = RemainingSize % LoadSize; + LoadSize = LoadSize / 2; + } + NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks; + + if (IsUsedForZeroCmp) + NumBlocks = NumBlocks / NumLoadsPerBlock + + (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0); + + return NumBlocks; +} + +void MemCmpExpansion::setupResultBlockPHINodes() { + IRBuilder<> Builder(CI->getContext()); + Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); + Builder.SetInsertPoint(ResBlock.BB); + ResBlock.PhiSrc1 = + Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1"); + ResBlock.PhiSrc2 = + Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2"); +} + +void MemCmpExpansion::setupEndBlockPHINodes() { + IRBuilder<> Builder(CI->getContext()); + + Builder.SetInsertPoint(&EndBlock->front()); + PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); +} + +Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) { + unsigned NumBytesProcessed = 0; + // This loop populates each of the LoadCmpBlocks with the IR sequence to + // handle multiple loads per block. + for (unsigned i = 0; i < NumBlocks; ++i) + emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed); + + emitMemCmpResultBlock(); + return PhiRes; +} + +/// A memcmp expansion that compares equality with 0 and only has one block of +/// load and compare can bypass the compare, branch, and phi IR that is required +/// in the general case. +Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) { + unsigned NumBytesProcessed = 0; + IRBuilder<> Builder(CI->getContext()); + Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed, Builder); + return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); +} + +// This function expands the memcmp call into an inline expansion and returns +// the memcmp result. +Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { + if (IsUsedForZeroCmp) + return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) : + getMemCmpExpansionZeroCase(Size); + + // This loop calls emitLoadCompareBlock for comparing Size bytes of the two + // memcmp sources. It starts with loading using the maximum load size set by + // the target. It processes any remaining bytes using a load size which is the + // next smallest power of 2. + int LoadSize = MaxLoadSize; + int NumBytesToBeProcessed = Size; + unsigned Index = 0; + while (NumBytesToBeProcessed) { + // Calculate how many blocks we can create with the current load size. + int NumBlocks = NumBytesToBeProcessed / LoadSize; + int GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; + NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; + + // For each NumBlocks, populate the instruction sequence for loading and + // comparing LoadSize bytes. + while (NumBlocks--) { + emitLoadCompareBlock(Index, LoadSize, GEPIndex); + Index++; + GEPIndex++; + } + // Get the next LoadSize to use. + LoadSize = LoadSize / 2; + } + + emitMemCmpResultBlock(); + return PhiRes; +} + +// This function checks to see if an expansion of memcmp can be generated. +// It checks for constant compare size that is less than the max inline size. +// If an expansion cannot occur, returns false to leave as a library call. +// Otherwise, the library call is replaced with a new IR instruction sequence. +/// We want to transform: +/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15) +/// To: +/// loadbb: +/// %0 = bitcast i32* %buffer2 to i8* +/// %1 = bitcast i32* %buffer1 to i8* +/// %2 = bitcast i8* %1 to i64* +/// %3 = bitcast i8* %0 to i64* +/// %4 = load i64, i64* %2 +/// %5 = load i64, i64* %3 +/// %6 = call i64 @llvm.bswap.i64(i64 %4) +/// %7 = call i64 @llvm.bswap.i64(i64 %5) +/// %8 = sub i64 %6, %7 +/// %9 = icmp ne i64 %8, 0 +/// br i1 %9, label %res_block, label %loadbb1 +/// res_block: ; preds = %loadbb2, +/// %loadbb1, %loadbb +/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ] +/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ] +/// %10 = icmp ult i64 %phi.src1, %phi.src2 +/// %11 = select i1 %10, i32 -1, i32 1 +/// br label %endblock +/// loadbb1: ; preds = %loadbb +/// %12 = bitcast i32* %buffer2 to i8* +/// %13 = bitcast i32* %buffer1 to i8* +/// %14 = bitcast i8* %13 to i32* +/// %15 = bitcast i8* %12 to i32* +/// %16 = getelementptr i32, i32* %14, i32 2 +/// %17 = getelementptr i32, i32* %15, i32 2 +/// %18 = load i32, i32* %16 +/// %19 = load i32, i32* %17 +/// %20 = call i32 @llvm.bswap.i32(i32 %18) +/// %21 = call i32 @llvm.bswap.i32(i32 %19) +/// %22 = zext i32 %20 to i64 +/// %23 = zext i32 %21 to i64 +/// %24 = sub i64 %22, %23 +/// %25 = icmp ne i64 %24, 0 +/// br i1 %25, label %res_block, label %loadbb2 +/// loadbb2: ; preds = %loadbb1 +/// %26 = bitcast i32* %buffer2 to i8* +/// %27 = bitcast i32* %buffer1 to i8* +/// %28 = bitcast i8* %27 to i16* +/// %29 = bitcast i8* %26 to i16* +/// %30 = getelementptr i16, i16* %28, i16 6 +/// %31 = getelementptr i16, i16* %29, i16 6 +/// %32 = load i16, i16* %30 +/// %33 = load i16, i16* %31 +/// %34 = call i16 @llvm.bswap.i16(i16 %32) +/// %35 = call i16 @llvm.bswap.i16(i16 %33) +/// %36 = zext i16 %34 to i64 +/// %37 = zext i16 %35 to i64 +/// %38 = sub i64 %36, %37 +/// %39 = icmp ne i64 %38, 0 +/// br i1 %39, label %res_block, label %loadbb3 +/// loadbb3: ; preds = %loadbb2 +/// %40 = bitcast i32* %buffer2 to i8* +/// %41 = bitcast i32* %buffer1 to i8* +/// %42 = getelementptr i8, i8* %41, i8 14 +/// %43 = getelementptr i8, i8* %40, i8 14 +/// %44 = load i8, i8* %42 +/// %45 = load i8, i8* %43 +/// %46 = zext i8 %44 to i32 +/// %47 = zext i8 %45 to i32 +/// %48 = sub i32 %46, %47 +/// br label %endblock +/// endblock: ; preds = %res_block, +/// %loadbb3 +/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] +/// ret i32 %phi.res +static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, + const TargetLowering *TLI, const DataLayout *DL) { + NumMemCmpCalls++; + IRBuilder<> Builder(CI->getContext()); + + // TTI call to check if target would like to expand memcmp. Also, get the + // MaxLoadSize. + unsigned MaxLoadSize; + if (!TTI->expandMemCmp(CI, MaxLoadSize)) + return false; + + // Early exit from expansion if -Oz. + if (CI->getFunction()->optForMinSize()) + return false; + + // Early exit from expansion if size is not a constant. + ConstantInt *SizeCast = dyn_cast(CI->getArgOperand(2)); + if (!SizeCast) { + NumMemCmpNotConstant++; + return false; + } + + // Early exit from expansion if size greater than max bytes to load. + uint64_t SizeVal = SizeCast->getZExtValue(); + unsigned NumLoads = 0; + unsigned RemainingSize = SizeVal; + unsigned LoadSize = MaxLoadSize; + while (RemainingSize) { + NumLoads += RemainingSize / LoadSize; + RemainingSize = RemainingSize % LoadSize; + LoadSize = LoadSize / 2; + } + + if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) { + NumMemCmpGreaterThanMax++; + return false; + } + + NumMemCmpInlined++; + + // MemCmpHelper object creates and sets up basic blocks required for + // expanding memcmp with size SizeVal. + unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock; + MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL); + + Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal); + + // Replace call with result of expansion and erase call. + CI->replaceAllUsesWith(Res); + CI->eraseFromParent(); + + return true; +} + +bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { BasicBlock *BB = CI->getParent(); // Lower inline assembly if we can. @@ -2220,10 +2340,11 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { ConstantInt *RetVal = lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); // Substituting this can cause recursive simplifications, which can - // invalidate our iterator. Use a WeakVH to hold onto it in case this + // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case + // this // happens. Value *CurValue = &*CurInstIterator; - WeakVH IterHandle(CurValue); + WeakTrackingVH IterHandle(CurValue); replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); @@ -2235,39 +2356,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { } return true; } - case Intrinsic::masked_load: { - // Scalarize unsupported vector masked load - if (!TTI->isLegalMaskedLoad(CI->getType())) { - scalarizeMaskedLoad(CI); - ModifiedDT = true; - return true; - } - return false; - } - case Intrinsic::masked_store: { - if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { - scalarizeMaskedStore(CI); - ModifiedDT = true; - return true; - } - return false; - } - case Intrinsic::masked_gather: { - if (!TTI->isLegalMaskedGather(CI->getType())) { - scalarizeMaskedGather(CI); - ModifiedDT = true; - return true; - } - return false; - } - case Intrinsic::masked_scatter: { - if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { - scalarizeMaskedScatter(CI); - ModifiedDT = true; - return true; - } - return false; - } case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { ZExtInst *ExtVal = dyn_cast(CI->getArgOperand(0)); @@ -2318,6 +2406,13 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { CI->eraseFromParent(); return true; } + + LibFunc Func; + if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) && + Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) { + ModifiedDT = true; + return true; + } return false; } @@ -3863,7 +3958,7 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI) { - const Function *F = CI->getParent()->getParent(); + const Function *F = CI->getFunction(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, ImmutableCallSite(CI)); @@ -4436,9 +4531,9 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // using it. if (Repl->use_empty()) { // This can cause recursive deletion, which can invalidate our iterator. - // Use a WeakVH to hold onto it in case this happens. + // Use a WeakTrackingVH to hold onto it in case this happens. Value *CurValue = &*CurInstIterator; - WeakVH IterHandle(CurValue); + WeakTrackingVH IterHandle(CurValue); BasicBlock *BB = CurInstIterator->getParent(); RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); @@ -4460,7 +4555,7 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; const TargetRegisterInfo *TRI = - TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); + TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI->ParseConstraints(*DL, TRI, CS); unsigned ArgNo = 0; @@ -5059,16 +5154,14 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { if (!ShlC) return false; uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); - auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt); - DemandBits |= ShlDemandBits; + DemandBits.setLowBits(BitWidth - ShiftAmt); break; } case llvm::Instruction::Trunc: { EVT TruncVT = TLI->getValueType(*DL, I->getType()); unsigned TruncBitWidth = TruncVT.getSizeInBits(); - auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth); - DemandBits |= TruncBits; + DemandBits.setLowBits(TruncBitWidth); break; } @@ -5467,6 +5560,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { return true; } + namespace { /// \brief Helper class to promote a scalar operation to a vector one. /// This class is used to move downward extractelement transition. @@ -5945,7 +6039,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, return true; } -bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. if (InsertedInsts.count(I)) @@ -5955,7 +6049,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) { + if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; @@ -6100,7 +6194,7 @@ static bool makeBitReverse(Instruction &I, const DataLayout &DL, // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. -bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { SunkAddrs.clear(); bool MadeChange = false; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index e1eeddf0816c164fd70421e08a462c69f848b036..a3cf2846d2f5d082a0bf78ce3790865b5058e609 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -74,7 +74,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; ++I) { unsigned Reg = *I; - if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg))) + if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; @@ -648,10 +648,8 @@ BreakAntiDependencies(const std::vector& SUnits, // as well. const SUnit *SU = MISUnitMap[Q->second->getParent()]; if (!SU) continue; - for (DbgValueVector::iterator DVI = DbgValues.begin(), - DVE = DbgValues.end(); DVI != DVE; ++DVI) - if (DVI->second == Q->second->getParent()) - UpdateDbgValue(*DVI->first, AntiDepReg, NewReg); + UpdateDbgValues(DbgValues, Q->second->getParent(), + AntiDepReg, NewReg); } // We just went back in time and modified history; the diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 7b1b2d64fcccece53254bc834ff73f45126e7dd5..853b9afa1026cc83e66ba590cf06fe42e703d209 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,49 +23,59 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "packets" - #include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include using namespace llvm; +#define DEBUG_TYPE "packets" + static cl::opt InstrLimit("dfa-instr-limit", cl::Hidden, cl::init(0), cl::desc("If present, stops packetizing after N instructions")); + static unsigned InstrCount = 0; // -------------------------------------------------------------------- // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp -namespace { - DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { - return (Inp << DFA_MAX_RESOURCES) | FuncUnits; - } +static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { + return (Inp << DFA_MAX_RESOURCES) | FuncUnits; +} - /// Return the DFAInput for an instruction class input vector. - /// This function is used in both DFAPacketizer.cpp and in - /// DFAPacketizerEmitter.cpp. - DFAInput getDFAInsnInput(const std::vector &InsnClass) { - DFAInput InsnInput = 0; - assert((InsnClass.size() <= DFA_MAX_RESTERMS) && - "Exceeded maximum number of DFA terms"); - for (auto U : InsnClass) - InsnInput = addDFAFuncUnits(InsnInput, U); - return InsnInput; - } +/// Return the DFAInput for an instruction class input vector. +/// This function is used in both DFAPacketizer.cpp and in +/// DFAPacketizerEmitter.cpp. +static DFAInput getDFAInsnInput(const std::vector &InsnClass) { + DFAInput InsnInput = 0; + assert((InsnClass.size() <= DFA_MAX_RESTERMS) && + "Exceeded maximum number of DFA terms"); + for (auto U : InsnClass) + InsnInput = addDFAFuncUnits(InsnInput, U); + return InsnInput; } + // -------------------------------------------------------------------- DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], const unsigned *SET): - InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), - DFAStateEntryTable(SET) { + InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) { // Make sure DFA types are large enough for the number of terms & resources. static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)), @@ -75,7 +85,6 @@ DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); } - // Read the DFA transition table and update CachedTable. // // Format of the transition tables: @@ -97,7 +106,6 @@ void DFAPacketizer::ReadTable(unsigned int state) { DFAStateInputTable[i][1]; } - // Return the DFAInput for an instruction class. DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. @@ -112,16 +120,14 @@ DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { return InsnInput; } - // Return the DFAInput for an instruction class input vector. DFAInput DFAPacketizer::getInsnInput(const std::vector &InsnClass) { return getDFAInsnInput(InsnClass); } - // Check if the resources occupied by a MCInstrDesc are available in the // current state. -bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { +bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); @@ -129,10 +135,9 @@ bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { return CachedTable.count(StateTrans) != 0; } - // Reserve the resources occupied by a MCInstrDesc and change the current // state to reflect that change. -void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { +void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); DFAInput InsnInput = getInsnInput(InsnClass); UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); @@ -141,24 +146,22 @@ void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { CurrentState = CachedTable[StateTrans]; } - // Check if the resources occupied by a machine instruction are available // in the current state. -bool DFAPacketizer::canReserveResources(llvm::MachineInstr &MI) { - const llvm::MCInstrDesc &MID = MI.getDesc(); +bool DFAPacketizer::canReserveResources(MachineInstr &MI) { + const MCInstrDesc &MID = MI.getDesc(); return canReserveResources(&MID); } - // Reserve the resources occupied by a machine instruction and change the // current state to reflect that change. -void DFAPacketizer::reserveResources(llvm::MachineInstr &MI) { - const llvm::MCInstrDesc &MID = MI.getDesc(); +void DFAPacketizer::reserveResources(MachineInstr &MI) { + const MCInstrDesc &MID = MI.getDesc(); reserveResources(&MID); } - namespace llvm { + // This class extends ScheduleDAGInstrs and overrides the schedule method // to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { @@ -166,9 +169,11 @@ private: AliasAnalysis *AA; /// Ordered list of DAG postprocessing steps. std::vector> Mutations; + public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA); + // Actual scheduling work. void schedule() override; @@ -176,11 +181,12 @@ public: void addMutation(std::unique_ptr Mutation) { Mutations.push_back(std::move(Mutation)); } + protected: void postprocessDAG(); }; -} +} // end namespace llvm DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, @@ -189,21 +195,18 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, CanHandleTerminators = true; } - /// Apply each ScheduleDAGMutation step in order. void DefaultVLIWScheduler::postprocessDAG() { for (auto &M : Mutations) M->apply(this); } - void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. buildSchedGraph(AA); postprocessDAG(); } - VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, MachineLoopInfo &mli, AliasAnalysis *aa) : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { @@ -211,15 +214,11 @@ VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); } - VLIWPacketizerList::~VLIWPacketizerList() { - if (VLIWScheduler) - delete VLIWScheduler; - if (ResourceTracker) - delete ResourceTracker; + delete VLIWScheduler; + delete ResourceTracker; } - // End the current packet, bundle packet instructions and reset DFA state. void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI) { @@ -239,7 +238,6 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, DEBUG(dbgs() << "End packet\n"); } - // Bundle machine instructions into packets. void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, @@ -338,7 +336,6 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, VLIWScheduler->finishBlock(); } - // Add a DAG mutation object to the ordered list. void VLIWPacketizerList::addMutation( std::unique_ptr Mutation) { diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 7ac2e5445435dac423097c400cb99993cb7ddd2a..91d18e2bcaa697b5daf5bec889a6df1ec8365bf7 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -23,7 +23,7 @@ using namespace llvm; -#define DEBUG_TYPE "codegen-dce" +#define DEBUG_TYPE "dead-mi-elimination" STATISTIC(NumDeletes, "Number of dead instructions deleted"); @@ -54,7 +54,7 @@ namespace { char DeadMachineInstructionElim::ID = 0; char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID; -INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", +INITIALIZE_PASS(DeadMachineInstructionElim, DEBUG_TYPE, "Remove dead machine instructions", false, false) bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp index 6f4ea1912cf4e27795405cfc9e10712b055fddd1..ab9a0592e0177ae7cce34623bb5bade86226467f 100644 --- a/lib/CodeGen/DetectDeadLanes.cpp +++ b/lib/CodeGen/DetectDeadLanes.cpp @@ -132,8 +132,7 @@ private: char DetectDeadLanes::ID = 0; char &llvm::DetectDeadLanesID = DetectDeadLanes::ID; -INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes", - false, false) +INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false) /// Returns true if \p MI will get lowered to a series of COPY instructions. /// We call this a COPY-like instruction. diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 38af19a0444852ad9d883ebbb516a112a5bd5e49..2f833260bca209f7f2b219ef6be7207729df2db7 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -12,12 +12,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -34,8 +35,6 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { - const TargetMachine *TM; - // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; @@ -52,15 +51,9 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in - // practice. DwarfEHPrepare() - : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr), - TLI(nullptr) {} - - DwarfEHPrepare(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr), - TLI(nullptr) {} + : FunctionPass(ID), RewindFunction(nullptr), DT(nullptr), TLI(nullptr) { + } bool runOnFunction(Function &Fn) override; @@ -78,18 +71,18 @@ namespace { } // end anonymous namespace char DwarfEHPrepare::ID = 0; -INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare", - "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE, + "Prepare DWARF exceptions", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare", - "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE, + "Prepare DWARF exceptions", false, false) -FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { - return new DwarfEHPrepare(TM); -} +FunctionPass *llvm::createDwarfEHPass() { return new DwarfEHPrepare(); } void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -254,9 +247,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { } bool DwarfEHPrepare::runOnFunction(Function &Fn) { - assert(TM && "DWARF EH preparation requires a target machine"); + const TargetMachine &TM = + getAnalysis().getTM(); DT = &getAnalysis().getDomTree(); - TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); + TLI = TM.getSubtargetImpl(Fn)->getTargetLowering(); bool Changed = InsertUnwindResumeCalls(Fn); DT = nullptr; TLI = nullptr; diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 729172796453091095ee0959aae815dbfc906991..402afe75b1414f26439059a5adeffeb3e7a9cf8e 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -616,13 +616,13 @@ private: char EarlyIfConverter::ID = 0; char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; -INITIALIZE_PASS_BEGIN(EarlyIfConverter, - "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE, + "Early If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(EarlyIfConverter, - "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE, + "Early If Converter", false, false) void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index 0ec79c2e69f94ed148014cb9838e288d3c364cd1..324ea171293dba847432bbadbe0f52a16a050aff 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -41,7 +41,7 @@ namespace { char ExpandISelPseudos::ID = 0; char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID; -INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", +INITIALIZE_PASS(ExpandISelPseudos, DEBUG_TYPE, "Expand ISel Pseudo-instructions", false, false) bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index ab2382e2db6d81cf7e5f662d206420f32536c29e..4ce86f27a7dd63584c14e9e4adbba2ab262cf20a 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -58,7 +58,7 @@ private: char ExpandPostRA::ID = 0; char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; -INITIALIZE_PASS(ExpandPostRA, "postrapseudos", +INITIALIZE_PASS(ExpandPostRA, DEBUG_TYPE, "Post-RA pseudo instruction expansion pass", false, false) /// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered @@ -142,8 +142,9 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { MachineOperand &DstMO = MI->getOperand(0); MachineOperand &SrcMO = MI->getOperand(1); - if (SrcMO.getReg() == DstMO.getReg()) { - DEBUG(dbgs() << "identity copy: " << *MI); + bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg()); + if (IdentityCopy || SrcMO.isUndef()) { + DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ") << *MI); // No need to insert an identity copy instruction, but replace with a KILL // if liveness is changed. if (SrcMO.isUndef() || MI->getNumOperands() > 2) { diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..70dca3b74b2f30a13485ffd14d43743477526252 --- /dev/null +++ b/lib/CodeGen/ExpandReductions.cpp @@ -0,0 +1,167 @@ +//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements IR expansion for reduction intrinsics, allowing targets +// to enable the experimental intrinsics until just before codegen. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +using namespace llvm; + +namespace { + +unsigned getOpcode(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::experimental_vector_reduce_fadd: + return Instruction::FAdd; + case Intrinsic::experimental_vector_reduce_fmul: + return Instruction::FMul; + case Intrinsic::experimental_vector_reduce_add: + return Instruction::Add; + case Intrinsic::experimental_vector_reduce_mul: + return Instruction::Mul; + case Intrinsic::experimental_vector_reduce_and: + return Instruction::And; + case Intrinsic::experimental_vector_reduce_or: + return Instruction::Or; + case Intrinsic::experimental_vector_reduce_xor: + return Instruction::Xor; + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + return Instruction::ICmp; + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + return Instruction::FCmp; + default: + llvm_unreachable("Unexpected ID"); + } +} + +RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { + switch (ID) { + case Intrinsic::experimental_vector_reduce_smax: + return RecurrenceDescriptor::MRK_SIntMax; + case Intrinsic::experimental_vector_reduce_smin: + return RecurrenceDescriptor::MRK_SIntMin; + case Intrinsic::experimental_vector_reduce_umax: + return RecurrenceDescriptor::MRK_UIntMax; + case Intrinsic::experimental_vector_reduce_umin: + return RecurrenceDescriptor::MRK_UIntMin; + case Intrinsic::experimental_vector_reduce_fmax: + return RecurrenceDescriptor::MRK_FloatMax; + case Intrinsic::experimental_vector_reduce_fmin: + return RecurrenceDescriptor::MRK_FloatMin; + default: + return RecurrenceDescriptor::MRK_Invalid; + } +} + +bool expandReductions(Function &F, const TargetTransformInfo *TTI) { + bool Changed = false; + SmallVector Worklist; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (auto II = dyn_cast(&*I)) + Worklist.push_back(II); + + for (auto *II : Worklist) { + IRBuilder<> Builder(II); + Value *Vec = nullptr; + auto ID = II->getIntrinsicID(); + auto MRK = RecurrenceDescriptor::MRK_Invalid; + switch (ID) { + case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_fmul: + // FMFs must be attached to the call, otherwise it's an ordered reduction + // and it can't be handled by generating this shuffle sequence. + // TODO: Implement scalarization of ordered reductions here for targets + // without native support. + if (!II->getFastMathFlags().unsafeAlgebra()) + continue; + Vec = II->getArgOperand(1); + break; + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + Vec = II->getArgOperand(0); + MRK = getMRK(ID); + break; + default: + continue; + } + if (!TTI->shouldExpandReduction(II)) + continue; + auto Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); + II->replaceAllUsesWith(Rdx); + II->eraseFromParent(); + Changed = true; + } + return Changed; +} + +class ExpandReductions : public FunctionPass { +public: + static char ID; + ExpandReductions() : FunctionPass(ID) { + initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + const auto *TTI =&getAnalysis().getTTI(F); + return expandReductions(F, TTI); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesCFG(); + } +}; +} + +char ExpandReductions::ID; +INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", + "Expand reduction intrinsics", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", + "Expand reduction intrinsics", false, false) + +FunctionPass *llvm::createExpandReductionsPass() { + return new ExpandReductions(); +} + +PreservedAnalyses ExpandReductionsPass::run(Function &F, + FunctionAnalysisManager &AM) { + const auto &TTI = AM.getResult(F); + if (!expandReductions(F, &TTI)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserveSet(); + return PA; +} diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp index 43f3641289787531d1d4578ec135f50496883f6e..2924b011e0c1dbed5d4f1e7eea1f49b8bf2e26a1 100644 --- a/lib/CodeGen/FaultMaps.cpp +++ b/lib/CodeGen/FaultMaps.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/FaultMaps.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/FaultMaps.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp index d61afad4db5778380d57957647c49574d754e299..9c71b18619a1e7e0c2c8fd679d6f3c338fbe7191 100644 --- a/lib/CodeGen/FuncletLayout.cpp +++ b/lib/CodeGen/FuncletLayout.cpp @@ -11,10 +11,10 @@ // funclets being contiguous. // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" using namespace llvm; #define DEBUG_TYPE "funclet-layout" @@ -37,7 +37,7 @@ public: char FuncletLayout::ID = 0; char &llvm::FuncletLayoutID = FuncletLayout::ID; -INITIALIZE_PASS(FuncletLayout, "funclet-layout", +INITIALIZE_PASS(FuncletLayout, DEBUG_TYPE, "Contiguously Lay Out Funclets", false, false) bool FuncletLayout::runOnMachineFunction(MachineFunction &F) { diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index be21c7306da160ba6122d91bf7d5b93ef233d986..456fa799e8e1ae119438fb3514d71ae6d02db034 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -11,22 +11,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + using namespace llvm; namespace { class Printer : public FunctionPass { static char ID; + raw_ostream &OS; public: @@ -38,7 +43,8 @@ public: bool runOnFunction(Function &F) override; bool doFinalization(Module &M) override; }; -} + +} // end anonymous namespace INITIALIZE_PASS(GCModuleInfo, "collector-metadata", "Create Garbage Collector Module Metadata", false, false) @@ -48,7 +54,7 @@ INITIALIZE_PASS(GCModuleInfo, "collector-metadata", GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) : F(F), S(S), FrameSize(~0LL) {} -GCFunctionInfo::~GCFunctionInfo() {} +GCFunctionInfo::~GCFunctionInfo() = default; // ----------------------------------------------------------------------------- @@ -67,7 +73,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { return *I->second; GCStrategy *S = getGCStrategy(F.getGC()); - Functions.push_back(make_unique(F, *S)); + Functions.push_back(llvm::make_unique(F, *S)); GCFunctionInfo *GFI = Functions.back().get(); FInfoMap[&F] = GFI; return *GFI; diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp index d183c7f2980b0296ac3d149426065fe434e395f8..bc7beb6f6c2d3ee9e4a273739726adc5342dffd3 100644 --- a/lib/CodeGen/GCMetadataPrinter.cpp +++ b/lib/CodeGen/GCMetadataPrinter.cpp @@ -1,4 +1,4 @@ -//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===// +//===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===// // // The LLVM Compiler Infrastructure // @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCMetadataPrinter.h" + using namespace llvm; LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry) -GCMetadataPrinter::GCMetadataPrinter() {} +GCMetadataPrinter::GCMetadataPrinter() = default; -GCMetadataPrinter::~GCMetadataPrinter() {} +GCMetadataPrinter::~GCMetadataPrinter() = default; diff --git a/lib/CodeGen/GlobalISel/CMakeLists.txt b/lib/CodeGen/GlobalISel/CMakeLists.txt index 03a8c4f5f909ad58228b0a9e7255cb93324f0249..eba7ea8132e3bdcf148e13a1f4bdcc367becdcd3 100644 --- a/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -8,6 +8,7 @@ set(GLOBAL_ISEL_FILES LegalizerHelper.cpp Legalizer.cpp LegalizerInfo.cpp + Localizer.cpp RegBankSelect.cpp RegisterBank.cpp RegisterBankInfo.cpp @@ -24,11 +25,11 @@ endif() # In LLVMBuild.txt files, it is not possible to mark a dependency to a # library as optional. So instead, generate an empty library if we did -# not ask for it. +# not ask for it. add_llvm_library(LLVMGlobalISel ${GLOBAL_ISEL_BUILD_FILES} GlobalISel.cpp - + DEPENDS intrinsics_gen ) diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index 035a2ac78ed997ff6faf482fd94719088f1658e9..be0c5c2bb70e60f13c730852d045970ed5172521 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -37,7 +37,7 @@ bool CallLowering::lowerCall( for (auto &Arg : CS.args()) { ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; - setArgFlags(OrigArg, i + 1, DL, CS); + setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); OrigArgs.push_back(OrigArg); ++i; } @@ -83,8 +83,8 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; - if (FuncInfo.getParamAlignment(OpIdx)) - FrameAlign = FuncInfo.getParamAlignment(OpIdx); + if (FuncInfo.getParamAlignment(OpIdx - 2)) + FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); Arg.Flags.setByValAlign(FrameAlign); diff --git a/lib/CodeGen/GlobalISel/GlobalISel.cpp b/lib/CodeGen/GlobalISel/GlobalISel.cpp index fcd2722f1c2fa5f891f56b87d52aada09ece13c1..29d1209bb02a5b572f8a5c3e18f09044f618d290 100644 --- a/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -26,6 +26,7 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) { void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeIRTranslatorPass(Registry); initializeLegalizerPass(Registry); + initializeLocalizerPass(Registry); initializeRegBankSelectPass(Registry); initializeInstructionSelectPass(Registry); } diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 766187378446903fa26e2c3d58885b51d2fa4003..dccd8e0706ca69c72e747fd1e1dfbde26025e4a3 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -16,10 +16,10 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" -#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -340,6 +340,15 @@ bool IRTranslator::translateExtractValue(const User &U, Type *Int32Ty = Type::getInt32Ty(U.getContext()); SmallVector Indices; + // If Src is a single element ConstantStruct, translate extractvalue + // to that element to avoid inserting a cast instruction. + if (auto CS = dyn_cast(Src)) + if (CS->getNumOperands() == 1) { + unsigned Res = getOrCreateVReg(*CS->getOperand(0)); + ValToVReg[&U] = Res; + return true; + } + // getIndexedOffsetInType is designed for GEPs, so the first index is the // usual array element rather than looking into the actual aggregate. Indices.push_back(ConstantInt::get(Int32Ty, 0)); @@ -381,18 +390,19 @@ bool IRTranslator::translateInsertValue(const User &U, uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices); unsigned Res = getOrCreateVReg(U); - const Value &Inserted = *U.getOperand(1); - MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), getOrCreateVReg(Inserted), - Offset); + unsigned Inserted = getOrCreateVReg(*U.getOperand(1)); + MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset); return true; } bool IRTranslator::translateSelect(const User &U, MachineIRBuilder &MIRBuilder) { - MIRBuilder.buildSelect(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)), - getOrCreateVReg(*U.getOperand(1)), - getOrCreateVReg(*U.getOperand(2))); + unsigned Res = getOrCreateVReg(U); + unsigned Tst = getOrCreateVReg(*U.getOperand(0)); + unsigned Op0 = getOrCreateVReg(*U.getOperand(1)); + unsigned Op1 = getOrCreateVReg(*U.getOperand(2)); + MIRBuilder.buildSelect(Res, Tst, Op0, Op1); return true; } @@ -774,6 +784,21 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { return false; MIB.addUse(getOrCreateVReg(*Arg)); } + + // Add a MachineMemOperand if it is a target mem intrinsic. + const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + TargetLowering::IntrinsicInfo Info; + // TODO: Add a GlobalISel version of getTgtMemIntrinsic. + if (TLI.getTgtMemIntrinsic(Info, CI, ID)) { + MachineMemOperand::Flags Flags = + Info.vol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; + Flags |= + Info.readMem ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore; + uint64_t Size = Info.memVT.getSizeInBits() >> 3; + MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), + Flags, Size, Info.align)); + } + return true; } @@ -984,9 +1009,11 @@ bool IRTranslator::translateInsertElement(const User &U, ValToVReg[&U] = Elt; return true; } - MIRBuilder.buildInsertVectorElement( - getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)), - getOrCreateVReg(*U.getOperand(1)), getOrCreateVReg(*U.getOperand(2))); + unsigned Res = getOrCreateVReg(U); + unsigned Val = getOrCreateVReg(*U.getOperand(0)); + unsigned Elt = getOrCreateVReg(*U.getOperand(1)); + unsigned Idx = getOrCreateVReg(*U.getOperand(2)); + MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx); return true; } @@ -999,9 +1026,10 @@ bool IRTranslator::translateExtractElement(const User &U, ValToVReg[&U] = Elt; return true; } - MIRBuilder.buildExtractVectorElement(getOrCreateVReg(U), - getOrCreateVReg(*U.getOperand(0)), - getOrCreateVReg(*U.getOperand(1))); + unsigned Res = getOrCreateVReg(U); + unsigned Val = getOrCreateVReg(*U.getOperand(0)); + unsigned Idx = getOrCreateVReg(*U.getOperand(1)); + MIRBuilder.buildExtractVectorElement(Res, Val, Idx); return true; } @@ -1104,6 +1132,31 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) { default: return false; } + } else if (auto CS = dyn_cast(&C)) { + // Return the element if it is a single element ConstantStruct. + if (CS->getNumOperands() == 1) { + unsigned EltReg = getOrCreateVReg(*CS->getOperand(0)); + EntryBuilder.buildCast(Reg, EltReg); + return true; + } + SmallVector Ops; + SmallVector Indices; + uint64_t Offset = 0; + for (unsigned i = 0; i < CS->getNumOperands(); ++i) { + unsigned OpReg = getOrCreateVReg(*CS->getOperand(i)); + Ops.push_back(OpReg); + Indices.push_back(Offset); + Offset += MRI->getType(OpReg).getSizeInBits(); + } + EntryBuilder.buildSequence(Reg, Ops, Indices); + } else if (auto CV = dyn_cast(&C)) { + if (CV->getNumOperands() == 1) + return translate(*CV->getOperand(0), Reg); + SmallVector Ops; + for (unsigned i = 0; i < CV->getNumOperands(); ++i) { + Ops.push_back(getOrCreateVReg(*CV->getOperand(i))); + } + EntryBuilder.buildMerge(Reg, Ops); } else return false; @@ -1117,6 +1170,11 @@ void IRTranslator::finalizeFunction() { ValToVReg.clear(); FrameIndices.clear(); MachinePreds.clear(); + // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it + // to avoid accessing free’d memory (in runOnMachineFunction) and to avoid + // destroying it twice (in ~IRTranslator() and ~LLVMContext()) + EntryBuilder = MachineIRBuilder(); + CurBuilder = MachineIRBuilder(); } bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { @@ -1195,10 +1253,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { finishPendingPhis(); - // Now that the MachineFrameInfo has been configured, no further changes to - // the reserved registers are possible. - MRI->freezeReservedRegs(*MF); - // Merge the argument lowering and constants block with its single // successor, the LLVM-IR entry block. We want the basic block to // be maximal. diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 26454c1ef00f942dc84b43769a281a98f7274074..a16e14fe2db6d5afee65d4aa7c91fed5c7b8a2cf 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #define DEBUG_TYPE "instruction-select" @@ -70,8 +71,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); - // FIXME: freezeReservedRegs is now done in IRTranslator, but there are many - // other MF/MFI fields we need to initialize. + // FIXME: There are many other MF/MFI fields we need to initialize. #ifndef NDEBUG // Check that our input is fully legal: we require the function to have the @@ -145,6 +145,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { } } + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + // Now that selection is complete, there are no more generic vregs. Verify // that the size of the now-constrained vreg is unchanged and that it has a // register class. @@ -165,7 +167,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; if (VRegToType.second.isValid() && - VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) { + VRegToType.second.getSizeInBits() > TRI.getRegSizeInBits(*RC)) { reportGISelFailure(MF, TPC, MORE, "gisel-select", "VReg has explicit size different from class size", *MI); @@ -182,6 +184,9 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; } + auto &TLI = *MF.getSubtarget().getTargetLowering(); + TLI.finalizeLowering(MF); + // FIXME: Should we accurately track changes? return true; } diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index fb9d01ef8542a3efd4646914111ec0f88bec4b0f..4c0b06dffd2162b131922dcced0e06def28bf6c7 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -58,38 +58,22 @@ bool InstructionSelector::constrainSelectedInstRegOperands( MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(), Reg, OpI)); - // Tie uses to defs as indicated in MCInstrDesc. + // Tie uses to defs as indicated in MCInstrDesc if this hasn't already been + // done. if (MO.isUse()) { int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO); - if (DefIdx != -1) + if (DefIdx != -1 && !I.isRegTiedToUseOperand(DefIdx)) I.tieOperands(DefIdx, OpI); } } return true; } -Optional -InstructionSelector::getConstantVRegVal(unsigned VReg, - const MachineRegisterInfo &MRI) const { - MachineInstr *MI = MRI.getVRegDef(VReg); - if (MI->getOpcode() != TargetOpcode::G_CONSTANT) - return None; - - if (MI->getOperand(1).isImm()) - return MI->getOperand(1).getImm(); - - if (MI->getOperand(1).isCImm() && - MI->getOperand(1).getCImm()->getBitWidth() <= 64) - return MI->getOperand(1).getCImm()->getSExtValue(); - - return None; -} - bool InstructionSelector::isOperandImmEqual( const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const { - if (MO.getReg()) + if (MO.isReg() && MO.getReg()) if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI)) return *VRegVal == Value; return false; diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index 657ddb30791952af164493ff81a6a93bebe83993..1b50489deeba9090a1d45f4673c3b6073a65284e 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -15,7 +15,6 @@ #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" -#include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -24,6 +23,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include + #define DEBUG_TYPE "legalizer" using namespace llvm; @@ -161,7 +162,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // convergence for performance reasons. bool Changed = false; MachineBasicBlock::iterator NextMI; - for (auto &MBB : MF) + for (auto &MBB : MF) { for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { // Get the next Instruction before we try to legalize, because there's a // good chance MI will be deleted. @@ -171,18 +172,26 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // and are assumed to be legal. if (!isPreISelGenericOpcode(MI->getOpcode())) continue; + unsigned NumNewInsns = 0; SmallVector WorkList; - Helper.MIRBuilder.recordInsertions( - [&](MachineInstr *MI) { WorkList.push_back(MI); }); + Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { + // Only legalize pre-isel generic instructions. + // Legalization process could generate Target specific pseudo + // instructions with generic types. Don't record them + if (isPreISelGenericOpcode(MI->getOpcode())) { + ++NumNewInsns; + WorkList.push_back(MI); + } + }); WorkList.push_back(&*MI); + bool Changed = false; LegalizerHelper::LegalizeResult Res; unsigned Idx = 0; do { Res = Helper.legalizeInstrStep(*WorkList[Idx]); // Error out if we couldn't legalize this instruction. We may want to - // fall - // back to DAG ISel instead in the future. + // fall back to DAG ISel instead in the future. if (Res == LegalizerHelper::UnableToLegalize) { Helper.MIRBuilder.stopRecordingInsertions(); if (Res == LegalizerHelper::UnableToLegalize) { @@ -194,10 +203,21 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { } Changed |= Res == LegalizerHelper::Legalized; ++Idx; + +#ifndef NDEBUG + if (NumNewInsns) { + DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n"); + for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end(); + I != E; ++I) + DEBUG(dbgs() << ".. .. New MI: "; (*I)->print(dbgs())); + NumNewInsns = 0; + } +#endif } while (Idx < WorkList.size()); Helper.MIRBuilder.stopRecordingInsertions(); } + } MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); @@ -207,7 +227,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // good chance MI will be deleted. NextMI = std::next(MI); - Changed |= combineExtracts(*MI, MRI, TII); + // combineExtracts erases MI. + if (combineExtracts(*MI, MRI, TII)) { + Changed = true; + continue; + } Changed |= combineMerges(*MI, MRI, TII); } } diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 20358f7ee6c2ed99f5cc0890d1a94854b91b573a..1d0d3dffa4c59cba76254764a759ed78e120c079 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -24,7 +24,7 @@ #include -#define DEBUG_TYPE "legalize-mir" +#define DEBUG_TYPE "legalizer" using namespace llvm; @@ -35,24 +35,34 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF) LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { + DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); + auto Action = LI.getAction(MI, MRI); switch (std::get<0>(Action)) { case LegalizerInfo::Legal: + DEBUG(dbgs() << ".. Already legal\n"); return AlreadyLegal; case LegalizerInfo::Libcall: + DEBUG(dbgs() << ".. Convert to libcall\n"); return libcall(MI); case LegalizerInfo::NarrowScalar: + DEBUG(dbgs() << ".. Narrow scalar\n"); return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::WidenScalar: + DEBUG(dbgs() << ".. Widen scalar\n"); return widenScalar(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::Lower: + DEBUG(dbgs() << ".. Lower\n"); return lower(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::FewerElements: + DEBUG(dbgs() << ".. Reduce number of elements\n"); return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::Custom: + DEBUG(dbgs() << ".. Custom legalization\n"); return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized : UnableToLegalize; default: + DEBUG(dbgs() << ".. Unable to legalize\n"); return UnableToLegalize; } } @@ -66,6 +76,18 @@ void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { + case TargetOpcode::G_SDIV: + assert(Size == 32 && "Unsupported size"); + return RTLIB::SDIV_I32; + case TargetOpcode::G_UDIV: + assert(Size == 32 && "Unsupported size"); + return RTLIB::UDIV_I32; + case TargetOpcode::G_SREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::SREM_I32; + case TargetOpcode::G_UREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::UREM_I32; case TargetOpcode::G_FADD: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; @@ -77,31 +99,57 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } +LegalizerHelper::LegalizeResult llvm::replaceWithLibcall( + MachineInstr &MI, MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, ArrayRef Args) { + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + const char *Name = TLI.getLibcallName(Libcall); + MIRBuilder.getMF().getFrameInfo().setHasCalls(true); + MIRBuilder.setInstr(MI); + if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), + MachineOperand::CreateES(Name), Result, Args)) + return LegalizerHelper::UnableToLegalize; + + // We're about to remove MI, so move the insert point after it. + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), + std::next(MIRBuilder.getInsertPt())); + + MI.eraseFromParent(); + return LegalizerHelper::Legalized; +} + +static LegalizerHelper::LegalizeResult +simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, + Type *OpType) { + auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + return replaceWithLibcall(MI, MIRBuilder, Libcall, + {MI.getOperand(0).getReg(), OpType}, + {{MI.getOperand(1).getReg(), OpType}, + {MI.getOperand(2).getReg(), OpType}}); +} + LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI) { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned Size = Ty.getSizeInBits(); - MIRBuilder.setInstr(MI); + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: { + Type *HLTy = Type::getInt32Ty(Ctx); + return simpleLibcall(MI, MIRBuilder, Size, HLTy); + } case TargetOpcode::G_FADD: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { - auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); - Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); - auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); - auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - auto Libcall = getRTLibDesc(MI.getOpcode(), Size); - const char *Name = TLI.getLibcallName(Libcall); - MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - CLI.lowerCall( - MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), {MI.getOperand(0).getReg(), Ty}, - {{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}}); - MI.eraseFromParent(); - return Legalized; + Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); + return simpleLibcall(MI, MIRBuilder, Size, HLTy); } } } @@ -209,17 +257,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned NarrowSize = NarrowTy.getSizeInBits(); int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; - LLT NarrowPtrTy = LLT::pointer( - MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize); + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); SmallVector DstRegs; for (int i = 0; i < NumParts; ++i) { unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg = MRI.createGenericVirtualRegister(NarrowPtrTy); - unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64)); + unsigned SrcReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); - MIRBuilder.buildConstant(Offset, i * NarrowSize / 8); - MIRBuilder.buildGEP(SrcReg, MI.getOperand(1).getReg(), Offset); // TODO: This is conservatively correct, but we probably want to split the // memory operands in the future. MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin()); @@ -235,17 +284,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned NarrowSize = NarrowTy.getSizeInBits(); int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; - LLT NarrowPtrTy = LLT::pointer( - MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize); + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); SmallVector SrcRegs; extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs); for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowPtrTy); - unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64)); - MIRBuilder.buildConstant(Offset, i * NarrowSize / 8); - MIRBuilder.buildGEP(DstReg, MI.getOperand(1).getReg(), Offset); + unsigned DstReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); + // TODO: This is conservatively correct, but we probably want to split the // memory operands in the future. MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin()); diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index eaf4056e47eafd24771abd5d107cfaa3c72b95fb..4d45910422967eab528f14d4dadf3382b72be7fd 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -162,7 +162,7 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI, return std::get<0>(getAction(MI, MRI)) == Legal; } -LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect, +Optional LegalizerInfo::findLegalType(const InstrAspect &Aspect, LegalizeAction Action) const { switch(Action) { default: @@ -174,20 +174,20 @@ LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect, return Aspect.Type; case NarrowScalar: { return findLegalType(Aspect, - [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); + [](LLT Ty) -> LLT { return Ty.halfScalarSize(); }); } case WidenScalar: { - return findLegalType(Aspect, [&](LLT Ty) -> LLT { + return findLegalType(Aspect, [](LLT Ty) -> LLT { return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize(); }); } case FewerElements: { return findLegalType(Aspect, - [&](LLT Ty) -> LLT { return Ty.halfElements(); }); + [](LLT Ty) -> LLT { return Ty.halfElements(); }); } case MoreElements: { return findLegalType(Aspect, - [&](LLT Ty) -> LLT { return Ty.doubleElements(); }); + [](LLT Ty) -> LLT { return Ty.doubleElements(); }); } } } diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c5d0999fe43889c6839dc0fa7945cb9c9e7f88eb --- /dev/null +++ b/lib/CodeGen/GlobalISel/Localizer.cpp @@ -0,0 +1,123 @@ +//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the Localizer class. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Localizer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "localizer" + +using namespace llvm; + +char Localizer::ID = 0; +INITIALIZE_PASS(Localizer, DEBUG_TYPE, + "Move/duplicate certain instructions close to their use", false, + false) + +Localizer::Localizer() : MachineFunctionPass(ID) { + initializeLocalizerPass(*PassRegistry::getPassRegistry()); +} + +void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); } + +bool Localizer::shouldLocalize(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + // Constants-like instructions should be close to their users. + // We don't want long live-ranges for them. + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FRAME_INDEX: + return true; + } +} + +bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, + MachineBasicBlock *&InsertMBB) { + MachineInstr &MIUse = *MOUse.getParent(); + InsertMBB = MIUse.getParent(); + if (MIUse.isPHI()) + InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB(); + return InsertMBB == Def.getParent(); +} + +bool Localizer::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); + + init(MF); + + bool Changed = false; + // Keep track of the instructions we localized. + // We won't need to process them if we see them later in the CFG. + SmallPtrSet LocalizedInstrs; + DenseMap, unsigned> MBBWithLocalDef; + // TODO: Do bottom up traversal. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI)) + continue; + DEBUG(dbgs() << "Should localize: " << MI); + assert(MI.getDesc().getNumDefs() == 1 && + "More than one definition not supported yet"); + unsigned Reg = MI.getOperand(0).getReg(); + // Check if all the users of MI are local. + // We are going to invalidation the list of use operands, so we + // can't use range iterator. + for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end(); + MOIt != MOItEnd;) { + MachineOperand &MOUse = *MOIt++; + // Check if the use is already local. + MachineBasicBlock *InsertMBB; + DEBUG(MachineInstr &MIUse = *MOUse.getParent(); + dbgs() << "Checking use: " << MIUse + << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); + if (isLocalUse(MOUse, MI, InsertMBB)) + continue; + DEBUG(dbgs() << "Fixing non-local use\n"); + Changed = true; + auto MBBAndReg = std::make_pair(InsertMBB, Reg); + auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); + if (NewVRegIt == MBBWithLocalDef.end()) { + // Create the localized instruction. + MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); + LocalizedInstrs.insert(LocalizedMI); + // Don't try to be smart for the insertion point. + // There is no guarantee that the first seen use is the first + // use in the block. + InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI); + + // Set a new register for the definition. + unsigned NewReg = + MRI->createGenericVirtualRegister(MRI->getType(Reg)); + MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); + LocalizedMI->getOperand(0).setReg(NewReg); + NewVRegIt = + MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; + DEBUG(dbgs() << "Inserted: " << *LocalizedMI); + } + DEBUG(dbgs() << "Update use with: " << PrintReg(NewVRegIt->second) + << '\n'); + // Update the user reg. + MOUse.setReg(NewVRegIt->second); + } + } + } + return Changed; +} diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 8d1a263395a0e913d52825e00883e16b6f6ba4e1..79d312fb52ca4373a10da01f79e85e8e39ebd8ed 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -191,6 +191,24 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, .addUse(Op1); } +Optional +MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, + const LLT &ValueTy, uint64_t Value) { + assert(Res == 0 && "Res is a result argument"); + assert(ValueTy.isScalar() && "invalid offset type"); + + if (Value == 0) { + Res = Op0; + return None; + } + + Res = MRI->createGenericVirtualRegister(MRI->getType(Op0)); + unsigned TmpReg = MRI->createGenericVirtualRegister(ValueTy); + + buildConstant(TmpReg, Value); + return buildGEP(Res, Op0, TmpReg); +} + MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, uint32_t NumBits) { assert(MRI->getType(Res).isPointer() && @@ -592,7 +610,7 @@ MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res, LLT EltTy = MRI->getType(Elt); LLT IdxTy = MRI->getType(Idx); assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type"); - assert(EltTy.isScalar() && IdxTy.isScalar() && "invalid operand type"); + assert(IdxTy.isScalar() && "invalid operand type"); assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch"); assert(ResTy.getElementType() == EltTy && "type mismatch"); #endif @@ -612,7 +630,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res, LLT ValTy = MRI->getType(Val); LLT IdxTy = MRI->getType(Idx); assert(ValTy.isVector() && "invalid operand type"); - assert(ResTy.isScalar() && IdxTy.isScalar() && "invalid operand type"); + assert((ResTy.isScalar() || ResTy.isPointer()) && "invalid operand type"); + assert(IdxTy.isScalar() && "invalid operand type"); assert(ValTy.getElementType() == ResTy && "type mismatch"); #endif diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index f935390a8d1bd0c8438c4c035d34866ac7b0d195..2eb3cdee694d44dd9ec2fe1aecbd80553eb166eb 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -204,30 +204,28 @@ uint64_t RegBankSelect::getRepairCost( // TODO: use a dedicated constant for ImpossibleCost. if (Cost != UINT_MAX) return Cost; - assert(!TPC->isGlobalISelAbortEnabled() && - "Legalization not available yet"); // Return the legalization cost of that repairing. } - assert(!TPC->isGlobalISelAbortEnabled() && - "Complex repairing not implemented yet"); return UINT_MAX; } -RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( +const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings, SmallVectorImpl &RepairPts) { assert(!PossibleMappings.empty() && "Do not know how to map this instruction"); - RegisterBankInfo::InstructionMapping *BestMapping = nullptr; + const RegisterBankInfo::InstructionMapping *BestMapping = nullptr; MappingCost Cost = MappingCost::ImpossibleCost(); SmallVector LocalRepairPts; - for (RegisterBankInfo::InstructionMapping &CurMapping : PossibleMappings) { - MappingCost CurCost = computeMapping(MI, CurMapping, LocalRepairPts, &Cost); + for (const RegisterBankInfo::InstructionMapping *CurMapping : + PossibleMappings) { + MappingCost CurCost = + computeMapping(MI, *CurMapping, LocalRepairPts, &Cost); if (CurCost < Cost) { DEBUG(dbgs() << "New best: " << CurCost << '\n'); Cost = CurCost; - BestMapping = &CurMapping; + BestMapping = CurMapping; RepairPts.clear(); for (RepairingPlacement &RepairPt : LocalRepairPts) RepairPts.emplace_back(std::move(RepairPt)); @@ -237,7 +235,7 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping( // If none of the mapping worked that means they are all impossible. // Thus, pick the first one and set an impossible repairing point. // It will trigger the failed isel mode. - BestMapping = &(*PossibleMappings.begin()); + BestMapping = *PossibleMappings.begin(); RepairPts.emplace_back( RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible)); } else @@ -450,6 +448,11 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping( // Sums up the repairing cost of MO at each insertion point. uint64_t RepairCost = getRepairCost(MO, ValMapping); + + // This is an impossible to repair cost. + if (RepairCost == UINT_MAX) + continue; + // Bias used for splitting: 5%. const uint64_t PercentageForBias = 5; uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100; @@ -543,10 +546,10 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) { // Remember the repairing placement for all the operands. SmallVector RepairPts; - RegisterBankInfo::InstructionMapping BestMapping; + const RegisterBankInfo::InstructionMapping *BestMapping; if (OptMode == RegBankSelect::Mode::Fast) { - BestMapping = RBI->getInstrMapping(MI); - MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts); + BestMapping = &RBI->getInstrMapping(MI); + MappingCost DefaultCost = computeMapping(MI, *BestMapping, RepairPts); (void)DefaultCost; if (DefaultCost == MappingCost::ImpossibleCost()) return false; @@ -555,16 +558,16 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) { RBI->getInstrPossibleMappings(MI); if (PossibleMappings.empty()) return false; - BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts)); + BestMapping = &findBestMapping(MI, PossibleMappings, RepairPts); } // Make sure the mapping is valid for MI. - assert(BestMapping.verify(MI) && "Invalid instruction mapping"); + assert(BestMapping->verify(MI) && "Invalid instruction mapping"); - DEBUG(dbgs() << "Best Mapping: " << BestMapping << '\n'); + DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n'); // After this call, MI may not be valid anymore. // Do not use it. - return applyMapping(MI, BestMapping, RepairPts); + return applyMapping(MI, *BestMapping, RepairPts); } bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp index 940957d021524582dc06ff66562a786802bb53d8..83b21e63709716ded83fb1cdeaa076aefe02c813 100644 --- a/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -48,7 +48,7 @@ bool RegisterBank::verify(const TargetRegisterInfo &TRI) const { // Verify that the Size of the register bank is big enough to cover // all the register classes it covers. - assert((getSize() >= SubRC.getSize() * 8) && + assert(getSize() >= TRI.getRegSizeInBits(SubRC) && "Size is not big enough for all the subclasses!"); assert(covers(SubRC) && "Not all subclasses are covered"); } diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index b2df2f1596769d1bf85fe144f01d55e03346604d..a841902feed119a8675cb615f835fb57d245c3a0 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -45,6 +45,10 @@ STATISTIC(NumOperandsMappingsCreated, "Number of operands mappings dynamically created"); STATISTIC(NumOperandsMappingsAccessed, "Number of operands mappings dynamically accessed"); +STATISTIC(NumInstructionMappingsCreated, + "Number of instruction mappings dynamically created"); +STATISTIC(NumInstructionMappingsAccessed, + "Number of instruction mappings dynamically accessed"); const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX; const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1; @@ -137,7 +141,7 @@ static bool isCopyLike(const MachineInstr &MI) { MI.getOpcode() == TargetOpcode::REG_SEQUENCE; } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // For copies we want to walk over the operands and try to find one // that has a register bank since the instruction itself will not get @@ -147,9 +151,6 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // is important. The rest is not constrained. unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands(); - RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1, - /*OperandsMapping*/ nullptr, - NumOperandsForMapping); const MachineFunction &MF = *MI.getParent()->getParent(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); @@ -190,7 +191,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { if (!IsCopyLike) // MI does not carry enough information to guess the mapping. - return InstructionMapping(); + return getInvalidInstructionMapping(); continue; } } @@ -206,11 +207,13 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { if (IsCopyLike && !CompleteMapping) // No way to deduce the type from what we have. - return InstructionMapping(); + return getInvalidInstructionMapping(); assert(CompleteMapping && "Setting an uncomplete mapping"); - Mapping.setOperandsMapping(getOperandsMapping(OperandsMapping)); - return Mapping; + return getInstructionMapping( + DefaultMappingID, /*Cost*/ 1, + /*OperandsMapping*/ getOperandsMapping(OperandsMapping), + NumOperandsForMapping); } /// Hashing function for PartialMapping. @@ -320,9 +323,44 @@ const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping( return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end()); } -RegisterBankInfo::InstructionMapping +static hash_code +hashInstructionMapping(unsigned ID, unsigned Cost, + const RegisterBankInfo::ValueMapping *OperandsMapping, + unsigned NumOperands) { + return hash_combine(ID, Cost, OperandsMapping, NumOperands); +} + +const RegisterBankInfo::InstructionMapping & +RegisterBankInfo::getInstructionMappingImpl( + bool IsInvalid, unsigned ID, unsigned Cost, + const RegisterBankInfo::ValueMapping *OperandsMapping, + unsigned NumOperands) const { + assert(((IsInvalid && ID == InvalidMappingID && Cost == 0 && + OperandsMapping == nullptr && NumOperands == 0) || + !IsInvalid) && + "Mismatch argument for invalid input"); + ++NumInstructionMappingsAccessed; + + hash_code Hash = + hashInstructionMapping(ID, Cost, OperandsMapping, NumOperands); + const auto &It = MapOfInstructionMappings.find(Hash); + if (It != MapOfInstructionMappings.end()) + return *It->second; + + ++NumInstructionMappingsCreated; + + auto &InstrMapping = MapOfInstructionMappings[Hash]; + if (IsInvalid) + InstrMapping = llvm::make_unique(); + else + InstrMapping = llvm::make_unique( + ID, Cost, OperandsMapping, NumOperands); + return *InstrMapping; +} + +const RegisterBankInfo::InstructionMapping & RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); + const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; llvm_unreachable("The target must implement this"); @@ -332,14 +370,14 @@ RegisterBankInfo::InstructionMappings RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const { InstructionMappings PossibleMappings; // Put the default mapping first. - PossibleMappings.push_back(getInstrMapping(MI)); + PossibleMappings.push_back(&getInstrMapping(MI)); // Then the alternative mapping, if any. InstructionMappings AltMappings = getInstrAlternativeMappings(MI); - for (InstructionMapping &AltMapping : AltMappings) - PossibleMappings.emplace_back(std::move(AltMapping)); + for (const InstructionMapping *AltMapping : AltMappings) + PossibleMappings.push_back(AltMapping); #ifndef NDEBUG - for (const InstructionMapping &Mapping : PossibleMappings) - assert(Mapping.verify(MI) && "Mapping is invalid"); + for (const InstructionMapping *Mapping : PossibleMappings) + assert(Mapping->verify(MI) && "Mapping is invalid"); #endif return PossibleMappings; } @@ -421,7 +459,7 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, RC = MRI.getRegClass(Reg); } assert(RC && "Unable to deduce the register class"); - return RC->getSize() * 8; + return TRI.getRegSizeInBits(*RC); } //------------------------------------------------------------------------------ diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 606a59680a3d4a618caedf55077da371be44df55..254bdf10d804f345410a9328111cf1f3aa890c0d 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Constants.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -93,3 +94,27 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, R << Msg << ": " << ore::MNV("Inst", MI); reportGISelFailure(MF, TPC, MORE, R); } + +Optional llvm::getConstantVRegVal(unsigned VReg, + const MachineRegisterInfo &MRI) { + MachineInstr *MI = MRI.getVRegDef(VReg); + if (MI->getOpcode() != TargetOpcode::G_CONSTANT) + return None; + + if (MI->getOperand(1).isImm()) + return MI->getOperand(1).getImm(); + + if (MI->getOperand(1).isCImm() && + MI->getOperand(1).getCImm()->getBitWidth() <= 64) + return MI->getOperand(1).getCImm()->getSExtValue(); + + return None; +} + +const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg, + const MachineRegisterInfo &MRI) { + MachineInstr *MI = MRI.getVRegDef(VReg); + if (TargetOpcode::G_FCONSTANT != MI->getOpcode()) + return nullptr; + return MI->getOperand(1).getFPImm(); +} diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 1ea5349399486e0221f150459c04ac457ba4b33b..c6ca49ce24d7323c99c9ba1c3eb69aa23b487e72 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -192,10 +192,7 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables", - false, false) -INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", - false, false) +INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false) bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { @@ -556,7 +553,12 @@ bool GlobalMerge::doInitialization(Module &M) { // Grab all non-const globals. for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only - if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection()) + if (GV.isDeclaration() || GV.isThreadLocal() || + GV.hasSection() || GV.hasImplicitSection()) + continue; + + // It's not safe to merge globals that may be preempted + if (TM && !TM->shouldAssumeDSOLocal(M, &GV)) continue; if (!(MergeExternalGlobals && GV.hasExternalLinkage()) && diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 37fe41582333dfd0bd036f0de00062a38f5e4845..c98c9b68ac0e42a9ed2f7b7e4a605859c9842b07 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "BranchFolding.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" @@ -25,6 +24,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -39,7 +39,7 @@ using namespace llvm; -#define DEBUG_TYPE "ifcvt" +#define DEBUG_TYPE "if-converter" // Hidden options for help debugging. static cl::opt IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); @@ -316,9 +316,9 @@ namespace { char &llvm::IfConverterID = IfConverter::ID; -INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF))) @@ -1318,7 +1318,8 @@ static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) { return false; PI = I++; } - return true; + // Finally see if the last I is indeed a successor to PI. + return PI->isSuccessor(&*I); } /// Invalidate predecessor BB info so it would be re-analyzed to determine if it diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 920c2a372a9b8d3416ef1f48f5bc3a177ec27594..b831ddfa601a67dddaf1dec86bb5f876bf2448de 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -31,21 +31,21 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/FaultMaps.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -607,8 +607,20 @@ MachineInstr *ImplicitNullChecks::insertFaultingInstr( .addMBB(HandlerMBB) .addImm(MI->getOpcode()); - for (auto &MO : MI->uses()) - MIB.add(MO); + for (auto &MO : MI->uses()) { + if (MO.isReg()) { + MachineOperand NewMO = MO; + if (MO.isUse()) { + NewMO.setIsKill(false); + } else { + assert(MO.isDef() && "Expected def or use"); + NewMO.setIsDead(false); + } + MIB.add(NewMO); + } else { + MIB.add(MO); + } + } MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); @@ -674,8 +686,8 @@ void ImplicitNullChecks::rewriteNullChecks( char ImplicitNullChecks::ID = 0; char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID; -INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks", +INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE, "Implicit null checks", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks", +INITIALIZE_PASS_END(ImplicitNullChecks, DEBUG_TYPE, "Implicit null checks", false, false) diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index a1cb0a0695bfa85a75934546a3a4873ded3d35ad..4e6a3ec218666b2bddb7dfaf4d9f049bfd44bbf7 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -857,21 +857,46 @@ void InlineSpiller::insertReload(unsigned NewVReg, ++NumReloads; } +/// Check if \p Def fully defines a VReg with an undefined value. +/// If that's the case, that means the value of VReg is actually +/// not relevant. +static bool isFullUndefDef(const MachineInstr &Def) { + if (!Def.isImplicitDef()) + return false; + assert(Def.getNumOperands() == 1 && + "Implicit def with more than one definition"); + // We can say that the VReg defined by Def is undef, only if it is + // fully defined by Def. Otherwise, some of the lanes may not be + // undef and the value of the VReg matters. + return !Def.getOperand(0).getSubReg(); +} + /// insertSpill - Insert a spill of NewVReg after MI. void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); MachineInstrSpan MIS(MI); - TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot, - MRI.getRegClass(NewVReg), &TRI); + bool IsRealSpill = true; + if (isFullUndefDef(*MI)) { + // Don't spill undef value. + // Anything works for undef, in particular keeping the memory + // uninitialized is a viable option and it saves code size and + // run time. + BuildMI(MBB, std::next(MI), MI->getDebugLoc(), TII.get(TargetOpcode::KILL)) + .addReg(NewVReg, getKillRegState(isKill)); + IsRealSpill = false; + } else + TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot, + MRI.getRegClass(NewVReg), &TRI); LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end()); DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, "spill")); ++NumSpills; - HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); + if (IsRealSpill) + HSpiller.addToMergeableSpills(*std::next(MI), StackSlot, Original); } /// spillAroundUses - insert spill code around each use of Reg. @@ -888,20 +913,10 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - bool IsIndirect = MI->isIndirectDebugValue(); - uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; - const MDNode *Var = MI->getDebugVariable(); - const MDNode *Expr = MI->getDebugExpression(); - DebugLoc DL = MI->getDebugLoc(); - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); - assert(cast(Var)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(StackSlot) - .addImm(Offset) - .addMetadata(Var) - .addMetadata(Expr); + DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI); + buildDbgValueForSpill(*MBB, MI, *MI, StackSlot); + MBB->erase(MI); continue; } diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp index ec35b3f6449e168f319922b99b4d19ac428a17b1..ee4929c91482cf3c88edd6a485163c8138a464e9 100644 --- a/lib/CodeGen/InterleavedAccessPass.cpp +++ b/lib/CodeGen/InterleavedAccessPass.cpp @@ -45,6 +45,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/Support/Debug.h" @@ -68,8 +69,7 @@ class InterleavedAccess : public FunctionPass { public: static char ID; - InterleavedAccess(const TargetMachine *TM = nullptr) - : FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) { + InterleavedAccess() : FunctionPass(ID), DT(nullptr), TLI(nullptr) { initializeInterleavedAccessPass(*PassRegistry::getPassRegistry()); } @@ -84,7 +84,6 @@ public: private: DominatorTree *DT; - const TargetMachine *TM; const TargetLowering *TLI; /// The maximum supported interleave factor. @@ -108,18 +107,16 @@ private: } // end anonymous namespace. char InterleavedAccess::ID = 0; -INITIALIZE_TM_PASS_BEGIN( - InterleavedAccess, "interleaved-access", +INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE, "Lower interleaved memory accesses to target specific intrinsics", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_TM_PASS_END( - InterleavedAccess, "interleaved-access", +INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE, "Lower interleaved memory accesses to target specific intrinsics", false, false) -FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) { - return new InterleavedAccess(TM); +FunctionPass *llvm::createInterleavedAccessPass() { + return new InterleavedAccess(); } /// \brief Check if the mask is a DE-interleave mask of the given factor @@ -426,13 +423,15 @@ bool InterleavedAccess::lowerInterleavedStore( } bool InterleavedAccess::runOnFunction(Function &F) { - if (!TM || !LowerInterleavedAccesses) + auto *TPC = getAnalysisIfAvailable(); + if (!TPC || !LowerInterleavedAccesses) return false; DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n"); DT = &getAnalysis().getDomTree(); - TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + auto &TM = TPC->getTM(); + TLI = TM.getSubtargetImpl(F)->getTargetLowering(); MaxFactor = TLI->getMaxSupportedInterleaveFactor(); // Holds dead instructions that will be erased later. diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt index 86d3624a9d6e050570c2cf25c54c92c60d19fbe3..07ea9dcaea7aef2422a6b29014492c7755d841c5 100644 --- a/lib/CodeGen/LLVMBuild.txt +++ b/lib/CodeGen/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG MIRParser GlobalISel type = Library name = CodeGen parent = Libraries -required_libraries = Analysis BitReader BitWriter Core MC Scalar Support Target TransformUtils +required_libraries = Analysis BitReader BitWriter Core MC ProfileData Scalar Support Target TransformUtils diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 7b1706f0f4ba9336e181639c45657ce2a9385409..f2defb4fd6234ffa5c9d56efede5cb59c5fd0de9 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetMachine.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/BasicTTIImpl.h" @@ -31,21 +30,11 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; -// Enable or disable FastISel. Both options are needed, because -// FastISel is enabled by default with -fast, and we wish to be -// able to enable or disable fast-isel independently from -O0. -static cl::opt -EnableFastISelOption("fast-isel", cl::Hidden, - cl::desc("Enable the \"fast\" instruction selector")); - -static cl::opt - EnableGlobalISel("global-isel", cl::Hidden, - cl::desc("Enable the \"global\" instruction selector")); - void LLVMTargetMachine::initAsmInfo() { MRI = TheTarget.createMCRegInfo(getTargetTriple().str()); MII = TheTarget.createMCInstrInfo(); @@ -71,8 +60,7 @@ void LLVMTargetMachine::initAsmInfo() { TmpAsmInfo->setPreserveAsmComments(Options.MCOptions.PreserveAsmComments); - if (Options.CompressDebugSections) - TmpAsmInfo->setCompressDebugSections(DebugCompressionType::DCT_ZlibGnu); + TmpAsmInfo->setCompressDebugSections(Options.CompressDebugSections); TmpAsmInfo->setRelaxELFRelocations(Options.RelaxELFRelocations); @@ -106,112 +94,31 @@ static MCContext * addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, AnalysisID StopBefore, - AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer = nullptr) { - - // When in emulated TLS mode, add the LowerEmuTLS pass. - if (TM->Options.EmulatedTLS) - PM.add(createLowerEmuTLSPass(TM)); - - PM.add(createPreISelIntrinsicLoweringPass()); - - // Add internal analysis passes from the target machine. - PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); - + AnalysisID StopAfter) { // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore, StopAfter); - // Set PassConfig options provided by TargetMachine. PassConfig->setDisableVerify(DisableVerify); - PM.add(PassConfig); - - PassConfig->addIRPasses(); - - PassConfig->addCodeGenPrepare(); - - PassConfig->addPassesToHandleExceptions(); - - PassConfig->addISelPrepare(); - MachineModuleInfo *MMI = new MachineModuleInfo(TM); - MMI->setMachineFunctionInitializer(MFInitializer); PM.add(MMI); - // Enable FastISel with -fast, but allow that to be overridden. - TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); - if (EnableFastISelOption == cl::BOU_TRUE || - (TM->getOptLevel() == CodeGenOpt::None && - TM->getO0WantsFastISel())) - TM->setFastISel(true); - - // Ask the target for an isel. - // Enable GlobalISel if the target wants to, but allow that to be overriden. - if (EnableGlobalISel == cl::BOU_TRUE || (EnableGlobalISel == cl::BOU_UNSET && - PassConfig->isGlobalISelEnabled())) { - if (PassConfig->addIRTranslator()) - return nullptr; - - PassConfig->addPreLegalizeMachineIR(); - - if (PassConfig->addLegalizeMachineIR()) - return nullptr; - - // Before running the register bank selector, ask the target if it - // wants to run some passes. - PassConfig->addPreRegBankSelect(); - - if (PassConfig->addRegBankSelect()) - return nullptr; - - PassConfig->addPreGlobalInstructionSelect(); - - if (PassConfig->addGlobalInstructionSelect()) - return nullptr; - - // Pass to reset the MachineFunction if the ISel failed. - PM.add(createResetMachineFunctionPass( - PassConfig->reportDiagnosticWhenGlobalISelFallback(), - PassConfig->isGlobalISelAbortEnabled())); - - // Provide a fallback path when we do not want to abort on - // not-yet-supported input. - if (!PassConfig->isGlobalISelAbortEnabled() && - PassConfig->addInstSelector()) - return nullptr; - - } else if (PassConfig->addInstSelector()) + if (PassConfig->addISelPasses()) return nullptr; - PassConfig->addMachinePasses(); - PassConfig->setInitialized(); return &MMI->getContext(); } -bool LLVMTargetMachine::addPassesToEmitFile( - PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, - bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, - AnalysisID StopBefore, AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer) { - // Add common CodeGen passes. - MCContext *Context = - addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, - StopBefore, StopAfter, MFInitializer); - if (!Context) - return true; - - if (StopBefore || StopAfter) { - PM.add(createPrintMIRPass(Out)); - return false; - } - +bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, + raw_pwrite_stream &Out, CodeGenFileType FileType, + MCContext &Context) { if (Options.MCOptions.MCSaveTempLabels) - Context->setAllowTemporaryLabels(false); + Context.setAllowTemporaryLabels(false); const MCSubtargetInfo &STI = *getMCSubtargetInfo(); const MCAsmInfo &MAI = *getMCAsmInfo(); @@ -228,14 +135,14 @@ bool LLVMTargetMachine::addPassesToEmitFile( // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = nullptr; if (Options.MCOptions.ShowMCEncoding) - MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); + MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, Options.MCOptions); auto FOut = llvm::make_unique(Out); MCStreamer *S = getTarget().createAsmStreamer( - *Context, std::move(FOut), Options.MCOptions.AsmVerbose, + Context, std::move(FOut), Options.MCOptions.AsmVerbose, Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); @@ -244,7 +151,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. - MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU, Options.MCOptions); @@ -252,11 +159,11 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; // Don't waste memory on names of temp labels. - Context->setUseNamesOnTempLabels(false); + Context.setUseNamesOnTempLabels(false); Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( - T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + T, Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); break; @@ -264,7 +171,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( case CGFT_Null: // The Null output is intended for use for performance analysis and testing, // not real users. - AsmStreamer.reset(getTarget().createNullStreamer(*Context)); + AsmStreamer.reset(getTarget().createNullStreamer(Context)); break; } @@ -275,8 +182,28 @@ bool LLVMTargetMachine::addPassesToEmitFile( return true; PM.add(Printer); - PM.add(createFreeMachineFunctionPass()); + return false; +} +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopBefore, AnalysisID StopAfter) { + // Add common CodeGen passes. + MCContext *Context = + addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, + StopBefore, StopAfter); + if (!Context) + return true; + + if (StopBefore || StopAfter) { + PM.add(createPrintMIRPass(Out)); + } else { + if (addAsmPrinter(PM, Out, FileType, *Context)) + return true; + } + + PM.add(createFreeMachineFunctionPass()); return false; } diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 275d84e2c185ff17e6d421f5b2619e2feecf8095..995c58a6356498f045d868077d505074d91cfe5f 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -86,8 +86,9 @@ void LexicalScopes::extractLexicalScopes( continue; } - // Ignore DBG_VALUE. It does not contribute to any instruction in output. - if (MInsn.isDebugValue()) + // Ignore DBG_VALUE and similar instruction that do not contribute to any + // instruction in the output. + if (MInsn.isMetaInstruction()) continue; if (RangeBeginMI) { diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index f956974b1aafee9cc011aed41b21535d2dac05a5..b5e705f6455dff6a165e52307c0c11cea2ce31e0 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -43,7 +43,7 @@ using namespace llvm; -#define DEBUG_TYPE "live-debug-values" +#define DEBUG_TYPE "livedebugvalues" STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); @@ -283,7 +283,7 @@ public: char LiveDebugValues::ID = 0; char &llvm::LiveDebugValuesID = LiveDebugValues::ID; -INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis", +INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false, false) /// Default construct and initialize the pass. diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index bcf7c8e99c7ff7f5ee5b5156c546a11a0b7b73bc..bbd783367c9e874afb33c11f356c51046172b428 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -45,7 +45,7 @@ using namespace llvm; -#define DEBUG_TYPE "livedebug" +#define DEBUG_TYPE "livedebugvars" static cl::opt EnableLDV("live-debug-variables", cl::init(true), @@ -54,11 +54,11 @@ EnableLDV("live-debug-variables", cl::init(true), STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted"); char LiveDebugVariables::ID = 0; -INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars", +INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE, "Debug Variable Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars", +INITIALIZE_PASS_END(LiveDebugVariables, DEBUG_TYPE, "Debug Variable Analysis", false, false) void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 3f5b8e19d1f0cb7f710562e1a80b13f4186bdb37..471dcea4bb3903c588e6099b283fae72092c5697 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1,4 +1,4 @@ -//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===// +//===- LiveIntervalAnalysis.cpp - Live Interval Analysis ------------------===// // // The LLVM Compiler Infrastructure // @@ -16,26 +16,43 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "LiveRangeCalc.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/IR/Value.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include -#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "regalloc" @@ -59,11 +76,13 @@ static bool EnablePrecomputePhysRegs = false; #endif // NDEBUG namespace llvm { + cl::opt UseSegmentSetForPhysRegs( "use-segment-set-for-physregs", cl::Hidden, cl::init(true), cl::desc( "Use segment set for the computation of the live ranges of physregs.")); -} + +} // end namespace llvm void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -78,8 +97,7 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -LiveIntervals::LiveIntervals() : MachineFunctionPass(ID), - DomTree(nullptr), LRCalc(nullptr) { +LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); } @@ -168,12 +186,10 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const { #endif LiveInterval* LiveIntervals::createInterval(unsigned reg) { - float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? - llvm::huge_valf : 0.0F; + float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F; return new LiveInterval(reg, Weight); } - /// Compute the live interval of a virtual register, based on defs and uses. void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); @@ -337,7 +353,7 @@ static void createSegmentsForValues(LiveRange &LR, } } -typedef SmallVector, 16> ShrinkToUsesWorkList; +using ShrinkToUsesWorkList = SmallVector, 16>; static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, ShrinkToUsesWorkList &WorkList, @@ -593,7 +609,7 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, // Find all blocks that are reachable from KillMBB without leaving VNI's live // range. It is possible that KillMBB itself is reachable, so start a DFS // from each successor. - typedef df_iterator_default_set VisitedTy; + using VisitedTy = df_iterator_default_set; VisitedTy Visited; for (MachineBasicBlock *Succ : KillMBB->successors()) { for (df_ext_iterator @@ -822,7 +838,6 @@ LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) { return S; } - //===----------------------------------------------------------------------===// // Register mask functions //===----------------------------------------------------------------------===// @@ -855,7 +870,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, return false; bool Found = false; - for (;;) { + while (true) { assert(*SlotI >= LiveI->start); // Loop over all slots overlapping this segment. while (*SlotI < LiveI->end) { diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index b4aa0dc326a58452343089924d7381a9ce68ac73..b3248e53d0a5a0f0564efa9806dae91500160e49 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SparseBitVector.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SparseBitVector.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index 9f7d7cf54848077557d368cc3a3c99025e0dd0c9..cde6ccd29dfd8b5ac431854cc301212f1be92dc5 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -53,7 +53,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { continue; removeReg(Reg); } else if (O->isRegMask()) - removeRegsInMask(*O, nullptr); + removeRegsInMask(*O); } // Add uses to the set. @@ -142,66 +142,84 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI, /// Add live-in registers of basic block \p MBB to \p LiveRegs. void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) { for (const auto &LI : MBB.liveins()) { - MCSubRegIndexIterator S(LI.PhysReg, TRI); - if (LI.LaneMask.all() || (LI.LaneMask.any() && !S.isValid())) { - addReg(LI.PhysReg); + unsigned Reg = LI.PhysReg; + LaneBitmask Mask = LI.LaneMask; + MCSubRegIndexIterator S(Reg, TRI); + assert(Mask.any() && "Invalid livein mask"); + if (Mask.all() || !S.isValid()) { + addReg(Reg); continue; } for (; S.isValid(); ++S) { unsigned SI = S.getSubRegIndex(); - if ((LI.LaneMask & TRI->getSubRegIndexLaneMask(SI)).any()) + if ((Mask & TRI->getSubRegIndexLaneMask(SI)).any()) addReg(S.getSubReg()); } } } -/// Add pristine registers to the given \p LiveRegs. This function removes -/// actually saved callee save registers when \p InPrologueEpilogue is false. -static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, - const MachineFrameInfo &MFI, - const TargetRegisterInfo &TRI) { +/// Adds all callee saved registers to \p LiveRegs. +static void addCalleeSavedRegs(LivePhysRegs &LiveRegs, + const MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; - ++CSR) + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) LiveRegs.addReg(*CSR); +} + +/// Adds pristine registers to the given \p LiveRegs. Pristine registers are +/// callee saved registers that are unused in the function. +static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + /// Add all callee saved regs, then remove the ones that are saved+restored. + addCalleeSavedRegs(LiveRegs, MF); + /// Remove the ones that are not saved/restored; they are pristine. for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) LiveRegs.removeReg(Info.getReg()); } void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { - // To get the live-outs we simply merge the live-ins of all successors. - for (const MachineBasicBlock *Succ : MBB.successors()) - addBlockLiveIns(*Succ); + if (!MBB.succ_empty()) { + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB.successors()) + addBlockLiveIns(*Succ); + } else if (MBB.isReturnBlock()) { + // For the return block: Add all callee saved registers that are saved and + // restored (somewhere); This does not include callee saved registers that + // are unused and hence not saved and restored; they are called pristine. + const MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) { + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) + addReg(Info.getReg()); + } + } } void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) { - if (MBB.isReturnBlock()) { - // The return block has no successors whose live-ins we could merge - // below. So instead we add the callee saved registers manually. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I) - addReg(*I); - } else { - addPristines(*this, MF, MFI, *TRI); - } + if (!MBB.succ_empty()) { + addPristines(*this, MF); + addLiveOutsNoPristines(MBB); + } else if (MBB.isReturnBlock()) { + // For the return block: Add all callee saved registers. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) + addCalleeSavedRegs(*this, MF); } - - addLiveOutsNoPristines(MBB); } void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) - addPristines(*this, MF, MFI, *TRI); + addPristines(*this, MF); addBlockLiveIns(MBB); } -void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, +void llvm::computeLiveIns(LivePhysRegs &LiveRegs, + const MachineRegisterInfo &MRI, MachineBasicBlock &MBB) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); assert(MBB.livein_empty()); LiveRegs.init(TRI); LiveRegs.addLiveOutsNoPristines(MBB); @@ -209,10 +227,12 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, LiveRegs.stepBackward(MI); for (unsigned Reg : LiveRegs) { + if (MRI.isReserved(Reg)) + continue; // Skip the register if we are about to add one of its super registers. bool ContainsSuperReg = false; for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) { - if (LiveRegs.contains(*SReg)) { + if (LiveRegs.contains(*SReg) && !MRI.isReserved(*SReg)) { ContainsSuperReg = true; break; } diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp new file mode 100644 index 0000000000000000000000000000000000000000..552f4b5393fef3bdc58cd31a6e314f2bc878c5a1 --- /dev/null +++ b/lib/CodeGen/LiveRangeShrink.cpp @@ -0,0 +1,231 @@ +//===-- LiveRangeShrink.cpp - Move instructions to shrink live range ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +///===---------------------------------------------------------------------===// +/// +/// \file +/// This pass moves instructions close to the definition of its operands to +/// shrink live range of the def instruction. The code motion is limited within +/// the basic block. The moved instruction should have 1 def, and more than one +/// uses, all of which are the only use of the def. +/// +///===---------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "lrshrink" + +STATISTIC(NumInstrsHoistedToShrinkLiveRange, + "Number of insructions hoisted to shrink live range."); + +using namespace llvm; + +namespace { +class LiveRangeShrink : public MachineFunctionPass { +public: + static char ID; + + LiveRangeShrink() : MachineFunctionPass(ID) { + initializeLiveRangeShrinkPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return "Live Range Shrink"; } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. + +char LiveRangeShrink::ID = 0; +char &llvm::LiveRangeShrinkID = LiveRangeShrink::ID; + +INITIALIZE_PASS(LiveRangeShrink, "lrshrink", "Live Range Shrink Pass", false, + false) +namespace { +typedef DenseMap InstOrderMap; + +/// Returns \p New if it's dominated by \p Old, otherwise return \p Old. +/// \p M maintains a map from instruction to its dominating order that satisfies +/// M[A] > M[B] guarantees that A is dominated by B. +/// If \p New is not in \p M, return \p Old. Otherwise if \p Old is null, return +/// \p New. +MachineInstr *FindDominatedInstruction(MachineInstr &New, MachineInstr *Old, + const InstOrderMap &M) { + auto NewIter = M.find(&New); + if (NewIter == M.end()) + return Old; + if (Old == nullptr) + return &New; + unsigned OrderOld = M.find(Old)->second; + unsigned OrderNew = NewIter->second; + if (OrderOld != OrderNew) + return OrderOld < OrderNew ? &New : Old; + // OrderOld == OrderNew, we need to iterate down from Old to see if it + // can reach New, if yes, New is dominated by Old. + for (MachineInstr *I = Old->getNextNode(); M.find(I)->second == OrderNew; + I = I->getNextNode()) + if (I == &New) + return &New; + return Old; +} + +/// Builds Instruction to its dominating order number map \p M by traversing +/// from instruction \p Start. +void BuildInstOrderMap(MachineBasicBlock::iterator Start, InstOrderMap &M) { + M.clear(); + unsigned i = 0; + for (MachineInstr &I : make_range(Start, Start->getParent()->end())) + M[&I] = i++; +} +} // end anonymous namespace + +bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + + MachineRegisterInfo &MRI = MF.getRegInfo(); + + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + + InstOrderMap IOM; + // Map from register to instruction order (value of IOM) where the + // register is used last. When moving instructions up, we need to + // make sure all its defs (including dead def) will not cross its + // last use when moving up. + DenseMap> UseMap; + + for (MachineBasicBlock &MBB : MF) { + if (MBB.empty()) + continue; + bool SawStore = false; + BuildInstOrderMap(MBB.begin(), IOM); + UseMap.clear(); + + for (MachineBasicBlock::iterator Next = MBB.begin(); Next != MBB.end();) { + MachineInstr &MI = *Next; + ++Next; + if (MI.isPHI() || MI.isDebugValue()) + continue; + if (MI.mayStore()) + SawStore = true; + + unsigned CurrentOrder = IOM[&MI]; + unsigned Barrier = 0; + MachineInstr *BarrierMI = nullptr; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.isDebug()) + continue; + if (MO.isUse()) + UseMap[MO.getReg()] = std::make_pair(CurrentOrder, &MI); + else if (MO.isDead() && UseMap.count(MO.getReg())) + // Barrier is the last instruction where MO get used. MI should not + // be moved above Barrier. + if (Barrier < UseMap[MO.getReg()].first) { + Barrier = UseMap[MO.getReg()].first; + BarrierMI = UseMap[MO.getReg()].second; + } + } + + if (!MI.isSafeToMove(nullptr, SawStore)) { + // If MI has side effects, it should become a barrier for code motion. + // IOM is rebuild from the next instruction to prevent later + // instructions from being moved before this MI. + if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) { + BuildInstOrderMap(Next, IOM); + SawStore = false; + } + continue; + } + + const MachineOperand *DefMO = nullptr; + MachineInstr *Insert = nullptr; + + // Number of live-ranges that will be shortened. We do not count + // live-ranges that are defined by a COPY as it could be coalesced later. + unsigned NumEligibleUse = 0; + + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || MO.isDead() || MO.isDebug()) + continue; + unsigned Reg = MO.getReg(); + // Do not move the instruction if it def/uses a physical register, + // unless it is a constant physical register or a noreg. + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + if (!Reg || MRI.isConstantPhysReg(Reg)) + continue; + Insert = nullptr; + break; + } + if (MO.isDef()) { + // Do not move if there is more than one def. + if (DefMO) { + Insert = nullptr; + break; + } + DefMO = &MO; + } else if (MRI.hasOneNonDBGUse(Reg) && MRI.hasOneDef(Reg) && DefMO && + MRI.getRegClass(DefMO->getReg()) == + MRI.getRegClass(MO.getReg())) { + // The heuristic does not handle different register classes yet + // (registers of different sizes, looser/tighter constraints). This + // is because it needs more accurate model to handle register + // pressure correctly. + MachineInstr &DefInstr = *MRI.def_instr_begin(Reg); + if (!DefInstr.isCopy()) + NumEligibleUse++; + Insert = FindDominatedInstruction(DefInstr, Insert, IOM); + } else { + Insert = nullptr; + break; + } + } + + // If Barrier equals IOM[I], traverse forward to find if BarrierMI is + // after Insert, if yes, then we should not hoist. + for (MachineInstr *I = Insert; I && IOM[I] == Barrier; + I = I->getNextNode()) + if (I == BarrierMI) { + Insert = nullptr; + break; + } + // Move the instruction when # of shrunk live range > 1. + if (DefMO && Insert && NumEligibleUse > 1 && Barrier <= IOM[Insert]) { + MachineBasicBlock::iterator I = std::next(Insert->getIterator()); + // Skip all the PHI and debug instructions. + while (I != MBB.end() && (I->isPHI() || I->isDebugValue())) + I = std::next(I); + if (I == MI.getIterator()) + continue; + + // Update the dominator order to be the same as the insertion point. + // We do this to maintain a non-decreasing order without need to update + // all instruction orders after the insertion point. + unsigned NewOrder = IOM[&*I]; + IOM[&MI] = NewOrder; + NumInstrsHoistedToShrinkLiveRange++; + + // Find MI's debug value following MI. + MachineBasicBlock::iterator EndIter = std::next(MI.getIterator()); + if (MI.getOperand(0).isReg()) + for (; EndIter != MBB.end() && EndIter->isDebugValue() && + EndIter->getOperand(0).isReg() && + EndIter->getOperand(0).getReg() == MI.getOperand(0).getReg(); + ++EndIter, ++Next) + IOM[&*EndIter] = NewOrder; + MBB.splice(I, &MBB, MI.getIterator(), EndIter); + } + } + } + return false; +} diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 882de1a3fad966d50e2ab49d68df480a73a9cedb..60033db38ee44561e868f19846859d41fa7058b1 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LiveRegMatrix.h" #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveRegMatrix.h" -#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Pass.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp index dff555f49565e9071e9e2a1d50b1d59b2d1dd3f7..3746b74e0528abb678aae62cab57f28a73e98342 100644 --- a/lib/CodeGen/LiveRegUnits.cpp +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveRegUnits.h" + #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -81,46 +83,50 @@ void LiveRegUnits::accumulateBackward(const MachineInstr &MI) { } /// Add live-in registers of basic block \p MBB to \p LiveUnits. -static void addLiveIns(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) { +static void addBlockLiveIns(LiveRegUnits &LiveUnits, + const MachineBasicBlock &MBB) { for (const auto &LI : MBB.liveins()) LiveUnits.addRegMasked(LI.PhysReg, LI.LaneMask); } -static void addLiveOuts(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) { - // To get the live-outs we simply merge the live-ins of all successors. - for (const MachineBasicBlock *Succ : MBB.successors()) - addLiveIns(LiveUnits, *Succ); +/// Adds all callee saved registers to \p LiveUnits. +static void addCalleeSavedRegs(LiveRegUnits &LiveUnits, + const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) + LiveUnits.addReg(*CSR); } -/// Add pristine registers to the given \p LiveUnits. This function removes -/// actually saved callee save registers when \p InPrologueEpilogue is false. -static void removeSavedRegs(LiveRegUnits &LiveUnits, const MachineFunction &MF, - const MachineFrameInfo &MFI, - const TargetRegisterInfo &TRI) { +/// Adds pristine registers to the given \p LiveUnits. Pristine registers are +/// callee saved registers that are unused in the function. +static void addPristines(LiveRegUnits &LiveUnits, const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + /// Add all callee saved regs, then remove the ones that are saved+restored. + addCalleeSavedRegs(LiveUnits, MF); + /// Remove the ones that are not saved/restored; they are pristine. for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) LiveUnits.removeReg(Info.getReg()); } void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) { - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) - addReg(*I); - if (!MBB.isReturnBlock()) - removeSavedRegs(*this, MF, MFI, *TRI); + if (!MBB.succ_empty()) { + addPristines(*this, MF); + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB.successors()) + addBlockLiveIns(*this, *Succ); + } else if (MBB.isReturnBlock()) { + // For the return block: Add all callee saved registers. + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) + addCalleeSavedRegs(*this, MF); } - ::addLiveOuts(*this, MBB); } void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) { - for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) - addReg(*I); - if (&MBB != &MF.front()) - removeSavedRegs(*this, MF, MFI, *TRI); - } - ::addLiveIns(*this, MBB); + addPristines(*this, MF); + addBlockLiveIns(*this, MBB); } diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index dbf1f96102d14762e6269cf0159764aeff3e15e4..b51f8b0aa6bb7168d22023d566ed7afadc1a15bc 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -25,10 +25,10 @@ using namespace llvm; #define DEBUG_TYPE "livestacks" char LiveStacks::ID = 0; -INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", +INITIALIZE_PASS_BEGIN(LiveStacks, DEBUG_TYPE, "Live Stack Slot Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_END(LiveStacks, "livestacks", +INITIALIZE_PASS_END(LiveStacks, DEBUG_TYPE, "Live Stack Slot Analysis", false, false) char &llvm::LiveStacksID = LiveStacks::ID; diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 3568b0294ad9a576e0ef65301df1931220c45835..a9aec926115aa5c14a4a61cca79a3dbcd09f74cb 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -767,7 +767,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *SuccBB) { const unsigned NumNew = BB->getNumber(); - SmallSet Defs, Kills; + DenseSet Defs, Kills; MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); for (; BBI != BBE && BBI->isPHI(); ++BBI) { diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index e189fb0dd89d819ab69df37f92673023cc73f26a..b109f1922a3ec1eb86721f2d92acf812a8ba14c6 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -23,6 +22,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -103,10 +103,10 @@ namespace { char LocalStackSlotPass::ID = 0; char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; -INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc", +INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", +INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) diff --git a/lib/CodeGen/LowLevelType.cpp b/lib/CodeGen/LowLevelType.cpp index c4b9068fa905ab29bc2d1a02381367d2f1ad1a90..1c682e72fa49145aaf795fb67f0b271683d6d9f0 100644 --- a/lib/CodeGen/LowLevelType.cpp +++ b/lib/CodeGen/LowLevelType.cpp @@ -21,10 +21,10 @@ using namespace llvm; LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { if (auto VTy = dyn_cast(&Ty)) { auto NumElements = VTy->getNumElements(); - auto ScalarSizeInBits = VTy->getElementType()->getPrimitiveSizeInBits(); + LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL); if (NumElements == 1) - return LLT::scalar(ScalarSizeInBits); - return LLT::vector(NumElements, ScalarSizeInBits); + return ScalarTy; + return LLT::vector(NumElements, ScalarTy); } else if (auto PTy = dyn_cast(&Ty)) { return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty)); } else if (Ty.isSized()) { diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp index 6966c8ca4a5f8c36cab63c2d0fd112f916c0b057..0fc48d4e0b6bc539e4437707a2ff36f18fc4b296 100644 --- a/lib/CodeGen/LowerEmuTLS.cpp +++ b/lib/CodeGen/LowerEmuTLS.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" @@ -28,14 +29,12 @@ using namespace llvm; namespace { class LowerEmuTLS : public ModulePass { - const TargetMachine *TM; public: static char ID; // Pass identification, replacement for typeid - explicit LowerEmuTLS() : ModulePass(ID), TM(nullptr) { } - explicit LowerEmuTLS(const TargetMachine *TM) - : ModulePass(ID), TM(TM) { + LowerEmuTLS() : ModulePass(ID) { initializeLowerEmuTLSPass(*PassRegistry::getPassRegistry()); } + bool runOnModule(Module &M) override; private: bool addEmuTlsVar(Module &M, const GlobalVariable *GV); @@ -54,19 +53,22 @@ private: char LowerEmuTLS::ID = 0; -INITIALIZE_PASS(LowerEmuTLS, "loweremutls", - "Add __emutls_[vt]. variables for emultated TLS model", - false, false) +INITIALIZE_PASS(LowerEmuTLS, DEBUG_TYPE, + "Add __emutls_[vt]. variables for emultated TLS model", false, + false) -ModulePass *llvm::createLowerEmuTLSPass(const TargetMachine *TM) { - return new LowerEmuTLS(TM); -} +ModulePass *llvm::createLowerEmuTLSPass() { return new LowerEmuTLS(); } bool LowerEmuTLS::runOnModule(Module &M) { if (skipModule(M)) return false; - if (!TM || !TM->Options.EmulatedTLS) + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + auto &TM = TPC->getTM(); + if (!TM.Options.EmulatedTLS) return false; bool Changed = false; diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index edba749b5fce4651df21813cb6f422a80ffa7973..3e9513111bf4f5b63fe43715ac4cb96bafbf3409 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -16,8 +16,8 @@ #define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include namespace llvm { diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index cac22af32956ebe44b9d0a23bcc06ba6a015879f..f58d1f8b83aebe8b065011dff6b0642bc281ed3b 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -11,12 +11,22 @@ // //===----------------------------------------------------------------------===// -#include "MIParser.h" #include "MILexer.h" +#include "MIParser.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" +#include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -24,19 +34,48 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueSymbolTable.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include #include +#include +#include +#include +#include +#include using namespace llvm; @@ -134,7 +173,8 @@ public: bool parseBasicBlockDefinition(DenseMap &MBBSlots); - bool parseBasicBlock(MachineBasicBlock &MBB); + bool parseBasicBlock(MachineBasicBlock &MBB, + MachineBasicBlock *&AddFalthroughFrom); bool parseBasicBlockLiveins(MachineBasicBlock &MBB); bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); @@ -518,7 +558,8 @@ bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) { return false; } -bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { +bool MIParser::parseBasicBlock(MachineBasicBlock &MBB, + MachineBasicBlock *&AddFalthroughFrom) { // Skip the definition. assert(Token.is(MIToken::MachineBasicBlockLabel)); lex(); @@ -538,10 +579,12 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { // // is equivalent to // liveins: %edi, %esi + bool ExplicitSuccesors = false; while (true) { if (Token.is(MIToken::kw_successors)) { if (parseBasicBlockSuccessors(MBB)) return true; + ExplicitSuccesors = true; } else if (Token.is(MIToken::kw_liveins)) { if (parseBasicBlockLiveins(MBB)) return true; @@ -557,10 +600,9 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { // Parse the instructions. bool IsInBundle = false; MachineInstr *PrevMI = nullptr; - while (true) { - if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof)) - return false; - else if (consumeIfPresent(MIToken::Newline)) + while (!Token.is(MIToken::MachineBasicBlockLabel) && + !Token.is(MIToken::Eof)) { + if (consumeIfPresent(MIToken::Newline)) continue; if (consumeIfPresent(MIToken::rbrace)) { // The first parsing pass should verify that all closing '}' have an @@ -592,6 +634,22 @@ bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { assert(Token.isNewlineOrEOF() && "MI is not fully parsed"); lex(); } + + // Construct successor list by searching for basic block machine operands. + if (!ExplicitSuccesors) { + SmallVector Successors; + bool IsFallthrough; + guessSuccessors(MBB, Successors, IsFallthrough); + for (MachineBasicBlock *Succ : Successors) + MBB.addSuccessor(Succ); + + if (IsFallthrough) { + AddFalthroughFrom = &MBB; + } else { + MBB.normalizeSuccProbs(); + } + } + return false; } @@ -605,11 +663,18 @@ bool MIParser::parseBasicBlocks() { // The first parsing pass should have verified that this token is a MBB label // in the 'parseBasicBlockDefinitions' method. assert(Token.is(MIToken::MachineBasicBlockLabel)); + MachineBasicBlock *AddFalthroughFrom = nullptr; do { MachineBasicBlock *MBB = nullptr; if (parseMBBReference(MBB)) return true; - if (parseBasicBlock(*MBB)) + if (AddFalthroughFrom) { + if (!AddFalthroughFrom->isSuccessor(MBB)) + AddFalthroughFrom->addSuccessor(MBB); + AddFalthroughFrom->normalizeSuccProbs(); + AddFalthroughFrom = nullptr; + } + if (parseBasicBlock(*MBB, AddFalthroughFrom)) return true; // The method 'parseBasicBlock' should parse the whole block until the next // block or the end of file. @@ -2011,7 +2076,7 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { // The token was already consumed, so use return here instead of break. return false; } - case MIToken::kw_call_entry: { + case MIToken::kw_call_entry: lex(); switch (Token.kind()) { case MIToken::GlobalValue: @@ -2031,7 +2096,6 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { "expected a global value or an external symbol after 'call-entry'"); } break; - } default: llvm_unreachable("The current token should be pseudo source value"); } diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h index 9b3879cf83772e1140a72743ccab11ae48018710..2307881068efbc546074505c9130f6f565a369f5 100644 --- a/lib/CodeGen/MIRParser/MIParser.h +++ b/lib/CodeGen/MIRParser/MIParser.h @@ -1,4 +1,4 @@ -//===- MIParser.h - Machine Instructions Parser ---------------------------===// +//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,21 +15,19 @@ #define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Allocator.h" namespace llvm { -class StringRef; -class BasicBlock; class MachineBasicBlock; class MachineFunction; -class MachineInstr; -class MachineRegisterInfo; class MDNode; class RegisterBank; struct SlotMapping; class SMDiagnostic; class SourceMgr; +class StringRef; class TargetRegisterClass; struct VRegInfo { @@ -45,8 +43,8 @@ struct VRegInfo { unsigned PreferredReg = 0; }; -typedef StringMap Name2RegClassMap; -typedef StringMap Name2RegBankMap; +using Name2RegClassMap = StringMap; +using Name2RegBankMap = StringMap; struct PerFunctionMIParsingState { BumpPtrAllocator Allocator; @@ -122,4 +120,4 @@ bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, } // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index a2773cccc5dbd9859f879e75660ea0023ca9b44d..78b57f357781e83ab3950528070e7ec7c52c0c11 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -50,18 +50,24 @@ namespace llvm { /// file. class MIRParserImpl { SourceMgr SM; + yaml::Input In; StringRef Filename; LLVMContext &Context; - StringMap> Functions; SlotMapping IRSlots; /// Maps from register class names to register classes. Name2RegClassMap Names2RegClasses; /// Maps from register bank names to register banks. Name2RegBankMap Names2RegBanks; + /// True when the MIR file doesn't have LLVM IR. Dummy IR functions are + /// created and inserted into the given module when this is true. + bool NoLLVMIR = false; + /// True when a well formed MIR file does not contain any MIR/machine function + /// parts. + bool NoMIRDocuments = false; public: - MIRParserImpl(std::unique_ptr Contents, StringRef Filename, - LLVMContext &Context); + MIRParserImpl(std::unique_ptr Contents, + StringRef Filename, LLVMContext &Context); void reportDiagnostic(const SMDiagnostic &Diag); @@ -85,22 +91,22 @@ public: /// file. /// /// Return null if an error occurred. - std::unique_ptr parse(); + std::unique_ptr parseIRModule(); + + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); /// Parse the machine function in the current YAML document. /// - /// \param NoLLVMIR - set to true when the MIR file doesn't have LLVM IR. - /// A dummy IR function is created and inserted into the given module when - /// this parameter is true. /// /// Return true if an error occurred. - bool parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR); + bool parseMachineFunction(Module &M, MachineModuleInfo &MMI); /// Initialize the machine function to the state that's described in the MIR /// file. /// /// Return true if error occurred. - bool initializeMachineFunction(MachineFunction &MF); + bool initializeMachineFunction(const yaml::MachineFunction &YamlMF, + MachineFunction &MF); bool parseRegisterInfo(PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF); @@ -144,9 +150,6 @@ private: SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error, SMRange SourceRange); - /// Create an empty function with the given name. - void createDummyFunction(StringRef Name, Module &M); - void initNames2RegClasses(const MachineFunction &MF); void initNames2RegBanks(const MachineFunction &MF); @@ -166,10 +169,19 @@ private: } // end namespace llvm +static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { + reinterpret_cast(Context)->reportDiagnostic(Diag); +} + MIRParserImpl::MIRParserImpl(std::unique_ptr Contents, StringRef Filename, LLVMContext &Context) - : SM(), Filename(Filename), Context(Context) { - SM.AddNewSourceBuffer(std::move(Contents), SMLoc()); + : SM(), + In(SM.getMemoryBuffer( + SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(), + nullptr, handleYAMLDiag, this), + Filename(Filename), + Context(Context) { + In.setContext(&In); } bool MIRParserImpl::error(const Twine &Message) { @@ -206,24 +218,16 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) { Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag)); } -static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { - reinterpret_cast(Context)->reportDiagnostic(Diag); -} - -std::unique_ptr MIRParserImpl::parse() { - yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(), - /*Ctxt=*/nullptr, handleYAMLDiag, this); - In.setContext(&In); - +std::unique_ptr MIRParserImpl::parseIRModule() { if (!In.setCurrentDocument()) { if (In.error()) return nullptr; // Create an empty module when the MIR file is empty. + NoMIRDocuments = true; return llvm::make_unique(Filename, Context); } std::unique_ptr M; - bool NoLLVMIR = false; // Parse the block scalar manually so that we can return unique pointer // without having to go trough YAML traits. if (const auto *BSN = @@ -237,49 +241,68 @@ std::unique_ptr MIRParserImpl::parse() { } In.nextDocument(); if (!In.setCurrentDocument()) - return M; + NoMIRDocuments = true; } else { // Create an new, empty module. M = llvm::make_unique(Filename, Context); NoLLVMIR = true; } + return M; +} + +bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { + if (NoMIRDocuments) + return false; // Parse the machine functions. do { - if (parseMachineFunction(In, *M, NoLLVMIR)) - return nullptr; + if (parseMachineFunction(M, MMI)) + return true; In.nextDocument(); } while (In.setCurrentDocument()); - return M; -} - -bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M, - bool NoLLVMIR) { - auto MF = llvm::make_unique(); - yaml::EmptyContext Ctx; - yaml::yamlize(In, *MF, false, Ctx); - if (In.error()) - return true; - auto FunctionName = MF->Name; - if (Functions.find(FunctionName) != Functions.end()) - return error(Twine("redefinition of machine function '") + FunctionName + - "'"); - Functions.insert(std::make_pair(FunctionName, std::move(MF))); - if (NoLLVMIR) - createDummyFunction(FunctionName, M); - else if (!M.getFunction(FunctionName)) - return error(Twine("function '") + FunctionName + - "' isn't defined in the provided LLVM IR"); return false; } -void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) { +/// Create an empty function with the given name. +static Function *createDummyFunction(StringRef Name, Module &M) { auto &Context = M.getContext(); Function *F = cast(M.getOrInsertFunction( Name, FunctionType::get(Type::getVoidTy(Context), false))); BasicBlock *BB = BasicBlock::Create(Context, "entry", F); new UnreachableInst(Context, BB); + return F; +} + +bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) { + // Parse the yaml. + yaml::MachineFunction YamlMF; + yaml::EmptyContext Ctx; + yaml::yamlize(In, YamlMF, false, Ctx); + if (In.error()) + return true; + + // Search for the corresponding IR function. + StringRef FunctionName = YamlMF.Name; + Function *F = M.getFunction(FunctionName); + if (!F) { + if (NoLLVMIR) { + F = createDummyFunction(FunctionName, M); + } else { + return error(Twine("function '") + FunctionName + + "' isn't defined in the provided LLVM IR"); + } + } + if (MMI.getMachineFunction(*F) != nullptr) + return error(Twine("redefinition of machine function '") + FunctionName + + "'"); + + // Create the MachineFunction. + MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); + if (initializeMachineFunction(YamlMF, MF)) + return true; + + return false; } static bool isSSA(const MachineFunction &MF) { @@ -319,21 +342,16 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) { Properties.set(MachineFunctionProperties::Property::NoVRegs); } -bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { - auto It = Functions.find(MF.getName()); - if (It == Functions.end()) - return error(Twine("no machine function information for function '") + - MF.getName() + "' in the MIR file"); +bool +MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, + MachineFunction &MF) { // TODO: Recreate the machine function. initNames2RegClasses(MF); initNames2RegBanks(MF); - const yaml::MachineFunction &YamlMF = *It->getValue(); if (YamlMF.Alignment) MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); - if (YamlMF.NoVRegs) - MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); if (YamlMF.Legalized) MF.getProperties().set(MachineFunctionProperties::Property::Legalized); if (YamlMF.RegBankSelected) @@ -541,7 +559,8 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, MFI.ensureMaxAlignment(YamlMFI.MaxAlignment); MFI.setAdjustsStack(YamlMFI.AdjustsStack); MFI.setHasCalls(YamlMFI.HasCalls); - MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); + if (YamlMFI.MaxCallFrameSize != ~0u) + MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); @@ -839,16 +858,18 @@ MIRParser::MIRParser(std::unique_ptr Impl) MIRParser::~MIRParser() {} -std::unique_ptr MIRParser::parseLLVMModule() { return Impl->parse(); } +std::unique_ptr MIRParser::parseIRModule() { + return Impl->parseIRModule(); +} -bool MIRParser::initializeMachineFunction(MachineFunction &MF) { - return Impl->initializeMachineFunction(MF); +bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { + return Impl->parseMachineFunctions(M, MMI); } std::unique_ptr llvm::createMIRParserFromFile(StringRef Filename, SMDiagnostic &Error, LLVMContext &Context) { - auto FileOrErr = MemoryBuffer::getFile(Filename); + auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { Error = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + EC.message()); diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 6da174a536666ca3da43270d32743f1f0cf61838..c524a9835f338eb7d3c1a939c3eb72df3d04cd13 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -12,36 +12,71 @@ // //===----------------------------------------------------------------------===// -#include "MIRPrinter.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" -#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/YAMLTraits.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; +static cl::opt SimplifyMIR("simplify-mir", + cl::desc("Leave out unnecessary information when printing MIR")); + namespace { /// This structure describes how to print out stack object references. @@ -105,6 +140,9 @@ class MIPrinter { const DenseMap &RegisterMaskIds; const DenseMap &StackObjectOperandMapping; + bool canPredictBranchProbabilities(const MachineBasicBlock &MBB) const; + bool canPredictSuccessors(const MachineBasicBlock &MBB) const; + public: MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST, const DenseMap &RegisterMaskIds, @@ -139,6 +177,7 @@ template <> struct BlockScalarTraits { static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) { Mod.print(OS, nullptr); } + static StringRef input(StringRef Str, void *Ctxt, Module &Mod) { llvm_unreachable("LLVM Module is supposed to be parsed separately"); return ""; @@ -175,8 +214,6 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); - YamlMF.NoVRegs = MF.getProperties().hasProperty( - MachineFunctionProperties::Property::NoVRegs); YamlMF.Legalized = MF.getProperties().hasProperty( MachineFunctionProperties::Property::Legalized); YamlMF.RegBankSelected = MF.getProperties().hasProperty( @@ -204,6 +241,8 @@ void MIRPrinter::print(const MachineFunction &MF) { } StrOS.flush(); yaml::Output Out(OS); + if (!SimplifyMIR) + Out.setWriteDefaultValues(true); Out << YamlMF; } @@ -286,7 +325,8 @@ void MIRPrinter::convert(ModuleSlotTracker &MST, YamlMFI.MaxAlignment = MFI.getMaxAlignment(); YamlMFI.AdjustsStack = MFI.adjustsStack(); YamlMFI.HasCalls = MFI.hasCalls(); - YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize(); + YamlMFI.MaxCallFrameSize = MFI.isMaxCallFrameSizeComputed() + ? MFI.getMaxCallFrameSize() : ~0u; YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); YamlMFI.HasVAStart = MFI.hasVAStart(); YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); @@ -453,6 +493,62 @@ void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) { RegisterMaskIds.insert(std::make_pair(Mask, I++)); } +void llvm::guessSuccessors(const MachineBasicBlock &MBB, + SmallVectorImpl &Result, + bool &IsFallthrough) { + SmallPtrSet Seen; + + for (const MachineInstr &MI : MBB) { + if (MI.isPHI()) + continue; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isMBB()) + continue; + MachineBasicBlock *Succ = MO.getMBB(); + auto RP = Seen.insert(Succ); + if (RP.second) + Result.push_back(Succ); + } + } + MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); + IsFallthrough = I == MBB.end() || !I->isBarrier(); +} + +bool +MIPrinter::canPredictBranchProbabilities(const MachineBasicBlock &MBB) const { + if (MBB.succ_size() <= 1) + return true; + if (!MBB.hasSuccessorProbabilities()) + return true; + + SmallVector Normalized(MBB.Probs.begin(), + MBB.Probs.end()); + BranchProbability::normalizeProbabilities(Normalized.begin(), + Normalized.end()); + SmallVector Equal(Normalized.size()); + BranchProbability::normalizeProbabilities(Equal.begin(), Equal.end()); + + return std::equal(Normalized.begin(), Normalized.end(), Equal.begin()); +} + +bool MIPrinter::canPredictSuccessors(const MachineBasicBlock &MBB) const { + SmallVector GuessedSuccs; + bool GuessedFallthrough; + guessSuccessors(MBB, GuessedSuccs, GuessedFallthrough); + if (GuessedFallthrough) { + const MachineFunction &MF = *MBB.getParent(); + MachineFunction::const_iterator NextI = std::next(MBB.getIterator()); + if (NextI != MF.end()) { + MachineBasicBlock *Next = const_cast(&*NextI); + if (!is_contained(GuessedSuccs, Next)) + GuessedSuccs.push_back(Next); + } + } + if (GuessedSuccs.size() != MBB.succ_size()) + return false; + return std::equal(MBB.succ_begin(), MBB.succ_end(), GuessedSuccs.begin()); +} + void MIPrinter::print(const MachineBasicBlock &MBB) { assert(MBB.getNumber() >= 0 && "Invalid MBB number"); OS << "bb." << MBB.getNumber(); @@ -491,13 +587,15 @@ void MIPrinter::print(const MachineBasicBlock &MBB) { bool HasLineAttributes = false; // Print the successors - if (!MBB.succ_empty()) { + bool canPredictProbs = canPredictBranchProbabilities(MBB); + if (!MBB.succ_empty() && (!SimplifyMIR || !canPredictProbs || + !canPredictSuccessors(MBB))) { OS.indent(2) << "successors: "; for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) { if (I != MBB.succ_begin()) OS << ", "; printMBBReference(**I); - if (MBB.hasSuccessorProbabilities()) + if (!SimplifyMIR || !canPredictProbs) OS << '(' << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator()) << ')'; @@ -842,7 +940,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << "%const." << Op.getIndex(); printOffset(Op.getOffset()); break; - case MachineOperand::MO_TargetIndex: { + case MachineOperand::MO_TargetIndex: OS << "target-index("; if (const auto *Name = getTargetIndexName( *Op.getParent()->getParent()->getParent(), Op.getIndex())) @@ -852,15 +950,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << ')'; printOffset(Op.getOffset()); break; - } case MachineOperand::MO_JumpTableIndex: OS << "%jump-table." << Op.getIndex(); break; - case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_ExternalSymbol: { + StringRef Name = Op.getSymbolName(); OS << '$'; - printLLVMNameWithoutPrefix(OS, Op.getSymbolName()); + if (Name.empty()) { + OS << "\"\""; + } else { + printLLVMNameWithoutPrefix(OS, Name); + } printOffset(Op.getOffset()); break; + } case MachineOperand::MO_GlobalAddress: Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); printOffset(Op.getOffset()); @@ -925,9 +1028,6 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, << CmpInst::getPredicateName(Pred) << ')'; break; } - case MachineOperand::MO_Placeholder: - OS << ""; - break; } } diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp index c690bcfad56713504399bffb17ee41185f9f7ab0..09354cf70c3c1a7ed7368494eacb38ae57f31942 100644 --- a/lib/CodeGen/MIRPrintingPass.cpp +++ b/lib/CodeGen/MIRPrintingPass.cpp @@ -12,10 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "MIRPrinter.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MIRPrinter.h" + #include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 06112723497b098e50fb58aea89b3e13ef3422cc..590acc01008a63bb7544586a80def5bd6b3625ca 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -350,6 +350,13 @@ void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { LiveIns.erase(I); } +MachineBasicBlock::livein_iterator +MachineBasicBlock::removeLiveIn(MachineBasicBlock::livein_iterator I) { + // Get non-const version of iterator. + LiveInVector::iterator LI = LiveIns.begin() + (I - LiveIns.begin()); + return LiveIns.erase(LI); +} + bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { livein_iterator I = find_if( LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 9c7367b4c78020ecb2b6260658c8a67f46d00750..4d1ec11df46c87cd19c33dbef8bd8187762e3ec9 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; -#define DEBUG_TYPE "block-freq" +#define DEBUG_TYPE "machine-block-freq" static cl::opt ViewMachineBlockFreqPropagationDAG( @@ -149,11 +149,11 @@ struct DOTGraphTraits } // end namespace llvm -INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq", +INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, DEBUG_TYPE, "Machine Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq", +INITIALIZE_PASS_END(MachineBlockFrequencyInfo, DEBUG_TYPE, "Machine Block Frequency Analysis", true, true) char MachineBlockFrequencyInfo::ID = 0; diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index e23f90be40992d8b7e1fba0dd0f5635a887d58a4..2d4b95974cc646cc4316109fa5af36ec8dfb6307 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -25,8 +25,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetPassConfig.h" #include "BranchFolding.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,7 +39,9 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -133,6 +133,14 @@ static cl::opt TailDupPlacementThreshold( "that won't conflict."), cl::init(2), cl::Hidden); +// Heuristic for aggressive tail duplication. +static cl::opt TailDupPlacementAggressiveThreshold( + "tail-dup-placement-aggressive-threshold", + cl::desc("Instruction cutoff for aggressive tail duplication during " + "layout. Used at -O3. Tail merging during layout is forced to " + "have a threshold that won't conflict."), cl::init(3), + cl::Hidden); + // Heuristic for tail duplication. static cl::opt TailDupPlacementPenalty( "tail-dup-placement-penalty", @@ -237,25 +245,26 @@ public: /// updating the block -> chain mapping. It does not free or tear down the /// old chain, but the old chain's block list is no longer valid. void merge(MachineBasicBlock *BB, BlockChain *Chain) { - assert(BB); - assert(!Blocks.empty()); + assert(BB && "Can't merge a null block."); + assert(!Blocks.empty() && "Can't merge into an empty chain."); // Fast path in case we don't have a chain already. if (!Chain) { - assert(!BlockToChain[BB]); + assert(!BlockToChain[BB] && + "Passed chain is null, but BB has entry in BlockToChain."); Blocks.push_back(BB); BlockToChain[BB] = this; return; } - assert(BB == *Chain->begin()); + assert(BB == *Chain->begin() && "Passed BB is not head of Chain."); assert(Chain->begin() != Chain->end()); // Update the incoming blocks to point to this chain, and add them to the // chain structure. for (MachineBasicBlock *ChainBB : *Chain) { Blocks.push_back(ChainBB); - assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain"); + assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain."); BlockToChain[ChainBB] = this; } } @@ -490,13 +499,13 @@ public: char MachineBlockPlacement::ID = 0; char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; -INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement", +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement", +INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) #ifndef NDEBUG @@ -585,8 +594,8 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors( // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after // A->C is chosen as a fall-through, D won't be selected as a successor of C // due to CFG constraint (the probability of C->D is not greater than - // HotProb to break top-order). If we exclude E that is not in BlockFilter - // when calculating the probability of C->D, D will be selected and we + // HotProb to break topo-order). If we exclude E that is not in BlockFilter + // when calculating the probability of C->D, D will be selected and we // will get A C D B as the layout of this loop. auto AdjustedSumProb = BranchProbability::getOne(); for (MachineBasicBlock *Succ : BB->successors()) { @@ -1147,7 +1156,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { continue; // Now we have an interesting triangle. Insert it if it's not part of an - // existing chain + // existing chain. // Note: This cannot be replaced with a call insert() or emplace() because // the find key is BB, but the insert/emplace key is PDom. auto Found = TriangleChainMap.find(&BB); @@ -1165,6 +1174,9 @@ void MachineBlockPlacement::precomputeTriangleChains() { } } + // Iterating over a DenseMap is safe here, because the only thing in the body + // of the loop is inserting into another DenseMap (ComputedEdges). + // ComputedEdges is never iterated, so this doesn't lead to non-determinism. for (auto &ChainPair : TriangleChainMap) { TriangleChain &Chain = ChainPair.second; // Benchmarking has shown that due to branch correlation duplicating 2 or @@ -1286,9 +1298,9 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( // | | | | // ---BB | | BB // | | | | - // | pred-- | Succ-- + // | Pred-- | Succ-- // | | | | - // ---succ ---pred-- + // ---Succ ---Pred-- // // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred) // = freq(S->Pred) + freq(S->BB) @@ -1536,13 +1548,15 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; for (MachineBasicBlock *MBB : WorkList) { - assert(MBB->isEHPad() == IsEHPad); + assert(MBB->isEHPad() == IsEHPad && + "EHPad mismatch between block and work list."); BlockChain &SuccChain = *BlockToChain[MBB]; if (&SuccChain == &Chain) continue; - assert(SuccChain.UnscheduledPredecessors == 0 && "Found CFG-violating block"); + assert(SuccChain.UnscheduledPredecessors == 0 && + "Found CFG-violating block"); BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; @@ -1610,9 +1624,12 @@ void MachineBlockPlacement::fillWorkLists( if (!UpdatedPreds.insert(&Chain).second) return; - assert(Chain.UnscheduledPredecessors == 0); + assert( + Chain.UnscheduledPredecessors == 0 && + "Attempting to place block with unscheduled predecessors in worklist."); for (MachineBasicBlock *ChainBB : Chain) { - assert(BlockToChain[ChainBB] == &Chain); + assert(BlockToChain[ChainBB] == &Chain && + "Block in chain doesn't match BlockToChain map."); for (MachineBasicBlock *Pred : ChainBB->predecessors()) { if (BlockFilter && !BlockFilter->count(Pred)) continue; @@ -2125,8 +2142,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { for (const MachineLoop *InnerLoop : L) buildLoopChains(*InnerLoop); - assert(BlockWorkList.empty()); - assert(EHPadWorkList.empty()); + assert(BlockWorkList.empty() && + "BlockWorkList not empty when starting to build loop chains."); + assert(EHPadWorkList.empty() && + "EHPadWorkList not empty when starting to build loop chains."); BlockFilterSet LoopBlockSet = collectLoopBlockSet(L); // Check if we have profile data for this function. If yes, we will rotate @@ -2156,7 +2175,8 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // walk the blocks, and use a set to prevent visiting a particular chain // twice. SmallPtrSet UpdatedPreds; - assert(LoopChain.UnscheduledPredecessors == 0); + assert(LoopChain.UnscheduledPredecessors == 0 && + "LoopChain should not have unscheduled predecessors."); UpdatedPreds.insert(&LoopChain); for (const MachineBasicBlock *LoopBB : LoopBlockSet) @@ -2245,8 +2265,10 @@ void MachineBlockPlacement::buildCFGChains() { for (MachineLoop *L : *MLI) buildLoopChains(*L); - assert(BlockWorkList.empty()); - assert(EHPadWorkList.empty()); + assert(BlockWorkList.empty() && + "BlockWorkList should be empty before building final chain."); + assert(EHPadWorkList.empty() && + "EHPadWorkList should be empty before building final chain."); SmallPtrSet UpdatedPreds; for (MachineBasicBlock &MBB : *F) @@ -2640,12 +2662,31 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { // there are no MachineLoops. PreferredLoopExit = nullptr; - assert(BlockToChain.empty()); - assert(ComputedEdges.empty()); + assert(BlockToChain.empty() && + "BlockToChain map should be empty before starting placement."); + assert(ComputedEdges.empty() && + "Computed Edge map should be empty before starting placement."); + + unsigned TailDupSize = TailDupPlacementThreshold; + // If only the aggressive threshold is explicitly set, use it. + if (TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0 && + TailDupPlacementThreshold.getNumOccurrences() == 0) + TailDupSize = TailDupPlacementAggressiveThreshold; + + TargetPassConfig *PassConfig = &getAnalysis(); + // For agressive optimization, we can adjust some thresholds to be less + // conservative. + if (PassConfig->getOptLevel() >= CodeGenOpt::Aggressive) { + // At O3 we should be more willing to copy blocks for tail duplication. This + // increases size pressure, so we only do it at O3 + // Do this unless only the regular threshold is explicitly set. + if (TailDupPlacementThreshold.getNumOccurrences() == 0 || + TailDupPlacementAggressiveThreshold.getNumOccurrences() != 0) + TailDupSize = TailDupPlacementAggressiveThreshold; + } if (TailDupPlacement) { MPDT = &getAnalysis(); - unsigned TailDupSize = TailDupPlacementThreshold; if (MF.getFunction()->optForSize()) TailDupSize = 1; TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize); @@ -2655,7 +2696,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { buildCFGChains(); // Changing the layout can create new tail merging opportunities. - TargetPassConfig *PassConfig = &getAnalysis(); // TailMerge can create jump into if branches that make CFG irreducible for // HW that requires structured CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && @@ -2663,7 +2703,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { BranchFoldPlacement; // No tail merging opportunities if the block number is less than four. if (MF.size() > 3 && EnableTailMerge) { - unsigned TailMergeSize = TailDupPlacementThreshold + 1; + unsigned TailMergeSize = TailDupSize + 1; BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, *MBPI, TailMergeSize); diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 0766f465456c9aab88e9ca9bbbbc904bb4e4edca..582ff139f88607d8b1ba0f7aae3ea6f490ec0ceb 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/SmallSet.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Support/raw_ostream.h" @@ -108,12 +108,12 @@ namespace { char MachineCSE::ID = 0; char &llvm::MachineCSEID = MachineCSE::ID; -INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", - "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE, + "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MachineCSE, "machine-cse", - "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE, + "Machine Common Subexpression Elimination", false, false) /// The source register of a COPY machine instruction can be propagated to all /// its users, and this propagation could increase the probability of finding @@ -180,8 +180,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, I = skipDebugInstructionsForward(I, E); if (I == E) - // Reached end of block, register is obviously dead. - return true; + // Reached end of block, we don't know if register is dead or not. + return false; bool SeenDef = false; for (const MachineOperand &MO : I->operands()) { diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index 50e453e4067ccf9b631768c692b650cc9a3234f5..c176de16b5931999d1aa840b7330e7f589b10305 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -86,11 +86,11 @@ private: char MachineCombiner::ID = 0; char &llvm::MachineCombinerID = MachineCombiner::ID; -INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", +INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", +INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 7312dc5e94bddc6a6cf3511bbbb12121cc66bcf5..7d5a68192e6b26e0b711837a7db6a4e74f123288 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -19,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -27,7 +27,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -#define DEBUG_TYPE "codegen-cp" +#define DEBUG_TYPE "machine-cp" STATISTIC(NumDeletes, "Number of dead copies deleted"); @@ -79,7 +79,7 @@ namespace { char MachineCopyPropagation::ID = 0; char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; -INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", +INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE, "Machine Copy Propagation Pass", false, false) /// Remove any entry in \p Map where the register is a subregister or equal to diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp index acb7c4810b16e7514cb74249e005c1514d400534..28ecc8f96805aa4ea4ce306f8043b19736f8b4d3 100644 --- a/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/lib/CodeGen/MachineDominanceFrontier.cpp @@ -12,7 +12,6 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" - using namespace llvm; namespace llvm { diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index e3a6c51c47ad55e71e3753b97781f820754bfc9c..65e9e5d195a4611b72b9f10e5ab543c0b33fd437 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..73d778ff3023565abec77d42d9c035819807e605 --- /dev/null +++ b/lib/CodeGen/MachineFrameInfo.cpp @@ -0,0 +1,244 @@ +//===-- MachineFrameInfo.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file Implements MachineFrameInfo that manages the stack frame. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFrameInfo.h" + +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include + +#define DEBUG_TYPE "codegen" + +using namespace llvm; + +void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { + if (!StackRealignable) + assert(Align <= StackAlignment && + "For targets without stack realignment, Align is out of limit!"); + if (MaxAlignment < Align) MaxAlignment = Align; +} + +/// Clamp the alignment if requested and emit a warning. +static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, + unsigned StackAlign) { + if (!ShouldClamp || Align <= StackAlign) + return Align; + DEBUG(dbgs() << "Warning: requested alignment " << Align + << " exceeds the stack alignment " << StackAlign + << " when stack realignment is off" << '\n'); + return StackAlign; +} + +int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSS, const AllocaInst *Alloca) { + assert(Size != 0 && "Cannot allocate zero size stack objects!"); + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, + !isSS)); + int Index = (int)Objects.size() - NumFixedObjects - 1; + assert(Index >= 0 && "Bad frame index!"); + ensureMaxAlignment(Alignment); + return Index; +} + +int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, + unsigned Alignment) { + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + CreateStackObject(Size, Alignment, true); + int Index = (int)Objects.size() - NumFixedObjects - 1; + ensureMaxAlignment(Alignment); + return Index; +} + +int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, + const AllocaInst *Alloca) { + HasVarSizedObjects = true; + Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); + Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); + ensureMaxAlignment(Alignment); + return (int)Objects.size()-NumFixedObjects-1; +} + +int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, + bool Immutable, bool isAliased) { + assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. Note that unlike the non-fixed case, if the + // stack needs realignment, we can't assume that the stack will in fact be + // aligned. + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/ false, + /*Alloca*/ nullptr, isAliased)); + return -++NumFixedObjects; +} + +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, + int64_t SPOffset, + bool Immutable) { + unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); + Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/ true, + /*Alloca*/ nullptr, + /*isAliased*/ false)); + return -++NumFixedObjects; +} + +BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + BitVector BV(TRI->getNumRegs()); + + // Before CSI is calculated, no registers are considered pristine. They can be + // freely used and PEI will make sure they are saved. + if (!isCalleeSavedInfoValid()) + return BV; + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; + ++CSR) + BV.set(*CSR); + + // Saved CSRs are not pristine. + for (auto &I : getCalleeSavedInfo()) + for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) + BV.reset(*S); + + return BV; +} + +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} + +void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + assert(FrameSetupOpcode != ~0u && FrameDestroyOpcode != ~0u && + "Can only compute MaxCallFrameSize if Setup/Destroy opcode are known"); + + MaxCallFrameSize = 0; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + unsigned Opcode = MI.getOpcode(); + if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) { + unsigned Size = TII.getFrameSize(MI); + MaxCallFrameSize = std::max(MaxCallFrameSize, Size); + AdjustsStack = true; + } else if (MI.isInlineAsm()) { + // Some inline asm's need a stack frame, as indicated by operand 1. + unsigned ExtraInfo = MI.getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + AdjustsStack = true; + } + } + } +} + +void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ + if (Objects.empty()) return; + + const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); + int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); + + OS << "Frame Objects:\n"; + + for (unsigned i = 0, e = Objects.size(); i != e; ++i) { + const StackObject &SO = Objects[i]; + OS << " fi#" << (int)(i-NumFixedObjects) << ": "; + if (SO.Size == ~0ULL) { + OS << "dead\n"; + continue; + } + if (SO.Size == 0) + OS << "variable sized"; + else + OS << "size=" << SO.Size; + OS << ", align=" << SO.Alignment; + + if (i < NumFixedObjects) + OS << ", fixed"; + if (i < NumFixedObjects || SO.SPOffset != -1) { + int64_t Off = SO.SPOffset - ValOffset; + OS << ", at location [SP"; + if (Off > 0) + OS << "+" << Off; + else if (Off < 0) + OS << Off; + OS << "]"; + } + OS << "\n"; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const { + print(MF, dbgs()); +} +#endif diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index c9767a25e908dcf4cc5a84a115a2dc5ee1e6a4de..bbdae6e1a49e5f5353316b611ab4d755ea5c7143 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionInitializer.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -52,8 +51,6 @@ static cl::opt cl::desc("Force the alignment of all functions."), cl::init(0), cl::Hidden); -void MachineFunctionInitializer::anchor() {} - static const char *getPropertyName(MachineFunctionProperties::Property Prop) { typedef MachineFunctionProperties::Property P; switch(Prop) { @@ -756,214 +753,6 @@ void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) { /// \} -//===----------------------------------------------------------------------===// -// MachineFrameInfo implementation -//===----------------------------------------------------------------------===// - -/// Make sure the function is at least Align bytes aligned. -void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!StackRealignable) - assert(Align <= StackAlignment && - "For targets without stack realignment, Align is out of limit!"); - if (MaxAlignment < Align) MaxAlignment = Align; -} - -/// Clamp the alignment if requested and emit a warning. -static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, - unsigned StackAlign) { - if (!ShouldClamp || Align <= StackAlign) - return Align; - DEBUG(dbgs() << "Warning: requested alignment " << Align - << " exceeds the stack alignment " << StackAlign - << " when stack realignment is off" << '\n'); - return StackAlign; -} - -/// Create a new statically sized stack object, returning a nonnegative -/// identifier to represent it. -int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, - bool isSS, const AllocaInst *Alloca) { - assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, - !isSS)); - int Index = (int)Objects.size() - NumFixedObjects - 1; - assert(Index >= 0 && "Bad frame index!"); - ensureMaxAlignment(Alignment); - return Index; -} - -/// Create a new statically sized stack object that represents a spill slot, -/// returning a nonnegative identifier to represent it. -int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, - unsigned Alignment) { - Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - CreateStackObject(Size, Alignment, true); - int Index = (int)Objects.size() - NumFixedObjects - 1; - ensureMaxAlignment(Alignment); - return Index; -} - -/// Notify the MachineFrameInfo object that a variable sized object has been -/// created. This must be created whenever a variable sized object is created, -/// whether or not the index returned is actually used. -int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, - const AllocaInst *Alloca) { - HasVarSizedObjects = true; - Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); - Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); - ensureMaxAlignment(Alignment); - return (int)Objects.size()-NumFixedObjects-1; -} - -/// Create a new object at a fixed location on the stack. -/// All fixed objects should be created before other objects are created for -/// efficiency. By default, fixed objects are immutable. This returns an -/// index with a negative value. -int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable, bool isAliased) { - assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. Note that unlike the non-fixed case, if the - // stack needs realignment, we can't assume that the stack will in fact be - // aligned. - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ false, - /*Alloca*/ nullptr, isAliased)); - return -++NumFixedObjects; -} - -/// Create a spill slot at a fixed location on the stack. -/// Returns an index with a negative value. -int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, - int64_t SPOffset, - bool Immutable) { - unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment); - Align = clampStackAlignment(!StackRealignable, Align, StackAlignment); - Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, - /*isSS*/ true, - /*Alloca*/ nullptr, - /*isAliased*/ false)); - return -++NumFixedObjects; -} - -BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - BitVector BV(TRI->getNumRegs()); - - // Before CSI is calculated, no registers are considered pristine. They can be - // freely used and PEI will make sure they are saved. - if (!isCalleeSavedInfoValid()) - return BV; - - const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; - ++CSR) - BV.set(*CSR); - - // Saved CSRs are not pristine. - for (auto &I : getCalleeSavedInfo()) - for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) - BV.reset(*S); - - return BV; -} - -unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); - unsigned MaxAlign = getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { - if (isDeadObjectIndex(i)) - continue; - Offset += getObjectSize(i); - unsigned Align = getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (adjustsStack() || hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - -void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ - if (Objects.empty()) return; - - const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); - int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); - - OS << "Frame Objects:\n"; - - for (unsigned i = 0, e = Objects.size(); i != e; ++i) { - const StackObject &SO = Objects[i]; - OS << " fi#" << (int)(i-NumFixedObjects) << ": "; - if (SO.Size == ~0ULL) { - OS << "dead\n"; - continue; - } - if (SO.Size == 0) - OS << "variable sized"; - else - OS << "size=" << SO.Size; - OS << ", align=" << SO.Alignment; - - if (i < NumFixedObjects) - OS << ", fixed"; - if (i < NumFixedObjects || SO.SPOffset != -1) { - int64_t Off = SO.SPOffset - ValOffset; - OS << ", at location [SP"; - if (Off > 0) - OS << "+" << Off; - else if (Off < 0) - OS << Off; - OS << "]"; - } - OS << "\n"; - } -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const { - print(MF, dbgs()); -} -#endif - //===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 2265676ff8b1445836263891339abd025a8f1ba5..5ffe330061313f9d0bd5080b8112722ed2f54f79 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -42,7 +42,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) { return false; MachineModuleInfo &MMI = getAnalysis(); - MachineFunction &MF = MMI.getMachineFunction(F); + MachineFunction &MF = MMI.getOrCreateMachineFunction(F); MachineFunctionProperties &MFProps = MF.getProperties(); diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index 0d533c3f4f23fe444d67b27d2113e937913b34f5..55d9defced3aa95b9a5abc8ed63503bd5760100b 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index c0a8b95ed8a06dfccdb908b9d52fa1e9064caca2..2a6cb07dbd2dab0b275b570a77e0d6d7dcb0bd6b 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1,4 +1,4 @@ -//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===// +//===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,20 +12,33 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -34,10 +47,14 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -45,6 +62,14 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include +#include + using namespace llvm; static cl::opt PrintWholeRegMask( @@ -256,7 +281,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_GlobalAddress: return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); case MachineOperand::MO_ExternalSymbol: - return !strcmp(getSymbolName(), Other.getSymbolName()) && + return strcmp(getSymbolName(), Other.getSymbolName()) == 0 && getOffset() == Other.getOffset(); case MachineOperand::MO_BlockAddress: return getBlockAddress() == Other.getBlockAddress() && @@ -287,8 +312,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return getIntrinsicID() == Other.getIntrinsicID(); case MachineOperand::MO_Predicate: return getPredicate() == Other.getPredicate(); - case MachineOperand::MO_Placeholder: - return true; } llvm_unreachable("Invalid machine operand type"); } @@ -337,8 +360,6 @@ hash_code llvm::hash_value(const MachineOperand &MO) { return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID()); case MachineOperand::MO_Predicate: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate()); - case MachineOperand::MO_Placeholder: - return hash_combine(); } llvm_unreachable("Invalid machine operand type"); } @@ -515,9 +536,6 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, << CmpInst::getPredicateName(Pred) << '>'; break; } - case MachineOperand::MO_Placeholder: - OS << ""; - break; } if (unsigned TF = getTargetFlags()) OS << "[TF=" << TF << ']'; @@ -730,9 +748,7 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// the MCInstrDesc. MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, DebugLoc dl, bool NoImp) - : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), - AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), - debugLoc(std::move(dl)) { + : MCID(&tid), debugLoc(std::move(dl)) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); // Reserve space for the expected number of operands. @@ -749,9 +765,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), - Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), - MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) { + : MCID(&MI.getDesc()), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), + debugLoc(MI.getDebugLoc()) { assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); CapOperands = OperandCapacity::get(MI.getNumOperands()); @@ -1640,8 +1655,8 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, // memory objects. It can save compile time, and possibly catch some // corner cases not currently covered. - assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); - assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; @@ -1674,7 +1689,7 @@ bool MachineInstr::hasOrderedMemoryRef() const { return true; // Check if any of our memory operands are ordered. - return any_of(memoperands(), [](const MachineMemOperand *MMO) { + return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) { return !MMO->isUnordered(); }); } @@ -1848,7 +1863,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, return; // Print the rest of the operands. - bool OmittedAnyCallClobbers = false; bool FirstOp = true; unsigned AsmDescOp = ~0u; unsigned AsmOpCount = 0; @@ -1885,31 +1899,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) VirtRegs.push_back(MO.getReg()); - // Omit call-clobbered registers which aren't used anywhere. This makes - // call instructions much less noisy on targets where calls clobber lots - // of registers. Don't rely on MO.isDead() because we may be called before - // LiveVariables is run, or we may be looking at a non-allocatable reg. - if (MRI && isCall() && - MO.isReg() && MO.isImplicit() && MO.isDef()) { - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (MRI->use_empty(Reg)) { - bool HasAliasLive = false; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - unsigned AliasReg = *AI; - if (!MRI->use_empty(AliasReg)) { - HasAliasLive = true; - break; - } - } - if (!HasAliasLive) { - OmittedAnyCallClobbers = true; - continue; - } - } - } - } - if (FirstOp) FirstOp = false; else OS << ","; OS << " "; if (i < getDesc().NumOperands) { @@ -1991,12 +1980,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, MO.print(OS, MST, TRI); } - // Briefly indicate whether any call clobbers were omitted. - if (OmittedAnyCallClobbers) { - if (!FirstOp) OS << ","; - OS << " ..."; - } - bool HaveSemi = false; const unsigned PrintableFlags = FrameSetup | FrameDestroy; if (Flags & PrintableFlags) { @@ -2262,8 +2245,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef UsedRegs, unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // If there are no uses, including partial uses, the def is dead. - if (none_of(UsedRegs, - [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + if (llvm::none_of(UsedRegs, + [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) MO.setIsDead(); } @@ -2351,3 +2334,26 @@ MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB, BB.insert(I, MI); return MachineInstrBuilder(MF, MI); } + +MachineInstr *llvm::buildDbgValueForSpill(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + const MachineInstr &Orig, + int FrameIndex) { + const MDNode *Var = Orig.getDebugVariable(); + const auto *Expr = cast_or_null(Orig.getDebugExpression()); + bool IsIndirect = Orig.isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? Orig.getOperand(1).getImm() : 0; + DebugLoc DL = Orig.getDebugLoc(); + assert(cast(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + // If the DBG_VALUE already was a memory location, add an extra + // DW_OP_deref. Otherwise just turning this from a register into a + // memory/indirect location is sufficient. + if (IsIndirect) + Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); + return BuildMI(BB, I, DL, Orig.getDesc()) + .addFrameIndex(FrameIndex) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index b3d18435985e808ef55ed9e13b020eea550a0110..c7113f1fdc47aa4b8fd19865805bd60443a845c1 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -26,6 +25,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/CommandLine.h" @@ -38,7 +38,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -#define DEBUG_TYPE "machine-licm" +#define DEBUG_TYPE "machinelicm" static cl::opt AvoidSpeculation("avoid-speculation", @@ -237,13 +237,13 @@ namespace { char MachineLICM::ID = 0; char &llvm::MachineLICMID = MachineLICM::ID; -INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", - "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, + "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MachineLICM, "machinelicm", - "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, + "Machine Loop Invariant Code Motion", false, false) /// Test if the given loop is the outer-most loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { @@ -330,7 +330,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { /// Return true if instruction stores to the specified frame. static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { // If we lost memory operands, conservatively assume that the instruction - // writes to all slots. + // writes to all slots. if (MI->memoperands_empty()) return true; for (const MachineMemOperand *MemOp : MI->memoperands()) { @@ -708,7 +708,7 @@ void MachineLICM::SinkIntoLoop() { for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); I != Preheader->instr_end(); ++I) { // We need to ensure that we can safely move this instruction into the loop. - // As such, it must not have side-effects, e.g. such as a call has. + // As such, it must not have side-effects, e.g. such as a call has. if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) Candidates.push_back(&*I); } @@ -837,9 +837,9 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, /// constant pool. static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.mayLoad() && "Expected MI that loads!"); - + // If we lost memory operands, conservatively assume that the instruction - // reads from everything.. + // reads from everything.. if (MI.memoperands_empty()) return true; @@ -895,8 +895,11 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->isConstantPhysReg(Reg)) - return false; + // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent()))) + return false; // Otherwise it's safe to move. continue; } else if (!MO.isDead()) { @@ -1337,7 +1340,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); // Since we are moving the instruction out of its basic block, we do not - // retain its debug location. Doing so would degrade the debugging + // retain its debug location. Doing so would degrade the debugging // experience and adversely affect the accuracy of profiling information. MI->setDebugLoc(DebugLoc()); diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 2f0f4297ef5c5c5b2ba3498e37be8705fa2e14d8..825290a438a6c109e0ae928c59f335815db406c3 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -8,43 +8,51 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionInitializer.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Constants.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include +#include +#include +#include +#include + using namespace llvm; using namespace llvm::dwarf; // Handle the Pass registration stuff necessary to use DataLayout's. -INITIALIZE_TM_PASS(MachineModuleInfo, "machinemoduleinfo", - "Machine Module Information", false, false) +INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", + "Machine Module Information", false, false) char MachineModuleInfo::ID = 0; // Out of line virtual method. -MachineModuleInfoImpl::~MachineModuleInfoImpl() {} +MachineModuleInfoImpl::~MachineModuleInfoImpl() = default; namespace llvm { + class MMIAddrLabelMapCallbackPtr final : CallbackVH { - MMIAddrLabelMap *Map; + MMIAddrLabelMap *Map = nullptr; + public: - MMIAddrLabelMapCallbackPtr() : Map(nullptr) {} - MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {} + MMIAddrLabelMapCallbackPtr() = default; + MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V) {} void setPtr(BasicBlock *BB) { ValueHandleBase::operator=(BB); @@ -75,11 +83,12 @@ class MMIAddrLabelMap { /// This is a per-function list of symbols whose corresponding BasicBlock got /// deleted. These symbols need to be emitted at some point in the file, so /// AsmPrinter emits them after the function body. - DenseMap, std::vector > + DenseMap, std::vector> DeletedAddrLabelsNeedingEmission; -public: +public: MMIAddrLabelMap(MCContext &context) : Context(context) {} + ~MMIAddrLabelMap() { assert(DeletedAddrLabelsNeedingEmission.empty() && "Some labels for deleted blocks never got emitted"); @@ -93,7 +102,8 @@ public: void UpdateForDeletedBlock(BasicBlock *BB); void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); }; -} + +} // end namespace llvm ArrayRef MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { assert(BB->hasAddressTaken() && @@ -119,7 +129,7 @@ ArrayRef MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { /// If we have any deleted symbols for F, return them. void MMIAddrLabelMap:: takeDeletedSymbolsForFunction(Function *F, std::vector &Result) { - DenseMap, std::vector >::iterator I = + DenseMap, std::vector>::iterator I = DeletedAddrLabelsNeedingEmission.find(F); // If there are no entries for the function, just return. @@ -130,7 +140,6 @@ takeDeletedSymbolsForFunction(Function *F, std::vector &Result) { DeletedAddrLabelsNeedingEmission.erase(I); } - void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { // If the block got deleted, there is no need for the symbol. If the symbol // was already emitted, we can just forget about it, otherwise we need to @@ -177,7 +186,6 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { OldEntry.Symbols.end()); } - void MMIAddrLabelMapCallbackPtr::deleted() { Map->UpdateForDeletedBlock(cast(getValPtr())); } @@ -186,9 +194,6 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { Map->UpdateForRAUWBlock(cast(getValPtr()), cast(V2)); } - -//===----------------------------------------------------------------------===// - MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM) : ImmutablePass(ID), TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(), @@ -196,11 +201,9 @@ MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM) initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } -MachineModuleInfo::~MachineModuleInfo() { -} +MachineModuleInfo::~MachineModuleInfo() = default; bool MachineModuleInfo::doInitialization(Module &M) { - ObjFileMMI = nullptr; CurCallSite = 0; DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; @@ -211,7 +214,6 @@ bool MachineModuleInfo::doInitialization(Module &M) { } bool MachineModuleInfo::doFinalization(Module &M) { - Personalities.clear(); delete AddrLabelSymbols; @@ -256,7 +258,14 @@ void MachineModuleInfo::addPersonality(const Function *Personality) { /// \} -MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) { +MachineFunction * +MachineModuleInfo::getMachineFunction(const Function &F) const { + auto I = MachineFunctions.find(&F); + return I != MachineFunctions.end() ? I->second.get() : nullptr; +} + +MachineFunction & +MachineModuleInfo::getOrCreateMachineFunction(const Function &F) { // Shortcut for the common case where a sequence of MachineFunctionPasses // all query for the same Function. if (LastRequest == &F) @@ -270,10 +279,6 @@ MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) { MF = new MachineFunction(&F, TM, NextFnNum++, *this); // Update the set entry. I.first->second.reset(MF); - - if (MFInitializer) - if (MFInitializer->initializeMachineFunction(*MF)) - report_fatal_error("Unable to initialize machine function"); } else { MF = I.first->second.get(); } @@ -290,10 +295,12 @@ void MachineModuleInfo::deleteMachineFunctionFor(Function &F) { } namespace { + /// This pass frees the MachineFunction object associated with a Function. class FreeMachineFunction : public FunctionPass { public: static char ID; + FreeMachineFunction() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -311,14 +318,14 @@ public: return "Free MachineFunction"; } }; -char FreeMachineFunction::ID; + } // end anonymous namespace -namespace llvm { -FunctionPass *createFreeMachineFunctionPass() { +char FreeMachineFunction::ID; + +FunctionPass *llvm::createFreeMachineFunctionPass() { return new FreeMachineFunction(); } -} // end namespace llvm //===- MMI building helpers -----------------------------------------------===// diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 4c81fd91cb829e9cf6bc5b16fafa5af5bd9b0b1e..22d519e5d88fa38a95e99ac1780a0daa31971e79 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -23,7 +23,6 @@ using namespace llvm; // Out of line virtual method. void MachineModuleInfoMachO::anchor() {} void MachineModuleInfoELF::anchor() {} -void MachineModuleInfoWasm::anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { typedef std::pair PairTy; diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 581a8ad811497847eb6b7a777518b8cda82cbbe6..fd6b2427891d10e4f3fa1d28003ab96c2392e51a 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -901,7 +901,7 @@ namespace llvm { ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); } } -INITIALIZE_PASS(MachineOutliner, "machine-outliner", +INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) void MachineOutliner::pruneOverlaps(std::vector &CandidateList, @@ -1111,7 +1111,7 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, Builder.CreateRetVoid(); MachineModuleInfo &MMI = getAnalysis(); - MachineFunction &MF = MMI.getMachineFunction(*F); + MachineFunction &MF = MMI.getOrCreateMachineFunction(*F); MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetInstrInfo &TII = *STI.getInstrInfo(); @@ -1207,7 +1207,7 @@ bool MachineOutliner::runOnModule(Module &M) { return false; MachineModuleInfo &MMI = getAnalysis(); - const TargetSubtargetInfo &STI = MMI.getMachineFunction(*M.begin()) + const TargetSubtargetInfo &STI = MMI.getOrCreateMachineFunction(*M.begin()) .getSubtarget(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); @@ -1216,7 +1216,7 @@ bool MachineOutliner::runOnModule(Module &M) { // Build instruction mappings for each function in the module. for (Function &F : M) { - MachineFunction &MF = MMI.getMachineFunction(F); + MachineFunction &MF = MMI.getOrCreateMachineFunction(F); // Is the function empty? Safe to outline from? if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF)) diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index d06c38cf4ed81a92635f4ef0c26c713cbb604d7f..19e9a50e2c438bf947ab77ef6d099db90d8a2696 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -61,7 +61,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SetVector.h" @@ -69,6 +68,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" @@ -715,13 +715,13 @@ char MachinePipeliner::ID = 0; int MachinePipeliner::NumTries = 0; #endif char &llvm::MachinePipelinerID = MachinePipeliner::ID; -INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner", +INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(MachinePipeliner, "pipeliner", +INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) /// The "main" function for implementing Swing Modulo Scheduling. diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp index 71ad4e6aa7f520ab4a4eb50b080a69fe20dd27a2..1e74104e89edd4d292de7208c2dc51dd8217bc94 100644 --- a/lib/CodeGen/MachineRegionInfo.cpp +++ b/lib/CodeGen/MachineRegionInfo.cpp @@ -1,7 +1,19 @@ +//===- lib/Codegen/MachineRegionInfo.cpp ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + #include "llvm/CodeGen/MachineRegionInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "machine-region-info" @@ -11,36 +23,29 @@ STATISTIC(numMachineRegions, "The # of machine regions"); STATISTIC(numMachineSimpleRegions, "The # of simple machine regions"); namespace llvm { + template class RegionBase>; template class RegionNodeBase>; template class RegionInfoBase>; -} + +} // end namespace llvm //===----------------------------------------------------------------------===// // MachineRegion implementation -// MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit, MachineRegionInfo* RI, MachineDominatorTree *DT, MachineRegion *Parent) : - RegionBase>(Entry, Exit, RI, DT, Parent) { + RegionBase>(Entry, Exit, RI, DT, Parent) {} -} - -MachineRegion::~MachineRegion() { } +MachineRegion::~MachineRegion() = default; //===----------------------------------------------------------------------===// // MachineRegionInfo implementation -// -MachineRegionInfo::MachineRegionInfo() : - RegionInfoBase>() { +MachineRegionInfo::MachineRegionInfo() = default; -} - -MachineRegionInfo::~MachineRegionInfo() { - -} +MachineRegionInfo::~MachineRegionInfo() = default; void MachineRegionInfo::updateStatistics(MachineRegion *R) { ++numMachineRegions; @@ -73,9 +78,7 @@ MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) { initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry()); } -MachineRegionInfoPass::~MachineRegionInfoPass() { - -} +MachineRegionInfoPass::~MachineRegionInfoPass() = default; bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) { releaseMemory(); @@ -137,8 +140,9 @@ INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE, // the link time optimization. namespace llvm { - FunctionPass *createMachineRegionInfoPass() { - return new MachineRegionInfoPass(); - } + +FunctionPass *createMachineRegionInfoPass() { + return new MachineRegionInfoPass(); } +} // end namespace llvm diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 128910f8eb2aa35f72915369480b6d83749d03b6..9a92ee279cdc98f9545507ccf6c00dbf8ab9812a 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -18,7 +19,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index fe7b2c8399b15d5492096cf9bcd1a7c7dd1ddfee..01a2286b8d66a958a68f2c8c6c166383d4c1c8b3 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/PriorityQueue.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -30,12 +31,11 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" -#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleDAGMutation.h" @@ -69,7 +69,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace llvm { @@ -191,13 +191,13 @@ char MachineScheduler::ID = 0; char &llvm::MachineSchedulerID = MachineScheduler::ID; -INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", +INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", +INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() @@ -532,7 +532,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, // thumb2 size reduction is currently an exception, so the PostMIScheduler // needs to do this. if (FixKillFlags) - Scheduler.fixupKills(&*MBB); + Scheduler.fixupKills(*MBB); } Scheduler.finalizeSchedule(); } @@ -2729,7 +2729,7 @@ void GenericScheduler::registerRoots() { errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; } - if (EnableCyclicPath) { + if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) { Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); checkAcyclicLatency(); } @@ -3233,6 +3233,12 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) return; + // Keep clustered nodes together. + if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), + Cand.SU == DAG->getNextClusterSucc(), + TryCand, Cand, Cluster)) + return; + // Avoid critical resource consumption and balance the schedule. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 5f87b68123f1c99c25c51208b12fbb2a84619e3a..79e3fea3f90c30ca8f3615eb7059e01ae2029dff 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SparseBitVector.h" @@ -33,6 +32,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -173,14 +173,14 @@ namespace { char MachineSinking::ID = 0; char &llvm::MachineSinkingID = MachineSinking::ID; -INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", - "Machine code sinking", false, false) +INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, + "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MachineSinking, "machine-sink", - "Machine code sinking", false, false) +INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, + "Machine code sinking", false, false) bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB) { diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 998a9645e68bfa2761ee180bbad983a3881bbf39..6c5abc66fba15e944f192db09a1a088048ba4ccb 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" @@ -21,7 +22,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -44,12 +44,12 @@ using namespace llvm; char MachineTraceMetrics::ID = 0; char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; -INITIALIZE_PASS_BEGIN(MachineTraceMetrics, - "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE, + "Machine Trace Metrics", false, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineTraceMetrics, - "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE, + "Machine Trace Metrics", false, true) MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) { std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr); diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index f49232b0f8ad140b6600311326a7ffc8bfd44e2c..e65c256c1bb5a7e441edf078b5d9788dd82807b9 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,7 +23,6 @@ // the verifier errors. //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" @@ -36,6 +35,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" @@ -87,7 +88,6 @@ namespace { RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; RegMaskVector regMasks; - RegSet regsLiveInButUnused; SlotIndex lastIndex; @@ -188,8 +188,9 @@ namespace { return Reg < regsReserved.size() && regsReserved.test(Reg); } - bool isAllocatable(unsigned Reg) { - return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg); + bool isAllocatable(unsigned Reg) const { + return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) && + !regsReserved.test(Reg); } // Analysis information if available @@ -418,7 +419,6 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { regsDead.clear(); regsKilled.clear(); regMasks.clear(); - regsLiveInButUnused.clear(); MBBInfoMap.clear(); return foundErrors; @@ -526,7 +526,8 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { void MachineVerifier::visitMachineFunctionBefore() { lastIndex = SlotIndex(); - regsReserved = MRI->getReservedRegs(); + regsReserved = MRI->reservedRegsFrozen() ? MRI->getReservedRegs() + : TRI->getReservedRegs(*MF); if (!MF->empty()) markReachable(&MF->front()); @@ -754,11 +755,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { regsLive.insert(*SubRegs); } } - regsLiveInButUnused = regsLive; const MachineFrameInfo &MFI = MF->getFrameInfo(); BitVector PR = MFI.getPristineRegs(*MF); - for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { + for (unsigned I : PR.set_bits()) { for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) regsLive.insert(*SubRegs); @@ -910,17 +910,42 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } } - // Generic loads and stores must have a single MachineMemOperand - // describing that access. - if ((MI->getOpcode() == TargetOpcode::G_LOAD || - MI->getOpcode() == TargetOpcode::G_STORE) && - !MI->hasOneMemOperand()) - report("Generic instruction accessing memory must have one mem operand", - MI); - StringRef ErrorInfo; if (!TII->verifyInstruction(*MI, ErrorInfo)) report(ErrorInfo.data(), MI); + + // Verify properties of various specific instruction types + switch(MI->getOpcode()) { + default: + break; + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: + // Generic loads and stores must have a single MachineMemOperand + // describing that access. + if (!MI->hasOneMemOperand()) + report("Generic instruction accessing memory must have one mem operand", + MI); + break; + case TargetOpcode::STATEPOINT: + if (!MI->getOperand(StatepointOpers::IDPos).isImm() || + !MI->getOperand(StatepointOpers::NBytesPos).isImm() || + !MI->getOperand(StatepointOpers::NCallArgsPos).isImm()) + report("meta operands to STATEPOINT not constant!", MI); + break; + + auto VerifyStackMapConstant = [&](unsigned Offset) { + if (!MI->getOperand(Offset).isImm() || + MI->getOperand(Offset).getImm() != StackMaps::ConstantOp || + !MI->getOperand(Offset + 1).isImm()) + report("stack map constant to STATEPOINT not well formed!", MI); + }; + const unsigned VarStart = StatepointOpers(MI).getVarIdx(); + VerifyStackMapConstant(VarStart + StatepointOpers::CCOffset); + VerifyStackMapConstant(VarStart + StatepointOpers::FlagsOffset); + VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset); + + // TODO: verify we have properly encoded deopt arguments + }; } void @@ -1266,8 +1291,6 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Both use and def operands can read a register. if (MO->readsReg()) { - regsLiveInButUnused.erase(Reg); - if (MO->isKill()) addRegWithSubRegs(regsKilled, Reg); @@ -1923,9 +1946,11 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); - // All predecessors must have a live-out value if this is not a - // subregister liverange. - if (!PVNI && LaneMask.none()) { + // All predecessors must have a live-out value. However for a phi + // instruction with subregister intervals + // only one of the subregisters (not necessarily the current one) needs to + // be defined. + if (!PVNI && (LaneMask.none() || !IsPHI) ) { report("Register not marked live out of predecessor", *PI); report_context(LR, Reg, LaneMask); report_context(*VNI); @@ -2030,6 +2055,8 @@ namespace { void MachineVerifier::verifyStackFrame() { unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u) + return; SmallVector SPState; SPState.resize(MF->getNumBlockIDs()); @@ -2057,23 +2084,14 @@ void MachineVerifier::verifyStackFrame() { // Update stack state by checking contents of MBB. for (const auto &I : *MBB) { if (I.getOpcode() == FrameSetupOpcode) { - // The first operand of a FrameOpcode should be i32. - int Size = I.getOperand(0).getImm(); - assert(Size >= 0 && - "Value should be non-negative in FrameSetup and FrameDestroy.\n"); - if (BBState.ExitIsSetup) report("FrameSetup is after another FrameSetup", &I); - BBState.ExitValue -= Size; + BBState.ExitValue -= TII->getFrameTotalSize(I); BBState.ExitIsSetup = true; } if (I.getOpcode() == FrameDestroyOpcode) { - // The first operand of a FrameOpcode should be i32. - int Size = I.getOperand(0).getImm(); - assert(Size >= 0 && - "Value should be non-negative in FrameSetup and FrameDestroy.\n"); - + int Size = TII->getFrameTotalSize(I); if (!BBState.ExitIsSetup) report("FrameDestroy is not after a FrameSetup", &I); int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue : diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 2a8531f337a0f38b68723c2e9037730cf87ef45a..f7aeb4204c5bb45c7797e1fde328eca1a723d5b7 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -12,18 +12,18 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -#define DEBUG_TYPE "phi-opt" +#define DEBUG_TYPE "opt-phis" STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); @@ -59,7 +59,7 @@ namespace { char OptimizePHIs::ID = 0; char &llvm::OptimizePHIsID = OptimizePHIs::ID; -INITIALIZE_PASS(OptimizePHIs, "opt-phis", +INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE, "Optimize machine instruction PHIs", false, false) bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index c67a25b888bff1034c88425531d9441771c90e44..9c898fa40d7e7673ec06d1652bf53a5c100fa758 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -34,7 +34,7 @@ #include using namespace llvm; -#define DEBUG_TYPE "phielim" +#define DEBUG_TYPE "phi-node-elimination" static cl::opt DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), @@ -112,11 +112,11 @@ STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; char& llvm::PHIEliminationID = PHIElimination::ID; -INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination", +INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE, "Eliminate PHI nodes for register allocation", false, false) INITIALIZE_PASS_DEPENDENCY(LiveVariables) -INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", +INITIALIZE_PASS_END(PHIElimination, DEBUG_TYPE, "Eliminate PHI nodes for register allocation", false, false) void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp index 00e72971a01e8c92639c0ca0d7bfef6c9951d228..513e82716564ee74e1ac4a1743fe9b45871dd56e 100644 --- a/lib/CodeGen/PatchableFunction.cpp +++ b/lib/CodeGen/PatchableFunction.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 6d643457e9a9610008af9dd27f69fc8a5ef9967c..da8fac6d3834a9486c1d7fd029baf5e067f0eba5 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -66,7 +66,6 @@ // C = copy A <-- same-bank copy //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -79,6 +78,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 5bc5f7524dbfb3862aeb89cc16129c3b70d48097..425a59dc03752d82362f02b998bc3c87d933bdfe 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -27,9 +27,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 61dccdde8f1dc8c3e3123305377999c2cbb9875c..f2249f9e37e0fcdd37742662587519929866f4e0 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -200,7 +200,7 @@ namespace { char &llvm::PostRASchedulerID = PostRAScheduler::ID; -INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", +INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE, "Post RA top-down list latency scheduler", false, false) SchedulePostRATDList::SchedulePostRATDList( @@ -367,7 +367,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.finishBlock(); // Update register kills - Scheduler.fixupKills(&MBB); + Scheduler.fixupKills(MBB); } return true; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index d27ea2f51867a60353328703ee901bd47dd338df..0118580a626afdb18c35dca0fe25bdb992ac8b08 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -20,7 +20,7 @@ using namespace llvm; -#define DEBUG_TYPE "processimplicitdefs" +#define DEBUG_TYPE "processimpdefs" namespace { /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def @@ -51,9 +51,7 @@ public: char ProcessImplicitDefs::ID = 0; char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; -INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", - "Process Implicit Definitions", false, false) -INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", +INITIALIZE_PASS(ProcessImplicitDefs, DEBUG_TYPE, "Process Implicit Definitions", false, false) void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 9f608957ca2ab9bde90542c45b2be503b9b1f352..e9f8d43fe64332cae89d0368111374939445dc59 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -45,7 +45,7 @@ using namespace llvm; -#define DEBUG_TYPE "pei" +#define DEBUG_TYPE "prologepilog" typedef SmallVector MBBVector; static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, @@ -54,25 +54,12 @@ static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, const MBBVector &SaveBlocks, const MBBVector &RestoreBlocks); -static void doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS); - namespace { class PEI : public MachineFunctionPass { public: static char ID; - explicit PEI(const TargetMachine *TM = nullptr) : MachineFunctionPass(ID) { + PEI() : MachineFunctionPass(ID) { initializePEIPass(*PassRegistry::getPassRegistry()); - - if (TM && (!TM->usesPhysRegsForPEI())) { - SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *, - unsigned &, unsigned &, const MBBVector &, - const MBBVector &) {}; - ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {}; - } else { - SpillCalleeSavedRegisters = doSpillCalleeSavedRegs; - ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs; - UsesCalleeSaves = true; - } } void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -95,7 +82,7 @@ private: const MBBVector &SaveBlocks, const MBBVector &RestoreBlocks)> SpillCalleeSavedRegisters; - std::function + std::function ScavengeFrameVirtualRegs; bool UsesCalleeSaves = false; @@ -140,21 +127,19 @@ WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), cl::desc("Warn for stack size bigger than the given" " number")); -INITIALIZE_TM_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", - false, false) +INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false, + false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_TM_PASS_END(PEI, "prologepilog", - "Prologue/Epilogue Insertion & Frame Finalization", - false, false) +INITIALIZE_PASS_END(PEI, DEBUG_TYPE, + "Prologue/Epilogue Insertion & Frame Finalization", false, + false) -MachineFunctionPass * -llvm::createPrologEpilogInserterPass(const TargetMachine *TM) { - return new PEI(TM); +MachineFunctionPass *llvm::createPrologEpilogInserterPass() { + return new PEI(); } -STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); @@ -174,6 +159,20 @@ typedef SmallSetVector StackObjSet; /// frame indexes with appropriate references. /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { + if (!SpillCalleeSavedRegisters) { + const TargetMachine &TM = Fn.getTarget(); + if (!TM.usesPhysRegsForPEI()) { + SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *, + unsigned &, unsigned &, const MBBVector &, + const MBBVector &) {}; + ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {}; + } else { + SpillCalleeSavedRegisters = doSpillCalleeSavedRegs; + ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs; + UsesCalleeSaves = true; + } + } + const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); @@ -220,7 +219,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // post-pass, scavenge the virtual registers that frame index elimination // inserted. if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) { - ScavengeFrameVirtualRegs(Fn, RS); + ScavengeFrameVirtualRegs(Fn, *RS); // Clear any vregs created by virtual scavenging. Fn.getRegInfo().clearVirtRegs(); @@ -265,11 +264,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { std::vector FrameSDOps; for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) - if (I->getOpcode() == FrameSetupOpcode || - I->getOpcode() == FrameDestroyOpcode) { - assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo" - " instructions should have a single immediate argument!"); - unsigned Size = I->getOperand(0).getImm(); + if (TII.isFrameInstr(*I)) { + unsigned Size = TII.getFrameSize(*I); if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; AdjustsStack = true; FrameSDOps.push_back(I); @@ -280,6 +276,9 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) { AdjustsStack = true; } + assert(!MFI.isMaxCallFrameSizeComputed() || + (MFI.getMaxCallFrameSize() == MaxCallFrameSize && + MFI.adjustsStack() == AdjustsStack)); MFI.setAdjustsStack(AdjustsStack); MFI.setMaxCallFrameSize(MaxCallFrameSize); @@ -376,22 +375,22 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F, FixedSlot->Reg != Reg) ++FixedSlot; + unsigned Size = RegInfo->getSpillSize(*RC); if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. - unsigned Align = RC->getAlignment(); + unsigned Align = RegInfo->getSpillAlignment(*RC); unsigned StackAlign = TFI->getStackAlignment(); // We may not be able to satisfy the desired alignment specification of // the TargetRegisterClass if the stack alignment is smaller. Use the // min. Align = std::min(Align, StackAlign); - FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true); + FrameIdx = MFI.CreateStackObject(Size, Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = - MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset); } CS.setFrameIdx(FrameIdx); @@ -448,12 +447,13 @@ static void updateLiveness(MachineFunction &MF) { const std::vector &CSI = MFI.getCalleeSavedInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { for (MachineBasicBlock *MBB : Visited) { MCPhysReg Reg = CSI[i].getReg(); // Add the callee-saved register as live-in. // It's killed at the spill. - if (!MBB->isLiveIn(Reg)) + if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg)) MBB->addLiveIn(Reg); } } @@ -764,6 +764,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } else if (MaxCSFrameIndex >= MinCSFrameIndex) { // Be careful about underflow in comparisons agains MinCSFrameIndex. for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) { + if (MFI.isDeadObjectIndex(i)) + continue; + unsigned Align = MFI.getObjectAlignment(i); // Adjust to alignment boundary Offset = alignTo(Offset, Align, Skew); @@ -1049,8 +1052,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); - unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); - unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); if (RS && FrameIndexEliminationScavenging) RS->enterBasicBlock(*BB); @@ -1059,11 +1060,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == FrameSetupOpcode || - I->getOpcode() == FrameDestroyOpcode) { - InsideCallSequence = (I->getOpcode() == FrameSetupOpcode); + if (TII.isFrameInstr(*I)) { + InsideCallSequence = TII.isFrameSetup(*I); SPAdj += TII.getSPAdjust(*I); - I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); continue; } @@ -1151,92 +1150,3 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, RS->forward(MI); } } - -/// doScavengeFrameVirtualRegs - Replace all frame index virtual registers -/// with physical registers. Use the register scavenger to find an -/// appropriate register to use. -/// -/// FIXME: Iterating over the instruction stream is unnecessary. We can simply -/// iterate over the vreg use list, which at this point only contains machine -/// operands for which eliminateFrameIndex need a new scratch reg. -static void -doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) { - // Run through the instructions and find any virtual registers. - MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineBasicBlock &MBB : MF) { - RS->enterBasicBlock(MBB); - - int SPAdj = 0; - - // The instruction stream may change in the loop, so check MBB.end() - // directly. - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { - // We might end up here again with a NULL iterator if we scavenged a - // register for which we inserted spill code for definition by what was - // originally the first instruction in MBB. - if (I == MachineBasicBlock::iterator(nullptr)) - I = MBB.begin(); - - const MachineInstr &MI = *I; - MachineBasicBlock::iterator J = std::next(I); - MachineBasicBlock::iterator P = - I == MBB.begin() ? MachineBasicBlock::iterator(nullptr) - : std::prev(I); - - // RS should process this instruction before we might scavenge at this - // location. This is because we might be replacing a virtual register - // defined by this instruction, and if so, registers killed by this - // instruction are available, and defined registers are not. - RS->forward(I); - - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - - // When we first encounter a new virtual register, it - // must be a definition. - assert(MO.isDef() && "frame index virtual missing def!"); - // Scavenge a new scratch register - const TargetRegisterClass *RC = MRI.getRegClass(Reg); - unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); - - ++NumScavengedRegs; - - // Replace this reference to the virtual register with the - // scratch register. - assert(ScratchReg && "Missing scratch register!"); - MRI.replaceRegWith(Reg, ScratchReg); - - // Because this instruction was processed by the RS before this - // register was allocated, make sure that the RS now records the - // register as being used. - RS->setRegUsed(ScratchReg); - } - - // If the scavenger needed to use one of its spill slots, the - // spill code will have been inserted in between I and J. This is a - // problem because we need the spill code before I: Move I to just - // prior to J. - if (I != std::prev(J)) { - MBB.splice(J, &MBB, I); - - // Before we move I, we need to prepare the RS to visit I again. - // Specifically, RS will assert if it sees uses of registers that - // it believes are undefined. Because we have already processed - // register kills in I, when it visits I again, it will believe that - // those registers are undefined. To avoid this situation, unprocess - // the instruction I. - assert(RS->getCurrentPosition() == I && - "The register scavenger has an unexpected position"); - I = P; - RS->unprocess(P); - } else - ++I; - } - } - - MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); -} diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index fb49a934431c455ea180bce97cfe967fce4a7647..a7b7a9f8ab1529c39ffa66d4ecafe058e854b9bd 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -21,13 +21,12 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index a87fed3a687e1a0d9a09fcc7679ece3b4192ec74..774306154a894b85a109b45eb583476b9ab23d78 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "LiveDebugVariables.h" #include "RegAllocBase.h" @@ -28,6 +27,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/PassAnalysisSupport.h" @@ -58,8 +58,9 @@ namespace { /// whenever a register is unavailable. This is not practical in production but /// provides a useful baseline both for measuring other allocators and comparing /// the speed of the basic algorithm against other styles of allocators. -class RABasic : public MachineFunctionPass, public RegAllocBase -{ +class RABasic : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { // context MachineFunction *MF; @@ -72,6 +73,9 @@ class RABasic : public MachineFunctionPass, public RegAllocBase // selectOrSplit(). BitVector UsableRegs; + bool LRE_CanEraseVirtReg(unsigned) override; + void LRE_WillShrinkVirtReg(unsigned) override; + public: RABasic(); @@ -121,17 +125,46 @@ char RABasic::ID = 0; } // end anonymous namespace +char &llvm::RABasicID = RABasic::ID; + +INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator", + false, false) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) +INITIALIZE_PASS_DEPENDENCY(MachineScheduler) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) +INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false, + false) + +bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) { + if (VRM->hasPhys(VirtReg)) { + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + aboutToRemoveInterval(LI); + return true; + } + // Unassigned virtreg is probably in the priority queue. + // RegAllocBase will erase it after dequeueing. + return false; +} + +void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) { + if (!VRM->hasPhys(VirtReg)) + return; + + // Register is assigned, put it back on the queue for reassignment. + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + enqueue(&LI); +} + RABasic::RABasic(): MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); - initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); } void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { @@ -200,7 +233,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, Matrix->unassign(Spill); // Spill the extracted interval. - LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); + LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); } return true; @@ -259,7 +292,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats); + LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index fd759bc372b255819256a668f3ca314f7f9d1c96..c606b7b83310477bbf8f61f7497a3477343dca62 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -212,8 +212,9 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { return SS; // Already has space allocated? // Allocate a new stack object for this spill location... - int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + unsigned Size = TRI->getSpillSize(*RC); + unsigned Align = TRI->getSpillAlignment(*RC); + int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(Size, Align); // Assign the slot. StackSlotForVirtReg[VirtReg] = FrameIdx; @@ -304,19 +305,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; - const MDNode *Var = DBG->getDebugVariable(); - const MDNode *Expr = DBG->getDebugExpression(); - bool IsIndirect = DBG->isIndirectDebugValue(); - uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; - DebugLoc DL = DBG->getDebugLoc(); - assert(cast(Var)->isValidLocationForIntrinsic(DL) && - "Expected inlined-at fields to agree"); - MachineInstr *NewDV = - BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(FI) - .addImm(Offset) - .addMetadata(Var) - .addMetadata(Expr); + MachineInstr *NewDV = buildDbgValueForSpill(*MBB, MI, *DBG, FI); assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 06500289c971ab85bbb9e1a520c55ef7adfe7f9f..50d241bff23d1b5d7b16ce1ba287dea5b0027528 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1,4 +1,4 @@ -//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// +//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===// // // The LLVM Compiler Infrastructure // @@ -19,36 +19,63 @@ #include "SpillPlacement.h" #include "Spiller.h" #include "SplitKit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/PassAnalysisSupport.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include #include +#include +#include using namespace llvm; @@ -106,13 +133,14 @@ static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); namespace { + class RAGreedy : public MachineFunctionPass, public RegAllocBase, private LiveRangeEdit::Delegate { // Convenient shortcuts. - typedef std::priority_queue > PQueue; - typedef SmallPtrSet SmallLISet; - typedef SmallSet SmallVirtRegSet; + using PQueue = std::priority_queue>; + using SmallLISet = SmallPtrSet; + using SmallVirtRegSet = SmallSet; // context MachineFunction *MF; @@ -201,12 +229,12 @@ class RAGreedy : public MachineFunctionPass, // RegInfo - Keep additional information about each live range. struct RegInfo { - LiveRangeStage Stage; + LiveRangeStage Stage = RS_New; // Cascade - Eviction loop prevention. See canEvictInterference(). - unsigned Cascade; + unsigned Cascade = 0; - RegInfo() : Stage(RS_New), Cascade(0) {} + RegInfo() = default; }; IndexedMap ExtraRegInfo; @@ -232,10 +260,10 @@ class RAGreedy : public MachineFunctionPass, /// Cost of evicting interference. struct EvictionCost { - unsigned BrokenHints; ///< Total number of broken hints. - float MaxWeight; ///< Maximum spill weight evicted. + unsigned BrokenHints = 0; ///< Total number of broken hints. + float MaxWeight = 0; ///< Maximum spill weight evicted. - EvictionCost(): BrokenHints(0), MaxWeight(0) {} + EvictionCost() = default; bool isMax() const { return BrokenHints == ~0u; } @@ -285,8 +313,7 @@ class RAGreedy : public MachineFunctionPass, // Set B[i] = C for every live bundle where B[i] was NoCand. unsigned getBundles(SmallVectorImpl &B, unsigned C) { unsigned Count = 0; - for (int i = LiveBundles.find_first(); i >= 0; - i = LiveBundles.find_next(i)) + for (unsigned i : LiveBundles.set_bits()) if (B[i] == NoCand) { B[i] = C; Count++; @@ -414,10 +441,12 @@ private: /// Its currently assigned register. /// In case of a physical register Reg == PhysReg. unsigned PhysReg; + HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} }; - typedef SmallVector HintsInfo; + using HintsInfo = SmallVector; + BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); void collectHintInfo(unsigned, HintsInfo &); @@ -437,6 +466,7 @@ private: } } }; + } // end anonymous namespace char RAGreedy::ID = 0; @@ -476,7 +506,6 @@ const char *const RAGreedy::StageName[] = { // This helps stabilize decisions based on float comparisons. const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 - FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } @@ -512,7 +541,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } - //===----------------------------------------------------------------------===// // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// @@ -635,7 +663,6 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { return LI; } - //===----------------------------------------------------------------------===// // Direct Assignment //===----------------------------------------------------------------------===// @@ -683,7 +710,6 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, return CheapReg ? CheapReg : PhysReg; } - //===----------------------------------------------------------------------===// // Interference eviction //===----------------------------------------------------------------------===// @@ -955,7 +981,6 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, return BestPhys; } - //===----------------------------------------------------------------------===// // Region Splitting //===----------------------------------------------------------------------===// @@ -1026,7 +1051,6 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, return SpillPlacer->scanActiveBundles(); } - /// addThroughConstraints - Add constraints and links to SpillPlacer from the /// live-through blocks in Blocks. void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, @@ -1084,7 +1108,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { unsigned Visited = 0; #endif - for (;;) { + while (true) { ArrayRef NewBundles = SpillPlacer->getRecentPositive(); // Find new through blocks in the periphery of PrefRegBundles. for (int i = 0, e = NewBundles.size(); i != e; ++i) { @@ -1162,9 +1186,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { } DEBUG({ - for (int i = Cand.LiveBundles.find_first(); i>=0; - i = Cand.LiveBundles.find_next(i)) - dbgs() << " EB#" << i; + for (int i : Cand.LiveBundles.set_bits()) + dbgs() << " EB#" << i; dbgs() << ".\n"; }); return true; @@ -1199,8 +1222,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; - bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; + bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)]; + bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; unsigned Ins = 0; if (BI.LiveIn) @@ -1213,8 +1236,8 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { unsigned Number = Cand.ActiveBlocks[i]; - bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; - bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, false)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, true)]; if (!RegIn && !RegOut) continue; if (RegIn && RegOut) { @@ -1266,7 +1289,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned IntvIn = 0, IntvOut = 0; SlotIndex IntfIn, IntfOut; if (BI.LiveIn) { - unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; if (CandIn != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandIn]; IntvIn = Cand.IntvIdx; @@ -1275,7 +1298,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, } } if (BI.LiveOut) { - unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; if (CandOut != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandOut]; IntvOut = Cand.IntvIdx; @@ -1315,7 +1338,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned IntvIn = 0, IntvOut = 0; SlotIndex IntfIn, IntfOut; - unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)]; if (CandIn != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandIn]; IntvIn = Cand.IntvIdx; @@ -1323,7 +1346,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, IntfIn = Cand.Intf.first(); } - unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)]; if (CandOut != NoCand) { GlobalSplitCandidate &Cand = GlobalCand[CandOut]; IntvOut = Cand.IntvIdx; @@ -1482,8 +1505,7 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, DEBUG({ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; - for (int i = Cand.LiveBundles.find_first(); i>=0; - i = Cand.LiveBundles.find_next(i)) + for (int i : Cand.LiveBundles.set_bits()) dbgs() << " EB#" << i; dbgs() << ".\n"; }); @@ -1536,7 +1558,6 @@ unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, return 0; } - //===----------------------------------------------------------------------===// // Per-Block Splitting //===----------------------------------------------------------------------===// @@ -1583,7 +1604,6 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; } - //===----------------------------------------------------------------------===// // Per-Instruction Splitting //===----------------------------------------------------------------------===// @@ -1667,12 +1687,10 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; } - //===----------------------------------------------------------------------===// // Local Splitting //===----------------------------------------------------------------------===// - /// calcGapWeights - Compute the maximum spill weight that needs to be evicted /// in order to use PhysReg between two entries in SA->UseSlots. /// @@ -1743,7 +1761,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; for (; Gap != NumGaps; ++Gap) { - GapWeight[Gap] = llvm::huge_valf; + GapWeight[Gap] = huge_valf; if (Uses[Gap+1].getBaseIndex() >= I->end) break; } @@ -1849,7 +1867,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Remove any gaps with regmask clobbers. if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) - GapWeight[RegMaskGaps[i]] = llvm::huge_valf; + GapWeight[RegMaskGaps[i]] = huge_valf; // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -1861,7 +1879,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // It is the spill weight that needs to be evicted. float MaxGap = GapWeight[0]; - for (;;) { + while (true) { // Live before/after split? const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; @@ -1884,7 +1902,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Legally, without causing looping? bool Legal = !ProgressRequired || NewGaps < NumGaps; - if (Legal && MaxGap < llvm::huge_valf) { + if (Legal && MaxGap < huge_valf) { // Estimate the new spill weight. Each instruction reads or writes the // register. Conservatively assume there are no read-modify-write // instructions. @@ -2683,6 +2701,7 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, if (Reloads || FoldedReloads || Spills || FoldedSpills) { using namespace ore; + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", L->getStartLoc(), L->getHeader()); if (Spills) diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 3b5964eef55e4960fa563ec0c29279ed5e74412e..e3baff4be4bcf4f6b6ebcf211e3ae0c2efbc4f88 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -29,15 +29,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" @@ -49,12 +50,13 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PBQP/Graph.h" +#include "llvm/CodeGen/PBQP/Math.h" #include "llvm/CodeGen/PBQP/Solution.h" #include "llvm/CodeGen/PBQPRAConstraint.h" -#include "llvm/CodeGen/RegAllocPBQP.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" @@ -82,8 +84,8 @@ #include #include #include -#include #include +#include using namespace llvm; @@ -139,13 +141,13 @@ public: } private: - typedef std::map LI2NodeMap; - typedef std::vector Node2LIMap; - typedef std::vector AllowedSet; - typedef std::vector AllowedSetMap; - typedef std::pair RegPair; - typedef std::map CoalesceMap; - typedef std::set RegSet; + using LI2NodeMap = std::map; + using Node2LIMap = std::vector; + using AllowedSet = std::vector; + using AllowedSetMap = std::vector; + using RegPair = std::pair; + using CoalesceMap = std::map; + using RegSet = std::set; char *customPassID; @@ -212,12 +214,12 @@ public: /// @brief Add interference edges between overlapping vregs. class Interference : public PBQPRAConstraint { private: - typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; - typedef std::pair IKey; - typedef DenseMap IMatrixCache; - typedef DenseSet DisjointAllowedRegsCache; - typedef std::pair IEdgeKey; - typedef DenseSet IEdgeCache; + using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *; + using IKey = std::pair; + using IMatrixCache = DenseMap; + using DisjointAllowedRegsCache = DenseSet; + using IEdgeKey = std::pair; + using IEdgeCache = DenseSet; bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId, @@ -252,8 +254,8 @@ private: // for the fast interference graph construction algorithm. The last is there // to save us from looking up node ids via the VRegToNode map in the graph // metadata. - typedef std::tuple - IntervalInfo; + using IntervalInfo = + std::tuple; static SlotIndex getStartPoint(const IntervalInfo &I) { return std::get<0>(I)->segments[std::get<1>(I)].start; @@ -320,9 +322,10 @@ public: // Cache known disjoint allowed registers pairs DisjointAllowedRegsCache D; - typedef std::set IntervalSet; - typedef std::priority_queue, - decltype(&lowestStartPoint)> IntervalQueue; + using IntervalSet = std::set; + using IntervalQueue = + std::priority_queue, + decltype(&lowestStartPoint)>; IntervalSet Active(lowestEndPoint); IntervalQueue Inactive(lowestStartPoint); @@ -658,7 +661,6 @@ void RegAllocPBQP::spillVReg(unsigned VReg, SmallVectorImpl &NewIntervals, MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, Spiller &VRegSpiller) { - VRegsToAlloc.erase(VReg); LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM, nullptr, &DeadRemats); @@ -736,7 +738,15 @@ void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, if (PReg == 0) { const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); - PReg = RC.getRawAllocationOrder(MF).front(); + const ArrayRef RawPRegOrder = RC.getRawAllocationOrder(MF); + for (unsigned CandidateReg : RawPRegOrder) { + if (!VRM.getRegInfo().isReserved(CandidateReg)) { + PReg = CandidateReg; + break; + } + } + assert(PReg && + "No un-reserved physical registers in this register class"); } VRM.assignVirt2Phys(LI.reg, PReg); diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 82a3bd9a0bd174d48d28ce137c620fde1e53dead..956dec39fc381e1a09d78deb2bc7f64d491d4aaa 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -14,12 +14,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index bf44ee8453b613ed129338d439b0e99c920bc6aa..7b3a5d5c5ff7f74fb3a5097cd4623446b5086d67 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -2666,11 +2666,17 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Look for values being erased. bool DidPrune = false; for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { - if (Vals[i].Resolution != CR_Erase) + // We should trigger in all cases in which eraseInstrs() does something. + // match what eraseInstrs() is doing, print a message so + if (Vals[i].Resolution != CR_Erase && + (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef || + !Vals[i].Pruned)) continue; // Check subranges at the point where the copy will be removed. SlotIndex Def = LR.getValNumInfo(i)->def; + // Print message so mismatches with eraseInstrs() can be diagnosed. + DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n'); for (LiveInterval::SubRange &S : LI.subranges()) { LiveQueryResult Q = S.Query(Def); @@ -3214,7 +3220,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { CurrList(WorkList.begin() + PrevSize, WorkList.end()); if (copyCoalesceWorkList(CurrList)) WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), - (MachineInstr*)nullptr), WorkList.end()); + nullptr), WorkList.end()); } void RegisterCoalescer::coalesceLocals() { diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index c726edc88b41c1de86eae0d4f2f3665c1162d428..88e0a3b58940eae446d2761a4ddfc109c96cde7d 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -12,9 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -24,7 +25,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 6392136fa290993a1b133d0cb6950735f686d7a9..b3326528807f3168a095a86500d31895dbe722b8 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -15,21 +15,27 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterScavenging.h" + #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/PassSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include #include #include #include @@ -39,6 +45,8 @@ using namespace llvm; #define DEBUG_TYPE "reg-scavenging" +STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); + void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { LiveUnits.addRegMasked(Reg, LaneMask); } @@ -62,10 +70,9 @@ void RegScavenger::init(MachineBasicBlock &MBB) { } this->MBB = &MBB; - for (SmallVectorImpl::iterator I = Scavenged.begin(), - IE = Scavenged.end(); I != IE; ++I) { - I->Reg = 0; - I->Restore = nullptr; + for (ScavengedInfo &SI : Scavenged) { + SI.Reg = 0; + SI.Restore = nullptr; } Tracking = false; @@ -254,6 +261,14 @@ void RegScavenger::backward() { const MachineInstr &MI = *MBBI; LiveUnits.stepBackward(MI); + // Expire scavenge spill frameindex uses. + for (ScavengedInfo &I : Scavenged) { + if (I.Restore == &MI) { + I.Reg = 0; + I.Restore = nullptr; + } + } + if (MBBI == MBB->begin()) { MBBI = MachineBasicBlock::iterator(nullptr); Tracking = false; @@ -350,6 +365,78 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, return Survivor; } +/// Given the bitvector \p Available of free register units at position +/// \p From. Search backwards to find a register that is part of \p +/// Candidates and not used/clobbered until the point \p To. If there is +/// multiple candidates continue searching and pick the one that is not used/ +/// clobbered for the longest time. +/// Returns the register and the earliest position we know it to be free or +/// the position MBB.end() if no register is available. +static std::pair +findSurvivorBackwards(const TargetRegisterInfo &TRI, + MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, + BitVector &Available, BitVector &Candidates) { + bool FoundTo = false; + unsigned Survivor = 0; + MachineBasicBlock::iterator Pos; + MachineBasicBlock &MBB = *From->getParent(); + unsigned InstrLimit = 25; + unsigned InstrCountDown = InstrLimit; + for (MachineBasicBlock::iterator I = From;; --I) { + const MachineInstr &MI = *I; + + // Remove any candidates touched by instruction. + bool FoundVReg = false; + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + Candidates.clearBitsNotInMask(MO.getRegMask()); + continue; + } + if (!MO.isReg() || MO.isUndef() || MO.isDebug()) + continue; + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + FoundVReg = true; + } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); + } + } + + if (I == To) { + // If one of the available registers survived this long take it. + Available &= Candidates; + int Reg = Available.find_first(); + if (Reg != -1) + return std::make_pair(Reg, MBB.end()); + // Otherwise we will continue up to InstrLimit instructions to find + // the register which is not defined/used for the longest time. + FoundTo = true; + Pos = To; + } + if (FoundTo) { + if (Survivor == 0 || !Candidates.test(Survivor)) { + int Reg = Candidates.find_first(); + if (Reg == -1) + break; + Survivor = Reg; + } + if (--InstrCountDown == 0) + break; + if (FoundVReg) { + // Keep searching when we find a vreg since the spilled register will + // be usefull for this other vreg as well later. + InstrCountDown = InstrLimit; + Pos = I; + } + if (I == MBB.begin()) + break; + } + } + + return std::make_pair(Survivor, Pos); +} + static unsigned getFrameIndexOperandNum(MachineInstr &MI) { unsigned i = 0; while (!MI.getOperand(i).isFI()) { @@ -359,44 +446,16 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) { return i; } -unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, - MachineBasicBlock::iterator I, - int SPAdj) { - MachineInstr &MI = *I; - const MachineFunction &MF = *MI.getParent()->getParent(); - // Consider all allocatable registers in the register class initially - BitVector Candidates = TRI->getAllocatableSet(MF, RC); - - // Exclude all the registers being used by the instruction. - for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) - Candidates.reset(*AI); - } - - // Try to find a register that's unused if there is one, as then we won't - // have to spill. - BitVector Available = getRegsAvailable(RC); - Available &= Candidates; - if (Available.any()) - Candidates = Available; - - // Find the register whose use is furthest away. - MachineBasicBlock::iterator UseMI; - unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); - - // If we found an unused register there is no reason to spill it. - if (!isRegUsed(SReg)) { - DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); - return SReg; - } - +RegScavenger::ScavengedInfo & +RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, + MachineBasicBlock::iterator Before, + MachineBasicBlock::iterator &UseMI) { // Find an available scavenging slot with size and alignment matching // the requirements of the class RC. + const MachineFunction &MF = *Before->getParent()->getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); - unsigned NeedSize = RC->getSize(); - unsigned NeedAlign = RC->getAlignment(); + unsigned NeedSize = TRI->getSpillSize(RC); + unsigned NeedAlign = TRI->getSpillAlignment(RC); unsigned SI = Scavenged.size(), Diff = std::numeric_limits::max(); int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd(); @@ -431,42 +490,307 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, } // Avoid infinite regress - Scavenged[SI].Reg = SReg; + Scavenged[SI].Reg = Reg; // If the target knows how to save/restore the register, let it do so; // otherwise, use the emergency stack spill slot. - if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { - // Spill the scavenged register before I. + if (!TRI->saveScavengerRegister(*MBB, Before, UseMI, &RC, Reg)) { + // Spill the scavenged register before \p Before. int FI = Scavenged[SI].FrameIndex; if (FI < FIB || FI >= FIE) { std::string Msg = std::string("Error while trying to spill ") + - TRI->getName(SReg) + " from class " + TRI->getRegClassName(RC) + + TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) + ": Cannot scavenge register without an emergency spill slot!"; report_fatal_error(Msg.c_str()); } - TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, - RC, TRI); - MachineBasicBlock::iterator II = std::prev(I); + TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex, + &RC, TRI); + MachineBasicBlock::iterator II = std::prev(Before); unsigned FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, - RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex, + &RC, TRI); II = std::prev(UseMI); FIOperandNum = getFrameIndexOperandNum(*II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } + return Scavenged[SI]; +} + +unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, + MachineBasicBlock::iterator I, + int SPAdj) { + MachineInstr &MI = *I; + const MachineFunction &MF = *MI.getParent()->getParent(); + // Consider all allocatable registers in the register class initially + BitVector Candidates = TRI->getAllocatableSet(MF, RC); + + // Exclude all the registers being used by the instruction. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); + } + + // Try to find a register that's unused if there is one, as then we won't + // have to spill. + BitVector Available = getRegsAvailable(RC); + Available &= Candidates; + if (Available.any()) + Candidates = Available; - Scavenged[SI].Restore = &*std::prev(UseMI); + // Find the register whose use is furthest away. + MachineBasicBlock::iterator UseMI; + unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); + + // If we found an unused register there is no reason to spill it. + if (!isRegUsed(SReg)) { + DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); + return SReg; + } - // Doing this here leads to infinite regress. - // Scavenged[SI].Reg = SReg; + ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI); + Scavenged.Restore = &*std::prev(UseMI); DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << "\n"); return SReg; } + +unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, + MachineBasicBlock::iterator To, + bool RestoreAfter, int SPAdj) { + const MachineBasicBlock &MBB = *To->getParent(); + const MachineFunction &MF = *MBB.getParent(); + // Consider all allocatable registers in the register class initially + BitVector Candidates = TRI->getAllocatableSet(MF, &RC); + + // Try to find a register that's unused if there is one, as then we won't + // have to spill. + BitVector Available = getRegsAvailable(&RC); + + // Find the register whose use is furthest away. + MachineBasicBlock::iterator UseMI; + std::pair P = + findSurvivorBackwards(*TRI, MBBI, To, Available, Candidates); + unsigned Reg = P.first; + MachineBasicBlock::iterator SpillBefore = P.second; + assert(Reg != 0 && "No register left to scavenge!"); + // Found an available register? + if (SpillBefore != MBB.end()) { + MachineBasicBlock::iterator ReloadAfter = + RestoreAfter ? std::next(MBBI) : MBBI; + MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter); + DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n'); + ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore); + Scavenged.Restore = &*std::prev(SpillBefore); + LiveUnits.removeReg(Reg); + DEBUG(dbgs() << "Scavenged register with spill: " << PrintReg(Reg, TRI) + << " until " << *SpillBefore); + } else { + DEBUG(dbgs() << "Scavenged free register: " << PrintReg(Reg, TRI) << '\n'); + } + return Reg; +} + +/// Allocate a register for the virtual register \p VReg. The last use of +/// \p VReg is around the current position of the register scavenger \p RS. +/// \p ReserveAfter controls whether the scavenged register needs to be reserved +/// after the current instruction, otherwise it will only be reserved before the +/// current instruction. +static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, + unsigned VReg, bool ReserveAfter) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); +#ifndef NDEBUG + // Verify that all definitions and uses are in the same basic block. + const MachineBasicBlock *CommonMBB = nullptr; + // Real definition for the reg, re-definitions are not considered. + const MachineInstr *RealDef = nullptr; + for (MachineOperand &MO : MRI.reg_nodbg_operands(VReg)) { + MachineBasicBlock *MBB = MO.getParent()->getParent(); + if (CommonMBB == nullptr) + CommonMBB = MBB; + assert(MBB == CommonMBB && "All defs+uses must be in the same basic block"); + if (MO.isDef()) { + const MachineInstr &MI = *MO.getParent(); + if (!MI.readsRegister(VReg, &TRI)) { + assert(!RealDef || RealDef == &MI && + "Can have at most one definition which is not a redefinition"); + RealDef = &MI; + } + } + } + assert(RealDef != nullptr && "Must have at least 1 Def"); +#endif + + // We should only have one definition of the register. However to accomodate + // the requirements of two address code we also allow definitions in + // subsequent instructions provided they also read the register. That way + // we get a single contiguous lifetime. + // + // Definitions in MRI.def_begin() are unordered, search for the first. + MachineRegisterInfo::def_iterator FirstDef = + std::find_if(MRI.def_begin(VReg), MRI.def_end(), + [VReg, &TRI](const MachineOperand &MO) { + return !MO.getParent()->readsRegister(VReg, &TRI); + }); + assert(FirstDef != MRI.def_end() && + "Must have one definition that does not redefine vreg"); + MachineInstr &DefMI = *FirstDef->getParent(); + + // The register scavenger will report a free register inserting an emergency + // spill/reload if necessary. + int SPAdj = 0; + const TargetRegisterClass &RC = *MRI.getRegClass(VReg); + unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(), + ReserveAfter, SPAdj); + MRI.replaceRegWith(VReg, SReg); + ++NumScavengedRegs; + return SReg; +} + +/// Allocate (scavenge) vregs inside a single basic block. +/// Returns true if the target spill callback created new vregs and a 2nd pass +/// is necessary. +static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI, + RegScavenger &RS, + MachineBasicBlock &MBB) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + RS.enterBasicBlockEnd(MBB); + + unsigned InitialNumVirtRegs = MRI.getNumVirtRegs(); + bool NextInstructionReadsVReg = false; + for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) { + --I; + // Move RegScavenger to the position between *I and *std::next(I). + RS.backward(I); + + // Look for unassigned vregs in the uses of *std::next(I). + if (NextInstructionReadsVReg) { + MachineBasicBlock::iterator N = std::next(I); + const MachineInstr &NMI = *N; + for (const MachineOperand &MO : NMI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + // We only care about virtual registers and ignore virtual registers + // created by the target callbacks in the process (those will be handled + // in a scavenging round). + if (!TargetRegisterInfo::isVirtualRegister(Reg) || + TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + continue; + if (!MO.readsReg()) + continue; + + unsigned SReg = scavengeVReg(MRI, RS, Reg, true); + N->addRegisterKilled(SReg, &TRI, false); + RS.setRegUsed(SReg); + } + } + + // Look for unassigned vregs in the defs of *I. + NextInstructionReadsVReg = false; + const MachineInstr &MI = *I; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + // Only vregs, no newly created vregs (see above). + if (!TargetRegisterInfo::isVirtualRegister(Reg) || + TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs) + continue; + // We have to look at all operands anyway so we can precalculate here + // whether there is a reading operand. This allows use to skip the use + // step in the next iteration if there was none. + assert(!MO.isInternalRead() && "Cannot assign inside bundles"); + assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses"); + if (MO.readsReg()) { + NextInstructionReadsVReg = true; + } + if (MO.isDef()) { + unsigned SReg = scavengeVReg(MRI, RS, Reg, false); + I->addRegisterDead(SReg, &TRI, false); + } + } + } +#ifndef NDEBUG + for (const MachineOperand &MO : MBB.front().operands()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + assert(!MO.isInternalRead() && "Cannot assign inside bundles"); + assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses"); + assert(!MO.readsReg() && "Vreg use in first instruction not allowed"); + } +#endif + + return MRI.getNumVirtRegs() != InitialNumVirtRegs; +} + +void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) { + // FIXME: Iterating over the instruction stream is unnecessary. We can simply + // iterate over the vreg use list, which at this point only contains machine + // operands for which eliminateFrameIndex need a new scratch reg. + MachineRegisterInfo &MRI = MF.getRegInfo(); + // Shortcut. + if (MRI.getNumVirtRegs() == 0) { + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); + return; + } + + // Run through the instructions and find any virtual registers. + for (MachineBasicBlock &MBB : MF) { + if (MBB.empty()) + continue; + + bool Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB); + if (Again) { + DEBUG(dbgs() << "Warning: Required two scavenging passes for block " + << MBB.getName() << '\n'); + Again = scavengeFrameVirtualRegsInBlock(MRI, RS, MBB); + // The target required a 2nd run (because it created new vregs while + // spilling). Refuse to do another pass to keep compiletime in check. + if (Again) + report_fatal_error("Incomplete scavenging after 2nd pass"); + } + } + + MRI.clearVirtRegs(); + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); +} + +namespace { +/// This class runs register scavenging independ of the PrologEpilogInserter. +/// This is used in for testing. +class ScavengerTest : public MachineFunctionPass { +public: + static char ID; + ScavengerTest() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &MF) { + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetFrameLowering &TFL = *STI.getFrameLowering(); + + RegScavenger RS; + // Let's hope that calling those outside of PrologEpilogueInserter works + // well enough to initialize the scavenger with some emergency spillslots + // for the target. + BitVector SavedRegs; + TFL.determineCalleeSaves(MF, SavedRegs, &RS); + TFL.processFunctionBeforeFrameFinalized(MF, &RS); + + // Let's scavenge the current function + scavengeFrameVirtualRegs(MF, RS); + return true; + } +}; +char ScavengerTest::ID; + +} // end anonymous namespace + +INITIALIZE_PASS(ScavengerTest, "scavenger-test", + "Scavenge virtual registers inside basic blocks", false, false) diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp index 66f196678dea50b1345d2a5a3ce5823bb27457a0..d7a3ac0808230c33c0317c5968f16d52ca358518 100644 --- a/lib/CodeGen/RegisterUsageInfo.cpp +++ b/lib/CodeGen/RegisterUsageInfo.cpp @@ -12,11 +12,22 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include using namespace llvm; @@ -63,7 +74,7 @@ PhysicalRegisterUsageInfo::getRegUsageInfo(const Function *FP) { void PhysicalRegisterUsageInfo::print(raw_ostream &OS, const Module *M) const { const TargetRegisterInfo *TRI; - typedef std::pair> FuncPtrRegMaskPair; + using FuncPtrRegMaskPair = std::pair>; SmallVector FPRMPairVector; diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp index 2f7ee8bf414cc1a455baf68effdf58a1409e8d91..d2eff950d861a9b6156e444a0f19738fac9d51ce 100644 --- a/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/lib/CodeGen/RenameIndependentSubregs.cpp @@ -32,10 +32,10 @@ #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" using namespace llvm; @@ -112,11 +112,11 @@ char RenameIndependentSubregs::ID; char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID; -INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, "rename-independent-subregs", +INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, DEBUG_TYPE, "Rename Independent Subregisters", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(RenameIndependentSubregs, "rename-independent-subregs", +INITIALIZE_PASS_END(RenameIndependentSubregs, DEBUG_TYPE, "Rename Independent Subregisters", false, false) bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { @@ -212,7 +212,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, const SmallVectorImpl &SubRangeInfos, const SmallVectorImpl &Intervals) const { const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); - unsigned Reg = Intervals[0]->reg;; + unsigned Reg = Intervals[0]->reg; for (MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(Reg), E = MRI->reg_nodbg_end(); I != E; ) { MachineOperand &MO = *I++; @@ -243,6 +243,11 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes, unsigned VReg = Intervals[ID]->reg; MO.setReg(VReg); + if (MO.isTied()) { + /// Undef use operands are not tracked in the equivalence class but need + /// to be update if they are tied. + MO.getParent()->substituteRegister(Reg, VReg, 0, TRI); + } } // TODO: We could attempt to recompute new register classes while visiting // the operands: Some of the split register may be fine with less constraint diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp index 3e259927ac5cba7c170ec3616e3909f490744476..01b3db43b2836014fba5e4ffaa5ea0b1215b3291 100644 --- a/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp index fa68411284e77d0a61547692ed7fcc05c7509bd5..8584a9b7c897327d286ae958f95312c42e370ec2 100644 --- a/lib/CodeGen/SafeStack.cpp +++ b/lib/CodeGen/SafeStack.cpp @@ -19,10 +19,12 @@ #include "SafeStackLayout.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" @@ -50,7 +52,7 @@ using namespace llvm; using namespace llvm::safestack; -#define DEBUG_TYPE "safestack" +#define DEBUG_TYPE "safe-stack" namespace llvm { @@ -92,11 +94,11 @@ public: /// determined statically), and the unsafe stack, which contains all /// local variables that are accessed in ways that we can't prove to /// be safe. -class SafeStack : public FunctionPass { - const TargetMachine *TM; - const TargetLoweringBase *TL; - const DataLayout *DL; - ScalarEvolution *SE; +class SafeStack { + Function &F; + const TargetLoweringBase &TL; + const DataLayout &DL; + ScalarEvolution &SE; Type *StackPtrTy; Type *IntPtrTy; @@ -171,33 +173,21 @@ class SafeStack : public FunctionPass { uint64_t AllocaSize); public: - static char ID; // Pass identification, replacement for typeid. - SafeStack(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) { - initializeSafeStackPass(*PassRegistry::getPassRegistry()); - } - SafeStack() : SafeStack(nullptr) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - } - - bool doInitialization(Module &M) override { - DL = &M.getDataLayout(); - - StackPtrTy = Type::getInt8PtrTy(M.getContext()); - IntPtrTy = DL->getIntPtrType(M.getContext()); - Int32Ty = Type::getInt32Ty(M.getContext()); - Int8Ty = Type::getInt8Ty(M.getContext()); - - return false; - } - - bool runOnFunction(Function &F) override; -}; // class SafeStack + SafeStack(Function &F, const TargetLoweringBase &TL, const DataLayout &DL, + ScalarEvolution &SE) + : F(F), TL(TL), DL(DL), SE(SE), + StackPtrTy(Type::getInt8PtrTy(F.getContext())), + IntPtrTy(DL.getIntPtrType(F.getContext())), + Int32Ty(Type::getInt32Ty(F.getContext())), + Int8Ty(Type::getInt8Ty(F.getContext())) {} + + // Run the transformation on the associated function. + // Returns whether the function was changed. + bool run(); +}; uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { - uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType()); + uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); if (AI->isArrayAllocation()) { auto C = dyn_cast(AI->getArraySize()); if (!C) @@ -209,11 +199,11 @@ uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) { bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize, const Value *AllocaPtr, uint64_t AllocaSize) { - AllocaOffsetRewriter Rewriter(*SE, AllocaPtr); - const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr)); + AllocaOffsetRewriter Rewriter(SE, AllocaPtr); + const SCEV *Expr = Rewriter.visit(SE.getSCEV(Addr)); - uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType()); - ConstantRange AccessStartRange = SE->getUnsignedRange(Expr); + uint64_t BitWidth = SE.getTypeSizeInBits(Expr->getType()); + ConstantRange AccessStartRange = SE.getUnsignedRange(Expr); ConstantRange SizeRange = ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize)); ConstantRange AccessRange = AccessStartRange.add(SizeRange); @@ -226,8 +216,8 @@ bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize, << *AllocaPtr << "\n" << " Access " << *Addr << "\n" << " SCEV " << *Expr - << " U: " << SE->getUnsignedRange(Expr) - << ", S: " << SE->getSignedRange(Expr) << "\n" + << " U: " << SE.getUnsignedRange(Expr) + << ", S: " << SE.getSignedRange(Expr) << "\n" << " Range " << AccessRange << "\n" << " AllocaRange " << AllocaRange << "\n" << " " << (Safe ? "safe" : "unsafe") << "\n"); @@ -266,7 +256,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { switch (I->getOpcode()) { case Instruction::Load: { - if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr, + if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getType()), AllocaPtr, AllocaSize)) return false; break; @@ -282,7 +272,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { return false; } - if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()), + if (!IsAccessSafe(UI, DL.getTypeStoreSize(I->getOperand(0)->getType()), AllocaPtr, AllocaSize)) return false; break; @@ -343,7 +333,7 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) { } Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) { - Value *StackGuardVar = TL->getIRStackGuard(IRB); + Value *StackGuardVar = TL.getIRStackGuard(IRB); if (!StackGuardVar) StackGuardVar = F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy); @@ -390,7 +380,7 @@ void SafeStack::findInsts(Function &F, if (!Arg.hasByValAttr()) continue; uint64_t Size = - DL->getTypeStoreSize(Arg.getType()->getPointerElementType()); + DL.getTypeStoreSize(Arg.getType()->getPointerElementType()); if (IsSafeStackAlloca(&Arg, Size)) continue; @@ -476,19 +466,19 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (StackGuardSlot) { Type *Ty = StackGuardSlot->getAllocatedType(); unsigned Align = - std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment()); + std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment()); SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot), Align, SSC.getFullLiveRange()); } for (Argument *Arg : ByValArguments) { Type *Ty = Arg->getType()->getPointerElementType(); - uint64_t Size = DL->getTypeStoreSize(Ty); + uint64_t Size = DL.getTypeStoreSize(Ty); if (Size == 0) Size = 1; // Don't create zero-sized stack objects. // Ensure the object is properly aligned. - unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), + unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment()); SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange()); } @@ -501,7 +491,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( // Ensure the object is properly aligned. unsigned Align = - std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()); + std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()); SSL.addObject(AI, Size, Align, SSC.getLiveRange(AI)); } @@ -539,7 +529,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( unsigned Offset = SSL.getObjectOffset(Arg); Type *Ty = Arg->getType()->getPointerElementType(); - uint64_t Size = DL->getTypeStoreSize(Ty); + uint64_t Size = DL.getTypeStoreSize(Ty); if (Size == 0) Size = 1; // Don't create zero-sized stack objects. @@ -550,7 +540,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( // Replace alloc with the new location. replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, - /*Deref=*/true, -Offset); + /*Deref=*/false, -Offset); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast(NewArg)->getNextNode()); IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); @@ -565,7 +555,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( if (Size == 0) Size = 1; // Don't create zero-sized stack objects. - replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -Offset); + replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/false, -Offset); replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset); // Replace uses of the alloca with the new location. @@ -630,7 +620,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( ArraySize = IRB.CreateIntCast(ArraySize, IntPtrTy, false); Type *Ty = AI->getAllocatedType(); - uint64_t TySize = DL->getTypeAllocSize(Ty); + uint64_t TySize = DL.getTypeAllocSize(Ty); Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize)); Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy); @@ -638,7 +628,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( // Align the SP value to satisfy the AllocaInst, type and stack alignments. unsigned Align = std::max( - std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI->getAlignment()), + std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()), (unsigned)StackAlignment); assert(isPowerOf2_32(Align)); @@ -655,7 +645,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (AI->hasName() && isa(NewAI)) NewAI->takeName(AI); - replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true); + replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/false); AI->replaceAllUsesWith(NewAI); AI->eraseFromParent(); } @@ -685,25 +675,10 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( } } -bool SafeStack::runOnFunction(Function &F) { - DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n"); - - if (!F.hasFnAttribute(Attribute::SafeStack)) { - DEBUG(dbgs() << "[SafeStack] safestack is not requested" - " for this function\n"); - return false; - } - - if (F.isDeclaration()) { - DEBUG(dbgs() << "[SafeStack] function definition" - " is not available\n"); - return false; - } - - if (!TM) - report_fatal_error("Target machine is required"); - TL = TM->getSubtargetImpl(F)->getTargetLowering(); - SE = &getAnalysis().getSE(); +bool SafeStack::run() { + assert(F.hasFnAttribute(Attribute::SafeStack) && + "Can't run SafeStack on a function without the attribute"); + assert(!F.isDeclaration() && "Can't run SafeStack on a function declaration"); ++NumFunctions; @@ -736,7 +711,7 @@ bool SafeStack::runOnFunction(Function &F) { ++NumUnsafeStackRestorePointsFunctions; IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); - UnsafeStackPtr = TL->getSafeStackPointerLocation(IRB); + UnsafeStackPtr = TL.getSafeStackPointerLocation(IRB); // Load the current stack pointer (we'll also use it as a base pointer). // FIXME: use a dedicated register for it ? @@ -788,14 +763,67 @@ bool SafeStack::runOnFunction(Function &F) { return true; } +class SafeStackLegacyPass : public FunctionPass { + const TargetMachine *TM; + +public: + static char ID; // Pass identification, replacement for typeid.. + SafeStackLegacyPass() : FunctionPass(ID), TM(nullptr) { + initializeSafeStackLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + + bool runOnFunction(Function &F) override { + DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n"); + + if (!F.hasFnAttribute(Attribute::SafeStack)) { + DEBUG(dbgs() << "[SafeStack] safestack is not requested" + " for this function\n"); + return false; + } + + if (F.isDeclaration()) { + DEBUG(dbgs() << "[SafeStack] function definition" + " is not available\n"); + return false; + } + + TM = &getAnalysis().getTM(); + auto *TL = TM->getSubtargetImpl(F)->getTargetLowering(); + if (!TL) + report_fatal_error("TargetLowering instance is required"); + + auto *DL = &F.getParent()->getDataLayout(); + auto &TLI = getAnalysis().getTLI(); + auto &ACT = getAnalysis().getAssumptionCache(F); + + // Compute DT and LI only for functions that have the attribute. + // This is only useful because the legacy pass manager doesn't let us + // compute analyzes lazily. + // In the backend pipeline, nothing preserves DT before SafeStack, so we + // would otherwise always compute it wastefully, even if there is no + // function with the safestack attribute. + DominatorTree DT(F); + LoopInfo LI(DT); + + ScalarEvolution SE(F, TLI, ACT, DT, LI); + + return SafeStack(F, *TL, *DL, SE).run(); + } +}; + } // anonymous namespace -char SafeStack::ID = 0; -INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack", - "Safe Stack instrumentation pass", false, false) -INITIALIZE_TM_PASS_END(SafeStack, "safe-stack", - "Safe Stack instrumentation pass", false, false) +char SafeStackLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE, + "Safe Stack instrumentation pass", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(SafeStackLegacyPass, DEBUG_TYPE, + "Safe Stack instrumentation pass", false, false) -FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) { - return new SafeStack(TM); -} +FunctionPass *llvm::createSafeStackPass() { return new SafeStackLegacyPass(); } diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp index 09289f947dc96a81166175a9d6e11bd2d83d531e..21f2fa497233a5ec436cc0a630e70bdae4a3f9ed 100644 --- a/lib/CodeGen/SafeStackColoring.cpp +++ b/lib/CodeGen/SafeStackColoring.cpp @@ -20,9 +20,10 @@ using namespace llvm::safestack; #define DEBUG_TYPE "safestackcoloring" +// Disabled by default due to PR32143. static cl::opt ClColoring("safe-stack-coloring", cl::desc("enable safe stack coloring"), - cl::Hidden, cl::init(true)); + cl::Hidden, cl::init(false)); const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) { const auto IT = AllocaNumbering.find(AI); diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp new file mode 100644 index 0000000000000000000000000000000000000000..07b43a82ca994c03abcfb4ae810ab36af1a6593e --- /dev/null +++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -0,0 +1,656 @@ +//=== ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ===// +//=== instrinsics ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces masked memory intrinsics - when unsupported by the target +// - with a chain of basic blocks, that deal with the elements one-by-one if the +// appropriate mask bit is set. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "scalarize-masked-mem-intrin" + +namespace { + +class ScalarizeMaskedMemIntrin : public FunctionPass { + const TargetTransformInfo *TTI; + +public: + static char ID; // Pass identification, replacement for typeid + explicit ScalarizeMaskedMemIntrin() : FunctionPass(ID), TTI(nullptr) { + initializeScalarizeMaskedMemIntrinPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "Scalarize Masked Memory Intrinsics"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + +private: + bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); + bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); +}; +} // namespace + +char ScalarizeMaskedMemIntrin::ID = 0; +INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE, + "Scalarize unsupported masked memory intrinsics", false, false) + +FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() { + return new ScalarizeMaskedMemIntrin(); +} + +// Translate a masked load intrinsic like +// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, +// <16 x i1> %mask, <16 x i32> %passthru) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// %1 = bitcast i8* %addr to i32* +// %2 = extractelement <16 x i1> %mask, i32 0 +// %3 = icmp eq i1 %2, true +// br i1 %3, label %cond.load, label %else +// +// cond.load: ; preds = %0 +// %4 = getelementptr i32* %1, i32 0 +// %5 = load i32* %4 +// %6 = insertelement <16 x i32> undef, i32 %5, i32 0 +// br label %else +// +// else: ; preds = %0, %cond.load +// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ] +// %7 = extractelement <16 x i1> %mask, i32 1 +// %8 = icmp eq i1 %7, true +// br i1 %8, label %cond.load1, label %else2 +// +// cond.load1: ; preds = %else +// %9 = getelementptr i32* %1, i32 1 +// %10 = load i32* %9 +// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1 +// br label %else2 +// +// else2: ; preds = %else, %cond.load1 +// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] +// %12 = extractelement <16 x i1> %mask, i32 2 +// %13 = icmp eq i1 %12, true +// br i1 %13, label %cond.load4, label %else5 +// +static void scalarizeMaskedLoad(CallInst *CI) { + Value *Ptr = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + unsigned AlignVal = cast(Alignment)->getZExtValue(); + VectorType *VecType = dyn_cast(CI->getType()); + assert(VecType && "Unexpected return type of masked load intrinsic"); + + Type *EltTy = CI->getType()->getVectorElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // Short-cut if the mask is all-true. + bool IsAllOnesMask = + isa(Mask) && cast(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. + AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits() / 8); + // Bitcast %addr fron i8* to EltTy* + Type *NewPtrType = + EltTy->getPointerTo(cast(Ptr->getType())->getAddressSpace()); + Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); + unsigned VectorWidth = VecType->getNumElements(); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + + if (isa(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); + VResult = + Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %to_load = icmp eq i1 %mask_1, true + // br i1 %to_load, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = + Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); +} + +// Translate a masked store intrinsic, like +// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, +// <16 x i1> %mask) +// to a chain of basic blocks, that stores element one-by-one if +// the appropriate mask bit is set +// +// %1 = bitcast i8* %addr to i32* +// %2 = extractelement <16 x i1> %mask, i32 0 +// %3 = icmp eq i1 %2, true +// br i1 %3, label %cond.store, label %else +// +// cond.store: ; preds = %0 +// %4 = extractelement <16 x i32> %val, i32 0 +// %5 = getelementptr i32* %1, i32 0 +// store i32 %4, i32* %5 +// br label %else +// +// else: ; preds = %0, %cond.store +// %6 = extractelement <16 x i1> %mask, i32 1 +// %7 = icmp eq i1 %6, true +// br i1 %7, label %cond.store1, label %else2 +// +// cond.store1: ; preds = %else +// %8 = extractelement <16 x i32> %val, i32 1 +// %9 = getelementptr i32* %1, i32 1 +// store i32 %8, i32* %9 +// br label %else2 +// . . . +static void scalarizeMaskedStore(CallInst *CI) { + Value *Src = CI->getArgOperand(0); + Value *Ptr = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); + Value *Mask = CI->getArgOperand(3); + + unsigned AlignVal = cast(Alignment)->getZExtValue(); + VectorType *VecType = dyn_cast(Src->getType()); + assert(VecType && "Unexpected data type in masked store intrinsic"); + + Type *EltTy = VecType->getElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // Short-cut if the mask is all-true. + bool IsAllOnesMask = + isa(Mask) && cast(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Builder.CreateAlignedStore(Src, Ptr, AlignVal); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. + AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits() / 8); + // Bitcast %addr fron i8* to EltTy* + Type *NewPtrType = + EltTy->getPointerTo(cast(Ptr->getType())->getAddressSpace()); + Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); + unsigned VectorWidth = VecType->getNumElements(); + + if (isa(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + } + CI->eraseFromParent(); + return; + } + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %to_store = icmp eq i1 %mask_1, true + // br i1 %to_store, label %cond.store, label %else + // + Value *Predicate = + Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1)); + + // Create "cond" block + // + // %OneElt = extractelement <16 x i32> %Src, i32 Idx + // %EltAddr = getelementptr i32* %1, i32 0 + // %store i32 %OneElt, i32* %EltAddr + // + BasicBlock *CondBlock = + IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +// Translate a masked gather intrinsic like +// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, +// <16 x i1> %Mask, <16 x i32> %Src) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> %Mask, i32 0 +// % ToLoad0 = icmp eq i1 % Mask0, true +// br i1 % ToLoad0, label %cond.load, label %else +// +// cond.load: +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// % Load0 = load i32, i32* % Ptr0, align 4 +// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 +// br label %else +// +// else: +// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] +// % Mask1 = extractelement <16 x i1> %Mask, i32 1 +// % ToLoad1 = icmp eq i1 % Mask1, true +// br i1 % ToLoad1, label %cond.load1, label %else2 +// +// cond.load1: +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// % Load1 = load i32, i32* % Ptr1, align 4 +// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 +// br label %else2 +// . . . +// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src +// ret <16 x i32> %Result +static void scalarizeMaskedGather(CallInst *CI) { + Value *Ptrs = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + VectorType *VecType = dyn_cast(CI->getType()); + + assert(VecType && "Unexpected return type of masked load intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + unsigned AlignVal = cast(Alignment)->getZExtValue(); + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + unsigned VectorWidth = VecType->getNumElements(); + + // Shorten the way if the mask is a vector of constants. + bool IsConstMask = isa(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = + Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement( + VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %ToLoad1 = icmp eq i1 %Mask1, true + // br i1 %ToLoad1, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToLoad" + Twine(Idx)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = + Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), + "Res" + Twine(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); +} + +// Translate a masked scatter intrinsic, like +// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, +// <16 x i1> %Mask) +// to a chain of basic blocks, that stores element one-by-one if +// the appropriate mask bit is set. +// +// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> % Mask, i32 0 +// % ToStore0 = icmp eq i1 % Mask0, true +// br i1 %ToStore0, label %cond.store, label %else +// +// cond.store: +// % Elt0 = extractelement <16 x i32> %Src, i32 0 +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// store i32 %Elt0, i32* % Ptr0, align 4 +// br label %else +// +// else: +// % Mask1 = extractelement <16 x i1> % Mask, i32 1 +// % ToStore1 = icmp eq i1 % Mask1, true +// br i1 % ToStore1, label %cond.store1, label %else2 +// +// cond.store1: +// % Elt1 = extractelement <16 x i32> %Src, i32 1 +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// store i32 % Elt1, i32* % Ptr1, align 4 +// br label %else2 +// . . . +static void scalarizeMaskedScatter(CallInst *CI) { + Value *Src = CI->getArgOperand(0); + Value *Ptrs = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); + Value *Mask = CI->getArgOperand(3); + + assert(isa(Src->getType()) && + "Unexpected data type in masked scatter intrinsic"); + assert(isa(Ptrs->getType()) && + isa(Ptrs->getType()->getVectorElementType()) && + "Vector of pointers is expected in masked scatter intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + unsigned AlignVal = cast(Alignment)->getZExtValue(); + unsigned VectorWidth = Src->getType()->getVectorNumElements(); + + // Shorten the way if the mask is a vector of constants. + bool IsConstMask = isa(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx + // % ToStore = icmp eq i1 % Mask1, true + // br i1 % ToStore, label %cond.store, label %else + // + Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToStore" + Twine(Idx)); + + // Create "cond" block + // + // % Elt1 = extractelement <16 x i32> %Src, i32 1 + // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 + // %store i32 % Elt1, i32* % Ptr1 + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + bool EverMadeChange = false; + + TTI = &getAnalysis().getTTI(F); + + bool MadeChange = true; + while (MadeChange) { + MadeChange = false; + for (Function::iterator I = F.begin(); I != F.end();) { + BasicBlock *BB = &*I++; + bool ModifiedDTOnIteration = false; + MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); + + // Restart BB iteration if the dominator tree of the Function was changed + if (ModifiedDTOnIteration) + break; + } + + EverMadeChange |= MadeChange; + } + + return EverMadeChange; +} + +bool ScalarizeMaskedMemIntrin::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { + bool MadeChange = false; + + BasicBlock::iterator CurInstIterator = BB.begin(); + while (CurInstIterator != BB.end()) { + if (CallInst *CI = dyn_cast(&*CurInstIterator++)) + MadeChange |= optimizeCallInst(CI, ModifiedDT); + if (ModifiedDT) + return true; + } + + return MadeChange; +} + +bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, + bool &ModifiedDT) { + + IntrinsicInst *II = dyn_cast(CI); + if (II) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::masked_load: { + // Scalarize unsupported vector masked load + if (!TTI->isLegalMaskedLoad(CI->getType())) { + scalarizeMaskedLoad(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_store: { + if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { + scalarizeMaskedStore(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_gather: { + if (!TTI->isLegalMaskedGather(CI->getType())) { + scalarizeMaskedGather(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_scatter: { + if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { + scalarizeMaskedScatter(CI); + ModifiedDT = true; + return true; + } + return false; + } + } + } + + return false; +} diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index dc72ac07325882f623ea5cb0dc17655d31a4c674..3cd270cec3a6d12d8a4b4bf83b7a2c4b36c6918d 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 18823b74c47fe13bb54a466df383fecd148c0cd1..7dd66d799be4a4ce5e754b7c6063df9bb823094a 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -14,30 +14,52 @@ #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/ADT/IntEqClasses.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDFS.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -90,11 +112,9 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags) : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), - RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), - TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr), + RemoveKillFlags(RemoveKillFlags), UnknownValue(UndefValue::get( - Type::getVoidTy(mf.getFunction()->getContext()))), - FirstDbgValue(nullptr) { + Type::getVoidTy(mf.getFunction()->getContext()))) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); @@ -126,7 +146,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { return V; } assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); - } while (1); + } while (true); } /// This is a wrapper around GetUnderlyingObjects and adds support for basic @@ -563,7 +583,7 @@ void ScheduleDAGInstrs::initSUnits() { // which is contained within a basic block. SUnits.reserve(NumRegionInstrs); - for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) { + for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) { if (MI.isDebugValue()) continue; @@ -606,13 +626,13 @@ void ScheduleDAGInstrs::initSUnits() { class ScheduleDAGInstrs::Value2SUsMap : public MapVector { /// Current total number of SUs in map. - unsigned NumNodes; + unsigned NumNodes = 0; /// 1 for loads, 0 for stores. (see comment in SUList) unsigned TrueMemOrderLatency; public: - Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {} + Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {} /// To keep NumNodes up to date, insert() is used instead of /// this operator w/ push_back(). @@ -630,7 +650,7 @@ public: void inline clearList(ValueType V) { iterator Itr = find(V); if (Itr != end()) { - assert (NumNodes >= Itr->second.size()); + assert(NumNodes >= Itr->second.size()); NumNodes -= Itr->second.size(); Itr->second.clear(); @@ -646,7 +666,7 @@ public: unsigned inline size() const { return NumNodes; } /// Counts the number of SUs in this map after a reduction. - void reComputeSize(void) { + void reComputeSize() { NumNodes = 0; for (auto &I : *this) NumNodes += I.second.size(); @@ -676,7 +696,7 @@ void ScheduleDAGInstrs::addChainDependencies(SUnit *SU, } void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) { - assert (BarrierChain != nullptr); + assert(BarrierChain != nullptr); for (auto &I : map) { SUList &sus = I.second; @@ -687,7 +707,7 @@ void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) { } void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) { - assert (BarrierChain != nullptr); + assert(BarrierChain != nullptr); // Go through all lists of SUs. for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) { @@ -1028,7 +1048,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, // The N last elements in NodeNums will be removed, and the SU with // the lowest NodeNum of them will become the new BarrierChain to // let the not yet seen SUs have a dependency to the removed SUs. - assert (N <= NodeNums.size()); + assert(N <= NodeNums.size()); SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)]; if (BarrierChain) { // The aliasing and non-aliasing maps reduce independently of each @@ -1057,179 +1077,71 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, loads.dump()); } -void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { - // Start with no live registers. - LiveRegs.reset(); - - // Examine the live-in regs of all successors. - for (const MachineBasicBlock *Succ : BB->successors()) { - for (const auto &LI : Succ->liveins()) { - // Repeat, for reg and all subregs. - for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } -} - -/// \brief If we change a kill flag on the bundle instruction implicit register -/// operands, then we also need to propagate that to any instructions inside -/// the bundle which had the same kill state. -static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, - bool NewKillState, - const TargetRegisterInfo *TRI) { - if (MI->getOpcode() != TargetOpcode::BUNDLE) - return; - - // Walk backwards from the last instruction in the bundle to the first. - // Once we set a kill flag on an instruction, we bail out, as otherwise we - // might set it on too many operands. We will clear as many flags as we - // can though. - MachineBasicBlock::instr_iterator Begin = MI->getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); - while (Begin != End) { - if (NewKillState) { - if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) - return; - } else - (--End)->clearRegisterKills(Reg, TRI); - } -} - -void ScheduleDAGInstrs::toggleKillFlag(MachineInstr &MI, MachineOperand &MO) { - if (MO.isDebug()) - return; - - // Setting kill flag... - if (!MO.isKill()) { - MO.setIsKill(true); - toggleBundleKillFlag(&MI, MO.getReg(), true, TRI); - return; - } - - // If MO itself is live, clear the kill flag... - if (LiveRegs.test(MO.getReg())) { - MO.setIsKill(false); - toggleBundleKillFlag(&MI, MO.getReg(), false, TRI); - return; - } - - // If any subreg of MO is live, then create an imp-def for that - // subreg and keep MO marked as killed. - MO.setIsKill(false); - toggleBundleKillFlag(&MI, MO.getReg(), false, TRI); - bool AllDead = true; - const unsigned SuperReg = MO.getReg(); - MachineInstrBuilder MIB(MF, &MI); - for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - MIB.addReg(*SubRegs, RegState::ImplicitDefine); - AllDead = false; - } - } +static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, + MachineInstr &MI, bool addToLiveRegs) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; - if(AllDead) { - MO.setIsKill(true); - toggleBundleKillFlag(&MI, MO.getReg(), true, TRI); + // Things that are available after the instruction are killed by it. + bool IsKill = LiveRegs.available(MRI, Reg); + MO.setIsKill(IsKill); + if (IsKill && addToLiveRegs) + LiveRegs.addReg(Reg); } } -void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { - // FIXME: Reuse the LivePhysRegs utility for this. - DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); +void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { + DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n'); - LiveRegs.resize(TRI->getNumRegs()); - BitVector killedRegs(TRI->getNumRegs()); - - startBlockForKills(MBB); + LiveRegs.init(*TRI); + LiveRegs.addLiveOuts(MBB); // Examine block from end to start... - unsigned Count = MBB->size(); - for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); - I != E; --Count) { - MachineInstr &MI = *--I; + for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { if (MI.isDebugValue()) continue; // Update liveness. Registers that are defed but not used in this // instruction are now dead. Mark register and all subregs as they // are completely defined. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (MO.isRegMask()) - LiveRegs.clearBitsNotInMask(MO.getRegMask()); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI.isRegTiedToUseOperand(i)) continue; - - // Repeat for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.reset(*SubRegs); - } - - // Examine all used registers and set/clear kill flag. When a - // register is used multiple times we only set the kill flag on - // the first use. Don't set kill flags on undef operands. - killedRegs.reset(); - - // toggleKillFlag can append new operands (implicit defs), so using - // a range-based loop is not safe. The new operands will be appended - // at the end of the operand list and they don't need to be visited, - // so iterating until the currently last operand is ok. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - bool kill = false; - if (!killedRegs.test(Reg)) { - kill = true; - // A register is not killed if any subregs are live... - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - kill = false; - break; - } - } - - // If subreg is not live, then register is killed if it became - // live in this instruction - if (kill) - kill = !LiveRegs.test(Reg); - } - - if (MO.isKill() != kill) { - DEBUG(dbgs() << "Fixing " << MO << " in "); - toggleKillFlag(MI, MO); - DEBUG(MI.dump()); - DEBUG({ - if (MI.getOpcode() == TargetOpcode::BUNDLE) { - MachineBasicBlock::instr_iterator Begin = MI.getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); - while (++Begin != End) - DEBUG(Begin->dump()); - } - }); + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + const MachineOperand &MO = *O; + if (MO.isReg()) { + if (!MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + LiveRegs.removeReg(Reg); + } else if (MO.isRegMask()) { + LiveRegs.removeRegsInMask(MO); } - - killedRegs.set(Reg); } - // Mark any used register (that is not using undef) and subregs as - // now live... - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); + // If there is a bundle header fix it up first. + if (!MI.isBundled()) { + toggleKills(MRI, LiveRegs, MI, true); + } else { + MachineBasicBlock::instr_iterator First = MI.getIterator(); + if (MI.isBundle()) { + toggleKills(MRI, LiveRegs, MI, false); + ++First; + } + // Some targets make the (questionable) assumtion that the instructions + // inside the bundle are ordered and consequently only the last use of + // a register inside the bundle can kill it. + MachineBasicBlock::instr_iterator I = std::next(First); + while (I->isBundledWithSucc()) + ++I; + do { + if (!I->isDebugValue()) + toggleKills(MRI, LiveRegs, *I, true); + --I; + } while(I != First); } } } @@ -1264,6 +1176,7 @@ std::string ScheduleDAGInstrs::getDAGName() const { //===----------------------------------------------------------------------===// namespace llvm { + /// Internal state used to compute SchedDFSResult. class SchedDFSImpl { SchedDFSResult &R; @@ -1271,16 +1184,16 @@ class SchedDFSImpl { /// Join DAG nodes into equivalence classes by their subtree. IntEqClasses SubtreeClasses; /// List PredSU, SuccSU pairs that represent data edges between subtrees. - std::vector > ConnectionPairs; + std::vector> ConnectionPairs; struct RootData { unsigned NodeID; unsigned ParentNodeID; ///< Parent node (member of the parent subtree). - unsigned SubInstrCount; ///< Instr count in this tree only, not children. + unsigned SubInstrCount = 0; ///< Instr count in this tree only, not + /// children. RootData(unsigned id): NodeID(id), - ParentNodeID(SchedDFSResult::InvalidSubtreeID), - SubInstrCount(0) {} + ParentNodeID(SchedDFSResult::InvalidSubtreeID) {} unsigned getSparseSetIndex() const { return NodeID; } }; @@ -1448,12 +1361,15 @@ protected: } while (FromTree != SchedDFSResult::InvalidSubtreeID); } }; + } // end namespace llvm namespace { + /// Manage the stack used by a reverse depth-first search over the DAG. class SchedDAGReverseDFS { - std::vector > DFSStack; + std::vector> DFSStack; + public: bool isComplete() const { return DFSStack.empty(); } @@ -1475,7 +1391,8 @@ public: return getCurr()->Preds.end(); } }; -} // anonymous + +} // end anonymous namespace static bool hasDataSucc(const SUnit *SU) { for (const SDep &SuccDep : SU->Succs) { @@ -1500,7 +1417,7 @@ void SchedDFSResult::compute(ArrayRef SUnits) { SchedDAGReverseDFS DFS; Impl.visitPreorder(&SU); DFS.follow(&SU); - for (;;) { + while (true) { // Traverse the leftmost path as far as possible. while (DFS.getPred() != DFS.getPredEnd()) { const SDep &PredDep = *DFS.getPred(); @@ -1565,4 +1482,5 @@ raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { } } // end namespace llvm + #endif diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index ca2881cb91e02e571ed6c30c2572fae88879a1f9..bb6a45996f6323b41d67822f812d34ba038ed3c4 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 306c1974ab5a8878f4541c6adf77a2f3e1501a28..a7b411cecac15565d180ce128bf136999c90513d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" @@ -113,7 +114,7 @@ namespace { SmallPtrSet CombinedNodes; // AA - Used for DAG load/store alias analysis. - AliasAnalysis &AA; + AliasAnalysis *AA; /// When an instruction is simplified, add all users of the instruction to /// the work lists because they might get more simplified now. @@ -236,10 +237,14 @@ namespace { SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); SDValue visitUADDO(SDNode *N); + SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitSUBC(SDNode *N); SDValue visitUSUBO(SDNode *N); SDValue visitADDE(SDNode *N); + SDValue visitADDCARRY(SDNode *N); + SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); SDValue visitSUBE(SDNode *N); + SDValue visitSUBCARRY(SDNode *N); SDValue visitMUL(SDNode *N); SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); @@ -275,6 +280,7 @@ namespace { SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); SDValue visitSETCCE(SDNode *N); + SDValue visitSETCCCARRY(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); @@ -369,14 +375,14 @@ namespace { SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); - SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); - SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip); + SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); + SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal); + SDNodeFlags Flags, bool Reciprocal); SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal); + SDNodeFlags Flags, bool Reciprocal); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -396,6 +402,7 @@ namespace { SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); + SDValue matchVSelectOpSizesWithSetCC(SDNode *N); SDValue GetDemandedBits(SDValue V, const APInt &Mask); @@ -490,9 +497,9 @@ namespace { SDValue distributeTruncateThroughAnd(SDNode *N); public: - DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) { ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); MaximumLegalStoreInBits = 0; @@ -644,7 +651,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. if (!Options->NoSignedZerosFPMath && - !Op.getNode()->getFlags()->hasNoSignedZeros()) + !Op.getNode()->getFlags().hasNoSignedZeros()) return 0; // fold (fneg (fsub A, B)) -> (fsub B, A) @@ -682,7 +689,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); - const SDNodeFlags *Flags = Op.getNode()->getFlags(); + const SDNodeFlags Flags = Op.getNode()->getFlags(); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown code"); @@ -965,8 +972,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); - APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + KnownBits Known; + if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO)) return false; // Revisit the node. @@ -1021,13 +1028,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { switch (Opc) { default: break; case ISD::AssertSext: - return DAG.getNode(ISD::AssertSext, DL, PVT, - SExtPromoteOperand(Op.getOperand(0), PVT), - Op.getOperand(1)); + if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT)) + return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1)); + break; case ISD::AssertZext: - return DAG.getNode(ISD::AssertZext, DL, PVT, - ZExtPromoteOperand(Op.getOperand(0), PVT), - Op.getOperand(1)); + if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT)) + return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1)); + break; case ISD::Constant: { unsigned ExtOpc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; @@ -1412,7 +1419,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SUBC: return visitSUBC(N); case ISD::USUBO: return visitUSUBO(N); case ISD::ADDE: return visitADDE(N); + case ISD::ADDCARRY: return visitADDCARRY(N); case ISD::SUBE: return visitSUBE(N); + case ISD::SUBCARRY: return visitSUBCARRY(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1449,6 +1458,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); case ISD::SETCCE: return visitSETCCE(N); + case ISD::SETCCCARRY: return visitSETCCCARRY(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); @@ -1553,7 +1563,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. - if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1721,10 +1731,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { NumLeftToConsider--; } - SDValue Result; - // If we've changed things around then replace token factor. if (Changed) { + SDValue Result; if (Ops.empty()) { // The entry token is the only possible outcome. Result = DAG.getEntryNode(); @@ -1741,13 +1750,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } } - - // Add users to worklist, since we may introduce a lot of new - // chained token factors while removing memory deps. - return CombineTo(N, Result, true /*add to worklist*/); + return Result; } - - return Result; + return SDValue(); } /// MERGE_VALUES can always be eliminated. @@ -1866,14 +1871,31 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (isNullConstant(N1)) return N0; - // fold ((c1-A)+c2) -> (c1+c2)-A if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) { - if (N0.getOpcode() == ISD::SUB) - if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), - N0.getOperand(1)); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N0.getOpcode() == ISD::SUB && + isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { + // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic. + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), + N0.getOperand(1)); + } + + // add (sext i1 X), 1 -> zext (not i1 X) + // We don't transform this pattern: + // add (zext i1 X), -1 -> sext (not i1 X) + // because most (?) targets generate better code for the zext form. + if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() && + isOneConstantOrOneSplatConstant(N1)) { + SDValue X = N0.getOperand(0); + if ((!LegalOperations || + (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && + X.getScalarValueSizeInBits() == 1) { + SDValue Not = DAG.getNOT(DL, X, X.getValueType()); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } + } } if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -1938,7 +1960,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (a+b) -> (a|b) iff a and b share no bits. if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && - VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1)) + DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); if (SDValue Combined = visitADDLike(N0, N1, N)) @@ -1950,6 +1972,44 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return SDValue(); } +static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { + bool Masked = false; + + // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization. + while (true) { + if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) { + V = V.getOperand(0); + continue; + } + + if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) { + Masked = true; + V = V.getOperand(0); + continue; + } + + break; + } + + // If this is not a carry, return. + if (V.getResNo() != 1) + return SDValue(); + + if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY && + V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO) + return SDValue(); + + // If the result is masked, then no matter what kind of bool it is we can + // return. If it isn't, then we need to make sure the bool type is either 0 or + // 1 and not other values. + if (Masked || + TLI.getBooleanContents(V.getValueType()) == + TargetLoweringBase::ZeroOrOneBooleanContent) + return V; + + return SDValue(); +} + SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); @@ -1992,6 +2052,18 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) } } + // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) + if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) + return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), + N0, N1.getOperand(0), N1.getOperand(2)); + + // (add X, Carry) -> (addcarry X, 0, Carry) + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) + if (SDValue Carry = getAsCarry(TLI, N1)) + return DAG.getNode(ISD::ADDCARRY, DL, + DAG.getVTList(VT, Carry.getValueType()), N0, + DAG.getConstant(0, DL, VT), Carry); + return SDValue(); } @@ -2055,6 +2127,34 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) { return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), DAG.getConstant(0, DL, CarryVT)); + if (SDValue Combined = visitUADDOLike(N0, N1, N)) + return Combined; + + if (SDValue Combined = visitUADDOLike(N1, N0, N)) + return Combined; + + return SDValue(); +} + +SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { + auto VT = N0.getValueType(); + + // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) + // If Y + 1 cannot overflow. + if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) { + SDValue Y = N1.getOperand(0); + SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType()); + if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y, + N1.getOperand(2)); + } + + // (uaddo X, Carry) -> (addcarry X, 0, Carry) + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) + if (SDValue Carry = getAsCarry(TLI, N1)) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, + DAG.getConstant(0, SDLoc(N), VT), Carry); + return SDValue(); } @@ -2077,6 +2177,90 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitADDCARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + SDLoc DL(N); + + // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + if (N0C && !N1C) + return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn); + + // fold (addcarry x, y, false) -> (uaddo x, y) + if (isNullConstant(CarryIn)) + return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); + + // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry. + if (isNullConstant(N0) && isNullConstant(N1)) { + EVT VT = N0.getValueType(); + EVT CarryVT = CarryIn.getValueType(); + SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT); + AddToWorklist(CarryExt.getNode()); + return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt, + DAG.getConstant(1, DL, VT)), + DAG.getConstant(0, DL, CarryVT)); + } + + if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) + return Combined; + + if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N)) + return Combined; + + return SDValue(); +} + +SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, + SDNode *N) { + // Iff the flag result is dead: + // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) + if ((N0.getOpcode() == ISD::ADD || + (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) && + isNullConstant(N1) && !N->hasAnyUseOfValue(1)) + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), + N0.getOperand(0), N0.getOperand(1), CarryIn); + + /** + * When one of the addcarry argument is itself a carry, we may be facing + * a diamond carry propagation. In which case we try to transform the DAG + * to ensure linear carry propagation if that is possible. + * + * We are trying to get: + * (addcarry X, 0, (addcarry A, B, Z):Carry) + */ + if (auto Y = getAsCarry(TLI, N1)) { + /** + * (uaddo A, B) + * / \ + * Carry Sum + * | \ + * | (addcarry *, 0, Z) + * | / + * \ Carry + * | / + * (addcarry X, *, *) + */ + if (Y.getOpcode() == ISD::UADDO && + CarryIn.getResNo() == 1 && + CarryIn.getOpcode() == ISD::ADDCARRY && + isNullConstant(CarryIn.getOperand(1)) && + CarryIn.getOperand(0) == Y.getValue(0)) { + auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(), + Y.getOperand(0), Y.getOperand(1), + CarryIn.getOperand(2)); + AddToWorklist(NewY.getNode()); + return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, + DAG.getConstant(0, SDLoc(N), N0.getValueType()), + NewY.getValue(1)); + } + } + + return SDValue(); +} + // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, @@ -2143,13 +2327,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // 0 - X --> 0 if the sub is NUW. - if (N->getFlags()->hasNoUnsignedWrap()) + if (N->getFlags().hasNoUnsignedWrap()) return N0; - if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) { + if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { // N1 is either 0 or the minimum signed value. If the sub is NSW, then // N1 must be 0 because negating the minimum signed value is undefined. - if (N->getFlags()->hasNoSignedWrap()) + if (N->getFlags().hasNoSignedWrap()) return N0; // 0 - X --> X if X is 0 or the minimum signed value. @@ -2309,6 +2493,18 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (subcarry x, y, false) -> (usubo x, y) + if (isNullConstant(CarryIn)) + return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2353,14 +2549,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1IsConst && ConstValue1 == 0) + if (N1IsConst && ConstValue1.isNullValue()) return N1; // We require a splat of the entire scalar bit width for non-contiguous // bit patterns. bool IsFullSplat = ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1 == 1 && IsFullSplat) + if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -2589,9 +2785,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // better results in that case. The target-specific lowering should learn how // to handle exact sdivs efficiently. if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - !cast(N)->Flags.hasExact() && - (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // Target-specific implementation of sdiv x, pow2. if (SDValue Res = BuildSDIVPow2(N)) return Res; @@ -3490,7 +3685,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1))) - if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue())) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -3705,7 +3900,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible. - if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (zext_inreg (extload x)) -> (zextload x) @@ -3766,7 +3961,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, EVT VT = N->getValueType(0); if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) @@ -3878,27 +4073,36 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) return false; - ConstantSDNode *N1C = dyn_cast(N.getOperand(1)); + SDValue N0 = N.getOperand(0); + unsigned Opc0 = N0.getOpcode(); + if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL) + return false; + + ConstantSDNode *N1C = nullptr; + // SHL or SRL: look upstream for AND mask operand + if (Opc == ISD::AND) + N1C = dyn_cast(N.getOperand(1)); + else if (Opc0 == ISD::AND) + N1C = dyn_cast(N0.getOperand(1)); if (!N1C) return false; - unsigned Num; + unsigned MaskByteOffset; switch (N1C->getZExtValue()) { default: return false; - case 0xFF: Num = 0; break; - case 0xFF00: Num = 1; break; - case 0xFF0000: Num = 2; break; - case 0xFF000000: Num = 3; break; + case 0xFF: MaskByteOffset = 0; break; + case 0xFF00: MaskByteOffset = 1; break; + case 0xFF0000: MaskByteOffset = 2; break; + case 0xFF000000: MaskByteOffset = 3; break; } // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). - SDValue N0 = N.getOperand(0); if (Opc == ISD::AND) { - if (Num == 0 || Num == 2) { + if (MaskByteOffset == 0 || MaskByteOffset == 2) { // (x >> 8) & 0xff // (x >> 8) & 0xff0000 - if (N0.getOpcode() != ISD::SRL) + if (Opc0 != ISD::SRL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3906,7 +4110,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { } else { // (x << 8) & 0xff00 // (x << 8) & 0xff000000 - if (N0.getOpcode() != ISD::SHL) + if (Opc0 != ISD::SHL) return false; ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3915,7 +4119,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { } else if (Opc == ISD::SHL) { // (x & 0xff) << 8 // (x & 0xff0000) << 8 - if (Num != 0 && Num != 2) + if (MaskByteOffset != 0 && MaskByteOffset != 2) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) @@ -3923,17 +4127,17 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef Parts) { } else { // Opc == ISD::SRL // (x & 0xff00) >> 8 // (x & 0xff000000) >> 8 - if (Num != 1 && Num != 3) + if (MaskByteOffset != 1 && MaskByteOffset != 3) return false; ConstantSDNode *C = dyn_cast(N.getOperand(1)); if (!C || C->getZExtValue() != 8) return false; } - if (Parts[Num]) + if (Parts[MaskByteOffset]) return false; - Parts[Num] = N0.getOperand(0).getNode(); + Parts[MaskByteOffset] = N0.getOperand(0).getNode(); return true; } @@ -3950,7 +4154,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { EVT VT = N->getValueType(0); if (VT != MVT::i32) return SDValue(); - if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); // Look for either @@ -3965,18 +4169,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (N1.getOpcode() == ISD::OR && N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { // (or (or (and), (and)), (or (and), (and))) - SDValue N000 = N00.getOperand(0); - if (!isBSwapHWordElement(N000, Parts)) + if (!isBSwapHWordElement(N00, Parts)) return SDValue(); - SDValue N001 = N00.getOperand(1); - if (!isBSwapHWordElement(N001, Parts)) + if (!isBSwapHWordElement(N01, Parts)) return SDValue(); - SDValue N010 = N01.getOperand(0); - if (!isBSwapHWordElement(N010, Parts)) + SDValue N10 = N1.getOperand(0); + if (!isBSwapHWordElement(N10, Parts)) return SDValue(); - SDValue N011 = N01.getOperand(1); - if (!isBSwapHWordElement(N011, Parts)) + SDValue N11 = N1.getOperand(1); + if (!isBSwapHWordElement(N11, Parts)) return SDValue(); } else { // (or (or (or (and), (and)), (and)), (and)) @@ -4198,20 +4400,22 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // reassociate or if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) != 0. - if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && - isa(N0.getOperand(1))) { - ConstantSDNode *C1 = cast(N0.getOperand(1)); - if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, - N1C, C1)) - return DAG.getNode( - ISD::AND, SDLoc(N), VT, - DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); - return SDValue(); + if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) { + if (ConstantSDNode *C1 = dyn_cast(N0.getOperand(1))) { + if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) { + if (SDValue COR = + DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1)) + return DAG.getNode( + ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); + return SDValue(); + } } } + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) if (N0.getOpcode() == N1.getOpcode()) if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) @@ -4225,8 +4429,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return Load; // Simplify the operands using demanded-bits information. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); @@ -5058,8 +5261,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return Tmp; // Simplify the expression using non-local knowledge. - if (!VT.isVector() && - SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); return SDValue(); @@ -5215,7 +5417,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); // fold (shl 0, x) -> 0 - if (isNullConstant(N0)) + if (isNullConstantOrNullSplatConstant(N0)) return N0; // fold (shl x, c >= size(x)) -> undef if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) @@ -5320,7 +5522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && - cast(N0)->Flags.hasExact()) { + N0->getFlags().hasExact()) { if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { uint64_t C1 = N0C1->getZExtValue(); uint64_t C2 = N1C->getZExtValue(); @@ -5345,12 +5547,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); SDValue Shift; if (c2 > c1) { - Mask = Mask.shl(c2 - c1); + Mask <<= c2 - c1; SDLoc DL(N); Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), DAG.getConstant(c2 - c1, DL, N1.getValueType())); } else { - Mask = Mask.lshr(c1 - c2); + Mask.lshrInPlace(c1 - c2); SDLoc DL(N); Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), DAG.getConstant(c1 - c2, DL, N1.getValueType())); @@ -5407,6 +5609,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { unsigned OpSizeInBits = VT.getScalarSizeInBits(); // Arithmetic shifting an all-sign-bit value is a no-op. + // fold (sra 0, x) -> 0 + // fold (sra -1, x) -> -1 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) return N0; @@ -5421,12 +5625,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); - // fold (sra 0, x) -> 0 - if (isNullConstant(N0)) - return N0; - // fold (sra -1, x) -> -1 - if (isAllOnesConstant(N0)) - return N0; // fold (sra x, c >= size(x)) -> undef if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) return DAG.getUNDEF(VT); @@ -5576,7 +5774,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); // fold (srl 0, x) -> 0 - if (isNullConstant(N0)) + if (isNullConstantOrNullSplatConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) @@ -5613,24 +5811,24 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) if (N1C && N0.getOpcode() == ISD::TRUNCATE && - N0.getOperand(0).getOpcode() == ISD::SRL && - isa(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = - cast(N0.getOperand(0)->getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); - uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); - // This is only valid if the OpSizeInBits + c1 = size of inner shift. - if (c1 + OpSizeInBits == InnerShiftSize) { - SDLoc DL(N0); - if (c1 + c2 >= InnerShiftSize) - return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::TRUNCATE, DL, VT, - DAG.getNode(ISD::SRL, DL, InnerShiftVT, - N0.getOperand(0)->getOperand(0), - DAG.getConstant(c1 + c2, DL, - ShiftCountVT))); + N0.getOperand(0).getOpcode() == ISD::SRL) { + if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) { + uint64_t c1 = N001C->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + // This is only valid if the OpSizeInBits + c1 = size of inner shift. + if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); + if (c1 + c2 >= InnerShiftSize) + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, + N0.getOperand(0).getOperand(0), + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); + } } } @@ -5660,7 +5858,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { DAG.getConstant(ShiftAmt, DL0, getShiftAmountTy(SmallVT))); AddToWorklist(SmallShift.getNode()); - APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); + APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt); SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), @@ -5678,20 +5876,20 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { - APInt KnownZero, KnownOne; - DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); + KnownBits Known; + DAG.computeKnownBits(N0.getOperand(0), Known); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. - if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); + if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. - APInt UnknownBits = ~KnownZero; + APInt UnknownBits = ~Known.Zero; if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. - if ((UnknownBits & (UnknownBits - 1)) == 0) { + if (UnknownBits.isPowerOf2()) { // Okay, we know that only that the single bit specified by UnknownBits // could be set on input to the CTLZ node. If this bit is set, the SRL // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair @@ -6550,6 +6748,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (isAbs) { EVT VT = LHS.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) + return DAG.getNode(ISD::ABS, DL, VT, LHS); + SDValue Shift = DAG.getNode( ISD::SRA, DL, VT, LHS, DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); @@ -6644,6 +6845,19 @@ SDValue DAGCombiner::visitSETCCE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + + // If Carry is false, fold to a regular SETCC. + if (isNullConstant(Carry)) + return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + + return SDValue(); +} + /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext @@ -6887,6 +7101,51 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } +/// If we're narrowing or widening the result of a vector select and the final +/// size is the same size as a setcc (compare) feeding the select, then try to +/// apply the cast operation to the select's operands because matching vector +/// sizes for a select condition and other operands should be more efficient. +SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { + unsigned CastOpcode = Cast->getOpcode(); + assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND || + CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND || + CastOpcode == ISD::FP_ROUND) && + "Unexpected opcode for vector select narrowing/widening"); + + // We only do this transform before legal ops because the pattern may be + // obfuscated by target-specific operations after legalization. Do not create + // an illegal select op, however, because that may be difficult to lower. + EVT VT = Cast->getValueType(0); + if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + SDValue VSel = Cast->getOperand(0); + if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() || + VSel.getOperand(0).getOpcode() != ISD::SETCC) + return SDValue(); + + // Does the setcc have the same vector size as the casted select? + SDValue SetCC = VSel.getOperand(0); + EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); + if (SetCCVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B) + SDValue A = VSel.getOperand(1); + SDValue B = VSel.getOperand(2); + SDValue CastA, CastB; + SDLoc DL(Cast); + if (CastOpcode == ISD::FP_ROUND) { + // FP_ROUND (fptrunc) has an extra flag operand to pass along. + CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1)); + CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1)); + } else { + CastA = DAG.getNode(CastOpcode, DL, VT, A); + CastB = DAG.getNode(CastOpcode, DL, VT, B); + } + return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -6969,12 +7228,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); - CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); - CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked! } } @@ -7030,10 +7288,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); - CombineTo(N, And); - CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + return CombineTo(N, And); // Return N so it doesn't get rechecked! } } } @@ -7110,19 +7367,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0); + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } // isTruncateOf - If N is a truncate of some other value, return true, record -// the value being truncated in Op and which of Op's bits are zero in KnownZero. -// This function computes KnownZero to avoid a duplicated call to +// the value being truncated in Op and which of Op's bits are zero/one in Known. +// This function computes KnownBits to avoid a duplicated call to // computeKnownBits in the caller. static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, - APInt &KnownZero) { - APInt KnownOne; + KnownBits &Known) { if (N->getOpcode() == ISD::TRUNCATE) { Op = N->getOperand(0); - DAG.computeKnownBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, Known); return true; } @@ -7141,9 +7400,9 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, else return false; - DAG.computeKnownBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, Known); - if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + if (!(Known.Zero | 1).isAllOnesValue()) return false; return true; @@ -7168,8 +7427,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // This is valid when the truncated bits of x are already zero. // FIXME: We should extend this to work for vectors too. SDValue Op; - APInt KnownZero; - if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + KnownBits Known; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) { APInt TruncatedBits = (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? APInt(Op.getValueSizeInBits(), 0) : @@ -7177,14 +7436,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueSizeInBits(), std::min(Op.getValueSizeInBits(), VT.getSizeInBits())); - if (TruncatedBits == (KnownZero & TruncatedBits)) { - if (VT.bitsGT(Op.getValueType())) - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); - if (VT.bitsLT(Op.getValueType())) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - - return Op; - } + if (TruncatedBits.isSubsetOf(Known.Zero)) + return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); } // fold (zext (truncate (load x))) -> (zext (smaller load x)) @@ -7231,14 +7484,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { - SDValue Op = N0.getOperand(0); - if (SrcVT.bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); - } else if (SrcVT.bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); - } + SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); + AddToWorklist(Op.getNode()); return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); } } @@ -7252,11 +7499,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getValueType()) || !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); - if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); - } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); - } + X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); SDLoc DL(N); @@ -7286,12 +7529,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), - ISD::ZERO_EXTEND); - CombineTo(N, ExtLoad); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return CombineTo(N, ExtLoad); // Return N so it doesn't get rechecked! } } @@ -7341,11 +7581,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); - CombineTo(N, And); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, - ISD::ZERO_EXTEND); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return CombineTo(N, And); // Return N so it doesn't get rechecked! } } } @@ -7444,6 +7682,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { ShAmt); } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -7478,14 +7719,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // fold (aext (truncate x)) - if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue TruncOp = N0.getOperand(0); - if (TruncOp.getValueType() == VT) - return TruncOp; // x iff x size == zext size. - if (TruncOp.getValueType().bitsGT(VT)) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp); - return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp); - } + if (N0.getOpcode() == ISD::TRUNCATE) + return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); // Fold (aext (and (trunc x), cst)) -> (and x, cst) // if the trunc is not free. @@ -7496,11 +7731,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.getValueType())) { SDLoc DL(N); SDValue X = N0.getOperand(0).getOperand(0); - if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X); - } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, DL, VT, X); - } + X = DAG.getAnyExtOrTrunc(X, DL, VT); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, DL, VT, @@ -7800,7 +8031,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), DAG.getConstant(PtrOff, DL, PtrType), - &Flags); + Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -8226,17 +8457,21 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return SDValue(N, 0); // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) + // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) // When the adde's carry is not used. - if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() && - !N0.getNode()->hasAnyUseOfValue(1) && - (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) { + if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && + N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && + (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) { SDLoc SL(N); auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); - return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue), - X, Y, N0.getOperand(2)); + auto VTs = DAG.getVTList(VT, N0->getValueType(1)); + return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -8300,11 +8535,11 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, switch (N0.getOpcode()) { case ISD::AND: FPOpcode = ISD::FABS; - SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits()); break; case ISD::XOR: FPOpcode = ISD::FNEG; - SignMask = APInt::getSignBit(SourceVT.getSizeInBits()); + SignMask = APInt::getSignMask(SourceVT.getSizeInBits()); break; // TODO: ISD::OR --> ISD::FNABS? default: @@ -8415,7 +8650,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { assert(VT.getSizeInBits() == 128); SDValue SignBit = DAG.getConstant( - APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); SDValue FlipBit; if (N0.getOpcode() == ISD::FNEG) { FlipBit = SignBit; @@ -8435,7 +8670,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); } - APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, NewConv, DAG.getConstant(SignBit, DL, VT)); @@ -8483,7 +8718,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { - APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2); SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(Cst.getNode()); SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); @@ -8504,7 +8739,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); } - APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); @@ -8687,7 +8922,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { for (unsigned j = 0; j != NumOutputsPerInput; ++j) { APInt ThisVal = OpVal.trunc(DstBitSize); Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); - OpVal = OpVal.lshr(DstBitSize); + OpVal.lshrInPlace(DstBitSize); } // For big endian targets, swap the order of the pieces of each element. @@ -8699,7 +8934,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } static bool isContractable(SDNode *N) { - SDNodeFlags F = cast(N)->Flags; + SDNodeFlags F = N->getFlags(); return F.hasAllowContract() || F.hasUnsafeAlgebra(); } @@ -9277,6 +9512,14 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { return SDValue(); } +static bool isFMulNegTwo(SDValue &N) { + if (N.getOpcode() != ISD::FMUL) + return false; + if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1))) + return CFP->isExactlyValue(-2.0); + return false; +} + SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -9285,7 +9528,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags *Flags = &cast(N)->Flags; + const SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) @@ -9315,8 +9558,18 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FSUB, DL, VT, N1, GetNegatedExpression(N0, DAG, LegalOperations), Flags); + // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) + // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B)) + if ((isFMulNegTwo(N0) && N0.hasOneUse()) || + (isFMulNegTwo(N1) && N1.hasOneUse())) { + bool N1IsFMul = isFMulNegTwo(N1); + SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0); + SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags); + return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags); + } + // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { // fold (fadd A, 0) -> A if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) if (N1C->isZero()) @@ -9439,7 +9692,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags *Flags = &cast(N)->Flags; + const SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) @@ -9459,7 +9712,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { GetNegatedExpression(N1, DAG, LegalOperations), Flags); // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) { + if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { // (fsub 0, B) -> -B if (N0CFP && N0CFP->isZero()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) @@ -9510,7 +9763,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - const SDNodeFlags *Flags = &cast(N)->Flags; + const SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) { @@ -9654,7 +9907,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), - &Flags), &Flags); + Flags), Flags); } // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) @@ -9664,7 +9917,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), - &Flags), + Flags), N2); } } @@ -9690,16 +9943,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, - DAG.getConstantFP(1.0, DL, VT), &Flags), - &Flags); + DAG.getConstantFP(1.0, DL, VT), Flags), + Flags); } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getNode(ISD::FADD, DL, VT, N1, - DAG.getConstantFP(-1.0, DL, VT), &Flags), - &Flags); + DAG.getConstantFP(-1.0, DL, VT), Flags), + Flags); } } @@ -9715,8 +9968,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; - const SDNodeFlags *Flags = N->getFlags(); - if (!UnsafeMath && !Flags->hasAllowReciprocal()) + const SDNodeFlags Flags = N->getFlags(); + if (!UnsafeMath && !Flags.hasAllowReciprocal()) return SDValue(); // Skip if current node is a reciprocal. @@ -9739,7 +9992,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { // This division is eligible for optimization only if global unsafe math // is enabled or if this division allows reciprocal formation. - if (UnsafeMath || U->getFlags()->hasAllowReciprocal()) + if (UnsafeMath || U->getFlags().hasAllowReciprocal()) Users.insert(U); } } @@ -9778,7 +10031,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; - SDNodeFlags *Flags = &cast(N)->Flags; + SDNodeFlags Flags = N->getFlags(); // fold vector ops if (VT.isVector()) @@ -9892,8 +10145,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, - &cast(N)->Flags); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags()); if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -9913,7 +10165,7 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { // For now, create a Flags object for use with all unsafe math transforms. SDNodeFlags Flags; Flags.setUnsafeAlgebra(true); - return buildSqrtEstimate(N0, &Flags); + return buildSqrtEstimate(N0, Flags); } /// copysign(x, fp_extend(y)) -> copysign(x, y) @@ -10188,6 +10440,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { Tmp, N0.getOperand(1)); } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -10254,6 +10509,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) + return NewVSel; + return SDValue(); } @@ -10315,11 +10573,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x80... per scalar element // and splat it. - SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits()); + SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x80... - SignMask = APInt::getSignBit(IntVT.getSizeInBits()); + SignMask = APInt::getSignMask(IntVT.getSizeInBits()); } SDLoc DL0(N0); Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, @@ -10339,10 +10597,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (Level >= AfterLegalizeDAG && (TLI.isFPImmLegal(CVal, VT) || TLI.isOperationLegal(ISD::ConstantFP, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N0.getOperand(1)), - &cast(N0)->Flags); + return DAG.getNode( + ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), + N0->getFlags()); } } @@ -10420,11 +10678,11 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x7f... per scalar element // and splat it. - SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits()); + SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x7f... - SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(IntVT.getSizeInBits()); } SDLoc DL(N0); Int = DAG.getNode(ISD::AND, DL, IntVT, Int, @@ -11643,7 +11901,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // Check if this is a trunc(lshr). if (User->getOpcode() == ISD::SRL && User->hasOneUse() && isa(User->getOperand(1))) { - Shift = cast(User->getOperand(1))->getZExtValue(); + Shift = User->getConstantOperandVal(1); User = *User->use_begin(); } @@ -12188,9 +12446,9 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getAPIntValue().zext(SizeInBits); + StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits); + StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); } else { llvm_unreachable("Invalid constant element type"); } @@ -12203,10 +12461,27 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores); - SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - FirstInChain->getAlignment()); + + // make sure we use trunc store if it's necessary to be legal. + SDValue NewStore; + if (TLI.isTypeLegal(StoredVal.getValueType())) { + NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + FirstInChain->getAlignment()); + } else { // Must be realized as a trunc store + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits(); + ConstantSDNode *C = cast(StoredVal); + SDValue ExtendedStoreVal = + DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, + LegalizedStoredValueTy); + NewStore = DAG.getTruncStore( + NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } // Replace all merged stores with the new store. for (unsigned i = 0; i < NumStores; ++i) @@ -12231,12 +12506,18 @@ void DAGCombiner::getStoreMergeCandidates( if (BasePtr.Base.isUndef()) return; - bool IsLoadSrc = isa(St->getValue()); bool IsConstantSrc = isa(St->getValue()) || isa(St->getValue()); bool IsExtractVecSrc = (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); + bool IsLoadSrc = isa(St->getValue()); + BaseIndexOffset LBasePtr; + // Match on loadbaseptr if relevant. + if (IsLoadSrc) + LBasePtr = BaseIndexOffset::match( + cast(St->getValue())->getBasePtr(), DAG); + auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool { if (Other->isVolatile() || Other->isIndexed()) return false; @@ -12245,18 +12526,25 @@ void DAGCombiner::getStoreMergeCandidates( if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) && isa(Other->getValue()))) return false; - Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); - if (!Ptr.equalBaseIndex(BasePtr)) - return false; - if (IsLoadSrc) - return isa(Other->getValue()); + if (IsLoadSrc) { + // The Load's Base Ptr must also match + if (LoadSDNode *OtherLd = dyn_cast(Other->getValue())) { + auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); + if (!(LBasePtr.equalBaseIndex(LPtr))) + return false; + } else + return false; + } if (IsConstantSrc) - return (isa(Other->getValue()) || - isa(Other->getValue())); + if (!(isa(Other->getValue()) || + isa(Other->getValue()))) + return false; if (IsExtractVecSrc) - return (Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR); - return false; + if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR)) + return false; + Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); + return (Ptr.equalBaseIndex(BasePtr)); }; // We looking for a root node which is an ancestor to all mergable // stores. We search up through a load, to our root and then down @@ -12276,20 +12564,19 @@ void DAGCombiner::getStoreMergeCandidates( SDNode *RootNode = (St->getChain()).getNode(); - // Set of Parents of Candidates - std::set CandidateParents; - if (LoadSDNode *Ldn = dyn_cast(RootNode)) { RootNode = Ldn->getChain().getNode(); for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) if (I.getOperandNo() == 0 && isa(*I)) // walk down chain - CandidateParents.insert(*I); + for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2) + if (I2.getOperandNo() == 0) + if (StoreSDNode *OtherST = dyn_cast(*I2)) { + BaseIndexOffset Ptr; + if (CandidateMatch(OtherST, Ptr)) + StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset)); + } } else - CandidateParents.insert(RootNode); - - // check all parents of mergable children - for (auto P = CandidateParents.begin(); P != CandidateParents.end(); ++P) - for (auto I = (*P)->use_begin(), E = (*P)->use_end(); I != E; ++I) + for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) if (I.getOperandNo() == 0) if (StoreSDNode *OtherST = dyn_cast(*I)) { BaseIndexOffset Ptr; @@ -12375,36 +12662,62 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { return LHS.OffsetFromBase < RHS.OffsetFromBase; }); - // Scan the memory operations on the chain and find the first non-consecutive - // store memory address. - unsigned NumConsecutiveStores = 0; - int64_t StartAddress = StoreNodes[0].OffsetFromBase; - - // Check that the addresses are consecutive starting from the second - // element in the list of stores. - for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { - int64_t CurrAddress = StoreNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - NumConsecutiveStores = i + 1; - } + // Store Merge attempts to merge the lowest stores. This generally + // works out as if successful, as the remaining stores are checked + // after the first collection of stores is merged. However, in the + // case that a non-mergeable store is found first, e.g., {p[-2], + // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent + // mergeable cases. To prevent this, we prune such stores from the + // front of StoreNodes here. + + bool RV = false; + while (StoreNodes.size() > 1) { + unsigned StartIdx = 0; + while ((StartIdx + 1 < StoreNodes.size()) && + StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != + StoreNodes[StartIdx + 1].OffsetFromBase) + ++StartIdx; + + // Bail if we don't have enough candidates to merge. + if (StartIdx + 1 >= StoreNodes.size()) + return RV; - if (NumConsecutiveStores < 2) - return false; + if (StartIdx) + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); + + // Scan the memory operations on the chain and find the first + // non-consecutive store memory address. + unsigned NumConsecutiveStores = 1; + int64_t StartAddress = StoreNodes[0].OffsetFromBase; + // Check that the addresses are consecutive starting from the second + // element in the list of stores. + for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { + int64_t CurrAddress = StoreNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + NumConsecutiveStores = i + 1; + } - // Check that we can merge these candidates without causing a cycle - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumConsecutiveStores)) - return false; + if (NumConsecutiveStores < 2) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumConsecutiveStores); + continue; + } + // Check that we can merge these candidates without causing a cycle + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, + NumConsecutiveStores)) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumConsecutiveStores); + continue; + } - // The node with the lowest store address. - LLVMContext &Context = *DAG.getContext(); - const DataLayout &DL = DAG.getDataLayout(); + // The node with the lowest store address. + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); - // Store the constants into memory as one consecutive store. - if (IsConstantSrc) { - bool RV = false; - while (NumConsecutiveStores > 1) { + // Store the constants into memory as one consecutive store. + if (IsConstantSrc) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); @@ -12430,6 +12743,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); bool IsFast = false; if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { @@ -12440,6 +12754,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { @@ -12455,7 +12770,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { !NoVectors) { // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1); - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) && + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12464,232 +12779,266 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } // Check if we found a legal integer type that creates a meaningful merge. - if (LastLegalType < 2 && LastLegalVectorType < 2) - break; + if (LastLegalType < 2 && LastLegalVectorType < 2) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + continue; + } bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, UseVector); - if (!Merged) - break; + if (!Merged) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + continue; + } // Remove merged stores for next iteration. - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); RV = true; - NumConsecutiveStores -= NumElem; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + continue; } - return RV; - } - // When extracting multiple vector elements, try to store them - // in one vector store rather than a sequence of scalar stores. - if (IsExtractVecSrc) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned NumStoresToMerge = 0; - bool IsVec = MemVT.isVector(); - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast(StoreNodes[i].MemNode); - unsigned StoreValOpcode = St->getValue().getOpcode(); - // This restriction could be loosened. - // Bail out if any stored values are not elements extracted from a vector. - // It should be possible to handle mixed sources, but load sources need - // more careful handling (see the block of code below that handles - // consecutive loads). - if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && - StoreValOpcode != ISD::EXTRACT_SUBVECTOR) - return false; + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecSrc) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned NumStoresToMerge = 1; + bool IsVec = MemVT.isVector(); + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); + unsigned StoreValOpcode = St->getValue().getOpcode(); + // This restriction could be loosened. + // Bail out if any stored values are not elements extracted from a + // vector. It should be possible to handle mixed sources, but load + // sources need more careful handling (see the block of code below that + // handles consecutive loads). + if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && + StoreValOpcode != ISD::EXTRACT_SUBVECTOR) + return RV; + + // Find a legal type for the vector store. + unsigned Elts = i + 1; + if (IsVec) { + // When merging vector stores, get the total number of elements. + Elts *= MemVT.getVectorNumElements(); + } + EVT Ty = + EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) + NumStoresToMerge = i + 1; + } - // Find a legal type for the vector store. - unsigned Elts = i + 1; - if (IsVec) { - // When merging vector stores, get the total number of elements. - Elts *= MemVT.getVectorNumElements(); + bool Merged = MergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumStoresToMerge, false, true); + if (!Merged) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + continue; } - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast; - if (TLI.isTypeLegal(Ty) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && IsFast) - NumStoresToMerge = i + 1; + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + RV = true; + continue; } - return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge, - false, true); - } + // Below we handle the case of multiple consecutive stores that + // come from multiple consecutive loads. We merge them into a single + // wide load and a single wide store. - // Below we handle the case of multiple consecutive stores that - // come from multiple consecutive loads. We merge them into a single - // wide load and a single wide store. + // Look for load nodes which are used by the stored values. + SmallVector LoadNodes; - // Look for load nodes which are used by the stored values. - SmallVector LoadNodes; + // Find acceptable loads. Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. + BaseIndexOffset LdBasePtr; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast(StoreNodes[i].MemNode); + LoadSDNode *Ld = dyn_cast(St->getValue()); + if (!Ld) + break; - // Find acceptable loads. Loads need to have the same chain (token factor), - // must not be zext, volatile, indexed, and they must be consecutive. - BaseIndexOffset LdBasePtr; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast(StoreNodes[i].MemNode); - LoadSDNode *Ld = dyn_cast(St->getValue()); - if (!Ld) break; + // Loads must only have one use. + if (!Ld->hasNUsesOfValue(1, 0)) + break; - // Loads must only have one use. - if (!Ld->hasNUsesOfValue(1, 0)) - break; + // The memory operands must not be volatile. + if (Ld->isVolatile() || Ld->isIndexed()) + break; - // The memory operands must not be volatile. - if (Ld->isVolatile() || Ld->isIndexed()) - break; + // We do not accept ext loads. + if (Ld->getExtensionType() != ISD::NON_EXTLOAD) + break; - // We do not accept ext loads. - if (Ld->getExtensionType() != ISD::NON_EXTLOAD) - break; + // The stored memory type must be the same. + if (Ld->getMemoryVT() != MemVT) + break; - // The stored memory type must be the same. - if (Ld->getMemoryVT() != MemVT) - break; + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + // If this is not the first ptr that we check. + if (LdBasePtr.Base.getNode()) { + // The base ptr must be the same. + if (!LdPtr.equalBaseIndex(LdBasePtr)) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr; + } - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); - // If this is not the first ptr that we check. - if (LdBasePtr.Base.getNode()) { - // The base ptr must be the same. - if (!LdPtr.equalBaseIndex(LdBasePtr)) - break; - } else { - // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr; + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset)); } - // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset)); - } + if (LoadNodes.size() < 2) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + continue; + } - if (LoadNodes.size() < 2) - return false; + // If we have load/store pair instructions and we only have two values, + // don't bother merging. + unsigned RequiredAlignment; + if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); + continue; + } + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); + unsigned FirstLoadAS = FirstLoad->getAddressSpace(); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + + // Scan the memory operations on the chain and find the first + // non-consecutive load memory address. These variables hold the index in + // the store node array. + unsigned LastConsecutiveLoad = 0; + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 0; + unsigned LastLegalIntegerType = 0; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. + if (LoadNodes[i].MemNode->getChain() != FirstChain) + break; - // If we have load/store pair instructions and we only have two values, - // don't bother. - unsigned RequiredAlignment; - if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && - St->getAlignment() >= RequiredAlignment) - return false; - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - LoadSDNode *FirstLoad = cast(LoadNodes[0].MemNode); - unsigned FirstLoadAS = FirstLoad->getAddressSpace(); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); - - // Scan the memory operations on the chain and find the first non-consecutive - // load memory address. These variables hold the index in the store node - // array. - unsigned LastConsecutiveLoad = 0; - // This variable refers to the size and not index in the array. - unsigned LastLegalVectorType = 0; - unsigned LastLegalIntegerType = 0; - StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = FirstLoad->getChain(); - for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads must share the same chain. - if (LoadNodes[i].MemNode->getChain() != FirstChain) - break; + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; + // Find a legal type for the vector store. + EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1); + bool IsFastSt, IsFastLd; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && + IsFastLd) { + LastLegalVectorType = i + 1; + } - int64_t CurrAddress = LoadNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveLoad = i; - // Find a legal type for the vector store. - EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); - bool IsFastSt, IsFastLd; - if (TLI.isTypeLegal(StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && IsFastLd) { - LastLegalVectorType = i + 1; - } - - // Find a legal type for the integer store. - unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(Context, SizeInBits); - if (TLI.isTypeLegal(StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && IsFastLd) - LastLegalIntegerType = i + 1; - // Or check whether a truncstore and extload is legal. - else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(Context, StoreTy); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFastSt) && + // Find a legal type for the integer store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstLoadAS, FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) - LastLegalIntegerType = i+1; + LastLegalIntegerType = i + 1; + // Or check whether a truncstore and extload is legal. + else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, + StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, + StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && + IsFastLd) + LastLegalIntegerType = i + 1; + } } - } - // Only use vector types if the vector type is larger than the integer type. - // If they are the same, use integers. - bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; - unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); + // Only use vector types if the vector type is larger than the integer type. + // If they are the same, use integers. + bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; + unsigned LastLegalType = + std::max(LastLegalVectorType, LastLegalIntegerType); - // We add +1 here because the LastXXX variables refer to location while - // the NumElem refers to array/index size. - unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); - NumElem = std::min(LastLegalType, NumElem); + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); + NumElem = std::min(LastLegalType, NumElem); - if (NumElem < 2) - return false; + if (NumElem < 2) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + continue; + } - // Find if it is better to use vectors or integers to load and store - // to memory. - EVT JointMemOpVT; - if (UseVectorTy) { - JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); - } else { - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); - } + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); + } else { + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + } - SDLoc LoadDL(LoadNodes[0].MemNode); - SDLoc StoreDL(StoreNodes[0].MemNode); + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); - // The merged loads are required to have the same incoming chain, so - // using the first's chain is acceptable. - SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoadAlign); + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. + SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), + FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign); - SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); + SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); - AddToWorklist(NewStoreChain.getNode()); + AddToWorklist(NewStoreChain.getNode()); - SDValue NewStore = - DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign); + SDValue NewStore = DAG.getStore( + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign); - // Transfer chain users from old loads to the new load. - for (unsigned i = 0; i < NumElem; ++i) { - LoadSDNode *Ld = cast(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - } + // Transfer chain users from old loads to the new load. + for (unsigned i = 0; i < NumElem; ++i) { + LoadSDNode *Ld = cast(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + } - // Replace the all stores with the new store. - for (unsigned i = 0; i < NumElem; ++i) - CombineTo(StoreNodes[i].MemNode, NewStore); - return true; + // Replace the all stores with the new store. + for (unsigned i = 0; i < NumElem; ++i) + CombineTo(StoreNodes[i].MemNode, NewStore); + RV = true; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + continue; + } + return RV; } SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { @@ -12856,10 +13205,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Chain = ST->getChain(); } - // Try transforming N to an indexed store. - if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) - return SDValue(N, 0); - // FIXME: is there such a thing as a truncating indexed store? if (ST->isTruncatingStore() && ST->isUnindexed() && Value.getValueType().isInteger()) { @@ -12903,14 +13248,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } - // If this is a store followed by a store with the same value to the same - // location, then the store is dead/noop. if (StoreSDNode *ST1 = dyn_cast(Chain)) { - if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() && - ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() && - ST1->isUnindexed() && !ST1->isVolatile()) { - // The store is dead, remove it. - return Chain; + if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() && + !ST1->isVolatile() && ST1->getBasePtr() == Ptr && + ST->getMemoryVT() == ST1->getMemoryVT()) { + // If this is a store followed by a store with the same value to the same + // location, then the store is dead/noop. + if (ST1->getValue() == Value) { + // The store is dead, remove it. + return Chain; + } + + // If this is a store who's preceeding store to the same location + // and no one other node is chained to that store we can effectively + // drop the store. Do not remove stores to undef as they may be used as + // data sinks. + if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() && + !ST1->getBasePtr().isUndef()) { + // ST1 is fully overwritten and can be elided. Combine with it's chain + // value. + CombineTo(ST1, ST1->getChain()); + return SDValue(); + } } } @@ -12940,6 +13299,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // // Make sure to do this only after attempting to merge stores in order to @@ -13088,8 +13451,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // do this only if indices are both constants and Idx1 < Idx0. if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() && isa(InVec.getOperand(2))) { - unsigned OtherElt = - cast(InVec.getOperand(2))->getZExtValue(); + unsigned OtherElt = InVec.getConstantOperandVal(2); if (Elt < OtherElt) { // Swap nodes. SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, @@ -14033,7 +14395,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { if (!isa(Op.getOperand(1))) return SDValue(); - int ExtIdx = cast(Op.getOperand(1))->getZExtValue(); + int ExtIdx = Op.getConstantOperandVal(1); // Ensure that we are extracting a subvector from a vector the same // size as the result. @@ -14224,6 +14586,132 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +/// If we are extracting a subvector produced by a wide binary operator with at +/// at least one operand that was the result of a vector concatenation, then try +/// to use the narrow vector operands directly to avoid the concatenation and +/// extraction. +static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { + // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share + // some of these bailouts with other transforms. + + // The extract index must be a constant, so we can map it to a concat operand. + auto *ExtractIndex = dyn_cast(Extract->getOperand(1)); + if (!ExtractIndex) + return SDValue(); + + // Only handle the case where we are doubling and then halving. A larger ratio + // may require more than two narrow binops to replace the wide binop. + EVT VT = Extract->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + assert((ExtractIndex->getZExtValue() % NumElems) == 0 && + "Extract index is not a multiple of the vector length."); + if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2) + return SDValue(); + + // We are looking for an optionally bitcasted wide vector binary operator + // feeding an extract subvector. + SDValue BinOp = Extract->getOperand(0); + if (BinOp.getOpcode() == ISD::BITCAST) + BinOp = BinOp.getOperand(0); + + // TODO: The motivating case for this transform is an x86 AVX1 target. That + // target has temptingly almost legal versions of bitwise logic ops in 256-bit + // flavors, but no other 256-bit integer support. This could be extended to + // handle any binop, but that may require fixing/adding other folds to avoid + // codegen regressions. + unsigned BOpcode = BinOp.getOpcode(); + if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR) + return SDValue(); + + // The binop must be a vector type, so we can chop it in half. + EVT WideBVT = BinOp.getValueType(); + if (!WideBVT.isVector()) + return SDValue(); + + // Bail out if the target does not support a narrower version of the binop. + EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), + WideBVT.getVectorNumElements() / 2); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) + return SDValue(); + + // Peek through bitcasts of the binary operator operands if needed. + SDValue LHS = BinOp.getOperand(0); + if (LHS.getOpcode() == ISD::BITCAST) + LHS = LHS.getOperand(0); + + SDValue RHS = BinOp.getOperand(1); + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); + + // We need at least one concatenation operation of a binop operand to make + // this transform worthwhile. The concat must double the input vector sizes. + // TODO: Should we also handle INSERT_SUBVECTOR patterns? + bool ConcatL = + LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2; + bool ConcatR = + RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2; + if (!ConcatL && !ConcatR) + return SDValue(); + + // If one of the binop operands was not the result of a concat, we must + // extract a half-sized operand for our new narrow binop. We can't just reuse + // the original extract index operand because we may have bitcasted. + unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems; + unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); + EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); + SDLoc DL(Extract); + + // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN + // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N) + // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN + SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum)) + : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(0), + DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); + + SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum)) + : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(1), + DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); + + SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y); + return DAG.getBitcast(VT, NarrowBinOp); +} + +/// If we are extracting a subvector from a wide vector load, convert to a +/// narrow load to eliminate the extraction: +/// (extract_subvector (load wide vector)) --> (load narrow vector) +static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { + // TODO: Add support for big-endian. The offset calculation must be adjusted. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + + // TODO: The one-use check is overly conservative. Check the cost of the + // extract instead or remove that condition entirely. + auto *Ld = dyn_cast(Extract->getOperand(0)); + auto *ExtIdx = dyn_cast(Extract->getOperand(1)); + if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() || + !ExtIdx) + return SDValue(); + + // The narrow load will be offset from the base address of the old load if + // we are extracting from something besides index 0 (little-endian). + EVT VT = Extract->getValueType(0); + SDLoc DL(Extract); + SDValue BaseAddr = Ld->getOperand(1); + unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); + + // TODO: Use "BaseIndexOffset" to make this more effective. + SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, + VT.getStoreSize()); + SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); + DAG.makeEquivalentMemoryOrdering(Ld, NewLd); + return NewLd; +} + SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); @@ -14232,6 +14720,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (V.isUndef()) return DAG.getUNDEF(NVT); + if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)) + if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) + return NarrowLoad; + // Combine: // (extract_subvec (concat V1, V2, ...), i) // Into: @@ -14279,6 +14771,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { } } + if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) + return NarrowBOp; + return SDValue(); } @@ -14514,10 +15009,10 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. -SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, - SelectionDAG &DAG, - const TargetLowering &TLI, - bool LegalOperations) { +static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -14564,7 +15059,8 @@ SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, // destination type. This is often generated during legalization. // If the source node itself was a '*_extend_vector_inreg' node then we should // then be able to remove it. -SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { +static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -14617,6 +15113,55 @@ SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { return SDValue(); } +// Combine shuffles of splat-shuffles of the form: +// shuffle (shuffle V, undef, splat-mask), undef, M +// If splat-mask contains undef elements, we need to be careful about +// introducing undef's in the folded mask which are not the result of composing +// the masks of the shuffles. +static SDValue combineShuffleOfSplat(ArrayRef UserMask, + ShuffleVectorSDNode *Splat, + SelectionDAG &DAG) { + ArrayRef SplatMask = Splat->getMask(); + assert(UserMask.size() == SplatMask.size() && "Mask length mismatch"); + + // Prefer simplifying to the splat-shuffle, if possible. This is legal if + // every undef mask element in the splat-shuffle has a corresponding undef + // element in the user-shuffle's mask or if the composition of mask elements + // would result in undef. + // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask): + // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u] + // In this case it is not legal to simplify to the splat-shuffle because we + // may be exposing the users of the shuffle an undef element at index 1 + // which was not there before the combine. + // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u] + // In this case the composition of masks yields SplatMask, so it's ok to + // simplify to the splat-shuffle. + // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u] + // In this case the composed mask includes all undef elements of SplatMask + // and in addition sets element zero to undef. It is safe to simplify to + // the splat-shuffle. + auto CanSimplifyToExistingSplat = [](ArrayRef UserMask, + ArrayRef SplatMask) { + for (unsigned i = 0, e = UserMask.size(); i != e; ++i) + if (UserMask[i] != -1 && SplatMask[i] == -1 && + SplatMask[UserMask[i]] != -1) + return false; + return true; + }; + if (CanSimplifyToExistingSplat(UserMask, SplatMask)) + return SDValue(Splat, 0); + + // Create a new shuffle with a mask that is composed of the two shuffles' + // masks. + SmallVector NewMask; + for (int Idx : UserMask) + NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]); + + return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat), + Splat->getOperand(0), Splat->getOperand(1), + NewMask); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -14663,6 +15208,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } + // A shuffle of a single vector that is a splat can always be folded. + if (auto *N0Shuf = dyn_cast(N0)) + if (N1->isUndef() && N0Shuf->isSplat()) + return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG); + // If it is a splat, check if the argument vector is another splat or a // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { @@ -15017,7 +15567,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && N1.getValueType() == N0.getOperand(1).getValueType() && isa(N0.getOperand(2))) { - unsigned OtherIdx = cast(N0.getOperand(2))->getZExtValue(); + unsigned OtherIdx = N0.getConstantOperandVal(2); if (InsIdx < OtherIdx) { // Swap nodes. SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, @@ -15122,9 +15672,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { // Extract the sub element from the constant bit mask. if (DAG.getDataLayout().isBigEndian()) { - Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits); + Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); } else { - Bits = Bits.lshr(SubIdx * NumSubBits); + Bits.lshrInPlace(SubIdx * NumSubBits); } if (Split > 1) @@ -15797,7 +16347,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -15852,7 +16402,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal) { + SDNodeFlags Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); @@ -15896,7 +16446,7 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, - SDNodeFlags *Flags, bool Reciprocal) { + SDNodeFlags Flags, bool Reciprocal) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -15941,7 +16491,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if /// Op can be zero. -SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, +SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Reciprocal) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -15994,17 +16544,17 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, return SDValue(); } -SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { +SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { return buildSqrtEstimateImpl(Op, Flags, true); } -SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) { +SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { return buildSqrtEstimateImpl(Op, Flags, false); } /// Return true if base is a frame index, which is known not to alias with /// anything but itself. Provides base object and offset as results. -static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, +static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; @@ -16056,54 +16606,68 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { if (Op1->isInvariant() && Op0->writeMem()) return false; + unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; + unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; + + // Check for BaseIndexOffset matching. + BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); + BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG); + if (BasePtr0.equalBaseIndex(BasePtr1)) + return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) || + (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset)); + + // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis + // modified to use BaseIndexOffset. + // Gather base node and offset information. - SDValue Base1, Base2; - int64_t Offset1, Offset2; - const GlobalValue *GV1, *GV2; - const void *CV1, *CV2; - bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), + SDValue Base0, Base1; + int64_t Offset0, Offset1; + const GlobalValue *GV0, *GV1; + const void *CV0, *CV1; + bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(), + Base0, Offset0, GV0, CV0); + bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(), Base1, Offset1, GV1, CV1); - bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), - Base2, Offset2, GV2, CV2); - // If they have a same base address then check to see if they overlap. - if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) - return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || - (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + // If they have the same base address, then check to see if they overlap. + if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1))) + return !((Offset0 + NumBytes0) <= Offset1 || + (Offset1 + NumBytes1) <= Offset0); // It is possible for different frame indices to alias each other, mostly // when tail call optimization reuses return address slots for arguments. // To catch this case, look up the actual index of frame indices to compute // the real alias relationship. - if (isFrameIndex1 && isFrameIndex2) { + if (IsFrameIndex0 && IsFrameIndex1) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + Offset0 += MFI.getObjectOffset(cast(Base0)->getIndex()); Offset1 += MFI.getObjectOffset(cast(Base1)->getIndex()); - Offset2 += MFI.getObjectOffset(cast(Base2)->getIndex()); - return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || - (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + return !((Offset0 + NumBytes0) <= Offset1 || + (Offset1 + NumBytes1) <= Offset0); } // Otherwise, if we know what the bases are, and they aren't identical, then // we know they cannot alias. - if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1)) return false; // If we know required SrcValue1 and SrcValue2 have relatively large alignment // compared to the size and offset of the access, we may be able to prove they - // do not alias. This check is conservative for now to catch cases created by + // do not alias. This check is conservative for now to catch cases created by // splitting vector types. - if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && - (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && - (Op0->getMemoryVT().getSizeInBits() >> 3 == - Op1->getMemoryVT().getSizeInBits() >> 3) && - (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { - int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); - int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); + int64_t SrcValOffset0 = Op0->getSrcValueOffset(); + int64_t SrcValOffset1 = Op1->getSrcValueOffset(); + unsigned OrigAlignment0 = Op0->getOriginalAlignment(); + unsigned OrigAlignment1 = Op1->getOriginalAlignment(); + if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && + NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) { + int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; + int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; // There is no overlap between these relatively aligned accesses of similar - // size, return no alias. - if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || - (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) + // size. Return no alias. + if ((OffAlign0 + NumBytes0) <= OffAlign1 || + (OffAlign1 + NumBytes1) <= OffAlign0) return false; } @@ -16115,20 +16679,18 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) UseAA = false; #endif - if (UseAA && + + if (UseAA && AA && Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { // Use alias analysis information. - int64_t MinOffset = std::min(Op0->getSrcValueOffset(), - Op1->getSrcValueOffset()); - int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + - Op0->getSrcValueOffset() - MinOffset; - int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + - Op1->getSrcValueOffset() - MinOffset; + int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); + int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; + int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; AliasResult AAResult = - AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, - UseTBAA ? Op0->getAAInfo() : AAMDNodes()), - MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, - UseTBAA ? Op1->getAAInfo() : AAMDNodes())); + AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, + UseTBAA ? Op0->getAAInfo() : AAMDNodes()), + MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, + UseTBAA ? Op1->getAAInfo() : AAMDNodes()) ); if (AAResult == NoAlias) return false; } @@ -16342,7 +16904,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { } /// This is the entry point for the file. -void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, CodeGenOpt::Level OptLevel) { /// This is the main entry point to this class. DAGCombiner(*this, AA, OptLevel).Run(Level); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 4f6290b751bf0fb40ef2228115eb9e3c7f98c2b3..b2599b2e17f1061908faf3401f11c6ac68edb1ee 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,6 +39,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/DenseMap.h" @@ -50,7 +51,6 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -622,7 +622,7 @@ bool FastISel::selectStackmap(const CallInst *I) { // have to worry about calling conventions and target-specific lowering code. // Instead we perform the call lowering right here. // - // CALLSEQ_START(0...) + // CALLSEQ_START(0, 0...) // STACKMAP(id, nbytes, ...) // CALLSEQ_END(0, 0) // @@ -694,10 +694,8 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, Args.reserve(NumArgs); // Populate the argument list. - // Attributes for args start at offset 1, after the return attribute. ImmutableCallSite CS(CI); - for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; - ArgI != ArgE; ++ArgI) { + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) { Value *V = CI->getOperand(ArgI); assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); @@ -705,7 +703,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, ArgListEntry Entry; Entry.Val = V; Entry.Ty = V->getType(); - Entry.setAttributes(&CS, AttrI); + Entry.setAttributes(&CS, ArgIdx); Args.push_back(Entry); } @@ -863,6 +861,25 @@ bool FastISel::selectPatchpoint(const CallInst *I) { return true; } +bool FastISel::selectXRayCustomEvent(const CallInst *I) { + const auto &Triple = TM.getTargetTriple(); + if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + return true; // don't do anything to this instruction. + SmallVector Ops; + Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)), + /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), + /*IsDef=*/false)); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::PATCHABLE_EVENT_CALL)); + for (auto &MO : Ops) + MIB.add(MO); + // Insert the Patchable Event Call instruction, that gets lowered properly. + return true; +} + + /// Returns an AttributeList representing the attributes applied to the return /// value of the given call. static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) { @@ -907,7 +924,7 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol, ArgListEntry Entry; Entry.Val = V; Entry.Ty = V->getType(); - Entry.setAttributes(&CS, ArgI + 1); + Entry.setAttributes(&CS, ArgI); Args.push_back(Entry); } TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args); @@ -1044,7 +1061,7 @@ bool FastISel::lowerCall(const CallInst *CI) { Entry.Ty = V->getType(); // Skip the first return-type Attribute to get to params. - Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Entry.setAttributes(&CS, i - CS.arg_begin()); Args.push_back(Entry); } @@ -1133,16 +1150,16 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { return true; } - unsigned Offset = 0; + // Byval arguments with frame indices were already handled after argument + // lowering and before isel. + const auto *Arg = + dyn_cast(Address->stripInBoundsConstantOffsets()); + if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) + return true; + Optional Op; - if (const auto *Arg = dyn_cast(Address)) - // Some arguments' frame index is recorded during argument lowering. - Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Op = MachineOperand::CreateFI(Offset); - if (!Op) - if (unsigned Reg = lookUpRegForValue(Address)) - Op = MachineOperand::CreateReg(Reg, false); + if (unsigned Reg = lookUpRegForValue(Address)) + Op = MachineOperand::CreateReg(Reg, false); // If we have a VLA that has a "use" in a metadata node that's then used // here but it has no other uses, then we have a problem. E.g., @@ -1166,9 +1183,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); if (Op->isReg()) { Op->setIsDebug(true); + // A dbg.declare describes the address of a source variable, so lower it + // into an indirect DBG_VALUE. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, - DI->getVariable(), DI->getExpression()); + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, + Op->getReg(), 0, DI->getVariable(), DI->getExpression()); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) @@ -1252,6 +1271,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: return selectPatchpoint(II); + + case Intrinsic::xray_customevent: + return selectXRayCustomEvent(II); } return fastLowerIntrinsicCall(II); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 377a5237f15a0ca6b04c9921f706aa0615785e43..b736037d71ddcc53367d185e0dd95049e2ab4532 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -85,7 +85,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF = &mf; TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - MachineModuleInfo &MMI = MF->getMMI(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); unsigned StackAlign = TFI->getStackAlignment(); @@ -214,33 +213,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (!isa(I) || !StaticAllocaMap.count(cast(&I))) InitializeRegForValue(&I); - // Collect llvm.dbg.declare information. This is done now instead of - // during the initial isel pass through the IR so that it is done - // in a predictable order. - if (const DbgDeclareInst *DI = dyn_cast(&I)) { - assert(DI->getVariable() && "Missing variable"); - assert(DI->getDebugLoc() && "Missing location"); - if (MMI.hasDebugInfo()) { - // Don't handle byval struct arguments or VLAs, for example. - // Non-byval arguments are handled here (they refer to the stack - // temporary alloca at this point). - const Value *Address = DI->getAddress(); - if (Address) { - if (const BitCastInst *BCI = dyn_cast(Address)) - Address = BCI->getOperand(0); - if (const AllocaInst *AI = dyn_cast(Address)) { - DenseMap::iterator SI = - StaticAllocaMap.find(AI); - if (SI != StaticAllocaMap.end()) { // Check for VLAs. - int FI = SI->second; - MF->setVariableDbgInfo(DI->getVariable(), DI->getExpression(), - FI, DI->getDebugLoc()); - } - } - } - } - } - // Decide the preferred extend type for a value. PreferredExtendType[&I] = getPreferredExtendForValue(&I); } @@ -400,10 +372,9 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (!LOI->IsValid) return nullptr; - if (BitWidth > LOI->KnownZero.getBitWidth()) { + if (BitWidth > LOI->Known.getBitWidth()) { LOI->NumSignBits = 1; - LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth); - LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth); + LOI->Known = LOI->Known.zextOrTrunc(BitWidth); } return LOI; @@ -436,17 +407,15 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { Value *V = PN->getIncomingValue(0); if (isa(V) || isa(V)) { DestLOI.NumSignBits = 1; - APInt Zero(BitWidth, 0); - DestLOI.KnownZero = Zero; - DestLOI.KnownOne = Zero; + DestLOI.Known = KnownBits(BitWidth); return; } if (ConstantInt *CI = dyn_cast(V)) { APInt Val = CI->getValue().zextOrTrunc(BitWidth); DestLOI.NumSignBits = Val.getNumSignBits(); - DestLOI.KnownZero = ~Val; - DestLOI.KnownOne = Val; + DestLOI.Known.Zero = ~Val; + DestLOI.Known.One = Val; } else { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); @@ -463,25 +432,23 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { DestLOI = *SrcLOI; } - assert(DestLOI.KnownZero.getBitWidth() == BitWidth && - DestLOI.KnownOne.getBitWidth() == BitWidth && + assert(DestLOI.Known.Zero.getBitWidth() == BitWidth && + DestLOI.Known.One.getBitWidth() == BitWidth && "Masks should have the same bit width as the type."); for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); if (isa(V) || isa(V)) { DestLOI.NumSignBits = 1; - APInt Zero(BitWidth, 0); - DestLOI.KnownZero = Zero; - DestLOI.KnownOne = Zero; + DestLOI.Known = KnownBits(BitWidth); return; } if (ConstantInt *CI = dyn_cast(V)) { APInt Val = CI->getValue().zextOrTrunc(BitWidth); DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits()); - DestLOI.KnownZero &= ~Val; - DestLOI.KnownOne &= Val; + DestLOI.Known.Zero &= ~Val; + DestLOI.Known.One &= Val; continue; } @@ -498,8 +465,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { return; } DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits); - DestLOI.KnownZero &= SrcLOI->KnownZero; - DestLOI.KnownOne &= SrcLOI->KnownOne; + DestLOI.Known.Zero &= SrcLOI->Known.Zero; + DestLOI.Known.One &= SrcLOI->Known.One; } } @@ -515,12 +482,11 @@ void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, /// If the argument does not have any assigned frame index then 0 is /// returned. int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { - DenseMap::iterator I = - ByValArgFrameIndexMap.find(A); + auto I = ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); - return 0; + return INT_MAX; } unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( @@ -557,3 +523,29 @@ void FunctionLoweringInfo::setCurrentSwiftErrorVReg( const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) { SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg; } + +std::pair +FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) { + auto Key = PointerIntPair(I, true); + auto It = SwiftErrorVRegDefUses.find(Key); + if (It == SwiftErrorVRegDefUses.end()) { + auto &DL = MF->getDataLayout(); + const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + SwiftErrorVRegDefUses[Key] = VReg; + return std::make_pair(VReg, true); + } + return std::make_pair(It->second, false); +} + +std::pair +FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) { + auto Key = PointerIntPair(I, false); + auto It = SwiftErrorVRegDefUses.find(Key); + if (It == SwiftErrorVRegDefUses.end()) { + unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val); + SwiftErrorVRegDefUses[Key] = VReg; + return std::make_pair(VReg, true); + } + return std::make_pair(It->second, false); +} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index e85d1951e3aed9956b0245e6b3f7560bc15e38b8..b235e19aaab29094125e59aea3a145f762c85fba 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -161,7 +161,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (VRBase) { DstRC = MRI->getRegClass(VRBase); } else if (UseRC) { - assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + assert(TRI->isTypeLegalForClass(*UseRC, VT) && + "Incompatible phys register def and uses!"); DstRC = UseRC; } else { DstRC = TLI->getRegClassFor(VT); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b7378b360211f1c9ed7226f144cc258c0b272c33..15e87b7af18dc7582df0058dcb90d8e5b8512965 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -899,6 +899,35 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } +static TargetLowering::LegalizeAction +getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { + unsigned EqOpc; + switch (Opcode) { + default: llvm_unreachable("Unexpected FP pseudo-opcode"); + case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; + case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; + case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; + case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; + case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; + case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; + case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; + case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; + case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; + case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; + case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; + } + + auto Action = TLI.getOperationAction(EqOpc, VT); + + // We don't currently handle Custom or Promote for strict FP pseudo-ops. + // For now, we just expand for those cases. + if (Action != TargetLowering::Legal) + Action = TargetLowering::Expand; + + return Action; +} + /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); @@ -994,7 +1023,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; case ISD::EXTRACT_ELEMENT: case ISD::FLT_ROUNDS_: - case ISD::FPOWI: case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: @@ -1043,6 +1071,25 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; + case ISD::STRICT_FSQRT: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + // These pseudo-ops get legalized as if they were their non-strict + // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT + // is also legal, but if ISD::FSQRT requires expansion then so does + // ISD::STRICT_FSQRT. + Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(), + Node->getValueType(0)); + break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { @@ -1192,8 +1239,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // If the index is dependent on the store we will introduce a cycle when // creating the load (the load uses the index, and by replacing the chain - // we will make the index dependent on the load). - if (SDNode::hasPredecessorHelper(ST, Visited, Worklist)) + // we will make the index dependent on the load). Also, the store might be + // dependent on the extractelement and introduce a cycle when creating + // the load. + if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) || + ST->hasPredecessor(Op.getNode())) continue; StackPtr = ST->getBasePtr(); @@ -1340,7 +1390,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, // Convert to an integer of the same size. if (TLI.isTypeLegal(IVT)) { State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); - State.SignMask = APInt::getSignBit(NumBits); + State.SignMask = APInt::getSignMask(NumBits); State.SignBit = NumBits - 1; return; } @@ -1490,7 +1540,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); @@ -2029,6 +2079,9 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2139,19 +2192,6 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { return TLI.getLibcallName(LC) != nullptr; } -/// Return true if sincos libcall is available and can be used to combine sin -/// and cos. -static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, - const TargetMachine &TM) { - if (!isSinCosLibcallAvailable(Node, TLI)) - return false; - // GNU sin/cos functions set errno while sincos does not. Therefore - // combining sin and cos is only safe if unsafe-fpmath is enabled. - if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath) - return false; - return true; -} - /// Only issue sincos libcall if both sin and cos are needed. static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN @@ -2586,7 +2626,7 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) { DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); APInt Shift(Sz, 1); - Shift = Shift.shl(J); + Shift <<= J; Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); } @@ -2981,7 +3021,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT NVT = Node->getValueType(0); APFloat apf(DAG.EVTToAPFloatSemantics(VT), APInt::getNullValue(VT.getSizeInBits())); - APInt x = APInt::getSignBit(NVT.getSizeInBits()); + APInt x = APInt::getSignMask(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), @@ -3194,7 +3234,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || - canCombineSinCosLibcall(Node, TLI, TM)) + isSinCosLibcallAvailable(Node, TLI)) && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); @@ -3250,7 +3290,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { - const SDNodeFlags *Flags = &cast(Node)->Flags; + const SDNodeFlags Flags = Node->getFlags(); Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); @@ -3494,11 +3534,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // part. unsigned LoSize = VT.getSizeInBits(); SDValue HiLHS = - DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getNode(ISD::SRA, dl, VT, LHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); SDValue HiRHS = - DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getNode(ISD::SRA, dl, VT, RHS, DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); @@ -3904,16 +3944,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::FMAX_PPCF128)); break; case ISD::FSQRT: + case ISD::STRICT_FSQRT: Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128)); break; case ISD::FSIN: + case ISD::STRICT_FSIN: Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128)); break; case ISD::FCOS: + case ISD::STRICT_FCOS: Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128)); @@ -3923,26 +3966,31 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandSinCosLibCall(Node, Results); break; case ISD::FLOG: + case ISD::STRICT_FLOG: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128)); break; case ISD::FLOG2: + case ISD::STRICT_FLOG2: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128)); break; case ISD::FLOG10: + case ISD::STRICT_FLOG10: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128)); break; case ISD::FEXP: + case ISD::STRICT_FEXP: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128)); break; case ISD::FEXP2: + case ISD::STRICT_FEXP2: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128)); @@ -3963,11 +4011,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::CEIL_PPCF128)); break; case ISD::FRINT: + case ISD::STRICT_FRINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128)); break; case ISD::FNEARBYINT: + case ISD::STRICT_FNEARBYINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, @@ -3982,11 +4032,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::ROUND_PPCF128)); break; case ISD::FPOWI: + case ISD::STRICT_FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128)); break; case ISD::FPOW: + case ISD::STRICT_FPOW: Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); @@ -4184,6 +4236,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { ReplacedNode(Node); break; } + case ISD::MUL: case ISD::SDIV: case ISD::SREM: case ISD::UDIV: @@ -4532,6 +4585,14 @@ void SelectionDAG::Legalize() { AssignTopologicalOrder(); SmallPtrSet LegalizedNodes; + // Use a delete listener to remove nodes which were deleted during + // legalization from LegalizeNodes. This is needed to handle the situation + // where a new node is allocated by the object pool to the same address of a + // previously deleted node. + DAGNodeDeletedListener DeleteListener( + *this, + [&LegalizedNodes](SDNode *N, SDNode *E) { LegalizedNodes.erase(N); }); + SelectionDAGLegalize Legalizer(*this, LegalizedNodes); // Visit all the nodes. We start in topological order, so that we see diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6f2b1b94ce465069e8cbb2477be2c47055792171..c1cb5d9b5235e7cdc6da521e14890a004c51af6c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -72,7 +72,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; case ISD::EXTRACT_VECTOR_ELT: - R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; @@ -171,7 +171,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { } } -SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, keep the extracted value in register. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 85068e890756b0a6ae700835e3ca51cb608a105f..a3ba52a148ee4f2880bbc7d76af313e117902ff7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -21,6 +21,7 @@ #include "LegalizeTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -134,6 +135,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULO: case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + case ISD::ADDCARRY: + case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break; + case ISD::ATOMIC_LOAD: Res = PromoteIntRes_Atomic0(cast(N)); break; @@ -510,9 +514,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); EVT ValueVTs[] = { N->getValueType(0), NVT }; - SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) }; + unsigned NumOps = N->getNumOperands(); + assert(NumOps <= 3 && "Too many operands"); + if (NumOps == 3) + Ops[2] = N->getOperand(2); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), - DAG.getVTList(ValueVTs), Ops); + DAG.getVTList(ValueVTs), makeArrayRef(Ops, NumOps)); // Modified the sum result - switch anything that used the old sum to use // the new one. @@ -762,6 +771,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + llvm_unreachable("Not implemented"); +} + SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { // Promote the overflow bit trivially. if (ResNo == 1) @@ -924,6 +939,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SRL: case ISD::ROTL: case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + + case ISD::ADDCARRY: + case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break; } // If the result is null, the sub-method took care of registering results etc. @@ -1276,6 +1294,30 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { N->getOperand(0).getValueType().getScalarType()); } +SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { + assert(OpNo == 2 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDLoc DL(N); + + auto VT = getSetCCResultType(LHS.getValueType()); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(VT); + switch (BoolType) { + case TargetLoweringBase::UndefinedBooleanContent: + Carry = DAG.getAnyExtOrTrunc(Carry, DL, VT); + break; + case TargetLoweringBase::ZeroOrOneBooleanContent: + Carry = DAG.getZExtOrTrunc(Carry, DL, VT); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + Carry = DAG.getSExtOrTrunc(Carry, DL, VT); + break; + } + + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); +} //===----------------------------------------------------------------------===// // Integer Result Expansion @@ -1395,6 +1437,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ADDE: case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + case ISD::ADDCARRY: + case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break; + case ISD::SHL: case ISD::SRA: case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; @@ -1525,11 +1570,11 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); - APInt KnownZero, KnownOne; - DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne); + KnownBits Known; + DAG.computeKnownBits(N->getOperand(1), Known); // If we don't know anything about the high bits, exit. - if (((KnownZero|KnownOne) & HighBitMask) == 0) + if (((Known.Zero|Known.One) & HighBitMask) == 0) return false; // Get the incoming operand to be shifted. @@ -1538,7 +1583,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // If we know that any of the high bits of the shift amount are one, then we // can do this as a couple of simple shifts. - if (KnownOne.intersects(HighBitMask)) { + if (Known.One.intersects(HighBitMask)) { // Mask out the high bit, which we know is set. Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, DAG.getConstant(~HighBitMask, dl, ShTy)); @@ -1563,7 +1608,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { // If we know that all of the high bits of the shift amount are zero, then we // can do this as a couple of simple shifts. - if ((KnownZero & HighBitMask) == HighBitMask) { + if (HighBitMask.isSubsetOf(Known.Zero)) { // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined // shift if x is zero. We can use XOR here because x is known to be smaller // than 32. @@ -1738,6 +1783,23 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; + bool HasOpCarry = TLI.isOperationLegalOrCustom( + N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (HasOpCarry) { + SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT)); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); + } else { + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); + } + return; + } + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support // them. TODO: Teach operation legalization how to expand unsupported // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate @@ -1766,9 +1828,11 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? ISD::UADDO : ISD::USUBO, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + if (hasOVF) { - SDVTList VTList = DAG.getVTList(NVT, NVT); - TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + EVT OvfVT = getSetCCResultType(NVT); + SDVTList VTList = DAG.getVTList(NVT, OvfVT); int RevOpc; if (N->getOpcode() == ISD::ADD) { RevOpc = ISD::SUB; @@ -1783,12 +1847,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, switch (BoolType) { case TargetLoweringBase::UndefinedBooleanContent: - OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); + OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF); LLVM_FALLTHROUGH; case TargetLoweringBase::ZeroOrOneBooleanContent: + OVF = DAG.getZExtOrTrunc(OVF, dl, NVT); Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); break; case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + OVF = DAG.getSExtOrTrunc(OVF, dl, NVT); Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); } return; @@ -1799,6 +1865,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); + + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { + SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); + return; + } + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); @@ -1813,9 +1886,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, - DAG.getConstant(1, dl, NVT), - DAG.getConstant(0, dl, NVT)); + + SDValue Borrow; + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) + Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT); + else + Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1866,6 +1944,71 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDLoc dl(N); + + SDValue Ovf; + + bool HasOpCarry = TLI.isOperationLegalOrCustom( + N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY, + TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); + + if (HasOpCarry) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1)); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(Opc, dl, VTList, HiOps); + + Ovf = Hi.getValue(1); + } else { + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ovf); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1)); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH, SDValue() }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -2532,29 +2675,6 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } -void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, - SDValue &Lo, SDValue &Hi) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDLoc dl(N); - - // Expand the result by simply replacing it with the equivalent - // non-overflow-checking operation. - SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? - ISD::ADD : ISD::SUB, dl, LHS.getValueType(), - LHS, RHS); - SplitInteger(Sum, Lo, Hi); - - // Calculate the overflow: addition overflows iff a + b < a, and subtraction - // overflows iff a - b > a. - SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, - N->getOpcode () == ISD::UADDO ? - ISD::SETULT : ISD::SETUGT); - - // Use the calculated overflow everywhere. - ReplaceValueWith(SDValue(N, 1), Ofl); -} - void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2768,6 +2888,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; + case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; @@ -2902,14 +3023,16 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } - // Lower with SETCCE if the target supports it. + // Lower with SETCCE or SETCCCARRY if the target supports it. + EVT HiVT = LHSHi.getValueType(); + EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT); + bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT); + // FIXME: Make all targets support this, then remove the other lowering. - if (TLI.getOperationAction( - ISD::SETCCE, - TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) == - TargetLowering::Custom) { - // SETCCE can detect < and >= directly. For > and <=, flip operands and - // condition code. + if (HasSETCCCARRY || + TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) { + // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip + // operands and condition code. bool FlipOperands = false; switch (CCCode) { case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; @@ -2923,27 +3046,28 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, std::swap(LHSHi, RHSHi); } // Perform a wide subtraction, feeding the carry from the low part into - // SETCCE. The SETCCE operation is essentially looking at the high part of - // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or - // positive iff LHS >= RHS. - SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); - SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo); - SDValue Res = - DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()), - LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode)); + // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially + // looking at the high part of the result of LHS - RHS. It is negative + // iff LHS < RHS. It is zero or positive iff LHS >= RHS. + EVT LoVT = LHSLo.getValueType(); + SDVTList VTList = DAG.getVTList( + LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue); + SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl, + VTList, LHSLo, RHSLo); + SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl, + getSetCCResultType(HiVT), LHSHi, RHSHi, + LowCmp.getValue(1), DAG.getCondCode(CCCode)); NewLHS = Res; NewRHS = SDValue(); return; } - NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, ISD::SETEQ, false, - DagCombineInfo, dl); + NewLHS = TLI.SimplifySetCC(getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ, + false, DagCombineInfo, dl); if (!NewLHS.getNode()) - NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), - LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), - NewLHS, LoCmp, HiCmp); + NewLHS = + DAG.getSetCC(dl, getSetCCResultType(HiVT), LHSHi, RHSHi, ISD::SETEQ); + NewLHS = DAG.getSelect(dl, LoCmp.getValueType(), NewLHS, LoCmp, HiCmp); NewRHS = SDValue(); } @@ -2996,8 +3120,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, - DAG.getCondCode(CCCode)), 0); + return SDValue( + DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { @@ -3018,6 +3142,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { LowCmp.getValue(1), Cond); } +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCCARRY for the high. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType()); + SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // The value being shifted is legal, but the shift amount is too big. // It follows that either the result of the shift is undefined, or the @@ -3251,7 +3393,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); + return DAG.getBuildVector(NOutVT, dl, Ops); } @@ -3294,7 +3436,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); + return DAG.getBuildVector(NOutVT, dl, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { @@ -3342,7 +3484,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); + return DAG.getBuildVector(NOutVT, dl, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) { @@ -3445,5 +3587,5 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps); + return DAG.getBuildVector(N->getValueType(0), dl, NewOps); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 0a2b680e1c66edc630614a3c39f280db93a358eb..154af46c94464d0c8d576ad5da80f8129efe8e1c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -925,9 +925,9 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { assert(Op.getValueType().isVector() && "Only applies to vectors!"); unsigned EltWidth = Op.getScalarValueSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); - unsigned NumElts = Op.getValueType().getVectorNumElements(); + auto EltCnt = Op.getValueType().getVectorElementCount(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), - EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); + EVT::getVectorVT(*DAG.getContext(), EltNVT, EltCnt), Op); } SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 80c939700518f7dbee3262fbad87580afee45627..8e999188d8e104013071d417619931b0cc693376 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -279,6 +279,7 @@ private: SDValue PromoteIntRes_SRL(SDNode *N); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); @@ -311,6 +312,7 @@ private: SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -350,6 +352,7 @@ private: void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -378,6 +381,7 @@ private: SDValue ExpandIntOp_SELECT_CC(SDNode *N); SDValue ExpandIntOp_SETCC(SDNode *N); SDValue ExpandIntOp_SETCCE(SDNode *N); + SDValue ExpandIntOp_SETCCCARRY(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -428,7 +432,7 @@ private: SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); @@ -672,6 +676,7 @@ private: // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); SDValue SplitVecOp_TruncateHelper(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c02b8960b36cbc908281088eafb237a4c9e56579..aa69e0e2adfce5c8c14bab61c0e6d6c78333762b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -362,8 +362,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SmallVector Ops; IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, - makeArrayRef(Ops.data(), NumElts)); + SDValue Vec = + DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts)); return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } @@ -396,10 +396,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { NewElts.push_back(Hi); } - SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*DAG.getContext(), - NewVT, NewElts.size()), - NewElts); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()); + SDValue NewVec = DAG.getBuildVector(NewVecVT, dl, NewElts); // Convert the new vector to the old vector type. return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); @@ -458,7 +456,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); for (unsigned i = 1; i < NumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + return DAG.getBuildVector(VT, dl, Ops); } SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 5f167f8de1cfc55da1a6d539741da241b65dc0fe..9355dbe77f94e31a81c77641732bac6e42e83331 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -225,6 +225,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { } return TranslateLegalizeResults(Op, Lowered); } + LLVM_FALLTHROUGH; case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandLoad(Op)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 78fddb5ce8f582a96108435091ebc79ecfb45628..ff0e609803d8a636fb5befcac15ee7170838964b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -512,7 +512,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect. - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); + return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op); } /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. @@ -526,14 +526,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { /// If the input is a vector that needs to be scalarized, it must be <1 x ty>, /// so just return the element, ignoring the index. SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + EVT VT = N->getValueType(0); SDValue Res = GetScalarizedVector(N->getOperand(0)); - if (Res.getValueType() != N->getValueType(0)) - Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), - Res); + if (Res.getValueType() != VT) + Res = VT.isFloatingPoint() + ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res) + : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res); return Res; } - /// If the input condition is a vector that needs to be scalarized, it must be /// <1 x i1>, so just convert to a normal ISD::SELECT /// (still with vector output type since that was acceptable if we got here). @@ -730,7 +731,7 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), RHSLo, RHSHi); SDLoc dl(N); - const SDNodeFlags *Flags = N->getFlags(); + const SDNodeFlags Flags = N->getFlags(); unsigned Opcode = N->getOpcode(); Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); @@ -1293,12 +1294,9 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, if ((NumElements & 1) == 0 && SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { LLVMContext &Ctx = *DAG.getContext(); - EVT NewSrcVT = EVT::getVectorVT( - Ctx, EVT::getIntegerVT( - Ctx, SrcVT.getScalarSizeInBits() * 2), - NumElements); - EVT SplitSrcVT = - EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); + EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx); + EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx); + EVT SplitLoVT, SplitHiVT; std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && @@ -1515,6 +1513,22 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ZERO_EXTEND_VECTOR_INREG: Res = SplitVecOp_ExtVecInRegOp(N); break; + + case ISD::VECREDUCE_FADD: + case ISD::VECREDUCE_FMUL: + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_MUL: + case ISD::VECREDUCE_AND: + case ISD::VECREDUCE_OR: + case ISD::VECREDUCE_XOR: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + Res = SplitVecOp_VECREDUCE(N, OpNo); + break; } } @@ -1567,6 +1581,48 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); } +SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) { + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc dl(N); + + SDValue VecOp = N->getOperand(OpNo); + EVT VecVT = VecOp.getValueType(); + assert(VecVT.isVector() && "Can only split reduce vector operand"); + GetSplitVector(VecOp, Lo, Hi); + EVT LoOpVT, HiOpVT; + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT); + + bool NoNaN = N->getFlags().hasNoNaNs(); + unsigned CombineOpc = 0; + switch (N->getOpcode()) { + case ISD::VECREDUCE_FADD: CombineOpc = ISD::FADD; break; + case ISD::VECREDUCE_FMUL: CombineOpc = ISD::FMUL; break; + case ISD::VECREDUCE_ADD: CombineOpc = ISD::ADD; break; + case ISD::VECREDUCE_MUL: CombineOpc = ISD::MUL; break; + case ISD::VECREDUCE_AND: CombineOpc = ISD::AND; break; + case ISD::VECREDUCE_OR: CombineOpc = ISD::OR; break; + case ISD::VECREDUCE_XOR: CombineOpc = ISD::XOR; break; + case ISD::VECREDUCE_SMAX: CombineOpc = ISD::SMAX; break; + case ISD::VECREDUCE_SMIN: CombineOpc = ISD::SMIN; break; + case ISD::VECREDUCE_UMAX: CombineOpc = ISD::UMAX; break; + case ISD::VECREDUCE_UMIN: CombineOpc = ISD::UMIN; break; + case ISD::VECREDUCE_FMAX: + CombineOpc = NoNaN ? ISD::FMAXNUM : ISD::FMAXNAN; + break; + case ISD::VECREDUCE_FMIN: + CombineOpc = NoNaN ? ISD::FMINNUM : ISD::FMINNAN; + break; + default: + llvm_unreachable("Unexpected reduce ISD node"); + } + + // Use the appropriate scalar instruction on the split subvectors before + // reducing the now partially reduced smaller vector. + SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi); + return DAG.getNode(N->getOpcode(), dl, ResVT, Partial); +} + SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. EVT ResVT = N->getValueType(0); @@ -2222,7 +2278,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - const SDNodeFlags *Flags = N->getFlags(); + const SDNodeFlags Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); @@ -2370,7 +2426,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - const SDNodeFlags *Flags = N->getFlags(); + const SDNodeFlags Flags = N->getFlags(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); @@ -2634,7 +2690,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { if (InVT.isVector()) NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); else - NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); + NewVec = DAG.getBuildVector(NewInVT, dl, Ops); return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } @@ -3012,8 +3068,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { // Don't touch if this will be scalarized. EVT FinalVT = VSelVT; while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector) - FinalVT = EVT::getVectorVT(Ctx, FinalVT.getVectorElementType(), - FinalVT.getVectorNumElements() / 2); + FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx); + if (FinalVT.getVectorNumElements() == 1) return SDValue(); diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index a1d70ab6f036fbc46adcfa7926059c988a534ab9..a21b4c7332540be9eaf29f6cce4e6fcdec8e0838 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -67,12 +67,11 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) unsigned ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { unsigned NumberDeps = 0; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) continue; - SUnit *PredSU = I->getSUnit(); + SUnit *PredSU = Pred.getSUnit(); const SDNode *ScegN = PredSU->getNode(); if (!ScegN) @@ -105,12 +104,11 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, unsigned RCId) { unsigned NumberDeps = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) + for (const SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); const SDNode *ScegN = SuccSU->getNode(); if (!ScegN) continue; @@ -142,9 +140,8 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, static unsigned numberCtrlDepsInSU(SUnit *SU) { unsigned NumberDeps = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) - if (I->isCtrl()) + for (const SDep &Succ : SU->Succs) + if (Succ.isCtrl()) NumberDeps++; return NumberDeps; @@ -152,9 +149,8 @@ static unsigned numberCtrlDepsInSU(SUnit *SU) { static unsigned numberCtrlPredInSU(SUnit *SU) { unsigned NumberDeps = 0; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) - if (I->isCtrl()) + for (SDep &Pred : SU->Preds) + if (Pred.isCtrl()) NumberDeps++; return NumberDeps; @@ -212,15 +208,14 @@ bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { /// of SU, return it, otherwise return null. SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { SUnit *OnlyAvailablePred = nullptr; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - SUnit &Pred = *I->getSUnit(); - if (!Pred.isScheduled) { + for (const SDep &Pred : SU->Preds) { + SUnit &PredSU = *Pred.getSUnit(); + if (!PredSU.isScheduled) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + if (OnlyAvailablePred && OnlyAvailablePred != &PredSU) return nullptr; - OnlyAvailablePred = &Pred; + OnlyAvailablePred = &PredSU; } } return OnlyAvailablePred; @@ -230,9 +225,8 @@ void ResourcePriorityQueue::push(SUnit *SU) { // Look at all of the successors of this node. Count the number of nodes that // this node is the sole unscheduled node for. unsigned NumNodesBlocking = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) - if (getSingleUnscheduledPred(I->getSUnit()) == SU) + for (const SDep &Succ : SU->Succs) + if (getSingleUnscheduledPred(Succ.getSUnit()) == SU) ++NumNodesBlocking; NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; @@ -269,14 +263,13 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { // Now see if there are no other dependencies // to instructions already in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) - for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), - E = Packet[i]->Succs.end(); I != E; ++I) { + for (const SDep &Succ : Packet[i]->Succs) { // Since we do not add pseudos to packets, might as well // ignore order deps. - if (I->isCtrl()) + if (Succ.isCtrl()) continue; - if (I->getSUnit() == SU) + if (Succ.getSUnit() == SU) return false; } @@ -499,11 +492,10 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) { } } } - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0)) + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0)) continue; - --I->getSUnit()->NumRegDefsLeft; + --Pred.getSUnit()->NumRegDefsLeft; } } @@ -515,10 +507,9 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) { // number of live ranges. All others, increase it. unsigned NumberNonControlDeps = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - adjustPriorityOfUnscheduledPreds(I->getSUnit()); - if (!I->isCtrl()) + for (const SDep &Succ : SU->Succs) { + adjustPriorityOfUnscheduledPreds(Succ.getSUnit()); + if (!Succ.isCtrl()) NumberNonControlDeps++; } @@ -595,8 +586,7 @@ SUnit *ResourcePriorityQueue::pop() { std::vector::iterator Best = Queue.begin(); if (!DisableDFASched) { int BestCost = SUSchedulingCost(*Best); - for (std::vector::iterator I = std::next(Queue.begin()), - E = Queue.end(); I != E; ++I) { + for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) { if (SUSchedulingCost(*I) > BestCost) { BestCost = SUSchedulingCost(*I); @@ -606,8 +596,7 @@ SUnit *ResourcePriorityQueue::pop() { } // Use default TD scheduling mechanism. else { - for (std::vector::iterator I = std::next(Queue.begin()), - E = Queue.end(); I != E; ++I) + for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 62e7733ecd2bb45f99a6a1e85701d74dc0beb3ae..1379940932772a98624d103c8f3502f596bbd117 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SchedulerRegistry.h" #include "InstrEmitter.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -160,18 +160,17 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { // Bottom up: release predecessors - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - ReleasePred(SU, &*I); - if (I->isAssignedRegDep()) { + for (SDep &Pred : SU->Preds) { + ReleasePred(SU, &Pred); + if (Pred.isAssignedRegDep()) { // This is a physical register dependency and it's impossible or // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. - if (!LiveRegDefs[I->getReg()]) { + if (!LiveRegDefs[Pred.getReg()]) { ++NumLiveRegs; - LiveRegDefs[I->getReg()] = I->getSUnit(); - LiveRegCycles[I->getReg()] = CurCycle; + LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); + LiveRegCycles[Pred.getReg()] = CurCycle; } } } @@ -191,16 +190,15 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { ReleasePredecessors(SU, CurCycle); // Release all the implicit physical register defs that are live. - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isAssignedRegDep()) { - if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + for (SDep &Succ : SU->Succs) { + if (Succ.isAssignedRegDep()) { + if (LiveRegCycles[Succ.getReg()] == Succ.getSUnit()->getHeight()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); - assert(LiveRegDefs[I->getReg()] == SU && + assert(LiveRegDefs[Succ.getReg()] == SU && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = nullptr; - LiveRegCycles[I->getReg()] = 0; + LiveRegDefs[Succ.getReg()] = nullptr; + LiveRegCycles[Succ.getReg()] = 0; } } } @@ -282,22 +280,20 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SmallVector LoadPreds; SmallVector NodePreds; SmallVector NodeSuccs; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) - ChainPred = *I; - else if (I->getSUnit()->getNode() && - I->getSUnit()->getNode()->isOperandOf(LoadNode)) - LoadPreds.push_back(*I); + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + ChainPred = Pred; + else if (Pred.getSUnit()->getNode() && + Pred.getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(Pred); else - NodePreds.push_back(*I); + NodePreds.push_back(Pred); } - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isCtrl()) - ChainSuccs.push_back(*I); + for (SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + ChainSuccs.push_back(Succ); else - NodeSuccs.push_back(*I); + NodeSuccs.push_back(Succ); } if (ChainPred.getSUnit()) { @@ -354,21 +350,19 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { NewSU = Clone(SU); // New SUnit has the exact same predecessors. - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) - if (!I->isArtificial()) - AddPred(NewSU, *I); + for (SDep &Pred : SU->Preds) + if (!Pred.isArtificial()) + AddPred(NewSU, Pred); // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector, 4> DelDeps; - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isArtificial()) + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->isScheduled) { - SDep D = *I; + SDep D = Succ; D.setSUnit(NewSU); AddPred(SuccSU, D); D.setSUnit(SU); @@ -399,16 +393,15 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector, 4> DelDeps; - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isArtificial()) + for (SDep &Succ : SU->Succs) { + if (Succ.isArtificial()) continue; - SUnit *SuccSU = I->getSUnit(); + SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->isScheduled) { - SDep D = *I; + SDep D = Succ; D.setSUnit(CopyToSU); AddPred(SuccSU, D); - DelDeps.push_back(std::make_pair(SuccSU, *I)); + DelDeps.push_back(std::make_pair(SuccSU, Succ)); } } for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { @@ -479,10 +472,9 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, SmallSet RegAdded; // If this node would clobber any "live" register, then it's not ready. - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isAssignedRegDep()) { - CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + for (SDep &Pred : SU->Preds) { + if (Pred.isAssignedRegDep()) { + CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs, RegAdded, LRegs, TRI); } } @@ -755,9 +747,8 @@ void ScheduleDAGLinearize::Schedule() { // Glue user must be scheduled together with the glue operand. So other // users of the glue operand must be treated as its users. SDNode *ImmGUser = Glue->getGluedUser(); - for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); - ui != ue; ++ui) - if (*ui == ImmGUser) + for (const SDNode *U : Glue->uses()) + if (U == ImmGUser) --Degree; GUser->setNodeId(UDegree + Degree); Glue->setNodeId(1); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e923e30e5037732c5087e83c7117bfcbe5d0a793..593efc5121f90bf7521196eba308ff7d683ab7fb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -15,13 +15,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -226,6 +226,7 @@ private: void UnscheduleNodeBottomUp(SUnit*); void RestoreHazardCheckerBottomUp(); void BacktrackBottomUp(SUnit*, SUnit*); + SUnit *TryUnfoldSU(SUnit *); SUnit *CopyAndMoveSuccessors(SUnit*); void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, @@ -520,21 +521,20 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, /// interference on flags. void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { // Bottom up: release predecessors - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - ReleasePred(SU, &*I); - if (I->isAssignedRegDep()) { + for (SDep &Pred : SU->Preds) { + ReleasePred(SU, &Pred); + if (Pred.isAssignedRegDep()) { // This is a physical register dependency and it's impossible or // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. - SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; - assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && + SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef; + assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) && "interference on register dependence"); - LiveRegDefs[I->getReg()] = I->getSUnit(); - if (!LiveRegGens[I->getReg()]) { + LiveRegDefs[Pred.getReg()] = Pred.getSUnit(); + if (!LiveRegGens[Pred.getReg()]) { ++NumLiveRegs; - LiveRegGens[I->getReg()] = SU; + LiveRegGens[Pred.getReg()] = SU; } } } @@ -733,15 +733,14 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { ReleasePredecessors(SU); // Release all the implicit physical register defs that are live. - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - // LiveRegDegs[I->getReg()] != SU when SU is a two-address node. - if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { + for (SDep &Succ : SU->Succs) { + // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node. + if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = nullptr; - LiveRegGens[I->getReg()] = nullptr; - releaseInterferences(I->getReg()); + LiveRegDefs[Succ.getReg()] = nullptr; + LiveRegGens[Succ.getReg()] = nullptr; + releaseInterferences(Succ.getReg()); } } // Release the special call resource dependence, if this is the beginning @@ -782,7 +781,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } /// CapturePred - This does the opposite of ReleasePred. Since SU is being -/// unscheduled, incrcease the succ left count of its predecessors. Remove +/// unscheduled, increase the succ left count of its predecessors. Remove /// them from AvailableQueue if necessary. void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); @@ -802,17 +801,16 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(SU->dump(this)); - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - CapturePred(&*I); - if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){ + for (SDep &Pred : SU->Preds) { + CapturePred(&Pred); + if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); - assert(LiveRegDefs[I->getReg()] == I->getSUnit() && + assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = nullptr; - LiveRegGens[I->getReg()] = nullptr; - releaseInterferences(I->getReg()); + LiveRegDefs[Pred.getReg()] = nullptr; + LiveRegGens[Pred.getReg()] = nullptr; + releaseInterferences(Pred.getReg()); } } @@ -895,7 +893,7 @@ void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { std::vector::const_iterator I = (Sequence.end() - LookAhead); unsigned HazardCycle = (*I)->getHeight(); - for (std::vector::const_iterator E = Sequence.end(); I != E; ++I) { + for (auto E = Sequence.end(); I != E; ++I) { SUnit *SU = *I; for (; SU->getHeight() > HazardCycle; ++HazardCycle) { HazardRec->RecedeCycle(); @@ -937,6 +935,146 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) { return false; } +/// TryUnfold - Attempt to unfold +SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { + SDNode *N = SU->getNode(); + // Use while over if to ease fall through. + SmallVector NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return nullptr; + + // unfolding an x86 DEC64m operation results in store, dec, load which + // can't be handled here so quit + if (NewNodes.size() == 3) + return nullptr; + + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. + bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + // If LoadSU has already been scheduled, we should clone it but + // this would negate the benefit to unfolding so just return SU. + if (LoadSU->isScheduled) + return SU; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); + computeLatency(LoadSU); + } + + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); + + // Now that we are committed to unfolding replace DAG Uses. + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1), + SDValue(LoadNode, 1)); + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); + computeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector ChainPreds; + SmallVector ChainSuccs; + SmallVector LoadPreds; + SmallVector NodePreds; + SmallVector NodeSuccs; + for (SDep &Pred : SU->Preds) { + if (Pred.isCtrl()) + ChainPreds.push_back(Pred); + else if (isOperandOf(Pred.getSUnit(), LoadNode)) + LoadPreds.push_back(Pred); + else + NodePreds.push_back(Pred); + } + for (SDep &Succ : SU->Succs) { + if (Succ.isCtrl()) + ChainSuccs.push_back(Succ); + else + NodeSuccs.push_back(Succ); + } + + // Now assign edges to the newly-created nodes. + for (const SDep &Pred : ChainPreds) { + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (const SDep &Pred : LoadPreds) { + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (const SDep &Pred : NodePreds) { + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (SDep D : NodeSuccs) { + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + // Balance register pressure. + if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled && + !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; + } + for (SDep D : ChainSuccs) { + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + SDep D(LoadSU, SDep::Data, 0); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) + NewSU->isAvailable = true; + + return NewSU; +} + /// CopyAndMoveSuccessors - Clone the specified node and move its scheduled /// successors to the newly created node. SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { @@ -962,135 +1100,16 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { return nullptr; } + // If possible unfold instruction. if (TryUnfold) { - SmallVector NewNodes; - if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + SUnit *UnfoldSU = TryUnfoldSU(SU); + if (!UnfoldSU) return nullptr; - - // unfolding an x86 DEC64m operation results in store, dec, load which - // can't be handled here so quit - if (NewNodes.size() == 3) - return nullptr; - - DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); - assert(NewNodes.size() == 2 && "Expected a load folding node!"); - - N = NewNodes[1]; - SDNode *LoadNode = NewNodes[0]; - unsigned NumVals = N->getNumValues(); - unsigned OldNumVals = SU->getNode()->getNumValues(); - for (unsigned i = 0; i != NumVals; ++i) - DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); - DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), - SDValue(LoadNode, 1)); - - // LoadNode may already exist. This can happen when there is another - // load from the same location and producing the same type of value - // but it has different alignment or volatileness. - bool isNewLoad = true; - SUnit *LoadSU; - if (LoadNode->getNodeId() != -1) { - LoadSU = &SUnits[LoadNode->getNodeId()]; - isNewLoad = false; - } else { - LoadSU = CreateNewSUnit(LoadNode); - LoadNode->setNodeId(LoadSU->NodeNum); - - InitNumRegDefsLeft(LoadSU); - computeLatency(LoadSU); - } - - SUnit *NewSU = CreateNewSUnit(N); - assert(N->getNodeId() == -1 && "Node already inserted!"); - N->setNodeId(NewSU->NodeNum); - - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { - if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { - NewSU->isTwoAddress = true; - break; - } - } - if (MCID.isCommutable()) - NewSU->isCommutable = true; - - InitNumRegDefsLeft(NewSU); - computeLatency(NewSU); - - // Record all the edges to and from the old SU, by category. - SmallVector ChainPreds; - SmallVector ChainSuccs; - SmallVector LoadPreds; - SmallVector NodePreds; - SmallVector NodeSuccs; - for (SDep &Pred : SU->Preds) { - if (Pred.isCtrl()) - ChainPreds.push_back(Pred); - else if (isOperandOf(Pred.getSUnit(), LoadNode)) - LoadPreds.push_back(Pred); - else - NodePreds.push_back(Pred); - } - for (SDep &Succ : SU->Succs) { - if (Succ.isCtrl()) - ChainSuccs.push_back(Succ); - else - NodeSuccs.push_back(Succ); - } - - // Now assign edges to the newly-created nodes. - for (const SDep &Pred : ChainPreds) { - RemovePred(SU, Pred); - if (isNewLoad) - AddPred(LoadSU, Pred); - } - for (const SDep &Pred : LoadPreds) { - RemovePred(SU, Pred); - if (isNewLoad) - AddPred(LoadSU, Pred); - } - for (const SDep &Pred : NodePreds) { - RemovePred(SU, Pred); - AddPred(NewSU, Pred); - } - for (SDep D : NodeSuccs) { - SUnit *SuccDep = D.getSUnit(); - D.setSUnit(SU); - RemovePred(SuccDep, D); - D.setSUnit(NewSU); - AddPred(SuccDep, D); - // Balance register pressure. - if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled - && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) - --NewSU->NumRegDefsLeft; - } - for (SDep D : ChainSuccs) { - SUnit *SuccDep = D.getSUnit(); - D.setSUnit(SU); - RemovePred(SuccDep, D); - if (isNewLoad) { - D.setSUnit(LoadSU); - AddPred(SuccDep, D); - } - } - - // Add a data dependency to reflect that NewSU reads the value defined - // by LoadSU. - SDep D(LoadSU, SDep::Data, 0); - D.setLatency(LoadSU->Latency); - AddPred(NewSU, D); - - if (isNewLoad) - AvailableQueue->addNode(LoadSU); - AvailableQueue->addNode(NewSU); - - ++NumUnfolds; - - if (NewSU->NumSuccsLeft == 0) { - NewSU->isAvailable = true; - return NewSU; - } - SU = NewSU; + SU = UnfoldSU; + N = SU->getNode(); + // If this can be scheduled don't bother duplicating and just return + if (SU->NumSuccsLeft == 0) + return SU; } DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); @@ -1261,10 +1280,9 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl &LRegs) { // // If SU is the currently live definition of the same register that it uses, // then we are free to schedule it. - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) - CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(), + for (SDep &Pred : SU->Preds) { + if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU) + CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(), RegAdded, LRegs, TRI); } @@ -1320,6 +1338,18 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl &LRegs) { RegAdded, LRegs); const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (MCID.hasOptionalDef()) { + // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit. + // This operand can be either a def of CPSR, if the S bit is set; or a use + // of %noreg. When the OptionalDef is set to a valid register, we need to + // handle it in the same way as an ImplicitDef. + for (unsigned i = 0; i < MCID.getNumDefs(); ++i) + if (MCID.OpInfo[i].isOptionalDef()) { + const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues()); + unsigned Reg = cast(OptionalDef)->getReg(); + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + } + } if (!MCID.ImplicitDefs) continue; for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) @@ -1731,8 +1761,7 @@ protected: template static SUnit *popFromQueueImpl(std::vector &Q, SF &Picker) { std::vector::iterator Best = Q.begin(); - for (std::vector::iterator I = std::next(Q.begin()), - E = Q.end(); I != E; ++I) + for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; SUnit *V = *Best; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index eee4a4b06718646769bc4f633708a27f571497d4..631cb34717c4fb9233ab9312fe1b09ae1ca730dd 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -18,12 +18,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 003ea5030bfce21241550e0da29349732c61815c..7abdc76cb004f66b96e54fa405078bcdc071880a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===// // // The LLVM Compiler Infrastructure // @@ -13,43 +13,65 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" -#include "llvm/IR/CallingConv.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include -#include +#include +#include +#include +#include +#include +#include #include +#include using namespace llvm; @@ -268,7 +290,6 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { return ISD::CondCode(Operation); } - /// For an integer comparison, return 1 if the comparison is a signed operation /// and 2 if the result is an unsigned comparison. Return zero if the operation /// does not depend on the sign of the input (setne and seteq). @@ -337,7 +358,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, //===----------------------------------------------------------------------===// /// AddNodeIDOpcode - Add the node opcode to the NodeID data. -/// static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { ID.AddInteger(OpC); } @@ -349,7 +369,6 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { } /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. -/// static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef Ops) { for (auto& Op : Ops) { @@ -359,7 +378,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, } /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. -/// static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef Ops) { for (auto& Op : Ops) { @@ -391,10 +409,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } case ISD::TargetConstantFP: - case ISD::ConstantFP: { + case ISD::ConstantFP: ID.AddPointer(cast(N)->getConstantFPValue()); break; - } case ISD::TargetGlobalAddress: case ISD::GlobalAddress: case ISD::TargetGlobalTLSAddress: @@ -572,6 +589,11 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl &DeadNodes) { // worklist. while (!DeadNodes.empty()) { SDNode *N = DeadNodes.pop_back_val(); + // Skip to next node if we've already managed to delete the node. This could + // happen if replacing a node causes a node previously added to the node to + // be deleted. + if (N->getOpcode() == ISD::DELETED_NODE) + continue; for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) DUL->NodeDeleted(N, nullptr); @@ -639,12 +661,15 @@ void SelectionDAG::DeallocateNode(SDNode *N) { // If we have operands, deallocate them. removeOperands(N); + NodeAllocator.Deallocate(AllNodes.remove(N)); + // Set the opcode to DELETED_NODE to help catch bugs when node // memory is reallocated. + // FIXME: There are places in SDag that have grown a dependency on the opcode + // value in the released node. + __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType)); N->NodeType = ISD::DELETED_NODE; - NodeAllocator.Deallocate(AllNodes.remove(N)); - // If any of the SDDbgValue nodes refer to this SDNode, invalidate // them and forget about that node. DbgInfo->erase(N); @@ -766,7 +791,6 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { /// maps and modified in place. Add it back to the CSE maps, unless an identical /// node already exists, in which case transfer all its users to the existing /// node. This transfer can potentially trigger recursive merging. -/// void SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { // For node types that aren't CSE'd, just act as if no identical node @@ -807,8 +831,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) - if (const SDNodeFlags *Flags = N->getFlags()) - Node->intersectFlagsWith(Flags); + Node->intersectFlagsWith(N->getFlags()); return Node; } @@ -828,12 +851,10 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) - if (const SDNodeFlags *Flags = N->getFlags()) - Node->intersectFlagsWith(Flags); + Node->intersectFlagsWith(N->getFlags()); return Node; } - /// FindModifiedNodeSlot - Find a slot for the specified node if its operands /// were replaced with those specified. If this node is never memoized, /// return null, otherwise return a pointer to the slot it would take. If a @@ -848,8 +869,7 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef Ops, AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); if (Node) - if (const SDNodeFlags *Flags = N->getFlags()) - Node->intersectFlagsWith(Flags); + Node->intersectFlagsWith(N->getFlags()); return Node; } @@ -863,10 +883,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL), + : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), - UpdateListeners(nullptr) { + Root(getEntryNode()) { InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); } @@ -897,29 +916,6 @@ void SelectionDAG::allnodes_clear() { #endif } -SDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, const SDLoc &DL, - SDVTList VTs, SDValue N1, SDValue N2, - const SDNodeFlags *Flags) { - SDValue Ops[] = {N1, N2}; - - if (isBinOpWithFlags(Opcode)) { - // If no flags were passed in, use a default flags object. - SDNodeFlags F; - if (Flags == nullptr) - Flags = &F; - - auto *FN = newSDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, *Flags); - createOperands(FN, Ops); - - return FN; - } - - auto *N = newSDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); - createOperands(N, Ops); - return N; -} - SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); @@ -981,6 +977,12 @@ void SelectionDAG::clear() { DbgInfo->clear(); } +SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) + ? getNode(ISD::FP_EXTEND, DL, VT, Op) + : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL)); +} + SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : @@ -1054,7 +1056,6 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL, } /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). -/// SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = @@ -1333,7 +1334,6 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, return SDValue(N, 0); } - SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, @@ -1467,7 +1467,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, // Validate that all indices in Mask are within the range of the elements // input to the shuffle. int NElts = Mask.size(); - assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) && + assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) && "Index out of range"); // Copy the mask so we can do any needed cleanup. @@ -1826,7 +1826,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { @@ -1839,7 +1839,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, @@ -1955,7 +1955,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { unsigned BitWidth = Op.getScalarValueSizeInBits(); - return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); + return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); } /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use @@ -1963,9 +1963,9 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { /// for bits that V cannot have. bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { - APInt KnownZero, KnownOne; - computeKnownBits(Op, KnownZero, KnownOne, Depth); - return (KnownZero & Mask) == Mask; + KnownBits Known; + computeKnownBits(Op, Known, Depth); + return Mask.isSubsetOf(Known.Zero); } /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that @@ -1981,31 +1981,30 @@ static const APInt *getValidShiftAmountConstant(SDValue V) { } /// Determine which bits of Op are known to be either zero or one and return -/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are -/// those that are shared by every vector element. -void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, - APInt &KnownOne, unsigned Depth) const { +/// them in Known. For vectors, the known bits are those that are shared by +/// every vector element. +void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, + unsigned Depth) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); - computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth); + computeKnownBits(Op, Known, DemandedElts, Depth); } /// Determine which bits of Op are known to be either zero or one and return -/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows -/// us to only collect the known bits that are shared by the requested vector -/// elements. -void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, - APInt &KnownOne, const APInt &DemandedElts, +/// them in Known. The DemandedElts argument allows us to only collect the known +/// bits that are shared by the requested vector elements. +void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, + const APInt &DemandedElts, unsigned Depth) const { unsigned BitWidth = Op.getScalarValueSizeInBits(); - KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + Known = KnownBits(BitWidth); // Don't know anything. if (Depth == 6) return; // Limit search depth. - APInt KnownZero2, KnownOne2; + KnownBits Known2; unsigned NumElts = DemandedElts.getBitWidth(); if (!DemandedElts) @@ -2015,35 +2014,34 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, switch (Opcode) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast(Op)->getAPIntValue(); - KnownZero = ~KnownOne; + Known.One = cast(Op)->getAPIntValue(); + Known.Zero = ~Known.One; break; case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. assert(NumElts == Op.getValueType().getVectorNumElements() && "Unexpected vector size"); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { if (!DemandedElts[i]) continue; SDValue SrcOp = Op.getOperand(i); - computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1); + computeKnownBits(SrcOp, Known2, Depth + 1); // BUILD_VECTOR can implicitly truncate sources, we must handle this. if (SrcOp.getValueSizeInBits() != BitWidth) { assert(SrcOp.getValueSizeInBits() > BitWidth && "Expected BUILD_VECTOR implicit truncation"); - KnownOne2 = KnownOne2.trunc(BitWidth); - KnownZero2 = KnownZero2.trunc(BitWidth); + Known2 = Known2.trunc(BitWidth); } // Known bits are the values that are shared by every demanded element. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; } break; @@ -2051,7 +2049,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Collect the known bits that are shared by every vector element referenced // by the shuffle. APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); const ShuffleVectorSDNode *SVN = cast(Op); assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); for (unsigned i = 0; i != NumElts; ++i) { @@ -2062,8 +2060,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (M < 0) { // For UNDEF elements, we don't know anything about the common state of // the shuffle result. - KnownOne.clearAllBits(); - KnownZero.clearAllBits(); + Known.resetAll(); DemandedLHS.clearAllBits(); DemandedRHS.clearAllBits(); break; @@ -2077,24 +2074,24 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Known bits are the values that are shared by every demanded element. if (!!DemandedLHS) { SDValue LHS = Op.getOperand(0); - computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(LHS, Known2, DemandedLHS, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; if (!!DemandedRHS) { SDValue RHS = Op.getOperand(1); - computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(RHS, Known2, DemandedRHS, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } break; } case ISD::CONCAT_VECTORS: { // Split DemandedElts and test each of the demanded subvectors. - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); EVT SubVectorVT = Op.getOperand(0).getValueType(); unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); unsigned NumSubVectors = Op.getNumOperands(); @@ -2103,12 +2100,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, DemandedSub = DemandedSub.trunc(NumSubVectorElts); if (!!DemandedSub) { SDValue Sub = Op.getOperand(i); - computeKnownBits(Sub, KnownZero2, KnownOne2, DemandedSub, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(Sub, Known2, DemandedSub, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; } break; @@ -2123,9 +2120,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); - computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1); + computeKnownBits(Src, Known, DemandedSrc, Depth + 1); } else { - computeKnownBits(Src, KnownZero, KnownOne, Depth + 1); + computeKnownBits(Src, Known, Depth + 1); } break; } @@ -2139,7 +2136,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Fast handling of 'identity' bitcasts. if (BitWidth == SubBitWidth) { - computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1); + computeKnownBits(N0, Known, DemandedElts, Depth + 1); break; } @@ -2163,10 +2160,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, SubDemandedElts.setBit(i * SubScale); for (unsigned i = 0; i != SubScale; ++i) { - computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts.shl(i), + computeKnownBits(N0, Known2, SubDemandedElts.shl(i), Depth + 1); - KnownOne |= KnownOne2.zext(BitWidth).shl(SubBitWidth * i); - KnownZero |= KnownZero2.zext(BitWidth).shl(SubBitWidth * i); + Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i); + Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i); } } @@ -2183,16 +2180,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (DemandedElts[i]) SubDemandedElts.setBit(i / SubScale); - computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1); + computeKnownBits(N0, Known2, SubDemandedElts, Depth + 1); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Offset = (i % SubScale) * BitWidth; - KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth); - KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth); + Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); + Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; } } @@ -2200,101 +2197,89 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + Known.One &= Known2.One; // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; + Known.Zero |= Known2.Zero; break; case ISD::OR: - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + Known.Zero &= Known2.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; + Known.One |= Known2.One; break; case ISD::XOR: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); - KnownZero = KnownZeroOut; + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = KnownZeroOut; break; } case ISD::MUL: { - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(1), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // If low bits are zero in either operand, output low known-0 bits. // Also compute a conservative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. - KnownOne.clearAllBits(); - unsigned TrailZ = KnownZero.countTrailingOnes() + - KnownZero2.countTrailingOnes(); - unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + - KnownZero2.countLeadingOnes(), + unsigned TrailZ = Known.countMinTrailingZeros() + + Known2.countMinTrailingZeros(); + unsigned LeadZ = std::max(Known.countMinLeadingZeros() + + Known2.countMinLeadingZeros(), BitWidth) - BitWidth; - KnownZero.clearAllBits(); - KnownZero.setLowBits(std::min(TrailZ, BitWidth)); - KnownZero.setHighBits(std::min(LeadZ, BitWidth)); + Known.resetAll(); + Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); + Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); break; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned LeadZ = KnownZero2.countLeadingOnes(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + unsigned LeadZ = Known2.countMinLeadingZeros(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); - if (RHSUnknownLeadingOnes != BitWidth) - LeadZ = std::min(BitWidth, - LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros(); + if (RHSMaxLeadingZeros != BitWidth) + LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); - KnownZero.setHighBits(LeadZ); + Known.Zero.setHighBits(LeadZ); break; } case ISD::SELECT: - computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(2), Known, Depth+1); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), Known2, Depth+1); // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SELECT_CC: - computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(3), Known, Depth+1); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; - computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(2), Known2, Depth+1); // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SMULO: case ISD::UMULO: @@ -2307,49 +2292,46 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero << *ShAmt; - KnownOne = KnownOne << *ShAmt; + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero <<= *ShAmt; + Known.One <<= *ShAmt; // Low bits are known zero. - KnownZero.setLowBits(ShAmt->getZExtValue()); + Known.Zero.setLowBits(ShAmt->getZExtValue()); } break; case ISD::SRL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero.lshr(*ShAmt); - KnownOne = KnownOne.lshr(*ShAmt); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero.lshrInPlace(*ShAmt); + Known.One.lshrInPlace(*ShAmt); // High bits are known zero. - KnownZero.setHighBits(ShAmt->getZExtValue()); + Known.Zero.setHighBits(ShAmt->getZExtValue()); } break; case ISD::SRA: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero.lshr(*ShAmt); - KnownOne = KnownOne.lshr(*ShAmt); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.Zero.lshrInPlace(*ShAmt); + Known.One.lshrInPlace(*ShAmt); // If we know the value of the sign bit, then we know it is copied across // the high bits by the shift amount. - APInt SignBit = APInt::getSignBit(BitWidth); - SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask. - if (KnownZero.intersects(SignBit)) { - KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. - } else if (KnownOne.intersects(SignBit)) { - KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one. + APInt SignMask = APInt::getSignMask(BitWidth); + SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. + if (Known.Zero.intersects(SignMask)) { + Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. + } else if (Known.One.intersects(SignMask)) { + Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one. } } break; @@ -2361,40 +2343,56 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); - APInt InSignBit = APInt::getSignBit(EBits); + APInt InSignMask = APInt::getSignMask(EBits); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. - InSignBit = InSignBit.zext(BitWidth); + InSignMask = InSignMask.zext(BitWidth); if (NewBits.getBoolValue()) - InputDemandedBits |= InSignBit; + InputDemandedBits |= InSignMask; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownOne &= InputDemandedBits; - KnownZero &= InputDemandedBits; + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known.One &= InputDemandedBits; + Known.Zero &= InputDemandedBits; // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear - KnownZero |= NewBits; - KnownOne &= ~NewBits; - } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; + if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear + Known.Zero |= NewBits; + Known.One &= ~NewBits; + } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set + Known.One |= NewBits; + Known.Zero &= ~NewBits; } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; + Known.Zero &= ~NewBits; + Known.One &= ~NewBits; } break; } case ISD::CTTZ: - case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: { + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. + unsigned PossibleTZ = Known2.countMaxTrailingZeros(); + unsigned LowBits = Log2_32(PossibleTZ) + 1; + Known.Zero.setBitsFrom(LowBits); + break; + } case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTLZ_ZERO_UNDEF: { + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + // If we have a known 1, its position is our upper bound. + unsigned PossibleLZ = Known2.countMaxLeadingZeros(); + unsigned LowBits = Log2_32(PossibleLZ) + 1; + Known.Zero.setBitsFrom(LowBits); + break; + } case ISD::CTPOP: { - KnownZero.setBitsFrom(Log2_32(BitWidth)+1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + // If we know some of the bits are zero, they can't be one. + unsigned PossibleOnes = Known2.countMaxPopulation(); + Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); break; } case ISD::LOAD: { @@ -2403,36 +2401,31 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); - KnownZero.setBitsFrom(MemBits); + Known.Zero.setBitsFrom(MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { if (LD->getExtensionType() == ISD::NON_EXTLOAD) - computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne); + computeKnownBitsFromRangeMetadata(*Ranges, Known); } break; } case ISD::ZERO_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, + Known = Known.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts.zext(InVT.getVectorNumElements()), Depth + 1); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - KnownZero.setBitsFrom(InBits); + Known = Known.zext(BitWidth); + Known.Zero.setBitsFrom(InBits); break; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - KnownZero.setBitsFrom(InBits); + Known = Known.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = Known.zext(BitWidth); + Known.Zero.setBitsFrom(InBits); break; } // TODO ISD::SIGN_EXTEND_VECTOR_INREG @@ -2440,49 +2433,41 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); + Known = Known.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If the sign bit is known to be zero or one, then sext will extend // it to the top bits, else it will just zext. - KnownZero = KnownZero.sext(BitWidth); - KnownOne = KnownOne.sext(BitWidth); + Known = Known.sext(BitWidth); break; } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.trunc(InBits); - KnownOne = KnownOne.trunc(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + Known = Known.trunc(InBits); + computeKnownBits(Op.getOperand(0), Known, Depth+1); + Known = Known.zext(BitWidth); break; } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - KnownZero = KnownZero.zext(InBits); - KnownOne = KnownOne.zext(InBits); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); + Known = Known.zext(InBits); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + Known = Known.trunc(BitWidth); break; } case ISD::AssertZext: { EVT VT = cast(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - KnownZero |= (~InMask); - KnownOne &= (~KnownZero); + computeKnownBits(Op.getOperand(0), Known, Depth+1); + Known.Zero |= (~InMask); + Known.One &= (~Known.Zero); break; } case ISD::FGETSIGN: // All bits are zero except the low bit. - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; case ISD::USUBO: case ISD::SSUBO: @@ -2491,7 +2476,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; } LLVM_FALLTHROUGH; @@ -2505,16 +2490,16 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is // from [0-C]. - if ((KnownZero2 & MaskV) == MaskV) { + if ((Known2.Zero & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. - KnownZero.setHighBits(NLZ2); + Known.Zero.setHighBits(NLZ2); } } } @@ -2522,27 +2507,25 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // If low bits are know to be zero in both operands, then we know they are // going to be 0 in the result. Both addition and complement operations // preserve the low zero bits. - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); if (KnownZeroLow == 0) break; - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - KnownZeroLow = std::min(KnownZeroLow, - KnownZero2.countTrailingOnes()); - KnownZero.setBits(0, KnownZeroLow); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + Known.Zero.setLowBits(KnownZeroLow); break; } case ISD::UADDO: case ISD::SADDO: + case ISD::ADDCARRY: if (Op.getResNo() == 1) { // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; } LLVM_FALLTHROUGH; @@ -2556,31 +2539,28 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // known to be clear. For example, if one input has the top 10 bits clear // and the other has the top 8 bits clear, we know the top 7 bits of the // output must be clear. - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - unsigned KnownZeroHigh = KnownZero2.countLeadingOnes(); - unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); - KnownZeroHigh = std::min(KnownZeroHigh, - KnownZero2.countLeadingOnes()); - KnownZeroLow = std::min(KnownZeroLow, - KnownZero2.countTrailingOnes()); - - if (Opcode == ISD::ADDE) { - // With ADDE, a carry bit may be added in, so we can only use this - // information if we know (at least) that the low two bits are clear. - // We then return to the caller that the low bit is unknown but that - // other bits are known zero. + KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + + if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) { + // With ADDE and ADDCARRY, a carry bit may be added in, so we can only + // use this information if we know (at least) that the low two bits are + // clear. We then return to the caller that the low bit is unknown but + // that other bits are known zero. if (KnownZeroLow >= 2) - KnownZero.setBits(1, KnownZeroLow); + Known.Zero.setBits(1, KnownZeroLow); break; } - KnownZero.setLowBits(KnownZeroLow); + Known.Zero.setLowBits(KnownZeroLow); if (KnownZeroHigh > 1) - KnownZero.setHighBits(KnownZeroHigh - 1); + Known.Zero.setHighBits(KnownZeroHigh - 1); break; } case ISD::SREM: @@ -2588,23 +2568,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // The low bits of the first operand are unchanged by the srem. - KnownZero = KnownZero2 & LowBits; - KnownOne = KnownOne2 & LowBits; + Known.Zero = Known2.Zero & LowBits; + Known.One = Known2.One & LowBits; // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) - KnownZero |= ~LowBits; + if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) + Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) - KnownOne |= ~LowBits; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)) + Known.One |= ~LowBits; + assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); } } break; @@ -2613,42 +2592,37 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // The upper bits are all zero, the lower ones are unchanged. - KnownZero = KnownZero2 | ~LowBits; - KnownOne = KnownOne2 & LowBits; + Known.Zero = Known2.Zero | ~LowBits; + Known.One = Known2.One & LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); - uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), - KnownZero2.countLeadingOnes()); - KnownOne.clearAllBits(); - KnownZero.clearAllBits(); - KnownZero.setHighBits(Leaders); + uint32_t Leaders = + std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); + Known.resetAll(); + Known.Zero.setHighBits(Leaders); break; } case ISD::EXTRACT_ELEMENT: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), Known, Depth+1); const unsigned Index = Op.getConstantOperandVal(1); const unsigned BitWidth = Op.getValueSizeInBits(); // Remove low part of known bits mask - KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); - KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth); + Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth); + Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth); // Remove high part of known bit mask - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); + Known = Known.trunc(BitWidth); break; } case ISD::EXTRACT_VECTOR_ELT: { @@ -2660,24 +2634,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, const unsigned NumSrcElts = VecVT.getVectorNumElements(); // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know // anything about the extended bits. - if (BitWidth > EltBitWidth) { - KnownZero = KnownZero.trunc(EltBitWidth); - KnownOne = KnownOne.trunc(EltBitWidth); - } + if (BitWidth > EltBitWidth) + Known = Known.trunc(EltBitWidth); ConstantSDNode *ConstEltNo = dyn_cast(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) { // If we know the element index, just demand that vector element. unsigned Idx = ConstEltNo->getZExtValue(); APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); - computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1); + computeKnownBits(InVec, Known, DemandedElt, Depth + 1); } else { // Unknown element index, so ignore DemandedElts and demand them all. - computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1); - } - if (BitWidth > EltBitWidth) { - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + computeKnownBits(InVec, Known, Depth + 1); } + if (BitWidth > EltBitWidth) + Known = Known.zext(BitWidth); break; } case ISD::INSERT_VECTOR_ELT: { @@ -2689,117 +2659,110 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { // If we know the element index, split the demand between the // source vector and the inserted element. - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth); unsigned EltIdx = CEltNo->getZExtValue(); // If we demand the inserted element then add its common known bits. if (DemandedElts[EltIdx]) { - computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1); - KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth()); - KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());; + computeKnownBits(InVal, Known2, Depth + 1); + Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); } // If we demand the source vector then add its common known bits, ensuring // that we don't demand the inserted element. APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); if (!!VectorElts) { - computeKnownBits(InVec, KnownZero2, KnownOne2, VectorElts, Depth + 1); - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + computeKnownBits(InVec, Known2, VectorElts, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } } else { // Unknown element index, so ignore DemandedElts and demand them all. - computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1); - computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1); - KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth()); - KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());; + computeKnownBits(InVec, Known, Depth + 1); + computeKnownBits(InVal, Known2, Depth + 1); + Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); + Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); } break; } case ISD::BITREVERSE: { - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - KnownZero = KnownZero2.reverseBits(); - KnownOne = KnownOne2.reverseBits(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known.Zero = Known2.Zero.reverseBits(); + Known.One = Known2.One.reverseBits(); break; } case ISD::BSWAP: { - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - KnownZero = KnownZero2.byteSwap(); - KnownOne = KnownOne2.byteSwap(); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); + Known.Zero = Known2.Zero.byteSwap(); + Known.One = Known2.One.byteSwap(); break; } case ISD::ABS: { - computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known2, DemandedElts, Depth + 1); // If the source's MSB is zero then we know the rest of the bits already. - if (KnownZero2[BitWidth - 1]) { - KnownZero = KnownZero2; - KnownOne = KnownOne2; + if (Known2.isNonNegative()) { + Known.Zero = Known2.Zero; + Known.One = Known2.One; break; } // We only know that the absolute values's MSB will be zero iff there is // a set bit that isn't the sign bit (otherwise it could be INT_MIN). - KnownOne2.clearBit(BitWidth - 1); - if (KnownOne2.getBoolValue()) { - KnownZero = APInt::getSignBit(BitWidth); + Known2.One.clearSignBit(); + if (Known2.One.getBoolValue()) { + Known.Zero = APInt::getSignMask(BitWidth); break; } break; } case ISD::UMIN: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); // UMIN - we know that the result will have the maximum of the // known zero leading bits of the inputs. - unsigned LeadZero = KnownZero.countLeadingOnes(); - LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes()); + unsigned LeadZero = Known.countMinLeadingZeros(); + LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros()); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; - KnownZero.setHighBits(LeadZero); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; + Known.Zero.setHighBits(LeadZero); break; } case ISD::UMAX: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, - Depth + 1); - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); // UMAX - we know that the result will have the maximum of the // known one leading bits of the inputs. - unsigned LeadOne = KnownOne.countLeadingOnes(); - LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes()); + unsigned LeadOne = Known.countMinLeadingOnes(); + LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes()); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; - KnownOne.setHighBits(LeadOne); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; + Known.One.setHighBits(LeadOne); break; } case ISD::SMIN: case ISD::SMAX: { - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If we don't know any bits, early out. - if (!KnownOne && !KnownZero) + if (!Known.One && !Known.Zero) break; - computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, - Depth + 1); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; + computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; break; } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { // The low bits are known zero if the pointer is aligned. - KnownZero.setLowBits(Log2_32(Align)); + Known.Zero.setLowBits(Log2_32(Align)); break; } break; @@ -2812,12 +2775,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, DemandedElts, - *this, Depth); + TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth); break; } - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); } SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, @@ -2826,28 +2788,28 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, if (isNullConstant(N1)) return OFK_Never; - APInt N1Zero, N1One; - computeKnownBits(N1, N1Zero, N1One); - if (N1Zero.getBoolValue()) { - APInt N0Zero, N0One; - computeKnownBits(N0, N0Zero, N0One); + KnownBits N1Known; + computeKnownBits(N1, N1Known); + if (N1Known.Zero.getBoolValue()) { + KnownBits N0Known; + computeKnownBits(N0, N0Known); bool overflow; - (~N0Zero).uadd_ov(~N1Zero, overflow); + (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); if (!overflow) return OFK_Never; } // mulhi + 1 never overflow if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && - (~N1Zero & 0x01) == ~N1Zero) + (~N1Known.Zero & 0x01) == ~N1Known.Zero) return OFK_Never; if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { - APInt N0Zero, N0One; - computeKnownBits(N0, N0Zero, N0One); + KnownBits N0Known; + computeKnownBits(N0, N0Known); - if ((~N0Zero & 0x01) == ~N0Zero) + if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) return OFK_Never; } @@ -2865,7 +2827,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // A left-shift of a constant one will have exactly one bit set because // shifting the bit off the end is undefined. if (Val.getOpcode() == ISD::SHL) { - auto *C = dyn_cast(Val.getOperand(0)); + auto *C = isConstOrConstSplat(Val.getOperand(0)); if (C && C->getAPIntValue() == 1) return true; } @@ -2873,8 +2835,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // Similarly, a logical right-shift of a constant sign-bit will have exactly // one bit set. if (Val.getOpcode() == ISD::SRL) { - auto *C = dyn_cast(Val.getOperand(0)); - if (C && C->getAPIntValue().isSignBit()) + auto *C = isConstOrConstSplat(Val.getOperand(0)); + if (C && C->getAPIntValue().isSignMask()) return true; } @@ -2891,10 +2853,9 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // to handle some common cases. // Fall back to computeKnownBits to catch other known cases. - APInt KnownZero, KnownOne; - computeKnownBits(Val, KnownZero, KnownOne); - return (KnownZero.countPopulation() == BitWidth - 1) && - (KnownOne.countPopulation() == 1); + KnownBits Known; + computeKnownBits(Val, Known); + return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1); } unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { @@ -2910,6 +2871,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarSizeInBits(); + unsigned NumElts = DemandedElts.getBitWidth(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; @@ -2953,6 +2915,39 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } return Tmp; + case ISD::VECTOR_SHUFFLE: { + // Collect the minimum number of sign bits that are shared by every vector + // element referenced by the shuffle. + APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + const ShuffleVectorSDNode *SVN = cast(Op); + assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + int M = SVN->getMaskElt(i); + if (!DemandedElts[i]) + continue; + // For UNDEF elements, we don't know anything about the common state of + // the shuffle result. + if (M < 0) + return 1; + if ((unsigned)M < NumElts) + DemandedLHS.setBit((unsigned)M % NumElts); + else + DemandedRHS.setBit((unsigned)M % NumElts); + } + Tmp = std::numeric_limits::max(); + if (!!DemandedLHS) + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); + if (!!DemandedRHS) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + // If we don't know anything, early out and try computeKnownBits fall-back. + if (Tmp == 1) + break; + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } + case ISD::SIGN_EXTEND: case ISD::SIGN_EXTEND_VECTOR_INREG: Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); @@ -3064,17 +3059,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Special case decrementing a value (ADD X, -1): if (ConstantSDNode *CRHS = dyn_cast(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { - APInt KnownZero, KnownOne; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownBits Known; + computeKnownBits(Op.getOperand(0), Known, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return VTBits; // If we are subtracting one from a positive number, there is no carry // out of the result. - if (KnownZero.isNegative()) + if (Known.isNonNegative()) return Tmp; } @@ -3089,16 +3084,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Handle NEG. if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) if (CLHS->isNullValue()) { - APInt KnownZero, KnownOne; - computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + KnownBits Known; + computeKnownBits(Op.getOperand(1), Known, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnesValue()) return VTBits; // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. - if (KnownZero.isNegative()) + if (Known.isNonNegative()) return Tmp2; // Otherwise, we treat this like a SUB. @@ -3143,9 +3138,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned EltIdx = CEltNo->getZExtValue(); // If we demand the inserted element then get its sign bits. - Tmp = UINT_MAX; - if (DemandedElts[EltIdx]) + Tmp = std::numeric_limits::max(); + if (DemandedElts[EltIdx]) { + // TODO - handle implicit truncation of inserted elements. + if (InVal.getScalarValueSizeInBits() != VTBits) + break; Tmp = ComputeNumSignBits(InVal, Depth + 1); + } // If we demand the source vector then get its sign bits, and determine // the minimum. @@ -3188,14 +3187,36 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); } - case ISD::EXTRACT_SUBVECTOR: - return ComputeNumSignBits(Op.getOperand(0), Depth + 1); + case ISD::EXTRACT_SUBVECTOR: { + // If we know the element index, just demand that subvector elements, + // otherwise demand them all. + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx); + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + } + return ComputeNumSignBits(Src, Depth + 1); + } case ISD::CONCAT_VECTORS: - // Determine the minimum number of sign bits across all input vectors. - // Early out if the result is already 1. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); - for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) - Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1)); + // Determine the minimum number of sign bits across all demanded + // elts of the input vectors. Early out if the result is already 1. + Tmp = std::numeric_limits::max(); + EVT SubVectorVT = Op.getOperand(0).getValueType(); + unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); + unsigned NumSubVectors = Op.getNumOperands(); + for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1); ++i) { + APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); + DemandedSub = DemandedSub.trunc(NumSubVectorElts); + if (!DemandedSub) + continue; + Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } @@ -3229,14 +3250,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. - APInt KnownZero, KnownOne; - computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth); + KnownBits Known; + computeKnownBits(Op, Known, DemandedElts, Depth); APInt Mask; - if (KnownZero.isNegative()) { // sign bit is 0 - Mask = KnownZero; - } else if (KnownOne.isNegative()) { // sign bit is 1; - Mask = KnownOne; + if (Known.isNonNegative()) { // sign bit is 0 + Mask = Known.Zero; + } else if (Known.isNegative()) { // sign bit is 1; + Mask = Known.One; } else { // Nothing known. return FirstAnswer; @@ -3269,8 +3290,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { if (getTarget().Options.NoNaNsFPMath) return true; - if (const BinaryWithFlagsSDNode *BF = dyn_cast(Op)) - return BF->Flags.hasNoNaNs(); + if (Op->getFlags().hasNoNaNs()) + return true; // If the value is a constant, we can obviously see if it is a NaN or not. if (const ConstantFPSDNode *C = dyn_cast(Op)) @@ -3314,16 +3335,15 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); - APInt AZero, AOne; - APInt BZero, BOne; - computeKnownBits(A, AZero, AOne); - computeKnownBits(B, BZero, BOne); - return (AZero | BZero).isAllOnesValue(); + KnownBits AKnown, BKnown; + computeKnownBits(A, AKnown); + computeKnownBits(B, BKnown); + return (AKnown.Zero | BKnown.Zero).isAllOnesValue(); } static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ArrayRef Ops, - llvm::SelectionDAG &DAG) { + SelectionDAG &DAG) { assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); assert(llvm::all_of(Ops, [Ops](SDValue Op) { @@ -3387,7 +3407,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue Operand) { + SDValue Operand, const SDNodeFlags Flags) { // Constant fold unary operations with an integer constant operand. Even // opaque constant will be folded, because the folding of unary operations // doesn't create new constants with different values. Nevertheless, the @@ -3590,7 +3610,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(Operand.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) - return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); else if (OpOpcode == ISD::UNDEF) // sext(undef) = 0, because the top bits will all be the same. return getConstant(0, DL, VT); @@ -3606,8 +3626,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(Operand.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) - return getNode(ISD::ZERO_EXTEND, DL, VT, - Operand.getNode()->getOperand(0)); + return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0)); else if (OpOpcode == ISD::UNDEF) // zext(undef) = 0, because the top bits will be zero. return getConstant(0, DL, VT); @@ -3626,13 +3645,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) - return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); else if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); // (ext (trunx x)) -> x if (OpOpcode == ISD::TRUNCATE) { - SDValue OpOp = Operand.getNode()->getOperand(0); + SDValue OpOp = Operand.getOperand(0); if (OpOp.getValueType() == VT) return OpOp; } @@ -3648,16 +3667,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(Operand.getValueType().bitsGT(VT) && "Invalid truncate node, src < dst!"); if (OpOpcode == ISD::TRUNCATE) - return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); + return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0)); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. - if (Operand.getNode()->getOperand(0).getValueType().getScalarType() + if (Operand.getOperand(0).getValueType().getScalarType() .bitsLT(VT.getScalarType())) - return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) - return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - return Operand.getNode()->getOperand(0); + return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); + if (Operand.getOperand(0).getValueType().bitsGT(VT)) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0)); + return Operand.getOperand(0); } if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); @@ -3712,15 +3731,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? - return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), - Operand.getNode()->getOperand(0), - &cast(Operand.getNode())->Flags); + return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), + Operand.getOperand(0), Operand.getNode()->getFlags()); if (OpOpcode == ISD::FNEG) // --X -> X - return Operand.getNode()->getOperand(0); + return Operand.getOperand(0); break; case ISD::FABS: if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) - return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0)); + return getNode(ISD::FABS, DL, VT, Operand.getOperand(0)); break; } @@ -3731,10 +3749,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + E->intersectFlagsWith(Flags); return SDValue(E, 0); + } N = newSDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { @@ -3831,8 +3852,9 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef Ops) { return true; return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && - any_of(Divisor->op_values(), - [](SDValue V) { return V.isUndef() || isNullConstant(V); }); + llvm::any_of(Divisor->op_values(), + [](SDValue V) { return V.isUndef() || + isNullConstant(V); }); // TODO: Handle signed overflow. } // TODO: Handle oversized shifts. @@ -3866,7 +3888,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // fold (add Sym, c) -> Sym+c if (GlobalAddressSDNode *GA = dyn_cast(Cst1)) return FoldSymbolOffset(Opcode, VT, GA, Cst2); - if (isCommutativeBinOp(Opcode)) + if (TLI->isCommutativeBinOp(Opcode)) if (GlobalAddressSDNode *GA = dyn_cast(Cst2)) return FoldSymbolOffset(Opcode, VT, GA, Cst1); @@ -3913,7 +3935,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef Ops, - const SDNodeFlags *Flags) { + const SDNodeFlags Flags) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. @@ -3943,8 +3965,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, // All operands must be vector types with the same number of elements as // the result type and must be either UNDEF or a build vector of constant // or UNDEF scalars. - if (!all_of(Ops, IsConstantBuildVectorOrUndef) || - !all_of(Ops, IsScalarOrSameVectorSize)) + if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) || + !llvm::all_of(Ops, IsScalarOrSameVectorSize)) return SDValue(); // If we are comparing vectors, then the result needs to be a i1 boolean @@ -4005,15 +4027,14 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue N1, SDValue N2, - const SDNodeFlags *Flags) { + SDValue N1, SDValue N2, const SDNodeFlags Flags) { ConstantSDNode *N1C = dyn_cast(N1); ConstantSDNode *N2C = dyn_cast(N2); ConstantFPSDNode *N1CFP = dyn_cast(N1); ConstantFPSDNode *N2CFP = dyn_cast(N2); // Canonicalize constant to RHS if commutative. - if (isCommutativeBinOp(Opcode)) { + if (TLI->isCommutativeBinOp(Opcode)) { if (N1C && !N2C) { std::swap(N1C, N2C); std::swap(N1, N2); @@ -4191,7 +4212,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) { unsigned FromBits = EVT.getScalarSizeInBits(); Val <<= Val.getBitWidth() - FromBits; - Val = Val.ashr(Val.getBitWidth() - FromBits); + Val.ashrInPlace(Val.getBitWidth() - FromBits); return getConstant(Val, DL, ConstantVT); }; @@ -4397,7 +4418,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Canonicalize an UNDEF to the RHS, even over a constant. if (N1.isUndef()) { - if (isCommutativeBinOp(Opcode)) { + if (TLI->isCommutativeBinOp(Opcode)) { std::swap(N1, N2); } else { switch (Opcode) { @@ -4473,21 +4494,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Memoize this node if possible. SDNode *N; SDVTList VTs = getVTList(VT); + SDValue Ops[] = {N1, N2}; if (VT != MVT::Glue) { - SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { - if (Flags) - E->intersectFlagsWith(Flags); + E->intersectFlagsWith(Flags); return SDValue(E, 0); } - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); + N = newSDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); + createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); + N = newSDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + createOperands(N, Ops); } InsertNode(N); @@ -4679,9 +4702,10 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, - const TargetLowering &TLI, StringRef Str) { + const TargetLowering &TLI, + const ConstantDataArraySlice &Slice) { // Handle vector with all elements zero. - if (Str.empty()) { + if (Slice.Array == nullptr) { if (VT.isInteger()) return DAG.getConstant(0, dl, VT); else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) @@ -4700,15 +4724,15 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, assert(!VT.isVector() && "Can't handle vector type here!"); unsigned NumVTBits = VT.getSizeInBits(); unsigned NumVTBytes = NumVTBits / 8; - unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); + unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length)); APInt Val(NumVTBits, 0); if (DAG.getDataLayout().isLittleEndian()) { for (unsigned i = 0; i != NumBytes; ++i) - Val |= (uint64_t)(unsigned char)Str[i] << i*8; + Val |= (uint64_t)(unsigned char)Slice[i] << i*8; } else { for (unsigned i = 0; i != NumBytes; ++i) - Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; + Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8; } // If the "cost" of materializing the integer immediate is less than the cost @@ -4725,9 +4749,8 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset, return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT)); } -/// isMemSrcFromString - Returns true if memcpy source is a string constant. -/// -static bool isMemSrcFromString(SDValue Src, StringRef &Str) { +/// Returns true if memcpy source is constant data. +static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { uint64_t SrcDelta = 0; GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) @@ -4741,8 +4764,8 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { if (!G) return false; - return getConstantStringInfo(G->getGlobal(), Str, - SrcDelta + G->getOffset(), false); + return getConstantDataArrayInfo(G->getGlobal(), Slice, 8, + SrcDelta + G->getOffset()); } /// Determines the optimal series of memory ops to replace the memset / memcpy. @@ -4773,23 +4796,23 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) || - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) { - VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); - } else { - switch (DstAlign & 7) { - case 0: VT = MVT::i64; break; - case 4: VT = MVT::i32; break; - case 2: VT = MVT::i16; break; - default: VT = MVT::i8; break; - } - } - + // Use the largest integer type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + VT = MVT::i64; + while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && + !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + assert(VT.isInteger()); + + // Find the largest legal integer type. MVT LVT = MVT::i64; while (!TLI.isTypeLegal(LVT)) LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); assert(LVT.isInteger()); + // If the type we've chosen is larger than the largest legal integer type + // then use that instead. if (VT.bitsGT(LVT)) VT = LVT; } @@ -4885,15 +4908,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - StringRef Str; - bool CopyFromStr = isMemSrcFromString(Src, Str); - bool isZeroStr = CopyFromStr && Str.empty(); + ConstantDataArraySlice Slice; + bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); + bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), - (isZeroStr ? 0 : SrcAlign), - false, false, CopyFromStr, true, + (isZeroConstant ? 0 : SrcAlign), + false, false, CopyFromConstant, true, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), DAG, TLI)) @@ -4937,18 +4960,29 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, DstOff -= VTSize - Size; } - if (CopyFromStr && - (isZeroStr || (VT.isInteger() && !VT.isVector()))) { + if (CopyFromConstant && + (isZeroConstant || (VT.isInteger() && !VT.isVector()))) { // It's unlikely a store of a vector immediate can be done in a single // instruction. It would require a load from a constantpool first. // We only handle zero vectors here. // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. - Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); + ConstantDataArraySlice SubSlice; + if (SrcOff < Slice.Length) { + SubSlice = Slice; + SubSlice.move(SrcOff); + } else { + // This is an out-of-bounds access and hence UB. Pretend we read zero. + SubSlice.Array = nullptr; + SubSlice.Offset = 0; + SubSlice.Length = VTSize; + } + Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); if (Value.getNode()) Store = DAG.getStore(Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), Align, + MMOFlags); } if (!Store.getNode()) { @@ -5533,7 +5567,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, Opcode == ISD::PREFETCH || Opcode == ISD::LIFETIME_START || Opcode == ISD::LIFETIME_END || - (Opcode <= INT_MAX && + ((int)Opcode <= std::numeric_limits::max() && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -5867,7 +5901,6 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::LoadExtType ExtTy, bool isExpanding) { - SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; FoldingSetNodeID ID; @@ -6009,11 +6042,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - ArrayRef Ops, const SDNodeFlags *Flags) { + ArrayRef Ops, const SDNodeFlags Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); - case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 1: return getNode(Opcode, DL, VT, Ops[0], Flags); case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; @@ -6021,13 +6054,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, switch (Opcode) { default: break; - case ISD::CONCAT_VECTORS: { + case ISD::CONCAT_VECTORS: // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) return V; break; - } - case ISD::SELECT_CC: { + case ISD::SELECT_CC: assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); assert(Ops[0].getValueType() == Ops[1].getValueType() && "LHS and RHS of condition must have same type!"); @@ -6036,14 +6068,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(Ops[2].getValueType() == VT && "select_cc node must be of same type as true and false value!"); break; - } - case ISD::BR_CC: { + case ISD::BR_CC: assert(NumOps == 5 && "BR_CC takes 5 operands!"); assert(Ops[2].getValueType() == Ops[3].getValueType() && "LHS/RHS of comparison should match types!"); break; } - } // Memoize nodes. SDNode *N; @@ -6525,6 +6555,62 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, return N; } +SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { + unsigned OrigOpc = Node->getOpcode(); + unsigned NewOpc; + bool IsUnary = false; + switch (OrigOpc) { + default: + llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); + case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; + case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; + case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; + case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; + case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; + case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break; + case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; + case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break; + case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break; + case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break; + case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break; + case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break; + case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break; + case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break; + case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break; + case ISD::STRICT_FNEARBYINT: + NewOpc = ISD::FNEARBYINT; + IsUnary = true; + break; + } + + // We're taking this node out of the chain, so we need to re-link things. + SDValue InputChain = Node->getOperand(0); + SDValue OutputChain = SDValue(Node, 1); + ReplaceAllUsesOfValueWith(OutputChain, InputChain); + + SDVTList VTs = getVTList(Node->getOperand(1).getValueType()); + SDNode *Res = nullptr; + if (IsUnary) + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); + else + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), + Node->getOperand(2) }); + + // MorphNodeTo can operate in two ways: if an existing node with the + // specified operands exists, it can just return it. Otherwise, it + // updates the node in place to have the requested operands. + if (Res == Node) { + // If we updated the node in place, reset the node ID. To the isel, + // this should be just like a newly allocated machine node. + Res->setNodeId(-1); + } else { + ReplaceAllUsesWith(Node, Res); + RemoveDeadNode(Node); + } + + return Res; +} /// getMachineNode - These are used for target selectors to create a new node /// with specified return type(s), MachineInstr opcode, and operands. @@ -6671,14 +6757,13 @@ SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef Ops, - const SDNodeFlags *Flags) { + const SDNodeFlags Flags) { if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { - if (Flags) - E->intersectFlagsWith(Flags); + E->intersectFlagsWith(Flags); return E; } } @@ -6739,7 +6824,7 @@ public: : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} }; -} +} // end anonymous namespace /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. /// This can cause recursive merging of nodes in the DAG. @@ -6785,7 +6870,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { AddModifiedNodeToCSEMaps(User); } - // If we just RAUW'd the root, take note. if (FromN == getRoot()) setRoot(To); @@ -6955,6 +7039,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ } namespace { + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith /// to record information about a use. struct UseMemo { @@ -6967,7 +7052,8 @@ namespace { bool operator<(const UseMemo &L, const UseMemo &R) { return (intptr_t)L.User < (intptr_t)R.User; } -} + +} // end anonymous namespace /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. The same value @@ -7033,7 +7119,6 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, /// based on their topological order. It returns the maximum id and a vector /// of the SDNodes* in assigned order by reference. unsigned SelectionDAG::AssignTopologicalOrder() { - unsigned DAGSize = 0; // SortedPos tracks the progress of the algorithm. Nodes before it are @@ -7159,6 +7244,24 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { AddDbgValue(I, ToNode, false); } +void SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, + SDValue NewMemOp) { + assert(isa(NewMemOp.getNode()) && "Expected a memop node"); + if (!OldLoad->hasAnyUseOfValue(1)) + return; + + // The new memory operation must have the same position as the old load in + // terms of memory dependency. Create a TokenFactor for the old load and new + // memory operation and update uses of the old load's output chain to use that + // TokenFactor. + SDValue OldChain = SDValue(OldLoad, 1); + SDValue NewChain = SDValue(NewMemOp.getNode(), 1); + SDValue TokenFactor = + getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); + ReplaceAllUsesOfValueWith(OldChain, TokenFactor); + UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); +} + //===----------------------------------------------------------------------===// // SDNode Class //===----------------------------------------------------------------------===// @@ -7260,6 +7363,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const { } namespace { + struct EVTArray { std::vector VTs; @@ -7269,11 +7373,12 @@ namespace { VTs.push_back(MVT((MVT::SimpleValueType)i)); } }; -} -static ManagedStatic > EVTs; +} // end anonymous namespace + +static ManagedStatic> EVTs; static ManagedStatic SimpleVTArray; -static ManagedStatic > VTMutex; +static ManagedStatic> VTMutex; /// getValueTypeList - Return a pointer to the specified value type. /// @@ -7307,7 +7412,6 @@ bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { return NUses == 0; } - /// hasAnyUseOfValue - Return true if there are any use of the indicated /// value. This method ignores uses of other values defined by this operation. bool SDNode::hasAnyUseOfValue(unsigned Value) const { @@ -7320,9 +7424,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const { return false; } - /// isOnlyUserOf - Return true if this node is the only use of N. -/// bool SDNode::isOnlyUserOf(const SDNode *N) const { bool Seen = false; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { @@ -7352,7 +7454,6 @@ bool SDNode::areOnlyUsersOf(ArrayRef Nodes, const SDNode *N) { } /// isOperand - Return true if this node is an operand of N. -/// bool SDValue::isOperandOf(const SDNode *N) const { for (const SDValue &Op : N->op_values()) if (*this == Op) @@ -7402,7 +7503,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, } // Next, try a deep search: check whether every operand of the TokenFactor // reaches Dest. - return all_of((*this)->ops(), [=](SDValue Op) { + return llvm::all_of((*this)->ops(), [=](SDValue Op) { return Op.reachesChainWithoutSideEffects(Dest, Depth - 1); }); } @@ -7422,15 +7523,8 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -const SDNodeFlags *SDNode::getFlags() const { - if (auto *FlagsNode = dyn_cast(this)) - return &FlagsNode->Flags; - return nullptr; -} - -void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) { - if (auto *FlagsNode = dyn_cast(this)) - FlagsNode->Flags.intersectWith(Flags); +void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { + this->Flags.intersectWith(Flags); } SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { @@ -7561,7 +7655,6 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; } - /// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if /// it cannot be inferred. unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { @@ -7570,10 +7663,9 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); - APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::computeKnownBits(const_cast(GV), KnownZero, KnownOne, - getDataLayout()); - unsigned AlignBits = KnownZero.countTrailingOnes(); + KnownBits Known(PtrWidth); + llvm::computeKnownBits(GV, Known, getDataLayout()); + unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) return MinAlign(Align, GVOffset); @@ -7607,14 +7699,11 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { std::pair SelectionDAG::GetSplitDestVTs(const EVT &VT) const { // Currently all types are split in half. EVT LoVT, HiVT; - if (!VT.isVector()) { + if (!VT.isVector()) LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); - } else { - unsigned NumElements = VT.getVectorNumElements(); - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), - NumElements/2); - } + else + LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext()); + return std::make_pair(LoVT, HiVT); } @@ -7656,59 +7745,58 @@ unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); } - Type *ConstantPoolSDNode::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); return Val.ConstVal->getType(); } -bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, - APInt &SplatUndef, +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits, - bool isBigEndian) const { + bool IsBigEndian) const { EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); - unsigned sz = VT.getSizeInBits(); - if (MinSplatBits > sz) + unsigned VecWidth = VT.getSizeInBits(); + if (MinSplatBits > VecWidth) return false; - SplatValue = APInt(sz, 0); - SplatUndef = APInt(sz, 0); + // FIXME: The widths are based on this node's type, but build vectors can + // truncate their operands. + SplatValue = APInt(VecWidth, 0); + SplatUndef = APInt(VecWidth, 0); - // Get the bits. Bits with undefined values (when the corresponding element + // Get the bits. Bits with undefined values (when the corresponding element // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared - // in SplatValue. If any of the values are not constant, give up and return + // in SplatValue. If any of the values are not constant, give up and return // false. - unsigned int nOps = getNumOperands(); - assert(nOps > 0 && "isConstantSplat has 0-size build vector"); - unsigned EltBitSize = VT.getScalarSizeInBits(); + unsigned int NumOps = getNumOperands(); + assert(NumOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltWidth = VT.getScalarSizeInBits(); - for (unsigned j = 0; j < nOps; ++j) { - unsigned i = isBigEndian ? nOps-1-j : j; + for (unsigned j = 0; j < NumOps; ++j) { + unsigned i = IsBigEndian ? NumOps - 1 - j : j; SDValue OpVal = getOperand(i); - unsigned BitPos = j * EltBitSize; + unsigned BitPos = j * EltWidth; if (OpVal.isUndef()) - SplatUndef.setBits(BitPos, BitPos + EltBitSize); - else if (ConstantSDNode *CN = dyn_cast(OpVal)) - SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize), - BitPos); - else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) + SplatUndef.setBits(BitPos, BitPos + EltWidth); + else if (auto *CN = dyn_cast(OpVal)) + SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); + else if (auto *CN = dyn_cast(OpVal)) SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos); else return false; } - // The build_vector is all constants or undefs. Find the smallest element + // The build_vector is all constants or undefs. Find the smallest element // size that splats the vector. - HasAnyUndefs = (SplatUndef != 0); - while (sz > 8) { - unsigned HalfSize = sz / 2; + // FIXME: This does not work for vectors with elements less than 8 bits. + while (VecWidth > 8) { + unsigned HalfSize = VecWidth / 2; APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); APInt LowValue = SplatValue.trunc(HalfSize); APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); @@ -7722,10 +7810,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, SplatValue = HighValue | LowValue; SplatUndef = HighUndef & LowUndef; - sz = HalfSize; + VecWidth = HalfSize; } - SplatBitSize = sz; + SplatBitSize = VecWidth; return true; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 315d841cf3cb8057daab5aeede1172abf3a10213..f9f431db55be3f3a8d3c6976597290bab5eda2ec 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -83,20 +83,6 @@ LimitFPPrecision("limit-float-precision", "for some float libcalls"), cl::location(LimitFloatPrecision), cl::init(0)); - -/// Minimum jump table density for normal functions. -static cl::opt -JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, - cl::desc("Minimum density for building a jump table in " - "a normal function")); - -/// Minimum jump table density for -Os or -Oz functions. -static cl::opt -OptsizeJumpTableDensity("optsize-jump-table-density", cl::init(40), cl::Hidden, - cl::desc("Minimum density for building a jump table in " - "an optsize function")); - - // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -115,7 +101,8 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V); + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type @@ -125,10 +112,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional AssertOp = None) { + Optional AssertOp = None, + bool IsABIRegCopy = false) { if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, - PartVT, ValueVT, V); + PartVT, ValueVT, V, IsABIRegCopy); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -272,7 +260,8 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, /// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V) { + MVT PartVT, EVT ValueVT, const Value *V, + bool IsABIRegCopy) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -283,9 +272,18 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = - TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); @@ -314,9 +312,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. + EVT BuiltVectorTy = + EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), + (IntermediateVT.isVector() + ? IntermediateVT.getVectorNumElements() * NumParts + : NumIntermediates)); Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, - DL, ValueVT, Ops); + DL, BuiltVectorTy, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. @@ -355,23 +358,40 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - // Handle cases such as i8 -> <1 x i1> if (ValueVT.getVectorNumElements() != 1) { - diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, - "non-trivial scalar-to-vector conversion"); - return DAG.getUNDEF(ValueVT); + // Certain ABIs require that vectors are passed as integers. For vectors + // are the same size, this is an obvious bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) { + // Bitcast Val back the original type and extract the corresponding + // vector we want. + unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); + EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), + ValueVT.getVectorElementType(), Elts); + Val = DAG.getBitcast(WiderVecType, Val); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + diagnosePossiblyInvalidConstraint( + *DAG.getContext(), V, "non-trivial scalar-to-vector conversion"); + return DAG.getUNDEF(ValueVT); } - if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) - Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); + // Handle cases such as i8 -> <1 x i1> + EVT ValueSVT = ValueVT.getVectorElementType(); + if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) + Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) + : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); - return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); + return DAG.getBuildVector(ValueVT, DL, Val); } static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V); + MVT PartVT, const Value *V, bool IsABIRegCopy); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for @@ -379,12 +399,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + ISD::NodeType ExtendKind = ISD::ANY_EXTEND, + bool IsABIRegCopy = false) { EVT ValueVT = Val.getValueType(); // Handle the vector case separately. if (ValueVT.isVector()) - return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, + IsABIRegCopy); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; @@ -509,7 +531,9 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, - MVT PartVT, const Value *V) { + MVT PartVT, const Value *V, + bool IsABIRegCopy) { + EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -537,7 +561,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); + Val = DAG.getBuildVector(PartVT, DL, Ops); // FIXME: Use CONCAT for 2x -> 4x. @@ -550,17 +574,23 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); - } else{ - // Vector -> scalar conversion. - assert(ValueVT.getVectorNumElements() == 1 && - "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } else { + if (ValueVT.getVectorNumElements() == 1) { + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } else { + assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && + "lossy conversion of vector to scalar type"); + EVT IntermediateType = + EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getBitcast(IntermediateType, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } } + assert(Val.getValueType() == PartVT && "Unexpected vector part value type"); Parts[0] = Val; return; } @@ -569,15 +599,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, - IntermediateVT, - NumIntermediates, RegisterVT); + unsigned NumRegs; + if (IsABIRegCopy) { + NumRegs = TLI.getVectorTypeBreakdownForCallingConv( + *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); + } else { + NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + } unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + // Convert the vector to the appropiate type if necessary. + unsigned DestVectorNoElts = + NumIntermediates * + (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1); + EVT BuiltVectorTy = EVT::getVectorVT( + *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); + if (Val.getValueType() != BuiltVectorTy) + Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); + // Split the vector into intermediate operands. SmallVector Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { @@ -610,22 +656,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } } -RegsForValue::RegsForValue() {} +RegsForValue::RegsForValue() { IsABIMangled = false; } RegsForValue::RegsForValue(const SmallVector ®s, MVT regvt, - EVT valuevt) - : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + EVT valuevt, bool IsABIMangledValue) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), + RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty) { + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); + IsABIMangled = IsABIMangledValue; + for (EVT ValueVT : ValueVTs) { - unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); - MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); + unsigned NumRegs = IsABIMangledValue + ? TLI.getNumRegistersForCallingConv(Context, ValueVT) + : TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = IsABIMangledValue + ? TLI.getRegisterTypeForCallingConv(Context, ValueVT) + : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); + RegCount.push_back(NumRegs); Reg += NumRegs; } } @@ -646,8 +701,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = RegCount[Value]; + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -675,7 +732,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned RegSize = RegisterVT.getSizeInBits(); unsigned NumSignBits = LOI->NumSignBits; - unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + unsigned NumZeroBits = LOI->Known.countMinLeadingZeros(); if (NumZeroBits == RegSize) { // The current value is a zero. @@ -742,9 +799,11 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, unsigned NumRegs = Regs.size(); SmallVector Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - MVT RegisterVT = RegVTs[Value]; + unsigned NumParts = RegCount[Value]; + + MVT RegisterVT = IsABIMangled + ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -825,9 +884,9 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, const TargetLibraryInfo *li) { - AA = &aa; + AA = aa; GFI = gfi; LibInfo = li; DL = &DAG.getDataLayout(); @@ -967,10 +1026,16 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; + bool IsABIRegCopy = + V && ((isa(V) && + !(static_cast(V))->isInlineAsm()) || + isa(V)); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), InReg, Ty); + DAG.getDataLayout(), InReg, Ty, IsABIRegCopy); SDValue Chain = DAG.getEntryNode(); - Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, + V); resolveDanglingDebugInfo(V, Result); } @@ -1088,8 +1153,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa(CDS->getType())) return DAG.getMergeValues(Ops, getCurSDLoc()); - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { @@ -1141,7 +1205,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); } // If this is a static alloca, generate it as the frameindex instead of @@ -1151,14 +1215,19 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFrameIndexTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + bool IsABIRegCopy = + V && ((isa(V) && + !(static_cast(V))->isInlineAsm()) || + isa(V)); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType()); + Inst->getType(), IsABIRegCopy); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1350,7 +1419,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], getCurSDLoc()), - &Flags); + Flags); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), // FIXME: better loc info would be nice. @@ -1386,12 +1455,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind); - unsigned NumParts = TLI.getNumRegisters(Context, VT); - MVT PartVT = TLI.getRegisterType(Context, VT); + unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT); + MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), - &Parts[0], NumParts, PartVT, &I, ExtendKind); + &Parts[0], NumParts, PartVT, &I, ExtendKind, true); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -1427,9 +1496,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { true /*isfixed*/, 1 /*origidx*/, 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. - OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg( - FuncInfo.MBB, FuncInfo.SwiftErrorArg), - EVT(TLI.getPointerTy(DL)))); + OutVals.push_back( + DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt( + &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first, + EVT(TLI.getPointerTy(DL)))); } bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); @@ -2590,7 +2660,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), - Op1, Op2, &Flags); + Op1, Op2, Flags); setValue(&I, BinNodeValue); } @@ -2643,7 +2713,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Flags.setNoSignedWrap(nsw); Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, - &Flags); + Flags); setValue(&I, Res); } @@ -2655,7 +2725,7 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { Flags.setExact(isa(&I) && cast(&I)->isExact()); setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, - Op2, &Flags)); + Op2, Flags)); } void SelectionDAGBuilder::visitICmp(const User &I) { @@ -3147,7 +3217,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops)); + setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3267,7 +3337,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Flags.setNoUnsignedWrap(true); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, - DAG.getConstant(Offset, dl, N.getValueType()), &Flags); + DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { MVT PtrTy = @@ -3297,7 +3367,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Offs.isNonNegative() && cast(I).isInBounds()) Flags.setNoUnsignedWrap(true); - N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags); continue; } @@ -3375,7 +3445,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); + DAG.getIntPtrConstant(StackAlign - 1, dl), Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, @@ -3438,7 +3508,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory(MemoryLocation( + else if (AA && AA->pointsToConstantMemory(MemoryLocation( SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); @@ -3479,7 +3549,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), - &Flags); + Flags); auto MMOFlags = MachineMemOperand::MONone; if (isVolatile) MMOFlags |= MachineMemOperand::MOVolatile; @@ -3512,8 +3582,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - assert(TLI.supportSwiftError() && + assert(DAG.getTargetLoweringInfo().supportSwiftError() && "call visitStoreToSwiftError when backend supports swifterror"); SmallVector ValueVTs; @@ -3526,15 +3595,15 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. - auto &DL = DAG.getDataLayout(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { @@ -3550,8 +3619,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { Type *Ty = I.getType(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - assert(!AA->pointsToConstantMemory(MemoryLocation( - SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo)) && + assert((!AA || !AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) && "load_from_swift_error should not be constant memory"); SmallVector ValueVTs; @@ -3564,7 +3633,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT SDValue L = DAG.getCopyFromReg( getRoot(), getCurSDLoc(), - FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]); + FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first, + ValueVTs[0]); setValue(&I, L); } @@ -3634,7 +3704,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); + DAG.getConstant(Offsets[i], dl, PtrVT), Flags); SDValue St = DAG.getStore( Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo); @@ -3832,7 +3902,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - bool AddToChain = !AA->pointsToConstantMemory(MemoryLocation( + bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation( PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo)); SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); @@ -3876,7 +3946,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && - AA->pointsToConstantMemory(MemoryLocation( + AA && AA->pointsToConstantMemory(MemoryLocation( BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. @@ -3969,9 +4039,9 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDValue Ops[3]; Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFenceOperandTy(DAG.getDataLayout())); Ops[2] = DAG.getConstant(I.getSynchScope(), dl, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -4674,7 +4744,7 @@ static unsigned getUnderlyingArgReg(const SDValue &N) { /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, - DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { + DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) return false; @@ -4688,9 +4758,11 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) return false; + bool IsIndirect = false; Optional Op; // Some arguments' frame index is recorded during argument lowering. - if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) + int FI = FuncInfo.getArgumentFrameIndex(Arg); + if (FI != INT_MAX) Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { @@ -4701,15 +4773,19 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (PR) Reg = PR; } - if (Reg) + if (Reg) { Op = MachineOperand::CreateReg(Reg, false); + IsIndirect = IsDbgDeclare; + } } if (!Op) { // Check if ValueMap has reg number. DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); - if (VMI != FuncInfo.ValueMap.end()) + if (VMI != FuncInfo.ValueMap.end()) { Op = MachineOperand::CreateReg(VMI->second, false); + IsIndirect = IsDbgDeclare; + } } if (!Op && N.getNode()) @@ -4745,24 +4821,15 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, DIExpression *Expr, int64_t Offset, const DebugLoc &dl, unsigned DbgSDNodeOrder) { - SDDbgValue *SDV; - auto *FISDN = dyn_cast(N.getNode()); - if (FISDN && Expr->startsWithDeref()) { + if (auto *FISDN = dyn_cast(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe // stack slot locations as such instead of as indirectly addressed // locations. - ArrayRef TrailingElements(Expr->elements_begin() + 1, - Expr->elements_end()); - DIExpression *DerefedDIExpr = - DIExpression::get(*DAG.getContext(), TrailingElements); - int FI = FISDN->getIndex(); - SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl, - DbgSDNodeOrder); - } else { - SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, - Offset, dl, DbgSDNodeOrder); + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl, + DbgSDNodeOrder); } - return SDV; + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, + Offset, dl, DbgSDNodeOrder); } // VisualStudio defines setjmp as _setjmp @@ -4876,11 +4943,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { updateDAGForMaybeTailCall(MM); return nullptr; } - case Intrinsic::memcpy_element_atomic: { - SDValue Dst = getValue(I.getArgOperand(0)); - SDValue Src = getValue(I.getArgOperand(1)); - SDValue NumElements = getValue(I.getArgOperand(2)); - SDValue ElementSize = getValue(I.getArgOperand(3)); + case Intrinsic::memcpy_element_unordered_atomic: { + const ElementUnorderedAtomicMemCpyInst &MI = + cast(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); // Emit a library call. TargetLowering::ArgListTy Args; @@ -4891,19 +4959,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Entry.Node = Src; Args.push_back(Entry); - - Entry.Ty = I.getArgOperand(2)->getType(); - Entry.Node = NumElements; - Args.push_back(Entry); - - Entry.Ty = Type::getInt32Ty(*DAG.getContext()); - Entry.Node = ElementSize; + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; Args.push_back(Entry); - uint64_t ElementSizeConstant = - cast(I.getArgOperand(3))->getZExtValue(); + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); RTLIB::Libcall LibraryCall = - RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant); + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported element size"); @@ -4937,6 +5000,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + // Byval arguments with frame indices were already handled after argument + // lowering and before isel. + const auto *Arg = + dyn_cast(Address->stripInBoundsConstantOffsets()); + if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) + return nullptr; + SDValue &N = NodeMap[Address]; if (!N.getNode() && isa(Address)) // Check unused arguments map. @@ -4955,8 +5025,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else if (isa(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, - N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N); return nullptr; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), @@ -4966,22 +5035,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N)) { - // If variable is pinned by a alloca in dominating bb then - // use StaticAllocaMap. - if (const AllocaInst *AI = dyn_cast(Address)) { - if (AI->getParent() != DI.getParent()) { - DenseMap::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) { - SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, - 0, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - return nullptr; - } - } - } DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -5003,45 +5058,33 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); - } else { - // Do not use getValue() in here; we don't want to generate code at - // this point if it hasn't been done yet. - SDValue N = NodeMap[V]; - if (!N.getNode() && isa(V)) - // Check unused arguments map. - N = UnusedArgNodeMap[V]; - if (N.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, - false, N)) { - SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - } - } else if (!V->use_empty() ) { - // Do not call getValue(V) yet, as we don't want to generate code. - // Remember it for later. - DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); - DanglingDebugInfoMap[V] = DDI; - } else { - // We may expand this to cover more cases. One case where we have no - // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - } + return nullptr; + } + + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. + SDValue N = NodeMap[V]; + if (!N.getNode() && isa(V)) // Check unused arguments map. + N = UnusedArgNodeMap[V]; + if (N.getNode()) { + if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false, + N)) + return nullptr; + SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + return nullptr; } - // Build a debug info table entry. - if (const BitCastInst *BCI = dyn_cast(V)) - V = BCI->getOperand(0); - const AllocaInst *AI = dyn_cast(V); - // Don't handle byval struct arguments or VLAs, for example. - if (!AI) { - DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); - DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); + if (!V->use_empty() ) { + // Do not call getValue(V) yet, as we don't want to generate code. + // Remember it for later. + DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); + DanglingDebugInfoMap[V] = DDI; return nullptr; } - DenseMap::iterator SI = - FuncInfo.StaticAllocaMap.find(AI); - if (SI == FuncInfo.StaticAllocaMap.end()) - return nullptr; // VLAs. + + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return nullptr; } @@ -5179,7 +5222,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); + ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, @@ -5283,7 +5326,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: - visitConstrainedFPIntrinsic(I, Intrinsic); + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + visitConstrainedFPIntrinsic(cast(I)); return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -5613,7 +5668,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = - DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); + DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -5674,7 +5729,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { int FI = FuncInfo.StaticAllocaMap[Slot]; MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(MF.getName()), Idx); + GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) @@ -5695,7 +5750,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); + GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. @@ -5726,20 +5781,68 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, N); return nullptr; } + case Intrinsic::xray_customevent: { + // Here we want to make sure that the intrinsic behaves as if it has a + // specific calling convention, and only for x86_64. + // FIXME: Support other platforms later. + const auto &Triple = DAG.getTarget().getTargetTriple(); + if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + return nullptr; + + SDLoc DL = getCurSDLoc(); + SmallVector Ops; + // We want to say that we always want the arguments in registers. + SDValue LogEntryVal = getValue(I.getArgOperand(0)); + SDValue StrSizeVal = getValue(I.getArgOperand(1)); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = getRoot(); + Ops.push_back(LogEntryVal); + Ops.push_back(StrSizeVal); + Ops.push_back(Chain); + + // We need to enforce the calling convention for the callsite, so that + // argument ordering is enforced correctly, and that register allocation can + // see that some registers may be assumed clobbered and have to preserve + // them across calls to the intrinsic. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL, + DL, NodeTys, Ops); + SDValue patchableNode = SDValue(MN, 0); + DAG.setRoot(patchableNode); + setValue(&I, patchableNode); + return nullptr; + } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; + + case Intrinsic::experimental_vector_reduce_fadd: + case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: { + visitVectorReduce(I, Intrinsic); + return nullptr; + } + } } -void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I, - unsigned Intrinsic) { +void SelectionDAGBuilder::visitConstrainedFPIntrinsic( + const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); unsigned Opcode; - switch (Intrinsic) { + switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::experimental_constrained_fadd: + case Intrinsic::experimental_constrained_fadd: Opcode = ISD::STRICT_FADD; break; case Intrinsic::experimental_constrained_fsub: @@ -5754,23 +5857,64 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I, case Intrinsic::experimental_constrained_frem: Opcode = ISD::STRICT_FREM; break; + case Intrinsic::experimental_constrained_sqrt: + Opcode = ISD::STRICT_FSQRT; + break; + case Intrinsic::experimental_constrained_pow: + Opcode = ISD::STRICT_FPOW; + break; + case Intrinsic::experimental_constrained_powi: + Opcode = ISD::STRICT_FPOWI; + break; + case Intrinsic::experimental_constrained_sin: + Opcode = ISD::STRICT_FSIN; + break; + case Intrinsic::experimental_constrained_cos: + Opcode = ISD::STRICT_FCOS; + break; + case Intrinsic::experimental_constrained_exp: + Opcode = ISD::STRICT_FEXP; + break; + case Intrinsic::experimental_constrained_exp2: + Opcode = ISD::STRICT_FEXP2; + break; + case Intrinsic::experimental_constrained_log: + Opcode = ISD::STRICT_FLOG; + break; + case Intrinsic::experimental_constrained_log10: + Opcode = ISD::STRICT_FLOG10; + break; + case Intrinsic::experimental_constrained_log2: + Opcode = ISD::STRICT_FLOG2; + break; + case Intrinsic::experimental_constrained_rint: + Opcode = ISD::STRICT_FRINT; + break; + case Intrinsic::experimental_constrained_nearbyint: + Opcode = ISD::STRICT_FNEARBYINT; + break; } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = getRoot(); - SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)) }; SmallVector ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); ValueVTs.push_back(MVT::Other); // Out chain SDVTList VTs = DAG.getVTList(ValueVTs); - SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops); + SDValue Result; + if (FPI.isUnaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)) }); + else + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)) }); assert(Result.getNode()->getNumValues() == 2); SDValue OutChain = Result.getValue(1); DAG.setRoot(OutChain); SDValue FPResult = Result.getValue(0); - setValue(&I, FPResult); + setValue(&FPI, FPResult); } std::pair @@ -5876,17 +6020,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - // Skip the first return-type Attribute to get to params. - Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Entry.setAttributes(&CS, i - CS.arg_begin()); // Use swifterror virtual register as input to the call. if (Entry.IsSwiftError && TLI.supportSwiftError()) { SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. - Entry.Node = - DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V), - EVT(TLI.getPointerTy(DL))); + Entry.Node = DAG.getRegister(FuncInfo + .getOrCreateSwiftErrorVRegUseAt( + CS.getInstruction(), FuncInfo.MBB, V) + .first, + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -5927,29 +6072,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = + FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction()); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); // We update the virtual register for the actual swifterror argument. - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } -/// Return true if it only matters that the value is equal or not-equal to zero. -static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { - for (const User *U : V->users()) { - if (const ICmpInst *IC = dyn_cast(U)) - if (IC->isEquality()) - if (const Constant *C = dyn_cast(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SelectionDAGBuilder &Builder) { @@ -5976,7 +6109,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, bool ConstantMemory = false; // Do not serialize (non-volatile) loads of constant memory with anything. - if (Builder.AA->pointsToConstantMemory(PtrVal)) { + if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) { Root = Builder.DAG.getEntryNode(); ConstantMemory = true; } else { @@ -6036,7 +6169,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (!CSize || !IsOnlyUsedInZeroEqualityComparison(&I)) + if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I)) return false; // If the target has a fast compare for the given size, it will return a @@ -6627,7 +6760,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL)); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI)); OpInfo.CallOperand = StackSlot; @@ -6650,12 +6783,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, MachineFunction &MF = DAG.getMachineFunction(); SmallVector Regs; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); // If this is a constraint for a single physreg, or a constraint for a // register class, find it. std::pair PhysReg = - TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), - OpInfo.ConstraintCode, + TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); unsigned NumRegs = 1; @@ -6663,12 +6796,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // If this is a FP input in an integer register (or visa versa) insert a bit // cast of the input value. More generally, handle any case where the input // value disagrees with the register class we plan to stick this in. - if (OpInfo.Type == InlineAsm::isInput && - PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { + if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && + !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). - MVT RegVT = *PhysReg.second->vt_begin(); + MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second); if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); @@ -6696,12 +6829,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, if (unsigned AssignedReg = PhysReg.first) { const TargetRegisterClass *RC = PhysReg.second; if (OpInfo.ConstraintVT == MVT::Other) - ValueVT = *RC->vt_begin(); + ValueVT = *TRI.legalclasstypes_begin(*RC); // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); // This is a explicit reference to a physical register. Regs.push_back(AssignedReg); @@ -6727,7 +6860,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, // Otherwise, if this was a reference to an LLVM register class, create vregs // for this reference. if (const TargetRegisterClass *RC = PhysReg.second) { - RegVT = *RC->vt_begin(); + RegVT = *TRI.legalclasstypes_begin(*RC); if (OpInfo.ConstraintVT == MVT::Other) ValueVT = RegVT; @@ -7064,8 +7197,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, - Chain, &Flag, CS.getInstruction()); + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, + CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); @@ -7340,7 +7473,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo( // Populate the argument list. // Attributes for args start at offset 1, after the return attribute. - for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) { const Value *V = CS->getOperand(ArgI); @@ -7349,7 +7482,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo( TargetLowering::ArgListEntry Entry; Entry.Node = getValue(V); Entry.Ty = V->getType(); - Entry.setAttributes(&CS, AttrI); + Entry.setAttributes(&CS, ArgIdx); Args.push_back(Entry); } @@ -7390,7 +7523,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, } else if (FrameIndexSDNode *FI = dyn_cast(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); + FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -7416,11 +7549,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // have to worry about calling conventions and target specific lowering code. // Instead we perform the call lowering right here. // - // chain, flag = CALLSEQ_START(chain, 0) + // chain, flag = CALLSEQ_START(chain, 0, 0) // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) // chain, flag = CALLSEQ_END(chain, 0, 0, flag) // - Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); + Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL); InFlag = Chain.getValue(1); // Add the and constants. @@ -7610,6 +7743,76 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, FuncInfo.MF->getFrameInfo().setHasPatchPoint(); } +void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, + unsigned Intrinsic) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2; + if (I.getNumArgOperands() > 1) + Op2 = getValue(I.getArgOperand(1)); + SDLoc dl = getCurSDLoc(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue Res; + FastMathFlags FMF; + if (isa(I)) + FMF = I.getFastMathFlags(); + SDNodeFlags SDFlags; + SDFlags.setNoNaNs(FMF.noNaNs()); + + switch (Intrinsic) { + case Intrinsic::experimental_vector_reduce_fadd: + if (FMF.unsafeAlgebra()) + Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_fmul: + if (FMF.unsafeAlgebra()) + Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); + else + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + break; + case Intrinsic::experimental_vector_reduce_add: + Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_mul: + Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_and: + Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_or: + Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_xor: + Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smax: + Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_smin: + Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umax: + Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_umin: + Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); + break; + case Intrinsic::experimental_vector_reduce_fmax: { + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); + break; + } + case Intrinsic::experimental_vector_reduce_fmin: { + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); + break; + } + default: + llvm_unreachable("Unhandled vector reduce intrinsic"); + } + setValue(&I, Res); +} + /// Returns an AttributeList representing the attributes applied to the return /// value of the given call. static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { @@ -7658,7 +7861,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; @@ -7681,8 +7884,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; @@ -7731,7 +7936,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); if (Args[i].IsZExt) Flags.setZExt(); @@ -7786,8 +7995,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumParts = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -7817,7 +8027,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); + CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind, + true); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -7895,7 +8106,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, - PtrVT), &Flags); + PtrVT), Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), @@ -7917,12 +8128,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + MVT RegisterVT = + getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT); + unsigned NumRegs = + getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr, - AssertOp)); + AssertOp, true)); CurReg += NumRegs; } @@ -7958,8 +8171,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + // If this is an InlineAsm we have to match the registers required, not the + // notional registers required by the type. + bool IsABIRegCopy = + V && ((isa(V) && + !(static_cast(V))->isInlineAsm()) || + isa(V)); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType()); + V->getType(), IsABIRegCopy); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -8185,15 +8405,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); // Set up the incoming argument description vector. - unsigned Idx = 0; for (const Argument &Arg : F.args()) { - ++Idx; + unsigned ArgNo = Arg.getArgNo(); SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); bool isArgValueUsed = !Arg.use_empty(); unsigned PartBase = 0; Type *FinalType = Arg.getType(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::ByVal)) FinalType = cast(FinalType)->getElementType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg()); @@ -8202,13 +8421,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + // Certain targets (such as MIPS), may have a different ABI alignment + // for a type depending on the context. Give the target a chance to + // specify the alignment it wants. + unsigned OriginalAlignment = + TLI->getABIAlignmentForCallingConv(ArgTy, DL); + + if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) Flags.setSExt(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) { + if (Arg.hasAttribute(Attribute::InReg)) { // If we are using vectorcall calling convention, a structure that is // passed InReg - is surely an HVA if (F.getCallingConv() == CallingConv::X86_VectorCall && @@ -8221,15 +8445,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set InReg Flag Flags.setInReg(); } - if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) + if (Arg.hasAttribute(Attribute::StructRet)) Flags.setSRet(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf)) + if (Arg.hasAttribute(Attribute::SwiftSelf)) Flags.setSwiftSelf(); - if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) + if (Arg.hasAttribute(Attribute::SwiftError)) Flags.setSwiftError(); - if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); - if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + if (Arg.hasAttribute(Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated @@ -8240,7 +8464,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getCallingConv() == CallingConv::X86_INTR) { // IA Interrupt passes frame (1st parameter) by value in the stack. - if (Idx == 1) + if (ArgNo == 0) Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { @@ -8250,13 +8474,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; - if (F.getParamAlignment(Idx)) - FrameAlign = F.getParamAlignment(Idx); + if (Arg.getParamAlignment()) + FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } - if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) + if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); @@ -8264,11 +8488,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); - MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT RegisterVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumRegs = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - Idx-1, PartBase+i*RegisterVT.getStoreSize()); + ArgNo, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -8309,7 +8535,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Set up the argument values. unsigned i = 0; - Idx = 0; if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. @@ -8331,14 +8556,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. - // Idx indexes LLVM arguments. Don't touch it. ++i; } SmallVector Chains; DenseMap ArgCopyElisionFrameIndexMap; for (const Argument &Arg : F.args()) { - ++Idx; SmallVector ArgValues; SmallVector ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs); @@ -8360,7 +8583,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // debugging information. bool isSwiftErrorArg = TLI->supportSwiftError() && - F.getAttributes().hasAttribute(Idx, Attribute::SwiftError); + Arg.hasAttribute(Attribute::SwiftError); if (!ArgHasUses && !isSwiftErrorArg) { SDB->setUnusedArgValue(&Arg, InVals[i]); @@ -8372,21 +8595,24 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); + MVT PartVT = + TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT); + unsigned NumParts = + TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT); // Even an apparant 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the // function. if (ArgHasUses || isSwiftErrorArg) { Optional AssertOp; - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) AssertOp = ISD::AssertSext; - else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + else if (Arg.hasAttribute(Attribute::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, - PartVT, VT, nullptr, AssertOp)); + PartVT, VT, nullptr, AssertOp, + true)); } i += NumParts; @@ -8587,13 +8813,10 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { HasTailCall = true; } -bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, - const SmallVectorImpl &TotalCases, - unsigned First, unsigned Last, - unsigned Density) const { +uint64_t +SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters, + unsigned First, unsigned Last) const { assert(Last >= First); - assert(TotalCases[Last] >= TotalCases[First]); - const APInt &LowCase = Clusters[First].Low->getValue(); const APInt &HighCase = Clusters[Last].High->getValue(); assert(LowCase.getBitWidth() == HighCase.getBitWidth()); @@ -8602,26 +8825,17 @@ bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, // comparison to lower. We should discriminate against such consecutive ranges // in jump tables. - uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); - uint64_t Range = Diff + 1; + return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1; +} +uint64_t SelectionDAGBuilder::getJumpTableNumCases( + const SmallVectorImpl &TotalCases, unsigned First, + unsigned Last) const { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); uint64_t NumCases = TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); - - assert(NumCases < UINT64_MAX / 100); - assert(Range >= NumCases); - - return NumCases * 100 >= Range * Density; -} - -static inline bool areJTsAllowed(const TargetLowering &TLI, - const SwitchInst *SI) { - const Function *Fn = SI->getParent()->getParent(); - if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") - return false; - - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); + return NumCases; } bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, @@ -8660,10 +8874,11 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumDests = JTProbs.size(); - if (isSuitableForBitTests(NumDests, NumCmps, - Clusters[First].Low->getValue(), - Clusters[Last].High->getValue())) { + if (TLI.isSuitableForBitTests( + NumDests, NumCmps, Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), DAG.getDataLayout())) { // Clusters[First..Last] should be lowered as bit tests instead. return false; } @@ -8684,7 +8899,6 @@ bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, } JumpTableMBB->normalizeSuccProbs(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) ->createJumpTableIndex(Table); @@ -8713,17 +8927,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, #endif const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!areJTsAllowed(TLI, SI)) + if (!TLI.areJTsAllowed(SI->getParent()->getParent())) return; - const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize(); - const int64_t N = Clusters.size(); const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; - const unsigned MaxJumpTableSize = - OptForSize || TLI.getMaximumJumpTableSize() == 0 - ? UINT_MAX : TLI.getMaximumJumpTableSize(); if (N < 2 || N < MinJumpTableEntries) return; @@ -8738,15 +8947,12 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, TotalCases[i] += TotalCases[i - 1]; } - const unsigned MinDensity = - OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; - // Cheap case: the whole range may be suitable for jump table. - unsigned JumpTableSize = (Clusters[N - 1].High->getValue() - - Clusters[0].Low->getValue()) - .getLimitedValue(UINT_MAX - 1) + 1; - if (JumpTableSize <= MaxJumpTableSize && - isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) { + uint64_t Range = getJumpTableRange(Clusters,0, N - 1); + uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1); + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -8799,11 +9005,11 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, // Search for a solution that results in fewer partitions. for (int64_t j = N - 1; j > i; j--) { // Try building a partition from Clusters[i..j]. - JumpTableSize = (Clusters[j].High->getValue() - - Clusters[i].Low->getValue()) - .getLimitedValue(UINT_MAX - 1) + 1; - if (JumpTableSize <= MaxJumpTableSize && - isDense(Clusters, TotalCases, i, j, MinDensity)) { + uint64_t Range = getJumpTableRange(Clusters, i, j); + uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j); + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1]; int64_t NumEntries = j - i + 1; @@ -8847,36 +9053,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, Clusters.resize(DstIndex); } -bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { - // FIXME: Using the pointer type doesn't seem ideal. - uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); - uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; - return Range <= BW; -} - -bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, - unsigned NumCmps, - const APInt &Low, - const APInt &High) { - // FIXME: I don't think NumCmps is the correct metric: a single case and a - // range of cases both require only one branch to lower. Just looking at the - // number of clusters and destinations should be enough to decide whether to - // build bit tests. - - // To lower a range with bit tests, the range must fit the bitwidth of a - // machine word. - if (!rangeFitsInWord(Low, High)) - return false; - - // Decide whether it's profitable to lower this range with bit tests. Each - // destination requires a bit test and branch, and there is an overall range - // check branch. For a small number of clusters, separate comparisons might be - // cheaper, and for many destinations, splitting the range might be better. - return (NumDests == 1 && NumCmps >= 3) || - (NumDests == 2 && NumCmps >= 5) || - (NumDests == 3 && NumCmps >= 6); -} - bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, const SwitchInst *SI, @@ -8898,16 +9074,17 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, APInt High = Clusters[Last].High->getValue(); assert(Low.slt(High)); - if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL)) return false; APInt LowBound; APInt CmpRange; - const int BitWidth = DAG.getTargetLoweringInfo() - .getPointerTy(DAG.getDataLayout()) - .getSizeInBits(); - assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); + const int BitWidth = TLI.getPointerTy(DL).getSizeInBits(); + assert(TLI.rangeFitsInWord(Low, High, DL) && + "Case range must fit in bit mask!"); // Check if the clusters cover a contiguous range such that no value in the // range will jump to the default statement. @@ -8997,7 +9174,9 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); + const DataLayout &DL = DAG.getDataLayout(); + + EVT PTy = TLI.getPointerTy(DL); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; @@ -9028,8 +9207,8 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // Try building a partition from Clusters[i..j]. // Check the range. - if (!rangeFitsInWord(Clusters[i].Low->getValue(), - Clusters[j].High->getValue())) + if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue(), DL)) continue; // Check nbr of destinations and cluster types. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index c6acc09b660289be7e2b71c476438dff8ef9c80a..431d52b4b9b9f37ed79d742c9af3b9aef229b1cd 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -304,10 +304,13 @@ private: BranchProbability DefaultProb; }; - /// Check whether a range of clusters is dense enough for a jump table. - bool isDense(const CaseClusterVector &Clusters, - const SmallVectorImpl &TotalCases, - unsigned First, unsigned Last, unsigned MinDensity) const; + /// Return the range of value in [First..Last]. + uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First, + unsigned Last) const; + + /// Return the number of cases in [First..Last]. + uint64_t getJumpTableNumCases(const SmallVectorImpl &TotalCases, + unsigned First, unsigned Last) const; /// Build a jump table cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. @@ -319,14 +322,6 @@ private: void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, MachineBasicBlock *DefaultMBB); - /// Check whether the range [Low,High] fits in a machine word. - bool rangeFitsInWord(const APInt &Low, const APInt &High); - - /// Check whether these clusters are suitable for lowering with bit tests based - /// on the number of destinations, comparison metric, and range. - bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, - const APInt &Low, const APInt &High); - /// Build a bit test cluster from Clusters[First..Last]. Returns false if it /// decides it's not a good idea. bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, @@ -609,11 +604,11 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), - DAG(dag), FuncInfo(funcinfo), + DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo), HasTailCall(false) { } - void init(GCFunctionInfo *gfi, AliasAnalysis &aa, + void init(GCFunctionInfo *gfi, AliasAnalysis *AA, const TargetLibraryInfo *li); /// Clear out the current SelectionDAG and the associated state and prepare @@ -777,6 +772,11 @@ public: bool VarArgDisallowed, bool ForceVoidReturnTy); + /// Returns the type of FrameIndex and TargetFrameIndex nodes. + MVT getFrameIndexTy() { + return DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()); + } + private: // Terminator instructions. void visitRet(const ReturnInst &I); @@ -895,7 +895,7 @@ private: void visitInlineAsm(ImmutableCallSite CS); const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); - void visitConstrainedFPIntrinsic(const CallInst &I, unsigned Intrinsic); + void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); void visitVAStart(const CallInst &I); void visitVAArg(const VAArgInst &I); @@ -909,6 +909,8 @@ private: void visitGCRelocate(const GCRelocateInst &I); void visitGCResult(const GCResultInst &I); + void visitVectorReduce(const CallInst &I, unsigned Intrinsic); + void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); } @@ -928,7 +930,7 @@ private: /// instruction selection, they will be inserted to the entry BB. bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable, DIExpression *Expr, DILocation *DL, - int64_t Offset, bool IsIndirect, + int64_t Offset, bool IsDbgDeclare, const SDValue &N); /// Return the next block after MBB, or nullptr if there is none. @@ -973,18 +975,28 @@ struct RegsForValue { /// expanded value requires multiple registers. SmallVector Regs; + /// This list holds the number of registers for each value. + SmallVector RegCount; + + /// Records if this value needs to be treated in an ABI dependant manner, + /// different to normal type legalization. + bool IsABIMangled; + RegsForValue(); - RegsForValue(const SmallVector ®s, MVT regvt, EVT valuevt); + RegsForValue(const SmallVector ®s, MVT regvt, EVT valuevt, + bool IsABIMangledValue = false); RegsForValue(LLVMContext &Context, const TargetLowering &TLI, - const DataLayout &DL, unsigned Reg, Type *Ty); + const DataLayout &DL, unsigned Reg, Type *Ty, + bool IsABIMangledValue = false); /// Add the specified values to this one. void append(const RegsForValue &RHS) { ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + RegCount.push_back(RHS.Regs.size()); } /// Emit a series of CopyFromReg nodes that copies from this value and returns diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 488c60a28ffbcaac12acb1d5db5b39ab2a88df89..3dd58975b1f1062ea6a6ed2b6c514504117036df 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" @@ -214,6 +214,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCE: return "setcce"; + case ISD::SETCCCARRY: return "setcccarry"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; @@ -227,6 +228,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; + case ISD::ADDCARRY: return "addcarry"; case ISD::SADDO: return "saddo"; case ISD::UADDO: return "uaddo"; case ISD::SSUBO: return "ssubo"; @@ -235,6 +237,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UMULO: return "umulo"; case ISD::SUBC: return "subc"; case ISD::SUBE: return "sube"; + case ISD::SUBCARRY: return "subcarry"; case ISD::SHL_PARTS: return "shl_parts"; case ISD::SRA_PARTS: return "sra_parts"; case ISD::SRL_PARTS: return "srl_parts"; @@ -344,6 +347,19 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETFALSE: return "setfalse"; case ISD::SETFALSE2: return "setfalse2"; } + case ISD::VECREDUCE_FADD: return "vecreduce_fadd"; + case ISD::VECREDUCE_FMUL: return "vecreduce_fmul"; + case ISD::VECREDUCE_ADD: return "vecreduce_add"; + case ISD::VECREDUCE_MUL: return "vecreduce_mul"; + case ISD::VECREDUCE_AND: return "vecreduce_and"; + case ISD::VECREDUCE_OR: return "vecreduce_or"; + case ISD::VECREDUCE_XOR: return "vecreduce_xor"; + case ISD::VECREDUCE_SMAX: return "vecreduce_smax"; + case ISD::VECREDUCE_SMIN: return "vecreduce_smin"; + case ISD::VECREDUCE_UMAX: return "vecreduce_umax"; + case ISD::VECREDUCE_UMIN: return "vecreduce_umin"; + case ISD::VECREDUCE_FMAX: return "vecreduce_fmax"; + case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e21204dbb966194d9836e6b61f337c288efa2f0b..f711ca71f79fe7817915afc9a1455af3a78dbed4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===// // // The LLVM Compiler Infrastructure // @@ -17,11 +17,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -31,6 +31,7 @@ #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -50,9 +51,11 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" @@ -63,6 +66,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -73,6 +77,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -87,6 +92,7 @@ #include #include #include +#include #include #include #include @@ -298,7 +304,7 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, FuncInfo(new FunctionLoweringInfo()), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), - GFI(), + AA(), GFI(), OptLevel(OL), DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); @@ -316,7 +322,8 @@ SelectionDAGISel::~SelectionDAGISel() { } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); + if (OptLevel != CodeGenOpt::None) + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -330,11 +337,13 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that /// may trap on it. In this case we have to split the edge so that the path /// through the predecessor block that doesn't go to the phi block doesn't -/// execute the possibly trapping instruction. -/// +/// execute the possibly trapping instruction. If available, we pass domtree +/// and loop info to be updated when we split critical edges. This is because +/// SelectionDAGISel preserves these analyses. /// This is required for correctness, so it must be done at -O0. /// -static void SplitCriticalSideEffectEdges(Function &Fn) { +static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT, + LoopInfo *LI) { // Loop for blocks with phi nodes. for (BasicBlock &BB : Fn) { PHINode *PN = dyn_cast(BB.begin()); @@ -360,7 +369,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn) { // Okay, we have to split this edge. SplitCriticalEdge( Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), - CriticalEdgeSplittingOptions().setMergeIdenticalEdges()); + CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -393,24 +402,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - AA = &getAnalysis().getAAResults(); LibInfo = &getAnalysis().getTLI(); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; ORE = make_unique(&Fn); + auto *DTWP = getAnalysisIfAvailable(); + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *LIWP = getAnalysisIfAvailable(); + LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast(Fn)); + SplitCriticalSideEffectEdges(const_cast(Fn), DT, LI); CurDAG->init(*MF, *ORE); FuncInfo->set(Fn, *MF, CurDAG); + // Now get the optional analyzes if we want to. + // This is based on the possibly changed OptLevel (after optnone is taken + // into account). That's unfortunate but OK because it just means we won't + // ask for passes that have been required anyway. + if (UseMBPI && OptLevel != CodeGenOpt::None) FuncInfo->BPI = &getAnalysis().getBPI(); else FuncInfo->BPI = nullptr; - SDB->init(GFI, *AA, LibInfo); + if (OptLevel != CodeGenOpt::None) + AA = &getAnalysis().getAAResults(); + else + AA = nullptr; + + SDB->init(GFI, AA, LibInfo); MF->setHasInlineAsm(false); @@ -592,13 +614,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MRI.replaceRegWith(From, To); } - if (TLI->hasCopyImplyingStackAdjustment(MF)) - MFI.setHasCopyImplyingStackAdjustment(true); - - // Freeze the set of reserved registers now that MachineFrameInfo has been - // set up. All the information required by getReservedRegs() should be - // available now. - MRI.freezeReservedRegs(*MF); + TLI->finalizeLowering(*MF); // Release function-specific state. SDB and CurDAG are already cleared // at this point. @@ -650,8 +666,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { Worklist.push_back(CurDAG->getRoot().getNode()); - APInt KnownZero; - APInt KnownOne; + KnownBits Known; do { SDNode *N = Worklist.pop_back_val(); @@ -680,8 +695,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - CurDAG->computeKnownBits(Src, KnownZero, KnownOne); - FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); + CurDAG->computeKnownBits(Src, Known); + FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, Known); } while (!Worklist.empty()); } @@ -721,7 +736,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("combine1", "DAG Combining 1", GroupName, GroupDescription, TimePassesIsEnabled); - CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); + CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber @@ -753,12 +768,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("combine_lt", "DAG Combining after legalize types", GroupName, GroupDescription, TimePassesIsEnabled); - CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - } { @@ -787,7 +801,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors", GroupName, GroupDescription, TimePassesIsEnabled); - CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel); } DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" @@ -813,7 +827,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("combine2", "DAG Combining 2", GroupName, GroupDescription, TimePassesIsEnabled); - CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber @@ -900,50 +914,6 @@ public: } // end anonymous namespace -static bool isStrictFPOp(SDNode *Node, unsigned &NewOpc) { - unsigned OrigOpc = Node->getOpcode(); - switch (OrigOpc) { - case ISD::STRICT_FADD: NewOpc = ISD::FADD; return true; - case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; return true; - case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; return true; - case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; return true; - case ISD::STRICT_FREM: NewOpc = ISD::FREM; return true; - default: return false; - } -} - -SDNode* SelectionDAGISel::MutateStrictFPToFP(SDNode *Node, unsigned NewOpc) { - assert(((Node->getOpcode() == ISD::STRICT_FADD && NewOpc == ISD::FADD) || - (Node->getOpcode() == ISD::STRICT_FSUB && NewOpc == ISD::FSUB) || - (Node->getOpcode() == ISD::STRICT_FMUL && NewOpc == ISD::FMUL) || - (Node->getOpcode() == ISD::STRICT_FDIV && NewOpc == ISD::FDIV) || - (Node->getOpcode() == ISD::STRICT_FREM && NewOpc == ISD::FREM)) && - "Unexpected StrictFP opcode!"); - - // We're taking this node out of the chain, so we need to re-link things. - SDValue InputChain = Node->getOperand(0); - SDValue OutputChain = SDValue(Node, 1); - CurDAG->ReplaceAllUsesOfValueWith(OutputChain, InputChain); - - SDVTList VTs = CurDAG->getVTList(Node->getOperand(1).getValueType()); - SDValue Ops[2] = { Node->getOperand(1), Node->getOperand(2) }; - SDNode *Res = CurDAG->MorphNodeTo(Node, NewOpc, VTs, Ops); - - // MorphNodeTo can operate in two ways: if an existing node with the - // specified operands exists, it can just return it. Otherwise, it - // updates the node in place to have the requested operands. - if (Res == Node) { - // If we updated the node in place, reset the node ID. To the isel, - // this should be just like a newly allocated machine node. - Res->setNodeId(-1); - } else { - CurDAG->ReplaceAllUsesWith(Node, Res); - CurDAG->RemoveDeadNode(Node); - } - - return Res; -} - void SelectionDAGISel::DoInstructionSelection() { DEBUG(dbgs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() @@ -987,15 +957,12 @@ void SelectionDAGISel::DoInstructionSelection() { // If the current node is a strict FP pseudo-op, the isStrictFPOp() // function will provide the corresponding normal FP opcode to which the // node should be mutated. - unsigned NormalFPOpc = ISD::UNDEF; - bool IsStrictFPOp = isStrictFPOp(Node, NormalFPOpc); - if (IsStrictFPOp) - Node = MutateStrictFPToFP(Node, NormalFPOpc); + // + // FIXME: The backends need a way to handle FP constraints. + if (Node->isStrictFPOpcode()) + Node = CurDAG->mutateStrictFPToFP(Node); Select(Node); - - // FIXME: Add code here to attach an implicit def and use of - // target-specific FP environment registers. } CurDAG->setRoot(Dummy.getValue()); @@ -1091,6 +1058,7 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, FuncInfo->SwiftErrorVals.clear(); FuncInfo->SwiftErrorVRegDefMap.clear(); FuncInfo->SwiftErrorVRegUpwardsUse.clear(); + FuncInfo->SwiftErrorVRegDefUses.clear(); FuncInfo->SwiftErrorArg = nullptr; // Check if function has a swifterror argument. @@ -1151,6 +1119,51 @@ static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo, } } +/// Collect llvm.dbg.declare information. This is done after argument lowering +/// in case the declarations refer to arguments. +static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { + MachineFunction *MF = FuncInfo->MF; + const DataLayout &DL = MF->getDataLayout(); + for (const BasicBlock &BB : *FuncInfo->Fn) { + for (const Instruction &I : BB) { + const DbgDeclareInst *DI = dyn_cast(&I); + if (!DI) + continue; + + assert(DI->getVariable() && "Missing variable"); + assert(DI->getDebugLoc() && "Missing location"); + const Value *Address = DI->getAddress(); + if (!Address) + continue; + + // Look through casts and constant offset GEPs. These mostly come from + // inalloca. + APInt Offset(DL.getPointerSizeInBits(0), 0); + Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + + // Check if the variable is a static alloca or a byval or inalloca + // argument passed in memory. If it is not, then we will ignore this + // intrinsic and handle this during isel like dbg.value. + int FI = std::numeric_limits::max(); + if (const auto *AI = dyn_cast(Address)) { + auto SI = FuncInfo->StaticAllocaMap.find(AI); + if (SI != FuncInfo->StaticAllocaMap.end()) + FI = SI->second; + } else if (const auto *Arg = dyn_cast(Address)) + FI = FuncInfo->getArgumentFrameIndex(Arg); + + if (FI == std::numeric_limits::max()) + continue; + + DIExpression *Expr = DI->getExpression(); + if (Offset.getBoolValue()) + Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, + Offset.getZExtValue()); + MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc()); + } + } +} + /// Propagate swifterror values through the machine function CFG. static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { auto *TLI = FuncInfo->TLI; @@ -1269,6 +1282,80 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { } } +void preassignSwiftErrorRegs(const TargetLowering *TLI, + FunctionLoweringInfo *FuncInfo, + BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End) { + if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty()) + return; + + // Iterator over instructions and assign vregs to swifterror defs and uses. + for (auto It = Begin; It != End; ++It) { + ImmutableCallSite CS(&*It); + if (CS) { + // A call-site with a swifterror argument is both use and def. + const Value *SwiftErrorAddr = nullptr; + for (auto &Arg : CS.args()) { + if (!Arg->isSwiftError()) + continue; + // Use of swifterror. + assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments"); + SwiftErrorAddr = &*Arg; + assert(SwiftErrorAddr->isSwiftError() && + "Must have a swifterror value argument"); + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( + &*It, FuncInfo->MBB, SwiftErrorAddr); + assert(CreatedReg); + } + if (!SwiftErrorAddr) + continue; + + // Def of swifterror. + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); + assert(CreatedReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); + + // A load is a use. + } else if (const LoadInst *LI = dyn_cast(&*It)) { + const Value *V = LI->getOperand(0); + if (!V->isSwiftError()) + continue; + + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V); + assert(CreatedReg); + + // A store is a def. + } else if (const StoreInst *SI = dyn_cast(&*It)) { + const Value *SwiftErrorAddr = SI->getOperand(1); + if (!SwiftErrorAddr->isSwiftError()) + continue; + + // Def of swifterror. + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); + assert(CreatedReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); + + // A return in a swiferror returning function is a use. + } else if (const ReturnInst *R = dyn_cast(&*It)) { + const Function *F = R->getParent()->getParent(); + if(!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + continue; + + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( + R, FuncInfo->MBB, FuncInfo->SwiftErrorArg); + assert(CreatedReg); + } + } +} + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastISelFailed = false; // Initialize the Fast-ISel state, if needed. @@ -1323,6 +1410,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB); + processDbgDeclares(FuncInfo); + // Iterate over all basic blocks in the function. for (const BasicBlock *LLVMBB : RPOT) { if (OptLevel != CodeGenOpt::None) { @@ -1373,6 +1462,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->startNewBlock(); unsigned NumFastIselRemaining = std::distance(Begin, End); + + // Pre-assign swifterror vregs. + preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End); + // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = &*std::prev(BI); @@ -1930,11 +2023,11 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, // either already zero or is not demanded. Check for known zero input bits. APInt NeededMask = DesiredMask & ~ActualMask; - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(LHS, KnownZero, KnownOne); + KnownBits Known; + CurDAG->computeKnownBits(LHS, Known); // If all the missing bits in the or are already known to be set, match! - if ((NeededMask & KnownOne) == NeededMask) + if (NeededMask.isSubsetOf(Known.One)) return true; // TODO: check to see if missing bits are just not demanded. @@ -2017,7 +2110,7 @@ static SDNode *findGlueUse(SDNode *N) { } /// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". -/// This function recursively traverses up the operand chain, ignoring +/// This function iteratively traverses up the operand chain, ignoring /// certain nodes. static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, SDNode *Root, SmallPtrSetImpl &Visited, @@ -2030,30 +2123,36 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // The Use may be -1 (unassigned) if it is a newly allocated node. This can // happen because we scan down to newly selected nodes in the case of glue // uses. - if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) - return false; + std::vector WorkList; + WorkList.push_back(Use); - // Don't revisit nodes if we already scanned it and didn't fail, we know we - // won't fail if we scan it again. - if (!Visited.insert(Use).second) - return false; + while (!WorkList.empty()) { + Use = WorkList.back(); + WorkList.pop_back(); + if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1) + continue; - for (const SDValue &Op : Use->op_values()) { - // Ignore chain uses, they are validated by HandleMergeInputChains. - if (Op.getValueType() == MVT::Other && IgnoreChains) + // Don't revisit nodes if we already scanned it and didn't fail, we know we + // won't fail if we scan it again. + if (!Visited.insert(Use).second) continue; - SDNode *N = Op.getNode(); - if (N == Def) { - if (Use == ImmedUse || Use == Root) - continue; // We are not looking for immediate use. - assert(N != Root); - return true; - } + for (const SDValue &Op : Use->op_values()) { + // Ignore chain uses, they are validated by HandleMergeInputChains. + if (Op.getValueType() == MVT::Other && IgnoreChains) + continue; - // Traverse up the operand chain. - if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains)) - return true; + SDNode *N = Op.getNode(); + if (N == Def) { + if (Use == ImmedUse || Use == Root) + continue; // We are not looking for immediate use. + assert(N != Root); + return true; + } + + // Traverse up the operand chain. + WorkList.push_back(N); + } } return false; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 2764688518c2b00a94fbc83eba88fa452afbd039..11561dfa5947419d72bc0bc7bf7ee2a6e18cc28d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index d27e2455978df7bab93cdc9c3d25377a80211ca8..5d78bba86d73b4768fc9c5c48758b8e92ee297c5 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/CallingConv.h" @@ -110,8 +110,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, Builder.FuncInfo.StatepointStackSlots.size() && "Broken invariant"); - StatepointMaxSlotsRequired = std::max( - StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size()); + StatepointMaxSlotsRequired.updateMax( + Builder.FuncInfo.StatepointStackSlots.size()); return SpillSlot; } @@ -242,7 +242,8 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, // Cache this slot so we find it when going through the normal // assignment loop. - SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType()); + SDValue Loc = + Builder.DAG.getTargetFrameIndex(*Index, Builder.getFrameIndexTy()); Builder.StatepointLowering.setLocation(Incoming, Loc); } @@ -343,7 +344,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, Builder); int Index = cast(Loc)->getIndex(); // We use TargetFrameIndex so that isel will not select it into LEA - Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); + Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); // TODO: We can create TokenFactor node instead of // chaining stores one after another, this may allow @@ -391,8 +392,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, // This handles allocas as arguments to the statepoint (this is only // really meaningful for a deopt value. For GC, we'd be trying to // relocate the address of the alloca itself?) + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), - Incoming.getValueType())); + Builder.getFrameIndexTy())); } else if (LiveInOnly) { // If this value is live in (not live-on-return, or live-through), we can // treat it the same way patchpoint treats it's "live in" values. We'll @@ -527,8 +530,10 @@ lowerStatepointMetaArgs(SmallVectorImpl &Ops, SDValue Incoming = Builder.getValue(V); if (FrameIndexSDNode *FI = dyn_cast(Incoming)) { // This handles allocas as arguments to the statepoint + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), - Incoming.getValueType())); + Builder.getFrameIndexTy())); } } @@ -813,7 +818,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, SI.GCTransitionArgs = ArrayRef(ISP.gc_args_begin(), ISP.gc_args_end()); SI.ID = ISP.getID(); - SI.DeoptState = ArrayRef(ISP.vm_state_begin(), ISP.vm_state_end()); + SI.DeoptState = ArrayRef(ISP.deopt_begin(), ISP.deopt_end()); SI.StatepointFlags = ISP.getFlags(); SI.NumPatchBytes = ISP.getNumPatchBytes(); SI.EHPadBB = EHPadBB; @@ -835,7 +840,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, // completely and make statepoint call to return a tuple. unsigned Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy); + DAG.getDataLayout(), Reg, RetTy, true); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); @@ -949,8 +954,8 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation, - SD.getValueType()); + SDValue SpillSlot = + DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); // Be conservative: flush all pending loads // TODO: Probably we can be less restrictive on this, @@ -958,7 +963,9 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { SDValue Chain = getRoot(); SDValue SpillLoad = - DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()), + getCurSDLoc(), Chain, SpillSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), *DerivedPtrLocation)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 034591a00fecd790f6980b1694682557b8584ff1..8652df7bbd706cbde1188ee045cf3a1486c6f377 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -98,18 +99,18 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, /// \brief Set CallLoweringInfo attribute flags based on a call instruction /// and called function attributes. void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS, - unsigned AttrIdx) { - IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); - IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); - IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); - IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); - IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); - IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); - IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); - IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); - IsSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf); - IsSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError); - Alignment = CS->getParamAlignment(AttrIdx); + unsigned ArgIdx) { + IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt); + IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt); + IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg); + IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet); + IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest); + IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal); + IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca); + IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned); + IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf); + IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError); + Alignment = CS->getParamAlignment(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -341,11 +342,16 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// If the specified instruction has a constant integer operand and there are /// bits set in that constant that are not demanded, then clear those bits and /// return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( - SDValue Op, const APInt &Demanded) { +bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + SelectionDAG &DAG = TLO.DAG; SDLoc DL(Op); unsigned Opcode = Op.getOpcode(); + // Do target-specific constant optimization. + if (targetShrinkDemandedConstant(Op, Demanded, TLO)) + return TLO.New.getNode(); + // FIXME: ISD::SELECT, ISD::SELECT_CC switch (Opcode) { default: @@ -359,14 +365,14 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( // If this is a 'not' op, don't touch it because that's a canonical form. const APInt &C = Op1C->getAPIntValue(); - if (Opcode == ISD::XOR && (C | ~Demanded).isAllOnesValue()) + if (Opcode == ISD::XOR && Demanded.isSubsetOf(C)) return false; - if (C.intersects(~Demanded)) { + if (!C.isSubsetOf(Demanded)) { EVT VT = Op.getValueType(); SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); - return CombineTo(Op, NewOp); + return TLO.CombineTo(Op, NewOp); } break; @@ -379,15 +385,17 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be /// generalized for targets with other types of implicit widening casts. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, - unsigned BitWidth, - const APInt &Demanded, - const SDLoc &dl) { +bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, + const APInt &Demanded, + TargetLoweringOpt &TLO) const { assert(Op.getNumOperands() == 2 && "ShrinkDemandedOp only supports binary operators!"); assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + SelectionDAG &DAG = TLO.DAG; + SDLoc dl(Op); + // Early return, as this function cannot handle vector types. if (Op.getValueType().isVector()) return false; @@ -409,31 +417,28 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && TLI.isZExtFree(SmallVT, Op.getValueType())) { // We found a type with free casts. - SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, - DAG.getNode(ISD::TRUNCATE, dl, SmallVT, - Op.getNode()->getOperand(0)), - DAG.getNode(ISD::TRUNCATE, dl, SmallVT, - Op.getNode()->getOperand(1))); + SDValue X = DAG.getNode( + Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1))); bool NeedZext = DemandedSize > SmallVTBits; SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, dl, Op.getValueType(), X); - return CombineTo(Op, Z); + return TLO.CombineTo(Op, Z); } } return false; } bool -TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, - unsigned OpIdx, - const APInt &Demanded, - DAGCombinerInfo &DCI) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, + const APInt &Demanded, + DAGCombinerInfo &DCI, + TargetLoweringOpt &TLO) const { SDValue Op = User->getOperand(OpIdx); - APInt KnownZero, KnownOne; + KnownBits Known; - if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, - *this, 0, true)) + if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true)) return false; @@ -445,9 +450,9 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, // with the value 'x', which will give us: // Old = i32 and x, 0xffffff // New = x - if (Old.hasOneUse()) { + if (TLO.Old.hasOneUse()) { // For the one use case, we just commit the change. - DCI.CommitTargetLoweringOpt(*this); + DCI.CommitTargetLoweringOpt(TLO); return true; } @@ -455,17 +460,17 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, // AssumeSingleUse flag is not propogated to recursive calls of // SimplifyDemanded bits, so the only node with multiple use that // it will attempt to combine will be opt. - assert(Old == Op); + assert(TLO.Old == Op); SmallVector NewOps; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (i == OpIdx) { - NewOps.push_back(New); + NewOps.push_back(TLO.New); continue; } NewOps.push_back(User->getOperand(i)); } - DAG.UpdateNodeOperands(User, NewOps); + TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. DCI.AddToWorklist(Op.getNode()); @@ -481,10 +486,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, SelectionDAG &DAG = DCI.DAG; TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); - APInt KnownZero, KnownOne; + KnownBits Known; - bool Simplified = SimplifyDemandedBits(Op, DemandedMask, KnownZero, KnownOne, - TLO); + bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO); if (Simplified) DCI.CommitTargetLoweringOpt(TLO); return Simplified; @@ -494,13 +498,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the /// original and new nodes in Old and New. Otherwise, analyze the expression and -/// return a mask of KnownOne and KnownZero bits for the expression (used to -/// simplify the caller). The KnownZero/One bits may only be accurate for those -/// bits in the DemandedMask. +/// return a mask of Known bits for the expression (used to simplify the +/// caller). The Known bits may only be accurate for those bits in the +/// DemandedMask. bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth, bool AssumeSingleUse) const { @@ -512,14 +515,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, auto &DL = TLO.DAG.getDataLayout(); // Don't know anything. - KnownZero = KnownOne = APInt(BitWidth, 0); + Known = KnownBits(BitWidth); // Other users may use these bits. if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { - // If not at the root, Just compute the KnownZero/KnownOne bits to + // If not at the root, Just compute the Known bits to // simplify things downstream. - TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); + TLO.DAG.computeKnownBits(Op, Known, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -534,38 +537,36 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } - APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; + KnownBits Known2, KnownOut; switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast(Op)->getAPIntValue(); - KnownZero = ~KnownOne; + Known.One = cast(Op)->getAPIntValue(); + Known.Zero = ~Known.One; return false; // Don't fall through, will infinitely loop. case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every constant vector element. - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + Known.Zero.setAllBits(); Known.One.setAllBits(); for (SDValue SrcOp : Op->ops()) { if (!isa(SrcOp)) { // We can only handle all constant values - bail out with no known bits. - KnownZero = KnownOne = APInt(BitWidth, 0); + Known = KnownBits(BitWidth); return false; } - KnownOne2 = cast(SrcOp)->getAPIntValue(); - KnownZero2 = ~KnownOne2; + Known2.One = cast(SrcOp)->getAPIntValue(); + Known2.Zero = ~Known2.One; // BUILD_VECTOR can implicitly truncate sources, we must handle this. - if (KnownOne2.getBitWidth() != BitWidth) { - assert(KnownOne2.getBitWidth() > BitWidth && - KnownZero2.getBitWidth() > BitWidth && + if (Known2.One.getBitWidth() != BitWidth) { + assert(Known2.getBitWidth() > BitWidth && "Expected BUILD_VECTOR implicit truncation"); - KnownOne2 = KnownOne2.trunc(BitWidth); - KnownZero2 = KnownZero2.trunc(BitWidth); + Known2 = Known2.trunc(BitWidth); } // Known bits are the values that are shared by every element. // TODO: support per-element known bits. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } return false; // Don't fall through, will infinitely loop. case ISD::AND: @@ -573,18 +574,18 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // using the bits from the RHS. Below, we use knowledge about the RHS to // simplify the LHS, here we're using information from the LHS to simplify // the RHS. - if (ConstantSDNode *RHSC = dyn_cast(Op.getOperand(1))) { + if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) { SDValue Op0 = Op.getOperand(0); - APInt LHSZero, LHSOne; + KnownBits LHSKnown; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth); + TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth); // If the LHS already has zeros where RHSC does, this and is dead. - if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op0); // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -593,182 +594,163 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the xor. For example, for a 32-bit X: // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 if (isBitwiseNot(Op0) && Op0.hasOneUse() && - LHSOne == ~RHSC->getAPIntValue()) { + LHSKnown.One == ~RHSC->getAPIntValue()) { SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), Op0.getOperand(0), Op.getOperand(1)); return TLO.CombineTo(Op, Xor); } } - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, - KnownZero2, KnownOne2, TLO, Depth+1)) + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask, + Known2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + if (NewMask.isSubsetOf(Known2.Zero | Known.One)) return TLO.CombineTo(Op, Op.getOperand(0)); - if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + if (NewMask.isSubsetOf(Known.Zero | Known2.One)) return TLO.CombineTo(Op, Op.getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. - if ((NewMask & (KnownZero|KnownZero2)) == NewMask) + if (NewMask.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne &= KnownOne2; + Known.One &= Known2.One; // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero |= KnownZero2; + Known.Zero |= Known2.Zero; break; case ISD::OR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, - KnownZero2, KnownOne2, TLO, Depth+1)) + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask, + Known2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + if (NewMask.isSubsetOf(Known2.One | Known.Zero)) return TLO.CombineTo(Op, Op.getOperand(0)); - if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) - return TLO.CombineTo(Op, Op.getOperand(1)); - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) - return TLO.CombineTo(Op, Op.getOperand(0)); - if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + if (NewMask.isSubsetOf(Known.One | Known2.Zero)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero &= KnownZero2; + Known.Zero &= Known2.Zero; // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne |= KnownOne2; + Known.One |= Known2.One; break; - case ISD::XOR: - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + case ISD::XOR: { + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, - KnownOne2, TLO, Depth+1)) + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1)) return true; - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. - if ((KnownZero & NewMask) == NewMask) + if (NewMask.isSubsetOf(Known.Zero)) return TLO.CombineTo(Op, Op.getOperand(0)); - if ((KnownZero2 & NewMask) == NewMask) + if (NewMask.isSubsetOf(Known2.Zero)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // If all of the unknown bits are known to be zero on one side or the other // (but not both) turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); // If all of the demanded bits on one side are known, and all of the set // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side - if (KnownOne == KnownOne2) { // set bits are the same on both sides + if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side + if (Known.One == Known2.One) { // set bits are the same on both sides EVT VT = Op.getValueType(); - SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT); + SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); } } - // If the RHS is a constant, see if we can simplify it. - // for XOR, we prefer to force bits to 1 if they will make a -1. - // If we can't force bits, try to shrink the constant. - if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { - APInt Expanded = C->getAPIntValue() | (~NewMask); - // If we can expand it to have all bits set, do it. - if (Expanded.isAllOnesValue()) { - if (Expanded != C->getAPIntValue()) { - EVT VT = Op.getValueType(); - SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), - TLO.DAG.getConstant(Expanded, dl, VT)); - return TLO.CombineTo(Op, New); - } - // If it already has all the bits set, nothing to change - // but don't shrink either! - } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { - return true; + // If the RHS is a constant, see if we can change it. Don't alter a -1 + // constant because that's a 'not' op, and that is better for combining and + // codegen. + ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1)); + if (C && !C->isAllOnesValue()) { + if (NewMask.isSubsetOf(C->getAPIntValue())) { + // We're flipping all demanded bits. Flip the undemanded bits too. + SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), Op.getValueType()); + return TLO.CombineTo(Op, New); } + // If we can't turn this into a 'not', try to shrink the constant. + if (ShrinkDemandedConstant(Op, NewMask, TLO)) + return true; } - KnownZero = KnownZeroOut; - KnownOne = KnownOneOut; + Known = std::move(KnownOut); break; + } case ISD::SELECT: - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1)) return true; - if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, - KnownOne2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SELECT_CC: - if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1)) return true; - if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, - KnownOne2, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; case ISD::SETCC: { SDValue Op0 = Op.getOperand(0); @@ -777,7 +759,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If (1) we only need the sign-bit, (2) the setcc operands are the same // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. - if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth && + if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && getBooleanContents(Op.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! @@ -794,7 +776,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (getBooleanContents(Op0.getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero.setBitsFrom(1); + Known.Zero.setBitsFrom(1); break; } case ISD::SHL: @@ -828,17 +810,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1)) return true; // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits // are not demanded. This will likely allow the anyext to be folded away. if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { - SDValue InnerOp = InOp.getNode()->getOperand(0); + SDValue InnerOp = InOp.getOperand(0); EVT InnerVT = InnerOp.getValueType(); unsigned InnerBits = InnerVT.getSizeInBits(); - if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && + if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT, DL); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) @@ -860,12 +841,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, InnerOp.getOpcode() == ISD::SRL && InnerOp.hasOneUse() && isa(InnerOp.getOperand(1))) { - uint64_t InnerShAmt = cast(InnerOp.getOperand(1)) + unsigned InnerShAmt = cast(InnerOp.getOperand(1)) ->getZExtValue(); if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && - NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && - NewMask.trunc(ShAmt) == 0) { + NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && + NewMask.countTrailingZeros() >= ShAmt) { SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, Op.getOperand(1).getValueType()); @@ -878,10 +859,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - KnownZero <<= SA->getZExtValue(); - KnownOne <<= SA->getZExtValue(); + Known.Zero <<= SA->getZExtValue(); + Known.One <<= SA->getZExtValue(); // low bits known zero. - KnownZero.setLowBits(SA->getZExtValue()); + Known.Zero.setLowBits(SA->getZExtValue()); } break; case ISD::SRL: @@ -899,7 +880,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the shift is exact, then it does demand the low bits (and knows that // they are zero). - if (cast(Op)->Flags.hasExact()) + if (Op->getFlags().hasExact()) InDemandedMask.setLowBits(ShAmt); // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a @@ -924,14 +905,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // Compute the new bits that are at the top now. - if (SimplifyDemandedBits(InOp, InDemandedMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); - KnownZero.setHighBits(ShAmt); // High bits known zero. + Known.Zero.setHighBits(ShAmt); // High bits known zero. } break; case ISD::SRA: @@ -939,7 +919,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (NewMask == 1) + if (NewMask.isOneValue()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); @@ -956,33 +936,30 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the shift is exact, then it does demand the low bits (and knows that // they are zero). - if (cast(Op)->Flags.hasExact()) + if (Op->getFlags().hasExact()) InDemandedMask.setLowBits(ShAmt); // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits()); + if (NewMask.countLeadingZeros() < ShAmt) + InDemandedMask.setSignBit(); - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO, + Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); - - // Handle the sign bit, adjusted to where it is now in the mask. - APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + if (Known.Zero[BitWidth - ShAmt - 1] || + NewMask.countLeadingZeros() >= ShAmt) { SDNodeFlags Flags; - Flags.setExact(cast(Op)->Flags.hasExact()); + Flags.setExact(Op->getFlags().hasExact()); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), - Op.getOperand(1), &Flags)); + Op.getOperand(1), Flags)); } int Log2 = NewMask.exactLogBase2(); @@ -995,9 +972,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0), NewSA)); } - if (KnownOne.intersects(SignBit)) + if (Known.One[BitWidth - ShAmt - 1]) // New bits are known one. - KnownOne |= HighBits; + Known.One.setHighBits(ShAmt); } break; case ISD::SIGN_EXTEND_INREG: { @@ -1039,7 +1016,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth); + APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, ExVT.getScalarSizeInBits()) & @@ -1050,24 +1027,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, InputDemandedBits |= InSignBit; if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, TLO, Depth+1)) + Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. // If the input sign bit is known zero, convert this into a zero extension. - if (KnownZero.intersects(InSignBit)) + if (Known.Zero.intersects(InSignBit)) return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg( Op.getOperand(0), dl, ExVT.getScalarType())); - if (KnownOne.intersects(InSignBit)) { // Input sign bit known set - KnownOne |= NewBits; - KnownZero &= ~NewBits; + if (Known.One.intersects(InSignBit)) { // Input sign bit known set + Known.One |= NewBits; + Known.Zero &= ~NewBits; } else { // Input sign bit unknown - KnownZero &= ~NewBits; - KnownOne &= ~NewBits; + Known.Zero &= ~NewBits; + Known.One &= ~NewBits; } break; } @@ -1078,22 +1055,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth); APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth); - APInt KnownZeroLo, KnownOneLo; - APInt KnownZeroHi, KnownOneHi; + KnownBits KnownLo, KnownHi; - if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo, - KnownOneLo, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1)) return true; - if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi, - KnownOneHi, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1)) return true; - KnownZero = KnownZeroLo.zext(BitWidth) | - KnownZeroHi.zext(BitWidth).shl(HalfBitWidth); + Known.Zero = KnownLo.Zero.zext(BitWidth) | + KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth); - KnownOne = KnownOneLo.zext(BitWidth) | - KnownOneHi.zext(BitWidth).shl(HalfBitWidth); + Known.One = KnownLo.One.zext(BitWidth) | + KnownHi.One.zext(BitWidth).shl(HalfBitWidth); break; } case ISD::ZERO_EXTEND: { @@ -1108,13 +1082,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getValueType(), Op.getOperand(0))); - if (SimplifyDemandedBits(Op.getOperand(0), InMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - KnownZero |= NewBits; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + Known = Known.zext(BitWidth); + Known.Zero |= NewBits; break; } case ISD::SIGN_EXTEND: { @@ -1136,37 +1108,34 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, InDemandedBits |= InSignBit; InDemandedBits = InDemandedBits.trunc(InBits); - if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, - KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO, + Depth+1)) return true; - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + Known = Known.zext(BitWidth); // If the sign bit is known zero, convert this to a zero extend. - if (KnownZero.intersects(InSignBit)) + if (Known.Zero.intersects(InSignBit)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); // If the sign bit is known one, the top bits match. - if (KnownOne.intersects(InSignBit)) { - KnownOne |= NewBits; - assert((KnownZero & NewBits) == 0); + if (Known.One.intersects(InSignBit)) { + Known.One |= NewBits; + assert((Known.Zero & NewBits) == 0); } else { // Otherwise, top bits aren't known. - assert((KnownOne & NewBits) == 0); - assert((KnownZero & NewBits) == 0); + assert((Known.One & NewBits) == 0); + assert((Known.Zero & NewBits) == 0); } break; } case ISD::ANY_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt InMask = NewMask.trunc(OperandBitWidth); - if (SimplifyDemandedBits(Op.getOperand(0), InMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + Known = Known.zext(BitWidth); break; } case ISD::TRUNCATE: { @@ -1174,11 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // zero/one bits live out. unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); APInt TruncMask = NewMask.zext(OperandBitWidth); - if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, - KnownZero, KnownOne, TLO, Depth+1)) + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1)) return true; - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); + Known = Known.trunc(BitWidth); // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. @@ -1204,26 +1171,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, getShiftAmountTy(Op.getValueType(), DL)); } - APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, - OperandBitWidth - BitWidth); - HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); - - if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { - // None of the shifted in bits are needed. Add a truncate of the - // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), - In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, - Op.getValueType(), - NewTrunc, - Shift)); + if (ShAmt->getZExtValue() < BitWidth) { + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); + HighBits.lshrInPlace(ShAmt->getZExtValue()); + HighBits = HighBits.trunc(BitWidth); + + if (!(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + Shift)); + } } break; } } - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case ISD::AssertZext: { @@ -1233,11 +1203,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, - KnownZero, KnownOne, TLO, Depth+1)) + Known, TLO, Depth+1)) return true; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); - KnownZero |= ~InMask; + Known.Zero |= ~InMask; break; } case ISD::BITCAST: @@ -1246,7 +1216,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!TLO.LegalOperations() && !Op.getValueType().isVector() && !Op.getOperand(0).getValueType().isVector() && - NewMask == APInt::getSignBit(Op.getValueSizeInBits()) && + NewMask == APInt::getSignMask(Op.getValueSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); @@ -1275,22 +1245,19 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // of the highest bit demanded of them. APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMask.countLeadingZeros()); - if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1) || - SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, - KnownOne2, TLO, Depth+1) || + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) || + SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) || // See if the operation should be performed at a smaller bit width. - TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) { - const SDNodeFlags *Flags = Op.getNode()->getFlags(); - if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) { + ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { + SDNodeFlags Flags = Op.getNode()->getFlags(); + if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. - SDNodeFlags NewFlags = *Flags; - NewFlags.setNoSignedWrap(false); - NewFlags.setNoUnsignedWrap(false); + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1), - &NewFlags); + Flags); return TLO.CombineTo(Op, NewOp); } return true; @@ -1299,13 +1266,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } default: // Just use computeKnownBits to compute output bits. - TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); + TLO.DAG.computeKnownBits(Op, Known, Depth); break; } // If we know the value of all of the demanded bits, return this as a // constant. - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { + if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // Avoid folding to a constant if any OpaqueConstant is involved. const SDNode *N = Op.getNode(); for (SDNodeIterator I = SDNodeIterator::begin(N), @@ -1316,17 +1283,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } return TLO.CombineTo(Op, - TLO.DAG.getConstant(KnownOne, dl, Op.getValueType())); + TLO.DAG.getConstant(Known.One, dl, Op.getValueType())); } return false; } /// Determine which of the bits specified in Mask are known to be either zero or -/// one and return them in the KnownZero/KnownOne bitsets. +/// one and return them in the Known. void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { @@ -1336,7 +1302,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); + Known.resetAll(); } /// This method can be implemented by targets that want to expose additional @@ -1354,31 +1320,38 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } +// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must +// work with truncating build vectors and vectors with elements of less than +// 8 bits. bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!N) return false; - const ConstantSDNode *CN = dyn_cast(N); - if (!CN) { - const BuildVectorSDNode *BV = dyn_cast(N); - if (!BV) - return false; - - // Only interested in constant splats, we don't care about undef - // elements in identifying boolean constants and getConstantSplatNode - // returns NULL if all ops are undef; - CN = BV->getConstantSplatNode(); + APInt CVal; + if (auto *CN = dyn_cast(N)) { + CVal = CN->getAPIntValue(); + } else if (auto *BV = dyn_cast(N)) { + auto *CN = BV->getConstantSplatNode(); if (!CN) return false; + + // If this is a truncating build vector, truncate the splat value. + // Otherwise, we may fail to match the expected values below. + unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits(); + CVal = CN->getAPIntValue(); + if (BVEltWidth < CVal.getBitWidth()) + CVal = CVal.trunc(BVEltWidth); + } else { + return false; } switch (getBooleanContents(N->getValueType(0))) { case UndefinedBooleanContent: - return CN->getAPIntValue()[0]; + return CVal[0]; case ZeroOrOneBooleanContent: - return CN->isOne(); + return CVal.isOneValue(); case ZeroOrNegativeOneBooleanContent: - return CN->isAllOnesValue(); + return CVal.isAllOnesValue(); } llvm_unreachable("Invalid boolean contents"); @@ -1520,8 +1493,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } - // Ensure that the constant occurs on the RHS, and fold constant - // comparisons. + // Ensure that the constant occurs on the RHS and fold constant comparisons. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); if (isa(N0.getNode()) && (DCI.isBeforeLegalizeOps() || @@ -1534,7 +1506,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. - if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && N0.getOperand(1).getOpcode() == ISD::Constant) { const APInt &ShAmt @@ -1665,14 +1637,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), TopSetCC.getOperand(1), InvCond); - } } } - // If the LHS is '(and load, const)', the RHS is 0, - // the test is for equality or unsigned, and all 1 bits of the const are - // in the same partial word, see if we can shorten the load. + // If the LHS is '(and load, const)', the RHS is 0, the test is for + // equality or unsigned, and all 1 bits of the const are in the same + // partial word, see if we can shorten the load. if (DCI.isBeforeLegalize() && !ISD::isSignedIntSetCC(Cond) && N0.getOpcode() == ISD::AND && C1 == 0 && @@ -1695,16 +1666,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, for (unsigned width = origWidth / 2; width>=8; width /= 2) { APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offsetisNullValue() || N1C->getAPIntValue() == 1) && + } else if ((N1C->isNullValue() || N1C->isOne()) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { - bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); if (TrueWhenTrue) return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. @@ -1834,7 +1807,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa(N0.getOperand(1)) && - cast(N0.getOperand(1))->getAPIntValue() == 1) { + cast(N0.getOperand(1))->isOne()) { // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We // can only do this if the top bits are known zero. unsigned BitWidth = N0.getValueSizeInBits(); @@ -1843,9 +1816,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, BitWidth-1))) { // Okay, get the un-inverted input value. SDValue Val; - if (N0.getOpcode() == ISD::XOR) + if (N0.getOpcode() == ISD::XOR) { Val = N0.getOperand(0); - else { + } else { assert(N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR); // ((X^1)&1)^1 -> X & 1 @@ -1857,7 +1830,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } - } else if (N1C->getAPIntValue() == 1 && + } else if (N1C->isOne() && (VT == MVT::i1 || getBooleanContents(N0->getValueType(0)) == ZeroOrOneBooleanContent)) { @@ -1875,7 +1848,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (Op0.getOpcode() == ISD::AND && isa(Op0.getOperand(1)) && - cast(Op0.getOperand(1))->getAPIntValue() == 1) { + cast(Op0.getOperand(1))->isOne()) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, @@ -1910,7 +1883,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Canonicalize GE/LE comparisons to use GT/LT comparisons. if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { - if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true + // X >= MIN --> true + if (C1 == MinVal) + return DAG.getConstant(1, dl, VT); + // X >= C0 --> X > (C0 - 1) APInt C = C1 - 1; ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; @@ -1925,7 +1901,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { - if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true + // X <= MAX --> true + if (C1 == MaxVal) + return DAG.getConstant(1, dl, VT); + // X <= C0 --> X < (C0 + 1) APInt C = C1 + 1; ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; @@ -2054,7 +2033,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } else { ShiftBits = C1.countTrailingZeros(); } - NewC = NewC.lshr(ShiftBits); + NewC.lshrInPlace(ShiftBits); if (ShiftBits && NewC.getMinSignedBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue())) { auto &DL = DAG.getDataLayout(); @@ -2187,7 +2166,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); if (N0.getOperand(1) == N1.getOperand(1)) return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); - if (DAG.isCommutativeBinOp(N0.getOpcode())) { + if (isCommutativeBinOp(N0.getOpcode())) { // If X op Y == Y op X, try other combinations. if (N0.getOperand(0) == N1.getOperand(1)) return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), @@ -2251,7 +2230,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(1), DAG.getConstant(0, dl, N0.getValueType()), Cond); if (N0.getOperand(1) == N1) { - if (DAG.isCommutativeBinOp(N0.getOpcode())) + if (isCommutativeBinOp(N0.getOpcode())) return DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(0, dl, N0.getValueType()), Cond); @@ -2278,7 +2257,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N1.getOperand(1), DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getOperand(1) == N0) { - if (DAG.isCommutativeBinOp(N1.getOpcode())) + if (isCommutativeBinOp(N1.getOpcode())) return DAG.getSetCC(dl, VT, N1.getOperand(0), DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getNode()->hasOneUse()) { @@ -2503,7 +2482,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // gcc prints these as sign extended. Sign extend value to 64 bits // now; without this it would get ZExt'd later in // ScheduleDAGSDNodes::EmitNode, which is very generic. - Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), + Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(C), MVT::i64)); } return; @@ -2531,7 +2510,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, for (const TargetRegisterClass *RC : RI->regclasses()) { // If none of the value types for this register class are valid, we // can't use it. For example, 64-bit reg classes on 32-bit targets. - if (!isLegalRC(RC)) + if (!isLegalRC(*RI, *RC)) continue; for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); @@ -2543,9 +2522,9 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, // If this register class has the requested value type, return it, // otherwise keep searching and return the first class found // if no other is found which explicitly has the requested type. - if (RC->hasType(VT)) + if (RI->isTypeLegalForClass(*RC, VT)) return S; - else if (!R.second) + if (!R.second) R = S; } } @@ -2969,9 +2948,9 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); - Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, Flags); Created.push_back(Op1.getNode()); - d = d.ashr(ShAmt); + d.ashrInPlace(ShAmt); } // Calculate the multiplicative inverse, using Newton's method. @@ -3013,7 +2992,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, return SDValue(); // If the sdiv has an 'exact' bit we can use a simpler lowering. - if (cast(N)->Flags.hasExact()) + if (N->getFlags().hasExact()) return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created); APInt::ms magics = Divisor.magic(); @@ -3352,7 +3331,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); SDValue Bias = DAG.getConstant(127, dl, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl, + SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl, IntVT); SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT); SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index ff7d205c1f4ce274f89e573d441196a11c54db10..7b60d22c7ace6db3fd3343007cd75823b535a532 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -16,9 +16,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -27,7 +27,7 @@ using namespace llvm; -#define DEBUG_TYPE "shadowstackgclowering" +#define DEBUG_TYPE "shadow-stack-gc-lowering" namespace { @@ -66,10 +66,10 @@ private: }; } -INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering", +INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) -INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering", +INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); } diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 4837495777da22664144e892e60d8e9f6df5c76b..aa75f5e2caa23e41130f7bb6c6feb558679582cd 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -210,13 +210,12 @@ public: char ShrinkWrap::ID = 0; char &llvm::ShrinkWrapID = ShrinkWrap::ID; -INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, - false) +INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) +INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const { @@ -282,8 +281,14 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, if (!Restore) Restore = &MBB; - else + else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it + // means the block never returns. If that's the + // case, we don't want to call + // `findNearestCommonDominator`, which will + // return `Restore`. Restore = MPDT->findNearestCommonDominator(Restore, &MBB); + else + Restore = nullptr; // Abort, we can't find a restore point in this case. // Make sure we would be able to insert the restore code before the // terminator. @@ -293,7 +298,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, continue; // One of the terminator needs to happen before the restore point. if (MBB.succ_empty()) { - Restore = nullptr; + Restore = nullptr; // Abort, we can't find a restore point in this case. break; } // Look for a restore point that post-dominates all the successors. @@ -419,7 +424,7 @@ static bool isIrreducibleCFG(const MachineFunction &MF, } bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { - if (MF.empty() || !isShrinkWrapEnabled(MF)) + if (skipFunction(*MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) return false; DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index ab578df4069d5462c8331f9a527fad8748ed0454..7886737b879c22bc03a0c538bc1fb7020d3ad45e 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -74,7 +74,7 @@ private: } // end anonymous namespace char SjLjEHPrepare::ID = 0; -INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions", +INITIALIZE_PASS(SjLjEHPrepare, DEBUG_TYPE, "Prepare SjLj exceptions", false, false) // Public Interface To the SjLjEHPrepare pass. @@ -93,8 +93,8 @@ bool SjLjEHPrepare::doInitialization(Module &M) { doubleUnderDataTy, // __data VoidPtrTy, // __personality VoidPtrTy, // __lsda - doubleUnderJBufTy, // __jbuf - nullptr); + doubleUnderJBufTy // __jbuf + ); return true; } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index bc2a1d09056bd5d60427ce6604c3ae02ec0be1d9..3656832a7f1a83940f5203bfac0cd2ab7669a85c 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -19,7 +19,7 @@ using namespace llvm; #define DEBUG_TYPE "slotindexes" char SlotIndexes::ID = 0; -INITIALIZE_PASS(SlotIndexes, "slotindexes", +INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE, "Slot index numbering", false, false) STATISTIC(NumLocalRenum, "Number of local renumberings"); diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index f10c98ef4e5082dd856327bab323f180f9426844..0abe1c47da55a2dd5dc142d6245287374b424c3a 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -40,14 +40,14 @@ using namespace llvm; -#define DEBUG_TYPE "spillplacement" +#define DEBUG_TYPE "spill-code-placement" char SpillPlacement::ID = 0; -INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement", +INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(EdgeBundles) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement", +INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) char &llvm::SpillPlacementID = SpillPlacement::ID; @@ -310,7 +310,7 @@ void SpillPlacement::addLinks(ArrayRef Links) { bool SpillPlacement::scanActiveBundles() { RecentPositive.clear(); - for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { + for (unsigned n : ActiveNodes->set_bits()) { update(n); // A node that must spill, or a node without any links is not going to // change its value ever again, so exclude it from iterations. @@ -365,7 +365,7 @@ SpillPlacement::finish() { // Write preferences back to ActiveNodes. bool Perfect = true; - for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) + for (unsigned n : ActiveNodes->set_bits()) if (!nodes[n].preferReg()) { ActiveNodes->reset(n); Perfect = false; diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 3a50aaa69985d3509e27ef7d848521d0b32bb647..008b984dd9616bff2e36e23c30a82b82d98ac297 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -569,8 +569,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg, // Greedy heuristic: Keep iterating keeping the best covering subreg index // each time. - LaneBitmask LanesLeft = - LaneMask & ~(TRI.getSubRegIndexLaneMask(BestCover)); + LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx)); while (LanesLeft.any()) { unsigned BestIdx = 0; int BestCover = INT_MIN; diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index f51d959a089aa382b5a939fc1cbd63ec23da045b..6bac39c7ee77cf9514b955c50a164682fd86071f 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -53,7 +53,7 @@ using namespace llvm; -#define DEBUG_TYPE "stackcoloring" +#define DEBUG_TYPE "stack-coloring" static cl::opt DisableColoring("no-stack-coloring", @@ -86,10 +86,134 @@ STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); +//===----------------------------------------------------------------------===// +// StackColoring Pass +//===----------------------------------------------------------------------===// +// +// Stack Coloring reduces stack usage by merging stack slots when they +// can't be used together. For example, consider the following C program: +// +// void bar(char *, int); +// void foo(bool var) { +// A: { +// char z[4096]; +// bar(z, 0); +// } +// +// char *p; +// char x[4096]; +// char y[4096]; +// if (var) { +// p = x; +// } else { +// bar(y, 1); +// p = y + 1024; +// } +// B: +// bar(p, 2); +// } +// +// Naively-compiled, this program would use 12k of stack space. However, the +// stack slot corresponding to `z` is always destroyed before either of the +// stack slots for `x` or `y` are used, and then `x` is only used if `var` +// is true, while `y` is only used if `var` is false. So in no time are 2 +// of the stack slots used together, and therefore we can merge them, +// compiling the function using only a single 4k alloca: +// +// void foo(bool var) { // equivalent +// char x[4096]; +// char *p; +// bar(x, 0); +// if (var) { +// p = x; +// } else { +// bar(x, 1); +// p = x + 1024; +// } +// bar(p, 2); +// } +// +// This is an important optimization if we want stack space to be under +// control in large functions, both open-coded ones and ones created by +// inlining. // // Implementation Notes: // --------------------- // +// An important part of the above reasoning is that `z` can't be accessed +// while the latter 2 calls to `bar` are running. This is justified because +// `z`'s lifetime is over after we exit from block `A:`, so any further +// accesses to it would be UB. The way we represent this information +// in LLVM is by having frontends delimit blocks with `lifetime.start` +// and `lifetime.end` intrinsics. +// +// The effect of these intrinsics seems to be as follows (maybe I should +// specify this in the reference?): +// +// L1) at start, each stack-slot is marked as *out-of-scope*, unless no +// lifetime intrinsic refers to that stack slot, in which case +// it is marked as *in-scope*. +// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and +// the stack slot is overwritten with `undef`. +// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*. +// L4) on function exit, all stack slots are marked as *out-of-scope*. +// L5) `lifetime.end` is a no-op when called on a slot that is already +// *out-of-scope*. +// L6) memory accesses to *out-of-scope* stack slots are UB. +// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it +// are invalidated, unless the slot is "degenerate". This is used to +// justify not marking slots as in-use until the pointer to them is +// used, but feels a bit hacky in the presence of things like LICM. See +// the "Degenerate Slots" section for more details. +// +// Now, let's ground stack coloring on these rules. We'll define a slot +// as *in-use* at a (dynamic) point in execution if it either can be +// written to at that point, or if it has a live and non-undef content +// at that point. +// +// Obviously, slots that are never *in-use* together can be merged, and +// in our example `foo`, the slots for `x`, `y` and `z` are never +// in-use together (of course, sometimes slots that *are* in-use together +// might still be mergable, but we don't care about that here). +// +// In this implementation, we successively merge pairs of slots that are +// not *in-use* together. We could be smarter - for example, we could merge +// a single large slot with 2 small slots, or we could construct the +// interference graph and run a "smart" graph coloring algorithm, but with +// that aside, how do we find out whether a pair of slots might be *in-use* +// together? +// +// From our rules, we see that *out-of-scope* slots are never *in-use*, +// and from (L7) we see that "non-degenerate" slots remain non-*in-use* +// until their address is taken. Therefore, we can approximate slot activity +// using dataflow. +// +// A subtle point: naively, we might try to figure out which pairs of +// stack-slots interfere by propagating `S in-use` through the CFG for every +// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in +// which they are both *in-use*. +// +// That is sound, but overly conservative in some cases: in our (artificial) +// example `foo`, either `x` or `y` might be in use at the label `B:`, but +// as `x` is only in use if we came in from the `var` edge and `y` only +// if we came from the `!var` edge, they still can't be in use together. +// See PR32488 for an important real-life case. +// +// If we wanted to find all points of interference precisely, we could +// propagate `S in-use` and `S&T in-use` predicates through the CFG. That +// would be precise, but requires propagating `O(n^2)` dataflow facts. +// +// However, we aren't interested in the *set* of points of interference +// between 2 stack slots, only *whether* there *is* such a point. So we +// can rely on a little trick: for `S` and `T` to be in-use together, +// one of them needs to become in-use while the other is in-use (or +// they might both become in use simultaneously). We can check this +// by also keeping track of the points at which a stack slot might *start* +// being in-use. +// +// Exact first use: +// ---------------- +// // Consider the following motivating example: // // int foo() { @@ -158,6 +282,9 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024 // byte stack (better). // +// Degenerate Slots: +// ----------------- +// // Relying entirely on first-use of stack slots is problematic, // however, due to the fact that optimizations can sometimes migrate // uses of a variable outside of its lifetime start/end region. Here @@ -237,10 +364,6 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // for "b" then it will appear that 'b' has a degenerate lifetime. // -//===----------------------------------------------------------------------===// -// StackColoring Pass -//===----------------------------------------------------------------------===// - namespace { /// StackColoring - A machine pass for merging disjoint stack allocations, /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. @@ -271,8 +394,11 @@ class StackColoring : public MachineFunctionPass { /// Maps basic blocks to a serial number. SmallVector BasicBlockNumbering; - /// Maps liveness intervals for each slot. + /// Maps slots to their use interval. Outside of this interval, slots + /// values are either dead or `undef` and they will not be written to. SmallVector, 16> Intervals; + /// Maps slots to the points where they can become in-use. + SmallVector, 16> LiveStarts; /// VNInfo is used for the construction of LiveIntervals. VNInfo::Allocator VNInfoAllocator; /// SlotIndex analysis object. @@ -371,12 +497,12 @@ private: char StackColoring::ID = 0; char &llvm::StackColoringID = StackColoring::ID; -INITIALIZE_PASS_BEGIN(StackColoring, - "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, + "Merge disjoint stack slots", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_END(StackColoring, - "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE, + "Merge disjoint stack slots", false, false) void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -672,15 +798,22 @@ void StackColoring::calculateLocalLiveness() void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SmallVector Starts; - SmallVector Finishes; + SmallVector DefinitelyInUse; // For each block, find which slots are active within this block // and update the live intervals. for (const MachineBasicBlock &MBB : *MF) { Starts.clear(); Starts.resize(NumSlots); - Finishes.clear(); - Finishes.resize(NumSlots); + DefinitelyInUse.clear(); + DefinitelyInUse.resize(NumSlots); + + // Start the interval of the slots that we previously found to be 'in-use'. + BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; + for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; + pos = MBBLiveness.LiveIn.find_next(pos)) { + Starts[pos] = Indexes->getMBBStartIdx(&MBB); + } // Create the interval for the basic blocks containing lifetime begin/end. for (const MachineInstr &MI : MBB) { @@ -692,68 +825,35 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); for (auto Slot : slots) { if (IsStart) { - if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + // If a slot is already definitely in use, we don't have to emit + // a new start marker because there is already a pre-existing + // one. + if (!DefinitelyInUse[Slot]) { + LiveStarts[Slot].push_back(ThisIndex); + DefinitelyInUse[Slot] = true; + } + if (!Starts[Slot].isValid()) Starts[Slot] = ThisIndex; } else { - if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) - Finishes[Slot] = ThisIndex; + if (Starts[Slot].isValid()) { + VNInfo *VNI = Intervals[Slot]->getValNumInfo(0); + Intervals[Slot]->addSegment( + LiveInterval::Segment(Starts[Slot], ThisIndex, VNI)); + Starts[Slot] = SlotIndex(); // Invalidate the start index + DefinitelyInUse[Slot] = false; + } } } } - // Create the interval of the blocks that we previously found to be 'alive'. - BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; - for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; - pos = MBBLiveness.LiveIn.find_next(pos)) { - Starts[pos] = Indexes->getMBBStartIdx(&MBB); - } - for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; - pos = MBBLiveness.LiveOut.find_next(pos)) { - Finishes[pos] = Indexes->getMBBEndIdx(&MBB); - } - + // Finish up started segments for (unsigned i = 0; i < NumSlots; ++i) { - // - // When LifetimeStartOnFirstUse is turned on, data flow analysis - // is forward (from starts to ends), not bidirectional. A - // consequence of this is that we can wind up in situations - // where Starts[i] is invalid but Finishes[i] is valid and vice - // versa. Example: - // - // LIFETIME_START x - // if (...) { - // - // throw ...; - // } - // LIFETIME_END x - // return 2; - // - // - // Here the slot for "x" will not be live into the block - // containing the "return 2" (since lifetimes start with first - // use, not at the dominating LIFETIME_START marker). - // - if (Starts[i].isValid() && !Finishes[i].isValid()) { - Finishes[i] = Indexes->getMBBEndIdx(&MBB); - } if (!Starts[i].isValid()) continue; - assert(Starts[i] && Finishes[i] && "Invalid interval"); - VNInfo *ValNum = Intervals[i]->getValNumInfo(0); - SlotIndex S = Starts[i]; - SlotIndex F = Finishes[i]; - if (S < F) { - // We have a single consecutive region. - Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); - } else { - // We have two non-consecutive regions. This happens when - // LIFETIME_START appears after the LIFETIME_END marker. - SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB); - SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB); - Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); - Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); - } + SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB); + VNInfo *VNI = Intervals[i]->getValNumInfo(0); + Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI)); } } } @@ -983,6 +1083,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { BasicBlockNumbering.clear(); Markers.clear(); Intervals.clear(); + LiveStarts.clear(); VNInfoAllocator.Reset(); unsigned NumSlots = MFI->getObjectIndexEnd(); @@ -994,6 +1095,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { SmallVector SortedSlots; SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); + LiveStarts.resize(NumSlots); unsigned NumMarkers = collectMarkers(NumSlots); @@ -1065,6 +1167,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); }); + for (auto &s : LiveStarts) + std::sort(s.begin(), s.end()); + bool Changed = true; while (Changed) { Changed = false; @@ -1080,12 +1185,22 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { int SecondSlot = SortedSlots[J]; LiveInterval *First = &*Intervals[FirstSlot]; LiveInterval *Second = &*Intervals[SecondSlot]; + auto &FirstS = LiveStarts[FirstSlot]; + auto &SecondS = LiveStarts[SecondSlot]; assert (!First->empty() && !Second->empty() && "Found an empty range"); - // Merge disjoint slots. - if (!First->overlaps(*Second)) { + // Merge disjoint slots. This is a little bit tricky - see the + // Implementation Notes section for an explanation. + if (!First->isLiveAtIndexes(SecondS) && + !Second->isLiveAtIndexes(FirstS)) { Changed = true; First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); + + int OldSize = FirstS.size(); + FirstS.append(SecondS.begin(), SecondS.end()); + auto Mid = FirstS.begin() + OldSize; + std::inplace_merge(FirstS.begin(), Mid, FirstS.end()); + SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; DEBUG(dbgs()<<"Merging #"< StackMapVersion( - "stackmap-version", cl::init(2), - cl::desc("Specify the stackmap encoding version (default = 2)")); + "stackmap-version", cl::init(3), + cl::desc("Specify the stackmap encoding version (default = 3)")); const char *StackMaps::WSMP = "Stack Maps: "; @@ -85,7 +85,7 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { } StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { - if (StackMapVersion != 2) + if (StackMapVersion != 3) llvm_unreachable("Unsupported stackmap version!"); } @@ -161,7 +161,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, if (SubRegIdx) Offset = TRI->getSubRegIdxOffset(SubRegIdx); - Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset); + Locs.emplace_back(Location::Register, TRI->getSpillSize(*RC), + DwarfRegNum, Offset); return ++MOI; } @@ -220,8 +221,9 @@ void StackMaps::print(raw_ostream &OS) { OS << "Constant Index " << Loc.Offset; break; } - OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size - << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; + OS << "\t[encoding: .byte " << Loc.Type << ", .byte 0" + << ", .short " << Loc.Size << ", .short " << Loc.Reg << ", .short 0" + << ", .int " << Loc.Offset << "]\n"; Idx++; } @@ -245,7 +247,7 @@ void StackMaps::print(raw_ostream &OS) { StackMaps::LiveOutReg StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + unsigned Size = TRI->getSpillSize(*TRI->getMinimalPhysRegClass(Reg)); return LiveOutReg(Reg, DwarfRegNum, Size); } @@ -520,11 +522,16 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { for (const auto &Loc : CSLocs) { OS.EmitIntValue(Loc.Type, 1); - OS.EmitIntValue(Loc.Size, 1); + OS.EmitIntValue(0, 1); // Reserved + OS.EmitIntValue(Loc.Size, 2); OS.EmitIntValue(Loc.Reg, 2); + OS.EmitIntValue(0, 2); // Reserved OS.EmitIntValue(Loc.Offset, 4); } + // Emit alignment to 8 byte. + OS.EmitValueToAlignment(8); + // Num live-out registers and padding to align to 4 byte. OS.EmitIntValue(0, 2); OS.EmitIntValue(LiveOuts.size(), 2); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index a8aafe78748dc7d10a66b748767873e67b3bc361..d8e7840a257634116a8444bc694524d044649280 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/OptimizationDiagnosticInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -28,6 +29,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" @@ -58,12 +60,14 @@ static cl::opt EnableSelectionDAGSP("enable-selectiondag-sp", cl::init(true), cl::Hidden); char StackProtector::ID = 0; -INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors", - false, true) -FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { - return new StackProtector(TM); -} +INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE, + "Insert stack protectors", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE, + "Insert stack protectors", false, true) + +FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); } StackProtector::SSPLayoutKind StackProtector::getSSPLayout(const AllocaInst *AI) const { @@ -91,12 +95,19 @@ void StackProtector::adjustForColoring(const AllocaInst *From, } } +void StackProtector::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); +} + bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; + TM = &getAnalysis().getTM(); + Trip = TM->getTargetTriple(); TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); HasPrologue = false; HasIRCheck = false; diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 234b2043a6a140568fa2bde7b3195152f315eec9..856bca19dee856a8912a6da9613f1c2b56a4a6c6 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" @@ -32,7 +32,7 @@ #include using namespace llvm; -#define DEBUG_TYPE "stackslotcoloring" +#define DEBUG_TYPE "stack-slot-coloring" static cl::opt DisableSharing("no-stack-slot-sharing", @@ -116,12 +116,12 @@ namespace { char StackSlotColoring::ID = 0; char &llvm::StackSlotColoringID = StackSlotColoring::ID; -INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring", +INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring", +INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) namespace { diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index e2377d89497de9a78ad8497d66f78c0cbf34fefe..489a607eb1764918a79bdbb33f6fa3dbce08aba2 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -1,4 +1,4 @@ -//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// +//===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===// // // The LLVM Compiler Infrastructure // @@ -12,22 +12,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" + using namespace llvm; #define DEBUG_TYPE "tailduplication" namespace { + /// Perform tail duplication. Delegates to TailDuplicator class TailDuplicatePass : public MachineFunctionPass { TailDuplicator Duplicator; public: static char ID; + explicit TailDuplicatePass() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -35,13 +38,13 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; }; +} // end anonymous namespace + char TailDuplicatePass::ID = 0; -} char &llvm::TailDuplicateID = TailDuplicatePass::ID; -INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false, - false) +INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp index d2414200e9d579680d5560ba63d4c1b39ac66632..dc7265dcf6c244403670dfa01e91449599dca50c 100644 --- a/lib/CodeGen/TailDuplicator.cpp +++ b/lib/CodeGen/TailDuplicator.cpp @@ -1,4 +1,4 @@ -//===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===// +//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===// // // The LLVM Compiler Infrastructure // @@ -12,22 +12,36 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "tailduplication" @@ -41,15 +55,13 @@ STATISTIC(NumTailDupRemoved, STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumAddedPHIs, "Number of phis added"); -namespace llvm { - // Heuristic for tail duplication. static cl::opt TailDuplicateSize( "tail-dup-size", cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), cl::Hidden); -cl::opt TailDupIndirectBranchSize( +static cl::opt TailDupIndirectBranchSize( "tail-dup-indirect-size", cl::desc("Maximum instructions to consider tail duplicating blocks that " "end with indirect branches."), cl::init(20), @@ -138,7 +150,7 @@ bool TailDuplicator::tailDuplicateAndUpdate( bool IsSimple, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl *DuplicatedPreds, - llvm::function_ref *RemovalCallback) { + function_ref *RemovalCallback) { // Save the successors list. SmallSetVector Succs(MBB->succ_begin(), MBB->succ_end()); @@ -749,7 +761,7 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, if (PredBB->succ_size() > 1) return false; - MachineBasicBlock *PredTBB, *PredFBB; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) return false; @@ -832,7 +844,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, appendCopies(PredBB, CopyInfos, Copies); // Simplify - MachineBasicBlock *PredTBB, *PredFBB; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); @@ -971,7 +983,7 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB, /// the CFG. void TailDuplicator::removeDeadBlock( MachineBasicBlock *MBB, - llvm::function_ref *RemovalCallback) { + function_ref *RemovalCallback) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); @@ -985,5 +997,3 @@ void TailDuplicator::removeDeadBlock( // Remove the block. MBB->eraseFromParent(); } - -} // End llvm namespace diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index e5def6752e0715aa0768fde9859719c14f701a6d..9dd98b4020d251ef0eaecf6502d0c220bd03a3a3 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -1,4 +1,4 @@ -//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==// +//===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==// // // The LLVM Compiler Infrastructure // @@ -14,19 +14,21 @@ #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Compiler.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include + using namespace llvm; -TargetFrameLowering::~TargetFrameLowering() { -} +TargetFrameLowering::~TargetFrameLowering() = default; /// The default implementation just looks at attribute "no-frame-pointer-elim". bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index ef766db756eecd57c6bdcdb78802fea92aa1b4b6..14c5adc0d898bd0f33e51f6d5d6ee5409ed8be7b 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -345,12 +345,12 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, unsigned SubIdx, unsigned &Size, unsigned &Offset, const MachineFunction &MF) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!SubIdx) { - Size = RC->getSize(); + Size = TRI->getSpillSize(*RC); Offset = 0; return true; } - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); // Convert bit size to byte size to be consistent with // MCRegisterClass::getSize(). @@ -364,10 +364,10 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, Size = BitSize /= 8; Offset = (unsigned)BitOffset / 8; - assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); + assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range"); if (!MF.getDataLayout().isLittleEndian()) { - Offset = RC->getSize() - (Offset + Size); + Offset = TRI->getSpillSize(*RC) - (Offset + Size); } return true; } @@ -428,8 +428,8 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, return nullptr; } -void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { - llvm_unreachable("Not a MachO target"); +void TargetInstrInfo::getNoop(MCInst &NopInst) const { + llvm_unreachable("Not implemented"); } static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, @@ -941,12 +941,10 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const { unsigned FrameSetupOpcode = getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode(); - if (MI.getOpcode() != FrameSetupOpcode && - MI.getOpcode() != FrameDestroyOpcode) + if (!isFrameInstr(MI)) return 0; - int SPAdj = MI.getOperand(0).getImm(); - SPAdj = TFI->alignSPAdjust(SPAdj); + int SPAdj = TFI->alignSPAdjust(getFrameSize(MI)); if ((!StackGrowsDown && MI.getOpcode() == FrameSetupOpcode) || (StackGrowsDown && MI.getOpcode() == FrameDestroyOpcode)) diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 27630a3055cb386235086a005783580fe60f449a..e9d38c10c86014b63cf3a8a76f19ec785ca80910 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -21,6 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -33,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -53,6 +54,18 @@ static cl::opt MaximumJumpTableSize ("max-jump-table-size", cl::init(0), cl::Hidden, cl::desc("Set maximum size of jump tables; zero for no limit.")); +/// Minimum jump table density for normal functions. +static cl::opt + JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "a normal function")); + +/// Minimum jump table density for -Os or -Oz functions. +static cl::opt OptsizeJumpTableDensity( + "optsize-jump-table-density", cl::init(40), cl::Hidden, + cl::desc("Minimum density for building a jump table in " + "an optsize function")); + // Although this default value is arbitrary, it is not random. It is assumed // that a condition that evaluates the same way by a higher percentage than this // is best represented as control flow. Therefore, the default value N should be @@ -361,11 +374,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::MEMCPY] = "memcpy"; Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memcpy_element_unordered_atomic_1"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memcpy_element_unordered_atomic_2"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memcpy_element_unordered_atomic_4"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memcpy_element_unordered_atomic_8"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memcpy_element_unordered_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -768,22 +786,21 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { return UNKNOWN_LIBCALL; } -RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) { +RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { switch (ElementSize) { case 1: - return MEMCPY_ELEMENT_ATOMIC_1; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1; case 2: - return MEMCPY_ELEMENT_ATOMIC_2; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2; case 4: - return MEMCPY_ELEMENT_ATOMIC_4; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4; case 8: - return MEMCPY_ELEMENT_ATOMIC_8; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8; case 16: - return MEMCPY_ELEMENT_ATOMIC_16; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16; default: return UNKNOWN_LIBCALL; } - } /// InitCmpLibcallCCs - Set default comparison libcall CC. @@ -829,9 +846,10 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { initActions(); // Perform these initializations only once. - MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; - MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize - = MaxStoresPerMemmoveOptSize = 4; + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = + MaxLoadsPerMemcmp = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = + MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; HasMultipleConditionRegisters = false; @@ -910,6 +928,11 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULO, VT, Expand); setOperationAction(ISD::UMULO, VT, Expand); + // ADDCARRY operations default to expand + setOperationAction(ISD::ADDCARRY, VT, Expand); + setOperationAction(ISD::SUBCARRY, VT, Expand); + setOperationAction(ISD::SETCCCARRY, VT, Expand); + // These default to Expand so they will be expanded to CTLZ/CTTZ by default. setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); @@ -918,6 +941,7 @@ void TargetLoweringBase::initActions() { // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); + setOperationAction(ISD::FPOWI, VT, Expand); // These operations default to expand for vector types. if (VT.isVector()) { @@ -1184,12 +1208,11 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, /// isLegalRC - Return true if the value types that can be represented by the /// specified register class are all legal. -bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { +bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI, + const TargetRegisterClass &RC) const { + for (auto I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) if (isTypeLegal(*I)) return true; - } return false; } @@ -1296,12 +1319,12 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, // Find the first legal register class with the largest spill size. const TargetRegisterClass *BestRC = RC; - for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { + for (unsigned i : SuperRegRC.set_bits()) { const TargetRegisterClass *SuperRC = TRI->getRegClass(i); // We want the largest possible spill size. - if (SuperRC->getSize() <= BestRC->getSize()) + if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC)) continue; - if (!isLegalRC(SuperRC)) + if (!isLegalRC(*TRI, *SuperRC)) continue; BestRC = SuperRC; } @@ -1437,6 +1460,7 @@ void TargetLoweringBase::computeRegisterProperties( } if (IsLegalWiderType) break; + LLVM_FALLTHROUGH; } case TypeWidenVector: { // Try to widen the vector. @@ -1454,6 +1478,7 @@ void TargetLoweringBase::computeRegisterProperties( } if (IsLegalWiderType) break; + LLVM_FALLTHROUGH; } case TypeSplitVector: case TypeScalarizeVector: { @@ -1616,8 +1641,10 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr, VT = MinVT; } - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned NumParts = + TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT); + MVT PartVT = + TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT); // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); @@ -1902,6 +1929,10 @@ void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) { MinimumJumpTableEntries = Val; } +unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const { + return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; +} + unsigned TargetLoweringBase::getMaximumJumpTableSize() const { return MaximumJumpTableSize; } @@ -2093,3 +2124,7 @@ int TargetLoweringBase::getDivRefinementSteps(EVT VT, MachineFunction &MF) const { return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF)); } + +void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { + MF.getRegInfo().freezeReservedRegs(MF); +} diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 78afeda67dbf2dcd4e7e39b7b19254c26fa48373..6922e33c8d6cba2d3021426c4998f7fa8b9efb3c 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -12,14 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -48,11 +52,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include @@ -61,10 +61,54 @@ using namespace llvm; using namespace dwarf; +static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, + StringRef &Section) { + SmallVector ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + + for (const auto &MFE: ModuleFlags) { + // Ignore flags with 'Require' behaviour. + if (MFE.Behavior == Module::Require) + continue; + + StringRef Key = MFE.Key->getString(); + if (Key == "Objective-C Image Info Version") { + Version = mdconst::extract(MFE.Val)->getZExtValue(); + } else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only" || + Key == "Objective-C Is Simulated" || + Key == "Objective-C Class Properties" || + Key == "Objective-C Image Swift Version") { + Flags |= mdconst::extract(MFE.Val)->getZExtValue(); + } else if (Key == "Objective-C Image Info Section") { + Section = cast(MFE.Val)->getString(); + } + } +} + //===----------------------------------------------------------------------===// // ELF //===----------------------------------------------------------------------===// +void TargetLoweringObjectFileELF::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { + unsigned Version = 0; + unsigned Flags = 0; + StringRef Section; + + GetObjCImageInfo(M, Version, Flags, Section); + if (Section.empty()) + return; + + auto &C = getContext(); + auto *S = C.getELFSection(Section, ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + Streamer.SwitchSection(S); + Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(Version, 4); + Streamer.EmitIntValue(Flags, 4); + Streamer.AddBlankLine(); +} + MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( const GlobalValue *GV, const TargetMachine &TM, MachineModuleInfo *MMI) const { @@ -132,8 +176,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { // section(".eh_frame") gcc will produce: // // .section .eh_frame,"a",@progbits - - if (Name == getInstrProfCoverageSectionName(false)) + + if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF, + /*AddSegmentInfo=*/false)) return SectionKind::getMetadata(); if (Name.empty() || Name[0] != '.') return K; @@ -231,7 +276,11 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO, if (!MD) return nullptr; - auto *VM = dyn_cast(MD->getOperand(0)); + const MDOperand &Op = MD->getOperand(0); + if (!Op.get()) + return nullptr; + + auto *VM = dyn_cast(Op); if (!VM) report_fatal_error("MD_associated operand is not ValueAsMetadata"); @@ -243,6 +292,25 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { StringRef SectionName = GO->getSection(); + // Check if '#pragma clang section' name is applicable. + // Note that pragma directive overrides -ffunction-section, -fdata-section + // and so section name is exactly as user specified and not uniqued. + const GlobalVariable *GV = dyn_cast(GO); + if (GV && GV->hasImplicitSection()) { + auto Attrs = GV->getAttributes(); + if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) { + SectionName = Attrs.getAttribute("bss-section").getValueAsString(); + } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) { + SectionName = Attrs.getAttribute("rodata-section").getValueAsString(); + } else if (Attrs.hasAttribute("data-section") && Kind.isData()) { + SectionName = Attrs.getAttribute("data-section").getValueAsString(); + } + } + const Function *F = dyn_cast(GO); + if (F && F->hasFnAttribute("implicit-section-name")) { + SectionName = F->getFnAttribute("implicit-section-name").getValueAsString(); + } + // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); @@ -551,40 +619,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, } } -/// emitModuleFlags - Perform code emission for module flags. -void TargetLoweringObjectFileMachO::emitModuleFlags( - MCStreamer &Streamer, ArrayRef ModuleFlags, - const TargetMachine &TM) const { - unsigned VersionVal = 0; - unsigned ImageInfoFlags = 0; - MDNode *LinkerOptions = nullptr; - StringRef SectionVal; - - for (const auto &MFE : ModuleFlags) { - // Ignore flags with 'Require' behavior. - if (MFE.Behavior == Module::Require) - continue; - - StringRef Key = MFE.Key->getString(); - Metadata *Val = MFE.Val; - - if (Key == "Objective-C Image Info Version") { - VersionVal = mdconst::extract(Val)->getZExtValue(); - } else if (Key == "Objective-C Garbage Collection" || - Key == "Objective-C GC Only" || - Key == "Objective-C Is Simulated" || - Key == "Objective-C Class Properties" || - Key == "Objective-C Image Swift Version") { - ImageInfoFlags |= mdconst::extract(Val)->getZExtValue(); - } else if (Key == "Objective-C Image Info Section") { - SectionVal = cast(Val)->getString(); - } else if (Key == "Linker Options") { - LinkerOptions = cast(Val); - } - } - +void TargetLoweringObjectFileMachO::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { // Emit the linker options if present. - if (LinkerOptions) { + if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { for (const auto &Option : LinkerOptions->operands()) { SmallVector StrOptions; for (const auto &Piece : cast(Option)->operands()) @@ -593,8 +631,15 @@ void TargetLoweringObjectFileMachO::emitModuleFlags( } } + unsigned VersionVal = 0; + unsigned ImageInfoFlags = 0; + StringRef SectionVal; + + GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal); + // The section is mandatory. If we don't have it, then we don't have GC info. - if (SectionVal.empty()) return; + if (SectionVal.empty()) + return; StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; @@ -1106,18 +1151,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); } -void TargetLoweringObjectFileCOFF::emitModuleFlags( - MCStreamer &Streamer, ArrayRef ModuleFlags, - const TargetMachine &TM) const { - MDNode *LinkerOptions = nullptr; - - for (const auto &MFE : ModuleFlags) { - StringRef Key = MFE.Key->getString(); - if (Key == "Linker Options") - LinkerOptions = cast(MFE.Val); - } - - if (LinkerOptions) { +void TargetLoweringObjectFileCOFF::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { + if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { // Emit the linker options to the linker .drectve section. According to the // spec, this section is a space-separated string containing flags for // linker. @@ -1132,6 +1168,24 @@ void TargetLoweringObjectFileCOFF::emitModuleFlags( } } } + + unsigned Version = 0; + unsigned Flags = 0; + StringRef Section; + + GetObjCImageInfo(M, Version, Flags, Section); + if (Section.empty()) + return; + + auto &C = getContext(); + auto *S = C.getCOFFSection( + Section, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + Streamer.SwitchSection(S); + Streamer.EmitLabel(C.getOrCreateSymbol(StringRef("OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(Version, 4); + Streamer.EmitIntValue(Flags, 4); + Streamer.AddBlankLine(); } void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp index c20d5ab814f82f94d64a3279d4c7c37953695461..ed845e1706f8cae3113253c2d293599aed29544a 100644 --- a/lib/CodeGen/TargetOptionsImpl.cpp +++ b/lib/CodeGen/TargetOptionsImpl.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 150195f5f85bcff9b5b99fed84cdb8f9e847c61b..b1918b19e1dfe98792e61709cc2b638fc765e1a2 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" @@ -95,6 +96,16 @@ static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, static cl::opt EnableMachineOutliner("enable-machine-outliner", cl::Hidden, cl::desc("Enable machine outliner")); +// Enable or disable FastISel. Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -O0. +static cl::opt +EnableFastISelOption("fast-isel", cl::Hidden, + cl::desc("Enable the \"fast\" instruction selector")); + +static cl::opt + EnableGlobalISel("global-isel", cl::Hidden, + cl::desc("Enable the \"global\" instruction selector")); static cl::opt PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, @@ -261,9 +272,9 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. -TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) +TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm) : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false), - AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), + AddingMachinePasses(false), TM(&TM), Impl(nullptr), Initialized(false), DisableVerify(false), EnableTailMerge(true), RequireCodeGenSCCOrder(false) { @@ -282,9 +293,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) substitutePass(&PostRAMachineLICMID, &MachineLICMID); if (StringRef(PrintMachineInstrs.getValue()).equals("")) - TM->Options.PrintMachineCode = true; + TM.Options.PrintMachineCode = true; - if (TM->Options.EnableIPRA) + if (TM.Options.EnableIPRA) setRequiresCodeGenSCCOrder(); } @@ -310,12 +321,14 @@ void TargetPassConfig::insertPass(AnalysisID TargetPassID, /// /// Targets may override this to extend TargetPassConfig. TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { - return new TargetPassConfig(this, PM); + return new TargetPassConfig(*this, PM); } TargetPassConfig::TargetPassConfig() : ImmutablePass(ID), PM(nullptr) { - llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); + report_fatal_error("Trying to construct TargetPassConfig without a target " + "machine. Scheduling a CodeGen pass without a target " + "triple set?"); } // Helper to verify the analysis is really immutable. @@ -428,7 +441,12 @@ void TargetPassConfig::addPrintPass(const std::string &Banner) { } void TargetPassConfig::addVerifyPass(const std::string &Banner) { - if (VerifyMachineCode) + bool Verify = VerifyMachineCode; +#ifdef EXPENSIVE_CHECKS + if (VerifyMachineCode == cl::BOU_UNSET) + Verify = TM->isMachineVerifierClean(); +#endif + if (Verify) PM->add(createMachineVerifierPass(Banner)); } @@ -487,6 +505,14 @@ void TargetPassConfig::addIRPasses() { // Insert calls to mcount-like functions. addPass(createCountingFunctionInserterPass()); + + // Add scalarization of target's unsupported masked memory intrinsics pass. + // the unsupported intrinsic will be replaced with a chain of basic blocks, + // that stores/loads element one-by-one if the appropriate mask bit is set. + addPass(createScalarizeMaskedMemIntrinPass()); + + // Expand reduction intrinsics into shuffle sequences if the target wants to. + addPass(createExpandReductionsPass()); } /// Turn exception handling constructs into something the code generators can @@ -506,14 +532,14 @@ void TargetPassConfig::addPassesToHandleExceptions() { LLVM_FALLTHROUGH; case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - addPass(createDwarfEHPass(TM)); + addPass(createDwarfEHPass()); break; case ExceptionHandling::WinEH: // We support using both GCC-style and MSVC-style exceptions on Windows, so // add both preparation passes. Each pass will only actually run if it // recognizes the personality function. - addPass(createWinEHPass(TM)); - addPass(createDwarfEHPass(TM)); + addPass(createWinEHPass()); + addPass(createDwarfEHPass()); break; case ExceptionHandling::None: addPass(createLowerInvokePass()); @@ -528,7 +554,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { /// before exception handling preparation passes. void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - addPass(createCodeGenPreparePass(TM)); + addPass(createCodeGenPreparePass()); addPass(createRewriteSymbolsPass()); } @@ -543,8 +569,8 @@ void TargetPassConfig::addISelPrepare() { // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. - addPass(createSafeStackPass(TM)); - addPass(createStackProtectorPass(TM)); + addPass(createSafeStackPass()); + addPass(createStackProtectorPass()); if (PrintISelInput) addPass(createPrintFunctionPass( @@ -556,6 +582,74 @@ void TargetPassConfig::addISelPrepare() { addPass(createVerifierPass()); } +bool TargetPassConfig::addCoreISelPasses() { + // Enable FastISel with -fast, but allow that to be overridden. + TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); + if (EnableFastISelOption == cl::BOU_TRUE || + (TM->getOptLevel() == CodeGenOpt::None && TM->getO0WantsFastISel())) + TM->setFastISel(true); + + // Ask the target for an isel. + // Enable GlobalISel if the target wants to, but allow that to be overriden. + if (EnableGlobalISel == cl::BOU_TRUE || + (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled())) { + if (addIRTranslator()) + return true; + + addPreLegalizeMachineIR(); + + if (addLegalizeMachineIR()) + return true; + + // Before running the register bank selector, ask the target if it + // wants to run some passes. + addPreRegBankSelect(); + + if (addRegBankSelect()) + return true; + + addPreGlobalInstructionSelect(); + + if (addGlobalInstructionSelect()) + return true; + + // Pass to reset the MachineFunction if the ISel failed. + addPass(createResetMachineFunctionPass( + reportDiagnosticWhenGlobalISelFallback(), isGlobalISelAbortEnabled())); + + // Provide a fallback path when we do not want to abort on + // not-yet-supported input. + if (!isGlobalISelAbortEnabled() && addInstSelector()) + return true; + + } else if (addInstSelector()) + return true; + + return false; +} + +bool TargetPassConfig::addISelPasses() { + if (TM->Options.EmulatedTLS) + addPass(createLowerEmuTLSPass()); + + addPass(createPreISelIntrinsicLoweringPass()); + addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + addIRPasses(); + addCodeGenPrepare(); + addPassesToHandleExceptions(); + addISelPrepare(); + + return addCoreISelPasses(); +} + +/// -regalloc=... command line option. +static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } +static cl::opt > +RegAlloc("regalloc", + cl::init(&useDefaultRegisterAllocator), + cl::desc("Register allocator to use")); + /// Add the complete set of target-independent postISel code generator passes. /// /// This can be read as the standard order of major LLVM CodeGen stages. Stages @@ -614,8 +708,12 @@ void TargetPassConfig::addMachinePasses() { // including phi elimination and scheduling. if (getOptimizeRegAlloc()) addOptimizedRegAlloc(createRegAllocPass(true)); - else + else { + if (RegAlloc != &useDefaultRegisterAllocator && + RegAlloc != &createFastRegisterAllocator) + report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc."); addFastRegAlloc(createRegAllocPass(false)); + } // Run post-ra passes. addPostRegAlloc(); @@ -627,7 +725,7 @@ void TargetPassConfig::addMachinePasses() { // Prolog/Epilog inserter needs a TargetMachine to instantiate. But only // do so if it hasn't been disabled, substituted, or overridden. if (!isPassSubstitutedOrOverridden(&PrologEpilogCodeInserterID)) - addPass(createPrologEpilogInserterPass(TM)); + addPass(createPrologEpilogInserterPass()); /// Add passes that optimize machine instructions after register allocation. if (getOptLevel() != CodeGenOpt::None) @@ -748,19 +846,12 @@ MachinePassRegistry RegisterRegAlloc::Registry; /// A dummy default pass factory indicates whether the register allocator is /// overridden on the command line. static llvm::once_flag InitializeDefaultRegisterAllocatorFlag; -static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } + static RegisterRegAlloc defaultRegAlloc("default", "pick register allocator based on -O option", useDefaultRegisterAllocator); -/// -regalloc=... command line option. -static cl::opt > -RegAlloc("regalloc", - cl::init(&useDefaultRegisterAllocator), - cl::desc("Register allocator to use")); - static void initializeDefaultRegisterAllocatorOnce() { RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); @@ -770,7 +861,6 @@ static void initializeDefaultRegisterAllocatorOnce() { } } - /// Instantiate the default register allocator pass for this target for either /// the optimized or unoptimized allocation path. This will be added to the pass /// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 66cdad278e8daa9c80eb068451376266c94fc839..c8537ad2f3130c9ada561ad4dd796ee25b491ee7 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -21,7 +22,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" #define DEBUG_TYPE "target-reg-info" @@ -50,8 +50,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet, ArrayRef Exceptions) const { // Check that all super registers of reserved regs are reserved as well. BitVector Checked(getNumRegs()); - for (int Reg = RegisterSet.find_first(); Reg>=0; - Reg = RegisterSet.find_next(Reg)) { + for (unsigned Reg : RegisterSet.set_bits()) { if (Checked[Reg]) continue; for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) { @@ -156,8 +155,8 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { // this physreg. const TargetRegisterClass* BestRC = nullptr; for (const TargetRegisterClass* RC : regclasses()) { - if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && - (!BestRC || BestRC->hasSubClass(RC))) + if ((VT == MVT::Other || isTypeLegalForClass(*RC, VT)) && + RC->contains(reg) && (!BestRC || BestRC->hasSubClass(RC))) BestRC = RC; } @@ -207,7 +206,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A, if (unsigned Common = *A++ & *B++) { const TargetRegisterClass *RC = TRI->getRegClass(I + countTrailingZeros(Common)); - if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT)) return RC; } return nullptr; @@ -265,7 +264,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, const TargetRegisterClass *BestRC = nullptr; unsigned *BestPreA = &PreA; unsigned *BestPreB = &PreB; - if (RCA->getSize() < RCB->getSize()) { + if (getRegSizeInBits(*RCA) < getRegSizeInBits(*RCB)) { std::swap(RCA, RCB); std::swap(SubA, SubB); std::swap(BestPreA, BestPreB); @@ -273,7 +272,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, // Also terminate the search one we have found a register class as small as // RCA. - unsigned MinSize = RCA->getSize(); + unsigned MinSize = getRegSizeInBits(*RCA); for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) { unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA); @@ -281,7 +280,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, // Check if a common super-register class exists for this index pair. const TargetRegisterClass *RC = firstCommonClass(IA.getMask(), IB.getMask(), this); - if (!RC || RC->getSize() < MinSize) + if (!RC || getRegSizeInBits(*RC) < MinSize) continue; // The indexes must compose identically: PreA+SubA == PreB+SubB. @@ -290,7 +289,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, continue; // Is RC a better candidate than BestRC? - if (BestRC && RC->getSize() >= BestRC->getSize()) + if (BestRC && getRegSizeInBits(*RC) >= getRegSizeInBits(*BestRC)) continue; // Yes, RC is the smallest super-register seen so far. @@ -299,7 +298,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, *BestPreB = IB.getSubReg(); // Bail early if we reached MinSize. We won't find a better candidate. - if (BestRC->getSize() == MinSize) + if (getRegSizeInBits(*BestRC) == MinSize) return BestRC; } } diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 04edf0e62857b3fe302962ca138dc5ae5159e6d2..9210ea8a83f6bc5f5345ef4830946845be0f9a43 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" @@ -277,7 +277,11 @@ unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const { if (SCDesc->isValid() && !SCDesc->isVariant()) return computeInstrLatency(*SCDesc); - llvm_unreachable("No MI sched latency"); + if (SCDesc->isValid()) { + assert (!SCDesc->isVariant() && "No MI sched latency: SCDesc->isVariant()"); + return computeInstrLatency(*SCDesc); + } + return 0; } unsigned @@ -331,3 +335,68 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, } return 0; } + +static Optional +getRThroughputFromItineraries(unsigned schedClass, + const InstrItineraryData *IID){ + double Unknown = std::numeric_limits::infinity(); + double Throughput = Unknown; + + for (const InstrStage *IS = IID->beginStage(schedClass), + *E = IID->endStage(schedClass); + IS != E; ++IS) { + unsigned Cycles = IS->getCycles(); + if (!Cycles) + continue; + Throughput = + std::min(Throughput, countPopulation(IS->getUnits()) * 1.0 / Cycles); + } + // We need reciprocal throughput that's why we return such value. + return 1 / Throughput; +} + +static Optional +getRThroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc, + const TargetSubtargetInfo *STI, + const MCSchedModel &SchedModel) { + double Unknown = std::numeric_limits::infinity(); + double Throughput = Unknown; + + for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc), + *WEnd = STI->getWriteProcResEnd(SCDesc); + WPR != WEnd; ++WPR) { + unsigned Cycles = WPR->Cycles; + if (!Cycles) + return Optional(); + + unsigned NumUnits = + SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits; + Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles); + } + // We need reciprocal throughput that's why we return such value. + return 1 / Throughput; +} + +Optional +TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const { + if (hasInstrItineraries()) + return getRThroughputFromItineraries(MI->getDesc().getSchedClass(), + getInstrItineraries()); + if (hasInstrSchedModel()) + return getRThroughputFromInstrSchedModel(resolveSchedClass(MI), STI, + SchedModel); + return Optional(); +} + +Optional +TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const { + unsigned SchedClass = TII->get(Opcode).getSchedClass(); + if (hasInstrItineraries()) + return getRThroughputFromItineraries(SchedClass, getInstrItineraries()); + if (hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); + if (SCDesc->isValid() && !SCDesc->isVariant()) + return getRThroughputFromInstrSchedModel(SCDesc, STI, SchedModel); + } + return Optional(); +} diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index c74707d95b9e72997c7eefe9aac54727eba2e6a0..82e85bab14747371627ae85d1804d95e163b1337 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -12,6 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //--------------------------------------------------------------------------- @@ -52,3 +55,46 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const { bool TargetSubtargetInfo::useAA() const { return false; } + +static std::string createSchedInfoStr(unsigned Latency, + Optional RThroughput) { + static const char *SchedPrefix = " sched: ["; + std::string Comment; + raw_string_ostream CS(Comment); + if (Latency > 0 && RThroughput.hasValue()) + CS << SchedPrefix << Latency << format(":%2.2f", RThroughput.getValue()) + << "]"; + else if (Latency > 0) + CS << SchedPrefix << Latency << ":?]"; + else if (RThroughput.hasValue()) + CS << SchedPrefix << "?:" << RThroughput.getValue() << "]"; + CS.flush(); + return Comment; +} + +/// Returns string representation of scheduler comment +std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const { + if (MI.isPseudo() || MI.isTerminator()) + return std::string(); + // We don't cache TSchedModel because it depends on TargetInstrInfo + // that could be changed during the compilation + TargetSchedModel TSchedModel; + TSchedModel.init(getSchedModel(), this, getInstrInfo()); + unsigned Latency = TSchedModel.computeInstrLatency(&MI); + Optional RThroughput = TSchedModel.computeInstrRThroughput(&MI); + return createSchedInfoStr(Latency, RThroughput); +} + +/// Returns string representation of scheduler comment +std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const { + // We don't cache TSchedModel because it depends on TargetInstrInfo + // that could be changed during the compilation + TargetSchedModel TSchedModel; + TSchedModel.init(getSchedModel(), this, getInstrInfo()); + if (!TSchedModel.hasInstrSchedModel()) + return std::string(); + unsigned Latency = TSchedModel.computeInstrLatency(MCI.getOpcode()); + Optional RThroughput = + TSchedModel.computeInstrRThroughput(MCI.getOpcode()); + return createSchedInfoStr(Latency, RThroughput); +} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 75359fe3c0ea695de0424e976a83065af87b858c..552a89f76ca2133b34d131557c23e1150d4b30fe 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -52,7 +52,7 @@ using namespace llvm; -#define DEBUG_TYPE "twoaddrinstr" +#define DEBUG_TYPE "twoaddressinstruction" STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); @@ -155,7 +155,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); + AU.addUsedIfAvailable(); AU.addUsedIfAvailable(); AU.addPreserved(); AU.addPreserved(); @@ -171,10 +171,10 @@ public: } // end anonymous namespace char TwoAddressInstructionPass::ID = 0; -INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", +INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", +INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; @@ -1627,7 +1627,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { InstrItins = MF->getSubtarget().getInstrItineraryData(); LV = getAnalysisIfAvailable(); LIS = getAnalysisIfAvailable(); - AA = &getAnalysis().getAAResults(); + if (auto *AAPass = getAnalysisIfAvailable()) + AA = &AAPass->getAAResults(); + else + AA = nullptr; OptLevel = TM.getOptLevel(); bool MadeChange = false; diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index c2db56a7657ce86d326806ec61c902891b1ad1f1..407fd9b162e975a0b646e347a7b03b811a88f713 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -195,18 +196,31 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { } if (phi->getNumOperands() == 3) { - unsigned Input = phi->getOperand(1).getReg(); - unsigned Output = phi->getOperand(0).getReg(); - - phi++->eraseFromParent(); + const MachineOperand &Input = phi->getOperand(1); + const MachineOperand &Output = phi->getOperand(0); + unsigned InputReg = Input.getReg(); + unsigned OutputReg = Output.getReg(); + assert(Output.getSubReg() == 0 && "Cannot have output subregister"); ModifiedPHI = true; - if (Input != Output) { + if (InputReg != OutputReg) { MachineRegisterInfo &MRI = F.getRegInfo(); - MRI.constrainRegClass(Input, MRI.getRegClass(Output)); - MRI.replaceRegWith(Output, Input); + unsigned InputSub = Input.getSubReg(); + if (InputSub == 0 && + MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) { + MRI.replaceRegWith(OutputReg, InputReg); + } else { + // The input register to the PHI has a subregister or it can't be + // constrained to the proper register class: + // insert a COPY instead of simply replacing the output + // with the input. + const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo(); + BuildMI(*BB, BB->getFirstNonPHI(), phi->getDebugLoc(), + TII->get(TargetOpcode::COPY), OutputReg) + .addReg(InputReg, getRegState(Input), InputSub); + } + phi++->eraseFromParent(); } - continue; } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index c8946010e9d15fb426f493bb3527608e6cd39bc0..124c2790f68c47791d32b4f51da603242fc4688e 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -72,9 +72,21 @@ void VirtRegMap::grow() { Virt2SplitMap.resize(NumRegs); } +void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg) && + TargetRegisterInfo::isPhysicalRegister(physReg)); + assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && + "attempt to assign physical register to already mapped " + "virtual register"); + assert(!getRegInfo().isReserved(physReg) && + "Attempt to map virtReg to a reserved physReg"); + Virt2PhysMap[virtReg] = physReg; +} + unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { - int SS = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); + unsigned Size = TRI->getSpillSize(*RC); + unsigned Align = TRI->getSpillAlignment(*RC); + int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Align); ++NumSpillSlots; return SS; } diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index ae07e8b2fa03229a76eb51b327665d25c23a0e4e..c63a0a9e60ea32fd5c5a2e18fd1817e0934855d4 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -16,13 +16,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCSymbol.h" @@ -54,7 +54,7 @@ namespace { class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. - WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {} + WinEHPrepare() : FunctionPass(ID) {} bool runOnFunction(Function &Fn) override; @@ -94,12 +94,10 @@ private: } // end anonymous namespace char WinEHPrepare::ID = 0; -INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions", - false, false) +INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions", + false, false) -FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { - return new WinEHPrepare(TM); -} +FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); } bool WinEHPrepare::runOnFunction(Function &Fn) { if (!Fn.hasPersonalityFn()) diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp index 7d2848bdc13b1c3d6243290ca74dee8bc7ebb73e..1a8d5a4f45dae5dde7f982dd5851a2f804c234cd 100644 --- a/lib/CodeGen/XRayInstrumentation.cpp +++ b/lib/CodeGen/XRayInstrumentation.cpp @@ -1,4 +1,4 @@ -//===-- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. -===// +//===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. --===// // // The LLVM Compiler Infrastructure // @@ -14,18 +14,26 @@ // //===---------------------------------------------------------------------===// -#include "llvm/CodeGen/Analysis.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/TargetRegistry.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; namespace { + struct XRayInstrumentation : public MachineFunctionPass { static char ID; @@ -33,6 +41,14 @@ struct XRayInstrumentation : public MachineFunctionPass { initializeXRayInstrumentationPass(*PassRegistry::getPassRegistry()); } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; private: @@ -43,7 +59,7 @@ private: // This is the approach to go on CPUs which have a single RET instruction, // like x86/x86_64. void replaceRetWithPatchableRet(MachineFunction &MF, - const TargetInstrInfo *TII); + const TargetInstrInfo *TII); // Prepend the original return instruction with the exit sled code ("patchable // function exit" pseudo-instruction), preserving the original return @@ -54,13 +70,13 @@ private: // have to call the trampoline and return from it to the original return // instruction of the function being instrumented. void prependRetWithPatchableExit(MachineFunction &MF, - const TargetInstrInfo *TII); + const TargetInstrInfo *TII); }; -} // anonymous namespace -void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, - const TargetInstrInfo *TII) -{ +} // end anonymous namespace + +void XRayInstrumentation::replaceRetWithPatchableRet( + MachineFunction &MF, const TargetInstrInfo *TII) { // We look for *all* terminators and returns, then replace those with // PATCHABLE_RET instructions. SmallVector Terminators; @@ -91,9 +107,8 @@ void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF, I->eraseFromParent(); } -void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, - const TargetInstrInfo *TII) -{ +void XRayInstrumentation::prependRetWithPatchableExit( + MachineFunction &MF, const TargetInstrInfo *TII) { for (auto &MBB : MF) { for (auto &T : MBB.terminators()) { unsigned Opc = 0; @@ -106,7 +121,7 @@ void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF, if (Opc != 0) { // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT or // PATCHABLE_TAIL_CALL . - BuildMI(MBB, T, T.getDebugLoc(),TII->get(Opc)); + BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc)); } } } @@ -125,14 +140,24 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { return false; // XRay threshold attribute not found. if (Attr.getValueAsString().getAsInteger(10, XRayThreshold)) return false; // Invalid value for threshold. - if (F.size() < XRayThreshold) - return false; // Function is too small. + + // Count the number of MachineInstr`s in MachineFunction + int64_t MICount = 0; + for (const auto& MBB : MF) + MICount += MBB.size(); + + // Check if we have a loop. + // FIXME: Maybe make this smarter, and see whether the loops are dependent + // on inputs or side-effects? + MachineLoopInfo &MLI = getAnalysis(); + if (MLI.empty() && MICount < XRayThreshold) + return false; // Function is too small and has no loops. } // We look for the first non-empty MachineBasicBlock, so that we can insert // the function instrumentation in the appropriate place. - auto MBI = - find_if(MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); }); + auto MBI = llvm::find_if( + MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); }); if (MBI == MF.end()) return false; // The function is empty. @@ -142,12 +167,10 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { if (!MF.getSubtarget().isXRaySupported()) { FirstMI.emitError("An attempt to perform XRay instrumentation for an" - " unsupported target."); + " unsupported target."); return false; } - // FIXME: Do the loop triviality analysis here or in an earlier pass. - // First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the // MachineFunction. BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), @@ -176,5 +199,8 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) { char XRayInstrumentation::ID = 0; char &llvm::XRayInstrumentationID = XRayInstrumentation::ID; -INITIALIZE_PASS(XRayInstrumentation, "xray-instrumentation", "Insert XRay ops", - false, false) +INITIALIZE_PASS_BEGIN(XRayInstrumentation, "xray-instrumentation", + "Insert XRay ops", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(XRayInstrumentation, "xray-instrumentation", + "Insert XRay ops", false, false) diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt index 6e9214d72adc94002f20a1f5ae2da0ba0757bb86..2d24dcc52fb76548ed837136816a7e2399b55c17 100644 --- a/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/lib/DebugInfo/CodeView/CMakeLists.txt @@ -2,23 +2,38 @@ add_llvm_library(LLVMDebugInfoCodeView CodeViewError.cpp CodeViewRecordIO.cpp CVSymbolVisitor.cpp - CVTypeDumper.cpp CVTypeVisitor.cpp + DebugChecksumsSubsection.cpp + DebugCrossExSubsection.cpp + DebugCrossImpSubsection.cpp + DebugFrameDataSubsection.cpp + DebugInlineeLinesSubsection.cpp + DebugLinesSubsection.cpp + DebugStringTableSubsection.cpp + DebugSubsection.cpp + DebugSubsectionRecord.cpp + DebugSubsectionVisitor.cpp + DebugSymbolRVASubsection.cpp + DebugSymbolsSubsection.cpp EnumTables.cpp Formatters.cpp + LazyRandomTypeCollection.cpp Line.cpp - ModuleSubstream.cpp - ModuleSubstreamVisitor.cpp RecordSerialization.cpp + StringsAndChecksums.cpp SymbolRecordMapping.cpp SymbolDumper.cpp SymbolSerializer.cpp TypeDatabase.cpp TypeDatabaseVisitor.cpp TypeDumpVisitor.cpp + TypeIndex.cpp + TypeIndexDiscovery.cpp + TypeName.cpp TypeRecordMapping.cpp TypeSerializer.cpp TypeStreamMerger.cpp + TypeTableCollection.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/CodeView diff --git a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp index 4c78caf034777ecc95f0df7492314c27f6f23f1f..d058f48649754464507db585b4a230cf10cc8b3d 100644 --- a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp +++ b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp @@ -46,7 +46,7 @@ Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) { } #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \ SYMBOL_RECORD(EnumVal, EnumVal, AliasName) -#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" } if (auto EC = Callbacks.visitSymbolEnd(Record)) diff --git a/lib/DebugInfo/CodeView/CVTypeDumper.cpp b/lib/DebugInfo/CodeView/CVTypeDumper.cpp deleted file mode 100644 index bcc8218d94460e8a4ff98edb653207e815e0c673..0000000000000000000000000000000000000000 --- a/lib/DebugInfo/CodeView/CVTypeDumper.cpp +++ /dev/null @@ -1,77 +0,0 @@ -//===-- CVTypeDumper.cpp - CodeView type info dumper ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" -#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" -#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" -#include "llvm/Support/BinaryByteStream.h" - -using namespace llvm; -using namespace llvm::codeview; - -Error CVTypeDumper::dump(const CVType &Record, TypeVisitorCallbacks &Dumper) { - TypeDatabaseVisitor DBV(TypeDB); - TypeDeserializer Deserializer; - TypeVisitorCallbackPipeline Pipeline; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(DBV); - Pipeline.addCallbackToPipeline(Dumper); - - CVTypeVisitor Visitor(Pipeline); - if (Handler) - Visitor.addTypeServerHandler(*Handler); - - CVType RecordCopy = Record; - if (auto EC = Visitor.visitTypeRecord(RecordCopy)) - return EC; - return Error::success(); -} - -Error CVTypeDumper::dump(const CVTypeArray &Types, - TypeVisitorCallbacks &Dumper) { - TypeDatabaseVisitor DBV(TypeDB); - TypeDeserializer Deserializer; - TypeVisitorCallbackPipeline Pipeline; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(DBV); - Pipeline.addCallbackToPipeline(Dumper); - - CVTypeVisitor Visitor(Pipeline); - if (Handler) - Visitor.addTypeServerHandler(*Handler); - - if (auto EC = Visitor.visitTypeStream(Types)) - return EC; - return Error::success(); -} - -Error CVTypeDumper::dump(ArrayRef Data, TypeVisitorCallbacks &Dumper) { - BinaryByteStream Stream(Data, llvm::support::little); - CVTypeArray Types; - BinaryStreamReader Reader(Stream); - if (auto EC = Reader.readArray(Types, Reader.getLength())) - return EC; - - return dump(Types, Dumper); -} - -void CVTypeDumper::printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, - TypeIndex TI, TypeDatabase &DB) { - StringRef TypeName; - if (!TI.isNoneType()) - TypeName = DB.getTypeName(TI); - if (!TypeName.empty()) - Printer.printHex(FieldName, TypeName, TI.getIndex()); - else - Printer.printHex(FieldName, TI.getIndex()); -} diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp index 0069ee3cc9043058db648c06e3da764d8c479b15..22f166a2335d633fe9559bfd25bd71173e9af065 100644 --- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp +++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp @@ -9,9 +9,9 @@ #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" -#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" #include "llvm/DebugInfo/CodeView/TypeServerHandler.h" @@ -22,12 +22,9 @@ using namespace llvm; using namespace llvm::codeview; -CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks) - : Callbacks(Callbacks) {} template -static Error visitKnownRecord(CVTypeVisitor &Visitor, CVType &Record, - TypeVisitorCallbacks &Callbacks) { +static Error visitKnownRecord(CVType &Record, TypeVisitorCallbacks &Callbacks) { TypeRecordKind RK = static_cast(Record.Type); T KnownRecord(RK); if (auto EC = Callbacks.visitKnownRecord(Record, KnownRecord)) @@ -46,37 +43,78 @@ static Error visitKnownMember(CVMemberRecord &Record, } static Expected deserializeTypeServerRecord(CVType &Record) { - class StealTypeServerVisitor : public TypeVisitorCallbacks { - public: - explicit StealTypeServerVisitor(TypeServer2Record &TR) : TR(TR) {} + TypeServer2Record R(TypeRecordKind::TypeServer2); + if (auto EC = TypeDeserializer::deserializeAs(Record, R)) + return std::move(EC); + return R; +} - Error visitKnownRecord(CVType &CVR, TypeServer2Record &Record) override { - TR = Record; - return Error::success(); - } +static Error visitMemberRecord(CVMemberRecord &Record, + TypeVisitorCallbacks &Callbacks) { + if (auto EC = Callbacks.visitMemberBegin(Record)) + return EC; - private: - TypeServer2Record &TR; - }; + switch (Record.Kind) { + default: + if (auto EC = Callbacks.visitUnknownMember(Record)) + return EC; + break; +#define MEMBER_RECORD(EnumName, EnumVal, Name) \ + case EnumName: { \ + if (auto EC = visitKnownMember(Record, Callbacks)) \ + return EC; \ + break; \ + } +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \ + MEMBER_RECORD(EnumVal, EnumVal, AliasName) +#define TYPE_RECORD(EnumName, EnumVal, Name) +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + } - TypeServer2Record R(TypeRecordKind::TypeServer2); - TypeDeserializer Deserializer; - StealTypeServerVisitor Thief(R); - TypeVisitorCallbackPipeline Pipeline; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(Thief); - CVTypeVisitor Visitor(Pipeline); - if (auto EC = Visitor.visitTypeRecord(Record)) - return std::move(EC); + if (auto EC = Callbacks.visitMemberEnd(Record)) + return EC; - return R; + return Error::success(); } +namespace { + +class CVTypeVisitor { +public: + explicit CVTypeVisitor(TypeVisitorCallbacks &Callbacks); + + void addTypeServerHandler(TypeServerHandler &Handler); + + Error visitTypeRecord(CVType &Record, TypeIndex Index); + Error visitTypeRecord(CVType &Record); + + /// Visits the type records in Data. Sets the error flag on parse failures. + Error visitTypeStream(const CVTypeArray &Types); + Error visitTypeStream(CVTypeRange Types); + Error visitTypeStream(TypeCollection &Types); + + Error visitMemberRecord(CVMemberRecord Record); + Error visitFieldListMemberStream(BinaryStreamReader &Stream); + +private: + Expected handleTypeServer(CVType &Record); + Error finishVisitation(CVType &Record); + + /// The interface to the class that gets notified of each visitation. + TypeVisitorCallbacks &Callbacks; + + TinyPtrVector Handlers; +}; + +CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks) + : Callbacks(Callbacks) {} + void CVTypeVisitor::addTypeServerHandler(TypeServerHandler &Handler) { Handlers.push_back(&Handler); } -Error CVTypeVisitor::visitTypeRecord(CVType &Record) { +Expected CVTypeVisitor::handleTypeServer(CVType &Record) { if (Record.Type == TypeLeafKind::LF_TYPESERVER2 && !Handlers.empty()) { auto TS = deserializeTypeServerRecord(Record); if (!TS) @@ -90,16 +128,16 @@ Error CVTypeVisitor::visitTypeRecord(CVType &Record) { // If the handler processed the record, return success. if (*ExpectedResult) - return Error::success(); + return true; // Otherwise keep searching for a handler, eventually falling out and // using the default record handler. } } + return false; +} - if (auto EC = Callbacks.visitTypeBegin(Record)) - return EC; - +Error CVTypeVisitor::finishVisitation(CVType &Record) { switch (Record.Type) { default: if (auto EC = Callbacks.visitUnknownType(Record)) @@ -107,7 +145,7 @@ Error CVTypeVisitor::visitTypeRecord(CVType &Record) { break; #define TYPE_RECORD(EnumName, EnumVal, Name) \ case EnumName: { \ - if (auto EC = visitKnownRecord(*this, Record, Callbacks)) \ + if (auto EC = visitKnownRecord(Record, Callbacks)) \ return EC; \ break; \ } @@ -115,7 +153,7 @@ Error CVTypeVisitor::visitTypeRecord(CVType &Record) { TYPE_RECORD(EnumVal, EnumVal, AliasName) #define MEMBER_RECORD(EnumName, EnumVal, Name) #define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" } if (auto EC = Callbacks.visitTypeEnd(Record)) @@ -124,36 +162,33 @@ Error CVTypeVisitor::visitTypeRecord(CVType &Record) { return Error::success(); } -static Error visitMemberRecord(CVMemberRecord &Record, - TypeVisitorCallbacks &Callbacks) { - if (auto EC = Callbacks.visitMemberBegin(Record)) +Error CVTypeVisitor::visitTypeRecord(CVType &Record, TypeIndex Index) { + auto ExpectedResult = handleTypeServer(Record); + if (!ExpectedResult) + return ExpectedResult.takeError(); + if (*ExpectedResult) + return Error::success(); + + if (auto EC = Callbacks.visitTypeBegin(Record, Index)) return EC; - switch (Record.Kind) { - default: - if (auto EC = Callbacks.visitUnknownMember(Record)) - return EC; - break; -#define MEMBER_RECORD(EnumName, EnumVal, Name) \ - case EnumName: { \ - if (auto EC = visitKnownMember(Record, Callbacks)) \ - return EC; \ - break; \ - } -#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \ - MEMBER_RECORD(EnumVal, EnumVal, AliasName) -#define TYPE_RECORD(EnumName, EnumVal, Name) -#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" - } + return finishVisitation(Record); +} - if (auto EC = Callbacks.visitMemberEnd(Record)) +Error CVTypeVisitor::visitTypeRecord(CVType &Record) { + auto ExpectedResult = handleTypeServer(Record); + if (!ExpectedResult) + return ExpectedResult.takeError(); + if (*ExpectedResult) + return Error::success(); + + if (auto EC = Callbacks.visitTypeBegin(Record)) return EC; - return Error::success(); + return finishVisitation(Record); } -Error CVTypeVisitor::visitMemberRecord(CVMemberRecord &Record) { +Error CVTypeVisitor::visitMemberRecord(CVMemberRecord Record) { return ::visitMemberRecord(Record, Callbacks); } @@ -174,12 +209,18 @@ Error CVTypeVisitor::visitTypeStream(CVTypeRange Types) { return Error::success(); } -Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader Reader) { - FieldListDeserializer Deserializer(Reader); - TypeVisitorCallbackPipeline Pipeline; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(Callbacks); +Error CVTypeVisitor::visitTypeStream(TypeCollection &Types) { + Optional I = Types.getFirst(); + while (I) { + CVType Type = Types.getType(*I); + if (auto EC = visitTypeRecord(Type, *I)) + return EC; + I = Types.getNext(*I); + } + return Error::success(); +} +Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader &Reader) { TypeLeafKind Leaf; while (!Reader.empty()) { if (auto EC = Reader.readEnum(Leaf)) @@ -187,15 +228,116 @@ Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader Reader) { CVMemberRecord Record; Record.Kind = Leaf; - if (auto EC = ::visitMemberRecord(Record, Pipeline)) + if (auto EC = ::visitMemberRecord(Record, Callbacks)) return EC; } return Error::success(); } -Error CVTypeVisitor::visitFieldListMemberStream(ArrayRef Data) { - BinaryByteStream S(Data, llvm::support::little); - BinaryStreamReader SR(S); - return visitFieldListMemberStream(SR); +struct FieldListVisitHelper { + FieldListVisitHelper(TypeVisitorCallbacks &Callbacks, ArrayRef Data, + VisitorDataSource Source) + : Stream(Data, llvm::support::little), Reader(Stream), + Deserializer(Reader), + Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) { + if (Source == VDS_BytesPresent) { + Pipeline.addCallbackToPipeline(Deserializer); + Pipeline.addCallbackToPipeline(Callbacks); + } + } + + BinaryByteStream Stream; + BinaryStreamReader Reader; + FieldListDeserializer Deserializer; + TypeVisitorCallbackPipeline Pipeline; + CVTypeVisitor Visitor; +}; + +struct VisitHelper { + VisitHelper(TypeVisitorCallbacks &Callbacks, VisitorDataSource Source) + : Visitor((Source == VDS_BytesPresent) ? Pipeline : Callbacks) { + if (Source == VDS_BytesPresent) { + Pipeline.addCallbackToPipeline(Deserializer); + Pipeline.addCallbackToPipeline(Callbacks); + } + } + + TypeDeserializer Deserializer; + TypeVisitorCallbackPipeline Pipeline; + CVTypeVisitor Visitor; +}; +} + +Error llvm::codeview::visitTypeRecord(CVType &Record, TypeIndex Index, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source, + TypeServerHandler *TS) { + VisitHelper V(Callbacks, Source); + if (TS) + V.Visitor.addTypeServerHandler(*TS); + return V.Visitor.visitTypeRecord(Record, Index); +} + +Error llvm::codeview::visitTypeRecord(CVType &Record, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source, + TypeServerHandler *TS) { + VisitHelper V(Callbacks, Source); + if (TS) + V.Visitor.addTypeServerHandler(*TS); + return V.Visitor.visitTypeRecord(Record); +} + +Error llvm::codeview::visitTypeStream(const CVTypeArray &Types, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source, + TypeServerHandler *TS) { + VisitHelper V(Callbacks, Source); + if (TS) + V.Visitor.addTypeServerHandler(*TS); + return V.Visitor.visitTypeStream(Types); +} + +Error llvm::codeview::visitTypeStream(CVTypeRange Types, + TypeVisitorCallbacks &Callbacks, + TypeServerHandler *TS) { + VisitHelper V(Callbacks, VDS_BytesPresent); + if (TS) + V.Visitor.addTypeServerHandler(*TS); + return V.Visitor.visitTypeStream(Types); +} + +Error llvm::codeview::visitTypeStream(TypeCollection &Types, + TypeVisitorCallbacks &Callbacks, + TypeServerHandler *TS) { + // When the internal visitor calls Types.getType(Index) the interface is + // required to return a CVType with the bytes filled out. So we can assume + // that the bytes will be present when individual records are visited. + VisitHelper V(Callbacks, VDS_BytesPresent); + if (TS) + V.Visitor.addTypeServerHandler(*TS); + return V.Visitor.visitTypeStream(Types); +} + +Error llvm::codeview::visitMemberRecord(CVMemberRecord Record, + TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source) { + FieldListVisitHelper V(Callbacks, Record.Data, Source); + return V.Visitor.visitMemberRecord(Record); +} + +Error llvm::codeview::visitMemberRecord(TypeLeafKind Kind, + ArrayRef Record, + TypeVisitorCallbacks &Callbacks) { + CVMemberRecord R; + R.Data = Record; + R.Kind = Kind; + return visitMemberRecord(R, Callbacks, VDS_BytesPresent); +} + +Error llvm::codeview::visitMemberRecordStream(ArrayRef FieldList, + TypeVisitorCallbacks &Callbacks) { + FieldListVisitHelper V(Callbacks, FieldList, VDS_BytesPresent); + return V.Visitor.visitFieldListMemberStream(V.Reader); } diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp index 282e3103adc9381c335f1ea0fb40ba74a4b9389b..711144fc2faa6ad2b08789070c18704926dad0ac 100644 --- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -27,6 +27,14 @@ Error CodeViewRecordIO::beginRecord(Optional MaxLength) { Error CodeViewRecordIO::endRecord() { assert(!Limits.empty() && "Not in a record!"); Limits.pop_back(); + // We would like to assert that we actually read / wrote all the bytes that we + // expected to for this record, but unfortunately we can't do this. Some + // producers such as MASM over-allocate for certain types of records and + // commit the extraneous data, so when reading we can't be sure every byte + // will have been read. And when writing we over-allocate temporarily since + // we don't know how big the record is until we're finished writing it, so + // even though we don't commit the extraneous data, we still can't guarantee + // we're at the end of the allocated data. return Error::success(); } @@ -49,6 +57,12 @@ uint32_t CodeViewRecordIO::maxFieldLength() const { return *Min; } +Error CodeViewRecordIO::padToAlignment(uint32_t Align) { + if (isReading()) + return Reader->padToAlignment(Align); + return Writer->padToAlignment(Align); +} + Error CodeViewRecordIO::skipPadding() { assert(!isWriting() && "Cannot skip padding while writing!"); diff --git a/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c31b8d1c96d585f06c1c3e35ea2af431545dfe2b --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugChecksumsSubsection.cpp @@ -0,0 +1,108 @@ +//===- DebugChecksumsSubsection.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/Support/BinaryStreamReader.h" + +using namespace llvm; +using namespace llvm::codeview; + +struct FileChecksumEntryHeader { + using ulittle32_t = support::ulittle32_t; + + ulittle32_t FileNameOffset; // Byte offset of filename in global string table. + uint8_t ChecksumSize; // Number of bytes of checksum. + uint8_t ChecksumKind; // FileChecksumKind + // Checksum bytes follow. +}; + +Error llvm::VarStreamArrayExtractor:: +operator()(BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) { + BinaryStreamReader Reader(Stream); + + const FileChecksumEntryHeader *Header; + if (auto EC = Reader.readObject(Header)) + return EC; + + Item.FileNameOffset = Header->FileNameOffset; + Item.Kind = static_cast(Header->ChecksumKind); + if (auto EC = Reader.readBytes(Item.Checksum, Header->ChecksumSize)) + return EC; + + Len = alignTo(Header->ChecksumSize + sizeof(FileChecksumEntryHeader), 4); + return Error::success(); +} + +Error DebugChecksumsSubsectionRef::initialize(BinaryStreamReader Reader) { + if (auto EC = Reader.readArray(Checksums, Reader.bytesRemaining())) + return EC; + + return Error::success(); +} +Error DebugChecksumsSubsectionRef::initialize(BinaryStreamRef Section) { + BinaryStreamReader Reader(Section); + return initialize(Reader); +} + +DebugChecksumsSubsection::DebugChecksumsSubsection( + DebugStringTableSubsection &Strings) + : DebugSubsection(DebugSubsectionKind::FileChecksums), Strings(Strings) {} + +void DebugChecksumsSubsection::addChecksum(StringRef FileName, + FileChecksumKind Kind, + ArrayRef Bytes) { + FileChecksumEntry Entry; + if (!Bytes.empty()) { + uint8_t *Copy = Storage.Allocate(Bytes.size()); + ::memcpy(Copy, Bytes.data(), Bytes.size()); + Entry.Checksum = makeArrayRef(Copy, Bytes.size()); + } + + Entry.FileNameOffset = Strings.insert(FileName); + Entry.Kind = Kind; + Checksums.push_back(Entry); + + // This maps the offset of this string in the string table to the offset + // of this checksum entry in the checksum buffer. + OffsetMap[Entry.FileNameOffset] = SerializedSize; + assert(SerializedSize % 4 == 0); + + uint32_t Len = alignTo(sizeof(FileChecksumEntryHeader) + Bytes.size(), 4); + SerializedSize += Len; +} + +uint32_t DebugChecksumsSubsection::calculateSerializedSize() const { + return SerializedSize; +} + +Error DebugChecksumsSubsection::commit(BinaryStreamWriter &Writer) const { + for (const auto &FC : Checksums) { + FileChecksumEntryHeader Header; + Header.ChecksumKind = uint8_t(FC.Kind); + Header.ChecksumSize = FC.Checksum.size(); + Header.FileNameOffset = FC.FileNameOffset; + if (auto EC = Writer.writeObject(Header)) + return EC; + if (auto EC = Writer.writeArray(makeArrayRef(FC.Checksum))) + return EC; + if (auto EC = Writer.padToAlignment(4)) + return EC; + } + return Error::success(); +} + +uint32_t DebugChecksumsSubsection::mapChecksumOffset(StringRef FileName) const { + uint32_t Offset = Strings.getStringId(FileName); + auto Iter = OffsetMap.find(Offset); + assert(Iter != OffsetMap.end()); + return Iter->second; +} diff --git a/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..21e2cc56075b70b416aae1cf522fc7fb87dfc701 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp @@ -0,0 +1,51 @@ +//===- DebugCrossExSubsection.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error DebugCrossModuleExportsSubsectionRef::initialize( + BinaryStreamReader Reader) { + if (Reader.bytesRemaining() % sizeof(CrossModuleExport) != 0) + return make_error( + cv_error_code::corrupt_record, + "Cross Scope Exports section is an invalid size!"); + + uint32_t Size = Reader.bytesRemaining() / sizeof(CrossModuleExport); + return Reader.readArray(References, Size); +} + +Error DebugCrossModuleExportsSubsectionRef::initialize(BinaryStreamRef Stream) { + BinaryStreamReader Reader(Stream); + return initialize(Reader); +} + +void DebugCrossModuleExportsSubsection::addMapping(uint32_t Local, + uint32_t Global) { + Mappings[Local] = Global; +} + +uint32_t DebugCrossModuleExportsSubsection::calculateSerializedSize() const { + return Mappings.size() * sizeof(CrossModuleExport); +} + +Error DebugCrossModuleExportsSubsection::commit( + BinaryStreamWriter &Writer) const { + for (const auto &M : Mappings) { + if (auto EC = Writer.writeInteger(M.first)) + return EC; + if (auto EC = Writer.writeInteger(M.second)) + return EC; + } + return Error::success(); +} diff --git a/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2c4a0b779342f416952c2d8c8710b88fc11b46af --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugCrossImpSubsection.cpp @@ -0,0 +1,91 @@ +//===- DebugCrossImpSubsection.cpp ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" + +using namespace llvm; +using namespace llvm::codeview; + +namespace llvm { +Error VarStreamArrayExtractor:: +operator()(BinaryStreamRef Stream, uint32_t &Len, + codeview::CrossModuleImportItem &Item) { + BinaryStreamReader Reader(Stream); + if (Reader.bytesRemaining() < sizeof(CrossModuleImport)) + return make_error( + cv_error_code::insufficient_buffer, + "Not enough bytes for a Cross Module Import Header!"); + if (auto EC = Reader.readObject(Item.Header)) + return EC; + if (Reader.bytesRemaining() < Item.Header->Count * sizeof(uint32_t)) + return make_error( + cv_error_code::insufficient_buffer, + "Not enough to read specified number of Cross Module References!"); + if (auto EC = Reader.readArray(Item.Imports, Item.Header->Count)) + return EC; + return Error::success(); +} +} + +Error DebugCrossModuleImportsSubsectionRef::initialize( + BinaryStreamReader Reader) { + return Reader.readArray(References, Reader.bytesRemaining()); +} + +Error DebugCrossModuleImportsSubsectionRef::initialize(BinaryStreamRef Stream) { + BinaryStreamReader Reader(Stream); + return initialize(Reader); +} + +void DebugCrossModuleImportsSubsection::addImport(StringRef Module, + uint32_t ImportId) { + Strings.insert(Module); + std::vector Targets = {support::ulittle32_t(ImportId)}; + auto Result = Mappings.insert(std::make_pair(Module, Targets)); + if (!Result.second) + Result.first->getValue().push_back(Targets[0]); +} + +uint32_t DebugCrossModuleImportsSubsection::calculateSerializedSize() const { + uint32_t Size = 0; + for (const auto &Item : Mappings) { + Size += sizeof(CrossModuleImport); + Size += sizeof(support::ulittle32_t) * Item.second.size(); + } + return Size; +} + +Error DebugCrossModuleImportsSubsection::commit( + BinaryStreamWriter &Writer) const { + using T = decltype(&*Mappings.begin()); + std::vector Ids; + Ids.reserve(Mappings.size()); + + for (const auto &M : Mappings) + Ids.push_back(&M); + + std::sort(Ids.begin(), Ids.end(), [this](const T &L1, const T &L2) { + return Strings.getStringId(L1->getKey()) < + Strings.getStringId(L2->getKey()); + }); + + for (const auto &Item : Ids) { + CrossModuleImport Imp; + Imp.ModuleNameOffset = Strings.getStringId(Item->getKey()); + Imp.Count = Item->getValue().size(); + if (auto EC = Writer.writeObject(Imp)) + return EC; + if (auto EC = Writer.writeArray(makeArrayRef(Item->getValue()))) + return EC; + } + return Error::success(); +} diff --git a/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp b/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fd558aa9cc8a5f835cfe39cb2d6abb687f2f24c4 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp @@ -0,0 +1,44 @@ +//===- DebugFrameDataSubsection.cpp -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error DebugFrameDataSubsectionRef::initialize(BinaryStreamReader Reader) { + if (auto EC = Reader.readObject(RelocPtr)) + return EC; + if (Reader.bytesRemaining() % sizeof(FrameData) != 0) + return make_error(cv_error_code::corrupt_record, + "Invalid frame data record format!"); + + uint32_t Count = Reader.bytesRemaining() / sizeof(FrameData); + if (auto EC = Reader.readArray(Frames, Count)) + return EC; + return Error::success(); +} + +uint32_t DebugFrameDataSubsection::calculateSerializedSize() const { + return 4 + sizeof(FrameData) * Frames.size(); +} + +Error DebugFrameDataSubsection::commit(BinaryStreamWriter &Writer) const { + if (auto EC = Writer.writeInteger(0)) + return EC; + + if (auto EC = Writer.writeArray(makeArrayRef(Frames))) + return EC; + return Error::success(); +} + +void DebugFrameDataSubsection::addFrameData(const FrameData &Frame) { + Frames.push_back(Frame); +} diff --git a/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e7719d05dbdc6f7c28797ef455c0b903ce5d8142 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp @@ -0,0 +1,122 @@ +//===- DebugInlineeLinesSubsection.cpp ------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error VarStreamArrayExtractor:: +operator()(BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item) { + BinaryStreamReader Reader(Stream); + + if (auto EC = Reader.readObject(Item.Header)) + return EC; + + if (HasExtraFiles) { + uint32_t ExtraFileCount; + if (auto EC = Reader.readInteger(ExtraFileCount)) + return EC; + if (auto EC = Reader.readArray(Item.ExtraFiles, ExtraFileCount)) + return EC; + } + + Len = Reader.getOffset(); + return Error::success(); +} + +DebugInlineeLinesSubsectionRef::DebugInlineeLinesSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::InlineeLines) {} + +Error DebugInlineeLinesSubsectionRef::initialize(BinaryStreamReader Reader) { + if (auto EC = Reader.readEnum(Signature)) + return EC; + + Lines.getExtractor().HasExtraFiles = hasExtraFiles(); + if (auto EC = Reader.readArray(Lines, Reader.bytesRemaining())) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +bool DebugInlineeLinesSubsectionRef::hasExtraFiles() const { + return Signature == InlineeLinesSignature::ExtraFiles; +} + +DebugInlineeLinesSubsection::DebugInlineeLinesSubsection( + DebugChecksumsSubsection &Checksums, bool HasExtraFiles) + : DebugSubsection(DebugSubsectionKind::InlineeLines), Checksums(Checksums), + HasExtraFiles(HasExtraFiles) {} + +uint32_t DebugInlineeLinesSubsection::calculateSerializedSize() const { + // 4 bytes for the signature + uint32_t Size = sizeof(InlineeLinesSignature); + + // one header for each entry. + Size += Entries.size() * sizeof(InlineeSourceLineHeader); + if (HasExtraFiles) { + // If extra files are enabled, one count for each entry. + Size += Entries.size() * sizeof(uint32_t); + + // And one file id for each file. + Size += ExtraFileCount * sizeof(uint32_t); + } + assert(Size % 4 == 0); + return Size; +} + +Error DebugInlineeLinesSubsection::commit(BinaryStreamWriter &Writer) const { + InlineeLinesSignature Sig = InlineeLinesSignature::Normal; + if (HasExtraFiles) + Sig = InlineeLinesSignature::ExtraFiles; + + if (auto EC = Writer.writeEnum(Sig)) + return EC; + + for (const auto &E : Entries) { + if (auto EC = Writer.writeObject(E.Header)) + return EC; + + if (!HasExtraFiles) + continue; + + if (auto EC = Writer.writeInteger(E.ExtraFiles.size())) + return EC; + if (auto EC = Writer.writeArray(makeArrayRef(E.ExtraFiles))) + return EC; + } + + return Error::success(); +} + +void DebugInlineeLinesSubsection::addExtraFile(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + + auto &Entry = Entries.back(); + Entry.ExtraFiles.push_back(ulittle32_t(Offset)); + ++ExtraFileCount; +} + +void DebugInlineeLinesSubsection::addInlineSite(TypeIndex FuncId, + StringRef FileName, + uint32_t SourceLine) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + + Entries.emplace_back(); + auto &Entry = Entries.back(); + Entry.Header.FileID = Offset; + Entry.Header.SourceLineNum = SourceLine; + Entry.Header.Inlinee = FuncId; +} diff --git a/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fbcad61d60a623d77bd77a08ac88a7814e1bdcb5 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp @@ -0,0 +1,158 @@ +//===- DebugLinesSubsection.cpp -------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" + +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error LineColumnExtractor::operator()(BinaryStreamRef Stream, uint32_t &Len, + LineColumnEntry &Item) { + using namespace codeview; + const LineBlockFragmentHeader *BlockHeader; + BinaryStreamReader Reader(Stream); + if (auto EC = Reader.readObject(BlockHeader)) + return EC; + bool HasColumn = Header->Flags & uint16_t(LF_HaveColumns); + uint32_t LineInfoSize = + BlockHeader->NumLines * + (sizeof(LineNumberEntry) + (HasColumn ? sizeof(ColumnNumberEntry) : 0)); + if (BlockHeader->BlockSize < sizeof(LineBlockFragmentHeader)) + return make_error(cv_error_code::corrupt_record, + "Invalid line block record size"); + uint32_t Size = BlockHeader->BlockSize - sizeof(LineBlockFragmentHeader); + if (LineInfoSize > Size) + return make_error(cv_error_code::corrupt_record, + "Invalid line block record size"); + // The value recorded in BlockHeader->BlockSize includes the size of + // LineBlockFragmentHeader. + Len = BlockHeader->BlockSize; + Item.NameIndex = BlockHeader->NameIndex; + if (auto EC = Reader.readArray(Item.LineNumbers, BlockHeader->NumLines)) + return EC; + if (HasColumn) { + if (auto EC = Reader.readArray(Item.Columns, BlockHeader->NumLines)) + return EC; + } + return Error::success(); +} + +DebugLinesSubsectionRef::DebugLinesSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::Lines) {} + +Error DebugLinesSubsectionRef::initialize(BinaryStreamReader Reader) { + if (auto EC = Reader.readObject(Header)) + return EC; + + LinesAndColumns.getExtractor().Header = Header; + if (auto EC = Reader.readArray(LinesAndColumns, Reader.bytesRemaining())) + return EC; + + return Error::success(); +} + +bool DebugLinesSubsectionRef::hasColumnInfo() const { + return !!(Header->Flags & LF_HaveColumns); +} + +DebugLinesSubsection::DebugLinesSubsection(DebugChecksumsSubsection &Checksums, + DebugStringTableSubsection &Strings) + : DebugSubsection(DebugSubsectionKind::Lines), Checksums(Checksums) {} + +void DebugLinesSubsection::createBlock(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + + Blocks.emplace_back(Offset); +} + +void DebugLinesSubsection::addLineInfo(uint32_t Offset, const LineInfo &Line) { + Block &B = Blocks.back(); + LineNumberEntry LNE; + LNE.Flags = Line.getRawData(); + LNE.Offset = Offset; + B.Lines.push_back(LNE); +} + +void DebugLinesSubsection::addLineAndColumnInfo(uint32_t Offset, + const LineInfo &Line, + uint32_t ColStart, + uint32_t ColEnd) { + Block &B = Blocks.back(); + assert(B.Lines.size() == B.Columns.size()); + + addLineInfo(Offset, Line); + ColumnNumberEntry CNE; + CNE.StartColumn = ColStart; + CNE.EndColumn = ColEnd; + B.Columns.push_back(CNE); +} + +Error DebugLinesSubsection::commit(BinaryStreamWriter &Writer) const { + LineFragmentHeader Header; + Header.CodeSize = CodeSize; + Header.Flags = hasColumnInfo() ? LF_HaveColumns : 0; + Header.RelocOffset = RelocOffset; + Header.RelocSegment = RelocSegment; + + if (auto EC = Writer.writeObject(Header)) + return EC; + + for (const auto &B : Blocks) { + LineBlockFragmentHeader BlockHeader; + assert(B.Lines.size() == B.Columns.size() || B.Columns.empty()); + + BlockHeader.NumLines = B.Lines.size(); + BlockHeader.BlockSize = sizeof(LineBlockFragmentHeader); + BlockHeader.BlockSize += BlockHeader.NumLines * sizeof(LineNumberEntry); + if (hasColumnInfo()) + BlockHeader.BlockSize += BlockHeader.NumLines * sizeof(ColumnNumberEntry); + BlockHeader.NameIndex = B.ChecksumBufferOffset; + if (auto EC = Writer.writeObject(BlockHeader)) + return EC; + + if (auto EC = Writer.writeArray(makeArrayRef(B.Lines))) + return EC; + + if (hasColumnInfo()) { + if (auto EC = Writer.writeArray(makeArrayRef(B.Columns))) + return EC; + } + } + return Error::success(); +} + +uint32_t DebugLinesSubsection::calculateSerializedSize() const { + uint32_t Size = sizeof(LineFragmentHeader); + for (const auto &B : Blocks) { + Size += sizeof(LineBlockFragmentHeader); + Size += B.Lines.size() * sizeof(LineNumberEntry); + if (hasColumnInfo()) + Size += B.Columns.size() * sizeof(ColumnNumberEntry); + } + return Size; +} + +void DebugLinesSubsection::setRelocationAddress(uint16_t Segment, + uint32_t Offset) { + RelocOffset = Offset; + RelocSegment = Segment; +} + +void DebugLinesSubsection::setCodeSize(uint32_t Size) { CodeSize = Size; } + +void DebugLinesSubsection::setFlags(LineFlags Flags) { this->Flags = Flags; } + +bool DebugLinesSubsection::hasColumnInfo() const { + return Flags & LF_HaveColumns; +} diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..de02525270c456d2a3e7b4a800a1a20febd77b81 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp @@ -0,0 +1,85 @@ +//===- DebugStringTableSubsection.cpp - CodeView String Table ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" + +#include "llvm/Support/BinaryStream.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" + +using namespace llvm; +using namespace llvm::codeview; + +DebugStringTableSubsectionRef::DebugStringTableSubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::StringTable) {} + +Error DebugStringTableSubsectionRef::initialize(BinaryStreamRef Contents) { + Stream = Contents; + return Error::success(); +} +Error DebugStringTableSubsectionRef::initialize(BinaryStreamReader &Reader) { + return Reader.readStreamRef(Stream); +} + +Expected +DebugStringTableSubsectionRef::getString(uint32_t Offset) const { + BinaryStreamReader Reader(Stream); + Reader.setOffset(Offset); + StringRef Result; + if (auto EC = Reader.readCString(Result)) + return std::move(EC); + return Result; +} + +DebugStringTableSubsection::DebugStringTableSubsection() + : DebugSubsection(DebugSubsectionKind::StringTable) {} + +uint32_t DebugStringTableSubsection::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, we want to increment + // the string table size. + if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +uint32_t DebugStringTableSubsection::calculateSerializedSize() const { + return StringSize; +} + +Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const { + uint32_t Begin = Writer.getOffset(); + uint32_t End = Begin + StringSize; + + // Write a null string at the beginning. + if (auto EC = Writer.writeCString(StringRef())) + return EC; + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Begin + Pair.getValue(); + Writer.setOffset(Offset); + if (auto EC = Writer.writeCString(S)) + return EC; + assert(Writer.getOffset() <= End); + } + + Writer.setOffset(End); + assert((End - Begin) == StringSize); + return Error::success(); +} + +uint32_t DebugStringTableSubsection::size() const { return Strings.size(); } + +uint32_t DebugStringTableSubsection::getStringId(StringRef S) const { + auto Iter = Strings.find(S); + assert(Iter != Strings.end()); + return Iter->second; +} diff --git a/lib/DebugInfo/CodeView/DebugSubsection.cpp b/lib/DebugInfo/CodeView/DebugSubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..67b428bfa713349be7f70dc9dbb0b0caff211aa6 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugSubsection.cpp @@ -0,0 +1,16 @@ +//===- DebugSubsection.cpp -----------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" + +using namespace llvm::codeview; + +DebugSubsectionRef::~DebugSubsectionRef() {} + +DebugSubsection::~DebugSubsection() {} diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp new file mode 100644 index 0000000000000000000000000000000000000000..334c5e002bbca1727da07ce42215775a222298c2 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp @@ -0,0 +1,83 @@ +//===- DebugSubsectionRecord.cpp -----------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/CodeView/DebugSubsection.h" + +#include "llvm/Support/BinaryStreamReader.h" + +using namespace llvm; +using namespace llvm::codeview; + +DebugSubsectionRecord::DebugSubsectionRecord() + : Container(CodeViewContainer::ObjectFile), + Kind(DebugSubsectionKind::None) {} + +DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind, + BinaryStreamRef Data, + CodeViewContainer Container) + : Container(Container), Kind(Kind), Data(Data) {} + +Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, + DebugSubsectionRecord &Info, + CodeViewContainer Container) { + const DebugSubsectionHeader *Header; + BinaryStreamReader Reader(Stream); + if (auto EC = Reader.readObject(Header)) + return EC; + + DebugSubsectionKind Kind = + static_cast(uint32_t(Header->Kind)); + if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) + return EC; + Info.Container = Container; + Info.Kind = Kind; + return Error::success(); +} + +uint32_t DebugSubsectionRecord::getRecordLength() const { + return sizeof(DebugSubsectionHeader) + Data.getLength(); +} + +DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; } + +BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; } + +DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( + std::shared_ptr Subsection, CodeViewContainer Container) + : Subsection(std::move(Subsection)), Container(Container) {} + +uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { + // The length of the entire subsection is always padded to 4 bytes, regardless + // of the container kind. + uint32_t Size = sizeof(DebugSubsectionHeader) + + alignTo(Subsection->calculateSerializedSize(), 4); + return Size; +} + +Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const { + assert(Writer.getOffset() % alignOf(Container) == 0 && + "Debug Subsection not properly aligned"); + + DebugSubsectionHeader Header; + Header.Kind = uint32_t(Subsection->kind()); + // The value written into the Header's Length field is only padded to the + // container's alignment + Header.Length = + alignTo(Subsection->calculateSerializedSize(), alignOf(Container)); + + if (auto EC = Writer.writeObject(Header)) + return EC; + if (auto EC = Subsection->commit(Writer)) + return EC; + if (auto EC = Writer.padToAlignment(4)) + return EC; + + return Error::success(); +} diff --git a/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9b824333369be31b38741eac83ea3b03db87e109 --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp @@ -0,0 +1,95 @@ +//===- DebugSubsectionVisitor.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h" + +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugUnknownSubsection.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamRef.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error llvm::codeview::visitDebugSubsection( + const DebugSubsectionRecord &R, DebugSubsectionVisitor &V, + const StringsAndChecksumsRef &State) { + BinaryStreamReader Reader(R.getRecordData()); + switch (R.kind()) { + case DebugSubsectionKind::Lines: { + DebugLinesSubsectionRef Fragment; + if (auto EC = Fragment.initialize(Reader)) + return EC; + + return V.visitLines(Fragment, State); + } + case DebugSubsectionKind::FileChecksums: { + DebugChecksumsSubsectionRef Fragment; + if (auto EC = Fragment.initialize(Reader)) + return EC; + + return V.visitFileChecksums(Fragment, State); + } + case DebugSubsectionKind::InlineeLines: { + DebugInlineeLinesSubsectionRef Fragment; + if (auto EC = Fragment.initialize(Reader)) + return EC; + return V.visitInlineeLines(Fragment, State); + } + case DebugSubsectionKind::CrossScopeExports: { + DebugCrossModuleExportsSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitCrossModuleExports(Section, State); + } + case DebugSubsectionKind::CrossScopeImports: { + DebugCrossModuleImportsSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitCrossModuleImports(Section, State); + } + case DebugSubsectionKind::Symbols: { + DebugSymbolsSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitSymbols(Section, State); + } + case DebugSubsectionKind::StringTable: { + DebugStringTableSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitStringTable(Section, State); + } + case DebugSubsectionKind::FrameData: { + DebugFrameDataSubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitFrameData(Section, State); + } + case DebugSubsectionKind::CoffSymbolRVA: { + DebugSymbolRVASubsectionRef Section; + if (auto EC = Section.initialize(Reader)) + return EC; + return V.visitCOFFSymbolRVAs(Section, State); + } + default: { + DebugUnknownSubsectionRef Fragment(R.kind(), R.getRecordData()); + return V.visitUnknown(Fragment); + } + } +} diff --git a/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f91b68f3ad8afba160bc54e4a2d798f527391cc --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp @@ -0,0 +1,31 @@ +//===- DebugSymbolRVASubsection.cpp ------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" + +using namespace llvm; +using namespace llvm::codeview; + +DebugSymbolRVASubsectionRef::DebugSymbolRVASubsectionRef() + : DebugSubsectionRef(DebugSubsectionKind::CoffSymbolRVA) {} + +Error DebugSymbolRVASubsectionRef::initialize(BinaryStreamReader &Reader) { + return Reader.readArray(RVAs, Reader.bytesRemaining() / sizeof(uint32_t)); +} + +DebugSymbolRVASubsection::DebugSymbolRVASubsection() + : DebugSubsection(DebugSubsectionKind::CoffSymbolRVA) {} + +Error DebugSymbolRVASubsection::commit(BinaryStreamWriter &Writer) const { + return Writer.writeArray(makeArrayRef(RVAs)); +} + +uint32_t DebugSymbolRVASubsection::calculateSerializedSize() const { + return RVAs.size() * sizeof(uint32_t); +} diff --git a/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp b/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dc8ba8c929aed3c934cab73a26199ad3e5addcdb --- /dev/null +++ b/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp @@ -0,0 +1,34 @@ +//===- DebugSymbolsSubsection.cpp -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" + +using namespace llvm; +using namespace llvm::codeview; + +Error DebugSymbolsSubsectionRef::initialize(BinaryStreamReader Reader) { + return Reader.readArray(Records, Reader.getLength()); +} + +uint32_t DebugSymbolsSubsection::calculateSerializedSize() const { + return Length; +} + +Error DebugSymbolsSubsection::commit(BinaryStreamWriter &Writer) const { + for (const auto &Record : Records) { + if (auto EC = Writer.writeBytes(Record.RecordData)) + return EC; + } + return Error::success(); +} + +void DebugSymbolsSubsection::addSymbol(CVSymbol Symbol) { + Records.push_back(Symbol); + Length += Symbol.length(); +} \ No newline at end of file diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp index 0e20bcb27ec9331f85b85cfaeb9beb018c42f396..01d8ccf2d31e8636ed09c573d0e5d83c1e0c24a7 100644 --- a/lib/DebugInfo/CodeView/EnumTables.cpp +++ b/lib/DebugInfo/CodeView/EnumTables.cpp @@ -20,13 +20,13 @@ using namespace codeview; static const EnumEntry SymbolTypeNames[] = { #define CV_SYMBOL(enum, val) {#enum, enum}, -#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" #undef CV_SYMBOL }; static const EnumEntry TypeLeafNames[] = { #define CV_TYPE(name, val) {#name, name}, -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" #undef CV_TYPE }; @@ -245,20 +245,20 @@ static const EnumEntry FrameProcSymFlagNames[] = { }; static const EnumEntry ModuleSubstreamKindNames[] = { - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, None), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, Symbols), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, Lines), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, StringTable), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, FileChecksums), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, FrameData), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, InlineeLines), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, CrossScopeImports), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, CrossScopeExports), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, ILLines), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, FuncMDTokenMap), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, TypeMDTokenMap), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, MergedAssemblyInput), - CV_ENUM_CLASS_ENT(ModuleSubstreamKind, CoffSymbolRVA), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, None), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, Symbols), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, Lines), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, StringTable), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, FileChecksums), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, FrameData), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, InlineeLines), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, CrossScopeImports), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, CrossScopeExports), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, ILLines), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, FuncMDTokenMap), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, TypeMDTokenMap), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, MergedAssemblyInput), + CV_ENUM_CLASS_ENT(DebugSubsectionKind, CoffSymbolRVA), }; static const EnumEntry ExportSymFlagNames[] = { diff --git a/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..551963ea306e68c1bec096661e0654c7e560fd1c --- /dev/null +++ b/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -0,0 +1,242 @@ +//===- LazyRandomTypeCollection.cpp ---------------------------- *- C++--*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" + +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/TypeDatabase.h" +#include "llvm/DebugInfo/CodeView/TypeName.h" +#include "llvm/DebugInfo/CodeView/TypeServerHandler.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" + +using namespace llvm; +using namespace llvm::codeview; + +static void error(Error &&EC) { + assert(!static_cast(EC)); + if (EC) + consumeError(std::move(EC)); +} + +LazyRandomTypeCollection::LazyRandomTypeCollection(uint32_t RecordCountHint) + : LazyRandomTypeCollection(CVTypeArray(), RecordCountHint, + PartialOffsetArray()) {} + +LazyRandomTypeCollection::LazyRandomTypeCollection( + const CVTypeArray &Types, uint32_t RecordCountHint, + PartialOffsetArray PartialOffsets) + : NameStorage(Allocator), Database(RecordCountHint), Types(Types), + DatabaseVisitor(Database), PartialOffsets(PartialOffsets) { + KnownOffsets.resize(Database.capacity()); +} + +LazyRandomTypeCollection::LazyRandomTypeCollection(ArrayRef Data, + uint32_t RecordCountHint) + : LazyRandomTypeCollection(RecordCountHint) { + reset(Data); +} + +LazyRandomTypeCollection::LazyRandomTypeCollection(StringRef Data, + uint32_t RecordCountHint) + : LazyRandomTypeCollection( + makeArrayRef(Data.bytes_begin(), Data.bytes_end()), RecordCountHint) { +} + +LazyRandomTypeCollection::LazyRandomTypeCollection(const CVTypeArray &Types, + uint32_t NumRecords) + : LazyRandomTypeCollection(Types, NumRecords, PartialOffsetArray()) {} + +void LazyRandomTypeCollection::reset(StringRef Data) { + reset(makeArrayRef(Data.bytes_begin(), Data.bytes_end())); +} + +void LazyRandomTypeCollection::reset(ArrayRef Data) { + PartialOffsets = PartialOffsetArray(); + + BinaryStreamReader Reader(Data, support::little); + error(Reader.readArray(Types, Reader.getLength())); + + KnownOffsets.resize(Database.capacity()); +} + +CVType LazyRandomTypeCollection::getType(TypeIndex Index) { + error(ensureTypeExists(Index)); + return Database.getTypeRecord(Index); +} + +StringRef LazyRandomTypeCollection::getTypeName(TypeIndex Index) { + if (Index.isNoneType() || Index.isSimple()) + return TypeIndex::simpleTypeName(Index); + + // Try to make sure the type exists. Even if it doesn't though, it may be + // because we're dumping a symbol stream with no corresponding type stream + // present, in which case we still want to be able to print + // for the type names. + if (auto EC = ensureTypeExists(Index)) { + consumeError(std::move(EC)); + return ""; + } + + uint32_t I = Index.toArrayIndex(); + if (I >= TypeNames.size()) + TypeNames.resize(I + 1); + + if (TypeNames[I].data() == nullptr) { + StringRef Result = NameStorage.save(computeTypeName(*this, Index)); + TypeNames[I] = Result; + } + return TypeNames[I]; +} + +bool LazyRandomTypeCollection::contains(TypeIndex Index) { + return Database.contains(Index); +} + +uint32_t LazyRandomTypeCollection::size() { return Database.size(); } + +uint32_t LazyRandomTypeCollection::capacity() { return Database.capacity(); } + +Error LazyRandomTypeCollection::ensureTypeExists(TypeIndex TI) { + if (!Database.contains(TI)) { + if (auto EC = visitRangeForType(TI)) + return EC; + } + return Error::success(); +} + +Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) { + if (PartialOffsets.empty()) + return fullScanForType(TI); + + auto Next = std::upper_bound(PartialOffsets.begin(), PartialOffsets.end(), TI, + [](TypeIndex Value, const TypeIndexOffset &IO) { + return Value < IO.Type; + }); + + assert(Next != PartialOffsets.begin()); + auto Prev = std::prev(Next); + + TypeIndex TIB = Prev->Type; + if (Database.contains(TIB)) { + // They've asked us to fetch a type index, but the entry we found in the + // partial offsets array has already been visited. Since we visit an entire + // block every time, that means this record should have been previously + // discovered. Ultimately, this means this is a request for a non-existant + // type index. + return make_error("Invalid type index"); + } + + TypeIndex TIE; + if (Next == PartialOffsets.end()) { + TIE = TypeIndex::fromArrayIndex(Database.capacity()); + } else { + TIE = Next->Type; + } + + if (auto EC = visitRange(TIB, Prev->Offset, TIE)) + return EC; + return Error::success(); +} + +Optional LazyRandomTypeCollection::getFirst() { + TypeIndex TI = TypeIndex::fromArrayIndex(0); + if (auto EC = ensureTypeExists(TI)) { + consumeError(std::move(EC)); + return None; + } + return TI; +} + +Optional LazyRandomTypeCollection::getNext(TypeIndex Prev) { + // We can't be sure how long this type stream is, given that the initial count + // given to the constructor is just a hint. So just try to make sure the next + // record exists, and if anything goes wrong, we must be at the end. + if (auto EC = ensureTypeExists(Prev + 1)) { + consumeError(std::move(EC)); + return None; + } + + return Prev + 1; +} + +Error LazyRandomTypeCollection::fullScanForType(TypeIndex TI) { + assert(PartialOffsets.empty()); + + TypeIndex CurrentTI = TypeIndex::fromArrayIndex(0); + uint32_t Offset = 0; + auto Begin = Types.begin(); + + if (!Database.empty()) { + // In the case of type streams which we don't know the number of records of, + // it's possible to search for a type index triggering a full scan, but then + // later additional records are added since we didn't know how many there + // would be until we did a full visitation, then you try to access the new + // type triggering another full scan. To avoid this, we assume that if the + // database has some records, this must be what's going on. So we ask the + // database for the largest type index less than the one we're searching for + // and only do the forward scan from there. + auto Prev = Database.largestTypeIndexLessThan(TI); + assert(Prev.hasValue() && "Empty database with valid types?"); + Offset = KnownOffsets[Prev->toArrayIndex()]; + CurrentTI = *Prev; + ++CurrentTI; + Begin = Types.at(Offset); + ++Begin; + Offset = Begin.offset(); + } + + auto End = Types.end(); + while (Begin != End) { + if (auto EC = visitOneRecord(CurrentTI, Offset, *Begin)) + return EC; + + Offset += Begin.getRecordLength(); + ++Begin; + ++CurrentTI; + } + if (CurrentTI <= TI) { + return make_error("Type Index does not exist!"); + } + return Error::success(); +} + +Error LazyRandomTypeCollection::visitRange(TypeIndex Begin, + uint32_t BeginOffset, + TypeIndex End) { + + auto RI = Types.at(BeginOffset); + assert(RI != Types.end()); + + while (Begin != End) { + if (auto EC = visitOneRecord(Begin, BeginOffset, *RI)) + return EC; + + BeginOffset += RI.getRecordLength(); + ++Begin; + ++RI; + } + + return Error::success(); +} + +Error LazyRandomTypeCollection::visitOneRecord(TypeIndex TI, uint32_t Offset, + CVType &Record) { + assert(!Database.contains(TI)); + if (auto EC = codeview::visitTypeRecord(Record, TI, DatabaseVisitor)) + return EC; + // Keep the KnownOffsets array the same size as the Database's capacity. Since + // we don't always know how many records are in the type stream, we need to be + // prepared for the database growing and receicing a type index that can't fit + // in our current buffer. + if (KnownOffsets.size() < Database.capacity()) + KnownOffsets.resize(Database.capacity()); + KnownOffsets[TI.toArrayIndex()] = Offset; + return Error::success(); +} diff --git a/lib/DebugInfo/CodeView/ModuleSubstream.cpp b/lib/DebugInfo/CodeView/ModuleSubstream.cpp deleted file mode 100644 index 69a7c59116cff54426853bdc1104025cb5b4cc62..0000000000000000000000000000000000000000 --- a/lib/DebugInfo/CodeView/ModuleSubstream.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//===- ModuleSubstream.cpp --------------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/ModuleSubstream.h" - -#include "llvm/Support/BinaryStreamReader.h" - -using namespace llvm; -using namespace llvm::codeview; - -ModuleSubstream::ModuleSubstream() : Kind(ModuleSubstreamKind::None) {} - -ModuleSubstream::ModuleSubstream(ModuleSubstreamKind Kind, BinaryStreamRef Data) - : Kind(Kind), Data(Data) {} - -Error ModuleSubstream::initialize(BinaryStreamRef Stream, - ModuleSubstream &Info) { - const ModuleSubsectionHeader *Header; - BinaryStreamReader Reader(Stream); - if (auto EC = Reader.readObject(Header)) - return EC; - - ModuleSubstreamKind Kind = - static_cast(uint32_t(Header->Kind)); - if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) - return EC; - Info.Kind = Kind; - return Error::success(); -} - -uint32_t ModuleSubstream::getRecordLength() const { - return sizeof(ModuleSubsectionHeader) + Data.getLength(); -} - -ModuleSubstreamKind ModuleSubstream::getSubstreamKind() const { return Kind; } - -BinaryStreamRef ModuleSubstream::getRecordData() const { return Data; } diff --git a/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp b/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp deleted file mode 100644 index e490a78cadbc6a920431a60bc8ef50d6d5666fdf..0000000000000000000000000000000000000000 --- a/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp +++ /dev/null @@ -1,106 +0,0 @@ -//===- ModuleSubstreamVisitor.cpp -------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/BinaryStreamRef.h" - -using namespace llvm; -using namespace llvm::codeview; - -Error IModuleSubstreamVisitor::visitSymbols(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::Symbols, Data); -} -Error IModuleSubstreamVisitor::visitLines(BinaryStreamRef Data, - const LineSubstreamHeader *Header, - const LineInfoArray &Lines) { - return visitUnknown(ModuleSubstreamKind::Lines, Data); -} -Error IModuleSubstreamVisitor::visitStringTable(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::StringTable, Data); -} -Error IModuleSubstreamVisitor::visitFileChecksums( - BinaryStreamRef Data, const FileChecksumArray &Checksums) { - return visitUnknown(ModuleSubstreamKind::FileChecksums, Data); -} -Error IModuleSubstreamVisitor::visitFrameData(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::FrameData, Data); -} -Error IModuleSubstreamVisitor::visitInlineeLines(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::InlineeLines, Data); -} -Error IModuleSubstreamVisitor::visitCrossScopeImports(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::CrossScopeExports, Data); -} -Error IModuleSubstreamVisitor::visitCrossScopeExports(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::CrossScopeImports, Data); -} -Error IModuleSubstreamVisitor::visitILLines(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::ILLines, Data); -} -Error IModuleSubstreamVisitor::visitFuncMDTokenMap(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::FuncMDTokenMap, Data); -} -Error IModuleSubstreamVisitor::visitTypeMDTokenMap(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::TypeMDTokenMap, Data); -} -Error IModuleSubstreamVisitor::visitMergedAssemblyInput(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::MergedAssemblyInput, Data); -} -Error IModuleSubstreamVisitor::visitCoffSymbolRVA(BinaryStreamRef Data) { - return visitUnknown(ModuleSubstreamKind::CoffSymbolRVA, Data); -} - -Error llvm::codeview::visitModuleSubstream(const ModuleSubstream &R, - IModuleSubstreamVisitor &V) { - switch (R.getSubstreamKind()) { - case ModuleSubstreamKind::Symbols: - return V.visitSymbols(R.getRecordData()); - case ModuleSubstreamKind::Lines: { - BinaryStreamReader Reader(R.getRecordData()); - const LineSubstreamHeader *Header; - if (auto EC = Reader.readObject(Header)) - return EC; - VarStreamArrayExtractor E(Header); - LineInfoArray LineInfos(E); - if (auto EC = Reader.readArray(LineInfos, Reader.bytesRemaining())) - return EC; - return V.visitLines(R.getRecordData(), Header, LineInfos); - } - case ModuleSubstreamKind::StringTable: - return V.visitStringTable(R.getRecordData()); - case ModuleSubstreamKind::FileChecksums: { - BinaryStreamReader Reader(R.getRecordData()); - FileChecksumArray Checksums; - if (auto EC = Reader.readArray(Checksums, Reader.bytesRemaining())) - return EC; - return V.visitFileChecksums(R.getRecordData(), Checksums); - } - case ModuleSubstreamKind::FrameData: - return V.visitFrameData(R.getRecordData()); - case ModuleSubstreamKind::InlineeLines: - return V.visitInlineeLines(R.getRecordData()); - case ModuleSubstreamKind::CrossScopeImports: - return V.visitCrossScopeImports(R.getRecordData()); - case ModuleSubstreamKind::CrossScopeExports: - return V.visitCrossScopeExports(R.getRecordData()); - case ModuleSubstreamKind::ILLines: - return V.visitILLines(R.getRecordData()); - case ModuleSubstreamKind::FuncMDTokenMap: - return V.visitFuncMDTokenMap(R.getRecordData()); - case ModuleSubstreamKind::TypeMDTokenMap: - return V.visitTypeMDTokenMap(R.getRecordData()); - case ModuleSubstreamKind::MergedAssemblyInput: - return V.visitMergedAssemblyInput(R.getRecordData()); - case ModuleSubstreamKind::CoffSymbolRVA: - return V.visitCoffSymbolRVA(R.getRecordData()); - default: - return V.visitUnknown(R.getSubstreamKind(), R.getRecordData()); - } -} diff --git a/lib/DebugInfo/CodeView/StringsAndChecksums.cpp b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp new file mode 100644 index 0000000000000000000000000000000000000000..928bf8c94f735b16c71e526ec124da8bd118d681 --- /dev/null +++ b/lib/DebugInfo/CodeView/StringsAndChecksums.cpp @@ -0,0 +1,55 @@ +//===- StringsAndChecksums.cpp ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" + +using namespace llvm; +using namespace llvm::codeview; + +StringsAndChecksumsRef::StringsAndChecksumsRef() {} + +StringsAndChecksumsRef::StringsAndChecksumsRef( + const DebugStringTableSubsectionRef &Strings) + : Strings(&Strings) {} + +StringsAndChecksumsRef::StringsAndChecksumsRef( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums) + : Strings(&Strings), Checksums(&Checksums) {} + +void StringsAndChecksumsRef::initializeStrings( + const DebugSubsectionRecord &SR) { + assert(SR.kind() == DebugSubsectionKind::StringTable); + assert(!Strings && "Found a string table even though we already have one!"); + + OwnedStrings = llvm::make_unique(); + consumeError(OwnedStrings->initialize(SR.getRecordData())); + Strings = OwnedStrings.get(); +} + +void StringsAndChecksumsRef::setChecksums( + const DebugChecksumsSubsectionRef &CS) { + OwnedChecksums = llvm::make_unique(); + *OwnedChecksums = CS; + Checksums = OwnedChecksums.get(); +} + +void StringsAndChecksumsRef::initializeChecksums( + const DebugSubsectionRecord &FCR) { + assert(FCR.kind() == DebugSubsectionKind::FileChecksums); + if (Checksums) + return; + + OwnedChecksums = llvm::make_unique(); + consumeError(OwnedChecksums->initialize(FCR.getRecordData())); + Checksums = OwnedChecksums.get(); +} diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index 134471e81cacd0a275a16752e89a9d10558d9032..36abafc079edbc7b0ca24f49f34bd18b1256f5df 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -11,7 +11,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" -#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h" @@ -32,16 +32,16 @@ namespace { /// the visitor out of SymbolDumper.h. class CVSymbolDumperImpl : public SymbolVisitorCallbacks { public: - CVSymbolDumperImpl(TypeDatabase &TypeDB, SymbolDumpDelegate *ObjDelegate, + CVSymbolDumperImpl(TypeCollection &Types, SymbolDumpDelegate *ObjDelegate, ScopedPrinter &W, bool PrintRecordBytes) - : TypeDB(TypeDB), ObjDelegate(ObjDelegate), W(W), + : Types(Types), ObjDelegate(ObjDelegate), W(W), PrintRecordBytes(PrintRecordBytes), InFunctionScope(false) {} /// CVSymbolVisitor overrides. #define SYMBOL_RECORD(EnumName, EnumVal, Name) \ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override; #define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def" +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" Error visitSymbolBegin(CVSymbol &Record) override; Error visitSymbolEnd(CVSymbol &Record) override; @@ -53,7 +53,7 @@ private: void printLocalVariableAddrGap(ArrayRef Gaps); void printTypeIndex(StringRef FieldName, TypeIndex TI); - TypeDatabase &TypeDB; + TypeCollection &Types; SymbolDumpDelegate *ObjDelegate; ScopedPrinter &W; @@ -82,7 +82,7 @@ void CVSymbolDumperImpl::printLocalVariableAddrGap( } void CVSymbolDumperImpl::printTypeIndex(StringRef FieldName, TypeIndex TI) { - CVTypeDumper::printTypeIndex(W, FieldName, TI, TypeDB); + codeview::printTypeIndex(W, FieldName, TI, Types); } Error CVSymbolDumperImpl::visitSymbolBegin(CVSymbol &CVR) { @@ -212,7 +212,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FileStaticSym &FileStatic) { DictScope S(W, "FileStatic"); - W.printNumber("Index", FileStatic.Index); + printTypeIndex("Index", FileStatic.Index); W.printNumber("ModFilenameOffset", FileStatic.ModFilenameOffset); W.printFlags("Flags", uint16_t(FileStatic.Flags), getLocalFlagNames()); W.printString("Name", FileStatic.Name); @@ -369,14 +369,14 @@ Error CVSymbolDumperImpl::visitKnownRecord( DictScope S(W, "DefRangeSubfield"); if (ObjDelegate) { - StringRef StringTable = ObjDelegate->getStringTable(); - auto ProgramStringTableOffset = DefRangeSubfield.Program; - if (ProgramStringTableOffset >= StringTable.size()) + DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable(); + auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program); + if (!ExpectedProgram) { + consumeError(ExpectedProgram.takeError()); return llvm::make_error( "String table offset outside of bounds of String Table!"); - StringRef Program = - StringTable.drop_front(ProgramStringTableOffset).split('\0').first; - W.printString("Program", Program); + } + W.printString("Program", *ExpectedProgram); } W.printNumber("OffsetInParent", DefRangeSubfield.OffsetInParent); printLocalVariableAddrRange(DefRangeSubfield.Range, @@ -390,14 +390,14 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DictScope S(W, "DefRange"); if (ObjDelegate) { - StringRef StringTable = ObjDelegate->getStringTable(); - auto ProgramStringTableOffset = DefRange.Program; - if (ProgramStringTableOffset >= StringTable.size()) + DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable(); + auto ExpectedProgram = Strings.getString(DefRange.Program); + if (!ExpectedProgram) { + consumeError(ExpectedProgram.takeError()); return llvm::make_error( "String table offset outside of bounds of String Table!"); - StringRef Program = - StringTable.drop_front(ProgramStringTableOffset).split('\0').first; - W.printString("Program", Program); + } + W.printString("Program", *ExpectedProgram); } printLocalVariableAddrRange(DefRange.Range, DefRange.getRelocationOffset()); printLocalVariableAddrGap(DefRange.Gaps); @@ -516,7 +516,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, RegisterSym &Register) { DictScope S(W, "RegisterSym"); - W.printNumber("Type", Register.Index); + printTypeIndex("Type", Register.Index); W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames()); W.printString("Name", Register.Name); return Error::success(); @@ -524,7 +524,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { DictScope S(W, "PublicSym"); - W.printNumber("Type", Public.Index); + printTypeIndex("Type", Public.Index); W.printNumber("Seg", Public.Segment); W.printNumber("Off", Public.Offset); W.printString("Name", Public.Name); @@ -631,7 +631,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, W.printHex("Offset", RegRel.Offset); printTypeIndex("Type", RegRel.Type); - W.printHex("Register", RegRel.Register); + W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames()); W.printString("VarName", RegRel.Name); return Error::success(); } @@ -668,8 +668,8 @@ Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) { Error CVSymbolDumper::dump(CVRecord &Record) { SymbolVisitorCallbackPipeline Pipeline; - SymbolDeserializer Deserializer(ObjDelegate.get()); - CVSymbolDumperImpl Dumper(TypeDB, ObjDelegate.get(), W, PrintRecordBytes); + SymbolDeserializer Deserializer(ObjDelegate.get(), Container); + CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes); Pipeline.addCallbackToPipeline(Deserializer); Pipeline.addCallbackToPipeline(Dumper); @@ -679,8 +679,8 @@ Error CVSymbolDumper::dump(CVRecord &Record) { Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) { SymbolVisitorCallbackPipeline Pipeline; - SymbolDeserializer Deserializer(ObjDelegate.get()); - CVSymbolDumperImpl Dumper(TypeDB, ObjDelegate.get(), W, PrintRecordBytes); + SymbolDeserializer Deserializer(ObjDelegate.get(), Container); + CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes); Pipeline.addCallbackToPipeline(Deserializer); Pipeline.addCallbackToPipeline(Dumper); diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp index bb17314654951f5d20f210302624755a1ca4f4c7..d731dc1b0a37230ed490d0fd0fff0c25afd37ecc 100644 --- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp +++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp @@ -40,6 +40,7 @@ Error SymbolRecordMapping::visitSymbolBegin(CVSymbol &Record) { } Error SymbolRecordMapping::visitSymbolEnd(CVSymbol &Record) { + error(IO.padToAlignment(alignOf(Container))); error(IO.endRecord()); return Error::success(); } @@ -306,7 +307,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, error(IO.mapInteger(FrameCookie.CodeOffset)); error(IO.mapInteger(FrameCookie.Register)); - error(IO.mapInteger(FrameCookie.CookieKind)); + error(IO.mapEnum(FrameCookie.CookieKind)); error(IO.mapInteger(FrameCookie.Flags)); return Error::success(); @@ -438,7 +439,7 @@ Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, error(IO.mapInteger(RegRel.Offset)); error(IO.mapInteger(RegRel.Type)); - error(IO.mapInteger(RegRel.Register)); + error(IO.mapEnum(RegRel.Register)); error(IO.mapStringZ(RegRel.Name)); return Error::success(); diff --git a/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/lib/DebugInfo/CodeView/SymbolSerializer.cpp index 251cc431f52b373ed2aec7872e2f78847da3cac6..9f2d619d1a1c5230c07705b683437943caf4930b 100644 --- a/lib/DebugInfo/CodeView/SymbolSerializer.cpp +++ b/lib/DebugInfo/CodeView/SymbolSerializer.cpp @@ -12,9 +12,11 @@ using namespace llvm; using namespace llvm::codeview; -SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator) - : Storage(Allocator), RecordBuffer(MaxRecordLength), Stream(RecordBuffer, llvm::support::little), - Writer(Stream), Mapping(Writer) { } +SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator, + CodeViewContainer Container) + : Storage(Allocator), RecordBuffer(MaxRecordLength), + Stream(RecordBuffer, llvm::support::little), Writer(Stream), + Mapping(Writer, Container) {} Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) { assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!"); diff --git a/lib/DebugInfo/CodeView/TypeDatabase.cpp b/lib/DebugInfo/CodeView/TypeDatabase.cpp index f9ded6ce2a86a8c24684d53192fb0ae6ddbe3211..08f848b36a9d5aeda4d73e31785c2f792b10407e 100644 --- a/lib/DebugInfo/CodeView/TypeDatabase.cpp +++ b/lib/DebugInfo/CodeView/TypeDatabase.cpp @@ -12,68 +12,36 @@ using namespace llvm; using namespace llvm::codeview; -namespace { -struct SimpleTypeEntry { - StringRef Name; - SimpleTypeKind Kind; -}; -} - -/// The names here all end in "*". If the simple type is a pointer type, we -/// return the whole name. Otherwise we lop off the last character in our -/// StringRef. -static const SimpleTypeEntry SimpleTypeNames[] = { - {"void*", SimpleTypeKind::Void}, - {"*", SimpleTypeKind::NotTranslated}, - {"HRESULT*", SimpleTypeKind::HResult}, - {"signed char*", SimpleTypeKind::SignedCharacter}, - {"unsigned char*", SimpleTypeKind::UnsignedCharacter}, - {"char*", SimpleTypeKind::NarrowCharacter}, - {"wchar_t*", SimpleTypeKind::WideCharacter}, - {"char16_t*", SimpleTypeKind::Character16}, - {"char32_t*", SimpleTypeKind::Character32}, - {"__int8*", SimpleTypeKind::SByte}, - {"unsigned __int8*", SimpleTypeKind::Byte}, - {"short*", SimpleTypeKind::Int16Short}, - {"unsigned short*", SimpleTypeKind::UInt16Short}, - {"__int16*", SimpleTypeKind::Int16}, - {"unsigned __int16*", SimpleTypeKind::UInt16}, - {"long*", SimpleTypeKind::Int32Long}, - {"unsigned long*", SimpleTypeKind::UInt32Long}, - {"int*", SimpleTypeKind::Int32}, - {"unsigned*", SimpleTypeKind::UInt32}, - {"__int64*", SimpleTypeKind::Int64Quad}, - {"unsigned __int64*", SimpleTypeKind::UInt64Quad}, - {"__int64*", SimpleTypeKind::Int64}, - {"unsigned __int64*", SimpleTypeKind::UInt64}, - {"__int128*", SimpleTypeKind::Int128}, - {"unsigned __int128*", SimpleTypeKind::UInt128}, - {"__half*", SimpleTypeKind::Float16}, - {"float*", SimpleTypeKind::Float32}, - {"float*", SimpleTypeKind::Float32PartialPrecision}, - {"__float48*", SimpleTypeKind::Float48}, - {"double*", SimpleTypeKind::Float64}, - {"long double*", SimpleTypeKind::Float80}, - {"__float128*", SimpleTypeKind::Float128}, - {"_Complex float*", SimpleTypeKind::Complex32}, - {"_Complex double*", SimpleTypeKind::Complex64}, - {"_Complex long double*", SimpleTypeKind::Complex80}, - {"_Complex __float128*", SimpleTypeKind::Complex128}, - {"bool*", SimpleTypeKind::Boolean8}, - {"__bool16*", SimpleTypeKind::Boolean16}, - {"__bool32*", SimpleTypeKind::Boolean32}, - {"__bool64*", SimpleTypeKind::Boolean64}, -}; - -/// Gets the type index for the next type record. -TypeIndex TypeDatabase::getNextTypeIndex() const { - return TypeIndex(TypeIndex::FirstNonSimpleIndex + CVUDTNames.size()); -} - -/// Records the name of a type, and reserves its type index. -void TypeDatabase::recordType(StringRef Name, const CVType &Data) { - CVUDTNames.push_back(Name); - TypeRecords.push_back(Data); +TypeDatabase::TypeDatabase(uint32_t Capacity) : TypeNameStorage(Allocator) { + CVUDTNames.resize(Capacity); + TypeRecords.resize(Capacity); + ValidRecords.resize(Capacity); +} + +TypeIndex TypeDatabase::appendType(StringRef Name, const CVType &Data) { + LargestTypeIndex = getAppendIndex(); + if (LargestTypeIndex.toArrayIndex() >= capacity()) + grow(); + recordType(Name, LargestTypeIndex, Data); + return LargestTypeIndex; +} + +void TypeDatabase::recordType(StringRef Name, TypeIndex Index, + const CVType &Data) { + LargestTypeIndex = empty() ? Index : std::max(Index, LargestTypeIndex); + + if (LargestTypeIndex.toArrayIndex() >= capacity()) + grow(Index); + + uint32_t AI = Index.toArrayIndex(); + + assert(!contains(Index)); + assert(AI < capacity()); + + CVUDTNames[AI] = Name; + TypeRecords[AI] = Data; + ValidRecords.set(AI); + ++Count; } /// Saves the name in a StringSet and creates a stable StringRef. @@ -82,37 +50,97 @@ StringRef TypeDatabase::saveTypeName(StringRef TypeName) { } StringRef TypeDatabase::getTypeName(TypeIndex Index) const { - if (Index.isNoneType()) - return ""; - - if (Index.isSimple()) { - // This is a simple type. - for (const auto &SimpleTypeName : SimpleTypeNames) { - if (SimpleTypeName.Kind == Index.getSimpleKind()) { - if (Index.getSimpleMode() == SimpleTypeMode::Direct) - return SimpleTypeName.Name.drop_back(1); - // Otherwise, this is a pointer type. We gloss over the distinction - // between near, far, 64, 32, etc, and just give a pointer type. - return SimpleTypeName.Name; - } - } - return ""; - } - - uint32_t I = Index.getIndex() - TypeIndex::FirstNonSimpleIndex; - if (I < CVUDTNames.size()) - return CVUDTNames[I]; + if (Index.isNoneType() || Index.isSimple()) + return TypeIndex::simpleTypeName(Index); + + if (contains(Index)) + return CVUDTNames[Index.toArrayIndex()]; return ""; } const CVType &TypeDatabase::getTypeRecord(TypeIndex Index) const { - return TypeRecords[Index.getIndex() - TypeIndex::FirstNonSimpleIndex]; + assert(contains(Index)); + return TypeRecords[Index.toArrayIndex()]; +} + +CVType &TypeDatabase::getTypeRecord(TypeIndex Index) { + assert(contains(Index)); + return TypeRecords[Index.toArrayIndex()]; +} + +bool TypeDatabase::contains(TypeIndex Index) const { + uint32_t AI = Index.toArrayIndex(); + if (AI >= capacity()) + return false; + + return ValidRecords.test(AI); +} + +uint32_t TypeDatabase::size() const { return Count; } + +uint32_t TypeDatabase::capacity() const { return TypeRecords.size(); } + +CVType TypeDatabase::getType(TypeIndex Index) { return getTypeRecord(Index); } + +StringRef TypeDatabase::getTypeName(TypeIndex Index) { + return static_cast(this)->getTypeName(Index); } -bool TypeDatabase::containsTypeIndex(TypeIndex Index) const { - uint32_t I = Index.getIndex() - TypeIndex::FirstNonSimpleIndex; - return I < CVUDTNames.size(); +bool TypeDatabase::contains(TypeIndex Index) { + return static_cast(this)->contains(Index); } -uint32_t TypeDatabase::size() const { return CVUDTNames.size(); } +uint32_t TypeDatabase::size() { + return static_cast(this)->size(); +} + +uint32_t TypeDatabase::capacity() { + return static_cast(this)->capacity(); +} + +void TypeDatabase::grow() { grow(LargestTypeIndex + 1); } + +void TypeDatabase::grow(TypeIndex NewIndex) { + uint32_t NewSize = NewIndex.toArrayIndex() + 1; + + if (NewSize <= capacity()) + return; + + uint32_t NewCapacity = NewSize * 3 / 2; + + TypeRecords.resize(NewCapacity); + CVUDTNames.resize(NewCapacity); + ValidRecords.resize(NewCapacity); +} + +bool TypeDatabase::empty() const { return size() == 0; } + +Optional TypeDatabase::largestTypeIndexLessThan(TypeIndex TI) const { + uint32_t AI = TI.toArrayIndex(); + int N = ValidRecords.find_prev(AI); + if (N == -1) + return None; + return TypeIndex::fromArrayIndex(N); +} + +TypeIndex TypeDatabase::getAppendIndex() const { + if (empty()) + return TypeIndex::fromArrayIndex(0); + + return LargestTypeIndex + 1; +} + +Optional TypeDatabase::getFirst() { + int N = ValidRecords.find_first(); + if (N == -1) + return None; + return TypeIndex::fromArrayIndex(N); +} + +Optional TypeDatabase::getNext(TypeIndex Prev) { + int N = ValidRecords.find_next(Prev.toArrayIndex()); + if (N == -1) + return None; + return TypeIndex::fromArrayIndex(N); +} diff --git a/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp b/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp index c234afd2288bdfa862033d6bf7585874f940f0ef..8d97f8b1cb401f4d378e04684b648e8e04920756 100644 --- a/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp +++ b/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp @@ -15,7 +15,7 @@ using namespace llvm; using namespace llvm::codeview; -Error TypeDatabaseVisitor::visitTypeBegin(CVRecord &Record) { +Error TypeDatabaseVisitor::visitTypeBegin(CVType &Record) { assert(!IsInFieldList); // Reset Name to the empty string. If the visitor sets it, we know it. Name = ""; @@ -28,6 +28,22 @@ Error TypeDatabaseVisitor::visitTypeBegin(CVRecord &Record) { return Error::success(); } +Error TypeDatabaseVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) { + if (auto EC = visitTypeBegin(Record)) + return EC; + + CurrentTypeIndex = Index; + return Error::success(); +} + +StringRef TypeDatabaseVisitor::getTypeName(TypeIndex Index) const { + return TypeDB->getTypeName(Index); +} + +StringRef TypeDatabaseVisitor::saveTypeName(StringRef Name) { + return TypeDB->saveTypeName(Name); +} + Error TypeDatabaseVisitor::visitTypeEnd(CVType &CVR) { if (CVR.Type == LF_FIELDLIST) { assert(IsInFieldList); @@ -39,7 +55,12 @@ Error TypeDatabaseVisitor::visitTypeEnd(CVType &CVR) { // CVUDTNames is indexed by type index, and must have one entry for every // type. Field list members are not recorded, and are only referenced by // their containing field list record. - TypeDB.recordType(Name, CVR); + if (CurrentTypeIndex) + TypeDB->recordType(Name, *CurrentTypeIndex, CVR); + else + TypeDB->appendType(Name, CVR); + + CurrentTypeIndex.reset(); return Error::success(); } @@ -73,13 +94,13 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { uint32_t Size = Indices.size(); SmallString<256> TypeName("("); for (uint32_t I = 0; I < Size; ++I) { - StringRef ArgTypeName = TypeDB.getTypeName(Indices[I]); + StringRef ArgTypeName = getTypeName(Indices[I]); TypeName.append(ArgTypeName); if (I + 1 != Size) TypeName.append(", "); } TypeName.push_back(')'); - Name = TypeDB.saveTypeName(TypeName); + Name = saveTypeName(TypeName); return Error::success(); } @@ -89,13 +110,13 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, uint32_t Size = Indices.size(); SmallString<256> TypeName("\""); for (uint32_t I = 0; I < Size; ++I) { - StringRef ArgTypeName = TypeDB.getTypeName(Indices[I]); + StringRef ArgTypeName = getTypeName(Indices[I]); TypeName.append(ArgTypeName); if (I + 1 != Size) TypeName.append("\" \""); } TypeName.push_back('\"'); - Name = TypeDB.saveTypeName(TypeName); + Name = saveTypeName(TypeName); return Error::success(); } @@ -132,26 +153,26 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ProcedureRecord &Proc) { - StringRef ReturnTypeName = TypeDB.getTypeName(Proc.getReturnType()); - StringRef ArgListTypeName = TypeDB.getTypeName(Proc.getArgumentList()); + StringRef ReturnTypeName = getTypeName(Proc.getReturnType()); + StringRef ArgListTypeName = getTypeName(Proc.getArgumentList()); SmallString<256> TypeName(ReturnTypeName); TypeName.push_back(' '); TypeName.append(ArgListTypeName); - Name = TypeDB.saveTypeName(TypeName); + Name = saveTypeName(TypeName); return Error::success(); } Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, MemberFunctionRecord &MF) { - StringRef ReturnTypeName = TypeDB.getTypeName(MF.getReturnType()); - StringRef ClassTypeName = TypeDB.getTypeName(MF.getClassType()); - StringRef ArgListTypeName = TypeDB.getTypeName(MF.getArgumentList()); + StringRef ReturnTypeName = getTypeName(MF.getReturnType()); + StringRef ClassTypeName = getTypeName(MF.getClassType()); + StringRef ArgListTypeName = getTypeName(MF.getArgumentList()); SmallString<256> TypeName(ReturnTypeName); TypeName.push_back(' '); TypeName.append(ClassTypeName); TypeName.append("::"); TypeName.append(ArgListTypeName); - Name = TypeDB.saveTypeName(TypeName); + Name = saveTypeName(TypeName); return Error::success(); } @@ -171,13 +192,13 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { if (Ptr.isPointerToMember()) { const MemberPointerInfo &MI = Ptr.getMemberInfo(); - StringRef PointeeName = TypeDB.getTypeName(Ptr.getReferentType()); - StringRef ClassName = TypeDB.getTypeName(MI.getContainingType()); + StringRef PointeeName = getTypeName(Ptr.getReferentType()); + StringRef ClassName = getTypeName(MI.getContainingType()); SmallString<256> TypeName(PointeeName); TypeName.push_back(' '); TypeName.append(ClassName); TypeName.append("::*"); - Name = TypeDB.saveTypeName(TypeName); + Name = saveTypeName(TypeName); } else { SmallString<256> TypeName; if (Ptr.isConst()) @@ -187,7 +208,7 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { if (Ptr.isUnaligned()) TypeName.append("__unaligned "); - TypeName.append(TypeDB.getTypeName(Ptr.getReferentType())); + TypeName.append(getTypeName(Ptr.getReferentType())); if (Ptr.getMode() == PointerMode::LValueReference) TypeName.append("&"); @@ -197,7 +218,7 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { TypeName.append("*"); if (!TypeName.empty()) - Name = TypeDB.saveTypeName(TypeName); + Name = saveTypeName(TypeName); } return Error::success(); } @@ -205,7 +226,7 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) { uint16_t Mods = static_cast(Mod.getModifiers()); - StringRef ModifiedName = TypeDB.getTypeName(Mod.getModifiedType()); + StringRef ModifiedName = getTypeName(Mod.getModifiedType()); SmallString<256> TypeName; if (Mods & uint16_t(ModifierOptions::Const)) TypeName.append("const "); @@ -214,14 +235,14 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) { if (Mods & uint16_t(ModifierOptions::Unaligned)) TypeName.append("__unaligned "); TypeName.append(ModifiedName); - Name = TypeDB.saveTypeName(TypeName); + Name = saveTypeName(TypeName); return Error::success(); } Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, VFTableShapeRecord &Shape) { - Name = TypeDB.saveTypeName(""); + Name = + saveTypeName(""); return Error::success(); } diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp index 870d95221e7d0f0549d7dc6f144cc531c3f64f50..58996670501510ca34d1d769ab69d8ea76806160 100644 --- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp +++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp @@ -10,15 +10,11 @@ #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" #include "llvm/ADT/SmallString.h" -#include "llvm/DebugInfo/CodeView/CVTypeDumper.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/Formatters.h" -#include "llvm/DebugInfo/CodeView/TypeDatabase.h" -#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" -#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" +#include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ScopedPrinter.h" @@ -28,7 +24,7 @@ using namespace llvm::codeview; static const EnumEntry LeafTypeNames[] = { #define CV_TYPE(enum, val) {#enum, enum}, -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" }; #define ENUM_ENTRY(enum_class, enum) \ @@ -157,7 +153,7 @@ static StringRef getLeafTypeName(TypeLeafKind LT) { #define TYPE_RECORD(ename, value, name) \ case ename: \ return #name; -#include "llvm/DebugInfo/CodeView/TypeRecords.def" +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" default: break; } @@ -165,18 +161,20 @@ static StringRef getLeafTypeName(TypeLeafKind LT) { } void TypeDumpVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI) const { - CVTypeDumper::printTypeIndex(*W, FieldName, TI, TypeDB); + codeview::printTypeIndex(*W, FieldName, TI, TpiTypes); } void TypeDumpVisitor::printItemIndex(StringRef FieldName, TypeIndex TI) const { - CVTypeDumper::printTypeIndex(*W, FieldName, TI, getSourceDB()); + codeview::printTypeIndex(*W, FieldName, TI, getSourceTypes()); } Error TypeDumpVisitor::visitTypeBegin(CVType &Record) { + return visitTypeBegin(Record, TypeIndex::fromArrayIndex(TpiTypes.size())); +} + +Error TypeDumpVisitor::visitTypeBegin(CVType &Record, TypeIndex Index) { W->startLine() << getLeafTypeName(Record.Type); - W->getOStream() << " (" - << HexNumber(getSourceDB().getNextTypeIndex().getIndex()) - << ")"; + W->getOStream() << " (" << HexNumber(Index.getIndex()) << ")"; W->getOStream() << " {\n"; W->indent(); W->printEnum("TypeLeafKind", unsigned(Record.Type), @@ -213,8 +211,7 @@ Error TypeDumpVisitor::visitMemberEnd(CVMemberRecord &Record) { Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, FieldListRecord &FieldList) { - CVTypeVisitor Visitor(*this); - if (auto EC = Visitor.visitFieldListMemberStream(FieldList.Data)) + if (auto EC = codeview::visitMemberRecordStream(FieldList.Data, *this)) return EC; return Error::success(); @@ -243,7 +240,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, StringListRecord &Strs) { W->printNumber("NumStrings", Size); ListScope Arguments(*W, "Strings"); for (uint32_t I = 0; I < Size; ++I) { - printTypeIndex("String", Indices[I]); + printItemIndex("String", Indices[I]); } return Error::success(); } diff --git a/lib/DebugInfo/CodeView/TypeIndex.cpp b/lib/DebugInfo/CodeView/TypeIndex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..24fe5fcb28d4f96efa743b837883cd653f407b48 --- /dev/null +++ b/lib/DebugInfo/CodeView/TypeIndex.cpp @@ -0,0 +1,104 @@ +//===-- TypeIndex.cpp - CodeView type index ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +#include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/Support/ScopedPrinter.h" + +using namespace llvm; +using namespace llvm::codeview; + +namespace { +struct SimpleTypeEntry { + StringRef Name; + SimpleTypeKind Kind; +}; + +/// The names here all end in "*". If the simple type is a pointer type, we +/// return the whole name. Otherwise we lop off the last character in our +/// StringRef. +static const SimpleTypeEntry SimpleTypeNames[] = { + {"void*", SimpleTypeKind::Void}, + {"*", SimpleTypeKind::NotTranslated}, + {"HRESULT*", SimpleTypeKind::HResult}, + {"signed char*", SimpleTypeKind::SignedCharacter}, + {"unsigned char*", SimpleTypeKind::UnsignedCharacter}, + {"char*", SimpleTypeKind::NarrowCharacter}, + {"wchar_t*", SimpleTypeKind::WideCharacter}, + {"char16_t*", SimpleTypeKind::Character16}, + {"char32_t*", SimpleTypeKind::Character32}, + {"__int8*", SimpleTypeKind::SByte}, + {"unsigned __int8*", SimpleTypeKind::Byte}, + {"short*", SimpleTypeKind::Int16Short}, + {"unsigned short*", SimpleTypeKind::UInt16Short}, + {"__int16*", SimpleTypeKind::Int16}, + {"unsigned __int16*", SimpleTypeKind::UInt16}, + {"long*", SimpleTypeKind::Int32Long}, + {"unsigned long*", SimpleTypeKind::UInt32Long}, + {"int*", SimpleTypeKind::Int32}, + {"unsigned*", SimpleTypeKind::UInt32}, + {"__int64*", SimpleTypeKind::Int64Quad}, + {"unsigned __int64*", SimpleTypeKind::UInt64Quad}, + {"__int64*", SimpleTypeKind::Int64}, + {"unsigned __int64*", SimpleTypeKind::UInt64}, + {"__int128*", SimpleTypeKind::Int128}, + {"unsigned __int128*", SimpleTypeKind::UInt128}, + {"__half*", SimpleTypeKind::Float16}, + {"float*", SimpleTypeKind::Float32}, + {"float*", SimpleTypeKind::Float32PartialPrecision}, + {"__float48*", SimpleTypeKind::Float48}, + {"double*", SimpleTypeKind::Float64}, + {"long double*", SimpleTypeKind::Float80}, + {"__float128*", SimpleTypeKind::Float128}, + {"_Complex float*", SimpleTypeKind::Complex32}, + {"_Complex double*", SimpleTypeKind::Complex64}, + {"_Complex long double*", SimpleTypeKind::Complex80}, + {"_Complex __float128*", SimpleTypeKind::Complex128}, + {"bool*", SimpleTypeKind::Boolean8}, + {"__bool16*", SimpleTypeKind::Boolean16}, + {"__bool32*", SimpleTypeKind::Boolean32}, + {"__bool64*", SimpleTypeKind::Boolean64}, +}; +} // namespace + +StringRef TypeIndex::simpleTypeName(TypeIndex TI) { + assert(TI.isNoneType() || TI.isSimple()); + + if (TI.isNoneType()) + return ""; + + // This is a simple type. + for (const auto &SimpleTypeName : SimpleTypeNames) { + if (SimpleTypeName.Kind == TI.getSimpleKind()) { + if (TI.getSimpleMode() == SimpleTypeMode::Direct) + return SimpleTypeName.Name.drop_back(1); + // Otherwise, this is a pointer type. We gloss over the distinction + // between near, far, 64, 32, etc, and just give a pointer type. + return SimpleTypeName.Name; + } + } + return ""; +} + +void llvm::codeview::printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, + TypeIndex TI, TypeCollection &Types) { + StringRef TypeName; + if (!TI.isNoneType()) { + if (TI.isSimple()) + TypeName = TypeIndex::simpleTypeName(TI); + else + TypeName = Types.getTypeName(TI); + } + + if (!TypeName.empty()) + Printer.printHex(FieldName, TypeName, TI.getIndex()); + else + Printer.printHex(FieldName, TI.getIndex()); +} diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8704cea607867cc956fab719057729c494f5f6bd --- /dev/null +++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -0,0 +1,371 @@ +//===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::codeview; + +static inline MethodKind getMethodKind(uint16_t Attrs) { + Attrs &= uint16_t(MethodOptions::MethodKindMask); + Attrs >>= 2; + return MethodKind(Attrs); +} + +static inline bool isIntroVirtual(uint16_t Attrs) { + MethodKind MK = getMethodKind(Attrs); + return MK == MethodKind::IntroducingVirtual || + MK == MethodKind::PureIntroducingVirtual; +} + +static inline PointerMode getPointerMode(uint32_t Attrs) { + return static_cast((Attrs >> PointerRecord::PointerModeShift) & + PointerRecord::PointerModeMask); +} + +static inline bool isMemberPointer(uint32_t Attrs) { + PointerMode Mode = getPointerMode(Attrs); + return Mode == PointerMode::PointerToDataMember || + Mode == PointerMode::PointerToMemberFunction; +} + +static inline uint32_t getEncodedIntegerLength(ArrayRef Data) { + uint16_t N = support::endian::read16le(Data.data()); + if (N < LF_NUMERIC) + return 2; + + assert(N <= LF_UQUADWORD); + + constexpr uint32_t Sizes[] = { + 1, // LF_CHAR + 2, // LF_SHORT + 2, // LF_USHORT + 4, // LF_LONG + 4, // LF_ULONG + 4, // LF_REAL32 + 8, // LF_REAL64 + 10, // LF_REAL80 + 16, // LF_REAL128 + 8, // LF_QUADWORD + 8, // LF_UQUADWORD + }; + + return Sizes[N - LF_NUMERIC]; +} + +static inline uint32_t getCStringLength(ArrayRef Data) { + const char *S = reinterpret_cast(Data.data()); + return strlen(S) + 1; +} + +static void handleMethodOverloadList(ArrayRef Content, + SmallVectorImpl &Refs) { + uint32_t Offset = 0; + + while (!Content.empty()) { + // Array of: + // 0: Attrs + // 2: Padding + // 4: TypeIndex + // if (isIntroVirtual()) + // 8: VFTableOffset + + // At least 8 bytes are guaranteed. 4 extra bytes come iff function is an + // intro virtual. + uint32_t Len = 8; + + uint16_t Attrs = support::endian::read16le(Content.data()); + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + + if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) + Len += 4; + Offset += Len; + Content = Content.drop_front(Len); + } +} + +static uint32_t handleBaseClass(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Encoded Integer + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getEncodedIntegerLength(Data.drop_front(8)); +} + +static uint32_t handleEnumerator(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: Encoded Integer + // : Name + uint32_t Size = 4 + getEncodedIntegerLength(Data.drop_front(4)); + return Size + getCStringLength(Data.drop_front(Size)); +} + +static uint32_t handleDataMember(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Encoded Integer + // : Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + uint32_t Size = 8 + getEncodedIntegerLength(Data.drop_front(8)); + return Size + getCStringLength(Data.drop_front(Size)); +} + +static uint32_t handleOverloadedMethod(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getCStringLength(Data.drop_front(8)); +} + +static uint32_t handleOneMethod(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Attributes + // 4: Type + // if (isIntroVirtual) + // 8: VFTableOffset + // : Name + uint32_t Size = 8; + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + + uint16_t Attrs = support::endian::read16le(Data.drop_front(2).data()); + if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) + Size += 4; + + return Size + getCStringLength(Data.drop_front(Size)); +} + +static uint32_t handleNestedType(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getCStringLength(Data.drop_front(8)); +} + +static uint32_t handleStaticDataMember(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getCStringLength(Data.drop_front(8)); +} + +static uint32_t handleVirtualBaseClass(ArrayRef Data, uint32_t Offset, + bool IsIndirect, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Attrs + // 4: TypeIndex + // 8: TypeIndex + // 12: Encoded Integer + // : Encoded Integer + uint32_t Size = 12; + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 2}); + Size += getEncodedIntegerLength(Data.drop_front(Size)); + Size += getEncodedIntegerLength(Data.drop_front(Size)); + return Size; +} + +static uint32_t handleVFPtr(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8; +} + +static uint32_t handleListContinuation(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8; +} + +static void handleFieldList(ArrayRef Content, + SmallVectorImpl &Refs) { + uint32_t Offset = 0; + uint32_t ThisLen = 0; + while (!Content.empty()) { + TypeLeafKind Kind = + static_cast(support::endian::read16le(Content.data())); + switch (Kind) { + case LF_BCLASS: + ThisLen = handleBaseClass(Content, Offset, Refs); + break; + case LF_ENUMERATE: + ThisLen = handleEnumerator(Content, Offset, Refs); + break; + case LF_MEMBER: + ThisLen = handleDataMember(Content, Offset, Refs); + break; + case LF_METHOD: + ThisLen = handleOverloadedMethod(Content, Offset, Refs); + break; + case LF_ONEMETHOD: + ThisLen = handleOneMethod(Content, Offset, Refs); + break; + case LF_NESTTYPE: + ThisLen = handleNestedType(Content, Offset, Refs); + break; + case LF_STMEMBER: + ThisLen = handleStaticDataMember(Content, Offset, Refs); + break; + case LF_VBCLASS: + case LF_IVBCLASS: + ThisLen = + handleVirtualBaseClass(Content, Offset, Kind == LF_VBCLASS, Refs); + break; + case LF_VFUNCTAB: + ThisLen = handleVFPtr(Content, Offset, Refs); + break; + case LF_INDEX: + ThisLen = handleListContinuation(Content, Offset, Refs); + break; + default: + return; + } + Content = Content.drop_front(ThisLen); + Offset += ThisLen; + if (!Content.empty()) { + uint8_t Pad = Content.front(); + if (Pad >= LF_PAD0) { + uint32_t Skip = Pad & 0x0F; + Content = Content.drop_front(Skip); + Offset += Skip; + } + } + } +} + +static void handlePointer(ArrayRef Content, + SmallVectorImpl &Refs) { + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + + uint32_t Attrs = support::endian::read32le(Content.drop_front(4).data()); + if (isMemberPointer(Attrs)) + Refs.push_back({TiRefKind::TypeRef, 8, 1}); +} + +static void discoverTypeIndices(ArrayRef Content, TypeLeafKind Kind, + SmallVectorImpl &Refs) { + uint32_t Count; + // FIXME: In the future it would be nice if we could avoid hardcoding these + // values. One idea is to define some structures representing these types + // that would allow the use of offsetof(). + switch (Kind) { + case TypeLeafKind::LF_FUNC_ID: + Refs.push_back({TiRefKind::IndexRef, 0, 1}); + Refs.push_back({TiRefKind::TypeRef, 4, 1}); + break; + case TypeLeafKind::LF_MFUNC_ID: + Refs.push_back({TiRefKind::TypeRef, 0, 2}); + break; + case TypeLeafKind::LF_STRING_ID: + Refs.push_back({TiRefKind::IndexRef, 0, 1}); + break; + case TypeLeafKind::LF_SUBSTR_LIST: + Count = support::endian::read32le(Content.data()); + if (Count > 0) + Refs.push_back({TiRefKind::IndexRef, 4, Count}); + break; + case TypeLeafKind::LF_BUILDINFO: + Count = support::endian::read16le(Content.data()); + if (Count > 0) + Refs.push_back({TiRefKind::IndexRef, 2, Count}); + break; + case TypeLeafKind::LF_UDT_SRC_LINE: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + Refs.push_back({TiRefKind::IndexRef, 4, 1}); + break; + case TypeLeafKind::LF_UDT_MOD_SRC_LINE: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + break; + case TypeLeafKind::LF_MODIFIER: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + break; + case TypeLeafKind::LF_PROCEDURE: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + Refs.push_back({TiRefKind::TypeRef, 8, 1}); + break; + case TypeLeafKind::LF_MFUNCTION: + Refs.push_back({TiRefKind::TypeRef, 0, 3}); + Refs.push_back({TiRefKind::TypeRef, 16, 1}); + break; + case TypeLeafKind::LF_ARGLIST: + Count = support::endian::read32le(Content.data()); + if (Count > 0) + Refs.push_back({TiRefKind::TypeRef, 4, Count}); + break; + case TypeLeafKind::LF_ARRAY: + Refs.push_back({TiRefKind::TypeRef, 0, 2}); + break; + case TypeLeafKind::LF_CLASS: + case TypeLeafKind::LF_STRUCTURE: + case TypeLeafKind::LF_INTERFACE: + Refs.push_back({TiRefKind::TypeRef, 4, 3}); + break; + case TypeLeafKind::LF_UNION: + Refs.push_back({TiRefKind::TypeRef, 4, 1}); + break; + case TypeLeafKind::LF_ENUM: + Refs.push_back({TiRefKind::TypeRef, 4, 2}); + break; + case TypeLeafKind::LF_BITFIELD: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + break; + case TypeLeafKind::LF_VFTABLE: + Refs.push_back({TiRefKind::TypeRef, 0, 2}); + break; + case TypeLeafKind::LF_VTSHAPE: + break; + case TypeLeafKind::LF_METHODLIST: + handleMethodOverloadList(Content, Refs); + break; + case TypeLeafKind::LF_FIELDLIST: + handleFieldList(Content, Refs); + break; + case TypeLeafKind::LF_POINTER: + handlePointer(Content, Refs); + break; + default: + break; + } +} + +void llvm::codeview::discoverTypeIndices(const CVType &Type, + SmallVectorImpl &Refs) { + ::discoverTypeIndices(Type.content(), Type.kind(), Refs); +} + +void llvm::codeview::discoverTypeIndices(ArrayRef RecordData, + SmallVectorImpl &Refs) { + const RecordPrefix *P = + reinterpret_cast(RecordData.data()); + TypeLeafKind K = static_cast(uint16_t(P->RecordKind)); + ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs); +} diff --git a/lib/DebugInfo/CodeView/TypeName.cpp b/lib/DebugInfo/CodeView/TypeName.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2eb8b81862f3c4a8a5dceaf327bbf67d117e0b88 --- /dev/null +++ b/lib/DebugInfo/CodeView/TypeName.cpp @@ -0,0 +1,243 @@ +//===- TypeName.cpp ------------------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeName.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; +using namespace llvm::codeview; + +namespace { +class TypeNameComputer : public TypeVisitorCallbacks { + /// The type collection. Used to calculate names of nested types. + TypeCollection &Types; + TypeIndex CurrentTypeIndex = TypeIndex::None(); + + /// Name of the current type. Only valid before visitTypeEnd. + SmallString<256> Name; + +public: + explicit TypeNameComputer(TypeCollection &Types) : Types(Types) {} + + StringRef name() const { return Name; } + + /// Paired begin/end actions for all types. Receives all record data, + /// including the fixed-length record prefix. + Error visitTypeBegin(CVType &Record) override; + Error visitTypeBegin(CVType &Record, TypeIndex Index) override; + Error visitTypeEnd(CVType &Record) override; + +#define TYPE_RECORD(EnumName, EnumVal, Name) \ + Error visitKnownRecord(CVType &CVR, Name##Record &Record) override; +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#define MEMBER_RECORD(EnumName, EnumVal, Name) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +}; +} // namespace + +Error TypeNameComputer::visitTypeBegin(CVType &Record) { + llvm_unreachable("Must call visitTypeBegin with a TypeIndex!"); + return Error::success(); +} + +Error TypeNameComputer::visitTypeBegin(CVType &Record, TypeIndex Index) { + // Reset Name to the empty string. If the visitor sets it, we know it. + Name = ""; + CurrentTypeIndex = Index; + return Error::success(); +} + +Error TypeNameComputer::visitTypeEnd(CVType &CVR) { return Error::success(); } + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + FieldListRecord &FieldList) { + Name = ""; + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVRecord &CVR, + StringIdRecord &String) { + Name = String.getString(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArgListRecord &Args) { + auto Indices = Args.getIndices(); + uint32_t Size = Indices.size(); + Name = "("; + for (uint32_t I = 0; I < Size; ++I) { + assert(Indices[I] < CurrentTypeIndex); + + Name.append(Types.getTypeName(Indices[I])); + if (I + 1 != Size) + Name.append(", "); + } + Name.push_back(')'); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + StringListRecord &Strings) { + auto Indices = Strings.getIndices(); + uint32_t Size = Indices.size(); + Name = "\""; + for (uint32_t I = 0; I < Size; ++I) { + Name.append(Types.getTypeName(Indices[I])); + if (I + 1 != Size) + Name.append("\" \""); + } + Name.push_back('\"'); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ClassRecord &Class) { + Name = Class.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, UnionRecord &Union) { + Name = Union.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, EnumRecord &Enum) { + Name = Enum.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ArrayRecord &AT) { + Name = AT.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, VFTableRecord &VFT) { + Name = VFT.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, MemberFuncIdRecord &Id) { + Name = Id.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ProcedureRecord &Proc) { + StringRef Ret = Types.getTypeName(Proc.getReturnType()); + StringRef Params = Types.getTypeName(Proc.getArgumentList()); + Name = formatv("{0} {1}", Ret, Params).sstr<256>(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + MemberFunctionRecord &MF) { + StringRef Ret = Types.getTypeName(MF.getReturnType()); + StringRef Class = Types.getTypeName(MF.getClassType()); + StringRef Params = Types.getTypeName(MF.getArgumentList()); + Name = formatv("{0} {1}::{2}", Ret, Class, Params).sstr<256>(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) { + Name = Func.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) { + Name = TS.getName(); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, PointerRecord &Ptr) { + + if (Ptr.isPointerToMember()) { + const MemberPointerInfo &MI = Ptr.getMemberInfo(); + + StringRef Pointee = Types.getTypeName(Ptr.getReferentType()); + StringRef Class = Types.getTypeName(MI.getContainingType()); + Name = formatv("{0} {1}::*", Pointee, Class); + } else { + if (Ptr.isConst()) + Name.append("const "); + if (Ptr.isVolatile()) + Name.append("volatile "); + if (Ptr.isUnaligned()) + Name.append("__unaligned "); + + Name.append(Types.getTypeName(Ptr.getReferentType())); + + if (Ptr.getMode() == PointerMode::LValueReference) + Name.append("&"); + else if (Ptr.getMode() == PointerMode::RValueReference) + Name.append("&&"); + else if (Ptr.getMode() == PointerMode::Pointer) + Name.append("*"); + } + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, ModifierRecord &Mod) { + uint16_t Mods = static_cast(Mod.getModifiers()); + + SmallString<256> TypeName; + if (Mods & uint16_t(ModifierOptions::Const)) + Name.append("const "); + if (Mods & uint16_t(ModifierOptions::Volatile)) + Name.append("volatile "); + if (Mods & uint16_t(ModifierOptions::Unaligned)) + Name.append("__unaligned "); + Name.append(Types.getTypeName(Mod.getModifiedType())); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + VFTableShapeRecord &Shape) { + Name = formatv("", Shape.getEntryCount()); + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord( + CVType &CVR, UdtModSourceLineRecord &ModSourceLine) { + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + UdtSourceLineRecord &SourceLine) { + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, BitFieldRecord &BF) { + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, + MethodOverloadListRecord &Overloads) { + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, BuildInfoRecord &BI) { + return Error::success(); +} + +Error TypeNameComputer::visitKnownRecord(CVType &CVR, LabelRecord &R) { + return Error::success(); +} + +std::string llvm::codeview::computeTypeName(TypeCollection &Types, + TypeIndex Index) { + TypeNameComputer Computer(Types); + CVType Record = Types.getType(Index); + if (auto EC = visitTypeRecord(Record, Index, Computer)) { + consumeError(std::move(EC)); + return ""; + } + return Computer.name(); +} diff --git a/lib/DebugInfo/CodeView/TypeSerializer.cpp b/lib/DebugInfo/CodeView/TypeSerializer.cpp index fd4d1853fa544f047c244a48925ffacaa9e86545..93c1198e36ce07fabbd5630234817a8d8cd3c614 100644 --- a/lib/DebugInfo/CodeView/TypeSerializer.cpp +++ b/lib/DebugInfo/CodeView/TypeSerializer.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/CodeView/TypeSerializer.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/BinaryStreamWriter.h" #include @@ -16,21 +17,109 @@ using namespace llvm; using namespace llvm::codeview; -bool TypeSerializer::isInFieldList() const { - return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; +namespace { +struct HashedType { + uint64_t Hash; + const uint8_t *Data; + unsigned Size; // FIXME: Go to uint16_t? + TypeIndex Index; +}; + +/// Wrapper around a poitner to a HashedType. Hash and equality operations are +/// based on data in the pointee. +struct HashedTypePtr { + HashedTypePtr() = default; + HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {} + HashedType *Ptr = nullptr; +}; +} // namespace + +namespace llvm { +template <> struct DenseMapInfo { + static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); } + static inline HashedTypePtr getTombstoneKey() { + return HashedTypePtr(reinterpret_cast(1)); + } + static unsigned getHashValue(HashedTypePtr Val) { + assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr); + return Val.Ptr->Hash; + } + static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) { + HashedType *LHS = LHSP.Ptr; + HashedType *RHS = RHSP.Ptr; + if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr) + return LHS == RHS; + if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size) + return false; + return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0; + } +}; +} + +/// Private implementation so that we don't leak our DenseMap instantiations to +/// users. +class llvm::codeview::TypeHasher { +private: + /// Storage for type record provided by the caller. Records will outlive the + /// hasher object, so they should be allocated here. + BumpPtrAllocator &RecordStorage; + + /// Storage for hash keys. These only need to live as long as the hashing + /// operation. + BumpPtrAllocator KeyStorage; + + /// Hash table. We really want a DenseMap, TypeIndex> here, + /// but DenseMap is inefficient when the keys are long (like type records) + /// because it recomputes the hash value of every key when it grows. This + /// value type stores the hash out of line in KeyStorage, so that table + /// entries are small and easy to rehash. + DenseSet HashedRecords; + +public: + TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {} + + void reset() { HashedRecords.clear(); } + + /// Takes the bytes of type record, inserts them into the hash table, saves + /// them, and returns a pointer to an identical stable type record along with + /// its type index in the destination stream. + TypeIndex getOrCreateRecord(ArrayRef &Record, TypeIndex TI); +}; + +TypeIndex TypeHasher::getOrCreateRecord(ArrayRef &Record, + TypeIndex TI) { + assert(Record.size() < UINT32_MAX && "Record too big"); + assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + + // Compute the hash up front so we can store it in the key. + HashedType TempHashedType = {hash_value(Record), Record.data(), + unsigned(Record.size()), TI}; + auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType)); + HashedType *&Hashed = Result.first->Ptr; + + if (Result.second) { + // This was a new type record. We need stable storage for both the key and + // the record. The record should outlive the hashing operation. + Hashed = KeyStorage.Allocate(); + *Hashed = TempHashedType; + + uint8_t *Stable = RecordStorage.Allocate(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + Hashed->Data = Stable; + assert(Hashed->Size == Record.size()); + } + + // Update the caller's copy of Record to point a stable copy. + Record = ArrayRef(Hashed->Data, Hashed->Size); + return Hashed->Index; } -TypeIndex TypeSerializer::calcNextTypeIndex() const { - if (LastTypeIndex.isNoneType()) - return TypeIndex(TypeIndex::FirstNonSimpleIndex); - else - return TypeIndex(LastTypeIndex.getIndex() + 1); +TypeIndex TypeSerializer::nextTypeIndex() const { + return TypeIndex::fromArrayIndex(SeenRecords.size()); } -TypeIndex TypeSerializer::incrementTypeIndex() { - TypeIndex Previous = LastTypeIndex; - LastTypeIndex = calcNextTypeIndex(); - return Previous; +bool TypeSerializer::isInFieldList() const { + return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; } MutableArrayRef TypeSerializer::getCurrentSubRecordData() { @@ -51,21 +140,6 @@ Error TypeSerializer::writeRecordPrefix(TypeLeafKind Kind) { return Error::success(); } -TypeIndex -TypeSerializer::insertRecordBytesPrivate(MutableArrayRef Record) { - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - StringRef S(reinterpret_cast(Record.data()), Record.size()); - - TypeIndex NextTypeIndex = calcNextTypeIndex(); - auto Result = HashedRecords.try_emplace(S, NextTypeIndex); - if (Result.second) { - LastTypeIndex = NextTypeIndex; - SeenRecords.push_back(Record); - } - return Result.first->getValue(); -} - Expected> TypeSerializer::addPadding(MutableArrayRef Record) { uint32_t Align = Record.size() % 4; @@ -83,27 +157,79 @@ TypeSerializer::addPadding(MutableArrayRef Record) { return MutableArrayRef(Record.data(), Record.size() + N); } -TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage) - : RecordStorage(Storage), LastTypeIndex(), - RecordBuffer(MaxRecordLength * 2), +TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage, bool Hash) + : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2), Stream(RecordBuffer, llvm::support::little), Writer(Stream), Mapping(Writer) { // RecordBuffer needs to be able to hold enough data so that if we are 1 // byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes, // we won't overflow. + if (Hash) + Hasher = make_unique(Storage); } -ArrayRef> TypeSerializer::records() const { +TypeSerializer::~TypeSerializer() = default; + +ArrayRef> TypeSerializer::records() const { return SeenRecords; } -TypeIndex TypeSerializer::getLastTypeIndex() const { return LastTypeIndex; } +void TypeSerializer::reset() { + if (Hasher) + Hasher->reset(); + Writer.setOffset(0); + CurrentSegment = RecordSegment(); + FieldListSegments.clear(); + TypeKind.reset(); + MemberKind.reset(); + SeenRecords.clear(); +} + +TypeIndex TypeSerializer::insertRecordBytes(ArrayRef &Record) { + assert(!TypeKind.hasValue() && "Already in a type mapping!"); + assert(Writer.getOffset() == 0 && "Stream has data already!"); + + if (Hasher) { + TypeIndex ActualTI = Hasher->getOrCreateRecord(Record, nextTypeIndex()); + if (nextTypeIndex() == ActualTI) + SeenRecords.push_back(Record); + return ActualTI; + } + + TypeIndex NewTI = nextTypeIndex(); + uint8_t *Stable = RecordStorage.Allocate(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + Record = ArrayRef(Stable, Record.size()); + SeenRecords.push_back(Record); + return NewTI; +} -TypeIndex TypeSerializer::insertRecordBytes(MutableArrayRef Record) { +TypeIndex TypeSerializer::insertRecord(const RemappedType &Record) { assert(!TypeKind.hasValue() && "Already in a type mapping!"); assert(Writer.getOffset() == 0 && "Stream has data already!"); - return insertRecordBytesPrivate(Record); + TypeIndex TI; + ArrayRef OriginalData = Record.OriginalRecord.RecordData; + if (Record.Mappings.empty()) { + // This record did not remap any type indices. Just write it. + return insertRecordBytes(OriginalData); + } + + // At least one type index was remapped. Before we can hash it we have to + // copy the full record bytes, re-write each type index, then hash the copy. + // We do this in temporary storage since only the DenseMap can decide whether + // this record already exists, and if it does we don't want the memory to + // stick around. + RemapStorage.resize(OriginalData.size()); + ::memcpy(&RemapStorage[0], OriginalData.data(), OriginalData.size()); + uint8_t *ContentBegin = RemapStorage.data() + sizeof(RecordPrefix); + for (const auto &M : Record.Mappings) { + // First 4 bytes of every record are the record prefix, but the mapping + // offset is relative to the content which starts after. + *(TypeIndex *)(ContentBegin + M.first) = M.second; + } + auto RemapRef = makeArrayRef(RemapStorage); + return insertRecordBytes(RemapRef); } Error TypeSerializer::visitTypeBegin(CVType &Record) { @@ -137,11 +263,14 @@ Expected TypeSerializer::visitTypeEndGetIndex(CVType &Record) { reinterpret_cast(ThisRecordData.data()); Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t); - uint8_t *Copy = RecordStorage.Allocate(ThisRecordData.size()); - ::memcpy(Copy, ThisRecordData.data(), ThisRecordData.size()); - ThisRecordData = MutableArrayRef(Copy, ThisRecordData.size()); - Record = CVType(*TypeKind, ThisRecordData); - TypeIndex InsertedTypeIndex = insertRecordBytesPrivate(ThisRecordData); + Record.Type = *TypeKind; + Record.RecordData = ThisRecordData; + + // insertRecordBytes assumes we're not in a mapping, so do this first. + TypeKind.reset(); + Writer.setOffset(0); + + TypeIndex InsertedTypeIndex = insertRecordBytes(Record.RecordData); // Write out each additional segment in reverse order, and update each // record's continuation index to point to the previous one. @@ -151,11 +280,9 @@ Expected TypeSerializer::visitTypeEndGetIndex(CVType &Record) { reinterpret_cast(CIBytes.data()); assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder"); *CI = InsertedTypeIndex.getIndex(); - InsertedTypeIndex = insertRecordBytesPrivate(X); + InsertedTypeIndex = insertRecordBytes(X); } - TypeKind.reset(); - Writer.setOffset(0); FieldListSegments.clear(); CurrentSegment.SubRecords.clear(); diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index aad20ae6dda16689e0ce0b968c86d73c2c658278..71a0966df0361f89225d04a9c7e3db4c68cf0711 100644 --- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -13,9 +13,9 @@ #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" #include "llvm/Support/Error.h" #include "llvm/Support/ScopedPrinter.h" @@ -59,34 +59,56 @@ namespace { /// looking at the record kind. class TypeStreamMerger : public TypeVisitorCallbacks { public: - TypeStreamMerger(TypeTableBuilder &DestIdStream, - TypeTableBuilder &DestTypeStream, TypeServerHandler *Handler) - : DestIdStream(DestIdStream), DestTypeStream(DestTypeStream), - FieldListBuilder(DestTypeStream), Handler(Handler) {} + explicit TypeStreamMerger(SmallVectorImpl &SourceToDest, + TypeServerHandler *Handler) + : Handler(Handler), IndexMap(SourceToDest) { + SourceToDest.clear(); + } static const TypeIndex Untranslated; -/// TypeVisitorCallbacks overrides. -#define TYPE_RECORD(EnumName, EnumVal, Name) \ - Error visitKnownRecord(CVType &CVR, Name##Record &Record) override; -#define MEMBER_RECORD(EnumName, EnumVal, Name) \ - Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override; -#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" - - Error visitUnknownType(CVType &Record) override; - Error visitTypeBegin(CVType &Record) override; Error visitTypeEnd(CVType &Record) override; - Error visitMemberEnd(CVMemberRecord &Record) override; - Error mergeStream(const CVTypeArray &Types); + Error mergeTypesAndIds(TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, + const CVTypeArray &IdsAndTypes); + Error mergeIdRecords(TypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + const CVTypeArray &Ids); + Error mergeTypeRecords(TypeTableBuilder &Dest, const CVTypeArray &Types); private: + Error doit(const CVTypeArray &Types); + void addMapping(TypeIndex Idx); - bool remapIndex(TypeIndex &Idx); + bool remapTypeIndex(TypeIndex &Idx); + bool remapItemIndex(TypeIndex &Idx); + + bool remapIndices(RemappedType &Record, ArrayRef Refs) { + auto OriginalData = Record.OriginalRecord.content(); + bool Success = true; + for (auto &Ref : Refs) { + uint32_t Offset = Ref.Offset; + ArrayRef Bytes = + OriginalData.slice(Ref.Offset, sizeof(TypeIndex)); + ArrayRef TIs(reinterpret_cast(Bytes.data()), + Ref.Count); + for (auto TI : TIs) { + TypeIndex NewTI = TI; + bool ThisSuccess = (Ref.Kind == TiRefKind::IndexRef) + ? remapItemIndex(NewTI) + : remapTypeIndex(NewTI); + if (ThisSuccess && NewTI != TI) + Record.Mappings.emplace_back(Offset, NewTI); + Offset += sizeof(TypeIndex); + Success &= ThisSuccess; + } + } + return Success; + } + + bool remapIndex(TypeIndex &Idx, ArrayRef Map); size_t slotForIndex(TypeIndex Idx) const { assert(!Idx.isSimple() && "simple type indices have no slots"); @@ -97,75 +119,83 @@ private: return llvm::make_error(cv_error_code::corrupt_record); } - template - Error writeRecord(RecordType &R, bool RemapSuccess) { + Error writeRecord(TypeTableBuilder &Dest, const RemappedType &Record, + bool RemapSuccess) { TypeIndex DestIdx = Untranslated; if (RemapSuccess) - DestIdx = DestTypeStream.writeKnownType(R); + DestIdx = Dest.writeSerializedRecord(Record); addMapping(DestIdx); return Error::success(); } - template - Error writeIdRecord(RecordType &R, bool RemapSuccess) { - TypeIndex DestIdx = Untranslated; - if (RemapSuccess) - DestIdx = DestIdStream.writeKnownType(R); + Error writeTypeRecord(const CVType &Record) { + TypeIndex DestIdx = + DestTypeStream->writeSerializedRecord(Record.RecordData); addMapping(DestIdx); return Error::success(); } - template - Error writeMember(RecordType &R, bool RemapSuccess) { - if (RemapSuccess) - FieldListBuilder.writeMemberType(R); - else - HadUntranslatedMember = true; - return Error::success(); + Error writeTypeRecord(const RemappedType &Record, bool RemapSuccess) { + return writeRecord(*DestTypeStream, Record, RemapSuccess); + } + + Error writeIdRecord(const RemappedType &Record, bool RemapSuccess) { + return writeRecord(*DestIdStream, Record, RemapSuccess); } Optional LastError; bool IsSecondPass = false; - bool HadUntranslatedMember = false; - unsigned NumBadIndices = 0; - BumpPtrAllocator Allocator; + TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; - TypeTableBuilder &DestIdStream; - TypeTableBuilder &DestTypeStream; - FieldListRecordBuilder FieldListBuilder; - TypeServerHandler *Handler; + TypeTableBuilder *DestIdStream = nullptr; + TypeTableBuilder *DestTypeStream = nullptr; + TypeServerHandler *Handler = nullptr; - TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; + // If we're only mapping id records, this array contains the mapping for + // type records. + ArrayRef TypeLookup; /// Map from source type index to destination type index. Indexed by source /// type index minus 0x1000. - SmallVector IndexMap; + SmallVectorImpl &IndexMap; }; } // end anonymous namespace const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); -Error TypeStreamMerger::visitTypeBegin(CVRecord &Rec) { +Error TypeStreamMerger::visitTypeBegin(CVType &Rec) { + RemappedType R(Rec); + SmallVector Refs; + discoverTypeIndices(Rec.RecordData, Refs); + bool Success = remapIndices(R, Refs); + switch (Rec.kind()) { + case TypeLeafKind::LF_FUNC_ID: + case TypeLeafKind::LF_MFUNC_ID: + case TypeLeafKind::LF_STRING_ID: + case TypeLeafKind::LF_SUBSTR_LIST: + case TypeLeafKind::LF_BUILDINFO: + case TypeLeafKind::LF_UDT_SRC_LINE: + case TypeLeafKind::LF_UDT_MOD_SRC_LINE: + return writeIdRecord(R, Success); + default: + return writeTypeRecord(R, Success); + } return Error::success(); } -Error TypeStreamMerger::visitTypeEnd(CVRecord &Rec) { - CurIndex = TypeIndex(CurIndex.getIndex() + 1); +Error TypeStreamMerger::visitTypeEnd(CVType &Rec) { + ++CurIndex; if (!IsSecondPass) assert(IndexMap.size() == slotForIndex(CurIndex) && "visitKnownRecord should add one index map entry"); return Error::success(); } -Error TypeStreamMerger::visitMemberEnd(CVMemberRecord &Rec) { - return Error::success(); -} - void TypeStreamMerger::addMapping(TypeIndex Idx) { if (!IsSecondPass) { assert(IndexMap.size() == slotForIndex(CurIndex) && @@ -177,7 +207,7 @@ void TypeStreamMerger::addMapping(TypeIndex Idx) { } } -bool TypeStreamMerger::remapIndex(TypeIndex &Idx) { +bool TypeStreamMerger::remapIndex(TypeIndex &Idx, ArrayRef Map) { // Simple types are unchanged. if (Idx.isSimple()) return true; @@ -186,14 +216,14 @@ bool TypeStreamMerger::remapIndex(TypeIndex &Idx) { // successfully. If it refers to a type later in the stream or a record we // had to defer, defer it until later pass. unsigned MapPos = slotForIndex(Idx); - if (MapPos < IndexMap.size() && IndexMap[MapPos] != Untranslated) { - Idx = IndexMap[MapPos]; + if (MapPos < Map.size() && Map[MapPos] != Untranslated) { + Idx = Map[MapPos]; return true; } // If this is the second pass and this index isn't in the map, then it points // outside the current type stream, and this is a corrupt record. - if (IsSecondPass && MapPos >= IndexMap.size()) { + if (IsSecondPass && MapPos >= Map.size()) { // FIXME: Print a more useful error. We can give the current record and the // index that we think its pointing to. LastError = joinErrors(std::move(*LastError), errorCorruptRecord()); @@ -207,251 +237,61 @@ bool TypeStreamMerger::remapIndex(TypeIndex &Idx) { return false; } -//----------------------------------------------------------------------------// -// Item records -//----------------------------------------------------------------------------// - -Error TypeStreamMerger::visitKnownRecord(CVType &, FuncIdRecord &R) { - bool Success = true; - Success &= remapIndex(R.ParentScope); - Success &= remapIndex(R.FunctionType); - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFuncIdRecord &R) { - bool Success = true; - Success &= remapIndex(R.ClassType); - Success &= remapIndex(R.FunctionType); - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, StringIdRecord &R) { - return writeIdRecord(R, remapIndex(R.Id)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, StringListRecord &R) { - bool Success = true; - for (TypeIndex &Str : R.StringIndices) - Success &= remapIndex(Str); - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, BuildInfoRecord &R) { - bool Success = true; - for (TypeIndex &Arg : R.ArgIndices) - Success &= remapIndex(Arg); - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, UdtSourceLineRecord &R) { - bool Success = true; - Success &= remapIndex(R.UDT); - Success &= remapIndex(R.SourceFile); - // FIXME: Translate UdtSourceLineRecord into UdtModSourceLineRecords in the - // IPI stream. - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, UdtModSourceLineRecord &R) { - bool Success = true; - Success &= remapIndex(R.UDT); - Success &= remapIndex(R.SourceFile); - return writeIdRecord(R, Success); -} - -//----------------------------------------------------------------------------// -// Type records -//----------------------------------------------------------------------------// - -Error TypeStreamMerger::visitKnownRecord(CVType &, ModifierRecord &R) { - return writeRecord(R, remapIndex(R.ModifiedType)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, ProcedureRecord &R) { - bool Success = true; - Success &= remapIndex(R.ReturnType); - Success &= remapIndex(R.ArgumentList); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFunctionRecord &R) { - bool Success = true; - Success &= remapIndex(R.ReturnType); - Success &= remapIndex(R.ClassType); - Success &= remapIndex(R.ThisType); - Success &= remapIndex(R.ArgumentList); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &Type, ArgListRecord &R) { - bool Success = true; - for (TypeIndex &Arg : R.ArgIndices) - Success &= remapIndex(Arg); - if (auto EC = writeRecord(R, Success)) - return EC; - return Error::success(); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, PointerRecord &R) { - bool Success = true; - Success &= remapIndex(R.ReferentType); - if (R.isPointerToMember()) - Success &= remapIndex(R.MemberInfo->ContainingType); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, ArrayRecord &R) { - bool Success = true; - Success &= remapIndex(R.ElementType); - Success &= remapIndex(R.IndexType); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, ClassRecord &R) { - bool Success = true; - Success &= remapIndex(R.FieldList); - Success &= remapIndex(R.DerivationList); - Success &= remapIndex(R.VTableShape); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, UnionRecord &R) { - return writeRecord(R, remapIndex(R.FieldList)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, EnumRecord &R) { - bool Success = true; - Success &= remapIndex(R.FieldList); - Success &= remapIndex(R.UnderlyingType); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, BitFieldRecord &R) { - return writeRecord(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableShapeRecord &R) { - return writeRecord(R, true); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, TypeServer2Record &R) { - return writeRecord(R, true); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, LabelRecord &R) { - return writeRecord(R, true); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableRecord &R) { - bool Success = true; - Success &= remapIndex(R.CompleteClass); - Success &= remapIndex(R.OverriddenVFTable); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, - MethodOverloadListRecord &R) { - bool Success = true; - for (OneMethodRecord &Meth : R.Methods) - Success &= remapIndex(Meth.Type); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, FieldListRecord &R) { - // Visit the members inside the field list. - HadUntranslatedMember = false; - FieldListBuilder.begin(); - CVTypeVisitor Visitor(*this); - if (auto EC = Visitor.visitFieldListMemberStream(R.Data)) - return EC; +bool TypeStreamMerger::remapTypeIndex(TypeIndex &Idx) { + // If we're mapping a pure index stream, then IndexMap only contains mappings + // from OldIdStream -> NewIdStream, in which case we will need to use the + // special mapping from OldTypeStream -> NewTypeStream which was computed + // externally. Regardless, we use this special map if and only if we are + // doing an id-only mapping. + if (DestTypeStream == nullptr) + return remapIndex(Idx, TypeLookup); - // Write the record if we translated all field list members. - TypeIndex DestIdx = Untranslated; - if (!HadUntranslatedMember) - DestIdx = FieldListBuilder.end(); - else - FieldListBuilder.reset(); - addMapping(DestIdx); - - return Error::success(); -} - -//----------------------------------------------------------------------------// -// Member records -//----------------------------------------------------------------------------// - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - NestedTypeRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, OneMethodRecord &R) { - bool Success = true; - Success &= remapIndex(R.Type); - return writeMember(R, Success); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - OverloadedMethodRecord &R) { - return writeMember(R, remapIndex(R.MethodList)); + assert(TypeLookup.empty()); + return remapIndex(Idx, IndexMap); } -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - DataMemberRecord &R) { - return writeMember(R, remapIndex(R.Type)); +bool TypeStreamMerger::remapItemIndex(TypeIndex &Idx) { + assert(DestIdStream); + return remapIndex(Idx, IndexMap); } -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - StaticDataMemberRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} +Error TypeStreamMerger::mergeTypeRecords(TypeTableBuilder &Dest, + const CVTypeArray &Types) { + DestTypeStream = &Dest; -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - EnumeratorRecord &R) { - return writeMember(R, true); + return doit(Types); } -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, VFPtrRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} +Error TypeStreamMerger::mergeIdRecords(TypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + const CVTypeArray &Ids) { + DestIdStream = &Dest; + TypeLookup = TypeSourceToDest; -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, BaseClassRecord &R) { - return writeMember(R, remapIndex(R.Type)); + return doit(Ids); } -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - VirtualBaseClassRecord &R) { - bool Success = true; - Success &= remapIndex(R.BaseType); - Success &= remapIndex(R.VBPtrType); - return writeMember(R, Success); -} +Error TypeStreamMerger::mergeTypesAndIds(TypeTableBuilder &DestIds, + TypeTableBuilder &DestTypes, + const CVTypeArray &IdsAndTypes) { + DestIdStream = &DestIds; + DestTypeStream = &DestTypes; -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - ListContinuationRecord &R) { - return writeMember(R, remapIndex(R.ContinuationIndex)); + return doit(IdsAndTypes); } -Error TypeStreamMerger::visitUnknownType(CVType &Rec) { - // We failed to translate a type. Translate this index as "not translated". - addMapping(TypeIndex(SimpleTypeKind::NotTranslated)); - return errorCorruptRecord(); -} - -Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) { - assert(IndexMap.empty()); - TypeVisitorCallbackPipeline Pipeline; +Error TypeStreamMerger::doit(const CVTypeArray &Types) { LastError = Error::success(); - TypeDeserializer Deserializer; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(*this); - - CVTypeVisitor Visitor(Pipeline); - if (Handler) - Visitor.addTypeServerHandler(*Handler); - - if (auto EC = Visitor.visitTypeStream(Types)) + // We don't want to deserialize records. I guess this flag is poorly named, + // but it really means "Don't deserialize records before switching on the + // concrete type. + // FIXME: We can probably get even more speed here if we don't use the visitor + // pipeline here, but instead write the switch ourselves. I don't think it + // would buy us much since it's already pretty fast, but it's probably worth + // a few cycles. + if (auto EC = + codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler)) return EC; // If we found bad indices but no other errors, try doing another pass and see @@ -466,7 +306,9 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) { IsSecondPass = true; NumBadIndices = 0; CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); - if (auto EC = Visitor.visitTypeStream(Types)) + + if (auto EC = + codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler)) return EC; assert(NumBadIndices <= BadIndicesRemaining && @@ -477,17 +319,32 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) { } } - IndexMap.clear(); - Error Ret = std::move(*LastError); LastError.reset(); return Ret; } -Error llvm::codeview::mergeTypeStreams(TypeTableBuilder &DestIdStream, - TypeTableBuilder &DestTypeStream, +Error llvm::codeview::mergeTypeRecords(TypeTableBuilder &Dest, + SmallVectorImpl &SourceToDest, TypeServerHandler *Handler, - const CVTypeArray &Types) { - return TypeStreamMerger(DestIdStream, DestTypeStream, Handler) - .mergeStream(Types); + const CVTypeArray &Types) { + TypeStreamMerger M(SourceToDest, Handler); + return M.mergeTypeRecords(Dest, Types); +} + +Error llvm::codeview::mergeIdRecords(TypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + SmallVectorImpl &SourceToDest, + const CVTypeArray &Ids) { + TypeStreamMerger M(SourceToDest, nullptr); + return M.mergeIdRecords(Dest, TypeSourceToDest, Ids); +} + +Error llvm::codeview::mergeTypeAndIdRecords( + TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, + SmallVectorImpl &SourceToDest, TypeServerHandler *Handler, + const CVTypeArray &IdsAndTypes) { + + TypeStreamMerger M(SourceToDest, Handler); + return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes); } diff --git a/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/lib/DebugInfo/CodeView/TypeTableCollection.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8d974d522f28372bc60dbdf80015e7ba6da8b800 --- /dev/null +++ b/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -0,0 +1,83 @@ +//===- TypeTableCollection.cpp -------------------------------- *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeTableCollection.h" + +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamReader.h" + +using namespace llvm; +using namespace llvm::codeview; + +static void error(Error &&EC) { + assert(!static_cast(EC)); + if (EC) + consumeError(std::move(EC)); +} + +TypeTableCollection::TypeTableCollection(ArrayRef> Records) + : Records(Records), Database(Records.size()) {} + +Optional TypeTableCollection::getFirst() { + if (empty()) + return None; + return TypeIndex::fromArrayIndex(0); +} + +Optional TypeTableCollection::getNext(TypeIndex Prev) { + ++Prev; + assert(Prev.toArrayIndex() <= size()); + if (Prev.toArrayIndex() == size()) + return None; + return Prev; +} + +void TypeTableCollection::ensureTypeExists(TypeIndex Index) { + assert(hasCapacityFor(Index)); + + if (Database.contains(Index)) + return; + + BinaryByteStream Bytes(Records[Index.toArrayIndex()], support::little); + + CVType Type; + uint32_t Len; + VarStreamArrayExtractor Extract; + error(Extract(Bytes, Len, Type)); + + TypeDatabaseVisitor DBV(Database); + error(codeview::visitTypeRecord(Type, Index, DBV)); + assert(Database.contains(Index)); +} + +CVType TypeTableCollection::getType(TypeIndex Index) { + ensureTypeExists(Index); + return Database.getTypeRecord(Index); +} + +StringRef TypeTableCollection::getTypeName(TypeIndex Index) { + if (!Index.isSimple()) + ensureTypeExists(Index); + return Database.getTypeName(Index); +} + +bool TypeTableCollection::contains(TypeIndex Index) { + return Database.contains(Index); +} + +uint32_t TypeTableCollection::size() { return Records.size(); } + +uint32_t TypeTableCollection::capacity() { return Records.size(); } + +bool TypeTableCollection::hasCapacityFor(TypeIndex Index) const { + return Index.toArrayIndex() < Records.size(); +} diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt index 495edb7b48db0f279508683af5e95c2351b25448..6ca6e64bd8e6f9fbe583b999760d38a75fab4ad7 100644 --- a/lib/DebugInfo/DWARF/CMakeLists.txt +++ b/lib/DebugInfo/DWARF/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMDebugInfoDWARF DWARFTypeUnit.cpp DWARFUnitIndex.cpp DWARFUnit.cpp + DWARFVerifier.cpp SyntaxHighlighting.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index e7b4b777b43fae386e1445dd6fdaee3ccf7b7572..57eac91f8c1926d3d9035d20c9eaf6bde236bd2e 100644 --- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" + #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 85e1eaedfc6138c05ec00914106f93aa0d636f38..87009bf1b6a1d519a6b6f1183ee13000434afcb5 100644 --- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -7,11 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -53,6 +55,13 @@ bool DWARFAcceleratorTable::extract() { return true; } +uint32_t DWARFAcceleratorTable::getNumBuckets() { return Hdr.NumBuckets; } +uint32_t DWARFAcceleratorTable::getNumHashes() { return Hdr.NumHashes; } +uint32_t DWARFAcceleratorTable::getSizeHdr() { return sizeof(Hdr); } +uint32_t DWARFAcceleratorTable::getHeaderDataLength() { + return Hdr.HeaderDataLength; +} + LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { // Dump the header. OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n' @@ -112,10 +121,8 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { continue; } while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) { - unsigned StringOffset = AccelSection.getU32(&DataOffset); - RelocAddrMap::const_iterator Reloc = Relocs.find(DataOffset-4); - if (Reloc != Relocs.end()) - StringOffset += Reloc->second.second; + unsigned StringOffset = + getRelocatedValue(AccelSection, 4, &DataOffset, &Relocs); if (!StringOffset) break; OS << format(" Name: %08x \"%s\"\n", StringOffset, diff --git a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp index 6e550f2e9ec954ab4b6571969cd1a9b1f3ed095c..358e9bf43d003d908bffa9f58d8c7e66bc464d75 100644 --- a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp @@ -15,7 +15,7 @@ using namespace llvm; -void DWARFCompileUnit::dump(raw_ostream &OS) { +void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { OS << format("0x%08x", getOffset()) << ": Compile Unit:" << " length = " << format("0x%08x", getLength()) << " version = " << format("0x%04x", getVersion()); @@ -27,7 +27,7 @@ void DWARFCompileUnit::dump(raw_ostream &OS) { << ")\n"; if (DWARFDie CUDie = getUnitDIE(false)) - CUDie.dump(OS, -1U); + CUDie.dump(OS, -1U, 0, DumpOpts); else OS << "\n\n"; } diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index ce5c4ae89e88317a85001e16e1b81a730f6f66a3..9bafcde57f0ae95ab8f014cd2d9d731ed75ca46b 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" @@ -29,6 +29,7 @@ #include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" #include "llvm/Object/Decompressor.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -42,6 +43,8 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include +#include #include #include #include @@ -56,6 +59,19 @@ typedef DWARFDebugLine::LineTable DWARFLineTable; typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; +uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size, + uint32_t *Off, const RelocAddrMap *Relocs, + uint64_t *SectionIndex) { + if (!Relocs) + return Data.getUnsigned(Off, Size); + RelocAddrMap::const_iterator AI = Relocs->find(*Off); + if (AI == Relocs->end()) + return Data.getUnsigned(Off, Size); + if (SectionIndex) + *SectionIndex = AI->second.SectionIndex; + return Data.getUnsigned(Off, Size) + AI->second.Value; +} + static void dumpAccelSection(raw_ostream &OS, StringRef Name, const DWARFSection& Section, StringRef StringSection, bool LittleEndian) { @@ -68,8 +84,129 @@ static void dumpAccelSection(raw_ostream &OS, StringRef Name, Accel.dump(OS); } -void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, - bool SummarizeTypes) { +static void +dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName, + const DWARFSection &StringOffsetsSection, + StringRef StringSection, bool LittleEndian) { + DataExtractor StrOffsetExt(StringOffsetsSection.Data, LittleEndian, 0); + uint32_t Offset = 0; + uint64_t SectionSize = StringOffsetsSection.Data.size(); + + while (Offset < SectionSize) { + unsigned Version = 0; + DwarfFormat Format = DWARF32; + unsigned EntrySize = 4; + // Perform validation and extract the segment size from the header. + if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 4)) { + OS << "error: invalid contribution to string offsets table in section ." + << SectionName << ".\n"; + return; + } + uint32_t ContributionStart = Offset; + uint64_t ContributionSize = StrOffsetExt.getU32(&Offset); + // A contribution size of 0xffffffff indicates DWARF64, with the actual size + // in the following 8 bytes. Otherwise, the DWARF standard mandates that + // the contribution size must be at most 0xfffffff0. + if (ContributionSize == 0xffffffff) { + if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, 8)) { + OS << "error: invalid contribution to string offsets table in section ." + << SectionName << ".\n"; + return; + } + Format = DWARF64; + EntrySize = 8; + ContributionSize = StrOffsetExt.getU64(&Offset); + } else if (ContributionSize > 0xfffffff0) { + OS << "error: invalid contribution to string offsets table in section ." + << SectionName << ".\n"; + return; + } + + // We must ensure that we don't read a partial record at the end, so we + // validate for a multiple of EntrySize. Also, we're expecting a version + // number and padding, which adds an additional 4 bytes. + uint64_t ValidationSize = + 4 + ((ContributionSize + EntrySize - 1) & (-(uint64_t)EntrySize)); + if (!StrOffsetExt.isValidOffsetForDataOfSize(Offset, ValidationSize)) { + OS << "error: contribution to string offsets table in section ." + << SectionName << " has invalid length.\n"; + return; + } + + Version = StrOffsetExt.getU16(&Offset); + Offset += 2; + OS << format("0x%8.8x: ", ContributionStart); + OS << "Contribution size = " << ContributionSize + << ", Version = " << Version << "\n"; + + uint32_t ContributionBase = Offset; + DataExtractor StrData(StringSection, LittleEndian, 0); + while (Offset - ContributionBase < ContributionSize) { + OS << format("0x%8.8x: ", Offset); + // FIXME: We can only extract strings in DWARF32 format at the moment. + uint64_t StringOffset = getRelocatedValue( + StrOffsetExt, EntrySize, &Offset, &StringOffsetsSection.Relocs); + if (Format == DWARF32) { + OS << format("%8.8x ", StringOffset); + uint32_t StringOffset32 = (uint32_t)StringOffset; + const char *S = StrData.getCStr(&StringOffset32); + if (S) + OS << format("\"%s\"", S); + } else + OS << format("%16.16x ", StringOffset); + OS << "\n"; + } + } +} + +// Dump a DWARF string offsets section. This may be a DWARF v5 formatted +// string offsets section, where each compile or type unit contributes a +// number of entries (string offsets), with each contribution preceded by +// a header containing size and version number. Alternatively, it may be a +// monolithic series of string offsets, as generated by the pre-DWARF v5 +// implementation of split DWARF. +static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, + const DWARFSection &StringOffsetsSection, + StringRef StringSection, bool LittleEndian, + unsigned MaxVersion) { + if (StringOffsetsSection.Data.empty()) + return; + OS << "\n." << SectionName << " contents:\n"; + // If we have at least one (compile or type) unit with DWARF v5 or greater, + // we assume that the section is formatted like a DWARF v5 string offsets + // section. + if (MaxVersion >= 5) + dumpDWARFv5StringOffsetsSection(OS, SectionName, StringOffsetsSection, + StringSection, LittleEndian); + else { + DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0); + uint32_t offset = 0; + uint64_t size = StringOffsetsSection.Data.size(); + // Ensure that size is a multiple of the size of an entry. + if (size & ((uint64_t)(sizeof(uint32_t) - 1))) { + OS << "error: size of ." << SectionName << " is not a multiple of " + << sizeof(uint32_t) << ".\n"; + size &= -(uint64_t)sizeof(uint32_t); + } + DataExtractor StrData(StringSection, LittleEndian, 0); + while (offset < size) { + OS << format("0x%8.8x: ", offset); + uint32_t StringOffset = strOffsetExt.getU32(&offset); + OS << format("%8.8x ", StringOffset); + const char *S = StrData.getCStr(&StringOffset); + if (S) + OS << format("\"%s\"", S); + OS << "\n"; + } + } +} + +void DWARFContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){ + + DIDumpType DumpType = DumpOpts.DumpType; + bool DumpEH = DumpOpts.DumpEH; + bool SummarizeTypes = DumpOpts.SummarizeTypes; + if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) { OS << ".debug_abbrev contents:\n"; getDebugAbbrev()->dump(OS); @@ -84,14 +221,14 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, if (DumpType == DIDT_All || DumpType == DIDT_Info) { OS << "\n.debug_info contents:\n"; for (const auto &CU : compile_units()) - CU->dump(OS); + CU->dump(OS, DumpOpts); } if ((DumpType == DIDT_All || DumpType == DIDT_InfoDwo) && getNumDWOCompileUnits()) { OS << "\n.debug_info.dwo contents:\n"; for (const auto &DWOCU : dwo_compile_units()) - DWOCU->dump(OS); + DWOCU->dump(OS, DumpOpts); } if ((DumpType == DIDT_All || DumpType == DIDT_Types) && getNumTypeUnits()) { @@ -212,11 +349,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, // sizes, but for simplicity we just use the address byte size of the last // compile unit (there is no easy and fast way to associate address range // list and the compile unit it describes). - DataExtractor rangesData(getRangeSection(), isLittleEndian(), + DataExtractor rangesData(getRangeSection().Data, isLittleEndian(), savedAddressByteSize); offset = 0; DWARFDebugRangeList rangeList; - while (rangeList.extract(rangesData, &offset)) + while (rangeList.extract(rangesData, &offset, getRangeSection().Relocs)) rangeList.dump(OS); } @@ -238,17 +375,15 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, true /* GnuStyle */) .dump("debug_gnu_pubtypes", OS); - if ((DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) && - !getStringOffsetDWOSection().empty()) { - OS << "\n.debug_str_offsets.dwo contents:\n"; - DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), - 0); - offset = 0; - uint64_t size = getStringOffsetDWOSection().size(); - while (offset < size) { - OS << format("0x%8.8x: ", offset); - OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); - } + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsets) + dumpStringOffsetsSection(OS, "debug_str_offsets", getStringOffsetSection(), + getStringSection(), isLittleEndian(), + getMaxVersion()); + + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) { + dumpStringOffsetsSection(OS, "debug_str_offsets.dwo", + getStringOffsetDWOSection(), getStringDWOSection(), + isLittleEndian(), getMaxVersion()); } if ((DumpType == DIDT_All || DumpType == DIDT_GdbIndex) && @@ -274,6 +409,40 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, getStringSection(), isLittleEndian()); } +DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) { + // FIXME: Improve this for the case where this DWO file is really a DWP file + // with an index - use the index for lookup instead of a linear search. + for (const auto &DWOCU : dwo_compile_units()) + if (DWOCU->getDWOId() == Hash) + return DWOCU.get(); + return nullptr; +} + +DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { + parseCompileUnits(); + if (auto *CU = CUs.getUnitForOffset(Offset)) + return CU->getDIEForOffset(Offset); + return DWARFDie(); +} + +bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { + bool Success = true; + DWARFVerifier verifier(OS, *this); + if (DumpType == DIDT_All || DumpType == DIDT_Info) { + if (!verifier.handleDebugInfo()) + Success = false; + } + if (DumpType == DIDT_All || DumpType == DIDT_Line) { + if (!verifier.handleDebugLine()) + Success = false; + } + if (DumpType == DIDT_All || DumpType == DIDT_AppleNames) { + if (!verifier.handleAppleNames()) + Success = false; + } + return Success; +} + const DWARFUnitIndex &DWARFContext::getCUIndex() { if (CUIndex) return *CUIndex; @@ -417,6 +586,10 @@ DWARFContext::getLineTableForUnit(DWARFUnit *U) { if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset)) return lt; + // Make sure the offset is good before we try to parse. + if (stmtOffset >= U->getLineSection().size()) + return nullptr; + // We have to parse it first. DataExtractor lineData(U->getLineSection(), isLittleEndian(), U->getAddressByteSize()); @@ -579,7 +752,7 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address, return InliningInfo; } - uint32_t CallFile = 0, CallLine = 0, CallColumn = 0; + uint32_t CallFile = 0, CallLine = 0, CallColumn = 0, CallDiscriminator = 0; for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) { DWARFDie &FunctionDIE = InlinedChain[i]; DILineInfo Frame; @@ -605,10 +778,12 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address, Spec.FLIKind, Frame.FileName); Frame.Line = CallLine; Frame.Column = CallColumn; + Frame.Discriminator = CallDiscriminator; } // Get call file/line/column of a current DIE. if (i + 1 < n) { - FunctionDIE.getCallerFrame(CallFile, CallLine, CallColumn); + FunctionDIE.getCallerFrame(CallFile, CallLine, CallColumn, + CallDiscriminator); } } InliningInfo.addFrame(Frame); @@ -616,24 +791,84 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address, return InliningInfo; } +std::shared_ptr +DWARFContext::getDWOContext(StringRef AbsolutePath) { + if (auto S = DWP.lock()) { + DWARFContext *Ctxt = S->Context.get(); + return std::shared_ptr(std::move(S), Ctxt); + } + + std::weak_ptr *Entry = &DWOFiles[AbsolutePath]; + + if (auto S = Entry->lock()) { + DWARFContext *Ctxt = S->Context.get(); + return std::shared_ptr(std::move(S), Ctxt); + } + + SmallString<128> DWPName; + Expected> Obj = [&] { + if (!CheckedForDWP) { + (getFileName() + ".dwp").toVector(DWPName); + auto Obj = object::ObjectFile::createObjectFile(DWPName); + if (Obj) { + Entry = &DWP; + return Obj; + } else { + CheckedForDWP = true; + // TODO: Should this error be handled (maybe in a high verbosity mode) + // before falling back to .dwo files? + consumeError(Obj.takeError()); + } + } + + return object::ObjectFile::createObjectFile(AbsolutePath); + }(); + + if (!Obj) { + // TODO: Actually report errors helpfully. + consumeError(Obj.takeError()); + return nullptr; + } + + auto S = std::make_shared(); + S->File = std::move(Obj.get()); + S->Context = llvm::make_unique(*S->File.getBinary()); + *Entry = S; + auto *Ctxt = S->Context.get(); + return std::shared_ptr(std::move(S), Ctxt); +} + static Error createError(const Twine &Reason, llvm::Error E) { return make_error(Reason + toString(std::move(E)), inconvertibleErrorCode()); } -/// Returns the address of symbol relocation used against. Used for futher -/// relocations computation. Symbol's section load address is taken in account if -/// LoadedObjectInfo interface is provided. -static Expected getSymbolAddress(const object::ObjectFile &Obj, - const RelocationRef &Reloc, - const LoadedObjectInfo *L) { - uint64_t Ret = 0; +/// SymInfo contains information about symbol: it's address +/// and section index which is -1LL for absolute symbols. +struct SymInfo { + uint64_t Address; + uint64_t SectionIndex; +}; + +/// Returns the address of symbol relocation used against and a section index. +/// Used for futher relocations computation. Symbol's section load address is +static Expected getSymbolInfo(const object::ObjectFile &Obj, + const RelocationRef &Reloc, + const LoadedObjectInfo *L, + std::map &Cache) { + SymInfo Ret = {0, (uint64_t)-1LL}; object::section_iterator RSec = Obj.section_end(); object::symbol_iterator Sym = Reloc.getSymbol(); + std::map::iterator CacheIt = Cache.end(); // First calculate the address of the symbol or section as it appears // in the object file if (Sym != Obj.symbol_end()) { + bool New; + std::tie(CacheIt, New) = Cache.insert({*Sym, {0, 0}}); + if (!New) + return CacheIt->second; + Expected SymAddrOrErr = Sym->getAddress(); if (!SymAddrOrErr) return createError("error: failed to compute symbol address: ", @@ -646,12 +881,15 @@ static Expected getSymbolAddress(const object::ObjectFile &Obj, SectOrErr.takeError()); RSec = *SectOrErr; - Ret = *SymAddrOrErr; + Ret.Address = *SymAddrOrErr; } else if (auto *MObj = dyn_cast(&Obj)) { RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl()); - Ret = RSec->getAddress(); + Ret.Address = RSec->getAddress(); } + if (RSec != Obj.section_end()) + Ret.SectionIndex = RSec->getIndex(); + // If we are given load addresses for the sections, we need to adjust: // SymAddr = (Address of Symbol Or Section in File) - // (Address of Section in File) + @@ -661,24 +899,48 @@ static Expected getSymbolAddress(const object::ObjectFile &Obj, // we need to perform the same computation. if (L && RSec != Obj.section_end()) if (uint64_t SectionLoadAddress = L->getSectionLoadAddress(*RSec)) - Ret += SectionLoadAddress - RSec->getAddress(); + Ret.Address += SectionLoadAddress - RSec->getAddress(); + + if (CacheIt != Cache.end()) + CacheIt->second = Ret; + return Ret; } static bool isRelocScattered(const object::ObjectFile &Obj, const RelocationRef &Reloc) { - if (!isa(&Obj)) + const MachOObjectFile *MachObj = dyn_cast(&Obj); + if (!MachObj) return false; // MachO also has relocations that point to sections and // scattered relocations. - const MachOObjectFile *MachObj = cast(&Obj); auto RelocInfo = MachObj->getRelocation(Reloc.getRawDataRefImpl()); return MachObj->isRelocationScattered(RelocInfo); } +Error DWARFContextInMemory::maybeDecompress(const SectionRef &Sec, + StringRef Name, StringRef &Data) { + if (!Decompressor::isCompressed(Sec)) + return Error::success(); + + Expected Decompressor = + Decompressor::create(Name, Data, IsLittleEndian, AddressSize == 8); + if (!Decompressor) + return Decompressor.takeError(); + + SmallString<32> Out; + if (auto Err = Decompressor->resizeAndDecompress(Out)) + return Err; + + UncompressedSections.emplace_back(std::move(Out)); + Data = UncompressedSections.back(); + + return Error::success(); +} + DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, - const LoadedObjectInfo *L) - : IsLittleEndian(Obj.isLittleEndian()), + const LoadedObjectInfo *L) + : FileName(Obj.getFileName()), IsLittleEndian(Obj.isLittleEndian()), AddressSize(Obj.getBytesInAddress()) { for (const SectionRef &Section : Obj.sections()) { StringRef name; @@ -696,19 +958,14 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, // Try to obtain an already relocated version of this section. // Else use the unrelocated section from the object file. We'll have to // apply relocations ourselves later. - if (!L || !L->getLoadedSectionContents(*RelocatedSection,data)) + if (!L || !L->getLoadedSectionContents(*RelocatedSection, data)) Section.getContents(data); - if (Decompressor::isCompressed(Section)) { - Expected Decompressor = - Decompressor::create(name, data, IsLittleEndian, AddressSize == 8); - if (!Decompressor) - continue; - SmallString<32> Out; - if (auto Err = Decompressor->decompress(Out)) - continue; - UncompressedSections.emplace_back(std::move(Out)); - data = UncompressedSections.back(); + if (auto Err = maybeDecompress(Section, name, data)) { + errs() << "error: failed to decompress '" + name + "', " + + toString(std::move(Err)) + << '\n'; + continue; } // Compressed sections names in GNU style starts from ".z", @@ -720,7 +977,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, *SectionData = data; if (name == "debug_ranges") { // FIXME: Use the other dwo range section when we emit it. - RangeDWOSection = data; + RangeDWOSection.Data = data; } } else if (name == "debug_types") { // Find debug_types data by section rather than name as there are @@ -730,6 +987,10 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, TypesDWOSections[Section].Data = data; } + // Map platform specific debug section names to DWARF standard section + // names. + name = Obj.mapDebugSectionName(name); + if (RelocatedSection == Obj.section_end()) continue; @@ -740,7 +1001,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, // If the section we're relocating was relocated already by the JIT, // then we used the relocated version above, so we do not need to process // relocations for it now. - if (L && L->getLoadedSectionContents(*RelocatedSection,RelSecData)) + if (L && L->getLoadedSectionContents(*RelocatedSection, RelSecData)) continue; // In Mach-o files, the relocations do not need to be applied if @@ -752,21 +1013,25 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, continue; RelSecName = RelSecName.substr( - RelSecName.find_first_not_of("._")); // Skip . and _ prefixes. + RelSecName.find_first_not_of("._z")); // Skip . and _ prefixes. // TODO: Add support for relocations in other sections as needed. // Record relocations for the debug_info and debug_line sections. - RelocAddrMap *Map = StringSwitch(RelSecName) - .Case("debug_info", &InfoSection.Relocs) - .Case("debug_loc", &LocSection.Relocs) - .Case("debug_info.dwo", &InfoDWOSection.Relocs) - .Case("debug_line", &LineSection.Relocs) - .Case("apple_names", &AppleNamesSection.Relocs) - .Case("apple_types", &AppleTypesSection.Relocs) - .Case("apple_namespaces", &AppleNamespacesSection.Relocs) - .Case("apple_namespac", &AppleNamespacesSection.Relocs) - .Case("apple_objc", &AppleObjCSection.Relocs) - .Default(nullptr); + RelocAddrMap *Map = + StringSwitch(RelSecName) + .Case("debug_info", &InfoSection.Relocs) + .Case("debug_loc", &LocSection.Relocs) + .Case("debug_info.dwo", &InfoDWOSection.Relocs) + .Case("debug_line", &LineSection.Relocs) + .Case("debug_str_offsets", &StringOffsetSection.Relocs) + .Case("debug_ranges", &RangeSection.Relocs) + .Case("debug_addr", &AddrSection.Relocs) + .Case("apple_names", &AppleNamesSection.Relocs) + .Case("apple_types", &AppleTypesSection.Relocs) + .Case("apple_namespaces", &AppleNamespacesSection.Relocs) + .Case("apple_namespac", &AppleNamespacesSection.Relocs) + .Case("apple_objc", &AppleObjCSection.Relocs) + .Default(nullptr); if (!Map) { // Find debug_types relocs by section rather than name as there are // multiple, comdat grouped, debug_types sections. @@ -778,47 +1043,33 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, continue; } - if (Section.relocation_begin() != Section.relocation_end()) { - uint64_t SectionSize = RelocatedSection->getSize(); - for (const RelocationRef &Reloc : Section.relocations()) { - // FIXME: it's not clear how to correctly handle scattered - // relocations. - if (isRelocScattered(Obj, Reloc)) - continue; - - Expected SymAddrOrErr = getSymbolAddress(Obj, Reloc, L); - if (!SymAddrOrErr) { - errs() << toString(SymAddrOrErr.takeError()) << '\n'; - continue; - } - - object::RelocVisitor V(Obj); - object::RelocToApply R(V.visit(Reloc.getType(), Reloc, *SymAddrOrErr)); - if (V.error()) { - SmallString<32> Name; - Reloc.getTypeName(Name); - errs() << "error: failed to compute relocation: " - << Name << "\n"; - continue; - } - uint64_t Address = Reloc.getOffset(); - if (Address + R.Width > SectionSize) { - errs() << "error: " << R.Width << "-byte relocation starting " - << Address << " bytes into section " << name << " which is " - << SectionSize << " bytes long.\n"; - continue; - } - if (R.Width > 8) { - errs() << "error: can't handle a relocation of more than 8 bytes at " - "a time.\n"; - continue; - } - DEBUG(dbgs() << "Writing " << format("%p", R.Value) - << " at " << format("%p", Address) - << " with width " << format("%d", R.Width) - << "\n"); - Map->insert(std::make_pair(Address, std::make_pair(R.Width, R.Value))); + if (Section.relocation_begin() == Section.relocation_end()) + continue; + + // Symbol to [address, section index] cache mapping. + std::map AddrCache; + for (const RelocationRef &Reloc : Section.relocations()) { + // FIXME: it's not clear how to correctly handle scattered + // relocations. + if (isRelocScattered(Obj, Reloc)) + continue; + + Expected SymInfoOrErr = getSymbolInfo(Obj, Reloc, L, AddrCache); + if (!SymInfoOrErr) { + errs() << toString(SymInfoOrErr.takeError()) << '\n'; + continue; + } + + object::RelocVisitor V(Obj); + uint64_t Val = V.visit(Reloc.getType(), Reloc, SymInfoOrErr->Address); + if (V.error()) { + SmallString<32> Name; + Reloc.getTypeName(Name); + errs() << "error: failed to compute relocation: " << Name << "\n"; + continue; } + llvm::RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val}; + Map->insert({Reloc.getOffset(), Rel}); } } } @@ -843,7 +1094,8 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) { .Case("debug_frame", &DebugFrameSection) .Case("eh_frame", &EHFrameSection) .Case("debug_str", &StringSection) - .Case("debug_ranges", &RangeSection) + .Case("debug_str_offsets", &StringOffsetSection.Data) + .Case("debug_ranges", &RangeSection.Data) .Case("debug_macinfo", &MacinfoSection) .Case("debug_pubnames", &PubNamesSection) .Case("debug_pubtypes", &PubTypesSection) @@ -854,8 +1106,8 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) { .Case("debug_loc.dwo", &LocDWOSection.Data) .Case("debug_line.dwo", &LineDWOSection.Data) .Case("debug_str.dwo", &StringDWOSection) - .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) - .Case("debug_addr", &AddrSection) + .Case("debug_str_offsets.dwo", &StringOffsetDWOSection.Data) + .Case("debug_addr", &AddrSection.Data) .Case("apple_names", &AppleNamesSection.Data) .Case("apple_types", &AppleTypesSection.Data) .Case("apple_namespaces", &AppleNamespacesSection.Data) diff --git a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp index 0cf71f530446b2cafcd98da9fbdf37524e824b61..6601393d7459b23a5a073dbfb14324491b9b7896 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp @@ -54,9 +54,8 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { if (ParsedCUOffsets.insert(CUOffset).second) { DWARFAddressRangesVector CURanges; CU->collectAddressRanges(CURanges); - for (const auto &R : CURanges) { - appendRange(CUOffset, R.first, R.second); - } + for (const auto &R : CURanges) + appendRange(CUOffset, R.LowPC, R.HighPC); } } diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index b55ed6a468496280446d39f5a679f65d252996ae..cf9fec2b3254ca4e979fa0ca8f6816af054bcf4c 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -7,18 +7,19 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" + #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -513,6 +514,20 @@ static uint64_t readPointer(const DataExtractor &Data, uint32_t &Offset, } } +// This is a workaround for old compilers which do not allow +// noreturn attribute usage in lambdas. Once the support for those +// compilers are phased out, we can remove this and return back to +// a ReportError lambda: [StartOffset](const char *ErrorMsg). +#define ReportError(ErrorMsg) ReportErrorImpl(StartOffset,ErrorMsg) +static void LLVM_ATTRIBUTE_NORETURN +ReportErrorImpl(uint32_t StartOffset, const char *ErrorMsg) { + std::string Str; + raw_string_ostream OS(Str); + OS << format(ErrorMsg, StartOffset); + OS.flush(); + report_fatal_error(Str); +} + void DWARFDebugFrame::parse(DataExtractor Data) { uint32_t Offset = 0; DenseMap CIEs; @@ -520,14 +535,6 @@ void DWARFDebugFrame::parse(DataExtractor Data) { while (Data.isValidOffset(Offset)) { uint32_t StartOffset = Offset; - auto ReportError = [StartOffset](const char *ErrorMsg) { - std::string Str; - raw_string_ostream OS(Str); - OS << format(ErrorMsg, StartOffset); - OS.flush(); - report_fatal_error(Str); - }; - bool IsDWARF64 = false; uint64_t Length = Data.getU32(&Offset); uint64_t Id; diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp index 35f673c7acc690f5a0203ca9f5537deabc45a16c..dbcc64fc0832fc27058d7dda1218ead10be048b1 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index e4670519b7979212cfa8fcd98f9d451639d69795..cda3e75fbc3e7ce56be8dfdc55a39327af8753a9 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -25,11 +27,19 @@ using namespace llvm; using namespace dwarf; typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; +namespace { +struct ContentDescriptor { + dwarf::LineNumberEntryFormat Type; + dwarf::Form Form; +}; +typedef SmallVector ContentDescriptors; +} // end anonmyous namespace DWARFDebugLine::Prologue::Prologue() { clear(); } void DWARFDebugLine::Prologue::clear() { TotalLength = Version = PrologueLength = 0; + AddressSize = SegSelectorSize = 0; MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0; OpcodeBase = 0; IsDWARF64 = false; @@ -42,6 +52,8 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { OS << "Line table prologue:\n" << format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength) << format(" version: %u\n", Version) + << format(Version >= 5 ? " address_size: %u\n" : "", AddressSize) + << format(Version >= 5 ? " seg_select_size: %u\n" : "", SegSelectorSize) << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) << format(" min_inst_length: %u\n", MinInstLength) << format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) @@ -50,95 +62,210 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { << format(" line_range: %u\n", LineRange) << format(" opcode_base: %u\n", OpcodeBase); - for (uint32_t i = 0; i < StandardOpcodeLengths.size(); ++i) + for (uint32_t I = 0; I != StandardOpcodeLengths.size(); ++I) OS << format("standard_opcode_lengths[%s] = %u\n", - LNStandardString(i + 1).data(), StandardOpcodeLengths[i]); + LNStandardString(I + 1).data(), StandardOpcodeLengths[I]); if (!IncludeDirectories.empty()) - for (uint32_t i = 0; i < IncludeDirectories.size(); ++i) - OS << format("include_directories[%3u] = '", i + 1) - << IncludeDirectories[i] << "'\n"; + for (uint32_t I = 0; I != IncludeDirectories.size(); ++I) + OS << format("include_directories[%3u] = '", I + 1) + << IncludeDirectories[I] << "'\n"; if (!FileNames.empty()) { OS << " Dir Mod Time File Len File Name\n" << " ---- ---------- ---------- -----------" "----------------\n"; - for (uint32_t i = 0; i < FileNames.size(); ++i) { - const FileNameEntry &fileEntry = FileNames[i]; - OS << format("file_names[%3u] %4" PRIu64 " ", i + 1, fileEntry.DirIdx) - << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ", fileEntry.ModTime, - fileEntry.Length) - << fileEntry.Name << '\n'; + for (uint32_t I = 0; I != FileNames.size(); ++I) { + const FileNameEntry &FileEntry = FileNames[I]; + OS << format("file_names[%3u] %4" PRIu64 " ", I + 1, FileEntry.DirIdx) + << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ", FileEntry.ModTime, + FileEntry.Length) + << FileEntry.Name << '\n'; } } } -bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data, - uint32_t *offset_ptr) { - const uint64_t prologue_offset = *offset_ptr; +// Parse v2-v4 directory and file tables. +static void +parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset, + std::vector &IncludeDirectories, + std::vector &FileNames) { + while (*OffsetPtr < EndPrologueOffset) { + StringRef S = DebugLineData.getCStrRef(OffsetPtr); + if (S.empty()) + break; + IncludeDirectories.push_back(S); + } + + while (*OffsetPtr < EndPrologueOffset) { + StringRef Name = DebugLineData.getCStrRef(OffsetPtr); + if (Name.empty()) + break; + DWARFDebugLine::FileNameEntry FileEntry; + FileEntry.Name = Name; + FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); + FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); + FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); + FileNames.push_back(FileEntry); + } +} + +// Parse v5 directory/file entry content descriptions. +// Returns the descriptors, or an empty vector if we did not find a path or +// ran off the end of the prologue. +static ContentDescriptors +parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset) { + ContentDescriptors Descriptors; + int FormatCount = DebugLineData.getU8(OffsetPtr); + bool HasPath = false; + for (int I = 0; I != FormatCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return ContentDescriptors(); + ContentDescriptor Descriptor; + Descriptor.Type = + dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr)); + Descriptor.Form = dwarf::Form(DebugLineData.getULEB128(OffsetPtr)); + if (Descriptor.Type == dwarf::DW_LNCT_path) + HasPath = true; + Descriptors.push_back(Descriptor); + } + return HasPath ? Descriptors : ContentDescriptors(); +} + +static bool +parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset, + std::vector &IncludeDirectories, + std::vector &FileNames) { + // Get the directory entry description. + ContentDescriptors DirDescriptors = + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + if (DirDescriptors.empty()) + return false; + + // Get the directory entries, according to the format described above. + int DirEntryCount = DebugLineData.getU8(OffsetPtr); + for (int I = 0; I != DirEntryCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return false; + for (auto Descriptor : DirDescriptors) { + DWARFFormValue Value(Descriptor.Form); + switch (Descriptor.Type) { + case DW_LNCT_path: + if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + return false; + IncludeDirectories.push_back(Value.getAsCString().getValue()); + break; + default: + if (!Value.skipValue(DebugLineData, OffsetPtr, nullptr)) + return false; + } + } + } + + // Get the file entry description. + ContentDescriptors FileDescriptors = + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + if (FileDescriptors.empty()) + return false; + + // Get the file entries, according to the format described above. + int FileEntryCount = DebugLineData.getU8(OffsetPtr); + for (int I = 0; I != FileEntryCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return false; + DWARFDebugLine::FileNameEntry FileEntry; + for (auto Descriptor : FileDescriptors) { + DWARFFormValue Value(Descriptor.Form); + if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + return false; + switch (Descriptor.Type) { + case DW_LNCT_path: + FileEntry.Name = Value.getAsCString().getValue(); + break; + case DW_LNCT_directory_index: + FileEntry.DirIdx = Value.getAsUnsignedConstant().getValue(); + break; + case DW_LNCT_timestamp: + FileEntry.ModTime = Value.getAsUnsignedConstant().getValue(); + break; + case DW_LNCT_size: + FileEntry.Length = Value.getAsUnsignedConstant().getValue(); + break; + // FIXME: Add MD5 + default: + break; + } + } + FileNames.push_back(FileEntry); + } + return true; +} + +bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, + uint32_t *OffsetPtr) { + const uint64_t PrologueOffset = *OffsetPtr; clear(); - TotalLength = debug_line_data.getU32(offset_ptr); + TotalLength = DebugLineData.getU32(OffsetPtr); if (TotalLength == UINT32_MAX) { IsDWARF64 = true; - TotalLength = debug_line_data.getU64(offset_ptr); + TotalLength = DebugLineData.getU64(OffsetPtr); } else if (TotalLength > 0xffffff00) { return false; } - Version = debug_line_data.getU16(offset_ptr); + Version = DebugLineData.getU16(OffsetPtr); if (Version < 2) return false; - PrologueLength = - debug_line_data.getUnsigned(offset_ptr, sizeofPrologueLength()); - const uint64_t end_prologue_offset = PrologueLength + *offset_ptr; - MinInstLength = debug_line_data.getU8(offset_ptr); + if (Version >= 5) { + AddressSize = DebugLineData.getU8(OffsetPtr); + SegSelectorSize = DebugLineData.getU8(OffsetPtr); + } + + PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength()); + const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr; + MinInstLength = DebugLineData.getU8(OffsetPtr); if (Version >= 4) - MaxOpsPerInst = debug_line_data.getU8(offset_ptr); - DefaultIsStmt = debug_line_data.getU8(offset_ptr); - LineBase = debug_line_data.getU8(offset_ptr); - LineRange = debug_line_data.getU8(offset_ptr); - OpcodeBase = debug_line_data.getU8(offset_ptr); + MaxOpsPerInst = DebugLineData.getU8(OffsetPtr); + DefaultIsStmt = DebugLineData.getU8(OffsetPtr); + LineBase = DebugLineData.getU8(OffsetPtr); + LineRange = DebugLineData.getU8(OffsetPtr); + OpcodeBase = DebugLineData.getU8(OffsetPtr); StandardOpcodeLengths.reserve(OpcodeBase - 1); - for (uint32_t i = 1; i < OpcodeBase; ++i) { - uint8_t op_len = debug_line_data.getU8(offset_ptr); - StandardOpcodeLengths.push_back(op_len); + for (uint32_t I = 1; I < OpcodeBase; ++I) { + uint8_t OpLen = DebugLineData.getU8(OffsetPtr); + StandardOpcodeLengths.push_back(OpLen); } - while (*offset_ptr < end_prologue_offset) { - const char *s = debug_line_data.getCStr(offset_ptr); - if (s && s[0]) - IncludeDirectories.push_back(s); - else - break; - } - - while (*offset_ptr < end_prologue_offset) { - const char *name = debug_line_data.getCStr(offset_ptr); - if (name && name[0]) { - FileNameEntry fileEntry; - fileEntry.Name = name; - fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr); - fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr); - fileEntry.Length = debug_line_data.getULEB128(offset_ptr); - FileNames.push_back(fileEntry); - } else { - break; + if (Version >= 5) { + if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, + IncludeDirectories, FileNames)) { + fprintf(stderr, + "warning: parsing line table prologue at 0x%8.8" PRIx64 + " found an invalid directory or file table description at" + " 0x%8.8" PRIx64 "\n", PrologueOffset, (uint64_t)*OffsetPtr); + return false; } - } - - if (*offset_ptr != end_prologue_offset) { - fprintf(stderr, "warning: parsing line table prologue at 0x%8.8" PRIx64 - " should have ended at 0x%8.8" PRIx64 - " but it ended at 0x%8.8" PRIx64 "\n", - prologue_offset, end_prologue_offset, (uint64_t)*offset_ptr); + } else + parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, + IncludeDirectories, FileNames); + + if (*OffsetPtr != EndPrologueOffset) { + fprintf(stderr, + "warning: parsing line table prologue at 0x%8.8" PRIx64 + " should have ended at 0x%8.8" PRIx64 + " but it ended at 0x%8.8" PRIx64 "\n", + PrologueOffset, EndPrologueOffset, (uint64_t)*OffsetPtr); return false; } return true; } -DWARFDebugLine::Row::Row(bool default_is_stmt) { reset(default_is_stmt); } +DWARFDebugLine::Row::Row(bool DefaultIsStmt) { reset(DefaultIsStmt); } void DWARFDebugLine::Row::postAppend() { BasicBlock = false; @@ -146,20 +273,26 @@ void DWARFDebugLine::Row::postAppend() { EpilogueBegin = false; } -void DWARFDebugLine::Row::reset(bool default_is_stmt) { +void DWARFDebugLine::Row::reset(bool DefaultIsStmt) { Address = 0; Line = 1; Column = 0; File = 1; Isa = 0; Discriminator = 0; - IsStmt = default_is_stmt; + IsStmt = DefaultIsStmt; BasicBlock = false; EndSequence = false; PrologueEnd = false; EpilogueBegin = false; } +void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS) { + OS << "Address Line Column File ISA Discriminator Flags\n" + << "------------------ ------ ------ ------ --- ------------- " + "-------------\n"; +} + void DWARFDebugLine::Row::dump(raw_ostream &OS) const { OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column) << format(" %6u %3u %13u ", File, Isa, Discriminator) @@ -186,9 +319,7 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { OS << '\n'; if (!Rows.empty()) { - OS << "Address Line Column File ISA Discriminator Flags\n" - << "------------------ ------ ------ ------ --- ------------- " - "-------------\n"; + Row::dumpTableHeader(OS); for (const Row &R : Rows) { R.dump(OS); } @@ -211,7 +342,7 @@ void DWARFDebugLine::ParsingState::resetRowAndSequence() { Sequence.reset(); } -void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t offset) { +void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t Offset) { if (Sequence.Empty) { // Record the beginning of instruction sequence. Sequence.Empty = false; @@ -232,56 +363,56 @@ void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t offset) { } const DWARFDebugLine::LineTable * -DWARFDebugLine::getLineTable(uint32_t offset) const { - LineTableConstIter pos = LineTableMap.find(offset); - if (pos != LineTableMap.end()) - return &pos->second; +DWARFDebugLine::getLineTable(uint32_t Offset) const { + LineTableConstIter Pos = LineTableMap.find(Offset); + if (Pos != LineTableMap.end()) + return &Pos->second; return nullptr; } const DWARFDebugLine::LineTable * -DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data, - uint32_t offset) { - std::pair pos = - LineTableMap.insert(LineTableMapTy::value_type(offset, LineTable())); - LineTable *LT = &pos.first->second; - if (pos.second) { - if (!LT->parse(debug_line_data, RelocMap, &offset)) +DWARFDebugLine::getOrParseLineTable(DataExtractor DebugLineData, + uint32_t Offset) { + std::pair Pos = + LineTableMap.insert(LineTableMapTy::value_type(Offset, LineTable())); + LineTable *LT = &Pos.first->second; + if (Pos.second) { + if (!LT->parse(DebugLineData, RelocMap, &Offset)) return nullptr; } return LT; } -bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, +bool DWARFDebugLine::LineTable::parse(DataExtractor DebugLineData, const RelocAddrMap *RMap, - uint32_t *offset_ptr) { - const uint32_t debug_line_offset = *offset_ptr; + uint32_t *OffsetPtr) { + const uint32_t DebugLineOffset = *OffsetPtr; clear(); - if (!Prologue.parse(debug_line_data, offset_ptr)) { + if (!Prologue.parse(DebugLineData, OffsetPtr)) { // Restore our offset and return false to indicate failure! - *offset_ptr = debug_line_offset; + *OffsetPtr = DebugLineOffset; return false; } - const uint32_t end_offset = - debug_line_offset + Prologue.TotalLength + Prologue.sizeofTotalLength(); + const uint32_t EndOffset = + DebugLineOffset + Prologue.TotalLength + Prologue.sizeofTotalLength(); ParsingState State(this); - while (*offset_ptr < end_offset) { - uint8_t opcode = debug_line_data.getU8(offset_ptr); + while (*OffsetPtr < EndOffset) { + uint8_t Opcode = DebugLineData.getU8(OffsetPtr); - if (opcode == 0) { + if (Opcode == 0) { // Extended Opcodes always start with a zero opcode followed by // a uleb128 length so you can skip ones you don't know about - uint32_t ext_offset = *offset_ptr; - uint64_t len = debug_line_data.getULEB128(offset_ptr); - uint32_t arg_size = len - (*offset_ptr - ext_offset); + uint32_t ExtOffset = *OffsetPtr; + uint64_t Len = DebugLineData.getULEB128(OffsetPtr); + uint32_t ArgSize = Len - (*OffsetPtr - ExtOffset); - uint8_t sub_opcode = debug_line_data.getU8(offset_ptr); - switch (sub_opcode) { + uint8_t SubOpcode = DebugLineData.getU8(OffsetPtr); + switch (SubOpcode) { case DW_LNE_end_sequence: // Set the end_sequence register of the state machine to true and // append a row to the matrix using the current values of the @@ -291,7 +422,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // address is that of the byte after the last target machine instruction // of the sequence. State.Row.EndSequence = true; - State.appendRowToMatrix(*offset_ptr); + State.appendRowToMatrix(*OffsetPtr); State.resetRowAndSequence(); break; @@ -302,16 +433,8 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // relocatable address. All of the other statement program opcodes // that affect the address register add a delta to it. This instruction // stores a relocatable value into it instead. - { - // If this address is in our relocation map, apply the relocation. - RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr); - if (AI != RMap->end()) { - const std::pair &R = AI->second; - State.Row.Address = - debug_line_data.getAddress(offset_ptr) + R.second; - } else - State.Row.Address = debug_line_data.getAddress(offset_ptr); - } + State.Row.Address = getRelocatedValue( + DebugLineData, DebugLineData.getAddressSize(), OffsetPtr, RMap); break; case DW_LNE_define_file: @@ -336,33 +459,33 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // the DW_LNE_define_file instruction. These numbers are used in the // the file register of the state machine. { - FileNameEntry fileEntry; - fileEntry.Name = debug_line_data.getCStr(offset_ptr); - fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr); - fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr); - fileEntry.Length = debug_line_data.getULEB128(offset_ptr); - Prologue.FileNames.push_back(fileEntry); + FileNameEntry FileEntry; + FileEntry.Name = DebugLineData.getCStr(OffsetPtr); + FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); + FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); + FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); + Prologue.FileNames.push_back(FileEntry); } break; case DW_LNE_set_discriminator: - State.Row.Discriminator = debug_line_data.getULEB128(offset_ptr); + State.Row.Discriminator = DebugLineData.getULEB128(OffsetPtr); break; default: // Length doesn't include the zero opcode byte or the length itself, but // it does include the sub_opcode, so we have to adjust for that below - (*offset_ptr) += arg_size; + (*OffsetPtr) += ArgSize; break; } - } else if (opcode < Prologue.OpcodeBase) { - switch (opcode) { + } else if (Opcode < Prologue.OpcodeBase) { + switch (Opcode) { // Standard Opcodes case DW_LNS_copy: // Takes no arguments. Append a row to the matrix using the // current values of the state-machine registers. Then set // the basic_block register to false. - State.appendRowToMatrix(*offset_ptr); + State.appendRowToMatrix(*OffsetPtr); break; case DW_LNS_advance_pc: @@ -370,25 +493,25 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // min_inst_length field of the prologue, and adds the // result to the address register of the state machine. State.Row.Address += - debug_line_data.getULEB128(offset_ptr) * Prologue.MinInstLength; + DebugLineData.getULEB128(OffsetPtr) * Prologue.MinInstLength; break; case DW_LNS_advance_line: // Takes a single signed LEB128 operand and adds that value to // the line register of the state machine. - State.Row.Line += debug_line_data.getSLEB128(offset_ptr); + State.Row.Line += DebugLineData.getSLEB128(OffsetPtr); break; case DW_LNS_set_file: // Takes a single unsigned LEB128 operand and stores it in the file // register of the state machine. - State.Row.File = debug_line_data.getULEB128(offset_ptr); + State.Row.File = DebugLineData.getULEB128(OffsetPtr); break; case DW_LNS_set_column: // Takes a single unsigned LEB128 operand and stores it in the // column register of the state machine. - State.Row.Column = debug_line_data.getULEB128(offset_ptr); + State.Row.Column = DebugLineData.getULEB128(OffsetPtr); break; case DW_LNS_negate_stmt: @@ -416,10 +539,10 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // than twice that range will it need to use both DW_LNS_advance_pc // and a special opcode, requiring three or more bytes. { - uint8_t adjust_opcode = 255 - Prologue.OpcodeBase; - uint64_t addr_offset = - (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength; - State.Row.Address += addr_offset; + uint8_t AdjustOpcode = 255 - Prologue.OpcodeBase; + uint64_t AddrOffset = + (AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength; + State.Row.Address += AddrOffset; } break; @@ -433,7 +556,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // judge when the computation of a special opcode overflows and // requires the use of DW_LNS_advance_pc. Such assemblers, however, // can use DW_LNS_fixed_advance_pc instead, sacrificing compression. - State.Row.Address += debug_line_data.getU16(offset_ptr); + State.Row.Address += DebugLineData.getU16(OffsetPtr); break; case DW_LNS_set_prologue_end: @@ -451,7 +574,7 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, case DW_LNS_set_isa: // Takes a single unsigned LEB128 operand and stores it in the // column register of the state machine. - State.Row.Isa = debug_line_data.getULEB128(offset_ptr); + State.Row.Isa = DebugLineData.getULEB128(OffsetPtr); break; default: @@ -459,10 +582,10 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // of such opcodes because they are specified in the prologue // as a multiple of LEB128 operands for each opcode. { - assert(opcode - 1U < Prologue.StandardOpcodeLengths.size()); - uint8_t opcode_length = Prologue.StandardOpcodeLengths[opcode - 1]; - for (uint8_t i = 0; i < opcode_length; ++i) - debug_line_data.getULEB128(offset_ptr); + assert(Opcode - 1U < Prologue.StandardOpcodeLengths.size()); + uint8_t OpcodeLength = Prologue.StandardOpcodeLengths[Opcode - 1]; + for (uint8_t I = 0; I < OpcodeLength; ++I) + DebugLineData.getULEB128(OffsetPtr); } break; } @@ -500,14 +623,14 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // // line increment = line_base + (adjusted opcode % line_range) - uint8_t adjust_opcode = opcode - Prologue.OpcodeBase; - uint64_t addr_offset = - (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength; - int32_t line_offset = - Prologue.LineBase + (adjust_opcode % Prologue.LineRange); - State.Row.Line += line_offset; - State.Row.Address += addr_offset; - State.appendRowToMatrix(*offset_ptr); + uint8_t AdjustOpcode = Opcode - Prologue.OpcodeBase; + uint64_t AddrOffset = + (AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength; + int32_t LineOffset = + Prologue.LineBase + (AdjustOpcode % Prologue.LineRange); + State.Row.Line += LineOffset; + State.Row.Address += AddrOffset; + State.appendRowToMatrix(*OffsetPtr); // Reset discriminator to 0. State.Row.Discriminator = 0; } @@ -529,124 +652,122 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, // rudimentary sequences for address ranges [0x0, 0xsomething). } - return end_offset; + return EndOffset; } uint32_t -DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &seq, - uint64_t address) const { - if (!seq.containsPC(address)) +DWARFDebugLine::LineTable::findRowInSeq(const DWARFDebugLine::Sequence &Seq, + uint64_t Address) const { + if (!Seq.containsPC(Address)) return UnknownRowIndex; // Search for instruction address in the rows describing the sequence. // Rows are stored in a vector, so we may use arithmetical operations with // iterators. - DWARFDebugLine::Row row; - row.Address = address; - RowIter first_row = Rows.begin() + seq.FirstRowIndex; - RowIter last_row = Rows.begin() + seq.LastRowIndex; - LineTable::RowIter row_pos = std::lower_bound( - first_row, last_row, row, DWARFDebugLine::Row::orderByAddress); - if (row_pos == last_row) { - return seq.LastRowIndex - 1; + DWARFDebugLine::Row Row; + Row.Address = Address; + RowIter FirstRow = Rows.begin() + Seq.FirstRowIndex; + RowIter LastRow = Rows.begin() + Seq.LastRowIndex; + LineTable::RowIter RowPos = std::lower_bound( + FirstRow, LastRow, Row, DWARFDebugLine::Row::orderByAddress); + if (RowPos == LastRow) { + return Seq.LastRowIndex - 1; } - uint32_t index = seq.FirstRowIndex + (row_pos - first_row); - if (row_pos->Address > address) { - if (row_pos == first_row) + uint32_t Index = Seq.FirstRowIndex + (RowPos - FirstRow); + if (RowPos->Address > Address) { + if (RowPos == FirstRow) return UnknownRowIndex; else - index--; + Index--; } - return index; + return Index; } -uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { +uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t Address) const { if (Sequences.empty()) return UnknownRowIndex; // First, find an instruction sequence containing the given address. - DWARFDebugLine::Sequence sequence; - sequence.LowPC = address; - SequenceIter first_seq = Sequences.begin(); - SequenceIter last_seq = Sequences.end(); - SequenceIter seq_pos = std::lower_bound( - first_seq, last_seq, sequence, DWARFDebugLine::Sequence::orderByLowPC); - DWARFDebugLine::Sequence found_seq; - if (seq_pos == last_seq) { - found_seq = Sequences.back(); - } else if (seq_pos->LowPC == address) { - found_seq = *seq_pos; + DWARFDebugLine::Sequence Sequence; + Sequence.LowPC = Address; + SequenceIter FirstSeq = Sequences.begin(); + SequenceIter LastSeq = Sequences.end(); + SequenceIter SeqPos = std::lower_bound( + FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC); + DWARFDebugLine::Sequence FoundSeq; + if (SeqPos == LastSeq) { + FoundSeq = Sequences.back(); + } else if (SeqPos->LowPC == Address) { + FoundSeq = *SeqPos; } else { - if (seq_pos == first_seq) + if (SeqPos == FirstSeq) return UnknownRowIndex; - found_seq = *(seq_pos - 1); + FoundSeq = *(SeqPos - 1); } - return findRowInSeq(found_seq, address); + return findRowInSeq(FoundSeq, Address); } bool DWARFDebugLine::LineTable::lookupAddressRange( - uint64_t address, uint64_t size, std::vector &result) const { + uint64_t Address, uint64_t Size, std::vector &Result) const { if (Sequences.empty()) return false; - uint64_t end_addr = address + size; + uint64_t EndAddr = Address + Size; // First, find an instruction sequence containing the given address. - DWARFDebugLine::Sequence sequence; - sequence.LowPC = address; - SequenceIter first_seq = Sequences.begin(); - SequenceIter last_seq = Sequences.end(); - SequenceIter seq_pos = std::lower_bound( - first_seq, last_seq, sequence, DWARFDebugLine::Sequence::orderByLowPC); - if (seq_pos == last_seq || seq_pos->LowPC != address) { - if (seq_pos == first_seq) + DWARFDebugLine::Sequence Sequence; + Sequence.LowPC = Address; + SequenceIter FirstSeq = Sequences.begin(); + SequenceIter LastSeq = Sequences.end(); + SequenceIter SeqPos = std::lower_bound( + FirstSeq, LastSeq, Sequence, DWARFDebugLine::Sequence::orderByLowPC); + if (SeqPos == LastSeq || SeqPos->LowPC != Address) { + if (SeqPos == FirstSeq) return false; - seq_pos--; + SeqPos--; } - if (!seq_pos->containsPC(address)) + if (!SeqPos->containsPC(Address)) return false; - SequenceIter start_pos = seq_pos; + SequenceIter StartPos = SeqPos; // Add the rows from the first sequence to the vector, starting with the // index we just calculated - while (seq_pos != last_seq && seq_pos->LowPC < end_addr) { - const DWARFDebugLine::Sequence &cur_seq = *seq_pos; + while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) { + const DWARFDebugLine::Sequence &CurSeq = *SeqPos; // For the first sequence, we need to find which row in the sequence is the // first in our range. - uint32_t first_row_index = cur_seq.FirstRowIndex; - if (seq_pos == start_pos) - first_row_index = findRowInSeq(cur_seq, address); + uint32_t FirstRowIndex = CurSeq.FirstRowIndex; + if (SeqPos == StartPos) + FirstRowIndex = findRowInSeq(CurSeq, Address); // Figure out the last row in the range. - uint32_t last_row_index = findRowInSeq(cur_seq, end_addr - 1); - if (last_row_index == UnknownRowIndex) - last_row_index = cur_seq.LastRowIndex - 1; + uint32_t LastRowIndex = findRowInSeq(CurSeq, EndAddr - 1); + if (LastRowIndex == UnknownRowIndex) + LastRowIndex = CurSeq.LastRowIndex - 1; - assert(first_row_index != UnknownRowIndex); - assert(last_row_index != UnknownRowIndex); + assert(FirstRowIndex != UnknownRowIndex); + assert(LastRowIndex != UnknownRowIndex); - for (uint32_t i = first_row_index; i <= last_row_index; ++i) { - result.push_back(i); + for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) { + Result.push_back(I); } - ++seq_pos; + ++SeqPos; } return true; } -bool -DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const { +bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const { return FileIndex != 0 && FileIndex <= Prologue.FileNames.size(); } -bool -DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, - const char *CompDir, - FileLineInfoKind Kind, - std::string &Result) const { +bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, + const char *CompDir, + FileLineInfoKind Kind, + std::string &Result) const { if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) return false; const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1]; - const char *FileName = Entry.Name; + StringRef FileName = Entry.Name; if (Kind != FileLineInfoKind::AbsoluteFilePath || sys::path::is_absolute(FileName)) { Result = FileName; @@ -655,7 +776,7 @@ DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, SmallString<16> FilePath; uint64_t IncludeDirIndex = Entry.DirIdx; - const char *IncludeDir = ""; + StringRef IncludeDir; // Be defensive about the contents of Entry. if (IncludeDirIndex > 0 && IncludeDirIndex <= Prologue.IncludeDirectories.size()) diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index e2799ab2d243da40dfc70a604ef86a4c7f163679..2178bef65d1d5b694a56eefcf0fa9bf43c39b3d4 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include @@ -48,18 +49,10 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) { // 2.6.2 Location Lists // A location list entry consists of: while (true) { + // A beginning and ending address offsets. Entry E; - RelocAddrMap::const_iterator AI = RelocMap.find(Offset); - // 1. A beginning address offset. ... - E.Begin = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.Begin += AI->second.second; - - AI = RelocMap.find(Offset); - // 2. An ending address offset. ... - E.End = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.End += AI->second.second; + E.Begin = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); + E.End = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); // The end of any given location list is marked by an end of list entry, // which consists of a 0 for the beginning address offset and a 0 for the diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp index e0a9adde8e58db055b98336a16140062f4b2caa2..1b77be6192ddc5fac0ed08a0919e5e77e2f5aa2e 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "SyntaxHighlighting.h" #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" -#include "llvm/Support/Dwarf.h" +#include "SyntaxHighlighting.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp index 662e53d9d7e660c2d7599fc688797cd14d96e5f0..5a4e39f3c2af857aa376d785e9f62182916e6c5f 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include @@ -45,7 +45,7 @@ DWARFDebugPubTable::DWARFDebugPubTable(StringRef Data, bool LittleEndian, } void DWARFDebugPubTable::dump(StringRef Name, raw_ostream &OS) const { - OS << "\n." << Name << " contents: a\n"; + OS << "\n." << Name << " contents:\n"; for (const Set &S : Sets) { OS << "length = " << format("0x%08x", S.Length); OS << " version = " << format("0x%04x", S.Version); diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index f1d82fda8c06b5be6674a25dfbc0557f6167dfb0..43201293fe601b51c53533d8e405db4ff43736d2 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include @@ -22,7 +23,8 @@ void DWARFDebugRangeList::clear() { Entries.clear(); } -bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) { +bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr, + const RelocAddrMap &Relocs) { clear(); if (!data.isValidOffset(*offset_ptr)) return false; @@ -33,8 +35,11 @@ bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) { while (true) { RangeListEntry entry; uint32_t prev_offset = *offset_ptr; - entry.StartAddress = data.getAddress(offset_ptr); - entry.EndAddress = data.getAddress(offset_ptr); + entry.StartAddress = getRelocatedValue(data, AddressSize, offset_ptr, + &Relocs, &entry.SectionIndex); + entry.EndAddress = + getRelocatedValue(data, AddressSize, offset_ptr, &Relocs); + // Check that both values were extracted correctly. if (*offset_ptr != prev_offset + 2 * AddressSize) { clear(); @@ -64,8 +69,8 @@ DWARFDebugRangeList::getAbsoluteRanges(uint64_t BaseAddress) const { if (RLE.isBaseAddressSelectionEntry(AddressSize)) { BaseAddress = RLE.EndAddress; } else { - Res.push_back(std::make_pair(BaseAddress + RLE.StartAddress, - BaseAddress + RLE.EndAddress)); + Res.push_back({BaseAddress + RLE.StartAddress, + BaseAddress + RLE.EndAddress, RLE.SectionIndex}); } } return Res; diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp index 4308cc2e26396bc0ff332769cb6e199fcbcef655..b4b682dd11b5afda51c18862e7ab6aff799afece 100644 --- a/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -7,18 +7,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "SyntaxHighlighting.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" -#include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -60,14 +60,15 @@ static void dumpRanges(raw_ostream &OS, const DWARFAddressRangesVector& Ranges, OS << '\n'; OS.indent(Indent); OS << format("[0x%0*" PRIx64 " - 0x%0*" PRIx64 ")", - AddressSize*2, Range.first, - AddressSize*2, Range.second); + AddressSize*2, Range.LowPC, + AddressSize*2, Range.HighPC); } } static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, uint32_t *OffsetPtr, dwarf::Attribute Attr, - dwarf::Form Form, unsigned Indent) { + dwarf::Form Form, unsigned Indent, + DIDumpOptions DumpOpts) { if (!Die.isValid()) return; const char BaseIndent[] = " "; @@ -78,13 +79,15 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, WithColor(OS, syntax::Attribute) << attrString; else WithColor(OS, syntax::Attribute).get() << format("DW_AT_Unknown_%x", Attr); - - auto formString = FormEncodingString(Form); - if (!formString.empty()) - OS << " [" << formString << ']'; - else - OS << format(" [DW_FORM_Unknown_%x]", Form); - + + if (!DumpOpts.Brief) { + auto formString = FormEncodingString(Form); + if (!formString.empty()) + OS << " [" << formString << ']'; + else + OS << format(" [DW_FORM_Unknown_%x]", Form); + } + DWARFUnit *U = Die.getDwarfUnit(); DWARFFormValue formValue(Form); @@ -211,13 +214,16 @@ Optional DWARFDie::getHighPC(uint64_t LowPC) const { return None; } -bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const { - auto LowPcAddr = toAddress(find(DW_AT_low_pc)); +bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC, + uint64_t &SectionIndex) const { + auto F = find(DW_AT_low_pc); + auto LowPcAddr = toAddress(F); if (!LowPcAddr) return false; if (auto HighPcAddr = getHighPC(*LowPcAddr)) { LowPC = *LowPcAddr; HighPC = *HighPcAddr; + SectionIndex = F->getSectionIndex(); return true; } return false; @@ -228,10 +234,10 @@ DWARFDie::getAddressRanges() const { if (isNULL()) return DWARFAddressRangesVector(); // Single range specified by low/high PC. - uint64_t LowPC, HighPC; - if (getLowAndHighPC(LowPC, HighPC)) { - return DWARFAddressRangesVector(1, std::make_pair(LowPC, HighPC)); - } + uint64_t LowPC, HighPC, Index; + if (getLowAndHighPC(LowPC, HighPC, Index)) + return {{LowPC, HighPC, Index}}; + // Multiple ranges from .debug_ranges section. auto RangesOffset = toSectionOffset(find(DW_AT_ranges)); if (RangesOffset) { @@ -257,7 +263,7 @@ DWARFDie::collectChildrenAddressRanges(DWARFAddressRangesVector& Ranges) const { bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const { for (const auto& R : getAddressRanges()) { - if (R.first <= Address && Address < R.second) + if (R.LowPC <= Address && Address < R.HighPC) return true; } return false; @@ -290,14 +296,16 @@ uint64_t DWARFDie::getDeclLine() const { } void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, - uint32_t &CallColumn) const { + uint32_t &CallColumn, + uint32_t &CallDiscriminator) const { CallFile = toUnsigned(find(DW_AT_call_file), 0); CallLine = toUnsigned(find(DW_AT_call_line), 0); CallColumn = toUnsigned(find(DW_AT_call_column), 0); + CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); } -void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, - unsigned Indent) const { +void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, unsigned Indent, + DIDumpOptions DumpOpts) const { if (!isValid()) return; DataExtractor debug_info_data = U->getDebugInfoExtractor(); @@ -317,10 +325,12 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, else WithColor(OS, syntax::Tag).get().indent(Indent) << format("DW_TAG_Unknown_%x", getTag()); - - OS << format(" [%u] %c\n", abbrCode, - AbbrevDecl->hasChildren() ? '*' : ' '); - + + if (!DumpOpts.Brief) + OS << format(" [%u] %c", abbrCode, + AbbrevDecl->hasChildren() ? '*' : ' '); + OS << '\n'; + // Dump all data in the DIE for the attributes. for (const auto &AttrSpec : AbbrevDecl->attributes()) { if (AttrSpec.Form == DW_FORM_implicit_const) { @@ -330,13 +340,13 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, continue; } dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form, - Indent); + Indent, DumpOpts); } DWARFDie child = getFirstChild(); if (RecurseDepth > 0 && child) { while (child) { - child.dump(OS, RecurseDepth-1, Indent+2); + child.dump(OS, RecurseDepth-1, Indent+2, DumpOpts); child = child.getSibling(); } } @@ -350,32 +360,6 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth, } } -void DWARFDie::getInlinedChainForAddress( - const uint64_t Address, SmallVectorImpl &InlinedChain) const { - if (isNULL()) - return; - DWARFDie DIE(*this); - while (DIE) { - // Append current DIE to inlined chain only if it has correct tag - // (e.g. it is not a lexical block). - if (DIE.isSubroutineDIE()) - InlinedChain.push_back(DIE); - - // Try to get child which also contains provided address. - DWARFDie Child = DIE.getFirstChild(); - while (Child) { - if (Child.addressRangeContainsAddress(Address)) { - // Assume there is only one such child. - break; - } - Child = Child.getSibling(); - } - DIE = Child; - } - // Reverse the obtained chain to make the root of inlined chain last. - std::reverse(InlinedChain.begin(), InlinedChain.end()); -} - DWARFDie DWARFDie::getParent() const { if (isValid()) return U->getParent(Die); diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 6de57b999adcc3fd249e7588efd413306c562722..ed1f5f46dcfb8297bd1a048bc49efad477300709 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -7,16 +7,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "SyntaxHighlighting.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -29,34 +29,34 @@ using namespace dwarf; using namespace syntax; static const DWARFFormValue::FormClass DWARF4FormClasses[] = { - DWARFFormValue::FC_Unknown, // 0x0 - DWARFFormValue::FC_Address, // 0x01 DW_FORM_addr - DWARFFormValue::FC_Unknown, // 0x02 unused - DWARFFormValue::FC_Block, // 0x03 DW_FORM_block2 - DWARFFormValue::FC_Block, // 0x04 DW_FORM_block4 - DWARFFormValue::FC_Constant, // 0x05 DW_FORM_data2 - // --- These can be FC_SectionOffset in DWARF3 and below: - DWARFFormValue::FC_Constant, // 0x06 DW_FORM_data4 - DWARFFormValue::FC_Constant, // 0x07 DW_FORM_data8 - // --- - DWARFFormValue::FC_String, // 0x08 DW_FORM_string - DWARFFormValue::FC_Block, // 0x09 DW_FORM_block - DWARFFormValue::FC_Block, // 0x0a DW_FORM_block1 - DWARFFormValue::FC_Constant, // 0x0b DW_FORM_data1 - DWARFFormValue::FC_Flag, // 0x0c DW_FORM_flag - DWARFFormValue::FC_Constant, // 0x0d DW_FORM_sdata - DWARFFormValue::FC_String, // 0x0e DW_FORM_strp - DWARFFormValue::FC_Constant, // 0x0f DW_FORM_udata - DWARFFormValue::FC_Reference, // 0x10 DW_FORM_ref_addr - DWARFFormValue::FC_Reference, // 0x11 DW_FORM_ref1 - DWARFFormValue::FC_Reference, // 0x12 DW_FORM_ref2 - DWARFFormValue::FC_Reference, // 0x13 DW_FORM_ref4 - DWARFFormValue::FC_Reference, // 0x14 DW_FORM_ref8 - DWARFFormValue::FC_Reference, // 0x15 DW_FORM_ref_udata - DWARFFormValue::FC_Indirect, // 0x16 DW_FORM_indirect - DWARFFormValue::FC_SectionOffset, // 0x17 DW_FORM_sec_offset - DWARFFormValue::FC_Exprloc, // 0x18 DW_FORM_exprloc - DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present + DWARFFormValue::FC_Unknown, // 0x0 + DWARFFormValue::FC_Address, // 0x01 DW_FORM_addr + DWARFFormValue::FC_Unknown, // 0x02 unused + DWARFFormValue::FC_Block, // 0x03 DW_FORM_block2 + DWARFFormValue::FC_Block, // 0x04 DW_FORM_block4 + DWARFFormValue::FC_Constant, // 0x05 DW_FORM_data2 + // --- These can be FC_SectionOffset in DWARF3 and below: + DWARFFormValue::FC_Constant, // 0x06 DW_FORM_data4 + DWARFFormValue::FC_Constant, // 0x07 DW_FORM_data8 + // --- + DWARFFormValue::FC_String, // 0x08 DW_FORM_string + DWARFFormValue::FC_Block, // 0x09 DW_FORM_block + DWARFFormValue::FC_Block, // 0x0a DW_FORM_block1 + DWARFFormValue::FC_Constant, // 0x0b DW_FORM_data1 + DWARFFormValue::FC_Flag, // 0x0c DW_FORM_flag + DWARFFormValue::FC_Constant, // 0x0d DW_FORM_sdata + DWARFFormValue::FC_String, // 0x0e DW_FORM_strp + DWARFFormValue::FC_Constant, // 0x0f DW_FORM_udata + DWARFFormValue::FC_Reference, // 0x10 DW_FORM_ref_addr + DWARFFormValue::FC_Reference, // 0x11 DW_FORM_ref1 + DWARFFormValue::FC_Reference, // 0x12 DW_FORM_ref2 + DWARFFormValue::FC_Reference, // 0x13 DW_FORM_ref4 + DWARFFormValue::FC_Reference, // 0x14 DW_FORM_ref8 + DWARFFormValue::FC_Reference, // 0x15 DW_FORM_ref_udata + DWARFFormValue::FC_Indirect, // 0x16 DW_FORM_indirect + DWARFFormValue::FC_SectionOffset, // 0x17 DW_FORM_sec_offset + DWARFFormValue::FC_Exprloc, // 0x18 DW_FORM_exprloc + DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present }; namespace { @@ -83,10 +83,10 @@ public: uint8_t getDwarfOffsetByteSize() const { switch (Format) { - case dwarf::DwarfFormat::DWARF32: - return 4; - case dwarf::DwarfFormat::DWARF64: - return 8; + case dwarf::DwarfFormat::DWARF32: + return 4; + case dwarf::DwarfFormat::DWARF64: + return 8; } llvm_unreachable("Invalid Format value"); } @@ -97,83 +97,83 @@ public: template static Optional getFixedByteSize(dwarf::Form Form, const T *U) { switch (Form) { - case DW_FORM_addr: - if (U) - return U->getAddressByteSize(); - return None; + case DW_FORM_addr: + if (U) + return U->getAddressByteSize(); + return None; - case DW_FORM_block: // ULEB128 length L followed by L bytes. - case DW_FORM_block1: // 1 byte length L followed by L bytes. - case DW_FORM_block2: // 2 byte length L followed by L bytes. - case DW_FORM_block4: // 4 byte length L followed by L bytes. - case DW_FORM_string: // C-string with null terminator. - case DW_FORM_sdata: // SLEB128. - case DW_FORM_udata: // ULEB128. - case DW_FORM_ref_udata: // ULEB128. - case DW_FORM_indirect: // ULEB128. - case DW_FORM_exprloc: // ULEB128 length L followed by L bytes. - case DW_FORM_strx: // ULEB128. - case DW_FORM_addrx: // ULEB128. - case DW_FORM_loclistx: // ULEB128. - case DW_FORM_rnglistx: // ULEB128. - case DW_FORM_GNU_addr_index: // ULEB128. - case DW_FORM_GNU_str_index: // ULEB128. - return None; + case DW_FORM_block: // ULEB128 length L followed by L bytes. + case DW_FORM_block1: // 1 byte length L followed by L bytes. + case DW_FORM_block2: // 2 byte length L followed by L bytes. + case DW_FORM_block4: // 4 byte length L followed by L bytes. + case DW_FORM_string: // C-string with null terminator. + case DW_FORM_sdata: // SLEB128. + case DW_FORM_udata: // ULEB128. + case DW_FORM_ref_udata: // ULEB128. + case DW_FORM_indirect: // ULEB128. + case DW_FORM_exprloc: // ULEB128 length L followed by L bytes. + case DW_FORM_strx: // ULEB128. + case DW_FORM_addrx: // ULEB128. + case DW_FORM_loclistx: // ULEB128. + case DW_FORM_rnglistx: // ULEB128. + case DW_FORM_GNU_addr_index: // ULEB128. + case DW_FORM_GNU_str_index: // ULEB128. + return None; - case DW_FORM_ref_addr: - if (U) - return U->getRefAddrByteSize(); - return None; + case DW_FORM_ref_addr: + if (U) + return U->getRefAddrByteSize(); + return None; - case DW_FORM_flag: - case DW_FORM_data1: - case DW_FORM_ref1: - case DW_FORM_strx1: - case DW_FORM_addrx1: - return 1; + case DW_FORM_flag: + case DW_FORM_data1: + case DW_FORM_ref1: + case DW_FORM_strx1: + case DW_FORM_addrx1: + return 1; - case DW_FORM_data2: - case DW_FORM_ref2: - case DW_FORM_strx2: - case DW_FORM_addrx2: - return 2; + case DW_FORM_data2: + case DW_FORM_ref2: + case DW_FORM_strx2: + case DW_FORM_addrx2: + return 2; - case DW_FORM_data4: - case DW_FORM_ref4: - case DW_FORM_ref_sup4: - case DW_FORM_strx4: - case DW_FORM_addrx4: - return 4; + case DW_FORM_data4: + case DW_FORM_ref4: + case DW_FORM_ref_sup4: + case DW_FORM_strx4: + case DW_FORM_addrx4: + return 4; - case DW_FORM_strp: - case DW_FORM_GNU_ref_alt: - case DW_FORM_GNU_strp_alt: - case DW_FORM_line_strp: - case DW_FORM_sec_offset: - case DW_FORM_strp_sup: - if (U) - return U->getDwarfOffsetByteSize(); - return None; + case DW_FORM_strp: + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp_sup: + if (U) + return U->getDwarfOffsetByteSize(); + return None; - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - case DW_FORM_ref_sup8: - return 8; + case DW_FORM_data8: + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + case DW_FORM_ref_sup8: + return 8; - case DW_FORM_flag_present: - return 0; + case DW_FORM_flag_present: + return 0; - case DW_FORM_data16: - return 16; + case DW_FORM_data16: + return 16; - case DW_FORM_implicit_const: - // The implicit value is stored in the abbreviation as a SLEB128, and - // there no data in debug info. - return 0; + case DW_FORM_implicit_const: + // The implicit value is stored in the abbreviation as a SLEB128, and + // there no data in debug info. + return 0; - default: - llvm_unreachable("Handle this form in this switch statement"); + default: + llvm_unreachable("Handle this form in this switch statement"); } return None; } @@ -184,91 +184,91 @@ static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData, bool Indirect = false; do { switch (Form) { - // Blocks of inlined data that have a length field and the data bytes - // inlined in the .debug_info. - case DW_FORM_exprloc: - case DW_FORM_block: { - uint64_t size = DebugInfoData.getULEB128(OffsetPtr); - *OffsetPtr += size; - return true; - } - case DW_FORM_block1: { - uint8_t size = DebugInfoData.getU8(OffsetPtr); - *OffsetPtr += size; - return true; - } - case DW_FORM_block2: { - uint16_t size = DebugInfoData.getU16(OffsetPtr); - *OffsetPtr += size; - return true; - } - case DW_FORM_block4: { - uint32_t size = DebugInfoData.getU32(OffsetPtr); - *OffsetPtr += size; + // Blocks of inlined data that have a length field and the data bytes + // inlined in the .debug_info. + case DW_FORM_exprloc: + case DW_FORM_block: { + uint64_t size = DebugInfoData.getULEB128(OffsetPtr); + *OffsetPtr += size; + return true; + } + case DW_FORM_block1: { + uint8_t size = DebugInfoData.getU8(OffsetPtr); + *OffsetPtr += size; + return true; + } + case DW_FORM_block2: { + uint16_t size = DebugInfoData.getU16(OffsetPtr); + *OffsetPtr += size; + return true; + } + case DW_FORM_block4: { + uint32_t size = DebugInfoData.getU32(OffsetPtr); + *OffsetPtr += size; + return true; + } + + // Inlined NULL terminated C-strings. + case DW_FORM_string: + DebugInfoData.getCStr(OffsetPtr); + return true; + + case DW_FORM_addr: + case DW_FORM_ref_addr: + case DW_FORM_flag_present: + case DW_FORM_data1: + case DW_FORM_data2: + case DW_FORM_data4: + case DW_FORM_data8: + case DW_FORM_flag: + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + case DW_FORM_ref_sup4: + case DW_FORM_ref_sup8: + case DW_FORM_strx1: + case DW_FORM_strx2: + case DW_FORM_strx4: + case DW_FORM_addrx1: + case DW_FORM_addrx2: + case DW_FORM_addrx4: + case DW_FORM_sec_offset: + case DW_FORM_strp: + case DW_FORM_strp_sup: + case DW_FORM_line_strp: + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + if (Optional FixedSize = ::getFixedByteSize(Form, U)) { + *OffsetPtr += *FixedSize; return true; } + return false; - // Inlined NULL terminated C-strings. - case DW_FORM_string: - DebugInfoData.getCStr(OffsetPtr); - return true; + // signed or unsigned LEB 128 values. + case DW_FORM_sdata: + DebugInfoData.getSLEB128(OffsetPtr); + return true; - case DW_FORM_addr: - case DW_FORM_ref_addr: - case DW_FORM_flag_present: - case DW_FORM_data1: - case DW_FORM_data2: - case DW_FORM_data4: - case DW_FORM_data8: - case DW_FORM_flag: - case DW_FORM_ref1: - case DW_FORM_ref2: - case DW_FORM_ref4: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - case DW_FORM_ref_sup4: - case DW_FORM_ref_sup8: - case DW_FORM_strx1: - case DW_FORM_strx2: - case DW_FORM_strx4: - case DW_FORM_addrx1: - case DW_FORM_addrx2: - case DW_FORM_addrx4: - case DW_FORM_sec_offset: - case DW_FORM_strp: - case DW_FORM_strp_sup: - case DW_FORM_line_strp: - case DW_FORM_GNU_ref_alt: - case DW_FORM_GNU_strp_alt: - if (Optional FixedSize = ::getFixedByteSize(Form, U)) { - *OffsetPtr += *FixedSize; - return true; - } - return false; + case DW_FORM_udata: + case DW_FORM_ref_udata: + case DW_FORM_strx: + case DW_FORM_addrx: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: + case DW_FORM_GNU_addr_index: + case DW_FORM_GNU_str_index: + DebugInfoData.getULEB128(OffsetPtr); + return true; - // signed or unsigned LEB 128 values. - case DW_FORM_sdata: - DebugInfoData.getSLEB128(OffsetPtr); - return true; + case DW_FORM_indirect: + Indirect = true; + Form = static_cast(DebugInfoData.getULEB128(OffsetPtr)); + break; - case DW_FORM_udata: - case DW_FORM_ref_udata: - case DW_FORM_strx: - case DW_FORM_addrx: - case DW_FORM_loclistx: - case DW_FORM_rnglistx: - case DW_FORM_GNU_addr_index: - case DW_FORM_GNU_str_index: - DebugInfoData.getULEB128(OffsetPtr); - return true; - - case DW_FORM_indirect: - Indirect = true; - Form = static_cast(DebugInfoData.getULEB128(OffsetPtr)); - break; - - default: - return false; + default: + return false; } } while (Indirect); return true; @@ -301,6 +301,7 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { return (FC == FC_Address); case DW_FORM_GNU_str_index: case DW_FORM_GNU_strp_alt: + case DW_FORM_strx: return (FC == FC_String); case DW_FORM_implicit_const: return (FC == FC_Constant); @@ -309,99 +310,91 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { } // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section offset. // Don't check for DWARF version here, as some producers may still do this - // by mistake. - return (Form == DW_FORM_data4 || Form == DW_FORM_data8) && + // by mistake. Also accept DW_FORM_strp since this is .debug_str section + // offset. + return (Form == DW_FORM_data4 || Form == DW_FORM_data8 || + Form == DW_FORM_strp) && FC == FC_SectionOffset; } -bool DWARFFormValue::extractValue(const DataExtractor &data, - uint32_t *offset_ptr, - const DWARFUnit *cu) { - U = cu; - bool indirect = false; - bool is_block = false; +bool DWARFFormValue::extractValue(const DataExtractor &Data, + uint32_t *OffsetPtr, const DWARFUnit *CU) { + U = CU; + bool Indirect = false; + bool IsBlock = false; Value.data = nullptr; // Read the value for the form into value and follow and DW_FORM_indirect // instances we run into do { - indirect = false; + Indirect = false; switch (Form) { case DW_FORM_addr: case DW_FORM_ref_addr: { if (!U) return false; - uint16_t AddrSize = - (Form == DW_FORM_addr) - ? U->getAddressByteSize() - : U->getRefAddrByteSize(); - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - if (AI != U->getRelocMap()->end()) { - Value.uval = data.getUnsigned(offset_ptr, AddrSize) + AI->second.second; - } else - Value.uval = data.getUnsigned(offset_ptr, AddrSize); + uint16_t AddrSize = (Form == DW_FORM_addr) ? U->getAddressByteSize() + : U->getRefAddrByteSize(); + Value.uval = getRelocatedValue(Data, AddrSize, OffsetPtr, + U->getRelocMap(), &Value.SectionIndex); break; } case DW_FORM_exprloc: case DW_FORM_block: - Value.uval = data.getULEB128(offset_ptr); - is_block = true; + Value.uval = Data.getULEB128(OffsetPtr); + IsBlock = true; break; case DW_FORM_block1: - Value.uval = data.getU8(offset_ptr); - is_block = true; + Value.uval = Data.getU8(OffsetPtr); + IsBlock = true; break; case DW_FORM_block2: - Value.uval = data.getU16(offset_ptr); - is_block = true; + Value.uval = Data.getU16(OffsetPtr); + IsBlock = true; break; case DW_FORM_block4: - Value.uval = data.getU32(offset_ptr); - is_block = true; + Value.uval = Data.getU32(OffsetPtr); + IsBlock = true; break; case DW_FORM_data1: case DW_FORM_ref1: case DW_FORM_flag: case DW_FORM_strx1: case DW_FORM_addrx1: - Value.uval = data.getU8(offset_ptr); + Value.uval = Data.getU8(OffsetPtr); break; case DW_FORM_data2: case DW_FORM_ref2: case DW_FORM_strx2: case DW_FORM_addrx2: - Value.uval = data.getU16(offset_ptr); + Value.uval = Data.getU16(OffsetPtr); break; case DW_FORM_data4: case DW_FORM_ref4: case DW_FORM_ref_sup4: case DW_FORM_strx4: case DW_FORM_addrx4: { - Value.uval = data.getU32(offset_ptr); - if (!U) - break; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr-4); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + const RelocAddrMap *RelocMap = U ? U->getRelocMap() : nullptr; + Value.uval = getRelocatedValue(Data, 4, OffsetPtr, RelocMap); break; } case DW_FORM_data8: case DW_FORM_ref8: case DW_FORM_ref_sup8: - Value.uval = data.getU64(offset_ptr); + Value.uval = Data.getU64(OffsetPtr); break; case DW_FORM_sdata: - Value.sval = data.getSLEB128(offset_ptr); + Value.sval = Data.getSLEB128(OffsetPtr); break; case DW_FORM_udata: case DW_FORM_ref_udata: - Value.uval = data.getULEB128(offset_ptr); + Value.uval = Data.getULEB128(OffsetPtr); break; case DW_FORM_string: - Value.cstr = data.getCStr(offset_ptr); + Value.cstr = Data.getCStr(OffsetPtr); break; case DW_FORM_indirect: - Form = static_cast(data.getULEB128(offset_ptr)); - indirect = true; + Form = static_cast(Data.getULEB128(OffsetPtr)); + Indirect = true; break; case DW_FORM_strp: case DW_FORM_sec_offset: @@ -411,85 +404,94 @@ bool DWARFFormValue::extractValue(const DataExtractor &data, case DW_FORM_strp_sup: { if (!U) return false; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - uint8_t Size = U->getDwarfOffsetByteSize(); - Value.uval = data.getUnsigned(offset_ptr, Size); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + Value.uval = getRelocatedValue(Data, U->getDwarfOffsetByteSize(), + OffsetPtr, U->getRelocMap()); break; } case DW_FORM_flag_present: Value.uval = 1; break; case DW_FORM_ref_sig8: - Value.uval = data.getU64(offset_ptr); + Value.uval = Data.getU64(OffsetPtr); break; case DW_FORM_GNU_addr_index: case DW_FORM_GNU_str_index: - Value.uval = data.getULEB128(offset_ptr); + case DW_FORM_strx: + Value.uval = Data.getULEB128(OffsetPtr); break; default: // DWARFFormValue::skipValue() will have caught this and caused all // DWARF DIEs to fail to be parsed, so this code is not be reachable. llvm_unreachable("unsupported form"); } - } while (indirect); + } while (Indirect); - if (is_block) { - StringRef str = data.getData().substr(*offset_ptr, Value.uval); + if (IsBlock) { + StringRef Str = Data.getData().substr(*OffsetPtr, Value.uval); Value.data = nullptr; - if (!str.empty()) { - Value.data = reinterpret_cast(str.data()); - *offset_ptr += Value.uval; + if (!Str.empty()) { + Value.data = reinterpret_cast(Str.data()); + *OffsetPtr += Value.uval; } } return true; } -bool DWARFFormValue::skipValue(DataExtractor DebugInfoData, - uint32_t *offset_ptr, const DWARFUnit *U) const { - return DWARFFormValue::skipValue(Form, DebugInfoData, offset_ptr, U); +bool DWARFFormValue::skipValue(DataExtractor DebugInfoData, uint32_t *OffsetPtr, + const DWARFUnit *U) const { + return DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U); } -bool DWARFFormValue::skipValue(dwarf::Form form, DataExtractor DebugInfoData, - uint32_t *offset_ptr, const DWARFUnit *U) { - return skipFormValue(form, DebugInfoData, offset_ptr, U); +bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, + uint32_t *OffsetPtr, const DWARFUnit *U) { + return skipFormValue(Form, DebugInfoData, OffsetPtr, U); } -bool DWARFFormValue::skipValue(dwarf::Form form, DataExtractor DebugInfoData, - uint32_t *offset_ptr, uint16_t Version, +bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, + uint32_t *OffsetPtr, uint16_t Version, uint8_t AddrSize, llvm::dwarf::DwarfFormat Format) { FormSizeHelper FSH(Version, AddrSize, Format); - return skipFormValue(form, DebugInfoData, offset_ptr, &FSH); + return skipFormValue(Form, DebugInfoData, OffsetPtr, &FSH); } -void -DWARFFormValue::dump(raw_ostream &OS) const { - uint64_t uvalue = Value.uval; - bool cu_relative_offset = false; +void DWARFFormValue::dump(raw_ostream &OS) const { + uint64_t UValue = Value.uval; + bool CURelativeOffset = false; switch (Form) { - case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break; + case DW_FORM_addr: + OS << format("0x%016" PRIx64, UValue); + break; case DW_FORM_GNU_addr_index: { - OS << format(" indexed (%8.8x) address = ", (uint32_t)uvalue); + OS << format(" indexed (%8.8x) address = ", (uint32_t)UValue); uint64_t Address; if (U == nullptr) OS << ""; - else if (U->getAddrOffsetSectionItem(uvalue, Address)) + else if (U->getAddrOffsetSectionItem(UValue, Address)) OS << format("0x%016" PRIx64, Address); else OS << ""; break; } - case DW_FORM_flag_present: OS << "true"; break; + case DW_FORM_flag_present: + OS << "true"; + break; case DW_FORM_flag: - case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break; - case DW_FORM_data2: OS << format("0x%04x", (uint16_t)uvalue); break; - case DW_FORM_data4: OS << format("0x%08x", (uint32_t)uvalue); break; + case DW_FORM_data1: + OS << format("0x%02x", (uint8_t)UValue); + break; + case DW_FORM_data2: + OS << format("0x%04x", (uint16_t)UValue); + break; + case DW_FORM_data4: + OS << format("0x%08x", (uint32_t)UValue); + break; case DW_FORM_ref_sig8: - case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break; + case DW_FORM_data8: + OS << format("0x%016" PRIx64, UValue); + break; case DW_FORM_string: OS << '"'; OS.write_escaped(Value.cstr); @@ -500,80 +502,93 @@ DWARFFormValue::dump(raw_ostream &OS) const { case DW_FORM_block1: case DW_FORM_block2: case DW_FORM_block4: - if (uvalue > 0) { + if (UValue > 0) { switch (Form) { case DW_FORM_exprloc: - case DW_FORM_block: OS << format("<0x%" PRIx64 "> ", uvalue); break; - case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break; - case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break; - case DW_FORM_block4: OS << format("<0x%8.8x> ", (uint32_t)uvalue); break; - default: break; + case DW_FORM_block: + OS << format("<0x%" PRIx64 "> ", UValue); + break; + case DW_FORM_block1: + OS << format("<0x%2.2x> ", (uint8_t)UValue); + break; + case DW_FORM_block2: + OS << format("<0x%4.4x> ", (uint16_t)UValue); + break; + case DW_FORM_block4: + OS << format("<0x%8.8x> ", (uint32_t)UValue); + break; + default: + break; } - const uint8_t* data_ptr = Value.data; - if (data_ptr) { - // uvalue contains size of block - const uint8_t* end_data_ptr = data_ptr + uvalue; - while (data_ptr < end_data_ptr) { - OS << format("%2.2x ", *data_ptr); - ++data_ptr; + const uint8_t *DataPtr = Value.data; + if (DataPtr) { + // UValue contains size of block + const uint8_t *EndDataPtr = DataPtr + UValue; + while (DataPtr < EndDataPtr) { + OS << format("%2.2x ", *DataPtr); + ++DataPtr; } - } - else + } else OS << "NULL"; } break; - case DW_FORM_sdata: OS << Value.sval; break; - case DW_FORM_udata: OS << Value.uval; break; + case DW_FORM_sdata: + OS << Value.sval; + break; + case DW_FORM_udata: + OS << Value.uval; + break; case DW_FORM_strp: - OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)uvalue); + OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)UValue); dumpString(OS); break; + case DW_FORM_strx: case DW_FORM_GNU_str_index: - OS << format(" indexed (%8.8x) string = ", (uint32_t)uvalue); + OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue); dumpString(OS); break; case DW_FORM_GNU_strp_alt: - OS << format("alt indirect string, offset: 0x%" PRIx64 "", uvalue); + OS << format("alt indirect string, offset: 0x%" PRIx64 "", UValue); dumpString(OS); break; case DW_FORM_ref_addr: - OS << format("0x%016" PRIx64, uvalue); + OS << format("0x%016" PRIx64, UValue); break; case DW_FORM_ref1: - cu_relative_offset = true; - OS << format("cu + 0x%2.2x", (uint8_t)uvalue); + CURelativeOffset = true; + OS << format("cu + 0x%2.2x", (uint8_t)UValue); break; case DW_FORM_ref2: - cu_relative_offset = true; - OS << format("cu + 0x%4.4x", (uint16_t)uvalue); + CURelativeOffset = true; + OS << format("cu + 0x%4.4x", (uint16_t)UValue); break; case DW_FORM_ref4: - cu_relative_offset = true; - OS << format("cu + 0x%4.4x", (uint32_t)uvalue); + CURelativeOffset = true; + OS << format("cu + 0x%4.4x", (uint32_t)UValue); break; case DW_FORM_ref8: - cu_relative_offset = true; - OS << format("cu + 0x%8.8" PRIx64, uvalue); + CURelativeOffset = true; + OS << format("cu + 0x%8.8" PRIx64, UValue); break; case DW_FORM_ref_udata: - cu_relative_offset = true; - OS << format("cu + 0x%" PRIx64, uvalue); + CURelativeOffset = true; + OS << format("cu + 0x%" PRIx64, UValue); break; case DW_FORM_GNU_ref_alt: - OS << format("", uvalue); + OS << format("", UValue); break; - // All DW_FORM_indirect attributes should be resolved prior to calling - // this function + // All DW_FORM_indirect attributes should be resolved prior to calling + // this function case DW_FORM_indirect: OS << "DW_FORM_indirect"; break; - // Should be formatted to 64-bit for DWARF64. + // Should be formatted to 64-bit for DWARF64. case DW_FORM_sec_offset: - OS << format("0x%08x", (uint32_t)uvalue); + OS << format("0x%08x", (uint32_t)UValue); break; default: @@ -581,10 +596,10 @@ DWARFFormValue::dump(raw_ostream &OS) const { break; } - if (cu_relative_offset) { + if (CURelativeOffset) { OS << " => {"; WithColor(OS, syntax::Address).get() - << format("0x%8.8" PRIx64, uvalue + (U ? U->getOffset() : 0)); + << format("0x%8.8" PRIx64, UValue + (U ? U->getOffset() : 0)); OS << "}"; } } @@ -608,10 +623,11 @@ Optional DWARFFormValue::getAsCString() const { if (Form == DW_FORM_GNU_strp_alt || U == nullptr) return None; uint32_t Offset = Value.uval; - if (Form == DW_FORM_GNU_str_index) { - uint32_t StrOffset; + if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx) { + uint64_t StrOffset; if (!U->getStringOffsetSectionItem(Offset, StrOffset)) return None; + StrOffset += U->getStringOffsetSectionRelocation(Offset); Offset = StrOffset; } if (const char *Str = U->getStringExtractor().getCStr(&Offset)) { @@ -661,15 +677,16 @@ Optional DWARFFormValue::getAsSectionOffset() const { } Optional DWARFFormValue::getAsUnsignedConstant() const { - if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) - || Form == DW_FORM_sdata) + if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) || + Form == DW_FORM_sdata) return None; return Value.uval; } Optional DWARFFormValue::getAsSignedConstant() const { if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) || - (Form == DW_FORM_udata && uint64_t(std::numeric_limits::max()) < Value.uval)) + (Form == DW_FORM_udata && + uint64_t(std::numeric_limits::max()) < Value.uval)) return None; switch (Form) { case DW_FORM_data4: diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp index 76354a9b1ddb652f85062a3283f862caa97e312a..ebd6104ab8785604365ca0a60500802dc6888828 100644 --- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include @@ -39,8 +39,9 @@ void DWARFGdbIndex::dumpAddressArea(raw_ostream &OS) const { << '\n'; for (const AddressEntry &Addr : AddressArea) OS << format( - " Low address = 0x%llx, High address = 0x%llx, CU index = %d\n", - Addr.LowAddress, Addr.HighAddress, Addr.CuIndex); + " Low/High address = [0x%llx, 0x%llx) (Size: 0x%llx), CU id = %d\n", + Addr.LowAddress, Addr.HighAddress, Addr.HighAddress - Addr.LowAddress, + Addr.CuIndex); } void DWARFGdbIndex::dumpSymbolTable(raw_ostream &OS) const { diff --git a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp index e0f819383289be33d0513c8ee6645c966d2afe7c..fd1684d33a16b7ec5af057aeb3eff5f1163fef63 100644 --- a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" -#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -24,7 +24,11 @@ bool DWARFTypeUnit::extractImpl(DataExtractor debug_info, return false; TypeHash = debug_info.getU64(offset_ptr); TypeOffset = debug_info.getU32(offset_ptr); - return TypeOffset < getLength(); + // TypeOffset is relative to the beginning of the header, + // so we have to account for the leading length field. + // FIXME: The size of the length field is 12 in DWARF64. + unsigned SizeOfLength = 4; + return TypeOffset < getLength() + SizeOfLength; } void DWARFTypeUnit::dump(raw_ostream &OS, bool SummarizeTypes) { diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp index 4ee8e8f46d2eb540b0bc21f001023106da81d08d..09e6a292e5fe1e221835a2f76fdfdf53c7263322 100644 --- a/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -7,8 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -16,7 +17,6 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" @@ -32,34 +32,31 @@ using namespace llvm; using namespace dwarf; void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) { - parseImpl(C, Section, C.getDebugAbbrev(), C.getRangeSection(), - C.getStringSection(), StringRef(), C.getAddrSection(), - C.getLineSection().Data, C.isLittleEndian(), false); + parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(), + C.getStringSection(), C.getStringOffsetSection(), + &C.getAddrSection(), C.getLineSection().Data, C.isLittleEndian(), + false); } void DWARFUnitSectionBase::parseDWO(DWARFContext &C, const DWARFSection &DWOSection, DWARFUnitIndex *Index) { - parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), C.getRangeDWOSection(), + parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), &C.getRangeDWOSection(), C.getStringDWOSection(), C.getStringOffsetDWOSection(), - C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(), + &C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(), true); } DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section, - const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, StringRef LS, bool LE, - bool IsDWO, const DWARFUnitSectionBase &UnitSection, + const DWARFDebugAbbrev *DA, const DWARFSection *RS, + StringRef SS, const DWARFSection &SOS, + const DWARFSection *AOS, StringRef LS, bool LE, bool IsDWO, + const DWARFUnitSectionBase &UnitSection, const DWARFUnitIndex::Entry *IndexEntry) : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS), - LineSection(LS), StringSection(SS), StringOffsetSection([&]() { - if (IndexEntry) - if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS)) - return SOS.slice(C->Offset, C->Offset + C->Length); - return SOS; - }()), - AddrOffsetSection(AOS), isLittleEndian(LE), isDWO(IsDWO), - UnitSection(UnitSection), IndexEntry(IndexEntry) { + LineSection(LS), StringSection(SS), StringOffsetSection(SOS), + StringOffsetSectionBase(0), AddrOffsetSection(AOS), isLittleEndian(LE), + isDWO(IsDWO), UnitSection(UnitSection), IndexEntry(IndexEntry) { clear(); } @@ -68,25 +65,33 @@ DWARFUnit::~DWARFUnit() = default; bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const { uint32_t Offset = AddrOffsetSectionBase + Index * AddrSize; - if (AddrOffsetSection.size() < Offset + AddrSize) + if (AddrOffsetSection->Data.size() < Offset + AddrSize) return false; - DataExtractor DA(AddrOffsetSection, isLittleEndian, AddrSize); - Result = DA.getAddress(&Offset); + DataExtractor DA(AddrOffsetSection->Data, isLittleEndian, AddrSize); + Result = getRelocatedValue(DA, AddrSize, &Offset, &AddrOffsetSection->Relocs); return true; } bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index, - uint32_t &Result) const { - // FIXME: string offset section entries are 8-byte for DWARF64. - const uint32_t ItemSize = 4; - uint32_t Offset = Index * ItemSize; - if (StringOffsetSection.size() < Offset + ItemSize) + uint64_t &Result) const { + unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4; + uint32_t Offset = StringOffsetSectionBase + Index * ItemSize; + if (StringOffsetSection.Data.size() < Offset + ItemSize) return false; - DataExtractor DA(StringOffsetSection, isLittleEndian, 0); - Result = DA.getU32(&Offset); + DataExtractor DA(StringOffsetSection.Data, isLittleEndian, 0); + Result = ItemSize == 4 ? DA.getU32(&Offset) : DA.getU64(&Offset); return true; } +uint64_t DWARFUnit::getStringOffsetSectionRelocation(uint32_t Index) const { + unsigned ItemSize = getFormat() == DWARF64 ? 8 : 4; + uint64_t ByteOffset = StringOffsetSectionBase + Index * ItemSize; + RelocAddrMap::const_iterator AI = getStringOffsetsRelocMap().find(ByteOffset); + if (AI != getStringOffsetsRelocMap().end()) + return AI->second.Value; + return 0; +} + bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { Length = debug_info.getU32(offset_ptr); Version = debug_info.getU16(offset_ptr); @@ -118,6 +123,9 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { if (!LengthOK || !VersionOK || !AddrSizeOK) return false; + // Keep track of the highest DWARF version we encounter across all units. + Context.setMaxVersionIfGreater(Version); + Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset); return Abbrevs != nullptr; } @@ -142,9 +150,10 @@ bool DWARFUnit::extractRangeList(uint32_t RangeListOffset, DWARFDebugRangeList &RangeList) const { // Require that compile unit is extracted. assert(!DieArray.empty()); - DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize); + DataExtractor RangesData(RangeSection->Data, isLittleEndian, AddrSize); uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; - return RangeList.extract(RangesData, &ActualRangeListOffset); + return RangeList.extract(RangesData, &ActualRangeListOffset, + RangeSection->Relocs); } void DWARFUnit::clear() { @@ -240,6 +249,17 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { setBaseAddress(*BaseAddr); AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0); RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0); + + // In general, we derive the offset of the unit's contibution to the + // debug_str_offsets{.dwo} section from the unit DIE's + // DW_AT_str_offsets_base attribute. In dwp files we add to it the offset + // we get from the index table. + StringOffsetSectionBase = + toSectionOffset(UnitDie.find(DW_AT_str_offsets_base), 0); + if (IndexEntry) + if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS)) + StringOffsetSectionBase += C->Offset; + // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for // skeleton CU DIE, so that DWARF users not aware of it are not broken. } @@ -247,20 +267,6 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { return DieArray.size(); } -DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath) { - auto Obj = object::ObjectFile::createObjectFile(DWOPath); - if (!Obj) { - // TODO: Actually report errors helpfully. - consumeError(Obj.takeError()); - return; - } - DWOFile = std::move(Obj.get()); - DWOContext.reset( - cast(new DWARFContextInMemory(*DWOFile.getBinary()))); - if (DWOContext->getNumDWOCompileUnits() > 0) - DWOU = DWOContext->getDWOCompileUnitAtIndex(0); -} - bool DWARFUnit::parseDWO() { if (isDWO) return false; @@ -279,17 +285,21 @@ bool DWARFUnit::parseDWO() { sys::path::append(AbsolutePath, *CompilationDir); } sys::path::append(AbsolutePath, *DWOFileName); - DWO = llvm::make_unique(AbsolutePath); - DWARFUnit *DWOCU = DWO->getUnit(); - // Verify that compile unit in .dwo file is valid. - if (!DWOCU || DWOCU->getDWOId() != getDWOId()) { - DWO.reset(); + auto DWOId = getDWOId(); + if (!DWOId) return false; - } + auto DWOContext = Context.getDWOContext(AbsolutePath); + if (!DWOContext) + return false; + + DWARFCompileUnit *DWOCU = DWOContext->getDWOCompileUnitForHash(*DWOId); + if (!DWOCU) + return false; + DWO = std::shared_ptr(std::move(DWOContext), DWOCU); // Share .debug_addr and .debug_ranges section with compile unit in .dwo - DWOCU->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase); + DWO->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase); auto DWORangesBase = UnitDie.getRangesBaseAttribute(); - DWOCU->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0); + DWO->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0); return true; } @@ -332,8 +342,8 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) { // Collect address ranges from DIEs in .dwo if necessary. bool DWOCreated = parseDWO(); - if (DWO.get()) - DWO->getUnit()->collectAddressRanges(CURanges); + if (DWO) + DWO->collectAddressRanges(CURanges); if (DWOCreated) DWO.reset(); @@ -343,37 +353,63 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) { clearDIEs(true); } -DWARFDie -DWARFUnit::getSubprogramForAddress(uint64_t Address) { - extractDIEsIfNeeded(false); - for (const DWARFDebugInfoEntry &D : DieArray) { - DWARFDie DIE(this, &D); - if (DIE.isSubprogramDIE() && - DIE.addressRangeContainsAddress(Address)) { - return DIE; +void DWARFUnit::updateAddressDieMap(DWARFDie Die) { + if (Die.isSubroutineDIE()) { + for (const auto &R : Die.getAddressRanges()) { + // Ignore 0-sized ranges. + if (R.LowPC == R.HighPC) + continue; + auto B = AddrDieMap.upper_bound(R.LowPC); + if (B != AddrDieMap.begin() && R.LowPC < (--B)->second.first) { + // The range is a sub-range of existing ranges, we need to split the + // existing range. + if (R.HighPC < B->second.first) + AddrDieMap[R.HighPC] = B->second; + if (R.LowPC > B->first) + AddrDieMap[B->first].first = R.LowPC; + } + AddrDieMap[R.LowPC] = std::make_pair(R.HighPC, Die); } } - return DWARFDie(); + // Parent DIEs are added to the AddrDieMap prior to the Children DIEs to + // simplify the logic to update AddrDieMap. The child's range will always + // be equal or smaller than the parent's range. With this assumption, when + // adding one range into the map, it will at most split a range into 3 + // sub-ranges. + for (DWARFDie Child = Die.getFirstChild(); Child; Child = Child.getSibling()) + updateAddressDieMap(Child); +} + +DWARFDie DWARFUnit::getSubroutineForAddress(uint64_t Address) { + extractDIEsIfNeeded(false); + if (AddrDieMap.empty()) + updateAddressDieMap(getUnitDIE()); + auto R = AddrDieMap.upper_bound(Address); + if (R == AddrDieMap.begin()) + return DWARFDie(); + // upper_bound's previous item contains Address. + --R; + if (Address >= R->second.first) + return DWARFDie(); + return R->second.second; } void DWARFUnit::getInlinedChainForAddress(uint64_t Address, SmallVectorImpl &InlinedChain) { - // First, find a subprogram that contains the given address (the root - // of inlined chain). - DWARFDie SubprogramDIE; + assert(InlinedChain.empty()); // Try to look for subprogram DIEs in the DWO file. parseDWO(); - if (DWO) - SubprogramDIE = DWO->getUnit()->getSubprogramForAddress(Address); - else - SubprogramDIE = getSubprogramForAddress(Address); - - // Get inlined chain rooted at this subprogram DIE. - if (SubprogramDIE) - SubprogramDIE.getInlinedChainForAddress(Address, InlinedChain); - else - InlinedChain.clear(); + // First, find the subroutine that contains the given address (the leaf + // of inlined chain). + DWARFDie SubroutineDIE = + (DWO ? DWO.get() : this)->getSubroutineForAddress(Address); + + while (SubroutineDIE) { + if (SubroutineDIE.isSubroutineDIE()) + InlinedChain.push_back(SubroutineDIE); + SubroutineDIE = SubroutineDIE.getParent(); + } } const DWARFUnitIndex &llvm::getDWARFUnitIndex(DWARFContext &Context, diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index 0981a4dfdfa57d4eceddb0f0df26546d122ff977..59b3d0ca55a635ae8d86c44de9aafe486f39c5b6 100644 --- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp new file mode 100644 index 0000000000000000000000000000000000000000..41907e5705637a57d65ee0eb549cabee173b7ab9 --- /dev/null +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -0,0 +1,311 @@ +//===- DWARFVerifier.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" +#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +using namespace llvm; +using namespace dwarf; +using namespace object; + +void DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, + DWARFAttribute &AttrValue) { + const auto Attr = AttrValue.Attr; + switch (Attr) { + case DW_AT_ranges: + // Make sure the offset in the DW_AT_ranges attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_AT_ranges offset is beyond .debug_ranges " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { + ++NumDebugInfoErrors; + OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + case DW_AT_stmt_list: + // Make sure the offset in the DW_AT_stmt_list attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getLineSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_AT_stmt_list offset is beyond .debug_line " + "bounds: " + << format("0x%08" PRIx32, *SectionOffset) << "\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { + ++NumDebugInfoErrors; + OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + + default: + break; + } +} + +void DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, + DWARFAttribute &AttrValue) { + const auto Form = AttrValue.Value.getForm(); + switch (Form) { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: { + // Verify all CU relative references are valid CU offsets. + Optional RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + auto DieCU = Die.getDwarfUnit(); + auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); + auto CUOffset = AttrValue.Value.getRawUValue(); + if (CUOffset >= CUSize) { + ++NumDebugInfoErrors; + OS << "error: " << FormEncodingString(Form) << " CU offset " + << format("0x%08" PRIx32, CUOffset) + << " is invalid (must be less than CU size of " + << format("0x%08" PRIx32, CUSize) << "):\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_ref_addr: { + // Verify all absolute DIE references have valid offsets in the + // .debug_info section. + Optional RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + if (*RefVal >= DCtx.getInfoSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_FORM_ref_addr offset beyond .debug_info " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_strp: { + auto SecOffset = AttrValue.Value.getAsSectionOffset(); + assert(SecOffset); // DW_FORM_strp is a section offset. + if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { + ++NumDebugInfoErrors; + OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + } + default: + break; + } +} + +void DWARFVerifier::verifyDebugInfoReferences() { + // Take all references and make sure they point to an actual DIE by + // getting the DIE by offset and emitting an error + OS << "Verifying .debug_info references...\n"; + for (auto Pair : ReferenceToDIEOffsets) { + auto Die = DCtx.getDIEForOffset(Pair.first); + if (Die) + continue; + ++NumDebugInfoErrors; + OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) + << ". Offset is in between DIEs:\n"; + for (auto Offset : Pair.second) { + auto ReferencingDie = DCtx.getDIEForOffset(Offset); + ReferencingDie.dump(OS, 0); + OS << "\n"; + } + OS << "\n"; + } +} + +bool DWARFVerifier::handleDebugInfo() { + NumDebugInfoErrors = 0; + OS << "Verifying .debug_info...\n"; + for (const auto &CU : DCtx.compile_units()) { + unsigned NumDies = CU->getNumDIEs(); + for (unsigned I = 0; I < NumDies; ++I) { + auto Die = CU->getDIEAtIndex(I); + const auto Tag = Die.getTag(); + if (Tag == DW_TAG_null) + continue; + for (auto AttrValue : Die.attributes()) { + verifyDebugInfoAttribute(Die, AttrValue); + verifyDebugInfoForm(Die, AttrValue); + } + } + } + verifyDebugInfoReferences(); + return NumDebugInfoErrors == 0; +} + +void DWARFVerifier::verifyDebugLineStmtOffsets() { + std::map StmtListToDie; + for (const auto &CU : DCtx.compile_units()) { + auto Die = CU->getUnitDIE(); + // Get the attribute value as a section offset. No need to produce an + // error here if the encoding isn't correct because we validate this in + // the .debug_info verifier. + auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list)); + if (!StmtSectionOffset) + continue; + const uint32_t LineTableOffset = *StmtSectionOffset; + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + if (LineTableOffset < DCtx.getLineSection().Data.size()) { + if (!LineTable) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] was not able to be parsed for CU:\n"; + Die.dump(OS, 0); + OS << '\n'; + continue; + } + } else { + // Make sure we don't get a valid line table back if the offset is wrong. + assert(LineTable == nullptr); + // Skip this line table as it isn't valid. No need to create an error + // here because we validate this in the .debug_info verifier. + continue; + } + auto Iter = StmtListToDie.find(LineTableOffset); + if (Iter != StmtListToDie.end()) { + ++NumDebugLineErrors; + OS << "error: two compile unit DIEs, " + << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " + << format("0x%08" PRIx32, Die.getOffset()) + << ", have the same DW_AT_stmt_list section offset:\n"; + Iter->second.dump(OS, 0); + Die.dump(OS, 0); + OS << '\n'; + // Already verified this line table before, no need to do it again. + continue; + } + StmtListToDie[LineTableOffset] = Die; + } +} + +void DWARFVerifier::verifyDebugLineRows() { + for (const auto &CU : DCtx.compile_units()) { + auto Die = CU->getUnitDIE(); + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + // If there is no line table we will have created an error in the + // .debug_info verifier or in verifyDebugLineStmtOffsets(). + if (!LineTable) + continue; + uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size(); + uint64_t PrevAddress = 0; + uint32_t RowIndex = 0; + for (const auto &Row : LineTable->Rows) { + if (Row.Address < PrevAddress) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" + << format("0x%08" PRIx32, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "] row[" << RowIndex + << "] decreases in address from previous row:\n"; + + DWARFDebugLine::Row::dumpTableHeader(OS); + if (RowIndex > 0) + LineTable->Rows[RowIndex - 1].dump(OS); + Row.dump(OS); + OS << '\n'; + } + + if (Row.File > MaxFileIndex) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" + << format("0x%08" PRIx32, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "][" << RowIndex << "] has invalid file index " << Row.File + << " (valid values are [1," << MaxFileIndex << "]):\n"; + DWARFDebugLine::Row::dumpTableHeader(OS); + Row.dump(OS); + OS << '\n'; + } + if (Row.EndSequence) + PrevAddress = 0; + else + PrevAddress = Row.Address; + ++RowIndex; + } + } +} + +bool DWARFVerifier::handleDebugLine() { + NumDebugLineErrors = 0; + OS << "Verifying .debug_line...\n"; + verifyDebugLineStmtOffsets(); + verifyDebugLineRows(); + return NumDebugLineErrors == 0; +} + +bool DWARFVerifier::handleAppleNames() { + NumAppleNamesErrors = 0; + + DataExtractor AppleNamesSection(DCtx.getAppleNamesSection().Data, + DCtx.isLittleEndian(), 0); + DataExtractor StrData(DCtx.getStringSection(), DCtx.isLittleEndian(), 0); + DWARFAcceleratorTable AppleNames(AppleNamesSection, StrData, + DCtx.getAppleNamesSection().Relocs); + + if (!AppleNames.extract()) { + return true; + } + + OS << "Verifying .apple_names...\n"; + + // Verify that all buckets have a valid hash index or are empty + uint32_t NumBuckets = AppleNames.getNumBuckets(); + uint32_t NumHashes = AppleNames.getNumHashes(); + + uint32_t BucketsOffset = + AppleNames.getSizeHdr() + AppleNames.getHeaderDataLength(); + + for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) { + uint32_t HashIdx = AppleNamesSection.getU32(&BucketsOffset); + if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) { + OS << format("error: Bucket[%d] has invalid hash index: [%d]\n", + BucketIdx, HashIdx); + ++NumAppleNamesErrors; + } + } + return NumAppleNamesErrors == 0; +} diff --git a/lib/DebugInfo/DWARF/LLVMBuild.txt b/lib/DebugInfo/DWARF/LLVMBuild.txt index 9f8b1047ef6bf9cf758e84751fed47b3e7709934..8242a7f2e7f77375fa7d2ed9fe873aa9a9722bdd 100644 --- a/lib/DebugInfo/DWARF/LLVMBuild.txt +++ b/lib/DebugInfo/DWARF/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = DebugInfoDWARF parent = DebugInfo -required_libraries = Object Support +required_libraries = BinaryFormat Object Support diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp index 57953cfa338ef07419b683f2e2963197b12c8eb2..faf2442bc94bb9bb4d786185edd23ff90514383d 100644 --- a/lib/DebugInfo/MSF/MappedBlockStream.cpp +++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp @@ -45,45 +45,48 @@ static Interval intersect(const Interval &I1, const Interval &I2) { std::min(I1.second, I2.second)); } -MappedBlockStream::MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks, +MappedBlockStream::MappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &Layout, - BinaryStreamRef MsfData) - : BlockSize(BlockSize), NumBlocks(NumBlocks), StreamLayout(Layout), - MsfData(MsfData) {} - -std::unique_ptr -MappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &Layout, - BinaryStreamRef MsfData) { + BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) + : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData), + Allocator(Allocator) {} + +std::unique_ptr MappedBlockStream::createStream( + uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { return llvm::make_unique>( - BlockSize, NumBlocks, Layout, MsfData); + BlockSize, Layout, MsfData, Allocator); } std::unique_ptr MappedBlockStream::createIndexedStream( - const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex) { + const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex, + BumpPtrAllocator &Allocator) { assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index"); MSFStreamLayout SL; SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; return llvm::make_unique>( - Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr MappedBlockStream::createDirectoryStream(const MSFLayout &Layout, - BinaryStreamRef MsfData) { + BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; SL.Blocks = Layout.DirectoryBlocks; SL.Length = Layout.SB->NumDirectoryBytes; - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr MappedBlockStream::createFpmStream(const MSFLayout &Layout, - BinaryStreamRef MsfData) { + BinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; initializeFpmStreamLayout(Layout, SL); - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, @@ -149,7 +152,7 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, // into it, and return an ArrayRef to that. Do not touch existing pool // allocations, as existing clients may be holding a pointer which must // not be invalidated. - uint8_t *WriteBuffer = static_cast(Pool.Allocate(Size, 8)); + uint8_t *WriteBuffer = static_cast(Allocator.Allocate(Size, 8)); if (auto EC = readBytes(Offset, MutableArrayRef(WriteBuffer, Size))) return EC; @@ -173,7 +176,7 @@ Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset, uint32_t First = Offset / BlockSize; uint32_t Last = First; - while (Last < NumBlocks - 1) { + while (Last < getNumBlocks() - 1) { if (StreamLayout.Blocks[Last] != StreamLayout.Blocks[Last + 1] - 1) break; ++Last; @@ -270,10 +273,6 @@ Error MappedBlockStream::readBytes(uint32_t Offset, return Error::success(); } -uint32_t MappedBlockStream::getNumBytesCopied() const { - return static_cast(Pool.getBytesAllocated()); -} - void MappedBlockStream::invalidateCache() { CacheMap.shrink_and_clear(); } void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset, @@ -313,45 +312,49 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset, } WritableMappedBlockStream::WritableMappedBlockStream( - uint32_t BlockSize, uint32_t NumBlocks, const MSFStreamLayout &Layout, - WritableBinaryStreamRef MsfData) - : ReadInterface(BlockSize, NumBlocks, Layout, MsfData), + uint32_t BlockSize, const MSFStreamLayout &Layout, + WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator) + : ReadInterface(BlockSize, Layout, MsfData, Allocator), WriteInterface(MsfData) {} std::unique_ptr -WritableMappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks, +WritableMappedBlockStream::createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, - WritableBinaryStreamRef MsfData) { + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { return llvm::make_unique>( - BlockSize, NumBlocks, Layout, MsfData); + BlockSize, Layout, MsfData, Allocator); } std::unique_ptr WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, - uint32_t StreamIndex) { + uint32_t StreamIndex, + BumpPtrAllocator &Allocator) { assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index"); MSFStreamLayout SL; SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr WritableMappedBlockStream::createDirectoryStream( - const MSFLayout &Layout, WritableBinaryStreamRef MsfData) { + const MSFLayout &Layout, WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; SL.Blocks = Layout.DirectoryBlocks; SL.Length = Layout.SB->NumDirectoryBytes; - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } std::unique_ptr WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout, - WritableBinaryStreamRef MsfData) { + WritableBinaryStreamRef MsfData, + BumpPtrAllocator &Allocator) { MSFStreamLayout SL; initializeFpmStreamLayout(Layout, SL); - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator); } Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt index 1295d2a19ce26c5f138458cb631061279736fc58..e9fd29ccc4caf373ac43384440a3c73cbd133ff7 100644 --- a/lib/DebugInfo/PDB/CMakeLists.txt +++ b/lib/DebugInfo/PDB/CMakeLists.txt @@ -28,6 +28,9 @@ if(LLVM_ENABLE_DIA_SDK) endif() add_pdb_impl_folder(Native + Native/DbiModuleDescriptor.cpp + Native/DbiModuleDescriptorBuilder.cpp + Native/DbiModuleList.cpp Native/DbiStream.cpp Native/DbiStreamBuilder.cpp Native/EnumTables.cpp @@ -37,9 +40,7 @@ add_pdb_impl_folder(Native Native/HashTable.cpp Native/InfoStream.cpp Native/InfoStreamBuilder.cpp - Native/ModInfo.cpp - Native/ModInfoBuilder.cpp - Native/ModStream.cpp + Native/ModuleDebugStream.cpp Native/NativeCompilandSymbol.cpp Native/NativeEnumModules.cpp Native/NativeExeSymbol.cpp @@ -48,11 +49,11 @@ add_pdb_impl_folder(Native Native/NativeSession.cpp Native/PDBFile.cpp Native/PDBFileBuilder.cpp + Native/PDBStringTable.cpp + Native/PDBStringTableBuilder.cpp Native/PDBTypeServerHandler.cpp Native/PublicsStream.cpp Native/RawError.cpp - Native/StringTable.cpp - Native/StringTableBuilder.cpp Native/SymbolStream.cpp Native/TpiHashing.cpp Native/TpiStream.cpp @@ -101,6 +102,7 @@ add_llvm_library(LLVMDebugInfoPDB PDBSymbolUnknown.cpp PDBSymbolUsingNamespace.cpp PDBSymDumper.cpp + UDTLayout.cpp ${PDB_IMPL_SOURCES} ADDITIONAL_HEADER_DIRS diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp index cae817c1b367dc337081c03a72701fa579526f6f..f62c4991fe33de3ade543f46c4fd86edf7edc888 100644 --- a/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp +++ b/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" -#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h" +#include "llvm/DebugInfo/PDB/DIA/DIADataStream.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp index 4741d9c9a849917eb93cfe527f8775d0bb058727..796ce214b383817fa4fdc0f31967d9ff50532a78 100644 --- a/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp +++ b/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h" #include "llvm/DebugInfo/PDB/DIA/DIALineNumber.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp index ccf8c4e622ccadffb3ed453b63300282434ae0fa..b9311d0601287d5d616eba4e39dbb73e06f38012 100644 --- a/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp +++ b/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h" #include "llvm/DebugInfo/PDB/DIA/DIASourceFile.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp index 3c211b569044957c7e765d9a2a931a5c0939a3ce..266638530c2f7c59aec3ed3e73b837aff8a4cb1c 100644 --- a/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp +++ b/lib/DebugInfo/PDB/DIA/DIAEnumSymbols.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h" #include "llvm/DebugInfo/PDB/DIA/DIARawSymbol.h" #include "llvm/DebugInfo/PDB/DIA/DIASession.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" using namespace llvm; using namespace llvm::pdb; diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp index 6182dab213c448cc1725b80e0ea58554ae427a95..0b48a366bd243300bf5e244596f74d43009ec3ca 100644 --- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp +++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp @@ -14,6 +14,10 @@ #include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h" #include "llvm/DebugInfo/PDB/DIA/DIASession.h" #include "llvm/DebugInfo/PDB/PDBExtras.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/raw_ostream.h" @@ -368,8 +372,11 @@ DIARawSymbol::findChildren(PDB_SymType Type) const { enum SymTagEnum EnumVal = static_cast(Type); CComPtr DiaEnumerator; - if (S_OK != Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator)) - return nullptr; + if (S_OK != + Symbol->findChildrenEx(EnumVal, nullptr, nsNone, &DiaEnumerator)) { + if (S_OK != Symbol->findChildren(EnumVal, nullptr, nsNone, &DiaEnumerator)) + return nullptr; + } return llvm::make_unique(Session, DiaEnumerator); } @@ -717,6 +724,18 @@ uint32_t DIARawSymbol::getVirtualTableShapeId() const { return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualTableShapeId); } +std::unique_ptr +DIARawSymbol::getVirtualBaseTableType() const { + CComPtr TableType; + if (FAILED(Symbol->get_virtualBaseTableType(&TableType)) || !TableType) + return nullptr; + + auto RawVT = llvm::make_unique(Session, TableType); + auto Pointer = + llvm::make_unique(Session, std::move(RawVT)); + return unique_dyn_cast(Pointer->getPointeeType()); +} + PDB_DataKind DIARawSymbol::getDataKind() const { return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_dataKind); diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp index 7077bda4a534795c9492bef4c021efaac1700d56..ef47b92b4f2f31bfca62cf5e4690fdedc6e69541 100644 --- a/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -21,12 +21,22 @@ #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::pdb; -static Error ErrorFromHResult(HRESULT Result, StringRef Context) { +template +static Error ErrorFromHResult(HRESULT Result, const char *Str, Ts &&... Args) { + SmallString<64> MessageStorage; + StringRef Context; + if (sizeof...(Args) > 0) { + MessageStorage = formatv(Str, std::forward(Args)...).str(); + Context = MessageStorage; + } else + Context = Str; + switch (Result) { case E_PDB_NOT_FOUND: return make_error(generic_error_code::invalid_path, Context); @@ -95,8 +105,9 @@ Error DIASession::createFromPdb(StringRef Path, const wchar_t *Path16Str = reinterpret_cast(Path16.data()); HRESULT HR; - if (FAILED(HR = DiaDataSource->loadDataFromPdb(Path16Str))) - return ErrorFromHResult(HR, "Calling loadDataFromPdb"); + if (FAILED(HR = DiaDataSource->loadDataFromPdb(Path16Str))) { + return ErrorFromHResult(HR, "Calling loadDataFromPdb {0}", Path); + } if (FAILED(HR = DiaDataSource->openSession(&DiaSession))) return ErrorFromHResult(HR, "Calling openSession"); @@ -140,7 +151,7 @@ void DIASession::setLoadAddress(uint64_t Address) { Session->put_loadAddress(Address); } -std::unique_ptr DIASession::getGlobalScope() { +std::unique_ptr DIASession::getGlobalScope() const { CComPtr GlobalScope; if (S_OK != Session->get_globalScope(&GlobalScope)) return nullptr; diff --git a/lib/DebugInfo/PDB/Native/ModInfo.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp similarity index 50% rename from lib/DebugInfo/PDB/Native/ModInfo.cpp rename to lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp index 1405286fd0885f81f9e1ba085797380d7c507ec7..dabcc3447ee5ab6013dcf93d4c49448ea867dd57 100644 --- a/lib/DebugInfo/PDB/Native/ModInfo.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp @@ -1,4 +1,4 @@ -//===- ModInfo.cpp - PDB module information -------------------------------===// +//===- DbiModuleDescriptor.cpp - PDB module information -------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Endian.h" @@ -19,13 +19,15 @@ using namespace llvm; using namespace llvm::pdb; using namespace llvm::support; -ModInfo::ModInfo() = default; +DbiModuleDescriptor::DbiModuleDescriptor() = default; -ModInfo::ModInfo(const ModInfo &Info) = default; +DbiModuleDescriptor::DbiModuleDescriptor(const DbiModuleDescriptor &Info) = + default; -ModInfo::~ModInfo() = default; +DbiModuleDescriptor::~DbiModuleDescriptor() = default; -Error ModInfo::initialize(BinaryStreamRef Stream, ModInfo &Info) { +Error DbiModuleDescriptor::initialize(BinaryStreamRef Stream, + DbiModuleDescriptor &Info) { BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(Info.Layout)) return EC; @@ -38,40 +40,48 @@ Error ModInfo::initialize(BinaryStreamRef Stream, ModInfo &Info) { return Error::success(); } -bool ModInfo::hasECInfo() const { +bool DbiModuleDescriptor::hasECInfo() const { return (Layout->Flags & ModInfoFlags::HasECFlagMask) != 0; } -uint16_t ModInfo::getTypeServerIndex() const { +uint16_t DbiModuleDescriptor::getTypeServerIndex() const { return (Layout->Flags & ModInfoFlags::TypeServerIndexMask) >> ModInfoFlags::TypeServerIndexShift; } -uint16_t ModInfo::getModuleStreamIndex() const { return Layout->ModDiStream; } +uint16_t DbiModuleDescriptor::getModuleStreamIndex() const { + return Layout->ModDiStream; +} -uint32_t ModInfo::getSymbolDebugInfoByteSize() const { +uint32_t DbiModuleDescriptor::getSymbolDebugInfoByteSize() const { return Layout->SymBytes; } -uint32_t ModInfo::getLineInfoByteSize() const { return Layout->LineBytes; } +uint32_t DbiModuleDescriptor::getC11LineInfoByteSize() const { + return Layout->C11Bytes; +} -uint32_t ModInfo::getC13LineInfoByteSize() const { return Layout->C13Bytes; } +uint32_t DbiModuleDescriptor::getC13LineInfoByteSize() const { + return Layout->C13Bytes; +} -uint32_t ModInfo::getNumberOfFiles() const { return Layout->NumFiles; } +uint32_t DbiModuleDescriptor::getNumberOfFiles() const { + return Layout->NumFiles; +} -uint32_t ModInfo::getSourceFileNameIndex() const { +uint32_t DbiModuleDescriptor::getSourceFileNameIndex() const { return Layout->SrcFileNameNI; } -uint32_t ModInfo::getPdbFilePathNameIndex() const { +uint32_t DbiModuleDescriptor::getPdbFilePathNameIndex() const { return Layout->PdbFilePathNI; } -StringRef ModInfo::getModuleName() const { return ModuleName; } +StringRef DbiModuleDescriptor::getModuleName() const { return ModuleName; } -StringRef ModInfo::getObjFileName() const { return ObjFileName; } +StringRef DbiModuleDescriptor::getObjFileName() const { return ObjFileName; } -uint32_t ModInfo::getRecordLength() const { +uint32_t DbiModuleDescriptor::getRecordLength() const { uint32_t M = ModuleName.str().size() + 1; uint32_t O = ObjFileName.str().size() + 1; uint32_t Size = sizeof(ModuleInfoHeader) + M + O; diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..81a9d3eeec6199ac72535a005f20f6c018dc6dfb --- /dev/null +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -0,0 +1,184 @@ +//===- DbiModuleDescriptorBuilder.cpp - PDB Mod Info Creation ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/Support/BinaryItemStream.h" +#include "llvm/Support/BinaryStreamWriter.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::msf; +using namespace llvm::pdb; + +namespace llvm { +template <> struct BinaryItemTraits { + static size_t length(const CVSymbol &Item) { return Item.RecordData.size(); } + + static ArrayRef bytes(const CVSymbol &Item) { + return Item.RecordData; + } +}; +} + +static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize, + uint32_t C13Size) { + uint32_t Size = sizeof(uint32_t); // Signature + Size += alignTo(SymbolByteSize, 4); // Symbol Data + Size += 0; // TODO: Layout.C11Bytes + Size += C13Size; // C13 Debug Info Size + Size += sizeof(uint32_t); // GlobalRefs substream size (always 0) + Size += 0; // GlobalRefs substream bytes + return Size; +} + +DbiModuleDescriptorBuilder::DbiModuleDescriptorBuilder(StringRef ModuleName, + uint32_t ModIndex, + msf::MSFBuilder &Msf) + : MSF(Msf), ModuleName(ModuleName) { + Layout.Mod = ModIndex; +} + +DbiModuleDescriptorBuilder::~DbiModuleDescriptorBuilder() {} + +uint16_t DbiModuleDescriptorBuilder::getStreamIndex() const { + return Layout.ModDiStream; +} + +void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) { + ObjFileName = Name; +} + +void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) { + Symbols.push_back(Symbol); + // Symbols written to a PDB file are required to be 4 byte aligned. The same + // is not true of object files. + assert(Symbol.length() % alignOf(CodeViewContainer::Pdb) == 0 && + "Invalid Symbol alignment!"); + SymbolByteSize += Symbol.length(); +} + +void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) { + SourceFiles.push_back(Path); +} + +uint32_t DbiModuleDescriptorBuilder::calculateC13DebugInfoSize() const { + uint32_t Result = 0; + for (const auto &Builder : C13Builders) { + assert(Builder && "Empty C13 Fragment Builder!"); + Result += Builder->calculateSerializedLength(); + } + return Result; +} + +uint32_t DbiModuleDescriptorBuilder::calculateSerializedLength() const { + uint32_t L = sizeof(Layout); + uint32_t M = ModuleName.size() + 1; + uint32_t O = ObjFileName.size() + 1; + return alignTo(L + M + O, sizeof(uint32_t)); +} + +template struct Foo { + explicit Foo(T &&Answer) : Answer(Answer) {} + + T Answer; +}; + +template Foo makeFoo(T &&t) { return Foo(std::move(t)); } + +void DbiModuleDescriptorBuilder::finalize() { + Layout.FileNameOffs = 0; // TODO: Fix this + Layout.Flags = 0; // TODO: Fix this + Layout.C11Bytes = 0; + Layout.C13Bytes = calculateC13DebugInfoSize(); + (void)Layout.Mod; // Set in constructor + (void)Layout.ModDiStream; // Set in finalizeMsfLayout + Layout.NumFiles = SourceFiles.size(); + Layout.PdbFilePathNI = 0; + Layout.SrcFileNameNI = 0; + + // This value includes both the signature field as well as the record bytes + // from the symbol stream. + Layout.SymBytes = SymbolByteSize + sizeof(uint32_t); +} + +Error DbiModuleDescriptorBuilder::finalizeMsfLayout() { + this->Layout.ModDiStream = kInvalidStreamIndex; + uint32_t C13Size = calculateC13DebugInfoSize(); + auto ExpectedSN = + MSF.addStream(calculateDiSymbolStreamSize(SymbolByteSize, C13Size)); + if (!ExpectedSN) + return ExpectedSN.takeError(); + Layout.ModDiStream = *ExpectedSN; + return Error::success(); +} + +Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, + const msf::MSFLayout &MsfLayout, + WritableBinaryStreamRef MsfBuffer) { + // We write the Modi record to the `ModiWriter`, but we additionally write its + // symbol stream to a brand new stream. + if (auto EC = ModiWriter.writeObject(Layout)) + return EC; + if (auto EC = ModiWriter.writeCString(ModuleName)) + return EC; + if (auto EC = ModiWriter.writeCString(ObjFileName)) + return EC; + if (auto EC = ModiWriter.padToAlignment(sizeof(uint32_t))) + return EC; + + if (Layout.ModDiStream != kInvalidStreamIndex) { + auto NS = WritableMappedBlockStream::createIndexedStream( + MsfLayout, MsfBuffer, Layout.ModDiStream, MSF.getAllocator()); + WritableBinaryStreamRef Ref(*NS); + BinaryStreamWriter SymbolWriter(Ref); + // Write the symbols. + if (auto EC = + SymbolWriter.writeInteger(COFF::DEBUG_SECTION_MAGIC)) + return EC; + BinaryItemStream Records(llvm::support::endianness::little); + Records.setItems(Symbols); + BinaryStreamRef RecordsRef(Records); + if (auto EC = SymbolWriter.writeStreamRef(RecordsRef)) + return EC; + if (auto EC = SymbolWriter.padToAlignment(4)) + return EC; + // TODO: Write C11 Line data + assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 && + "Invalid debug section alignment!"); + for (const auto &Builder : C13Builders) { + assert(Builder && "Empty C13 Fragment Builder!"); + if (auto EC = Builder->commit(SymbolWriter)) + return EC; + } + + // TODO: Figure out what GlobalRefs substream actually is and populate it. + if (auto EC = SymbolWriter.writeInteger(0)) + return EC; + if (SymbolWriter.bytesRemaining() > 0) + return make_error(raw_error_code::stream_too_long); + } + return Error::success(); +} + +void DbiModuleDescriptorBuilder::addDebugSubsection( + std::shared_ptr Subsection) { + assert(Subsection); + C13Builders.push_back(llvm::make_unique( + std::move(Subsection), CodeViewContainer::Pdb)); +} diff --git a/lib/DebugInfo/PDB/Native/DbiModuleList.cpp b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp new file mode 100644 index 0000000000000000000000000000000000000000..434f775097e0490a1e3c92c291501ed936b79afb --- /dev/null +++ b/lib/DebugInfo/PDB/Native/DbiModuleList.cpp @@ -0,0 +1,273 @@ +//===- DbiModuleList.cpp - PDB module information list ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" + +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/Support/Error.h" + +using namespace llvm; +using namespace llvm::pdb; + +DbiModuleSourceFilesIterator::DbiModuleSourceFilesIterator( + const DbiModuleList &Modules, uint32_t Modi, uint16_t Filei) + : Modules(&Modules), Modi(Modi), Filei(Filei) { + setValue(); +} + +bool DbiModuleSourceFilesIterator:: +operator==(const DbiModuleSourceFilesIterator &R) const { + // incompatible iterators are never equal + if (!isCompatible(R)) + return false; + + // If they're compatible, and they're both ends, then they're equal. + if (isEnd() && R.isEnd()) + return true; + + // If one is an end and the other is not, they're not equal. + if (isEnd() != R.isEnd()) + return false; + + // Now we know: + // - They're compatible + // - They're not *both* end iterators + // - Their endness is the same. + // Thus, they're compatible iterators pointing to a valid file on the same + // module. All we need to check are the file indices. + assert(Modules == R.Modules); + assert(Modi == R.Modi); + assert(!isEnd()); + assert(!R.isEnd()); + + return (Filei == R.Filei); +} + +bool DbiModuleSourceFilesIterator:: +operator<(const DbiModuleSourceFilesIterator &R) const { + assert(isCompatible(R)); + + // It's not sufficient to compare the file indices, because default + // constructed iterators could be equal to iterators with valid indices. To + // account for this, early-out if they're equal. + if (*this == R) + return false; + + return Filei < R.Filei; +} + +std::ptrdiff_t DbiModuleSourceFilesIterator:: +operator-(const DbiModuleSourceFilesIterator &R) const { + assert(isCompatible(R)); + assert(!(*this < R)); + + // If they're both end iterators, the distance is 0. + if (isEnd() && R.isEnd()) + return 0; + + assert(!R.isEnd()); + + // At this point, R cannot be end, but *this can, which means that *this + // might be a universal end iterator with none of its fields set. So in that + // case have to rely on R as the authority to figure out how many files there + // are to compute the distance. + uint32_t Thisi = Filei; + if (isEnd()) { + uint32_t RealModi = R.Modi; + Thisi = R.Modules->getSourceFileCount(RealModi); + } + + assert(Thisi >= R.Filei); + return Thisi - R.Filei; +} + +DbiModuleSourceFilesIterator &DbiModuleSourceFilesIterator:: +operator+=(std::ptrdiff_t N) { + assert(!isEnd()); + + Filei += N; + assert(Filei <= Modules->getSourceFileCount(Modi)); + setValue(); + return *this; +} + +DbiModuleSourceFilesIterator &DbiModuleSourceFilesIterator:: +operator-=(std::ptrdiff_t N) { + // Note that we can subtract from an end iterator, but not a universal end + // iterator. + assert(!isUniversalEnd()); + + assert(N <= Filei); + + Filei -= N; + return *this; +} + +void DbiModuleSourceFilesIterator::setValue() { + if (isEnd()) { + ThisValue = ""; + return; + } + + uint32_t Off = Modules->ModuleInitialFileIndex[Modi] + Filei; + auto ExpectedValue = Modules->getFileName(Off); + if (!ExpectedValue) { + consumeError(ExpectedValue.takeError()); + Filei = Modules->getSourceFileCount(Modi); + } else + ThisValue = *ExpectedValue; +} + +bool DbiModuleSourceFilesIterator::isEnd() const { + if (isUniversalEnd()) + return true; + + assert(Modules); + assert(Modi <= Modules->getModuleCount()); + assert(Filei <= Modules->getSourceFileCount(Modi)); + + if (Modi == Modules->getModuleCount()) + return true; + if (Filei == Modules->getSourceFileCount(Modi)) + return true; + return false; +} + +bool DbiModuleSourceFilesIterator::isUniversalEnd() const { return !Modules; } + +bool DbiModuleSourceFilesIterator::isCompatible( + const DbiModuleSourceFilesIterator &R) const { + // Universal iterators are compatible with any other iterator. + if (isUniversalEnd() || R.isUniversalEnd()) + return true; + + // At this point, neither iterator is a universal end iterator, although one + // or both might be non-universal end iterators. Regardless, the module index + // is valid, so they are compatible if and only if they refer to the same + // module. + return Modi == R.Modi; +} + +Error DbiModuleList::initialize(BinaryStreamRef ModInfo, + BinaryStreamRef FileInfo) { + if (auto EC = initializeModInfo(ModInfo)) + return EC; + if (auto EC = initializeFileInfo(FileInfo)) + return EC; + + return Error::success(); +} + +Error DbiModuleList::initializeModInfo(BinaryStreamRef ModInfo) { + ModInfoSubstream = ModInfo; + + if (ModInfo.getLength() == 0) + return Error::success(); + + BinaryStreamReader Reader(ModInfo); + + if (auto EC = Reader.readArray(Descriptors, ModInfo.getLength())) + return EC; + + return Error::success(); +} + +Error DbiModuleList::initializeFileInfo(BinaryStreamRef FileInfo) { + FileInfoSubstream = FileInfo; + + if (FileInfo.getLength() == 0) + return Error::success(); + + BinaryStreamReader FISR(FileInfo); + if (auto EC = FISR.readObject(FileInfoHeader)) + return EC; + + // First is an array of `NumModules` module indices. This does not seem to be + // used for anything meaningful, so we ignore it. + FixedStreamArray ModuleIndices; + if (auto EC = FISR.readArray(ModuleIndices, FileInfoHeader->NumModules)) + return EC; + if (auto EC = FISR.readArray(ModFileCountArray, FileInfoHeader->NumModules)) + return EC; + + // Compute the real number of source files. We can't trust the value in + // `FileInfoHeader->NumSourceFiles` because it is a unit16, and the sum of all + // source file counts might be larger than a unit16. So we compute the real + // count by summing up the individual counts. + uint32_t NumSourceFiles = 0; + for (auto Count : ModFileCountArray) + NumSourceFiles += Count; + + // In the reference implementation, this array is where the pointer documented + // at the definition of ModuleInfoHeader::FileNameOffs points to. Note that + // although the field in ModuleInfoHeader is ignored this array is not, as it + // is the authority on where each filename begins in the names buffer. + if (auto EC = FISR.readArray(FileNameOffsets, NumSourceFiles)) + return EC; + + if (auto EC = FISR.readStreamRef(NamesBuffer)) + return EC; + + auto DescriptorIter = Descriptors.begin(); + uint32_t NextFileIndex = 0; + ModuleInitialFileIndex.resize(FileInfoHeader->NumModules); + ModuleDescriptorOffsets.resize(FileInfoHeader->NumModules); + for (size_t I = 0; I < FileInfoHeader->NumModules; ++I) { + assert(DescriptorIter != Descriptors.end()); + ModuleInitialFileIndex[I] = NextFileIndex; + ModuleDescriptorOffsets[I] = DescriptorIter.offset(); + + NextFileIndex += ModFileCountArray[I]; + ++DescriptorIter; + } + + assert(DescriptorIter == Descriptors.end()); + assert(NextFileIndex == NumSourceFiles); + + return Error::success(); +} + +uint32_t DbiModuleList::getModuleCount() const { + return FileInfoHeader->NumModules; +} + +uint32_t DbiModuleList::getSourceFileCount() const { + return FileNameOffsets.size(); +} + +uint16_t DbiModuleList::getSourceFileCount(uint32_t Modi) const { + return ModFileCountArray[Modi]; +} + +DbiModuleDescriptor DbiModuleList::getModuleDescriptor(uint32_t Modi) const { + assert(Modi < getModuleCount()); + uint32_t Offset = ModuleDescriptorOffsets[Modi]; + auto Iter = Descriptors.at(Offset); + assert(Iter != Descriptors.end()); + return *Iter; +} + +iterator_range +DbiModuleList::source_files(uint32_t Modi) const { + return make_range( + DbiModuleSourceFilesIterator(*this, Modi, 0), + DbiModuleSourceFilesIterator()); +} + +Expected DbiModuleList::getFileName(uint32_t Index) const { + BinaryStreamReader Names(NamesBuffer); + if (Index >= getSourceFileCount()) + return make_error(raw_error_code::index_out_of_bounds); + + uint32_t FileOffset = FileNameOffsets[Index]; + Names.setOffset(FileOffset); + StringRef Name; + if (auto EC = Names.readCString(Name)) + return std::move(EC); + return Name; +} diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp index b9f53578d3267ffc173bcdb9f3eeb115d0525154..24322d942facc90f8e031b0344dc1b62e18a8037 100644 --- a/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -10,9 +10,9 @@ #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" @@ -72,14 +72,6 @@ Error DbiStream::reload() { return make_error(raw_error_code::feature_unsupported, "Unsupported DBI version."); - auto IS = Pdb.getPDBInfoStream(); - if (!IS) - return IS.takeError(); - - if (Header->Age != IS->getAge()) - return make_error(raw_error_code::corrupt_file, - "DBI Age does not match PDB Age."); - if (Stream->getLength() != sizeof(DbiStreamHeader) + Header->ModiSubstreamSize + Header->SecContrSubstreamSize + Header->SectionMapSize + @@ -107,11 +99,11 @@ Error DbiStream::reload() { return make_error(raw_error_code::corrupt_file, "DBI type server substream not aligned."); + BinaryStreamRef ModInfoSubstream; + BinaryStreamRef FileInfoSubstream; if (auto EC = Reader.readStreamRef(ModInfoSubstream, Header->ModiSubstreamSize)) return EC; - if (auto EC = initializeModInfoArray()) - return EC; if (auto EC = Reader.readStreamRef(SecContrSubstream, Header->SecContrSubstreamSize)) @@ -129,14 +121,15 @@ Error DbiStream::reload() { DbgStreams, Header->OptionalDbgHdrSize / sizeof(ulittle16_t))) return EC; + if (auto EC = Modules.initialize(ModInfoSubstream, FileInfoSubstream)) + return EC; + if (auto EC = initializeSectionContributionData()) return EC; if (auto EC = initializeSectionHeadersData()) return EC; if (auto EC = initializeSectionMapData()) return EC; - if (auto EC = initializeFileInfo()) - return EC; if (auto EC = initializeFpoRecords()) return EC; @@ -146,7 +139,7 @@ Error DbiStream::reload() { if (ECSubstream.getLength() > 0) { BinaryStreamReader ECReader(ECSubstream); - if (auto EC = ECNames.load(ECReader)) + if (auto EC = ECNames.reload(ECReader)) return EC; } @@ -215,17 +208,20 @@ FixedStreamArray DbiStream::getFpoRecords() { return FpoRecords; } -ArrayRef DbiStream::modules() const { return ModuleInfos; } +const DbiModuleList &DbiStream::modules() const { return Modules; } + FixedStreamArray DbiStream::getSectionMap() const { return SectionMap; } void DbiStream::visitSectionContributions( ISectionContribVisitor &Visitor) const { - if (SectionContribVersion == DbiSecContribVer60) { + if (!SectionContribs.empty()) { + assert(SectionContribVersion == DbiSecContribVer60); for (auto &SC : SectionContribs) Visitor.visit(SC); - } else if (SectionContribVersion == DbiSecContribV2) { + } else if (!SectionContribs2.empty()) { + assert(SectionContribVersion == DbiSecContribV2); for (auto &SC : SectionContribs2) Visitor.visit(SC); } @@ -248,24 +244,6 @@ Error DbiStream::initializeSectionContributionData() { "Unsupported DBI Section Contribution version"); } -Error DbiStream::initializeModInfoArray() { - if (ModInfoSubstream.getLength() == 0) - return Error::success(); - - // Since each ModInfo in the stream is a variable length, we have to iterate - // them to know how many there actually are. - BinaryStreamReader Reader(ModInfoSubstream); - - VarStreamArray ModInfoArray; - if (auto EC = Reader.readArray(ModInfoArray, ModInfoSubstream.getLength())) - return EC; - for (auto &Info : ModInfoArray) { - ModuleInfos.emplace_back(Info); - } - - return Error::success(); -} - // Initializes this->SectionHeaders. Error DbiStream::initializeSectionHeadersData() { if (DbgStreams.size() == 0) @@ -276,7 +254,7 @@ Error DbiStream::initializeSectionHeadersData() { return make_error(raw_error_code::no_stream); auto SHS = MappedBlockStream::createIndexedStream( - Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum); + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator()); size_t StreamLen = SHS->getLength(); if (StreamLen % sizeof(object::coff_section)) @@ -308,7 +286,7 @@ Error DbiStream::initializeFpoRecords() { return make_error(raw_error_code::no_stream); auto FS = MappedBlockStream::createIndexedStream( - Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum); + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator()); size_t StreamLen = FS->getLength(); if (StreamLen % sizeof(object::FpoData)) @@ -337,88 +315,9 @@ Error DbiStream::initializeSectionMapData() { return Error::success(); } -Error DbiStream::initializeFileInfo() { - if (FileInfoSubstream.getLength() == 0) - return Error::success(); - - const FileInfoSubstreamHeader *FH; - BinaryStreamReader FISR(FileInfoSubstream); - if (auto EC = FISR.readObject(FH)) - return EC; - - // The number of modules in the stream should be the same as reported by - // the FileInfoSubstreamHeader. - if (FH->NumModules != ModuleInfos.size()) - return make_error(raw_error_code::corrupt_file, - "FileInfo substream count doesn't match DBI."); - - FixedStreamArray ModIndexArray; - FixedStreamArray ModFileCountArray; - - // First is an array of `NumModules` module indices. This is not used for the - // same reason that `NumSourceFiles` is not used. It's an array of uint16's, - // but it's possible there are more than 64k source files, which would imply - // more than 64k modules (e.g. object files) as well. So we ignore this - // field. - if (auto EC = FISR.readArray(ModIndexArray, ModuleInfos.size())) - return EC; - if (auto EC = FISR.readArray(ModFileCountArray, ModuleInfos.size())) - return EC; - - // Compute the real number of source files. - uint32_t NumSourceFiles = 0; - for (auto Count : ModFileCountArray) - NumSourceFiles += Count; - - // This is the array that in the reference implementation corresponds to - // `ModInfo::FileLayout::FileNameOffs`, which is commented there as being a - // pointer. Due to the mentioned problems of pointers causing difficulty - // when reading from the file on 64-bit systems, we continue to ignore that - // field in `ModInfo`, and instead build a vector of StringRefs and stores - // them in `ModuleInfoEx`. The value written to and read from the file is - // not used anyway, it is only there as a way to store the offsets for the - // purposes of later accessing the names at runtime. - if (auto EC = FISR.readArray(FileNameOffsets, NumSourceFiles)) - return EC; - - if (auto EC = FISR.readStreamRef(NamesBuffer)) - return EC; - - // We go through each ModuleInfo, determine the number N of source files for - // that module, and then get the next N offsets from the Offsets array, using - // them to get the corresponding N names from the Names buffer and associating - // each one with the corresponding module. - uint32_t NextFileIndex = 0; - for (size_t I = 0; I < ModuleInfos.size(); ++I) { - uint32_t NumFiles = ModFileCountArray[I]; - ModuleInfos[I].SourceFiles.resize(NumFiles); - for (size_t J = 0; J < NumFiles; ++J, ++NextFileIndex) { - auto ThisName = getFileNameForIndex(NextFileIndex); - if (!ThisName) - return ThisName.takeError(); - ModuleInfos[I].SourceFiles[J] = *ThisName; - } - } - - return Error::success(); -} - uint32_t DbiStream::getDebugStreamIndex(DbgHeaderType Type) const { uint16_t T = static_cast(Type); if (T >= DbgStreams.size()) return kInvalidStreamIndex; return DbgStreams[T]; } - -Expected DbiStream::getFileNameForIndex(uint32_t Index) const { - BinaryStreamReader Names(NamesBuffer); - if (Index >= FileNameOffsets.size()) - return make_error(raw_error_code::index_out_of_bounds); - - uint32_t FileOffset = FileNameOffsets[Index]; - Names.setOffset(FileOffset); - StringRef Name; - if (auto EC = Names.readCString(Name)) - return std::move(EC); - return Name; -} diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index a203aea60fe7ae0f18d23aca7d721452f01ed423..e7304b444f23f896e588b1597410ed014dbc0ecf 100644 --- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -10,14 +10,14 @@ #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" -#include "llvm/DebugInfo/PDB/Native/ModInfoBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/Object/COFF.h" #include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/COFF.h" using namespace llvm; using namespace llvm::codeview; @@ -45,10 +45,6 @@ void DbiStreamBuilder::setFlags(uint16_t F) { Flags = F; } void DbiStreamBuilder::setMachineType(PDB_Machine M) { MachineType = M; } -void DbiStreamBuilder::setSectionContribs(ArrayRef Arr) { - SectionContribs = Arr; -} - void DbiStreamBuilder::setSectionMap(ArrayRef SecMap) { SectionMap = SecMap; } @@ -74,10 +70,11 @@ uint32_t DbiStreamBuilder::calculateSerializedLength() const { calculateSectionMapStreamSize() + calculateDbgStreamsSize(); } -Expected +Expected DbiStreamBuilder::addModuleInfo(StringRef ModuleName) { uint32_t Index = ModiList.size(); - auto MIB = llvm::make_unique(ModuleName, Index, Msf); + auto MIB = + llvm::make_unique(ModuleName, Index, Msf); auto M = MIB.get(); auto Result = ModiMap.insert(std::make_pair(ModuleName, std::move(MIB))); @@ -100,6 +97,14 @@ Error DbiStreamBuilder::addModuleSourceFile(StringRef Module, StringRef File) { return Error::success(); } +Expected DbiStreamBuilder::getSourceFileNameIndex(StringRef File) { + auto NameIter = SourceFileNames.find(File); + if (NameIter == SourceFileNames.end()) + return make_error(raw_error_code::no_entry, + "The specified source file was not found"); + return NameIter->getValue(); +} + uint32_t DbiStreamBuilder::calculateModiSubstreamSize() const { uint32_t Size = 0; for (const auto &M : ModiList) @@ -120,16 +125,21 @@ uint32_t DbiStreamBuilder::calculateSectionMapStreamSize() const { return sizeof(SecMapHeader) + sizeof(SecMapEntry) * SectionMap.size(); } -uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const { - uint32_t Size = 0; - Size += sizeof(ulittle16_t); // NumModules - Size += sizeof(ulittle16_t); // NumSourceFiles - Size += ModiList.size() * sizeof(ulittle16_t); // ModIndices - Size += ModiList.size() * sizeof(ulittle16_t); // ModFileCounts +uint32_t DbiStreamBuilder::calculateNamesOffset() const { + uint32_t Offset = 0; + Offset += sizeof(ulittle16_t); // NumModules + Offset += sizeof(ulittle16_t); // NumSourceFiles + Offset += ModiList.size() * sizeof(ulittle16_t); // ModIndices + Offset += ModiList.size() * sizeof(ulittle16_t); // ModFileCounts uint32_t NumFileInfos = 0; for (const auto &M : ModiList) NumFileInfos += M->source_files().size(); - Size += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets + Offset += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets + return Offset; +} + +uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const { + uint32_t Size = calculateNamesOffset(); Size += calculateNamesBufferSize(); return alignTo(Size, sizeof(uint32_t)); } @@ -148,9 +158,8 @@ uint32_t DbiStreamBuilder::calculateDbgStreamsSize() const { Error DbiStreamBuilder::generateFileInfoSubstream() { uint32_t Size = calculateFileInfoSubstreamSize(); - uint32_t NameSize = calculateNamesBufferSize(); auto Data = Allocator.Allocate(Size); - uint32_t NamesOffset = Size - NameSize; + uint32_t NamesOffset = calculateNamesOffset(); FileInfoBuffer = MutableBinaryByteStream(MutableArrayRef(Data, Size), llvm::support::little); @@ -198,6 +207,9 @@ Error DbiStreamBuilder::generateFileInfoSubstream() { } } + if (auto EC = NameBufferWriter.padToAlignment(sizeof(uint32_t))) + return EC; + if (NameBufferWriter.bytesRemaining() > 0) return make_error(raw_error_code::invalid_format, "The names buffer contained unexpected data."); @@ -277,23 +289,17 @@ static uint16_t toSecMapFlags(uint32_t Flags) { return Ret; } -// A utility function to create Section Contributions -// for a given input sections. -std::vector DbiStreamBuilder::createSectionContribs( - ArrayRef SecHdrs) { - std::vector Ret; - - // Create a SectionContrib for each input section. - for (auto &Sec : SecHdrs) { - Ret.emplace_back(); - auto &Entry = Ret.back(); - memset(&Entry, 0, sizeof(Entry)); - - Entry.Off = Sec.PointerToRawData; - Entry.Size = Sec.SizeOfRawData; - Entry.Characteristics = Sec.Characteristics; - } - return Ret; +void DbiStreamBuilder::addSectionContrib(DbiModuleDescriptorBuilder *ModuleDbi, + const object::coff_section *SecHdr) { + SectionContrib SC; + memset(&SC, 0, sizeof(SC)); + SC.ISect = (uint16_t)~0U; // This represents nil. + SC.Off = SecHdr->PointerToRawData; + SC.Size = SecHdr->SizeOfRawData; + SC.Characteristics = SecHdr->Characteristics; + // Use the module index in the module dbi stream or nil (-1). + SC.Imod = ModuleDbi ? ModuleDbi->getModuleIndex() : (uint16_t)~0U; + SectionContribs.emplace_back(SC); } // A utility function to create a Section Map for a given list of COFF sections. @@ -341,8 +347,8 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = finalize()) return EC; - auto DbiS = WritableMappedBlockStream::createIndexedStream(Layout, MsfBuffer, - StreamDBI); + auto DbiS = WritableMappedBlockStream::createIndexedStream( + Layout, MsfBuffer, StreamDBI, Allocator); BinaryStreamWriter Writer(*DbiS); if (auto EC = Writer.writeObject(*Header)) @@ -356,7 +362,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (!SectionContribs.empty()) { if (auto EC = Writer.writeEnum(DbiSecContribVer60)) return EC; - if (auto EC = Writer.writeArray(SectionContribs)) + if (auto EC = Writer.writeArray(makeArrayRef(SectionContribs))) return EC; } @@ -380,7 +386,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (Stream.StreamNumber == kInvalidStreamIndex) continue; auto WritableStream = WritableMappedBlockStream::createIndexedStream( - Layout, MsfBuffer, Stream.StreamNumber); + Layout, MsfBuffer, Stream.StreamNumber, Allocator); BinaryStreamWriter DbgStreamWriter(*WritableStream); if (auto EC = DbgStreamWriter.writeArray(Stream.Data)) return EC; diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp index 2a1d12e82390257c52543c0b33f4b818398f2048..a3979d480bf452b096387f02f297fbae2a56ca08 100644 --- a/lib/DebugInfo/PDB/Native/InfoStream.cpp +++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp @@ -79,6 +79,7 @@ Error InfoStream::reload() { break; case uint32_t(PdbRaw_FeatureSig::MinimalDebugInfo): Features |= PdbFeatureMinimalDebugInfo; + break; default: continue; } @@ -101,6 +102,10 @@ InfoStream::named_streams() const { return NamedStreams.entries(); } +bool InfoStream::containsIdStream() const { + return !!(Features & PdbFeatureContainsIdStream); +} + PdbRaw_ImplVer InfoStream::getVersion() const { return static_cast(Version); } diff --git a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp index f019d410328a84357d30f0876da42efbf8f19012..707128f7efd45761872fbfd594938cc2cf41392a 100644 --- a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp @@ -50,8 +50,8 @@ Error InfoStreamBuilder::finalizeMsfLayout() { Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer) const { - auto InfoS = - WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamPDB); + auto InfoS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, StreamPDB, Msf.getAllocator()); BinaryStreamWriter Writer(*InfoS); InfoStreamHeader H; diff --git a/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp b/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp deleted file mode 100644 index 73c45a9535202d786bf0395b8c9e0c2d2e6b10a4..0000000000000000000000000000000000000000 --- a/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp +++ /dev/null @@ -1,136 +0,0 @@ -//===- ModInfoBuilder.cpp - PDB Module Info Stream Creation -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/ModInfoBuilder.h" - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/MSF/MSFBuilder.h" -#include "llvm/DebugInfo/MSF/MSFCommon.h" -#include "llvm/DebugInfo/MSF/MappedBlockStream.h" -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" -#include "llvm/DebugInfo/PDB/Native/RawConstants.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/Support/BinaryItemStream.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/COFF.h" - -using namespace llvm; -using namespace llvm::codeview; -using namespace llvm::msf; -using namespace llvm::pdb; - -namespace llvm { -template <> struct BinaryItemTraits { - static size_t length(const CVSymbol &Item) { return Item.RecordData.size(); } - - static ArrayRef bytes(const CVSymbol &Item) { - return Item.RecordData; - } -}; -} - -static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize) { - uint32_t Size = sizeof(uint32_t); // Signature - Size += SymbolByteSize; // Symbol Data - Size += 0; // TODO: Layout.LineBytes - Size += 0; // TODO: Layout.C13Bytes - Size += sizeof(uint32_t); // GlobalRefs substream size (always 0) - Size += 0; // GlobalRefs substream bytes - return Size; -} - -ModInfoBuilder::ModInfoBuilder(StringRef ModuleName, uint32_t ModIndex, - msf::MSFBuilder &Msf) - : MSF(Msf), ModuleName(ModuleName) { - Layout.Mod = ModIndex; -} - -uint16_t ModInfoBuilder::getStreamIndex() const { return Layout.ModDiStream; } - -void ModInfoBuilder::setObjFileName(StringRef Name) { ObjFileName = Name; } - -void ModInfoBuilder::addSymbol(CVSymbol Symbol) { - Symbols.push_back(Symbol); - SymbolByteSize += Symbol.data().size(); -} - -void ModInfoBuilder::addSourceFile(StringRef Path) { - SourceFiles.push_back(Path); -} - -uint32_t ModInfoBuilder::calculateSerializedLength() const { - uint32_t L = sizeof(Layout); - uint32_t M = ModuleName.size() + 1; - uint32_t O = ObjFileName.size() + 1; - return alignTo(L + M + O, sizeof(uint32_t)); -} - -void ModInfoBuilder::finalize() { - Layout.C13Bytes = 0; - Layout.FileNameOffs = 0; // TODO: Fix this - Layout.Flags = 0; // TODO: Fix this - Layout.LineBytes = 0; - (void)Layout.Mod; // Set in constructor - (void)Layout.ModDiStream; // Set in finalizeMsfLayout - Layout.NumFiles = SourceFiles.size(); - Layout.PdbFilePathNI = 0; - Layout.SrcFileNameNI = 0; - - // This value includes both the signature field as well as the record bytes - // from the symbol stream. - Layout.SymBytes = SymbolByteSize + sizeof(uint32_t); -} - -Error ModInfoBuilder::finalizeMsfLayout() { - this->Layout.ModDiStream = kInvalidStreamIndex; - auto ExpectedSN = MSF.addStream(calculateDiSymbolStreamSize(SymbolByteSize)); - if (!ExpectedSN) - return ExpectedSN.takeError(); - Layout.ModDiStream = *ExpectedSN; - return Error::success(); -} - -Error ModInfoBuilder::commit(BinaryStreamWriter &ModiWriter, - const msf::MSFLayout &MsfLayout, - WritableBinaryStreamRef MsfBuffer) { - // We write the Modi record to the `ModiWriter`, but we additionally write its - // symbol stream to a brand new stream. - if (auto EC = ModiWriter.writeObject(Layout)) - return EC; - if (auto EC = ModiWriter.writeCString(ModuleName)) - return EC; - if (auto EC = ModiWriter.writeCString(ObjFileName)) - return EC; - if (auto EC = ModiWriter.padToAlignment(sizeof(uint32_t))) - return EC; - - if (Layout.ModDiStream != kInvalidStreamIndex) { - auto NS = WritableMappedBlockStream::createIndexedStream( - MsfLayout, MsfBuffer, Layout.ModDiStream); - WritableBinaryStreamRef Ref(*NS); - BinaryStreamWriter SymbolWriter(Ref); - // Write the symbols. - if (auto EC = - SymbolWriter.writeInteger(COFF::DEBUG_SECTION_MAGIC)) - return EC; - BinaryItemStream Records(llvm::support::endianness::little); - Records.setItems(Symbols); - BinaryStreamRef RecordsRef(Records); - if (auto EC = SymbolWriter.writeStreamRef(RecordsRef)) - return EC; - // TODO: Write C11 Line data - // TODO: Write C13 Line data - // TODO: Figure out what GlobalRefs substream actually is and populate it. - if (auto EC = SymbolWriter.writeInteger(0)) - return EC; - if (SymbolWriter.bytesRemaining() > 0) - return make_error(raw_error_code::stream_too_long); - } - return Error::success(); -} diff --git a/lib/DebugInfo/PDB/Native/ModStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp similarity index 55% rename from lib/DebugInfo/PDB/Native/ModStream.cpp rename to lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp index 08798cf0ed286f63fd3f101393d0173f345492f7..4186f2eb6ba0167d2281ab2b3458ebaa577206de 100644 --- a/lib/DebugInfo/PDB/Native/ModStream.cpp +++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp @@ -1,4 +1,4 @@ -//===- ModStream.cpp - PDB Module Info Stream Access ----------------------===// +//===- ModuleDebugStream.cpp - PDB Module Info Stream Access --------------===// // // The LLVM Compiler Infrastructure // @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/DebugInfo/PDB/Native/ModStream.h" +#include "llvm/DebugInfo/PDB/Native/ModuleDebugStream.h" #include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/PDB/Native/ModInfo.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" @@ -21,20 +21,22 @@ #include using namespace llvm; +using namespace llvm::codeview; using namespace llvm::msf; using namespace llvm::pdb; -ModStream::ModStream(const ModInfo &Module, - std::unique_ptr Stream) +ModuleDebugStreamRef::ModuleDebugStreamRef( + const DbiModuleDescriptor &Module, + std::unique_ptr Stream) : Mod(Module), Stream(std::move(Stream)) {} -ModStream::~ModStream() = default; +ModuleDebugStreamRef::~ModuleDebugStreamRef() = default; -Error ModStream::reload() { +Error ModuleDebugStreamRef::reload() { BinaryStreamReader Reader(*Stream); uint32_t SymbolSize = Mod.getSymbolDebugInfoByteSize(); - uint32_t C11Size = Mod.getLineInfoByteSize(); + uint32_t C11Size = Mod.getC11LineInfoByteSize(); uint32_t C13Size = Mod.getC13LineInfoByteSize(); if (C11Size > 0 && C13Size > 0) @@ -48,13 +50,14 @@ Error ModStream::reload() { if (auto EC = Reader.readArray(SymbolsSubstream, SymbolSize - 4)) return EC; - if (auto EC = Reader.readStreamRef(LinesSubstream, C11Size)) + if (auto EC = Reader.readStreamRef(C11LinesSubstream, C11Size)) return EC; if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size)) return EC; - BinaryStreamReader LineReader(C13LinesSubstream); - if (auto EC = LineReader.readArray(LineInfo, LineReader.bytesRemaining())) + BinaryStreamReader SubsectionsReader(C13LinesSubstream); + if (auto EC = SubsectionsReader.readArray(Subsections, + SubsectionsReader.bytesRemaining())) return EC; uint32_t GlobalRefsSize; @@ -70,16 +73,31 @@ Error ModStream::reload() { } iterator_range -ModStream::symbols(bool *HadError) const { - // It's OK if the stream is empty. - if (SymbolsSubstream.getUnderlyingStream().getLength() == 0) - return make_range(SymbolsSubstream.end(), SymbolsSubstream.end()); +ModuleDebugStreamRef::symbols(bool *HadError) const { return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end()); } -iterator_range -ModStream::lines(bool *HadError) const { - return make_range(LineInfo.begin(HadError), LineInfo.end()); +llvm::iterator_range +ModuleDebugStreamRef::subsections() const { + return make_range(Subsections.begin(), Subsections.end()); } -Error ModStream::commit() { return Error::success(); } +bool ModuleDebugStreamRef::hasDebugSubsections() const { + return C13LinesSubstream.getLength() > 0; +} + +Error ModuleDebugStreamRef::commit() { return Error::success(); } + +Expected +ModuleDebugStreamRef::findChecksumsSubsection() const { + codeview::DebugChecksumsSubsectionRef Result; + for (const auto &SS : subsections()) { + if (SS.kind() != DebugSubsectionKind::FileChecksums) + continue; + + if (auto EC = Result.initialize(SS.getRecordData())) + return std::move(EC); + return Result; + } + return Result; +} diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStreamBuilder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp index 9c0cc0bf82337b6756a001cbff5c0fb72c56e9e9..77f832582f82404de2fd12783542a4396f68aff2 100644 --- a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp @@ -13,7 +13,7 @@ namespace llvm { namespace pdb { NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session, - const ModuleInfoEx &MI) + DbiModuleDescriptor MI) : NativeRawSymbol(Session), Module(MI) {} PDB_SymType NativeCompilandSymbol::getSymTag() const { @@ -21,7 +21,7 @@ PDB_SymType NativeCompilandSymbol::getSymTag() const { } bool NativeCompilandSymbol::isEditAndContinueEnabled() const { - return Module.Info.hasECInfo(); + return Module.hasECInfo(); } uint32_t NativeCompilandSymbol::getLexicalParentId() const { return 0; } @@ -32,11 +32,11 @@ uint32_t NativeCompilandSymbol::getLexicalParentId() const { return 0; } // this potential confusion. std::string NativeCompilandSymbol::getLibraryName() const { - return Module.Info.getObjFileName(); + return Module.getObjFileName(); } std::string NativeCompilandSymbol::getName() const { - return Module.Info.getModuleName(); + return Module.getModuleName(); } } // namespace pdb diff --git a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp index 7532110d005c924d78e878a6c10bb40be77c138d..97319fd77d117522ca94017b4d82bc39faf6d263 100644 --- a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp +++ b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/Native/DbiModuleList.h" #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" @@ -19,25 +20,25 @@ namespace llvm { namespace pdb { NativeEnumModules::NativeEnumModules(NativeSession &PDBSession, - ArrayRef Modules, + const DbiModuleList &Modules, uint32_t Index) : Session(PDBSession), Modules(Modules), Index(Index) {} uint32_t NativeEnumModules::getChildCount() const { - return static_cast(Modules.size()); + return static_cast(Modules.getModuleCount()); } std::unique_ptr NativeEnumModules::getChildAtIndex(uint32_t Index) const { - if (Index >= Modules.size()) + if (Index >= Modules.getModuleCount()) return nullptr; - return std::unique_ptr(new PDBSymbolCompiland(Session, - std::unique_ptr( - new NativeCompilandSymbol(Session, Modules[Index])))); + return std::unique_ptr(new PDBSymbolCompiland( + Session, std::unique_ptr(new NativeCompilandSymbol( + Session, Modules.getModuleDescriptor(Index))))); } std::unique_ptr NativeEnumModules::getNext() { - if (Index >= Modules.size()) + if (Index >= Modules.getModuleCount()) return nullptr; return getChildAtIndex(Index++); } diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index ec2a4b87457c1a872df356652c7f629089437efb..bb52560be167a84c5e83e152f1760dc8a55ebd05 100644 --- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -26,7 +26,7 @@ NativeExeSymbol::findChildren(PDB_SymType Type) const { case PDB_SymType::Compiland: { auto Dbi = File.getPDBDbiStream(); if (Dbi) { - const auto Modules = Dbi->modules(); + const DbiModuleList &Modules = Dbi->modules(); return std::unique_ptr( new NativeEnumModules(Session, Modules)); } diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp index 4841ded7410268533ba5e720e8a79aad999d282b..70968d4330b07e28515f211379abfd383adb04ca 100644 --- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp @@ -13,6 +13,9 @@ #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/PDBExtras.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/raw_ostream.h" @@ -318,6 +321,11 @@ uint32_t NativeRawSymbol::getVirtualTableShapeId() const { return 0; } +std::unique_ptr +NativeRawSymbol::getVirtualBaseTableType() const { + return nullptr; +} + PDB_DataKind NativeRawSymbol::getDataKind() const { return PDB_DataKind::Unknown; } diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp index 3a83a326cfe63bee7b39b57fb2e53bfb8e3acd15..7e6843bceb7db330d0d3770951ef944b1c92e142 100644 --- a/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -70,8 +70,9 @@ uint64_t NativeSession::getLoadAddress() const { return 0; } void NativeSession::setLoadAddress(uint64_t Address) {} -std::unique_ptr NativeSession::getGlobalScope() { - auto RawSymbol = llvm::make_unique(*this); +std::unique_ptr NativeSession::getGlobalScope() const { + auto RawSymbol = + llvm::make_unique(const_cast(*this)); auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol))); std::unique_ptr ExeSymbol( static_cast(PdbSymbol.release())); diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp index 943e7fa13ab76f019abcd25d5cb125c6a2a6922a..a9597cdf4c4d32e9e00b02c951516467e4c6d8fe 100644 --- a/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -15,9 +15,9 @@ #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTable.h" #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/Support/BinaryStream.h" @@ -146,7 +146,8 @@ Error PDBFile::parseFileHeaders() { // at getBlockSize() intervals, so we have to be compatible. // See the function fpmPn() for more information: // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 - auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer); + auto FpmStream = + MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator); BinaryStreamReader FpmReader(*FpmStream); ArrayRef FpmBytes; if (auto EC = FpmReader.readBytes(FpmBytes, @@ -184,7 +185,8 @@ Error PDBFile::parseStreamData() { // is exactly what we are attempting to parse. By specifying a custom // subclass of IPDBStreamData which only accesses the fields that have already // been parsed, we can avoid this and reuse MappedBlockStream. - auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer); + auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer, + Allocator); BinaryStreamReader Reader(*DS); if (auto EC = Reader.readInteger(NumStreams)) return EC; @@ -337,8 +339,8 @@ Expected PDBFile::getPDBSymbolStream() { return *Symbols; } -Expected PDBFile::getStringTable() { - if (!Strings || !StringTableStream) { +Expected PDBFile::getStringTable() { + if (!Strings) { auto IS = getPDBInfoStream(); if (!IS) return IS.takeError(); @@ -350,16 +352,27 @@ Expected PDBFile::getStringTable() { if (!NS) return NS.takeError(); + auto N = llvm::make_unique(); BinaryStreamReader Reader(**NS); - auto N = llvm::make_unique(); - if (auto EC = N->load(Reader)) + if (auto EC = N->reload(Reader)) return std::move(EC); - Strings = std::move(N); + assert(Reader.bytesRemaining() == 0); StringTableStream = std::move(*NS); + Strings = std::move(N); } return *Strings; } +uint32_t PDBFile::getPointerSize() { + auto DbiS = getPDBDbiStream(); + if (!DbiS) + return 0; + PDB_Machine Machine = DbiS->getMachineType(); + if (Machine == PDB_Machine::Amd64) + return 8; + return 4; +} + bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); } bool PDBFile::hasPDBGlobalsStream() { @@ -389,7 +402,7 @@ bool PDBFile::hasPDBSymbolStream() { bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } -bool PDBFile::hasStringTable() { +bool PDBFile::hasPDBStringTable() { auto IS = getPDBInfoStream(); if (!IS) return false; @@ -406,5 +419,6 @@ PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout, uint32_t StreamIndex) const { if (StreamIndex >= getNumStreams()) return make_error(raw_error_code::no_stream); - return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex); + return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex, + Allocator); } diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index b3c84903bc7e7f9c0402fe15cd48a663ea5e0676..12b0c3b36c1dd44e10c3f482c35983b078f43406 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -17,8 +17,8 @@ #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/Support/BinaryStream.h" @@ -67,7 +67,9 @@ TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() { return *Ipi; } -StringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; } +PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() { + return Strings; +} Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { auto ExpectedStream = Msf->addStream(Size); @@ -78,9 +80,19 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { } Expected PDBFileBuilder::finalizeMsfLayout() { - uint32_t StringTableSize = Strings.finalize(); - if (auto EC = addNamedStream("/names", StringTableSize)) + if (Ipi && Ipi->getRecordCount() > 0) { + // In theory newer PDBs always have an ID stream, but by saying that we're + // only going to *really* have an ID stream if there is at least one ID + // record, we leave open the opportunity to test older PDBs such as those + // that don't have an ID stream. + auto &Info = getInfoBuilder(); + Info.addFeature(PdbRaw_FeatureSig::VC140); + } + + uint32_t StringsLen = Strings.calculateSerializedSize(); + + if (auto EC = addNamedStream("/names", StringsLen)) return std::move(EC); if (auto EC = addNamedStream("/LinkInfo", 0)) return std::move(EC); @@ -107,7 +119,15 @@ Expected PDBFileBuilder::finalizeMsfLayout() { return Msf->build(); } +Expected PDBFileBuilder::getNamedStreamIndex(StringRef Name) const { + uint32_t SN = 0; + if (!NamedStreams.get(Name, SN)) + return llvm::make_error(raw_error_code::no_stream); + return SN; +} + Error PDBFileBuilder::commit(StringRef Filename) { + assert(!Filename.empty()); auto ExpectedLayout = finalizeMsfLayout(); if (!ExpectedLayout) return ExpectedLayout.takeError(); @@ -130,8 +150,8 @@ Error PDBFileBuilder::commit(StringRef Filename) { if (auto EC = Writer.writeArray(Layout.DirectoryBlocks)) return EC; - auto DirStream = - WritableMappedBlockStream::createDirectoryStream(Layout, Buffer); + auto DirStream = WritableMappedBlockStream::createDirectoryStream( + Layout, Buffer, Allocator); BinaryStreamWriter DW(*DirStream); if (auto EC = DW.writeInteger(Layout.StreamSizes.size())) return EC; @@ -144,12 +164,12 @@ Error PDBFileBuilder::commit(StringRef Filename) { return EC; } - uint32_t StringTableStreamNo = 0; - if (!NamedStreams.get("/names", StringTableStreamNo)) - return llvm::make_error(raw_error_code::no_stream); + auto ExpectedSN = getNamedStreamIndex("/names"); + if (!ExpectedSN) + return ExpectedSN.takeError(); - auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, - StringTableStreamNo); + auto NS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, *ExpectedSN, Allocator); BinaryStreamWriter NSWriter(*NS); if (auto EC = Strings.commit(NSWriter)) return EC; diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f9f8ac219d357a066ddf32385c4b3709538fadfe --- /dev/null +++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -0,0 +1,139 @@ +//===- PDBStringTable.cpp - PDB String Table ---------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::pdb; + +uint32_t PDBStringTable::getByteSize() const { return ByteSize; } +uint32_t PDBStringTable::getNameCount() const { return NameCount; } +uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; } +uint32_t PDBStringTable::getSignature() const { return Header->Signature; } + +Error PDBStringTable::readHeader(BinaryStreamReader &Reader) { + if (auto EC = Reader.readObject(Header)) + return EC; + + if (Header->Signature != PDBStringTableSignature) + return make_error(raw_error_code::corrupt_file, + "Invalid hash table signature"); + if (Header->HashVersion != 1 && Header->HashVersion != 2) + return make_error(raw_error_code::corrupt_file, + "Unsupported hash version"); + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::readStrings(BinaryStreamReader &Reader) { + BinaryStreamRef Stream; + if (auto EC = Reader.readStreamRef(Stream)) + return EC; + + if (auto EC = Strings.initialize(Stream)) { + return joinErrors(std::move(EC), + make_error(raw_error_code::corrupt_file, + "Invalid hash table byte length")); + } + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +const codeview::DebugStringTableSubsectionRef & +PDBStringTable::getStringTable() const { + return Strings; +} + +Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) { + const support::ulittle32_t *HashCount; + if (auto EC = Reader.readObject(HashCount)) + return EC; + + if (auto EC = Reader.readArray(IDs, *HashCount)) { + return joinErrors(std::move(EC), + make_error(raw_error_code::corrupt_file, + "Could not read bucket array")); + } + + return Error::success(); +} + +Error PDBStringTable::readEpilogue(BinaryStreamReader &Reader) { + if (auto EC = Reader.readInteger(NameCount)) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::reload(BinaryStreamReader &Reader) { + + BinaryStreamReader SectionReader; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(PDBStringTableHeader)); + if (auto EC = readHeader(SectionReader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(Header->ByteSize); + if (auto EC = readStrings(SectionReader)) + return EC; + + // We don't know how long the hash table is until we parse it, so let the + // function responsible for doing that figure it out. + if (auto EC = readHashTable(Reader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(uint32_t)); + if (auto EC = readEpilogue(SectionReader)) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Expected PDBStringTable::getStringForID(uint32_t ID) const { + return Strings.getString(ID); +} + +Expected PDBStringTable::getIDForString(StringRef Str) const { + uint32_t Hash = + (Header->HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); + size_t Count = IDs.size(); + uint32_t Start = Hash % Count; + for (size_t I = 0; I < Count; ++I) { + // The hash is just a starting point for the search, but if it + // doesn't work we should find the string no matter what, because + // we iterate the entire array. + uint32_t Index = (Start + I) % Count; + + uint32_t ID = IDs[Index]; + auto ExpectedStr = getStringForID(ID); + if (!ExpectedStr) + return ExpectedStr.takeError(); + + if (*ExpectedStr == Str) + return ID; + } + return make_error(raw_error_code::no_entry); +} + +FixedStreamArray PDBStringTable::name_ids() const { + return IDs; +} diff --git a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..90acfadd311ffdd11e15dad15177021d9d0e7ffb --- /dev/null +++ b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -0,0 +1,138 @@ +//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace llvm::pdb; + +uint32_t PDBStringTableBuilder::insert(StringRef S) { + return Strings.insert(S); +} + +static uint32_t computeBucketCount(uint32_t NumStrings) { + // The /names stream is basically an on-disk open-addressing hash table. + // Hash collisions are resolved by linear probing. We cannot make + // utilization 100% because it will make the linear probing extremely + // slow. But lower utilization wastes disk space. As a reasonable + // load factor, we choose 80%. We need +1 because slot 0 is reserved. + return (NumStrings + 1) * 1.25; +} + +uint32_t PDBStringTableBuilder::calculateHashTableSize() const { + uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. + Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); + + return Size; +} + +uint32_t PDBStringTableBuilder::calculateSerializedSize() const { + uint32_t Size = 0; + Size += sizeof(PDBStringTableHeader); + Size += Strings.calculateSerializedSize(); + Size += calculateHashTableSize(); + Size += sizeof(uint32_t); // The /names stream ends with the string count. + return Size; +} + +void PDBStringTableBuilder::setStrings( + const codeview::DebugStringTableSubsection &Strings) { + this->Strings = Strings; +} + +Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { + // Write a header + PDBStringTableHeader H; + H.Signature = PDBStringTableSignature; + H.HashVersion = 1; + H.ByteSize = Strings.calculateSerializedSize(); + if (auto EC = Writer.writeObject(H)) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { + if (auto EC = Strings.commit(Writer)) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { + // Write a hash table. + uint32_t BucketCount = computeBucketCount(Strings.size()); + if (auto EC = Writer.writeInteger(BucketCount)) + return EC; + std::vector Buckets(BucketCount); + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); + uint32_t Hash = hashStringV1(S); + + for (uint32_t I = 0; I != BucketCount; ++I) { + uint32_t Slot = (Hash + I) % BucketCount; + if (Slot == 0) + continue; // Skip reserved slot + if (Buckets[Slot] != 0) + continue; + Buckets[Slot] = Offset; + break; + } + } + + if (auto EC = Writer.writeArray(ArrayRef(Buckets))) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { + if (auto EC = Writer.writeInteger(Strings.size())) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { + BinaryStreamWriter SectionWriter; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); + if (auto EC = writeHeader(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = + Writer.split(Strings.calculateSerializedSize()); + if (auto EC = writeStrings(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); + if (auto EC = writeHashTable(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); + if (auto EC = writeEpilogue(SectionWriter)) + return EC; + + return Error::success(); +} diff --git a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp index 629f3e80b0ed57b1c102bef62a8595a92071321a..9fd90102f72cf0d6fb7ed5b34632ca616da9f45e 100644 --- a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp +++ b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp @@ -21,6 +21,7 @@ #include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" @@ -46,7 +47,7 @@ void PDBTypeServerHandler::addSearchPath(StringRef Path) { if (Path.empty() || !sys::fs::is_directory(Path)) return; - SearchPaths.push_back(Path); + SearchPaths.insert(Path); } Expected @@ -55,9 +56,14 @@ PDBTypeServerHandler::handleInternal(PDBFile &File, auto ExpectedTpi = File.getPDBTpiStream(); if (!ExpectedTpi) return ExpectedTpi.takeError(); - CVTypeVisitor Visitor(Callbacks); - if (auto EC = Visitor.visitTypeStream(ExpectedTpi->types(nullptr))) + // For handling a type server, we should be using whatever the callback array + // was + // that is being used for the original file. We shouldn't allow the visitor + // to + // arbitrarily stick a deserializer in there. + if (auto EC = codeview::visitTypeStream(ExpectedTpi->typeArray(), Callbacks, + VDS_BytesExternal)) return std::move(EC); return true; @@ -80,13 +86,14 @@ Expected PDBTypeServerHandler::handle(TypeServer2Record &TS, cv_error_code::corrupt_record, "TypeServer2Record does not contain filename!"); - for (auto Path : SearchPaths) { - sys::path::append(Path, File); - if (!sys::fs::exists(Path)) + for (auto &Path : SearchPaths) { + SmallString<64> PathStr = Path.getKey(); + sys::path::append(PathStr, File); + if (!sys::fs::exists(PathStr)) continue; std::unique_ptr ThisSession; - if (auto EC = loadDataForPDB(PDB_ReaderType::Native, Path, ThisSession)) { + if (auto EC = loadDataForPDB(PDB_ReaderType::Native, PathStr, ThisSession)) { // It is not an error if this PDB fails to load, it just means that it // doesn't match and we should continue searching. ignoreErrors(std::move(EC)); diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp index 58202577672a31efeea76af54cc1839761a00ce9..8f3474b9ce1906687916795fcfe695501d02e0ad 100644 --- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -105,10 +105,12 @@ Error PublicsStream::reload() { "Could not read a thunk map.")); // Something called "section map" follows. - if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Could not read a section map.")); + if (Reader.bytesRemaining() > 0) { + if (auto EC = Reader.readArray(SectionOffsets, Header->NumSections)) + return joinErrors(std::move(EC), + make_error(raw_error_code::corrupt_file, + "Could not read a section map.")); + } if (Reader.bytesRemaining() > 0) return make_error(raw_error_code::corrupt_file, @@ -128,4 +130,13 @@ PublicsStream::getSymbols(bool *HadError) const { return SS.getSymbols(HadError); } +Expected +PublicsStream::getSymbolArray() const { + auto SymbolS = Pdb.getPDBSymbolStream(); + if (!SymbolS) + return SymbolS.takeError(); + + return SymbolS->getSymbolArray(); +} + Error PublicsStream::commit() { return Error::success(); } diff --git a/lib/DebugInfo/PDB/Native/StringTable.cpp b/lib/DebugInfo/PDB/Native/StringTable.cpp deleted file mode 100644 index 7e28389b838313f5a3a22dc1c309b0a9d401e8d4..0000000000000000000000000000000000000000 --- a/lib/DebugInfo/PDB/Native/StringTable.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//===- StringTable.cpp - PDB String Table -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/StringTable.h" - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/Endian.h" - -using namespace llvm; -using namespace llvm::support; -using namespace llvm::pdb; - -StringTable::StringTable() {} - -Error StringTable::load(BinaryStreamReader &Stream) { - ByteSize = Stream.getLength(); - - const StringTableHeader *H; - if (auto EC = Stream.readObject(H)) - return EC; - - if (H->Signature != StringTableSignature) - return make_error(raw_error_code::corrupt_file, - "Invalid hash table signature"); - if (H->HashVersion != 1 && H->HashVersion != 2) - return make_error(raw_error_code::corrupt_file, - "Unsupported hash version"); - - Signature = H->Signature; - HashVersion = H->HashVersion; - if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Invalid hash table byte length")); - - const support::ulittle32_t *HashCount; - if (auto EC = Stream.readObject(HashCount)) - return EC; - - if (auto EC = Stream.readArray(IDs, *HashCount)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Could not read bucket array")); - - if (Stream.bytesRemaining() < sizeof(support::ulittle32_t)) - return make_error(raw_error_code::corrupt_file, - "Missing name count"); - - if (auto EC = Stream.readInteger(NameCount)) - return EC; - - if (Stream.bytesRemaining() > 0) - return make_error(raw_error_code::stream_too_long, - "Unexpected bytes found in string table"); - - return Error::success(); -} - -uint32_t StringTable::getByteSize() const { - return ByteSize; -} - -StringRef StringTable::getStringForID(uint32_t ID) const { - if (ID == IDs[0]) - return StringRef(); - - // NamesBuffer is a buffer of null terminated strings back to back. ID is - // the starting offset of the string we're looking for. So just seek into - // the desired offset and a read a null terminated stream from that offset. - StringRef Result; - BinaryStreamReader NameReader(NamesBuffer); - NameReader.setOffset(ID); - if (auto EC = NameReader.readCString(Result)) - consumeError(std::move(EC)); - return Result; -} - -uint32_t StringTable::getIDForString(StringRef Str) const { - uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); - size_t Count = IDs.size(); - uint32_t Start = Hash % Count; - for (size_t I = 0; I < Count; ++I) { - // The hash is just a starting point for the search, but if it - // doesn't work we should find the string no matter what, because - // we iterate the entire array. - uint32_t Index = (Start + I) % Count; - - uint32_t ID = IDs[Index]; - StringRef S = getStringForID(ID); - if (S == Str) - return ID; - } - // IDs[0] contains the ID of the "invalid" entry. - return IDs[0]; -} - -FixedStreamArray StringTable::name_ids() const { - return IDs; -} diff --git a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp deleted file mode 100644 index e0f8370ab6087e39ace75162b2da7bdea5e85da0..0000000000000000000000000000000000000000 --- a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp +++ /dev/null @@ -1,102 +0,0 @@ -//===- StringTableBuilder.cpp - PDB String Table ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Endian.h" - -using namespace llvm; -using namespace llvm::support; -using namespace llvm::support::endian; -using namespace llvm::pdb; - -uint32_t StringTableBuilder::insert(StringRef S) { - auto P = Strings.insert({S, StringSize}); - - // If a given string didn't exist in the string table, we want to increment - // the string table size. - if (P.second) - StringSize += S.size() + 1; // +1 for '\0' - return P.first->second; -} - -static uint32_t computeBucketCount(uint32_t NumStrings) { - // The /names stream is basically an on-disk open-addressing hash table. - // Hash collisions are resolved by linear probing. We cannot make - // utilization 100% because it will make the linear probing extremely - // slow. But lower utilization wastes disk space. As a reasonable - // load factor, we choose 80%. We need +1 because slot 0 is reserved. - return (NumStrings + 1) * 1.25; -} - -uint32_t StringTableBuilder::finalize() { - uint32_t Size = 0; - Size += sizeof(StringTableHeader); - Size += StringSize; - Size += sizeof(uint32_t); // Hash table begins with 4-byte size field. - - uint32_t BucketCount = computeBucketCount(Strings.size()); - Size += BucketCount * sizeof(uint32_t); - - Size += - sizeof(uint32_t); // The /names stream ends with the number of strings. - return Size; -} - -Error StringTableBuilder::commit(BinaryStreamWriter &Writer) const { - // Write a header - StringTableHeader H; - H.Signature = StringTableSignature; - H.HashVersion = 1; - H.ByteSize = StringSize; - if (auto EC = Writer.writeObject(H)) - return EC; - - // Write a string table. - uint32_t StringStart = Writer.getOffset(); - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; - Writer.setOffset(StringStart + Offset); - if (auto EC = Writer.writeCString(S)) - return EC; - } - Writer.setOffset(StringStart + StringSize); - - // Write a hash table. - uint32_t BucketCount = computeBucketCount(Strings.size()); - if (auto EC = Writer.writeInteger(BucketCount)) - return EC; - std::vector Buckets(BucketCount); - - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; - uint32_t Hash = hashStringV1(S); - - for (uint32_t I = 0; I != BucketCount; ++I) { - uint32_t Slot = (Hash + I) % BucketCount; - if (Slot == 0) - continue; // Skip reserved slot - if (Buckets[Slot] != 0) - continue; - Buckets[Slot] = Offset; - break; - } - } - - if (auto EC = Writer.writeArray(ArrayRef(Buckets))) - return EC; - if (auto EC = Writer.writeInteger(static_cast(Strings.size()))) - return EC; - return Error::success(); -} diff --git a/lib/DebugInfo/PDB/Native/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp index 16904a5a27ed3028c9e6f429a4c5597416b968ae..91b8d648fcf9d4a6b802f5ec5ed6b4350410880e 100644 --- a/lib/DebugInfo/PDB/Native/TpiHashing.cpp +++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/PDB/Native/TpiHashing.h" +#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp index 5fef3edf8c2db45e5b4056ccc05080f5979b0168..67c803d3124ecb18840ae183fc2d650e94052dc1 100644 --- a/lib/DebugInfo/PDB/Native/TpiStream.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp @@ -8,11 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/TpiStream.h" + #include "llvm/ADT/iterator_range.h" -#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" -#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" -#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" #include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h" @@ -33,26 +32,11 @@ using namespace llvm::support; using namespace llvm::msf; using namespace llvm::pdb; -TpiStream::TpiStream(const PDBFile &File, - std::unique_ptr Stream) +TpiStream::TpiStream(PDBFile &File, std::unique_ptr Stream) : Pdb(File), Stream(std::move(Stream)) {} TpiStream::~TpiStream() = default; -// Verifies that a given type record matches with a given hash value. -// Currently we only verify SRC_LINE records. -Error TpiStream::verifyHashValues() { - TpiHashVerifier Verifier(HashValues, Header->NumHashBuckets); - TypeDeserializer Deserializer; - - TypeVisitorCallbackPipeline Pipeline; - Pipeline.addCallbackToPipeline(Deserializer); - Pipeline.addCallbackToPipeline(Verifier); - - CVTypeVisitor Visitor(Pipeline); - return Visitor.visitTypeStream(TypeRecords); -} - Error TpiStream::reload() { BinaryStreamReader Reader(*Stream); @@ -92,22 +76,20 @@ Error TpiStream::reload() { "Invalid TPI hash stream index."); auto HS = MappedBlockStream::createIndexedStream( - Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex); + Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex, + Pdb.getAllocator()); BinaryStreamReader HSR(*HS); // There should be a hash value for every type record, or no hashes at all. uint32_t NumHashValues = Header->HashValueBuffer.Length / sizeof(ulittle32_t); - if (NumHashValues != NumTypeRecords() && NumHashValues != 0) + if (NumHashValues != getNumTypeRecords() && NumHashValues != 0) return make_error( raw_error_code::corrupt_file, "TPI hash count does not match with the number of type records."); HSR.setOffset(Header->HashValueBuffer.Off); if (auto EC = HSR.readArray(HashValues, NumHashValues)) return EC; - std::vector HashValueList; - for (auto I : HashValues) - HashValueList.push_back(I); HSR.setOffset(Header->IndexOffsetBuffer.Off); uint32_t NumTypeIndexOffsets = @@ -122,14 +104,10 @@ Error TpiStream::reload() { } HashStream = std::move(HS); - - // TPI hash table is a parallel array for the type records. - // Verify that the hash values match with type records. - if (NumHashValues > 0) - if (auto EC = verifyHashValues()) - return EC; } + Types = llvm::make_unique( + TypeRecords, getNumTypeRecords(), getTypeIndexOffsets()); return Error::success(); } @@ -142,7 +120,7 @@ uint32_t TpiStream::TypeIndexBegin() const { return Header->TypeIndexBegin; } uint32_t TpiStream::TypeIndexEnd() const { return Header->TypeIndexEnd; } -uint32_t TpiStream::NumTypeRecords() const { +uint32_t TpiStream::getNumTypeRecords() const { return TypeIndexEnd() - TypeIndexBegin(); } @@ -154,7 +132,7 @@ uint16_t TpiStream::getTypeHashStreamAuxIndex() const { return Header->HashAuxStreamIndex; } -uint32_t TpiStream::NumHashBuckets() const { return Header->NumHashBuckets; } +uint32_t TpiStream::getNumHashBuckets() const { return Header->NumHashBuckets; } uint32_t TpiStream::getHashKeySize() const { return Header->HashKeySize; } FixedStreamArray TpiStream::getHashValues() const { diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 375c35b111455c7002ed499e13ed189a38cba523..9e943c7f114d50ef3d27b2ad4c2733e62a4e97d9 100644 --- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -69,7 +69,7 @@ Error TpiStreamBuilder::finalize() { uint32_t Count = TypeRecords.size(); - H->Version = *VerHeader; + H->Version = VerHeader; H->HeaderSize = sizeof(TpiStreamHeader); H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex; H->TypeIndexEnd = H->TypeIndexBegin + Count; @@ -109,7 +109,7 @@ uint32_t TpiStreamBuilder::calculateHashBufferSize() const { } uint32_t TpiStreamBuilder::calculateIndexOffsetSize() const { - return TypeIndexOffsets.size() * sizeof(TypeIndexOffset); + return TypeIndexOffsets.size() * sizeof(codeview::TypeIndexOffset); } Error TpiStreamBuilder::finalizeMsfLayout() { @@ -147,8 +147,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = finalize()) return EC; - auto InfoS = - WritableMappedBlockStream::createIndexedStream(Layout, Buffer, Idx); + auto InfoS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, + Idx, Allocator); BinaryStreamWriter Writer(*InfoS); if (auto EC = Writer.writeObject(*Header)) @@ -159,8 +159,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, return EC; if (HashStreamIndex != kInvalidStreamIndex) { - auto HVS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, - HashStreamIndex); + auto HVS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, HashStreamIndex, Allocator); BinaryStreamWriter HW(*HVS); if (HashValueStream) { if (auto EC = HW.writeStreamRef(*HashValueStream)) diff --git a/lib/DebugInfo/PDB/PDBContext.cpp b/lib/DebugInfo/PDB/PDBContext.cpp index 94b81ecf561e23cdf51de7183e1c68261ebfd07c..df0feac2bc40a78ba7bb7e96ba4f6d9df8d327ef 100644 --- a/lib/DebugInfo/PDB/PDBContext.cpp +++ b/lib/DebugInfo/PDB/PDBContext.cpp @@ -12,8 +12,8 @@ #include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" +#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" #include "llvm/Object/COFF.h" @@ -29,8 +29,7 @@ PDBContext::PDBContext(const COFFObjectFile &Object, Session->setLoadAddress(ImageBase.get()); } -void PDBContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, - bool SummarizeTypes) {} +void PDBContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts){} DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier) { diff --git a/lib/DebugInfo/PDB/PDBSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbol.cpp index 2c8438f9c23416ecfdc1039b4c698a04ea1b0cec..74010c2dd7ddb88814cb5f5880f7feb859f5d57d 100644 --- a/lib/DebugInfo/PDB/PDBSymbol.cpp +++ b/lib/DebugInfo/PDB/PDBSymbol.cpp @@ -54,6 +54,9 @@ PDBSymbol::PDBSymbol(const IPDBSession &PDBSession, std::unique_ptr Symbol) : Session(PDBSession), RawSymbol(std::move(Symbol)) {} +PDBSymbol::PDBSymbol(PDBSymbol &Symbol) + : Session(Symbol.Session), RawSymbol(std::move(Symbol.RawSymbol)) {} + PDBSymbol::~PDBSymbol() = default; #define FACTORY_SYMTAG_CASE(Tag, Type) \ @@ -100,12 +103,6 @@ PDBSymbol::create(const IPDBSession &PDBSession, } } -#define TRY_DUMP_TYPE(Type) \ - if (const Type *DerivedThis = this->cast()) \ - Dumper.dump(OS, Indent, *DerivedThis); - -#define ELSE_TRY_DUMP_TYPE(Type, Dumper) else TRY_DUMP_TYPE(Type, Dumper) - void PDBSymbol::defaultDump(raw_ostream &OS, int Indent) const { RawSymbol->dump(OS, Indent); } @@ -162,6 +159,8 @@ PDBSymbol::findInlineFramesByRVA(uint32_t RVA) const { std::unique_ptr PDBSymbol::getChildStats(TagStats &Stats) const { std::unique_ptr Result(findAllChildren()); + if (!Result) + return nullptr; Stats.clear(); while (auto Child = Result->getNext()) { ++Stats[Child->getSymTag()]; diff --git a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp index 7385d3ba1489843fa19897984b7c29d454a722bd..7076b4aec34789bccfcf76f3722e7366567fa819 100644 --- a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolBlock.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp index e08450e0ad0c4d6f9d233e68b3c8f30df9aa95ab..f73cd36d057a0b34a2b0c18fd04ff419107d3f10 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCompilandDetails.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp index 2f1c43666ae547dfff62c81b0099b227d22cf7f1..df696fa8c5f258bfdb2d33211f4f11164028141e 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp @@ -10,8 +10,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCompilandEnv.h" #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp index 9ec20bb62d758a40a7c36985b4617ce4644cc80f..a7b69a755941e78194a7df8fd399df38c55ca2da 100644 --- a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp @@ -10,8 +10,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCustom.h" #include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/lib/DebugInfo/PDB/PDBSymbolExe.cpp index b9fcac78c36d721d4725e55bc967cd8817c6904a..7417167b61ade28ecba6d115d34157da7f26cc8b 100644 --- a/lib/DebugInfo/PDB/PDBSymbolExe.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolExe.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" #include @@ -23,3 +24,13 @@ PDBSymbolExe::PDBSymbolExe(const IPDBSession &PDBSession, } void PDBSymbolExe::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } + +uint32_t PDBSymbolExe::getPointerByteSize() const { + auto Pointer = findOneChild(); + if (Pointer) + return Pointer->getLength(); + + if (getMachineType() == PDB_Machine::x86) + return 4; + return 8; +} diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp index 3c0bd25ed096c18c2d889eaf7cde7d31ebe67377..5a5cb4c1b5cac3700378bba67a5283b840efb8ff 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp @@ -12,10 +12,10 @@ #include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbolData.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" -#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include @@ -95,3 +95,14 @@ PDBSymbolFunc::getArguments() const { } void PDBSymbolFunc::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); } + +bool PDBSymbolFunc::isDestructor() const { + std::string Name = getName(); + if (Name.empty()) + return false; + if (Name[0] == '~') + return true; + if (Name == "__vecDelDtor") + return true; + return false; +} diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp index 482c95e3a8509a590d6a4496b245a56cb3214dbd..4a4195beb4ea5d6c4c0888ef588b4668fa06d600 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp index ae23c7619e2aa3986b680d267fd3d2bd30664e12..a448a404dc4a04f2add447e1e0bdcf3f1f25108f 100644 --- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp index 87bb4044216b0c2263de5128932d70de7d4959db..dbec16fcbaac297e03f2c6358dad17a003abb8e9 100644 --- a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp index 0ee18d47162493ab09fa0ae325fb76cad9c026a7..0fdf8b6d0f7747fa1a49e9477e33f0752a836cb5 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp index f617d8d0c2df549ed43898f33713a3d2dbb01401..726e7e1cdbb4048a1e6c384560b389e797003bee 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp index 68ba87c1cdf8dc4ed38c05259659b1e6b29ef888..6c84b984d210b5e829b95130db48b8059575d2ae 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp @@ -10,8 +10,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp index ec27985e91d1701ba1a49fd22c2c1287a726cf60..c01877287888626217db09ff0a98eacbd8617b8d 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp index 473529d1b04321fc9917a14bcb0d5c78a5e71f17..0304c6286c8f1233c30e81096a0eb0cfc3a879b3 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp @@ -12,9 +12,9 @@ #include "llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h" -#include "llvm/DebugInfo/PDB/PDBSymDumper.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp index 86e0ec4f8565769bb361d7558d70492587300a8d..7cfba823b4fa5f0d3605979a9c543ff9ad9fd1a2 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp index 4a9a9ed5fda8e82c5648c8102fb09e18a382a632..15dc153521656d865514cb15c4e0ba779af2e23f 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp @@ -9,7 +9,15 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" +#include "llvm/DebugInfo/PDB/IPDBSession.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" +#include "llvm/DebugInfo/PDB/PDBSymbolData.h" +#include "llvm/DebugInfo/PDB/PDBSymbolExe.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" +#include "llvm/DebugInfo/PDB/UDTLayout.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp index a516a4d2c42999f9969fd6668f7b8f8c243e3531..ddc0574617c5a6baa99e05ac00cd767e620453c1 100644 --- a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp index dbbea9c93e2067528e2ff44066f970488d06dbd2..fdbe845f455a5ec2ca75429e723e75463b71b023 100644 --- a/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp index 020aec9e98a88161ba541cbdc91e4ca193a02293..f40578f4372a611877921665423cbdf308bd09e9 100644 --- a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp +++ b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp @@ -9,8 +9,8 @@ #include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h" -#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include "llvm/DebugInfo/PDB/PDBSymDumper.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" #include diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp new file mode 100644 index 0000000000000000000000000000000000000000..da353cb6977ce779ddda632838c12d817a5124e2 --- /dev/null +++ b/lib/DebugInfo/PDB/UDTLayout.cpp @@ -0,0 +1,299 @@ +//===- UDTLayout.cpp --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/UDTLayout.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/PDB/IPDBSession.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" +#include "llvm/DebugInfo/PDB/PDBSymbolData.h" +#include "llvm/DebugInfo/PDB/PDBSymbolExe.h" +#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h" +#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h" + +#include + +using namespace llvm; +using namespace llvm::pdb; + +static std::unique_ptr getSymbolType(const PDBSymbol &Symbol) { + const IPDBSession &Session = Symbol.getSession(); + const IPDBRawSymbol &RawSymbol = Symbol.getRawSymbol(); + uint32_t TypeId = RawSymbol.getTypeId(); + return Session.getSymbolById(TypeId); +} + +static uint32_t getTypeLength(const PDBSymbol &Symbol) { + auto SymbolType = getSymbolType(Symbol); + const IPDBRawSymbol &RawType = SymbolType->getRawSymbol(); + + return RawType.getLength(); +} + +LayoutItemBase::LayoutItemBase(const UDTLayoutBase *Parent, + const PDBSymbol *Symbol, const std::string &Name, + uint32_t OffsetInParent, uint32_t Size, + bool IsElided) + : Symbol(Symbol), Parent(Parent), Name(Name), + OffsetInParent(OffsetInParent), SizeOf(Size), LayoutSize(Size), + IsElided(IsElided) { + UsedBytes.resize(SizeOf, true); +} + +uint32_t LayoutItemBase::deepPaddingSize() const { + return UsedBytes.size() - UsedBytes.count(); +} + +uint32_t LayoutItemBase::tailPadding() const { + int Last = UsedBytes.find_last(); + + return UsedBytes.size() - (Last + 1); +} + +DataMemberLayoutItem::DataMemberLayoutItem( + const UDTLayoutBase &Parent, std::unique_ptr Member) + : LayoutItemBase(&Parent, Member.get(), Member->getName(), + Member->getOffset(), getTypeLength(*Member), false), + DataMember(std::move(Member)) { + auto Type = DataMember->getType(); + if (auto UDT = unique_dyn_cast(Type)) { + UdtLayout = llvm::make_unique(std::move(UDT)); + UsedBytes = UdtLayout->usedBytes(); + } +} + +VBPtrLayoutItem::VBPtrLayoutItem(const UDTLayoutBase &Parent, + std::unique_ptr Sym, + uint32_t Offset, uint32_t Size) + : LayoutItemBase(&Parent, Sym.get(), "", Offset, Size, false), + Type(std::move(Sym)) { +} + +const PDBSymbolData &DataMemberLayoutItem::getDataMember() { + return *dyn_cast(Symbol); +} + +bool DataMemberLayoutItem::hasUDTLayout() const { return UdtLayout != nullptr; } + +const ClassLayout &DataMemberLayoutItem::getUDTLayout() const { + return *UdtLayout; +} + +VTableLayoutItem::VTableLayoutItem(const UDTLayoutBase &Parent, + std::unique_ptr VT) + : LayoutItemBase(&Parent, VT.get(), "", 0, getTypeLength(*VT), false), + VTable(std::move(VT)) { + auto VTableType = cast(VTable->getType()); + ElementSize = VTableType->getLength(); +} + +UDTLayoutBase::UDTLayoutBase(const UDTLayoutBase *Parent, const PDBSymbol &Sym, + const std::string &Name, uint32_t OffsetInParent, + uint32_t Size, bool IsElided) + : LayoutItemBase(Parent, &Sym, Name, OffsetInParent, Size, IsElided) { + // UDT storage comes from a union of all the children's storage, so start out + // uninitialized. + UsedBytes.reset(0, Size); + + initializeChildren(Sym); + if (LayoutSize < Size) + UsedBytes.resize(LayoutSize); +} + +uint32_t UDTLayoutBase::tailPadding() const { + uint32_t Abs = LayoutItemBase::tailPadding(); + if (!LayoutItems.empty()) { + const LayoutItemBase *Back = LayoutItems.back(); + uint32_t ChildPadding = Back->LayoutItemBase::tailPadding(); + if (Abs < ChildPadding) + Abs = 0; + else + Abs -= ChildPadding; + } + return Abs; +} + +ClassLayout::ClassLayout(const PDBSymbolTypeUDT &UDT) + : UDTLayoutBase(nullptr, UDT, UDT.getName(), 0, UDT.getLength(), false), + UDT(UDT) { + ImmediateUsedBytes.resize(SizeOf, false); + for (auto &LI : LayoutItems) { + uint32_t Begin = LI->getOffsetInParent(); + uint32_t End = Begin + LI->getLayoutSize(); + End = std::min(SizeOf, End); + ImmediateUsedBytes.set(Begin, End); + } +} + +ClassLayout::ClassLayout(std::unique_ptr UDT) + : ClassLayout(*UDT) { + OwnedStorage = std::move(UDT); +} + +uint32_t ClassLayout::immediatePadding() const { + return SizeOf - ImmediateUsedBytes.count(); +} + +BaseClassLayout::BaseClassLayout(const UDTLayoutBase &Parent, + uint32_t OffsetInParent, bool Elide, + std::unique_ptr B) + : UDTLayoutBase(&Parent, *B, B->getName(), OffsetInParent, B->getLength(), + Elide), + Base(std::move(B)) { + if (isEmptyBase()) { + // Special case an empty base so that it doesn't get treated as padding. + UsedBytes.resize(1); + UsedBytes.set(0); + } + IsVirtualBase = Base->isVirtualBaseClass(); +} + +void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) { + // Handled bases first, followed by VTables, followed by data members, + // followed by functions, followed by other. This ordering is necessary + // so that bases and vtables get initialized before any functions which + // may override them. + UniquePtrVector Bases; + UniquePtrVector VTables; + UniquePtrVector Members; + UniquePtrVector VirtualBaseSyms; + + auto Children = Sym.findAllChildren(); + while (auto Child = Children->getNext()) { + if (auto Base = unique_dyn_cast(Child)) { + if (Base->isVirtualBaseClass()) + VirtualBaseSyms.push_back(std::move(Base)); + else + Bases.push_back(std::move(Base)); + } + + else if (auto Data = unique_dyn_cast(Child)) { + if (Data->getDataKind() == PDB_DataKind::Member) + Members.push_back(std::move(Data)); + else + Other.push_back(std::move(Data)); + } else if (auto VT = unique_dyn_cast(Child)) + VTables.push_back(std::move(VT)); + else if (auto Func = unique_dyn_cast(Child)) + Funcs.push_back(std::move(Func)); + else { + Other.push_back(std::move(Child)); + } + } + + // We don't want to have any re-allocations in the list of bases, so make + // sure to reserve enough space so that our ArrayRefs don't get invalidated. + AllBases.reserve(Bases.size() + VirtualBaseSyms.size()); + + // Only add non-virtual bases to the class first. Only at the end of the + // class, after all non-virtual bases and data members have been added do we + // add virtual bases. This way the offsets are correctly aligned when we go + // to lay out virtual bases. + for (auto &Base : Bases) { + uint32_t Offset = Base->getOffset(); + // Non-virtual bases never get elided. + auto BL = llvm::make_unique(*this, Offset, false, + std::move(Base)); + + AllBases.push_back(BL.get()); + addChildToLayout(std::move(BL)); + } + NonVirtualBases = AllBases; + + assert(VTables.size() <= 1); + if (!VTables.empty()) { + auto VTLayout = + llvm::make_unique(*this, std::move(VTables[0])); + + VTable = VTLayout.get(); + + addChildToLayout(std::move(VTLayout)); + } + + for (auto &Data : Members) { + auto DM = llvm::make_unique(*this, std::move(Data)); + + addChildToLayout(std::move(DM)); + } + + // Make sure add virtual bases before adding functions, since functions may be + // overrides of virtual functions declared in a virtual base, so the VTables + // and virtual intros need to be correctly initialized. + for (auto &VB : VirtualBaseSyms) { + int VBPO = VB->getVirtualBasePointerOffset(); + if (!hasVBPtrAtOffset(VBPO)) { + if (auto VBP = VB->getRawSymbol().getVirtualBaseTableType()) { + auto VBPL = llvm::make_unique(*this, std::move(VBP), + VBPO, VBP->getLength()); + VBPtr = VBPL.get(); + addChildToLayout(std::move(VBPL)); + } + } + + // Virtual bases always go at the end. So just look for the last place we + // ended when writing something, and put our virtual base there. + // Note that virtual bases get elided unless this is a top-most derived + // class. + uint32_t Offset = UsedBytes.find_last() + 1; + bool Elide = (Parent != nullptr); + auto BL = + llvm::make_unique(*this, Offset, Elide, std::move(VB)); + AllBases.push_back(BL.get()); + + // Only lay this virtual base out directly inside of *this* class if this + // is a top-most derived class. Keep track of it regardless, but only + // physically lay it out if it's a topmost derived class. + addChildToLayout(std::move(BL)); + } + VirtualBases = makeArrayRef(AllBases).drop_front(NonVirtualBases.size()); + + if (Parent != nullptr) + LayoutSize = UsedBytes.find_last() + 1; +} + +bool UDTLayoutBase::hasVBPtrAtOffset(uint32_t Off) const { + if (VBPtr && VBPtr->getOffsetInParent() == Off) + return true; + for (BaseClassLayout *BL : AllBases) { + if (BL->hasVBPtrAtOffset(Off - BL->getOffsetInParent())) + return true; + } + return false; +} + +void UDTLayoutBase::addChildToLayout(std::unique_ptr Child) { + uint32_t Begin = Child->getOffsetInParent(); + + if (!Child->isElided()) { + BitVector ChildBytes = Child->usedBytes(); + + // Suppose the child occupies 4 bytes starting at offset 12 in a 32 byte + // class. When we call ChildBytes.resize(32), the Child's storage will + // still begin at offset 0, so we need to shift it left by offset bytes + // to get it into the right position. + ChildBytes.resize(UsedBytes.size()); + ChildBytes <<= Child->getOffsetInParent(); + UsedBytes |= ChildBytes; + + if (ChildBytes.count() > 0) { + auto Loc = std::upper_bound(LayoutItems.begin(), LayoutItems.end(), Begin, + [](uint32_t Off, const LayoutItemBase *Item) { + return (Off < Item->getOffsetInParent()); + }); + + LayoutItems.insert(Loc, Child.get()); + } + } + + ChildStorage.push_back(std::move(Child)); +} \ No newline at end of file diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index f672680cb9ea7c82cb38086166e4fed878a07a3a..2a89faff96470b786aa55cf1f7d12d0b56ac198f 100644 --- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -15,12 +15,12 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolSize.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Error.h" diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp index 1abb368127afc9c0952ae1d7219f206e11ae086a..19711ca58c6f0ff97245020f7622bb85280bbe5a 100644 --- a/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -16,6 +16,7 @@ #include "SymbolizableObjectFile.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/PDB/PDB.h" @@ -24,7 +25,6 @@ #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOUniversal.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" @@ -39,6 +39,8 @@ #if defined(_MSC_VER) #include + +// This must be included after windows.h. #include #pragma comment(lib, "dbghelp.lib") @@ -461,8 +463,9 @@ extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, size_t *length, int *status); #endif -std::string LLVMSymbolizer::DemangleName(const std::string &Name, - const SymbolizableModule *ModInfo) { +std::string +LLVMSymbolizer::DemangleName(const std::string &Name, + const SymbolizableModule *DbiModuleDescriptor) { #if !defined(_MSC_VER) // We can spoil names of symbols with C linkage, so use an heuristic // approach to check if the name should be demangled. @@ -490,7 +493,7 @@ std::string LLVMSymbolizer::DemangleName(const std::string &Name, return (result == 0) ? Name : std::string(DemangledName); } #endif - if (ModInfo && ModInfo->isWin32Module()) + if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) return std::string(demanglePE32ExternCFunc(Name)); return Name; } diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp index 49dbe74d25df8527f238d7d8ea6fef8f4c78c4a2..34f4017d98283dd0d9cdea53daba73730d77d708 100644 --- a/lib/Demangle/ItaniumDemangle.cpp +++ b/lib/Demangle/ItaniumDemangle.cpp @@ -1947,7 +1947,7 @@ static const char *parse_type(const char *first, const char *last, C &db) { break; } } - // drop through + // falls through default: // must check for builtin-types before class-enum-types to avoid // ambiguities with operator-names @@ -2525,6 +2525,9 @@ static std::string base_name(std::string &s) { ++p0; break; } + if (!isalpha(*p0) && !isdigit(*p0) && *p0 != '_') { + return std::string(); + } } return std::string(p0, pe); } @@ -2612,39 +2615,45 @@ static const char *parse_unnamed_type_name(const char *first, const char *last, first = t0 + 1; } break; case 'l': { + size_t lambda_pos = db.names.size(); db.names.push_back(std::string("'lambda'(")); const char *t0 = first + 2; if (first[2] == 'v') { db.names.back().first += ')'; ++t0; } else { - const char *t1 = parse_type(t0, last, db); - if (t1 == t0) { - if (!db.names.empty()) - db.names.pop_back(); - return first; - } - if (db.names.size() < 2) - return first; - auto tmp = db.names.back().move_full(); - db.names.pop_back(); - db.names.back().first.append(tmp); - t0 = t1; + bool is_first_it = true; while (true) { - t1 = parse_type(t0, last, db); + long k0 = static_cast(db.names.size()); + const char *t1 = parse_type(t0, last, db); + long k1 = static_cast(db.names.size()); if (t1 == t0) break; - if (db.names.size() < 2) + if (k0 >= k1) return first; - tmp = db.names.back().move_full(); - db.names.pop_back(); - if (!tmp.empty()) { - db.names.back().first.append(", "); - db.names.back().first.append(tmp); - } + // If the call to parse_type above found a pack expansion + // substitution, then multiple names could have been + // inserted into the name table. Walk through the names, + // appending each onto the lambda's parameter list. + std::for_each(db.names.begin() + k0, db.names.begin() + k1, + [&](typename C::sub_type::value_type &pair) { + if (pair.empty()) + return; + auto &lambda = db.names[lambda_pos].first; + if (!is_first_it) + lambda.append(", "); + is_first_it = false; + lambda.append(pair.move_full()); + }); + db.names.erase(db.names.begin() + k0, db.names.end()); t0 = t1; } - if (db.names.empty()) + if (is_first_it) { + if (!db.names.empty()) + db.names.pop_back(); + return first; + } + if (db.names.empty() || db.names.size() - 1 != lambda_pos) return first; db.names.back().first.append(")"); } @@ -4030,6 +4039,8 @@ static const char *parse_encoding(const char *first, const char *last, C &db) { save_value sb(db.tag_templates); if (db.encoding_depth > 1) db.tag_templates = true; + save_value sp(db.parsed_ctor_dtor_cv); + db.parsed_ctor_dtor_cv = false; switch (*first) { case 'G': case 'T': @@ -4229,6 +4240,7 @@ template struct string_pair { template string_pair(const char (&s)[N]) : first(s, N - 1) {} size_t size() const { return first.size() + second.size(); } + bool empty() const { return first.empty() && second.empty(); } StrT full() const { return first + second; } StrT move_full() { return std::move(first) + std::move(second); } }; diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 0051c69efb7d785721a75842df50afc4572cb491..a7b1fe206f1089120f333973a49741f885fa5125 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "IntelJITEventsWrapper.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Config/config.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/ExecutionEngine/JITEventListener.h" diff --git a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c index e9668892c05bf2d0416b5220c8b9e1ce78a3fc88..f2d36a76a3154f9be3fd329d65843afb29fc364a 100644 --- a/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c +++ b/lib/ExecutionEngine/IntelJITEvents/jitprofiling.c @@ -22,8 +22,8 @@ #include #pragma optimize("", off) #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#include #include +#include #include #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #include diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index e29e9fc2c702e1e6985b711b4f5631cbd0619772..96844439e7219feccafb5254fd60a63862f3ac06 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -1565,7 +1565,7 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy, Tmp = Tmp.zext(SrcBitSize); Tmp = TempSrc.AggregateVal[SrcElt++].IntVal; Tmp = Tmp.zext(DstBitSize); - Tmp = Tmp.shl(ShiftAmt); + Tmp <<= ShiftAmt; ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; Elt.IntVal |= Tmp; } @@ -1580,7 +1580,7 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy, GenericValue Elt; Elt.IntVal = Elt.IntVal.zext(SrcBitSize); Elt.IntVal = TempSrc.AggregateVal[i].IntVal; - Elt.IntVal = Elt.IntVal.lshr(ShiftAmt); + Elt.IntVal.lshrInPlace(ShiftAmt); // it could be DstBitSize == SrcBitSize, so check it if (DstBitSize < SrcBitSize) Elt.IntVal = Elt.IntVal.trunc(DstBitSize); diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index ee75bee9c533cd525989e504eb0711f9415f98f8..64dca930722e61ee7eadb2600b3eafad04d7ed05 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -22,7 +22,7 @@ #include "Interpreter.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/Config/config.h" // Detect libffi +#include "llvm/Config/config.h" // Detect libffi #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -33,8 +33,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/UniqueLock.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 57b5d85bb55006da81d45eb0488fa48f5198581d..3581d6458395712fec5f74f0350da169b309d557 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Config/config.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/OProfileWrapper.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt index 685e882e4a8343e8ab8e3d3e064a89a7dfe0a4aa..f83e002c758f452baa7b177d28807cfbe7409e18 100644 --- a/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMOrcJIT OrcCBindings.cpp OrcError.cpp OrcMCJITReplacement.cpp + RPCUtils.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 711b887da6ef1062259f5688d1350786404047c3..e3a456849f90351120905f916a442ddb4d731186 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" -#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/IRBuilder.h" diff --git a/lib/ExecutionEngine/Orc/OrcError.cpp b/lib/ExecutionEngine/Orc/OrcError.cpp index dcbbf5f2ae7000f64bc2a39c0cb865036ae654d6..9e70c4ac1dbffcdb6d3d4d9af4024f5eda6ebd9d 100644 --- a/lib/ExecutionEngine/Orc/OrcError.cpp +++ b/lib/ExecutionEngine/Orc/OrcError.cpp @@ -39,14 +39,19 @@ public: return "Remote indirect stubs owner does not exist"; case OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse: return "Remote indirect stubs owner Id already in use"; + case OrcErrorCode::RPCConnectionClosed: + return "RPC connection closed"; + case OrcErrorCode::RPCCouldNotNegotiateFunction: + return "Could not negotiate RPC function"; case OrcErrorCode::RPCResponseAbandoned: return "RPC response abandoned"; case OrcErrorCode::UnexpectedRPCCall: return "Unexpected RPC call"; case OrcErrorCode::UnexpectedRPCResponse: return "Unexpected RPC response"; - case OrcErrorCode::UnknownRPCFunction: - return "Unknown RPC function"; + case OrcErrorCode::UnknownErrorCodeFromRemote: + return "Unknown error returned from remote RPC function " + "(Use StringError to get error message)"; } llvm_unreachable("Unhandled error code"); } @@ -58,27 +63,10 @@ static ManagedStatic OrcErrCat; namespace llvm { namespace orc { -char RPCFunctionNotSupported::ID = 0; - std::error_code orcError(OrcErrorCode ErrCode) { typedef std::underlying_type::type UT; return std::error_code(static_cast(ErrCode), *OrcErrCat); } -RPCFunctionNotSupported::RPCFunctionNotSupported(std::string RPCFunctionSignature) - : RPCFunctionSignature(std::move(RPCFunctionSignature)) {} - -std::error_code RPCFunctionNotSupported::convertToErrorCode() const { - return orcError(OrcErrorCode::UnknownRPCFunction); -} - -void RPCFunctionNotSupported::log(raw_ostream &OS) const { - OS << "Could not negotiate RPC function '" << RPCFunctionSignature << "'"; -} - -const std::string &RPCFunctionNotSupported::getFunctionSignature() const { - return RPCFunctionSignature; -} - } } diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index a5100a56bcf1cb0bd3f7990d0b4831bc939eebe1..7dd6b17d33cb4dbc10d974e431ba7472dc9c9d18 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -20,11 +20,11 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h" #include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Function.h" #include "llvm/IR/Mangler.h" #include "llvm/Object/Archive.h" @@ -34,10 +34,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include #include #include #include -#include #include #include #include @@ -94,9 +94,8 @@ class OrcMCJITReplacement : public ExecutionEngine { return ClientMM->registerEHFrames(Addr, LoadAddr, Size); } - void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override { - return ClientMM->deregisterEHFrames(Addr, LoadAddr, Size); + void deregisterEHFrames() override { + return ClientMM->deregisterEHFrames(); } void notifyObjectLoaded(RuntimeDyld &RTDyld, diff --git a/lib/ExecutionEngine/Orc/RPCUtils.cpp b/lib/ExecutionEngine/Orc/RPCUtils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2a7ab5ca81807df318704b5f482c25f6522e89e2 --- /dev/null +++ b/lib/ExecutionEngine/Orc/RPCUtils.cpp @@ -0,0 +1,55 @@ +//===--------------- RPCUtils.cpp - RPCUtils implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// RPCUtils implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/Orc/RPCUtils.h" + +char llvm::orc::rpc::RPCFatalError::ID = 0; +char llvm::orc::rpc::ConnectionClosed::ID = 0; +char llvm::orc::rpc::ResponseAbandoned::ID = 0; +char llvm::orc::rpc::CouldNotNegotiate::ID = 0; + +namespace llvm { +namespace orc { +namespace rpc { + +std::error_code ConnectionClosed::convertToErrorCode() const { + return orcError(OrcErrorCode::RPCConnectionClosed); +} + +void ConnectionClosed::log(raw_ostream &OS) const { + OS << "RPC connection already closed"; +} + +std::error_code ResponseAbandoned::convertToErrorCode() const { + return orcError(OrcErrorCode::RPCResponseAbandoned); +} + +void ResponseAbandoned::log(raw_ostream &OS) const { + OS << "RPC response abandoned"; +} + +CouldNotNegotiate::CouldNotNegotiate(std::string Signature) + : Signature(std::move(Signature)) {} + +std::error_code CouldNotNegotiate::convertToErrorCode() const { + return orcError(OrcErrorCode::RPCCouldNotNegotiateFunction); +} + +void CouldNotNegotiate::log(raw_ostream &OS) const { + OS << "Could not negotiate RPC function " << Signature; +} + + +} // end namespace rpc +} // end namespace orc +} // end namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp index de73fbde8eb7b97cb1120e40e0d84f030ae0d8b8..99e84b7496d4e1d65dcbf4c9d8a45b73dd910d80 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp @@ -134,6 +134,18 @@ void RTDyldMemoryManager::deregisterEHFramesInProcess(uint8_t *Addr, #endif +void RTDyldMemoryManager::registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, + size_t Size) { + registerEHFramesInProcess(Addr, Size); + EHFrames.push_back({Addr, Size}); +} + +void RTDyldMemoryManager::deregisterEHFrames() { + for (auto &Frame : EHFrames) + deregisterEHFramesInProcess(Frame.Addr, Frame.Size); + EHFrames.clear(); +} + static int jit_noop() { return 0; } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index df9d2ceba3292ced81ad350133a7d84ca79674ea..2b69f1a0269fdeff05f19f936b26ac3f60d47889 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "RuntimeDyldCheckerImpl.h" #include "RuntimeDyldCOFF.h" +#include "RuntimeDyldCheckerImpl.h" #include "RuntimeDyldELF.h" #include "RuntimeDyldImpl.h" #include "RuntimeDyldMachO.h" -#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MutexGuard.h" @@ -73,7 +73,9 @@ namespace llvm { void RuntimeDyldImpl::registerEHFrames() {} -void RuntimeDyldImpl::deregisterEHFrames() {} +void RuntimeDyldImpl::deregisterEHFrames() { + MemMgr.deregisterEHFrames(); +} #ifndef NDEBUG static void dumpSectionMemory(const SectionEntry &S, StringRef State) { @@ -703,7 +705,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj, unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL; unsigned PaddingSize = 0; unsigned StubBufSize = 0; - bool IsRequired = isRequiredForExecution(Section) || ProcessAllSections; + bool IsRequired = isRequiredForExecution(Section); bool IsVirtual = Section.isVirtual(); bool IsZeroInit = isZeroInit(Section); bool IsReadOnly = isReadOnlyData(Section); @@ -743,8 +745,8 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj, Alignment = std::max(Alignment, getStubAlignment()); // Some sections, such as debug info, don't need to be loaded for execution. - // Leave those where they are. - if (IsRequired) { + // Process those only if explicitly requested. + if (IsRequired || ProcessAllSections) { Allocate = DataSize + PaddingSize + StubBufSize; if (!Allocate) Allocate = 1; @@ -788,6 +790,10 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj, Sections.push_back( SectionEntry(Name, Addr, DataSize, Allocate, (uintptr_t)pData)); + // Debug info sections are linked as if their load address was zero + if (!IsRequired) + Sections.back().setLoadAddress(0); + if (Checker) Checker->registerSection(Obj.getFileName(), SectionID); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 7bfa79445584902bd2eaee6491c2f7c9e732f6a4..e45fdc7aee18ad69b46c6bdf285554663bf5233c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -861,6 +861,15 @@ RuntimeDyldCheckerImpl::getSubsectionStartingAt(StringRef Name) const { SymInfo.getOffset()); } +Optional +RuntimeDyldCheckerImpl::getSectionLoadAddress(void *LocalAddress) const { + for (auto &S : getRTDyld().Sections) { + if (S.getAddress() == LocalAddress) + return S.getLoadAddress(); + } + return Optional(); +} + void RuntimeDyldCheckerImpl::registerSection( StringRef FilePath, unsigned SectionID) { StringRef FileName = sys::path::filename(FilePath); @@ -935,3 +944,8 @@ RuntimeDyldChecker::getSectionAddr(StringRef FileName, StringRef SectionName, bool LocalAddress) { return Impl->getSectionAddr(FileName, SectionName, LocalAddress); } + +Optional +RuntimeDyldChecker::getSectionLoadAddress(void *LocalAddress) const { + return Impl->getSectionLoadAddress(LocalAddress); +} diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h index b7263be09934aaa6f5115d52d0aa15f3fb5bf7d9..b462ef2c00cee6762a93bdec602640528115735e 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCheckerImpl.h @@ -60,6 +60,8 @@ private: bool IsInsideLoad) const; StringRef getSubsectionStartingAt(StringRef Name) const; + Optional getSectionLoadAddress(void *LocalAddr) const; + void registerSection(StringRef FilePath, unsigned SectionID); void registerStubMap(StringRef FilePath, unsigned SectionID, const RuntimeDyldImpl::StubMap &RTDyldStubs); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index f780137d0874f77663b50ba6717d3d9492732690..3d12eadea4dd77f335bd28583d2bca7c530c3062 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -18,10 +18,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/TargetRegistry.h" @@ -221,22 +221,10 @@ void RuntimeDyldELF::registerEHFrames() { uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); size_t EHFrameSize = Sections[EHFrameSID].getSize(); MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); - RegisteredEHFrameSections.push_back(EHFrameSID); } UnregisteredEHFrameSections.clear(); } -void RuntimeDyldELF::deregisterEHFrames() { - for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) { - SID EHFrameSID = RegisteredEHFrameSections[i]; - uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); - size_t EHFrameSize = Sections[EHFrameSID].getSize(); - MemMgr.deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); - } - RegisteredEHFrameSections.clear(); -} - std::unique_ptr llvm::RuntimeDyldELF::create(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MemMgr, @@ -749,23 +737,23 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt16BE(LocalAddress, applyPPCha(Delta)); } break; case ELF::R_PPC64_ADDR32: { - int32_t Result = static_cast(Value + Addend); - if (SignExtend32<32>(Result) != Result) + int64_t Result = static_cast(Value + Addend); + if (SignExtend64<32>(Result) != Result) llvm_unreachable("Relocation R_PPC64_ADDR32 overflow"); writeInt32BE(LocalAddress, Result); } break; case ELF::R_PPC64_REL24: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); - int32_t delta = static_cast(Value - FinalAddress + Addend); - if (SignExtend32<26>(delta) != delta) + int64_t delta = static_cast(Value - FinalAddress + Addend); + if (SignExtend64<26>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL24 overflow"); // Generates a 'bl
' instruction writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC)); } break; case ELF::R_PPC64_REL32: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); - int32_t delta = static_cast(Value - FinalAddress + Addend); - if (SignExtend32<32>(delta) != delta) + int64_t delta = static_cast(Value - FinalAddress + Addend); + if (SignExtend64<32>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL32 overflow"); writeInt32BE(LocalAddress, delta); } break; @@ -802,20 +790,63 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, writeInt32BE(LocalAddress, Delta / 2); break; } + case ELF::R_390_PC16: { + int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); + assert(int16_t(Delta) == Delta && "R_390_PC16 overflow"); + writeInt16BE(LocalAddress, Delta); + break; + } case ELF::R_390_PC32: { int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); assert(int32_t(Delta) == Delta && "R_390_PC32 overflow"); writeInt32BE(LocalAddress, Delta); break; } - case ELF::R_390_64: - writeInt64BE(LocalAddress, Value + Addend); - break; case ELF::R_390_PC64: { int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset); writeInt64BE(LocalAddress, Delta); break; } + case ELF::R_390_8: + *LocalAddress = (uint8_t)(Value + Addend); + break; + case ELF::R_390_16: + writeInt16BE(LocalAddress, Value + Addend); + break; + case ELF::R_390_32: + writeInt32BE(LocalAddress, Value + Addend); + break; + case ELF::R_390_64: + writeInt64BE(LocalAddress, Value + Addend); + break; + } +} + +void RuntimeDyldELF::resolveBPFRelocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend) { + bool isBE = Arch == Triple::bpfeb; + + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_BPF_NONE: + break; + case ELF::R_BPF_64_64: { + write(isBE, Section.getAddressWithOffset(Offset), Value + Addend); + DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at " + << format("%p\n", Section.getAddressWithOffset(Offset))); + break; + } + case ELF::R_BPF_64_32: { + Value += Addend; + assert(Value <= UINT32_MAX); + write(isBE, Section.getAddressWithOffset(Offset), static_cast(Value)); + DEBUG(dbgs() << "Writing " << format("%p", Value) << " at " + << format("%p\n", Section.getAddressWithOffset(Offset))); + break; + } } } @@ -879,6 +910,10 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, case Triple::systemz: resolveSystemZRelocation(Section, Offset, Value, Type, Addend); break; + case Triple::bpfel: + case Triple::bpfeb: + resolveBPFRelocation(Section, Offset, Value, Type, Addend); + break; default: llvm_unreachable("Unsupported CPU type!"); } @@ -1289,12 +1324,13 @@ RuntimeDyldELF::processRelocationRef( Obj.getPlatformFlags(AbiVariant); AbiVariant &= ELF::EF_PPC64_ABI; // A PPC branch relocation will need a stub function if the target is - // an external symbol (Symbol::ST_Unknown) or if the target address - // is not within the signed 24-bits branch address. + // an external symbol (either Value.SymbolName is set, or SymType is + // Symbol::ST_Unknown) or if the target address is not within the + // signed 24-bits branch address. SectionEntry &Section = Sections[SectionID]; uint8_t *Target = Section.getAddressWithOffset(Offset); bool RangeOverflow = false; - if (SymType != SymbolRef::ST_Unknown) { + if (!Value.SymbolName && SymType != SymbolRef::ST_Unknown) { if (AbiVariant != 2) { // In the ELFv1 ABI, a function call may point to the .opd entry, // so the final symbol value is calculated based on the relocation @@ -1309,21 +1345,19 @@ RuntimeDyldELF::processRelocationRef( } uint8_t *RelocTarget = Sections[Value.SectionID].getAddressWithOffset(Value.Addend); - int32_t delta = static_cast(Target - RelocTarget); + int64_t delta = static_cast(Target - RelocTarget); // If it is within 26-bits branch range, just set the branch target - if (SignExtend32<26>(delta) == delta) { + if (SignExtend64<26>(delta) == delta) { RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); - if (Value.SymbolName) - addRelocationForSymbol(RE, Value.SymbolName); - else - addRelocationForSection(RE, Value.SectionID); + addRelocationForSection(RE, Value.SectionID); } else { RangeOverflow = true; } } - if (SymType == SymbolRef::ST_Unknown || RangeOverflow) { - // It is an external symbol (SymbolRef::ST_Unknown) or within a range - // larger than 24-bits. + if (Value.SymbolName || SymType == SymbolRef::ST_Unknown || + RangeOverflow) { + // It is an external symbol (either Value.SymbolName is set, or + // SymType is SymbolRef::ST_Unknown) or out of range. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { // Symbol function stub already created, just relocate to it @@ -1377,7 +1411,7 @@ RuntimeDyldELF::processRelocationRef( RelType, 0); Section.advanceStubOffset(getMaxStubSize()); } - if (SymType == SymbolRef::ST_Unknown) { + if (Value.SymbolName || SymType == SymbolRef::ST_Unknown) { // Restore the TOC for external calls if (AbiVariant == 2) writeInt32BE(Target + 4, 0xE8410018); // ld r2,28(r1) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 498979705b775c71e89dd232e585581b4d9356a4..fb5da6dd8bbb7ef8d7a9eb98be1f8663a1ade4bc 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -58,6 +58,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveSystemZRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); + void resolveBPFRelocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + unsigned getMaxStubSize() override { if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) return 20; // movz; movk; movk; movk; br @@ -149,7 +152,6 @@ private: // in a table until we receive a request to register all unregistered // EH frame sections with the memory manager. SmallVector UnregisteredEHFrameSections; - SmallVector RegisteredEHFrameSections; // Map between GOT relocation value and corresponding GOT offset std::map GOTOffsetMap; @@ -177,7 +179,6 @@ public: StubMap &Stubs) override; bool isCompatibleFile(const object::ObjectFile &Obj) const override; void registerEHFrames() override; - void deregisterEHFrames() override; Error finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) override; }; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index f5cc883d98fdf189329f8a2da4662e219701f9cd..5268bc5a186842de059521c1c83b7cdac6d7e77f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -28,8 +28,8 @@ #include "llvm/Support/Mutex.h" #include "llvm/Support/SwapByteOrder.h" #include -#include #include +#include using namespace llvm; using namespace llvm::object; @@ -515,7 +515,7 @@ public: virtual void registerEHFrames(); - virtual void deregisterEHFrames(); + void deregisterEHFrames(); virtual Error finalizeLoad(const ObjectFile &ObjImg, ObjSectionToIDMap &SectionMap) { diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h index 0398413e1532c5faf44950267a10c61a9ce9408e..901f77865ba1854c45fdb04123c76e7c3f564011 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H -#include "llvm/Object/COFF.h" -#include "llvm/Support/COFF.h" #include "../RuntimeDyldCOFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" #define DEBUG_TYPE "dyld" @@ -217,7 +217,6 @@ public: } void registerEHFrames() override {} - void deregisterEHFrames() override {} }; } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h index 8c6af0bd9c6dc54778214e632fa46c6c93b8eede..3e4b0c8f75bb44f924a7f28ddde5e4e5df89d905 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFTHUMB_H #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFTHUMB_H -#include "llvm/Object/COFF.h" -#include "llvm/Support/COFF.h" #include "../RuntimeDyldCOFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" #define DEBUG_TYPE "dyld" @@ -316,7 +316,6 @@ public: } void registerEHFrames() override {} - void deregisterEHFrames() override {} }; } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index 109beb36f1eecf8dab2c4e3d1a585bbe3c11173d..7cbb438541519b2179ea0b3f589f0cae8aa3453a 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H #define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFF86_64_H -#include "llvm/Object/COFF.h" -#include "llvm/Support/COFF.h" #include "../RuntimeDyldCOFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/COFF.h" #define DEBUG_TYPE "dyld" @@ -194,9 +194,6 @@ public: } UnregisteredEHFrameSections.clear(); } - void deregisterEHFrames() override { - // Stub - } Error finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) override { // Look for and record the EH frame section IDs. diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp index cae4d69789a2b5c20344e61ba80731bdb50c7726..926996d6f7b3c40916b3bf0af8bbacb758618b83 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "RuntimeDyldELFMips.h" -#include "llvm/Support/ELF.h" +#include "llvm/BinaryFormat/ELF.h" #define DEBUG_TYPE "dyld" diff --git a/lib/ExecutionEngine/SectionMemoryManager.cpp b/lib/ExecutionEngine/SectionMemoryManager.cpp index 50478eac6827c14c85e6abe349cd0a024c763327..8904475f084f13033bbaa678f7bdcca78b53980c 100644 --- a/lib/ExecutionEngine/SectionMemoryManager.cpp +++ b/lib/ExecutionEngine/SectionMemoryManager.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/Config/config.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Process.h" diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt index 59cef04cdece1eb653bb9d446aa9c227fcd478b7..b886021aee3fd71bcf1208c5cb36f906f6406d86 100644 --- a/lib/Fuzzer/CMakeLists.txt +++ b/lib/Fuzzer/CMakeLists.txt @@ -1,6 +1,18 @@ -set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") -# Disable the coverage and sanitizer instrumentation for the fuzzer itself. -set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror") +include(CheckCXXSourceCompiles) + +if( APPLE ) + CHECK_CXX_SOURCE_COMPILES(" + static thread_local int blah; + int main() { + return 0; + } + " HAS_THREAD_LOCAL) + + if( NOT HAS_THREAD_LOCAL ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Dthread_local=__thread") + endif() +endif() + if( LLVM_USE_SANITIZE_COVERAGE ) if(NOT "${LLVM_USE_SANITIZER}" STREQUAL "Address") message(FATAL_ERROR @@ -8,41 +20,50 @@ if( LLVM_USE_SANITIZE_COVERAGE ) "LLVM_USE_SANITIZE_COVERAGE=YES to be set." ) endif() + set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") + + # Disable the coverage and sanitizer instrumentation for the fuzzer itself. + set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror") +endif() + +# Compile libFuzzer if the compilation is specifically requested, OR +# if the platform is known to be working. +if ( LLVM_USE_SANITIZE_COVERAGE OR CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" ) add_library(LLVMFuzzerNoMainObjects OBJECT - FuzzerCrossOver.cpp - FuzzerDriver.cpp - FuzzerExtFunctionsDlsym.cpp - FuzzerExtFunctionsDlsymWin.cpp - FuzzerExtFunctionsWeak.cpp - FuzzerExtraCounters.cpp - FuzzerIO.cpp - FuzzerIOPosix.cpp - FuzzerIOWindows.cpp - FuzzerLoop.cpp - FuzzerMerge.cpp - FuzzerMutate.cpp - FuzzerSHA1.cpp - FuzzerShmemPosix.cpp - FuzzerShmemWindows.cpp - FuzzerTracePC.cpp - FuzzerTraceState.cpp - FuzzerUtil.cpp - FuzzerUtilDarwin.cpp - FuzzerUtilLinux.cpp - FuzzerUtilPosix.cpp - FuzzerUtilWindows.cpp - ) + FuzzerCrossOver.cpp + FuzzerDriver.cpp + FuzzerExtFunctionsDlsym.cpp + FuzzerExtFunctionsDlsymWin.cpp + FuzzerExtFunctionsWeak.cpp + FuzzerExtraCounters.cpp + FuzzerIO.cpp + FuzzerIOPosix.cpp + FuzzerIOWindows.cpp + FuzzerLoop.cpp + FuzzerMerge.cpp + FuzzerMutate.cpp + FuzzerSHA1.cpp + FuzzerShmemPosix.cpp + FuzzerShmemWindows.cpp + FuzzerTracePC.cpp + FuzzerTraceState.cpp + FuzzerUtil.cpp + FuzzerUtilDarwin.cpp + FuzzerUtilLinux.cpp + FuzzerUtilPosix.cpp + FuzzerUtilWindows.cpp + ) add_library(LLVMFuzzerNoMain STATIC - $ - ) + $ + ) target_link_libraries(LLVMFuzzerNoMain ${LLVM_PTHREAD_LIB}) add_library(LLVMFuzzer STATIC - FuzzerMain.cpp - $ - ) + FuzzerMain.cpp + $ + ) target_link_libraries(LLVMFuzzer ${LLVM_PTHREAD_LIB}) +endif() - if( LLVM_INCLUDE_TESTS ) - add_subdirectory(test) - endif() +if( LLVM_USE_SANITIZE_COVERAGE AND LLVM_INCLUDE_TESTS ) + add_subdirectory(test) endif() diff --git a/lib/Fuzzer/FuzzerDefs.h b/lib/Fuzzer/FuzzerDefs.h index bd182750800257e33a308d08cb2a44223dc277f5..27f5719236dd87509ccc58ba89e156590afb2d87 100644 --- a/lib/Fuzzer/FuzzerDefs.h +++ b/lib/Fuzzer/FuzzerDefs.h @@ -36,17 +36,29 @@ #error "Support for your platform has not been implemented" #endif +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif + #define LIBFUZZER_POSIX LIBFUZZER_APPLE || LIBFUZZER_LINUX #ifdef __x86_64 -#define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) +# if __has_attribute(target) +# define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) +# else +# define ATTRIBUTE_TARGET_POPCNT +# endif #else -#define ATTRIBUTE_TARGET_POPCNT +# define ATTRIBUTE_TARGET_POPCNT #endif #ifdef __clang__ // avoid gcc warning. -# define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# if __has_attribute(no_sanitize) +# define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# else +# define ATTRIBUTE_NO_SANITIZE_MEMORY +# endif # define ALWAYS_INLINE __attribute__((always_inline)) #else # define ATTRIBUTE_NO_SANITIZE_MEMORY diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 0fb83ca64de618e1a52dd5820ab775b9b08bf61a..0453a7f443b53cc0884e1215fcf5500e53822ff4 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -10,9 +10,9 @@ //===----------------------------------------------------------------------===// #include "FuzzerCorpus.h" +#include "FuzzerIO.h" #include "FuzzerInterface.h" #include "FuzzerInternal.h" -#include "FuzzerIO.h" #include "FuzzerMutate.h" #include "FuzzerRandom.h" #include "FuzzerShmem.h" @@ -149,7 +149,7 @@ static bool ParseOneFlag(const char *Param) { int Val = MyStol(Str); *FlagDescriptions[F].IntFlag = Val; if (Flags.verbosity >= 2) - Printf("Flag: %s %d\n", Name, Val);; + Printf("Flag: %s %d\n", Name, Val); return true; } else if (FlagDescriptions[F].UIntFlag) { unsigned int Val = std::stoul(Str); @@ -289,6 +289,66 @@ static std::string GetDedupTokenFromFile(const std::string &Path) { return S.substr(Beg, End - Beg); } +int CleanseCrashInput(const std::vector &Args, + const FuzzingOptions &Options) { + if (Inputs->size() != 1 || !Flags.exact_artifact_path) { + Printf("ERROR: -cleanse_crash should be given one input file and" + " -exact_artifact_path\n"); + exit(1); + } + std::string InputFilePath = Inputs->at(0); + std::string OutputFilePath = Flags.exact_artifact_path; + std::string BaseCmd = + CloneArgsWithoutX(Args, "cleanse_crash", "cleanse_crash"); + + auto InputPos = BaseCmd.find(" " + InputFilePath + " "); + assert(InputPos != std::string::npos); + BaseCmd.erase(InputPos, InputFilePath.size() + 1); + + auto LogFilePath = DirPlusFile( + TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".txt"); + auto TmpFilePath = DirPlusFile( + TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".repro"); + auto LogFileRedirect = " > " + LogFilePath + " 2>&1 "; + + auto Cmd = BaseCmd + " " + TmpFilePath + LogFileRedirect; + + std::string CurrentFilePath = InputFilePath; + auto U = FileToVector(CurrentFilePath); + size_t Size = U.size(); + + const std::vector ReplacementBytes = {' ', 0xff}; + for (int NumAttempts = 0; NumAttempts < 5; NumAttempts++) { + bool Changed = false; + for (size_t Idx = 0; Idx < Size; Idx++) { + Printf("CLEANSE[%d]: Trying to replace byte %zd of %zd\n", NumAttempts, + Idx, Size); + uint8_t OriginalByte = U[Idx]; + if (ReplacementBytes.end() != std::find(ReplacementBytes.begin(), + ReplacementBytes.end(), + OriginalByte)) + continue; + for (auto NewByte : ReplacementBytes) { + U[Idx] = NewByte; + WriteToFile(U, TmpFilePath); + auto ExitCode = ExecuteCommand(Cmd); + RemoveFile(TmpFilePath); + if (!ExitCode) { + U[Idx] = OriginalByte; + } else { + Changed = true; + Printf("CLEANSE: Replaced byte %zd with 0x%x\n", Idx, NewByte); + WriteToFile(U, OutputFilePath); + break; + } + } + } + if (!Changed) break; + } + RemoveFile(LogFilePath); + return 0; +} + int MinimizeCrashInput(const std::vector &Args, const FuzzingOptions &Options) { if (Inputs->size() != 1) { @@ -493,12 +553,12 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs); const size_t kMaxSaneLen = 1 << 20; - const size_t kMinDefaultLen = 64; + const size_t kMinDefaultLen = 4096; FuzzingOptions Options; Options.Verbosity = Flags.verbosity; Options.MaxLen = Flags.max_len; Options.ExperimentalLenControl = Flags.experimental_len_control; - if (Flags.experimental_len_control && Flags.max_len == 64) + if (Flags.experimental_len_control && Flags.max_len == kMinDefaultLen) Options.MaxLen = 1 << 20; Options.UnitTimeoutSec = Flags.timeout; Options.ErrorExitCode = Flags.error_exitcode; @@ -516,7 +576,6 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.PreferSmall = Flags.prefer_small; Options.ReloadIntervalSec = Flags.reload; Options.OnlyASCII = Flags.only_ascii; - Options.OutputCSV = Flags.output_csv; Options.DetectLeaks = Flags.detect_leaks; Options.TraceMalloc = Flags.trace_malloc; Options.RssLimitMb = Flags.rss_limit_mb; @@ -583,6 +642,9 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { if (Flags.minimize_crash_internal_step) return MinimizeCrashInputInternalStep(F, Corpus); + if (Flags.cleanse_crash) + return CleanseCrashInput(Args, Options); + if (auto Name = Flags.run_equivalence_server) { SMR.Destroy(Name); if (!SMR.Create(Name)) { @@ -594,7 +656,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { SMR.WaitClient(); size_t Size = SMR.ReadByteArraySize(); SMR.WriteByteArray(nullptr, 0); - F->RunOne(SMR.GetByteArray(), Size); + const Unit tmp(SMR.GetByteArray(), SMR.GetByteArray() + Size); + F->RunOne(tmp.data(), tmp.size()); SMR.PostServer(); } return 0; diff --git a/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp b/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp index 77521698c80a4decfcb2dc8e5c2e841ee2a8dfbe..321b3ec5d41405d245232f64d28c1ec7d8d5c761 100644 --- a/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp +++ b/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp @@ -14,6 +14,8 @@ #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" #include "Windows.h" + +// This must be included after Windows.h. #include "Psapi.h" namespace fuzzer { diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 28bf0ca8ce691fbc39d45e4a6b88480731616355..7ff196c8fa9608a77699949a7c278cf782c2d114 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -48,7 +48,15 @@ FUZZER_FLAG_STRING(load_coverage_summary, "Experimental:" " Used with -merge=1") FUZZER_FLAG_INT(minimize_crash, 0, "If 1, minimizes the provided" " crash input. Use with -runs=N or -max_total_time=N to limit " - "the number attempts") + "the number attempts." + " Use with -exact_artifact_path to specify the output." + " Combine with ASAN_OPTIONS=dedup_token_length=3 (or similar) to ensure that" + " the minimized input triggers the same crash." + ) +FUZZER_FLAG_INT(cleanse_crash, 0, "If 1, tries to cleanse the provided" + " crash input to make it contain fewer original bytes." + " Use with -exact_artifact_path to specify the output." + ) FUZZER_FLAG_INT(minimize_crash_internal_step, 0, "internal flag") FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters") FUZZER_FLAG_INT(use_indir_calls, 1, "Use indirect caller-callee counters") @@ -80,15 +88,14 @@ FUZZER_FLAG_STRING(exact_artifact_path, "as $(exact_artifact_path). This overrides -artifact_prefix " "and will not use checksum in the file name. Do not " "use the same path for several parallel processes.") -FUZZER_FLAG_INT(output_csv, 0, "Enable pulse output in CSV format.") FUZZER_FLAG_INT(print_pcs, 0, "If 1, print out newly covered PCs.") FUZZER_FLAG_INT(print_final_stats, 0, "If 1, print statistics at exit.") FUZZER_FLAG_INT(print_corpus_stats, 0, "If 1, print statistics on corpus elements at exit.") -FUZZER_FLAG_INT(print_coverage, 0, "If 1, print coverage information at exit." - " Experimental, only with trace-pc-guard") -FUZZER_FLAG_INT(dump_coverage, 0, "If 1, dump coverage information at exit." - " Experimental, only with trace-pc-guard") +FUZZER_FLAG_INT(print_coverage, 0, "If 1, print coverage information as text" + " at exit.") +FUZZER_FLAG_INT(dump_coverage, 0, "If 1, dump coverage information as a" + " .sancov file at exit.") FUZZER_FLAG_INT(handle_segv, 1, "If 1, try to intercept SIGSEGV.") FUZZER_FLAG_INT(handle_bus, 1, "If 1, try to intercept SIGBUS.") FUZZER_FLAG_INT(handle_abrt, 1, "If 1, try to intercept SIGABRT.") @@ -124,3 +131,4 @@ FUZZER_DEPRECATED_FLAG(sync_timeout) FUZZER_DEPRECATED_FLAG(test_single_input) FUZZER_DEPRECATED_FLAG(drill) FUZZER_DEPRECATED_FLAG(truncate_units) +FUZZER_DEPRECATED_FLAG(output_csv) diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index c26615631ecd013856d574f7516e6f66c17d9012..5f184c2316e2ac47c3da8bf2665200a510354f7b 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -91,6 +91,7 @@ public: private: void AlarmCallback(); void CrashCallback(); + void CrashOnOverwrittenData(); void InterruptCallback(); void MutateAndTestOne(); void ReportNewCoverage(InputInfo *II, const Unit &U); @@ -145,6 +146,6 @@ private: static thread_local bool IsMyThread; }; -}; // namespace fuzzer +} // namespace fuzzer #endif // LLVM_FUZZER_INTERNAL_H diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 704092896eb6691a96c4711ecc5c33a6a2e40d7e..fbf18357ede65de76c03dcdac077334d05b36c70 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -10,8 +10,8 @@ //===----------------------------------------------------------------------===// #include "FuzzerCorpus.h" -#include "FuzzerInternal.h" #include "FuzzerIO.h" +#include "FuzzerInternal.h" #include "FuzzerMutate.h" #include "FuzzerRandom.h" #include "FuzzerShmem.h" @@ -199,7 +199,7 @@ void Fuzzer::CrashCallback() { Printf("SUMMARY: libFuzzer: deadly signal\n"); DumpCurrentUnit("crash-"); PrintFinalStats(); - exit(Options.ErrorExitCode); + _Exit(Options.ErrorExitCode); // Stop right now. } void Fuzzer::InterruptCallback() { @@ -253,17 +253,6 @@ void Fuzzer::RssLimitCallback() { void Fuzzer::PrintStats(const char *Where, const char *End, size_t Units) { size_t ExecPerSec = execPerSec(); - if (Options.OutputCSV) { - static bool csvHeaderPrinted = false; - if (!csvHeaderPrinted) { - csvHeaderPrinted = true; - Printf("runs,block_cov,bits,cc_cov,corpus,execs_per_sec,tbms,reason\n"); - } - Printf("%zd,%zd,%zd,%zd,%s\n", TotalNumberOfRuns, - TPC.GetTotalPCCoverage(), - Corpus.size(), ExecPerSec, Where); - } - if (!Options.Verbosity) return; Printf("#%zd\t%s", TotalNumberOfRuns, Where); @@ -312,7 +301,9 @@ void Fuzzer::SetMaxInputLen(size_t MaxInputLen) { this->MaxInputLen = MaxInputLen; this->MaxMutationLen = MaxInputLen; AllocateCurrentUnitData(); - Printf("INFO: -max_len is not provided, using %zd\n", MaxInputLen); + Printf("INFO: -max_len is not provided; " + "libFuzzer will not generate inputs larger than %zd bytes\n", + MaxInputLen); } void Fuzzer::SetMaxMutationLen(size_t MaxMutationLen) { @@ -433,6 +424,24 @@ size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const { return CurrentUnitSize; } +void Fuzzer::CrashOnOverwrittenData() { + Printf("==%d== ERROR: libFuzzer: fuzz target overwrites it's const input\n", + GetPid()); + DumpCurrentUnit("crash-"); + Printf("SUMMARY: libFuzzer: out-of-memory\n"); + _Exit(Options.ErrorExitCode); // Stop right now. +} + +// Compare two arrays, but not all bytes if the arrays are large. +static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) { + const size_t Limit = 64; + if (Size <= 64) + return !memcmp(A, B, Size); + // Compare first and last Limit/2 bytes. + return !memcmp(A, B, Limit / 2) && + !memcmp(A + Size - Limit / 2, B + Size - Limit / 2, Limit / 2); +} + void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { assert(InFuzzingThread()); if (SMR.IsClient()) @@ -454,6 +463,8 @@ void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { (void)Res; assert(Res == 0); HasMoreMallocsThanFrees = AllocTracer.Stop(); + if (!LooseMemeq(DataCopy, Data, Size)) + CrashOnOverwrittenData(); CurrentUnitSize = 0; delete[] DataCopy; } diff --git a/lib/Fuzzer/FuzzerMerge.cpp b/lib/Fuzzer/FuzzerMerge.cpp index e66460c29e2f8f69247d774d3c1e934b6a28dd44..612f4bbb28f21e6c5b76efa9945e9f02ec380924 100644 --- a/lib/Fuzzer/FuzzerMerge.cpp +++ b/lib/Fuzzer/FuzzerMerge.cpp @@ -9,9 +9,9 @@ // Merging corpora. //===----------------------------------------------------------------------===// -#include "FuzzerInternal.h" -#include "FuzzerIO.h" #include "FuzzerMerge.h" +#include "FuzzerIO.h" +#include "FuzzerInternal.h" #include "FuzzerTracePC.h" #include "FuzzerUtil.h" diff --git a/lib/Fuzzer/FuzzerMerge.h b/lib/Fuzzer/FuzzerMerge.h index cf4a0863571d70d29494289bf670edba8a0621d5..dd4c37b6e39c76771c36e7e5fba1cacdd7152c60 100644 --- a/lib/Fuzzer/FuzzerMerge.h +++ b/lib/Fuzzer/FuzzerMerge.h @@ -69,7 +69,7 @@ struct Merger { size_t Merge(const std::set &InitialFeatures, std::vector *NewFiles); size_t Merge(std::vector *NewFiles) { - return Merge({}, NewFiles); + return Merge(std::set{}, NewFiles); } size_t ApproximateMemoryConsumption() const; std::set AllFeatures() const; diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp index cd846c7deec5086ef0ee7fadfca46efa699f655a..53cb9027e455ffeffd26f2915fbc0fd1de1cdbf2 100644 --- a/lib/Fuzzer/FuzzerMutate.cpp +++ b/lib/Fuzzer/FuzzerMutate.cpp @@ -9,11 +9,11 @@ // Mutate a test input. //===----------------------------------------------------------------------===// +#include "FuzzerMutate.h" #include "FuzzerCorpus.h" #include "FuzzerDefs.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" -#include "FuzzerMutate.h" #include "FuzzerOptions.h" namespace fuzzer { @@ -217,11 +217,12 @@ DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( size_t NumPositions = 0; for (const uint8_t *Cur = Data; Cur < End && NumPositions < kMaxNumPositions; Cur++) { - Cur = (uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize); + Cur = + (const uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize); if (!Cur) break; Positions[NumPositions++] = Cur - Data; } - if (!NumPositions) break; + if (!NumPositions) continue; return DictionaryEntry(W, Positions[Rand(NumPositions)]); } DictionaryEntry DE(W); diff --git a/lib/Fuzzer/FuzzerOptions.h b/lib/Fuzzer/FuzzerOptions.h index 872def0326f088ddf763664b3f5ca05c143cf784..b1366789be007b2d82d27af72c4eaa232f27001f 100644 --- a/lib/Fuzzer/FuzzerOptions.h +++ b/lib/Fuzzer/FuzzerOptions.h @@ -45,7 +45,6 @@ struct FuzzingOptions { std::string ExitOnItem; bool SaveArtifacts = true; bool PrintNEW = true; // Print a status line when new units are found; - bool OutputCSV = false; bool PrintNewCovPcs = false; bool PrintFinalStats = false; bool PrintCorpusStats = false; diff --git a/lib/Fuzzer/FuzzerShmemPosix.cpp b/lib/Fuzzer/FuzzerShmemPosix.cpp index 2723bdd86f487518ba3fa0439f02e7911f44f477..50cdcfb509dc259281b765de558f9438d5758460 100644 --- a/lib/Fuzzer/FuzzerShmemPosix.cpp +++ b/lib/Fuzzer/FuzzerShmemPosix.cpp @@ -14,14 +14,14 @@ #include "FuzzerIO.h" #include "FuzzerShmem.h" -#include -#include #include #include -#include #include #include #include +#include +#include +#include #include namespace fuzzer { diff --git a/lib/Fuzzer/FuzzerShmemWindows.cpp b/lib/Fuzzer/FuzzerShmemWindows.cpp index 6325b4b8e5b4129bd240f5a5f23ba04ab6e565d6..d330ebf4fd07a98448b94e57490824fd6072cc17 100644 --- a/lib/Fuzzer/FuzzerShmemWindows.cpp +++ b/lib/Fuzzer/FuzzerShmemWindows.cpp @@ -14,10 +14,10 @@ #include "FuzzerIO.h" #include "FuzzerShmem.h" -#include -#include #include #include +#include +#include namespace fuzzer { diff --git a/lib/Fuzzer/FuzzerTracePC.cpp b/lib/Fuzzer/FuzzerTracePC.cpp index ce0f7a47eee64bfdc6a0b6dee2aa741455e16667..6f5c7be4106216714dc024af9ae53c368f1621ce 100644 --- a/lib/Fuzzer/FuzzerTracePC.cpp +++ b/lib/Fuzzer/FuzzerTracePC.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "FuzzerTracePC.h" #include "FuzzerCorpus.h" #include "FuzzerDefs.h" #include "FuzzerDictionary.h" #include "FuzzerExtFunctions.h" #include "FuzzerIO.h" -#include "FuzzerTracePC.h" #include "FuzzerUtil.h" #include "FuzzerValueBitMap.h" #include @@ -53,6 +53,17 @@ size_t TracePC::GetTotalPCCoverage() { return Res; } + +void TracePC::HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop) { + if (Start == Stop) return; + if (NumModulesWithInline8bitCounters && + ModuleCounters[NumModulesWithInline8bitCounters-1].Start == Start) return; + assert(NumModulesWithInline8bitCounters < + sizeof(ModuleCounters) / sizeof(ModuleCounters[0])); + ModuleCounters[NumModulesWithInline8bitCounters++] = {Start, Stop}; + NumInline8bitCounters += Stop - Start; +} + void TracePC::HandleInit(uint32_t *Start, uint32_t *Stop) { if (Start == Stop || *Start) return; assert(NumModules < sizeof(Modules) / sizeof(Modules[0])); @@ -76,6 +87,13 @@ void TracePC::PrintModuleInfo() { for (size_t i = 0; i < NumModules; i++) Printf("[%p, %p), ", Modules[i].Start, Modules[i].Stop); Printf("\n"); + if (NumModulesWithInline8bitCounters) { + Printf("INFO: Loaded %zd modules with %zd inline 8-bit counters\n", + NumModulesWithInline8bitCounters, NumInline8bitCounters); + for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) + Printf("[%p, %p), ", ModuleCounters[i].Start, ModuleCounters[i].Stop); + Printf("\n"); + } } ATTRIBUTE_NO_SANITIZE_ALL @@ -303,6 +321,11 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start, uint32_t *Stop) { fuzzer::TPC.HandleInit(Start, Stop); } +ATTRIBUTE_INTERFACE +void __sanitizer_cov_8bit_counters_init(uint8_t *Start, uint8_t *Stop) { + fuzzer::TPC.HandleInline8bitCountersInit(Start, Stop); +} + ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_ALL void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) { diff --git a/lib/Fuzzer/FuzzerTracePC.h b/lib/Fuzzer/FuzzerTracePC.h index 6523fa06005c48ace8f865a857b10f383c607cf5..5ec8c590b4df63fc1a5fc7af4aab7b06d71ea268 100644 --- a/lib/Fuzzer/FuzzerTracePC.h +++ b/lib/Fuzzer/FuzzerTracePC.h @@ -51,7 +51,8 @@ class TracePC { // How many bits of PC are used from __sanitizer_cov_trace_pc. static const size_t kTracePcBits = 18; - void HandleInit(uint32_t *start, uint32_t *stop); + void HandleInit(uint32_t *Start, uint32_t *Stop); + void HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop); void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee); template void HandleCmp(uintptr_t PC, T Arg1, T Arg2); size_t GetTotalPCCoverage(); @@ -104,6 +105,10 @@ private: size_t NumModules; // linker-initialized. size_t NumGuards; // linker-initialized. + struct { uint8_t *Start, *Stop; } ModuleCounters[4096]; + size_t NumModulesWithInline8bitCounters; // linker-initialized. + size_t NumInline8bitCounters; + uint8_t *Counters() const; uintptr_t *PCs() const; @@ -118,12 +123,24 @@ void ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End, size_t FirstFeature, Callback Handle8bitCounter) { typedef uintptr_t LargeType; const size_t Step = sizeof(LargeType) / sizeof(uint8_t); - assert(!(reinterpret_cast(Begin) % 64)); - for (auto P = Begin; P < End; P += Step) + const size_t StepMask = Step - 1; + auto P = Begin; + // Iterate by 1 byte until either the alignment boundary or the end. + for (; reinterpret_cast(P) & StepMask && P < End; P++) + if (uint8_t V = *P) + Handle8bitCounter(FirstFeature + P - Begin, V); + + // Iterate by Step bytes at a time. + for (; P < End; P += Step) if (LargeType Bundle = *reinterpret_cast(P)) for (size_t I = 0; I < Step; I++, Bundle >>= 8) if (uint8_t V = Bundle & 0xff) Handle8bitCounter(FirstFeature + P - Begin + I, V); + + // Iterate by 1 byte until the end. + for (; P < End; P++) + if (uint8_t V = *P) + Handle8bitCounter(FirstFeature + P - Begin, V); } template // bool Callback(size_t Feature) @@ -145,8 +162,16 @@ void TracePC::CollectFeatures(Callback HandleFeature) const { HandleFeature(Idx * 8 + Bit); }; - ForEachNonZeroByte(Counters, Counters + N, 0, Handle8bitCounter); - ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), N * 8, + size_t FirstFeature = 0; + ForEachNonZeroByte(Counters, Counters + N, FirstFeature, Handle8bitCounter); + FirstFeature += N * 8; + for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) { + ForEachNonZeroByte(ModuleCounters[i].Start, ModuleCounters[i].Stop, + FirstFeature, Handle8bitCounter); + FirstFeature += 8 * (ModuleCounters[i].Stop - ModuleCounters[i].Start); + } + + ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), FirstFeature, Handle8bitCounter); if (UseValueProfile) diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp index a486223d650c93d8605fb9df031f8dcdf9a4b45d..8670e2ad67277191d63a2b76ffd26b5ab42b53a7 100644 --- a/lib/Fuzzer/FuzzerTraceState.cpp +++ b/lib/Fuzzer/FuzzerTraceState.cpp @@ -10,8 +10,8 @@ //===----------------------------------------------------------------------===// #include "FuzzerDictionary.h" -#include "FuzzerInternal.h" #include "FuzzerIO.h" +#include "FuzzerInternal.h" #include "FuzzerMutate.h" #include "FuzzerTracePC.h" #include diff --git a/lib/Fuzzer/FuzzerUtilPosix.cpp b/lib/Fuzzer/FuzzerUtilPosix.cpp index 0161309fbf86b12f3a4dae0e6c417854dc5d699d..bc85264ac187d2588f594d1d1470e90371ce1684 100644 --- a/lib/Fuzzer/FuzzerUtilPosix.cpp +++ b/lib/Fuzzer/FuzzerUtilPosix.cpp @@ -47,8 +47,21 @@ static void FileSizeExceedHandler(int, siginfo_t *, void *) { static void SetSigaction(int signum, void (*callback)(int, siginfo_t *, void *)) { - struct sigaction sigact; - memset(&sigact, 0, sizeof(sigact)); + struct sigaction sigact = {}; + if (sigaction(signum, nullptr, &sigact)) { + Printf("libFuzzer: sigaction failed with %d\n", errno); + exit(1); + } + if (sigact.sa_flags & SA_SIGINFO) { + if (sigact.sa_sigaction) + return; + } else { + if (sigact.sa_handler != SIG_DFL && sigact.sa_handler != SIG_IGN && + sigact.sa_handler != SIG_ERR) + return; + } + + sigact = {}; sigact.sa_sigaction = callback; if (sigaction(signum, &sigact, 0)) { Printf("libFuzzer: sigaction failed with %d\n", errno); diff --git a/lib/Fuzzer/FuzzerUtilWindows.cpp b/lib/Fuzzer/FuzzerUtilWindows.cpp index 08bb3cf3be157b3a98055f751d4f3ed412f0433f..25ac976fc2dbbc21850bcff08e0fba8f60313f6f 100644 --- a/lib/Fuzzer/FuzzerUtilWindows.cpp +++ b/lib/Fuzzer/FuzzerUtilWindows.cpp @@ -22,6 +22,8 @@ #include #include #include + +// This must be included after windows.h. #include namespace fuzzer { diff --git a/lib/Fuzzer/afl/afl_driver.cpp b/lib/Fuzzer/afl/afl_driver.cpp index b3a54e57fcebdbe2c4980109a9928b1b747904fa..d0521bdfdd67b64d7911440b3437a106c31a48ab 100644 --- a/lib/Fuzzer/afl/afl_driver.cpp +++ b/lib/Fuzzer/afl/afl_driver.cpp @@ -12,8 +12,8 @@ Usage: ################################################################################ cat << EOF > test_fuzzer.cc -#include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (size > 0 && data[0] == 'H') if (size > 1 && data[1] == 'I') @@ -50,15 +50,20 @@ statistics from the file. If that fails then the process will quit. */ #include -#include +#include +#include #include +#include #include #include -#include -#include -#include #include #include +#include + +#include +#include +#include + // Platform detection. Copied from FuzzerInternal.h #ifdef __linux__ #define LIBFUZZER_LINUX 1 @@ -245,17 +250,39 @@ extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { return 0; } +// Execute any files provided as parameters. +int ExecuteFilesOnyByOne(int argc, char **argv) { + for (int i = 1; i < argc; i++) { + std::ifstream in(argv[i]); + in.seekg(0, in.end); + size_t length = in.tellg(); + in.seekg (0, in.beg); + std::cout << "Reading " << length << " bytes from " << argv[i] << std::endl; + // Allocate exactly length bytes so that we reliably catch buffer overflows. + std::vector bytes(length); + in.read(bytes.data(), bytes.size()); + assert(in); + LLVMFuzzerTestOneInput(reinterpret_cast(bytes.data()), + bytes.size()); + std::cout << "Execution successfull" << std::endl; + } + return 0; +} + int main(int argc, char **argv) { - fprintf(stderr, "======================= INFO =========================\n" - "This binary is built for AFL-fuzz.\n" - "To run the target function on a single input execute this:\n" - " %s < INPUT_FILE\n" - "To run the fuzzing execute this:\n" - " afl-fuzz [afl-flags] %s [N] " - "-- run N fuzzing iterations before " - "re-spawning the process (default: 1000)\n" - "======================================================\n", - argv[0], argv[0]); + fprintf(stderr, + "======================= INFO =========================\n" + "This binary is built for AFL-fuzz.\n" + "To run the target function on individual input(s) execute this:\n" + " %s < INPUT_FILE\n" + "or\n" + " %s INPUT_FILE1 [INPUT_FILE2 ... ]\n" + "To fuzz with afl-fuzz execute this:\n" + " afl-fuzz [afl-flags] %s [-N]\n" + "afl-fuzz will run N iterations before " + "re-spawning the process (default: 1000)\n" + "======================================================\n", + argv[0], argv[0], argv[0]); if (LLVMFuzzerInitialize) LLVMFuzzerInitialize(&argc, &argv); // Do any other expensive one-time initialization here. @@ -266,8 +293,14 @@ int main(int argc, char **argv) { __afl_manual_init(); int N = 1000; - if (argc >= 2) - N = atoi(argv[1]); + if (argc == 2 && argv[1][0] == '-') + N = atoi(argv[1] + 1); + else if(argc == 2 && (N = atoi(argv[1])) > 0) + fprintf(stderr, "WARNING: using the deprecated call style `%s %d`\n", + argv[0], N); + else if (argc > 1) + return ExecuteFilesOnyByOne(argc, argv); + assert(N > 0); time_t unit_time_secs; int num_runs = 0; diff --git a/lib/Fuzzer/test/AFLDriverTest.cpp b/lib/Fuzzer/test/AFLDriverTest.cpp index 3dd0b61173057b09d44b5570af1bbb93e169393a..b949adc7de159a773468367eeb62af092d5f9a00 100644 --- a/lib/Fuzzer/test/AFLDriverTest.cpp +++ b/lib/Fuzzer/test/AFLDriverTest.cpp @@ -3,20 +3,26 @@ // Contains dummy functions used to avoid dependency on AFL. #include +#include #include extern "C" void __afl_manual_init() {} -extern "C" int __afl_persistent_loop(unsigned int) { +extern "C" int __afl_persistent_loop(unsigned int N) { + static int Count = N; + fprintf(stderr, "__afl_persistent_loop calle, Count = %d\n", Count); + if (Count--) return 1; return 0; } // This declaration exists to prevent the Darwin linker // from complaining about this being a missing weak symbol. extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) { + fprintf(stderr, "LLVMFuzzerInitialize called\n"); return 0; } extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + fprintf(stderr, "LLVMFuzzerTestOneInput called; Size = %zd\n", Size); return 0; } diff --git a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp index 69b0d59fb8eff432ddd5e62a33c0000b8f0f90e3..b5a61ddca715400eeeaad87e8e945a6975544d3b 100644 --- a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp +++ b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp @@ -2,14 +2,14 @@ // License. See LICENSE.TXT for details. // abs(x) < 0 and y == Const puzzle, 64-bit variant. -#include -#include -#include #include +#include #include +#include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size < 16) return 0; + if (Size < 16 || Size > 64) return 0; int64_t x; uint64_t y; memcpy(&x, Data, sizeof(x)); diff --git a/lib/Fuzzer/test/AbsNegAndConstantTest.cpp b/lib/Fuzzer/test/AbsNegAndConstantTest.cpp index 69075a454c99b1d7de31c4a04c6a3f453944698e..e9d983ff1ebfe16cc9c3c834e6836bbd6921bf87 100644 --- a/lib/Fuzzer/test/AbsNegAndConstantTest.cpp +++ b/lib/Fuzzer/test/AbsNegAndConstantTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // abs(x) < 0 and y == Const puzzle. -#include -#include -#include #include +#include #include +#include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size < 8) return 0; diff --git a/lib/Fuzzer/test/AccumulateAllocationsTest.cpp b/lib/Fuzzer/test/AccumulateAllocationsTest.cpp index 604d8fa299aeb099167ead49412c74d3192ae030..e9acd7ccbd30fb05627dbd9398e78c20656238cd 100644 --- a/lib/Fuzzer/test/AccumulateAllocationsTest.cpp +++ b/lib/Fuzzer/test/AccumulateAllocationsTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Test with a more mallocs than frees, but no leak. -#include #include +#include const int kAllocatedPointersSize = 10000; int NumAllocatedPointers = 0; diff --git a/lib/Fuzzer/test/BadStrcmpTest.cpp b/lib/Fuzzer/test/BadStrcmpTest.cpp index 159cd7ea5f7081def7d3a3401b01602fea98069b..ba2b068f741d445c228065b70c9bba76a4437b5d 100644 --- a/lib/Fuzzer/test/BadStrcmpTest.cpp +++ b/lib/Fuzzer/test/BadStrcmpTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Test that we don't creash in case of bad strcmp params. +#include #include #include -#include static volatile int Sink; diff --git a/lib/Fuzzer/test/BufferOverflowOnInput.cpp b/lib/Fuzzer/test/BufferOverflowOnInput.cpp index b9d14052aee4724cd5decbe24e01864f0608d443..75e1fb90a19a87bbffbc528e1664b94076406c57 100644 --- a/lib/Fuzzer/test/BufferOverflowOnInput.cpp +++ b/lib/Fuzzer/test/BufferOverflowOnInput.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. The fuzzer must find the string "Hi!". #include +#include #include #include -#include #include static volatile bool SeedLargeBuffer; diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index f72bc3909a3cf6fd6f5b439bf3a865cf8f6c45de..1cf6c9502a2b5f566caed81d3356462722e2f319 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -80,6 +80,7 @@ set(Tests BogusInitializeTest BufferOverflowOnInput CallerCalleeTest + CleanseTest CounterTest CustomCrossOverAndMutateTest CustomCrossOverTest @@ -103,6 +104,7 @@ set(Tests OneHugeAllocTest OutOfMemoryTest OutOfMemorySingleLargeMallocTest + OverwriteInputTest RepeatedMemcmp RepeatedBytesTest SimpleCmpTest @@ -204,6 +206,9 @@ include_directories(..) add_subdirectory(no-coverage) add_subdirectory(trace-pc) add_subdirectory(ubsan) +if (NOT MSVC) + add_subdirectory(inline-8bit-counters) +endif() add_library(LLVMFuzzer-DSO1 SHARED DSO1.cpp) add_library(LLVMFuzzer-DSO2 SHARED DSO2.cpp) diff --git a/lib/Fuzzer/test/CallerCalleeTest.cpp b/lib/Fuzzer/test/CallerCalleeTest.cpp index 3ec025d02301d800d5a23f8c4f1f6cbed564261d..ed9f37cc15218f53442ba3a464be93322fdc55e5 100644 --- a/lib/Fuzzer/test/CallerCalleeTest.cpp +++ b/lib/Fuzzer/test/CallerCalleeTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. // Try to find the target using the indirect caller-callee pairs. +#include #include #include -#include #include #include diff --git a/lib/Fuzzer/test/CleanseTest.cpp b/lib/Fuzzer/test/CleanseTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ee18457012693c68266d4a7a7cd7bba7a7e5496f --- /dev/null +++ b/lib/Fuzzer/test/CleanseTest.cpp @@ -0,0 +1,16 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// Test the the fuzzer is able to 'cleanse' the reproducer +// by replacing all irrelevant bytes with garbage. +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size >= 20 && Data[1] == '1' && Data[5] == '5' && Data[10] == 'A' && + Data[19] == 'Z') + abort(); + return 0; +} + diff --git a/lib/Fuzzer/test/CustomMutatorTest.cpp b/lib/Fuzzer/test/CustomMutatorTest.cpp index 4f84519a90e64624dce98dce96487d7e4bc64398..521d7f506b4d3c53987e4470fb368462df342662 100644 --- a/lib/Fuzzer/test/CustomMutatorTest.cpp +++ b/lib/Fuzzer/test/CustomMutatorTest.cpp @@ -3,9 +3,9 @@ // Simple test for a cutom mutator. #include +#include #include #include -#include #include #include "FuzzerInterface.h" diff --git a/lib/Fuzzer/test/CxxStringEqTest.cpp b/lib/Fuzzer/test/CxxStringEqTest.cpp index e0e23c972ccbacff501387cf6726a6be37d1a9f0..924851c5ad536d8d7f8d7d43d231cf746359b9df 100644 --- a/lib/Fuzzer/test/CxxStringEqTest.cpp +++ b/lib/Fuzzer/test/CxxStringEqTest.cpp @@ -3,11 +3,11 @@ // Simple test for a fuzzer. Must find a specific string // used in std::string operator ==. +#include #include #include -#include -#include #include +#include static volatile int Sink; diff --git a/lib/Fuzzer/test/DSOTestMain.cpp b/lib/Fuzzer/test/DSOTestMain.cpp index 3e225d88612891c5ef735e13501671618715d863..e0c857d4fdec320d189a3f9a3c36f54dbbf06f80 100644 --- a/lib/Fuzzer/test/DSOTestMain.cpp +++ b/lib/Fuzzer/test/DSOTestMain.cpp @@ -4,9 +4,9 @@ // Source code for a simple DSO. #include +#include #include #include -#include extern int DSO1(int a); extern int DSO2(int a); extern int DSOTestExtra(int a); diff --git a/lib/Fuzzer/test/DivTest.cpp b/lib/Fuzzer/test/DivTest.cpp index 63f6960f4e909858215f5c71b94cbfeebab79164..bce13feb790f09ad336ebae5a39c429bb765294d 100644 --- a/lib/Fuzzer/test/DivTest.cpp +++ b/lib/Fuzzer/test/DivTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer: find the interesting argument for div. #include +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp index 62b3be76e3aa55d75a77e6d5c543613aa95d9ed0..ba963d9b1de882fcb11e4c01ea691faeb84392b1 100644 --- a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp +++ b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp @@ -2,12 +2,13 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the string "FUZZ". +#include #include #include -#include #include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 64) return 0; int bits = 0; if (Size > 0 && Data[0] == 'F') bits |= 1; if (Size > 1 && Data[1] == 'U') bits |= 2; diff --git a/lib/Fuzzer/test/FullCoverageSetTest.cpp b/lib/Fuzzer/test/FullCoverageSetTest.cpp index 415e0b4760c50fc50e5550a04b62b483df3bfca0..6d7e48fe51f8ba85963af3e16bd84c4fddc9bd54 100644 --- a/lib/Fuzzer/test/FullCoverageSetTest.cpp +++ b/lib/Fuzzer/test/FullCoverageSetTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the string "FUZZER". +#include #include #include -#include #include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index 78ea874f2ce23adb9c975ea88f3647e528c22e31..812894fd947f99a53c0e45b1b36c0336eaeb6ebd 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -6,12 +6,12 @@ #define _LIBCPP_HAS_NO_ASAN #include "FuzzerCorpus.h" -#include "FuzzerInternal.h" #include "FuzzerDictionary.h" +#include "FuzzerInternal.h" #include "FuzzerMerge.h" #include "FuzzerMutate.h" -#include "FuzzerTracePC.h" #include "FuzzerRandom.h" +#include "FuzzerTracePC.h" #include "gtest/gtest.h" #include #include @@ -772,4 +772,16 @@ TEST(Fuzzer, ForEachNonZeroByte) { Expected = {{108, 1}, {109, 2}, {118, 3}, {120, 4}, {135, 5}, {137, 6}, {146, 7}, {163, 8}}; EXPECT_EQ(Res, Expected); + + Res.clear(); + ForEachNonZeroByte(Ar + 9, Ar + N, 109, CB); + Expected = { {109, 2}, {118, 3}, {120, 4}, + {135, 5}, {137, 6}, {146, 7}, {163, 8}}; + EXPECT_EQ(Res, Expected); + + Res.clear(); + ForEachNonZeroByte(Ar + 9, Ar + N - 9, 109, CB); + Expected = { {109, 2}, {118, 3}, {120, 4}, + {135, 5}, {137, 6}, {146, 7}}; + EXPECT_EQ(Res, Expected); } diff --git a/lib/Fuzzer/test/LeakTest.cpp b/lib/Fuzzer/test/LeakTest.cpp index 22e5164050e516f28f5f3d1b677aa7cfd635ceca..ea89e39010573b58eed2a63535df2b1cdef98eec 100644 --- a/lib/Fuzzer/test/LeakTest.cpp +++ b/lib/Fuzzer/test/LeakTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Test with a leak. -#include #include +#include static volatile void *Sink; diff --git a/lib/Fuzzer/test/LeakTimeoutTest.cpp b/lib/Fuzzer/test/LeakTimeoutTest.cpp index 4f31b3e52c1656e3fb77b43e54e112ac442b299d..92526194a508aff39604af9e582e4c874dc678d9 100644 --- a/lib/Fuzzer/test/LeakTimeoutTest.cpp +++ b/lib/Fuzzer/test/LeakTimeoutTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Test with a leak. -#include #include +#include static volatile int *Sink; diff --git a/lib/Fuzzer/test/LoadTest.cpp b/lib/Fuzzer/test/LoadTest.cpp index eef16c7be51eee4572d2c5f4b0dc1d1e0d75ce62..67a28c7cb22ffa57de463afd3f649b78c6b8a0de 100644 --- a/lib/Fuzzer/test/LoadTest.cpp +++ b/lib/Fuzzer/test/LoadTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer: find interesting value of array index. #include +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/Memcmp64BytesTest.cpp b/lib/Fuzzer/test/Memcmp64BytesTest.cpp index e81526b578a36ff66c58d78bca5361b023455fbf..5b6cb707173f214b18834644e6d9bfd63bb2fac0 100644 --- a/lib/Fuzzer/test/Memcmp64BytesTest.cpp +++ b/lib/Fuzzer/test/Memcmp64BytesTest.cpp @@ -3,10 +3,10 @@ // Simple test for a fuzzer. The fuzzer must find a particular string. #include -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { const char kString64Bytes[] = diff --git a/lib/Fuzzer/test/MemcmpTest.cpp b/lib/Fuzzer/test/MemcmpTest.cpp index fdbf94683f765834f27fb6c88557a24819b572d3..8dbb7d84fbbaf355f06d203e0a4c5d3ea7dda20a 100644 --- a/lib/Fuzzer/test/MemcmpTest.cpp +++ b/lib/Fuzzer/test/MemcmpTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { // TODO: check other sizes. diff --git a/lib/Fuzzer/test/NotinstrumentedTest.cpp b/lib/Fuzzer/test/NotinstrumentedTest.cpp index ffe952c749d223e5bb3a2c4ede7ce3a04e1db982..91418990b1922b1807f553b69a1137e1c05eb495 100644 --- a/lib/Fuzzer/test/NotinstrumentedTest.cpp +++ b/lib/Fuzzer/test/NotinstrumentedTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // This test should not be instrumented. -#include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { return 0; diff --git a/lib/Fuzzer/test/NthRunCrashTest.cpp b/lib/Fuzzer/test/NthRunCrashTest.cpp index b43e69e51b256926c6d4a7331f7b0db5ff2a33cc..da5fbd33e9626e236fa36505eb40270763e5b128 100644 --- a/lib/Fuzzer/test/NthRunCrashTest.cpp +++ b/lib/Fuzzer/test/NthRunCrashTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Crash on the N-th execution. -#include #include +#include #include static int Counter; diff --git a/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp b/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp index 153710920a5fd75bef1f164163f2b0edf6790d12..459db51f8a3b86ff177d7ab1338f36a5ca27ecfb 100644 --- a/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp +++ b/lib/Fuzzer/test/NullDerefOnEmptyTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the empty string. +#include #include #include -#include #include static volatile int *Null = 0; diff --git a/lib/Fuzzer/test/NullDerefTest.cpp b/lib/Fuzzer/test/NullDerefTest.cpp index 3f03d24981972def7237d12354f2c40218dd116a..1b44b682ace6887eaa24e386d5c8ebd3f0cd5beb 100644 --- a/lib/Fuzzer/test/NullDerefTest.cpp +++ b/lib/Fuzzer/test/NullDerefTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the string "Hi!". +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/OneHugeAllocTest.cpp b/lib/Fuzzer/test/OneHugeAllocTest.cpp index 8d3d1d6d302dc753a52ffc4af6a36d58d79822f6..32a5578710008e1d8d97dfa2df4f245bb8711091 100644 --- a/lib/Fuzzer/test/OneHugeAllocTest.cpp +++ b/lib/Fuzzer/test/OneHugeAllocTest.cpp @@ -3,9 +3,9 @@ // Tests OOM handling when there is a single large allocation. #include +#include #include #include -#include #include #include diff --git a/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp b/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp index 316b7682b8e60ca1148f16636ee24aca0d7f377c..a07795a08dffa589a14f4a01fd54401de9f8b208 100644 --- a/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp +++ b/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp @@ -3,9 +3,9 @@ // Tests OOM handling. #include +#include #include #include -#include #include #include diff --git a/lib/Fuzzer/test/OutOfMemoryTest.cpp b/lib/Fuzzer/test/OutOfMemoryTest.cpp index 078a39ee1fe94dc636dc77a6f01fa08723a95b35..5e59bde09853f44a535834dae6995f27ecfc1e02 100644 --- a/lib/Fuzzer/test/OutOfMemoryTest.cpp +++ b/lib/Fuzzer/test/OutOfMemoryTest.cpp @@ -3,9 +3,9 @@ // Tests OOM handling. #include +#include #include #include -#include #include #include #include diff --git a/lib/Fuzzer/test/OverwriteInputTest.cpp b/lib/Fuzzer/test/OverwriteInputTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e688682346a61ad45017cb2210f4b5c69f2d598f --- /dev/null +++ b/lib/Fuzzer/test/OverwriteInputTest.cpp @@ -0,0 +1,13 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// Simple test for a fuzzer. Make sure we abort if Data is overwritten. +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size) + *const_cast(Data) = 1; + return 0; +} + diff --git a/lib/Fuzzer/test/RepeatedBytesTest.cpp b/lib/Fuzzer/test/RepeatedBytesTest.cpp index 2fa6c78c26d82b2efc3321e09ee3a0e7991c951d..14222f2847472e5bd56f6ed3937d51a7409108f4 100644 --- a/lib/Fuzzer/test/RepeatedBytesTest.cpp +++ b/lib/Fuzzer/test/RepeatedBytesTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. The fuzzer must find repeated bytes. #include +#include #include #include -#include #include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { diff --git a/lib/Fuzzer/test/RepeatedMemcmp.cpp b/lib/Fuzzer/test/RepeatedMemcmp.cpp index 7377f65ed76de47fc3838ade8a05fc7a0f1b1f77..18369deac3b0085d16cef7f44caea876e850ffaf 100644 --- a/lib/Fuzzer/test/RepeatedMemcmp.cpp +++ b/lib/Fuzzer/test/RepeatedMemcmp.cpp @@ -1,11 +1,10 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. - -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { int Matches1 = 0; diff --git a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp index 0fd7c5e9a1fbf14f92f80e6df32a9dc141ae6e88..37eeede7cbffc5c4b675503123583333b86aa7b3 100644 --- a/lib/Fuzzer/test/ShrinkControlFlowTest.cpp +++ b/lib/Fuzzer/test/ShrinkControlFlowTest.cpp @@ -2,15 +2,16 @@ // License. See LICENSE.TXT for details. // Test that we can find the minimal item in the corpus (3 bytes: "FUZ"). +#include #include +#include #include -#include #include -#include static volatile int Sink; extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 64) return 0; int8_t Ids[256]; memset(Ids, -1, sizeof(Ids)); for (size_t i = 0; i < Size; i++) diff --git a/lib/Fuzzer/test/ShrinkValueProfileTest.cpp b/lib/Fuzzer/test/ShrinkValueProfileTest.cpp index 026b8ce26591f3a26693b4ab8175c4bd08350713..86e4e3cb0d9ae0a6bff8e7fb3cdf83d062902fcf 100644 --- a/lib/Fuzzer/test/ShrinkValueProfileTest.cpp +++ b/lib/Fuzzer/test/ShrinkValueProfileTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Test that we can find the minimal item in the corpus (3 bytes: "FUZ"). +#include #include +#include #include -#include #include -#include static volatile uint32_t Sink; diff --git a/lib/Fuzzer/test/SignedIntOverflowTest.cpp b/lib/Fuzzer/test/SignedIntOverflowTest.cpp index 7df32ad57933dde6fb5e975c3ccd34999729a26a..d80060207dee1ba00647488d7423ea584f03c23e 100644 --- a/lib/Fuzzer/test/SignedIntOverflowTest.cpp +++ b/lib/Fuzzer/test/SignedIntOverflowTest.cpp @@ -3,11 +3,11 @@ // Test for signed-integer-overflow. #include +#include +#include #include #include -#include #include -#include static volatile int Sink; static int Large = INT_MAX; diff --git a/lib/Fuzzer/test/SimpleCmpTest.cpp b/lib/Fuzzer/test/SimpleCmpTest.cpp index 12b5cdda066076f1ed6b66522fb36b6e0aa46069..8acad4ac77e8fa338a7c8748166cf0f325a25192 100644 --- a/lib/Fuzzer/test/SimpleCmpTest.cpp +++ b/lib/Fuzzer/test/SimpleCmpTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. The fuzzer must find several narrow ranges. #include +#include #include #include -#include extern int AllLines[]; diff --git a/lib/Fuzzer/test/SimpleDictionaryTest.cpp b/lib/Fuzzer/test/SimpleDictionaryTest.cpp index cd7292bd006c8388c64e0f0018356b0596b2a4a8..a1cd200472249c3a3fb7866550322bad385694a7 100644 --- a/lib/Fuzzer/test/SimpleDictionaryTest.cpp +++ b/lib/Fuzzer/test/SimpleDictionaryTest.cpp @@ -5,9 +5,9 @@ // The fuzzer must find a string based on dictionary words: // "Elvis" // "Presley" +#include #include #include -#include #include #include diff --git a/lib/Fuzzer/test/SimpleHashTest.cpp b/lib/Fuzzer/test/SimpleHashTest.cpp index 00599de78ebed9ffc23c3febc514a21fa5853d52..a3f4211ebeeffe4fb8ce9c7cb1af020bc4a41f26 100644 --- a/lib/Fuzzer/test/SimpleHashTest.cpp +++ b/lib/Fuzzer/test/SimpleHashTest.cpp @@ -5,9 +5,9 @@ // and then compares the last 4 bytes with the computed value. // A fuzzer with cmp traces is expected to defeat this check. #include +#include #include #include -#include // A modified jenkins_one_at_a_time_hash initialized by non-zero, // so that simple_hash(0) != 0. See also @@ -26,7 +26,7 @@ static uint32_t simple_hash(const uint8_t *Data, size_t Size) { } extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size < 14) + if (Size < 14 || Size > 64) return 0; uint32_t Hash = simple_hash(&Data[0], Size - 4); diff --git a/lib/Fuzzer/test/SimpleTest.cpp b/lib/Fuzzer/test/SimpleTest.cpp index e53ea160ed8fbf6731db58dae940e6a85b5a0427..a8b4988dff10ec31353b2f89ace9222ed4a9b88d 100644 --- a/lib/Fuzzer/test/SimpleTest.cpp +++ b/lib/Fuzzer/test/SimpleTest.cpp @@ -3,9 +3,9 @@ // Simple test for a fuzzer. The fuzzer must find the string "Hi!". #include +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/SimpleThreadedTest.cpp b/lib/Fuzzer/test/SimpleThreadedTest.cpp index 5f02d3f8457cb039b0fec01fc0bd416c4172611e..1abdc3fc6d6be3a6d6f4d3a6ce07766acecccc7a 100644 --- a/lib/Fuzzer/test/SimpleThreadedTest.cpp +++ b/lib/Fuzzer/test/SimpleThreadedTest.cpp @@ -3,8 +3,8 @@ // Threaded test for a fuzzer. The fuzzer should find "H" #include -#include #include +#include #include #include #include diff --git a/lib/Fuzzer/test/SingleByteInputTest.cpp b/lib/Fuzzer/test/SingleByteInputTest.cpp index 4ce819d230cecd604a502c07859f263d22d257e3..72b58ba912eb3264df22ed150011ea72b4ca549c 100644 --- a/lib/Fuzzer/test/SingleByteInputTest.cpp +++ b/lib/Fuzzer/test/SingleByteInputTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer, need just one byte to crash. -#include -#include #include +#include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size > 0 && Data[Size/2] == 42) { diff --git a/lib/Fuzzer/test/SingleMemcmpTest.cpp b/lib/Fuzzer/test/SingleMemcmpTest.cpp index c73f68a7ee6e278312ede3a4ccc6f06190ea33e8..83c09e0428ec1eec544e379302dc6f7148c58586 100644 --- a/lib/Fuzzer/test/SingleMemcmpTest.cpp +++ b/lib/Fuzzer/test/SingleMemcmpTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { char *S = (char*)Data; diff --git a/lib/Fuzzer/test/SingleStrcmpTest.cpp b/lib/Fuzzer/test/SingleStrcmpTest.cpp index 48f481dfc51aede37b56d2d5c6512e40bca83328..149073444c9cb2dbf7190444aed850656bd36ecd 100644 --- a/lib/Fuzzer/test/SingleStrcmpTest.cpp +++ b/lib/Fuzzer/test/SingleStrcmpTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size >= 7) { diff --git a/lib/Fuzzer/test/SingleStrncmpTest.cpp b/lib/Fuzzer/test/SingleStrncmpTest.cpp index e5601da86329959754d5ba183a37fb77766e9ad8..b38c7995d8ff83ea491929e127bd69e9fa450c33 100644 --- a/lib/Fuzzer/test/SingleStrncmpTest.cpp +++ b/lib/Fuzzer/test/SingleStrncmpTest.cpp @@ -2,12 +2,13 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 64) return 0; char *S = (char*)Data; volatile auto Strncmp = &(strncmp); // Make sure strncmp is not inlined. if (Size >= 6 && !Strncmp(S, "qwerty", 6)) { diff --git a/lib/Fuzzer/test/SpamyTest.cpp b/lib/Fuzzer/test/SpamyTest.cpp index d294d4dc53e0c4e7872892d7efe42311eaafd8f1..721134e1841c377979c6a6ad160c2fbfc025f0fb 100644 --- a/lib/Fuzzer/test/SpamyTest.cpp +++ b/lib/Fuzzer/test/SpamyTest.cpp @@ -3,9 +3,9 @@ // The test spams to stderr and stdout. #include +#include #include #include -#include #include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { diff --git a/lib/Fuzzer/test/StrcmpTest.cpp b/lib/Fuzzer/test/StrcmpTest.cpp index cd91dda76f30d2f6d0d69b51a37d1947633ad2db..e7636e8812fcdd6d4bb8d3922dfce05dbb012db5 100644 --- a/lib/Fuzzer/test/StrcmpTest.cpp +++ b/lib/Fuzzer/test/StrcmpTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Break through a series of strcmp. -#include +#include #include #include #include -#include +#include bool Eq(const uint8_t *Data, size_t Size, const char *Str) { char Buff[1024]; diff --git a/lib/Fuzzer/test/StrncmpOOBTest.cpp b/lib/Fuzzer/test/StrncmpOOBTest.cpp index f70b003afad65c760a8c23b9e53390fba8c3259d..4ed71d9d021dd561a52508bf5d70619cfc45b6af 100644 --- a/lib/Fuzzer/test/StrncmpOOBTest.cpp +++ b/lib/Fuzzer/test/StrncmpOOBTest.cpp @@ -3,10 +3,10 @@ // Test that libFuzzer itself does not read out of bounds. #include +#include #include -#include #include -#include +#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/StrncmpTest.cpp b/lib/Fuzzer/test/StrncmpTest.cpp index 5ffd011dcdfff834203c148a192bd0514cf700cc..f71f01ee3098ce11c4e825b3fee4290e7b999f68 100644 --- a/lib/Fuzzer/test/StrncmpTest.cpp +++ b/lib/Fuzzer/test/StrncmpTest.cpp @@ -2,10 +2,10 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find a particular string. -#include #include #include #include +#include static volatile int sink; diff --git a/lib/Fuzzer/test/StrstrTest.cpp b/lib/Fuzzer/test/StrstrTest.cpp index f021e75ec0fde915602ed73fbdcda676151a158a..a3ea4e03b3d2745abd50151898edcd1d0306deef 100644 --- a/lib/Fuzzer/test/StrstrTest.cpp +++ b/lib/Fuzzer/test/StrstrTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Test strstr and strcasestr hooks. -#include -#include #include #include #include +#include +#include // Windows does not have strcasestr and memmem, so we are not testing them. #ifdef _WIN32 diff --git a/lib/Fuzzer/test/SwapCmpTest.cpp b/lib/Fuzzer/test/SwapCmpTest.cpp index b90ac72c22c4b4c6843130defbb1b06b3e219e8e..bbfbefe6ab710836e5e8593641f973563f253223 100644 --- a/lib/Fuzzer/test/SwapCmpTest.cpp +++ b/lib/Fuzzer/test/SwapCmpTest.cpp @@ -3,9 +3,9 @@ // The fuzzer must find several constants with swapped bytes. #include +#include #include #include -#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size < 14) return 0; diff --git a/lib/Fuzzer/test/Switch2Test.cpp b/lib/Fuzzer/test/Switch2Test.cpp index 3c6a3004907e143b3b7c04c7762f72043066e8dd..5f66ac8b499e6cfd37042cc07649c094b5addb1a 100644 --- a/lib/Fuzzer/test/Switch2Test.cpp +++ b/lib/Fuzzer/test/Switch2Test.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the interesting switch value. +#include #include -#include #include +#include #include -#include int Switch(int a) { switch(a) { diff --git a/lib/Fuzzer/test/SwitchTest.cpp b/lib/Fuzzer/test/SwitchTest.cpp index 3dc051ff7b5b109bd0eda1138906b5ed154c15b3..86944cad21c5fdf697585c6380fd93690ae6864c 100644 --- a/lib/Fuzzer/test/SwitchTest.cpp +++ b/lib/Fuzzer/test/SwitchTest.cpp @@ -2,11 +2,11 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the interesting switch value. +#include #include -#include #include +#include #include -#include static volatile int Sink; diff --git a/lib/Fuzzer/test/TableLookupTest.cpp b/lib/Fuzzer/test/TableLookupTest.cpp index f9d5610820ff589217f76ef396c7f631ccfa0f43..4d8ab0611cde8bac62c66954173edd037c536c2f 100644 --- a/lib/Fuzzer/test/TableLookupTest.cpp +++ b/lib/Fuzzer/test/TableLookupTest.cpp @@ -3,11 +3,11 @@ // Make sure the fuzzer eventually finds all possible values of a variable // within a range. -#include +#include #include #include #include -#include +#include #include const size_t N = 1 << 12; @@ -15,7 +15,6 @@ const size_t N = 1 << 12; // Define an array of counters that will be understood by libFuzzer // as extra coverage signal. The array must be: // * uint8_t -// * aligned by 64 // * in the section named __libfuzzer_extra_counters. // The target code may declare more than one such array. // @@ -23,7 +22,7 @@ const size_t N = 1 << 12; // depending on whether multiple occurrences of the event 'Idx' // is important to distinguish from one occurrence. #ifdef __linux__ -alignas(64) __attribute__((section("__libfuzzer_extra_counters"))) +__attribute__((section("__libfuzzer_extra_counters"))) #endif static uint8_t Counters[N]; diff --git a/lib/Fuzzer/test/ThreadedLeakTest.cpp b/lib/Fuzzer/test/ThreadedLeakTest.cpp index 7511071108712aea04abdc5d1354f3158d1df847..538d3b434808ead5017dd0b1dc9b7968c0118657 100644 --- a/lib/Fuzzer/test/ThreadedLeakTest.cpp +++ b/lib/Fuzzer/test/ThreadedLeakTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // The fuzzer should find a leak in a non-main thread. -#include #include +#include #include static volatile int *Sink; diff --git a/lib/Fuzzer/test/ThreadedTest.cpp b/lib/Fuzzer/test/ThreadedTest.cpp index 09137a9a70c177abf36222fdddafb501e2abca74..bb51ba764ebaf68008bde72ad4a9d9df9d202d02 100644 --- a/lib/Fuzzer/test/ThreadedTest.cpp +++ b/lib/Fuzzer/test/ThreadedTest.cpp @@ -3,8 +3,8 @@ // Threaded test for a fuzzer. The fuzzer should not crash. #include -#include #include +#include #include #include diff --git a/lib/Fuzzer/test/TimeoutEmptyTest.cpp b/lib/Fuzzer/test/TimeoutEmptyTest.cpp index 8066f480b655078092ed7df2667182a937e2bd58..1ddf1fa34589a57057236f986f363b2db04275fd 100644 --- a/lib/Fuzzer/test/TimeoutEmptyTest.cpp +++ b/lib/Fuzzer/test/TimeoutEmptyTest.cpp @@ -2,8 +2,8 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the empty string. -#include #include +#include extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { static volatile int Zero = 0; diff --git a/lib/Fuzzer/test/TimeoutTest.cpp b/lib/Fuzzer/test/TimeoutTest.cpp index f8107012c841f82202ce54882b7b10b09be7b01f..e3cdba3eec382132b2d477fd64c294319e27cba4 100644 --- a/lib/Fuzzer/test/TimeoutTest.cpp +++ b/lib/Fuzzer/test/TimeoutTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. The fuzzer must find the string "Hi!". +#include #include #include -#include #include static volatile int Sink; diff --git a/lib/Fuzzer/test/TraceMallocTest.cpp b/lib/Fuzzer/test/TraceMallocTest.cpp index 43e6950e185f3d7c7c7239d9f3c18cab8ff5cf3b..af9975603aa183504a1c1ec328d9c6165d1951b0 100644 --- a/lib/Fuzzer/test/TraceMallocTest.cpp +++ b/lib/Fuzzer/test/TraceMallocTest.cpp @@ -3,9 +3,9 @@ // Tests -trace_malloc #include +#include #include #include -#include #include int *Ptr; diff --git a/lib/Fuzzer/test/TwoDifferentBugsTest.cpp b/lib/Fuzzer/test/TwoDifferentBugsTest.cpp index 42c0d192ba8669b99a38b3e8c1bc165eedc2249c..77d2cb1a25f9fe6a3e70f98b991c71c9507237f8 100644 --- a/lib/Fuzzer/test/TwoDifferentBugsTest.cpp +++ b/lib/Fuzzer/test/TwoDifferentBugsTest.cpp @@ -2,9 +2,9 @@ // License. See LICENSE.TXT for details. // Simple test for a fuzzer. This test may trigger two different bugs. +#include #include #include -#include #include static volatile int *Null = 0; diff --git a/lib/Fuzzer/test/afl-driver.test b/lib/Fuzzer/test/afl-driver.test new file mode 100644 index 0000000000000000000000000000000000000000..6eab23cc3636247965dbc9146b0bb5352de1dcc6 --- /dev/null +++ b/lib/Fuzzer/test/afl-driver.test @@ -0,0 +1,26 @@ +REQUIRES: linux +RUN: echo -n "abc" > %t.file3 +RUN: echo -n "abcd" > %t.file4 + +RUN: AFLDriverTest < %t.file3 2>&1 | FileCheck %s --check-prefix=CHECK1 +CHECK1: __afl_persistent_loop calle, Count = 1000 +CHECK1: LLVMFuzzerTestOneInput called; Size = 3 + + +RUN: AFLDriverTest < %t.file3 -42 2>&1 | FileCheck %s --check-prefix=CHECK2 +CHECK2: __afl_persistent_loop calle, Count = 42 +CHECK2: LLVMFuzzerTestOneInput called; Size = 3 + + +RUN: AFLDriverTest < %t.file3 666 2>&1 | FileCheck %s --check-prefix=CHECK3 +CHECK3: WARNING: using the deprecated call style +CHECK3: __afl_persistent_loop calle, Count = 666 +CHECK3: LLVMFuzzerTestOneInput called; Size = 3 + + +RUN: AFLDriverTest %t.file3 2>&1 | FileCheck %s --check-prefix=CHECK4 +CHECK4: LLVMFuzzerTestOneInput called; Size = 3 + +RUN: AFLDriverTest %t.file3 %t.file4 2>&1 | FileCheck %s --check-prefix=CHECK5 +CHECK5: LLVMFuzzerTestOneInput called; Size = 3 +CHECK5: LLVMFuzzerTestOneInput called; Size = 4 diff --git a/lib/Fuzzer/test/cleanse.test b/lib/Fuzzer/test/cleanse.test new file mode 100644 index 0000000000000000000000000000000000000000..ad08591d2fa31ed9942132fdafb83597e308f18f --- /dev/null +++ b/lib/Fuzzer/test/cleanse.test @@ -0,0 +1,3 @@ +RUN: echo -n 0123456789ABCDEFGHIZ > %t-in +RUN: LLVMFuzzer-CleanseTest -cleanse_crash=1 %t-in -exact_artifact_path=%t-out +RUN: echo -n ' 1 5 A Z' | diff - %t-out diff --git a/lib/Fuzzer/test/cxxstring.test b/lib/Fuzzer/test/cxxstring.test index c60d7aee9686bfcab9a0de796165b1786ad13f0d..52168fc8c822e9d224e5835e609d37594edcd0bc 100644 --- a/lib/Fuzzer/test/cxxstring.test +++ b/lib/Fuzzer/test/cxxstring.test @@ -1,2 +1,4 @@ +UNSUPPORTED: windows + RUN: not LLVMFuzzer-CxxStringEqTest -seed=1 -runs=1000000 2>&1 | FileCheck %s CHECK: BINGO diff --git a/lib/Fuzzer/test/dump_coverage.test b/lib/Fuzzer/test/dump_coverage.test index 8acc8304fc60dea46f05407d54453ac680aade78..bd85ed718e19ad23f34d13d5c36aa50cc855c5fa 100644 --- a/lib/Fuzzer/test/dump_coverage.test +++ b/lib/Fuzzer/test/dump_coverage.test @@ -4,11 +4,11 @@ RUN: sancov -covered-functions LLVMFuzzer-NullDerefTest* %t_workdir/*.sancov | F RUN: env ASAN_OPTIONS=coverage_dir='"%t_workdir"' LLVMFuzzer-DSOTest -dump_coverage=1 -runs=0 2>&1 | FileCheck %s --check-prefix=DSO RUN: env ASAN_OPTIONS=coverage_dir='"%t_workdir"' not LLVMFuzzer-NullDerefTest -dump_coverage=0 2>&1 | FileCheck %s --check-prefix=NOCOV -CHECK: SanitizerCoverage: {{.*}}LLVMFuzzer-NullDerefTest.{{.*}}.sancov {{.*}} PCs written +CHECK: SanitizerCoverage: {{.*}}LLVMFuzzer-NullDerefTest.{{.*}}.sancov: {{.*}} PCs written SANCOV: LLVMFuzzerTestOneInput -DSO: SanitizerCoverage: {{.*}}LLVMFuzzer-DSOTest.{{.*}}.sancov {{.*}} PCs written -DSO-DAG: SanitizerCoverage: {{.*}}LLVMFuzzer-DSO1.{{.*}}.sancov {{.*}} PCs written -DSO-DAG: SanitizerCoverage: {{.*}}LLVMFuzzer-DSO2.{{.*}}.sancov {{.*}} PCs written +DSO: SanitizerCoverage: {{.*}}LLVMFuzzer-DSOTest.{{.*}}.sancov: {{.*}} PCs written +DSO-DAG: SanitizerCoverage: {{.*}}LLVMFuzzer-DSO1.{{.*}}.sancov: {{.*}} PCs written +DSO-DAG: SanitizerCoverage: {{.*}}LLVMFuzzer-DSO2.{{.*}}.sancov: {{.*}} PCs written NOCOV-NOT: SanitizerCoverage: {{.*}} PCs written diff --git a/lib/Fuzzer/test/fuzzer-dirs.test b/lib/Fuzzer/test/fuzzer-dirs.test index 3de64f278f5dfda6d40b8ec89812dd77c7a9c062..622ff5da3a297e9c1455a5f2f2a04dffec86f8c1 100644 --- a/lib/Fuzzer/test/fuzzer-dirs.test +++ b/lib/Fuzzer/test/fuzzer-dirs.test @@ -5,9 +5,13 @@ RUN: echo b > %t/SUB1/SUB2/b RUN: echo c > %t/SUB1/SUB2/SUB3/c RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS SUBDIRS: READ units: 3 -RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/long +RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64 +RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256 +RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024 +RUN: cat %t/SUB1/f1024 %t/SUB1/f1024 %t/SUB1/f1024 %t/SUB1/f1024 > %t/SUB1/f4096 +RUN: cat %t/SUB1/f4096 %t/SUB1/f4096 > %t/SUB1/f8192 RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=LONG -LONG: INFO: -max_len is not provided, using 93 +LONG: INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 8192 bytes RUN: rm -rf %t/SUB1 RUN: not LLVMFuzzer-SimpleTest NONEXISTENT_DIR 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR diff --git a/lib/Fuzzer/test/fuzzer-segv.test b/lib/Fuzzer/test/fuzzer-segv.test index b9a6a5ce44ca004006a1f09169de92819797f641..90f01932f652deab1dcbadc1454aa58e9f327620 100644 --- a/lib/Fuzzer/test/fuzzer-segv.test +++ b/lib/Fuzzer/test/fuzzer-segv.test @@ -3,3 +3,5 @@ LIBFUZZER_OWN_SEGV_HANDLER: == ERROR: libFuzzer: deadly signal LIBFUZZER_OWN_SEGV_HANDLER: SUMMARY: libFuzzer: deadly signal LIBFUZZER_OWN_SEGV_HANDLER: Test unit written to ./crash- +RUN: env ASAN_OPTIONS=handle_segv=1 not LLVMFuzzer-NullDerefTest 2>&1 | FileCheck %s --check-prefix=LIBFUZZER_ASAN_SEGV_HANDLER +LIBFUZZER_ASAN_SEGV_HANDLER: ERROR: AddressSanitizer: {{SEGV|access-violation}} on unknown address diff --git a/lib/Fuzzer/test/inline-8bit-counters.test b/lib/Fuzzer/test/inline-8bit-counters.test new file mode 100644 index 0000000000000000000000000000000000000000..8747af81451f3647c580a177d19ad01867b34704 --- /dev/null +++ b/lib/Fuzzer/test/inline-8bit-counters.test @@ -0,0 +1,4 @@ +REQUIRES: linux +CHECK: INFO: Loaded 1 modules with {{.*}} inline 8-bit counters +CHECK: BINGO +RUN: LLVMFuzzer-SimpleTest-Inline8bitCounters -runs=1000000 -seed=1 2>&1 | FileCheck %s diff --git a/lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt b/lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..088ab04fe6a0902dd940b2b371036a11290dd30f --- /dev/null +++ b/lib/Fuzzer/test/inline-8bit-counters/CMakeLists.txt @@ -0,0 +1,12 @@ +# These tests are instrumented with -fsanitize-coverage=inline-8bit-counters + +set(CMAKE_CXX_FLAGS + "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard -fsanitize-coverage=inline-8bit-counters") + +set(Inline8bitCounterTests + SimpleTest + ) + +foreach(Test ${Inline8bitCounterTests}) + add_libfuzzer_test(${Test}-Inline8bitCounters SOURCES ../${Test}.cpp) +endforeach() diff --git a/lib/Fuzzer/test/overwrite-input.test b/lib/Fuzzer/test/overwrite-input.test new file mode 100644 index 0000000000000000000000000000000000000000..81c27909e8df3c0815f6b5bfc8191068943cace0 --- /dev/null +++ b/lib/Fuzzer/test/overwrite-input.test @@ -0,0 +1,2 @@ +RUN: not LLVMFuzzer-OverwriteInputTest 2>&1 | FileCheck %s +CHECK: ERROR: libFuzzer: fuzz target overwrites it's const input diff --git a/lib/Fuzzer/test/trace-pc/CMakeLists.txt b/lib/Fuzzer/test/trace-pc/CMakeLists.txt index e800f82cc5dcd8f552ebe8fd88bd79bf02a939b5..572fcc9836541951d590326dd414c7d7d487d3ea 100644 --- a/lib/Fuzzer/test/trace-pc/CMakeLists.txt +++ b/lib/Fuzzer/test/trace-pc/CMakeLists.txt @@ -1,5 +1,4 @@ -# These tests are not instrumented with coverage and don't -# have coverage rt in the binary. +# These tests are instrumented with -fsanitize-coverage=trace-pc set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters,trace-pc-guard -fsanitize-coverage=trace-pc") diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index d0b77e7218b93ad0e5513cb0099fd2057d223cf5..556e122ff82ff137b4157a5604fa701fc3f90506 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" @@ -39,7 +40,6 @@ #include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" @@ -332,6 +332,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::HHVM: Out << "hhvmcc"; break; case CallingConv::HHVM_C: Out << "hhvm_ccc"; break; case CallingConv::AMDGPU_VS: Out << "amdgpu_vs"; break; + case CallingConv::AMDGPU_HS: Out << "amdgpu_hs"; break; case CallingConv::AMDGPU_GS: Out << "amdgpu_gs"; break; case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break; case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break; @@ -804,6 +805,9 @@ void SlotTracker::processModule() { if (!Var.hasName()) CreateModuleSlot(&Var); processGlobalObjectMetadata(Var); + auto Attrs = Var.getAttributes(); + if (Attrs.hasAttributes()) + CreateAttributeSetSlot(Attrs); } for (const GlobalAlias &A : TheModule->aliases()) { @@ -1103,35 +1107,34 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } if (const ConstantFP *CFP = dyn_cast(CV)) { - if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle() || - &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) { + const APFloat &APF = CFP->getValueAPF(); + if (&APF.getSemantics() == &APFloat::IEEEsingle() || + &APF.getSemantics() == &APFloat::IEEEdouble()) { // We would like to output the FP constant value in exponential notation, // but we cannot do this if doing so will lose precision. Check here to // make sure that we only output it in exponential format if we can parse // the value back and get the same value. // bool ignored; - bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble(); - bool isInf = CFP->getValueAPF().isInfinity(); - bool isNaN = CFP->getValueAPF().isNaN(); + bool isDouble = &APF.getSemantics() == &APFloat::IEEEdouble(); + bool isInf = APF.isInfinity(); + bool isNaN = APF.isNaN(); if (!isInf && !isNaN) { - double Val = isDouble ? CFP->getValueAPF().convertToDouble() : - CFP->getValueAPF().convertToFloat(); + double Val = isDouble ? APF.convertToDouble() : APF.convertToFloat(); SmallString<128> StrVal; - raw_svector_ostream(StrVal) << Val; - + APF.toString(StrVal, 6, 0, false); // Check to make sure that the stringized number is not some string like // "Inf" or NaN, that atof will accept, but the lexer will not. Check // that the string matches the "[-+]?[0-9]" regex. // - if ((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) { - // Reparse stringized version! - if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { - Out << StrVal; - return; - } + assert(((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) && + "[-+]?[0-9] regex does not match!"); + // Reparse stringized version! + if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { + Out << StrVal; + return; } } // Otherwise we could not reparse it to exactly the same value, so we must @@ -1140,7 +1143,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // x86, so we must not use these types. static_assert(sizeof(double) == sizeof(uint64_t), "assuming that double is 64 bits!"); - APFloat apf = CFP->getValueAPF(); + APFloat apf = APF; // Floats are represented in ASCII IR as double, convert. if (!isDouble) apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, @@ -1153,27 +1156,27 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // These appear as a magic letter identifying the type, then a // fixed number of hex digits. Out << "0x"; - APInt API = CFP->getValueAPF().bitcastToAPInt(); - if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended()) { + APInt API = APF.bitcastToAPInt(); + if (&APF.getSemantics() == &APFloat::x87DoubleExtended()) { Out << 'K'; Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, /*Upper=*/true); Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); return; - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad()) { + } else if (&APF.getSemantics() == &APFloat::IEEEquad()) { Out << 'L'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble()) { + } else if (&APF.getSemantics() == &APFloat::PPCDoubleDouble()) { Out << 'M'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf()) { + } else if (&APF.getSemantics() == &APFloat::IEEEhalf()) { Out << 'H'; Out << format_hex_no_prefix(API.getZExtValue(), 4, /*Upper=*/true); @@ -1720,6 +1723,7 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N, Printer.printMetadata("templateParams", N->getRawTemplateParams()); Printer.printMetadata("declaration", N->getRawDeclaration()); Printer.printMetadata("variables", N->getRawVariables()); + Printer.printMetadata("thrownTypes", N->getRawThrownTypes()); Out << ")"; } @@ -1756,8 +1760,6 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N, MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); Printer.printString("name", N->getName()); Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false); - Printer.printMetadata("file", N->getRawFile()); - Printer.printInt("line", N->getLine()); Printer.printBool("exportSymbols", N->getExportSymbols(), false); Out << ")"; } @@ -2085,8 +2087,7 @@ public: void printModule(const Module *M); void writeOperand(const Value *Op, bool PrintType); - void writeParamOperand(const Value *Operand, AttributeList Attrs, - unsigned Idx); + void writeParamOperand(const Value *Operand, AttributeSet Attrs); void writeOperandBundles(ImmutableCallSite CS); void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, @@ -2102,7 +2103,7 @@ public: void printIndirectSymbol(const GlobalIndirectSymbol *GIS); void printComdat(const Comdat *C); void printFunction(const Function *F); - void printArgument(const Argument *FA, AttributeList Attrs, unsigned Idx); + void printArgument(const Argument *FA, AttributeSet Attrs); void printBasicBlock(const BasicBlock *BB); void printInstructionLine(const Instruction &I); void printInstruction(const Instruction &I); @@ -2181,7 +2182,7 @@ void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, } void AssemblyWriter::writeParamOperand(const Value *Operand, - AttributeList Attrs, unsigned Idx) { + AttributeSet Attrs) { if (!Operand) { Out << ""; return; @@ -2190,8 +2191,8 @@ void AssemblyWriter::writeParamOperand(const Value *Operand, // Print the type TypePrinter.print(Operand->getType(), Out); // Print parameter attributes list - if (Attrs.hasAttributes(Idx)) - Out << ' ' << Attrs.getAsString(Idx); + if (Attrs.hasAttributes()) + Out << ' ' << Attrs.getAsString(); Out << ' '; // Print the operand WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule); @@ -2504,6 +2505,10 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { GV->getAllMetadata(MDs); printMetadataAttachments(MDs, ", "); + auto Attrs = GV->getAttributes(); + if (Attrs.hasAttributes()) + Out << " #" << Machine.getAttributeGroupSlot(Attrs); + printInfoComment(*GV); } @@ -2654,17 +2659,17 @@ void AssemblyWriter::printFunction(const Function *F) { // Output type... TypePrinter.print(FT->getParamType(I), Out); - if (Attrs.hasAttributes(I + 1)) - Out << ' ' << Attrs.getAsString(I + 1); + AttributeSet ArgAttrs = Attrs.getParamAttributes(I); + if (ArgAttrs.hasAttributes()) + Out << ' ' << ArgAttrs.getAsString(); } } else { // The arguments are meaningful here, print them in detail. - unsigned Idx = 1; for (const Argument &Arg : F->args()) { // Insert commas as we go... the first arg doesn't get a comma - if (Idx != 1) + if (Arg.getArgNo() != 0) Out << ", "; - printArgument(&Arg, Attrs, Idx++); + printArgument(&Arg, Attrs.getParamAttributes(Arg.getArgNo())); } } @@ -2726,14 +2731,13 @@ void AssemblyWriter::printFunction(const Function *F) { /// printArgument - This member is called for every argument that is passed into /// the function. Simply print it out /// -void AssemblyWriter::printArgument(const Argument *Arg, AttributeList Attrs, - unsigned Idx) { +void AssemblyWriter::printArgument(const Argument *Arg, AttributeSet Attrs) { // Output type... TypePrinter.print(Arg->getType(), Out); // Output parameter attributes list - if (Attrs.hasAttributes(Idx)) - Out << ' ' << Attrs.getAsString(Idx); + if (Attrs.hasAttributes()) + Out << ' ' << Attrs.getAsString(); // Output name, if available... if (Arg->hasName()) { @@ -3027,7 +3031,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) { if (op > 0) Out << ", "; - writeParamOperand(CI->getArgOperand(op), PAL, op + 1); + writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op)); } // Emit an ellipsis if this is a musttail call in a vararg function. This @@ -3070,7 +3074,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) { if (op) Out << ", "; - writeParamOperand(II->getArgOperand(op), PAL, op + 1); + writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op)); } Out << ')'; diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index 09f037365793d3377e6ba848af28ccbd868b042a..4ed7b021883de2747cf2372dd12413320b021095 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -1,4 +1,4 @@ -//===-- AttributeImpl.h - Attribute Internals -------------------*- C++ -*-===// +//===- AttributeImpl.h - Attribute Internals --------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,9 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/Attributes.h" #include "llvm/Support/TrailingObjects.h" -#include #include -#include #include #include #include @@ -80,11 +78,13 @@ public: else Profile(ID, getKindAsString(), getValueAsString()); } + static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind, uint64_t Val) { ID.AddInteger(Kind); if (Val) ID.AddInteger(Val); } + static void Profile(FoldingSetNodeID &ID, StringRef Kind, StringRef Values) { ID.AddString(Kind); if (!Values.empty()) ID.AddString(Values); @@ -114,9 +114,10 @@ public: }; class IntAttributeImpl : public EnumAttributeImpl { - void anchor() override; uint64_t Val; + void anchor() override; + public: IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val) : EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) { @@ -188,20 +189,22 @@ public: std::pair> getAllocSizeArgs() const; std::string getAsString(bool InAttrGrp) const; - typedef const Attribute *iterator; + using iterator = const Attribute *; + iterator begin() const { return getTrailingObjects(); } iterator end() const { return begin() + NumAttrs; } void Profile(FoldingSetNodeID &ID) const { Profile(ID, makeArrayRef(begin(), end())); } + static void Profile(FoldingSetNodeID &ID, ArrayRef AttrList) { for (const auto &Attr : AttrList) Attr.Profile(ID); } }; -typedef std::pair IndexAttrPair; +using IndexAttrPair = std::pair; //===----------------------------------------------------------------------===// /// \class @@ -209,27 +212,21 @@ typedef std::pair IndexAttrPair; /// return type, and parameters. class AttributeListImpl final : public FoldingSetNode, - private TrailingObjects { + private TrailingObjects { friend class AttributeList; friend TrailingObjects; private: - LLVMContext &Context; - unsigned NumSlots; ///< Number of entries in this set. /// Bitset with a bit for each available attribute Attribute::AttrKind. uint64_t AvailableFunctionAttrs; + LLVMContext &Context; + unsigned NumAttrSets; ///< Number of entries in this set. // Helper fn for TrailingObjects class. - size_t numTrailingObjects(OverloadToken) { return NumSlots; } - - /// \brief Return a pointer to the IndexAttrPair for the specified slot. - const IndexAttrPair *getSlotPair(unsigned Slot) const { - return getTrailingObjects() + Slot; - } + size_t numTrailingObjects(OverloadToken) { return NumAttrSets; } public: - AttributeListImpl(LLVMContext &C, - ArrayRef> Slots); + AttributeListImpl(LLVMContext &C, ArrayRef Sets); // AttributesSetImpt is uniqued, these should not be available. AttributeListImpl(const AttributeListImpl &) = delete; @@ -240,45 +237,18 @@ public: /// \brief Get the context that created this AttributeListImpl. LLVMContext &getContext() { return Context; } - /// \brief Return the number of slots used in this attribute list. This is - /// the number of arguments that have an attribute set on them (including the - /// function itself). - unsigned getNumSlots() const { return NumSlots; } - - /// \brief Get the index of the given "slot" in the AttrNodes list. This index - /// is the index of the return, parameter, or function object that the - /// attributes are applied to, not the index into the AttrNodes list where the - /// attributes reside. - unsigned getSlotIndex(unsigned Slot) const { - return getSlotPair(Slot)->first; - } - - /// \brief Retrieve the attribute set node for the given "slot" in the - /// AttrNode list. - AttributeSet getSlotNode(unsigned Slot) const { - return getSlotPair(Slot)->second; - } - - /// \brief Retrieve the attributes for the given "slot" in the AttrNode list. - /// \p Slot is an index into the AttrNodes list, not the index of the return / - /// parameter/ function which the attributes apply to. - AttributeList getSlotAttributes(unsigned Slot) const { - return AttributeList::get(Context, *getSlotPair(Slot)); - } - /// \brief Return true if the AttributeSet or the FunctionIndex has an /// enum attribute of the given kind. bool hasFnAttribute(Attribute::AttrKind Kind) const { return AvailableFunctionAttrs & ((uint64_t)1) << Kind; } - typedef AttributeSet::iterator iterator; - iterator begin(unsigned Slot) const { return getSlotNode(Slot).begin(); } - iterator end(unsigned Slot) const { return getSlotNode(Slot).end(); } + typedef const AttributeSet *iterator; + iterator begin() const { return getTrailingObjects(); } + iterator end() const { return begin() + NumAttrSets; } void Profile(FoldingSetNodeID &ID) const; - static void Profile(FoldingSetNodeID &ID, - ArrayRef> Nodes); + static void Profile(FoldingSetNodeID &ID, ArrayRef Nodes); void dump() const; }; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 4b840c36ccb0f2303f53647c7bd6836d60d51194..a518f7b5c81a87e4a801d1e9e9f934adc78f4cae 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -13,17 +13,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Attributes.h" #include "AttributeImpl.h" #include "LLVMContextImpl.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" @@ -34,6 +34,8 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include +#include #include #include #include @@ -315,6 +317,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "returns_twice"; if (hasAttribute(Attribute::SExt)) return "signext"; + if (hasAttribute(Attribute::Speculatable)) + return "speculatable"; if (hasAttribute(Attribute::StackProtect)) return "ssp"; if (hasAttribute(Attribute::StackProtectReq)) @@ -502,16 +506,74 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { return AttributeSet(AttributeSetNode::get(C, Attrs)); } +AttributeSet AttributeSet::addAttribute(LLVMContext &C, + Attribute::AttrKind Kind) const { + if (hasAttribute(Kind)) return *this; + AttrBuilder B; + B.addAttribute(Kind); + return addAttributes(C, AttributeSet::get(C, B)); +} + +AttributeSet AttributeSet::addAttribute(LLVMContext &C, StringRef Kind, + StringRef Value) const { + AttrBuilder B; + B.addAttribute(Kind, Value); + return addAttributes(C, AttributeSet::get(C, B)); +} + +AttributeSet AttributeSet::addAttributes(LLVMContext &C, + const AttributeSet AS) const { + if (!hasAttributes()) + return AS; + + if (!AS.hasAttributes()) + return *this; + + AttrBuilder B(AS); + for (Attribute I : *this) + B.addAttribute(I); + + return get(C, B); +} + +AttributeSet AttributeSet::removeAttribute(LLVMContext &C, + Attribute::AttrKind Kind) const { + if (!hasAttribute(Kind)) return *this; + AttrBuilder B; + B.addAttribute(Kind); + return removeAttributes(C, B); +} + +AttributeSet AttributeSet::removeAttribute(LLVMContext &C, + StringRef Kind) const { + if (!hasAttribute(Kind)) return *this; + AttrBuilder B; + B.addAttribute(Kind); + return removeAttributes(C, B); +} + +AttributeSet AttributeSet::removeAttributes(LLVMContext &C, + const AttrBuilder &Attrs) const { + + // FIXME it is not obvious how this should work for alignment. + // For now, say we can't pass in alignment, which no current use does. + assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!"); + + AttrBuilder B(*this); + B.remove(Attrs); + return get(C, B); +} + unsigned AttributeSet::getNumAttributes() const { return SetNode ? SetNode->getNumAttributes() : 0; } bool AttributeSet::hasAttribute(Attribute::AttrKind Kind) const { - return SetNode ? SetNode->hasAttribute(Kind) : 0; + return SetNode ? SetNode->hasAttribute(Kind) : false; } bool AttributeSet::hasAttribute(StringRef Kind) const { - return SetNode ? SetNode->hasAttribute(Kind) : 0; + return SetNode ? SetNode->hasAttribute(Kind) : false; } Attribute AttributeSet::getAttribute(Attribute::AttrKind Kind) const { @@ -539,7 +601,8 @@ uint64_t AttributeSet::getDereferenceableOrNullBytes() const { } std::pair> AttributeSet::getAllocSizeArgs() const { - return SetNode ? SetNode->getAllocSizeArgs() : std::make_pair(0, 0); + return SetNode ? SetNode->getAllocSizeArgs() + : std::pair>(0, 0); } std::string AttributeSet::getAsString(bool InAttrGrp) const { @@ -554,6 +617,14 @@ AttributeSet::iterator AttributeSet::end() const { return SetNode ? SetNode->end() : nullptr; } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void AttributeSet::dump() const { + dbgs() << "AS =\n"; + dbgs() << " { "; + dbgs() << getAsString(true) << " }\n"; +} +#endif + //===----------------------------------------------------------------------===// // AttributeSetNode Definition //===----------------------------------------------------------------------===// @@ -717,49 +788,44 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const { // AttributeListImpl Definition //===----------------------------------------------------------------------===// -AttributeListImpl::AttributeListImpl( - LLVMContext &C, ArrayRef> Slots) - : Context(C), NumSlots(Slots.size()), AvailableFunctionAttrs(0) { -#ifndef NDEBUG - if (Slots.size() >= 2) { - auto &PrevPair = Slots.front(); - for (auto &CurPair : Slots.drop_front()) { - assert(PrevPair.first <= CurPair.first && "Attribute set not ordered!"); - } - } -#endif +/// Map from AttributeList index to the internal array index. Adding one works: +/// FunctionIndex: ~0U -> 0 +/// ReturnIndex: 0 -> 1 +/// FirstArgIndex: 1.. -> 2.. +static constexpr unsigned attrIdxToArrayIdx(unsigned Index) { + // MSVC warns about '~0U + 1' wrapping around when this is called on + // FunctionIndex, so cast to int first. + return static_cast(Index) + 1; +} + +AttributeListImpl::AttributeListImpl(LLVMContext &C, + ArrayRef Sets) + : AvailableFunctionAttrs(0), Context(C), NumAttrSets(Sets.size()) { + assert(!Sets.empty() && "pointless AttributeListImpl"); // There's memory after the node where we can store the entries in. - std::copy(Slots.begin(), Slots.end(), getTrailingObjects()); + std::copy(Sets.begin(), Sets.end(), getTrailingObjects()); // Initialize AvailableFunctionAttrs summary bitset. - if (NumSlots > 0) { - static_assert(Attribute::EndAttrKinds <= - sizeof(AvailableFunctionAttrs) * CHAR_BIT, - "Too many attributes"); - static_assert(AttributeList::FunctionIndex == ~0u, - "FunctionIndex should be biggest possible index"); - const auto &Last = Slots.back(); - if (Last.first == AttributeList::FunctionIndex) { - AttributeSet Node = Last.second; - for (Attribute I : Node) { - if (!I.isStringAttribute()) - AvailableFunctionAttrs |= ((uint64_t)1) << I.getKindAsEnum(); - } - } + static_assert(Attribute::EndAttrKinds <= + sizeof(AvailableFunctionAttrs) * CHAR_BIT, + "Too many attributes"); + static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U, + "function should be stored in slot 0"); + for (Attribute I : Sets[0]) { + if (!I.isStringAttribute()) + AvailableFunctionAttrs |= 1ULL << I.getKindAsEnum(); } } void AttributeListImpl::Profile(FoldingSetNodeID &ID) const { - Profile(ID, makeArrayRef(getSlotPair(0), getNumSlots())); + Profile(ID, makeArrayRef(begin(), end())); } -void AttributeListImpl::Profile( - FoldingSetNodeID &ID, ArrayRef> Nodes) { - for (const auto &Node : Nodes) { - ID.AddInteger(Node.first); - ID.AddPointer(Node.second.SetNode); - } +void AttributeListImpl::Profile(FoldingSetNodeID &ID, + ArrayRef Sets) { + for (const auto &Set : Sets) + ID.AddPointer(Set.SetNode); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -772,24 +838,13 @@ LLVM_DUMP_METHOD void AttributeListImpl::dump() const { // AttributeList Construction and Mutation Methods //===----------------------------------------------------------------------===// -AttributeList AttributeList::getImpl( - LLVMContext &C, ArrayRef> Attrs) { - assert(!Attrs.empty() && "creating pointless AttributeList"); -#ifndef NDEBUG - unsigned LastIndex = 0; - bool IsFirst = true; - for (const auto &AttrPair : Attrs) { - assert((IsFirst || LastIndex < AttrPair.first) && - "unsorted or duplicate AttributeList indices"); - assert(AttrPair.second.hasAttributes() && "pointless AttributeList slot"); - LastIndex = AttrPair.first; - IsFirst = false; - } -#endif +AttributeList AttributeList::getImpl(LLVMContext &C, + ArrayRef AttrSets) { + assert(!AttrSets.empty() && "pointless AttributeListImpl"); LLVMContextImpl *pImpl = C.pImpl; FoldingSetNodeID ID; - AttributeListImpl::Profile(ID, Attrs); + AttributeListImpl::Profile(ID, AttrSets); void *InsertPoint; AttributeListImpl *PA = @@ -800,8 +855,8 @@ AttributeList AttributeList::getImpl( if (!PA) { // Coallocate entries after the AttributeListImpl itself. void *Mem = ::operator new( - AttributeListImpl::totalSizeToAlloc(Attrs.size())); - PA = new (Mem) AttributeListImpl(C, Attrs); + AttributeListImpl::totalSizeToAlloc(AttrSets.size())); + PA = new (Mem) AttributeListImpl(C, AttrSets); pImpl->AttrsLists.InsertNode(PA, InsertPoint); } @@ -842,7 +897,7 @@ AttributeList::get(LLVMContext &C, AttrPairVec.emplace_back(Index, AttributeSet::get(C, AttrVec)); } - return getImpl(C, AttrPairVec); + return get(C, AttrPairVec); } AttributeList @@ -852,35 +907,76 @@ AttributeList::get(LLVMContext &C, if (Attrs.empty()) return AttributeList(); - return getImpl(C, Attrs); + assert(std::is_sorted(Attrs.begin(), Attrs.end(), + [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }) && + "Misordered Attributes list!"); + assert(none_of(Attrs, + [](const std::pair &Pair) { + return !Pair.second.hasAttributes(); + }) && + "Pointless attribute!"); + + unsigned MaxIndex = Attrs.back().first; + + SmallVector AttrVec(attrIdxToArrayIdx(MaxIndex) + 1); + for (auto Pair : Attrs) + AttrVec[attrIdxToArrayIdx(Pair.first)] = Pair.second; + + return getImpl(C, AttrVec); } -AttributeList AttributeList::get(LLVMContext &C, ArrayRef Attrs) { - assert(Attrs.size() >= 2 && - "should always have function and return attr slots"); - SmallVector, 8> AttrPairs; - size_t Index = 0; - for (AttributeSet AS : Attrs) { - if (AS.hasAttributes()) { - // If this is the last AttributeSetNode, it's for the function. - if (Index == Attrs.size() - 1) - Index = AttributeList::FunctionIndex; - AttrPairs.emplace_back(Index, AS); +AttributeList AttributeList::get(LLVMContext &C, AttributeSet FnAttrs, + AttributeSet RetAttrs, + ArrayRef ArgAttrs) { + // Scan from the end to find the last argument with attributes. Most + // arguments don't have attributes, so it's nice if we can have fewer unique + // AttributeListImpls by dropping empty attribute sets at the end of the list. + unsigned NumSets = 0; + for (size_t I = ArgAttrs.size(); I != 0; --I) { + if (ArgAttrs[I - 1].hasAttributes()) { + NumSets = I + 2; + break; } - ++Index; } - if (AttrPairs.empty()) + if (NumSets == 0) { + // Check function and return attributes if we didn't have argument + // attributes. + if (RetAttrs.hasAttributes()) + NumSets = 2; + else if (FnAttrs.hasAttributes()) + NumSets = 1; + } + + // If all attribute sets were empty, we can use the empty attribute list. + if (NumSets == 0) return AttributeList(); - return getImpl(C, AttrPairs); + + SmallVector AttrSets; + AttrSets.reserve(NumSets); + // If we have any attributes, we always have function attributes. + AttrSets.push_back(FnAttrs); + if (NumSets > 1) + AttrSets.push_back(RetAttrs); + if (NumSets > 2) { + // Drop the empty argument attribute sets at the end. + ArgAttrs = ArgAttrs.take_front(NumSets - 2); + AttrSets.insert(AttrSets.end(), ArgAttrs.begin(), ArgAttrs.end()); + } + + return getImpl(C, AttrSets); } AttributeList AttributeList::get(LLVMContext &C, unsigned Index, const AttrBuilder &B) { if (!B.hasAttributes()) return AttributeList(); - AttributeSet AS = AttributeSet::get(C, B); - std::pair Arr[1] = {{Index, AS}}; - return getImpl(C, Arr); + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(Index + 1); + AttrSets[Index] = AttributeSet::get(C, B); + return getImpl(C, AttrSets); } AttributeList AttributeList::get(LLVMContext &C, unsigned Index, @@ -903,38 +999,34 @@ AttributeList AttributeList::get(LLVMContext &C, ArrayRef Attrs) { if (Attrs.empty()) return AttributeList(); - if (Attrs.size() == 1) return Attrs[0]; - - SmallVector, 8> AttrNodeVec; - AttributeListImpl *A0 = Attrs[0].pImpl; - if (A0) - AttrNodeVec.append(A0->getSlotPair(0), A0->getSlotPair(A0->getNumSlots())); - // Copy all attributes from Attrs into AttrNodeVec while keeping AttrNodeVec - // ordered by index. Because we know that each list in Attrs is ordered by - // index we only need to merge each successive list in rather than doing a - // full sort. - for (unsigned I = 1, E = Attrs.size(); I != E; ++I) { - AttributeListImpl *ALI = Attrs[I].pImpl; - if (!ALI) continue; - SmallVector, 8>::iterator - ANVI = AttrNodeVec.begin(), ANVE; - for (const IndexAttrPair *AI = ALI->getSlotPair(0), - *AE = ALI->getSlotPair(ALI->getNumSlots()); - AI != AE; ++AI) { - ANVE = AttrNodeVec.end(); - while (ANVI != ANVE && ANVI->first <= AI->first) - ++ANVI; - ANVI = AttrNodeVec.insert(ANVI, *AI) + 1; - } + if (Attrs.size() == 1) + return Attrs[0]; + + unsigned MaxSize = 0; + for (AttributeList List : Attrs) + MaxSize = std::max(MaxSize, List.getNumAttrSets()); + + // If every list was empty, there is no point in merging the lists. + if (MaxSize == 0) + return AttributeList(); + + SmallVector NewAttrSets(MaxSize); + for (unsigned I = 0; I < MaxSize; ++I) { + AttrBuilder CurBuilder; + for (AttributeList List : Attrs) + CurBuilder.merge(List.getAttributes(I - 1)); + NewAttrSets[I] = AttributeSet::get(C, CurBuilder); } - return getImpl(C, AttrNodeVec); + return getImpl(C, NewAttrSets); } AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const { if (hasAttribute(Index, Kind)) return *this; - return addAttributes(C, Index, AttributeList::get(C, Index, Kind)); + AttrBuilder B; + B.addAttribute(Kind); + return addAttributes(C, Index, B); } AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, @@ -942,201 +1034,113 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, StringRef Value) const { AttrBuilder B; B.addAttribute(Kind, Value); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } -AttributeList AttributeList::addAttribute(LLVMContext &C, - ArrayRef Indices, +AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, Attribute A) const { - assert(std::is_sorted(Indices.begin(), Indices.end())); - - unsigned I = 0, E = pImpl ? pImpl->getNumSlots() : 0; - SmallVector AttrVec; - for (unsigned Index : Indices) { - // Add all attribute slots before the current index. - for (; I < E && getSlotIndex(I) < Index; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); - - // Add the attribute at this index. If we already have attributes at this - // index, merge them into a new set. - AttrBuilder B; - if (I < E && getSlotIndex(I) == Index) { - B.merge(AttrBuilder(pImpl->getSlotNode(I))); - ++I; - } - B.addAttribute(A); - AttrVec.emplace_back(Index, AttributeSet::get(C, B)); - } - - // Add remaining attributes. - for (; I < E; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); - - return get(C, AttrVec); -} - -AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const { - if (!pImpl) return Attrs; - if (!Attrs.pImpl) return *this; - - return addAttributes(C, Index, Attrs.getAttributes(Index)); + AttrBuilder B; + B.addAttribute(A); + return addAttributes(C, Index, B); } AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, - AttributeSet AS) const { - if (!AS.hasAttributes()) + const AttrBuilder &B) const { + if (!B.hasAttributes()) return *this; + if (!pImpl) + return AttributeList::get(C, {{Index, AttributeSet::get(C, B)}}); + #ifndef NDEBUG // FIXME it is not obvious how this should work for alignment. For now, say // we can't change a known alignment. - unsigned OldAlign = getParamAlignment(Index); - unsigned NewAlign = AS.getAlignment(); + unsigned OldAlign = getAttributes(Index).getAlignment(); + unsigned NewAlign = B.getAlignment(); assert((!OldAlign || !NewAlign || OldAlign == NewAlign) && "Attempt to change alignment!"); #endif - SmallVector, 4> AttrSet; - uint64_t NumAttrs = pImpl->getNumSlots(); - unsigned I; + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + if (Index >= AttrSets.size()) + AttrSets.resize(Index + 1); - // Add all the attribute slots before the one we need to merge. - for (I = 0; I < NumAttrs; ++I) { - if (getSlotIndex(I) >= Index) - break; - AttrSet.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); - } + AttrBuilder Merged(AttrSets[Index]); + Merged.merge(B); + AttrSets[Index] = AttributeSet::get(C, Merged); - if (I < NumAttrs && getSlotIndex(I) == Index) { - // We need to merge two AttributeSets. - AttributeSet Merged = AttributeSet::get( - C, AttrBuilder(pImpl->getSlotNode(I)).merge(AttrBuilder(AS))); - AttrSet.emplace_back(Index, Merged); - ++I; - } else { - // Otherwise, there were no attributes at this position in the original - // list. Add the set as is. - AttrSet.emplace_back(Index, AS); - } + return getImpl(C, AttrSets); +} - // Add the remaining entries. - for (; I < NumAttrs; ++I) - AttrSet.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I)); +AttributeList AttributeList::addParamAttribute(LLVMContext &C, + ArrayRef ArgNos, + Attribute A) const { + assert(std::is_sorted(ArgNos.begin(), ArgNos.end())); - return get(C, AttrSet); -} + SmallVector AttrSets(this->begin(), this->end()); + unsigned MaxIndex = attrIdxToArrayIdx(ArgNos.back() + FirstArgIndex); + if (MaxIndex >= AttrSets.size()) + AttrSets.resize(MaxIndex + 1); -AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, - const AttrBuilder &B) const { - return get(C, Index, AttributeSet::get(C, B)); + for (unsigned ArgNo : ArgNos) { + unsigned Index = attrIdxToArrayIdx(ArgNo + FirstArgIndex); + AttrBuilder B(AttrSets[Index]); + B.addAttribute(A); + AttrSets[Index] = AttributeSet::get(C, B); + } + + return getImpl(C, AttrSets); } AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const { if (!hasAttribute(Index, Kind)) return *this; - return removeAttributes(C, Index, AttributeList::get(C, Index, Kind)); + AttrBuilder B; + B.addAttribute(Kind); + return removeAttributes(C, Index, B); } AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, StringRef Kind) const { if (!hasAttribute(Index, Kind)) return *this; - return removeAttributes(C, Index, AttributeList::get(C, Index, Kind)); -} - -AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, - AttributeList Attrs) const { - if (!pImpl) - return AttributeList(); - if (!Attrs.pImpl) return *this; - - // FIXME it is not obvious how this should work for alignment. - // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAttribute(Index, Attribute::Alignment) && - "Attempt to change alignment!"); - - // Add the attribute slots before the one we're trying to add. - SmallVector AttrSet; - uint64_t NumAttrs = pImpl->getNumSlots(); - AttributeList AL; - uint64_t LastIndex = 0; - for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++); - break; - } - LastIndex = I + 1; - AttrSet.push_back(getSlotAttributes(I)); - } - - // Now remove the attribute from the correct slot. There may already be an - // AttributeList there. - AttrBuilder B(AL, Index); - - for (unsigned I = 0, E = Attrs.pImpl->getNumSlots(); I != E; ++I) - if (Attrs.getSlotIndex(I) == Index) { - B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index); - break; - } - - AttrSet.push_back(AttributeList::get(C, Index, B)); - - // Add the remaining attribute slots. - for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSet.push_back(getSlotAttributes(I)); - - return get(C, AttrSet); + AttrBuilder B; + B.addAttribute(Kind); + return removeAttributes(C, Index, B); } -AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, - const AttrBuilder &Attrs) const { +AttributeList +AttributeList::removeAttributes(LLVMContext &C, unsigned Index, + const AttrBuilder &AttrsToRemove) const { if (!pImpl) return AttributeList(); // FIXME it is not obvious how this should work for alignment. // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!"); - - // Add the attribute slots before the one we're trying to add. - SmallVector AttrSet; - uint64_t NumAttrs = pImpl->getNumSlots(); - AttributeList AL; - uint64_t LastIndex = 0; - for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++); - break; - } - LastIndex = I + 1; - AttrSet.push_back(getSlotAttributes(I)); - } + assert(!AttrsToRemove.hasAlignmentAttr() && "Attempt to change alignment!"); - // Now remove the attribute from the correct slot. There may already be an - // AttributeList there. - AttrBuilder B(AL, Index); - B.remove(Attrs); - - AttrSet.push_back(AttributeList::get(C, Index, B)); + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + if (Index >= AttrSets.size()) + AttrSets.resize(Index + 1); - // Add the remaining attribute slots. - for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSet.push_back(getSlotAttributes(I)); + AttrBuilder B(AttrSets[Index]); + B.remove(AttrsToRemove); + AttrSets[Index] = AttributeSet::get(C, B); - return get(C, AttrSet); + return getImpl(C, AttrSets); } AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned WithoutIndex) const { if (!pImpl) return AttributeList(); - - SmallVector, 4> AttrSet; - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) { - unsigned Index = getSlotIndex(I); - if (Index != WithoutIndex) - AttrSet.push_back({Index, pImpl->getSlotNode(I)}); - } - return get(C, AttrSet); + WithoutIndex = attrIdxToArrayIdx(WithoutIndex); + if (WithoutIndex >= getNumAttrSets()) + return *this; + SmallVector AttrSets(this->begin(), this->end()); + AttrSets[WithoutIndex] = AttributeSet(); + return getImpl(C, AttrSets); } AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C, @@ -1144,7 +1148,7 @@ AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C, uint64_t Bytes) const { AttrBuilder B; B.addDereferenceableAttr(Bytes); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } AttributeList @@ -1152,7 +1156,7 @@ AttributeList::addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index, uint64_t Bytes) const { AttrBuilder B; B.addDereferenceableOrNullAttr(Bytes); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } AttributeList @@ -1161,7 +1165,7 @@ AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index, const Optional &NumElemsArg) { AttrBuilder B; B.addAllocSizeAttr(ElemSizeArg, NumElemsArg); - return addAttributes(C, Index, AttributeList::get(C, Index, B)); + return addAttributes(C, Index, B); } //===----------------------------------------------------------------------===// @@ -1170,8 +1174,8 @@ AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index, LLVMContext &AttributeList::getContext() const { return pImpl->getContext(); } -AttributeSet AttributeList::getParamAttributes(unsigned Index) const { - return getAttributes(Index); +AttributeSet AttributeList::getParamAttributes(unsigned ArgNo) const { + return getAttributes(ArgNo + FirstArgIndex); } AttributeSet AttributeList::getRetAttributes() const { @@ -1203,17 +1207,22 @@ bool AttributeList::hasFnAttribute(StringRef Kind) const { return hasAttribute(AttributeList::FunctionIndex, Kind); } +bool AttributeList::hasParamAttribute(unsigned ArgNo, + Attribute::AttrKind Kind) const { + return hasAttribute(ArgNo + FirstArgIndex, Kind); +} + bool AttributeList::hasAttrSomewhere(Attribute::AttrKind Attr, unsigned *Index) const { if (!pImpl) return false; - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) - for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I); - II != IE; ++II) - if (II->hasAttribute(Attr)) { - if (Index) *Index = pImpl->getSlotIndex(I); - return true; - } + for (unsigned I = index_begin(), E = index_end(); I != E; ++I) { + if (hasAttribute(I, Attr)) { + if (Index) + *Index = I; + return true; + } + } return false; } @@ -1227,8 +1236,12 @@ Attribute AttributeList::getAttribute(unsigned Index, StringRef Kind) const { return getAttributes(Index).getAttribute(Kind); } -unsigned AttributeList::getParamAlignment(unsigned Index) const { - return getAttributes(Index).getAlignment(); +unsigned AttributeList::getRetAlignment() const { + return getAttributes(ReturnIndex).getAlignment(); +} + +unsigned AttributeList::getParamAlignment(unsigned ArgNo) const { + return getAttributes(ArgNo + FirstArgIndex).getAlignment(); } unsigned AttributeList::getStackAlignment(unsigned Index) const { @@ -1253,60 +1266,35 @@ std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const { } AttributeSet AttributeList::getAttributes(unsigned Index) const { - if (!pImpl) return AttributeSet(); - - // Loop through to find the attribute node we want. - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) - if (pImpl->getSlotIndex(I) == Index) - return pImpl->getSlotNode(I); - - return AttributeSet(); + Index = attrIdxToArrayIdx(Index); + if (!pImpl || Index >= getNumAttrSets()) + return AttributeSet(); + return pImpl->begin()[Index]; } -AttributeList::iterator AttributeList::begin(unsigned Slot) const { - if (!pImpl) - return ArrayRef().begin(); - return pImpl->begin(Slot); +AttributeList::iterator AttributeList::begin() const { + return pImpl ? pImpl->begin() : nullptr; } -AttributeList::iterator AttributeList::end(unsigned Slot) const { - if (!pImpl) - return ArrayRef().end(); - return pImpl->end(Slot); +AttributeList::iterator AttributeList::end() const { + return pImpl ? pImpl->end() : nullptr; } //===----------------------------------------------------------------------===// // AttributeList Introspection Methods //===----------------------------------------------------------------------===// -unsigned AttributeList::getNumSlots() const { - return pImpl ? pImpl->getNumSlots() : 0; -} - -unsigned AttributeList::getSlotIndex(unsigned Slot) const { - assert(pImpl && Slot < pImpl->getNumSlots() && - "Slot # out of range!"); - return pImpl->getSlotIndex(Slot); -} - -AttributeList AttributeList::getSlotAttributes(unsigned Slot) const { - assert(pImpl && Slot < pImpl->getNumSlots() && - "Slot # out of range!"); - return pImpl->getSlotAttributes(Slot); +unsigned AttributeList::getNumAttrSets() const { + return pImpl ? pImpl->NumAttrSets : 0; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void AttributeList::dump() const { dbgs() << "PAL[\n"; - for (unsigned i = 0, e = getNumSlots(); i < e; ++i) { - uint64_t Index = getSlotIndex(i); - dbgs() << " { "; - if (Index == ~0U) - dbgs() << "~0U"; - else - dbgs() << Index; - dbgs() << " => " << getAsString(Index) << " }\n"; + for (unsigned i = index_begin(), e = index_end(); i != e; ++i) { + if (getAttributes(i).hasAttributes()) + dbgs() << " { " << i << " => " << getAsString(i) << " }\n"; } dbgs() << "]\n"; @@ -1317,26 +1305,16 @@ LLVM_DUMP_METHOD void AttributeList::dump() const { // AttrBuilder Method Implementations //===----------------------------------------------------------------------===// +// FIXME: Remove this ctor, use AttributeSet. AttrBuilder::AttrBuilder(AttributeList AL, unsigned Index) { - AttributeListImpl *pImpl = AL.pImpl; - if (!pImpl) return; - - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) { - if (pImpl->getSlotIndex(I) != Index) continue; - - for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I); - II != IE; ++II) - addAttribute(*II); - - break; - } + AttributeSet AS = AL.getAttributes(Index); + for (const Attribute &A : AS) + addAttribute(A); } AttrBuilder::AttrBuilder(AttributeSet AS) { - if (AS.hasAttributes()) { - for (const Attribute &A : AS) - addAttribute(A); - } + for (const Attribute &A : AS) + addAttribute(A); } void AttrBuilder::clear() { @@ -1401,26 +1379,7 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) { } AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) { - unsigned Slot = ~0U; - for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) - if (A.getSlotIndex(I) == Index) { - Slot = I; - break; - } - - assert(Slot != ~0U && "Couldn't find index in AttributeList!"); - - for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E; - ++I) { - Attribute Attr = *I; - if (Attr.isEnumAttribute() || Attr.isIntAttribute()) { - removeAttribute(Attr.getKindAsEnum()); - } else { - assert(Attr.isStringAttribute() && "Invalid attribute type!"); - removeAttribute(Attr.getKindAsString()); - } - } - + remove(A.getAttributes(Index)); return *this; } @@ -1562,25 +1521,16 @@ bool AttrBuilder::hasAttributes() const { return !Attrs.none() || !TargetDepAttrs.empty(); } -bool AttrBuilder::hasAttributes(AttributeList A, uint64_t Index) const { - unsigned Slot = ~0U; - for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) - if (A.getSlotIndex(I) == Index) { - Slot = I; - break; - } +bool AttrBuilder::hasAttributes(AttributeList AL, uint64_t Index) const { + AttributeSet AS = AL.getAttributes(Index); - assert(Slot != ~0U && "Couldn't find the index!"); - - for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E; - ++I) { - Attribute Attr = *I; + for (Attribute Attr : AS) { if (Attr.isEnumAttribute() || Attr.isIntAttribute()) { - if (Attrs[I->getKindAsEnum()]) + if (contains(Attr.getKindAsEnum())) return true; } else { assert(Attr.isStringAttribute() && "Invalid attribute kind!"); - return TargetDepAttrs.find(Attr.getKindAsString())!=TargetDepAttrs.end(); + return contains(Attr.getKindAsString()); } } @@ -1670,12 +1620,10 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) { // If upgrading the SSP attribute, clear out the old SSP Attributes first. // Having multiple SSP attributes doesn't actually hurt, but it adds useless // clutter to the IR. - AttrBuilder B; - B.addAttribute(Attribute::StackProtect) - .addAttribute(Attribute::StackProtectStrong) - .addAttribute(Attribute::StackProtectReq); - AttributeList OldSSPAttr = - AttributeList::get(Caller.getContext(), AttributeList::FunctionIndex, B); + AttrBuilder OldSSPAttr; + OldSSPAttr.addAttribute(Attribute::StackProtect) + .addAttribute(Attribute::StackProtectStrong) + .addAttribute(Attribute::StackProtectReq); if (Callee.hasFnAttribute(Attribute::StackProtectReq)) { Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr); diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 0262e2cc05e85a971ea29699727e899de068b186..06934b365a11b35ef4ac3fb253c69bec6faaf869 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -202,6 +202,9 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("sse4a.movnt.") || // Added in 3.9 Name.startswith("avx.movnt.") || // Added in 3.2 Name.startswith("avx512.storent.") || // Added in 3.9 + Name == "sse41.movntdqa" || // Added in 5.0 + Name == "avx2.movntdqa" || // Added in 5.0 + Name == "avx512.movntdqa" || // Added in 5.0 Name == "sse2.storel.dq" || // Added in 3.9 Name.startswith("sse.storeu.") || // Added in 3.9 Name.startswith("sse2.storeu.") || // Added in 3.9 @@ -464,6 +467,27 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } } + // Renaming gather/scatter intrinsics with no address space overloading + // to the new overload which includes an address space + if (Name.startswith("masked.gather.")) { + Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()}; + if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::masked_gather, Tys); + return true; + } + } + if (Name.startswith("masked.scatter.")) { + auto Args = F->getFunctionType()->params(); + Type *Tys[] = {Args[0], Args[1]}; + if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) { + rename(F); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::masked_scatter, Tys); + return true; + } + } break; } case 'n': { @@ -497,6 +521,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } } + break; } case 'o': // We only need to change the name to match the mangling including the @@ -1875,6 +1900,20 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { { CI->getArgOperand(0), CI->getArgOperand(1) }); Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); + } else if (IsX86 && Name.endswith(".movntdqa")) { + Module *M = F->getParent(); + MDNode *Node = MDNode::get( + C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); + + Value *Ptr = CI->getArgOperand(0); + VectorType *VTy = cast(CI->getType()); + + // Convert the type of the pointer to a pointer to the stored type. + Value *BC = + Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast"); + LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8); + LI->setMetadata(M->getMDKindID("nontemporal"), Node); + Rep = LI; } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { Value *Arg = CI->getArgOperand(0); Value *Neg = Builder.CreateNeg(Arg, "neg"); @@ -2055,7 +2094,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::masked_load: - case Intrinsic::masked_store: { + case Intrinsic::masked_store: + case Intrinsic::masked_gather: + case Intrinsic::masked_scatter: { SmallVector Args(CI->arg_operands().begin(), CI->arg_operands().end()); NewCall = Builder.CreateCall(NewFn, Args); diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index 90ca21ab91f8fcfcaf3ae1e8f0a8ea509dcb91da..1f8659d4e2caefe87fc26915b4cb761d5c718b87 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -263,6 +263,10 @@ const BasicBlock *BasicBlock::getUniqueSuccessor() const { return SuccBB; } +iterator_range BasicBlock::phis() { + return make_range(dyn_cast(&front()), nullptr); +} + /// This method is used to notify a BasicBlock that the /// specified Predecessor of the block is no longer able to reach it. This is /// actually not used to update the Predecessor list, but is actually used to @@ -389,13 +393,11 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) { // Loop over any phi nodes in the basic block, updating the BB field of // incoming values... BasicBlock *Successor = *I; - PHINode *PN; - for (BasicBlock::iterator II = Successor->begin(); - (PN = dyn_cast(II)); ++II) { - int IDX = PN->getBasicBlockIndex(this); - while (IDX != -1) { - PN->setIncomingBlock((unsigned)IDX, New); - IDX = PN->getBasicBlockIndex(this); + for (auto &PN : Successor->phis()) { + int Idx = PN.getBasicBlockIndex(this); + while (Idx != -1) { + PN.setIncomingBlock((unsigned)Idx, New); + Idx = PN.getBasicBlockIndex(this); } } } diff --git a/lib/IR/Comdat.cpp b/lib/IR/Comdat.cpp index e27ecad0a8841698dd56d86cefc79b9f126e6741..c735f9b2eb1eb6ddff7a3cdb155aa855d6f8c191 100644 --- a/lib/IR/Comdat.cpp +++ b/lib/IR/Comdat.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Comdat.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/Comdat.h" using namespace llvm; diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index bba230677ebf711fba4538ce26fc7ffe07dd6561..3469026ad7ed67850bcb26be642a1dfec7a469db 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -223,7 +223,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, if (ConstantInt *CI = dyn_cast(C)) { APInt V = CI->getValue(); if (ByteStart) - V = V.lshr(ByteStart*8); + V.lshrInPlace(ByteStart*8); V = V.trunc(ByteSize*8); return ConstantInt::get(CI->getContext(), V); } @@ -348,8 +348,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, /// factors factored out. If Folded is false, return null if no factoring was /// possible, to avoid endlessly bouncing an unfoldable expression back into the /// top-level folder. -static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, - bool Folded) { +static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) { if (ArrayType *ATy = dyn_cast(Ty)) { Constant *N = ConstantInt::get(DestTy, ATy->getNumElements()); Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); @@ -404,8 +403,7 @@ static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, /// factors factored out. If Folded is false, return null if no factoring was /// possible, to avoid endlessly bouncing an unfoldable expression back into the /// top-level folder. -static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, - bool Folded) { +static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) { // The alignment of an array is equal to the alignment of the // array element. Note that this is not always true for vectors. if (ArrayType *ATy = dyn_cast(Ty)) { @@ -469,8 +467,7 @@ static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, /// any known factors factored out. If Folded is false, return null if no /// factoring was possible, to avoid endlessly bouncing an unfoldable expression /// back into the top-level folder. -static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, - Type *DestTy, +static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy, bool Folded) { if (ArrayType *ATy = dyn_cast(Ty)) { Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, @@ -2041,9 +2038,6 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, Optional InRangeIndex, ArrayRef Idxs) { if (Idxs.empty()) return C; - Constant *Idx0 = cast(Idxs[0]); - if ((Idxs.size() == 1 && Idx0->isNullValue())) - return C; if (isa(C)) { Type *GEPTy = GetElementPtrInst::getGEPReturnType( @@ -2051,10 +2045,15 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C, return UndefValue::get(GEPTy); } + Constant *Idx0 = cast(Idxs[0]); + if (Idxs.size() == 1 && (Idx0->isNullValue() || isa(Idx0))) + return C; + if (C->isNullValue()) { bool isNull = true; for (unsigned i = 0, e = Idxs.size(); i != e; ++i) - if (!cast(Idxs[i])->isNullValue()) { + if (!isa(Idxs[i]) && + !cast(Idxs[i])->isNullValue()) { isNull = false; break; } diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp index f1826c029795bd3ee79080a8d7a875b73a1a57f7..5f44af206355d7de6a4382351852c363a339af93 100644 --- a/lib/IR/ConstantRange.cpp +++ b/lib/IR/ConstantRange.cpp @@ -21,29 +21,22 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instruction.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Operator.h" -#include "llvm/IR/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -/// Initialize a full (the default) or empty set for the specified type. -/// -ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) { - if (Full) - Lower = Upper = APInt::getMaxValue(BitWidth); - else - Lower = Upper = APInt::getMinValue(BitWidth); -} +ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) + : Lower(Full ? APInt::getMaxValue(BitWidth) : APInt::getMinValue(BitWidth)), + Upper(Lower) {} -/// Initialize a range to hold the single specified value. -/// -ConstantRange::ConstantRange(APIntMoveTy V) +ConstantRange::ConstantRange(APInt V) : Lower(std::move(V)), Upper(Lower + 1) {} -ConstantRange::ConstantRange(APIntMoveTy L, APIntMoveTy U) +ConstantRange::ConstantRange(APInt L, APInt U) : Lower(std::move(L)), Upper(std::move(U)) { assert(Lower.getBitWidth() == Upper.getBitWidth() && "ConstantRange with unequal bit widths"); @@ -70,49 +63,49 @@ ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred, APInt UMax(CR.getUnsignedMax()); if (UMax.isMinValue()) return ConstantRange(W, /* empty */ false); - return ConstantRange(APInt::getMinValue(W), UMax); + return ConstantRange(APInt::getMinValue(W), std::move(UMax)); } case CmpInst::ICMP_SLT: { APInt SMax(CR.getSignedMax()); if (SMax.isMinSignedValue()) return ConstantRange(W, /* empty */ false); - return ConstantRange(APInt::getSignedMinValue(W), SMax); + return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax)); } case CmpInst::ICMP_ULE: { APInt UMax(CR.getUnsignedMax()); if (UMax.isMaxValue()) return ConstantRange(W); - return ConstantRange(APInt::getMinValue(W), UMax + 1); + return ConstantRange(APInt::getMinValue(W), std::move(UMax) + 1); } case CmpInst::ICMP_SLE: { APInt SMax(CR.getSignedMax()); if (SMax.isMaxSignedValue()) return ConstantRange(W); - return ConstantRange(APInt::getSignedMinValue(W), SMax + 1); + return ConstantRange(APInt::getSignedMinValue(W), std::move(SMax) + 1); } case CmpInst::ICMP_UGT: { APInt UMin(CR.getUnsignedMin()); if (UMin.isMaxValue()) return ConstantRange(W, /* empty */ false); - return ConstantRange(UMin + 1, APInt::getNullValue(W)); + return ConstantRange(std::move(UMin) + 1, APInt::getNullValue(W)); } case CmpInst::ICMP_SGT: { APInt SMin(CR.getSignedMin()); if (SMin.isMaxSignedValue()) return ConstantRange(W, /* empty */ false); - return ConstantRange(SMin + 1, APInt::getSignedMinValue(W)); + return ConstantRange(std::move(SMin) + 1, APInt::getSignedMinValue(W)); } case CmpInst::ICMP_UGE: { APInt UMin(CR.getUnsignedMin()); if (UMin.isMinValue()) return ConstantRange(W); - return ConstantRange(UMin, APInt::getNullValue(W)); + return ConstantRange(std::move(UMin), APInt::getNullValue(W)); } case CmpInst::ICMP_SGE: { APInt SMin(CR.getSignedMin()); if (SMin.isMinSignedValue()) return ConstantRange(W); - return ConstantRange(SMin, APInt::getSignedMinValue(W)); + return ConstantRange(std::move(SMin), APInt::getSignedMinValue(W)); } } } @@ -202,7 +195,7 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, return ConstantRange(BitWidth, false); if (auto *C = Other.getSingleElement()) - if (C->isMinValue()) + if (C->isNullValue()) // Full set: nothing signed / unsigned wraps when added to 0. return ConstantRange(BitWidth); @@ -214,8 +207,8 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, -Other.getUnsignedMax())); if (NoWrapKind & OBO::NoSignedWrap) { - APInt SignedMin = Other.getSignedMin(); - APInt SignedMax = Other.getSignedMax(); + const APInt &SignedMin = Other.getSignedMin(); + const APInt &SignedMax = Other.getSignedMax(); if (SignedMax.isStrictlyPositive()) Result = SubsetIntersect( @@ -232,54 +225,33 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp, return Result; } -/// isFullSet - Return true if this set contains all of the elements possible -/// for this data-type bool ConstantRange::isFullSet() const { return Lower == Upper && Lower.isMaxValue(); } -/// isEmptySet - Return true if this set contains no members. -/// bool ConstantRange::isEmptySet() const { return Lower == Upper && Lower.isMinValue(); } -/// isWrappedSet - Return true if this set wraps around the top of the range, -/// for example: [100, 8) -/// bool ConstantRange::isWrappedSet() const { return Lower.ugt(Upper); } -/// isSignWrappedSet - Return true if this set wraps around the INT_MIN of -/// its bitwidth, for example: i8 [120, 140). -/// bool ConstantRange::isSignWrappedSet() const { return contains(APInt::getSignedMaxValue(getBitWidth())) && contains(APInt::getSignedMinValue(getBitWidth())); } -/// getSetSize - Return the number of elements in this set. -/// APInt ConstantRange::getSetSize() const { - if (isFullSet()) { - APInt Size(getBitWidth()+1, 0); - Size.setBit(getBitWidth()); - return Size; - } + if (isFullSet()) + return APInt::getOneBitSet(getBitWidth()+1, getBitWidth()); // This is also correct for wrapped sets. return (Upper - Lower).zext(getBitWidth()+1); } -/// isSizeStrictlySmallerThanOf - Compare set size of this range with the range -/// CR. -/// This function is faster than comparing results of getSetSize for the two -/// ranges, because we don't need to extend bitwidth of APInts we're operating -/// with. -/// bool -ConstantRange::isSizeStrictlySmallerThanOf(const ConstantRange &Other) const { +ConstantRange::isSizeStrictlySmallerThan(const ConstantRange &Other) const { assert(getBitWidth() == Other.getBitWidth()); if (isFullSet()) return false; @@ -288,58 +260,41 @@ ConstantRange::isSizeStrictlySmallerThanOf(const ConstantRange &Other) const { return (Upper - Lower).ult(Other.Upper - Other.Lower); } -/// getUnsignedMax - Return the largest unsigned value contained in the -/// ConstantRange. -/// +bool +ConstantRange::isSizeLargerThan(uint64_t MaxSize) const { + assert(MaxSize && "MaxSize can't be 0."); + // If this a full set, we need special handling to avoid needing an extra bit + // to represent the size. + if (isFullSet()) + return APInt::getMaxValue(getBitWidth()).ugt(MaxSize - 1); + + return (Upper - Lower).ugt(MaxSize); +} + APInt ConstantRange::getUnsignedMax() const { if (isFullSet() || isWrappedSet()) return APInt::getMaxValue(getBitWidth()); return getUpper() - 1; } -/// getUnsignedMin - Return the smallest unsigned value contained in the -/// ConstantRange. -/// APInt ConstantRange::getUnsignedMin() const { - if (isFullSet() || (isWrappedSet() && getUpper() != 0)) + if (isFullSet() || (isWrappedSet() && !getUpper().isNullValue())) return APInt::getMinValue(getBitWidth()); return getLower(); } -/// getSignedMax - Return the largest signed value contained in the -/// ConstantRange. -/// APInt ConstantRange::getSignedMax() const { - APInt SignedMax(APInt::getSignedMaxValue(getBitWidth())); - if (!isWrappedSet()) { - if (getLower().sle(getUpper() - 1)) - return getUpper() - 1; - return SignedMax; - } - if (getLower().isNegative() == getUpper().isNegative()) - return SignedMax; + if (isFullSet() || Lower.sgt(Upper)) + return APInt::getSignedMaxValue(getBitWidth()); return getUpper() - 1; } -/// getSignedMin - Return the smallest signed value contained in the -/// ConstantRange. -/// APInt ConstantRange::getSignedMin() const { - APInt SignedMin(APInt::getSignedMinValue(getBitWidth())); - if (!isWrappedSet()) { - if (getLower().sle(getUpper() - 1)) - return getLower(); - return SignedMin; - } - if ((getUpper() - 1).slt(getLower())) { - if (getUpper() != SignedMin) - return SignedMin; - } + if (isFullSet() || (Lower.sgt(Upper) && !getUpper().isMinSignedValue())) + return APInt::getSignedMinValue(getBitWidth()); return getLower(); } -/// contains - Return true if the specified value is in the set. -/// bool ConstantRange::contains(const APInt &V) const { if (Lower == Upper) return isFullSet(); @@ -349,10 +304,6 @@ bool ConstantRange::contains(const APInt &V) const { return Lower.ule(V) || V.ult(Upper); } -/// contains - Return true if the argument is a subset of this range. -/// Two equal sets contain each other. The empty set contained by all other -/// sets. -/// bool ConstantRange::contains(const ConstantRange &Other) const { if (isFullSet() || Other.isEmptySet()) return true; if (isEmptySet() || Other.isFullSet()) return false; @@ -371,8 +322,6 @@ bool ConstantRange::contains(const ConstantRange &Other) const { return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower()); } -/// subtract - Subtract the specified constant from the endpoints of this -/// constant range. ConstantRange ConstantRange::subtract(const APInt &Val) const { assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width"); // If the set is empty or full, don't modify the endpoints. @@ -381,17 +330,10 @@ ConstantRange ConstantRange::subtract(const APInt &Val) const { return ConstantRange(Lower - Val, Upper - Val); } -/// \brief Subtract the specified range from this range (aka relative complement -/// of the sets). ConstantRange ConstantRange::difference(const ConstantRange &CR) const { return intersectWith(CR.inverse()); } -/// intersectWith - Return the range that results from the intersection of this -/// range with another range. The resultant range is guaranteed to include all -/// elements contained in both input ranges, and to have the smallest possible -/// set size that does so. Because there may be two intersections with the -/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A). ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { assert(getBitWidth() == CR.getBitWidth() && "ConstantRange types don't agree!"); @@ -430,7 +372,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { if (CR.Upper.ule(Lower)) return ConstantRange(CR.Lower, Upper); - if (isSizeStrictlySmallerThanOf(CR)) + if (isSizeStrictlySmallerThan(CR)) return *this; return CR; } @@ -445,7 +387,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { if (CR.Upper.ult(Upper)) { if (CR.Lower.ult(Upper)) { - if (isSizeStrictlySmallerThanOf(CR)) + if (isSizeStrictlySmallerThan(CR)) return *this; return CR; } @@ -461,18 +403,11 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { return ConstantRange(CR.Lower, Upper); } - if (isSizeStrictlySmallerThanOf(CR)) + if (isSizeStrictlySmallerThan(CR)) return *this; return CR; } - -/// unionWith - Return the range that results from the union of this range with -/// another range. The resultant range is guaranteed to include the elements of -/// both sets, but may contain more. For example, [3, 9) union [12,15) is -/// [3, 15), which includes 9, 10, and 11, which were not included in either -/// set before. -/// ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { assert(getBitWidth() == CR.getBitWidth() && "ConstantRange types don't agree!"); @@ -491,16 +426,13 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { return ConstantRange(CR.Lower, Upper); } - APInt L = Lower, U = Upper; - if (CR.Lower.ult(L)) - L = CR.Lower; - if ((CR.Upper - 1).ugt(U - 1)) - U = CR.Upper; + APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower; + APInt U = (CR.Upper - 1).ugt(Upper - 1) ? CR.Upper : Upper; - if (L == 0 && U == 0) + if (L.isNullValue() && U.isNullValue()) return ConstantRange(getBitWidth()); - return ConstantRange(L, U); + return ConstantRange(std::move(L), std::move(U)); } if (!CR.isWrappedSet()) { @@ -541,13 +473,10 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper)) return ConstantRange(getBitWidth()); - APInt L = Lower, U = Upper; - if (CR.Upper.ugt(U)) - U = CR.Upper; - if (CR.Lower.ult(L)) - L = CR.Lower; + APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower; + APInt U = CR.Upper.ugt(Upper) ? CR.Upper : Upper; - return ConstantRange(L, U); + return ConstantRange(std::move(L), std::move(U)); } ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp, @@ -574,14 +503,14 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp, auto BW = getBitWidth(); APInt Min = APInt::getMinValue(BW).zextOrSelf(ResultBitWidth); APInt Max = APInt::getMaxValue(BW).zextOrSelf(ResultBitWidth); - return ConstantRange(Min, Max); + return ConstantRange(std::move(Min), std::move(Max)); } case Instruction::SIToFP: { // TODO: use input range if available auto BW = getBitWidth(); APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(ResultBitWidth); APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(ResultBitWidth); - return ConstantRange(SMin, SMax); + return ConstantRange(std::move(SMin), std::move(SMax)); } case Instruction::FPTrunc: case Instruction::FPExt: @@ -593,10 +522,6 @@ ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp, }; } -/// zeroExtend - Return a new range in the specified integer type, which must -/// be strictly larger than the current type. The returned range will -/// correspond to the possible range of values as if the source range had been -/// zero extended. ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const { if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false); @@ -607,16 +532,13 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const { APInt LowerExt(DstTySize, 0); if (!Upper) // special case: [X, 0) -- not really wrapping around LowerExt = Lower.zext(DstTySize); - return ConstantRange(LowerExt, APInt::getOneBitSet(DstTySize, SrcTySize)); + return ConstantRange(std::move(LowerExt), + APInt::getOneBitSet(DstTySize, SrcTySize)); } return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize)); } -/// signExtend - Return a new range in the specified integer type, which must -/// be strictly larger than the current type. The returned range will -/// correspond to the possible range of values as if the source range had been -/// sign extended. ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false); @@ -635,10 +557,6 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { return ConstantRange(Lower.sext(DstTySize), Upper.sext(DstTySize)); } -/// truncate - Return a new range in the specified integer type, which must be -/// strictly smaller than the current type. The returned range will -/// correspond to the possible range of values as if the source range had been -/// truncated to the specified type. ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { assert(getBitWidth() > DstTySize && "Not a value truncation"); if (isEmptySet()) @@ -646,10 +564,6 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { if (isFullSet()) return ConstantRange(DstTySize, /*isFullSet=*/true); - APInt MaxValue = APInt::getMaxValue(DstTySize).zext(getBitWidth()); - APInt MaxBitValue(getBitWidth(), 0); - MaxBitValue.setBit(DstTySize); - APInt LowerDiv(Lower), UpperDiv(Upper); ConstantRange Union(DstTySize, /*isFullSet=*/false); @@ -657,41 +571,46 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { // We use the non-wrapped set code to analyze the [Lower, MaxValue) part, and // then we do the union with [MaxValue, Upper) if (isWrappedSet()) { - // If Upper is greater than Max Value, it covers the whole truncated range. - if (Upper.uge(MaxValue)) + // If Upper is greater than or equal to MaxValue(DstTy), it covers the whole + // truncated range. + if (Upper.getActiveBits() > DstTySize || + Upper.countTrailingOnes() == DstTySize) return ConstantRange(DstTySize, /*isFullSet=*/true); Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize)); - UpperDiv = APInt::getMaxValue(getBitWidth()); + UpperDiv.setAllBits(); // Union covers the MaxValue case, so return if the remaining range is just - // MaxValue. + // MaxValue(DstTy). if (LowerDiv == UpperDiv) return Union; } // Chop off the most significant bits that are past the destination bitwidth. - if (LowerDiv.uge(MaxValue)) { - APInt Div(getBitWidth(), 0); - APInt::udivrem(LowerDiv, MaxBitValue, Div, LowerDiv); - UpperDiv = UpperDiv - MaxBitValue * Div; + if (LowerDiv.getActiveBits() > DstTySize) { + // Mask to just the signficant bits and subtract from LowerDiv/UpperDiv. + APInt Adjust = LowerDiv & APInt::getBitsSetFrom(getBitWidth(), DstTySize); + LowerDiv -= Adjust; + UpperDiv -= Adjust; } - if (UpperDiv.ule(MaxValue)) + unsigned UpperDivWidth = UpperDiv.getActiveBits(); + if (UpperDivWidth <= DstTySize) return ConstantRange(LowerDiv.trunc(DstTySize), UpperDiv.trunc(DstTySize)).unionWith(Union); // The truncated value wraps around. Check if we can do better than fullset. - APInt UpperModulo = UpperDiv - MaxBitValue; - if (UpperModulo.ult(LowerDiv)) - return ConstantRange(LowerDiv.trunc(DstTySize), - UpperModulo.trunc(DstTySize)).unionWith(Union); + if (UpperDivWidth == DstTySize + 1) { + // Clear the MSB so that UpperDiv wraps around. + UpperDiv.clearBit(DstTySize); + if (UpperDiv.ult(LowerDiv)) + return ConstantRange(LowerDiv.trunc(DstTySize), + UpperDiv.trunc(DstTySize)).unionWith(Union); + } return ConstantRange(DstTySize, /*isFullSet=*/true); } -/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The -/// value is zero extended, truncated, or left alone to make it that width. ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const { unsigned SrcTySize = getBitWidth(); if (SrcTySize > DstTySize) @@ -701,8 +620,6 @@ ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const { return *this; } -/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The -/// value is sign extended, truncated, or left alone to make it that width. ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const { unsigned SrcTySize = getBitWidth(); if (SrcTySize > DstTySize) @@ -760,9 +677,9 @@ ConstantRange::add(const ConstantRange &Other) const { if (NewLower == NewUpper) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - ConstantRange X = ConstantRange(NewLower, NewUpper); - if (X.isSizeStrictlySmallerThanOf(*this) || - X.isSizeStrictlySmallerThanOf(Other)) + ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper)); + if (X.isSizeStrictlySmallerThan(*this) || + X.isSizeStrictlySmallerThan(Other)) // We've wrapped, therefore, full set. return ConstantRange(getBitWidth(), /*isFullSet=*/true); return X; @@ -793,9 +710,9 @@ ConstantRange::sub(const ConstantRange &Other) const { if (NewLower == NewUpper) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - ConstantRange X = ConstantRange(NewLower, NewUpper); - if (X.isSizeStrictlySmallerThanOf(*this) || - X.isSizeStrictlySmallerThanOf(Other)) + ConstantRange X = ConstantRange(std::move(NewLower), std::move(NewUpper)); + if (X.isSizeStrictlySmallerThan(*this) || + X.isSizeStrictlySmallerThan(Other)) // We've wrapped, therefore, full set. return ConstantRange(getBitWidth(), /*isFullSet=*/true); return X; @@ -831,7 +748,8 @@ ConstantRange::multiply(const ConstantRange &Other) const { // from one positive number to another which is as good as we can generate. // In this case, skip the extra work of generating signed ranges which aren't // going to be better than this range. - if (!UR.isWrappedSet() && UR.getLower().isNonNegative()) + if (!UR.isWrappedSet() && + (UR.getUpper().isNonNegative() || UR.getUpper().isMinSignedValue())) return UR; // Now the signed range. Because we could be dealing with negative numbers @@ -851,7 +769,7 @@ ConstantRange::multiply(const ConstantRange &Other) const { ConstantRange Result_sext(std::min(L, Compare), std::max(L, Compare) + 1); ConstantRange SR = Result_sext.truncate(getBitWidth()); - return UR.isSizeStrictlySmallerThanOf(SR) ? UR : SR; + return UR.isSizeStrictlySmallerThan(SR) ? UR : SR; } ConstantRange @@ -864,7 +782,7 @@ ConstantRange::smax(const ConstantRange &Other) const { APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1; if (NewU == NewL) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(NewL, NewU); + return ConstantRange(std::move(NewL), std::move(NewU)); } ConstantRange @@ -877,7 +795,7 @@ ConstantRange::umax(const ConstantRange &Other) const { APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1; if (NewU == NewL) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(NewL, NewU); + return ConstantRange(std::move(NewL), std::move(NewU)); } ConstantRange @@ -890,7 +808,7 @@ ConstantRange::smin(const ConstantRange &Other) const { APInt NewU = APIntOps::smin(getSignedMax(), Other.getSignedMax()) + 1; if (NewU == NewL) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(NewL, NewU); + return ConstantRange(std::move(NewL), std::move(NewU)); } ConstantRange @@ -903,12 +821,12 @@ ConstantRange::umin(const ConstantRange &Other) const { APInt NewU = APIntOps::umin(getUnsignedMax(), Other.getUnsignedMax()) + 1; if (NewU == NewL) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(NewL, NewU); + return ConstantRange(std::move(NewL), std::move(NewU)); } ConstantRange ConstantRange::udiv(const ConstantRange &RHS) const { - if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax() == 0) + if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue()) return ConstantRange(getBitWidth(), /*isFullSet=*/false); if (RHS.isFullSet()) return ConstantRange(getBitWidth(), /*isFullSet=*/true); @@ -916,13 +834,13 @@ ConstantRange::udiv(const ConstantRange &RHS) const { APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax()); APInt RHS_umin = RHS.getUnsignedMin(); - if (RHS_umin == 0) { + if (RHS_umin.isNullValue()) { // We want the lowest value in RHS excluding zero. Usually that would be 1 // except for a range in the form of [X, 1) in which case it would be X. if (RHS.getUpper() == 1) RHS_umin = RHS.getLower(); else - RHS_umin = APInt(getBitWidth(), 1); + RHS_umin = 1; } APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1; @@ -932,7 +850,7 @@ ConstantRange::udiv(const ConstantRange &RHS) const { if (Lower == Upper) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(Lower, Upper); + return ConstantRange(std::move(Lower), std::move(Upper)); } ConstantRange @@ -945,7 +863,7 @@ ConstantRange::binaryAnd(const ConstantRange &Other) const { APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()); if (umin.isAllOnesValue()) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(APInt::getNullValue(getBitWidth()), umin + 1); + return ConstantRange(APInt::getNullValue(getBitWidth()), std::move(umin) + 1); } ConstantRange @@ -956,9 +874,9 @@ ConstantRange::binaryOr(const ConstantRange &Other) const { // TODO: replace this with something less conservative APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); - if (umax.isMinValue()) + if (umax.isNullValue()) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(umax, APInt::getNullValue(getBitWidth())); + return ConstantRange(std::move(umax), APInt::getNullValue(getBitWidth())); } ConstantRange @@ -966,29 +884,33 @@ ConstantRange::shl(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return ConstantRange(getBitWidth(), /*isFullSet=*/false); - APInt min = getUnsignedMin().shl(Other.getUnsignedMin()); - APInt max = getUnsignedMax().shl(Other.getUnsignedMax()); + APInt max = getUnsignedMax(); + APInt Other_umax = Other.getUnsignedMax(); - // there's no overflow! - APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros()); - if (Zeros.ugt(Other.getUnsignedMax())) - return ConstantRange(min, max + 1); + // there's overflow! + if (Other_umax.uge(max.countLeadingZeros())) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); // FIXME: implement the other tricky cases - return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt min = getUnsignedMin(); + min <<= Other.getUnsignedMin(); + max <<= Other_umax; + + return ConstantRange(std::move(min), std::move(max) + 1); } ConstantRange ConstantRange::lshr(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return ConstantRange(getBitWidth(), /*isFullSet=*/false); - - APInt max = getUnsignedMax().lshr(Other.getUnsignedMin()); + + APInt max = getUnsignedMax().lshr(Other.getUnsignedMin()) + 1; APInt min = getUnsignedMin().lshr(Other.getUnsignedMax()); - if (min == max + 1) + if (min == max) return ConstantRange(getBitWidth(), /*isFullSet=*/true); - return ConstantRange(min, max + 1); + return ConstantRange(std::move(min), std::move(max)); } ConstantRange ConstantRange::inverse() const { @@ -999,8 +921,6 @@ ConstantRange ConstantRange::inverse() const { return ConstantRange(Upper, Lower); } -/// print - Print out the bounds to a stream... -/// void ConstantRange::print(raw_ostream &OS) const { if (isFullSet()) OS << "full-set"; @@ -1011,8 +931,6 @@ void ConstantRange::print(raw_ostream &OS) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -/// dump - Allow printing from a debugger easily... -/// LLVM_DUMP_METHOD void ConstantRange::dump() const { print(dbgs()); } diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index c5f93c9f4db018aeba731798482a32a746cfb775..27150a89d9b21843f2d3e128ac7456fb321d499b 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -30,17 +30,13 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include -#include + using namespace llvm; //===----------------------------------------------------------------------===// // Constant Class //===----------------------------------------------------------------------===// -void Constant::anchor() { } - -void ConstantData::anchor() {} - bool Constant::isNegativeZeroValue() const { // Floating point values have an explicit -0.0 value. if (const ConstantFP *CFP = dyn_cast(this)) @@ -131,7 +127,7 @@ bool Constant::isOneValue() const { // Check for FP which are bitcasted from 1 integers if (const ConstantFP *CFP = dyn_cast(this)) - return CFP->getValueAPF().bitcastToAPInt() == 1; + return CFP->getValueAPF().bitcastToAPInt().isOneValue(); // Check for constant vectors which are splats of 1 values. if (const ConstantVector *CV = dyn_cast(this)) @@ -496,8 +492,6 @@ void Constant::removeDeadConstantUsers() const { // ConstantInt //===----------------------------------------------------------------------===// -void ConstantInt::anchor() { } - ConstantInt::ConstantInt(IntegerType *Ty, const APInt &V) : ConstantData(Ty, ConstantIntVal), Val(V) { assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type"); @@ -518,27 +512,19 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) { } Constant *ConstantInt::getTrue(Type *Ty) { - VectorType *VTy = dyn_cast(Ty); - if (!VTy) { - assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1."); - return ConstantInt::getTrue(Ty->getContext()); - } - assert(VTy->getElementType()->isIntegerTy(1) && - "True must be vector of i1 or i1."); - return ConstantVector::getSplat(VTy->getNumElements(), - ConstantInt::getTrue(Ty->getContext())); + assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + ConstantInt *TrueC = ConstantInt::getTrue(Ty->getContext()); + if (auto *VTy = dyn_cast(Ty)) + return ConstantVector::getSplat(VTy->getNumElements(), TrueC); + return TrueC; } Constant *ConstantInt::getFalse(Type *Ty) { - VectorType *VTy = dyn_cast(Ty); - if (!VTy) { - assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1."); - return ConstantInt::getFalse(Ty->getContext()); - } - assert(VTy->getElementType()->isIntegerTy(1) && - "False must be vector of i1 or i1."); - return ConstantVector::getSplat(VTy->getNumElements(), - ConstantInt::getFalse(Ty->getContext())); + assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + ConstantInt *FalseC = ConstantInt::getFalse(Ty->getContext()); + if (auto *VTy = dyn_cast(Ty)) + return ConstantVector::getSplat(VTy->getNumElements(), FalseC); + return FalseC; } // Get a ConstantInt from an APInt. @@ -618,8 +604,6 @@ static const fltSemantics *TypeToFloatSemantics(Type *Ty) { return &APFloat::PPCDoubleDouble(); } -void ConstantFP::anchor() { } - Constant *ConstantFP::get(Type *Ty, double V) { LLVMContext &Context = Ty->getContext(); @@ -974,16 +958,6 @@ Constant *ConstantStruct::get(StructType *ST, ArrayRef V) { return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V); } -Constant *ConstantStruct::get(StructType *T, ...) { - va_list ap; - SmallVector Values; - va_start(ap, T); - while (Constant *Val = va_arg(ap, llvm::Constant*)) - Values.push_back(Val); - va_end(ap); - return get(T, Values); -} - ConstantVector::ConstantVector(VectorType *T, ArrayRef V) : ConstantAggregate(T, ConstantVectorVal, V) { assert(V.size() == T->getNumElements() && @@ -1183,21 +1157,14 @@ bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) { unsigned NumBits = Ty->getIntegerBitWidth(); // assert okay if (Ty->isIntegerTy(1)) return Val == 0 || Val == 1; - if (NumBits >= 64) - return true; // always true, has to fit in largest type - uint64_t Max = (1ll << NumBits) - 1; - return Val <= Max; + return isUIntN(NumBits, Val); } bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) { unsigned NumBits = Ty->getIntegerBitWidth(); if (Ty->isIntegerTy(1)) return Val == 0 || Val == 1 || Val == -1; - if (NumBits >= 64) - return true; // always true, has to fit in largest type - int64_t Min = -(1ll << (NumBits-1)); - int64_t Max = (1ll << (NumBits-1)) - 1; - return (Val >= Min && Val <= Max); + return isIntN(NumBits, Val); } bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) { @@ -1818,8 +1785,7 @@ Constant *ConstantExpr::getSizeOf(Type* Ty) { Constant *ConstantExpr::getAlignOf(Type* Ty) { // alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1 // Note that a non-inbounds gep is used, as null isn't within any object. - Type *AligningTy = - StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, nullptr); + Type *AligningTy = StructType::get(Type::getInt1Ty(Ty->getContext()), Ty); Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo(0)); Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0); Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); @@ -2285,9 +2251,6 @@ Type *GetElementPtrConstantExpr::getResultElementType() const { //===----------------------------------------------------------------------===// // ConstantData* implementations -void ConstantDataArray::anchor() {} -void ConstantDataVector::anchor() {} - Type *ConstantDataSequential::getElementType() const { return getType()->getElementType(); } @@ -2646,8 +2609,8 @@ Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { return ConstantInt::get(getElementType(), getElementAsInteger(Elt)); } -bool ConstantDataSequential::isString() const { - return isa(getType()) && getElementType()->isIntegerTy(8); +bool ConstantDataSequential::isString(unsigned CharSize) const { + return isa(getType()) && getElementType()->isIntegerTy(CharSize); } bool ConstantDataSequential::isCString() const { diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h index eda751d8af4ab182560c31d400a422f796df3866..6585304e7674b14ee7574461b54bbef41287242f 100644 --- a/lib/IR/ConstantsContext.h +++ b/lib/IR/ConstantsContext.h @@ -22,6 +22,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" @@ -43,8 +44,6 @@ namespace llvm { /// UnaryConstantExpr - This class is private to Constants.cpp, and is used /// behind the scenes to implement unary constant exprs. class UnaryConstantExpr : public ConstantExpr { - void anchor() override; - public: UnaryConstantExpr(unsigned Opcode, Constant *C, Type *Ty) : ConstantExpr(Ty, Opcode, &Op<0>(), 1) { @@ -56,16 +55,12 @@ public: return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; /// BinaryConstantExpr - This class is private to Constants.cpp, and is used /// behind the scenes to implement binary constant exprs. class BinaryConstantExpr : public ConstantExpr { - void anchor() override; - public: BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) @@ -80,8 +75,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -89,8 +82,6 @@ public: /// SelectConstantExpr - This class is private to Constants.cpp, and is used /// behind the scenes to implement select constant exprs. class SelectConstantExpr : public ConstantExpr { - void anchor() override; - public: SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3) : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) { @@ -104,8 +95,6 @@ public: return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -114,8 +103,6 @@ public: /// Constants.cpp, and is used behind the scenes to implement /// extractelement constant exprs. class ExtractElementConstantExpr : public ConstantExpr { - void anchor() override; - public: ExtractElementConstantExpr(Constant *C1, Constant *C2) : ConstantExpr(cast(C1->getType())->getElementType(), @@ -129,8 +116,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -139,8 +124,6 @@ public: /// Constants.cpp, and is used behind the scenes to implement /// insertelement constant exprs. class InsertElementConstantExpr : public ConstantExpr { - void anchor() override; - public: InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3) : ConstantExpr(C1->getType(), Instruction::InsertElement, @@ -155,8 +138,6 @@ public: return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -165,8 +146,6 @@ public: /// Constants.cpp, and is used behind the scenes to implement /// shufflevector constant exprs. class ShuffleVectorConstantExpr : public ConstantExpr { - void anchor() override; - public: ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3) : ConstantExpr(VectorType::get( @@ -184,8 +163,6 @@ public: return User::operator new(s, 3); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -194,8 +171,6 @@ public: /// Constants.cpp, and is used behind the scenes to implement /// extractvalue constant exprs. class ExtractValueConstantExpr : public ConstantExpr { - void anchor() override; - public: ExtractValueConstantExpr(Constant *Agg, ArrayRef IdxList, Type *DestTy) @@ -209,8 +184,6 @@ public: return User::operator new(s, 1); } - void *operator new(size_t, unsigned) = delete; - /// Indices - These identify which value to extract. const SmallVector Indices; @@ -229,8 +202,6 @@ public: /// Constants.cpp, and is used behind the scenes to implement /// insertvalue constant exprs. class InsertValueConstantExpr : public ConstantExpr { - void anchor() override; - public: InsertValueConstantExpr(Constant *Agg, Constant *Val, ArrayRef IdxList, Type *DestTy) @@ -245,8 +216,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Indices - These identify the position for the insertion. const SmallVector Indices; @@ -270,8 +239,6 @@ class GetElementPtrConstantExpr : public ConstantExpr { GetElementPtrConstantExpr(Type *SrcElementTy, Constant *C, ArrayRef IdxList, Type *DestTy); - void anchor() override; - public: static GetElementPtrConstantExpr *Create(Type *SrcElementTy, Constant *C, ArrayRef IdxList, @@ -300,8 +267,6 @@ public: // behind the scenes to implement ICmp and FCmp constant expressions. This is // needed in order to store the predicate value for these instructions. class CompareConstantExpr : public ConstantExpr { - void anchor() override; - public: unsigned short predicate; CompareConstantExpr(Type *ty, Instruction::OtherOps opc, @@ -316,8 +281,6 @@ public: return User::operator new(s, 2); } - void *operator new(size_t, unsigned) = delete; - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -387,31 +350,34 @@ struct ConstantExprKeyType; template struct ConstantInfo; template <> struct ConstantInfo { - typedef ConstantExprKeyType ValType; - typedef Type TypeClass; + using ValType = ConstantExprKeyType; + using TypeClass = Type; }; template <> struct ConstantInfo { - typedef InlineAsmKeyType ValType; - typedef PointerType TypeClass; + using ValType = InlineAsmKeyType; + using TypeClass = PointerType; }; template <> struct ConstantInfo { - typedef ConstantAggrKeyType ValType; - typedef ArrayType TypeClass; + using ValType = ConstantAggrKeyType; + using TypeClass = ArrayType; }; template <> struct ConstantInfo { - typedef ConstantAggrKeyType ValType; - typedef StructType TypeClass; + using ValType = ConstantAggrKeyType; + using TypeClass = StructType; }; template <> struct ConstantInfo { - typedef ConstantAggrKeyType ValType; - typedef VectorType TypeClass; + using ValType = ConstantAggrKeyType; + using TypeClass = VectorType; }; template struct ConstantAggrKeyType { ArrayRef Operands; + ConstantAggrKeyType(ArrayRef Operands) : Operands(Operands) {} + ConstantAggrKeyType(ArrayRef Operands, const ConstantClass *) : Operands(Operands) {} + ConstantAggrKeyType(const ConstantClass *C, SmallVectorImpl &Storage) { assert(Storage.empty() && "Expected empty storage"); @@ -437,7 +403,8 @@ template struct ConstantAggrKeyType { return hash_combine_range(Operands.begin(), Operands.end()); } - typedef typename ConstantInfo::TypeClass TypeClass; + using TypeClass = typename ConstantInfo::TypeClass; + ConstantClass *create(TypeClass *Ty) const { return new (Operands.size()) ConstantClass(Ty, Operands); } @@ -457,6 +424,7 @@ struct InlineAsmKeyType { : AsmString(AsmString), Constraints(Constraints), FTy(FTy), HasSideEffects(HasSideEffects), IsAlignStack(IsAlignStack), AsmDialect(AsmDialect) {} + InlineAsmKeyType(const InlineAsm *Asm, SmallVectorImpl &) : AsmString(Asm->getAsmString()), Constraints(Asm->getConstraintString()), FTy(Asm->getFunctionType()), HasSideEffects(Asm->hasSideEffects()), @@ -483,7 +451,8 @@ struct InlineAsmKeyType { AsmDialect, FTy); } - typedef ConstantInfo::TypeClass TypeClass; + using TypeClass = ConstantInfo::TypeClass; + InlineAsm *create(TypeClass *Ty) const { assert(PointerType::getUnqual(FTy) == Ty); return new InlineAsm(FTy, AsmString, Constraints, HasSideEffects, @@ -507,11 +476,13 @@ struct ConstantExprKeyType { : Opcode(Opcode), SubclassOptionalData(SubclassOptionalData), SubclassData(SubclassData), Ops(Ops), Indexes(Indexes), ExplicitTy(ExplicitTy) {} + ConstantExprKeyType(ArrayRef Operands, const ConstantExpr *CE) : Opcode(CE->getOpcode()), SubclassOptionalData(CE->getRawSubclassOptionalData()), SubclassData(CE->isCompare() ? CE->getPredicate() : 0), Ops(Operands), Indexes(CE->hasIndices() ? CE->getIndices() : ArrayRef()) {} + ConstantExprKeyType(const ConstantExpr *CE, SmallVectorImpl &Storage) : Opcode(CE->getOpcode()), @@ -553,7 +524,8 @@ struct ConstantExprKeyType { hash_combine_range(Indexes.begin(), Indexes.end())); } - typedef ConstantInfo::TypeClass TypeClass; + using TypeClass = ConstantInfo::TypeClass; + ConstantExpr *create(TypeClass *Ty) const { switch (Opcode) { default: @@ -594,16 +566,17 @@ struct ConstantExprKeyType { template class ConstantUniqueMap { public: - typedef typename ConstantInfo::ValType ValType; - typedef typename ConstantInfo::TypeClass TypeClass; - typedef std::pair LookupKey; + using ValType = typename ConstantInfo::ValType; + using TypeClass = typename ConstantInfo::TypeClass; + using LookupKey = std::pair; /// Key and hash together, so that we compute the hash only once and reuse it. - typedef std::pair LookupKeyHashed; + using LookupKeyHashed = std::pair; private: struct MapInfo { - typedef DenseMapInfo ConstantClassInfo; + using ConstantClassInfo = DenseMapInfo; + static inline ConstantClass *getEmptyKey() { return ConstantClassInfo::getEmptyKey(); } @@ -643,7 +616,7 @@ private: }; public: - typedef DenseSet MapTy; + using MapTy = DenseSet; private: MapTy Map; diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index b5ed30b85c8a13a1e06730db83a0a247ce5438f5..4ff0261a7f08f062358f146b6809c5d1ccb753ed 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -568,6 +568,14 @@ LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) { /*--.. Operations on array, pointer, and vector types (sequence types) .....--*/ +void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr) { + int i = 0; + for (auto *T : unwrap(Tp)->subtypes()) { + Arr[i] = wrap(T); + i++; + } +} + LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { return wrap(ArrayType::get(unwrap(ElementType), ElementCount)); } @@ -587,6 +595,10 @@ LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) { return wrap(cast(Ty)->getElementType()); } +unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp) { + return unwrap(Tp)->getNumContainedTypes(); +} + unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) { return unwrap(ArrayTy)->getNumElements(); } @@ -863,6 +875,19 @@ LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) { return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count); } +LLVMValueRef LLVMMetadataAsValue(LLVMContextRef C, LLVMMetadataRef MD) { + return wrap(MetadataAsValue::get(*unwrap(C), unwrap(MD))); +} + +LLVMMetadataRef LLVMValueAsMetadata(LLVMValueRef Val) { + auto *V = unwrap(Val); + if (auto *C = dyn_cast(V)) + return wrap(ConstantAsMetadata::get(C)); + if (auto *MAV = dyn_cast(V)) + return wrap(MAV->getMetadata()); + return wrap(ValueAsMetadata::get(V)); +} + const char *LLVMGetMDString(LLVMValueRef V, unsigned *Length) { if (const auto *MD = dyn_cast(unwrap(V))) if (const MDString *S = dyn_cast(MD->getMetadata())) { @@ -1883,13 +1908,8 @@ void LLVMRemoveStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A, const char *V) { Function *Func = unwrap(Fn); - AttributeList::AttrIndex Idx = - AttributeList::AttrIndex(AttributeList::FunctionIndex); - AttrBuilder B; - - B.addAttribute(A, V); - AttributeList Set = AttributeList::get(Func->getContext(), Idx, B); - Func->addAttributes(Idx, Set); + Attribute Attr = Attribute::get(Func->getContext(), A, V); + Func->addAttribute(AttributeList::FunctionIndex, Attr); } /*--.. Operations on parameters ............................................--*/ @@ -1949,9 +1969,7 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) { Argument *A = unwrap(Arg); - AttrBuilder B; - B.addAlignmentAttr(align); - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::getWithAlignment(A->getContext(), align)); } /*--.. Operations on basic blocks ..........................................--*/ @@ -2158,11 +2176,8 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) { void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned align) { CallSite Call = CallSite(unwrap(Instr)); - AttrBuilder B; - B.addAlignmentAttr(align); - Call.setAttributes(Call.getAttributes().addAttributes( - Call->getContext(), index, - AttributeList::get(Call->getContext(), index, B))); + Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), align); + Call.addAttribute(index, AlignAttr); } void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 9407c805b92a5b85ba52e1bc7cf1cc1f906da171..7e598b43ac16e803e3b08b70520d4c246f802ff4 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -12,14 +12,14 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DIBuilder.h" +#include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" -#include "LLVMContextImpl.h" using namespace llvm; using namespace llvm::dwarf; @@ -39,6 +39,21 @@ void DIBuilder::trackIfUnresolved(MDNode *N) { UnresolvedNodes.emplace_back(N); } +void DIBuilder::finalizeSubprogram(DISubprogram *SP) { + MDTuple *Temp = SP->getVariables().get(); + if (!Temp || !Temp->isTemporary()) + return; + + SmallVector Variables; + + auto PV = PreservedVariables.find(SP); + if (PV != PreservedVariables.end()) + Variables.append(PV->second.begin(), PV->second.end()); + + DINodeArray AV = getOrCreateArray(Variables); + TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); +} + void DIBuilder::finalize() { if (!CUNode) { assert(!AllowUnresolvedNodes && @@ -62,25 +77,11 @@ void DIBuilder::finalize() { CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues)); DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms); - auto resolveVariables = [&](DISubprogram *SP) { - MDTuple *Temp = SP->getVariables().get(); - if (!Temp) - return; - - SmallVector Variables; - - auto PV = PreservedVariables.find(SP); - if (PV != PreservedVariables.end()) - Variables.append(PV->second.begin(), PV->second.end()); - - DINodeArray AV = getOrCreateArray(Variables); - TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); - }; for (auto *SP : SPs) - resolveVariables(SP); + finalizeSubprogram(SP); for (auto *N : RetainValues) if (auto *SP = dyn_cast(N)) - resolveVariables(SP); + finalizeSubprogram(SP); if (!AllGVs.empty()) CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs)); @@ -676,13 +677,14 @@ DISubprogram *DIBuilder::createFunction( DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags, - bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl) { + bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl, + DITypeArray ThrownTypes) { auto *Node = getSubprogram( /* IsDistinct = */ isDefinition, VMContext, getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, 0, Flags, isOptimized, isDefinition ? CUNode : nullptr, TParams, Decl, - MDTuple::getTemporary(VMContext, None).release()); + MDTuple::getTemporary(VMContext, None).release(), ThrownTypes); if (isDefinition) AllSubprograms.push_back(Node); @@ -694,23 +696,22 @@ DISubprogram *DIBuilder::createTempFunctionFwdDecl( DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags, - bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl) { + bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl, + DITypeArray ThrownTypes) { return DISubprogram::getTemporary( VMContext, getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, nullptr, 0, 0, 0, Flags, isOptimized, isDefinition ? CUNode : nullptr, - TParams, Decl, nullptr) + TParams, Decl, nullptr, ThrownTypes) .release(); } -DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name, - StringRef LinkageName, DIFile *F, - unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, - unsigned VK, unsigned VIndex, - int ThisAdjustment, DIType *VTableHolder, - DINode::DIFlags Flags, bool isOptimized, - DITemplateParameterArray TParams) { +DISubprogram *DIBuilder::createMethod( + DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F, + unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, + bool isDefinition, unsigned VK, unsigned VIndex, int ThisAdjustment, + DIType *VTableHolder, DINode::DIFlags Flags, bool isOptimized, + DITemplateParameterArray TParams, DITypeArray ThrownTypes) { assert(getNonCompileUnitScope(Context) && "Methods should have both a Context and a context that isn't " "the compile unit."); @@ -719,7 +720,7 @@ DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name, /* IsDistinct = */ isDefinition, VMContext, cast(Context), Name, LinkageName, F, LineNo, Ty, isLocalToUnit, isDefinition, LineNo, VTableHolder, VK, VIndex, ThisAdjustment, Flags, isOptimized, - isDefinition ? CUNode : nullptr, TParams, nullptr, nullptr); + isDefinition ? CUNode : nullptr, TParams, nullptr, nullptr, ThrownTypes); if (isDefinition) AllSubprograms.push_back(SP); @@ -728,10 +729,15 @@ DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name, } DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name, - DIFile *File, unsigned LineNo, bool ExportSymbols) { - return DINamespace::get(VMContext, getNonCompileUnitScope(Scope), File, Name, - LineNo, ExportSymbols); + + // It is okay to *not* make anonymous top-level namespaces distinct, because + // all nodes that have an anonymous namespace as their parent scope are + // guaranteed to be unique and/or are linked to their containing + // DICompileUnit. This decision is an explicit tradeoff of link time versus + // memory usage versus code simplicity and may get revisited in the future. + return DINamespace::get(VMContext, getNonCompileUnitScope(Scope), Name, + ExportSymbols); } DIModule *DIBuilder::createModule(DIScope *Scope, StringRef Name, diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index 6f90ce598568628478e4a669da60d103f9a644b5..5de281a95237655f68e0c37214421cc9eeab13aa 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -1,4 +1,4 @@ -//===-- DataLayout.cpp - Data size & alignment routines --------------------==// +//===- DataLayout.cpp - Data size & alignment routines ---------------------==// // // The LLVM Compiler Infrastructure // @@ -18,19 +18,25 @@ #include "llvm/IR/DataLayout.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/raw_ostream.h" #include +#include +#include #include +#include +#include + using namespace llvm; //===----------------------------------------------------------------------===// @@ -73,7 +79,6 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { } } - /// getElementContainingOffset - Given a valid offset into the structure, /// return the structure index that contains it. unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const { @@ -302,7 +307,7 @@ void DataLayout::parseSpecifier(StringRef Desc) { case 'a': { AlignTypeEnum AlignType; switch (Specifier) { - default: + default: llvm_unreachable("Unexpected specifier!"); case 'i': AlignType = INTEGER_ALIGN; break; case 'v': AlignType = VECTOR_ALIGN; break; case 'f': AlignType = FLOAT_ALIGN; break; @@ -338,7 +343,7 @@ void DataLayout::parseSpecifier(StringRef Desc) { break; } case 'n': // Native integer types. - for (;;) { + while (true) { unsigned Width = getInt(Tok); if (Width == 0) report_fatal_error( @@ -393,7 +398,7 @@ void DataLayout::parseSpecifier(StringRef Desc) { } } -DataLayout::DataLayout(const Module *M) : LayoutMap(nullptr) { +DataLayout::DataLayout(const Module *M) { init(M); } @@ -522,7 +527,7 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, namespace { class StructLayoutMap { - typedef DenseMap LayoutInfoTy; + using LayoutInfoTy = DenseMap; LayoutInfoTy LayoutInfo; public: @@ -577,7 +582,6 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const { return L; } - unsigned DataLayout::getPointerABIAlignment(unsigned AS) const { PointersTy::const_iterator I = findPointerLowerBound(AS); if (I == Pointers.end() || I->AddressSpace != AS) { @@ -608,11 +612,8 @@ unsigned DataLayout::getPointerSize(unsigned AS) const { unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const { assert(Ty->isPtrOrPtrVectorTy() && "This should only be called with a pointer or pointer vector type"); - - if (Ty->isPointerTy()) - return getTypeSizeInBits(Ty); - - return getTypeSizeInBits(Ty->getScalarType()); + Ty = Ty->getScalarType(); + return getPointerSizeInBits(cast(Ty)->getAddressSpace()); } /*! @@ -624,7 +625,7 @@ unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const { == false) for the requested type \a Ty. */ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { - int AlignType = -1; + AlignTypeEnum AlignType; assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); switch (Ty->getTypeID()) { @@ -673,8 +674,7 @@ unsigned DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { llvm_unreachable("Bad type for getAlignment!!!"); } - return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty), - abi_or_pref, Ty); + return getAlignmentInfo(AlignType, getTypeSizeInBits(Ty), abi_or_pref, Ty); } unsigned DataLayout::getABITypeAlignment(Type *Ty) const { @@ -782,4 +782,3 @@ unsigned DataLayout::getPreferredAlignment(const GlobalVariable *GV) const { unsigned DataLayout::getPreferredAlignmentLog(const GlobalVariable *GV) const { return Log2_32(getPreferredAlignment(GV)); } - diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index c5d39c5443049d0edf70a27349bf438db64bc635..56cec57a4d070944d3c499b2d22f3375ef9d73e5 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -1,4 +1,4 @@ -//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===// +//===- DebugInfo.cpp - Debug Information Helper Classes -------------------===// // // The LLVM Compiler Infrastructure // @@ -13,21 +13,28 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DebugInfo.h" -#include "LLVMContextImpl.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GVMaterializer.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" -#include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Casting.h" +#include +#include +#include + using namespace llvm; using namespace llvm::dwarf; @@ -249,7 +256,7 @@ bool DebugInfoFinder::addScope(DIScope *Scope) { return true; } -static llvm::MDNode *stripDebugLocFromLoopID(llvm::MDNode *N) { +static MDNode *stripDebugLocFromLoopID(MDNode *N) { assert(N->op_begin() != N->op_end() && "Missing self reference?"); // if there is no debug location, we do not have to rewrite this MDNode. @@ -288,7 +295,7 @@ bool llvm::stripDebugInfo(Function &F) { F.setSubprogram(nullptr); } - llvm::DenseMap LoopIDsMap; + DenseMap LoopIDsMap; for (BasicBlock &BB : F) { for (auto II = BB.begin(), End = BB.end(); II != End;) { Instruction &I = *II++; // We may delete the instruction, increment now. @@ -525,7 +532,7 @@ private: void traverse(MDNode *); }; -} // Anonymous namespace. +} // end anonymous namespace void DebugTypeInfoRemoval::traverse(MDNode *N) { if (!N || Replacements.count(N)) @@ -590,7 +597,7 @@ bool llvm::stripNonLineTableDebugInfo(Module &M) { GV.eraseMetadata(LLVMContext::MD_dbg); DebugTypeInfoRemoval Mapper(M.getContext()); - auto remap = [&](llvm::MDNode *Node) -> llvm::MDNode * { + auto remap = [&](MDNode *Node) -> MDNode * { if (!Node) return nullptr; Mapper.traverseAndRemap(Node); diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index d14c6018d40990de7fb21ad3286beb28f21f82b3..0bf68b4c53bbf2a344e35cc6685f76505d99a4e1 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -15,6 +15,7 @@ #include "LLVMContextImpl.h" #include "MetadataImpl.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Function.h" using namespace llvm; @@ -214,6 +215,10 @@ void GenericDINode::recalculateHash() { #define DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(CLASS, OPS) \ return storeImpl(new (array_lengthof(OPS)) CLASS(Context, Storage, OPS), \ Storage, Context.pImpl->CLASS##s) +#define DEFINE_GETIMPL_STORE_N(CLASS, ARGS, OPS, NUM_OPS) \ + return storeImpl(new (NUM_OPS) \ + CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS), \ + Storage, Context.pImpl->CLASS##s) DISubrange *DISubrange::getImpl(LLVMContext &Context, int64_t Count, int64_t Lo, StorageType Storage, bool ShouldCreate) { @@ -441,21 +446,30 @@ DISubprogram *DISubprogram::getImpl( Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit, Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables, - StorageType Storage, bool ShouldCreate) { + Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); assert(isCanonical(LinkageName) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP( - DISubprogram, - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, - ScopeLine, ContainingType, Virtuality, VirtualIndex, ThisAdjustment, - Flags, IsOptimized, Unit, TemplateParams, Declaration, Variables)); - Metadata *Ops[] = {File, Scope, Name, Name, - LinkageName, Type, ContainingType, Unit, - TemplateParams, Declaration, Variables}; - DEFINE_GETIMPL_STORE(DISubprogram, (Line, ScopeLine, Virtuality, VirtualIndex, - ThisAdjustment, Flags, IsLocalToUnit, - IsDefinition, IsOptimized), - Ops); + DISubprogram, (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, + IsDefinition, ScopeLine, ContainingType, Virtuality, + VirtualIndex, ThisAdjustment, Flags, IsOptimized, Unit, + TemplateParams, Declaration, Variables, ThrownTypes)); + SmallVector Ops = { + File, Scope, Name, LinkageName, Type, Unit, + Declaration, Variables, ContainingType, TemplateParams, ThrownTypes}; + if (!ThrownTypes) { + Ops.pop_back(); + if (!TemplateParams) { + Ops.pop_back(); + if (!ContainingType) + Ops.pop_back(); + } + } + DEFINE_GETIMPL_STORE_N(DISubprogram, + (Line, ScopeLine, Virtuality, VirtualIndex, + ThisAdjustment, Flags, IsLocalToUnit, IsDefinition, + IsOptimized), + Ops, Ops.size()); } bool DISubprogram::describes(const Function *F) const { @@ -493,13 +507,13 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context, } DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope, - Metadata *File, MDString *Name, unsigned Line, - bool ExportSymbols, StorageType Storage, - bool ShouldCreate) { + MDString *Name, bool ExportSymbols, + StorageType Storage, bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); - DEFINE_GETIMPL_LOOKUP(DINamespace, (Scope, File, Name, Line, ExportSymbols)); - Metadata *Ops[] = {File, Scope, Name}; - DEFINE_GETIMPL_STORE(DINamespace, (Line, ExportSymbols), Ops); + DEFINE_GETIMPL_LOOKUP(DINamespace, (Scope, Name, ExportSymbols)); + // The nullptr is for DIScope's File operand. This should be refactored. + Metadata *Ops[] = {nullptr, Scope, Name}; + DEFINE_GETIMPL_STORE(DINamespace, (ExportSymbols), Ops); } DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *Scope, @@ -584,8 +598,7 @@ unsigned DIExpression::ExprOperand::getSize() const { case dwarf::DW_OP_LLVM_fragment: return 3; case dwarf::DW_OP_constu: - case dwarf::DW_OP_plus: - case dwarf::DW_OP_minus: + case dwarf::DW_OP_plus_uconst: return 2; default: return 1; @@ -627,6 +640,7 @@ bool DIExpression::isValid() const { break; } case dwarf::DW_OP_constu: + case dwarf::DW_OP_plus_uconst: case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: case dwarf::DW_OP_deref: @@ -647,6 +661,69 @@ DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) { return None; } +void DIExpression::appendOffset(SmallVectorImpl &Ops, + int64_t Offset) { + if (Offset > 0) { + Ops.push_back(dwarf::DW_OP_plus_uconst); + Ops.push_back(Offset); + } else if (Offset < 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(-Offset); + Ops.push_back(dwarf::DW_OP_minus); + } +} + +bool DIExpression::extractIfOffset(int64_t &Offset) const { + if (getNumElements() == 0) { + Offset = 0; + return true; + } + + if (getNumElements() == 2 && Elements[0] == dwarf::DW_OP_plus_uconst) { + Offset = Elements[1]; + return true; + } + + if (getNumElements() == 3 && Elements[0] == dwarf::DW_OP_constu) { + if (Elements[2] == dwarf::DW_OP_plus) { + Offset = Elements[1]; + return true; + } + if (Elements[2] == dwarf::DW_OP_minus) { + Offset = -Elements[1]; + return true; + } + } + + return false; +} + +DIExpression *DIExpression::prepend(const DIExpression *Expr, bool Deref, + int64_t Offset, bool StackValue) { + SmallVector Ops; + appendOffset(Ops, Offset); + if (Deref) + Ops.push_back(dwarf::DW_OP_deref); + if (Expr) + for (auto Op : Expr->expr_ops()) { + // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment. + if (StackValue) { + if (Op.getOp() == dwarf::DW_OP_stack_value) + StackValue = false; + else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) { + Ops.push_back(dwarf::DW_OP_stack_value); + StackValue = false; + } + } + Ops.push_back(Op.getOp()); + for (unsigned I = 0; I < Op.getNumArgs(); ++I) + Ops.push_back(Op.getArg(I)); + } + if (StackValue) + Ops.push_back(dwarf::DW_OP_stack_value); + return DIExpression::get(Expr->getContext(), Ops); +} + bool DIExpression::isConstant() const { // Recognize DW_OP_constu C DW_OP_stack_value (DW_OP_LLVM_fragment Len Ofs)?. if (getNumElements() != 3 && getNumElements() != 6) diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp index f31074a7ad44251ee87fe9023cbb28ace2e5a5d6..6297395b4c00920ac563d5227552e37894f56de2 100644 --- a/lib/IR/DebugLoc.cpp +++ b/lib/IR/DebugLoc.cpp @@ -10,6 +10,7 @@ #include "llvm/IR/DebugLoc.h" #include "LLVMContextImpl.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/IntrinsicInst.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -66,6 +67,38 @@ DebugLoc DebugLoc::get(unsigned Line, unsigned Col, const MDNode *Scope, const_cast(InlinedAt)); } +DebugLoc DebugLoc::appendInlinedAt(DebugLoc DL, DILocation *InlinedAt, + LLVMContext &Ctx, + DenseMap &Cache, + bool ReplaceLast) { + SmallVector InlinedAtLocations; + DILocation *Last = InlinedAt; + DILocation *CurInlinedAt = DL; + + // Gather all the inlined-at nodes. + while (DILocation *IA = CurInlinedAt->getInlinedAt()) { + // Skip any we've already built nodes for. + if (auto *Found = Cache[IA]) { + Last = cast(Found); + break; + } + + if (ReplaceLast && !IA->getInlinedAt()) + break; + InlinedAtLocations.push_back(IA); + CurInlinedAt = IA; + } + + // Starting from the top, rebuild the nodes to point to the new inlined-at + // location (then rebuilding the rest of the chain behind it) and update the + // map of already-constructed inlined-at nodes. + for (const DILocation *MD : reverse(InlinedAtLocations)) + Cache[MD] = Last = DILocation::getDistinct( + Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); + + return Last; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void DebugLoc::dump() const { if (!Loc) diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 395b6158e0c8666f62ffd0a11f4c539d5f0f3767..5129d6b9b008e3d162cdc9a881200437d1e9fd59 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -13,19 +13,30 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DiagnosticInfo.h" -#include "LLVMContextImpl.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" #include +#include +#include #include using namespace llvm; @@ -53,6 +64,8 @@ struct PassRemarksOpt { } }; +} // end anonymous namespace + static PassRemarksOpt PassRemarksOptLoc; static PassRemarksOpt PassRemarksMissedOptLoc; static PassRemarksOpt PassRemarksAnalysisOptLoc; @@ -85,7 +98,6 @@ PassRemarksAnalysis( "the given regular expression"), cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired, cl::ZeroOrMore); -} int llvm::getNextAvailablePluginDiagnosticKind() { static std::atomic PluginKindID(DK_FirstPluginKind); @@ -97,8 +109,7 @@ const char *OptimizationRemarkAnalysis::AlwaysPrint = ""; DiagnosticInfoInlineAsm::DiagnosticInfoInlineAsm(const Instruction &I, const Twine &MsgStr, DiagnosticSeverity Severity) - : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(0), MsgStr(MsgStr), - Instr(&I) { + : DiagnosticInfo(DK_InlineAsm, Severity), MsgStr(MsgStr), Instr(&I) { if (const MDNode *SrcLoc = I.getMetadata("srcloc")) { if (SrcLoc->getNumOperands() != 0) if (const auto *CI = @@ -193,7 +204,7 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, const Value *V // Only include names that correspond to user variables. FIXME: we should use // debug info if available to get the name of the user variable. if (isa(V) || isa(V)) - Val = GlobalValue::getRealLinkageName(V->getName()); + Val = GlobalValue::dropLLVMManglingEscape(V->getName()); else if (isa(V)) { raw_string_ostream OS(Val); V->printAsOperand(OS, /*PrintType=*/false); diff --git a/lib/IR/DiagnosticPrinter.cpp b/lib/IR/DiagnosticPrinter.cpp index 659ff49d623f8fe7dc7c5fe698f34ccbe95e2853..ee2df9e24f939cbaea3d9ac9b9568d35bde61514 100644 --- a/lib/IR/DiagnosticPrinter.cpp +++ b/lib/IR/DiagnosticPrinter.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/Twine.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp index 44948cc5831d785c0daaca63f5341233b2e79c70..37e735251fdfae54168d8e61f9775bbc0eacc74e 100644 --- a/lib/IR/Dominators.cpp +++ b/lib/IR/Dominators.cpp @@ -150,12 +150,6 @@ bool DominatorTree::dominates(const Instruction *Def, bool DominatorTree::dominates(const BasicBlockEdge &BBE, const BasicBlock *UseBB) const { - // Assert that we have a single edge. We could handle them by simply - // returning false, but since isSingleEdge is linear on the number of - // edges, the callers can normally handle them more efficiently. - assert(BBE.isSingleEdge() && - "This function is not efficient in handling multiple edges"); - // If the BB the edge ends in doesn't dominate the use BB, then the // edge also doesn't. const BasicBlock *Start = BBE.getStart(); @@ -188,11 +182,17 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, // trivially dominates itself, so we only have to find if it dominates the // other predecessors. Since the only way out of X is via NormalDest, X can // only properly dominate a node if NormalDest dominates that node too. + int IsDuplicateEdge = 0; for (const_pred_iterator PI = pred_begin(End), E = pred_end(End); PI != E; ++PI) { const BasicBlock *BB = *PI; - if (BB == Start) + if (BB == Start) { + // If there are multiple edges between Start and End, by definition they + // can't dominate anything. + if (IsDuplicateEdge++) + return false; continue; + } if (!dominates(End, BB)) return false; @@ -201,12 +201,6 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, } bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const { - // Assert that we have a single edge. We could handle them by simply - // returning false, but since isSingleEdge is linear on the number of - // edges, the callers can normally handle them more efficiently. - assert(BBE.isSingleEdge() && - "This function is not efficient in handling multiple edges"); - Instruction *UserInst = cast(U.getUser()); // A PHI in the end of the edge is dominated by it. PHINode *PN = dyn_cast(UserInst); diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 3953a6e1352743d34bcf4e9617dfda5f43219ad8..85a019856c0177c22f4fe987e91006fbeb13a1af 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -1,4 +1,4 @@ -//===-- Function.cpp - Implement the Global object classes ----------------===// +//===- Function.cpp - Implement the Global object classes -----------------===// // // The LLVM Compiler Infrastructure // @@ -14,18 +14,48 @@ #include "llvm/IR/Function.h" #include "LLVMContextImpl.h" #include "SymbolTableListTraitsImpl.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/SymbolTableListTraits.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include +#include +#include +#include +#include +#include + using namespace llvm; // Explicit instantiations of SymbolTableListTraits since some of the methods @@ -36,8 +66,6 @@ template class llvm::SymbolTableListTraits; // Argument Implementation //===----------------------------------------------------------------------===// -void Argument::anchor() { } - Argument::Argument(Type *Ty, const Twine &Name, Function *Par, unsigned ArgNo) : Value(Ty, Value::ArgumentVal), Parent(Par), ArgNo(ArgNo) { setName(Name); @@ -49,8 +77,7 @@ void Argument::setParent(Function *parent) { bool Argument::hasNonNullAttr() const { if (!getType()->isPointerTy()) return false; - if (getParent()->getAttributes(). - hasAttribute(getArgNo()+1, Attribute::NonNull)) + if (getParent()->hasParamAttribute(getArgNo(), Attribute::NonNull)) return true; else if (getDereferenceableBytes() > 0 && getType()->getPointerAddressSpace() == 0) @@ -64,13 +91,11 @@ bool Argument::hasByValAttr() const { } bool Argument::hasSwiftSelfAttr() const { - return getParent()->getAttributes(). - hasAttribute(getArgNo()+1, Attribute::SwiftSelf); + return getParent()->hasParamAttribute(getArgNo(), Attribute::SwiftSelf); } bool Argument::hasSwiftErrorAttr() const { - return getParent()->getAttributes(). - hasAttribute(getArgNo()+1, Attribute::SwiftError); + return getParent()->hasParamAttribute(getArgNo(), Attribute::SwiftError); } bool Argument::hasInAllocaAttr() const { @@ -81,26 +106,25 @@ bool Argument::hasInAllocaAttr() const { bool Argument::hasByValOrInAllocaAttr() const { if (!getType()->isPointerTy()) return false; AttributeList Attrs = getParent()->getAttributes(); - return Attrs.hasAttribute(getArgNo() + 1, Attribute::ByVal) || - Attrs.hasAttribute(getArgNo() + 1, Attribute::InAlloca); + return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) || + Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca); } unsigned Argument::getParamAlignment() const { assert(getType()->isPointerTy() && "Only pointers have alignments"); - return getParent()->getParamAlignment(getArgNo()+1); - + return getParent()->getParamAlignment(getArgNo()); } uint64_t Argument::getDereferenceableBytes() const { assert(getType()->isPointerTy() && "Only pointers have dereferenceable bytes"); - return getParent()->getDereferenceableBytes(getArgNo()+1); + return getParent()->getParamDereferenceableBytes(getArgNo()); } uint64_t Argument::getDereferenceableOrNullBytes() const { assert(getType()->isPointerTy() && "Only pointers have dereferenceable bytes"); - return getParent()->getDereferenceableOrNullBytes(getArgNo()+1); + return getParent()->getParamDereferenceableOrNullBytes(getArgNo()); } bool Argument::hasNestAttr() const { @@ -136,32 +160,31 @@ bool Argument::hasSExtAttr() const { } bool Argument::onlyReadsMemory() const { - return getParent()->getAttributes(). - hasAttribute(getArgNo()+1, Attribute::ReadOnly) || - getParent()->getAttributes(). - hasAttribute(getArgNo()+1, Attribute::ReadNone); + AttributeList Attrs = getParent()->getAttributes(); + return Attrs.hasParamAttribute(getArgNo(), Attribute::ReadOnly) || + Attrs.hasParamAttribute(getArgNo(), Attribute::ReadNone); } -void Argument::addAttr(AttributeList AS) { - assert(AS.getNumSlots() <= 1 && - "Trying to add more than one attribute set to an argument!"); - AttrBuilder B(AS, AS.getSlotIndex(0)); - getParent()->addAttributes( - getArgNo() + 1, - AttributeList::get(Parent->getContext(), getArgNo() + 1, B)); +void Argument::addAttrs(AttrBuilder &B) { + AttributeList AL = getParent()->getAttributes(); + AL = AL.addParamAttributes(Parent->getContext(), getArgNo(), B); + getParent()->setAttributes(AL); } -void Argument::removeAttr(AttributeList AS) { - assert(AS.getNumSlots() <= 1 && - "Trying to remove more than one attribute set from an argument!"); - AttrBuilder B(AS, AS.getSlotIndex(0)); - getParent()->removeAttributes( - getArgNo() + 1, - AttributeList::get(Parent->getContext(), getArgNo() + 1, B)); +void Argument::addAttr(Attribute::AttrKind Kind) { + getParent()->addParamAttr(getArgNo(), Kind); +} + +void Argument::addAttr(Attribute Attr) { + getParent()->addParamAttr(getArgNo(), Attr); +} + +void Argument::removeAttr(Attribute::AttrKind Kind) { + getParent()->removeParamAttr(getArgNo(), Kind); } bool Argument::hasAttribute(Attribute::AttrKind Kind) const { - return getParent()->hasAttribute(getArgNo() + 1, Kind); + return getParent()->hasParamAttribute(getArgNo(), Kind); } //===----------------------------------------------------------------------===// @@ -188,7 +211,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name, Module *ParentModule) : GlobalObject(Ty, Value::FunctionVal, OperandTraits::op_begin(this), 0, Linkage, name), - Arguments(nullptr), NumArgs(Ty->getNumParams()) { + NumArgs(Ty->getNumParams()) { assert(FunctionType::isValidReturnType(getReturnType()) && "invalid return type"); setGlobalObjectSubClassData(0); @@ -333,12 +356,30 @@ void Function::addAttribute(unsigned i, Attribute Attr) { setAttributes(PAL); } -void Function::addAttributes(unsigned i, AttributeList Attrs) { +void Function::addAttributes(unsigned i, const AttrBuilder &Attrs) { AttributeList PAL = getAttributes(); PAL = PAL.addAttributes(getContext(), i, Attrs); setAttributes(PAL); } +void Function::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void Function::addParamAttr(unsigned ArgNo, Attribute Attr) { + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr); + setAttributes(PAL); +} + +void Function::addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) { + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttributes(getContext(), ArgNo, Attrs); + setAttributes(PAL); +} + void Function::removeAttribute(unsigned i, Attribute::AttrKind Kind) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttribute(getContext(), i, Kind); @@ -351,24 +392,55 @@ void Function::removeAttribute(unsigned i, StringRef Kind) { setAttributes(PAL); } -void Function::removeAttributes(unsigned i, AttributeList Attrs) { +void Function::removeAttributes(unsigned i, const AttrBuilder &Attrs) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttributes(getContext(), i, Attrs); setAttributes(PAL); } +void Function::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void Function::removeParamAttr(unsigned ArgNo, StringRef Kind) { + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void Function::removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) { + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttributes(getContext(), ArgNo, Attrs); + setAttributes(PAL); +} + void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes); setAttributes(PAL); } +void Function::addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes) { + AttributeList PAL = getAttributes(); + PAL = PAL.addDereferenceableParamAttr(getContext(), ArgNo, Bytes); + setAttributes(PAL); +} + void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes); setAttributes(PAL); } +void Function::addDereferenceableOrNullParamAttr(unsigned ArgNo, + uint64_t Bytes) { + AttributeList PAL = getAttributes(); + PAL = PAL.addDereferenceableOrNullParamAttr(getContext(), ArgNo, Bytes); + setAttributes(PAL); +} + const std::string &Function::getGC() const { assert(hasGC() && "Function has no collector"); return getContext().getGC(*this); @@ -388,24 +460,20 @@ void Function::clearGC() { /// Copy all additional attributes (those not needed to create a Function) from /// the Function Src to this one. -void Function::copyAttributesFrom(const GlobalValue *Src) { +void Function::copyAttributesFrom(const Function *Src) { GlobalObject::copyAttributesFrom(Src); - const Function *SrcF = dyn_cast(Src); - if (!SrcF) - return; - - setCallingConv(SrcF->getCallingConv()); - setAttributes(SrcF->getAttributes()); - if (SrcF->hasGC()) - setGC(SrcF->getGC()); + setCallingConv(Src->getCallingConv()); + setAttributes(Src->getAttributes()); + if (Src->hasGC()) + setGC(Src->getGC()); else clearGC(); - if (SrcF->hasPersonalityFn()) - setPersonalityFn(SrcF->getPersonalityFn()); - if (SrcF->hasPrefixData()) - setPrefixData(SrcF->getPrefixData()); - if (SrcF->hasPrologueData()) - setPrologueData(SrcF->getPrologueData()); + if (Src->hasPersonalityFn()) + setPersonalityFn(Src->getPersonalityFn()); + if (Src->hasPrefixData()) + setPrefixData(Src->getPrefixData()); + if (Src->hasPrologueData()) + setPrologueData(Src->getPrologueData()); } /// Table of string intrinsic names indexed by enum value. @@ -488,10 +556,10 @@ void Function::recalculateIntrinsicID() { static std::string getMangledTypeStr(Type* Ty) { std::string Result; if (PointerType* PTyp = dyn_cast(Ty)) { - Result += "p" + llvm::utostr(PTyp->getAddressSpace()) + + Result += "p" + utostr(PTyp->getAddressSpace()) + getMangledTypeStr(PTyp->getElementType()); } else if (ArrayType* ATyp = dyn_cast(Ty)) { - Result += "a" + llvm::utostr(ATyp->getNumElements()) + + Result += "a" + utostr(ATyp->getNumElements()) + getMangledTypeStr(ATyp->getElementType()); } else if (StructType *STyp = dyn_cast(Ty)) { if (!STyp->isLiteral()) { @@ -536,7 +604,6 @@ std::string Intrinsic::getName(ID id, ArrayRef Tys) { return Result; } - /// IIT_Info - These are enumerators that describe the entries returned by the /// getIntrinsicInfoTableEntries function. /// @@ -579,18 +646,18 @@ enum IIT_Info { IIT_SAME_VEC_WIDTH_ARG = 31, IIT_PTR_TO_ARG = 32, IIT_PTR_TO_ELT = 33, - IIT_VEC_OF_PTRS_TO_ELT = 34, + IIT_VEC_OF_ANYPTRS_TO_ELT = 34, IIT_I128 = 35, IIT_V512 = 36, IIT_V1024 = 37 }; - static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, SmallVectorImpl &OutputTable) { + using namespace Intrinsic; + IIT_Info Info = IIT_Info(Infos[NextElt++]); unsigned StructElts = 2; - using namespace Intrinsic; switch (Info) { case IIT_Done: @@ -721,10 +788,11 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo)); return; } - case IIT_VEC_OF_PTRS_TO_ELT: { - unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); - OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfPtrsToElt, - ArgInfo)); + case IIT_VEC_OF_ANYPTRS_TO_ELT: { + unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back( + IITDescriptor::get(IITDescriptor::VecOfAnyPtrsToElt, ArgNo, RefNo)); return; } case IIT_EMPTYSTRUCT: @@ -744,7 +812,6 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, llvm_unreachable("unhandled"); } - #define GET_INTRINSIC_GENERATOR_GLOBAL #include "llvm/IR/Intrinsics.gen" #undef GET_INTRINSIC_GENERATOR_GLOBAL @@ -782,10 +849,10 @@ void Intrinsic::getIntrinsicInfoTableEntries(ID id, DecodeIITType(NextElt, IITEntries, T); } - static Type *DecodeFixedType(ArrayRef &Infos, ArrayRef Tys, LLVMContext &Context) { using namespace Intrinsic; + IITDescriptor D = Infos.front(); Infos = Infos.slice(1); @@ -813,7 +880,6 @@ static Type *DecodeFixedType(ArrayRef &Infos, Elts[i] = DecodeFixedType(Infos, Tys, Context); return StructType::get(Context, makeArrayRef(Elts,D.Struct_NumElements)); } - case IITDescriptor::Argument: return Tys[D.getArgumentNumber()]; case IITDescriptor::ExtendArgument: { @@ -855,21 +921,13 @@ static Type *DecodeFixedType(ArrayRef &Infos, Type *EltTy = VTy->getVectorElementType(); return PointerType::getUnqual(EltTy); } - case IITDescriptor::VecOfPtrsToElt: { - Type *Ty = Tys[D.getArgumentNumber()]; - VectorType *VTy = dyn_cast(Ty); - if (!VTy) - llvm_unreachable("Expected an argument of Vector Type"); - Type *EltTy = VTy->getVectorElementType(); - return VectorType::get(PointerType::getUnqual(EltTy), - VTy->getNumElements()); + case IITDescriptor::VecOfAnyPtrsToElt: + // Return the overloaded type (which determines the pointers address space) + return Tys[D.getOverloadArgNumber()]; } - } llvm_unreachable("unhandled"); } - - FunctionType *Intrinsic::getType(LLVMContext &Context, ID id, ArrayRef Tys) { SmallVector Table; @@ -1059,11 +1117,22 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef return (!ThisArgType || !ReferenceType || ThisArgType->getElementType() != ReferenceType->getElementType()); } - case IITDescriptor::VecOfPtrsToElt: { - if (D.getArgumentNumber() >= ArgTys.size()) + case IITDescriptor::VecOfAnyPtrsToElt: { + unsigned RefArgNumber = D.getRefArgNumber(); + + // This may only be used when referring to a previous argument. + if (RefArgNumber >= ArgTys.size()) return true; - VectorType * ReferenceType = - dyn_cast (ArgTys[D.getArgumentNumber()]); + + // Record the overloaded type + assert(D.getOverloadArgNumber() == ArgTys.size() && + "Table consistency error"); + ArgTys.push_back(Ty); + + // Verify the overloaded type "matches" the Ref type. + // i.e. Ty is a vector with the same width as Ref. + // Composed of pointers to the same element type as Ref. + VectorType *ReferenceType = dyn_cast(ArgTys[RefArgNumber]); VectorType *ThisArgVecTy = dyn_cast(Ty); if (!ThisArgVecTy || !ReferenceType || (ReferenceType->getVectorNumElements() != diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index ba92a91cc917bfefc69584b26340e728ca492f6d..d4b4552282252f37b00120fbe647435432c25a4e 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -589,8 +589,12 @@ FileInfo::openCoveragePath(StringRef CoveragePath) { /// print - Print source files with collected line count information. void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, StringRef GCNOFile, StringRef GCDAFile) { - for (const auto &LI : LineInfo) { - StringRef Filename = LI.first(); + SmallVector Filenames; + for (const auto &LI : LineInfo) + Filenames.push_back(LI.first()); + std::sort(Filenames.begin(), Filenames.end()); + + for (StringRef Filename : Filenames) { auto AllLines = LineConsumer(Filename); std::string CoveragePath = getCoveragePath(Filename, MainFilename); @@ -603,7 +607,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename, CovOS << " -: 0:Runs:" << RunCount << "\n"; CovOS << " -: 0:Programs:" << ProgramCount << "\n"; - const LineData &Line = LI.second; + const LineData &Line = LineInfo[Filename]; GCOVCoverage FileCoverage(Filename); for (uint32_t LineIndex = 0; LineIndex < Line.LastLine || !AllLines.empty(); ++LineIndex) { diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 5f338f58d9403677532cb22196e39a517937ab6f..afd4a36270a87d3d616b3352adb22a33764c223b 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -12,10 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "LLVMContextImpl.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Triple.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" @@ -24,7 +25,6 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "LLVMContextImpl.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -69,6 +69,30 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { setDLLStorageClass(Src->getDLLStorageClass()); } +void GlobalValue::removeFromParent() { + switch (getValueID()) { +#define HANDLE_GLOBAL_VALUE(NAME) \ + case Value::NAME##Val: \ + return static_cast(this)->removeFromParent(); +#include "llvm/IR/Value.def" + default: + break; + } + llvm_unreachable("not a global"); +} + +void GlobalValue::eraseFromParent() { + switch (getValueID()) { +#define HANDLE_GLOBAL_VALUE(NAME) \ + case Value::NAME##Val: \ + return static_cast(this)->eraseFromParent(); +#include "llvm/IR/Value.def" + default: + break; + } + llvm_unreachable("not a global"); +} + unsigned GlobalValue::getAlignment() const { if (auto *GA = dyn_cast(this)) { // In general we cannot compute this at the IR level, but we try. @@ -93,12 +117,10 @@ void GlobalObject::setAlignment(unsigned Align) { assert(getAlignment() == Align && "Alignment representation error!"); } -void GlobalObject::copyAttributesFrom(const GlobalValue *Src) { +void GlobalObject::copyAttributesFrom(const GlobalObject *Src) { GlobalValue::copyAttributesFrom(Src); - if (const auto *GV = dyn_cast(Src)) { - setAlignment(GV->getAlignment()); - setSection(GV->getSection()); - } + setAlignment(Src->getAlignment()); + setSection(Src->getSection()); } std::string GlobalValue::getGlobalIdentifier(StringRef Name, @@ -233,7 +255,7 @@ bool GlobalValue::canIncreaseAlignment() const { const GlobalObject *GlobalValue::getBaseObject() const { if (auto *GO = dyn_cast(this)) return GO; - if (auto *GA = dyn_cast(this)) + if (auto *GA = dyn_cast(this)) return GA->getBaseObject(); return nullptr; } @@ -271,6 +293,8 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) && + "invalid type for global variable"); setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && @@ -289,6 +313,8 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) && + "invalid type for global variable"); setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && @@ -333,12 +359,11 @@ void GlobalVariable::setInitializer(Constant *InitVal) { /// Copy all additional attributes (those not needed to create a GlobalVariable) /// from the GlobalVariable Src to this one. -void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { +void GlobalVariable::copyAttributesFrom(const GlobalVariable *Src) { GlobalObject::copyAttributesFrom(Src); - if (const GlobalVariable *SrcVar = dyn_cast(Src)) { - setThreadLocalMode(SrcVar->getThreadLocalMode()); - setExternallyInitialized(SrcVar->isExternallyInitialized()); - } + setThreadLocalMode(Src->getThreadLocalMode()); + setExternallyInitialized(Src->isExternallyInitialized()); + setAttributes(Src->getAttributes()); } void GlobalVariable::dropAllReferences() { diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index fd5ae71a2f3ccce10661be82532cdec31a17d6b3..b7fa07c6ffac74c06d8ffbd60545decf26ccb42b 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Statepoint.h" @@ -134,6 +134,37 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, return CI; } +CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy( + Value *Dst, Value *Src, Value *Size, uint32_t ElementSize, MDNode *TBAATag, + MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) { + Dst = getCastedInt8PtrValue(Dst); + Src = getCastedInt8PtrValue(Src); + + Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)}; + Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()}; + Module *M = BB->getParent()->getParent(); + Value *TheFn = Intrinsic::getDeclaration( + M, Intrinsic::memcpy_element_unordered_atomic, Tys); + + CallInst *CI = createCallHelper(TheFn, Ops, this); + + // Set the TBAA info if present. + if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + // Set the TBAA Struct info if present. + if (TBAAStructTag) + CI->setMetadata(LLVMContext::MD_tbaa_struct, TBAAStructTag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + + return CI; +} + CallInst *IRBuilderBase:: CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, @@ -161,6 +192,94 @@ CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, return CI; } +static CallInst *getReductionIntrinsic(IRBuilderBase *Builder, Intrinsic::ID ID, + Value *Src) { + Module *M = Builder->GetInsertBlock()->getParent()->getParent(); + Value *Ops[] = {Src}; + Type *Tys[] = { Src->getType()->getVectorElementType(), Src->getType() }; + auto Decl = Intrinsic::getDeclaration(M, ID, Tys); + return createCallHelper(Decl, Ops, Builder); +} + +CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) { + Module *M = GetInsertBlock()->getParent()->getParent(); + Value *Ops[] = {Acc, Src}; + Type *Tys[] = {Src->getType()->getVectorElementType(), Acc->getType(), + Src->getType()}; + auto Decl = Intrinsic::getDeclaration( + M, Intrinsic::experimental_vector_reduce_fadd, Tys); + return createCallHelper(Decl, Ops, this); +} + +CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) { + Module *M = GetInsertBlock()->getParent()->getParent(); + Value *Ops[] = {Acc, Src}; + Type *Tys[] = {Src->getType()->getVectorElementType(), Acc->getType(), + Src->getType()}; + auto Decl = Intrinsic::getDeclaration( + M, Intrinsic::experimental_vector_reduce_fmul, Tys); + return createCallHelper(Decl, Ops, this); +} + +CallInst *IRBuilderBase::CreateAddReduce(Value *Src) { + return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_add, + Src); +} + +CallInst *IRBuilderBase::CreateMulReduce(Value *Src) { + return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_mul, + Src); +} + +CallInst *IRBuilderBase::CreateAndReduce(Value *Src) { + return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_and, + Src); +} + +CallInst *IRBuilderBase::CreateOrReduce(Value *Src) { + return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_or, + Src); +} + +CallInst *IRBuilderBase::CreateXorReduce(Value *Src) { + return getReductionIntrinsic(this, Intrinsic::experimental_vector_reduce_xor, + Src); +} + +CallInst *IRBuilderBase::CreateIntMaxReduce(Value *Src, bool IsSigned) { + auto ID = IsSigned ? Intrinsic::experimental_vector_reduce_smax + : Intrinsic::experimental_vector_reduce_umax; + return getReductionIntrinsic(this, ID, Src); +} + +CallInst *IRBuilderBase::CreateIntMinReduce(Value *Src, bool IsSigned) { + auto ID = IsSigned ? Intrinsic::experimental_vector_reduce_smin + : Intrinsic::experimental_vector_reduce_umin; + return getReductionIntrinsic(this, ID, Src); +} + +CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src, bool NoNaN) { + auto Rdx = getReductionIntrinsic( + this, Intrinsic::experimental_vector_reduce_fmax, Src); + if (NoNaN) { + FastMathFlags FMF; + FMF.setNoNaNs(); + Rdx->setFastMathFlags(FMF); + } + return Rdx; +} + +CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src, bool NoNaN) { + auto Rdx = getReductionIntrinsic( + this, Intrinsic::experimental_vector_reduce_fmin, Src); + if (NoNaN) { + FastMathFlags FMF; + FMF.setNoNaNs(); + Rdx->setFastMathFlags(FMF); + } + return Rdx; +} + CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) { assert(isa(Ptr->getType()) && "lifetime.start only applies to pointers."); @@ -293,11 +412,16 @@ CallInst *IRBuilderBase::CreateMaskedGather(Value *Ptrs, unsigned Align, Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context), NumElts)); - Value * Ops[] = {Ptrs, getInt32(Align), Mask, UndefValue::get(DataTy)}; + if (!PassThru) + PassThru = UndefValue::get(DataTy); + + Type *OverloadedTypes[] = {DataTy, PtrsTy}; + Value * Ops[] = {Ptrs, getInt32(Align), Mask, PassThru}; // We specify only one type when we create this intrinsic. Types of other // arguments are derived from this type. - return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, { DataTy }, Name); + return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, OverloadedTypes, + Name); } /// \brief Create a call to a Masked Scatter intrinsic. @@ -323,11 +447,13 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs, if (!Mask) Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context), NumElts)); + + Type *OverloadedTypes[] = {DataTy, PtrsTy}; Value * Ops[] = {Data, Ptrs, getInt32(Align), Mask}; // We specify only one type when we create this intrinsic. Types of other // arguments are derived from this type. - return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, { DataTy }); + return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, OverloadedTypes); } template diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index 8feeeb65d445ef4f664140ffa7fcc94876f483b4..ad22efdf0effbc42a607bd0c2e0bd9ea6e4a273b 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/InlineAsm.h" #include "ConstantsContext.h" #include "LLVMContextImpl.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" @@ -40,10 +40,6 @@ InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString, "Function type not legal for constraints!"); } -// Implement the first virtual method in this class in this file so the -// InlineAsm vtable is emitted here. -InlineAsm::~InlineAsm() = default; - InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString, StringRef Constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect) { diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index c26699eab4e2aa74768e237c5e8e86fb322f8f57..3dd653d2d0473ee40e1adede91418603f6dce6c2 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" #include "llvm/IR/Instruction.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" using namespace llvm; @@ -43,8 +43,6 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, InsertAtEnd->getInstList().push_back(this); } - -// Out of line virtual method, so the vtable, etc has a home. Instruction::~Instruction() { assert(!Parent && "Instruction still linked in the program!"); if (hasMetadataHashEntry()) @@ -218,10 +216,10 @@ void Instruction::copyFastMathFlags(const Instruction *I) { copyFastMathFlags(I->getFastMathFlags()); } -void Instruction::copyIRFlags(const Value *V) { +void Instruction::copyIRFlags(const Value *V, bool IncludeWrapFlags) { // Copy the wrapping flags. - if (auto *OB = dyn_cast(V)) { - if (isa(this)) { + if (IncludeWrapFlags && isa(this)) { + if (auto *OB = dyn_cast(V)) { setHasNoSignedWrap(OB->hasNoSignedWrap()); setHasNoUnsignedWrap(OB->hasNoUnsignedWrap()); } @@ -534,6 +532,30 @@ bool Instruction::isAtomic() const { } } +bool Instruction::hasAtomicLoad() const { + assert(isAtomic()); + switch (getOpcode()) { + default: + return false; + case Instruction::AtomicCmpXchg: + case Instruction::AtomicRMW: + case Instruction::Load: + return true; + } +} + +bool Instruction::hasAtomicStore() const { + assert(isAtomic()); + switch (getOpcode()) { + default: + return false; + case Instruction::AtomicCmpXchg: + case Instruction::AtomicRMW: + case Instruction::Store: + return true; + } +} + bool Instruction::mayThrow() const { if (const CallInst *CI = dyn_cast(this)) return !CI->doesNotThrow(); @@ -625,20 +647,41 @@ void Instruction::updateProfWeight(uint64_t S, uint64_t T) { return; auto *ProfDataName = dyn_cast(ProfileData->getOperand(0)); - if (!ProfDataName || !ProfDataName->getString().equals("branch_weights")) + if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") && + !ProfDataName->getString().equals("VP"))) return; - SmallVector Weights; - for (unsigned i = 1; i < ProfileData->getNumOperands(); i++) { - // Using APInt::div may be expensive, but most cases should fit in 64 bits. - APInt Val(128, mdconst::dyn_extract(ProfileData->getOperand(i)) - ->getValue() - .getZExtValue()); - Val *= APInt(128, S); - Weights.push_back(Val.udiv(APInt(128, T)).getLimitedValue()); - } MDBuilder MDB(getContext()); - setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + SmallVector Vals; + Vals.push_back(ProfileData->getOperand(0)); + APInt APS(128, S), APT(128, T); + if (ProfDataName->getString().equals("branch_weights")) + for (unsigned i = 1; i < ProfileData->getNumOperands(); i++) { + // Using APInt::div may be expensive, but most cases should fit 64 bits. + APInt Val(128, + mdconst::dyn_extract(ProfileData->getOperand(i)) + ->getValue() + .getZExtValue()); + Val *= APS; + Vals.push_back(MDB.createConstant( + ConstantInt::get(Type::getInt64Ty(getContext()), + Val.udiv(APT).getLimitedValue()))); + } + else if (ProfDataName->getString().equals("VP")) + for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) { + // The first value is the key of the value profile, which will not change. + Vals.push_back(ProfileData->getOperand(i)); + // Using APInt::div may be expensive, but most cases should fit 64 bits. + APInt Val(128, + mdconst::dyn_extract(ProfileData->getOperand(i + 1)) + ->getValue() + .getZExtValue()); + Val *= APS; + Vals.push_back(MDB.createConstant( + ConstantInt::get(Type::getInt64Ty(getContext()), + Val.udiv(APT).getLimitedValue()))); + } + setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals)); } void Instruction::setProfWeight(uint64_t W) { diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index faa5ed078cf78c5727d19062982f1e5c34679532..023a0b178a1451916d8404d0386d3c711925ee77 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -1,4 +1,4 @@ -//===-- Instructions.cpp - Implement the LLVM instructions ----------------===// +//===- Instructions.cpp - Implement the LLVM instructions -----------------===// // // The LLVM Compiler Infrastructure // @@ -14,16 +14,34 @@ #include "llvm/IR/Instructions.h" #include "LLVMContextImpl.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/ConstantRange.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include +#include +#include +#include + using namespace llvm; //===----------------------------------------------------------------------===// @@ -41,16 +59,40 @@ User::op_iterator CallSite::getCallee() const { // TerminatorInst Class //===----------------------------------------------------------------------===// -// Out of line virtual method, so the vtable, etc has a home. -TerminatorInst::~TerminatorInst() { +unsigned TerminatorInst::getNumSuccessors() const { + switch (getOpcode()) { +#define HANDLE_TERM_INST(N, OPC, CLASS) \ + case Instruction::OPC: \ + return static_cast(this)->getNumSuccessors(); +#include "llvm/IR/Instruction.def" + default: + break; + } + llvm_unreachable("not a terminator"); } -//===----------------------------------------------------------------------===// -// UnaryInstruction Class -//===----------------------------------------------------------------------===// +BasicBlock *TerminatorInst::getSuccessor(unsigned idx) const { + switch (getOpcode()) { +#define HANDLE_TERM_INST(N, OPC, CLASS) \ + case Instruction::OPC: \ + return static_cast(this)->getSuccessor(idx); +#include "llvm/IR/Instruction.def" + default: + break; + } + llvm_unreachable("not a terminator"); +} -// Out of line virtual method, so the vtable, etc has a home. -UnaryInstruction::~UnaryInstruction() { +void TerminatorInst::setSuccessor(unsigned idx, BasicBlock *B) { + switch (getOpcode()) { +#define HANDLE_TERM_INST(N, OPC, CLASS) \ + case Instruction::OPC: \ + return static_cast(this)->setSuccessor(idx, B); +#include "llvm/IR/Instruction.def" + default: + break; + } + llvm_unreachable("not a terminator"); } //===----------------------------------------------------------------------===// @@ -82,13 +124,10 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { return nullptr; } - //===----------------------------------------------------------------------===// // PHINode Class //===----------------------------------------------------------------------===// -void PHINode::anchor() {} - PHINode::PHINode(const PHINode &PN) : Instruction(PN.getType(), Instruction::PHI, nullptr, PN.getNumOperands()), ReservedSpace(PN.getNumOperands()) { @@ -242,9 +281,6 @@ void LandingPadInst::addClause(Constant *Val) { // CallInst Implementation //===----------------------------------------------------------------------===// -CallInst::~CallInst() { -} - void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef Args, ArrayRef Bundles, const Twine &NameStr) { this->FTy = FTy; @@ -335,12 +371,12 @@ Value *CallInst::getReturnedArgOperand() const { unsigned Index; if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); + return getArgOperand(Index - AttributeList::FirstArgIndex); if (const Function *F = getCalledFunction()) if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); - + return getArgOperand(Index - AttributeList::FirstArgIndex); + return nullptr; } @@ -356,6 +392,20 @@ void CallInst::addAttribute(unsigned i, Attribute Attr) { setAttributes(PAL); } +void CallInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void CallInst::addParamAttr(unsigned ArgNo, Attribute Attr) { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr); + setAttributes(PAL); +} + void CallInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttribute(getContext(), i, Kind); @@ -368,6 +418,20 @@ void CallInst::removeAttribute(unsigned i, StringRef Kind) { setAttributes(PAL); } +void CallInst::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + +void CallInst::removeParamAttr(unsigned ArgNo, StringRef Kind) { + assert(ArgNo < getNumArgOperands() && "Out of bounds"); + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes); @@ -380,13 +444,23 @@ void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { setAttributes(PAL); } +bool CallInst::hasRetAttr(Attribute::AttrKind Kind) const { + if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind)) + return true; + + // Look at the callee, if available. + if (const Function *F = getCalledFunction()) + return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind); + return false; +} + bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind Kind) const { - assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!"); + assert(i < getNumArgOperands() && "Param index out of bounds!"); - if (Attrs.hasAttribute(i, Kind)) + if (Attrs.hasParamAttribute(i, Kind)) return true; if (const Function *F = getCalledFunction()) - return F->getAttributes().hasAttribute(i, Kind); + return F->getAttributes().hasParamAttribute(i, Kind); return false; } @@ -400,8 +474,13 @@ bool CallInst::dataOperandHasImpliedAttr(unsigned i, // question is a call argument; or be indirectly implied by the kind of its // containing operand bundle, if the operand is a bundle operand. + if (i == AttributeList::ReturnIndex) + return hasRetAttr(Kind); + + // FIXME: Avoid these i - 1 calculations and update the API to use zero-based + // indices. if (i < (getNumArgOperands() + 1)) - return paramHasAttr(i, Kind); + return paramHasAttr(i - 1, Kind); assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && "Must be either a call argument or an operand bundle!"); @@ -489,7 +568,8 @@ static Instruction *createMalloc(Instruction *InsertBefore, MCall->setTailCall(); if (Function *F = dyn_cast(MallocFunc)) { MCall->setCallingConv(F->getCallingConv()); - if (!F->doesNotAlias(0)) F->setDoesNotAlias(0); + if (!F->returnDoesNotAlias()) + F->setReturnDoesNotAlias(); } assert(!MCall->getType()->isVoidTy() && "Malloc has void return type"); @@ -520,7 +600,6 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, ArraySize, OpB, MallocF, Name); } - /// CreateMalloc - Generate the IR for a call to malloc: /// 1. Compute the malloc call's argument as the specified type's size, /// possibly multiplied by the array size if the array size is not @@ -668,36 +747,36 @@ InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef OpB, return NewII; } -BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} -unsigned InvokeInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) { - return setSuccessor(idx, B); -} - Value *InvokeInst::getReturnedArgOperand() const { unsigned Index; if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); + return getArgOperand(Index - AttributeList::FirstArgIndex); if (const Function *F = getCalledFunction()) if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) && Index) - return getArgOperand(Index-1); - + return getArgOperand(Index - AttributeList::FirstArgIndex); + return nullptr; } +bool InvokeInst::hasRetAttr(Attribute::AttrKind Kind) const { + if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind)) + return true; + + // Look at the callee, if available. + if (const Function *F = getCalledFunction()) + return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind); + return false; +} + bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind Kind) const { - assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!"); + assert(i < getNumArgOperands() && "Param index out of bounds!"); - if (Attrs.hasAttribute(i, Kind)) + if (Attrs.hasParamAttribute(i, Kind)) return true; if (const Function *F = getCalledFunction()) - return F->getAttributes().hasAttribute(i, Kind); + return F->getAttributes().hasParamAttribute(i, Kind); return false; } @@ -711,8 +790,13 @@ bool InvokeInst::dataOperandHasImpliedAttr(unsigned i, // question is an invoke argument; or be indirectly implied by the kind of its // containing operand bundle, if the operand is a bundle operand. + if (i == AttributeList::ReturnIndex) + return hasRetAttr(Kind); + + // FIXME: Avoid these i - 1 calculations and update the API to use zero-based + // indices. if (i < (getNumArgOperands() + 1)) - return paramHasAttr(i, Kind); + return paramHasAttr(i - 1, Kind); assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && "Must be either an invoke argument or an operand bundle!"); @@ -731,6 +815,12 @@ void InvokeInst::addAttribute(unsigned i, Attribute Attr) { setAttributes(PAL); } +void InvokeInst::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + AttributeList PAL = getAttributes(); + PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + void InvokeInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) { AttributeList PAL = getAttributes(); PAL = PAL.removeAttribute(getContext(), i, Kind); @@ -743,6 +833,12 @@ void InvokeInst::removeAttribute(unsigned i, StringRef Kind) { setAttributes(PAL); } +void InvokeInst::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { + AttributeList PAL = getAttributes(); + PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); + setAttributes(PAL); +} + void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) { AttributeList PAL = getAttributes(); PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes); @@ -780,6 +876,7 @@ ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, Instruction *InsertBefore) if (retVal) Op<0>() = retVal; } + ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd) : TerminatorInst(Type::getVoidTy(C), Instruction::Ret, OperandTraits::op_end(this) - !!retVal, !!retVal, @@ -787,28 +884,12 @@ ReturnInst::ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd) if (retVal) Op<0>() = retVal; } + ReturnInst::ReturnInst(LLVMContext &Context, BasicBlock *InsertAtEnd) : TerminatorInst(Type::getVoidTy(Context), Instruction::Ret, OperandTraits::op_end(this), 0, InsertAtEnd) { } -unsigned ReturnInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -/// Out-of-line ReturnInst method, put here so the C++ compiler can choose to -/// emit the vtable for the class in this translation unit. -void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { - llvm_unreachable("ReturnInst has no successors!"); -} - -BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const { - llvm_unreachable("ReturnInst has no successors!"); -} - -ReturnInst::~ReturnInst() { -} - //===----------------------------------------------------------------------===// // ResumeInst Implementation //===----------------------------------------------------------------------===// @@ -831,18 +912,6 @@ ResumeInst::ResumeInst(Value *Exn, BasicBlock *InsertAtEnd) Op<0>() = Exn; } -unsigned ResumeInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { - llvm_unreachable("ResumeInst has no successors!"); -} - -BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const { - llvm_unreachable("ResumeInst has no successors!"); -} - //===----------------------------------------------------------------------===// // CleanupReturnInst Implementation //===----------------------------------------------------------------------===// @@ -885,18 +954,6 @@ CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, init(CleanupPad, UnwindBB); } -BasicBlock *CleanupReturnInst::getSuccessorV(unsigned Idx) const { - assert(Idx == 0); - return getUnwindDest(); -} -unsigned CleanupReturnInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void CleanupReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) { - assert(Idx == 0); - setUnwindDest(B); -} - //===----------------------------------------------------------------------===// // CatchReturnInst Implementation //===----------------------------------------------------------------------===// @@ -928,18 +985,6 @@ CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB, init(CatchPad, BB); } -BasicBlock *CatchReturnInst::getSuccessorV(unsigned Idx) const { - assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); - return getSuccessor(); -} -unsigned CatchReturnInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void CatchReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) { - assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); - setSuccessor(B); -} - //===----------------------------------------------------------------------===// // CatchSwitchInst Implementation //===----------------------------------------------------------------------===// @@ -1023,16 +1068,6 @@ void CatchSwitchInst::removeHandler(handler_iterator HI) { setNumHungOffUseOperands(getNumOperands() - 1); } -BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} -unsigned CatchSwitchInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void CatchSwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // FuncletPadInst Implementation //===----------------------------------------------------------------------===// @@ -1085,18 +1120,6 @@ UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd) nullptr, 0, InsertAtEnd) { } -unsigned UnreachableInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} - -void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) { - llvm_unreachable("UnreachableInst has no successors!"); -} - -BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const { - llvm_unreachable("UnreachableInst has no successors!"); -} - //===----------------------------------------------------------------------===// // BranchInst Implementation //===----------------------------------------------------------------------===// @@ -1114,6 +1137,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore) assert(IfTrue && "Branch destination may not be null!"); Op<-1>() = IfTrue; } + BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, Instruction *InsertBefore) : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br, @@ -1148,7 +1172,6 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, #endif } - BranchInst::BranchInst(const BranchInst &BI) : TerminatorInst(Type::getVoidTy(BI.getContext()), Instruction::Br, OperandTraits::op_end(this) - BI.getNumOperands(), @@ -1172,17 +1195,6 @@ void BranchInst::swapSuccessors() { swapProfMetadata(); } -BasicBlock *BranchInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} -unsigned BranchInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void BranchInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - - //===----------------------------------------------------------------------===// // AllocaInst Implementation //===----------------------------------------------------------------------===// @@ -1237,10 +1249,6 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, setName(Name); } -// Out of line virtual method, so the vtable, etc has a home. -AllocaInst::~AllocaInst() { -} - void AllocaInst::setAlignment(unsigned Align) { assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); assert(Align <= MaximumAlignment && @@ -1502,8 +1510,7 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, SynchronizationScope SynchScope, Instruction *InsertBefore) : Instruction( - StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext()), - nullptr), + StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())), AtomicCmpXchg, OperandTraits::op_begin(this), OperandTraits::operands(this), InsertBefore) { Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); @@ -1515,8 +1522,7 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, SynchronizationScope SynchScope, BasicBlock *InsertAtEnd) : Instruction( - StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext()), - nullptr), + StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())), AtomicCmpXchg, OperandTraits::op_begin(this), OperandTraits::operands(this), InsertAtEnd) { Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); @@ -1592,8 +1598,6 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, // GetElementPtrInst Implementation //===----------------------------------------------------------------------===// -void GetElementPtrInst::anchor() {} - void GetElementPtrInst::init(Value *Ptr, ArrayRef IdxList, const Twine &Name) { assert(getNumOperands() == 1 + IdxList.size() && @@ -1730,14 +1734,12 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, setName(Name); } - bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy()) return false; return true; } - //===----------------------------------------------------------------------===// // InsertElementInst Implementation //===----------------------------------------------------------------------===// @@ -1784,7 +1786,6 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, return true; } - //===----------------------------------------------------------------------===// // ShuffleVectorInst Implementation //===----------------------------------------------------------------------===// @@ -1831,7 +1832,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, return false; // Mask must be vector of i32. - VectorType *MaskTy = dyn_cast(Mask->getType()); + auto *MaskTy = dyn_cast(Mask->getType()); if (!MaskTy || !MaskTy->getElementType()->isIntegerTy(32)) return false; @@ -1839,10 +1840,10 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, if (isa(Mask) || isa(Mask)) return true; - if (const ConstantVector *MV = dyn_cast(Mask)) { + if (const auto *MV = dyn_cast(Mask)) { unsigned V1Size = cast(V1->getType())->getNumElements(); for (Value *Op : MV->operands()) { - if (ConstantInt *CI = dyn_cast(Op)) { + if (auto *CI = dyn_cast(Op)) { if (CI->uge(V1Size*2)) return false; } else if (!isa(Op)) { @@ -1852,8 +1853,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, return true; } - if (const ConstantDataSequential *CDS = - dyn_cast(Mask)) { + if (const auto *CDS = dyn_cast(Mask)) { unsigned V1Size = cast(V1->getType())->getNumElements(); for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i) if (CDS->getElementAsInteger(i) >= V1Size*2) @@ -1865,7 +1865,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, // used as the shuffle mask. When this occurs, the shuffle mask will // fall into this case and fail. To avoid this error, do this bit of // ugliness to allow such a mask pass. - if (const ConstantExpr *CE = dyn_cast(Mask)) + if (const auto *CE = dyn_cast(Mask)) if (CE->getOpcode() == Instruction::UserOp1) return true; @@ -1874,7 +1874,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) { assert(i < Mask->getType()->getVectorNumElements() && "Index out of range"); - if (ConstantDataSequential *CDS =dyn_cast(Mask)) + if (auto *CDS = dyn_cast(Mask)) return CDS->getElementAsInteger(i); Constant *C = Mask->getAggregateElement(i); if (isa(C)) @@ -1886,7 +1886,7 @@ void ShuffleVectorInst::getShuffleMask(Constant *Mask, SmallVectorImpl &Result) { unsigned NumElts = Mask->getType()->getVectorNumElements(); - if (ConstantDataSequential *CDS=dyn_cast(Mask)) { + if (auto *CDS = dyn_cast(Mask)) { for (unsigned i = 0; i != NumElts; ++i) Result.push_back(CDS->getElementAsInteger(i)); return; @@ -1898,7 +1898,6 @@ void ShuffleVectorInst::getShuffleMask(Constant *Mask, } } - //===----------------------------------------------------------------------===// // InsertValueInst Class //===----------------------------------------------------------------------===// @@ -1911,7 +1910,7 @@ void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef Idxs, // (other than weirdness with &*IdxBegin being invalid; see // getelementptr's init routine for example). But there's no // present need to support it. - assert(Idxs.size() > 0 && "InsertValueInst must have at least one index"); + assert(!Idxs.empty() && "InsertValueInst must have at least one index"); assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) == Val->getType() && "Inserted value must match indexed type!"); @@ -1940,7 +1939,7 @@ void ExtractValueInst::init(ArrayRef Idxs, const Twine &Name) { // There's no fundamental reason why we require at least one index. // But there's no present need to support it. - assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index"); + assert(!Idxs.empty() && "ExtractValueInst must have at least one index"); Indices.append(Idxs.begin(), Idxs.end()); setName(Name); @@ -2013,7 +2012,6 @@ BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, setName(Name); } - void BinaryOperator::init(BinaryOps iType) { Value *LHS = getOperand(0), *RHS = getOperand(1); (void)LHS; (void)RHS; // Silence warnings. @@ -2173,7 +2171,6 @@ BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name, Op->getType(), Name, InsertAtEnd); } - // isConstantAllOnes - Helper function for several functions below static inline bool isConstantAllOnes(const Value *V) { if (const Constant *C = dyn_cast(V)) @@ -2239,7 +2236,6 @@ const Value *BinaryOperator::getNotArgument(const Value *BinOp) { return getNotArgument(const_cast(BinOp)); } - // Exchange the two operands to this instruction. This instruction is safe to // use on any binary instruction and does not modify the semantics of the // instruction. If the instruction is order-dependent (SetLT f.e.), the opcode @@ -2251,7 +2247,6 @@ bool BinaryOperator::swapOperands() { return false; } - //===----------------------------------------------------------------------===// // FPMathOperator Class //===----------------------------------------------------------------------===// @@ -2265,13 +2260,10 @@ float FPMathOperator::getFPAccuracy() const { return Accuracy->getValueAPF().convertToFloat(); } - //===----------------------------------------------------------------------===// // CastInst Class //===----------------------------------------------------------------------===// -void CastInst::anchor() {} - // Just determine if this cast only deals with integral->integral conversion. bool CastInst::isIntegerCast() const { switch (getOpcode()) { @@ -2527,13 +2519,12 @@ unsigned CastInst::isEliminableCastPair( return Instruction::BitCast; return 0; } - case 12: { + case 12: // addrspacecast, addrspacecast -> bitcast, if SrcAS == DstAS // addrspacecast, addrspacecast -> addrspacecast, if SrcAS != DstAS if (SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) return Instruction::AddrSpaceCast; return Instruction::BitCast; - } case 13: // FIXME: this state can be merged with (1), but the following assert // is useful to check the correcteness of the sequence due to semantic @@ -2554,7 +2545,6 @@ unsigned CastInst::isEliminableCastPair( DstTy->getScalarType()->getPointerElementType()) return Instruction::AddrSpaceCast; return 0; - case 15: // FIXME: this state can be merged with (1), but the following assert // is useful to check the correcteness of the sequence due to semantic @@ -3030,7 +3020,6 @@ CastInst::getCastOpcode( /// of the types involved. bool CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { - // Check for type sanity on the arguments Type *SrcTy = S->getType(); @@ -3303,8 +3292,6 @@ AddrSpaceCastInst::AddrSpaceCastInst( // CmpInst Classes //===----------------------------------------------------------------------===// -void CmpInst::anchor() {} - CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS, Value *RHS, const Twine &Name, Instruction *InsertBefore) : Instruction(ty, op, @@ -3379,7 +3366,6 @@ bool CmpInst::isEquality() const { return cast(this)->isEquality(); } - CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) { switch (pred) { default: llvm_unreachable("Unknown cmp predicate!"); @@ -3445,8 +3431,6 @@ StringRef CmpInst::getPredicateName(Predicate Pred) { } } -void ICmpInst::anchor() {} - ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) { switch (pred) { default: llvm_unreachable("Unknown icmp predicate!"); @@ -3699,17 +3683,6 @@ void SwitchInst::growOperands() { growHungoffUses(ReservedSpace); } - -BasicBlock *SwitchInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} -unsigned SwitchInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // IndirectBrInst Implementation //===----------------------------------------------------------------------===// @@ -3789,16 +3762,6 @@ void IndirectBrInst::removeDestination(unsigned idx) { setNumHungOffUseOperands(NumOps-1); } -BasicBlock *IndirectBrInst::getSuccessorV(unsigned idx) const { - return getSuccessor(idx); -} -unsigned IndirectBrInst::getNumSuccessorsV() const { - return getNumSuccessors(); -} -void IndirectBrInst::setSuccessorV(unsigned idx, BasicBlock *B) { - setSuccessor(idx, B); -} - //===----------------------------------------------------------------------===// // cloneImpl() implementations //===----------------------------------------------------------------------===// diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp index c9814a96bea69cff7affa9d4a37804d3c79cfdf9..8b12c55937f544237e56547906842789e3528e61 100644 --- a/lib/IR/IntrinsicInst.cpp +++ b/lib/IR/IntrinsicInst.cpp @@ -21,8 +21,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" @@ -97,7 +97,9 @@ Value *InstrProfIncrementInst::getStep() const { ConstrainedFPIntrinsic::RoundingMode ConstrainedFPIntrinsic::getRoundingMode() const { - Metadata *MD = dyn_cast(getOperand(2))->getMetadata(); + unsigned NumOperands = getNumArgOperands(); + Metadata *MD = + dyn_cast(getArgOperand(NumOperands - 2))->getMetadata(); if (!MD || !isa(MD)) return rmInvalid; StringRef RoundingArg = cast(MD)->getString(); @@ -115,7 +117,9 @@ ConstrainedFPIntrinsic::getRoundingMode() const { ConstrainedFPIntrinsic::ExceptionBehavior ConstrainedFPIntrinsic::getExceptionBehavior() const { - Metadata *MD = dyn_cast(getOperand(3))->getMetadata(); + unsigned NumOperands = getNumArgOperands(); + Metadata *MD = + dyn_cast(getArgOperand(NumOperands - 1))->getMetadata(); if (!MD || !isa(MD)) return ebInvalid; StringRef ExceptionArg = cast(MD)->getString(); @@ -125,3 +129,21 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const { .Case("fpexcept.strict", ebStrict) .Default(ebInvalid); } + +bool ConstrainedFPIntrinsic::isUnaryOp() const { + switch (getIntrinsicID()) { + default: + return false; + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + return true; + } +} diff --git a/lib/IR/LLVMBuild.txt b/lib/IR/LLVMBuild.txt index cd90ef5b16b65262c123675dd5c126a0a191f890..71368abfd8748438a8b3d8e0b6758ab3f5944da9 100644 --- a/lib/IR/LLVMBuild.txt +++ b/lib/IR/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Core parent = Libraries -required_libraries = Support +required_libraries = BinaryFormat Support diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 6c6383c22255d288b800e5bdb0433009473f14c4..ad0d4470c111e04bfb35ebde1a3b46cab154b0b3 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/LLVMContext.h" +#include "LLVMContextImpl.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "LLVMContextImpl.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Metadata.h" diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index 343722463e5faed2bda618bb584b156a5f88c495..4a30d28c39138485c502b0888b810d49660ed001 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -215,27 +215,6 @@ uint32_t LLVMContextImpl::getOperandBundleTagID(StringRef Tag) const { return I->second; } -// ConstantsContext anchors -void UnaryConstantExpr::anchor() { } - -void BinaryConstantExpr::anchor() { } - -void SelectConstantExpr::anchor() { } - -void ExtractElementConstantExpr::anchor() { } - -void InsertElementConstantExpr::anchor() { } - -void ShuffleVectorConstantExpr::anchor() { } - -void ExtractValueConstantExpr::anchor() { } - -void InsertValueConstantExpr::anchor() { } - -void GetElementPtrConstantExpr::anchor() { } - -void CompareConstantExpr::anchor() { } - /// Singleton instance of the OptBisect class. /// /// This singleton is accessed via the LLVMContext::getOptBisect() function. It diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 0ee0b9c0da2542b29e809776f2604706c6ca18a5..4ba974409a4fc240fa261c544bfbc11387145428 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -27,13 +27,13 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/YAMLTraits.h" #include @@ -52,12 +52,12 @@ class Value; struct DenseMapAPIntKeyInfo { static inline APInt getEmptyKey() { APInt V(nullptr, 0); - V.VAL = 0; + V.U.VAL = 0; return V; } static inline APInt getTombstoneKey() { APInt V(nullptr, 0); - V.VAL = 1; + V.U.VAL = 1; return V; } static unsigned getHashValue(const APInt &Key) { @@ -552,6 +552,7 @@ template <> struct MDNodeKeyImpl { Metadata *TemplateParams; Metadata *Declaration; Metadata *Variables; + Metadata *ThrownTypes; MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, @@ -559,7 +560,8 @@ template <> struct MDNodeKeyImpl { Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, int ThisAdjustment, unsigned Flags, bool IsOptimized, Metadata *Unit, Metadata *TemplateParams, - Metadata *Declaration, Metadata *Variables) + Metadata *Declaration, Metadata *Variables, + Metadata *ThrownTypes) : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File), Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition), ScopeLine(ScopeLine), @@ -567,7 +569,7 @@ template <> struct MDNodeKeyImpl { VirtualIndex(VirtualIndex), ThisAdjustment(ThisAdjustment), Flags(Flags), IsOptimized(IsOptimized), Unit(Unit), TemplateParams(TemplateParams), Declaration(Declaration), - Variables(Variables) {} + Variables(Variables), ThrownTypes(ThrownTypes) {} MDNodeKeyImpl(const DISubprogram *N) : Scope(N->getRawScope()), Name(N->getRawName()), LinkageName(N->getRawLinkageName()), File(N->getRawFile()), @@ -578,7 +580,8 @@ template <> struct MDNodeKeyImpl { ThisAdjustment(N->getThisAdjustment()), Flags(N->getFlags()), IsOptimized(N->isOptimized()), Unit(N->getRawUnit()), TemplateParams(N->getRawTemplateParams()), - Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {} + Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()), + ThrownTypes(N->getRawThrownTypes()) {} bool isKeyOf(const DISubprogram *RHS) const { return Scope == RHS->getRawScope() && Name == RHS->getRawName() && @@ -595,7 +598,8 @@ template <> struct MDNodeKeyImpl { Unit == RHS->getUnit() && TemplateParams == RHS->getRawTemplateParams() && Declaration == RHS->getRawDeclaration() && - Variables == RHS->getRawVariables(); + Variables == RHS->getRawVariables() && + ThrownTypes == RHS->getRawThrownTypes(); } unsigned getHashValue() const { // If this is a declaration inside an ODR type, only hash the type and the @@ -695,26 +699,21 @@ template <> struct MDNodeKeyImpl { template <> struct MDNodeKeyImpl { Metadata *Scope; - Metadata *File; MDString *Name; - unsigned Line; bool ExportSymbols; - MDNodeKeyImpl(Metadata *Scope, Metadata *File, MDString *Name, unsigned Line, - bool ExportSymbols) - : Scope(Scope), File(File), Name(Name), Line(Line), - ExportSymbols(ExportSymbols) {} + MDNodeKeyImpl(Metadata *Scope, MDString *Name, bool ExportSymbols) + : Scope(Scope), Name(Name), ExportSymbols(ExportSymbols) {} MDNodeKeyImpl(const DINamespace *N) - : Scope(N->getRawScope()), File(N->getRawFile()), Name(N->getRawName()), - Line(N->getLine()), ExportSymbols(N->getExportSymbols()) {} + : Scope(N->getRawScope()), Name(N->getRawName()), + ExportSymbols(N->getExportSymbols()) {} bool isKeyOf(const DINamespace *RHS) const { - return Scope == RHS->getRawScope() && File == RHS->getRawFile() && - Name == RHS->getRawName() && Line == RHS->getLine() && + return Scope == RHS->getRawScope() && Name == RHS->getRawName() && ExportSymbols == RHS->getExportSymbols(); } unsigned getHashValue() const { - return hash_combine(Scope, File, Name, Line); + return hash_combine(Scope, Name); } }; diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 628a67bd639ce29f9cf5e0e2be63628f49532a80..29e2f42d3e05d1a5420df16a7639f32b6e9d5561 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/LegacyPassManager.h" +#include "llvm/ADT/Statistic.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManagers.h" @@ -465,6 +466,11 @@ public: // null. It may be called multiple times. static void createTheTimeInfo(); + // print - Prints out timing information and then resets the timers. + void print() { + TG.print(*CreateInfoOutputFile()); + } + /// getPassTimer - Return the timer for the specified pass if it exists. Timer *getPassTimer(Pass *P) { if (P->getAsPMDataManager()) @@ -587,7 +593,7 @@ AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { assert(Node && "cached analysis usage must be non null"); AnUsageMap[P] = &Node->AU; - AnUsage = &Node->AU;; + AnUsage = &Node->AU; } return AnUsage; } @@ -1752,6 +1758,13 @@ Timer *llvm::getPassTimer(Pass *P) { return nullptr; } +/// If timing is enabled, report the times collected up to now and then reset +/// them. +void llvm::reportAndResetTimings() { + if (TheTimeInfo) + TheTimeInfo->print(); +} + //===----------------------------------------------------------------------===// // PMStack implementation // diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 7228de3d23702d140e2254f86a24f25937fd1caf..92e5798dcf21473f478972c42ceb3a37a1a54a31 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Metadata.h" #include "LLVMContextImpl.h" #include "MetadataImpl.h" #include "SymbolTableListTraitsImpl.h" @@ -19,11 +20,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Argument.h" @@ -38,7 +39,6 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/IR/Type.h" @@ -967,7 +967,7 @@ static void addRange(SmallVectorImpl &EndPoints, MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { // Given two ranges, we want to compute the union of the ranges. This - // is slightly complitade by having to combine the intervals and merge + // is slightly complicated by having to combine the intervals and merge // the ones that overlap. if (!A || !B) @@ -976,7 +976,7 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { if (A == B) return A; - // First, walk both lists in older of the lower boundary of each interval. + // First, walk both lists in order of the lower boundary of each interval. // At each step, try to merge the new interval to the last one we adedd. SmallVector EndPoints; int AI = 0; @@ -1470,7 +1470,7 @@ void GlobalObject::copyMetadata(const GlobalObject *Other, unsigned Offset) { if (E) OrigElements = E->getElements(); std::vector Elements(OrigElements.size() + 2); - Elements[0] = dwarf::DW_OP_plus; + Elements[0] = dwarf::DW_OP_plus_uconst; Elements[1] = Offset; std::copy(OrigElements.begin(), OrigElements.end(), Elements.begin() + 2); E = DIExpression::get(getContext(), Elements); diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index fec9df193685de4819b291dc79effe5bc0aeb103..f8853ed169c5d3d0415ba8eee24ac59697412bcb 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -1,4 +1,4 @@ -//===-- Module.cpp - Implement the Module class ---------------------------===// +//===- Module.cpp - Implement the Module class ----------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,25 +13,44 @@ #include "llvm/IR/Module.h" #include "SymbolTableListTraitsImpl.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Comdat.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GVMaterializer.h" -#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalIFunc.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/SymbolTableListTraits.h" +#include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/RandomNumberGenerator.h" #include -#include -#include +#include +#include +#include +#include +#include using namespace llvm; @@ -462,7 +481,7 @@ PICLevel::Level Module::getPICLevel() const { } void Module::setPICLevel(PICLevel::Level PL) { - addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL); + addModuleFlag(ModFlagBehavior::Max, "PIC Level", PL); } PIELevel::Level Module::getPIELevel() const { @@ -476,7 +495,7 @@ PIELevel::Level Module::getPIELevel() const { } void Module::setPIELevel(PIELevel::Level PL) { - addModuleFlag(ModFlagBehavior::Error, "PIE Level", PL); + addModuleFlag(ModFlagBehavior::Max, "PIE Level", PL); } void Module::setProfileSummary(Metadata *M) { diff --git a/lib/IR/ModuleSummaryIndex.cpp b/lib/IR/ModuleSummaryIndex.cpp index 9072f4bc7b121bb93e8bad9655679ce4b38f3ba2..51c4bae3332e69243a6e50ce9e84e0af984336b6 100644 --- a/lib/IR/ModuleSummaryIndex.cpp +++ b/lib/IR/ModuleSummaryIndex.cpp @@ -16,61 +16,13 @@ #include "llvm/ADT/StringMap.h" using namespace llvm; -// Create the combined module index/summary from multiple -// per-module instances. -void ModuleSummaryIndex::mergeFrom(std::unique_ptr Other, - uint64_t NextModuleId) { - if (Other->modulePaths().empty()) - return; - - assert(Other->modulePaths().size() == 1 && - "Can only merge from an single-module index at that time"); - - StringRef OtherModPath = Other->modulePaths().begin()->first(); - StringRef ModPath = addModulePath(OtherModPath, NextModuleId, - Other->getModuleHash(OtherModPath)) - ->first(); - - for (auto &OtherGlobalValSummaryLists : *Other) { - GlobalValue::GUID ValueGUID = OtherGlobalValSummaryLists.first; - GlobalValueSummaryList &List = OtherGlobalValSummaryLists.second; - - // Assert that the value summary list only has one entry, since we shouldn't - // have duplicate names within a single per-module index. - assert(List.size() == 1); - std::unique_ptr Summary = std::move(List.front()); - - // Note the module path string ref was copied above and is still owned by - // the original per-module index. Reset it to the new module path - // string reference owned by the combined index. - Summary->setModulePath(ModPath); - - // Add new value summary to existing list. There may be duplicates when - // combining GlobalValueMap entries, due to COMDAT values. Any local - // values were given unique global IDs. - addGlobalValueSummary(ValueGUID, std::move(Summary)); - } -} - -void ModuleSummaryIndex::removeEmptySummaryEntries() { - for (auto MI = begin(), MIE = end(); MI != MIE;) { - // Only expect this to be called on a per-module index, which has a single - // entry per value entry list. - assert(MI->second.size() == 1); - if (!MI->second[0]) - MI = GlobalValueMap.erase(MI); - else - ++MI; - } -} - // Collect for the given module the list of function it defines // (GUID -> Summary). void ModuleSummaryIndex::collectDefinedFunctionsForModule( StringRef ModulePath, GVSummaryMapTy &GVSummaryMap) const { for (auto &GlobalList : *this) { auto GUID = GlobalList.first; - for (auto &GlobSummary : GlobalList.second) { + for (auto &GlobSummary : GlobalList.second.SummaryList) { auto *Summary = dyn_cast_or_null(GlobSummary.get()); if (!Summary) // Ignore global variable, focus on functions @@ -88,7 +40,7 @@ void ModuleSummaryIndex::collectDefinedGVSummariesPerModule( StringMap &ModuleToDefinedGVSummaries) const { for (auto &GlobalList : *this) { auto GUID = GlobalList.first; - for (auto &Summary : GlobalList.second) { + for (auto &Summary : GlobalList.second.SummaryList) { ModuleToDefinedGVSummaries[Summary->modulePath()][GUID] = Summary.get(); } } @@ -97,10 +49,23 @@ void ModuleSummaryIndex::collectDefinedGVSummariesPerModule( GlobalValueSummary * ModuleSummaryIndex::getGlobalValueSummary(uint64_t ValueGUID, bool PerModuleIndex) const { - auto SummaryList = findGlobalValueSummaryList(ValueGUID); - assert(SummaryList != end() && "GlobalValue not found in index"); - assert((!PerModuleIndex || SummaryList->second.size() == 1) && + auto VI = getValueInfo(ValueGUID); + assert(VI && "GlobalValue not found in index"); + assert((!PerModuleIndex || VI.getSummaryList().size() == 1) && "Expected a single entry per global value in per-module index"); - auto &Summary = SummaryList->second[0]; + auto &Summary = VI.getSummaryList()[0]; return Summary.get(); } + +bool ModuleSummaryIndex::isGUIDLive(GlobalValue::GUID GUID) const { + auto VI = getValueInfo(GUID); + if (!VI) + return true; + const auto &SummaryList = VI.getSummaryList(); + if (SummaryList.empty()) + return true; + for (auto &I : SummaryList) + if (isGlobalValueLive(I.get())) + return true; + return false; +} diff --git a/lib/IR/OptBisect.cpp b/lib/IR/OptBisect.cpp index b670c817569a9d39c86f0dde7f720fa22792f243..f1c70058fac2c1257732678413f10cbd3e73bcef 100644 --- a/lib/IR/OptBisect.cpp +++ b/lib/IR/OptBisect.cpp @@ -13,11 +13,12 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/IR/OptBisect.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionInfo.h" #include "llvm/IR/Module.h" -#include "llvm/IR/OptBisect.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -53,13 +54,20 @@ static std::string getDescription(const BasicBlock &BB) { } static std::string getDescription(const Loop &L) { - // FIXME: I'd like to be able to provide a better description here, but - // calling L->getHeader() would introduce a new dependency on the - // LLVMCore library. + // FIXME: Move into LoopInfo so we can get a better description + // (and avoid a circular dependency between IR and Analysis). return "loop"; } +static std::string getDescription(const Region &R) { + // FIXME: Move into RegionInfo so we can get a better description + // (and avoid a circular dependency between IR and Analysis). + return "region"; +} + static std::string getDescription(const CallGraphSCC &SCC) { + // FIXME: Move into CallGraphSCCPass to avoid circular dependency between + // IR and Analysis. std::string Desc = "SCC ("; bool First = true; for (CallGraphNode *CGN : SCC) { @@ -83,6 +91,7 @@ template bool OptBisect::shouldRunPass(const Pass *, const Function &); template bool OptBisect::shouldRunPass(const Pass *, const BasicBlock &); template bool OptBisect::shouldRunPass(const Pass *, const Loop &); template bool OptBisect::shouldRunPass(const Pass *, const CallGraphSCC &); +template bool OptBisect::shouldRunPass(const Pass *, const Region &); template bool OptBisect::shouldRunPass(const Pass *P, const UnitT &U) { diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp index 584dee2869c178dc0a91d3fee7ca63b57d0779bd..c0f6f07169ffb6ab156da98d5d3f72b3b65175c2 100644 --- a/lib/IR/PassRegistry.cpp +++ b/lib/IR/PassRegistry.cpp @@ -105,8 +105,6 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID, ImplementationInfo->getNormalCtor() && "Cannot specify pass as default if it does not have a default ctor"); InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor()); - InterfaceInfo->setTargetMachineCtor( - ImplementationInfo->getTargetMachineCtor()); } } diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index b67b0a307861b50feee3d5fcbbd376fc12b998ef..44fe5e48c720c1ce2be28db9c23525cb00418f5c 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -1,4 +1,4 @@ -//===-- Type.cpp - Implement the Type class -------------------------------===// +//===- Type.cpp - Implement the Type class --------------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,10 +13,23 @@ #include "llvm/IR/Type.h" #include "LLVMContextImpl.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include -#include +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + using namespace llvm; //===----------------------------------------------------------------------===// @@ -220,7 +233,6 @@ PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) { return getInt64Ty(C)->getPointerTo(AS); } - //===----------------------------------------------------------------------===// // IntegerType Implementation //===----------------------------------------------------------------------===// @@ -362,7 +374,8 @@ void StructType::setName(StringRef Name) { if (Name == getName()) return; StringMap &SymbolTable = getContext().pImpl->NamedStructTypes; - typedef StringMap::MapEntryTy EntryTy; + + using EntryTy = StringMap::MapEntryTy; // If this struct already had a name, remove its symbol table entry. Don't // delete the data yet because it may be part of the new name. @@ -419,21 +432,6 @@ StructType *StructType::get(LLVMContext &Context, bool isPacked) { return get(Context, None, isPacked); } -StructType *StructType::get(Type *type, ...) { - assert(type && "Cannot create a struct type with no elements with this"); - LLVMContext &Ctx = type->getContext(); - va_list ap; - SmallVector StructFields; - va_start(ap, type); - while (type) { - StructFields.push_back(type); - type = va_arg(ap, llvm::Type*); - } - auto *Ret = llvm::StructType::get(Ctx, StructFields); - va_end(ap); - return Ret; -} - StructType *StructType::create(LLVMContext &Context, ArrayRef Elements, StringRef Name, bool isPacked) { StructType *ST = create(Context, Name); @@ -462,21 +460,6 @@ StructType *StructType::create(ArrayRef Elements) { return create(Elements[0]->getContext(), Elements, StringRef()); } -StructType *StructType::create(StringRef Name, Type *type, ...) { - assert(type && "Cannot create a struct type with no elements with this"); - LLVMContext &Ctx = type->getContext(); - va_list ap; - SmallVector StructFields; - va_start(ap, type); - while (type) { - StructFields.push_back(type); - type = va_arg(ap, llvm::Type*); - } - auto *Ret = llvm::StructType::create(Ctx, StructFields, Name); - va_end(ap); - return Ret; -} - bool StructType::isSized(SmallPtrSetImpl *Visited) const { if ((getSubclassData() & SCDB_IsSized) != 0) return true; @@ -508,19 +491,6 @@ StringRef StructType::getName() const { return ((StringMapEntry *)SymbolTableEntry)->getKey(); } -void StructType::setBody(Type *type, ...) { - assert(type && "Cannot create a struct type with no elements with this"); - va_list ap; - SmallVector StructFields; - va_start(ap, type); - while (type) { - StructFields.push_back(type); - type = va_arg(ap, llvm::Type*); - } - setBody(StructFields); - va_end(ap); -} - bool StructType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() && @@ -540,7 +510,6 @@ StructType *Module::getTypeByName(StringRef Name) const { return getContext().pImpl->NamedStructTypes.lookup(Name); } - //===----------------------------------------------------------------------===// // CompositeType Implementation //===----------------------------------------------------------------------===// @@ -589,7 +558,6 @@ bool CompositeType::indexValid(unsigned Idx) const { return true; } - //===----------------------------------------------------------------------===// // ArrayType Implementation //===----------------------------------------------------------------------===// @@ -661,7 +629,6 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) { return Entry; } - PointerType::PointerType(Type *E, unsigned AddrSpace) : Type(E->getContext(), PointerTyID), PointeeTy(E) { ContainedTys = &PointeeTy; diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp index a178b9ec0f09b95bd92cdeee156f27b38b66cf62..b39678a013fb2c366550dbd6319c0311c45faaa6 100644 --- a/lib/IR/TypeFinder.cpp +++ b/lib/IR/TypeFinder.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/TypeFinder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -20,7 +21,6 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/IR/TypeFinder.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp index 497b4aa176434a7447bd87e14fef2a50ae5eaa94..d46039107f33176dde94da3e748f4d9acd94a6da 100644 --- a/lib/IR/User.cpp +++ b/lib/IR/User.cpp @@ -19,8 +19,6 @@ class BasicBlock; // User Class //===----------------------------------------------------------------------===// -void User::anchor() {} - void User::replaceUsesOfWith(Value *From, Value *To) { if (From == To) return; // Duh what? @@ -193,12 +191,4 @@ void User::operator delete(void *Usr) { } } -//===----------------------------------------------------------------------===// -// Operator Class -//===----------------------------------------------------------------------===// - -Operator::~Operator() { - llvm_unreachable("should never destroy an Operator"); -} - } // End llvm namespace diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index b07c57685a26682abac72e52de0f7ed657171526..51a7d424c1f331821b43c798e815142e08d7b274 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DerivedUser.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" @@ -59,7 +60,7 @@ Value::Value(Type *ty, unsigned scid) (SubclassID < ConstantFirstVal || SubclassID > ConstantLastVal)) assert((VTy->isFirstClassType() || VTy->isVoidTy()) && "Cannot create non-first-class values except for constants!"); - static_assert(sizeof(Value) == 3 * sizeof(void *) + 2 * sizeof(unsigned), + static_assert(sizeof(Value) == 2 * sizeof(void *) + 2 * sizeof(unsigned), "Value too big"); } @@ -89,6 +90,32 @@ Value::~Value() { destroyValueName(); } +void Value::deleteValue() { + switch (getValueID()) { +#define HANDLE_VALUE(Name) \ + case Value::Name##Val: \ + delete static_cast(this); \ + break; +#define HANDLE_MEMORY_VALUE(Name) \ + case Value::Name##Val: \ + static_cast(this)->DeleteValue( \ + static_cast(this)); \ + break; +#define HANDLE_INSTRUCTION(Name) /* nothing */ +#include "llvm/IR/Value.def" + +#define HANDLE_INST(N, OPC, CLASS) \ + case Value::InstructionVal + Instruction::OPC: \ + delete static_cast(this); \ + break; +#define HANDLE_USER_INST(N, OPC, CLASS) +#include "llvm/IR/Instruction.def" + + default: + llvm_unreachable("attempting to delete unknown value kind"); + } +} + void Value::destroyValueName() { ValueName *Name = getValueName(); if (Name) @@ -432,6 +459,7 @@ namespace { enum PointerStripKind { PSK_ZeroIndices, PSK_ZeroIndicesAndAliases, + PSK_ZeroIndicesAndAliasesAndBarriers, PSK_InBoundsConstantIndices, PSK_InBounds }; @@ -450,6 +478,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { if (auto *GEP = dyn_cast(V)) { switch (StripKind) { case PSK_ZeroIndicesAndAliases: + case PSK_ZeroIndicesAndAliasesAndBarriers: case PSK_ZeroIndices: if (!GEP->hasAllZeroIndices()) return V; @@ -472,12 +501,20 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) { return V; V = GA->getAliasee(); } else { - if (auto CS = ImmutableCallSite(V)) + if (auto CS = ImmutableCallSite(V)) { if (const Value *RV = CS.getReturnedArgOperand()) { V = RV; continue; } - + // The result of invariant.group.barrier must alias it's argument, + // but it can't be marked with returned attribute, that's why it needs + // special case. + if (StripKind == PSK_ZeroIndicesAndAliasesAndBarriers && + CS.getIntrinsicID() == Intrinsic::invariant_group_barrier) { + V = CS.getArgOperand(0); + continue; + } + } return V; } assert(V->getType()->isPointerTy() && "Unexpected operand type!"); @@ -499,6 +536,11 @@ const Value *Value::stripInBoundsConstantOffsets() const { return stripPointerCastsAndOffsets(this); } +const Value *Value::stripPointerCastsAndBarriers() const { + return stripPointerCastsAndOffsets( + this); +} + const Value * Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const { @@ -563,9 +605,9 @@ unsigned Value::getPointerDereferenceableBytes(const DataLayout &DL, CanBeNull = true; } } else if (auto CS = ImmutableCallSite(this)) { - DerefBytes = CS.getDereferenceableBytes(0); + DerefBytes = CS.getDereferenceableBytes(AttributeList::ReturnIndex); if (DerefBytes == 0) { - DerefBytes = CS.getDereferenceableOrNullBytes(0); + DerefBytes = CS.getDereferenceableOrNullBytes(AttributeList::ReturnIndex); CanBeNull = true; } } else if (const LoadInst *LI = dyn_cast(this)) { @@ -634,7 +676,7 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const { Align = DL.getPrefTypeAlignment(AllocatedType); } } else if (auto CS = ImmutableCallSite(this)) - Align = CS.getAttributes().getParamAlignment(AttributeList::ReturnIndex); + Align = CS.getAttributes().getRetAlignment(); else if (const LoadInst *LI = dyn_cast(this)) if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { ConstantInt *CI = mdconst::extract(MD->getOperand(0)); @@ -696,7 +738,7 @@ void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) { setPrevPtr(List); if (Next) { Next->setPrevPtr(&Next); - assert(V == Next->V && "Added to wrong list?"); + assert(getValPtr() == Next->getValPtr() && "Added to wrong list?"); } } @@ -711,14 +753,14 @@ void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) { } void ValueHandleBase::AddToUseList() { - assert(V && "Null pointer doesn't have a use list!"); + assert(getValPtr() && "Null pointer doesn't have a use list!"); - LLVMContextImpl *pImpl = V->getContext().pImpl; + LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl; - if (V->HasValueHandle) { + if (getValPtr()->HasValueHandle) { // If this value already has a ValueHandle, then it must be in the // ValueHandles map already. - ValueHandleBase *&Entry = pImpl->ValueHandles[V]; + ValueHandleBase *&Entry = pImpl->ValueHandles[getValPtr()]; assert(Entry && "Value doesn't have any handles?"); AddToExistingUseList(&Entry); return; @@ -732,10 +774,10 @@ void ValueHandleBase::AddToUseList() { DenseMap &Handles = pImpl->ValueHandles; const void *OldBucketPtr = Handles.getPointerIntoBucketsArray(); - ValueHandleBase *&Entry = Handles[V]; + ValueHandleBase *&Entry = Handles[getValPtr()]; assert(!Entry && "Value really did already have handles?"); AddToExistingUseList(&Entry); - V->HasValueHandle = true; + getValPtr()->HasValueHandle = true; // If reallocation didn't happen or if this was the first insertion, don't // walk the table. @@ -747,14 +789,14 @@ void ValueHandleBase::AddToUseList() { // Okay, reallocation did happen. Fix the Prev Pointers. for (DenseMap::iterator I = Handles.begin(), E = Handles.end(); I != E; ++I) { - assert(I->second && I->first == I->second->V && + assert(I->second && I->first == I->second->getValPtr() && "List invariant broken!"); I->second->setPrevPtr(&I->second); } } void ValueHandleBase::RemoveFromUseList() { - assert(V && V->HasValueHandle && + assert(getValPtr() && getValPtr()->HasValueHandle && "Pointer doesn't have a use list!"); // Unlink this from its use list. @@ -771,11 +813,11 @@ void ValueHandleBase::RemoveFromUseList() { // If the Next pointer was null, then it is possible that this was the last // ValueHandle watching VP. If so, delete its entry from the ValueHandles // map. - LLVMContextImpl *pImpl = V->getContext().pImpl; + LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl; DenseMap &Handles = pImpl->ValueHandles; if (Handles.isPointerIntoBucketsArray(PrevPtr)) { - Handles.erase(V); - V->HasValueHandle = false; + Handles.erase(getValPtr()); + getValPtr()->HasValueHandle = false; } } @@ -805,13 +847,10 @@ void ValueHandleBase::ValueIsDeleted(Value *V) { switch (Entry->getKind()) { case Assert: break; - case Tracking: - // Mark that this value has been deleted by setting it to an invalid Value - // pointer. - Entry->operator=(DenseMapInfo::getTombstoneKey()); - break; case Weak: - // Weak just goes to null, which will unlink it from the list. + case WeakTracking: + // WeakTracking and Weak just go to null, which unlinks them + // from the list. Entry->operator=(nullptr); break; case Callback: @@ -859,16 +898,10 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) { switch (Entry->getKind()) { case Assert: - // Asserting handle does not follow RAUW implicitly. - break; - case Tracking: - // Tracking goes to new value like a WeakVH. Note that this may make it - // something incompatible with its templated type. We don't want to have a - // virtual (or inline) interface to handle this though, so instead we make - // the TrackingVH accessors guarantee that a client never sees this value. - - LLVM_FALLTHROUGH; case Weak: + // Asserting and Weak handles do not follow RAUW implicitly. + break; + case WeakTracking: // Weak goes to the new value, which will unlink it from Old's list. Entry->operator=(New); break; @@ -880,18 +913,17 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) { } #ifndef NDEBUG - // If any new tracking or weak value handles were added while processing the + // If any new weak value handles were added while processing the // list, then complain about it now. if (Old->HasValueHandle) for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next) switch (Entry->getKind()) { - case Tracking: - case Weak: + case WeakTracking: dbgs() << "After RAUW from " << *Old->getType() << " %" << Old->getName() << " to " << *New->getType() << " %" << New->getName() << "\n"; - llvm_unreachable("A tracking or weak value handle still pointed to the" - " old value!\n"); + llvm_unreachable( + "A weak tracking value handle still pointed to the old value!\n"); default: break; } diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp index 0c3946c8661ebe488af64a611ee5e157ed9f93e8..ccdabe0817b4f69d36be71d32916f1a783c4c27f 100644 --- a/lib/IR/ValueSymbolTable.cpp +++ b/lib/IR/ValueSymbolTable.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp index 2132e1659225dfe4ea52950c08450b76037e3dc9..cf6ee063c2d5b7a24bb67cc3d7090c4580137869 100644 --- a/lib/IR/ValueTypes.cpp +++ b/lib/IR/ValueTypes.cpp @@ -142,6 +142,7 @@ std::string EVT::getEVTString() const { case MVT::Other: return "ch"; case MVT::Glue: return "glue"; case MVT::x86mmx: return "x86mmx"; + case MVT::v1i1: return "v1i1"; case MVT::v2i1: return "v2i1"; case MVT::v4i1: return "v4i1"; case MVT::v8i1: return "v8i1"; @@ -220,6 +221,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::f128: return Type::getFP128Ty(Context); case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); + case MVT::v1i1: return VectorType::get(Type::getInt1Ty(Context), 1); case MVT::v2i1: return VectorType::get(Type::getInt1Ty(Context), 2); case MVT::v4i1: return VectorType::get(Type::getInt1Ty(Context), 4); case MVT::v8i1: return VectorType::get(Type::getInt1Ty(Context), 8); diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 893890446b7a96148ea357007b78b8136a8c6160..819f63520c7441f46068e0438151ed72fde6f99a 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -49,7 +49,6 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/ilist.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -59,6 +58,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -81,10 +82,10 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" @@ -102,7 +103,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -267,6 +267,9 @@ class Verifier : public InstVisitor, VerifierSupport { /// \brief Keep track of the metadata nodes that have been checked already. SmallPtrSet MDNodes; + /// Keep track which DISubprogram is attached to which function. + DenseMap DISubprogramAttachments; + /// Track all DICompileUnits visited. SmallPtrSet CUVisited; @@ -386,7 +389,7 @@ public: verifyCompileUnits(); verifyDeoptimizeCallingConvs(); - + DISubprogramAttachments.clear(); return !Broken; } @@ -490,10 +493,9 @@ private: bool performTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, int VT, unsigned ArgNo, std::string &Suffix); bool verifyAttributeCount(AttributeList Attrs, unsigned Params); - void verifyAttributeTypes(AttributeList Attrs, unsigned Idx, bool isFunction, + void verifyAttributeTypes(AttributeSet Attrs, bool IsFunction, const Value *V); - void verifyParameterAttrs(AttributeList Attrs, unsigned Idx, Type *Ty, - bool isReturnValue, const Value *V); + void verifyParameterAttrs(AttributeSet Attrs, Type *Ty, const Value *V); void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, const Value *V); void verifyFunctionMetadata(ArrayRef> MDs); @@ -1051,6 +1053,14 @@ void Verifier::visitDISubprogram(const DISubprogram &N) { // Subprogram declarations (part of the type hierarchy). AssertDI(!Unit, "subprogram declarations must not have a compile unit", &N); } + + if (auto *RawThrownTypes = N.getRawThrownTypes()) { + auto *ThrownTypes = dyn_cast(RawThrownTypes); + AssertDI(ThrownTypes, "invalid thrown types list", &N, RawThrownTypes); + for (Metadata *Op : ThrownTypes->operands()) + AssertDI(Op && isa(Op), "invalid thrown type", &N, ThrownTypes, + Op); + } } void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) { @@ -1196,9 +1206,9 @@ void Verifier::visitComdat(const Comdat &C) { void Verifier::visitModuleIdents(const Module &M) { const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident"); - if (!Idents) + if (!Idents) return; - + // llvm.ident takes a list of metadata entry. Each entry has only one string. // Scan each llvm.ident entry and make sure that this requirement is met. for (const MDNode *N : Idents->operands()) { @@ -1208,7 +1218,7 @@ void Verifier::visitModuleIdents(const Module &M) { ("invalid value for llvm.ident metadata entry operand" "(the operand should be a string)"), N->getOperand(0)); - } + } } void Verifier::visitModuleFlags(const Module &M) { @@ -1272,6 +1282,13 @@ Verifier::visitModuleFlag(const MDNode *Op, // These behavior types accept any value. break; + case Module::Max: { + Assert(mdconst::dyn_extract_or_null(Op->getOperand(2)), + "invalid value for 'max' module flag (expected constant integer)", + Op->getOperand(2)); + break; + } + case Module::Require: { // The value should itself be an MDNode with two operands, a flag ID (an // MDString), and a value. @@ -1307,73 +1324,90 @@ Verifier::visitModuleFlag(const MDNode *Op, Assert(Inserted, "module flag identifiers must be unique (or of 'require' type)", ID); } -} -void Verifier::verifyAttributeTypes(AttributeList Attrs, unsigned Idx, - bool isFunction, const Value *V) { - unsigned Slot = ~0U; - for (unsigned I = 0, E = Attrs.getNumSlots(); I != E; ++I) - if (Attrs.getSlotIndex(I) == Idx) { - Slot = I; - break; - } + if (ID->getString() == "wchar_size") { + ConstantInt *Value + = mdconst::dyn_extract_or_null(Op->getOperand(2)); + Assert(Value, "wchar_size metadata requires constant integer argument"); + } + + if (ID->getString() == "Linker Options") { + // If the llvm.linker.options named metadata exists, we assume that the + // bitcode reader has upgraded the module flag. Otherwise the flag might + // have been created by a client directly. + Assert(M.getNamedMetadata("llvm.linker.options"), + "'Linker Options' named metadata no longer supported"); + } +} + +/// Return true if this attribute kind only applies to functions. +static bool isFuncOnlyAttr(Attribute::AttrKind Kind) { + switch (Kind) { + case Attribute::NoReturn: + case Attribute::NoUnwind: + case Attribute::NoInline: + case Attribute::AlwaysInline: + case Attribute::OptimizeForSize: + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + case Attribute::SafeStack: + case Attribute::NoRedZone: + case Attribute::NoImplicitFloat: + case Attribute::Naked: + case Attribute::InlineHint: + case Attribute::StackAlignment: + case Attribute::UWTable: + case Attribute::NonLazyBind: + case Attribute::ReturnsTwice: + case Attribute::SanitizeAddress: + case Attribute::SanitizeThread: + case Attribute::SanitizeMemory: + case Attribute::MinSize: + case Attribute::NoDuplicate: + case Attribute::Builtin: + case Attribute::NoBuiltin: + case Attribute::Cold: + case Attribute::OptimizeNone: + case Attribute::JumpTable: + case Attribute::Convergent: + case Attribute::ArgMemOnly: + case Attribute::NoRecurse: + case Attribute::InaccessibleMemOnly: + case Attribute::InaccessibleMemOrArgMemOnly: + case Attribute::AllocSize: + case Attribute::Speculatable: + return true; + default: + break; + } + return false; +} - assert(Slot != ~0U && "Attribute set inconsistency!"); +/// Return true if this is a function attribute that can also appear on +/// arguments. +static bool isFuncOrArgAttr(Attribute::AttrKind Kind) { + return Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly || + Kind == Attribute::ReadNone; +} - for (AttributeList::iterator I = Attrs.begin(Slot), E = Attrs.end(Slot); - I != E; ++I) { - if (I->isStringAttribute()) +void Verifier::verifyAttributeTypes(AttributeSet Attrs, bool IsFunction, + const Value *V) { + for (Attribute A : Attrs) { + if (A.isStringAttribute()) continue; - if (I->getKindAsEnum() == Attribute::NoReturn || - I->getKindAsEnum() == Attribute::NoUnwind || - I->getKindAsEnum() == Attribute::NoInline || - I->getKindAsEnum() == Attribute::AlwaysInline || - I->getKindAsEnum() == Attribute::OptimizeForSize || - I->getKindAsEnum() == Attribute::StackProtect || - I->getKindAsEnum() == Attribute::StackProtectReq || - I->getKindAsEnum() == Attribute::StackProtectStrong || - I->getKindAsEnum() == Attribute::SafeStack || - I->getKindAsEnum() == Attribute::NoRedZone || - I->getKindAsEnum() == Attribute::NoImplicitFloat || - I->getKindAsEnum() == Attribute::Naked || - I->getKindAsEnum() == Attribute::InlineHint || - I->getKindAsEnum() == Attribute::StackAlignment || - I->getKindAsEnum() == Attribute::UWTable || - I->getKindAsEnum() == Attribute::NonLazyBind || - I->getKindAsEnum() == Attribute::ReturnsTwice || - I->getKindAsEnum() == Attribute::SanitizeAddress || - I->getKindAsEnum() == Attribute::SanitizeThread || - I->getKindAsEnum() == Attribute::SanitizeMemory || - I->getKindAsEnum() == Attribute::MinSize || - I->getKindAsEnum() == Attribute::NoDuplicate || - I->getKindAsEnum() == Attribute::Builtin || - I->getKindAsEnum() == Attribute::NoBuiltin || - I->getKindAsEnum() == Attribute::Cold || - I->getKindAsEnum() == Attribute::OptimizeNone || - I->getKindAsEnum() == Attribute::JumpTable || - I->getKindAsEnum() == Attribute::Convergent || - I->getKindAsEnum() == Attribute::ArgMemOnly || - I->getKindAsEnum() == Attribute::NoRecurse || - I->getKindAsEnum() == Attribute::InaccessibleMemOnly || - I->getKindAsEnum() == Attribute::InaccessibleMemOrArgMemOnly || - I->getKindAsEnum() == Attribute::AllocSize) { - if (!isFunction) { - CheckFailed("Attribute '" + I->getAsString() + - "' only applies to functions!", V); - return; - } - } else if (I->getKindAsEnum() == Attribute::ReadOnly || - I->getKindAsEnum() == Attribute::WriteOnly || - I->getKindAsEnum() == Attribute::ReadNone) { - if (Idx == 0) { - CheckFailed("Attribute '" + I->getAsString() + - "' does not apply to function returns"); + if (isFuncOnlyAttr(A.getKindAsEnum())) { + if (!IsFunction) { + CheckFailed("Attribute '" + A.getAsString() + + "' only applies to functions!", + V); return; } - } else if (isFunction) { - CheckFailed("Attribute '" + I->getAsString() + - "' does not apply to functions!", V); + } else if (IsFunction && !isFuncOrArgAttr(A.getKindAsEnum())) { + CheckFailed("Attribute '" + A.getAsString() + + "' does not apply to functions!", + V); return; } } @@ -1381,106 +1415,91 @@ void Verifier::verifyAttributeTypes(AttributeList Attrs, unsigned Idx, // VerifyParameterAttrs - Check the given attributes for an argument or return // value of the specified type. The value V is printed in error messages. -void Verifier::verifyParameterAttrs(AttributeList Attrs, unsigned Idx, Type *Ty, - bool isReturnValue, const Value *V) { - if (!Attrs.hasAttributes(Idx)) +void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty, + const Value *V) { + if (!Attrs.hasAttributes()) return; - verifyAttributeTypes(Attrs, Idx, false, V); - - if (isReturnValue) - Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) && - !Attrs.hasAttribute(Idx, Attribute::Nest) && - !Attrs.hasAttribute(Idx, Attribute::StructRet) && - !Attrs.hasAttribute(Idx, Attribute::NoCapture) && - !Attrs.hasAttribute(Idx, Attribute::Returned) && - !Attrs.hasAttribute(Idx, Attribute::InAlloca) && - !Attrs.hasAttribute(Idx, Attribute::SwiftSelf) && - !Attrs.hasAttribute(Idx, Attribute::SwiftError), - "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', " - "'returned', 'swiftself', and 'swifterror' do not apply to return " - "values!", - V); + verifyAttributeTypes(Attrs, /*IsFunction=*/false, V); // Check for mutually incompatible attributes. Only inreg is compatible with // sret. unsigned AttrCount = 0; - AttrCount += Attrs.hasAttribute(Idx, Attribute::ByVal); - AttrCount += Attrs.hasAttribute(Idx, Attribute::InAlloca); - AttrCount += Attrs.hasAttribute(Idx, Attribute::StructRet) || - Attrs.hasAttribute(Idx, Attribute::InReg); - AttrCount += Attrs.hasAttribute(Idx, Attribute::Nest); + AttrCount += Attrs.hasAttribute(Attribute::ByVal); + AttrCount += Attrs.hasAttribute(Attribute::InAlloca); + AttrCount += Attrs.hasAttribute(Attribute::StructRet) || + Attrs.hasAttribute(Attribute::InReg); + AttrCount += Attrs.hasAttribute(Attribute::Nest); Assert(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', " "and 'sret' are incompatible!", V); - Assert(!(Attrs.hasAttribute(Idx, Attribute::InAlloca) && - Attrs.hasAttribute(Idx, Attribute::ReadOnly)), + Assert(!(Attrs.hasAttribute(Attribute::InAlloca) && + Attrs.hasAttribute(Attribute::ReadOnly)), "Attributes " "'inalloca and readonly' are incompatible!", V); - Assert(!(Attrs.hasAttribute(Idx, Attribute::StructRet) && - Attrs.hasAttribute(Idx, Attribute::Returned)), + Assert(!(Attrs.hasAttribute(Attribute::StructRet) && + Attrs.hasAttribute(Attribute::Returned)), "Attributes " "'sret and returned' are incompatible!", V); - Assert(!(Attrs.hasAttribute(Idx, Attribute::ZExt) && - Attrs.hasAttribute(Idx, Attribute::SExt)), + Assert(!(Attrs.hasAttribute(Attribute::ZExt) && + Attrs.hasAttribute(Attribute::SExt)), "Attributes " "'zeroext and signext' are incompatible!", V); - Assert(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) && - Attrs.hasAttribute(Idx, Attribute::ReadOnly)), + Assert(!(Attrs.hasAttribute(Attribute::ReadNone) && + Attrs.hasAttribute(Attribute::ReadOnly)), "Attributes " "'readnone and readonly' are incompatible!", V); - Assert(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) && - Attrs.hasAttribute(Idx, Attribute::WriteOnly)), + Assert(!(Attrs.hasAttribute(Attribute::ReadNone) && + Attrs.hasAttribute(Attribute::WriteOnly)), "Attributes " "'readnone and writeonly' are incompatible!", V); - Assert(!(Attrs.hasAttribute(Idx, Attribute::ReadOnly) && - Attrs.hasAttribute(Idx, Attribute::WriteOnly)), + Assert(!(Attrs.hasAttribute(Attribute::ReadOnly) && + Attrs.hasAttribute(Attribute::WriteOnly)), "Attributes " "'readonly and writeonly' are incompatible!", V); - Assert(!(Attrs.hasAttribute(Idx, Attribute::NoInline) && - Attrs.hasAttribute(Idx, Attribute::AlwaysInline)), + Assert(!(Attrs.hasAttribute(Attribute::NoInline) && + Attrs.hasAttribute(Attribute::AlwaysInline)), "Attributes " "'noinline and alwaysinline' are incompatible!", V); - Assert( - !AttrBuilder(Attrs, Idx).overlaps(AttributeFuncs::typeIncompatible(Ty)), - "Wrong types for attribute: " + - AttributeList::get(Context, Idx, AttributeFuncs::typeIncompatible(Ty)) - .getAsString(Idx), - V); + AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty); + Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs), + "Wrong types for attribute: " + + AttributeSet::get(Context, IncompatibleAttrs).getAsString(), + V); if (PointerType *PTy = dyn_cast(Ty)) { SmallPtrSet Visited; if (!PTy->getElementType()->isSized(&Visited)) { - Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) && - !Attrs.hasAttribute(Idx, Attribute::InAlloca), + Assert(!Attrs.hasAttribute(Attribute::ByVal) && + !Attrs.hasAttribute(Attribute::InAlloca), "Attributes 'byval' and 'inalloca' do not support unsized types!", V); } if (!isa(PTy->getElementType())) - Assert(!Attrs.hasAttribute(Idx, Attribute::SwiftError), + Assert(!Attrs.hasAttribute(Attribute::SwiftError), "Attribute 'swifterror' only applies to parameters " "with pointer to pointer type!", V); } else { - Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal), + Assert(!Attrs.hasAttribute(Attribute::ByVal), "Attribute 'byval' only applies to parameters with pointer type!", V); - Assert(!Attrs.hasAttribute(Idx, Attribute::SwiftError), + Assert(!Attrs.hasAttribute(Attribute::SwiftError), "Attribute 'swifterror' only applies to parameters " "with pointer type!", V); @@ -1500,123 +1519,122 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, bool SawSwiftSelf = false; bool SawSwiftError = false; - for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { - unsigned Idx = Attrs.getSlotIndex(i); - - Type *Ty; - if (Idx == 0) - Ty = FT->getReturnType(); - else if (Idx-1 < FT->getNumParams()) - Ty = FT->getParamType(Idx-1); - else - break; // VarArgs attributes, verified elsewhere. + // Verify return value attributes. + AttributeSet RetAttrs = Attrs.getRetAttributes(); + Assert((!RetAttrs.hasAttribute(Attribute::ByVal) && + !RetAttrs.hasAttribute(Attribute::Nest) && + !RetAttrs.hasAttribute(Attribute::StructRet) && + !RetAttrs.hasAttribute(Attribute::NoCapture) && + !RetAttrs.hasAttribute(Attribute::Returned) && + !RetAttrs.hasAttribute(Attribute::InAlloca) && + !RetAttrs.hasAttribute(Attribute::SwiftSelf) && + !RetAttrs.hasAttribute(Attribute::SwiftError)), + "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', " + "'returned', 'swiftself', and 'swifterror' do not apply to return " + "values!", + V); + Assert((!RetAttrs.hasAttribute(Attribute::ReadOnly) && + !RetAttrs.hasAttribute(Attribute::WriteOnly) && + !RetAttrs.hasAttribute(Attribute::ReadNone)), + "Attribute '" + RetAttrs.getAsString() + + "' does not apply to function returns", + V); + verifyParameterAttrs(RetAttrs, FT->getReturnType(), V); - verifyParameterAttrs(Attrs, Idx, Ty, Idx == 0, V); + // Verify parameter attributes. + for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { + Type *Ty = FT->getParamType(i); + AttributeSet ArgAttrs = Attrs.getParamAttributes(i); - if (Idx == 0) - continue; + verifyParameterAttrs(ArgAttrs, Ty, V); - if (Attrs.hasAttribute(Idx, Attribute::Nest)) { + if (ArgAttrs.hasAttribute(Attribute::Nest)) { Assert(!SawNest, "More than one parameter has attribute nest!", V); SawNest = true; } - if (Attrs.hasAttribute(Idx, Attribute::Returned)) { + if (ArgAttrs.hasAttribute(Attribute::Returned)) { Assert(!SawReturned, "More than one parameter has attribute returned!", V); Assert(Ty->canLosslesslyBitCastTo(FT->getReturnType()), - "Incompatible " - "argument and return types for 'returned' attribute", + "Incompatible argument and return types for 'returned' attribute", V); SawReturned = true; } - if (Attrs.hasAttribute(Idx, Attribute::StructRet)) { + if (ArgAttrs.hasAttribute(Attribute::StructRet)) { Assert(!SawSRet, "Cannot have multiple 'sret' parameters!", V); - Assert(Idx == 1 || Idx == 2, + Assert(i == 0 || i == 1, "Attribute 'sret' is not on first or second parameter!", V); SawSRet = true; } - if (Attrs.hasAttribute(Idx, Attribute::SwiftSelf)) { + if (ArgAttrs.hasAttribute(Attribute::SwiftSelf)) { Assert(!SawSwiftSelf, "Cannot have multiple 'swiftself' parameters!", V); SawSwiftSelf = true; } - if (Attrs.hasAttribute(Idx, Attribute::SwiftError)) { + if (ArgAttrs.hasAttribute(Attribute::SwiftError)) { Assert(!SawSwiftError, "Cannot have multiple 'swifterror' parameters!", V); SawSwiftError = true; } - if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) { - Assert(Idx == FT->getNumParams(), "inalloca isn't on the last parameter!", - V); + if (ArgAttrs.hasAttribute(Attribute::InAlloca)) { + Assert(i == FT->getNumParams() - 1, + "inalloca isn't on the last parameter!", V); } } if (!Attrs.hasAttributes(AttributeList::FunctionIndex)) return; - verifyAttributeTypes(Attrs, AttributeList::FunctionIndex, true, V); + verifyAttributeTypes(Attrs.getFnAttributes(), /*IsFunction=*/true, V); - Assert( - !(Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::ReadNone) && - Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly)), - "Attributes 'readnone and readonly' are incompatible!", V); + Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) && + Attrs.hasFnAttribute(Attribute::ReadOnly)), + "Attributes 'readnone and readonly' are incompatible!", V); - Assert( - !(Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::ReadNone) && - Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly)), - "Attributes 'readnone and writeonly' are incompatible!", V); + Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) && + Attrs.hasFnAttribute(Attribute::WriteOnly)), + "Attributes 'readnone and writeonly' are incompatible!", V); - Assert( - !(Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly) && - Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly)), - "Attributes 'readonly and writeonly' are incompatible!", V); + Assert(!(Attrs.hasFnAttribute(Attribute::ReadOnly) && + Attrs.hasFnAttribute(Attribute::WriteOnly)), + "Attributes 'readonly and writeonly' are incompatible!", V); - Assert( - !(Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::ReadNone) && - Attrs.hasAttribute(AttributeList::FunctionIndex, - Attribute::InaccessibleMemOrArgMemOnly)), - "Attributes 'readnone and inaccessiblemem_or_argmemonly' are " - "incompatible!", - V); + Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) && + Attrs.hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly)), + "Attributes 'readnone and inaccessiblemem_or_argmemonly' are " + "incompatible!", + V); - Assert( - !(Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::ReadNone) && - Attrs.hasAttribute(AttributeList::FunctionIndex, - Attribute::InaccessibleMemOnly)), - "Attributes 'readnone and inaccessiblememonly' are incompatible!", V); + Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) && + Attrs.hasFnAttribute(Attribute::InaccessibleMemOnly)), + "Attributes 'readnone and inaccessiblememonly' are incompatible!", V); - Assert( - !(Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::NoInline) && - Attrs.hasAttribute(AttributeList::FunctionIndex, - Attribute::AlwaysInline)), - "Attributes 'noinline and alwaysinline' are incompatible!", V); + Assert(!(Attrs.hasFnAttribute(Attribute::NoInline) && + Attrs.hasFnAttribute(Attribute::AlwaysInline)), + "Attributes 'noinline and alwaysinline' are incompatible!", V); - if (Attrs.hasAttribute(AttributeList::FunctionIndex, - Attribute::OptimizeNone)) { - Assert( - Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::NoInline), - "Attribute 'optnone' requires 'noinline'!", V); + if (Attrs.hasFnAttribute(Attribute::OptimizeNone)) { + Assert(Attrs.hasFnAttribute(Attribute::NoInline), + "Attribute 'optnone' requires 'noinline'!", V); - Assert(!Attrs.hasAttribute(AttributeList::FunctionIndex, - Attribute::OptimizeForSize), + Assert(!Attrs.hasFnAttribute(Attribute::OptimizeForSize), "Attributes 'optsize and optnone' are incompatible!", V); - Assert( - !Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize), - "Attributes 'minsize and optnone' are incompatible!", V); + Assert(!Attrs.hasFnAttribute(Attribute::MinSize), + "Attributes 'minsize and optnone' are incompatible!", V); } - if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::JumpTable)) { + if (Attrs.hasFnAttribute(Attribute::JumpTable)) { const GlobalValue *GV = cast(V); Assert(GV->hasGlobalUnnamedAddr(), "Attribute 'jumptable' requires 'unnamed_addr'", V); } - if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::AllocSize)) { + if (Attrs.hasFnAttribute(Attribute::AllocSize)) { std::pair> Args = Attrs.getAllocSizeArgs(AttributeList::FunctionIndex); @@ -1726,17 +1744,9 @@ void Verifier::visitConstantExpr(const ConstantExpr *CE) { } bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) { - if (Attrs.getNumSlots() == 0) - return true; - - unsigned LastSlot = Attrs.getNumSlots() - 1; - unsigned LastIndex = Attrs.getSlotIndex(LastSlot); - if (LastIndex <= Params || - (LastIndex == AttributeList::FunctionIndex && - (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params))) - return true; - - return false; + // There shouldn't be more attribute sets than there are parameters plus the + // function and return value. + return Attrs.getNumAttrSets() <= Params + 2; } /// Verify that statepoint intrinsic is well formed. @@ -1844,7 +1854,7 @@ void Verifier::verifyStatepoint(ImmutableCallSite CS) { Assert(ExpectedNumArgs <= (int)CS.arg_size(), "gc.statepoint too few arguments according to length fields", &CI); - // Check that the only uses of this gc.statepoint are gc.result or + // Check that the only uses of this gc.statepoint are gc.result or // gc.relocate calls which are tied to this statepoint and thus part // of the same statepoint sequence for (const User *U : CI.users()) { @@ -1974,7 +1984,7 @@ void Verifier::visitFunction(const Function &F) { // On function declarations/definitions, we do not support the builtin // attribute. We do not check this in VerifyFunctionAttrs since that is // checking for Attributes that can/can not ever be on functions. - Assert(!Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Builtin), + Assert(!Attrs.hasFnAttribute(Attribute::Builtin), "Attribute 'builtin' can only be applied to a callsite.", &F); // Check that this function meets the restrictions on this calling convention. @@ -1990,6 +2000,7 @@ void Verifier::visitFunction(const Function &F) { "Calling convention requires void return type", &F); LLVM_FALLTHROUGH; case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: @@ -2026,7 +2037,7 @@ void Verifier::visitFunction(const Function &F) { } // Check that swifterror argument is only used by loads and stores. - if (Attrs.hasAttribute(i+1, Attribute::SwiftError)) { + if (Attrs.hasParamAttribute(i, Attribute::SwiftError)) { verifySwiftErrorValue(&Arg); } ++i; @@ -2090,13 +2101,19 @@ void Verifier::visitFunction(const Function &F) { switch (I.first) { default: break; - case LLVMContext::MD_dbg: + case LLVMContext::MD_dbg: { ++NumDebugAttachments; AssertDI(NumDebugAttachments == 1, "function must have a single !dbg attachment", &F, I.second); AssertDI(isa(I.second), "function !dbg attachment must be a subprogram", &F, I.second); + auto *SP = cast(I.second); + const Function *&AttachedTo = DISubprogramAttachments[SP]; + AssertDI(!AttachedTo || AttachedTo == &F, + "DISubprogram attached to more than one function", SP, &F); + AttachedTo = &F; break; + } case LLVMContext::MD_prof: ++NumProfAttachments; Assert(NumProfAttachments == 1, @@ -2617,6 +2634,15 @@ void Verifier::verifyCallSite(CallSite CS) { Assert(verifyAttributeCount(Attrs, CS.arg_size()), "Attribute after last parameter!", I); + if (Attrs.hasAttribute(AttributeList::FunctionIndex, Attribute::Speculatable)) { + // Don't allow speculatable on call sites, unless the underlying function + // declaration is also speculatable. + Function *Callee + = dyn_cast(CS.getCalledValue()->stripPointerCasts()); + Assert(Callee && Callee->isSpeculatable(), + "speculatable attribute may not apply to call sites", I); + } + // Verify call attributes. verifyFunctionAttrs(FTy, Attrs, I); @@ -2634,7 +2660,7 @@ void Verifier::verifyCallSite(CallSite CS) { // make sure the underlying alloca/parameter it comes from has a swifterror as // well. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - if (CS.paramHasAttr(i+1, Attribute::SwiftError)) { + if (CS.paramHasAttr(i, Attribute::SwiftError)) { Value *SwiftErrorArg = CS.getArgument(i); if (auto AI = dyn_cast(SwiftErrorArg->stripInBoundsOffsets())) { Assert(AI->isSwiftError(), @@ -2652,24 +2678,25 @@ void Verifier::verifyCallSite(CallSite CS) { bool SawNest = false; bool SawReturned = false; - for (unsigned Idx = 1; Idx < 1 + FTy->getNumParams(); ++Idx) { - if (Attrs.hasAttribute(Idx, Attribute::Nest)) + for (unsigned Idx = 0; Idx < FTy->getNumParams(); ++Idx) { + if (Attrs.hasParamAttribute(Idx, Attribute::Nest)) SawNest = true; - if (Attrs.hasAttribute(Idx, Attribute::Returned)) + if (Attrs.hasParamAttribute(Idx, Attribute::Returned)) SawReturned = true; } // Check attributes on the varargs part. - for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) { - Type *Ty = CS.getArgument(Idx-1)->getType(); - verifyParameterAttrs(Attrs, Idx, Ty, false, I); + for (unsigned Idx = FTy->getNumParams(); Idx < CS.arg_size(); ++Idx) { + Type *Ty = CS.getArgument(Idx)->getType(); + AttributeSet ArgAttrs = Attrs.getParamAttributes(Idx); + verifyParameterAttrs(ArgAttrs, Ty, I); - if (Attrs.hasAttribute(Idx, Attribute::Nest)) { + if (ArgAttrs.hasAttribute(Attribute::Nest)) { Assert(!SawNest, "More than one parameter has attribute nest!", I); SawNest = true; } - if (Attrs.hasAttribute(Idx, Attribute::Returned)) { + if (ArgAttrs.hasAttribute(Attribute::Returned)) { Assert(!SawReturned, "More than one parameter has attribute returned!", I); Assert(Ty->canLosslesslyBitCastTo(FTy->getReturnType()), @@ -2679,11 +2706,12 @@ void Verifier::verifyCallSite(CallSite CS) { SawReturned = true; } - Assert(!Attrs.hasAttribute(Idx, Attribute::StructRet), + Assert(!ArgAttrs.hasAttribute(Attribute::StructRet), "Attribute 'sret' cannot be used for vararg call arguments!", I); - if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) - Assert(Idx == CS.arg_size(), "inalloca isn't on the last argument!", I); + if (ArgAttrs.hasAttribute(Attribute::InAlloca)) + Assert(Idx == CS.arg_size() - 1, "inalloca isn't on the last argument!", + I); } } @@ -2763,11 +2791,11 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) { Attribute::SwiftError}; AttrBuilder Copy; for (auto AK : ABIAttrs) { - if (Attrs.hasAttribute(I + 1, AK)) + if (Attrs.hasParamAttribute(I, AK)) Copy.addAttribute(AK); } - if (Attrs.hasAttribute(I + 1, Attribute::Alignment)) - Copy.addAlignmentAttr(Attrs.getParamAlignment(I + 1)); + if (Attrs.hasParamAttribute(I, Attribute::Alignment)) + Copy.addAlignmentAttr(Attrs.getParamAlignment(I)); return Copy; } @@ -3127,7 +3155,7 @@ void Verifier::verifySwiftErrorCallSite(CallSite CS, for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I, ++Idx) { if (*I == SwiftErrorVal) { - Assert(CS.paramHasAttr(Idx+1, Attribute::SwiftError), + Assert(CS.paramHasAttr(Idx, Attribute::SwiftError), "swifterror value when used in a callsite should be marked " "with swifterror attribute", SwiftErrorVal, CS); @@ -3913,7 +3941,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // If the intrinsic takes MDNode arguments, verify that they are either global // or are local to *this* function. - for (Value *V : CS.args()) + for (Value *V : CS.args()) if (auto *MD = dyn_cast(V)) visitMetadataAsValue(*MD, CS.getCaller()); @@ -3946,6 +3974,18 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: visitConstrainedFPIntrinsic( cast(*CS.getInstruction())); break; @@ -3972,10 +4012,16 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { CS); break; } - case Intrinsic::memcpy_element_atomic: { - ConstantInt *ElementSizeCI = dyn_cast(CS.getArgOperand(3)); - Assert(ElementSizeCI, "element size of the element-wise atomic memory " - "intrinsic must be a constant int", + case Intrinsic::memcpy_element_unordered_atomic: { + const ElementUnorderedAtomicMemCpyInst *MI = + cast(CS.getInstruction()); + ; + + ConstantInt *ElementSizeCI = + dyn_cast(MI->getRawElementSizeInBytes()); + Assert(ElementSizeCI, + "element size of the element-wise unordered atomic memory " + "intrinsic must be a constant int", CS); const APInt &ElementSizeVal = ElementSizeCI->getValue(); Assert(ElementSizeVal.isPowerOf2(), @@ -3983,19 +4029,24 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "must be a power of 2", CS); + if (auto *LengthCI = dyn_cast(MI->getLength())) { + uint64_t Length = LengthCI->getZExtValue(); + uint64_t ElementSize = MI->getElementSizeInBytes(); + Assert((Length % ElementSize) == 0, + "constant length must be a multiple of the element size in the " + "element-wise atomic memory intrinsic", + CS); + } + auto IsValidAlignment = [&](uint64_t Alignment) { return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); }; - - uint64_t DstAlignment = CS.getParamAlignment(1), - SrcAlignment = CS.getParamAlignment(2); - + uint64_t DstAlignment = CS.getParamAlignment(0), + SrcAlignment = CS.getParamAlignment(1); Assert(IsValidAlignment(DstAlignment), - "incorrect alignment of the destination argument", - CS); + "incorrect alignment of the destination argument", CS); Assert(IsValidAlignment(SrcAlignment), - "incorrect alignment of the source argument", - CS); + "incorrect alignment of the source argument", CS); break; } case Intrinsic::gcroot: @@ -4225,7 +4276,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { } case Intrinsic::masked_load: { Assert(CS.getType()->isVectorTy(), "masked_load: must return a vector", CS); - + Value *Ptr = CS.getArgOperand(0); //Value *Alignment = CS.getArgOperand(1); Value *Mask = CS.getArgOperand(2); @@ -4235,12 +4286,12 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // DataTy is the overloaded type Type *DataTy = cast(Ptr->getType())->getElementType(); - Assert(DataTy == CS.getType(), + Assert(DataTy == CS.getType(), "masked_load: return must match pointer type", CS); Assert(PassThru->getType() == DataTy, "masked_load: pass through and data type must match", CS); Assert(Mask->getType()->getVectorNumElements() == - DataTy->getVectorNumElements(), + DataTy->getVectorNumElements(), "masked_load: vector mask must be same length as data", CS); break; } @@ -4254,10 +4305,10 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // DataTy is the overloaded type Type *DataTy = cast(Ptr->getType())->getElementType(); - Assert(DataTy == Val->getType(), + Assert(DataTy == Val->getType(), "masked_store: storee must match pointer type", CS); Assert(Mask->getType()->getVectorNumElements() == - DataTy->getVectorNumElements(), + DataTy->getVectorNumElements(), "masked_store: vector mask must be same length as data", CS); break; } @@ -4315,7 +4366,12 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) { } void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { - Assert(isa(FPI.getOperand(2)), + unsigned NumOperands = FPI.getNumArgOperands(); + Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)), + "invalid arguments for constrained FP intrinsic", &FPI); + Assert(isa(FPI.getArgOperand(NumOperands-1)), + "invalid exception behavior argument", &FPI); + Assert(isa(FPI.getArgOperand(NumOperands-2)), "invalid rounding mode argument", &FPI); Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid, "invalid rounding mode argument", &FPI); diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt index 684b378c93e5a3626acfbf02b94c9d8dc79b9205..1d22c2a11f131673a47a82f7daf3e480ce8ebd3c 100644 --- a/lib/LLVMBuild.txt +++ b/lib/LLVMBuild.txt @@ -24,7 +24,6 @@ subdirectories = DebugInfo Demangle ExecutionEngine - LibDriver LineEditor Linker IR @@ -32,6 +31,7 @@ subdirectories = LTO MC Object + BinaryFormat ObjectYAML Option Passes @@ -39,6 +39,8 @@ subdirectories = Support TableGen Target + Testing + ToolDrivers Transforms [component_0] diff --git a/lib/LTO/CMakeLists.txt b/lib/LTO/CMakeLists.txt index c73143eb330beaca23bc4ca68bae0988480d6900..73b5662d4bc8ed10b1383f0c617db5d71d011028 100644 --- a/lib/LTO/CMakeLists.txt +++ b/lib/LTO/CMakeLists.txt @@ -1,52 +1,3 @@ -# Figure out if we can track VC revisions. -function(find_first_existing_file out_var) - foreach(file ${ARGN}) - if(EXISTS "${file}") - set(${out_var} "${file}" PARENT_SCOPE) - return() - endif() - endforeach() -endfunction() - -macro(find_first_existing_vc_file out_var path) - find_first_existing_file(${out_var} - "${path}/.git/logs/HEAD" # Git - "${path}/.svn/wc.db" # SVN 1.7 - "${path}/.svn/entries" # SVN 1.6 - ) -endmacro() - -find_first_existing_vc_file(llvm_vc "${LLVM_MAIN_SRC_DIR}") - -# The VC revision include that we want to generate. -set(version_inc "${CMAKE_CURRENT_BINARY_DIR}/LLVMLTORevision.h") - -set(get_svn_script "${LLVM_CMAKE_PATH}/GenerateVersionFromCVS.cmake") - -if(DEFINED llvm_vc) - # Create custom target to generate the VC revision include. - add_custom_command(OUTPUT "${version_inc}" - DEPENDS "${llvm_vc}" "${get_svn_script}" - COMMAND - ${CMAKE_COMMAND} "-DSOURCE_DIR=${LLVM_MAIN_SRC_DIR}" - "-DNAME=LLVM_REVISION" - "-DHEADER_FILE=${version_inc}" - -P "${get_svn_script}") - - # Mark the generated header as being generated. - set_source_files_properties("${version_inc}" - PROPERTIES GENERATED TRUE - HEADER_FILE_ONLY TRUE) - - # Tell Version.cpp that it needs to build with -DHAVE_SVN_VERSION_INC. - set_source_files_properties(Version.cpp - PROPERTIES COMPILE_DEFINITIONS "HAVE_SVN_VERSION_INC") -else() - # Not producing a VC revision include. - set(version_inc) -endif() - - add_llvm_library(LLVMLTO Caching.cpp LTO.cpp @@ -55,11 +6,11 @@ add_llvm_library(LLVMLTO LTOCodeGenerator.cpp UpdateCompilerUsed.cpp ThinLTOCodeGenerator.cpp - ${version_inc} ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/LTO DEPENDS intrinsics_gen + llvm_vcsrevision_h ) diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 6ca7e34527c7d91f2352d40b96a6544d482fbe1e..35032fdd33e1003d6fc8d1dfcfbc7dba7e545243 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -25,7 +25,6 @@ #include "llvm/LTO/LTOBackend.h" #include "llvm/Linker/IRMover.h" #include "llvm/Object/IRObjectFile.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" @@ -35,6 +34,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/VCSRevision.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -74,7 +74,7 @@ static void computeCacheKey( // Start with the compiler revision Hasher.update(LLVM_VERSION_STRING); -#ifdef HAVE_LLVM_REVISION +#ifdef LLVM_REVISION Hasher.update(LLVM_REVISION); #endif @@ -114,11 +114,15 @@ static void computeCacheKey( AddUnsigned((unsigned)Conf.Options.DebuggerTuning); for (auto &A : Conf.MAttrs) AddString(A); - AddUnsigned(Conf.RelocModel); + if (Conf.RelocModel) + AddUnsigned(*Conf.RelocModel); + else + AddUnsigned(-1); AddUnsigned(Conf.CodeModel); AddUnsigned(Conf.CGOptLevel); AddUnsigned(Conf.CGFileType); AddUnsigned(Conf.OptLevel); + AddUnsigned(Conf.UseNewPM); AddString(Conf.OptPipeline); AddString(Conf.AAPipeline); AddString(Conf.OverrideTriple); @@ -274,13 +278,14 @@ void llvm::thinLTOResolveWeakForLinkerInIndex( // when needed. DenseSet GlobalInvolvedWithAlias; for (auto &I : Index) - for (auto &S : I.second) + for (auto &S : I.second.SummaryList) if (auto AS = dyn_cast(S.get())) GlobalInvolvedWithAlias.insert(&AS->getAliasee()); for (auto &I : Index) - thinLTOResolveWeakForLinkerGUID(I.second, I.first, GlobalInvolvedWithAlias, - isPrevailing, recordNewLinkage); + thinLTOResolveWeakForLinkerGUID(I.second.SummaryList, I.first, + GlobalInvolvedWithAlias, isPrevailing, + recordNewLinkage); } static void thinLTOInternalizeAndPromoteGUID( @@ -301,7 +306,7 @@ void llvm::thinLTOInternalizeAndPromoteInIndex( ModuleSummaryIndex &Index, function_ref isExported) { for (auto &I : Index) - thinLTOInternalizeAndPromoteGUID(I.second, I.first, isExported); + thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported); } // Requires a destructor for std::vector. @@ -310,53 +315,19 @@ InputFile::~InputFile() = default; Expected> InputFile::create(MemoryBufferRef Object) { std::unique_ptr File(new InputFile); - ErrorOr BCOrErr = - IRObjectFile::findBitcodeInMemBuffer(Object); - if (!BCOrErr) - return errorCodeToError(BCOrErr.getError()); - - Expected> BMsOrErr = - getBitcodeModuleList(*BCOrErr); - if (!BMsOrErr) - return BMsOrErr.takeError(); - - if (BMsOrErr->empty()) - return make_error("Bitcode file does not contain any modules", - inconvertibleErrorCode()); - - File->Mods = *BMsOrErr; - - LLVMContext Ctx; - std::vector Mods; - std::vector> OwnedMods; - for (auto BM : *BMsOrErr) { - Expected> MOrErr = - BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, - /*IsImporting*/ false); - if (!MOrErr) - return MOrErr.takeError(); - - if ((*MOrErr)->getDataLayoutStr().empty()) - return make_error("input module has no datalayout", - inconvertibleErrorCode()); - - Mods.push_back(MOrErr->get()); - OwnedMods.push_back(std::move(*MOrErr)); - } - - SmallVector Symtab; - if (Error E = irsymtab::build(Mods, Symtab, File->Strtab)) - return std::move(E); + Expected FOrErr = readIRSymtab(Object); + if (!FOrErr) + return FOrErr.takeError(); - irsymtab::Reader R({Symtab.data(), Symtab.size()}, - {File->Strtab.data(), File->Strtab.size()}); - File->SourceFileName = R.getSourceFileName(); - File->COFFLinkerOpts = R.getCOFFLinkerOpts(); - File->ComdatTable = R.getComdatTable(); + File->TargetTriple = FOrErr->TheReader.getTargetTriple(); + File->SourceFileName = FOrErr->TheReader.getSourceFileName(); + File->COFFLinkerOpts = FOrErr->TheReader.getCOFFLinkerOpts(); + File->ComdatTable = FOrErr->TheReader.getComdatTable(); - for (unsigned I = 0; I != Mods.size(); ++I) { + for (unsigned I = 0; I != FOrErr->Mods.size(); ++I) { size_t Begin = File->Symbols.size(); - for (const irsymtab::Reader::SymbolRef &Sym : R.module_symbols(I)) + for (const irsymtab::Reader::SymbolRef &Sym : + FOrErr->TheReader.module_symbols(I)) // Skip symbols that are irrelevant to LTO. Note that this condition needs // to match the one in Skip() in LTO::addRegularLTO(). if (Sym.isGlobal() && !Sym.isFormatSpecific()) @@ -364,6 +335,8 @@ Expected> InputFile::create(MemoryBufferRef Object) { File->ModuleSymIndices.push_back({Begin, File->Symbols.size()}); } + File->Mods = FOrErr->Mods; + File->Strtab = std::move(FOrErr->Strtab); return std::move(File); } @@ -391,29 +364,40 @@ LTO::LTO(Config Conf, ThinBackend Backend, // Requires a destructor for MapVector. LTO::~LTO() = default; -// Add the given symbol to the GlobalResolutions map, and resolve its partition. -void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym, - SymbolResolution Res, unsigned Partition) { - auto &GlobalRes = GlobalResolutions[Sym.getName()]; - GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); - if (Res.Prevailing) - GlobalRes.IRName = Sym.getIRName(); - - // Set the partition to external if we know it is used elsewhere, e.g. - // it is visible to a regular object, is referenced from llvm.compiler_used, - // or was already recorded as being referenced from a different partition. - if (Res.VisibleToRegularObj || Sym.isUsed() || - (GlobalRes.Partition != GlobalResolution::Unknown && - GlobalRes.Partition != Partition)) { - GlobalRes.Partition = GlobalResolution::External; - } else - // First recorded reference, save the current partition. - GlobalRes.Partition = Partition; - - // Flag as visible outside of ThinLTO if visible from a regular object or - // if this is a reference in the regular LTO partition. - GlobalRes.VisibleOutsideThinLTO |= - (Res.VisibleToRegularObj || (Partition == GlobalResolution::RegularLTO)); +// Add the symbols in the given module to the GlobalResolutions map, and resolve +// their partitions. +void LTO::addModuleToGlobalRes(ArrayRef Syms, + ArrayRef Res, + unsigned Partition, bool InSummary) { + auto *ResI = Res.begin(); + auto *ResE = Res.end(); + (void)ResE; + for (const InputFile::Symbol &Sym : Syms) { + assert(ResI != ResE); + SymbolResolution Res = *ResI++; + + auto &GlobalRes = GlobalResolutions[Sym.getName()]; + GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr(); + if (Res.Prevailing) + GlobalRes.IRName = Sym.getIRName(); + + // Set the partition to external if we know it is re-defined by the linker + // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a + // regular object, is referenced from llvm.compiler_used, or was already + // recorded as being referenced from a different partition. + if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() || + (GlobalRes.Partition != GlobalResolution::Unknown && + GlobalRes.Partition != Partition)) { + GlobalRes.Partition = GlobalResolution::External; + } else + // First recorded reference, save the current partition. + GlobalRes.Partition = Partition; + + // Flag as visible outside of summary if visible from a regular object or + // from a module that does not have a summary. + GlobalRes.VisibleOutsideSummary |= + (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary); + } } static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, @@ -432,6 +416,8 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input, OS << 'l'; if (Res.VisibleToRegularObj) OS << 'x'; + if (Res.LinkerRedefined) + OS << 'r'; OS << '\n'; } OS.flush(); @@ -457,46 +443,61 @@ Error LTO::add(std::unique_ptr Input, Error LTO::addModule(InputFile &Input, unsigned ModI, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - Expected HasThinLTOSummary = Input.Mods[ModI].hasSummary(); - if (!HasThinLTOSummary) - return HasThinLTOSummary.takeError(); + Expected LTOInfo = Input.Mods[ModI].getLTOInfo(); + if (!LTOInfo) + return LTOInfo.takeError(); + BitcodeModule BM = Input.Mods[ModI]; auto ModSyms = Input.module_symbols(ModI); - if (*HasThinLTOSummary) - return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE); - else - return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE); + addModuleToGlobalRes(ModSyms, {ResI, ResE}, + LTOInfo->IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0, + LTOInfo->HasSummary); + + if (LTOInfo->IsThinLTO) + return addThinLTO(BM, ModSyms, ResI, ResE); + + Expected ModOrErr = + addRegularLTO(BM, ModSyms, ResI, ResE); + if (!ModOrErr) + return ModOrErr.takeError(); + + if (!LTOInfo->HasSummary) + return linkRegularLTO(std::move(*ModOrErr), /*LivenessFromIndex=*/false); + + // Regular LTO module summaries are added to a dummy module that represents + // the combined regular LTO module. + if (Error Err = BM.readSummary(ThinLTO.CombinedIndex, "", -1ull)) + return Err; + RegularLTO.ModsWithSummaries.push_back(std::move(*ModOrErr)); + return Error::success(); } // Add a regular LTO object to the link. -Error LTO::addRegularLTO(BitcodeModule BM, - ArrayRef Syms, - const SymbolResolution *&ResI, - const SymbolResolution *ResE) { - if (!RegularLTO.CombinedModule) { - RegularLTO.CombinedModule = - llvm::make_unique("ld-temp.o", RegularLTO.Ctx); - RegularLTO.Mover = llvm::make_unique(*RegularLTO.CombinedModule); - } +// The resulting module needs to be linked into the combined LTO module with +// linkRegularLTO. +Expected +LTO::addRegularLTO(BitcodeModule BM, ArrayRef Syms, + const SymbolResolution *&ResI, + const SymbolResolution *ResE) { + RegularLTOState::AddedModule Mod; Expected> MOrErr = BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true, /*IsImporting*/ false); if (!MOrErr) return MOrErr.takeError(); - Module &M = **MOrErr; + Mod.M = std::move(*MOrErr); + if (Error Err = M.materializeMetadata()) - return Err; + return std::move(Err); UpgradeDebugInfo(M); ModuleSymbolTable SymTab; SymTab.addModule(&M); - std::vector Keep; - for (GlobalVariable &GV : M.globals()) if (GV.hasAppendingLinkage()) - Keep.push_back(&GV); + Mod.Keep.push_back(&GV); DenseSet AliasedGlobals; for (auto &GA : M.aliases()) @@ -525,7 +526,6 @@ Error LTO::addRegularLTO(BitcodeModule BM, for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Sym, Res, 0); assert(MsymI != MsymE); ModuleSymbolTable::Symbol Msym = *MsymI++; @@ -535,32 +535,29 @@ Error LTO::addRegularLTO(BitcodeModule BM, if (Res.Prevailing) { if (Sym.isUndefined()) continue; - Keep.push_back(GV); - switch (GV->getLinkage()) { - default: - break; - case GlobalValue::LinkOnceAnyLinkage: + Mod.Keep.push_back(GV); + // For symbols re-defined with linker -wrap and -defsym options, + // set the linkage to weak to inhibit IPO. The linkage will be + // restored by the linker. + if (Res.LinkerRedefined) GV->setLinkage(GlobalValue::WeakAnyLinkage); - break; - case GlobalValue::LinkOnceODRLinkage: - GV->setLinkage(GlobalValue::WeakODRLinkage); - break; - } + + GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage(); + if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) + GV->setLinkage(GlobalValue::getWeakLinkage( + GlobalValue::isLinkOnceODRLinkage(OriginalLinkage))); } else if (isa(GV) && (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() || GV->hasAvailableExternallyLinkage()) && !AliasedGlobals.count(cast(GV))) { - // Either of the above three types of linkage indicates that the + // Any of the above three types of linkage indicates that the // chosen prevailing symbol will have the same semantics as this copy of - // the symbol, so we can link it with available_externally linkage. We - // only need to do this if the symbol is undefined. - GlobalValue *CombinedGV = - RegularLTO.CombinedModule->getNamedValue(GV->getName()); - if (!CombinedGV || CombinedGV->isDeclaration()) { - Keep.push_back(GV); - GV->setLinkage(GlobalValue::AvailableExternallyLinkage); - cast(GV)->setComdat(nullptr); - } + // the symbol, so we may be able to link it with available_externally + // linkage. We will decide later whether to do that when we link this + // module (in linkRegularLTO), based on whether it is undefined. + Mod.Keep.push_back(GV); + GV->setLinkage(GlobalValue::AvailableExternallyLinkage); + cast(GV)->setComdat(nullptr); } } // Common resolution: collect the maximum size/alignment over all commons. @@ -578,27 +575,54 @@ Error LTO::addRegularLTO(BitcodeModule BM, // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit. } assert(MsymI == MsymE); + return std::move(Mod); +} - return RegularLTO.Mover->move(std::move(*MOrErr), Keep, +Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod, + bool LivenessFromIndex) { + if (!RegularLTO.CombinedModule) { + RegularLTO.CombinedModule = + llvm::make_unique("ld-temp.o", RegularLTO.Ctx); + RegularLTO.Mover = llvm::make_unique(*RegularLTO.CombinedModule); + } + + std::vector Keep; + for (GlobalValue *GV : Mod.Keep) { + if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) + continue; + + if (!GV->hasAvailableExternallyLinkage()) { + Keep.push_back(GV); + continue; + } + + // Only link available_externally definitions if we don't already have a + // definition. + GlobalValue *CombinedGV = + RegularLTO.CombinedModule->getNamedValue(GV->getName()); + if (CombinedGV && !CombinedGV->isDeclaration()) + continue; + + Keep.push_back(GV); + } + + return RegularLTO.Mover->move(std::move(Mod.M), Keep, [](GlobalValue &, IRMover::ValueAdder) {}, /* IsPerformingImport */ false); } -// Add a ThinLTO object to the link. -Error LTO::addThinLTO(BitcodeModule BM, - ArrayRef Syms, +// Add a ThinLTO module to the link. +Error LTO::addThinLTO(BitcodeModule BM, ArrayRef Syms, const SymbolResolution *&ResI, const SymbolResolution *ResE) { - Expected> SummaryOrErr = BM.getSummary(); - if (!SummaryOrErr) - return SummaryOrErr.takeError(); - ThinLTO.CombinedIndex.mergeFrom(std::move(*SummaryOrErr), - ThinLTO.ModuleMap.size()); + if (Error Err = + BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(), + ThinLTO.ModuleMap.size())) + return Err; for (const InputFile::Symbol &Sym : Syms) { assert(ResI != ResE); SymbolResolution Res = *ResI++; - addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1); if (Res.Prevailing) { if (!Sym.getIRName().empty()) { @@ -623,10 +647,24 @@ unsigned LTO::getMaxTasks() const { } Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { + // Compute "dead" symbols, we don't want to import/export these! + DenseSet GUIDPreservedSymbols; + for (auto &Res : GlobalResolutions) { + if (Res.second.VisibleOutsideSummary && + // IRName will be defined if we have seen the prevailing copy of + // this value. If not, no need to preserve any ThinLTO copies. + !Res.second.IRName.empty()) + GUIDPreservedSymbols.insert(GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(Res.second.IRName))); + } + + computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); + // Save the status of having a regularLTO combined module, as // this is needed for generating the ThinLTO Task ID, and // the CombinedModule will be moved at the end of runRegularLTO. - bool HasRegularLTO = RegularLTO.CombinedModule != nullptr; + bool HasRegularLTO = RegularLTO.CombinedModule != nullptr || + !RegularLTO.ModsWithSummaries.empty(); // Invoke regular LTO if there was a regular LTO module to start with. if (HasRegularLTO) if (auto E = runRegularLTO(AddStream)) @@ -635,6 +673,11 @@ Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { } Error LTO::runRegularLTO(AddStreamFn AddStream) { + for (auto &M : RegularLTO.ModsWithSummaries) + if (Error Err = linkRegularLTO(std::move(M), + /*LivenessFromIndex=*/true)) + return Err; + // Make sure commons have the right size/alignment: we kept the largest from // all the prevailing when adding the inputs, and we apply it here. const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout(); @@ -964,22 +1007,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, StringMap> ResolvedODR; if (Conf.OptLevel > 0) { - // Compute "dead" symbols, we don't want to import/export these! - DenseSet GUIDPreservedSymbols; - for (auto &Res : GlobalResolutions) { - if (Res.second.VisibleOutsideThinLTO && - // IRName will be defined if we have seen the prevailing copy of - // this value. If not, no need to preserve any ThinLTO copies. - !Res.second.IRName.empty()) - GUIDPreservedSymbols.insert(GlobalValue::getGUID( - GlobalValue::getRealLinkageName(Res.second.IRName))); - } - - auto DeadSymbols = - computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols); - ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, - ImportLists, ExportLists, &DeadSymbols); + ImportLists, ExportLists); std::set ExportedGUIDs; for (auto &Res : GlobalResolutions) { @@ -992,16 +1021,12 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, if (Res.second.IRName.empty()) continue; auto GUID = GlobalValue::getGUID( - GlobalValue::getRealLinkageName(Res.second.IRName)); + GlobalValue::dropLLVMManglingEscape(Res.second.IRName)); // Mark exported unless index-based analysis determined it to be dead. - if (!DeadSymbols.count(GUID)) + if (ThinLTO.CombinedIndex.isGUIDLive(GUID)) ExportedGUIDs.insert(GUID); } - auto isPrevailing = [&](GlobalValue::GUID GUID, - const GlobalValueSummary *S) { - return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); - }; auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && @@ -1009,17 +1034,20 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ExportedGUIDs.count(GUID); }; thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported); - - auto recordNewLinkage = [&](StringRef ModuleIdentifier, - GlobalValue::GUID GUID, - GlobalValue::LinkageTypes NewLinkage) { - ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; - }; - - thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing, - recordNewLinkage); } + auto isPrevailing = [&](GlobalValue::GUID GUID, + const GlobalValueSummary *S) { + return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); + }; + auto recordNewLinkage = [&](StringRef ModuleIdentifier, + GlobalValue::GUID GUID, + GlobalValue::LinkageTypes NewLinkage) { + ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; + }; + thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing, + recordNewLinkage); + std::unique_ptr BackendProc = ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddStream, Cache); diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp index 4bd251f727a4353da699102e9c1ee4e0b5bdac18..3f72e446cdf2e81b47602ef62dc32c10f6bc6132 100644 --- a/lib/LTO/LTOBackend.cpp +++ b/lib/LTO/LTOBackend.cpp @@ -25,7 +25,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/LTO/LTO.h" -#include "llvm/LTO/legacy/UpdateCompilerUsed.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/ModuleSymbolTable.h" #include "llvm/Passes/PassBuilder.h" @@ -43,11 +42,6 @@ using namespace llvm; using namespace lto; -static cl::opt - LTOUseNewPM("lto-use-new-pm", - cl::desc("Run LTO passes using the new pass manager"), - cl::init(false), cl::Hidden); - LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) { errs() << "failed to open " << Path << ": " << Msg << '\n'; errs().flush(); @@ -118,19 +112,27 @@ Error Config::addSaveTemps(std::string OutputFileName, namespace { std::unique_ptr -createTargetMachine(Config &Conf, StringRef TheTriple, - const Target *TheTarget) { +createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) { + StringRef TheTriple = M.getTargetTriple(); SubtargetFeatures Features; Features.getDefaultSubtargetFeatures(Triple(TheTriple)); for (const std::string &A : Conf.MAttrs) Features.AddFeature(A); + Reloc::Model RelocModel; + if (Conf.RelocModel) + RelocModel = *Conf.RelocModel; + else + RelocModel = + M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; + return std::unique_ptr(TheTarget->createTargetMachine( - TheTriple, Conf.CPU, Features.getString(), Conf.Options, Conf.RelocModel, + TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel, Conf.CodeModel, Conf.CGOptLevel)); } -static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel) { +static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel, + bool IsThinLTO) { PassBuilder PB(TM); AAManager AA; @@ -174,7 +176,10 @@ static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel) { break; } - MPM = PB.buildLTODefaultPipeline(OL, false /* DebugLogging */); + if (IsThinLTO) + MPM = PB.buildThinLTODefaultPipeline(OL, false /* DebugLogging */); + else + MPM = PB.buildLTODefaultPipeline(OL, false /* DebugLogging */); MPM.run(Mod, MAM); // FIXME (davide): verify the output. @@ -252,17 +257,12 @@ static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM, bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, bool IsThinLTO, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) { - // There's still no ThinLTO pipeline hooked up in the new pass manager, - // once there is one, we can just remove this. - if (LTOUseNewPM && IsThinLTO) - report_fatal_error("ThinLTO not supported with the new PM yet!"); - // FIXME: Plumb the combined index into the new pass manager. if (!Conf.OptPipeline.empty()) runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline, Conf.DisableVerify); - else if (LTOUseNewPM) - runNewPMPasses(Mod, TM, Conf.OptLevel); + else if (Conf.UseNewPM) + runNewPMPasses(Mod, TM, Conf.OptLevel, IsThinLTO); else runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary); return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); @@ -312,7 +312,7 @@ void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream, std::unique_ptr MPartInCtx = std::move(MOrErr.get()); std::unique_ptr TM = - createTargetMachine(C, MPartInCtx->getTargetTriple(), T); + createTargetMachine(C, T, *MPartInCtx); codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx); }, @@ -353,19 +353,6 @@ finalizeOptimizationRemarks(std::unique_ptr DiagOutputFile) { DiagOutputFile->os().flush(); } -static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) { - // Collect the list of undefined symbols used in asm and update - // llvm.compiler.used to prevent optimization to drop these from the output. - StringSet<> AsmUndefinedRefs; - ModuleSymbolTable::CollectAsmSymbols( - Mod, - [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) { - if (Flags & object::BasicSymbolRef::SF_Undefined) - AsmUndefinedRefs.insert(Name); - }); - updateCompilerUsed(Mod, TM, AsmUndefinedRefs); -} - Error lto::backend(Config &C, AddStreamFn AddStream, unsigned ParallelCodeGenParallelismLevel, std::unique_ptr Mod, @@ -374,10 +361,7 @@ Error lto::backend(Config &C, AddStreamFn AddStream, if (!TOrErr) return TOrErr.takeError(); - std::unique_ptr TM = - createTargetMachine(C, Mod->getTargetTriple(), *TOrErr); - - handleAsmUndefinedRefs(*Mod, *TM); + std::unique_ptr TM = createTargetMachine(C, *TOrErr, *Mod); // Setup optimization remarks. auto DiagFileOrErr = lto::setupOptimizationRemarks( @@ -413,10 +397,7 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream, if (!TOrErr) return TOrErr.takeError(); - std::unique_ptr TM = - createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr); - - handleAsmUndefinedRefs(Mod, *TM); + std::unique_ptr TM = createTargetMachine(Conf, *TOrErr, Mod); if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod); diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 86fba843e980bf1fdc27f486e9a5416e87a44048..6a275560dc92b177a8d9bca9f077f9140d5a7182 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -495,17 +495,14 @@ void LTOCodeGenerator::verifyMergedModuleOnce() { return; HasVerifiedInput = true; - if (LTOStripInvalidDebugInfo) { - bool BrokenDebugInfo = false; - if (verifyModule(*MergedModule, &dbgs(), &BrokenDebugInfo)) - report_fatal_error("Broken module found, compilation aborted!"); - if (BrokenDebugInfo) { - emitWarning("Invalid debug info found, debug info will be stripped"); - StripDebugInfo(*MergedModule); - } - } - if (verifyModule(*MergedModule, &dbgs())) + bool BrokenDebugInfo = false; + if (verifyModule(*MergedModule, &dbgs(), + LTOStripInvalidDebugInfo ? &BrokenDebugInfo : nullptr)) report_fatal_error("Broken module found, compilation aborted!"); + if (BrokenDebugInfo) { + emitWarning("Invalid debug info found, debug info will be stripped"); + StripDebugInfo(*MergedModule); + } } void LTOCodeGenerator::finishOptimizationRemarks() { @@ -600,6 +597,7 @@ bool LTOCodeGenerator::compileOptimized(ArrayRef Out) { // If statistics were requested, print them out after codegen. if (llvm::AreStatisticsEnabled()) llvm::PrintStatistics(); + reportAndResetTimings(); finishOptimizationRemarks(); diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 11f0982c6a6029fa45a5208f6df87b9d2cc7cd95..3cc8b7d0e7706dafa966fe3d308bcb038ea34342 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -77,14 +77,12 @@ bool LTOModule::isBitcodeFile(StringRef Path) { } bool LTOModule::isThinLTO() { - // Right now the detection is only based on the summary presence. We may want - // to add a dedicated flag at some point. - Expected Result = hasGlobalValueSummary(MBRef); + Expected Result = getBitcodeLTOInfo(MBRef); if (!Result) { logAllUnhandledErrors(Result.takeError(), errs(), ""); return false; } - return *Result; + return Result->IsThinLTO; } bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, @@ -637,10 +635,10 @@ void LTOModule::parseMetadata() { raw_string_ostream OS(LinkerOpts); // Linker Options - if (Metadata *Val = getModule().getModuleFlag("Linker Options")) { - MDNode *LinkerOptions = cast(Val); + if (NamedMDNode *LinkerOptions = + getModule().getNamedMetadata("llvm.linker.options")) { for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { - MDNode *MDOptions = cast(LinkerOptions->getOperand(i)); + MDNode *MDOptions = LinkerOptions->getOperand(i); for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { MDString *MDOption = cast(MDOptions->getOperand(ii)); OS << " " << MDOption->getString(); diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index 970c75e4aceac3ef9e6072cf3ee02b363f09cd4f..e4094d44867b2d5ef057ce6cd5408d6c991e4ac9 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -14,10 +14,6 @@ #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h" -#ifdef HAVE_LLVM_REVISION -#include "LLVMLTORevision.h" -#endif - #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" @@ -28,16 +24,17 @@ #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Mangler.h" +#include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" #include "llvm/LTO/LTO.h" #include "llvm/Linker/Linker.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" @@ -47,6 +44,7 @@ #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/VCSRevision.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionImport.h" @@ -66,6 +64,7 @@ namespace llvm { extern cl::opt LTODiscardValueNames; extern cl::opt LTORemarksFilename; extern cl::opt LTOPassRemarksWithHotness; +extern cl::opt LTOStripInvalidDebugInfo; } namespace { @@ -123,8 +122,9 @@ static void computePrevailingCopies( }; for (auto &I : Index) { - if (HasMultipleCopies(I.second)) - PrevailingCopy[I.first] = getFirstDefinitionForLinker(I.second); + if (HasMultipleCopies(I.second.SummaryList)) + PrevailingCopy[I.first] = + getFirstDefinitionForLinker(I.second.SummaryList); } } @@ -145,6 +145,30 @@ static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) { report_fatal_error("renameModuleForThinLTO failed"); } +namespace { +class ThinLTODiagnosticInfo : public DiagnosticInfo { + const Twine &Msg; +public: + ThinLTODiagnosticInfo(const Twine &DiagMsg, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {} + void print(DiagnosticPrinter &DP) const override { DP << Msg; } +}; +} + +/// Verify the module and strip broken debug info. +static void verifyLoadedModule(Module &TheModule) { + bool BrokenDebugInfo = false; + if (verifyModule(TheModule, &dbgs(), + LTOStripInvalidDebugInfo ? &BrokenDebugInfo : nullptr)) + report_fatal_error("Broken module found, compilation aborted!"); + if (BrokenDebugInfo) { + TheModule.getContext().diagnose(ThinLTODiagnosticInfo( + "Invalid debug info found, debug info will be stripped", DS_Warning)); + StripDebugInfo(TheModule); + } +} + static std::unique_ptr loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context, bool Lazy, bool IsImporting) { @@ -162,6 +186,8 @@ loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context, }); report_fatal_error("Can't load module, abort."); } + if (!Lazy) + verifyLoadedModule(*ModuleOrErr.get()); return std::move(ModuleOrErr.get()); } @@ -184,6 +210,8 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index, }); report_fatal_error("importFunctions failed"); } + // Verify again after cross-importing. + verifyLoadedModule(TheModule); } static void optimizeModule(Module &TheModule, TargetMachine &TM, @@ -198,7 +226,8 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM, PMB.OptLevel = OptLevel; PMB.LoopVectorize = true; PMB.SLPVectorize = true; - PMB.VerifyInput = true; + // Already did this in verifyLoadedModule(). + PMB.VerifyInput = false; PMB.VerifyOutput = false; legacy::PassManager PM; @@ -304,7 +333,7 @@ public: // Start with the compiler revision Hasher.update(LLVM_VERSION_STRING); -#ifdef HAVE_LLVM_REVISION +#ifdef LLVM_REVISION Hasher.update(LLVM_REVISION); #endif @@ -449,7 +478,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index, { raw_svector_ostream OS(OutputBuffer); ProfileSummaryInfo PSI(TheModule); - auto Index = buildModuleSummaryIndex(TheModule, nullptr, nullptr); + auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI); WriteBitcodeToFile(&TheModule, OS, true, &Index); } return make_unique(std::move(OutputBuffer)); @@ -508,29 +537,25 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder, void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) { ThinLTOBuffer Buffer(Data, Identifier); - if (Modules.empty()) { - // First module added, so initialize the triple and some options - LLVMContext Context; - StringRef TripleStr; - ErrorOr TripleOrErr = expectedToErrorOrAndEmitErrors( - Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); - if (TripleOrErr) - TripleStr = *TripleOrErr; - Triple TheTriple(TripleStr); + LLVMContext Context; + StringRef TripleStr; + ErrorOr TripleOrErr = expectedToErrorOrAndEmitErrors( + Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); + + if (TripleOrErr) + TripleStr = *TripleOrErr; + + Triple TheTriple(TripleStr); + + if (Modules.empty()) initTMBuilder(TMBuilder, Triple(TheTriple)); + else if (TMBuilder.TheTriple != TheTriple) { + if (!TMBuilder.TheTriple.isCompatibleWith(TheTriple)) + report_fatal_error("ThinLTO modules with incompatible triples not " + "supported"); + initTMBuilder(TMBuilder, Triple(TMBuilder.TheTriple.merge(TheTriple))); } -#ifndef NDEBUG - else { - LLVMContext Context; - StringRef TripleStr; - ErrorOr TripleOrErr = expectedToErrorOrAndEmitErrors( - Context, getBitcodeTargetTriple(Buffer.getMemBuffer())); - if (TripleOrErr) - TripleStr = *TripleOrErr; - assert(TMBuilder.TheTriple.str() == TripleStr && - "ThinLTO modules with different triple not supported"); - } -#endif + Modules.push_back(Buffer); } @@ -569,25 +594,18 @@ std::unique_ptr TargetMachineBuilder::create() const { * "thin-link". */ std::unique_ptr ThinLTOCodeGenerator::linkCombinedIndex() { - std::unique_ptr CombinedIndex; + std::unique_ptr CombinedIndex = + llvm::make_unique(); uint64_t NextModuleId = 0; for (auto &ModuleBuffer : Modules) { - Expected> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create( - ModuleBuffer.getMemBuffer()); - if (!ObjOrErr) { + if (Error Err = readModuleSummaryIndex(ModuleBuffer.getMemBuffer(), + *CombinedIndex, NextModuleId++)) { // FIXME diagnose logAllUnhandledErrors( - ObjOrErr.takeError(), errs(), - "error: can't create ModuleSummaryIndexObjectFile for buffer: "); + std::move(Err), errs(), + "error: can't create module summary index for buffer: "); return nullptr; } - auto Index = (*ObjOrErr)->takeIndex(); - if (CombinedIndex) { - CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId); - } else { - CombinedIndex = std::move(Index); - } } return CombinedIndex; } @@ -610,13 +628,13 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); // Resolve LinkOnce/Weak symbols. StringMap> ResolvedODR; @@ -655,13 +673,13 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule, PreservedSymbols, Triple(TheModule.getTargetTriple())); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); auto &ImportList = ImportLists[TheModule.getModuleIdentifier()]; crossImportIntoModule(TheModule, Index, ModuleMap, ImportList); @@ -732,13 +750,13 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule, Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols); + computeDeadSymbols(Index, GUIDPreservedSymbols); // Generate import/export list StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); auto &ExportList = ExportLists[ModuleIdentifier]; // Be friendly and don't nuke totally the module when the client didn't @@ -884,14 +902,14 @@ void ThinLTOCodeGenerator::run() { computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple); // Compute "dead" symbols, we don't want to import/export these! - auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols); + computeDeadSymbols(*Index, GUIDPreservedSymbols); // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap ImportLists(ModuleCount); StringMap ExportLists(ModuleCount); ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists, - ExportLists, &DeadSymbols); + ExportLists); // We use a std::map here to be able to have a defined ordering when // producing a hash for the cache entry. @@ -1034,4 +1052,5 @@ void ThinLTOCodeGenerator::run() { // If statistics were requested, print them out now. if (llvm::AreStatisticsEnabled()) llvm::PrintStatistics(); + reportAndResetTimings(); } diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index 15a46a2d0420f1d368a1b31a8f9ba4a2e6e0dc81..defad1904989d2d0c037ffa9398714f2752c73a8 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -602,6 +602,7 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) { /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(), SGVar->getType()->getAddressSpace()); NewDGV->setAlignment(SGVar->getAlignment()); + NewDGV->copyAttributesFrom(SGVar); return NewDGV; } @@ -610,8 +611,11 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) { Function *IRLinker::copyFunctionProto(const Function *SF) { // If there is no linkage to be performed or we are linking from the source, // bring SF over. - return Function::Create(TypeMap.get(SF->getFunctionType()), - GlobalValue::ExternalLinkage, SF->getName(), &DstM); + auto *F = + Function::Create(TypeMap.get(SF->getFunctionType()), + GlobalValue::ExternalLinkage, SF->getName(), &DstM); + F->copyAttributesFrom(SF); + return F; } /// Set up prototypes for any aliases that come over from the source module. @@ -619,9 +623,11 @@ GlobalValue *IRLinker::copyGlobalAliasProto(const GlobalAlias *SGA) { // If there is no linkage to be performed or we're linking from the source, // bring over SGA. auto *Ty = TypeMap.get(SGA->getValueType()); - return GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(), - GlobalValue::ExternalLinkage, SGA->getName(), - &DstM); + auto *GA = + GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(), + GlobalValue::ExternalLinkage, SGA->getName(), &DstM); + GA->copyAttributesFrom(SGA); + return GA; } GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV, @@ -648,8 +654,6 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV, else if (SGV->hasExternalWeakLinkage()) NewGV->setLinkage(GlobalValue::ExternalWeakLinkage); - NewGV->copyAttributesFrom(SGV); - if (auto *NewGO = dyn_cast(NewGV)) { // Metadata for global variables and function declarations is copied eagerly. if (isa(SGV) || SGV->isDeclaration()) @@ -1153,6 +1157,11 @@ Error IRLinker::linkModuleFlagsMetadata() { mdconst::extract(DstOp->getOperand(0)); unsigned DstBehaviorValue = DstBehavior->getZExtValue(); + auto overrideDstValue = [&]() { + DstModFlags->setOperand(DstIndex, SrcOp); + Flags[ID].first = SrcOp; + }; + // If either flag has override behavior, handle it first. if (DstBehaviorValue == Module::Override) { // Diagnose inconsistent flags which both have override behavior. @@ -1163,8 +1172,7 @@ Error IRLinker::linkModuleFlagsMetadata() { continue; } else if (SrcBehaviorValue == Module::Override) { // Update the destination flag to that of the source. - DstModFlags->setOperand(DstIndex, SrcOp); - Flags[ID].first = SrcOp; + overrideDstValue(); continue; } @@ -1200,6 +1208,15 @@ Error IRLinker::linkModuleFlagsMetadata() { } continue; } + case Module::Max: { + ConstantInt *DstValue = + mdconst::extract(DstOp->getOperand(2)); + ConstantInt *SrcValue = + mdconst::extract(SrcOp->getOperand(2)); + if (SrcValue->getZExtValue() > DstValue->getZExtValue()) + overrideDstValue(); + break; + } case Module::Append: { MDNode *DstValue = cast(DstOp->getOperand(2)); MDNode *SrcValue = cast(SrcOp->getOperand(2)); @@ -1239,27 +1256,6 @@ Error IRLinker::linkModuleFlagsMetadata() { return Error::success(); } -// This function returns true if the triples match. -static bool triplesMatch(const Triple &T0, const Triple &T1) { - // If vendor is apple, ignore the version number. - if (T0.getVendor() == Triple::Apple) - return T0.getArch() == T1.getArch() && T0.getSubArch() == T1.getSubArch() && - T0.getVendor() == T1.getVendor() && T0.getOS() == T1.getOS(); - - return T0 == T1; -} - -// This function returns the merged triple. -static std::string mergeTriples(const Triple &SrcTriple, - const Triple &DstTriple) { - // If vendor is apple, pick the triple with the larger version number. - if (SrcTriple.getVendor() == Triple::Apple) - if (DstTriple.isOSVersionLT(SrcTriple)) - return SrcTriple.str(); - - return DstTriple.str(); -} - Error IRLinker::run() { // Ensure metadata materialized before value mapping. if (SrcM->getMaterializer()) @@ -1285,14 +1281,15 @@ Error IRLinker::run() { Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM.getTargetTriple()); - if (!SrcM->getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple)) + if (!SrcM->getTargetTriple().empty()&& + !SrcTriple.isCompatibleWith(DstTriple)) emitWarning("Linking two modules of different target triples: " + SrcM->getModuleIdentifier() + "' is '" + SrcM->getTargetTriple() + "' whereas '" + DstM.getModuleIdentifier() + "' is '" + DstM.getTargetTriple() + "'\n"); - DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple)); + DstM.setTargetTriple(SrcTriple.merge(DstTriple)); // Append the module inline asm string. if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) { diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp index 8c94e2780998f1851048b1f9bf620669e6a802de..ca5440237e49817b11c069355660d2d14a0da8d5 100644 --- a/lib/MC/ConstantPools.cpp +++ b/lib/MC/ConstantPools.cpp @@ -57,6 +57,10 @@ const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context, bool ConstantPool::empty() { return Entries.empty(); } +void ConstantPool::clearCache() { + CachedEntries.clear(); +} + // // AssemblerConstantPools implementation // @@ -98,6 +102,13 @@ void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) { } } +void AssemblerConstantPools::clearCacheForCurrentSection(MCStreamer &Streamer) { + MCSection *Section = Streamer.getCurrentSectionOnly(); + if (ConstantPool *CP = getConstantPool(Section)) { + CP->clearCache(); + } +} + const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer, const MCExpr *Expr, unsigned Size, SMLoc Loc) { diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index ee9c25cda94fd89815e83328fb1419786eb203b3..4d139132df4621d03a2ae2f3ff45be131722bd3f 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -13,11 +13,12 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -36,7 +37,6 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -63,7 +63,7 @@ using namespace llvm; namespace { -typedef DenseMap SectionIndexMapTy; +using SectionIndexMapTy = DenseMap; class ELFObjectWriter; @@ -194,8 +194,8 @@ public: ELFSymbolData &MSD, const MCAsmLayout &Layout); // Start and end offset of each section - typedef std::map> - SectionOffsetsTy; + using SectionOffsetsTy = + std::map>; bool shouldRelocateWithSymbol(const MCAssembler &Asm, const MCSymbolRefExpr *RefA, @@ -208,7 +208,7 @@ public: uint64_t &FixedValue) override; // Map from a signature symbol to the group section index - typedef DenseMap RevGroupMapTy; + using RevGroupMapTy = DenseMap; /// Compute the symbol table data /// @@ -1020,18 +1020,24 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, MCSectionELF &Section = static_cast(Sec); StringRef SectionName = Section.getSectionName(); + auto &MC = Asm.getContext(); + const auto &MAI = MC.getAsmInfo(); + // Compressing debug_frame requires handling alignment fragments which is // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow // for writing to arbitrary buffers) for little benefit. bool CompressionEnabled = - Asm.getContext().getAsmInfo()->compressDebugSections() != - DebugCompressionType::DCT_None; + MAI->compressDebugSections() != DebugCompressionType::None; if (!CompressionEnabled || !SectionName.startswith(".debug_") || SectionName == ".debug_frame") { Asm.writeSectionData(&Section, Layout); return; } + assert((MAI->compressDebugSections() == DebugCompressionType::Z || + MAI->compressDebugSections() == DebugCompressionType::GNU) && + "expected zlib or zlib-gnu style compression"); + SmallVector UncompressedData; raw_svector_ostream VecOS(UncompressedData); raw_pwrite_stream &OldStream = getStream(); @@ -1048,8 +1054,7 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, return; } - bool ZlibStyle = Asm.getContext().getAsmInfo()->compressDebugSections() == - DebugCompressionType::DCT_Zlib; + bool ZlibStyle = MAI->compressDebugSections() == DebugCompressionType::Z; if (!maybeWriteCompression(UncompressedData.size(), CompressedContents, ZlibStyle, Sec.getAlignment())) { getStream() << UncompressedData; @@ -1061,8 +1066,7 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec, Section.setFlags(Section.getFlags() | ELF::SHF_COMPRESSED); else // Add "z" prefix to section name. This is zlib-gnu style. - Asm.getContext().renameELFSection(&Section, - (".z" + SectionName.drop_front(1)).str()); + MC.renameELFSection(&Section, (".z" + SectionName.drop_front(1)).str()); getStream() << CompressedContents; } diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index fc0aa788f6d3a88e02c4bf650bd0bb6116dd833c..3642f37aa855c4bfe69486b6883963bde40f347c 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCAsmBackend.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixupKindInfo.h" #include #include diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index b9be685cedc4139dc2d214bd69b98b6baf7a29f6..f05904048e0b7d4504a74a037309bbd35463de3f 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 4b2001764e972b0515971c61716d7c136bb1516d..c74840982fb75d19fae52b492531ed6f4601f82a 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoDarwin.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/Support/MachO.h" using namespace llvm; diff --git a/lib/MC/MCAsmInfoELF.cpp b/lib/MC/MCAsmInfoELF.cpp index e44c08b50d766b4c9b9ca1638c41da60887daa20..b0dc43c6c868fb759e78390effde44bc110419a6 100644 --- a/lib/MC/MCAsmInfoELF.cpp +++ b/lib/MC/MCAsmInfoELF.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoELF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" using namespace llvm; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 92dcf535ec996a32a02250f5d2a86da1eb853c6c..9e5553fa8d422ac53ff560dd31d1912c428df318 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -103,7 +103,10 @@ public: void AddComment(const Twine &T, bool EOL = true) override; /// AddEncodingComment - Add a comment showing the encoding of an instruction. - void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &); + /// If PrintSchedInfo - is true then the comment sched:[x:y] should + // be added to output if it's being supported by target + void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &, + bool PrintSchedInfo); /// GetCommentOS - Return a raw_ostream that comments can be written to. /// Unlike AddComment, you are required to terminate comments with \n if you @@ -278,7 +281,8 @@ public: void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except) override; void EmitWinEHHandlerData() override; - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool PrintSchedInfo) override; void EmitBundleAlignMode(unsigned AlignPow2) override; void EmitBundleLock(bool AlignToEnd) override; @@ -1504,7 +1508,8 @@ void MCAsmStreamer::EmitWinCFIEndProlog() { } void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, + bool PrintSchedInfo) { raw_ostream &OS = GetCommentOS(); SmallString<256> Code; SmallVector Fixups; @@ -1577,7 +1582,11 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, } } } - OS << "]\n"; + OS << "]"; + // If we are not going to add fixup or schedul comments after this point then + // we have to end the current comment line with "\n". + if (Fixups.size() || !PrintSchedInfo) + OS << "\n"; for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { MCFixup &F = Fixups[i]; @@ -1588,16 +1597,19 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, } void MCAsmStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, + bool PrintSchedInfo) { assert(getCurrentSectionOnly() && "Cannot emit contents before setting section!"); // Show the encoding in a comment if we have a code emitter. if (Emitter) - AddEncodingComment(Inst, STI); + AddEncodingComment(Inst, STI, PrintSchedInfo); // Show the MCInst if enabled. if (ShowInst) { + if (PrintSchedInfo) + GetCommentOS() << "\n"; Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n "); GetCommentOS() << "\n"; } @@ -1607,6 +1619,16 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst, else InstPrinter->printInst(&Inst, OS, "", STI); + if (PrintSchedInfo) { + std::string SI = STI.getSchedInfoStr(Inst); + if (!SI.empty()) + GetCommentOS() << SI; + } + + StringRef Comments = CommentToEmit; + if (Comments.size() && Comments.back() != '\n') + GetCommentOS() << "\n"; + EmitEOL(); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index c2bb7b2771814295bde62d3eed227bdb69dd34a4..53cdaac3aa54bb841430f79295f658ef9a8add7c 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCAssembler.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -16,7 +17,6 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" @@ -37,9 +37,9 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include #include #include +#include #include #include diff --git a/lib/MC/MCCodeView.cpp b/lib/MC/MCCodeView.cpp index 99a5c11a498e9863342b5cac07dfbc6b8ebf7b2b..92b1e12da5525e0e497a972fc7df718830d0e89f 100644 --- a/lib/MC/MCCodeView.cpp +++ b/lib/MC/MCCodeView.cpp @@ -12,15 +12,15 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCCodeView.h" -#include "llvm/MC/MCAsmLayout.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/EndianStream.h" using namespace llvm; @@ -145,7 +145,7 @@ void CodeViewContext::emitStringTable(MCObjectStreamer &OS) { MCSymbol *StringBegin = Ctx.createTempSymbol("strtab_begin", false), *StringEnd = Ctx.createTempSymbol("strtab_end", false); - OS.EmitIntValue(unsigned(ModuleSubstreamKind::StringTable), 4); + OS.EmitIntValue(unsigned(DebugSubsectionKind::StringTable), 4); OS.emitAbsoluteSymbolDiff(StringEnd, StringBegin, 4); OS.EmitLabel(StringBegin); @@ -172,7 +172,7 @@ void CodeViewContext::emitFileChecksums(MCObjectStreamer &OS) { MCSymbol *FileBegin = Ctx.createTempSymbol("filechecksums_begin", false), *FileEnd = Ctx.createTempSymbol("filechecksums_end", false); - OS.EmitIntValue(unsigned(ModuleSubstreamKind::FileChecksums), 4); + OS.EmitIntValue(unsigned(DebugSubsectionKind::FileChecksums), 4); OS.emitAbsoluteSymbolDiff(FileEnd, FileBegin, 4); OS.EmitLabel(FileBegin); @@ -197,7 +197,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS, MCSymbol *LineBegin = Ctx.createTempSymbol("linetable_begin", false), *LineEnd = Ctx.createTempSymbol("linetable_end", false); - OS.EmitIntValue(unsigned(ModuleSubstreamKind::Lines), 4); + OS.EmitIntValue(unsigned(DebugSubsectionKind::Lines), 4); OS.emitAbsoluteSymbolDiff(LineEnd, LineBegin, 4); OS.EmitLabel(LineBegin); OS.EmitCOFFSecRel32(FuncBegin, /*Offset=*/0); @@ -208,7 +208,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS, bool HaveColumns = any_of(Locs, [](const MCCVLineEntry &LineEntry) { return LineEntry.getColumn() != 0; }); - OS.EmitIntValue(HaveColumns ? int(LineFlags::HaveColumns) : 0, 2); + OS.EmitIntValue(HaveColumns ? int(LF_HaveColumns) : 0, 2); OS.emitAbsoluteSymbolDiff(FuncEnd, FuncBegin, 4); for (auto I = Locs.begin(), E = Locs.end(); I != E;) { diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 4628d0ab88f30d3d0b8c3f8f5acf6327eca04b57..48ee84edb096b0cfe2dc9524c3309ddb4aa9b7df 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -7,14 +7,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCContext.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" @@ -32,14 +34,12 @@ #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index aa5072743bdfeccb25d40a67790b18ab9a27e31d..ef1d8335e1bd71968c8d5915c2226307c17feed1 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -27,8 +27,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/lib/MC/MCDisassembler/MCRelocationInfo.cpp index 5805fd7007d2cb60306b305142c1796a933f3c77..8f932a3f0d48771f5322ac508ffb48f15c6cba7f 100644 --- a/lib/MC/MCDisassembler/MCRelocationInfo.cpp +++ b/lib/MC/MCDisassembler/MCRelocationInfo.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" -#include "llvm/Support/TargetRegistry.h" #include "llvm-c/Disassembler.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index cc32e90ad36ee023ea83421094562468178890e8..a2beee32f2cb133dbaf3907d6645cf7d3ec8c186 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -7,19 +7,20 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCDwarf.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Config/config.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" @@ -28,7 +29,6 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" @@ -168,7 +168,7 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section, // and the current Label. const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo(); MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label, - asmInfo->getPointerSize()); + asmInfo->getCodePointerSize()); Discriminator = 0; LastLine = LineEntry.getLine(); @@ -188,7 +188,7 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section, const MCAsmInfo *AsmInfo = Ctx.getAsmInfo(); MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd, - AsmInfo->getPointerSize()); + AsmInfo->getCodePointerSize()); } // @@ -594,7 +594,7 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, // Figure the padding after the header before the table of address and size // pairs who's values are PointerSize'ed. const MCAsmInfo *asmInfo = context.getAsmInfo(); - int AddrSize = asmInfo->getPointerSize(); + int AddrSize = asmInfo->getCodePointerSize(); int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1)); if (Pad == 2 * AddrSize) Pad = 0; @@ -677,7 +677,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, // The DWARF v5 header has unit type, address size, abbrev offset. // Earlier versions have abbrev offset, address size. const MCAsmInfo &AsmInfo = *context.getAsmInfo(); - int AddrSize = AsmInfo.getPointerSize(); + int AddrSize = AsmInfo.getCodePointerSize(); if (context.getDwarfVersion() >= 5) { MCOS->EmitIntValue(dwarf::DW_UT_compile, 1); MCOS->EmitIntValue(AddrSize, 1); @@ -823,7 +823,7 @@ static void EmitGenDwarfRanges(MCStreamer *MCOS) { auto &Sections = context.getGenDwarfSectionSyms(); const MCAsmInfo *AsmInfo = context.getAsmInfo(); - int AddrSize = AsmInfo->getPointerSize(); + int AddrSize = AsmInfo->getCodePointerSize(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); @@ -981,7 +981,7 @@ static unsigned getSizeForEncoding(MCStreamer &streamer, default: llvm_unreachable("Unknown Encoding"); case dwarf::DW_EH_PE_absptr: case dwarf::DW_EH_PE_signed: - return context.getAsmInfo()->getPointerSize(); + return context.getAsmInfo()->getCodePointerSize(); case dwarf::DW_EH_PE_udata2: case dwarf::DW_EH_PE_sdata2: return 2; @@ -1318,7 +1318,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCSymbol *personality, if (CIEVersion >= 4) { // Address Size - Streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1); + Streamer.EmitIntValue(context.getAsmInfo()->getCodePointerSize(), 1); // Segment Descriptor Size Streamer.EmitIntValue(0, 1); @@ -1384,7 +1384,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(const MCSymbol *personality, InitialCFAOffset = CFAOffset; // Padding - Streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize()); + Streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getCodePointerSize()); Streamer.EmitLabel(sectionEnd); return *sectionStart; @@ -1453,7 +1453,7 @@ void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart, // The size of a .eh_frame section has to be a multiple of the alignment // since a null CIE is interpreted as the end. Old systems overaligned // .eh_frame, so we do too and account for it in the last FDE. - unsigned Align = LastInSection ? asmInfo->getPointerSize() : PCSize; + unsigned Align = LastInSection ? asmInfo->getCodePointerSize() : PCSize; Streamer.EmitValueToAlignment(Align); Streamer.EmitLabel(fdeEnd); @@ -1514,6 +1514,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, MCContext &Context = Streamer.getContext(); const MCObjectFileInfo *MOFI = Context.getObjectFileInfo(); + const MCAsmInfo *AsmInfo = Context.getAsmInfo(); FrameEmitterImpl Emitter(IsEH, Streamer); ArrayRef FrameArray = Streamer.getDwarfFrameInfos(); @@ -1525,7 +1526,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, if (Frame.CompactUnwindEncoding == 0) continue; if (!SectionEmitted) { Streamer.SwitchSection(MOFI->getCompactUnwindSection()); - Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize()); + Streamer.EmitValueToAlignment(AsmInfo->getCodePointerSize()); SectionEmitted = true; } NeedsEHFrameSection |= diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index c8e0223c0573b17c2193342d7b9dc8bc16fb6f5c..50c1f6e79f8a27f936391175e8c08229e107098b 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCELFStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFragment.h" @@ -27,10 +28,9 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 8149aa27327caad72df79ef806040101ae73cd9e..38a8af49c19491312f6fefc907a722e866ae728d 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCExpr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" @@ -655,8 +655,12 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, // the OS X assembler will completely drop the 4. We should probably // include it in the relocation or produce an error if that is not // possible. + // Allow constant expressions. if (!A && !B) return true; + // Allows aliases with zero offset. + if (Res.getConstant() == 0 && (!A || !B)) + return true; } } diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp index 90b44177cf5e8a49864bbaa807dd937007921e1b..f3d0eb55eecd4c6db2ef1a20e041a02c520f8552 100644 --- a/lib/MC/MCFragment.cpp +++ b/lib/MC/MCFragment.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCFragment.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index 9121790959749cc2ae99e51ffde6a5dd324e159e..9296fcedb72b5ae6ffe3b6434b69f3213679b81d 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCInstPrinter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp index 566944c53548a61f623c12ea189eac6aed1e8672..280b5cf68c98599c5b6d9434c4e0a4469f6ffa9b 100644 --- a/lib/MC/MCInstrAnalysis.cpp +++ b/lib/MC/MCInstrAnalysis.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 1e9ef4163256ae505421ba44381e655a70df6946..674c7b9bf61975cf63ce80667c884f4d71f007fb 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -32,8 +32,8 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index d156f5d05a3160c0a2b6e87733d56a24285709cf..4db9a2c8d8de9070d47493d3185ac5a8f0565ae7 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" using namespace llvm; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 9f94264684f947bcf4e8696be69762ff16eca3fa..21c5516785efd2b9c77ba70a292282f4d5d2cc9e 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -10,6 +10,8 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSection.h" @@ -17,8 +19,6 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionWasm.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/ELF.h" using namespace llvm; @@ -241,6 +241,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { DwarfStrSection = Ctx->getMachOSection("__DWARF", "__debug_str", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "info_string"); + DwarfStrOffSection = + Ctx->getMachOSection("__DWARF", "__debug_str_offs", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata(), "section_str_off"); DwarfLocSection = Ctx->getMachOSection("__DWARF", "__debug_loc", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "section_debug_loc"); @@ -286,6 +289,10 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) { ((CMModel == CodeModel::Large) ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); break; + case Triple::bpfel: + case Triple::bpfeb: + FDECFIEncoding = dwarf::DW_EH_PE_sdata8; + break; default: FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; break; @@ -553,6 +560,11 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) { DwarfAccelTypesSection = Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0); + // String Offset and Address Sections + DwarfStrOffSection = + Ctx->getELFSection(".debug_str_offsets", DebugSecType, 0); + DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0); + // Fission Sections DwarfInfoDWOSection = Ctx->getELFSection(".debug_info.dwo", DebugSecType, 0); @@ -569,7 +581,6 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) { Ctx->getELFSection(".debug_loc.dwo", DebugSecType, 0); DwarfStrOffDWOSection = Ctx->getELFSection(".debug_str_offsets.dwo", DebugSecType, 0); - DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0); // DWP Sections DwarfCUIndexSection = @@ -691,6 +702,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata(), "info_string"); + DwarfStrOffSection = Ctx->getCOFFSection( + ".debug_str_offsets", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata(), "section_str_off"); DwarfLocSection = Ctx->getCOFFSection( ".debug_loc", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | @@ -745,7 +761,7 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { ".debug_str_offsets.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); + SectionKind::getMetadata(), "section_str_off_dwo"); DwarfAddrSection = Ctx->getCOFFSection( ".debug_addr", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 726326be2ee1556056298d06789476b466267a6d..174397e273960066b428250e0eecdc9f85c7f41a 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -133,6 +133,11 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, // Avoid fixups when possible. int64_t AbsValue; if (Value->evaluateAsAbsolute(AbsValue, getAssembler())) { + if (!isUIntN(8 * Size, AbsValue) && !isIntN(8 * Size, AbsValue)) { + getContext().reportError( + Loc, "value evaluated as " + Twine(AbsValue) + " is out of range."); + return; + } EmitIntValue(AbsValue, Size); return; } @@ -238,7 +243,7 @@ bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const { } void MCObjectStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, bool) { MCStreamer::EmitInstruction(Inst, STI); MCSection *Sec = getCurrentSectionOnly(); diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 478b4e84e74ac5507b4054bd4c2e71be34d0aeb0..98ac48a23f91c5cbe4090ed6f3c7eafe873205db 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCSymbol.h" using namespace llvm; diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 38dadfe62135561798c08790cf4bd708115fb121..2b963607b8374ba92f31955fa8d4db93a1dd65cc 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SaveAndRestore.h" diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index e65ce9f0b9361bf69d61504eb9005cee653fdbf6..dad47e49e2c2062e2aaa3c6b619dbafc78e9b0ff 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -15,12 +15,13 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" #include "llvm/MC/MCContext.h" @@ -47,7 +48,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" @@ -134,7 +134,7 @@ struct ParseStatementInfo { SmallVectorImpl *AsmRewrites = nullptr; - ParseStatementInfo() = default; + ParseStatementInfo() = delete; ParseStatementInfo(SmallVectorImpl *rewrites) : AsmRewrites(rewrites) {} }; @@ -287,6 +287,8 @@ public: /// } private: + bool isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc); + void altMacroString(StringRef AltMacroStr, std::string &Res); bool parseStatement(ParseStatementInfo &Info, MCAsmParserSemaCallback *SI); bool parseCurlyBlockScope(SmallVectorImpl& AsmStrRewrites); @@ -412,7 +414,7 @@ private: DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE, DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED, DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE, - DK_MACROS_ON, DK_MACROS_OFF, + DK_MACROS_ON, DK_MACROS_OFF, DK_ALTMACRO, DK_NOALTMACRO, DK_MACRO, DK_EXITM, DK_ENDM, DK_ENDMACRO, DK_PURGEM, DK_SLEB128, DK_ULEB128, DK_ERR, DK_ERROR, DK_WARNING, @@ -484,7 +486,8 @@ private: bool parseDirectiveEndMacro(StringRef Directive); bool parseDirectiveMacro(SMLoc DirectiveLoc); bool parseDirectiveMacrosOnOff(StringRef Directive); - + // alternate macro mode directives + bool parseDirectiveAltmacro(StringRef Directive); // ".bundle_align_mode" bool parseDirectiveBundleAlignMode(); // ".bundle_lock" @@ -700,7 +703,7 @@ const AsmToken &AsmParser::Lex() { // if it's a end of statement with a comment in it if (getTok().is(AsmToken::EndOfStatement)) { // if this is a line comment output it. - if (getTok().getString().front() != '\n' && + if (!getTok().getString().empty() && getTok().getString().front() != '\n' && getTok().getString().front() != '\r' && MAI.preserveAsmComments()) Out.addExplicitComment(Twine(getTok().getString())); } @@ -737,6 +740,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { HadError = false; AsmCond StartingCondState = TheCondState; + SmallVector AsmStrRewrites; // If we are generating dwarf for assembly source files save the initial text // section and generate a .file directive. @@ -756,7 +760,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // While we have input, parse each statement. while (Lexer.isNot(AsmToken::Eof)) { - ParseStatementInfo Info; + ParseStatementInfo Info(&AsmStrRewrites); if (!parseStatement(Info, nullptr)) continue; @@ -1190,6 +1194,42 @@ AsmParser::applyModifierToExpr(const MCExpr *E, llvm_unreachable("Invalid expression kind!"); } +/// This function checks if the next token is type or arithmetic. +/// string that begin with character '<' must end with character '>'. +/// otherwise it is arithmetics. +/// If the function returns a 'true' value, +/// the End argument will be filled with the last location pointed to the '>' +/// character. + +/// There is a gap between the AltMacro's documentation and the single quote implementation. +/// GCC does not fully support this feature and so we will not support it. +/// TODO: Adding single quote as a string. +bool AsmParser::isAltmacroString(SMLoc &StrLoc, SMLoc &EndLoc) { + assert((StrLoc.getPointer() != NULL) && + "Argument to the function cannot be a NULL value"); + const char *CharPtr = StrLoc.getPointer(); + while ((*CharPtr != '>') && (*CharPtr != '\n') && + (*CharPtr != '\r') && (*CharPtr != '\0')){ + if(*CharPtr == '!') + CharPtr++; + CharPtr++; + } + if (*CharPtr == '>') { + EndLoc = StrLoc.getFromPointer(CharPtr + 1); + return true; + } + return false; +} + +/// \brief creating a string without the escape characters '!'. +void AsmParser::altMacroString(StringRef AltMacroStr,std::string &Res) { + for (size_t Pos = 0; Pos < AltMacroStr.size(); Pos++) { + if (AltMacroStr[Pos] == '!') + Pos++; + Res += AltMacroStr[Pos]; + } +} + /// \brief Parse an expression and return it. /// /// expr ::= expr &&,|| expr -> lowest. @@ -1483,7 +1523,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, Lex(); if (Lexer.is(AsmToken::EndOfStatement)) { // if this is a line comment we can drop it safely - if (getTok().getString().front() == '\r' || + if (getTok().getString().empty() || getTok().getString().front() == '\r' || getTok().getString().front() == '\n') Out.AddBlankLine(); Lex(); @@ -1650,7 +1690,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, } // Emit the label. - if (!ParsingInlineAsm) + if (!getTargetParser().isParsingInlineAsm()) Out.EmitLabel(Sym, IDLoc); // If we are generating dwarf for assembly source files then gather the @@ -1755,8 +1795,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, case DK_8BYTE: return parseDirectiveValue(IDVal, 8); case DK_DC_A: - return parseDirectiveValue(IDVal, - getContext().getAsmInfo()->getPointerSize()); + return parseDirectiveValue( + IDVal, getContext().getAsmInfo()->getCodePointerSize()); case DK_OCTA: return parseDirectiveOctaValue(IDVal); case DK_SINGLE: @@ -1921,6 +1961,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveMacrosOnOff(IDVal); case DK_MACRO: return parseDirectiveMacro(IDLoc); + case DK_ALTMACRO: + case DK_NOALTMACRO: + return parseDirectiveAltmacro(IDVal); case DK_EXITM: return parseDirectiveExitMacro(IDVal); case DK_ENDM: @@ -2057,9 +2100,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // If parsing succeeded, match the instruction. if (!ParseHadError) { uint64_t ErrorInfo; - if (getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode, - Info.ParsedOperands, Out, - ErrorInfo, ParsingInlineAsm)) + if (getTargetParser().MatchAndEmitInstruction( + IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo, + getTargetParser().isParsingInlineAsm())) return true; } return false; @@ -2269,9 +2312,27 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, } else { bool VarargParameter = HasVararg && Index == (NParameters - 1); for (const AsmToken &Token : A[Index]) + // For altmacro mode, you can write '%expr'. + // The prefix '%' evaluates the expression 'expr' + // and uses the result as a string (e.g. replace %(1+2) with the string "3"). + // Here, we identify the integer token which is the result of the + // absolute expression evaluation and replace it with its string representation. + if ((Lexer.IsaAltMacroMode()) && + (*(Token.getString().begin()) == '%') && Token.is(AsmToken::Integer)) + // Emit an integer value to the buffer. + OS << Token.getIntVal(); + // Only Token that was validated as a string and begins with '<' + // is considered altMacroString!!! + else if ((Lexer.IsaAltMacroMode()) && + (*(Token.getString().begin()) == '<') && + Token.is(AsmToken::String)) { + std::string Res; + altMacroString(Token.getStringContents(), Res); + OS << Res; + } // We expect no quotes around the string's contents when // parsing for varargs. - if (Token.getKind() != AsmToken::String || VarargParameter) + else if (Token.isNot(AsmToken::String) || VarargParameter) OS << Token.getString(); else OS << Token.getStringContents(); @@ -2442,13 +2503,37 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, NamedParametersFound = true; } + bool Vararg = HasVararg && Parameter == (NParameters - 1); if (NamedParametersFound && FA.Name.empty()) return Error(IDLoc, "cannot mix positional and keyword arguments"); - bool Vararg = HasVararg && Parameter == (NParameters - 1); - if (parseMacroArgument(FA.Value, Vararg)) - return true; + SMLoc StrLoc = Lexer.getLoc(); + SMLoc EndLoc; + if (Lexer.IsaAltMacroMode() && Lexer.is(AsmToken::Percent)) { + const MCExpr *AbsoluteExp; + int64_t Value; + /// Eat '%' + Lex(); + if (parseExpression(AbsoluteExp, EndLoc)) + return false; + if (!AbsoluteExp->evaluateAsAbsolute(Value)) + return Error(StrLoc, "expected absolute expression"); + const char *StrChar = StrLoc.getPointer(); + const char *EndChar = EndLoc.getPointer(); + AsmToken newToken(AsmToken::Integer, StringRef(StrChar , EndChar - StrChar), Value); + FA.Value.push_back(newToken); + } else if (Lexer.IsaAltMacroMode() && Lexer.is(AsmToken::Less) && + isAltmacroString(StrLoc, EndLoc)) { + const char *StrChar = StrLoc.getPointer(); + const char *EndChar = EndLoc.getPointer(); + jumpToLoc(EndLoc, CurBuffer); + /// Eat from '<' to '>' + Lex(); + AsmToken newToken(AsmToken::String, StringRef(StrChar, EndChar - StrChar)); + FA.Value.push_back(newToken); + } else if(parseMacroArgument(FA.Value, Vararg)) + return true; unsigned PI = Parameter; if (!FA.Name.empty()) { @@ -3840,6 +3925,19 @@ bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) { return false; } +/// parseDirectiveAltmacro +/// ::= .altmacro +/// ::= .noaltmacro +bool AsmParser::parseDirectiveAltmacro(StringRef Directive) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '" + Directive + "' directive"); + if (Directive == ".altmacro") + getLexer().SetAltMacroMode(true); + else + getLexer().SetAltMacroMode(false); + return false; +} + /// parseDirectiveMacrosOnOff /// ::= .macros_on /// ::= .macros_off @@ -4937,6 +5035,8 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".err"] = DK_ERR; DirectiveKindMap[".error"] = DK_ERROR; DirectiveKindMap[".warning"] = DK_WARNING; + DirectiveKindMap[".altmacro"] = DK_ALTMACRO; + DirectiveKindMap[".noaltmacro"] = DK_NOALTMACRO; DirectiveKindMap[".reloc"] = DK_RELOC; DirectiveKindMap[".dc"] = DK_DC; DirectiveKindMap[".dc.a"] = DK_DC_A; diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index bec62ccb2f7f784f390e0d47ed77a367115fe535..b83d68d4fe206ffee77541e650010c9d15229d37 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/SectionKind.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/SMLoc.h" #include #include diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 73a7ad0500c372a4fc8a658e22bb4f80ed29a63a..f4152a9067a06238f4b60571c253f5466d2699e2 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectFileInfo.h" @@ -25,10 +26,9 @@ #include "llvm/MC/SectionKind.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/MachO.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 401011a027f42e8d2e123c1d1453e8e7908df549..a407691b0bd1733498e1ce6592c39b3f3473e015 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -7,8 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -23,7 +24,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" #include @@ -603,6 +603,8 @@ EndStmt: Type = ELF::SHT_NOTE; else if (TypeName == "unwind") Type = ELF::SHT_X86_64_UNWIND; + else if (TypeName == "llvm_odrtab") + Type = ELF::SHT_LLVM_ODRTAB; else if (TypeName.getAsInteger(0, Type)) return TokError("unknown section type"); } diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp index f8fe78aece0cacd11c43581f330840ea1d3f244b..8f845ee1d76fa934ed5aedef86811f8278deac13 100644 --- a/lib/MC/MCParser/MCAsmLexer.cpp +++ b/lib/MC/MCParser/MCAsmLexer.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/SMLoc.h" using namespace llvm; -MCAsmLexer::MCAsmLexer() { +MCAsmLexer::MCAsmLexer() : AltMacroMode(false) { CurTok.emplace_back(AsmToken::Space, StringRef()); } diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index 27b37f3e2dfbc930ab1cab8f3f5b42e029b5940c..ea36b3b9b3b2a4637d585e93d1fd6336043dd04b 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/Support/Debug.h" diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp index 5f821443bb9649895ed9939c26c6b151c52da0c6..64ac82a6c66f37b457150db09f6d9e2e0df69b44 100644 --- a/lib/MC/MCParser/MCTargetAsmParser.cpp +++ b/lib/MC/MCParser/MCTargetAsmParser.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCContext.h" using namespace llvm; diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp index a75100a4876b7c015f0a81ab0b12c89a449b92a6..0f76c1838b518840c13d96998231de1d087b6134 100644 --- a/lib/MC/MCRegisterInfo.cpp +++ b/lib/MC/MCRegisterInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseMap.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp index 7986c01220434cfd714d1052190c1587c25316ec..b961cb3968e86a2736c1efa22cfba421feda7427 100644 --- a/lib/MC/MCSection.cpp +++ b/lib/MC/MCSection.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSection.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index f0709cbc25153647aa0fa63a16ce3e3ccb38f904..72a7fc36a460c9fbdbf8335bf70638d4620b684c 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index 78fe01cca24a3b1adc37b2133fb2b313d4230a0b..2f4f61aa4d50445b665c7f9956ab01d3639ee4ff 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSectionELF.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -147,6 +147,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, // Print hex value of the flag while we do not have // any standard symbolic representation of the flag. OS << "0x7000001e"; + else if (Type == ELF::SHT_LLVM_ODRTAB) + OS << "llvm_odrtab"; else report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) + " for section " + getSectionName()); diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index b9c01c66f31d766830be6a42e309208efdec0c45..2bfb9a63eedbd6cb10f4a14ba5e11610f934106f 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -7,9 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeView.h" @@ -21,19 +23,17 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" #include "llvm/MC/MCWinEH.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include #include #include +#include #include using namespace llvm; @@ -777,8 +777,8 @@ void MCStreamer::visitUsedExpr(const MCExpr &Expr) { } } -void MCStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) { +void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) { // Scan for values. for (unsigned i = Inst.getNumOperands(); i--;) if (Inst.getOperand(i).isExpr()) diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 777b4e3d6b676ee38be2b5fe7133afaf26173ef8..385cdcc6232025db9ddf6416e03ac8156d076b83 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index cb262542b89f88f33f89bbd1b779659b2e5a2884..9abaaef2fe848b66f597dfa2c1acde0a9be1f380 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCSymbol.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp index ffa8260d43420c77699b56b3ad447ff68eb4f70f..67449eb6dcf938247ed5324015cb19401b7bf992 100644 --- a/lib/MC/MCSymbolELF.cpp +++ b/lib/MC/MCSymbolELF.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCSymbolELF.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/Support/ELF.h" namespace llvm { diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp index 5d666b67fddbe858bda50fc29459cbedbac88ae4..b85e53db5d616af8bd97193b3fa3fea038e90110 100644 --- a/lib/MC/MCTargetOptions.cpp +++ b/lib/MC/MCTargetOptions.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCTargetOptions.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; diff --git a/lib/MC/MCWasmObjectTargetWriter.cpp b/lib/MC/MCWasmObjectTargetWriter.cpp index a09a17d7a124f60d7cf48250006227533ac81acc..301f30d4f6ecf97d646cb6b1fe36acb3934be96b 100644 --- a/lib/MC/MCWasmObjectTargetWriter.cpp +++ b/lib/MC/MCWasmObjectTargetWriter.cpp @@ -17,11 +17,5 @@ using namespace llvm; MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit_) : Is64Bit(Is64Bit_) {} -bool MCWasmObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym, - unsigned Type) const { - return false; -} - -void MCWasmObjectTargetWriter::sortRelocs( - const MCAssembler &Asm, std::vector &Relocs) { -} +// Pin the vtable to this object file +MCWasmObjectTargetWriter::~MCWasmObjectTargetWriter() = default; diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp index 21a913999f64eb408c7bd92c28d8e0045897ea8f..a5d0f5a2cb7509dd88021b84881ae4c6fa1460ad 100644 --- a/lib/MC/MCWinEH.cpp +++ b/lib/MC/MCWinEH.cpp @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCWinEH.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCWinEH.h" -#include "llvm/Support/COFF.h" namespace llvm { namespace WinEH { diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index d9ccf0dd661f1307f2871a4a23ebdbf649e2e704..c4e7cdbe095e7d7e0fab14e7779ae034122fd6ff 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -8,8 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -27,7 +28,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index fbd7ba60bc90b20f239faec31f968a711a6a35dd..6025a20a9c193a2fa1dcc6cb0ddcad7b0d42f257 100644 --- a/lib/MC/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/StringTableBuilder.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" -#include "llvm/MC/StringTableBuilder.h" -#include "llvm/Support/COFF.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -58,7 +58,7 @@ void StringTableBuilder::write(raw_ostream &OS) const { OS << Data; } -typedef std::pair StringPair; +using StringPair = std::pair; void StringTableBuilder::write(uint8_t *Buf) const { assert(isFinalized()); diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 51aaa4b0aa2593b20037beda8c34fa37e38cea1f..b68e88ca5725adbe766f33fa24e162bede86f991 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/SubtargetFeature.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" -#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 159cc3b4def2a523e8a7afcd4013d912e4944608..7ac4fb8767eedb10cc3814ab072965342602b649 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" @@ -31,7 +32,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/StringSaver.h" -#include "llvm/Support/Wasm.h" #include using namespace llvm; @@ -40,6 +40,7 @@ using namespace llvm; #define DEBUG_TYPE "reloc-info" namespace { + // For patching purposes, we need to remember where each section starts, both // for patching up the section size field, and for patching up references to // locations within the section. @@ -50,6 +51,114 @@ struct SectionBookkeeping { uint64_t ContentsOffset; }; +// The signature of a wasm function, in a struct capable of being used as a +// DenseMap key. +struct WasmFunctionType { + // Support empty and tombstone instances, needed by DenseMap. + enum { Plain, Empty, Tombstone } State; + + // The return types of the function. + SmallVector Returns; + + // The parameter types of the function. + SmallVector Params; + + WasmFunctionType() : State(Plain) {} + + bool operator==(const WasmFunctionType &Other) const { + return State == Other.State && Returns == Other.Returns && + Params == Other.Params; + } +}; + +// Traits for using WasmFunctionType in a DenseMap. +struct WasmFunctionTypeDenseMapInfo { + static WasmFunctionType getEmptyKey() { + WasmFunctionType FuncTy; + FuncTy.State = WasmFunctionType::Empty; + return FuncTy; + } + static WasmFunctionType getTombstoneKey() { + WasmFunctionType FuncTy; + FuncTy.State = WasmFunctionType::Tombstone; + return FuncTy; + } + static unsigned getHashValue(const WasmFunctionType &FuncTy) { + uintptr_t Value = FuncTy.State; + for (wasm::ValType Ret : FuncTy.Returns) + Value += DenseMapInfo::getHashValue(int32_t(Ret)); + for (wasm::ValType Param : FuncTy.Params) + Value += DenseMapInfo::getHashValue(int32_t(Param)); + return Value; + } + static bool isEqual(const WasmFunctionType &LHS, + const WasmFunctionType &RHS) { + return LHS == RHS; + } +}; + +// A wasm import to be written into the import section. +struct WasmImport { + StringRef ModuleName; + StringRef FieldName; + unsigned Kind; + int32_t Type; +}; + +// A wasm function to be written into the function section. +struct WasmFunction { + int32_t Type; + const MCSymbolWasm *Sym; +}; + +// A wasm export to be written into the export section. +struct WasmExport { + StringRef FieldName; + unsigned Kind; + uint32_t Index; +}; + +// A wasm global to be written into the global section. +struct WasmGlobal { + wasm::ValType Type; + bool IsMutable; + bool HasImport; + uint64_t InitialValue; + uint32_t ImportIndex; +}; + +// Information about a single relocation. +struct WasmRelocationEntry { + uint64_t Offset; // Where is the relocation. + const MCSymbolWasm *Symbol; // The symbol to relocate with. + int64_t Addend; // A value to add to the symbol. + unsigned Type; // The type of the relocation. + MCSectionWasm *FixupSection;// The section the relocation is targeting. + + WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol, + int64_t Addend, unsigned Type, + MCSectionWasm *FixupSection) + : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type), + FixupSection(FixupSection) {} + + bool hasAddend() const { + switch (Type) { + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: + return true; + default: + return false; + } + } + + void print(raw_ostream &Out) const { + Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend + << ", Type=" << Type << ", FixupSection=" << FixupSection; + } + void dump() const { print(errs()); } +}; + class WasmObjectWriter : public MCObjectWriter { /// Helper struct for containing some precomputed information on symbols. struct WasmSymbolData { @@ -69,17 +178,22 @@ class WasmObjectWriter : public MCObjectWriter { // Relocations for fixing up references in the data section. std::vector DataRelocations; - // Fixups for call_indirect type indices. - std::vector TypeIndexFixups; - // Index values to use for fixing up call_indirect type indices. - std::vector TypeIndexFixupTypes; + // Maps function symbols to the index of the type of the function + DenseMap TypeIndices; + // Maps function symbols to the table element index space. Used + // for TABLE_INDEX relocation types (i.e. address taken functions). + DenseMap IndirectSymbolIndices; + // Maps function/global symbols to the function/global index space. + DenseMap SymbolIndices; + + DenseMap + FunctionTypeIndices; // TargetObjectWriter wrappers. bool is64Bit() const { return TargetObjectWriter->is64Bit(); } - unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup &Fixup, bool IsPCRel) const { - return TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel); + unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup) const { + return TargetObjectWriter->getRelocType(Target, Fixup); } void startSection(SectionBookkeeping &Section, unsigned SectionId, @@ -91,18 +205,20 @@ public: : MCObjectWriter(OS, /*IsLittleEndian=*/true), TargetObjectWriter(MOTW) {} private: + ~WasmObjectWriter() override; + void reset() override { + CodeRelocations.clear(); + DataRelocations.clear(); + TypeIndices.clear(); + SymbolIndices.clear(); + IndirectSymbolIndices.clear(); + FunctionTypeIndices.clear(); MCObjectWriter::reset(); } - ~WasmObjectWriter() override; - void writeHeader(const MCAssembler &Asm); - void writeValueType(wasm::ValType Ty) { - encodeSLEB128(int32_t(Ty), getStream()); - } - void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, bool &IsPCRel, @@ -112,7 +228,39 @@ private: const MCAsmLayout &Layout) override; void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; + + void writeValueType(wasm::ValType Ty) { + encodeSLEB128(int32_t(Ty), getStream()); + } + + void writeTypeSection(const SmallVector &FunctionTypes); + void writeImportSection(const SmallVector &Imports); + void writeFunctionSection(const SmallVector &Functions); + void writeTableSection(uint32_t NumElements); + void writeMemorySection(const SmallVector &DataBytes); + void writeGlobalSection(const SmallVector &Globals); + void writeExportSection(const SmallVector &Exports); + void writeElemSection(const SmallVector &TableElems); + void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, + const SmallVector &Functions); + uint64_t + writeDataSection(const SmallVector &DataBytes); + void writeNameSection(const SmallVector &Functions, + const SmallVector &Imports, + uint32_t NumFuncImports); + void writeCodeRelocSection(); + void writeDataRelocSection(uint64_t DataSectionHeaderSize); + void writeLinkingMetaDataSection(bool HasStackPointer, + uint32_t StackPointerGlobal); + + void applyRelocations(ArrayRef Relocations, + uint64_t ContentsOffset); + + void writeRelocations(ArrayRef Relocations, + uint64_t HeaderSize); + uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry); }; + } // end anonymous namespace WasmObjectWriter::~WasmObjectWriter() {} @@ -257,18 +405,10 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, SymA->setUsedInReloc(); } - if (RefA) { - if (RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX) { - assert(C == 0); - WasmRelocationEntry Rec(FixupOffset, SymA, C, - wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB, - &FixupSection); - TypeIndexFixups.push_back(Rec); - return; - } - } + assert(!IsPCRel); + assert(SymA); - unsigned Type = getRelocType(Ctx, Target, Fixup, IsPCRel); + unsigned Type = getRelocType(Target, Fixup); WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection); @@ -278,86 +418,6 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, DataRelocations.push_back(Rec); } -namespace { - -// The signature of a wasm function, in a struct capable of being used as a -// DenseMap key. -struct WasmFunctionType { - // Support empty and tombstone instances, needed by DenseMap. - enum { Plain, Empty, Tombstone } State; - - // The return types of the function. - SmallVector Returns; - - // The parameter types of the function. - SmallVector Params; - - WasmFunctionType() : State(Plain) {} - - bool operator==(const WasmFunctionType &Other) const { - return State == Other.State && Returns == Other.Returns && - Params == Other.Params; - } -}; - -// Traits for using WasmFunctionType in a DenseMap. -struct WasmFunctionTypeDenseMapInfo { - static WasmFunctionType getEmptyKey() { - WasmFunctionType FuncTy; - FuncTy.State = WasmFunctionType::Empty; - return FuncTy; - } - static WasmFunctionType getTombstoneKey() { - WasmFunctionType FuncTy; - FuncTy.State = WasmFunctionType::Tombstone; - return FuncTy; - } - static unsigned getHashValue(const WasmFunctionType &FuncTy) { - uintptr_t Value = FuncTy.State; - for (wasm::ValType Ret : FuncTy.Returns) - Value += DenseMapInfo::getHashValue(int32_t(Ret)); - for (wasm::ValType Param : FuncTy.Params) - Value += DenseMapInfo::getHashValue(int32_t(Param)); - return Value; - } - static bool isEqual(const WasmFunctionType &LHS, - const WasmFunctionType &RHS) { - return LHS == RHS; - } -}; - -// A wasm import to be written into the import section. -struct WasmImport { - StringRef ModuleName; - StringRef FieldName; - unsigned Kind; - int32_t Type; -}; - -// A wasm function to be written into the function section. -struct WasmFunction { - int32_t Type; - const MCSymbolWasm *Sym; -}; - -// A wasm export to be written into the export section. -struct WasmExport { - StringRef FieldName; - unsigned Kind; - uint32_t Index; -}; - -// A wasm global to be written into the global section. -struct WasmGlobal { - wasm::ValType Type; - bool IsMutable; - bool HasImport; - uint64_t InitialValue; - uint32_t ImportIndex; -}; - -} // end anonymous namespace - // Write X as an (unsigned) LEB value at offset Offset in Stream, padded // to allow patching. static void @@ -408,121 +468,422 @@ static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) { return Value; } +uint32_t WasmObjectWriter::getRelocationIndexValue( + const WasmRelocationEntry &RelEntry) { + switch (RelEntry.Type) { + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + assert(IndirectSymbolIndices.count(RelEntry.Symbol)); + return IndirectSymbolIndices[RelEntry.Symbol]; + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: + case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: + assert(SymbolIndices.count(RelEntry.Symbol)); + return SymbolIndices[RelEntry.Symbol]; + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + assert(TypeIndices.count(RelEntry.Symbol)); + return TypeIndices[RelEntry.Symbol]; + default: + llvm_unreachable("invalid relocation type"); + } +} + // Apply the portions of the relocation records that we can handle ourselves // directly. -static void ApplyRelocations( - ArrayRef Relocations, - raw_pwrite_stream &Stream, - DenseMap &SymbolIndices, - uint64_t ContentsOffset) -{ +void WasmObjectWriter::applyRelocations( + ArrayRef Relocations, uint64_t ContentsOffset) { + raw_pwrite_stream &Stream = getStream(); for (const WasmRelocationEntry &RelEntry : Relocations) { uint64_t Offset = ContentsOffset + RelEntry.FixupSection->getSectionOffset() + RelEntry.Offset; - switch (RelEntry.Type) { - case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: { - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - assert(RelEntry.Addend == 0); - WritePatchableLEB(Stream, Index, Offset); + switch (RelEntry.Type) { + case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: + case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: { + uint32_t Index = getRelocationIndexValue(RelEntry); + WritePatchableSLEB(Stream, Index, Offset); break; } - case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: { - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - assert(RelEntry.Addend == 0); - - WritePatchableSLEB(Stream, Index, Offset); + case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: { + uint32_t Index = getRelocationIndexValue(RelEntry); + WriteI32(Stream, Index, Offset); break; } case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: { uint32_t Value = ProvisionalValue(RelEntry); - WritePatchableSLEB(Stream, Value, Offset); break; } case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: { uint32_t Value = ProvisionalValue(RelEntry); - WritePatchableLEB(Stream, Value, Offset); break; } - case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: { - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - assert(RelEntry.Addend == 0); - - WriteI32(Stream, Index, Offset); - break; - } case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: { uint32_t Value = ProvisionalValue(RelEntry); - WriteI32(Stream, Value, Offset); break; } default: - break; + llvm_unreachable("invalid relocation type"); } } } // Write out the portions of the relocation records that the linker will // need to handle. -static void WriteRelocations( - ArrayRef Relocations, - raw_pwrite_stream &Stream, - DenseMap &SymbolIndices) -{ - for (const WasmRelocationEntry RelEntry : Relocations) { - encodeULEB128(RelEntry.Type, Stream); +void WasmObjectWriter::writeRelocations( + ArrayRef Relocations, uint64_t HeaderSize) { + raw_pwrite_stream &Stream = getStream(); + for (const WasmRelocationEntry& RelEntry : Relocations) { uint64_t Offset = RelEntry.Offset + - RelEntry.FixupSection->getSectionOffset(); - uint32_t Index = SymbolIndices[RelEntry.Symbol]; - int64_t Addend = RelEntry.Addend; + RelEntry.FixupSection->getSectionOffset() + HeaderSize; + uint32_t Index = getRelocationIndexValue(RelEntry); - switch (RelEntry.Type) { - case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: - case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: - case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: - encodeULEB128(Offset, Stream); - encodeULEB128(Index, Stream); - assert(Addend == 0 && "addends not supported for functions"); + encodeULEB128(RelEntry.Type, Stream); + encodeULEB128(Offset, Stream); + encodeULEB128(Index, Stream); + if (RelEntry.hasAddend()) + encodeSLEB128(RelEntry.Addend, Stream); + } +} + +void WasmObjectWriter::writeTypeSection( + const SmallVector &FunctionTypes) { + if (FunctionTypes.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_TYPE); + + encodeULEB128(FunctionTypes.size(), getStream()); + + for (const WasmFunctionType &FuncTy : FunctionTypes) { + encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream()); + encodeULEB128(FuncTy.Params.size(), getStream()); + for (wasm::ValType Ty : FuncTy.Params) + writeValueType(Ty); + encodeULEB128(FuncTy.Returns.size(), getStream()); + for (wasm::ValType Ty : FuncTy.Returns) + writeValueType(Ty); + } + + endSection(Section); +} + +void WasmObjectWriter::writeImportSection( + const SmallVector &Imports) { + if (Imports.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_IMPORT); + + encodeULEB128(Imports.size(), getStream()); + for (const WasmImport &Import : Imports) { + StringRef ModuleName = Import.ModuleName; + encodeULEB128(ModuleName.size(), getStream()); + writeBytes(ModuleName); + + StringRef FieldName = Import.FieldName; + encodeULEB128(FieldName.size(), getStream()); + writeBytes(FieldName); + + encodeULEB128(Import.Kind, getStream()); + + switch (Import.Kind) { + case wasm::WASM_EXTERNAL_FUNCTION: + encodeULEB128(Import.Type, getStream()); break; - case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: - case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: - case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: - encodeULEB128(Offset, Stream); - encodeULEB128(Index, Stream); - encodeSLEB128(Addend, Stream); + case wasm::WASM_EXTERNAL_GLOBAL: + encodeSLEB128(int32_t(Import.Type), getStream()); + encodeULEB128(0, getStream()); // mutability break; default: - llvm_unreachable("unsupported relocation type"); + llvm_unreachable("unsupported import kind"); } } + + endSection(Section); } -// Write out the the type relocation records that the linker will -// need to handle. -static void WriteTypeRelocations( - ArrayRef TypeIndexFixups, - ArrayRef TypeIndexFixupTypes, - raw_pwrite_stream &Stream) -{ - for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) { - const WasmRelocationEntry &Fixup = TypeIndexFixups[i]; - uint32_t Type = TypeIndexFixupTypes[i]; - - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); - assert(Fixup.Addend == 0); - - uint64_t Offset = Fixup.Offset + - Fixup.FixupSection->getSectionOffset(); - - encodeULEB128(Fixup.Type, Stream); - encodeULEB128(Offset, Stream); - encodeULEB128(Type, Stream); +void WasmObjectWriter::writeFunctionSection( + const SmallVector &Functions) { + if (Functions.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_FUNCTION); + + encodeULEB128(Functions.size(), getStream()); + for (const WasmFunction &Func : Functions) + encodeULEB128(Func.Type, getStream()); + + endSection(Section); +} + +void WasmObjectWriter::writeTableSection(uint32_t NumElements) { + // For now, always emit the table section, since indirect calls are not + // valid without it. In the future, we could perhaps be more clever and omit + // it if there are no indirect calls. + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_TABLE); + + encodeULEB128(1, getStream()); // The number of tables. + // Fixed to 1 for now. + encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); // Type of table + encodeULEB128(0, getStream()); // flags + encodeULEB128(NumElements, getStream()); // initial + + endSection(Section); +} + +void WasmObjectWriter::writeMemorySection( + const SmallVector &DataBytes) { + // For now, always emit the memory section, since loads and stores are not + // valid without it. In the future, we could perhaps be more clever and omit + // it if there are no loads or stores. + SectionBookkeeping Section; + uint32_t NumPages = + (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize; + + startSection(Section, wasm::WASM_SEC_MEMORY); + encodeULEB128(1, getStream()); // number of memory spaces + + encodeULEB128(0, getStream()); // flags + encodeULEB128(NumPages, getStream()); // initial + + endSection(Section); +} + +void WasmObjectWriter::writeGlobalSection( + const SmallVector &Globals) { + if (Globals.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_GLOBAL); + + encodeULEB128(Globals.size(), getStream()); + for (const WasmGlobal &Global : Globals) { + writeValueType(Global.Type); + write8(Global.IsMutable); + + if (Global.HasImport) { + assert(Global.InitialValue == 0); + write8(wasm::WASM_OPCODE_GET_GLOBAL); + encodeULEB128(Global.ImportIndex, getStream()); + } else { + assert(Global.ImportIndex == 0); + write8(wasm::WASM_OPCODE_I32_CONST); + encodeSLEB128(Global.InitialValue, getStream()); // offset + } + write8(wasm::WASM_OPCODE_END); + } + + endSection(Section); +} + +void WasmObjectWriter::writeExportSection( + const SmallVector &Exports) { + if (Exports.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_EXPORT); + + encodeULEB128(Exports.size(), getStream()); + for (const WasmExport &Export : Exports) { + encodeULEB128(Export.FieldName.size(), getStream()); + writeBytes(Export.FieldName); + + encodeSLEB128(Export.Kind, getStream()); + + encodeULEB128(Export.Index, getStream()); } + + endSection(Section); +} + +void WasmObjectWriter::writeElemSection( + const SmallVector &TableElems) { + if (TableElems.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_ELEM); + + encodeULEB128(1, getStream()); // number of "segments" + encodeULEB128(0, getStream()); // the table index + + // init expr for starting offset + write8(wasm::WASM_OPCODE_I32_CONST); + encodeSLEB128(0, getStream()); + write8(wasm::WASM_OPCODE_END); + + encodeULEB128(TableElems.size(), getStream()); + for (uint32_t Elem : TableElems) + encodeULEB128(Elem, getStream()); + + endSection(Section); +} + +void WasmObjectWriter::writeCodeSection( + const MCAssembler &Asm, const MCAsmLayout &Layout, + const SmallVector &Functions) { + if (Functions.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CODE); + + encodeULEB128(Functions.size(), getStream()); + + for (const WasmFunction &Func : Functions) { + MCSectionWasm &FuncSection = + static_cast(Func.Sym->getSection()); + + if (Func.Sym->isVariable()) + report_fatal_error("weak symbols not supported yet"); + + if (Func.Sym->getOffset() != 0) + report_fatal_error("function sections must contain one function each"); + + if (!Func.Sym->getSize()) + report_fatal_error("function symbols must have a size set with .size"); + + int64_t Size = 0; + if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout)) + report_fatal_error(".size expression must be evaluatable"); + + encodeULEB128(Size, getStream()); + + FuncSection.setSectionOffset(getStream().tell() - + Section.ContentsOffset); + + Asm.writeSectionData(&FuncSection, Layout); + } + + // Apply fixups. + applyRelocations(CodeRelocations, Section.ContentsOffset); + + endSection(Section); +} + +uint64_t WasmObjectWriter::writeDataSection( + const SmallVector &DataBytes) { + if (DataBytes.empty()) + return 0; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_DATA); + + encodeULEB128(1, getStream()); // count + encodeULEB128(0, getStream()); // memory index + write8(wasm::WASM_OPCODE_I32_CONST); + encodeSLEB128(0, getStream()); // offset + write8(wasm::WASM_OPCODE_END); + encodeULEB128(DataBytes.size(), getStream()); // size + uint32_t HeaderSize = getStream().tell() - Section.ContentsOffset; + writeBytes(DataBytes); // data + + // Apply fixups. + applyRelocations(DataRelocations, Section.ContentsOffset + HeaderSize); + + endSection(Section); + return HeaderSize; +} + +void WasmObjectWriter::writeNameSection( + const SmallVector &Functions, + const SmallVector &Imports, + unsigned NumFuncImports) { + uint32_t TotalFunctions = NumFuncImports + Functions.size(); + if (TotalFunctions == 0) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "name"); + SectionBookkeeping SubSection; + startSection(SubSection, wasm::WASM_NAMES_FUNCTION); + + encodeULEB128(TotalFunctions, getStream()); + uint32_t Index = 0; + for (const WasmImport &Import : Imports) { + if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) { + encodeULEB128(Index, getStream()); + encodeULEB128(Import.FieldName.size(), getStream()); + writeBytes(Import.FieldName); + ++Index; + } + } + for (const WasmFunction &Func : Functions) { + encodeULEB128(Index, getStream()); + encodeULEB128(Func.Sym->getName().size(), getStream()); + writeBytes(Func.Sym->getName()); + ++Index; + } + + endSection(SubSection); + endSection(Section); +} + +void WasmObjectWriter::writeCodeRelocSection() { + // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md + // for descriptions of the reloc sections. + + if (CodeRelocations.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE"); + + encodeULEB128(wasm::WASM_SEC_CODE, getStream()); + encodeULEB128(CodeRelocations.size(), getStream()); + + writeRelocations(CodeRelocations, 0); + + endSection(Section); +} + +void WasmObjectWriter::writeDataRelocSection(uint64_t DataSectionHeaderSize) { + // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md + // for descriptions of the reloc sections. + + if (DataRelocations.empty()) + return; + + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA"); + + encodeULEB128(wasm::WASM_SEC_DATA, getStream()); + encodeULEB128(DataRelocations.size(), getStream()); + + writeRelocations(DataRelocations, DataSectionHeaderSize); + + endSection(Section); +} + +void WasmObjectWriter::writeLinkingMetaDataSection( + bool HasStackPointer, uint32_t StackPointerGlobal) { + if (!HasStackPointer) + return; + SectionBookkeeping Section; + startSection(Section, wasm::WASM_SEC_CUSTOM, "linking"); + + encodeULEB128(1, getStream()); // count + + encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type + encodeULEB128(StackPointerGlobal, getStream()); // id + + endSection(Section); } void WasmObjectWriter::writeObject(MCAssembler &Asm, @@ -531,15 +892,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32; // Collect information from the available symbols. - DenseMap - FunctionTypeIndices; SmallVector FunctionTypes; SmallVector Functions; SmallVector TableElems; SmallVector Globals; SmallVector Imports; SmallVector Exports; - DenseMap SymbolIndices; SmallPtrSet IsAddressTaken; unsigned NumFuncImports = 0; unsigned NumGlobalImports = 0; @@ -723,23 +1081,26 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } // If needed, prepare the function to be called indirectly. - if (IsAddressTaken.count(&WS)) + if (IsAddressTaken.count(&WS)) { + IndirectSymbolIndices[&WS] = TableElems.size(); TableElems.push_back(Index); + } } else { - // For now, ignore temporary non-function symbols. - if (S.isTemporary()) + if (WS.isTemporary() && !WS.getSize()) continue; - if (WS.getOffset() != 0) - report_fatal_error("data sections must contain one variable each"); - if (!WS.getSize()) - report_fatal_error("data symbols must have a size set with .size"); + if (WS.isDefined(false)) { + if (WS.getOffset() != 0) + report_fatal_error("data sections must contain one variable each: " + + WS.getName()); + if (!WS.getSize()) + report_fatal_error("data symbols must have a size set with .size: " + + WS.getName()); - int64_t Size = 0; - if (!WS.getSize()->evaluateAsAbsolute(Size, Layout)) - report_fatal_error(".size expression must be evaluatable"); + int64_t Size = 0; + if (!WS.getSize()->evaluateAsAbsolute(Size, Layout)) + report_fatal_error(".size expression must be evaluatable"); - if (WS.isDefined(false)) { MCSectionWasm &DataSection = static_cast(WS.getSection()); @@ -777,20 +1138,18 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } } - // For each external global, prepare a corresponding wasm global - // holding its address. - if (WS.isExternal()) { - Index = NumGlobalImports + Globals.size(); - - WasmGlobal Global; - Global.Type = PtrType; - Global.IsMutable = false; - Global.HasImport = false; - Global.InitialValue = DataSection.getSectionOffset(); - Global.ImportIndex = 0; - SymbolIndices[&WS] = Index; - Globals.push_back(Global); - } + // For each global, prepare a corresponding wasm global holding its + // address. For externals these will also be named exports. + Index = NumGlobalImports + Globals.size(); + + WasmGlobal Global; + Global.Type = PtrType; + Global.IsMutable = false; + Global.HasImport = false; + Global.InitialValue = DataSection.getSectionOffset(); + Global.ImportIndex = 0; + SymbolIndices[&WS] = Index; + Globals.push_back(Global); } } @@ -811,9 +1170,9 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } // Add types for indirect function calls. - for (const WasmRelocationEntry &Fixup : TypeIndexFixups) { - assert(Fixup.Addend == 0); - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); + for (const WasmRelocationEntry &Fixup : CodeRelocations) { + if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) + continue; WasmFunctionType F; F.Returns = Fixup.Symbol->getReturns(); @@ -823,323 +1182,29 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (Pair.second) FunctionTypes.push_back(F); - TypeIndexFixupTypes.push_back(Pair.first->second); + TypeIndices[Fixup.Symbol] = Pair.first->second; } // Write out the Wasm header. writeHeader(Asm); - SectionBookkeeping Section; - - // === Type Section ========================================================= - if (!FunctionTypes.empty()) { - startSection(Section, wasm::WASM_SEC_TYPE); - - encodeULEB128(FunctionTypes.size(), getStream()); - - for (WasmFunctionType &FuncTy : FunctionTypes) { - encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream()); - encodeULEB128(FuncTy.Params.size(), getStream()); - for (wasm::ValType Ty : FuncTy.Params) - writeValueType(Ty); - encodeULEB128(FuncTy.Returns.size(), getStream()); - for (wasm::ValType Ty : FuncTy.Returns) - writeValueType(Ty); - } - - endSection(Section); - } - - // === Import Section ======================================================== - if (!Imports.empty()) { - startSection(Section, wasm::WASM_SEC_IMPORT); - - encodeULEB128(Imports.size(), getStream()); - for (const WasmImport &Import : Imports) { - StringRef ModuleName = Import.ModuleName; - encodeULEB128(ModuleName.size(), getStream()); - writeBytes(ModuleName); - - StringRef FieldName = Import.FieldName; - encodeULEB128(FieldName.size(), getStream()); - writeBytes(FieldName); - - encodeULEB128(Import.Kind, getStream()); - - switch (Import.Kind) { - case wasm::WASM_EXTERNAL_FUNCTION: - encodeULEB128(Import.Type, getStream()); - break; - case wasm::WASM_EXTERNAL_GLOBAL: - encodeSLEB128(int32_t(Import.Type), getStream()); - encodeULEB128(0, getStream()); // mutability - break; - default: - llvm_unreachable("unsupported import kind"); - } - } - - endSection(Section); - } - - // === Function Section ====================================================== - if (!Functions.empty()) { - startSection(Section, wasm::WASM_SEC_FUNCTION); - - encodeULEB128(Functions.size(), getStream()); - for (const WasmFunction &Func : Functions) - encodeULEB128(Func.Type, getStream()); - - endSection(Section); - } - - // === Table Section ========================================================= - // For now, always emit the table section, since indirect calls are not - // valid without it. In the future, we could perhaps be more clever and omit - // it if there are no indirect calls. - startSection(Section, wasm::WASM_SEC_TABLE); - - // The number of tables, fixed to 1 for now. - encodeULEB128(1, getStream()); - - encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream()); - - encodeULEB128(0, getStream()); // flags - encodeULEB128(TableElems.size(), getStream()); // initial - - endSection(Section); - - // === Memory Section ======================================================== - // For now, always emit the memory section, since loads and stores are not - // valid without it. In the future, we could perhaps be more clever and omit - // it if there are no loads or stores. - startSection(Section, wasm::WASM_SEC_MEMORY); - - encodeULEB128(1, getStream()); // number of memory spaces - - encodeULEB128(0, getStream()); // flags - encodeULEB128(DataBytes.size(), getStream()); // initial - - endSection(Section); - - // === Global Section ======================================================== - if (!Globals.empty()) { - startSection(Section, wasm::WASM_SEC_GLOBAL); - - encodeULEB128(Globals.size(), getStream()); - for (const WasmGlobal &Global : Globals) { - writeValueType(Global.Type); - write8(Global.IsMutable); - - if (Global.HasImport) { - assert(Global.InitialValue == 0); - write8(wasm::WASM_OPCODE_GET_GLOBAL); - encodeULEB128(Global.ImportIndex, getStream()); - } else { - assert(Global.ImportIndex == 0); - write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(Global.InitialValue, getStream()); // offset - } - write8(wasm::WASM_OPCODE_END); - } - - endSection(Section); - } - - // === Export Section ======================================================== - if (!Exports.empty()) { - startSection(Section, wasm::WASM_SEC_EXPORT); - - encodeULEB128(Exports.size(), getStream()); - for (const WasmExport &Export : Exports) { - encodeULEB128(Export.FieldName.size(), getStream()); - writeBytes(Export.FieldName); - - encodeSLEB128(Export.Kind, getStream()); - - encodeULEB128(Export.Index, getStream()); - } - - endSection(Section); - } - -#if 0 // TODO: Start Section - if (HaveStartFunction) { - // === Start Section ========================================================= - startSection(Section, wasm::WASM_SEC_START); - - encodeSLEB128(StartFunction, getStream()); - - endSection(Section); - } -#endif - - // === Elem Section ========================================================== - if (!TableElems.empty()) { - startSection(Section, wasm::WASM_SEC_ELEM); - - encodeULEB128(1, getStream()); // number of "segments" - encodeULEB128(0, getStream()); // the table index - - // init expr for starting offset - write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(0, getStream()); - write8(wasm::WASM_OPCODE_END); - - encodeULEB128(TableElems.size(), getStream()); - for (uint32_t Elem : TableElems) - encodeULEB128(Elem, getStream()); - - endSection(Section); - } - - // === Code Section ========================================================== - if (!Functions.empty()) { - startSection(Section, wasm::WASM_SEC_CODE); - - encodeULEB128(Functions.size(), getStream()); - - for (const WasmFunction &Func : Functions) { - MCSectionWasm &FuncSection = - static_cast(Func.Sym->getSection()); - - if (Func.Sym->isVariable()) - report_fatal_error("weak symbols not supported yet"); - - if (Func.Sym->getOffset() != 0) - report_fatal_error("function sections must contain one function each"); - - if (!Func.Sym->getSize()) - report_fatal_error("function symbols must have a size set with .size"); - - int64_t Size = 0; - if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout)) - report_fatal_error(".size expression must be evaluatable"); - - encodeULEB128(Size, getStream()); - - FuncSection.setSectionOffset(getStream().tell() - - Section.ContentsOffset); - - Asm.writeSectionData(&FuncSection, Layout); - } - - // Apply the type index fixups for call_indirect etc. instructions. - for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) { - uint32_t Type = TypeIndexFixupTypes[i]; - unsigned Padding = PaddingFor5ByteULEB128(Type); - - const WasmRelocationEntry &Fixup = TypeIndexFixups[i]; - assert(Fixup.Addend == 0); - assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB); - uint64_t Offset = Fixup.Offset + - Fixup.FixupSection->getSectionOffset(); - - uint8_t Buffer[16]; - unsigned SizeLen = encodeULEB128(Type, Buffer, Padding); - assert(SizeLen == 5); - getStream().pwrite((char *)Buffer, SizeLen, - Section.ContentsOffset + Offset); - } - - // Apply fixups. - ApplyRelocations(CodeRelocations, getStream(), SymbolIndices, - Section.ContentsOffset); - - endSection(Section); - } - - // === Data Section ========================================================== - if (!DataBytes.empty()) { - startSection(Section, wasm::WASM_SEC_DATA); - - encodeULEB128(1, getStream()); // count - encodeULEB128(0, getStream()); // memory index - write8(wasm::WASM_OPCODE_I32_CONST); - encodeSLEB128(0, getStream()); // offset - write8(wasm::WASM_OPCODE_END); - encodeULEB128(DataBytes.size(), getStream()); // size - writeBytes(DataBytes); // data - - // Apply fixups. - ApplyRelocations(DataRelocations, getStream(), SymbolIndices, - Section.ContentsOffset); - - endSection(Section); - } - - // === Name Section ========================================================== - uint32_t TotalFunctions = NumFuncImports + Functions.size(); - if (TotalFunctions != 0) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "name"); - SectionBookkeeping SubSection; - startSection(SubSection, wasm::WASM_NAMES_FUNCTION); - - encodeULEB128(TotalFunctions, getStream()); - uint32_t Index = 0; - for (const WasmImport &Import : Imports) { - if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) { - encodeULEB128(Index, getStream()); - encodeULEB128(Import.FieldName.size(), getStream()); - writeBytes(Import.FieldName); - ++Index; - } - } - for (const WasmFunction &Func : Functions) { - encodeULEB128(Index, getStream()); - encodeULEB128(Func.Sym->getName().size(), getStream()); - writeBytes(Func.Sym->getName()); - ++Index; - } - - endSection(SubSection); - endSection(Section); - } - - // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md - // for descriptions of the reloc sections. - - // === Code Reloc Section ==================================================== - if (!CodeRelocations.empty()) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE"); - - encodeULEB128(wasm::WASM_SEC_CODE, getStream()); - - encodeULEB128(CodeRelocations.size(), getStream()); - - WriteRelocations(CodeRelocations, getStream(), SymbolIndices); - WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream()); - - endSection(Section); - } - - // === Data Reloc Section ==================================================== - if (!DataRelocations.empty()) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA"); - - encodeULEB128(wasm::WASM_SEC_DATA, getStream()); - - encodeULEB128(DataRelocations.size(), getStream()); - - WriteRelocations(DataRelocations, getStream(), SymbolIndices); - - endSection(Section); - } - - // === Linking Metadata Section ============================================== - if (HasStackPointer) { - startSection(Section, wasm::WASM_SEC_CUSTOM, "linking"); - - encodeULEB128(1, getStream()); // count - - encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type - encodeULEB128(StackPointerGlobal, getStream()); // id - - endSection(Section); - } + writeTypeSection(FunctionTypes); + writeImportSection(Imports); + writeFunctionSection(Functions); + writeTableSection(TableElems.size()); + writeMemorySection(DataBytes); + writeGlobalSection(Globals); + writeExportSection(Exports); + // TODO: Start Section + writeElemSection(TableElems); + writeCodeSection(Asm, Layout, Functions); + uint64_t DataSectionHeaderSize = writeDataSection(DataBytes); + writeNameSection(Functions, Imports, NumFuncImports); + writeCodeRelocSection(); + writeDataRelocSection(DataSectionHeaderSize); + writeLinkingMetaDataSection(HasStackPointer, StackPointerGlobal); // TODO: Translate the .comment section to the output. - // TODO: Translate debug sections to the output. } diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index da8fe73f823bff7a222a113b4df44ad2818a5126..53dee3e8b9f367db7176ee2fd359bb7e2027d699 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -12,11 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -32,12 +33,12 @@ #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/JamCRC.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -54,7 +55,7 @@ using llvm::support::endian::write32le; namespace { -typedef SmallString name; +using name = SmallString; enum AuxiliaryType { ATFunctionDefinition, @@ -75,7 +76,7 @@ class COFFSymbol { public: COFF::symbol Data = {}; - typedef SmallVector AuxiliarySymbols; + using AuxiliarySymbols = SmallVector; name Name; int Index; @@ -107,7 +108,7 @@ struct COFFRelocation { static size_t size() { return COFF::RelocationSize; } }; -typedef std::vector relocations; +using relocations = std::vector; class COFFSection { public: @@ -124,11 +125,11 @@ public: class WinCOFFObjectWriter : public MCObjectWriter { public: - typedef std::vector> symbols; - typedef std::vector> sections; + using symbols = std::vector>; + using sections = std::vector>; - typedef DenseMap symbol_map; - typedef DenseMap section_map; + using symbol_map = DenseMap; + using section_map = DenseMap; std::unique_ptr TargetObjectWriter; diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index c26d87f36f83d6edaa4f89b8347edad923976a76..b4d0d7a87f1d368913a41bbf669738085cc03ec7 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -28,11 +29,10 @@ #include "llvm/MC/MCSymbolCOFF.h" #include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index f2021f796d1250258d077405d9f3c644d1c5a229..977cccc11dcdaac614f2933f010a1822015b5046 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -1,4 +1,4 @@ -//===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===// +//===- Archive.cpp - ar File Format implementation ------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,11 +12,28 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/Archive.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Chrono.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace object; @@ -25,7 +42,7 @@ using namespace llvm::support::endian; static const char *const Magic = "!\n"; static const char *const ThinMagic = "!\n"; -void Archive::anchor() { } +void Archive::anchor() {} static Error malformedError(Twine Msg) { @@ -61,8 +78,8 @@ ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, if (Err) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->Terminator, - sizeof(ArMemHdr->Terminator))); + OS.write_escaped(StringRef(ArMemHdr->Terminator, + sizeof(ArMemHdr->Terminator))); OS.flush(); std::string Msg("terminator characters in archive member \"" + Buf + "\" not the correct \"`\\n\" values for the archive " @@ -97,13 +114,13 @@ Expected ArchiveMemberHeader::getRawName() const { EndCond = ' '; else EndCond = '/'; - llvm::StringRef::size_type end = - llvm::StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); - if (end == llvm::StringRef::npos) + StringRef::size_type end = + StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); + if (end == StringRef::npos) end = sizeof(ArMemHdr->Name); assert(end <= sizeof(ArMemHdr->Name) && end > 0); // Don't include the EndCond if there is one. - return llvm::StringRef(ArMemHdr->Name, end); + return StringRef(ArMemHdr->Name, end); } // This gets the name looking up long names. Size is the size of the archive @@ -205,12 +222,12 @@ Expected ArchiveMemberHeader::getName(uint64_t Size) const { Expected ArchiveMemberHeader::getSize() const { uint32_t Ret; - if (llvm::StringRef(ArMemHdr->Size, - sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { + if (StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->Size, - sizeof(ArMemHdr->Size)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast(ArMemHdr) - Parent->getData().data(); @@ -227,8 +244,8 @@ Expected ArchiveMemberHeader::getAccessMode() const { sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->AccessMode, - sizeof(ArMemHdr->AccessMode)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->AccessMode, + sizeof(ArMemHdr->AccessMode)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast(ArMemHdr) - Parent->getData().data(); @@ -247,8 +264,8 @@ ArchiveMemberHeader::getLastModified() const { .getAsInteger(10, Seconds)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(llvm::StringRef(ArMemHdr->LastModified, - sizeof(ArMemHdr->LastModified)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->LastModified, + sizeof(ArMemHdr->LastModified)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast(ArMemHdr) - Parent->getData().data(); diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index 5b233aab2018a7a3f365aefe9addd80b5089103a..4034f9039ddaab728684c73246e4103431876c19 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -14,6 +14,7 @@ #include "llvm/Object/ArchiveWriter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" @@ -35,7 +36,8 @@ using namespace llvm; NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) - : Buf(MemoryBuffer::getMemBuffer(BufRef, false)) {} + : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), + MemberName(BufRef.getBufferIdentifier()) {} Expected NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, @@ -47,6 +49,7 @@ NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, NewArchiveMember M; assert(M.IsNew == false); M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); + M.MemberName = M.Buf->getBufferIdentifier(); if (!Deterministic) { auto ModTimeOrErr = OldMember.getLastModified(); if (!ModTimeOrErr) @@ -96,6 +99,7 @@ Expected NewArchiveMember::getFile(StringRef FileName, NewArchiveMember M; M.IsNew = true; M.Buf = std::move(*MemberBufferOrErr); + M.MemberName = M.Buf->getBufferIdentifier(); if (!Deterministic) { M.ModTime = std::chrono::time_point_cast( Status.getLastModificationTime()); @@ -184,7 +188,7 @@ printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, } static bool useStringTable(bool Thin, StringRef Name) { - return Thin || Name.size() >= 16; + return Thin || Name.size() >= 16 || Name.contains('/'); } static void @@ -238,7 +242,7 @@ static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, unsigned StartOffset = 0; for (const NewArchiveMember &M : Members) { StringRef Path = M.Buf->getBufferIdentifier(); - StringRef Name = sys::path::filename(Path); + StringRef Name = M.MemberName; if (!useStringTable(Thin, Name)) continue; if (StartOffset == 0) { @@ -290,7 +294,7 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, MemoryBufferRef MemberBuffer = Members[MemberNum].Buf->getMemBufferRef(); Expected> ObjOrErr = object::SymbolicFile::createSymbolicFile( - MemberBuffer, sys::fs::file_magic::unknown, &Context); + MemberBuffer, llvm::file_magic::unknown, &Context); if (!ObjOrErr) { // FIXME: check only for "not an object file" errors. consumeError(ObjOrErr.takeError()); @@ -422,9 +426,8 @@ llvm::writeArchive(StringRef ArcName, if (Kind == object::Archive::K_DARWIN) Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8); - printMemberHeader(Out, Kind, Thin, - sys::path::filename(M.Buf->getBufferIdentifier()), - StringMapIndexIter, M.ModTime, M.UID, M.GID, M.Perms, + printMemberHeader(Out, Kind, Thin, M.MemberName, StringMapIndexIter, + M.ModTime, M.UID, M.GID, M.Perms, M.Buf->getBufferSize() + Padding); if (!Thin) diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp index 8467d349cd9599608d6ddb99cad9aee96c49dc84..c4565db459e6413cc1c2528181f0ae143009c88c 100644 --- a/lib/Object/Binary.cpp +++ b/lib/Object/Binary.cpp @@ -1,4 +1,4 @@ -//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===// +//===- Binary.cpp - A generic binary file ---------------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,19 +13,25 @@ #include "llvm/Object/Binary.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" - -// Include headers for createBinary. +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/Error.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/WindowsResource.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include +#include using namespace llvm; using namespace object; -Binary::~Binary() {} +Binary::~Binary() = default; Binary::Binary(unsigned int Type, MemoryBufferRef Source) : TypeID(Type), Data(Source) {} @@ -38,40 +44,41 @@ MemoryBufferRef Binary::getMemoryBufferRef() const { return Data; } Expected> object::createBinary(MemoryBufferRef Buffer, LLVMContext *Context) { - sys::fs::file_magic Type = sys::fs::identify_magic(Buffer.getBuffer()); + file_magic Type = identify_magic(Buffer.getBuffer()); switch (Type) { - case sys::fs::file_magic::archive: - return Archive::create(Buffer); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::coff_object: - case sys::fs::file_magic::coff_import_library: - case sys::fs::file_magic::pecoff_executable: - case sys::fs::file_magic::bitcode: - case sys::fs::file_magic::wasm_object: - return ObjectFile::createSymbolicFile(Buffer, Type, Context); - case sys::fs::file_magic::macho_universal_binary: - return MachOUniversalBinary::create(Buffer); - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::windows_resource: - // Unrecognized object file format. - return errorCodeToError(object_error::invalid_file_type); + case file_magic::archive: + return Archive::create(Buffer); + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: + case file_magic::bitcode: + case file_magic::wasm_object: + return ObjectFile::createSymbolicFile(Buffer, Type, Context); + case file_magic::macho_universal_binary: + return MachOUniversalBinary::create(Buffer); + case file_magic::windows_resource: + return WindowsResource::createWindowsResource(Buffer); + case file_magic::unknown: + case file_magic::coff_cl_gl_object: + // Unrecognized object file format. + return errorCodeToError(object_error::invalid_file_type); } llvm_unreachable("Unexpected Binary File Type"); } diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index 2007f560c166da1d8fa709e28dca86c5ccfcf20e..1d08a9efd8b3f8257a97a39ffa28aa698a13cec6 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -2,6 +2,8 @@ add_llvm_library(LLVMObject Archive.cpp ArchiveWriter.cpp Binary.cpp + COFFImportFile.cpp + COFFModuleDefinition.cpp COFFObjectFile.cpp Decompressor.cpp ELF.cpp @@ -11,7 +13,6 @@ add_llvm_library(LLVMObject IRSymtab.cpp MachOObjectFile.cpp MachOUniversal.cpp - ModuleSummaryIndexObjectFile.cpp ModuleSymbolTable.cpp Object.cpp ObjectFile.cpp @@ -19,6 +20,7 @@ add_llvm_library(LLVMObject SymbolicFile.cpp SymbolSize.cpp WasmObjectFile.cpp + WindowsResource.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Object diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp new file mode 100644 index 0000000000000000000000000000000000000000..740bf94d40e084381b635a7449a61def1a4b749e --- /dev/null +++ b/lib/Object/COFFImportFile.cpp @@ -0,0 +1,527 @@ +//===- COFFImportFile.cpp - COFF short import file implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the writeImportLibrary function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFFImportFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ArchiveWriter.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" + +#include +#include +#include +#include +#include + +using namespace llvm::COFF; +using namespace llvm::object; +using namespace llvm; + +namespace llvm { +namespace object { + +static bool is32bit(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return false; + case IMAGE_FILE_MACHINE_ARMNT: + case IMAGE_FILE_MACHINE_I386: + return true; + } +} + +static uint16_t getImgRelRelocation(MachineTypes Machine) { + switch (Machine) { + default: + llvm_unreachable("unsupported machine"); + case IMAGE_FILE_MACHINE_AMD64: + return IMAGE_REL_AMD64_ADDR32NB; + case IMAGE_FILE_MACHINE_ARMNT: + return IMAGE_REL_ARM_ADDR32NB; + case IMAGE_FILE_MACHINE_I386: + return IMAGE_REL_I386_DIR32NB; + } +} + +template static void append(std::vector &B, const T &Data) { + size_t S = B.size(); + B.resize(S + sizeof(T)); + memcpy(&B[S], &Data, sizeof(T)); +} + +static void writeStringTable(std::vector &B, + ArrayRef Strings) { + // The COFF string table consists of a 4-byte value which is the size of the + // table, including the length field itself. This value is followed by the + // string content itself, which is an array of null-terminated C-style + // strings. The termination is important as they are referenced to by offset + // by the symbol entity in the file format. + + size_t Pos = B.size(); + size_t Offset = B.size(); + + // Skip over the length field, we will fill it in later as we will have + // computed the length while emitting the string content itself. + Pos += sizeof(uint32_t); + + for (const auto &S : Strings) { + B.resize(Pos + S.length() + 1); + strcpy(reinterpret_cast(&B[Pos]), S.c_str()); + Pos += S.length() + 1; + } + + // Backfill the length of the table now that it has been computed. + support::ulittle32_t Length(B.size() - Offset); + support::endian::write32le(&B[Offset], Length); +} + +static ImportNameType getNameType(StringRef Sym, StringRef ExtName, + MachineTypes Machine) { + if (Sym != ExtName) + return IMPORT_NAME_UNDECORATE; + if (Machine == IMAGE_FILE_MACHINE_I386 && Sym.startswith("_")) + return IMPORT_NAME_NOPREFIX; + return IMPORT_NAME; +} + +static Expected replace(StringRef S, StringRef From, + StringRef To) { + size_t Pos = S.find(From); + + // From and To may be mangled, but substrings in S may not. + if (Pos == StringRef::npos && From.startswith("_") && To.startswith("_")) { + From = From.substr(1); + To = To.substr(1); + Pos = S.find(From); + } + + if (Pos == StringRef::npos) { + return make_error( + StringRef(Twine(S + ": replacing '" + From + + "' with '" + To + "' failed").str()), object_error::parse_failed); + } + + return (Twine(S.substr(0, Pos)) + To + S.substr(Pos + From.size())).str(); +} + +static const std::string NullImportDescriptorSymbolName = + "__NULL_IMPORT_DESCRIPTOR"; + +namespace { +// This class constructs various small object files necessary to support linking +// symbols imported from a DLL. The contents are pretty strictly defined and +// nearly entirely static. The details of the structures files are defined in +// WINNT.h and the PE/COFF specification. +class ObjectFactory { + using u16 = support::ulittle16_t; + using u32 = support::ulittle32_t; + MachineTypes Machine; + BumpPtrAllocator Alloc; + StringRef DLLName; + StringRef Library; + std::string ImportDescriptorSymbolName; + std::string NullThunkSymbolName; + +public: + ObjectFactory(StringRef S, MachineTypes M) + : Machine(M), DLLName(S), Library(S.drop_back(4)), + ImportDescriptorSymbolName(("__IMPORT_DESCRIPTOR_" + Library).str()), + NullThunkSymbolName(("\x7f" + Library + "_NULL_THUNK_DATA").str()) {} + + // Creates an Import Descriptor. This is a small object file which contains a + // reference to the terminators and contains the library name (entry) for the + // import name table. It will force the linker to construct the necessary + // structure to import symbols from the DLL. + NewArchiveMember createImportDescriptor(std::vector &Buffer); + + // Creates a NULL import descriptor. This is a small object file whcih + // contains a NULL import descriptor. It is used to terminate the imports + // from a specific DLL. + NewArchiveMember createNullImportDescriptor(std::vector &Buffer); + + // Create a NULL Thunk Entry. This is a small object file which contains a + // NULL Import Address Table entry and a NULL Import Lookup Table Entry. It + // is used to terminate the IAT and ILT. + NewArchiveMember createNullThunk(std::vector &Buffer); + + // Create a short import file which is described in PE/COFF spec 7. Import + // Library Format. + NewArchiveMember createShortImport(StringRef Sym, uint16_t Ordinal, + ImportType Type, ImportNameType NameType); +}; +} // namespace + +NewArchiveMember +ObjectFactory::createImportDescriptor(std::vector &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 7; + static const uint32_t NumberOfRelocations = 3; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$2 + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation) + + // .idata$4 + (DLLName.size() + 1)), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry)), + u32(0), + u16(NumberOfRelocations), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}, + u32(0), + u32(0), + u32(DLLName.size() + 1), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + sizeof(coff_import_directory_table_entry) + + NumberOfRelocations * sizeof(coff_relocation)), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$2 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + static const coff_relocation RelocationTable[NumberOfRelocations] = { + {u32(offsetof(coff_import_directory_table_entry, NameRVA)), u32(2), + u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportLookupTableRVA)), + u32(3), u16(getImgRelRelocation(Machine))}, + {u32(offsetof(coff_import_directory_table_entry, ImportAddressTableRVA)), + u32(4), u16(getImgRelRelocation(Machine))}, + }; + append(Buffer, RelocationTable); + + // .idata$6 + auto S = Buffer.size(); + Buffer.resize(S + DLLName.size() + 1); + memcpy(&Buffer[S], DLLName.data(), DLLName.size()); + Buffer[S + DLLName.size()] = '\0'; + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '2'}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '6'}}, + u32(0), + u16(2), + u16(0), + IMAGE_SYM_CLASS_STATIC, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_SECTION, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(0), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + // TODO: Name.Offset.Offset here and in the all similar places below + // suggests a names refactoring. Maybe StringTableOffset.Value? + SymbolTable[0].Name.Offset.Offset = + sizeof(uint32_t); + SymbolTable[5].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1; + SymbolTable[6].Name.Offset.Offset = + sizeof(uint32_t) + ImportDescriptorSymbolName.length() + 1 + + NullImportDescriptorSymbolName.length() + 1; + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, + {ImportDescriptorSymbolName, NullImportDescriptorSymbolName, + NullThunkSymbolName}); + + StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember +ObjectFactory::createNullImportDescriptor(std::vector &Buffer) { + static const uint32_t NumberOfSections = 1; + static const uint32_t NumberOfSymbols = 1; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$3 + sizeof(coff_import_directory_table_entry)), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '3'}, + u32(0), + u32(0), + u32(sizeof(coff_import_directory_table_entry)), + u32(sizeof(coff_file_header) + + (NumberOfSections * sizeof(coff_section))), + u32(0), + u32(0), + u16(0), + u16(0), + u32(IMAGE_SCN_ALIGN_4BYTES | IMAGE_SCN_CNT_INITIALIZED_DATA | + IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$3 + static const coff_import_directory_table_entry ImportDescriptor{ + u32(0), u32(0), u32(0), u32(0), u32(0), + }; + append(Buffer, ImportDescriptor); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullImportDescriptorSymbolName}); + + StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef(F, DLLName)}; +} + +NewArchiveMember ObjectFactory::createNullThunk(std::vector &Buffer) { + static const uint32_t NumberOfSections = 2; + static const uint32_t NumberOfSymbols = 1; + uint32_t VASize = is32bit(Machine) ? 4 : 8; + + // COFF Header + coff_file_header Header{ + u16(Machine), + u16(NumberOfSections), + u32(0), + u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section)) + + // .idata$5 + VASize + + // .idata$4 + VASize), + u32(NumberOfSymbols), + u16(0), + u16(is32bit(Machine) ? IMAGE_FILE_32BIT_MACHINE : 0), + }; + append(Buffer, Header); + + // Section Header Table + static const coff_section SectionTable[NumberOfSections] = { + {{'.', 'i', 'd', 'a', 't', 'a', '$', '5'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section)), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + {{'.', 'i', 'd', 'a', 't', 'a', '$', '4'}, + u32(0), + u32(0), + u32(VASize), + u32(sizeof(coff_file_header) + NumberOfSections * sizeof(coff_section) + + VASize), + u32(0), + u32(0), + u16(0), + u16(0), + u32((is32bit(Machine) ? IMAGE_SCN_ALIGN_4BYTES + : IMAGE_SCN_ALIGN_8BYTES) | + IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | + IMAGE_SCN_MEM_WRITE)}, + }; + append(Buffer, SectionTable); + + // .idata$5, ILT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // .idata$4, IAT + append(Buffer, u32(0)); + if (!is32bit(Machine)) + append(Buffer, u32(0)); + + // Symbol Table + coff_symbol16 SymbolTable[NumberOfSymbols] = { + {{{0, 0, 0, 0, 0, 0, 0, 0}}, + u32(0), + u16(1), + u16(0), + IMAGE_SYM_CLASS_EXTERNAL, + 0}, + }; + SymbolTable[0].Name.Offset.Offset = sizeof(uint32_t); + append(Buffer, SymbolTable); + + // String Table + writeStringTable(Buffer, {NullThunkSymbolName}); + + StringRef F{reinterpret_cast(Buffer.data()), Buffer.size()}; + return {MemoryBufferRef{F, DLLName}}; +} + +NewArchiveMember ObjectFactory::createShortImport(StringRef Sym, + uint16_t Ordinal, + ImportType ImportType, + ImportNameType NameType) { + size_t ImpSize = DLLName.size() + Sym.size() + 2; // +2 for NULs + size_t Size = sizeof(coff_import_header) + ImpSize; + char *Buf = Alloc.Allocate(Size); + memset(Buf, 0, Size); + char *P = Buf; + + // Write short import library. + auto *Imp = reinterpret_cast(P); + P += sizeof(*Imp); + Imp->Sig2 = 0xFFFF; + Imp->Machine = Machine; + Imp->SizeOfData = ImpSize; + if (Ordinal > 0) + Imp->OrdinalHint = Ordinal; + Imp->TypeInfo = (NameType << 2) | ImportType; + + // Write symbol name and DLL name. + memcpy(P, Sym.data(), Sym.size()); + P += Sym.size() + 1; + memcpy(P, DLLName.data(), DLLName.size()); + + return {MemoryBufferRef(StringRef(Buf, Size), DLLName)}; +} + +std::error_code writeImportLibrary(StringRef DLLName, StringRef Path, + ArrayRef Exports, + MachineTypes Machine) { + + std::vector Members; + ObjectFactory OF(llvm::sys::path::filename(DLLName), Machine); + + std::vector ImportDescriptor; + Members.push_back(OF.createImportDescriptor(ImportDescriptor)); + + std::vector NullImportDescriptor; + Members.push_back(OF.createNullImportDescriptor(NullImportDescriptor)); + + std::vector NullThunk; + Members.push_back(OF.createNullThunk(NullThunk)); + + for (COFFShortExport E : Exports) { + if (E.Private) + continue; + + ImportType ImportType = IMPORT_CODE; + if (E.Data) + ImportType = IMPORT_DATA; + if (E.Constant) + ImportType = IMPORT_CONST; + + StringRef SymbolName = E.isWeak() ? E.ExtName : E.Name; + ImportNameType NameType = getNameType(SymbolName, E.Name, Machine); + Expected Name = E.ExtName.empty() + ? SymbolName + : replace(SymbolName, E.Name, E.ExtName); + + if (!Name) { + return errorToErrorCode(Name.takeError()); + } + + Members.push_back( + OF.createShortImport(*Name, E.Ordinal, ImportType, NameType)); + } + + std::pair Result = + writeArchive(Path, Members, /*WriteSymtab*/ true, object::Archive::K_GNU, + /*Deterministic*/ true, /*Thin*/ false); + + return Result.second; +} + +} // namespace object +} // namespace llvm diff --git a/lib/Object/COFFModuleDefinition.cpp b/lib/Object/COFFModuleDefinition.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0d69cb6b709c8103b47acb3eb0df219e1a4e22c4 --- /dev/null +++ b/lib/Object/COFFModuleDefinition.cpp @@ -0,0 +1,319 @@ +//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Windows-specific. +// A parser for the module-definition file (.def file). +// +// The format of module-definition files are described in this document: +// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/COFFModuleDefinition.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::COFF; +using namespace llvm; + +namespace llvm { +namespace object { + +enum Kind { + Unknown, + Eof, + Identifier, + Comma, + Equal, + KwBase, + KwConstant, + KwData, + KwExports, + KwHeapsize, + KwLibrary, + KwName, + KwNoname, + KwPrivate, + KwStacksize, + KwVersion, +}; + +struct Token { + explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} + Kind K; + StringRef Value; +}; + +static bool isDecorated(StringRef Sym) { + return Sym.startswith("_") || Sym.startswith("@") || Sym.startswith("?"); +} + +static Error createError(const Twine &Err) { + return make_error(StringRef(Err.str()), + object_error::parse_failed); +} + +class Lexer { +public: + Lexer(StringRef S) : Buf(S) {} + + Token lex() { + Buf = Buf.trim(); + if (Buf.empty()) + return Token(Eof); + + switch (Buf[0]) { + case '\0': + return Token(Eof); + case ';': { + size_t End = Buf.find('\n'); + Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); + return lex(); + } + case '=': + Buf = Buf.drop_front(); + return Token(Equal, "="); + case ',': + Buf = Buf.drop_front(); + return Token(Comma, ","); + case '"': { + StringRef S; + std::tie(S, Buf) = Buf.substr(1).split('"'); + return Token(Identifier, S); + } + default: { + size_t End = Buf.find_first_of("=,\r\n \t\v"); + StringRef Word = Buf.substr(0, End); + Kind K = llvm::StringSwitch(Word) + .Case("BASE", KwBase) + .Case("CONSTANT", KwConstant) + .Case("DATA", KwData) + .Case("EXPORTS", KwExports) + .Case("HEAPSIZE", KwHeapsize) + .Case("LIBRARY", KwLibrary) + .Case("NAME", KwName) + .Case("NONAME", KwNoname) + .Case("PRIVATE", KwPrivate) + .Case("STACKSIZE", KwStacksize) + .Case("VERSION", KwVersion) + .Default(Identifier); + Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); + return Token(K, Word); + } + } + } + +private: + StringRef Buf; +}; + +class Parser { +public: + explicit Parser(StringRef S, MachineTypes M) : Lex(S), Machine(M) {} + + Expected parse() { + do { + if (Error Err = parseOne()) + return std::move(Err); + } while (Tok.K != Eof); + return Info; + } + +private: + void read() { + if (Stack.empty()) { + Tok = Lex.lex(); + return; + } + Tok = Stack.back(); + Stack.pop_back(); + } + + Error readAsInt(uint64_t *I) { + read(); + if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) + return createError("integer expected"); + return Error::success(); + } + + Error expect(Kind Expected, StringRef Msg) { + read(); + if (Tok.K != Expected) + return createError(Msg); + return Error::success(); + } + + void unget() { Stack.push_back(Tok); } + + Error parseOne() { + read(); + switch (Tok.K) { + case Eof: + return Error::success(); + case KwExports: + for (;;) { + read(); + if (Tok.K != Identifier) { + unget(); + return Error::success(); + } + if (Error Err = parseExport()) + return Err; + } + case KwHeapsize: + return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); + case KwStacksize: + return parseNumbers(&Info.StackReserve, &Info.StackCommit); + case KwLibrary: + case KwName: { + bool IsDll = Tok.K == KwLibrary; // Check before parseName. + std::string Name; + if (Error Err = parseName(&Name, &Info.ImageBase)) + return Err; + // Append the appropriate file extension if not already present. + StringRef Ext = IsDll ? ".dll" : ".exe"; + if (!StringRef(Name).endswith_lower(Ext)) + Name += Ext; + + // Set the output file, but don't override /out if it was already passed. + if (Info.OutputFile.empty()) + Info.OutputFile = Name; + return Error::success(); + } + case KwVersion: + return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); + default: + return createError("unknown directive: " + Tok.Value); + } + } + + Error parseExport() { + COFFShortExport E; + E.Name = Tok.Value; + read(); + if (Tok.K == Equal) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + E.ExtName = E.Name; + E.Name = Tok.Value; + } else { + unget(); + } + + if (Machine == IMAGE_FILE_MACHINE_I386) { + if (!isDecorated(E.Name)) + E.Name = (std::string("_").append(E.Name)); + if (!E.ExtName.empty() && !isDecorated(E.ExtName)) + E.ExtName = (std::string("_").append(E.ExtName)); + } + + for (;;) { + read(); + if (Tok.K == Identifier && Tok.Value[0] == '@') { + Tok.Value.drop_front().getAsInteger(10, E.Ordinal); + read(); + if (Tok.K == KwNoname) { + E.Noname = true; + } else { + unget(); + } + continue; + } + if (Tok.K == KwData) { + E.Data = true; + continue; + } + if (Tok.K == KwConstant) { + E.Constant = true; + continue; + } + if (Tok.K == KwPrivate) { + E.Private = true; + continue; + } + unget(); + Info.Exports.push_back(E); + return Error::success(); + } + } + + // HEAPSIZE/STACKSIZE reserve[,commit] + Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { + if (Error Err = readAsInt(Reserve)) + return Err; + read(); + if (Tok.K != Comma) { + unget(); + Commit = nullptr; + return Error::success(); + } + if (Error Err = readAsInt(Commit)) + return Err; + return Error::success(); + } + + // NAME outputPath [BASE=address] + Error parseName(std::string *Out, uint64_t *Baseaddr) { + read(); + if (Tok.K == Identifier) { + *Out = Tok.Value; + } else { + *Out = ""; + unget(); + return Error::success(); + } + read(); + if (Tok.K == KwBase) { + if (Error Err = expect(Equal, "'=' expected")) + return Err; + if (Error Err = readAsInt(Baseaddr)) + return Err; + } else { + unget(); + *Baseaddr = 0; + } + return Error::success(); + } + + // VERSION major[.minor] + Error parseVersion(uint32_t *Major, uint32_t *Minor) { + read(); + if (Tok.K != Identifier) + return createError("identifier expected, but got " + Tok.Value); + StringRef V1, V2; + std::tie(V1, V2) = Tok.Value.split('.'); + if (V1.getAsInteger(10, *Major)) + return createError("integer expected, but got " + Tok.Value); + if (V2.empty()) + *Minor = 0; + else if (V2.getAsInteger(10, *Minor)) + return createError("integer expected, but got " + Tok.Value); + return Error::success(); + } + + Lexer Lex; + Token Tok; + std::vector Stack; + MachineTypes Machine; + COFFModuleDefinition Info; +}; + +Expected parseCOFFModuleDefinition(MemoryBufferRef MB, + MachineTypes Machine) { + return Parser(MB.getBuffer(), Machine).parse(); +} + +} // namespace object +} // namespace llvm diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index a2d8f12449e6fec5a97431c1b6105474b828a210..579c8dde366a073bcd5d2d9eaa7d1e882dbf022e 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -1,4 +1,4 @@ -//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===// +//===- COFFObjectFile.cpp - COFF object file implementation ---------------===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,29 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/COFF.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include +#include +#include +#include #include +#include +#include using namespace llvm; using namespace object; @@ -116,7 +129,7 @@ const coff_symbol_type *COFFObjectFile::toSymb(DataRefImpl Ref) const { const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const { const coff_section *Addr = reinterpret_cast(Ref.p); -# ifndef NDEBUG +#ifndef NDEBUG // Verify that the section points to a valid entry in the section table. if (Addr < SectionTable || Addr >= (SectionTable + getNumberOfSections())) report_fatal_error("Section was outside of section table."); @@ -124,7 +137,7 @@ const coff_section *COFFObjectFile::toSec(DataRefImpl Ref) const { uintptr_t Offset = uintptr_t(Addr) - uintptr_t(SectionTable); assert(Offset % sizeof(coff_section) == 0 && "Section did not point to the beginning of a section"); -# endif +#endif return Addr; } @@ -147,8 +160,7 @@ void COFFObjectFile::moveSymbolNext(DataRefImpl &Ref) const { Expected COFFObjectFile::getSymbolName(DataRefImpl Ref) const { COFFSymbolRef Symb = getCOFFSymbol(Ref); StringRef Result; - std::error_code EC = getSymbolName(Symb, Result); - if (EC) + if (std::error_code EC = getSymbolName(Symb, Result)) return errorCodeToError(EC); return Result; } @@ -281,6 +293,10 @@ uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const { return Result; } +uint64_t COFFObjectFile::getSectionIndex(DataRefImpl Sec) const { + return toSec(Sec) - SectionTable; +} + uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const { return getSectionSize(toSec(Ref)); } @@ -985,7 +1001,7 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const { if (Symbol.getNumberOfAuxSymbols() > 0) { // AUX data comes immediately after the symbol in COFF Aux = reinterpret_cast(Symbol.getRawPtr()) + SymbolSize; -# ifndef NDEBUG +#ifndef NDEBUG // Verify that the Aux symbol points to a valid entry in the symbol table. uintptr_t Offset = uintptr_t(Aux) - uintptr_t(base()); if (Offset < getPointerToSymbolTable() || @@ -995,7 +1011,7 @@ COFFObjectFile::getSymbolAuxData(COFFSymbolRef Symbol) const { assert((Offset - getPointerToSymbolTable()) % SymbolSize == 0 && "Aux Symbol data did not point to the beginning of a symbol"); -# endif +#endif } return makeArrayRef(Aux, Symbol.getNumberOfAuxSymbols() * SymbolSize); } @@ -1050,7 +1066,7 @@ COFFObjectFile::getSectionContents(const coff_section *Sec, // In COFF, a virtual section won't have any in-file // content, so the file pointer to the content will be zero. if (Sec->PointerToRawData == 0) - return object_error::parse_failed; + return std::error_code(); // The only thing that we need to verify is that the contents is contained // within the file bounds. We don't need to make sure it doesn't cover other // data, as there's nothing that says that is not allowed. @@ -1579,3 +1595,42 @@ std::error_code BaseRelocRef::getRVA(uint32_t &Result) const { Result = Header->PageRVA + Entry[Index].getOffset(); return std::error_code(); } + +#define RETURN_IF_ERROR(X) \ + if (auto EC = errorToErrorCode(X)) \ + return EC; + +ErrorOr> ResourceSectionRef::getDirStringAtOffset(uint32_t Offset) { + BinaryStreamReader Reader = BinaryStreamReader(BBS); + Reader.setOffset(Offset); + uint16_t Length; + RETURN_IF_ERROR(Reader.readInteger(Length)); + ArrayRef RawDirString; + RETURN_IF_ERROR(Reader.readArray(RawDirString, Length)); + return RawDirString; +} + +ErrorOr> +ResourceSectionRef::getEntryNameString(const coff_resource_dir_entry &Entry) { + return getDirStringAtOffset(Entry.Identifier.getNameOffset()); +} + +ErrorOr +ResourceSectionRef::getTableAtOffset(uint32_t Offset) { + const coff_resource_dir_table *Table = nullptr; + + BinaryStreamReader Reader(BBS); + Reader.setOffset(Offset); + RETURN_IF_ERROR(Reader.readObject(Table)); + assert(Table != nullptr); + return *Table; +} + +ErrorOr +ResourceSectionRef::getEntrySubDir(const coff_resource_dir_entry &Entry) { + return getTableAtOffset(Entry.Offset.value()); +} + +ErrorOr ResourceSectionRef::getBaseTable() { + return getTableAtOffset(0); +} diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp index 0be602b1fc1ab5beddce65a77c6280943847469a..53f084d7620e7419a28920b85cf76a95852e33a2 100644 --- a/lib/Object/Decompressor.cpp +++ b/lib/Object/Decompressor.cpp @@ -8,11 +8,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/Decompressor.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/Compression.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/ELF.h" using namespace llvm; using namespace llvm::support::endian; @@ -88,11 +88,6 @@ bool Decompressor::isCompressedELFSection(uint64_t Flags, StringRef Name) { return (Flags & ELF::SHF_COMPRESSED) || isGnuStyle(Name); } -Error Decompressor::decompress(SmallString<32> &Out) { - Out.resize(DecompressedSize); - return decompress({Out.data(), (size_t)DecompressedSize}); -} - Error Decompressor::decompress(MutableArrayRef Buffer) { size_t Size = Buffer.size(); return zlib::uncompress(SectionData, Buffer.data(), Size); diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 23682e1fabfd609b2f477f3116ba4f1cfbf554b8..448fb1bd6b561f2c2a6c3c39bd4fc633f91c4257 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -1,4 +1,4 @@ -//===- ELF.cpp - ELF object file implementation -----------------*- C++ -*-===// +//===- ELF.cpp - ELF object file implementation ---------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,19 +8,23 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ELF.h" +#include "llvm/BinaryFormat/ELF.h" -namespace llvm { -namespace object { +using namespace llvm; +using namespace object; -#define ELF_RELOC(name, value) \ - case ELF::name: \ - return #name; \ +#define STRINGIFY_ENUM_CASE(ns, name) \ + case ns::name: \ + return #name; -StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { +#define ELF_RELOC(name, value) STRINGIFY_ENUM_CASE(ELF, name) + +StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine, + uint32_t Type) { switch (Machine) { case ELF::EM_X86_64: switch (Type) { -#include "llvm/Support/ELFRelocs/x86_64.def" +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" default: break; } @@ -28,77 +32,77 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { case ELF::EM_386: case ELF::EM_IAMCU: switch (Type) { -#include "llvm/Support/ELFRelocs/i386.def" +#include "llvm/BinaryFormat/ELFRelocs/i386.def" default: break; } break; case ELF::EM_MIPS: switch (Type) { -#include "llvm/Support/ELFRelocs/Mips.def" +#include "llvm/BinaryFormat/ELFRelocs/Mips.def" default: break; } break; case ELF::EM_AARCH64: switch (Type) { -#include "llvm/Support/ELFRelocs/AArch64.def" +#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" default: break; } break; case ELF::EM_ARM: switch (Type) { -#include "llvm/Support/ELFRelocs/ARM.def" +#include "llvm/BinaryFormat/ELFRelocs/ARM.def" default: break; } break; case ELF::EM_AVR: switch (Type) { -#include "llvm/Support/ELFRelocs/AVR.def" +#include "llvm/BinaryFormat/ELFRelocs/AVR.def" default: break; } break; case ELF::EM_HEXAGON: switch (Type) { -#include "llvm/Support/ELFRelocs/Hexagon.def" +#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def" default: break; } break; case ELF::EM_LANAI: switch (Type) { -#include "llvm/Support/ELFRelocs/Lanai.def" +#include "llvm/BinaryFormat/ELFRelocs/Lanai.def" default: break; } break; case ELF::EM_PPC: switch (Type) { -#include "llvm/Support/ELFRelocs/PowerPC.def" +#include "llvm/BinaryFormat/ELFRelocs/PowerPC.def" default: break; } break; case ELF::EM_PPC64: switch (Type) { -#include "llvm/Support/ELFRelocs/PowerPC64.def" +#include "llvm/BinaryFormat/ELFRelocs/PowerPC64.def" default: break; } break; case ELF::EM_RISCV: switch (Type) { -#include "llvm/Support/ELFRelocs/RISCV.def" +#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" default: break; } break; case ELF::EM_S390: switch (Type) { -#include "llvm/Support/ELFRelocs/SystemZ.def" +#include "llvm/BinaryFormat/ELFRelocs/SystemZ.def" default: break; } @@ -107,27 +111,27 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { case ELF::EM_SPARC32PLUS: case ELF::EM_SPARCV9: switch (Type) { -#include "llvm/Support/ELFRelocs/Sparc.def" +#include "llvm/BinaryFormat/ELFRelocs/Sparc.def" default: break; } break; case ELF::EM_WEBASSEMBLY: switch (Type) { -#include "llvm/Support/ELFRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/ELFRelocs/WebAssembly.def" default: break; } break; case ELF::EM_AMDGPU: switch (Type) { -#include "llvm/Support/ELFRelocs/AMDGPU.def" +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" default: break; } case ELF::EM_BPF: switch (Type) { -#include "llvm/Support/ELFRelocs/BPF.def" +#include "llvm/BinaryFormat/ELFRelocs/BPF.def" default: break; } @@ -140,5 +144,61 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { #undef ELF_RELOC -} // end namespace object -} // end namespace llvm +StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { + switch (Machine) { + case ELF::EM_ARM: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_EXIDX); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_PREEMPTMAP); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_DEBUGOVERLAY); + STRINGIFY_ENUM_CASE(ELF, SHT_ARM_OVERLAYSECTION); + } + break; + case ELF::EM_HEXAGON: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_HEX_ORDERED); } + break; + case ELF::EM_X86_64: + switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_X86_64_UNWIND); } + break; + case ELF::EM_MIPS: + case ELF::EM_MIPS_RS3_LE: + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_REGINFO); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_OPTIONS); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS); + STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_DWARF); + } + break; + default: + break; + } + + switch (Type) { + STRINGIFY_ENUM_CASE(ELF, SHT_NULL); + STRINGIFY_ENUM_CASE(ELF, SHT_PROGBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_STRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_RELA); + STRINGIFY_ENUM_CASE(ELF, SHT_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNAMIC); + STRINGIFY_ENUM_CASE(ELF, SHT_NOTE); + STRINGIFY_ENUM_CASE(ELF, SHT_NOBITS); + STRINGIFY_ENUM_CASE(ELF, SHT_REL); + STRINGIFY_ENUM_CASE(ELF, SHT_SHLIB); + STRINGIFY_ENUM_CASE(ELF, SHT_DYNSYM); + STRINGIFY_ENUM_CASE(ELF, SHT_INIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_FINI_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_PREINIT_ARRAY); + STRINGIFY_ENUM_CASE(ELF, SHT_GROUP); + STRINGIFY_ENUM_CASE(ELF, SHT_SYMTAB_SHNDX); + STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_ODRTAB); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verneed); + STRINGIFY_ENUM_CASE(ELF, SHT_GNU_versym); + default: + return "Unknown"; + } +} diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 3f8c81c8e91105921857ffff33026b30eceecb4b..fa136d782b5aa4e41b564a5dbbf6082786e79008 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -1,4 +1,4 @@ -//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===// +//===- ELFObjectFile.cpp - ELF object file implementation -----------------===// // // The LLVM Compiler Infrastructure // @@ -12,11 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ELFObjectFile.h" -#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Object/Error.h" #include "llvm/Support/ARMAttributeParser.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include +#include +#include +#include +#include +#include +#include -namespace llvm { +using namespace llvm; using namespace object; ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) @@ -299,5 +314,3 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const { TheTriple.setArchName(Triple); } - -} // end namespace llvm diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index adbf0de6d1bc420458dfd694329f335c423bc50d..e7807b0383351c182555f608fa659796c09cb9ff 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -14,6 +14,7 @@ #include "llvm/Object/IRObjectFile.h" #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/LLVMContext.h" @@ -95,13 +96,13 @@ ErrorOr IRObjectFile::findBitcodeInObject(const ObjectFile &Obj } ErrorOr IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { - sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + file_magic Type = identify_magic(Object.getBuffer()); switch (Type) { - case sys::fs::file_magic::bitcode: + case file_magic::bitcode: return Object; - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { Expected> ObjFile = ObjectFile::createObjectFile(Object, Type); if (!ObjFile) @@ -138,3 +139,25 @@ IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { return std::unique_ptr( new IRObjectFile(*BCOrErr, std::move(Mods))); } + +Expected object::readIRSymtab(MemoryBufferRef MBRef) { + IRSymtabFile F; + ErrorOr BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(MBRef); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected BFCOrErr = getBitcodeFileContents(*BCOrErr); + if (!BFCOrErr) + return BFCOrErr.takeError(); + + Expected FCOrErr = irsymtab::readBitcode(*BFCOrErr); + if (!FCOrErr) + return FCOrErr.takeError(); + + F.Mods = std::move(BFCOrErr->Mods); + F.Symtab = std::move(FCOrErr->Symtab); + F.Strtab = std::move(FCOrErr->Strtab); + F.TheReader = std::move(FCOrErr->TheReader); + return std::move(F); +} diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp index 1ef2c4bb730325cbd07b652bd9a52d8f26111fa2..a6cd5dda12d3e0ba16028270db758004916a97ad 100644 --- a/lib/Object/IRSymtab.cpp +++ b/lib/Object/IRSymtab.cpp @@ -1,4 +1,4 @@ -//===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===// +//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===// // // The LLVM Compiler Infrastructure // @@ -8,13 +8,35 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/IRSymtab.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/ObjectUtils.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/Mangler.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include using namespace llvm; using namespace irsymtab; @@ -25,17 +47,16 @@ namespace { struct Builder { SmallVector &Symtab; SmallVector &Strtab; + Builder(SmallVector &Symtab, SmallVector &Strtab) : Symtab(Symtab), Strtab(Strtab) {} - StringTableBuilder StrtabBuilder{StringTableBuilder::ELF}; + StringTableBuilder StrtabBuilder{StringTableBuilder::RAW}; BumpPtrAllocator Alloc; StringSaver Saver{Alloc}; DenseMap ComdatMap; - ModuleSymbolTable Msymtab; - SmallPtrSet Used; Mangler Mang; Triple TT; @@ -49,7 +70,9 @@ struct Builder { void setStr(storage::Str &S, StringRef Value) { S.Offset = StrtabBuilder.add(Value); + S.Size = Value.size(); } + template void writeRange(storage::Range &R, const std::vector &Objs) { R.Offset = Symtab.size(); @@ -59,45 +82,60 @@ struct Builder { } Error addModule(Module *M); - Error addSymbol(ModuleSymbolTable::Symbol Sym); + Error addSymbol(const ModuleSymbolTable &Msymtab, + const SmallPtrSet &Used, + ModuleSymbolTable::Symbol Sym); Error build(ArrayRef Mods); }; Error Builder::addModule(Module *M) { + if (M->getDataLayoutStr().empty()) + return make_error("input module has no datalayout", + inconvertibleErrorCode()); + + SmallPtrSet Used; collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); - storage::Module Mod; - Mod.Begin = Msymtab.symbols().size(); + ModuleSymbolTable Msymtab; Msymtab.addModule(M); - Mod.End = Msymtab.symbols().size(); + + storage::Module Mod; + Mod.Begin = Syms.size(); + Mod.End = Syms.size() + Msymtab.symbols().size(); + Mod.UncBegin = Uncommons.size(); Mods.push_back(Mod); if (TT.isOSBinFormatCOFF()) { if (auto E = M->materializeMetadata()) return E; - if (Metadata *Val = M->getModuleFlag("Linker Options")) { - MDNode *LinkerOptions = cast(Val); - for (const MDOperand &MDOptions : LinkerOptions->operands()) + if (NamedMDNode *LinkerOptions = + M->getNamedMetadata("llvm.linker.options")) { + for (MDNode *MDOptions : LinkerOptions->operands()) for (const MDOperand &MDOption : cast(MDOptions)->operands()) COFFLinkerOptsOS << " " << cast(MDOption)->getString(); } } + for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) + if (Error Err = addSymbol(Msymtab, Used, Msym)) + return Err; + return Error::success(); } -Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) { +Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, + const SmallPtrSet &Used, + ModuleSymbolTable::Symbol Msym) { Syms.emplace_back(); storage::Symbol &Sym = Syms.back(); Sym = {}; - Sym.UncommonIndex = -1; storage::Uncommon *Unc = nullptr; auto Uncommon = [&]() -> storage::Uncommon & { if (Unc) return *Unc; - Sym.UncommonIndex = Uncommons.size(); + Sym.Flags |= 1 << storage::Symbol::FB_has_uncommon; Uncommons.emplace_back(); Unc = &Uncommons.back(); *Unc = {}; @@ -125,10 +163,15 @@ Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) { Sym.Flags |= 1 << storage::Symbol::FB_global; if (Flags & object::BasicSymbolRef::SF_FormatSpecific) Sym.Flags |= 1 << storage::Symbol::FB_format_specific; + if (Flags & object::BasicSymbolRef::SF_Executable) + Sym.Flags |= 1 << storage::Symbol::FB_executable; Sym.ComdatIndex = -1; auto *GV = Msym.dyn_cast(); if (!GV) { + // Undefined module asm symbols act as GC roots and are implicitly used. + if (Flags & object::BasicSymbolRef::SF_Undefined) + Sym.Flags |= 1 << storage::Symbol::FB_used; setStr(Sym.IRName, ""); return Error::success(); } @@ -188,18 +231,14 @@ Error Builder::build(ArrayRef IRMods) { storage::Header Hdr; assert(!IRMods.empty()); + setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple()); setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); TT = Triple(IRMods[0]->getTargetTriple()); - // This adds the symbols for each module to Msymtab. for (auto *M : IRMods) if (Error Err = addModule(M)) return Err; - for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols()) - if (Error Err = addSymbol(Msym)) - return Err; - COFFLinkerOptsOS.flush(); setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts); @@ -220,9 +259,46 @@ Error Builder::build(ArrayRef IRMods) { return Error::success(); } -} // anonymous namespace +} // end anonymous namespace Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, SmallVector &Strtab) { return Builder(Symtab, Strtab).build(Mods); } + +// Upgrade a vector of bitcode modules created by an old version of LLVM by +// creating an irsymtab for them in the current format. +static Expected upgrade(ArrayRef BMs) { + FileContents FC; + + LLVMContext Ctx; + std::vector Mods; + std::vector> OwnedMods; + for (auto BM : BMs) { + Expected> MOrErr = + BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true, + /*IsImporting*/ false); + if (!MOrErr) + return MOrErr.takeError(); + + Mods.push_back(MOrErr->get()); + OwnedMods.push_back(std::move(*MOrErr)); + } + + if (Error E = build(Mods, FC.Symtab, FC.Strtab)) + return std::move(E); + + FC.TheReader = {{FC.Symtab.data(), FC.Symtab.size()}, + {FC.Strtab.data(), FC.Strtab.size()}}; + return std::move(FC); +} + +Expected irsymtab::readBitcode(const BitcodeFileContents &BFC) { + if (BFC.Mods.empty()) + return make_error("Bitcode file does not contain any modules", + inconvertibleErrorCode()); + + // Right now we have no on-disk representation of symbol tables, so we always + // upgrade. + return upgrade(BFC.Mods); +} diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt index bae578c76f7e89655f77d1d1a676112e486e8fe6..687713bab6a2903caa860758c23fede1d382b520 100644 --- a/lib/Object/LLVMBuild.txt +++ b/lib/Object/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Object parent = Libraries -required_libraries = BitReader Core MC MCParser Support +required_libraries = BitReader Core MC BinaryFormat MCParser Support diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 1753d2baaedd2181c72370f1eb6876b4fef164a8..7804bbe06f83ecbd0f02ae201db86e5cbcef021b 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1,4 +1,4 @@ -//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// +//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,32 +12,52 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/MachO.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/Error.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" -#include +#include +#include +#include +#include #include #include #include +#include +#include +#include using namespace llvm; using namespace object; namespace { + struct section_base { char sectname[16]; char segname[16]; }; -} + +} // end anonymous namespace static Error malformedError(Twine Msg) { @@ -1144,11 +1164,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64bits, Error &Err, uint32_t UniversalCputype, uint32_t UniversalIndex) - : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), - SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr), - DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr), - DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr), - HasPageZeroSegment(false) { + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) { ErrorAsOutParameter ErrAsOutParam(&Err); uint64_t SizeOfHeaders; uint32_t cputype; @@ -1804,6 +1820,10 @@ uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { return getSection(Sec).addr; } +uint64_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { // In the case if a malformed Mach-O file where the section offset is past // the end of the file or some part of the section size is past the end of @@ -2343,11 +2363,11 @@ StringRef MachOObjectFile::getFileFormatName() const { unsigned CPUType = getCPUType(*this); if (!is64Bit()) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return "Mach-O 32-bit i386"; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return "Mach-O arm"; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return "Mach-O 32-bit ppc"; default: return "Mach-O 32-bit unknown"; @@ -2355,11 +2375,11 @@ StringRef MachOObjectFile::getFileFormatName() const { } switch (CPUType) { - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return "Mach-O 64-bit x86-64"; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return "Mach-O arm64"; - case llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return "Mach-O 64-bit ppc64"; default: return "Mach-O 64-bit unknown"; @@ -2368,17 +2388,17 @@ StringRef MachOObjectFile::getFileFormatName() const { Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { switch (CPUType) { - case llvm::MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_I386: return Triple::x86; - case llvm::MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_X86_64: return Triple::x86_64; - case llvm::MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM: return Triple::arm; - case llvm::MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64: return Triple::aarch64; - case llvm::MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC: return Triple::ppc; - case llvm::MachO::CPU_TYPE_POWERPC64: + case MachO::CPU_TYPE_POWERPC64: return Triple::ppc64; default: return Triple::UnknownArch; @@ -2571,8 +2591,7 @@ dice_iterator MachOObjectFile::end_dices() const { return dice_iterator(DiceRef(DRI, this)); } -ExportEntry::ExportEntry(ArrayRef T) - : Trie(T), Malformed(false), Done(false) {} +ExportEntry::ExportEntry(ArrayRef T) : Trie(T) {} void ExportEntry::moveToFirst() { pushNode(0); @@ -2641,9 +2660,7 @@ uint32_t ExportEntry::nodeOffset() const { } ExportEntry::NodeState::NodeState(const uint8_t *Ptr) - : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), - ImportName(nullptr), ChildCount(0), NextChildIndex(0), - ParentStringLength(0), IsExportNode(false) {} + : Start(Ptr), Current(Ptr) {} void ExportEntry::pushNode(uint64_t offset) { const uint8_t *Ptr = Trie.begin() + offset; @@ -2733,7 +2750,7 @@ void ExportEntry::moveNext() { iterator_range MachOObjectFile::exports(ArrayRef Trie) { ExportEntry Start(Trie); - if (Trie.size() == 0) + if (Trie.empty()) Start.moveToEnd(); else Start.moveToFirst(); @@ -2750,9 +2767,8 @@ iterator_range MachOObjectFile::exports() const { MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O, ArrayRef Bytes, bool is64Bit) - : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), - SegmentIndex(-1), RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), - PointerSize(is64Bit ? 8 : 4), Done(false) {} + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4) {} void MachORebaseEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -2794,7 +2810,7 @@ void MachORebaseEntry::moveNext() { More = false; Done = true; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n"); break; case MachO::REBASE_OPCODE_SET_TYPE_IMM: RebaseType = ImmValue; @@ -2807,8 +2823,8 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " - << "RebaseType=" << (int) RebaseType << "\n"); + dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " + << "RebaseType=" << (int) RebaseType << "\n"); break; case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; @@ -2831,10 +2847,10 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: SegmentOffset += readULEB128(&error); @@ -2855,9 +2871,9 @@ void MachORebaseEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2881,9 +2897,9 @@ void MachORebaseEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2913,11 +2929,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2954,11 +2970,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -2992,11 +3008,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, @@ -3041,11 +3057,11 @@ void MachORebaseEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-rebase", - llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; default: *E = malformedError("bad rebase info (bad opcode value 0x" + @@ -3131,10 +3147,8 @@ iterator_range MachOObjectFile::rebaseTable(Error &Err) { MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O, ArrayRef Bytes, bool is64Bit, Kind BK) - : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), - SegmentIndex(-1), LibraryOrdinalSet(false), Ordinal(0), Flags(0), - Addend(0), RemainingLoopCount(0), AdvanceAmount(0), BindType(0), - PointerSize(is64Bit ? 8 : 4), TableKind(BK), Done(false) {} + : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), + PointerSize(is64Bit ? 8 : 4), TableKind(BK) {} void MachOBindEntry::moveToFirst() { Ptr = Opcodes.begin(); @@ -3189,7 +3203,7 @@ void MachOBindEntry::moveNext() { } More = false; moveToEnd(); - DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n"); + DEBUG_WITH_TYPE("mach-o-bind", dbgs() << "BIND_OPCODE_DONE\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: if (TableKind == Kind::Weak) { @@ -3211,8 +3225,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: if (TableKind == Kind::Weak) { @@ -3241,8 +3255,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: if (TableKind == Kind::Weak) { @@ -3255,7 +3269,6 @@ void MachOBindEntry::moveNext() { if (ImmValue) { SignExtended = MachO::BIND_OPCODE_MASK | ImmValue; Ordinal = SignExtended; - LibraryOrdinalSet = true; if (Ordinal < MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) { *E = malformedError("for BIND_OPCODE_SET_DYLIB_SPECIAL_IMM unknown " "special ordinal: " + Twine((int)Ordinal) + " for opcode at: " @@ -3265,10 +3278,11 @@ void MachOBindEntry::moveNext() { } } else Ordinal = 0; + LibraryOrdinalSet = true; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " - << "Ordinal=" << Ordinal << "\n"); + dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: " + << "Ordinal=" << Ordinal << "\n"); break; case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: Flags = ImmValue; @@ -3288,8 +3302,8 @@ void MachOBindEntry::moveNext() { ++Ptr; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " - << "SymbolName=" << SymbolName << "\n"); + dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: " + << "SymbolName=" << SymbolName << "\n"); if (TableKind == Kind::Weak) { if (ImmValue & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION) return; @@ -3306,8 +3320,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " - << "BindType=" << (int)BindType << "\n"); + dbgs() << "BIND_OPCODE_SET_TYPE_IMM: " + << "BindType=" << (int)BindType << "\n"); break; case MachO::BIND_OPCODE_SET_ADDEND_SLEB: Addend = readSLEB128(&error); @@ -3320,8 +3334,8 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " - << "Addend=" << Addend << "\n"); + dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: " + << "Addend=" << Addend << "\n"); break; case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: SegmentIndex = ImmValue; @@ -3343,10 +3357,10 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " - << "SegmentIndex=" << SegmentIndex << ", " - << format("SegmentOffset=0x%06X", SegmentOffset) - << "\n"); + dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); break; case MachO::BIND_OPCODE_ADD_ADDR_ULEB: SegmentOffset += readULEB128(&error); @@ -3366,9 +3380,9 @@ void MachOBindEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); break; case MachO::BIND_OPCODE_DO_BIND: AdvanceAmount = PointerSize; @@ -3395,9 +3409,9 @@ void MachOBindEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: if (TableKind == Kind::Lazy) { @@ -3452,11 +3466,11 @@ void MachOBindEntry::moveNext() { RemainingLoopCount = 0; DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: if (TableKind == Kind::Lazy) { @@ -3501,10 +3515,9 @@ void MachOBindEntry::moveNext() { return; } DEBUG_WITH_TYPE("mach-o-bind", - llvm::dbgs() + dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: " - << format("SegmentOffset=0x%06X", - SegmentOffset) << "\n"); + << format("SegmentOffset=0x%06X", SegmentOffset) << "\n"); return; case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: if (TableKind == Kind::Lazy) { @@ -3568,11 +3581,11 @@ void MachOBindEntry::moveNext() { } DEBUG_WITH_TYPE( "mach-o-bind", - llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " - << format("SegmentOffset=0x%06X", SegmentOffset) - << ", AdvanceAmount=" << AdvanceAmount - << ", RemainingLoopCount=" << RemainingLoopCount - << "\n"); + dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); return; default: *E = malformedError("bad bind info (bad opcode value 0x" + @@ -4301,3 +4314,9 @@ ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer, return make_error("Unrecognized MachO magic number", object_error::invalid_file_type); } + +StringRef MachOObjectFile::mapDebugSectionName(StringRef Name) const { + return StringSwitch(Name) + .Case("debug_str_offs", "debug_str_offsets") + .Default(Name); +} diff --git a/lib/Object/ModuleSummaryIndexObjectFile.cpp b/lib/Object/ModuleSummaryIndexObjectFile.cpp deleted file mode 100644 index de1ddab88fd4017859adb90b8b8c6e00baa5e09c..0000000000000000000000000000000000000000 --- a/lib/Object/ModuleSummaryIndexObjectFile.cpp +++ /dev/null @@ -1,120 +0,0 @@ -//===- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation ==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Part of the ModuleSummaryIndexObjectFile class implementation. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/ModuleSummaryIndex.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; -using namespace object; - -static llvm::cl::opt IgnoreEmptyThinLTOIndexFile( - "ignore-empty-index-file", llvm::cl::ZeroOrMore, - llvm::cl::desc( - "Ignore an empty index file and perform non-ThinLTO compilation"), - llvm::cl::init(false)); - -ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile( - MemoryBufferRef Object, std::unique_ptr I) - : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) { -} - -ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() {} - -std::unique_ptr ModuleSummaryIndexObjectFile::takeIndex() { - return std::move(Index); -} - -ErrorOr -ModuleSummaryIndexObjectFile::findBitcodeInObject(const ObjectFile &Obj) { - for (const SectionRef &Sec : Obj.sections()) { - if (Sec.isBitcode()) { - StringRef SecContents; - if (std::error_code EC = Sec.getContents(SecContents)) - return EC; - return MemoryBufferRef(SecContents, Obj.getFileName()); - } - } - - return object_error::bitcode_section_not_found; -} - -ErrorOr -ModuleSummaryIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { - sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); - switch (Type) { - case sys::fs::file_magic::bitcode: - return Object; - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { - Expected> ObjFile = - ObjectFile::createObjectFile(Object, Type); - if (!ObjFile) - return errorToErrorCode(ObjFile.takeError()); - return findBitcodeInObject(*ObjFile->get()); - } - default: - return object_error::invalid_file_type; - } -} - -// Parse module summary index in the given memory buffer. -// Return new ModuleSummaryIndexObjectFile instance containing parsed -// module summary/index. -Expected> -ModuleSummaryIndexObjectFile::create(MemoryBufferRef Object) { - ErrorOr BCOrErr = findBitcodeInMemBuffer(Object); - if (!BCOrErr) - return errorCodeToError(BCOrErr.getError()); - - Expected> IOrErr = - getModuleSummaryIndex(BCOrErr.get()); - - if (!IOrErr) - return IOrErr.takeError(); - - std::unique_ptr Index = std::move(IOrErr.get()); - return llvm::make_unique(Object, - std::move(Index)); -} - -// Parse the module summary index out of an IR file and return the summary -// index object if found, or nullptr if not. -Expected> -llvm::getModuleSummaryIndexForFile(StringRef Path, StringRef Identifier) { - ErrorOr> FileOrErr = - MemoryBuffer::getFileOrSTDIN(Path); - std::error_code EC = FileOrErr.getError(); - if (EC) - return errorCodeToError(EC); - std::unique_ptr MemBuffer = std::move(FileOrErr.get()); - // If Identifier is non-empty, use it as the buffer identifier, which - // will become the module path in the index. - if (Identifier.empty()) - Identifier = MemBuffer->getBufferIdentifier(); - MemoryBufferRef BufferRef(MemBuffer->getBuffer(), Identifier); - if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize()) - return nullptr; - Expected> ObjOrErr = - object::ModuleSummaryIndexObjectFile::create(BufferRef); - if (!ObjOrErr) - return ObjOrErr.takeError(); - - object::ModuleSummaryIndexObjectFile &Obj = **ObjOrErr; - return Obj.takeIndex(); -} diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp index 9a935d8e08699dff37691ce50dcb82051b6091e6..f2e7a218c13a1a089c7e21fd149287439bfa2471 100644 --- a/lib/Object/ModuleSymbolTable.cpp +++ b/lib/Object/ModuleSymbolTable.cpp @@ -1,4 +1,4 @@ -//===- ModuleSymbolTable.cpp - symbol table for in-memory IR ----*- C++ -*-===// +//===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===// // // The LLVM Compiler Infrastructure // @@ -13,27 +13,45 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ModuleSymbolTable.h" #include "RecordStreamer.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/IR/GVMaterializer.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Object/ObjectFile.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + using namespace llvm; using namespace object; diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 6df481b060e100c5f5f54058dd2df15b227fd808..1d2859cfbe9d87b3a0c7203741b0e4c67a19b682 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm-c/Object.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Object/ObjectFile.h" using namespace llvm; diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index f36388b677f36b424e23b34ffceb8ff944bdce0d..8377dd0d73fa4bfec97f3fdb920b1c277a57d940 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -1,4 +1,4 @@ -//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===// +//===- ObjectFile.cpp - File format independent object file ---------------===// // // The LLVM Compiler Infrastructure // @@ -12,19 +12,28 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/ObjectFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/Error.h" #include "llvm/Object/MachO.h" #include "llvm/Object/Wasm.h" +#include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include #include using namespace llvm; using namespace object; -void ObjectFile::anchor() { } +void ObjectFile::anchor() {} ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source) : SymbolicFile(Type, Source) {} @@ -71,42 +80,42 @@ section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { } Expected> -ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) { +ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { StringRef Data = Object.getBuffer(); - if (Type == sys::fs::file_magic::unknown) - Type = sys::fs::identify_magic(Data); + if (Type == file_magic::unknown) + Type = identify_magic(Data); switch (Type) { - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::bitcode: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::archive: - case sys::fs::file_magic::macho_universal_binary: - case sys::fs::file_magic::windows_resource: + case file_magic::unknown: + case file_magic::bitcode: + case file_magic::coff_cl_gl_object: + case file_magic::archive: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: return errorCodeToError(object_error::invalid_file_type); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: return errorOrToExpected(createELFObjectFile(Object)); - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: + case file_magic::macho_object: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: return createMachOObjectFile(Object); - case sys::fs::file_magic::coff_object: - case sys::fs::file_magic::coff_import_library: - case sys::fs::file_magic::pecoff_executable: + case file_magic::coff_object: + case file_magic::coff_import_library: + case file_magic::pecoff_executable: return errorOrToExpected(createCOFFObjectFile(Object)); - case sys::fs::file_magic::wasm_object: + case file_magic::wasm_object: return createWasmObjectFile(Object); } llvm_unreachable("Unexpected Object File Type"); diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp index a5018443b87dc6c59e52dc07a824f9b2fef26864..e94e9cfed394530ada826177b2c0ae72657b44ad 100644 --- a/lib/Object/RecordStreamer.cpp +++ b/lib/Object/RecordStreamer.cpp @@ -9,6 +9,7 @@ #include "RecordStreamer.h" #include "llvm/MC/MCSymbol.h" + using namespace llvm; void RecordStreamer::markDefined(const MCSymbol &Symbol) { @@ -69,16 +70,16 @@ void RecordStreamer::markUsed(const MCSymbol &Symbol) { void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); } +RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} + RecordStreamer::const_iterator RecordStreamer::begin() { return Symbols.begin(); } RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } -RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} - void RecordStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, bool) { MCStreamer::EmitInstruction(Inst, STI); } diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h index c3bd5b09a9bf55993a55bcea000f0846cd8694c1..4d119091a3d2990c88b9f72ec9f3e3abd8404f90 100644 --- a/lib/Object/RecordStreamer.h +++ b/lib/Object/RecordStreamer.h @@ -1,4 +1,4 @@ -//===-- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*===// +//===- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,9 +10,16 @@ #ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H #define LLVM_LIB_OBJECT_RECORDSTREAMER_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/SMLoc.h" +#include namespace llvm { + class RecordStreamer : public MCStreamer { public: enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used, @@ -24,17 +31,21 @@ private: // their symbol binding after parsing complete. This maps from each // aliasee to its list of aliases. DenseMap> SymverAliasMap; + void markDefined(const MCSymbol &Symbol); void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute); void markUsed(const MCSymbol &Symbol); void visitUsedSymbol(const MCSymbol &Sym) override; public: - typedef StringMap::const_iterator const_iterator; + RecordStreamer(MCContext &Context); + + using const_iterator = StringMap::const_iterator; + const_iterator begin(); const_iterator end(); - RecordStreamer(MCContext &Context); - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override; void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; @@ -49,6 +60,7 @@ public: DenseMap> &symverAliases() { return SymverAliasMap; } + /// Get the state recorded for the given symbol. State getSymbolState(const MCSymbol *Sym) { auto SI = Symbols.find(Sym->getName()); @@ -57,5 +69,7 @@ public: return SI->second; } }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_OBJECT_RECORDSTREAMER_H diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index 4b51a49cf342d5184d5c8237c89d83da64ceb8cb..1042d29d2350cee22f066446ff3838facff6469f 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -1,4 +1,4 @@ -//===- SymbolicFile.cpp - Interface that only provides symbols --*- C++ -*-===// +//===- SymbolicFile.cpp - Interface that only provides symbols ------------===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,21 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/COFF.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include +#include using namespace llvm; using namespace object; @@ -24,47 +33,48 @@ using namespace object; SymbolicFile::SymbolicFile(unsigned int Type, MemoryBufferRef Source) : Binary(Type, Source) {} -SymbolicFile::~SymbolicFile() {} +SymbolicFile::~SymbolicFile() = default; -Expected> SymbolicFile::createSymbolicFile( - MemoryBufferRef Object, sys::fs::file_magic Type, LLVMContext *Context) { +Expected> +SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, + LLVMContext *Context) { StringRef Data = Object.getBuffer(); - if (Type == sys::fs::file_magic::unknown) - Type = sys::fs::identify_magic(Data); + if (Type == file_magic::unknown) + Type = identify_magic(Data); switch (Type) { - case sys::fs::file_magic::bitcode: + case file_magic::bitcode: if (Context) return IRObjectFile::create(Object, *Context); LLVM_FALLTHROUGH; - case sys::fs::file_magic::unknown: - case sys::fs::file_magic::archive: - case sys::fs::file_magic::coff_cl_gl_object: - case sys::fs::file_magic::macho_universal_binary: - case sys::fs::file_magic::windows_resource: + case file_magic::unknown: + case file_magic::archive: + case file_magic::coff_cl_gl_object: + case file_magic::macho_universal_binary: + case file_magic::windows_resource: return errorCodeToError(object_error::invalid_file_type); - case sys::fs::file_magic::elf: - case sys::fs::file_magic::elf_executable: - case sys::fs::file_magic::elf_shared_object: - case sys::fs::file_magic::elf_core: - case sys::fs::file_magic::macho_executable: - case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case sys::fs::file_magic::macho_core: - case sys::fs::file_magic::macho_preload_executable: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib: - case sys::fs::file_magic::macho_dynamic_linker: - case sys::fs::file_magic::macho_bundle: - case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case sys::fs::file_magic::macho_dsym_companion: - case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::pecoff_executable: - case sys::fs::file_magic::wasm_object: + case file_magic::elf: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + case file_magic::macho_executable: + case file_magic::macho_fixed_virtual_memory_shared_lib: + case file_magic::macho_core: + case file_magic::macho_preload_executable: + case file_magic::macho_dynamically_linked_shared_lib: + case file_magic::macho_dynamic_linker: + case file_magic::macho_bundle: + case file_magic::macho_dynamically_linked_shared_lib_stub: + case file_magic::macho_dsym_companion: + case file_magic::macho_kext_bundle: + case file_magic::pecoff_executable: + case file_magic::wasm_object: return ObjectFile::createObjectFile(Object, Type); - case sys::fs::file_magic::coff_import_library: + case file_magic::coff_import_library: return std::unique_ptr(new COFFImportFile(Object)); - case sys::fs::file_magic::elf_relocatable: - case sys::fs::file_magic::macho_object: - case sys::fs::file_magic::coff_object: { + case file_magic::elf_relocatable: + case file_magic::macho_object: + case file_magic::coff_object: { Expected> Obj = ObjectFile::createObjectFile(Object, Type); if (!Obj || !Context) diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index fc1dca35424e375d0ddb54258540311f9b7dfc86..5e0affb9ef94515c1802404c5c74dbd440c0761e 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -11,6 +11,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/Binary.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" @@ -20,9 +22,10 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/Wasm.h" #include +#include #include +#include #include using namespace llvm; @@ -141,7 +144,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) { Expr.Value.Float64 = readFloat64(Ptr); break; case wasm::WASM_OPCODE_GET_GLOBAL: - Expr.Value.Global = readUint32(Ptr); + Expr.Value.Global = readULEB128(Ptr); break; default: return make_error("Invalid opcode in init_expr", @@ -165,6 +168,13 @@ static wasm::WasmLimits readLimits(const uint8_t *&Ptr) { return Result; } +static wasm::WasmTable readTable(const uint8_t *&Ptr) { + wasm::WasmTable Table; + Table.ElemType = readVarint7(Ptr); + Table.Limits = readLimits(Ptr); + return Table; +} + static Error readSection(WasmSection &Section, const uint8_t *&Ptr, const uint8_t *Start) { // TODO(sbc): Avoid reading past EOF in the case of malformed files. @@ -180,7 +190,7 @@ static Error readSection(WasmSection &Section, const uint8_t *&Ptr, } WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) - : ObjectFile(Binary::ID_Wasm, Buffer), StartFunction(-1) { + : ObjectFile(Binary::ID_Wasm, Buffer) { ErrorAsOutParameter ErrAsOutParam(&Err); Header.Magic = getData().substr(0, 4); if (Header.Magic != StringRef("\0asm", 4)) { @@ -250,11 +260,12 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) { case wasm::WASM_NAMES_FUNCTION: { uint32_t Count = readVaruint32(Ptr); while (Count--) { - /*uint32_t Index =*/readVaruint32(Ptr); + uint32_t Index = readVaruint32(Ptr); StringRef Name = readString(Ptr); - if (Name.size()) + if (!Name.empty()) Symbols.emplace_back(Name, - WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME); + WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME, + Sections.size(), Index); } break; } @@ -313,14 +324,17 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr, case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: + case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: + case wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB: break; case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: - Reloc.Addend = readVaruint32(Ptr); + Reloc.Addend = readVarint32(Ptr); break; default: - return make_error("Bad relocation type", + return make_error("Bad relocation type: " + + Twine(Reloc.Type), object_error::parse_failed); } Section->Relocations.push_back(Reloc); @@ -380,7 +394,7 @@ Error WasmObjectFile::parseTypeSection(const uint8_t *Ptr, const uint8_t *End) { Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) { uint32_t Count = readVaruint32(Ptr); Imports.reserve(Count); - while (Count--) { + for (uint32_t i = 0; i < Count; i++) { wasm::WasmImport Im; Im.Module = readString(Ptr); Im.Field = readString(Ptr); @@ -388,15 +402,26 @@ Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) switch (Im.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: Im.SigIndex = readVaruint32(Ptr); - Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT); + Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT, + Sections.size(), i); break; case wasm::WASM_EXTERNAL_GLOBAL: - Im.GlobalType = readVarint7(Ptr); - Im.GlobalMutable = readVaruint1(Ptr); - Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT); + Im.Global.Type = readVarint7(Ptr); + Im.Global.Mutable = readVaruint1(Ptr); + Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT, + Sections.size(), i); + break; + case wasm::WASM_EXTERNAL_MEMORY: + Im.Memory = readLimits(Ptr); + break; + case wasm::WASM_EXTERNAL_TABLE: + Im.Table = readTable(Ptr); + if (Im.Table.ElemType != wasm::WASM_TYPE_ANYFUNC) { + return make_error("Invalid table element type", + object_error::parse_failed); + } break; default: - // TODO(sbc): Handle other kinds of imports return make_error( "Unexpected import kind", object_error::parse_failed); } @@ -424,14 +449,11 @@ Error WasmObjectFile::parseTableSection(const uint8_t *Ptr, const uint8_t *End) uint32_t Count = readVaruint32(Ptr); Tables.reserve(Count); while (Count--) { - wasm::WasmTable Table; - Table.ElemType = readVarint7(Ptr); - if (Table.ElemType != wasm::WASM_TYPE_ANYFUNC) { + Tables.push_back(readTable(Ptr)); + if (Tables.back().ElemType != wasm::WASM_TYPE_ANYFUNC) { return make_error("Invalid table element type", object_error::parse_failed); } - Table.Limits = readLimits(Ptr); - Tables.push_back(Table); } if (Ptr != End) return make_error("Table section ended prematurely", @@ -471,7 +493,7 @@ Error WasmObjectFile::parseGlobalSection(const uint8_t *Ptr, const uint8_t *End) Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) { uint32_t Count = readVaruint32(Ptr); Exports.reserve(Count); - while (Count--) { + for (uint32_t i = 0; i < Count; i++) { wasm::WasmExport Ex; Ex.Name = readString(Ptr); Ex.Kind = readUint8(Ptr); @@ -479,13 +501,17 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) Exports.push_back(Ex); switch (Ex.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: - Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT); + Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT, + Sections.size(), i); break; case wasm::WASM_EXTERNAL_GLOBAL: - Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT); + Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT, + Sections.size(), i); + break; + case wasm::WASM_EXTERNAL_MEMORY: + case wasm::WASM_EXTERNAL_TABLE: break; default: - // TODO(sbc): Handle other kinds of exports return make_error( "Unexpected export kind", object_error::parse_failed); } @@ -498,7 +524,7 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) Error WasmObjectFile::parseStartSection(const uint8_t *Ptr, const uint8_t *End) { StartFunction = readVaruint32(Ptr); - if (StartFunction < FunctionTypes.size()) + if (StartFunction >= FunctionTypes.size()) return make_error("Invalid start function", object_error::parse_failed); return Error::success(); @@ -593,20 +619,28 @@ const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const { void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.a++; } uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const { + uint32_t Result = SymbolRef::SF_None; const WasmSymbol &Sym = getWasmSymbol(Symb); + switch (Sym.Type) { case WasmSymbol::SymbolType::FUNCTION_IMPORT: - return object::SymbolRef::SF_Undefined | SymbolRef::SF_Executable; + Result |= SymbolRef::SF_Undefined | SymbolRef::SF_Executable; + break; case WasmSymbol::SymbolType::FUNCTION_EXPORT: - return object::SymbolRef::SF_Global | SymbolRef::SF_Executable; + Result |= SymbolRef::SF_Global | SymbolRef::SF_Executable; + break; case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: - return object::SymbolRef::SF_Executable; + Result |= SymbolRef::SF_Executable; + break; case WasmSymbol::SymbolType::GLOBAL_IMPORT: - return object::SymbolRef::SF_Undefined; + Result |= SymbolRef::SF_Undefined; + break; case WasmSymbol::SymbolType::GLOBAL_EXPORT: - return object::SymbolRef::SF_Global; + Result |= SymbolRef::SF_Global; + break; } - llvm_unreachable("Unknown WasmSymbol::SymbolType"); + + return Result; } basic_symbol_iterator WasmObjectFile::symbol_begin() const { @@ -621,22 +655,26 @@ basic_symbol_iterator WasmObjectFile::symbol_end() const { return BasicSymbolRef(Ref, this); } -const WasmSymbol &WasmObjectFile::getWasmSymbol(DataRefImpl Symb) const { +const WasmSymbol &WasmObjectFile::getWasmSymbol(const DataRefImpl &Symb) const { return Symbols[Symb.d.a]; } +const WasmSymbol &WasmObjectFile::getWasmSymbol(const SymbolRef &Symb) const { + return getWasmSymbol(Symb.getRawDataRefImpl()); +} + Expected WasmObjectFile::getSymbolName(DataRefImpl Symb) const { const WasmSymbol &Sym = getWasmSymbol(Symb); return Sym.Name; } Expected WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const { - return (uint64_t)Symb.d.a; + return getSymbolValue(Symb); } uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return 0; + const WasmSymbol &Sym = getWasmSymbol(Symb); + return Sym.ElementIndex; } uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const { @@ -651,14 +689,27 @@ uint64_t WasmObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { Expected WasmObjectFile::getSymbolType(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return errorCodeToError(object_error::invalid_symbol_index); + const WasmSymbol &Sym = getWasmSymbol(Symb); + + switch (Sym.Type) { + case WasmSymbol::SymbolType::FUNCTION_IMPORT: + case WasmSymbol::SymbolType::FUNCTION_EXPORT: + case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME: + return SymbolRef::ST_Function; + case WasmSymbol::SymbolType::GLOBAL_IMPORT: + case WasmSymbol::SymbolType::GLOBAL_EXPORT: + return SymbolRef::ST_Data; + } + + llvm_unreachable("Unknown WasmSymbol::SymbolType"); + return SymbolRef::ST_Other; } Expected WasmObjectFile::getSymbolSection(DataRefImpl Symb) const { - llvm_unreachable("not yet implemented"); - return errorCodeToError(object_error::invalid_symbol_index); + DataRefImpl Ref; + Ref.d.a = getWasmSymbol(Symb).Section; + return section_iterator(SectionRef(Ref, this)); } void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; } @@ -694,6 +745,10 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } +uint64_t WasmObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const { const WasmSection &S = Sections[Sec.d.a]; return S.Content.size(); @@ -777,7 +832,7 @@ void WasmObjectFile::getRelocationTypeName( break; switch (Rel.Type) { -#include "llvm/Support/WasmRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" } #undef WASM_RELOC diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp new file mode 100644 index 0000000000000000000000000000000000000000..af836057ca38c0a4fc38034c2ed90a6d3a23d8c2 --- /dev/null +++ b/lib/Object/WindowsResource.cpp @@ -0,0 +1,730 @@ +//===-- WindowsResource.cpp -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the .res file class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/WindowsResource.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MathExtras.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace object; + +namespace llvm { +namespace object { + +#define RETURN_IF_ERROR(X) \ + if (auto EC = X) \ + return EC; + +const uint32_t MIN_HEADER_SIZE = 7 * sizeof(uint32_t) + 2 * sizeof(uint16_t); + +// COFF files seem to be inconsistent with alignment between sections, just use +// 8-byte because it makes everyone happy. +const uint32_t SECTION_ALIGNMENT = sizeof(uint64_t); + +static const size_t ResourceMagicSize = 16; + +static const size_t NullEntrySize = 16; + +uint32_t WindowsResourceParser::TreeNode::StringCount = 0; +uint32_t WindowsResourceParser::TreeNode::DataCount = 0; + +WindowsResource::WindowsResource(MemoryBufferRef Source) + : Binary(Binary::ID_WinRes, Source) { + size_t LeadingSize = ResourceMagicSize + NullEntrySize; + BBS = BinaryByteStream(Data.getBuffer().drop_front(LeadingSize), + support::little); +} + +Expected> +WindowsResource::createWindowsResource(MemoryBufferRef Source) { + if (Source.getBufferSize() < ResourceMagicSize + NullEntrySize) + return make_error( + "File too small to be a resource file", + object_error::invalid_file_type); + std::unique_ptr Ret(new WindowsResource(Source)); + return std::move(Ret); +} + +Expected WindowsResource::getHeadEntry() { + Error Err = Error::success(); + auto Ref = ResourceEntryRef(BinaryStreamRef(BBS), this, Err); + if (Err) + return std::move(Err); + return Ref; +} + +ResourceEntryRef::ResourceEntryRef(BinaryStreamRef Ref, + const WindowsResource *Owner, Error &Err) + : Reader(Ref), OwningRes(Owner) { + if (loadNext()) + Err = make_error("Could not read first entry.\n", + object_error::unexpected_eof); +} + +Error ResourceEntryRef::moveNext(bool &End) { + // Reached end of all the entries. + if (Reader.bytesRemaining() == 0) { + End = true; + return Error::success(); + } + RETURN_IF_ERROR(loadNext()); + + return Error::success(); +} + +static Error readStringOrId(BinaryStreamReader &Reader, uint16_t &ID, + ArrayRef &Str, bool &IsString) { + uint16_t IDFlag; + RETURN_IF_ERROR(Reader.readInteger(IDFlag)); + IsString = IDFlag != 0xffff; + + if (IsString) { + Reader.setOffset( + Reader.getOffset() - + sizeof(uint16_t)); // Re-read the bytes which we used to check the flag. + RETURN_IF_ERROR(Reader.readWideString(Str)); + } else + RETURN_IF_ERROR(Reader.readInteger(ID)); + + return Error::success(); +} + +Error ResourceEntryRef::loadNext() { + uint32_t DataSize; + RETURN_IF_ERROR(Reader.readInteger(DataSize)); + uint32_t HeaderSize; + RETURN_IF_ERROR(Reader.readInteger(HeaderSize)); + + if (HeaderSize < MIN_HEADER_SIZE) + return make_error("Header size is too small.", + object_error::parse_failed); + + RETURN_IF_ERROR(readStringOrId(Reader, TypeID, Type, IsStringType)); + + RETURN_IF_ERROR(readStringOrId(Reader, NameID, Name, IsStringName)); + + RETURN_IF_ERROR(Reader.padToAlignment(sizeof(uint32_t))); + + RETURN_IF_ERROR(Reader.readObject(Suffix)); + + RETURN_IF_ERROR(Reader.readArray(Data, DataSize)); + + RETURN_IF_ERROR(Reader.padToAlignment(sizeof(uint32_t))); + + return Error::success(); +} + +WindowsResourceParser::WindowsResourceParser() : Root(false) {} + +Error WindowsResourceParser::parse(WindowsResource *WR) { + auto EntryOrErr = WR->getHeadEntry(); + if (!EntryOrErr) + return EntryOrErr.takeError(); + + ResourceEntryRef Entry = EntryOrErr.get(); + bool End = false; + while (!End) { + Data.push_back(Entry.getData()); + + bool IsNewTypeString = false; + bool IsNewNameString = false; + + Root.addEntry(Entry, IsNewTypeString, IsNewNameString); + + if (IsNewTypeString) + StringTable.push_back(Entry.getTypeString()); + + if (IsNewNameString) + StringTable.push_back(Entry.getNameString()); + + RETURN_IF_ERROR(Entry.moveNext(End)); + } + + return Error::success(); +} + +void WindowsResourceParser::printTree(raw_ostream &OS) const { + ScopedPrinter Writer(OS); + Root.print(Writer, "Resource Tree"); +} + +void WindowsResourceParser::TreeNode::addEntry(const ResourceEntryRef &Entry, + bool &IsNewTypeString, + bool &IsNewNameString) { + TreeNode &TypeNode = addTypeNode(Entry, IsNewTypeString); + TreeNode &NameNode = TypeNode.addNameNode(Entry, IsNewNameString); + NameNode.addLanguageNode(Entry); +} + +WindowsResourceParser::TreeNode::TreeNode(bool IsStringNode) { + if (IsStringNode) + StringIndex = StringCount++; +} + +WindowsResourceParser::TreeNode::TreeNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) + : IsDataNode(true), MajorVersion(MajorVersion), MinorVersion(MinorVersion), + Characteristics(Characteristics) { + DataIndex = DataCount++; +} + +std::unique_ptr +WindowsResourceParser::TreeNode::createStringNode() { + return std::unique_ptr(new TreeNode(true)); +} + +std::unique_ptr +WindowsResourceParser::TreeNode::createIDNode() { + return std::unique_ptr(new TreeNode(false)); +} + +std::unique_ptr +WindowsResourceParser::TreeNode::createDataNode(uint16_t MajorVersion, + uint16_t MinorVersion, + uint32_t Characteristics) { + return std::unique_ptr( + new TreeNode(MajorVersion, MinorVersion, Characteristics)); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addTypeNode(const ResourceEntryRef &Entry, + bool &IsNewTypeString) { + if (Entry.checkTypeString()) + return addChild(Entry.getTypeString(), IsNewTypeString); + else + return addChild(Entry.getTypeID()); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addNameNode(const ResourceEntryRef &Entry, + bool &IsNewNameString) { + if (Entry.checkNameString()) + return addChild(Entry.getNameString(), IsNewNameString); + else + return addChild(Entry.getNameID()); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addLanguageNode( + const ResourceEntryRef &Entry) { + return addChild(Entry.getLanguage(), true, Entry.getMajorVersion(), + Entry.getMinorVersion(), Entry.getCharacteristics()); +} + +WindowsResourceParser::TreeNode &WindowsResourceParser::TreeNode::addChild( + uint32_t ID, bool IsDataNode, uint16_t MajorVersion, uint16_t MinorVersion, + uint32_t Characteristics) { + auto Child = IDChildren.find(ID); + if (Child == IDChildren.end()) { + auto NewChild = + IsDataNode ? createDataNode(MajorVersion, MinorVersion, Characteristics) + : createIDNode(); + WindowsResourceParser::TreeNode &Node = *NewChild; + IDChildren.emplace(ID, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +WindowsResourceParser::TreeNode & +WindowsResourceParser::TreeNode::addChild(ArrayRef NameRef, + bool &IsNewString) { + std::string NameString; + ArrayRef CorrectedName; + std::vector EndianCorrectedName; + if (sys::IsBigEndianHost) { + EndianCorrectedName.resize(NameRef.size() + 1); + std::copy(NameRef.begin(), NameRef.end(), EndianCorrectedName.begin() + 1); + EndianCorrectedName[0] = UNI_UTF16_BYTE_ORDER_MARK_SWAPPED; + CorrectedName = makeArrayRef(EndianCorrectedName); + } else + CorrectedName = NameRef; + convertUTF16ToUTF8String(CorrectedName, NameString); + + auto Child = StringChildren.find(NameString); + if (Child == StringChildren.end()) { + auto NewChild = createStringNode(); + IsNewString = true; + WindowsResourceParser::TreeNode &Node = *NewChild; + StringChildren.emplace(NameString, std::move(NewChild)); + return Node; + } else + return *(Child->second); +} + +void WindowsResourceParser::TreeNode::print(ScopedPrinter &Writer, + StringRef Name) const { + ListScope NodeScope(Writer, Name); + for (auto const &Child : StringChildren) { + Child.second->print(Writer, Child.first); + } + for (auto const &Child : IDChildren) { + Child.second->print(Writer, to_string(Child.first)); + } +} + +// This function returns the size of the entire resource tree, including +// directory tables, directory entries, and data entries. It does not include +// the directory strings or the relocations of the .rsrc section. +uint32_t WindowsResourceParser::TreeNode::getTreeSize() const { + uint32_t Size = (IDChildren.size() + StringChildren.size()) * + sizeof(coff_resource_dir_entry); + + // Reached a node pointing to a data entry. + if (IsDataNode) { + Size += sizeof(coff_resource_data_entry); + return Size; + } + + // If the node does not point to data, it must have a directory table pointing + // to other nodes. + Size += sizeof(coff_resource_dir_table); + + for (auto const &Child : StringChildren) { + Size += Child.second->getTreeSize(); + } + for (auto const &Child : IDChildren) { + Size += Child.second->getTreeSize(); + } + return Size; +} + +class WindowsResourceCOFFWriter { +public: + WindowsResourceCOFFWriter(std::unique_ptr &OutputBuffer, + COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser, Error &E); + Error write(); + +private: + void performFileLayout(); + void performSectionOneLayout(); + void performSectionTwoLayout(); + void writeCOFFHeader(); + void writeFirstSectionHeader(); + void writeSecondSectionHeader(); + void writeFirstSection(); + void writeSecondSection(); + void writeSymbolTable(); + void writeStringTable(); + void writeDirectoryTree(); + void writeDirectoryStringTable(); + void writeFirstSectionRelocations(); + std::unique_ptr &OutputBuffer; + char *BufferStart; + uint64_t CurrentOffset = 0; + COFF::MachineTypes MachineType; + const WindowsResourceParser::TreeNode &Resources; + const ArrayRef> Data; + uint64_t FileSize; + uint32_t SymbolTableOffset; + uint32_t SectionOneSize; + uint32_t SectionOneOffset; + uint32_t SectionOneRelocations; + uint32_t SectionTwoSize; + uint32_t SectionTwoOffset; + const ArrayRef> StringTable; + std::vector StringTableOffsets; + std::vector DataOffsets; + std::vector RelocationAddresses; +}; + +WindowsResourceCOFFWriter::WindowsResourceCOFFWriter( + std::unique_ptr &OutputBuffer, COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser, Error &E) + : OutputBuffer(OutputBuffer), MachineType(MachineType), + Resources(Parser.getTree()), Data(Parser.getData()), + StringTable(Parser.getStringTable()) { + performFileLayout(); + + OutputBuffer = MemoryBuffer::getNewMemBuffer(FileSize); +} + +void WindowsResourceCOFFWriter::performFileLayout() { + // Add size of COFF header. + FileSize = COFF::Header16Size; + + // one .rsrc section header for directory tree, another for resource data. + FileSize += 2 * COFF::SectionSize; + + performSectionOneLayout(); + performSectionTwoLayout(); + + // We have reached the address of the symbol table. + SymbolTableOffset = FileSize; + + FileSize += COFF::Symbol16Size; // size of the @feat.00 symbol. + FileSize += 4 * COFF::Symbol16Size; // symbol + aux for each section. + FileSize += Data.size() * COFF::Symbol16Size; // 1 symbol per resource. + FileSize += 4; // four null bytes for the string table. +} + +void WindowsResourceCOFFWriter::performSectionOneLayout() { + SectionOneOffset = FileSize; + + SectionOneSize = Resources.getTreeSize(); + uint32_t CurrentStringOffset = SectionOneSize; + uint32_t TotalStringTableSize = 0; + for (auto const &String : StringTable) { + StringTableOffsets.push_back(CurrentStringOffset); + uint32_t StringSize = String.size() * sizeof(UTF16) + sizeof(uint16_t); + CurrentStringOffset += StringSize; + TotalStringTableSize += StringSize; + } + SectionOneSize += alignTo(TotalStringTableSize, sizeof(uint32_t)); + + // account for the relocations of section one. + SectionOneRelocations = FileSize + SectionOneSize; + FileSize += SectionOneSize; + FileSize += + Data.size() * COFF::RelocationSize; // one relocation for each resource. + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::performSectionTwoLayout() { + // add size of .rsrc$2 section, which contains all resource data on 8-byte + // alignment. + SectionTwoOffset = FileSize; + SectionTwoSize = 0; + for (auto const &Entry : Data) { + DataOffsets.push_back(SectionTwoSize); + SectionTwoSize += alignTo(Entry.size(), sizeof(uint64_t)); + } + FileSize += SectionTwoSize; + FileSize = alignTo(FileSize, SECTION_ALIGNMENT); +} + +static std::time_t getTime() { + std::time_t Now = time(nullptr); + if (Now < 0 || !isUInt<32>(Now)) + return UINT32_MAX; + return Now; +} + +Error WindowsResourceCOFFWriter::write() { + BufferStart = const_cast(OutputBuffer->getBufferStart()); + + writeCOFFHeader(); + writeFirstSectionHeader(); + writeSecondSectionHeader(); + writeFirstSection(); + writeSecondSection(); + writeSymbolTable(); + writeStringTable(); + + return Error::success(); +} + +void WindowsResourceCOFFWriter::writeCOFFHeader() { + // Write the COFF header. + auto *Header = reinterpret_cast(BufferStart); + switch (MachineType) { + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Header->Machine = COFF::IMAGE_FILE_MACHINE_ARMNT; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Header->Machine = COFF::IMAGE_FILE_MACHINE_AMD64; + break; + case COFF::IMAGE_FILE_MACHINE_I386: + Header->Machine = COFF::IMAGE_FILE_MACHINE_I386; + break; + default: + Header->Machine = COFF::IMAGE_FILE_MACHINE_UNKNOWN; + } + Header->NumberOfSections = 2; + Header->TimeDateStamp = getTime(); + Header->PointerToSymbolTable = SymbolTableOffset; + // One symbol for every resource plus 2 for each section and @feat.00 + Header->NumberOfSymbols = Data.size() + 5; + Header->SizeOfOptionalHeader = 0; + Header->Characteristics = COFF::IMAGE_FILE_32BIT_MACHINE; +} + +void WindowsResourceCOFFWriter::writeFirstSectionHeader() { + // Write the first section header. + CurrentOffset += sizeof(coff_file_header); + auto *SectionOneHeader = + reinterpret_cast(BufferStart + CurrentOffset); + strncpy(SectionOneHeader->Name, ".rsrc$01", (size_t)COFF::NameSize); + SectionOneHeader->VirtualSize = 0; + SectionOneHeader->VirtualAddress = 0; + SectionOneHeader->SizeOfRawData = SectionOneSize; + SectionOneHeader->PointerToRawData = SectionOneOffset; + SectionOneHeader->PointerToRelocations = SectionOneRelocations; + SectionOneHeader->PointerToLinenumbers = 0; + SectionOneHeader->NumberOfRelocations = Data.size(); + SectionOneHeader->NumberOfLinenumbers = 0; + SectionOneHeader->Characteristics = COFF::IMAGE_SCN_ALIGN_1BYTES; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionOneHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeSecondSectionHeader() { + // Write the second section header. + CurrentOffset += sizeof(coff_section); + auto *SectionTwoHeader = + reinterpret_cast(BufferStart + CurrentOffset); + strncpy(SectionTwoHeader->Name, ".rsrc$02", (size_t)COFF::NameSize); + SectionTwoHeader->VirtualSize = 0; + SectionTwoHeader->VirtualAddress = 0; + SectionTwoHeader->SizeOfRawData = SectionTwoSize; + SectionTwoHeader->PointerToRawData = SectionTwoOffset; + SectionTwoHeader->PointerToRelocations = 0; + SectionTwoHeader->PointerToLinenumbers = 0; + SectionTwoHeader->NumberOfRelocations = 0; + SectionTwoHeader->NumberOfLinenumbers = 0; + SectionTwoHeader->Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + SectionTwoHeader->Characteristics += COFF::IMAGE_SCN_MEM_READ; +} + +void WindowsResourceCOFFWriter::writeFirstSection() { + // Write section one. + CurrentOffset += sizeof(coff_section); + + writeDirectoryTree(); + writeDirectoryStringTable(); + writeFirstSectionRelocations(); + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSecondSection() { + // Now write the .rsrc$02 section. + for (auto const &RawDataEntry : Data) { + std::copy(RawDataEntry.begin(), RawDataEntry.end(), + BufferStart + CurrentOffset); + CurrentOffset += alignTo(RawDataEntry.size(), sizeof(uint64_t)); + } + + CurrentOffset = alignTo(CurrentOffset, SECTION_ALIGNMENT); +} + +void WindowsResourceCOFFWriter::writeSymbolTable() { + // Now write the symbol table. + // First, the feat symbol. + auto *Symbol = reinterpret_cast(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, "@feat.00", (size_t)COFF::NameSize); + Symbol->Value = 0x11; + Symbol->SectionNumber = 0xffff; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + + // Now write the .rsrc1 symbol + aux. + Symbol = reinterpret_cast(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$01", (size_t)COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 1; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + auto *Aux = reinterpret_cast(BufferStart + + CurrentOffset); + Aux->Length = SectionOneSize; + Aux->NumberOfRelocations = Data.size(); + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write the .rsrc2 symbol + aux. + Symbol = reinterpret_cast(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, ".rsrc$02", (size_t)COFF::NameSize); + Symbol->Value = 0; + Symbol->SectionNumber = 2; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 1; + CurrentOffset += sizeof(coff_symbol16); + Aux = reinterpret_cast(BufferStart + + CurrentOffset); + Aux->Length = SectionTwoSize; + Aux->NumberOfRelocations = 0; + Aux->NumberOfLinenumbers = 0; + Aux->CheckSum = 0; + Aux->NumberLowPart = 0; + Aux->Selection = 0; + CurrentOffset += sizeof(coff_aux_section_definition); + + // Now write a symbol for each relocation. + for (unsigned i = 0; i < Data.size(); i++) { + char RelocationName[9]; + sprintf(RelocationName, "$R%06X", DataOffsets[i]); + Symbol = reinterpret_cast(BufferStart + CurrentOffset); + strncpy(Symbol->Name.ShortName, RelocationName, (size_t)COFF::NameSize); + Symbol->Value = DataOffsets[i]; + Symbol->SectionNumber = 1; + Symbol->Type = COFF::IMAGE_SYM_DTYPE_NULL; + Symbol->StorageClass = COFF::IMAGE_SYM_CLASS_STATIC; + Symbol->NumberOfAuxSymbols = 0; + CurrentOffset += sizeof(coff_symbol16); + } +} + +void WindowsResourceCOFFWriter::writeStringTable() { + // Just 4 null bytes for the string table. + auto COFFStringTable = reinterpret_cast(BufferStart + CurrentOffset); + memset(COFFStringTable, 0, 4); +} + +void WindowsResourceCOFFWriter::writeDirectoryTree() { + // Traverse parsed resource tree breadth-first and write the corresponding + // COFF objects. + std::queue Queue; + Queue.push(&Resources); + uint32_t NextLevelOffset = + sizeof(coff_resource_dir_table) + (Resources.getStringChildren().size() + + Resources.getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + std::vector DataEntriesTreeOrder; + uint32_t CurrentRelativeOffset = 0; + + while (!Queue.empty()) { + auto CurrentNode = Queue.front(); + Queue.pop(); + auto *Table = reinterpret_cast(BufferStart + + CurrentOffset); + Table->Characteristics = CurrentNode->getCharacteristics(); + Table->TimeDateStamp = 0; + Table->MajorVersion = CurrentNode->getMajorVersion(); + Table->MinorVersion = CurrentNode->getMinorVersion(); + auto &IDChildren = CurrentNode->getIDChildren(); + auto &StringChildren = CurrentNode->getStringChildren(); + Table->NumberOfNameEntries = StringChildren.size(); + Table->NumberOfIDEntries = IDChildren.size(); + CurrentOffset += sizeof(coff_resource_dir_table); + CurrentRelativeOffset += sizeof(coff_resource_dir_table); + + // Write the directory entries immediately following each directory table. + for (auto const &Child : StringChildren) { + auto *Entry = reinterpret_cast(BufferStart + + CurrentOffset); + Entry->Identifier.NameOffset = + StringTableOffsets[Child.second->getStringIndex()]; + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + for (auto const &Child : IDChildren) { + auto *Entry = reinterpret_cast(BufferStart + + CurrentOffset); + Entry->Identifier.ID = Child.first; + if (Child.second->checkIsDataNode()) { + Entry->Offset.DataEntryOffset = NextLevelOffset; + NextLevelOffset += sizeof(coff_resource_data_entry); + DataEntriesTreeOrder.push_back(Child.second.get()); + } else { + Entry->Offset.SubdirOffset = NextLevelOffset + (1 << 31); + NextLevelOffset += sizeof(coff_resource_dir_table) + + (Child.second->getStringChildren().size() + + Child.second->getIDChildren().size()) * + sizeof(coff_resource_dir_entry); + Queue.push(Child.second.get()); + } + CurrentOffset += sizeof(coff_resource_dir_entry); + CurrentRelativeOffset += sizeof(coff_resource_dir_entry); + } + } + + RelocationAddresses.resize(Data.size()); + // Now write all the resource data entries. + for (auto DataNodes : DataEntriesTreeOrder) { + auto *Entry = reinterpret_cast(BufferStart + + CurrentOffset); + RelocationAddresses[DataNodes->getDataIndex()] = CurrentRelativeOffset; + Entry->DataRVA = 0; // Set to zero because it is a relocation. + Entry->DataSize = Data[DataNodes->getDataIndex()].size(); + Entry->Codepage = 0; + Entry->Reserved = 0; + CurrentOffset += sizeof(coff_resource_data_entry); + CurrentRelativeOffset += sizeof(coff_resource_data_entry); + } +} + +void WindowsResourceCOFFWriter::writeDirectoryStringTable() { + // Now write the directory string table for .rsrc$01 + uint32_t TotalStringTableSize = 0; + for (auto &String : StringTable) { + uint16_t Length = String.size(); + support::endian::write16le(BufferStart + CurrentOffset, Length); + CurrentOffset += sizeof(uint16_t); + auto *Start = reinterpret_cast(BufferStart + CurrentOffset); + std::copy(String.begin(), String.end(), Start); + CurrentOffset += Length * sizeof(UTF16); + TotalStringTableSize += Length * sizeof(UTF16) + sizeof(uint16_t); + } + CurrentOffset += + alignTo(TotalStringTableSize, sizeof(uint32_t)) - TotalStringTableSize; +} + +void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { + + // Now write the relocations for .rsrc$01 + // Five symbols already in table before we start, @feat.00 and 2 for each + // .rsrc section. + uint32_t NextSymbolIndex = 5; + for (unsigned i = 0; i < Data.size(); i++) { + auto *Reloc = + reinterpret_cast(BufferStart + CurrentOffset); + Reloc->VirtualAddress = RelocationAddresses[i]; + Reloc->SymbolTableIndex = NextSymbolIndex++; + switch (MachineType) { + case COFF::IMAGE_FILE_MACHINE_ARMNT: + Reloc->Type = COFF::IMAGE_REL_ARM_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_AMD64: + Reloc->Type = COFF::IMAGE_REL_AMD64_ADDR32NB; + break; + case COFF::IMAGE_FILE_MACHINE_I386: + Reloc->Type = COFF::IMAGE_REL_I386_DIR32NB; + break; + default: + Reloc->Type = 0; + } + CurrentOffset += sizeof(coff_relocation); + } +} + +Error writeWindowsResourceCOFF(std::unique_ptr &OutputBuffer, + COFF::MachineTypes MachineType, + const WindowsResourceParser &Parser) { + Error E = Error::success(); + WindowsResourceCOFFWriter Writer(OutputBuffer, MachineType, Parser, E); + if (E) + return E; + return Writer.write(); +} + +} // namespace object +} // namespace llvm diff --git a/lib/ObjectYAML/CMakeLists.txt b/lib/ObjectYAML/CMakeLists.txt index 37f8fd7bce1a6d53b61d5be6b767e87e64043895..7af0b9c194e64d76d5f2a7e80e996172f54a2921 100644 --- a/lib/ObjectYAML/CMakeLists.txt +++ b/lib/ObjectYAML/CMakeLists.txt @@ -1,4 +1,7 @@ add_llvm_library(LLVMObjectYAML + CodeViewYAMLTypes.cpp + CodeViewYAMLSymbols.cpp + CodeViewYAMLDebugSections.cpp COFFYAML.cpp DWARFEmitter.cpp DWARFVisitor.cpp diff --git a/lib/ObjectYAML/COFFYAML.cpp b/lib/ObjectYAML/COFFYAML.cpp index 7f9f4c1f8c2cbc9af48deb363efa8bb6cfba0242..c8cbea1490f684cf88130fc2dd67a125ba6c00f4 100644 --- a/lib/ObjectYAML/COFFYAML.cpp +++ b/lib/ObjectYAML/COFFYAML.cpp @@ -488,7 +488,16 @@ void MappingTraits::mapping(IO &IO, COFFYAML::Section &Sec) { IO.mapOptional("VirtualAddress", Sec.Header.VirtualAddress, 0U); IO.mapOptional("VirtualSize", Sec.Header.VirtualSize, 0U); IO.mapOptional("Alignment", Sec.Alignment, 0U); - IO.mapRequired("SectionData", Sec.SectionData); + + // If this is a .debug$S or .debug$T section parse the semantic representation + // of the symbols/types. If it is any other kind of section, just deal in raw + // bytes. + IO.mapOptional("SectionData", Sec.SectionData); + if (Sec.Name == ".debug$S") + IO.mapOptional("Subsections", Sec.DebugS); + else if (Sec.Name == ".debug$T") + IO.mapOptional("Types", Sec.DebugT); + IO.mapOptional("Relocations", Sec.Relocations); } diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d194420d5ef46bdcdb7594b913254e8e8bf8254e --- /dev/null +++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp @@ -0,0 +1,937 @@ +//===- CodeViewYAMLDebugSections.cpp - CodeView YAMLIO debug sections -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossExSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/SymbolSerializer.h" +#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h" +#include "llvm/Support/BinaryStreamWriter.h" +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::CodeViewYAML; +using namespace llvm::CodeViewYAML::detail; +using namespace llvm::yaml; + +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceFileChecksumEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceColumnEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineBlock) +LLVM_YAML_IS_SEQUENCE_VECTOR(SourceLineInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeSite) +LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(CrossModuleExport) +LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLCrossModuleImport) +LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef) +LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLFrameData) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) + +LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false) +LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(FileChecksumKind) +LLVM_YAML_DECLARE_BITSET_TRAITS(LineFlags) + +LLVM_YAML_DECLARE_MAPPING_TRAITS(CrossModuleExport) +LLVM_YAML_DECLARE_MAPPING_TRAITS(YAMLFrameData) +LLVM_YAML_DECLARE_MAPPING_TRAITS(YAMLCrossModuleImport) +LLVM_YAML_DECLARE_MAPPING_TRAITS(CrossModuleImportItem) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineEntry) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceColumnEntry) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceFileChecksumEntry) +LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineBlock) +LLVM_YAML_DECLARE_MAPPING_TRAITS(InlineeSite) + +namespace llvm { +namespace CodeViewYAML { +namespace detail { +struct YAMLSubsectionBase { + explicit YAMLSubsectionBase(DebugSubsectionKind Kind) : Kind(Kind) {} + DebugSubsectionKind Kind; + virtual ~YAMLSubsectionBase() {} + + virtual void map(IO &IO) = 0; + virtual std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const = 0; +}; +} +} +} + +namespace { +struct YAMLChecksumsSubsection : public YAMLSubsectionBase { + YAMLChecksumsSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::FileChecksums) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &FC); + + std::vector Checksums; +}; + +struct YAMLLinesSubsection : public YAMLSubsectionBase { + YAMLLinesSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Lines) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugLinesSubsectionRef &Lines); + + SourceLineInfo Lines; +}; + +struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase { + YAMLInlineeLinesSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::InlineeLines) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugInlineeLinesSubsectionRef &Lines); + + InlineeInfo InlineeLines; +}; + +struct YAMLCrossModuleExportsSubsection : public YAMLSubsectionBase { + YAMLCrossModuleExportsSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeExports) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugCrossModuleExportsSubsectionRef &Exports); + + std::vector Exports; +}; + +struct YAMLCrossModuleImportsSubsection : public YAMLSubsectionBase { + YAMLCrossModuleImportsSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::CrossScopeImports) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugCrossModuleImportsSubsectionRef &Imports); + + std::vector Imports; +}; + +struct YAMLSymbolsSubsection : public YAMLSubsectionBase { + YAMLSymbolsSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Symbols) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugSymbolsSubsectionRef &Symbols); + + std::vector Symbols; +}; + +struct YAMLStringTableSubsection : public YAMLSubsectionBase { + YAMLStringTableSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::StringTable) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings); + + std::vector Strings; +}; + +struct YAMLFrameDataSubsection : public YAMLSubsectionBase { + YAMLFrameDataSubsection() + : YAMLSubsectionBase(DebugSubsectionKind::FrameData) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings, + const DebugFrameDataSubsectionRef &Frames); + + std::vector Frames; +}; + +struct YAMLCoffSymbolRVASubsection : public YAMLSubsectionBase { + YAMLCoffSymbolRVASubsection() + : YAMLSubsectionBase(DebugSubsectionKind::CoffSymbolRVA) {} + + void map(IO &IO) override; + std::shared_ptr + toCodeViewSubsection(BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const override; + static Expected> + fromCodeViewSubsection(const DebugSymbolRVASubsectionRef &RVAs); + + std::vector RVAs; +}; +} + +void ScalarBitSetTraits::bitset(IO &io, LineFlags &Flags) { + io.bitSetCase(Flags, "HasColumnInfo", LF_HaveColumns); + io.enumFallback(Flags); +} + +void ScalarEnumerationTraits::enumeration( + IO &io, FileChecksumKind &Kind) { + io.enumCase(Kind, "None", FileChecksumKind::None); + io.enumCase(Kind, "MD5", FileChecksumKind::MD5); + io.enumCase(Kind, "SHA1", FileChecksumKind::SHA1); + io.enumCase(Kind, "SHA256", FileChecksumKind::SHA256); +} + +void ScalarTraits::output(const HexFormattedString &Value, + void *ctx, raw_ostream &Out) { + StringRef Bytes(reinterpret_cast(Value.Bytes.data()), + Value.Bytes.size()); + Out << toHex(Bytes); +} + +StringRef ScalarTraits::input(StringRef Scalar, void *ctxt, + HexFormattedString &Value) { + std::string H = fromHex(Scalar); + Value.Bytes.assign(H.begin(), H.end()); + return StringRef(); +} + +void MappingTraits::mapping(IO &IO, SourceLineEntry &Obj) { + IO.mapRequired("Offset", Obj.Offset); + IO.mapRequired("LineStart", Obj.LineStart); + IO.mapRequired("IsStatement", Obj.IsStatement); + IO.mapRequired("EndDelta", Obj.EndDelta); +} + +void MappingTraits::mapping(IO &IO, SourceColumnEntry &Obj) { + IO.mapRequired("StartColumn", Obj.StartColumn); + IO.mapRequired("EndColumn", Obj.EndColumn); +} + +void MappingTraits::mapping(IO &IO, SourceLineBlock &Obj) { + IO.mapRequired("FileName", Obj.FileName); + IO.mapRequired("Lines", Obj.Lines); + IO.mapRequired("Columns", Obj.Columns); +} + +void MappingTraits::mapping(IO &IO, CrossModuleExport &Obj) { + IO.mapRequired("LocalId", Obj.Local); + IO.mapRequired("GlobalId", Obj.Global); +} + +void MappingTraits::mapping(IO &IO, + YAMLCrossModuleImport &Obj) { + IO.mapRequired("Module", Obj.ModuleName); + IO.mapRequired("Imports", Obj.ImportIds); +} + +void MappingTraits::mapping( + IO &IO, SourceFileChecksumEntry &Obj) { + IO.mapRequired("FileName", Obj.FileName); + IO.mapRequired("Kind", Obj.Kind); + IO.mapRequired("Checksum", Obj.ChecksumBytes); +} + +void MappingTraits::mapping(IO &IO, InlineeSite &Obj) { + IO.mapRequired("FileName", Obj.FileName); + IO.mapRequired("LineNum", Obj.SourceLineNum); + IO.mapRequired("Inlinee", Obj.Inlinee); + IO.mapOptional("ExtraFiles", Obj.ExtraFiles); +} + +void MappingTraits::mapping(IO &IO, YAMLFrameData &Obj) { + IO.mapRequired("CodeSize", Obj.CodeSize); + IO.mapRequired("FrameFunc", Obj.FrameFunc); + IO.mapRequired("LocalSize", Obj.LocalSize); + IO.mapOptional("MaxStackSize", Obj.MaxStackSize); + IO.mapOptional("ParamsSize", Obj.ParamsSize); + IO.mapOptional("PrologSize", Obj.PrologSize); + IO.mapOptional("RvaStart", Obj.RvaStart); + IO.mapOptional("SavedRegsSize", Obj.SavedRegsSize); +} + +void YAMLChecksumsSubsection::map(IO &IO) { + IO.mapTag("!FileChecksums", true); + IO.mapRequired("Checksums", Checksums); +} + +void YAMLLinesSubsection::map(IO &IO) { + IO.mapTag("!Lines", true); + IO.mapRequired("CodeSize", Lines.CodeSize); + + IO.mapRequired("Flags", Lines.Flags); + IO.mapRequired("RelocOffset", Lines.RelocOffset); + IO.mapRequired("RelocSegment", Lines.RelocSegment); + IO.mapRequired("Blocks", Lines.Blocks); +} + +void YAMLInlineeLinesSubsection::map(IO &IO) { + IO.mapTag("!InlineeLines", true); + IO.mapRequired("HasExtraFiles", InlineeLines.HasExtraFiles); + IO.mapRequired("Sites", InlineeLines.Sites); +} + +void YAMLCrossModuleExportsSubsection::map(IO &IO) { + IO.mapTag("!CrossModuleExports", true); + IO.mapOptional("Exports", Exports); +} + +void YAMLCrossModuleImportsSubsection::map(IO &IO) { + IO.mapTag("!CrossModuleImports", true); + IO.mapOptional("Imports", Imports); +} + +void YAMLSymbolsSubsection::map(IO &IO) { + IO.mapTag("!Symbols", true); + IO.mapRequired("Records", Symbols); +} + +void YAMLStringTableSubsection::map(IO &IO) { + IO.mapTag("!StringTable", true); + IO.mapRequired("Strings", Strings); +} + +void YAMLFrameDataSubsection::map(IO &IO) { + IO.mapTag("!FrameData", true); + IO.mapRequired("Frames", Frames); +} + +void YAMLCoffSymbolRVASubsection::map(IO &IO) { + IO.mapTag("!COFFSymbolRVAs", true); + IO.mapRequired("RVAs", RVAs); +} + +void MappingTraits::mapping( + IO &IO, YAMLDebugSubsection &Subsection) { + if (!IO.outputting()) { + if (IO.mapTag("!FileChecksums")) { + auto SS = std::make_shared(); + Subsection.Subsection = SS; + } else if (IO.mapTag("!Lines")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!InlineeLines")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!CrossModuleExports")) { + Subsection.Subsection = + std::make_shared(); + } else if (IO.mapTag("!CrossModuleImports")) { + Subsection.Subsection = + std::make_shared(); + } else if (IO.mapTag("!Symbols")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!StringTable")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!FrameData")) { + Subsection.Subsection = std::make_shared(); + } else if (IO.mapTag("!COFFSymbolRVAs")) { + Subsection.Subsection = std::make_shared(); + } else { + llvm_unreachable("Unexpected subsection tag!"); + } + } + Subsection.Subsection->map(IO); +} + +std::shared_ptr YAMLChecksumsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings()); + auto Result = std::make_shared(*SC.strings()); + for (const auto &CS : Checksums) { + Result->addChecksum(CS.FileName, CS.Kind, CS.ChecksumBytes.Bytes); + } + return Result; +} + +std::shared_ptr YAMLLinesSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings() && SC.hasChecksums()); + auto Result = + std::make_shared(*SC.checksums(), *SC.strings()); + Result->setCodeSize(Lines.CodeSize); + Result->setRelocationAddress(Lines.RelocSegment, Lines.RelocOffset); + Result->setFlags(Lines.Flags); + for (const auto &LC : Lines.Blocks) { + Result->createBlock(LC.FileName); + if (Result->hasColumnInfo()) { + for (const auto &Item : zip(LC.Lines, LC.Columns)) { + auto &L = std::get<0>(Item); + auto &C = std::get<1>(Item); + uint32_t LE = L.LineStart + L.EndDelta; + Result->addLineAndColumnInfo(L.Offset, + LineInfo(L.LineStart, LE, L.IsStatement), + C.StartColumn, C.EndColumn); + } + } else { + for (const auto &L : LC.Lines) { + uint32_t LE = L.LineStart + L.EndDelta; + Result->addLineInfo(L.Offset, LineInfo(L.LineStart, LE, L.IsStatement)); + } + } + } + return Result; +} + +std::shared_ptr +YAMLInlineeLinesSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasChecksums()); + auto Result = std::make_shared( + *SC.checksums(), InlineeLines.HasExtraFiles); + + for (const auto &Site : InlineeLines.Sites) { + Result->addInlineSite(TypeIndex(Site.Inlinee), Site.FileName, + Site.SourceLineNum); + if (!InlineeLines.HasExtraFiles) + continue; + + for (auto EF : Site.ExtraFiles) { + Result->addExtraFile(EF); + } + } + return Result; +} + +std::shared_ptr +YAMLCrossModuleExportsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared(); + for (const auto &M : Exports) + Result->addMapping(M.Local, M.Global); + return Result; +} + +std::shared_ptr +YAMLCrossModuleImportsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings()); + + auto Result = + std::make_shared(*SC.strings()); + for (const auto &M : Imports) { + for (const auto Id : M.ImportIds) + Result->addImport(M.ModuleName, Id); + } + return Result; +} + +std::shared_ptr YAMLSymbolsSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared(); + for (const auto &Sym : Symbols) + Result->addSymbol( + Sym.toCodeViewSymbol(Allocator, CodeViewContainer::ObjectFile)); + return Result; +} + +std::shared_ptr +YAMLStringTableSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared(); + for (const auto &Str : this->Strings) + Result->insert(Str); + return Result; +} + +std::shared_ptr YAMLFrameDataSubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + assert(SC.hasStrings()); + + auto Result = std::make_shared(); + for (const auto &YF : Frames) { + codeview::FrameData F; + F.CodeSize = YF.CodeSize; + F.Flags = YF.Flags; + F.LocalSize = YF.LocalSize; + F.MaxStackSize = YF.MaxStackSize; + F.ParamsSize = YF.ParamsSize; + F.PrologSize = YF.PrologSize; + F.RvaStart = YF.RvaStart; + F.SavedRegsSize = YF.SavedRegsSize; + F.FrameFunc = SC.strings()->insert(YF.FrameFunc); + Result->addFrameData(F); + } + return Result; +} + +std::shared_ptr +YAMLCoffSymbolRVASubsection::toCodeViewSubsection( + BumpPtrAllocator &Allocator, + const codeview::StringsAndChecksums &SC) const { + auto Result = std::make_shared(); + for (const auto &RVA : RVAs) + Result->addRVA(RVA); + return Result; +} + +static Expected +convertOneChecksum(const DebugStringTableSubsectionRef &Strings, + const FileChecksumEntry &CS) { + auto ExpectedString = Strings.getString(CS.FileNameOffset); + if (!ExpectedString) + return ExpectedString.takeError(); + + SourceFileChecksumEntry Result; + Result.ChecksumBytes.Bytes = CS.Checksum; + Result.Kind = CS.Kind; + Result.FileName = *ExpectedString; + return Result; +} + +static Expected +getFileName(const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, uint32_t FileID) { + auto Iter = Checksums.getArray().at(FileID); + if (Iter == Checksums.getArray().end()) + return make_error(cv_error_code::no_records); + uint32_t Offset = Iter->FileNameOffset; + return Strings.getString(Offset); +} + +Expected> +YAMLChecksumsSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &FC) { + auto Result = std::make_shared(); + + for (const auto &CS : FC) { + auto ConvertedCS = convertOneChecksum(Strings, CS); + if (!ConvertedCS) + return ConvertedCS.takeError(); + Result->Checksums.push_back(*ConvertedCS); + } + return Result; +} + +Expected> +YAMLLinesSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugLinesSubsectionRef &Lines) { + auto Result = std::make_shared(); + Result->Lines.CodeSize = Lines.header()->CodeSize; + Result->Lines.RelocOffset = Lines.header()->RelocOffset; + Result->Lines.RelocSegment = Lines.header()->RelocSegment; + Result->Lines.Flags = static_cast(uint16_t(Lines.header()->Flags)); + for (const auto &L : Lines) { + SourceLineBlock Block; + auto EF = getFileName(Strings, Checksums, L.NameIndex); + if (!EF) + return EF.takeError(); + Block.FileName = *EF; + if (Lines.hasColumnInfo()) { + for (const auto &C : L.Columns) { + SourceColumnEntry SCE; + SCE.EndColumn = C.EndColumn; + SCE.StartColumn = C.StartColumn; + Block.Columns.push_back(SCE); + } + } + for (const auto &LN : L.LineNumbers) { + SourceLineEntry SLE; + LineInfo LI(LN.Flags); + SLE.Offset = LN.Offset; + SLE.LineStart = LI.getStartLine(); + SLE.EndDelta = LI.getLineDelta(); + SLE.IsStatement = LI.isStatement(); + Block.Lines.push_back(SLE); + } + Result->Lines.Blocks.push_back(Block); + } + return Result; +} + +Expected> +YAMLInlineeLinesSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugChecksumsSubsectionRef &Checksums, + const DebugInlineeLinesSubsectionRef &Lines) { + auto Result = std::make_shared(); + + Result->InlineeLines.HasExtraFiles = Lines.hasExtraFiles(); + for (const auto &IL : Lines) { + InlineeSite Site; + auto ExpF = getFileName(Strings, Checksums, IL.Header->FileID); + if (!ExpF) + return ExpF.takeError(); + Site.FileName = *ExpF; + Site.Inlinee = IL.Header->Inlinee.getIndex(); + Site.SourceLineNum = IL.Header->SourceLineNum; + if (Lines.hasExtraFiles()) { + for (const auto EF : IL.ExtraFiles) { + auto ExpF2 = getFileName(Strings, Checksums, EF); + if (!ExpF2) + return ExpF2.takeError(); + Site.ExtraFiles.push_back(*ExpF2); + } + } + Result->InlineeLines.Sites.push_back(Site); + } + return Result; +} + +Expected> +YAMLCrossModuleExportsSubsection::fromCodeViewSubsection( + const DebugCrossModuleExportsSubsectionRef &Exports) { + auto Result = std::make_shared(); + Result->Exports.assign(Exports.begin(), Exports.end()); + return Result; +} + +Expected> +YAMLCrossModuleImportsSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugCrossModuleImportsSubsectionRef &Imports) { + auto Result = std::make_shared(); + for (const auto &CMI : Imports) { + YAMLCrossModuleImport YCMI; + auto ExpectedStr = Strings.getString(CMI.Header->ModuleNameOffset); + if (!ExpectedStr) + return ExpectedStr.takeError(); + YCMI.ModuleName = *ExpectedStr; + YCMI.ImportIds.assign(CMI.Imports.begin(), CMI.Imports.end()); + Result->Imports.push_back(YCMI); + } + return Result; +} + +Expected> +YAMLSymbolsSubsection::fromCodeViewSubsection( + const DebugSymbolsSubsectionRef &Symbols) { + auto Result = std::make_shared(); + for (const auto &Sym : Symbols) { + auto S = CodeViewYAML::SymbolRecord::fromCodeViewSymbol(Sym); + if (!S) + return joinErrors(make_error( + cv_error_code::corrupt_record, + "Invalid CodeView Symbol Record in SymbolRecord " + "subsection of .debug$S while converting to YAML!"), + S.takeError()); + + Result->Symbols.push_back(*S); + } + return Result; +} + +Expected> +YAMLStringTableSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings) { + auto Result = std::make_shared(); + BinaryStreamReader Reader(Strings.getBuffer()); + StringRef S; + // First item is a single null string, skip it. + if (auto EC = Reader.readCString(S)) + return std::move(EC); + assert(S.empty()); + while (Reader.bytesRemaining() > 0) { + if (auto EC = Reader.readCString(S)) + return std::move(EC); + Result->Strings.push_back(S); + } + return Result; +} + +Expected> +YAMLFrameDataSubsection::fromCodeViewSubsection( + const DebugStringTableSubsectionRef &Strings, + const DebugFrameDataSubsectionRef &Frames) { + auto Result = std::make_shared(); + for (const auto &F : Frames) { + YAMLFrameData YF; + YF.CodeSize = F.CodeSize; + YF.Flags = F.Flags; + YF.LocalSize = F.LocalSize; + YF.MaxStackSize = F.MaxStackSize; + YF.ParamsSize = F.ParamsSize; + YF.PrologSize = F.PrologSize; + YF.RvaStart = F.RvaStart; + YF.SavedRegsSize = F.SavedRegsSize; + + auto ES = Strings.getString(F.FrameFunc); + if (!ES) + return joinErrors( + make_error( + cv_error_code::no_records, + "Could not find string for string id while mapping FrameData!"), + ES.takeError()); + YF.FrameFunc = *ES; + Result->Frames.push_back(YF); + } + return Result; +} + +Expected> +YAMLCoffSymbolRVASubsection::fromCodeViewSubsection( + const DebugSymbolRVASubsectionRef &Section) { + auto Result = std::make_shared(); + for (const auto &RVA : Section) { + Result->RVAs.push_back(RVA); + } + return Result; +} + +Expected>> +llvm::CodeViewYAML::toCodeViewSubsectionList( + BumpPtrAllocator &Allocator, ArrayRef Subsections, + const codeview::StringsAndChecksums &SC) { + std::vector> Result; + if (Subsections.empty()) + return std::move(Result); + + for (const auto &SS : Subsections) { + std::shared_ptr CVS; + CVS = SS.Subsection->toCodeViewSubsection(Allocator, SC); + assert(CVS != nullptr); + Result.push_back(std::move(CVS)); + } + return std::move(Result); +} + +namespace { +struct SubsectionConversionVisitor : public DebugSubsectionVisitor { + SubsectionConversionVisitor() {} + + Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override; + Error visitLines(DebugLinesSubsectionRef &Lines, + const StringsAndChecksumsRef &State) override; + Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums, + const StringsAndChecksumsRef &State) override; + Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees, + const StringsAndChecksumsRef &State) override; + Error visitCrossModuleExports(DebugCrossModuleExportsSubsectionRef &Checksums, + const StringsAndChecksumsRef &State) override; + Error visitCrossModuleImports(DebugCrossModuleImportsSubsectionRef &Inlinees, + const StringsAndChecksumsRef &State) override; + Error visitStringTable(DebugStringTableSubsectionRef &ST, + const StringsAndChecksumsRef &State) override; + Error visitSymbols(DebugSymbolsSubsectionRef &Symbols, + const StringsAndChecksumsRef &State) override; + Error visitFrameData(DebugFrameDataSubsectionRef &Symbols, + const StringsAndChecksumsRef &State) override; + Error visitCOFFSymbolRVAs(DebugSymbolRVASubsectionRef &Symbols, + const StringsAndChecksumsRef &State) override; + + YAMLDebugSubsection Subsection; +}; + +Error SubsectionConversionVisitor::visitUnknown( + DebugUnknownSubsectionRef &Unknown) { + return make_error(cv_error_code::operation_unsupported); +} + +Error SubsectionConversionVisitor::visitLines( + DebugLinesSubsectionRef &Lines, const StringsAndChecksumsRef &State) { + auto Result = YAMLLinesSubsection::fromCodeViewSubsection( + State.strings(), State.checksums(), Lines); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitFileChecksums( + DebugChecksumsSubsectionRef &Checksums, + const StringsAndChecksumsRef &State) { + auto Result = YAMLChecksumsSubsection::fromCodeViewSubsection(State.strings(), + Checksums); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitInlineeLines( + DebugInlineeLinesSubsectionRef &Inlinees, + const StringsAndChecksumsRef &State) { + auto Result = YAMLInlineeLinesSubsection::fromCodeViewSubsection( + State.strings(), State.checksums(), Inlinees); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitCrossModuleExports( + DebugCrossModuleExportsSubsectionRef &Exports, + const StringsAndChecksumsRef &State) { + auto Result = + YAMLCrossModuleExportsSubsection::fromCodeViewSubsection(Exports); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitCrossModuleImports( + DebugCrossModuleImportsSubsectionRef &Imports, + const StringsAndChecksumsRef &State) { + auto Result = YAMLCrossModuleImportsSubsection::fromCodeViewSubsection( + State.strings(), Imports); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitStringTable( + DebugStringTableSubsectionRef &Strings, + const StringsAndChecksumsRef &State) { + auto Result = YAMLStringTableSubsection::fromCodeViewSubsection(Strings); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitSymbols( + DebugSymbolsSubsectionRef &Symbols, const StringsAndChecksumsRef &State) { + auto Result = YAMLSymbolsSubsection::fromCodeViewSubsection(Symbols); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitFrameData( + DebugFrameDataSubsectionRef &Frames, const StringsAndChecksumsRef &State) { + auto Result = + YAMLFrameDataSubsection::fromCodeViewSubsection(State.strings(), Frames); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} + +Error SubsectionConversionVisitor::visitCOFFSymbolRVAs( + DebugSymbolRVASubsectionRef &RVAs, const StringsAndChecksumsRef &State) { + auto Result = YAMLCoffSymbolRVASubsection::fromCodeViewSubsection(RVAs); + if (!Result) + return Result.takeError(); + Subsection.Subsection = *Result; + return Error::success(); +} +} + +Expected +YAMLDebugSubsection::fromCodeViewSubection(const StringsAndChecksumsRef &SC, + const DebugSubsectionRecord &SS) { + SubsectionConversionVisitor V; + if (auto EC = visitDebugSubsection(SS, V, SC)) + return std::move(EC); + + return V.Subsection; +} + +std::vector +llvm::CodeViewYAML::fromDebugS(ArrayRef Data, + const StringsAndChecksumsRef &SC) { + BinaryStreamReader Reader(Data, support::little); + uint32_t Magic; + + ExitOnError Err("Invalid .debug$S section!"); + Err(Reader.readInteger(Magic)); + assert(Magic == COFF::DEBUG_SECTION_MAGIC && "Invalid .debug$S section!"); + + DebugSubsectionArray Subsections; + Err(Reader.readArray(Subsections, Reader.bytesRemaining())); + + std::vector Result; + + for (const auto &SS : Subsections) { + auto YamlSS = Err(YAMLDebugSubsection::fromCodeViewSubection(SC, SS)); + Result.push_back(YamlSS); + } + return Result; +} + +void llvm::CodeViewYAML::initializeStringsAndChecksums( + ArrayRef Sections, codeview::StringsAndChecksums &SC) { + // String Table and Checksums subsections don't use the allocator. + BumpPtrAllocator Allocator; + + // It's possible for checksums and strings to even appear in different debug$S + // sections, so we have to make this a stateful function that can build up + // the strings and checksums field over multiple iterations. + + // File Checksums require the string table, but may become before it, so we + // have to scan for strings first, then scan for checksums again from the + // beginning. + if (!SC.hasStrings()) { + for (const auto &SS : Sections) { + if (SS.Subsection->Kind != DebugSubsectionKind::StringTable) + continue; + + auto Result = SS.Subsection->toCodeViewSubsection(Allocator, SC); + SC.setStrings( + std::static_pointer_cast(Result)); + break; + } + } + + if (SC.hasStrings() && !SC.hasChecksums()) { + for (const auto &SS : Sections) { + if (SS.Subsection->Kind != DebugSubsectionKind::FileChecksums) + continue; + + auto Result = SS.Subsection->toCodeViewSubsection(Allocator, SC); + SC.setChecksums( + std::static_pointer_cast(Result)); + break; + } + } +} diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ba3a2abe2097821585d23bb8373e0d2ab7fa5889 --- /dev/null +++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -0,0 +1,563 @@ +//===- CodeViewYAMLSymbols.cpp - CodeView YAMLIO Symbol implementation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/CodeViewYAMLSymbols.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" +#include "llvm/DebugInfo/CodeView/SymbolSerializer.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::CodeViewYAML; +using namespace llvm::CodeViewYAML::detail; +using namespace llvm::yaml; + +LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex) + +// We only need to declare these, the definitions are in CodeViewYAMLTypes.cpp +LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, false) +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, false) + +LLVM_YAML_DECLARE_ENUM_TRAITS(SymbolKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(FrameCookieKind) + +LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym2Flags) +LLVM_YAML_DECLARE_BITSET_TRAITS(CompileSym3Flags) +LLVM_YAML_DECLARE_BITSET_TRAITS(ExportFlags) +LLVM_YAML_DECLARE_BITSET_TRAITS(LocalSymFlags) +LLVM_YAML_DECLARE_BITSET_TRAITS(ProcSymFlags) +LLVM_YAML_DECLARE_BITSET_TRAITS(FrameProcedureOptions) +LLVM_YAML_DECLARE_ENUM_TRAITS(CPUType) +LLVM_YAML_DECLARE_ENUM_TRAITS(RegisterId) +LLVM_YAML_DECLARE_ENUM_TRAITS(TrampolineType) +LLVM_YAML_DECLARE_ENUM_TRAITS(ThunkOrdinal) + +LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, TypeName) + +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeName, true) + +StringRef ScalarTraits::input(StringRef S, void *V, TypeName &T) { + return ScalarTraits::input(S, V, T.value); +} +void ScalarTraits::output(const TypeName &T, void *V, + llvm::raw_ostream &R) { + ScalarTraits::output(T.value, V, R); +} + +void ScalarEnumerationTraits::enumeration(IO &io, + SymbolKind &Value) { + auto SymbolNames = getSymbolTypeNames(); + for (const auto &E : SymbolNames) + io.enumCase(Value, E.Name.str().c_str(), E.Value); +} + +void ScalarBitSetTraits::bitset(IO &io, + CompileSym2Flags &Flags) { + auto FlagNames = getCompileSym2FlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset(IO &io, + CompileSym3Flags &Flags) { + auto FlagNames = getCompileSym3FlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset(IO &io, ExportFlags &Flags) { + auto FlagNames = getExportSymFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset(IO &io, LocalSymFlags &Flags) { + auto FlagNames = getLocalFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset(IO &io, ProcSymFlags &Flags) { + auto FlagNames = getProcSymFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarBitSetTraits::bitset( + IO &io, FrameProcedureOptions &Flags) { + auto FlagNames = getFrameProcSymFlagNames(); + for (const auto &E : FlagNames) { + io.bitSetCase(Flags, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarEnumerationTraits::enumeration(IO &io, CPUType &Cpu) { + auto CpuNames = getCPUTypeNames(); + for (const auto &E : CpuNames) { + io.enumCase(Cpu, E.Name.str().c_str(), static_cast(E.Value)); + } +} + +void ScalarEnumerationTraits::enumeration(IO &io, RegisterId &Reg) { + auto RegNames = getRegisterNames(); + for (const auto &E : RegNames) { + io.enumCase(Reg, E.Name.str().c_str(), static_cast(E.Value)); + } + io.enumFallback(Reg); +} + +void ScalarEnumerationTraits::enumeration( + IO &io, TrampolineType &Tramp) { + auto TrampNames = getTrampolineNames(); + for (const auto &E : TrampNames) { + io.enumCase(Tramp, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +void ScalarEnumerationTraits::enumeration(IO &io, + ThunkOrdinal &Ord) { + auto ThunkNames = getThunkOrdinalNames(); + for (const auto &E : ThunkNames) { + io.enumCase(Ord, E.Name.str().c_str(), static_cast(E.Value)); + } +} + +void ScalarEnumerationTraits::enumeration( + IO &io, FrameCookieKind &FC) { + auto ThunkNames = getFrameCookieKindNames(); + for (const auto &E : ThunkNames) { + io.enumCase(FC, E.Name.str().c_str(), + static_cast(E.Value)); + } +} + +namespace llvm { +namespace CodeViewYAML { +namespace detail { + +struct SymbolRecordBase { + codeview::SymbolKind Kind; + explicit SymbolRecordBase(codeview::SymbolKind K) : Kind(K) {} + + virtual ~SymbolRecordBase() {} + virtual void map(yaml::IO &io) = 0; + virtual codeview::CVSymbol + toCodeViewSymbol(BumpPtrAllocator &Allocator, + CodeViewContainer Container) const = 0; + virtual Error fromCodeViewSymbol(codeview::CVSymbol Type) = 0; +}; + +template struct SymbolRecordImpl : public SymbolRecordBase { + explicit SymbolRecordImpl(codeview::SymbolKind K) + : SymbolRecordBase(K), Symbol(static_cast(K)) {} + + void map(yaml::IO &io) override; + + codeview::CVSymbol + toCodeViewSymbol(BumpPtrAllocator &Allocator, + CodeViewContainer Container) const override { + return SymbolSerializer::writeOneSymbol(Symbol, Allocator, Container); + } + Error fromCodeViewSymbol(codeview::CVSymbol CVS) override { + return SymbolDeserializer::deserializeAs(CVS, Symbol); + } + + mutable T Symbol; +}; + +struct UnknownSymbolRecord : public SymbolRecordBase { + explicit UnknownSymbolRecord(codeview::SymbolKind K) : SymbolRecordBase(K) {} + + void map(yaml::IO &io) override; + + CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator, + CodeViewContainer Container) const override { + RecordPrefix Prefix; + uint32_t TotalLen = sizeof(RecordPrefix) + Data.size(); + Prefix.RecordKind = Kind; + Prefix.RecordLen = TotalLen - 2; + uint8_t *Buffer = Allocator.Allocate(TotalLen); + ::memcpy(Buffer, &Prefix, sizeof(RecordPrefix)); + ::memcpy(Buffer + sizeof(RecordPrefix), Data.data(), Data.size()); + return CVSymbol(Kind, ArrayRef(Buffer, TotalLen)); + } + Error fromCodeViewSymbol(CVSymbol CVS) override { + this->Kind = CVS.kind(); + Data = CVS.RecordData.drop_front(sizeof(RecordPrefix)); + return Error::success(); + } + + std::vector Data; +}; + +template <> void SymbolRecordImpl::map(IO &IO) {} + +void UnknownSymbolRecord::map(yaml::IO &io) { + yaml::BinaryRef Binary; + if (io.outputting()) + Binary = yaml::BinaryRef(Data); + io.mapRequired("Data", Binary); + if (!io.outputting()) { + std::string Str; + raw_string_ostream OS(Str); + Binary.writeAsBinary(OS); + OS.flush(); + Data.assign(Str.begin(), Str.end()); + } +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Parent", Symbol.Parent); + IO.mapRequired("End", Symbol.End); + IO.mapRequired("Next", Symbol.Next); + IO.mapRequired("Off", Symbol.Offset); + IO.mapRequired("Seg", Symbol.Segment); + IO.mapRequired("Len", Symbol.Length); + IO.mapRequired("Ordinal", Symbol.Thunk); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("Size", Symbol.Size); + IO.mapRequired("ThunkOff", Symbol.ThunkOffset); + IO.mapRequired("TargetOff", Symbol.TargetOffset); + IO.mapRequired("ThunkSection", Symbol.ThunkSection); + IO.mapRequired("TargetSection", Symbol.TargetSection); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("SectionNumber", Symbol.SectionNumber); + IO.mapRequired("Alignment", Symbol.Alignment); + IO.mapRequired("Rva", Symbol.Rva); + IO.mapRequired("Length", Symbol.Length); + IO.mapRequired("Characteristics", Symbol.Characteristics); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Size", Symbol.Size); + IO.mapRequired("Characteristics", Symbol.Characteristics); + IO.mapRequired("Offset", Symbol.Offset); + IO.mapRequired("Segment", Symbol.Segment); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Ordinal", Symbol.Ordinal); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the linkage name + + IO.mapRequired("PtrParent", Symbol.Parent); + IO.mapRequired("PtrEnd", Symbol.End); + IO.mapRequired("PtrNext", Symbol.Next); + IO.mapRequired("CodeSize", Symbol.CodeSize); + IO.mapRequired("DbgStart", Symbol.DbgStart); + IO.mapRequired("DbgEnd", Symbol.DbgEnd); + IO.mapRequired("FunctionType", Symbol.FunctionType); + IO.mapRequired("Segment", Symbol.Segment); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("DisplayName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Index); + IO.mapRequired("Seg", Symbol.Register); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Index); + IO.mapRequired("Seg", Symbol.Segment); + IO.mapRequired("Off", Symbol.Offset); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("SumName", Symbol.SumName); + IO.mapRequired("SymOffset", Symbol.SymOffset); + IO.mapRequired("Mod", Symbol.Module); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Entries", Symbol.Fields); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("PtrParent", Symbol.Parent); + IO.mapRequired("PtrEnd", Symbol.End); + IO.mapRequired("Inlinee", Symbol.Inlinee); + // TODO: The binary annotations +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("Flags", Symbol.Flags); + + IO.mapRequired("VarName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the subfields +} + +template <> +void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the subfields +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the linkage name + IO.mapRequired("PtrParent", Symbol.Parent); + IO.mapRequired("PtrEnd", Symbol.End); + IO.mapRequired("CodeSize", Symbol.CodeSize); + IO.mapRequired("Segment", Symbol.Segment); + IO.mapRequired("BlockName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Print the linkage name + IO.mapRequired("Segment", Symbol.Segment); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("DisplayName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Signature", Symbol.Signature); + IO.mapRequired("ObjectName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("Machine", Symbol.Machine); + IO.mapRequired("FrontendMajor", Symbol.VersionFrontendMajor); + IO.mapRequired("FrontendMinor", Symbol.VersionFrontendMinor); + IO.mapRequired("FrontendBuild", Symbol.VersionFrontendBuild); + IO.mapRequired("BackendMajor", Symbol.VersionBackendMajor); + IO.mapRequired("BackendMinor", Symbol.VersionBackendMinor); + IO.mapRequired("BackendBuild", Symbol.VersionBackendBuild); + IO.mapRequired("Version", Symbol.Version); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("Machine", Symbol.Machine); + IO.mapRequired("FrontendMajor", Symbol.VersionFrontendMajor); + IO.mapRequired("FrontendMinor", Symbol.VersionFrontendMinor); + IO.mapRequired("FrontendBuild", Symbol.VersionFrontendBuild); + IO.mapRequired("FrontendQFE", Symbol.VersionFrontendQFE); + IO.mapRequired("BackendMajor", Symbol.VersionBackendMajor); + IO.mapRequired("BackendMinor", Symbol.VersionBackendMinor); + IO.mapRequired("BackendBuild", Symbol.VersionBackendBuild); + IO.mapRequired("BackendQFE", Symbol.VersionBackendQFE); + IO.mapRequired("Version", Symbol.Version); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("TotalFrameBytes", Symbol.TotalFrameBytes); + IO.mapRequired("PaddingFrameBytes", Symbol.PaddingFrameBytes); + IO.mapRequired("OffsetToPadding", Symbol.OffsetToPadding); + IO.mapRequired("BytesOfCalleeSavedRegisters", + Symbol.BytesOfCalleeSavedRegisters); + IO.mapRequired("OffsetOfExceptionHandler", Symbol.OffsetOfExceptionHandler); + IO.mapRequired("SectionIdOfExceptionHandler", + Symbol.SectionIdOfExceptionHandler); + IO.mapRequired("Flags", Symbol.Flags); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Map Linkage Name + IO.mapRequired("Segment", Symbol.Segment); + IO.mapRequired("Type", Symbol.Type); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Index", Symbol.Index); + IO.mapRequired("ModFilenameOffset", Symbol.ModFilenameOffset); + IO.mapRequired("Flags", Symbol.Flags); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Map Linkage Name + IO.mapRequired("Segment", Symbol.Segment); + IO.mapRequired("CallInstructionSize", Symbol.CallInstructionSize); + IO.mapRequired("Type", Symbol.Type); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Map Linkage Name + IO.mapRequired("Register", Symbol.Register); + IO.mapRequired("CookieKind", Symbol.CookieKind); + IO.mapRequired("Flags", Symbol.Flags); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("FuncID", Symbol.Indices); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("UDTName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("BuildId", Symbol.BuildId); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Offset", Symbol.Offset); + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("VarName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Offset", Symbol.Offset); + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("Register", Symbol.Register); + IO.mapRequired("VarName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("Value", Symbol.Value); + IO.mapRequired("Name", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Map linkage name + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("DisplayName", Symbol.Name); +} + +template <> void SymbolRecordImpl::map(IO &IO) { + // TODO: Map linkage name + IO.mapRequired("Type", Symbol.Type); + IO.mapRequired("DisplayName", Symbol.Name); +} +} +} +} + +CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol( + BumpPtrAllocator &Allocator, CodeViewContainer Container) const { + return Symbol->toCodeViewSymbol(Allocator, Container); +} + +namespace llvm { +namespace yaml { +template <> struct MappingTraits { + static void mapping(IO &io, SymbolRecordBase &Record) { Record.map(io); } +}; +} +} + +template +static inline Expected +fromCodeViewSymbolImpl(CVSymbol Symbol) { + CodeViewYAML::SymbolRecord Result; + + auto Impl = std::make_shared(Symbol.kind()); + if (auto EC = Impl->fromCodeViewSymbol(Symbol)) + return std::move(EC); + Result.Symbol = Impl; + return Result; +} + +Expected +CodeViewYAML::SymbolRecord::fromCodeViewSymbol(CVSymbol Symbol) { +#define SYMBOL_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + return fromCodeViewSymbolImpl>(Symbol); +#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + SYMBOL_RECORD(EnumName, EnumVal, ClassName) + switch (Symbol.kind()) { +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" + default: + return fromCodeViewSymbolImpl(Symbol); + } + return make_error(cv_error_code::corrupt_record); +} + +template +static void mapSymbolRecordImpl(IO &IO, const char *Class, SymbolKind Kind, + CodeViewYAML::SymbolRecord &Obj) { + if (!IO.outputting()) + Obj.Symbol = std::make_shared(Kind); + + IO.mapRequired(Class, *Obj.Symbol); +} + +void MappingTraits::mapping( + IO &IO, CodeViewYAML::SymbolRecord &Obj) { + SymbolKind Kind; + if (IO.outputting()) + Kind = Obj.Symbol->Kind; + IO.mapRequired("Kind", Kind); + +#define SYMBOL_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + mapSymbolRecordImpl>(IO, #ClassName, Kind, \ + Obj); \ + break; +#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + SYMBOL_RECORD(EnumName, EnumVal, ClassName) + switch (Kind) { +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" + default: + mapSymbolRecordImpl(IO, "UnknownSym", Kind, Obj); + } +} diff --git a/lib/ObjectYAML/CodeViewYAMLTypes.cpp b/lib/ObjectYAML/CodeViewYAMLTypes.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a03b9cd50faa222c9118eb19684ec7cacaf6e4b9 --- /dev/null +++ b/lib/ObjectYAML/CodeViewYAMLTypes.cpp @@ -0,0 +1,756 @@ +//===- CodeViewYAMLTypes.cpp - CodeView YAMLIO types implementation -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of CodeView +// Debug Info. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ObjectYAML/CodeViewYAMLTypes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" +#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" +#include "llvm/Support/BinaryStreamWriter.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::CodeViewYAML; +using namespace llvm::CodeViewYAML::detail; +using namespace llvm::yaml; + +LLVM_YAML_IS_SEQUENCE_VECTOR(OneMethodRecord) +LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef) +LLVM_YAML_IS_SEQUENCE_VECTOR(VFTableSlotKind) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex) + +LLVM_YAML_DECLARE_SCALAR_TRAITS(TypeIndex, false) +LLVM_YAML_DECLARE_SCALAR_TRAITS(APSInt, false) + +LLVM_YAML_DECLARE_ENUM_TRAITS(TypeLeafKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(PointerToMemberRepresentation) +LLVM_YAML_DECLARE_ENUM_TRAITS(VFTableSlotKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(CallingConvention) +LLVM_YAML_DECLARE_ENUM_TRAITS(PointerKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(PointerMode) +LLVM_YAML_DECLARE_ENUM_TRAITS(HfaKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(MemberAccess) +LLVM_YAML_DECLARE_ENUM_TRAITS(MethodKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(WindowsRTClassKind) +LLVM_YAML_DECLARE_ENUM_TRAITS(LabelType) + +LLVM_YAML_DECLARE_BITSET_TRAITS(PointerOptions) +LLVM_YAML_DECLARE_BITSET_TRAITS(ModifierOptions) +LLVM_YAML_DECLARE_BITSET_TRAITS(FunctionOptions) +LLVM_YAML_DECLARE_BITSET_TRAITS(ClassOptions) +LLVM_YAML_DECLARE_BITSET_TRAITS(MethodOptions) + +LLVM_YAML_DECLARE_MAPPING_TRAITS(OneMethodRecord) +LLVM_YAML_DECLARE_MAPPING_TRAITS(MemberPointerInfo) + +namespace llvm { +namespace CodeViewYAML { +namespace detail { + +struct LeafRecordBase { + TypeLeafKind Kind; + explicit LeafRecordBase(TypeLeafKind K) : Kind(K) {} + + virtual ~LeafRecordBase() {} + virtual void map(yaml::IO &io) = 0; + virtual CVType toCodeViewRecord(TypeTableBuilder &TTB) const = 0; + virtual Error fromCodeViewRecord(CVType Type) = 0; +}; + +template struct LeafRecordImpl : public LeafRecordBase { + explicit LeafRecordImpl(TypeLeafKind K) + : LeafRecordBase(K), Record(static_cast(K)) {} + + void map(yaml::IO &io) override; + + Error fromCodeViewRecord(CVType Type) override { + return TypeDeserializer::deserializeAs(Type, Record); + } + + CVType toCodeViewRecord(TypeTableBuilder &TTB) const override { + TTB.writeKnownType(Record); + return CVType(Kind, TTB.records().back()); + } + + mutable T Record; +}; + +template <> struct LeafRecordImpl : public LeafRecordBase { + explicit LeafRecordImpl(TypeLeafKind K) : LeafRecordBase(K) {} + + void map(yaml::IO &io) override; + CVType toCodeViewRecord(TypeTableBuilder &TTB) const override; + Error fromCodeViewRecord(CVType Type) override; + + std::vector Members; +}; + +struct MemberRecordBase { + TypeLeafKind Kind; + explicit MemberRecordBase(TypeLeafKind K) : Kind(K) {} + + virtual ~MemberRecordBase() {} + virtual void map(yaml::IO &io) = 0; + virtual void writeTo(FieldListRecordBuilder &FLRB) = 0; +}; + +template struct MemberRecordImpl : public MemberRecordBase { + explicit MemberRecordImpl(TypeLeafKind K) + : MemberRecordBase(K), Record(static_cast(K)) {} + void map(yaml::IO &io) override; + + void writeTo(FieldListRecordBuilder &FLRB) override { + FLRB.writeMemberType(Record); + } + + mutable T Record; +}; +} +} +} + +void ScalarTraits::output(const TypeIndex &S, void *, + llvm::raw_ostream &OS) { + OS << S.getIndex(); +} + +StringRef ScalarTraits::input(StringRef Scalar, void *Ctx, + TypeIndex &S) { + uint32_t I; + StringRef Result = ScalarTraits::input(Scalar, Ctx, I); + S.setIndex(I); + return Result; +} + +void ScalarTraits::output(const APSInt &S, void *, + llvm::raw_ostream &OS) { + S.print(OS, true); +} + +StringRef ScalarTraits::input(StringRef Scalar, void *Ctx, APSInt &S) { + S = APSInt(Scalar); + return ""; +} + +void ScalarEnumerationTraits::enumeration(IO &io, + TypeLeafKind &Value) { +#define CV_TYPE(name, val) io.enumCase(Value, #name, name); +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +#undef CV_TYPE +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, PointerToMemberRepresentation &Value) { + IO.enumCase(Value, "Unknown", PointerToMemberRepresentation::Unknown); + IO.enumCase(Value, "SingleInheritanceData", + PointerToMemberRepresentation::SingleInheritanceData); + IO.enumCase(Value, "MultipleInheritanceData", + PointerToMemberRepresentation::MultipleInheritanceData); + IO.enumCase(Value, "VirtualInheritanceData", + PointerToMemberRepresentation::VirtualInheritanceData); + IO.enumCase(Value, "GeneralData", PointerToMemberRepresentation::GeneralData); + IO.enumCase(Value, "SingleInheritanceFunction", + PointerToMemberRepresentation::SingleInheritanceFunction); + IO.enumCase(Value, "MultipleInheritanceFunction", + PointerToMemberRepresentation::MultipleInheritanceFunction); + IO.enumCase(Value, "VirtualInheritanceFunction", + PointerToMemberRepresentation::VirtualInheritanceFunction); + IO.enumCase(Value, "GeneralFunction", + PointerToMemberRepresentation::GeneralFunction); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, VFTableSlotKind &Kind) { + IO.enumCase(Kind, "Near16", VFTableSlotKind::Near16); + IO.enumCase(Kind, "Far16", VFTableSlotKind::Far16); + IO.enumCase(Kind, "This", VFTableSlotKind::This); + IO.enumCase(Kind, "Outer", VFTableSlotKind::Outer); + IO.enumCase(Kind, "Meta", VFTableSlotKind::Meta); + IO.enumCase(Kind, "Near", VFTableSlotKind::Near); + IO.enumCase(Kind, "Far", VFTableSlotKind::Far); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, CallingConvention &Value) { + IO.enumCase(Value, "NearC", CallingConvention::NearC); + IO.enumCase(Value, "FarC", CallingConvention::FarC); + IO.enumCase(Value, "NearPascal", CallingConvention::NearPascal); + IO.enumCase(Value, "FarPascal", CallingConvention::FarPascal); + IO.enumCase(Value, "NearFast", CallingConvention::NearFast); + IO.enumCase(Value, "FarFast", CallingConvention::FarFast); + IO.enumCase(Value, "NearStdCall", CallingConvention::NearStdCall); + IO.enumCase(Value, "FarStdCall", CallingConvention::FarStdCall); + IO.enumCase(Value, "NearSysCall", CallingConvention::NearSysCall); + IO.enumCase(Value, "FarSysCall", CallingConvention::FarSysCall); + IO.enumCase(Value, "ThisCall", CallingConvention::ThisCall); + IO.enumCase(Value, "MipsCall", CallingConvention::MipsCall); + IO.enumCase(Value, "Generic", CallingConvention::Generic); + IO.enumCase(Value, "AlphaCall", CallingConvention::AlphaCall); + IO.enumCase(Value, "PpcCall", CallingConvention::PpcCall); + IO.enumCase(Value, "SHCall", CallingConvention::SHCall); + IO.enumCase(Value, "ArmCall", CallingConvention::ArmCall); + IO.enumCase(Value, "AM33Call", CallingConvention::AM33Call); + IO.enumCase(Value, "TriCall", CallingConvention::TriCall); + IO.enumCase(Value, "SH5Call", CallingConvention::SH5Call); + IO.enumCase(Value, "M32RCall", CallingConvention::M32RCall); + IO.enumCase(Value, "ClrCall", CallingConvention::ClrCall); + IO.enumCase(Value, "Inline", CallingConvention::Inline); + IO.enumCase(Value, "NearVector", CallingConvention::NearVector); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, + PointerKind &Kind) { + IO.enumCase(Kind, "Near16", PointerKind::Near16); + IO.enumCase(Kind, "Far16", PointerKind::Far16); + IO.enumCase(Kind, "Huge16", PointerKind::Huge16); + IO.enumCase(Kind, "BasedOnSegment", PointerKind::BasedOnSegment); + IO.enumCase(Kind, "BasedOnValue", PointerKind::BasedOnValue); + IO.enumCase(Kind, "BasedOnSegmentValue", PointerKind::BasedOnSegmentValue); + IO.enumCase(Kind, "BasedOnAddress", PointerKind::BasedOnAddress); + IO.enumCase(Kind, "BasedOnSegmentAddress", + PointerKind::BasedOnSegmentAddress); + IO.enumCase(Kind, "BasedOnType", PointerKind::BasedOnType); + IO.enumCase(Kind, "BasedOnSelf", PointerKind::BasedOnSelf); + IO.enumCase(Kind, "Near32", PointerKind::Near32); + IO.enumCase(Kind, "Far32", PointerKind::Far32); + IO.enumCase(Kind, "Near64", PointerKind::Near64); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, + PointerMode &Mode) { + IO.enumCase(Mode, "Pointer", PointerMode::Pointer); + IO.enumCase(Mode, "LValueReference", PointerMode::LValueReference); + IO.enumCase(Mode, "PointerToDataMember", PointerMode::PointerToDataMember); + IO.enumCase(Mode, "PointerToMemberFunction", + PointerMode::PointerToMemberFunction); + IO.enumCase(Mode, "RValueReference", PointerMode::RValueReference); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, HfaKind &Value) { + IO.enumCase(Value, "None", HfaKind::None); + IO.enumCase(Value, "Float", HfaKind::Float); + IO.enumCase(Value, "Double", HfaKind::Double); + IO.enumCase(Value, "Other", HfaKind::Other); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, + MemberAccess &Access) { + IO.enumCase(Access, "None", MemberAccess::None); + IO.enumCase(Access, "Private", MemberAccess::Private); + IO.enumCase(Access, "Protected", MemberAccess::Protected); + IO.enumCase(Access, "Public", MemberAccess::Public); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, + MethodKind &Kind) { + IO.enumCase(Kind, "Vanilla", MethodKind::Vanilla); + IO.enumCase(Kind, "Virtual", MethodKind::Virtual); + IO.enumCase(Kind, "Static", MethodKind::Static); + IO.enumCase(Kind, "Friend", MethodKind::Friend); + IO.enumCase(Kind, "IntroducingVirtual", MethodKind::IntroducingVirtual); + IO.enumCase(Kind, "PureVirtual", MethodKind::PureVirtual); + IO.enumCase(Kind, "PureIntroducingVirtual", + MethodKind::PureIntroducingVirtual); +} + +void ScalarEnumerationTraits::enumeration( + IO &IO, WindowsRTClassKind &Value) { + IO.enumCase(Value, "None", WindowsRTClassKind::None); + IO.enumCase(Value, "Ref", WindowsRTClassKind::RefClass); + IO.enumCase(Value, "Value", WindowsRTClassKind::ValueClass); + IO.enumCase(Value, "Interface", WindowsRTClassKind::Interface); +} + +void ScalarEnumerationTraits::enumeration(IO &IO, LabelType &Value) { + IO.enumCase(Value, "Near", LabelType::Near); + IO.enumCase(Value, "Far", LabelType::Far); +} + +void ScalarBitSetTraits::bitset(IO &IO, + PointerOptions &Options) { + IO.bitSetCase(Options, "None", PointerOptions::None); + IO.bitSetCase(Options, "Flat32", PointerOptions::Flat32); + IO.bitSetCase(Options, "Volatile", PointerOptions::Volatile); + IO.bitSetCase(Options, "Const", PointerOptions::Const); + IO.bitSetCase(Options, "Unaligned", PointerOptions::Unaligned); + IO.bitSetCase(Options, "Restrict", PointerOptions::Restrict); + IO.bitSetCase(Options, "WinRTSmartPointer", + PointerOptions::WinRTSmartPointer); +} + +void ScalarBitSetTraits::bitset(IO &IO, + ModifierOptions &Options) { + IO.bitSetCase(Options, "None", ModifierOptions::None); + IO.bitSetCase(Options, "Const", ModifierOptions::Const); + IO.bitSetCase(Options, "Volatile", ModifierOptions::Volatile); + IO.bitSetCase(Options, "Unaligned", ModifierOptions::Unaligned); +} + +void ScalarBitSetTraits::bitset(IO &IO, + FunctionOptions &Options) { + IO.bitSetCase(Options, "None", FunctionOptions::None); + IO.bitSetCase(Options, "CxxReturnUdt", FunctionOptions::CxxReturnUdt); + IO.bitSetCase(Options, "Constructor", FunctionOptions::Constructor); + IO.bitSetCase(Options, "ConstructorWithVirtualBases", + FunctionOptions::ConstructorWithVirtualBases); +} + +void ScalarBitSetTraits::bitset(IO &IO, ClassOptions &Options) { + IO.bitSetCase(Options, "None", ClassOptions::None); + IO.bitSetCase(Options, "HasConstructorOrDestructor", + ClassOptions::HasConstructorOrDestructor); + IO.bitSetCase(Options, "HasOverloadedOperator", + ClassOptions::HasOverloadedOperator); + IO.bitSetCase(Options, "Nested", ClassOptions::Nested); + IO.bitSetCase(Options, "ContainsNestedClass", + ClassOptions::ContainsNestedClass); + IO.bitSetCase(Options, "HasOverloadedAssignmentOperator", + ClassOptions::HasOverloadedAssignmentOperator); + IO.bitSetCase(Options, "HasConversionOperator", + ClassOptions::HasConversionOperator); + IO.bitSetCase(Options, "ForwardReference", ClassOptions::ForwardReference); + IO.bitSetCase(Options, "Scoped", ClassOptions::Scoped); + IO.bitSetCase(Options, "HasUniqueName", ClassOptions::HasUniqueName); + IO.bitSetCase(Options, "Sealed", ClassOptions::Sealed); + IO.bitSetCase(Options, "Intrinsic", ClassOptions::Intrinsic); +} + +void ScalarBitSetTraits::bitset(IO &IO, MethodOptions &Options) { + IO.bitSetCase(Options, "None", MethodOptions::None); + IO.bitSetCase(Options, "Pseudo", MethodOptions::Pseudo); + IO.bitSetCase(Options, "NoInherit", MethodOptions::NoInherit); + IO.bitSetCase(Options, "NoConstruct", MethodOptions::NoConstruct); + IO.bitSetCase(Options, "CompilerGenerated", MethodOptions::CompilerGenerated); + IO.bitSetCase(Options, "Sealed", MethodOptions::Sealed); +} + +void MappingTraits::mapping(IO &IO, MemberPointerInfo &MPI) { + IO.mapRequired("ContainingType", MPI.ContainingType); + IO.mapRequired("Representation", MPI.Representation); +} + +namespace llvm { +namespace CodeViewYAML { +namespace detail { +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ModifiedType", Record.ModifiedType); + IO.mapRequired("Modifiers", Record.Modifiers); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ReturnType", Record.ReturnType); + IO.mapRequired("CallConv", Record.CallConv); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("ParameterCount", Record.ParameterCount); + IO.mapRequired("ArgumentList", Record.ArgumentList); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ReturnType", Record.ReturnType); + IO.mapRequired("ClassType", Record.ClassType); + IO.mapRequired("ThisType", Record.ThisType); + IO.mapRequired("CallConv", Record.CallConv); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("ParameterCount", Record.ParameterCount); + IO.mapRequired("ArgumentList", Record.ArgumentList); + IO.mapRequired("ThisPointerAdjustment", Record.ThisPointerAdjustment); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Mode", Record.Mode); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ClassType", Record.ClassType); + IO.mapRequired("FunctionType", Record.FunctionType); + IO.mapRequired("Name", Record.Name); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ArgIndices", Record.ArgIndices); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("StringIndices", Record.StringIndices); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ReferentType", Record.ReferentType); + IO.mapRequired("Attrs", Record.Attrs); + IO.mapOptional("MemberInfo", Record.MemberInfo); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ElementType", Record.ElementType); + IO.mapRequired("IndexType", Record.IndexType); + IO.mapRequired("Size", Record.Size); + IO.mapRequired("Name", Record.Name); +} + +void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("FieldList", Members); +} +} +} +} + +namespace { +class MemberRecordConversionVisitor : public TypeVisitorCallbacks { +public: + explicit MemberRecordConversionVisitor(std::vector &Records) + : Records(Records) {} + +#define TYPE_RECORD(EnumName, EnumVal, Name) +#define MEMBER_RECORD(EnumName, EnumVal, Name) \ + Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override { \ + return visitKnownMemberImpl(Record); \ + } +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" +private: + template Error visitKnownMemberImpl(T &Record) { + TypeLeafKind K = static_cast(Record.getKind()); + auto Impl = std::make_shared>(K); + Impl->Record = Record; + Records.push_back(MemberRecord{Impl}); + return Error::success(); + } + + std::vector &Records; +}; +} + +Error LeafRecordImpl::fromCodeViewRecord(CVType Type) { + MemberRecordConversionVisitor V(Members); + return visitMemberRecordStream(Type.content(), V); +} + +CVType +LeafRecordImpl::toCodeViewRecord(TypeTableBuilder &TTB) const { + FieldListRecordBuilder FLRB(TTB); + FLRB.begin(); + for (const auto &Member : Members) { + Member.Member->writeTo(FLRB); + } + FLRB.end(true); + return CVType(Kind, TTB.records().back()); +} + +void MappingTraits::mapping(IO &io, OneMethodRecord &Record) { + io.mapRequired("Type", Record.Type); + io.mapRequired("Attrs", Record.Attrs.Attrs); + io.mapRequired("VFTableOffset", Record.VFTableOffset); + io.mapRequired("Name", Record.Name); +} + +namespace llvm { +namespace CodeViewYAML { +namespace detail { +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("MemberCount", Record.MemberCount); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("FieldList", Record.FieldList); + IO.mapRequired("Name", Record.Name); + IO.mapRequired("UniqueName", Record.UniqueName); + + IO.mapRequired("DerivationList", Record.DerivationList); + IO.mapRequired("VTableShape", Record.VTableShape); + IO.mapRequired("Size", Record.Size); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("MemberCount", Record.MemberCount); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("FieldList", Record.FieldList); + IO.mapRequired("Name", Record.Name); + IO.mapRequired("UniqueName", Record.UniqueName); + + IO.mapRequired("Size", Record.Size); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("NumEnumerators", Record.MemberCount); + IO.mapRequired("Options", Record.Options); + IO.mapRequired("FieldList", Record.FieldList); + IO.mapRequired("Name", Record.Name); + IO.mapRequired("UniqueName", Record.UniqueName); + + IO.mapRequired("UnderlyingType", Record.UnderlyingType); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Record.Type); + IO.mapRequired("BitSize", Record.BitSize); + IO.mapRequired("BitOffset", Record.BitOffset); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Slots", Record.Slots); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Guid", Record.Guid); + IO.mapRequired("Age", Record.Age); + IO.mapRequired("Name", Record.Name); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Id", Record.Id); + IO.mapRequired("String", Record.String); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ParentScope", Record.ParentScope); + IO.mapRequired("FunctionType", Record.FunctionType); + IO.mapRequired("Name", Record.Name); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("UDT", Record.UDT); + IO.mapRequired("SourceFile", Record.SourceFile); + IO.mapRequired("LineNumber", Record.LineNumber); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("UDT", Record.UDT); + IO.mapRequired("SourceFile", Record.SourceFile); + IO.mapRequired("LineNumber", Record.LineNumber); + IO.mapRequired("Module", Record.Module); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("ArgIndices", Record.ArgIndices); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("CompleteClass", Record.CompleteClass); + IO.mapRequired("OverriddenVFTable", Record.OverriddenVFTable); + IO.mapRequired("VFPtrOffset", Record.VFPtrOffset); + IO.mapRequired("MethodNames", Record.MethodNames); +} + +template <> void LeafRecordImpl::map(IO &IO) { + IO.mapRequired("Methods", Record.Methods); +} + +template <> void MemberRecordImpl::map(IO &IO) { + MappingTraits::mapping(IO, Record); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("NumOverloads", Record.NumOverloads); + IO.mapRequired("MethodList", Record.MethodList); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Record.Type); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("Type", Record.Type); + IO.mapRequired("FieldOffset", Record.FieldOffset); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("Type", Record.Type); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("Value", Record.Value); + IO.mapRequired("Name", Record.Name); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Type", Record.Type); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("Type", Record.Type); + IO.mapRequired("Offset", Record.Offset); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("Attrs", Record.Attrs.Attrs); + IO.mapRequired("BaseType", Record.BaseType); + IO.mapRequired("VBPtrType", Record.VBPtrType); + IO.mapRequired("VBPtrOffset", Record.VBPtrOffset); + IO.mapRequired("VTableIndex", Record.VTableIndex); +} + +template <> void MemberRecordImpl::map(IO &IO) { + IO.mapRequired("ContinuationIndex", Record.ContinuationIndex); +} +} +} +} + +template +static inline Expected fromCodeViewRecordImpl(CVType Type) { + LeafRecord Result; + + auto Impl = std::make_shared>(Type.kind()); + if (auto EC = Impl->fromCodeViewRecord(Type)) + return std::move(EC); + Result.Leaf = Impl; + return Result; +} + +Expected LeafRecord::fromCodeViewRecord(CVType Type) { +#define TYPE_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + return fromCodeViewRecordImpl(Type); +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + TYPE_RECORD(EnumName, EnumVal, ClassName) +#define MEMBER_RECORD(EnumName, EnumVal, ClassName) +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) + switch (Type.kind()) { +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: { llvm_unreachable("Unknown leaf kind!"); } + } + return make_error(cv_error_code::corrupt_record); +} + +CVType LeafRecord::toCodeViewRecord(BumpPtrAllocator &Alloc) const { + TypeTableBuilder TTB(Alloc); + return Leaf->toCodeViewRecord(TTB); +} + +CVType LeafRecord::toCodeViewRecord(TypeTableBuilder &TTB) const { + return Leaf->toCodeViewRecord(TTB); +} + +namespace llvm { +namespace yaml { +template <> struct MappingTraits { + static void mapping(IO &io, LeafRecordBase &Record) { Record.map(io); } +}; + +template <> struct MappingTraits { + static void mapping(IO &io, MemberRecordBase &Record) { Record.map(io); } +}; +} +} + +template +static void mapLeafRecordImpl(IO &IO, const char *Class, TypeLeafKind Kind, + LeafRecord &Obj) { + if (!IO.outputting()) + Obj.Leaf = std::make_shared>(Kind); + + if (Kind == LF_FIELDLIST) + Obj.Leaf->map(IO); + else + IO.mapRequired(Class, *Obj.Leaf); +} + +void MappingTraits::mapping(IO &IO, LeafRecord &Obj) { + TypeLeafKind Kind; + if (IO.outputting()) + Kind = Obj.Leaf->Kind; + IO.mapRequired("Kind", Kind); + +#define TYPE_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + mapLeafRecordImpl(IO, #ClassName, Kind, Obj); \ + break; +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + TYPE_RECORD(EnumName, EnumVal, ClassName) +#define MEMBER_RECORD(EnumName, EnumVal, ClassName) +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) + switch (Kind) { +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: { llvm_unreachable("Unknown leaf kind!"); } + } +} + +template +static void mapMemberRecordImpl(IO &IO, const char *Class, TypeLeafKind Kind, + MemberRecord &Obj) { + if (!IO.outputting()) + Obj.Member = std::make_shared>(Kind); + + IO.mapRequired(Class, *Obj.Member); +} + +void MappingTraits::mapping(IO &IO, MemberRecord &Obj) { + TypeLeafKind Kind; + if (IO.outputting()) + Kind = Obj.Member->Kind; + IO.mapRequired("Kind", Kind); + +#define MEMBER_RECORD(EnumName, EnumVal, ClassName) \ + case EnumName: \ + mapMemberRecordImpl(IO, #ClassName, Kind, Obj); \ + break; +#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) \ + MEMBER_RECORD(EnumName, EnumVal, ClassName) +#define TYPE_RECORD(EnumName, EnumVal, ClassName) +#define TYPE_RECORD_ALIAS(EnumName, EnumVal, AliasName, ClassName) + switch (Kind) { +#include "llvm/DebugInfo/CodeView/CodeViewTypes.def" + default: { llvm_unreachable("Unknown member kind!"); } + } +} + +std::vector +llvm::CodeViewYAML::fromDebugT(ArrayRef DebugT) { + ExitOnError Err("Invalid .debug$T section!"); + BinaryStreamReader Reader(DebugT, support::little); + CVTypeArray Types; + uint32_t Magic; + + Err(Reader.readInteger(Magic)); + assert(Magic == COFF::DEBUG_SECTION_MAGIC && "Invalid .debug$T section!"); + + std::vector Result; + Err(Reader.readArray(Types, Reader.bytesRemaining())); + for (const auto &T : Types) { + auto CVT = Err(LeafRecord::fromCodeViewRecord(T)); + Result.push_back(CVT); + } + return Result; +} + +ArrayRef llvm::CodeViewYAML::toDebugT(ArrayRef Leafs, + BumpPtrAllocator &Alloc) { + TypeTableBuilder TTB(Alloc, false); + uint32_t Size = sizeof(uint32_t); + for (const auto &Leaf : Leafs) { + CVType T = Leaf.toCodeViewRecord(TTB); + Size += T.length(); + assert(T.length() % 4 == 0 && "Improper type record alignment!"); + } + uint8_t *ResultBuffer = Alloc.Allocate(Size); + MutableArrayRef Output(ResultBuffer, Size); + BinaryStreamWriter Writer(Output, support::little); + ExitOnError Err("Error writing type record to .debug$T section"); + Err(Writer.writeInteger(COFF::DEBUG_SECTION_MAGIC)); + for (const auto &R : TTB.records()) { + Err(Writer.writeBytes(R)); + } + assert(Writer.bytesRemaining() == 0 && "Didn't write all type record bytes!"); + return Output; +} diff --git a/lib/ObjectYAML/DWARFEmitter.cpp b/lib/ObjectYAML/DWARFEmitter.cpp index 1aa1519b708ba02d30f3693e2731e8818081290d..91c928771a657ffb1ec32faec21b23aaa77706eb 100644 --- a/lib/ObjectYAML/DWARFEmitter.cpp +++ b/lib/ObjectYAML/DWARFEmitter.cpp @@ -16,8 +16,8 @@ #include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/Support/Error.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" #include "DWARFVisitor.h" diff --git a/lib/ObjectYAML/DWARFVisitor.h b/lib/ObjectYAML/DWARFVisitor.h index 263e36220a05be65656eb6504e224a1656d64454..81ef412eb7e69002d352db48775f3b5ca9741306 100644 --- a/lib/ObjectYAML/DWARFVisitor.h +++ b/lib/ObjectYAML/DWARFVisitor.h @@ -13,7 +13,7 @@ #define LLVM_OBJECTYAML_DWARFVISITOR_H #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Dwarf.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/MemoryBuffer.h" namespace llvm { diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp index 3052901da45ca98c21e7f25613582a58eaaadf55..dbd5498e003d2de54c501caf3e079a9903a14a0d 100644 --- a/lib/ObjectYAML/ELFYAML.cpp +++ b/lib/ObjectYAML/ELFYAML.cpp @@ -372,6 +372,7 @@ void ScalarEnumerationTraits::enumeration( ECase(SHT_GROUP); ECase(SHT_SYMTAB_SHNDX); ECase(SHT_LOOS); + ECase(SHT_LLVM_ODRTAB); ECase(SHT_GNU_ATTRIBUTES); ECase(SHT_GNU_HASH); ECase(SHT_GNU_verdef); @@ -424,12 +425,6 @@ void ScalarBitSetTraits::bitset(IO &IO, case ELF::EM_ARM: BCase(SHF_ARM_PURECODE); break; - case ELF::EM_AMDGPU: - BCase(SHF_AMDGPU_HSA_GLOBAL); - BCase(SHF_AMDGPU_HSA_READONLY); - BCase(SHF_AMDGPU_HSA_CODE); - BCase(SHF_AMDGPU_HSA_AGENT); - break; case ELF::EM_HEXAGON: BCase(SHF_HEX_GPREL); break; @@ -513,35 +508,35 @@ void ScalarEnumerationTraits::enumeration( #define ELF_RELOC(X, Y) IO.enumCase(Value, #X, ELF::X); switch (Object->Header.Machine) { case ELF::EM_X86_64: -#include "llvm/Support/ELFRelocs/x86_64.def" +#include "llvm/BinaryFormat/ELFRelocs/x86_64.def" break; case ELF::EM_MIPS: -#include "llvm/Support/ELFRelocs/Mips.def" +#include "llvm/BinaryFormat/ELFRelocs/Mips.def" break; case ELF::EM_HEXAGON: -#include "llvm/Support/ELFRelocs/Hexagon.def" +#include "llvm/BinaryFormat/ELFRelocs/Hexagon.def" break; case ELF::EM_386: case ELF::EM_IAMCU: -#include "llvm/Support/ELFRelocs/i386.def" +#include "llvm/BinaryFormat/ELFRelocs/i386.def" break; case ELF::EM_AARCH64: -#include "llvm/Support/ELFRelocs/AArch64.def" +#include "llvm/BinaryFormat/ELFRelocs/AArch64.def" break; case ELF::EM_ARM: -#include "llvm/Support/ELFRelocs/ARM.def" +#include "llvm/BinaryFormat/ELFRelocs/ARM.def" break; case ELF::EM_RISCV: -#include "llvm/Support/ELFRelocs/RISCV.def" +#include "llvm/BinaryFormat/ELFRelocs/RISCV.def" break; case ELF::EM_LANAI: -#include "llvm/Support/ELFRelocs/Lanai.def" +#include "llvm/BinaryFormat/ELFRelocs/Lanai.def" break; case ELF::EM_AMDGPU: -#include "llvm/Support/ELFRelocs/AMDGPU.def" +#include "llvm/BinaryFormat/ELFRelocs/AMDGPU.def" break; case ELF::EM_BPF: -#include "llvm/Support/ELFRelocs/BPF.def" +#include "llvm/BinaryFormat/ELFRelocs/BPF.def" break; default: llvm_unreachable("Unsupported architecture"); diff --git a/lib/ObjectYAML/LLVMBuild.txt b/lib/ObjectYAML/LLVMBuild.txt index b8d1d2f1779e3821f555c17f36183d63ebf6da2a..44657e916a9151629770dca6ec673ddf58f12d47 100644 --- a/lib/ObjectYAML/LLVMBuild.txt +++ b/lib/ObjectYAML/LLVMBuild.txt @@ -11,4 +11,4 @@ type = Library name = ObjectYAML parent = Libraries -required_libraries = Support +required_libraries = Support DebugInfoCodeView diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp index 6b0e4e3762d056bf81eee4c7698e6eee45c14b48..461684827872dbd14170a21c7b854f4e63f10198 100644 --- a/lib/ObjectYAML/MachOYAML.cpp +++ b/lib/ObjectYAML/MachOYAML.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/ObjectYAML/MachOYAML.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" -#include "llvm/Support/MachO.h" #include // For memcpy, memset and strnlen. @@ -252,7 +252,7 @@ void MappingTraits::mapping( break; switch (LoadCommand.Data.load_command_data.cmd) { -#include "llvm/Support/MachO.def" +#include "llvm/BinaryFormat/MachO.def" } IO.mapOptional("PayloadBytes", LoadCommand.PayloadBytes); IO.mapOptional("ZeroPadBytes", LoadCommand.ZeroPadBytes, (uint64_t)0ull); diff --git a/lib/ObjectYAML/ObjectYAML.cpp b/lib/ObjectYAML/ObjectYAML.cpp index 74581c1ecaacc597328d696cc7f27d25e1537662..4b7154ebb7c13c0c329d8a624c469bcc89e94330 100644 --- a/lib/ObjectYAML/ObjectYAML.cpp +++ b/lib/ObjectYAML/ObjectYAML.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ObjectYAML/YAML.h" #include "llvm/ObjectYAML/ObjectYAML.h" +#include "llvm/ObjectYAML/YAML.h" using namespace llvm; using namespace yaml; diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index 3e1bed19d61fe7004f21253f83d94bdbffd6ea7b..353d027f4e111ce12bdd7832458f1fbe26fcccd1 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp @@ -50,7 +50,11 @@ static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) { static void sectionMapping(IO &IO, WasmYAML::CustomSection &Section) { commonSectionMapping(IO, Section); IO.mapRequired("Name", Section.Name); - IO.mapRequired("Payload", Section.Payload); + if (Section.Name == "name") { + IO.mapOptional("FunctionNames", Section.FunctionNames); + } else { + IO.mapRequired("Payload", Section.Payload); + } } static void sectionMapping(IO &IO, WasmYAML::TypeSection &Section) { @@ -223,7 +227,13 @@ void MappingTraits::mapping( IO.mapRequired("Type", Relocation.Type); IO.mapRequired("Index", Relocation.Index); IO.mapRequired("Offset", Relocation.Offset); - IO.mapRequired("Addend", Relocation.Addend); + IO.mapOptional("Addend", Relocation.Addend, 0); +} + +void MappingTraits::mapping( + IO &IO, WasmYAML::NameEntry &NameEntry) { + IO.mapRequired("Index", NameEntry.Index); + IO.mapRequired("Name", NameEntry.Name); } void MappingTraits::mapping( @@ -255,8 +265,12 @@ void MappingTraits::mapping(IO &IO, if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) { IO.mapRequired("SigIndex", Import.SigIndex); } else if (Import.Kind == wasm::WASM_EXTERNAL_GLOBAL) { - IO.mapRequired("GlobalType", Import.GlobalType); - IO.mapRequired("GlobalMutable", Import.GlobalMutable); + IO.mapRequired("GlobalType", Import.GlobalImport.Type); + IO.mapRequired("GlobalMutable", Import.GlobalImport.Mutable); + } else if (Import.Kind == wasm::WASM_EXTERNAL_TABLE) { + IO.mapRequired("Table", Import.TableImport); + } else if (Import.Kind == wasm::WASM_EXTERNAL_MEMORY ) { + IO.mapRequired("Memory", Import.Memory); } else { llvm_unreachable("unhandled import type"); } @@ -294,6 +308,9 @@ void MappingTraits::mapping(IO &IO, case wasm::WASM_OPCODE_F64_CONST: IO.mapRequired("Value", Expr.Value.Float64); break; + case wasm::WASM_OPCODE_GET_GLOBAL: + IO.mapRequired("Index", Expr.Value.Global); + break; } } @@ -349,7 +366,7 @@ void ScalarEnumerationTraits::enumeration( void ScalarEnumerationTraits::enumeration( IO &IO, WasmYAML::RelocType &Type) { #define WASM_RELOC(name, value) IO.enumCase(Type, #name, wasm::name); -#include "llvm/Support/WasmRelocs/WebAssembly.def" +#include "llvm/BinaryFormat/WasmRelocs/WebAssembly.def" #undef WASM_RELOC } diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp index 3e8a1d802314b01d2744fcfd375f4b06b322af21..e581fee8bf381f6efbd007df37ef59c4f47bea6a 100644 --- a/lib/Option/Arg.cpp +++ b/lib/Option/Arg.cpp @@ -1,4 +1,4 @@ -//===--- Arg.cpp - Argument Implementations -------------------------------===// +//===- Arg.cpp - Argument Implementations ---------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/Arg.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" +#include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::opt; @@ -67,7 +67,7 @@ LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); } std::string Arg::getAsString(const ArgList &Args) const { SmallString<256> Res; - llvm::raw_svector_ostream OS(Res); + raw_svector_ostream OS(Res); ArgStringList ASL; render(Args, ASL); @@ -98,7 +98,7 @@ void Arg::render(const ArgList &Args, ArgStringList &Output) const { case Option::RenderCommaJoinedStyle: { SmallString<256> Res; - llvm::raw_svector_ostream OS(Res); + raw_svector_ostream OS(Res); OS << getSpelling(); for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ','; diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index 7ff358a57e043bf03326d969a157d9364b649c1f..cbccc1935d3c2050d5c53d213fd3eb93e88e3ee9 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -1,4 +1,4 @@ -//===--- ArgList.cpp - Argument List Management ---------------------------===// +//===- ArgList.cpp - Argument List Management -----------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,215 +7,67 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/ArgList.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" +#include "llvm/Option/OptSpecifier.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace llvm::opt; -void arg_iterator::SkipToNextArg() { - for (; Current != Args.end(); ++Current) { - // Done if there are no filters. - if (!Id0.isValid()) - break; - - // Otherwise require a match. - const Option &O = (*Current)->getOption(); - if (O.matches(Id0) || - (Id1.isValid() && O.matches(Id1)) || - (Id2.isValid() && O.matches(Id2))) - break; - } -} - void ArgList::append(Arg *A) { Args.push_back(A); -} - -void ArgList::eraseArg(OptSpecifier Id) { - Args.erase( - remove_if(*this, [=](Arg *A) { return A->getOption().matches(Id); }), - end()); -} - -Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const { - // FIXME: Make search efficient? - for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it) - if ((*it)->getOption().matches(Id)) - return *it; - return nullptr; -} -Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1) const { - // FIXME: Make search efficient? - for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it) - if ((*it)->getOption().matches(Id0) || - (*it)->getOption().matches(Id1)) - return *it; - return nullptr; -} - -Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1, - OptSpecifier Id2) const { - // FIXME: Make search efficient? - for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it) - if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) || - (*it)->getOption().matches(Id2)) - return *it; - return nullptr; -} - -Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1, - OptSpecifier Id2, OptSpecifier Id3) const { - // FIXME: Make search efficient? - for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it) - if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) || - (*it)->getOption().matches(Id2) || (*it)->getOption().matches(Id3)) - return *it; - return nullptr; -} - -Arg *ArgList::getLastArg(OptSpecifier Id) const { - Arg *Res = nullptr; - for (const_iterator it = begin(), ie = end(); it != ie; ++it) { - if ((*it)->getOption().matches(Id)) { - Res = *it; - Res->claim(); - } + // Update ranges for the option and all of its groups. + for (Option O = A->getOption().getUnaliasedOption(); O.isValid(); + O = O.getGroup()) { + auto &R = + OptRanges.insert(std::make_pair(O.getID(), emptyRange())).first->second; + R.first = std::min(R.first, Args.size() - 1); + R.second = Args.size(); } - - return Res; } -Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1) const { - Arg *Res = nullptr; - for (const_iterator it = begin(), ie = end(); it != ie; ++it) { - if ((*it)->getOption().matches(Id0) || - (*it)->getOption().matches(Id1)) { - Res = *it; - Res->claim(); - - } - } - - return Res; -} - -Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, - OptSpecifier Id2) const { - Arg *Res = nullptr; - for (const_iterator it = begin(), ie = end(); it != ie; ++it) { - if ((*it)->getOption().matches(Id0) || - (*it)->getOption().matches(Id1) || - (*it)->getOption().matches(Id2)) { - Res = *it; - Res->claim(); - } - } - - return Res; -} - -Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, - OptSpecifier Id2, OptSpecifier Id3) const { - Arg *Res = nullptr; - for (const_iterator it = begin(), ie = end(); it != ie; ++it) { - if ((*it)->getOption().matches(Id0) || - (*it)->getOption().matches(Id1) || - (*it)->getOption().matches(Id2) || - (*it)->getOption().matches(Id3)) { - Res = *it; - Res->claim(); - } - } - - return Res; -} - -Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, - OptSpecifier Id2, OptSpecifier Id3, - OptSpecifier Id4) const { - Arg *Res = nullptr; - for (const_iterator it = begin(), ie = end(); it != ie; ++it) { - if ((*it)->getOption().matches(Id0) || - (*it)->getOption().matches(Id1) || - (*it)->getOption().matches(Id2) || - (*it)->getOption().matches(Id3) || - (*it)->getOption().matches(Id4)) { - Res = *it; - Res->claim(); - } - } - - return Res; -} - -Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, - OptSpecifier Id2, OptSpecifier Id3, - OptSpecifier Id4, OptSpecifier Id5) const { - Arg *Res = nullptr; - for (const_iterator it = begin(), ie = end(); it != ie; ++it) { - if ((*it)->getOption().matches(Id0) || - (*it)->getOption().matches(Id1) || - (*it)->getOption().matches(Id2) || - (*it)->getOption().matches(Id3) || - (*it)->getOption().matches(Id4) || - (*it)->getOption().matches(Id5)) { - Res = *it; - Res->claim(); - } +void ArgList::eraseArg(OptSpecifier Id) { + // Zero out the removed entries but keep them around so that we don't + // need to invalidate OptRanges. + for (Arg *const &A : filtered(Id)) { + // Avoid the need for a non-const filtered iterator variant. + Arg **ArgsBegin = Args.data(); + ArgsBegin[&A - ArgsBegin] = nullptr; } - - return Res; + OptRanges.erase(Id.getID()); } -Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, - OptSpecifier Id2, OptSpecifier Id3, - OptSpecifier Id4, OptSpecifier Id5, - OptSpecifier Id6) const { - Arg *Res = nullptr; - for (const_iterator it = begin(), ie = end(); it != ie; ++it) { - if ((*it)->getOption().matches(Id0) || - (*it)->getOption().matches(Id1) || - (*it)->getOption().matches(Id2) || - (*it)->getOption().matches(Id3) || - (*it)->getOption().matches(Id4) || - (*it)->getOption().matches(Id5) || - (*it)->getOption().matches(Id6)) { - Res = *it; - Res->claim(); +ArgList::OptRange +ArgList::getRange(std::initializer_list Ids) const { + OptRange R = emptyRange(); + for (auto Id : Ids) { + auto I = OptRanges.find(Id.getID()); + if (I != OptRanges.end()) { + R.first = std::min(R.first, I->second.first); + R.second = std::max(R.second, I->second.second); } } - - return Res; -} - -Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1, - OptSpecifier Id2, OptSpecifier Id3, - OptSpecifier Id4, OptSpecifier Id5, - OptSpecifier Id6, OptSpecifier Id7) const { - Arg *Res = nullptr; - for (const_iterator it = begin(), ie = end(); it != ie; ++it) { - if ((*it)->getOption().matches(Id0) || - (*it)->getOption().matches(Id1) || - (*it)->getOption().matches(Id2) || - (*it)->getOption().matches(Id3) || - (*it)->getOption().matches(Id4) || - (*it)->getOption().matches(Id5) || - (*it)->getOption().matches(Id6) || - (*it)->getOption().matches(Id7)) { - Res = *it; - Res->claim(); - } - } - - return Res; + // Map an empty {-1, 0} range to {0, 0} so it can be used to form iterators. + if (R.first == -1u) + R.first = 0; + return R; } bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const { @@ -231,8 +83,7 @@ bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg, return Default; } -StringRef ArgList::getLastArgValue(OptSpecifier Id, - StringRef Default) const { +StringRef ArgList::getLastArgValue(OptSpecifier Id, StringRef Default) const { if (Arg *A = getLastArg(Id)) return A->getValue(); return Default; @@ -262,7 +113,7 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0, void ArgList::AddAllArgsExcept(ArgStringList &Output, ArrayRef Ids, ArrayRef ExcludeIds) const { - for (const Arg *Arg : Args) { + for (const Arg *Arg : *this) { bool Excluded = false; for (OptSpecifier Id : ExcludeIds) { if (Arg->getOption().matches(Id)) { @@ -325,14 +176,14 @@ void ArgList::AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0, } void ArgList::ClaimAllArgs(OptSpecifier Id0) const { - for (auto Arg : filtered(Id0)) + for (auto *Arg : filtered(Id0)) Arg->claim(); } void ArgList::ClaimAllArgs() const { - for (const_iterator it = begin(), ie = end(); it != ie; ++it) - if (!(*it)->isClaimed()) - (*it)->claim(); + for (auto *Arg : *this) + if (!Arg->isClaimed()) + Arg->claim(); } const char *ArgList::GetOrMakeJoinedArgString(unsigned Index, @@ -357,8 +208,6 @@ void ArgList::print(raw_ostream &O) const { LLVM_DUMP_METHOD void ArgList::dump() const { print(dbgs()); } #endif -// - void InputArgList::releaseMemory() { // An InputArgList always owns its arguments. for (Arg *A : *this) @@ -394,8 +243,6 @@ const char *InputArgList::MakeArgStringRef(StringRef Str) const { return getArgString(MakeIndex(Str)); } -// - DerivedArgList::DerivedArgList(const InputArgList &BaseArgs) : BaseArgs(BaseArgs) {} diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index 7eafb00855d777bc034de760049ce2e4d744bd33..52a81ff0e15945fcc74838de345c04ea0cef0893 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -1,4 +1,4 @@ -//===--- OptTable.cpp - Option Table Implementation -----------------------===// +//===- OptTable.cpp - Option Table Implementation -------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,16 +7,25 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/OptTable.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" +#include "llvm/Option/OptSpecifier.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include +#include #include +#include #include +#include +#include +#include using namespace llvm; using namespace llvm::opt; @@ -80,14 +89,14 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) { static inline bool operator<(const OptTable::Info &I, const char *Name) { return StrCmpOptionNameIgnoreCase(I.Name, Name) < 0; } -} -} + +} // end namespace opt +} // end namespace llvm OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) - : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase), TheInputOptionID(0), - TheUnknownOptionID(0), FirstSearchableIndex(0) { + : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase) { // Explicitly zero initialize the error to work around a bug in array // value-initialization on MinGW with gcc 4.3.5. @@ -138,8 +147,8 @@ OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) } // Build prefix chars. - for (llvm::StringSet<>::const_iterator I = PrefixesUnion.begin(), - E = PrefixesUnion.end(); I != E; ++I) { + for (StringSet<>::const_iterator I = PrefixesUnion.begin(), + E = PrefixesUnion.end(); I != E; ++I) { StringRef Prefix = I->getKey(); for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end(); C != CE; ++C) @@ -148,8 +157,7 @@ OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) } } -OptTable::~OptTable() { -} +OptTable::~OptTable() = default; const Option OptTable::getOption(OptSpecifier Opt) const { unsigned id = Opt.getID(); @@ -159,11 +167,11 @@ const Option OptTable::getOption(OptSpecifier Opt) const { return Option(&getInfo(id), this); } -static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) { +static bool isInput(const StringSet<> &Prefixes, StringRef Arg) { if (Arg == "-") return true; - for (llvm::StringSet<>::const_iterator I = Prefixes.begin(), - E = Prefixes.end(); I != E; ++I) + for (StringSet<>::const_iterator I = Prefixes.begin(), + E = Prefixes.end(); I != E; ++I) if (Arg.startswith(I->getKey())) return false; return true; @@ -186,6 +194,20 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str, return 0; } +std::vector OptTable::findByPrefix(StringRef Cur) const { + std::vector Ret; + for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) { + if (!In.Prefixes) + continue; + for (int I = 0; In.Prefixes[I]; I++) { + std::string S = std::string(In.Prefixes[I]) + std::string(In.Name); + if (StringRef(S).startswith(Cur)) + Ret.push_back(S); + } + } + return Ret; +} + Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, unsigned FlagsToInclude, unsigned FlagsToExclude) const { @@ -332,7 +354,7 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) { static void PrintHelpOptionList(raw_ostream &OS, StringRef Title, std::vector > &OptionHelp) { + const char*>> &OptionHelp) { OS << Title << ":\n"; // Find the maximum option length. @@ -398,8 +420,8 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title, // Render help text into a map of group-name to a list of (option, help) // pairs. - typedef std::map > > helpmap_ty; + using helpmap_ty = + std::map>>; helpmap_ty GroupedOptionHelp; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp index 736b939fe80b33ddf7ed6581c8b7c33057ab66b1..4832e659f026dac4b6cf21dadd8e08c3f353bdb0 100644 --- a/lib/Option/Option.cpp +++ b/lib/Option/Option.cpp @@ -1,4 +1,4 @@ -//===--- Option.cpp - Abstract Driver Options -----------------------------===// +//===- Option.cpp - Abstract Driver Options -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,22 +7,24 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Option/Option.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Option/OptTable.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include #include +#include using namespace llvm; using namespace llvm::opt; Option::Option(const OptTable::Info *info, const OptTable *owner) : Info(info), Owner(owner) { - // Multi-level aliases are not supported. This just simplifies option // tracking, it is not an inherent limitation. assert((!Info || !getAlias().isValid() || !getAlias().getAlias().isValid()) && diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 0421946a32a69527f8b9c15a24bbbc4c75d23a95..afd66f55720a5fa1bb4811a8bc4736041f1fdd91 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -125,6 +125,7 @@ #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" @@ -149,8 +150,30 @@ using namespace llvm; static cl::opt MaxDevirtIterations("pm-max-devirt-iterations", cl::ReallyHidden, cl::init(4)); +static cl::opt + RunPartialInlining("enable-npm-partial-inlining", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Run Partial inlinining pass")); -static Regex DefaultAliasRegex("^(default|lto-pre-link|lto)<(O[0123sz])>$"); +static cl::opt + RunNewGVN("enable-npm-newgvn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Run NewGVN instead of GVN")); + +static cl::opt EnableEarlyCSEMemSSA( + "enable-npm-earlycse-memssa", cl::init(false), cl::Hidden, + cl::desc("Enable the EarlyCSE w/ MemorySSA pass for the new PM (default = off)")); + +static cl::opt EnableGVNHoist( + "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); + +static cl::opt EnableGVNSink( + "enable-npm-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); + +static Regex DefaultAliasRegex( + "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { switch (Level) { @@ -293,7 +316,17 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(SROA()); // Catch trivial redundancies - FPM.addPass(EarlyCSEPass()); + FPM.addPass(EarlyCSEPass(EnableEarlyCSEMemSSA)); + + // Hoisting of scalars and load expressions. + if (EnableGVNHoist) + FPM.addPass(GVNHoistPass()); + + // Global value numbering based sinking. + if (EnableGVNSink) { + FPM.addPass(GVNSinkPass()); + FPM.addPass(SimplifyCFGPass()); + } // Speculative execution if the target has divergent branches; otherwise nop. FPM.addPass(SpeculativeExecutionPass()); @@ -327,13 +360,13 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); LPM1.addPass(LICMPass()); -#if 0 - // The LoopUnswitch pass isn't yet ported to the new pass manager. - LPM1.addPass(LoopUnswitchPass(/* OptimizeForSize */ Level != O3)); -#endif + LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); LPM2.addPass(LoopIdiomRecognizePass()); LPM2.addPass(LoopDeletionPass()); + // FIXME: The old pass manager has a hack to disable loop unrolling during + // ThinLTO when using sample PGO. Need to either fix it or port some + // workaround. LPM2.addPass(LoopUnrollPass::createFull(Level)); // We provide the opt remark emitter pass for LICM to use. We only need to do @@ -348,7 +381,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if (Level != O1) { // These passes add substantial compile time so skip them at O1. FPM.addPass(MergedLoadStoreMotionPass()); - FPM.addPass(GVN()); + if (RunNewGVN) + FPM.addPass(NewGVNPass()); + else + FPM.addPass(GVN()); } // Specially optimize memory movement as it doesn't look like dataflow in SSA. @@ -420,6 +456,11 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); } + // Delete anything that is now dead to make sure that we don't instrument + // dead code. Instrumentation can end up keeping dead code around and + // dramatically increase code size. + MPM.addPass(GlobalDCEPass()); + if (RunProfileGen) { MPM.addPass(PGOInstrumentationGen()); @@ -435,14 +476,10 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, } ModulePassManager -PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging) { - assert(Level != O0 && "Must request optimizations for the default pipeline!"); +PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, + bool DebugLogging) { ModulePassManager MPM(DebugLogging); - // Force any function attributes we want the rest of the pipeline te observe. - MPM.addPass(ForceFunctionAttrsPass()); - // Do basic inference of function attributes from known properties of system // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); @@ -454,7 +491,6 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); - EarlyFPM.addPass(GVNHoistPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); // Interprocedural constant propagation now that basic cleanup has occured @@ -484,21 +520,25 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, GlobalCleanupPM.addPass(SimplifyCFGPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); - // Add all the requested passes for PGO Instrumentation, if requested. + // Add all the requested passes for PGO, if requested. if (PGOOpt) { assert(PGOOpt->RunProfileGen || PGOOpt->SamplePGO || !PGOOpt->ProfileUseFile.empty()); addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile); - } - // Indirect call promotion that promotes intra-module targes only. - MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO)); + // Indirect call promotion that promotes intra-module targes only. + MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO)); + } // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. MPM.addPass(RequireAnalysisPass()); + // Require the ProfileSummaryAnalysis for the module so we can query it within + // the inliner pass. + MPM.addPass(RequireAnalysisPass()); + // Now begin the main postorder CGSCC pipeline. // FIXME: The current CGSCC pipeline has its origins in the legacy pass // manager and trying to emulate its precise behavior. Much of this doesn't @@ -538,12 +578,30 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( std::move(MainCGPipeline), MaxDevirtIterations, DebugLogging))); - // This ends the canonicalization and simplification phase of the pipeline. - // At this point, we expect to have canonical and simple IR which we begin - // *optimizing* for efficient execution going forward. + return MPM; +} - // Eliminate externally available functions now that inlining is over -- we - // won't emit these anyways. +ModulePassManager +PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, + bool DebugLogging) { + ModulePassManager MPM(DebugLogging); + + // Optimize globals now that the module is fully simplified. + MPM.addPass(GlobalOptPass()); + + // Run partial inlining pass to partially inline functions that have + // large bodies. + if (RunPartialInlining) + MPM.addPass(PartialInlinerPass()); + + // Remove avail extern fns and globals definitions since we aren't compiling + // an object file for later LTO. For LTO we want to preserve these so they + // are eligible for inlining at link-time. Note if they are unreferenced they + // will be removed by GlobalDCE later, so this only impacts referenced + // available externally globals. Eventually they will be suppressed during + // codegen, but eliminating here enables more opportunity for GlobalDCE as it + // may make globals referenced by available external functions dead and saves + // running remaining passes on the eliminated functions. MPM.addPass(EliminateAvailableExternallyPass()); // Do RPO function attribute inference across the module to forward-propagate @@ -624,6 +682,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // And finally clean up LCSSA form before generating code. OptimizePM.addPass(InstSimplifierPass()); + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. + OptimizePM.addPass(SimplifyCFGPass()); + // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); @@ -637,6 +699,87 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, return MPM; } +ModulePassManager +PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, + bool DebugLogging) { + assert(Level != O0 && "Must request optimizations for the default pipeline!"); + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline to observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // Add the core simplification pipeline. + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + + // Now add the optimization pipeline. + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + + return MPM; +} + +ModulePassManager +PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, + bool DebugLogging) { + assert(Level != O0 && "Must request optimizations for the default pipeline!"); + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline to observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // If we are planning to perform ThinLTO later, we don't bloat the code with + // unrolling/vectorization/... now. Just simplify the module as much as we + // can. + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + + // Run partial inlining pass to partially inline functions that have + // large bodies. + // FIXME: It isn't clear whether this is really the right place to run this + // in ThinLTO. Because there is another canonicalization and simplification + // phase that will run after the thin link, running this here ends up with + // less information than will be available later and it may grow functions in + // ways that aren't beneficial. + if (RunPartialInlining) + MPM.addPass(PartialInlinerPass()); + + // Reduce the size of the IR as much as possible. + MPM.addPass(GlobalOptPass()); + + // Rename anon globals to be able to export them in the summary. + MPM.addPass(NameAnonGlobalPass()); + + return MPM; +} + +ModulePassManager +PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level, + bool DebugLogging) { + // FIXME: The summary index is not hooked in the new pass manager yet. + // When it's going to be hooked, enable WholeProgramDevirt and LowerTypeTest + // here. + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline to observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // During the ThinLTO backend phase we perform early indirect call promotion + // here, before globalopt. Otherwise imported available_externally functions + // look unreferenced and are removed. + MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, + PGOOpt && PGOOpt->SamplePGO && + !PGOOpt->ProfileUseFile.empty())); + + // Add the core simplification pipeline. + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + + // Now add the optimization pipeline. + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + + return MPM; +} + ModulePassManager PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level, bool DebugLogging) { @@ -751,7 +894,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // FIXME: once we fix LoopPass Manager, add LICM here. // FIXME: once we provide support for enabling MLSM, add it here. // FIXME: once we provide support for enabling NewGVN, add it here. - MainFPM.addPass(GVN()); + if (RunNewGVN) + MainFPM.addPass(NewGVNPass()); + else + MainFPM.addPass(GVN()); // Remove dead memcpy()'s. MainFPM.addPass(MemCpyOptPass()); @@ -857,9 +1003,16 @@ static Optional parseDevirtPassName(StringRef Name) { return Count; } +/// Tests whether a pass name starts with a valid prefix for a default pipeline +/// alias. +static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) { + return Name.startswith("default") || Name.startswith("thinlto") || + Name.startswith("lto"); +} + static bool isModulePassName(StringRef Name) { // Manually handle aliases for pre-configured pipeline fragments. - if (Name.startswith("default") || Name.startswith("lto")) + if (startsWithDefaultPipelineAliasPrefix(Name)) return DefaultAliasRegex.match(Name); // Explicitly handle pass manager names. @@ -1054,7 +1207,7 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, } // Manually handle aliases for pre-configured pipeline fragments. - if (Name.startswith("default") || Name.startswith("lto")) { + if (startsWithDefaultPipelineAliasPrefix(Name)) { SmallVector Matches; if (!DefaultAliasRegex.match(Name, &Matches)) return false; @@ -1073,6 +1226,10 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, if (Matches[1] == "default") { MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging)); + } else if (Matches[1] == "thinlto-pre-link") { + MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L, DebugLogging)); + } else if (Matches[1] == "thinlto") { + MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging)); } else if (Matches[1] == "lto-pre-link") { MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging)); } else { diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index efd4c097a67572db99561aad063158e1651d8c7f..d59ec7f858407140041b1f6a8faaa1cde3c86616 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -229,6 +229,7 @@ LOOP_PASS("strength-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) LOOP_PASS("unroll", LoopUnrollPass::create()) LOOP_PASS("unroll-full", LoopUnrollPass::createFull()) +LOOP_PASS("unswitch", SimpleLoopUnswitchPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", IVUsersPrinterPass(dbgs())) LOOP_PASS("loop-predication", LoopPredicationPass()) diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp index 23999a5312c73c0457660828e611cbdab4befb6f..015b3c6c2021d293a7ed8c9ce0c70cc26bd2e2d0 100644 --- a/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/Coverage/CoverageMapping.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" @@ -19,7 +20,6 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ProfileData/Coverage/CoverageMapping.h" #include "llvm/ProfileData/Coverage/CoverageMappingReader.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Support/Debug.h" diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp index 05c5b28d7a0765d7e90a2ff52a9f1ce0eef3e271..a34f359cd54272da3a2d90bad85dde0a4f7e7b67 100644 --- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp +++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp @@ -12,23 +12,24 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/Coverage/CoverageMappingReader.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Triple.h" #include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" -#include "llvm/ProfileData/Coverage/CoverageMappingReader.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -648,11 +649,15 @@ static Error loadBinaryFormat(MemoryBufferRef ObjectBuffer, : support::endianness::big; // Look for the sections that we are interested in. - auto NamesSection = lookupSection(*OF, getInstrProfNameSectionName(false)); + auto ObjFormat = OF->getTripleObjectFormat(); + auto NamesSection = + lookupSection(*OF, getInstrProfSectionName(IPSK_name, ObjFormat, + /*AddSegmentInfo=*/false)); if (auto E = NamesSection.takeError()) return E; auto CoverageSection = - lookupSection(*OF, getInstrProfCoverageSectionName(false)); + lookupSection(*OF, getInstrProfSectionName(IPSK_covmap, ObjFormat, + /*AddSegmentInfo=*/false)); if (auto E = CoverageSection.takeError()) return E; diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp index f131be2cba4929b4697b035e1201f6dc033ce1e9..6fe93530da21b8ab2a0331d46f86929ee1823f76 100644 --- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp +++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index 0ec3fce4b2377da275fea8959ab7cacb730841af..c9b82c303e33807b7f95e91de3921c172aa2590e 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProf.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -29,7 +30,6 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" @@ -45,8 +45,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -136,8 +136,49 @@ const std::error_category &llvm::instrprof_category() { return *ErrorCategory; } +namespace { + +const char *InstrProfSectNameCommon[] = { +#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ + SectNameCommon, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +const char *InstrProfSectNameCoff[] = { +#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ + SectNameCoff, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +const char *InstrProfSectNamePrefix[] = { +#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ + Prefix, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +} // namespace + namespace llvm { +std::string getInstrProfSectionName(InstrProfSectKind IPSK, + Triple::ObjectFormatType OF, + bool AddSegmentInfo) { + std::string SectName; + + if (OF == Triple::MachO && AddSegmentInfo) + SectName = InstrProfSectNamePrefix[IPSK]; + + if (OF == Triple::COFF) + SectName += InstrProfSectNameCoff[IPSK]; + else + SectName += InstrProfSectNameCommon[IPSK]; + + if (OF == Triple::MachO && IPSK == IPSK_data && AddSegmentInfo) + SectName += ",regular,live_support"; + + return SectName; +} + void SoftInstrProfErrors::addError(instrprof_error IE) { if (IE == instrprof_error::success) return; @@ -314,7 +355,7 @@ void InstrProfSymtab::create(Module &M, bool InLTO) { finalizeSymtab(); } -Error collectPGOFuncNameStrings(const std::vector &NameStrs, +Error collectPGOFuncNameStrings(ArrayRef NameStrs, bool doCompression, std::string &Result) { assert(!NameStrs.empty() && "No name data to emit"); @@ -362,7 +403,7 @@ StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) { return NameStr; } -Error collectPGOFuncNameStrings(const std::vector &NameVars, +Error collectPGOFuncNameStrings(ArrayRef NameVars, std::string &Result, bool doCompression) { std::vector NameStrs; for (auto *NameVar : NameVars) { @@ -937,22 +978,22 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) { } // Parse the value profile options. -void getMemOPSizeRangeFromOption(std::string MemOPSizeRange, - int64_t &RangeStart, int64_t &RangeLast) { +void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart, + int64_t &RangeLast) { static const int64_t DefaultMemOPSizeRangeStart = 0; static const int64_t DefaultMemOPSizeRangeLast = 8; RangeStart = DefaultMemOPSizeRangeStart; RangeLast = DefaultMemOPSizeRangeLast; if (!MemOPSizeRange.empty()) { - auto Pos = MemOPSizeRange.find(":"); + auto Pos = MemOPSizeRange.find(':'); if (Pos != std::string::npos) { if (Pos > 0) - RangeStart = atoi(MemOPSizeRange.substr(0, Pos).c_str()); + MemOPSizeRange.substr(0, Pos).getAsInteger(10, RangeStart); if (Pos < MemOPSizeRange.size() - 1) - RangeLast = atoi(MemOPSizeRange.substr(Pos + 1).c_str()); + MemOPSizeRange.substr(Pos + 1).getAsInteger(10, RangeLast); } else - RangeLast = atoi(MemOPSizeRange.c_str()); + MemOPSizeRange.getAsInteger(10, RangeLast); } assert(RangeLast >= RangeStart); } diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 856f793363f7740814e4bf9311a96abc09f16fb9..d9f599f400da5d43acf0a8654e3aa11a8ad5adb2 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 6b7bd3b2fc0a043226bfdb012f234c4249ecaaa7..b3402a6ea956c992b76445b30d16adc8206eace5 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/ProfileData/InstrProfWriter.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp index 8493acc2d95dda135aa9531a0985b81aa3973ebc..eafdd2154b7be608aa04fb9be07eb4d17b2f7cd3 100644 --- a/lib/ProfileData/SampleProf.cpp +++ b/lib/ProfileData/SampleProf.cpp @@ -129,12 +129,14 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { OS.indent(Indent); if (!CallsiteSamples.empty()) { OS << "Samples collected in inlined callsites {\n"; - SampleSorter SortedCallsiteSamples( + SampleSorter SortedCallsiteSamples( CallsiteSamples); for (const auto &CS : SortedCallsiteSamples.get()) { - OS.indent(Indent + 2); - OS << CS->first << ": inlined callee: " << CS->second.getName() << ": "; - CS->second.print(OS, Indent + 4); + for (const auto &FS : CS->second) { + OS.indent(Indent + 2); + OS << CS->first << ": inlined callee: " << FS.second.getName() << ": "; + FS.second.print(OS, Indent + 4); + } } OS << "}\n"; } else { diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp index 3a8d6190d2ca670592f744a8febdfc9855ede272..234fe02ac8a8789e27babd1aed67c301fc596749 100644 --- a/lib/ProfileData/SampleProfReader.cpp +++ b/lib/ProfileData/SampleProfReader.cpp @@ -211,7 +211,7 @@ std::error_code SampleProfileReaderText::read() { InlineStack.pop_back(); } FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( - LineLocation(LineOffset, Discriminator)); + LineLocation(LineOffset, Discriminator))[FName]; FSamples.setName(FName); MergeResult(Result, FSamples.addTotalSamples(NumSamples)); InlineStack.push_back(&FSamples); @@ -363,8 +363,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { if (std::error_code EC = FName.getError()) return EC; - FunctionSamples &CalleeProfile = - FProfile.functionSamplesAt(LineLocation(*LineOffset, *Discriminator)); + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( + LineLocation(*LineOffset, *Discriminator))[*FName]; CalleeProfile.setName(*FName); if (std::error_code EC = readProfile(CalleeProfile)) return EC; @@ -636,7 +636,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile( uint32_t LineOffset = Offset >> 16; uint32_t Discriminator = Offset & 0xffff; FProfile = &CallerProfile->functionSamplesAt( - LineLocation(LineOffset, Discriminator)); + LineLocation(LineOffset, Discriminator))[Name]; } FProfile->setName(Name); diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp index e1d6d575631aecea5c01f2a2632625e37949fd8b..b45026140c99ae8f2519a57be653f21f12c97838 100644 --- a/lib/ProfileData/SampleProfWriter.cpp +++ b/lib/ProfileData/SampleProfWriter.cpp @@ -18,10 +18,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/ADT/StringRef.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" -#include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/LEB128.h" @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,32 @@ using namespace llvm; using namespace sampleprof; +std::error_code +SampleProfileWriter::write(const StringMap &ProfileMap) { + if (std::error_code EC = writeHeader(ProfileMap)) + return EC; + + // Sort the ProfileMap by total samples. + typedef std::pair NameFunctionSamples; + std::vector V; + for (const auto &I : ProfileMap) + V.push_back(std::make_pair(I.getKey(), &I.second)); + + std::stable_sort( + V.begin(), V.end(), + [](const NameFunctionSamples &A, const NameFunctionSamples &B) { + if (A.second->getTotalSamples() == B.second->getTotalSamples()) + return A.first > B.first; + return A.second->getTotalSamples() > B.second->getTotalSamples(); + }); + + for (const auto &I : V) { + if (std::error_code EC = write(*I.second)) + return EC; + } + return sampleprof_error::success; +} + /// \brief Write samples to a text file. /// /// Note: it may be tempting to implement this in terms of @@ -68,20 +95,21 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) { OS << "\n"; } - SampleSorter SortedCallsiteSamples( + SampleSorter SortedCallsiteSamples( S.getCallsiteSamples()); Indent += 1; - for (const auto &I : SortedCallsiteSamples.get()) { - LineLocation Loc = I->first; - const FunctionSamples &CalleeSamples = I->second; - OS.indent(Indent); - if (Loc.Discriminator == 0) - OS << Loc.LineOffset << ": "; - else - OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; - if (std::error_code EC = write(CalleeSamples)) - return EC; - } + for (const auto &I : SortedCallsiteSamples.get()) + for (const auto &FS : I->second) { + LineLocation Loc = I->first; + const FunctionSamples &CalleeSamples = FS.second; + OS.indent(Indent); + if (Loc.Discriminator == 0) + OS << Loc.LineOffset << ": "; + else + OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; + if (std::error_code EC = write(CalleeSamples)) + return EC; + } Indent -= 1; return sampleprof_error::success; @@ -96,8 +124,7 @@ std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) { } void SampleProfileWriterBinary::addName(StringRef FName) { - auto NextIdx = NameTable.size(); - NameTable.insert(std::make_pair(FName, NextIdx)); + NameTable.insert(std::make_pair(FName, 0)); } void SampleProfileWriterBinary::addNames(const FunctionSamples &S) { @@ -109,11 +136,12 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) { } // Recursively add all the names for inlined callsites. - for (const auto &J : S.getCallsiteSamples()) { - const FunctionSamples &CalleeSamples = J.second; - addName(CalleeSamples.getName()); - addNames(CalleeSamples); - } + for (const auto &J : S.getCallsiteSamples()) + for (const auto &FS : J.second) { + const FunctionSamples &CalleeSamples = FS.second; + addName(CalleeSamples.getName()); + addNames(CalleeSamples); + } } std::error_code SampleProfileWriterBinary::writeHeader( @@ -134,10 +162,18 @@ std::error_code SampleProfileWriterBinary::writeHeader( addNames(I.second); } + // Sort the names to make NameTable is deterministic. + std::set V; + for (const auto &I : NameTable) + V.insert(I.first); + int i = 0; + for (const StringRef &N : V) + NameTable[N] = i++; + // Write out the name table. encodeULEB128(NameTable.size(), OS); - for (auto N : NameTable) { - OS << N.first; + for (auto N : V) { + OS << N; encodeULEB128(0, OS); } return sampleprof_error::success; @@ -187,14 +223,15 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) { // Recursively emit all the callsite samples. encodeULEB128(S.getCallsiteSamples().size(), OS); - for (const auto &J : S.getCallsiteSamples()) { - LineLocation Loc = J.first; - const FunctionSamples &CalleeSamples = J.second; - encodeULEB128(Loc.LineOffset, OS); - encodeULEB128(Loc.Discriminator, OS); - if (std::error_code EC = writeBody(CalleeSamples)) - return EC; - } + for (const auto &J : S.getCallsiteSamples()) + for (const auto &FS : J.second) { + LineLocation Loc = J.first; + const FunctionSamples &CalleeSamples = FS.second; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + if (std::error_code EC = writeBody(CalleeSamples)) + return EC; + } return sampleprof_error::success; } diff --git a/lib/Support/AMDGPUCodeObjectMetadata.cpp b/lib/Support/AMDGPUCodeObjectMetadata.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a00e371415a32fa1125364ed076e6837d636dba8 --- /dev/null +++ b/lib/Support/AMDGPUCodeObjectMetadata.cpp @@ -0,0 +1,218 @@ +//===--- AMDGPUCodeObjectMetadata.cpp ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU Code Object Metadata definitions and in-memory +/// representations. +/// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/AMDGPUCodeObjectMetadata.h" +#include "llvm/Support/YAMLTraits.h" + +using namespace llvm::AMDGPU; +using namespace llvm::AMDGPU::CodeObject; + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) +LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) + +namespace llvm { +namespace yaml { + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, AccessQualifier &EN) { + YIO.enumCase(EN, "Default", AccessQualifier::Default); + YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); + YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); + YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { + YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); + YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); + YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); + YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); + YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); + YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, ValueKind &EN) { + YIO.enumCase(EN, "ByValue", ValueKind::ByValue); + YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); + YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); + YIO.enumCase(EN, "Sampler", ValueKind::Sampler); + YIO.enumCase(EN, "Image", ValueKind::Image); + YIO.enumCase(EN, "Pipe", ValueKind::Pipe); + YIO.enumCase(EN, "Queue", ValueKind::Queue); + YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); + YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); + YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); + YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); + YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); + YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); + YIO.enumCase(EN, "HiddenCompletionAction", + ValueKind::HiddenCompletionAction); + } +}; + +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &YIO, ValueType &EN) { + YIO.enumCase(EN, "Struct", ValueType::Struct); + YIO.enumCase(EN, "I8", ValueType::I8); + YIO.enumCase(EN, "U8", ValueType::U8); + YIO.enumCase(EN, "I16", ValueType::I16); + YIO.enumCase(EN, "U16", ValueType::U16); + YIO.enumCase(EN, "F16", ValueType::F16); + YIO.enumCase(EN, "I32", ValueType::I32); + YIO.enumCase(EN, "U32", ValueType::U32); + YIO.enumCase(EN, "F32", ValueType::F32); + YIO.enumCase(EN, "I64", ValueType::I64); + YIO.enumCase(EN, "U64", ValueType::U64); + YIO.enumCase(EN, "F64", ValueType::F64); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { + YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, + MD.mReqdWorkGroupSize, std::vector()); + YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, + MD.mWorkGroupSizeHint, std::vector()); + YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, + MD.mVecTypeHint, std::string()); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { + YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); + YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); + YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); + YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); + YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, + uint32_t(0)); + YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, + AccessQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, + AddressSpaceQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); + YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); + YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); + YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); + YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); + YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, + MD.mKernargSegmentSize, uint64_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, + MD.mWorkgroupGroupSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, + MD.mWorkitemPrivateSegmentSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, + MD.mWavefrontNumSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, + MD.mWorkitemNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, + MD.mKernargSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, + MD.mGroupSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, + MD.mPrivateSegmentAlign, uint8_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, + MD.mWavefrontSize, uint8_t(0)); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { + YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, + MD.mDebuggerABIVersion, std::vector()); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, + MD.mReservedNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, + MD.mReservedFirstVGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, + MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); + YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, + MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, Kernel::Metadata &MD) { + YIO.mapRequired(Kernel::Key::Name, MD.mName); + YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); + YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, + std::vector()); + if (!MD.mAttrs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); + if (!MD.mArgs.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::Args, MD.mArgs); + if (!MD.mCodeProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); + if (!MD.mDebugProps.empty() || !YIO.outputting()) + YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); + } +}; + +template <> +struct MappingTraits { + static void mapping(IO &YIO, CodeObject::Metadata &MD) { + YIO.mapRequired(Key::Version, MD.mVersion); + YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector()); + if (!MD.mKernels.empty() || !YIO.outputting()) + YIO.mapOptional(Key::Kernels, MD.mKernels); + } +}; + +} // end namespace yaml + +namespace AMDGPU { +namespace CodeObject { + +/* static */ +std::error_code Metadata::fromYamlString( + std::string YamlString, Metadata &CodeObjectMetadata) { + yaml::Input YamlInput(YamlString); + YamlInput >> CodeObjectMetadata; + return YamlInput.error(); +} + +/* static */ +std::error_code Metadata::toYamlString( + Metadata CodeObjectMetadata, std::string &YamlString) { + raw_string_ostream YamlStream(YamlString); + yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits::max()); + YamlOutput << CodeObjectMetadata; + return std::error_code(); +} + +} // end namespace CodeObject +} // end namespace AMDGPU +} // end namespace llvm diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 9778628911cd054acc5585ebcc4c9d53b2f29879..f36c25a0ce914944cd35e0a95f7404e6825ed884 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1559,11 +1559,13 @@ IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) { case PackCategoriesIntoKey(fcInfinity, fcNaN): category = fcNaN; copySignificand(rhs); + LLVM_FALLTHROUGH; case PackCategoriesIntoKey(fcNaN, fcZero): case PackCategoriesIntoKey(fcNaN, fcNormal): case PackCategoriesIntoKey(fcNaN, fcInfinity): case PackCategoriesIntoKey(fcNaN, fcNaN): sign = false; + LLVM_FALLTHROUGH; case PackCategoriesIntoKey(fcInfinity, fcZero): case PackCategoriesIntoKey(fcInfinity, fcNormal): case PackCategoriesIntoKey(fcZero, fcInfinity): @@ -3393,7 +3395,7 @@ namespace { } void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, bool TruncateZero) const { switch (category) { case fcInfinity: if (isNegative()) @@ -3407,9 +3409,16 @@ void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, if (isNegative()) Str.push_back('-'); - if (!FormatMaxPadding) - append(Str, "0.0E+0"); - else + if (!FormatMaxPadding) { + if (TruncateZero) + append(Str, "0.0E+0"); + else { + append(Str, "0.0"); + if (FormatPrecision > 1) + Str.append(FormatPrecision - 1, '0'); + append(Str, "e+00"); + } + } else Str.push_back('0'); return; @@ -3442,7 +3451,7 @@ void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, // Ignore trailing binary zeros. int trailingZeros = significand.countTrailingZeros(); exp += trailingZeros; - significand = significand.lshr(trailingZeros); + significand.lshrInPlace(trailingZeros); // Change the exponent from 2^e to 10^e. if (exp == 0) { @@ -3543,12 +3552,16 @@ void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, Str.push_back(buffer[NDigits-1]); Str.push_back('.'); - if (NDigits == 1) + if (NDigits == 1 && TruncateZero) Str.push_back('0'); else for (unsigned I = 1; I != NDigits; ++I) Str.push_back(buffer[NDigits-1-I]); - Str.push_back('E'); + // Fill with zeros up to FormatPrecision. + if (!TruncateZero && FormatPrecision > NDigits - 1) + Str.append(FormatPrecision - NDigits + 1, '0'); + // For !TruncateZero we use lower 'e'. + Str.push_back(TruncateZero ? 'E' : 'e'); Str.push_back(exp >= 0 ? '+' : '-'); if (exp < 0) exp = -exp; @@ -3557,6 +3570,9 @@ void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, expbuf.push_back((char) ('0' + (exp % 10))); exp /= 10; } while (exp); + // Exponent always at least two digits if we do not truncate zeros. + if (!TruncateZero && expbuf.size() < 2) + expbuf.push_back('0'); for (unsigned I = 0, E = expbuf.size(); I != E; ++I) Str.push_back(expbuf[E-1-I]); return; @@ -4362,10 +4378,11 @@ bool DoubleAPFloat::isInteger() const { void DoubleAPFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, + bool TruncateZero) const { assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) - .toString(Str, FormatPrecision, FormatMaxPadding); + .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); } bool DoubleAPFloat::getExactInverse(APFloat *inv) const { diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 00b340e3ee4afbbb5e40af87793372c63f2730aa..e9716e3b1e872b3d34df9b5dc352018e76dbb4f2 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -76,34 +76,31 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) { void APInt::initSlowCase(uint64_t val, bool isSigned) { - VAL = 0; - pVal = getClearedMemory(getNumWords()); - pVal[0] = val; + U.pVal = getClearedMemory(getNumWords()); + U.pVal[0] = val; if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) - pVal[i] = -1ULL; + U.pVal[i] = WORD_MAX; clearUnusedBits(); } void APInt::initSlowCase(const APInt& that) { - VAL = 0; - pVal = getMemory(getNumWords()); - memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE); + U.pVal = getMemory(getNumWords()); + memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); } void APInt::initFromArray(ArrayRef bigVal) { assert(BitWidth && "Bitwidth too small"); assert(bigVal.data() && "Null pointer detected!"); if (isSingleWord()) - VAL = bigVal[0]; + U.VAL = bigVal[0]; else { // Get memory, cleared to 0 - VAL = 0; - pVal = getClearedMemory(getNumWords()); + U.pVal = getClearedMemory(getNumWords()); // Calculate the number of words to copy unsigned words = std::min(bigVal.size(), getNumWords()); // Copy the words from bigVal to pVal - memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE); + memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); } // Make sure unused high bits are cleared clearUnusedBits(); @@ -120,41 +117,43 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) } APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) - : BitWidth(numbits), VAL(0) { + : BitWidth(numbits) { assert(BitWidth && "Bitwidth too small"); fromString(numbits, Str, radix); } -APInt& APInt::AssignSlowCase(const APInt& RHS) { +void APInt::reallocate(unsigned NewBitWidth) { + // If the number of words is the same we can just change the width and stop. + if (getNumWords() == getNumWords(NewBitWidth)) { + BitWidth = NewBitWidth; + return; + } + + // If we have an allocation, delete it. + if (!isSingleWord()) + delete [] U.pVal; + + // Update BitWidth. + BitWidth = NewBitWidth; + + // If we are supposed to have an allocation, create it. + if (!isSingleWord()) + U.pVal = getMemory(getNumWords()); +} + +void APInt::AssignSlowCase(const APInt& RHS) { // Don't do anything for X = X if (this == &RHS) - return *this; + return; - if (BitWidth == RHS.getBitWidth()) { - // assume same bit-width single-word case is already handled - assert(!isSingleWord()); - memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE); - return *this; - } + // Adjust the bit width and handle allocations as necessary. + reallocate(RHS.getBitWidth()); - if (isSingleWord()) { - // assume case where both are single words is already handled - assert(!RHS.isSingleWord()); - VAL = 0; - pVal = getMemory(RHS.getNumWords()); - memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); - } else if (getNumWords() == RHS.getNumWords()) - memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); - else if (RHS.isSingleWord()) { - delete [] pVal; - VAL = RHS.VAL; - } else { - delete [] pVal; - pVal = getMemory(RHS.getNumWords()); - memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); - } - BitWidth = RHS.BitWidth; - return clearUnusedBits(); + // Copy the data. + if (isSingleWord()) + U.VAL = RHS.U.VAL; + else + memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); } /// This method 'profiles' an APInt for use with FoldingSet. @@ -162,67 +161,30 @@ void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(BitWidth); if (isSingleWord()) { - ID.AddInteger(VAL); + ID.AddInteger(U.VAL); return; } unsigned NumWords = getNumWords(); for (unsigned i = 0; i < NumWords; ++i) - ID.AddInteger(pVal[i]); -} - -/// This function adds a single "digit" integer, y, to the multiple -/// "digit" integer array, x[]. x[] is modified to reflect the addition and -/// 1 is returned if there is a carry out, otherwise 0 is returned. -/// @returns the carry of the addition. -static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { - for (unsigned i = 0; i < len; ++i) { - dest[i] = y + x[i]; - if (dest[i] < y) - y = 1; // Carry one to next digit. - else { - y = 0; // No need to carry so exit early - break; - } - } - return y; + ID.AddInteger(U.pVal[i]); } /// @brief Prefix increment operator. Increments the APInt by one. APInt& APInt::operator++() { if (isSingleWord()) - ++VAL; + ++U.VAL; else - add_1(pVal, pVal, getNumWords(), 1); + tcIncrement(U.pVal, getNumWords()); return clearUnusedBits(); } -/// This function subtracts a single "digit" (64-bit word), y, from -/// the multi-digit integer array, x[], propagating the borrowed 1 value until -/// no further borrowing is needed or it runs out of "digits" in x. The result -/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted. -/// In other words, if y > x then this function returns 1, otherwise 0. -/// @returns the borrow out of the subtraction -static bool sub_1(uint64_t x[], unsigned len, uint64_t y) { - for (unsigned i = 0; i < len; ++i) { - uint64_t X = x[i]; - x[i] -= y; - if (y > X) - y = 1; // We have to "borrow 1" from next "digit" - else { - y = 0; // No need to borrow - break; // Remaining digits are unchanged so exit early - } - } - return bool(y); -} - /// @brief Prefix decrement operator. Decrements the APInt by one. APInt& APInt::operator--() { if (isSingleWord()) - --VAL; + --U.VAL; else - sub_1(pVal, getNumWords(), 1); + tcDecrement(U.pVal, getNumWords()); return clearUnusedBits(); } @@ -232,17 +194,17 @@ APInt& APInt::operator--() { APInt& APInt::operator+=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - VAL += RHS.VAL; + U.VAL += RHS.U.VAL; else - tcAdd(pVal, RHS.pVal, 0, getNumWords()); + tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); return clearUnusedBits(); } APInt& APInt::operator+=(uint64_t RHS) { if (isSingleWord()) - VAL += RHS; + U.VAL += RHS; else - add_1(pVal, pVal, getNumWords(), RHS); + tcAddPart(U.pVal, RHS, getNumWords()); return clearUnusedBits(); } @@ -252,204 +214,79 @@ APInt& APInt::operator+=(uint64_t RHS) { APInt& APInt::operator-=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) - VAL -= RHS.VAL; + U.VAL -= RHS.U.VAL; else - tcSubtract(pVal, RHS.pVal, 0, getNumWords()); + tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); return clearUnusedBits(); } APInt& APInt::operator-=(uint64_t RHS) { if (isSingleWord()) - VAL -= RHS; + U.VAL -= RHS; else - sub_1(pVal, getNumWords(), RHS); + tcSubtractPart(U.pVal, RHS, getNumWords()); return clearUnusedBits(); } -/// Multiplies an integer array, x, by a uint64_t integer and places the result -/// into dest. -/// @returns the carry out of the multiplication. -/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer. -static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { - // Split y into high 32-bit part (hy) and low 32-bit part (ly) - uint64_t ly = y & 0xffffffffULL, hy = y >> 32; - uint64_t carry = 0; - - // For each digit of x. - for (unsigned i = 0; i < len; ++i) { - // Split x into high and low words - uint64_t lx = x[i] & 0xffffffffULL; - uint64_t hx = x[i] >> 32; - // hasCarry - A flag to indicate if there is a carry to the next digit. - // hasCarry == 0, no carry - // hasCarry == 1, has carry - // hasCarry == 2, no carry and the calculation result == 0. - uint8_t hasCarry = 0; - dest[i] = carry + lx * ly; - // Determine if the add above introduces carry. - hasCarry = (dest[i] < carry) ? 1 : 0; - carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0); - // The upper limit of carry can be (2^32 - 1)(2^32 - 1) + - // (2^32 - 1) + 2^32 = 2^64. - hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); - - carry += (lx * hy) & 0xffffffffULL; - dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL); - carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) + - (carry >> 32) + ((lx * hy) >> 32) + hx * hy; - } - return carry; -} - -/// Multiplies integer array x by integer array y and stores the result into -/// the integer array dest. Note that dest's size must be >= xlen + ylen. -/// @brief Generalized multiplication of integer arrays. -static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], - unsigned ylen) { - dest[xlen] = mul_1(dest, x, xlen, y[0]); - for (unsigned i = 1; i < ylen; ++i) { - uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32; - uint64_t carry = 0, lx = 0, hx = 0; - for (unsigned j = 0; j < xlen; ++j) { - lx = x[j] & 0xffffffffULL; - hx = x[j] >> 32; - // hasCarry - A flag to indicate if has carry. - // hasCarry == 0, no carry - // hasCarry == 1, has carry - // hasCarry == 2, no carry and the calculation result == 0. - uint8_t hasCarry = 0; - uint64_t resul = carry + lx * ly; - hasCarry = (resul < carry) ? 1 : 0; - carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32); - hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0); - - carry += (lx * hy) & 0xffffffffULL; - resul = (carry << 32) | (resul & 0xffffffffULL); - dest[i+j] += resul; - carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+ - (carry >> 32) + (dest[i+j] < resul ? 1 : 0) + - ((lx * hy) >> 32) + hx * hy; - } - dest[i+xlen] = carry; - } -} - -APInt& APInt::operator*=(const APInt& RHS) { +APInt APInt::operator*(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) { - VAL *= RHS.VAL; - clearUnusedBits(); - return *this; - } - - // Get some bit facts about LHS and check for zero - unsigned lhsBits = getActiveBits(); - unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1; - if (!lhsWords) - // 0 * X ===> 0 - return *this; - - // Get some bit facts about RHS and check for zero - unsigned rhsBits = RHS.getActiveBits(); - unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1; - if (!rhsWords) { - // X * 0 ===> 0 - clearAllBits(); - return *this; - } + if (isSingleWord()) + return APInt(BitWidth, U.VAL * RHS.U.VAL); - // Allocate space for the result - unsigned destWords = rhsWords + lhsWords; - uint64_t *dest = getMemory(destWords); + APInt Result(getMemory(getNumWords()), getBitWidth()); - // Perform the long multiply - mul(dest, pVal, lhsWords, RHS.pVal, rhsWords); + tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); - // Copy result back into *this - clearAllBits(); - unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords; - memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE); - clearUnusedBits(); - - // delete dest array and return - delete[] dest; - return *this; + Result.clearUnusedBits(); + return Result; } -APInt& APInt::AndAssignSlowCase(const APInt& RHS) { - tcAnd(pVal, RHS.pVal, getNumWords()); - return *this; +void APInt::AndAssignSlowCase(const APInt& RHS) { + tcAnd(U.pVal, RHS.U.pVal, getNumWords()); } -APInt& APInt::OrAssignSlowCase(const APInt& RHS) { - tcOr(pVal, RHS.pVal, getNumWords()); - return *this; +void APInt::OrAssignSlowCase(const APInt& RHS) { + tcOr(U.pVal, RHS.U.pVal, getNumWords()); } -APInt& APInt::XorAssignSlowCase(const APInt& RHS) { - tcXor(pVal, RHS.pVal, getNumWords()); - return *this; +void APInt::XorAssignSlowCase(const APInt& RHS) { + tcXor(U.pVal, RHS.U.pVal, getNumWords()); } -APInt APInt::operator*(const APInt& RHS) const { +APInt& APInt::operator*=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) - return APInt(BitWidth, VAL * RHS.VAL); - APInt Result(*this); - Result *= RHS; - return Result; + *this = *this * RHS; + return *this; } -bool APInt::EqualSlowCase(const APInt& RHS) const { - return std::equal(pVal, pVal + getNumWords(), RHS.pVal); +APInt& APInt::operator*=(uint64_t RHS) { + if (isSingleWord()) { + U.VAL *= RHS; + } else { + unsigned NumWords = getNumWords(); + tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); + } + return clearUnusedBits(); } -bool APInt::EqualSlowCase(uint64_t Val) const { - unsigned n = getActiveBits(); - if (n <= APINT_BITS_PER_WORD) - return pVal[0] == Val; - else - return false; +bool APInt::EqualSlowCase(const APInt& RHS) const { + return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); } -bool APInt::ult(const APInt& RHS) const { +int APInt::compare(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) - return VAL < RHS.VAL; - - // Get active bit length of both operands - unsigned n1 = getActiveBits(); - unsigned n2 = RHS.getActiveBits(); - - // If magnitude of LHS is less than RHS, return true. - if (n1 < n2) - return true; - - // If magnitude of RHS is greater than LHS, return false. - if (n2 < n1) - return false; + return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; - // If they both fit in a word, just compare the low order word - if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) - return pVal[0] < RHS.pVal[0]; - - // Otherwise, compare all words - unsigned topWord = whichWord(std::max(n1,n2)-1); - for (int i = topWord; i >= 0; --i) { - if (pVal[i] > RHS.pVal[i]) - return false; - if (pVal[i] < RHS.pVal[i]) - return true; - } - return false; + return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); } -bool APInt::slt(const APInt& RHS) const { +int APInt::compareSigned(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) { - int64_t lhsSext = SignExtend64(VAL, BitWidth); - int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth); - return lhsSext < rhsSext; + int64_t lhsSext = SignExtend64(U.VAL, BitWidth); + int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); + return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; } bool lhsNeg = isNegative(); @@ -457,18 +294,11 @@ bool APInt::slt(const APInt& RHS) const { // If the sign bits don't match, then (LHS < RHS) if LHS is negative if (lhsNeg != rhsNeg) - return lhsNeg; + return lhsNeg ? -1 : 1; // Otherwise we can just use an unsigned comparison, because even negative // numbers compare correctly this way if both have the same signed-ness. - return ult(RHS); -} - -void APInt::setBit(unsigned bitPosition) { - if (isSingleWord()) - VAL |= maskBit(bitPosition); - else - pVal[whichWord(bitPosition)] |= maskBit(bitPosition); + return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); } void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { @@ -476,40 +306,31 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { unsigned hiWord = whichWord(hiBit); // Create an initial mask for the low word with zeros below loBit. - uint64_t loMask = UINT64_MAX << whichBit(loBit); + uint64_t loMask = WORD_MAX << whichBit(loBit); // If hiBit is not aligned, we need a high mask. unsigned hiShiftAmt = whichBit(hiBit); if (hiShiftAmt != 0) { // Create a high mask with zeros above hiBit. - uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); + uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); // If loWord and hiWord are equal, then we combine the masks. Otherwise, // set the bits in hiWord. if (hiWord == loWord) loMask &= hiMask; else - pVal[hiWord] |= hiMask; + U.pVal[hiWord] |= hiMask; } // Apply the mask to the low word. - pVal[loWord] |= loMask; + U.pVal[loWord] |= loMask; // Fill any words between loWord and hiWord with all ones. for (unsigned word = loWord + 1; word < hiWord; ++word) - pVal[word] = UINT64_MAX; -} - -/// Set the given bit to 0 whose position is given as "bitPosition". -/// @brief Set a given bit to 0. -void APInt::clearBit(unsigned bitPosition) { - if (isSingleWord()) - VAL &= ~maskBit(bitPosition); - else - pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition); + U.pVal[word] = WORD_MAX; } /// @brief Toggle every bit to its opposite value. void APInt::flipAllBitsSlowCase() { - tcComplement(pVal, getNumWords()); + tcComplement(U.pVal, getNumWords()); clearUnusedBits(); } @@ -535,9 +356,9 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Single word result can be done as a direct bitmask. if (isSingleWord()) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); - VAL &= ~(mask << bitPosition); - VAL |= (subBits.VAL << bitPosition); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + U.VAL &= ~(mask << bitPosition); + U.VAL |= (subBits.U.VAL << bitPosition); return; } @@ -547,9 +368,9 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { // Insertion within a single word can be done as a direct bitmask. if (loWord == hi1Word) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); - pVal[loWord] &= ~(mask << loBit); - pVal[loWord] |= (subBits.VAL << loBit); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + U.pVal[loWord] &= ~(mask << loBit); + U.pVal[loWord] |= (subBits.U.VAL << loBit); return; } @@ -557,15 +378,15 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { if (loBit == 0) { // Direct copy whole words. unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; - memcpy(pVal + loWord, subBits.getRawData(), + memcpy(U.pVal + loWord, subBits.getRawData(), numWholeSubWords * APINT_WORD_SIZE); // Mask+insert remaining bits. unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; if (remainingBits != 0) { - uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits); - pVal[hi1Word] &= ~mask; - pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); + uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits); + U.pVal[hi1Word] &= ~mask; + U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); } return; } @@ -587,7 +408,7 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { "Illegal bit extraction"); if (isSingleWord()) - return APInt(numBits, VAL >> bitPosition); + return APInt(numBits, U.VAL >> bitPosition); unsigned loBit = whichBit(bitPosition); unsigned loWord = whichWord(bitPosition); @@ -595,12 +416,12 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { // Single word result extracting bits from a single word source. if (loWord == hiWord) - return APInt(numBits, pVal[loWord] >> loBit); + return APInt(numBits, U.pVal[loWord] >> loBit); // Extracting bits that start on a source word boundary can be done // as a fast memory copy. if (loBit == 0) - return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord)); + return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); // General case - shift + copy source words directly into place. APInt Result(numBits, 0); @@ -608,10 +429,10 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { unsigned NumDstWords = Result.getNumWords(); for (unsigned word = 0; word < NumDstWords; ++word) { - uint64_t w0 = pVal[loWord + word]; + uint64_t w0 = U.pVal[loWord + word]; uint64_t w1 = - (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0; - Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); + (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; + Result.U.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); } return Result.clearUnusedBits(); @@ -672,9 +493,9 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { hash_code llvm::hash_value(const APInt &Arg) { if (Arg.isSingleWord()) - return hash_combine(Arg.VAL); + return hash_combine(Arg.U.VAL); - return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords()); + return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); } bool APInt::isSplat(unsigned SplatSizeInBits) const { @@ -697,10 +518,21 @@ APInt APInt::getLoBits(unsigned numBits) const { return Result; } +/// Return a value containing V broadcasted over NewLen bits. +APInt APInt::getSplat(unsigned NewLen, const APInt &V) { + assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); + + APInt Val = V.zextOrSelf(NewLen); + for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) + Val |= Val << I; + + return Val; +} + unsigned APInt::countLeadingZerosSlowCase() const { unsigned Count = 0; for (int i = getNumWords()-1; i >= 0; --i) { - uint64_t V = pVal[i]; + uint64_t V = U.pVal[i]; if (V == 0) Count += APINT_BITS_PER_WORD; else { @@ -716,7 +548,7 @@ unsigned APInt::countLeadingZerosSlowCase() const { unsigned APInt::countLeadingOnes() const { if (isSingleWord()) - return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth)); + return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; unsigned shift; @@ -727,13 +559,13 @@ unsigned APInt::countLeadingOnes() const { shift = APINT_BITS_PER_WORD - highWordBits; } int i = getNumWords() - 1; - unsigned Count = llvm::countLeadingOnes(pVal[i] << shift); + unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { - if (pVal[i] == -1ULL) + if (U.pVal[i] == WORD_MAX) Count += APINT_BITS_PER_WORD; else { - Count += llvm::countLeadingOnes(pVal[i]); + Count += llvm::countLeadingOnes(U.pVal[i]); break; } } @@ -743,67 +575,71 @@ unsigned APInt::countLeadingOnes() const { unsigned APInt::countTrailingZeros() const { if (isSingleWord()) - return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth); + return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth); unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == 0; ++i) + for (; i < getNumWords() && U.pVal[i] == 0; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingZeros(pVal[i]); + Count += llvm::countTrailingZeros(U.pVal[i]); return std::min(Count, BitWidth); } unsigned APInt::countTrailingOnesSlowCase() const { unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == -1ULL; ++i) + for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += llvm::countTrailingOnes(pVal[i]); - return std::min(Count, BitWidth); + Count += llvm::countTrailingOnes(U.pVal[i]); + assert(Count <= BitWidth); + return Count; } unsigned APInt::countPopulationSlowCase() const { unsigned Count = 0; for (unsigned i = 0; i < getNumWords(); ++i) - Count += llvm::countPopulation(pVal[i]); + Count += llvm::countPopulation(U.pVal[i]); return Count; } -/// Perform a logical right-shift from Src to Dst, which must be equal or -/// non-overlapping, of Words words, by Shift, which must be less than 64. -static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words, - unsigned Shift) { - uint64_t Carry = 0; - for (int I = Words - 1; I >= 0; --I) { - uint64_t Tmp = Src[I]; - Dst[I] = (Tmp >> Shift) | Carry; - Carry = Tmp << (64 - Shift); - } +bool APInt::intersectsSlowCase(const APInt &RHS) const { + for (unsigned i = 0, e = getNumWords(); i != e; ++i) + if ((U.pVal[i] & RHS.U.pVal[i]) != 0) + return true; + + return false; +} + +bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { + for (unsigned i = 0, e = getNumWords(); i != e; ++i) + if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) + return false; + + return true; } APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); if (BitWidth == 16) - return APInt(BitWidth, ByteSwap_16(uint16_t(VAL))); + return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); if (BitWidth == 32) - return APInt(BitWidth, ByteSwap_32(unsigned(VAL))); + return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); if (BitWidth == 48) { - unsigned Tmp1 = unsigned(VAL >> 16); + unsigned Tmp1 = unsigned(U.VAL >> 16); Tmp1 = ByteSwap_32(Tmp1); - uint16_t Tmp2 = uint16_t(VAL); + uint16_t Tmp2 = uint16_t(U.VAL); Tmp2 = ByteSwap_16(Tmp2); return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); } if (BitWidth == 64) - return APInt(BitWidth, ByteSwap_64(VAL)); + return APInt(BitWidth, ByteSwap_64(U.VAL)); APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); for (unsigned I = 0, N = getNumWords(); I != N; ++I) - Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]); + Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); if (Result.BitWidth != BitWidth) { - lshrNear(Result.pVal, Result.pVal, getNumWords(), - Result.BitWidth - BitWidth); + Result.lshrInPlace(Result.BitWidth - BitWidth); Result.BitWidth = BitWidth; } return Result; @@ -812,26 +648,24 @@ APInt APInt::byteSwap() const { APInt APInt::reverseBits() const { switch (BitWidth) { case 64: - return APInt(BitWidth, llvm::reverseBits(VAL)); + return APInt(BitWidth, llvm::reverseBits(U.VAL)); case 32: - return APInt(BitWidth, llvm::reverseBits(VAL)); + return APInt(BitWidth, llvm::reverseBits(U.VAL)); case 16: - return APInt(BitWidth, llvm::reverseBits(VAL)); + return APInt(BitWidth, llvm::reverseBits(U.VAL)); case 8: - return APInt(BitWidth, llvm::reverseBits(VAL)); + return APInt(BitWidth, llvm::reverseBits(U.VAL)); default: break; } APInt Val(*this); - APInt Reversed(*this); - int S = BitWidth - 1; - - const APInt One(BitWidth, 1); + APInt Reversed(BitWidth, 0); + unsigned S = BitWidth; - for ((Val = Val.lshr(1)); Val != 0; (Val = Val.lshr(1))) { + for (; Val != 0; Val.lshrInPlace(1)) { Reversed <<= 1; - Reversed |= (Val & One); + Reversed |= Val[0]; --S; } @@ -840,11 +674,45 @@ APInt APInt::reverseBits() const { } APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { - while (!!B) { - APInt R = A.urem(B); - A = std::move(B); - B = std::move(R); + // Fast-path a common case. + if (A == B) return A; + + // Corner cases: if either operand is zero, the other is the gcd. + if (!A) return B; + if (!B) return A; + + // Count common powers of 2 and remove all other powers of 2. + unsigned Pow2; + { + unsigned Pow2_A = A.countTrailingZeros(); + unsigned Pow2_B = B.countTrailingZeros(); + if (Pow2_A > Pow2_B) { + A.lshrInPlace(Pow2_A - Pow2_B); + Pow2 = Pow2_B; + } else if (Pow2_B > Pow2_A) { + B.lshrInPlace(Pow2_B - Pow2_A); + Pow2 = Pow2_A; + } else { + Pow2 = Pow2_A; + } } + + // Both operands are odd multiples of 2^Pow_2: + // + // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) + // + // This is a modified version of Stein's algorithm, taking advantage of + // efficient countTrailingZeros(). + while (A != B) { + if (A.ugt(B)) { + A -= B; + A.lshrInPlace(A.countTrailingZeros() - Pow2); + } else { + B -= A; + B.lshrInPlace(B.countTrailingZeros() - Pow2); + } + } + return A; } @@ -880,7 +748,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { // Otherwise, we have to shift the mantissa bits up to the right location APInt Tmp(width, mantissa); - Tmp = Tmp.shl((unsigned)exp - 52); + Tmp <<= (unsigned)exp - 52; return isNeg ? -Tmp : Tmp; } @@ -931,13 +799,13 @@ double APInt::roundToDouble(bool isSigned) const { uint64_t mantissa; unsigned hiWord = whichWord(n-1); if (hiWord == 0) { - mantissa = Tmp.pVal[0]; + mantissa = Tmp.U.pVal[0]; if (n > 52) mantissa >>= n - 52; // shift down, we want the top 52 bits. } else { assert(hiWord > 0 && "huh?"); - uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); - uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); + uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); + uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); mantissa = hibits | lobits; } @@ -964,54 +832,37 @@ APInt APInt::trunc(unsigned width) const { // Copy full words. unsigned i; for (i = 0; i != width / APINT_BITS_PER_WORD; i++) - Result.pVal[i] = pVal[i]; + Result.U.pVal[i] = U.pVal[i]; // Truncate and copy any partial word. unsigned bits = (0 - width) % APINT_BITS_PER_WORD; if (bits != 0) - Result.pVal[i] = pVal[i] << bits >> bits; + Result.U.pVal[i] = U.pVal[i] << bits >> bits; return Result; } // Sign extend to a new width. -APInt APInt::sext(unsigned width) const { - assert(width > BitWidth && "Invalid APInt SignExtend request"); +APInt APInt::sext(unsigned Width) const { + assert(Width > BitWidth && "Invalid APInt SignExtend request"); - if (width <= APINT_BITS_PER_WORD) { - uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth); - val = (int64_t)val >> (width - BitWidth); - return APInt(width, val >> (APINT_BITS_PER_WORD - width)); - } + if (Width <= APINT_BITS_PER_WORD) + return APInt(Width, SignExtend64(U.VAL, BitWidth)); - APInt Result(getMemory(getNumWords(width)), width); - - // Copy full words. - unsigned i; - uint64_t word = 0; - for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) { - word = getRawData()[i]; - Result.pVal[i] = word; - } + APInt Result(getMemory(getNumWords(Width)), Width); - // Read and sign-extend any partial word. - unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD; - if (bits != 0) - word = (int64_t)getRawData()[i] << bits >> bits; - else - word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); - - // Write remaining full words. - for (; i != width / APINT_BITS_PER_WORD; i++) { - Result.pVal[i] = word; - word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); - } + // Copy words. + std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); - // Write any partial word. - bits = (0 - width) % APINT_BITS_PER_WORD; - if (bits != 0) - Result.pVal[i] = word << bits >> bits; + // Sign extend the last word since there may be unused bits in the input. + Result.U.pVal[getNumWords() - 1] = + SignExtend64(Result.U.pVal[getNumWords() - 1], + ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); + // Fill with sign bits. + std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, + (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); + Result.clearUnusedBits(); return Result; } @@ -1020,17 +871,16 @@ APInt APInt::zext(unsigned width) const { assert(width > BitWidth && "Invalid APInt ZeroExtend request"); if (width <= APINT_BITS_PER_WORD) - return APInt(width, VAL); + return APInt(width, U.VAL); APInt Result(getMemory(getNumWords(width)), width); // Copy words. - unsigned i; - for (i = 0; i != getNumWords(); i++) - Result.pVal[i] = getRawData()[i]; + std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); // Zero remaining words. - memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE); + std::memset(Result.U.pVal + getNumWords(), 0, + (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); return Result; } @@ -1065,222 +915,76 @@ APInt APInt::sextOrSelf(unsigned width) const { /// Arithmetic right-shift this APInt by shiftAmt. /// @brief Arithmetic right-shift function. -APInt APInt::ashr(const APInt &shiftAmt) const { - return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth)); +void APInt::ashrInPlace(const APInt &shiftAmt) { + ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); } /// Arithmetic right-shift this APInt by shiftAmt. /// @brief Arithmetic right-shift function. -APInt APInt::ashr(unsigned shiftAmt) const { - assert(shiftAmt <= BitWidth && "Invalid shift amount"); - // Handle a degenerate case - if (shiftAmt == 0) - return *this; +void APInt::ashrSlowCase(unsigned ShiftAmt) { + // Don't bother performing a no-op shift. + if (!ShiftAmt) + return; - // Handle single word shifts with built-in ashr - if (isSingleWord()) { - if (shiftAmt == BitWidth) - return APInt(BitWidth, 0); // undefined - return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt); - } + // Save the original sign bit for later. + bool Negative = isNegative(); - // If all the bits were shifted out, the result is, technically, undefined. - // We return -1 if it was negative, 0 otherwise. We check this early to avoid - // issues in the algorithm below. - if (shiftAmt == BitWidth) { - if (isNegative()) - return APInt(BitWidth, -1ULL, true); - else - return APInt(BitWidth, 0); - } - - // Create some space for the result. - uint64_t * val = new uint64_t[getNumWords()]; - - // Compute some values needed by the following shift algorithms - unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word - unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift - unsigned breakWord = getNumWords() - 1 - offset; // last word affected - unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word? - if (bitsInWord == 0) - bitsInWord = APINT_BITS_PER_WORD; - - // If we are shifting whole words, just move whole words - if (wordShift == 0) { - // Move the words containing significant bits - for (unsigned i = 0; i <= breakWord; ++i) - val[i] = pVal[i+offset]; // move whole word - - // Adjust the top significant word for sign bit fill, if negative - if (isNegative()) - if (bitsInWord < APINT_BITS_PER_WORD) - val[breakWord] |= ~0ULL << bitsInWord; // set high bits - } else { - // Shift the low order words - for (unsigned i = 0; i < breakWord; ++i) { - // This combines the shifted corresponding word with the low bits from - // the next word (shifted into this word's high bits). - val[i] = (pVal[i+offset] >> wordShift) | - (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); - } + // WordShift is the inter-part shift; BitShift is is intra-part shift. + unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; + unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; + + unsigned WordsToMove = getNumWords() - WordShift; + if (WordsToMove != 0) { + // Sign extend the last word to fill in the unused bits. + U.pVal[getNumWords() - 1] = SignExtend64( + U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); - // Shift the break word. In this case there are no bits from the next word - // to include in this word. - val[breakWord] = pVal[breakWord+offset] >> wordShift; - - // Deal with sign extension in the break word, and possibly the word before - // it. - if (isNegative()) { - if (wordShift > bitsInWord) { - if (breakWord > 0) - val[breakWord-1] |= - ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); - val[breakWord] |= ~0ULL; - } else - val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); + } else { + // Move the words containing significant bits. + for (unsigned i = 0; i != WordsToMove - 1; ++i) + U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | + (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); + + // Handle the last word which has no high bits to copy. + U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; + // Sign extend one more time. + U.pVal[WordsToMove - 1] = + SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); } } - // Remaining words are 0 or -1, just assign them. - uint64_t fillValue = (isNegative() ? -1ULL : 0); - for (unsigned i = breakWord+1; i < getNumWords(); ++i) - val[i] = fillValue; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; + // Fill in the remainder based on the original sign. + std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, + WordShift * APINT_WORD_SIZE); + clearUnusedBits(); } /// Logical right-shift this APInt by shiftAmt. /// @brief Logical right-shift function. -APInt APInt::lshr(const APInt &shiftAmt) const { - return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth)); +void APInt::lshrInPlace(const APInt &shiftAmt) { + lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); } /// Logical right-shift this APInt by shiftAmt. /// @brief Logical right-shift function. -APInt APInt::lshr(unsigned shiftAmt) const { - if (isSingleWord()) { - if (shiftAmt >= BitWidth) - return APInt(BitWidth, 0); - else - return APInt(BitWidth, this->VAL >> shiftAmt); - } - - // If all the bits were shifted out, the result is 0. This avoids issues - // with shifting by the size of the integer type, which produces undefined - // results. We define these "undefined results" to always be 0. - if (shiftAmt >= BitWidth) - return APInt(BitWidth, 0); - - // If none of the bits are shifted out, the result is *this. This avoids - // issues with shifting by the size of the integer type, which produces - // undefined results in the code below. This is also an optimization. - if (shiftAmt == 0) - return *this; - - // Create some space for the result. - uint64_t * val = new uint64_t[getNumWords()]; - - // If we are shifting less than a word, compute the shift with a simple carry - if (shiftAmt < APINT_BITS_PER_WORD) { - lshrNear(val, pVal, getNumWords(), shiftAmt); - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; - } - - // Compute some values needed by the remaining shift algorithms - unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; - unsigned offset = shiftAmt / APINT_BITS_PER_WORD; - - // If we are shifting whole words, just move whole words - if (wordShift == 0) { - for (unsigned i = 0; i < getNumWords() - offset; ++i) - val[i] = pVal[i+offset]; - for (unsigned i = getNumWords()-offset; i < getNumWords(); i++) - val[i] = 0; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; - } - - // Shift the low order words - unsigned breakWord = getNumWords() - offset -1; - for (unsigned i = 0; i < breakWord; ++i) - val[i] = (pVal[i+offset] >> wordShift) | - (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); - // Shift the break word. - val[breakWord] = pVal[breakWord+offset] >> wordShift; - - // Remaining words are 0 - for (unsigned i = breakWord+1; i < getNumWords(); ++i) - val[i] = 0; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; +void APInt::lshrSlowCase(unsigned ShiftAmt) { + tcShiftRight(U.pVal, getNumWords(), ShiftAmt); } /// Left-shift this APInt by shiftAmt. /// @brief Left-shift function. -APInt APInt::shl(const APInt &shiftAmt) const { +APInt &APInt::operator<<=(const APInt &shiftAmt) { // It's undefined behavior in C to shift by BitWidth or greater. - return shl((unsigned)shiftAmt.getLimitedValue(BitWidth)); + *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); + return *this; } -APInt APInt::shlSlowCase(unsigned shiftAmt) const { - // If all the bits were shifted out, the result is 0. This avoids issues - // with shifting by the size of the integer type, which produces undefined - // results. We define these "undefined results" to always be 0. - if (shiftAmt == BitWidth) - return APInt(BitWidth, 0); - - // If none of the bits are shifted out, the result is *this. This avoids a - // lshr by the words size in the loop below which can produce incorrect - // results. It also avoids the expensive computation below for a common case. - if (shiftAmt == 0) - return *this; - - // Create some space for the result. - uint64_t * val = new uint64_t[getNumWords()]; - - // If we are shifting less than a word, do it the easy way - if (shiftAmt < APINT_BITS_PER_WORD) { - uint64_t carry = 0; - for (unsigned i = 0; i < getNumWords(); i++) { - val[i] = pVal[i] << shiftAmt | carry; - carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt); - } - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; - } - - // Compute some values needed by the remaining shift algorithms - unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; - unsigned offset = shiftAmt / APINT_BITS_PER_WORD; - - // If we are shifting whole words, just move whole words - if (wordShift == 0) { - for (unsigned i = 0; i < offset; i++) - val[i] = 0; - for (unsigned i = offset; i < getNumWords(); i++) - val[i] = pVal[i-offset]; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; - } - - // Copy whole words from this to Result. - unsigned i = getNumWords() - 1; - for (; i > offset; --i) - val[i] = pVal[i-offset] << wordShift | - pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift); - val[offset] = pVal[0] << wordShift; - for (i = 0; i < offset; ++i) - val[i] = 0; - APInt Result(val, BitWidth); - Result.clearUnusedBits(); - return Result; +void APInt::shlSlowCase(unsigned ShiftAmt) { + tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); + clearUnusedBits(); } // Calculate the rotate amount modulo the bit width. @@ -1342,7 +1046,7 @@ APInt APInt::sqrt() const { /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, /* 31 */ 6 }; - return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]); + return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); } // If the magnitude of the value fits in less than 52 bits (the precision of @@ -1351,7 +1055,8 @@ APInt APInt::sqrt() const { // This should be faster than the algorithm below. if (magnitude < 52) { return APInt(BitWidth, - uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); + uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL + : U.pVal[0]))))); } // Okay, all the short cuts are exhausted. We must compute it. The following @@ -1436,10 +1141,13 @@ APInt APInt::multiplicativeInverse(const APInt& modulo) const { return APInt(BitWidth, 0); // The next-to-last t is the multiplicative inverse. However, we are - // interested in a positive inverse. Calcuate a positive one from a negative + // interested in a positive inverse. Calculate a positive one from a negative // one if necessary. A simple addition of the modulo suffices because // abs(t[i]) is known to be less than *this/2 (see the link above). - return t[i].isNegative() ? t[i] + modulo : t[i]; + if (t[i].isNegative()) + t[i] += modulo; + + return std::move(t[i]); } /// Calculate the magic numbers required to implement a signed integer division @@ -1538,7 +1246,7 @@ APInt::mu APInt::magicu(unsigned LeadingZeros) const { /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The /// variables here have the same names as in the algorithm. Comments explain /// the algorithm and any deviation from it. -static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, +static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, unsigned m, unsigned n) { assert(u && "Must provide dividend"); assert(v && "Must provide divisor"); @@ -1564,16 +1272,16 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // overflow. Note that this can require an extra word in u so that u must // be of length m+n+1. unsigned shift = countLeadingZeros(v[n-1]); - unsigned v_carry = 0; - unsigned u_carry = 0; + uint32_t v_carry = 0; + uint32_t u_carry = 0; if (shift) { for (unsigned i = 0; i < m+n; ++i) { - unsigned u_tmp = u[i] >> (32 - shift); + uint32_t u_tmp = u[i] >> (32 - shift); u[i] = (u[i] << shift) | u_carry; u_carry = u_tmp; } for (unsigned i = 0; i < n; ++i) { - unsigned v_tmp = v[i] >> (32 - shift); + uint32_t v_tmp = v[i] >> (32 - shift); v[i] = (v[i] << shift) | v_carry; v_carry = v_tmp; } @@ -1594,11 +1302,11 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease - // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test + // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test // on v[n-2] determines at high speed most of the cases in which the trial // value qp is one too large, and it eliminates all cases where qp is two // too large. - uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); + uint64_t dividend = Make_64(u[j+n], u[j+n-1]); DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); uint64_t qp = dividend / v[n-1]; uint64_t rp = dividend % v[n-1]; @@ -1621,14 +1329,14 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, int64_t borrow = 0; for (unsigned i = 0; i < n; ++i) { uint64_t p = uint64_t(qp) * uint64_t(v[i]); - int64_t subres = int64_t(u[j+i]) - borrow - (unsigned)p; - u[j+i] = (unsigned)subres; - borrow = (p >> 32) - (subres >> 32); + int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); + u[j+i] = Lo_32(subres); + borrow = Hi_32(p) - Hi_32(subres); DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i] << ", borrow = " << borrow << '\n'); } bool isNeg = u[j+n] < borrow; - u[j+n] -= (unsigned)borrow; + u[j+n] -= Lo_32(borrow); DEBUG(dbgs() << "KnuthDiv: after subtraction:"); DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); @@ -1636,7 +1344,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was // negative, go to step D6; otherwise go on to step D7. - q[j] = (unsigned)qp; + q[j] = Lo_32(qp); if (isNeg) { // D6. [Add back]. The probability that this step is necessary is very // small, on the order of only 2/b. Make sure that test data accounts for @@ -1647,7 +1355,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // since it cancels with the borrow that occurred in D4. bool carry = false; for (unsigned i = 0; i < n; i++) { - unsigned limit = std::min(u[j+i],v[i]); + uint32_t limit = std::min(u[j+i],v[i]); u[j+i] += v[i] + carry; carry = u[j+i] < limit || (carry && u[j+i] == limit); } @@ -1672,7 +1380,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // multiplication by d by using a shift left. So, all we have to do is // shift right here. if (shift) { - unsigned carry = 0; + uint32_t carry = 0; DEBUG(dbgs() << "KnuthDiv: remainder:"); for (int i = n-1; i >= 0; i--) { r[i] = (u[i] >> shift) | carry; @@ -1690,8 +1398,8 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, DEBUG(dbgs() << '\n'); } -void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, - unsigned rhsWords, APInt *Quotient, APInt *Remainder) { +void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, + unsigned rhsWords, WordType *Quotient, WordType *Remainder) { assert(lhsWords >= rhsWords && "Fractional result"); // First, compose the values into an array of 32-bit words instead of @@ -1701,17 +1409,16 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // can't use 64-bit operands here because we don't have native results of // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't // work on large-endian machines. - uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT); unsigned n = rhsWords * 2; unsigned m = (lhsWords * 2) - n; // Allocate space for the temporary values we need either on the stack, if // it will fit, or on the heap if it won't. - unsigned SPACE[128]; - unsigned *U = nullptr; - unsigned *V = nullptr; - unsigned *Q = nullptr; - unsigned *R = nullptr; + uint32_t SPACE[128]; + uint32_t *U = nullptr; + uint32_t *V = nullptr; + uint32_t *Q = nullptr; + uint32_t *R = nullptr; if ((Remainder?4:3)*n+2*m+1 <= 128) { U = &SPACE[0]; V = &SPACE[m+n+1]; @@ -1719,34 +1426,34 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, if (Remainder) R = &SPACE[(m+n+1) + n + (m+n)]; } else { - U = new unsigned[m + n + 1]; - V = new unsigned[n]; - Q = new unsigned[m+n]; + U = new uint32_t[m + n + 1]; + V = new uint32_t[n]; + Q = new uint32_t[m+n]; if (Remainder) - R = new unsigned[n]; + R = new uint32_t[n]; } // Initialize the dividend - memset(U, 0, (m+n+1)*sizeof(unsigned)); + memset(U, 0, (m+n+1)*sizeof(uint32_t)); for (unsigned i = 0; i < lhsWords; ++i) { - uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]); - U[i * 2] = (unsigned)(tmp & mask); - U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); + uint64_t tmp = LHS[i]; + U[i * 2] = Lo_32(tmp); + U[i * 2 + 1] = Hi_32(tmp); } U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. // Initialize the divisor - memset(V, 0, (n)*sizeof(unsigned)); + memset(V, 0, (n)*sizeof(uint32_t)); for (unsigned i = 0; i < rhsWords; ++i) { - uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]); - V[i * 2] = (unsigned)(tmp & mask); - V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); + uint64_t tmp = RHS[i]; + V[i * 2] = Lo_32(tmp); + V[i * 2 + 1] = Hi_32(tmp); } // initialize the quotient and remainder - memset(Q, 0, (m+n) * sizeof(unsigned)); + memset(Q, 0, (m+n) * sizeof(uint32_t)); if (Remainder) - memset(R, 0, n * sizeof(unsigned)); + memset(R, 0, n * sizeof(uint32_t)); // Now, adjust m and n for the Knuth division. n is the number of words in // the divisor. m is the number of words by which the dividend exceeds the @@ -1767,22 +1474,22 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // are using base 2^32 instead of base 10. assert(n != 0 && "Divide by zero?"); if (n == 1) { - unsigned divisor = V[0]; - unsigned remainder = 0; - for (int i = m+n-1; i >= 0; i--) { - uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i]; + uint32_t divisor = V[0]; + uint32_t remainder = 0; + for (int i = m; i >= 0; i--) { + uint64_t partial_dividend = Make_64(remainder, U[i]); if (partial_dividend == 0) { Q[i] = 0; remainder = 0; } else if (partial_dividend < divisor) { Q[i] = 0; - remainder = (unsigned)partial_dividend; + remainder = Lo_32(partial_dividend); } else if (partial_dividend == divisor) { Q[i] = 1; remainder = 0; } else { - Q[i] = (unsigned)(partial_dividend / divisor); - remainder = (unsigned)(partial_dividend - (Q[i] * divisor)); + Q[i] = Lo_32(partial_dividend / divisor); + remainder = Lo_32(partial_dividend - (Q[i] * divisor)); } } if (R) @@ -1795,66 +1502,14 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, // If the caller wants the quotient if (Quotient) { - // Set up the Quotient value's memory. - if (Quotient->BitWidth != LHS.BitWidth) { - if (Quotient->isSingleWord()) - Quotient->VAL = 0; - else - delete [] Quotient->pVal; - Quotient->BitWidth = LHS.BitWidth; - if (!Quotient->isSingleWord()) - Quotient->pVal = getClearedMemory(Quotient->getNumWords()); - } else - Quotient->clearAllBits(); - - // The quotient is in Q. Reconstitute the quotient into Quotient's low - // order words. - // This case is currently dead as all users of divide() handle trivial cases - // earlier. - if (lhsWords == 1) { - uint64_t tmp = - uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); - if (Quotient->isSingleWord()) - Quotient->VAL = tmp; - else - Quotient->pVal[0] = tmp; - } else { - assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); - for (unsigned i = 0; i < lhsWords; ++i) - Quotient->pVal[i] = - uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2)); - } + for (unsigned i = 0; i < lhsWords; ++i) + Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); } // If the caller wants the remainder if (Remainder) { - // Set up the Remainder value's memory. - if (Remainder->BitWidth != RHS.BitWidth) { - if (Remainder->isSingleWord()) - Remainder->VAL = 0; - else - delete [] Remainder->pVal; - Remainder->BitWidth = RHS.BitWidth; - if (!Remainder->isSingleWord()) - Remainder->pVal = getClearedMemory(Remainder->getNumWords()); - } else - Remainder->clearAllBits(); - - // The remainder is in R. Reconstitute the remainder into Remainder's low - // order words. - if (rhsWords == 1) { - uint64_t tmp = - uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2)); - if (Remainder->isSingleWord()) - Remainder->VAL = tmp; - else - Remainder->pVal[0] = tmp; - } else { - assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); - for (unsigned i = 0; i < rhsWords; ++i) - Remainder->pVal[i] = - uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2)); - } + for (unsigned i = 0; i < rhsWords; ++i) + Remainder[i] = Make_64(R[i*2+1], R[i*2]); } // Clean up the memory we allocated. @@ -1866,40 +1521,74 @@ void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS, } } -APInt APInt::udiv(const APInt& RHS) const { +APInt APInt::udiv(const APInt &RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); // First, deal with the easy case if (isSingleWord()) { - assert(RHS.VAL != 0 && "Divide by zero?"); - return APInt(BitWidth, VAL / RHS.VAL); + assert(RHS.U.VAL != 0 && "Divide by zero?"); + return APInt(BitWidth, U.VAL / RHS.U.VAL); } // Get some facts about the LHS and RHS number of bits and words - unsigned rhsBits = RHS.getActiveBits(); - unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); + unsigned lhsWords = getNumWords(getActiveBits()); + unsigned rhsBits = RHS.getActiveBits(); + unsigned rhsWords = getNumWords(rhsBits); assert(rhsWords && "Divided by zero???"); - unsigned lhsBits = this->getActiveBits(); - unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1); // Deal with some degenerate cases if (!lhsWords) // 0 / X ===> 0 return APInt(BitWidth, 0); - else if (lhsWords < rhsWords || this->ult(RHS)) { + if (rhsBits == 1) + // X / 1 ===> X + return *this; + if (lhsWords < rhsWords || this->ult(RHS)) // X / Y ===> 0, iff X < Y return APInt(BitWidth, 0); - } else if (*this == RHS) { + if (*this == RHS) // X / X ===> 1 return APInt(BitWidth, 1); - } else if (lhsWords == 1 && rhsWords == 1) { + if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. // All high words are zero, just use native divide - return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]); - } + return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + APInt Quotient(BitWidth, 0); // to hold result. + divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); + return Quotient; +} + +APInt APInt::udiv(uint64_t RHS) const { + assert(RHS != 0 && "Divide by zero?"); + + // First, deal with the easy case + if (isSingleWord()) + return APInt(BitWidth, U.VAL / RHS); + + // Get some facts about the LHS words. + unsigned lhsWords = getNumWords(getActiveBits()); + + // Deal with some degenerate cases + if (!lhsWords) + // 0 / X ===> 0 + return APInt(BitWidth, 0); + if (RHS == 1) + // X / 1 ===> X + return *this; + if (this->ult(RHS)) + // X / Y ===> 0, iff X < Y + return APInt(BitWidth, 0); + if (*this == RHS) + // X / X ===> 1 + return APInt(BitWidth, 1); + if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. + // All high words are zero, just use native divide + return APInt(BitWidth, this->U.pVal[0] / RHS); // We have to compute it the hard way. Invoke the Knuth divide algorithm. - APInt Quotient(1,0); // to hold result. - divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr); + APInt Quotient(BitWidth, 0); // to hold result. + divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); return Quotient; } @@ -1914,40 +1603,84 @@ APInt APInt::sdiv(const APInt &RHS) const { return this->udiv(RHS); } -APInt APInt::urem(const APInt& RHS) const { +APInt APInt::sdiv(int64_t RHS) const { + if (isNegative()) { + if (RHS < 0) + return (-(*this)).udiv(-RHS); + return -((-(*this)).udiv(RHS)); + } + if (RHS < 0) + return -(this->udiv(-RHS)); + return this->udiv(RHS); +} + +APInt APInt::urem(const APInt &RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) { - assert(RHS.VAL != 0 && "Remainder by zero?"); - return APInt(BitWidth, VAL % RHS.VAL); + assert(RHS.U.VAL != 0 && "Remainder by zero?"); + return APInt(BitWidth, U.VAL % RHS.U.VAL); } // Get some facts about the LHS - unsigned lhsBits = getActiveBits(); - unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1); + unsigned lhsWords = getNumWords(getActiveBits()); // Get some facts about the RHS unsigned rhsBits = RHS.getActiveBits(); - unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); + unsigned rhsWords = getNumWords(rhsBits); assert(rhsWords && "Performing remainder operation by zero ???"); // Check the degenerate cases - if (lhsWords == 0) { + if (lhsWords == 0) // 0 % Y ===> 0 return APInt(BitWidth, 0); - } else if (lhsWords < rhsWords || this->ult(RHS)) { + if (rhsBits == 1) + // X % 1 ===> 0 + return APInt(BitWidth, 0); + if (lhsWords < rhsWords || this->ult(RHS)) // X % Y ===> X, iff X < Y return *this; - } else if (*this == RHS) { + if (*this == RHS) // X % X == 0; return APInt(BitWidth, 0); - } else if (lhsWords == 1) { + if (lhsWords == 1) // All high words are zero, just use native remainder - return APInt(BitWidth, pVal[0] % RHS.pVal[0]); - } + return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + APInt Remainder(BitWidth, 0); + divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); + return Remainder; +} + +uint64_t APInt::urem(uint64_t RHS) const { + assert(RHS != 0 && "Remainder by zero?"); + + if (isSingleWord()) + return U.VAL % RHS; + + // Get some facts about the LHS + unsigned lhsWords = getNumWords(getActiveBits()); + + // Check the degenerate cases + if (lhsWords == 0) + // 0 % Y ===> 0 + return 0; + if (RHS == 1) + // X % 1 ===> 0 + return 0; + if (this->ult(RHS)) + // X % Y ===> X, iff X < Y + return getZExtValue(); + if (*this == RHS) + // X % X == 0; + return 0; + if (lhsWords == 1) + // All high words are zero, just use native remainder + return U.pVal[0] % RHS; // We have to compute it the hard way. Invoke the Knuth divide algorithm. - APInt Remainder(1,0); - divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder); + uint64_t Remainder; + divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); return Remainder; } @@ -1962,25 +1695,37 @@ APInt APInt::srem(const APInt &RHS) const { return this->urem(RHS); } +int64_t APInt::srem(int64_t RHS) const { + if (isNegative()) { + if (RHS < 0) + return -((-(*this)).urem(-RHS)); + return -((-(*this)).urem(RHS)); + } + if (RHS < 0) + return this->urem(-RHS); + return this->urem(RHS); +} + void APInt::udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder) { assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); + unsigned BitWidth = LHS.BitWidth; // First, deal with the easy case if (LHS.isSingleWord()) { - assert(RHS.VAL != 0 && "Divide by zero?"); - uint64_t QuotVal = LHS.VAL / RHS.VAL; - uint64_t RemVal = LHS.VAL % RHS.VAL; - Quotient = APInt(LHS.BitWidth, QuotVal); - Remainder = APInt(LHS.BitWidth, RemVal); + assert(RHS.U.VAL != 0 && "Divide by zero?"); + uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; + uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; + Quotient = APInt(BitWidth, QuotVal); + Remainder = APInt(BitWidth, RemVal); return; } // Get some size facts about the dividend and divisor - unsigned lhsBits = LHS.getActiveBits(); - unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1); + unsigned lhsWords = getNumWords(LHS.getActiveBits()); unsigned rhsBits = RHS.getActiveBits(); - unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); + unsigned rhsWords = getNumWords(rhsBits); + assert(rhsWords && "Performing divrem operation by zero ???"); // Check the degenerate cases if (lhsWords == 0) { @@ -1989,6 +1734,11 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, return; } + if (rhsBits == 1) { + Quotient = LHS; // X / 1 ===> X + Remainder = 0; // X % 1 ===> 0 + } + if (lhsWords < rhsWords || LHS.ult(RHS)) { Remainder = LHS; // X % Y ===> X, iff X < Y Quotient = 0; // X / Y ===> 0, iff X < Y @@ -2001,17 +1751,90 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, return; } - if (lhsWords == 1 && rhsWords == 1) { + // Make sure there is enough space to hold the results. + // NOTE: This assumes that reallocate won't affect any bits if it doesn't + // change the size. This is necessary if Quotient or Remainder is aliased + // with LHS or RHS. + Quotient.reallocate(BitWidth); + Remainder.reallocate(BitWidth); + + if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. // There is only one word to consider so use the native versions. - uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0]; - uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0]; - Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue); - Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue); + uint64_t lhsValue = LHS.U.pVal[0]; + uint64_t rhsValue = RHS.U.pVal[0]; + Quotient = lhsValue / rhsValue; + Remainder = lhsValue % rhsValue; return; } // Okay, lets do it the long way - divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); + divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, + Remainder.U.pVal); + // Clear the rest of the Quotient and Remainder. + std::memset(Quotient.U.pVal + lhsWords, 0, + (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); + std::memset(Remainder.U.pVal + rhsWords, 0, + (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); +} + +void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, + uint64_t &Remainder) { + assert(RHS != 0 && "Divide by zero?"); + unsigned BitWidth = LHS.BitWidth; + + // First, deal with the easy case + if (LHS.isSingleWord()) { + uint64_t QuotVal = LHS.U.VAL / RHS; + Remainder = LHS.U.VAL % RHS; + Quotient = APInt(BitWidth, QuotVal); + return; + } + + // Get some size facts about the dividend and divisor + unsigned lhsWords = getNumWords(LHS.getActiveBits()); + + // Check the degenerate cases + if (lhsWords == 0) { + Quotient = 0; // 0 / Y ===> 0 + Remainder = 0; // 0 % Y ===> 0 + return; + } + + if (RHS == 1) { + Quotient = LHS; // X / 1 ===> X + Remainder = 0; // X % 1 ===> 0 + } + + if (LHS.ult(RHS)) { + Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y + Quotient = 0; // X / Y ===> 0, iff X < Y + return; + } + + if (LHS == RHS) { + Quotient = 1; // X / X ===> 1 + Remainder = 0; // X % X ===> 0; + return; + } + + // Make sure there is enough space to hold the results. + // NOTE: This assumes that reallocate won't affect any bits if it doesn't + // change the size. This is necessary if Quotient is aliased with LHS. + Quotient.reallocate(BitWidth); + + if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. + // There is only one word to consider so use the native versions. + uint64_t lhsValue = LHS.U.pVal[0]; + Quotient = lhsValue / RHS; + Remainder = lhsValue % RHS; + return; + } + + // Okay, lets do it the long way + divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); + // Clear the rest of the Quotient. + std::memset(Quotient.U.pVal + lhsWords, 0, + (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); } void APInt::sdivrem(const APInt &LHS, const APInt &RHS, @@ -2021,17 +1844,37 @@ void APInt::sdivrem(const APInt &LHS, const APInt &RHS, APInt::udivrem(-LHS, -RHS, Quotient, Remainder); else { APInt::udivrem(-LHS, RHS, Quotient, Remainder); - Quotient = -Quotient; + Quotient.negate(); } - Remainder = -Remainder; + Remainder.negate(); } else if (RHS.isNegative()) { APInt::udivrem(LHS, -RHS, Quotient, Remainder); - Quotient = -Quotient; + Quotient.negate(); } else { APInt::udivrem(LHS, RHS, Quotient, Remainder); } } +void APInt::sdivrem(const APInt &LHS, int64_t RHS, + APInt &Quotient, int64_t &Remainder) { + uint64_t R = Remainder; + if (LHS.isNegative()) { + if (RHS < 0) + APInt::udivrem(-LHS, -RHS, Quotient, R); + else { + APInt::udivrem(-LHS, RHS, Quotient, R); + Quotient.negate(); + } + R = -R; + } else if (RHS < 0) { + APInt::udivrem(LHS, -RHS, Quotient, R); + Quotient.negate(); + } else { + APInt::udivrem(LHS, RHS, Quotient, R); + } + Remainder = R; +} + APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { APInt Res = *this+RHS; Overflow = isNonNegative() == RHS.isNonNegative() && @@ -2131,17 +1974,15 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width"); - // Allocate memory - if (!isSingleWord()) - pVal = getClearedMemory(getNumWords()); + // Allocate memory if needed + if (isSingleWord()) + U.VAL = 0; + else + U.pVal = getClearedMemory(getNumWords()); // Figure out if we can shift instead of multiply unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); - // Set up an APInt for the radix multiplier outside the loop so we don't - // constantly construct/destruct it. - APInt apradix(getBitWidth(), radix); - // Enter digit traversal loop for (StringRef::iterator e = str.end(); p != e; ++p) { unsigned digit = getDigit(*p, radix); @@ -2152,17 +1993,15 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { if (shift) *this <<= shift; else - *this *= apradix; + *this *= radix; } // Add in the digit we just interpreted *this += digit; } // If its negative, put it in two's complement form - if (isNeg) { - --(*this); - this->flipAllBits(); - } + if (isNeg) + this->negate(); } void APInt::toString(SmallVectorImpl &Str, unsigned Radix, @@ -2206,7 +2045,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, if (isSingleWord()) { char Buffer[65]; - char *BufPtr = Buffer+65; + char *BufPtr = std::end(Buffer); uint64_t N; if (!Signed) { @@ -2230,7 +2069,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, *--BufPtr = Digits[N % Radix]; N /= Radix; } - Str.append(BufPtr, Buffer+65); + Str.append(BufPtr, std::end(Buffer)); return; } @@ -2240,8 +2079,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, // They want to print the signed version and it is a negative value // Flip the bits and add one to turn it into the equivalent positive // value and put a '-' in the result. - Tmp.flipAllBits(); - ++Tmp; + Tmp.negate(); Str.push_back('-'); } @@ -2261,22 +2099,17 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); unsigned MaskAmt = Radix - 1; - while (Tmp != 0) { + while (Tmp.getBoolValue()) { unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; Str.push_back(Digits[Digit]); - Tmp = Tmp.lshr(ShiftAmt); + Tmp.lshrInPlace(ShiftAmt); } } else { - APInt divisor(Radix == 10? 4 : 8, Radix); - while (Tmp != 0) { - APInt APdigit(1, 0); - APInt tmp2(Tmp.getBitWidth(), 0); - divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, - &APdigit); - unsigned Digit = (unsigned)APdigit.getZExtValue(); + while (Tmp.getBoolValue()) { + uint64_t Digit; + udivrem(Tmp, Radix, Tmp, Digit); assert(Digit < Radix && "divide failed"); Str.push_back(Digits[Digit]); - Tmp = tmp2; } } @@ -2471,6 +2304,22 @@ APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, return c; } +/// This function adds a single "word" integer, src, to the multiple +/// "word" integer array, dst[]. dst[] is modified to reflect the addition and +/// 1 is returned if there is a carry out, otherwise 0 is returned. +/// @returns the carry of the addition. +APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, + unsigned parts) { + for (unsigned i = 0; i < parts; ++i) { + dst[i] += src; + if (dst[i] >= src) + return 0; // No need to carry so exit early. + src = 1; // Carry one to next digit. + } + + return 1; +} + /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, WordType c, unsigned parts) { @@ -2490,6 +2339,26 @@ APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, return c; } +/// This function subtracts a single "word" (64-bit word), src, from +/// the multi-word integer array, dst[], propagating the borrowed 1 value until +/// no further borrowing is needed or it runs out of "words" in dst. The result +/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not +/// exhausted. In other words, if src > dst then this function returns 1, +/// otherwise 0. +/// @returns the borrow out of the subtraction +APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, + unsigned parts) { + for (unsigned i = 0; i < parts; ++i) { + WordType Dst = dst[i]; + dst[i] -= src; + if (src <= Dst) + return 0; // No need to borrow so exit early. + src = 1; // We have to "borrow 1" from next "word" + } + + return 1; +} + /* Negate a bignum in-place. */ void APInt::tcNegate(WordType *dst, unsigned parts) { tcComplement(dst, parts); @@ -2516,10 +2385,9 @@ int APInt::tcMultiplyPart(WordType *dst, const WordType *src, assert(dstParts <= srcParts + 1); /* N loops; minimum of dstParts and srcParts. */ - unsigned n = dstParts < srcParts ? dstParts: srcParts; + unsigned n = std::min(dstParts, srcParts); - unsigned i; - for (i = 0; i < n; i++) { + for (unsigned i = 0; i < n; i++) { WordType low, mid, high, srcPart; /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. @@ -2570,27 +2438,27 @@ int APInt::tcMultiplyPart(WordType *dst, const WordType *src, carry = high; } - if (i < dstParts) { + if (srcParts < dstParts) { /* Full multiplication, there is no overflow. */ - assert(i + 1 == dstParts); - dst[i] = carry; - return 0; - } else { - /* We overflowed if there is carry. */ - if (carry) - return 1; - - /* We would overflow if any significant unwritten parts would be - non-zero. This is true if any remaining src parts are non-zero - and the multiplier is non-zero. */ - if (multiplier) - for (; i < srcParts; i++) - if (src[i]) - return 1; - - /* We fitted in the narrow destination. */ + assert(srcParts + 1 == dstParts); + dst[srcParts] = carry; return 0; } + + /* We overflowed if there is carry. */ + if (carry) + return 1; + + /* We would overflow if any significant unwritten parts would be + non-zero. This is true if any remaining src parts are non-zero + and the multiplier is non-zero. */ + if (multiplier) + for (unsigned i = dstParts; i < srcParts; i++) + if (src[i]) + return 1; + + /* We fitted in the narrow destination. */ + return 0; } /* DST = LHS * RHS, where DST has the same width as the operands and @@ -2611,28 +2479,21 @@ int APInt::tcMultiply(WordType *dst, const WordType *lhs, return overflow; } -/* DST = LHS * RHS, where DST has width the sum of the widths of the - operands. No overflow occurs. DST must be disjoint from both - operands. Returns the number of parts required to hold the - result. */ -unsigned APInt::tcFullMultiply(WordType *dst, const WordType *lhs, - const WordType *rhs, unsigned lhsParts, - unsigned rhsParts) { +/// DST = LHS * RHS, where DST has width the sum of the widths of the +/// operands. No overflow occurs. DST must be disjoint from both operands. +void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, + const WordType *rhs, unsigned lhsParts, + unsigned rhsParts) { /* Put the narrower number on the LHS for less loops below. */ - if (lhsParts > rhsParts) { + if (lhsParts > rhsParts) return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); - } else { - assert(dst != lhs && dst != rhs); - tcSet(dst, 0, rhsParts); - - for (unsigned i = 0; i < lhsParts; i++) - tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); + assert(dst != lhs && dst != rhs); - unsigned n = lhsParts + rhsParts; + tcSet(dst, 0, rhsParts); - return n - (dst[n - 1] == 0); - } + for (unsigned i = 0; i < lhsParts; i++) + tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); } /* If RHS is zero LHS and REMAINDER are left unchanged, return one. @@ -2666,84 +2527,77 @@ int APInt::tcDivide(WordType *lhs, const WordType *rhs, /* Loop, subtracting SRHS if REMAINDER is greater and adding that to the total. */ for (;;) { - int compare; - - compare = tcCompare(remainder, srhs, parts); - if (compare >= 0) { - tcSubtract(remainder, srhs, 0, parts); - lhs[n] |= mask; - } + int compare = tcCompare(remainder, srhs, parts); + if (compare >= 0) { + tcSubtract(remainder, srhs, 0, parts); + lhs[n] |= mask; + } - if (shiftCount == 0) - break; - shiftCount--; - tcShiftRight(srhs, parts, 1); - if ((mask >>= 1) == 0) { - mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); - n--; - } + if (shiftCount == 0) + break; + shiftCount--; + tcShiftRight(srhs, parts, 1); + if ((mask >>= 1) == 0) { + mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); + n--; + } } return false; } -/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero. - There are no restrictions on COUNT. */ -void APInt::tcShiftLeft(WordType *dst, unsigned parts, unsigned count) { - if (count) { - /* Jump is the inter-part jump; shift is is intra-part shift. */ - unsigned jump = count / APINT_BITS_PER_WORD; - unsigned shift = count % APINT_BITS_PER_WORD; - - while (parts > jump) { - WordType part; +/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are +/// no restrictions on Count. +void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { + // Don't bother performing a no-op shift. + if (!Count) + return; - parts--; + // WordShift is the inter-part shift; BitShift is the intra-part shift. + unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); + unsigned BitShift = Count % APINT_BITS_PER_WORD; - /* dst[i] comes from the two parts src[i - jump] and, if we have - an intra-part shift, src[i - jump - 1]. */ - part = dst[parts - jump]; - if (shift) { - part <<= shift; - if (parts >= jump + 1) - part |= dst[parts - jump - 1] >> (APINT_BITS_PER_WORD - shift); - } - - dst[parts] = part; + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); + } else { + while (Words-- > WordShift) { + Dst[Words] = Dst[Words - WordShift] << BitShift; + if (Words > WordShift) + Dst[Words] |= + Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); } - - while (parts > 0) - dst[--parts] = 0; } -} -/* Shift a bignum right COUNT bits in-place. Shifted in bits are - zero. There are no restrictions on COUNT. */ -void APInt::tcShiftRight(WordType *dst, unsigned parts, unsigned count) { - if (count) { - /* Jump is the inter-part jump; shift is is intra-part shift. */ - unsigned jump = count / APINT_BITS_PER_WORD; - unsigned shift = count % APINT_BITS_PER_WORD; + // Fill in the remainder with 0s. + std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); +} - /* Perform the shift. This leaves the most significant COUNT bits - of the result at zero. */ - for (unsigned i = 0; i < parts; i++) { - WordType part; +/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There +/// are no restrictions on Count. +void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { + // Don't bother performing a no-op shift. + if (!Count) + return; - if (i + jump >= parts) { - part = 0; - } else { - part = dst[i + jump]; - if (shift) { - part >>= shift; - if (i + jump + 1 < parts) - part |= dst[i + jump + 1] << (APINT_BITS_PER_WORD - shift); - } - } + // WordShift is the inter-part shift; BitShift is the intra-part shift. + unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); + unsigned BitShift = Count % APINT_BITS_PER_WORD; - dst[i] = part; + unsigned WordsToMove = Words - WordShift; + // Fastpath for moving by whole words. + if (BitShift == 0) { + std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); + } else { + for (unsigned i = 0; i != WordsToMove; ++i) { + Dst[i] = Dst[i + WordShift] >> BitShift; + if (i + 1 != WordsToMove) + Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); } } + + // Fill in the remainder with 0s. + std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); } /* Bitwise and of two bignums. */ @@ -2775,38 +2629,13 @@ int APInt::tcCompare(const WordType *lhs, const WordType *rhs, unsigned parts) { while (parts) { parts--; - if (lhs[parts] == rhs[parts]) - continue; - - return (lhs[parts] > rhs[parts]) ? 1 : -1; + if (lhs[parts] != rhs[parts]) + return (lhs[parts] > rhs[parts]) ? 1 : -1; } return 0; } -/* Increment a bignum in-place, return the carry flag. */ -APInt::WordType APInt::tcIncrement(WordType *dst, unsigned parts) { - unsigned i; - for (i = 0; i < parts; i++) - if (++dst[i] != 0) - break; - - return i == parts; -} - -/* Decrement a bignum in-place, return the borrow flag. */ -APInt::WordType APInt::tcDecrement(WordType *dst, unsigned parts) { - for (unsigned i = 0; i < parts; i++) { - // If the current word is non-zero, then the decrement has no effect on the - // higher-order words of the integer and no borrow can occur. Exit early. - if (dst[i]--) - return 0; - } - // If every word was zero, then there is a borrow. - return 1; -} - - /* Set the least significant BITS bits of a bignum, clear the rest. */ void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, diff --git a/lib/Support/ARMAttributeParser.cpp b/lib/Support/ARMAttributeParser.cpp index 63e800a5b78b0312a7225012787bbab03ae86900..a9a0c1d1a4d3d7717449afaefc13c162199db95e 100644 --- a/lib/Support/ARMAttributeParser.cpp +++ b/lib/Support/ARMAttributeParser.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/ARMAttributeParser.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/ARMAttributeParser.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/ScopedPrinter.h" diff --git a/lib/Support/ARMBuildAttrs.cpp b/lib/Support/ARMBuildAttrs.cpp index 134ef8b587b726917e7ad1e11216ce27b59d861b..8f18e9eb24edaca5f9e61ab023e30897c814bbb7 100644 --- a/lib/Support/ARMBuildAttrs.cpp +++ b/lib/Support/ARMBuildAttrs.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/ARMBuildAttributes.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/ARMBuildAttributes.h" using namespace llvm; diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp index 80550e2b46a7c735cfdcdbf10f676b3a961b3825..55910c489faf5248696a18c9b2cda4b085336a9c 100644 --- a/lib/Support/Atomic.cpp +++ b/lib/Support/Atomic.cpp @@ -18,6 +18,8 @@ using namespace llvm; #if defined(_MSC_VER) #include + +// We must include windows.h after Intrin.h. #include #undef MemoryFence #endif diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp index c7a2e0ddb179b8519cd5407384efa9df13ec8c96..bfb658cfa0b7442aed03ccb6f5d26798fd756028 100644 --- a/lib/Support/BinaryStreamReader.cpp +++ b/lib/Support/BinaryStreamReader.cpp @@ -13,9 +13,18 @@ #include "llvm/Support/BinaryStreamRef.h" using namespace llvm; +using endianness = llvm::support::endianness; -BinaryStreamReader::BinaryStreamReader(BinaryStreamRef S) - : Stream(S), Offset(0) {} +BinaryStreamReader::BinaryStreamReader(BinaryStreamRef Ref) : Stream(Ref) {} + +BinaryStreamReader::BinaryStreamReader(BinaryStream &Stream) : Stream(Stream) {} + +BinaryStreamReader::BinaryStreamReader(ArrayRef Data, + endianness Endian) + : Stream(Data, Endian) {} + +BinaryStreamReader::BinaryStreamReader(StringRef Data, endianness Endian) + : Stream(Data, Endian) {} Error BinaryStreamReader::readLongestContiguousChunk( ArrayRef &Buffer) { @@ -33,28 +42,49 @@ Error BinaryStreamReader::readBytes(ArrayRef &Buffer, uint32_t Size) { } Error BinaryStreamReader::readCString(StringRef &Dest) { - // TODO: This could be made more efficient by using readLongestContiguousChunk - // and searching for null terminators in the resulting buffer. + uint32_t OriginalOffset = getOffset(); + uint32_t FoundOffset = 0; + while (true) { + uint32_t ThisOffset = getOffset(); + ArrayRef Buffer; + if (auto EC = readLongestContiguousChunk(Buffer)) + return EC; + StringRef S(reinterpret_cast(Buffer.begin()), Buffer.size()); + size_t Pos = S.find_first_of('\0'); + if (LLVM_LIKELY(Pos != StringRef::npos)) { + FoundOffset = Pos + ThisOffset; + break; + } + } + assert(FoundOffset >= OriginalOffset); + + setOffset(OriginalOffset); + size_t Length = FoundOffset - OriginalOffset; + if (auto EC = readFixedString(Dest, Length)) + return EC; + + // Now set the offset back to after the null terminator. + setOffset(FoundOffset + 1); + return Error::success(); +} + +Error BinaryStreamReader::readWideString(ArrayRef &Dest) { uint32_t Length = 0; - // First compute the length of the string by reading 1 byte at a time. uint32_t OriginalOffset = getOffset(); - const char *C; + const UTF16 *C; while (true) { if (auto EC = readObject(C)) return EC; - if (*C == '\0') + if (*C == 0x0000) break; ++Length; } - // Now go back and request a reference for that many bytes. uint32_t NewOffset = getOffset(); setOffset(OriginalOffset); - if (auto EC = readFixedString(Dest, Length)) + if (auto EC = readArray(Dest, Length)) return EC; - - // Now set the offset back to where it was after we calculated the length. setOffset(NewOffset); return Error::success(); } @@ -86,6 +116,11 @@ Error BinaryStreamReader::skip(uint32_t Amount) { return Error::success(); } +Error BinaryStreamReader::padToAlignment(uint32_t Align) { + uint32_t NewOffset = alignTo(Offset, Align); + return skip(NewOffset - Offset); +} + uint8_t BinaryStreamReader::peek() const { ArrayRef Buffer; auto EC = Stream.readBytes(Offset, 1, Buffer); @@ -93,3 +128,16 @@ uint8_t BinaryStreamReader::peek() const { llvm::consumeError(std::move(EC)); return Buffer[0]; } + +std::pair +BinaryStreamReader::split(uint32_t Off) const { + assert(getLength() >= Off); + + BinaryStreamRef First = Stream.drop_front(Offset); + + BinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamReader W1{First}; + BinaryStreamReader W2{Second}; + return std::make_pair(W1, W2); +} \ No newline at end of file diff --git a/lib/Support/BinaryStreamRef.cpp b/lib/Support/BinaryStreamRef.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fe9a8171e1460952dba9d5f4e1d8600af30470a9 --- /dev/null +++ b/lib/Support/BinaryStreamRef.cpp @@ -0,0 +1,137 @@ +//===- BinaryStreamRef.cpp - ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/BinaryByteStream.h" + +using namespace llvm; +using namespace llvm::support; + +namespace { + +class ArrayRefImpl : public BinaryStream { +public: + ArrayRefImpl(ArrayRef Data, endianness Endian) : BBS(Data, Endian) {} + + llvm::support::endianness getEndian() const override { + return BBS.getEndian(); + } + Error readBytes(uint32_t Offset, uint32_t Size, + ArrayRef &Buffer) override { + return BBS.readBytes(Offset, Size, Buffer); + } + Error readLongestContiguousChunk(uint32_t Offset, + ArrayRef &Buffer) override { + return BBS.readLongestContiguousChunk(Offset, Buffer); + } + uint32_t getLength() override { return BBS.getLength(); } + +private: + BinaryByteStream BBS; +}; + +class MutableArrayRefImpl : public WritableBinaryStream { +public: + MutableArrayRefImpl(MutableArrayRef Data, endianness Endian) + : BBS(Data, Endian) {} + + // Inherited via WritableBinaryStream + llvm::support::endianness getEndian() const override { + return BBS.getEndian(); + } + Error readBytes(uint32_t Offset, uint32_t Size, + ArrayRef &Buffer) override { + return BBS.readBytes(Offset, Size, Buffer); + } + Error readLongestContiguousChunk(uint32_t Offset, + ArrayRef &Buffer) override { + return BBS.readLongestContiguousChunk(Offset, Buffer); + } + uint32_t getLength() override { return BBS.getLength(); } + + Error writeBytes(uint32_t Offset, ArrayRef Data) override { + return BBS.writeBytes(Offset, Data); + } + Error commit() override { return BBS.commit(); } + +private: + MutableBinaryByteStream BBS; +}; +} + +BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream) + : BinaryStreamRef(Stream, 0, Stream.getLength()) {} +BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream, uint32_t Offset, + uint32_t Length) + : BinaryStreamRefBase(Stream, Offset, Length) {} +BinaryStreamRef::BinaryStreamRef(ArrayRef Data, endianness Endian) + : BinaryStreamRefBase(std::make_shared(Data, Endian), 0, + Data.size()) {} +BinaryStreamRef::BinaryStreamRef(StringRef Data, endianness Endian) + : BinaryStreamRef(makeArrayRef(Data.bytes_begin(), Data.bytes_end()), + Endian) {} + +BinaryStreamRef::BinaryStreamRef(const BinaryStreamRef &Other) + : BinaryStreamRefBase(Other) {} + +Error BinaryStreamRef::readBytes(uint32_t Offset, uint32_t Size, + ArrayRef &Buffer) const { + if (auto EC = checkOffset(Offset, Size)) + return EC; + return BorrowedImpl->readBytes(ViewOffset + Offset, Size, Buffer); +} + +Error BinaryStreamRef::readLongestContiguousChunk( + uint32_t Offset, ArrayRef &Buffer) const { + if (auto EC = checkOffset(Offset, 1)) + return EC; + + if (auto EC = + BorrowedImpl->readLongestContiguousChunk(ViewOffset + Offset, Buffer)) + return EC; + // This StreamRef might refer to a smaller window over a larger stream. In + // that case we will have read out more bytes than we should return, because + // we should not read past the end of the current view. + uint32_t MaxLength = Length - Offset; + if (Buffer.size() > MaxLength) + Buffer = Buffer.slice(0, MaxLength); + return Error::success(); +} + +WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream) + : WritableBinaryStreamRef(Stream, 0, Stream.getLength()) {} + +WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream, + uint32_t Offset, + uint32_t Length) + : BinaryStreamRefBase(Stream, Offset, Length) {} + +WritableBinaryStreamRef::WritableBinaryStreamRef(MutableArrayRef Data, + endianness Endian) + : BinaryStreamRefBase(std::make_shared(Data, Endian), + 0, Data.size()) {} + +WritableBinaryStreamRef::WritableBinaryStreamRef( + const WritableBinaryStreamRef &Other) + : BinaryStreamRefBase(Other) {} + +Error WritableBinaryStreamRef::writeBytes(uint32_t Offset, + ArrayRef Data) const { + if (auto EC = checkOffset(Offset, Data.size())) + return EC; + + return BorrowedImpl->writeBytes(ViewOffset + Offset, Data); +} + +WritableBinaryStreamRef::operator BinaryStreamRef() const { + return BinaryStreamRef(*BorrowedImpl, ViewOffset, Length); +} + +/// \brief For buffered streams, commits changes to the backing store. +Error WritableBinaryStreamRef::commit() { return BorrowedImpl->commit(); } diff --git a/lib/Support/BinaryStreamWriter.cpp b/lib/Support/BinaryStreamWriter.cpp index d60b75642d0f148cd59a21f09333beb2da087943..c4276518b1919dadfefada7fbe68e3747543aa3d 100644 --- a/lib/Support/BinaryStreamWriter.cpp +++ b/lib/Support/BinaryStreamWriter.cpp @@ -15,8 +15,15 @@ using namespace llvm; -BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStreamRef S) - : Stream(S), Offset(0) {} +BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStreamRef Ref) + : Stream(Ref) {} + +BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStream &Stream) + : Stream(Stream) {} + +BinaryStreamWriter::BinaryStreamWriter(MutableArrayRef Data, + llvm::support::endianness Endian) + : Stream(Data, Endian) {} Error BinaryStreamWriter::writeBytes(ArrayRef Buffer) { if (auto EC = Stream.writeBytes(Offset, Buffer)) @@ -59,10 +66,25 @@ Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) { return Error::success(); } +std::pair +BinaryStreamWriter::split(uint32_t Off) const { + assert(getLength() >= Off); + + WritableBinaryStreamRef First = Stream.drop_front(Offset); + + WritableBinaryStreamRef Second = First.drop_front(Off); + First = First.keep_front(Off); + BinaryStreamWriter W1{First}; + BinaryStreamWriter W2{Second}; + return std::make_pair(W1, W2); +} + Error BinaryStreamWriter::padToAlignment(uint32_t Align) { uint32_t NewOffset = alignTo(Offset, Align); if (NewOffset > getLength()) return make_error(stream_error_code::stream_too_short); - Offset = NewOffset; + while (Offset < NewOffset) + if (auto EC = writeInteger('\0')) + return EC; return Error::success(); } diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 491614b4bf632c5e12bdd948578011d6897109ce..0a8e3897cce92bee0d9e72e8d75c8e5e65476602 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -30,6 +30,7 @@ elseif( CMAKE_HOST_UNIX ) endif( MSVC OR MINGW ) add_llvm_library(LLVMSupport + AMDGPUCodeObjectMetadata.cpp APFloat.cpp APInt.cpp APSInt.cpp @@ -39,6 +40,7 @@ add_llvm_library(LLVMSupport Allocator.cpp BinaryStreamError.cpp BinaryStreamReader.cpp + BinaryStreamRef.cpp BinaryStreamWriter.cpp BlockFrequency.cpp BranchProbability.cpp @@ -56,7 +58,6 @@ add_llvm_library(LLVMSupport DebugCounter.cpp DeltaAlgorithm.cpp DAGDeltaAlgorithm.cpp - Dwarf.cpp Error.cpp ErrorHandling.cpp FileUtilities.cpp @@ -81,6 +82,7 @@ add_llvm_library(LLVMSupport MD5.cpp NativeFormatting.cpp Options.cpp + Parallel.cpp PluginLoader.cpp PrettyStackTrace.cpp RandomNumberGenerator.cpp @@ -130,7 +132,6 @@ add_llvm_library(LLVMSupport Process.cpp Program.cpp RWMutex.cpp - SearchForAddressOfSpecialSymbol.cpp Signals.cpp TargetRegistry.cpp ThreadLocal.cpp diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index f4a9108b8544e5ea776b242a59ac4b2906acf696..de0ca940b405f9adbbfae32b28c740a111cca09b 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -2042,9 +2042,9 @@ void CommandLineParser::printOptionValues() { Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions); } -static void (*OverrideVersionPrinter)() = nullptr; +static VersionPrinterTy OverrideVersionPrinter = nullptr; -static std::vector *ExtraVersionPrinters = nullptr; +static std::vector *ExtraVersionPrinters = nullptr; namespace { class VersionPrinter { @@ -2069,19 +2069,22 @@ public: #ifndef NDEBUG OS << " with assertions"; #endif +#if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO std::string CPU = sys::getHostCPUName(); if (CPU == "generic") CPU = "(unknown)"; OS << ".\n" << " Default target: " << sys::getDefaultTargetTriple() << '\n' - << " Host CPU: " << CPU << '\n'; + << " Host CPU: " << CPU; +#endif + OS << '\n'; } void operator=(bool OptionWasSpecified) { if (!OptionWasSpecified) return; if (OverrideVersionPrinter != nullptr) { - (*OverrideVersionPrinter)(); + OverrideVersionPrinter(outs()); exit(0); } print(); @@ -2090,10 +2093,8 @@ public: // information. if (ExtraVersionPrinters != nullptr) { outs() << '\n'; - for (std::vector::iterator I = ExtraVersionPrinters->begin(), - E = ExtraVersionPrinters->end(); - I != E; ++I) - (*I)(); + for (auto I : *ExtraVersionPrinters) + I(outs()); } exit(0); @@ -2131,11 +2132,11 @@ void cl::PrintHelpMessage(bool Hidden, bool Categorized) { /// Utility function for printing version number. void cl::PrintVersionMessage() { VersionPrinterInstance.print(); } -void cl::SetVersionPrinter(void (*func)()) { OverrideVersionPrinter = func; } +void cl::SetVersionPrinter(VersionPrinterTy func) { OverrideVersionPrinter = func; } -void cl::AddExtraVersionPrinter(void (*func)()) { +void cl::AddExtraVersionPrinter(VersionPrinterTy func) { if (!ExtraVersionPrinters) - ExtraVersionPrinters = new std::vector; + ExtraVersionPrinters = new std::vector; ExtraVersionPrinters->push_back(func); } diff --git a/lib/Support/ConvertUTF.cpp b/lib/Support/ConvertUTF.cpp index 39fd218d3f07160ac941f0378157b28a6578bf24..e56854a3ae428502fc318b5674bc231e8eb336fb 100644 --- a/lib/Support/ConvertUTF.cpp +++ b/lib/Support/ConvertUTF.cpp @@ -46,13 +46,40 @@ ------------------------------------------------------------------------ */ - #include "llvm/Support/ConvertUTF.h" #ifdef CVTUTF_DEBUG #include #endif #include +/* + * This code extensively uses fall-through switches. + * Keep the compiler from warning about that. + */ +#if defined(__clang__) && defined(__has_warning) +# if __has_warning("-Wimplicit-fallthrough") +# define ConvertUTF_DISABLE_WARNINGS \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"") +# define ConvertUTF_RESTORE_WARNINGS \ + _Pragma("clang diagnostic pop") +# endif +#elif defined(__GNUC__) && __GNUC__ > 6 +# define ConvertUTF_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") +# define ConvertUTF_RESTORE_WARNINGS \ + _Pragma("GCC diagnostic pop") +#endif +#ifndef ConvertUTF_DISABLE_WARNINGS +# define ConvertUTF_DISABLE_WARNINGS +#endif +#ifndef ConvertUTF_RESTORE_WARNINGS +# define ConvertUTF_RESTORE_WARNINGS +#endif + +ConvertUTF_DISABLE_WARNINGS + namespace llvm { static const int halfShift = 10; /* used for shifting by 10 bits */ @@ -708,3 +735,5 @@ ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, --------------------------------------------------------------------- */ } // namespace llvm + +ConvertUTF_RESTORE_WARNINGS diff --git a/lib/Support/ConvertUTFWrapper.cpp b/lib/Support/ConvertUTFWrapper.cpp index 217cedb24df69aa6cbb3016da88ac51917edd989..6cb4f63762500de7b2bb8be6cf25604a7087fb92 100644 --- a/lib/Support/ConvertUTFWrapper.cpp +++ b/lib/Support/ConvertUTFWrapper.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/ConvertUTF.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SwapByteOrder.h" #include diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index 98865f5e065e7ff664ac055974f33cfb7c480c86..bd38dd88201fdbbc2c894fac8792049659df3c28 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -78,6 +78,9 @@ static bool gCrashRecoveryEnabled = false; static ManagedStatic> tlIsRecoveringFromCrash; +static void installExceptionOrSignalHandlers(); +static void uninstallExceptionOrSignalHandlers(); + CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} CrashRecoveryContext::~CrashRecoveryContext() { @@ -113,6 +116,23 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { return CRCI->CRC; } +void CrashRecoveryContext::Enable() { + sys::ScopedLock L(*gCrashRecoveryContextMutex); + // FIXME: Shouldn't this be a refcount or something? + if (gCrashRecoveryEnabled) + return; + gCrashRecoveryEnabled = true; + installExceptionOrSignalHandlers(); +} + +void CrashRecoveryContext::Disable() { + sys::ScopedLock L(*gCrashRecoveryContextMutex); + if (!gCrashRecoveryEnabled) + return; + gCrashRecoveryEnabled = false; + uninstallExceptionOrSignalHandlers(); +} + void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup) { if (!cleanup) @@ -140,30 +160,70 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { delete cleanup; } -#ifdef LLVM_ON_WIN32 +#if defined(_MSC_VER) +// If _MSC_VER is defined, we must have SEH. Use it if it's available. It's way +// better than VEH. Vectored exception handling catches all exceptions happening +// on the thread with installed exception handlers, so it can interfere with +// internal exception handling of other libraries on that thread. SEH works +// exactly as you would expect normal exception handling to work: it only +// catches exceptions if they would bubble out from the stack frame with __try / +// __except. -#include "Windows/WindowsSupport.h" +static void installExceptionOrSignalHandlers() {} +static void uninstallExceptionOrSignalHandlers() {} -// On Windows, we can make use of vectored exception handling to -// catch most crashing situations. Note that this does mean -// we will be alerted of exceptions *before* structured exception -// handling has the opportunity to catch it. But that isn't likely -// to cause problems because nowhere in the project is SEH being -// used. +bool CrashRecoveryContext::RunSafely(function_ref Fn) { + if (!gCrashRecoveryEnabled) { + Fn(); + return true; + } + + bool Result = true; + __try { + Fn(); + } __except (1) { // Catch any exception. + Result = false; + } + return Result; +} + +#else // !_MSC_VER + +#if defined(LLVM_ON_WIN32) +// This is a non-MSVC compiler, probably mingw gcc or clang without +// -fms-extensions. Use vectored exception handling (VEH). +// +// On Windows, we can make use of vectored exception handling to catch most +// crashing situations. Note that this does mean we will be alerted of +// exceptions *before* structured exception handling has the opportunity to +// catch it. Unfortunately, this causes problems in practice with other code +// running on threads with LLVM crash recovery contexts, so we would like to +// eventually move away from VEH. // -// Vectored exception handling is built on top of SEH, and so it -// works on a per-thread basis. +// Vectored works on a per-thread basis, which is an advantage over +// SetUnhandledExceptionFilter. SetUnhandledExceptionFilter also doesn't have +// any native support for chaining exception handlers, but VEH allows more than +// one. // // The vectored exception handler functionality was added in Windows // XP, so if support for older versions of Windows is required, // it will have to be added. -// -// If we want to support as far back as Win2k, we could use the -// SetUnhandledExceptionFilter API, but there's a risk of that -// being entirely overwritten (it's not a chain). + +#include "Windows/WindowsSupport.h" static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) { + // DBG_PRINTEXCEPTION_WIDE_C is not properly defined on all supported + // compilers and platforms, so we define it manually. + constexpr ULONG DbgPrintExceptionWideC = 0x4001000AL; + switch (ExceptionInfo->ExceptionRecord->ExceptionCode) + { + case DBG_PRINTEXCEPTION_C: + case DbgPrintExceptionWideC: + case 0x406D1388: // set debugger thread name + return EXCEPTION_CONTINUE_EXECUTION; + } + // Lookup the current thread local recovery object. const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); @@ -192,14 +252,7 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) // non-NULL, valid VEH handles, or NULL. static sys::ThreadLocal sCurrentExceptionHandle; -void CrashRecoveryContext::Enable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); - - if (gCrashRecoveryEnabled) - return; - - gCrashRecoveryEnabled = true; - +static void installExceptionOrSignalHandlers() { // We can set up vectored exception handling now. We will install our // handler as the front of the list, though there's no assurances that // it will remain at the front (another call could install itself before @@ -208,14 +261,7 @@ void CrashRecoveryContext::Enable() { sCurrentExceptionHandle.set(handle); } -void CrashRecoveryContext::Disable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); - - if (!gCrashRecoveryEnabled) - return; - - gCrashRecoveryEnabled = false; - +static void uninstallExceptionOrSignalHandlers() { PVOID currentHandle = const_cast(sCurrentExceptionHandle.get()); if (currentHandle) { // Now we can remove the vectored exception handler from the chain @@ -226,7 +272,7 @@ void CrashRecoveryContext::Disable() { } } -#else +#else // !LLVM_ON_WIN32 // Generic POSIX implementation. // @@ -278,14 +324,7 @@ static void CrashRecoverySignalHandler(int Signal) { const_cast(CRCI)->HandleCrash(); } -void CrashRecoveryContext::Enable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); - - if (gCrashRecoveryEnabled) - return; - - gCrashRecoveryEnabled = true; - +static void installExceptionOrSignalHandlers() { // Setup the signal handler. struct sigaction Handler; Handler.sa_handler = CrashRecoverySignalHandler; @@ -297,20 +336,13 @@ void CrashRecoveryContext::Enable() { } } -void CrashRecoveryContext::Disable() { - sys::ScopedLock L(*gCrashRecoveryContextMutex); - - if (!gCrashRecoveryEnabled) - return; - - gCrashRecoveryEnabled = false; - +static void uninstallExceptionOrSignalHandlers() { // Restore the previous signal handlers. for (unsigned i = 0; i != NumSignals; ++i) sigaction(Signals[i], &PrevActions[i], nullptr); } -#endif +#endif // !LLVM_ON_WIN32 bool CrashRecoveryContext::RunSafely(function_ref Fn) { // If crash recovery is disabled, do nothing. @@ -328,6 +360,8 @@ bool CrashRecoveryContext::RunSafely(function_ref Fn) { return true; } +#endif // !_MSC_VER + void CrashRecoveryContext::HandleCrash() { CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; assert(CRCI && "Crash recovery context never initialized!"); diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index 5d6d60a87fbfa2af12a2cdbfcb5f884f0fa8719b..53c10bcc562e33b8bc98ef187afb3e51da5a2bb5 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -128,6 +128,16 @@ const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { return nullptr; } +StringRef DataExtractor::getCStrRef(uint32_t *OffsetPtr) const { + uint32_t Start = *OffsetPtr; + StringRef::size_type Pos = Data.find('\0', Start); + if (Pos != StringRef::npos) { + *OffsetPtr = Pos + 1; + return StringRef(Data.data() + Start, Pos - Start); + } + return StringRef(); +} + uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { uint64_t result = 0; if (Data.empty()) diff --git a/lib/Support/DebugCounter.cpp b/lib/Support/DebugCounter.cpp index 29dae8a20f00f312d8f6e4f7db4292f5048897cc..1d46de04ee6aff60dea3658110ec0edf9afa575f 100644 --- a/lib/Support/DebugCounter.cpp +++ b/lib/Support/DebugCounter.cpp @@ -6,6 +6,7 @@ using namespace llvm; +namespace { // This class overrides the default list implementation of printing so we // can pretty print the list of debug counter options. This type of // dynamic option is pretty rare (basically this and pass lists). @@ -40,6 +41,7 @@ private: } } }; +} // namespace // Create our command line option. static DebugCounterList DebugCounterOption( @@ -100,9 +102,13 @@ void DebugCounter::push_back(const std::string &Val) { } } -void DebugCounter::print(raw_ostream &OS) { +void DebugCounter::print(raw_ostream &OS) const { OS << "Counters and values:\n"; for (const auto &KV : Counters) OS << left_justify(RegisteredCounters[KV.first], 32) << ": {" << KV.second.first << "," << KV.second.second << "}\n"; } + +LLVM_DUMP_METHOD void DebugCounter::dump() const { + print(dbgs()); +} diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 92ce6185306afdb9d899050dc02c69dc5507a306..9398789cea871d41f091ebc2376b5504098a8e84 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -20,169 +20,169 @@ #include "llvm/Support/Mutex.h" #include #include +#include -// Collection of symbol name/value pairs to be searched prior to any libraries. -static llvm::ManagedStatic > ExplicitSymbols; -static llvm::ManagedStatic > SymbolsMutex; - -void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName, - void *symbolValue) { - SmartScopedLock lock(*SymbolsMutex); - (*ExplicitSymbols)[symbolName] = symbolValue; -} - -char llvm::sys::DynamicLibrary::Invalid = 0; - -#ifdef LLVM_ON_WIN32 - -#include "Windows/DynamicLibrary.inc" - -#else - -#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN) -#include using namespace llvm; using namespace llvm::sys; -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only TRULY operating system -//=== independent code. -//===----------------------------------------------------------------------===// +// All methods for HandleSet should be used holding SymbolsMutex. +class DynamicLibrary::HandleSet { + typedef std::vector HandleList; + HandleList Handles; + void *Process; -static llvm::ManagedStatic > OpenedHandles; +public: + static void *DLOpen(const char *Filename, std::string *Err); + static void DLClose(void *Handle); + static void *DLSym(void *Handle, const char *Symbol); -DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, - std::string *errMsg) { - SmartScopedLock lock(*SymbolsMutex); + HandleSet() : Process(nullptr) {} + ~HandleSet(); - void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL); - if (!handle) { - if (errMsg) *errMsg = dlerror(); - return DynamicLibrary(); + HandleList::iterator Find(void *Handle) { + return std::find(Handles.begin(), Handles.end(), Handle); } -#ifdef __CYGWIN__ - // Cygwin searches symbols only in the main - // with the handle of dlopen(NULL, RTLD_GLOBAL). - if (!filename) - handle = RTLD_DEFAULT; -#endif - - // If we've already loaded this library, dlclose() the handle in order to - // keep the internal refcount at +1. - if (!OpenedHandles->insert(handle).second) - dlclose(handle); + bool Contains(void *Handle) { + return Handle == Process || Find(Handle) != Handles.end(); + } - return DynamicLibrary(handle); -} + bool AddLibrary(void *Handle, bool IsProcess = false, bool CanClose = true) { +#ifdef LLVM_ON_WIN32 + assert((Handle == this ? IsProcess : !IsProcess) && "Bad Handle."); +#endif -DynamicLibrary DynamicLibrary::addPermanentLibrary(void *handle, - std::string *errMsg) { - SmartScopedLock lock(*SymbolsMutex); - // If we've already loaded this library, tell the caller. - if (!OpenedHandles->insert(handle).second) { - if (errMsg) *errMsg = "Library already loaded"; - return DynamicLibrary(); + if (LLVM_LIKELY(!IsProcess)) { + if (Find(Handle) != Handles.end()) { + if (CanClose) + DLClose(Handle); + return false; + } + Handles.push_back(Handle); + } else { +#ifndef LLVM_ON_WIN32 + if (Process) { + if (CanClose) + DLClose(Process); + if (Process == Handle) + return false; + } +#endif + Process = Handle; + } + return true; } - return DynamicLibrary(handle); -} - -void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) { - if (!isValid()) + void *Lookup(const char *Symbol) { + // Process handle gets first try. + if (Process) { + if (void *Ptr = DLSym(Process, Symbol)) + return Ptr; +#ifndef NDEBUG + for (void *Handle : Handles) + assert(!DLSym(Handle, Symbol) && "Symbol exists in non process handle"); +#endif + } else { + // Iterate in reverse, so newer libraries/symbols override older. + for (auto &&I = Handles.rbegin(), E = Handles.rend(); I != E; ++I) { + if (void *Ptr = DLSym(*I, Symbol)) + return Ptr; + } + } return nullptr; - return dlsym(Data, symbolName); + } +}; + +namespace { +// Collection of symbol name/value pairs to be searched prior to any libraries. +static llvm::ManagedStatic> ExplicitSymbols; +// Collection of known library handles. +static llvm::ManagedStatic OpenedHandles; +// Lock for ExplicitSymbols and OpenedHandles. +static llvm::ManagedStatic> SymbolsMutex; } -#else +#ifdef LLVM_ON_WIN32 -using namespace llvm; -using namespace llvm::sys; +#include "Windows/DynamicLibrary.inc" -DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, - std::string *errMsg) { - if (errMsg) *errMsg = "dlopen() not supported on this platform"; - return DynamicLibrary(); -} +#else -void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) { - return NULL; -} +#include "Unix/DynamicLibrary.inc" #endif +char DynamicLibrary::Invalid; + namespace llvm { -void *SearchForAddressOfSpecialSymbol(const char* symbolName); +void *SearchForAddressOfSpecialSymbol(const char *SymbolName) { + return DoSearch(SymbolName); // DynamicLibrary.inc +} } -void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { +void DynamicLibrary::AddSymbol(StringRef SymbolName, void *SymbolValue) { SmartScopedLock Lock(*SymbolsMutex); + (*ExplicitSymbols)[SymbolName] = SymbolValue; +} - // First check symbols added via AddSymbol(). - if (ExplicitSymbols.isConstructed()) { - StringMap::iterator i = ExplicitSymbols->find(symbolName); +DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *FileName, + std::string *Err) { + // Force OpenedHandles to be added into the ManagedStatic list before any + // ManagedStatic can be added from static constructors in HandleSet::DLOpen. + HandleSet& HS = *OpenedHandles; - if (i != ExplicitSymbols->end()) - return i->second; + void *Handle = HandleSet::DLOpen(FileName, Err); + if (Handle != &Invalid) { + SmartScopedLock Lock(*SymbolsMutex); + HS.AddLibrary(Handle, /*IsProcess*/ FileName == nullptr); } -#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN) - // Now search the libraries. - if (OpenedHandles.isConstructed()) { - for (DenseSet::iterator I = OpenedHandles->begin(), - E = OpenedHandles->end(); I != E; ++I) { - //lt_ptr ptr = lt_dlsym(*I, symbolName); - void *ptr = dlsym(*I, symbolName); - if (ptr) { - return ptr; - } - } - } -#endif + return DynamicLibrary(Handle); +} - if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName)) - return Result; +DynamicLibrary DynamicLibrary::addPermanentLibrary(void *Handle, + std::string *Err) { + SmartScopedLock Lock(*SymbolsMutex); + // If we've already loaded this library, tell the caller. + if (!OpenedHandles->AddLibrary(Handle, /*IsProcess*/false, /*CanClose*/false)) + *Err = "Library already loaded"; -// This macro returns the address of a well-known, explicit symbol -#define EXPLICIT_SYMBOL(SYM) \ - if (!strcmp(symbolName, #SYM)) return &SYM + return DynamicLibrary(Handle); +} -// On linux we have a weird situation. The stderr/out/in symbols are both -// macros and global variables because of standards requirements. So, we -// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first. -#if defined(__linux__) and !defined(__ANDROID__) - { - EXPLICIT_SYMBOL(stderr); - EXPLICIT_SYMBOL(stdout); - EXPLICIT_SYMBOL(stdin); - } -#else - // For everything else, we want to check to make sure the symbol isn't defined - // as a macro before using EXPLICIT_SYMBOL. +void *DynamicLibrary::getAddressOfSymbol(const char *SymbolName) { + if (!isValid()) + return nullptr; + return HandleSet::DLSym(Data, SymbolName); +} + +void *DynamicLibrary::SearchForAddressOfSymbol(const char *SymbolName) { { -#ifndef stdin - EXPLICIT_SYMBOL(stdin); -#endif -#ifndef stdout - EXPLICIT_SYMBOL(stdout); -#endif -#ifndef stderr - EXPLICIT_SYMBOL(stderr); -#endif + SmartScopedLock Lock(*SymbolsMutex); + + // First check symbols added via AddSymbol(). + if (ExplicitSymbols.isConstructed()) { + StringMap::iterator i = ExplicitSymbols->find(SymbolName); + + if (i != ExplicitSymbols->end()) + return i->second; + } + + // Now search the libraries. + if (OpenedHandles.isConstructed()) { + if (void *Ptr = OpenedHandles->Lookup(SymbolName)) + return Ptr; + } } -#endif -#undef EXPLICIT_SYMBOL - return nullptr; + return llvm::SearchForAddressOfSpecialSymbol(SymbolName); } -#endif // LLVM_ON_WIN32 - //===----------------------------------------------------------------------===// // C API. //===----------------------------------------------------------------------===// -LLVMBool LLVMLoadLibraryPermanently(const char* Filename) { +LLVMBool LLVMLoadLibraryPermanently(const char *Filename) { return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename); } @@ -193,4 +193,3 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName) { void LLVMAddSymbol(const char *symbolName, void *symbolValue) { return llvm::sys::DynamicLibrary::AddSymbol(symbolName, symbolValue); } - diff --git a/lib/Support/Errno.cpp b/lib/Support/Errno.cpp index 3ba2a1277d05f2c16b4f4d6b4fc77f794b4638c9..10be9b391b4905c377f9de81dc5adae045ec789a 100644 --- a/lib/Support/Errno.cpp +++ b/lib/Support/Errno.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Errno.h" -#include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Support/Error.cpp b/lib/Support/Error.cpp index 4730c0b26ba06769393dc5479e57a986eb3e16ef..bb02c03ff2b6b5b9a83ab38f56cf45d68de77702 100644 --- a/lib/Support/Error.cpp +++ b/lib/Support/Error.cpp @@ -13,7 +13,6 @@ #include "llvm/Support/ManagedStatic.h" #include - using namespace llvm; namespace { diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index c9bca7f4c1ab715b5f791c0d292c8458a05d6be4..4496d06a15f3f862ec2ec7c4a9a2219f51f8323d 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -26,7 +26,7 @@ using namespace llvm; // FoldingSetNodeIDRef Implementation /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, -/// used to lookup the node in the FoldingSetImpl. +/// used to lookup the node in the FoldingSetBase. unsigned FoldingSetNodeIDRef::ComputeHash() const { return static_cast(hash_combine_range(Data, Data+Size)); } @@ -142,7 +142,7 @@ void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { } /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to -/// lookup the node in the FoldingSetImpl. +/// lookup the node in the FoldingSetBase. unsigned FoldingSetNodeID::ComputeHash() const { return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); } @@ -180,7 +180,7 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { } //===----------------------------------------------------------------------===// -/// Helper functions for FoldingSetImpl. +/// Helper functions for FoldingSetBase. /// GetNextPtr - In order to save space, each bucket is a /// singly-linked-list. In order to make deletion more efficient, we make @@ -188,12 +188,12 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { /// The problem with this is that the start of the hash buckets are not /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: /// use GetBucketPtr when this happens. -static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { +static FoldingSetBase::Node *GetNextPtr(void *NextInBucketPtr) { // The low bit is set if this is the pointer back to the bucket. if (reinterpret_cast(NextInBucketPtr) & 1) return nullptr; - return static_cast(NextInBucketPtr); + return static_cast(NextInBucketPtr); } @@ -221,11 +221,11 @@ static void **AllocateBuckets(unsigned NumBuckets) { } //===----------------------------------------------------------------------===// -// FoldingSetImpl Implementation +// FoldingSetBase Implementation -void FoldingSetImpl::anchor() {} +void FoldingSetBase::anchor() {} -FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { +FoldingSetBase::FoldingSetBase(unsigned Log2InitSize) { assert(5 < Log2InitSize && Log2InitSize < 32 && "Initial hash table size out of range"); NumBuckets = 1 << Log2InitSize; @@ -233,14 +233,14 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { NumNodes = 0; } -FoldingSetImpl::FoldingSetImpl(FoldingSetImpl &&Arg) +FoldingSetBase::FoldingSetBase(FoldingSetBase &&Arg) : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) { Arg.Buckets = nullptr; Arg.NumBuckets = 0; Arg.NumNodes = 0; } -FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) { +FoldingSetBase &FoldingSetBase::operator=(FoldingSetBase &&RHS) { free(Buckets); // This may be null if the set is in a moved-from state. Buckets = RHS.Buckets; NumBuckets = RHS.NumBuckets; @@ -251,11 +251,11 @@ FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) { return *this; } -FoldingSetImpl::~FoldingSetImpl() { +FoldingSetBase::~FoldingSetBase() { free(Buckets); } -void FoldingSetImpl::clear() { +void FoldingSetBase::clear() { // Set all but the last bucket to null pointers. memset(Buckets, 0, NumBuckets*sizeof(void*)); @@ -266,7 +266,7 @@ void FoldingSetImpl::clear() { NumNodes = 0; } -void FoldingSetImpl::GrowBucketCount(unsigned NewBucketCount) { +void FoldingSetBase::GrowBucketCount(unsigned NewBucketCount) { assert((NewBucketCount > NumBuckets) && "Can't shrink a folding set with GrowBucketCount"); assert(isPowerOf2_32(NewBucketCount) && "Bad bucket count!"); void **OldBuckets = Buckets; @@ -300,11 +300,11 @@ void FoldingSetImpl::GrowBucketCount(unsigned NewBucketCount) { /// GrowHashTable - Double the size of the hash table and rehash everything. /// -void FoldingSetImpl::GrowHashTable() { +void FoldingSetBase::GrowHashTable() { GrowBucketCount(NumBuckets * 2); } -void FoldingSetImpl::reserve(unsigned EltCount) { +void FoldingSetBase::reserve(unsigned EltCount) { // This will give us somewhere between EltCount / 2 and // EltCount buckets. This puts us in the load factor // range of 1.0 - 2.0. @@ -316,9 +316,9 @@ void FoldingSetImpl::reserve(unsigned EltCount) { /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, /// return it. If not, return the insertion token that will make insertion /// faster. -FoldingSetImpl::Node -*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, - void *&InsertPos) { +FoldingSetBase::Node * +FoldingSetBase::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { unsigned IDHash = ID.ComputeHash(); void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); void *Probe = *Bucket; @@ -342,7 +342,7 @@ FoldingSetImpl::Node /// InsertNode - Insert the specified node into the folding set, knowing that it /// is not already in the map. InsertPos must be obtained from /// FindNodeOrInsertPos. -void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { +void FoldingSetBase::InsertNode(Node *N, void *InsertPos) { assert(!N->getNextInBucket()); // Do we need to grow the hashtable? if (NumNodes+1 > capacity()) { @@ -371,7 +371,7 @@ void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { /// RemoveNode - Remove a node from the folding set, returning true if one was /// removed or false if the node was not in the folding set. -bool FoldingSetImpl::RemoveNode(Node *N) { +bool FoldingSetBase::RemoveNode(Node *N) { // Because each bucket is a circular list, we don't need to compute N's hash // to remove it. void *Ptr = N->getNextInBucket(); @@ -412,7 +412,7 @@ bool FoldingSetImpl::RemoveNode(Node *N) { /// GetOrInsertNode - If there is an existing simple Node exactly /// equal to the specified node, return it. Otherwise, insert 'N' and it /// instead. -FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { +FoldingSetBase::Node *FoldingSetBase::GetOrInsertNode(FoldingSetBase::Node *N) { FoldingSetNodeID ID; GetNodeProfile(N, ID); void *IP; diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index 2ed71c7e43119ea6d35543455aa7cf039282debe..a9f4409f5ddeb2a5c3d4bb6549f720f6fe3785f4 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include @@ -32,6 +32,7 @@ static void UpdatePosition(std::pair &Position, const char * switch (*Ptr) { case '\n': Line += 1; + LLVM_FALLTHROUGH; case '\r': Column = 0; break; diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index d0e1d50e8ccbc1388d421f1120c68af8cbda4f4e..f70b77da8de47ba7205ad490342bb812b14947ed 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -43,6 +43,7 @@ std::string llvm::DOT::EscapeString(const std::string &Label) { Str.erase(Str.begin()+i); continue; default: break; } + LLVM_FALLTHROUGH; case '{': case '}': case '<': case '>': case '|': case '"': diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index ec51314fcbe1d3d47634db3ab584c4c2a813cad5..234f7439a546bbc6744a6e6166d03c6799be91d4 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -1245,6 +1245,7 @@ static int computeHostNumPhysicalCores() { if (std::error_code EC = Text.getError()) { llvm::errs() << "Can't read " << "/proc/cpuinfo: " << EC.message() << "\n"; + return -1; } SmallVector strs; (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, @@ -1362,6 +1363,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; + Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); @@ -1399,6 +1401,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["prefetchwt1"] = HasLeaf7 && (ECX & 1); Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; + Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; // Enable protection keys Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 8be9879fbc2436d1f8962c1e91a76ebf7c2f2f20..3ee3af7731e6bb6dd5270b4570f7dcc97f71eba2 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -15,15 +15,15 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" #include #include #include -#include -#include #include #include +#include +#include #if LLVM_ON_WIN32 #include #endif diff --git a/lib/Support/LowLevelType.cpp b/lib/Support/LowLevelType.cpp index 4290d69cd197d0645688b91b51be7e1b56f05527..0ee3f1d0119e3b7a3b94e7697ab5d187201ac050 100644 --- a/lib/Support/LowLevelType.cpp +++ b/lib/Support/LowLevelType.cpp @@ -18,25 +18,25 @@ using namespace llvm; LLT::LLT(MVT VT) { if (VT.isVector()) { - SizeInBits = VT.getVectorElementType().getSizeInBits(); - ElementsOrAddrSpace = VT.getVectorNumElements(); - Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector; + init(/*isPointer=*/false, VT.getVectorNumElements() > 1, + VT.getVectorNumElements(), VT.getVectorElementType().getSizeInBits(), + /*AddressSpace=*/0); } else if (VT.isValid()) { // Aggregates are no different from real scalars as far as GlobalISel is // concerned. - Kind = Scalar; - SizeInBits = VT.getSizeInBits(); - ElementsOrAddrSpace = 1; - assert(SizeInBits != 0 && "invalid zero-sized type"); + assert(VT.getSizeInBits() != 0 && "invalid zero-sized type"); + init(/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0, + VT.getSizeInBits(), /*AddressSpace=*/0); } else { - Kind = Invalid; - SizeInBits = ElementsOrAddrSpace = 0; + IsPointer = false; + IsVector = false; + RawData = 0; } } void LLT::print(raw_ostream &OS) const { if (isVector()) - OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">"; + OS << "<" << getNumElements() << " x " << getElementType() << ">"; else if (isPointer()) OS << "p" << getAddressSpace(); else if (isValid()) { @@ -45,3 +45,12 @@ void LLT::print(raw_ostream &OS) const { } else llvm_unreachable("trying to print an invalid type"); } + +const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo; +const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo; +const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo; +const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo; diff --git a/lib/Support/MD5.cpp b/lib/Support/MD5.cpp index bdbf1d677938350bfaed5e0fed74c7da94eafb6a..545a64cfc7679fbf300a701a0c9b9b1b3ee7be15 100644 --- a/lib/Support/MD5.cpp +++ b/lib/Support/MD5.cpp @@ -37,11 +37,11 @@ * compile-time configuration. */ +#include "llvm/Support/MD5.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index c8d3844d0c9618a19133836c06aabac831f42e35..bdd02105f6f0eb72a1f22d5ebb3905dd1fecd9f4 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/Support/Mutex.h" +#include "llvm/Config/config.h" //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system diff --git a/lib/Support/Parallel.cpp b/lib/Support/Parallel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ab2cfdebf07d4507d4464f0eba0b0670a7506a26 --- /dev/null +++ b/lib/Support/Parallel.cpp @@ -0,0 +1,138 @@ +//===- llvm/Support/Parallel.cpp - Parallel algorithms --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Parallel.h" +#include "llvm/Config/llvm-config.h" + +#include +#include +#include + +using namespace llvm; + +namespace { + +/// \brief An abstract class that takes closures and runs them asynchronously. +class Executor { +public: + virtual ~Executor() = default; + virtual void add(std::function func) = 0; + + static Executor *getDefaultExecutor(); +}; + +#if !LLVM_ENABLE_THREADS +class SyncExecutor : public Executor { +public: + virtual void add(std::function F) { F(); } +}; + +Executor *Executor::getDefaultExecutor() { + static SyncExecutor Exec; + return &Exec; +} + +#elif defined(_MSC_VER) +/// \brief An Executor that runs tasks via ConcRT. +class ConcRTExecutor : public Executor { + struct Taskish { + Taskish(std::function Task) : Task(Task) {} + + std::function Task; + + static void run(void *P) { + Taskish *Self = static_cast(P); + Self->Task(); + concurrency::Free(Self); + } + }; + +public: + virtual void add(std::function F) { + Concurrency::CurrentScheduler::ScheduleTask( + Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(F)); + } +}; + +Executor *Executor::getDefaultExecutor() { + static ConcRTExecutor exec; + return &exec; +} + +#else +/// \brief An implementation of an Executor that runs closures on a thread pool +/// in filo order. +class ThreadPoolExecutor : public Executor { +public: + explicit ThreadPoolExecutor( + unsigned ThreadCount = std::thread::hardware_concurrency()) + : Done(ThreadCount) { + // Spawn all but one of the threads in another thread as spawning threads + // can take a while. + std::thread([&, ThreadCount] { + for (size_t i = 1; i < ThreadCount; ++i) { + std::thread([=] { work(); }).detach(); + } + work(); + }).detach(); + } + + ~ThreadPoolExecutor() override { + std::unique_lock Lock(Mutex); + Stop = true; + Lock.unlock(); + Cond.notify_all(); + // Wait for ~Latch. + } + + void add(std::function F) override { + std::unique_lock Lock(Mutex); + WorkStack.push(F); + Lock.unlock(); + Cond.notify_one(); + } + +private: + void work() { + while (true) { + std::unique_lock Lock(Mutex); + Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + if (Stop) + break; + auto Task = WorkStack.top(); + WorkStack.pop(); + Lock.unlock(); + Task(); + } + Done.dec(); + } + + std::atomic Stop{false}; + std::stack> WorkStack; + std::mutex Mutex; + std::condition_variable Cond; + parallel::detail::Latch Done; +}; + +Executor *Executor::getDefaultExecutor() { + static ThreadPoolExecutor exec; + return &exec; +} +#endif +} + +#if LLVM_ENABLE_THREADS +void parallel::detail::TaskGroup::spawn(std::function F) { + L.inc(); + Executor::getDefaultExecutor()->add([&, F] { + F(); + L.dec(); + }); +} +#endif diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index 9fd6652ce4b8c5fbddca3ee3bdb1cf111ec198f9..e58f856ca244e4eed1df9a3173bb1a8a7e61691d 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -13,12 +13,12 @@ #include "llvm/Support/Path.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/Support/COFF.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/Process.h" #include #include @@ -1027,177 +1027,6 @@ void directory_entry::replace_filename(const Twine &filename, file_status st) { Status = st; } -template -static bool startswith(StringRef Magic, const char (&S)[N]) { - return Magic.startswith(StringRef(S, N - 1)); -} - -/// @brief Identify the magic in magic. -file_magic identify_magic(StringRef Magic) { - if (Magic.size() < 4) - return file_magic::unknown; - switch ((unsigned char)Magic[0]) { - case 0x00: { - // COFF bigobj, CL.exe's LTO object file, or short import library file - if (startswith(Magic, "\0\0\xFF\xFF")) { - size_t MinSize = offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic); - if (Magic.size() < MinSize) - return file_magic::coff_import_library; - - const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID); - if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0) - return file_magic::coff_object; - if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0) - return file_magic::coff_cl_gl_object; - return file_magic::coff_import_library; - } - // Windows resource file - if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF")) - return file_magic::windows_resource; - // 0x0000 = COFF unknown machine type - if (Magic[1] == 0) - return file_magic::coff_object; - if (startswith(Magic, "\0asm")) - return file_magic::wasm_object; - break; - } - case 0xDE: // 0x0B17C0DE = BC wraper - if (startswith(Magic, "\xDE\xC0\x17\x0B")) - return file_magic::bitcode; - break; - case 'B': - if (startswith(Magic, "BC\xC0\xDE")) - return file_magic::bitcode; - break; - case '!': - if (startswith(Magic, "!\n") || startswith(Magic, "!\n")) - return file_magic::archive; - break; - - case '\177': - if (startswith(Magic, "\177ELF") && Magic.size() >= 18) { - bool Data2MSB = Magic[5] == 2; - unsigned high = Data2MSB ? 16 : 17; - unsigned low = Data2MSB ? 17 : 16; - if (Magic[high] == 0) { - switch (Magic[low]) { - default: return file_magic::elf; - case 1: return file_magic::elf_relocatable; - case 2: return file_magic::elf_executable; - case 3: return file_magic::elf_shared_object; - case 4: return file_magic::elf_core; - } - } - // It's still some type of ELF file. - return file_magic::elf; - } - break; - - case 0xCA: - if (startswith(Magic, "\xCA\xFE\xBA\xBE") || - startswith(Magic, "\xCA\xFE\xBA\xBF")) { - // This is complicated by an overlap with Java class files. - // See the Mach-O section in /usr/share/file/magic for details. - if (Magic.size() >= 8 && Magic[7] < 43) - return file_magic::macho_universal_binary; - } - break; - - // The two magic numbers for mach-o are: - // 0xfeedface - 32-bit mach-o - // 0xfeedfacf - 64-bit mach-o - case 0xFE: - case 0xCE: - case 0xCF: { - uint16_t type = 0; - if (startswith(Magic, "\xFE\xED\xFA\xCE") || - startswith(Magic, "\xFE\xED\xFA\xCF")) { - /* Native endian */ - size_t MinSize; - if (Magic[3] == char(0xCE)) - MinSize = sizeof(MachO::mach_header); - else - MinSize = sizeof(MachO::mach_header_64); - if (Magic.size() >= MinSize) - type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15]; - } else if (startswith(Magic, "\xCE\xFA\xED\xFE") || - startswith(Magic, "\xCF\xFA\xED\xFE")) { - /* Reverse endian */ - size_t MinSize; - if (Magic[0] == char(0xCE)) - MinSize = sizeof(MachO::mach_header); - else - MinSize = sizeof(MachO::mach_header_64); - if (Magic.size() >= MinSize) - type = Magic[15] << 24 | Magic[14] << 12 |Magic[13] << 8 | Magic[12]; - } - switch (type) { - default: break; - case 1: return file_magic::macho_object; - case 2: return file_magic::macho_executable; - case 3: return file_magic::macho_fixed_virtual_memory_shared_lib; - case 4: return file_magic::macho_core; - case 5: return file_magic::macho_preload_executable; - case 6: return file_magic::macho_dynamically_linked_shared_lib; - case 7: return file_magic::macho_dynamic_linker; - case 8: return file_magic::macho_bundle; - case 9: return file_magic::macho_dynamically_linked_shared_lib_stub; - case 10: return file_magic::macho_dsym_companion; - case 11: return file_magic::macho_kext_bundle; - } - break; - } - case 0xF0: // PowerPC Windows - case 0x83: // Alpha 32-bit - case 0x84: // Alpha 64-bit - case 0x66: // MPS R4000 Windows - case 0x50: // mc68K - case 0x4c: // 80386 Windows - case 0xc4: // ARMNT Windows - if (Magic[1] == 0x01) - return file_magic::coff_object; - - case 0x90: // PA-RISC Windows - case 0x68: // mc68K Windows - if (Magic[1] == 0x02) - return file_magic::coff_object; - break; - - case 'M': // Possible MS-DOS stub on Windows PE file - if (startswith(Magic, "MZ")) { - uint32_t off = read32le(Magic.data() + 0x3c); - // PE/COFF file, either EXE or DLL. - if (off < Magic.size() && - memcmp(Magic.data()+off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0) - return file_magic::pecoff_executable; - } - break; - - case 0x64: // x86-64 Windows. - if (Magic[1] == char(0x86)) - return file_magic::coff_object; - break; - - default: - break; - } - return file_magic::unknown; -} - -std::error_code identify_magic(const Twine &Path, file_magic &Result) { - int FD; - if (std::error_code EC = openFileForRead(Path, FD)) - return EC; - - char Buffer[32]; - int Length = read(FD, Buffer, sizeof(Buffer)); - if (close(FD) != 0 || Length < 0) - return std::error_code(errno, std::generic_category()); - - Result = identify_magic(StringRef(Buffer, Length)); - return std::error_code(); -} - std::error_code directory_entry::status(file_status &result) const { return fs::status(Path, result, FollowSymlinks); } diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index 5b079ff211feadb92776525e04ac6331004dae56..a18e9cc50040fa3d317a0a195565e60521260616 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -15,13 +15,14 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm-c/ErrorHandling.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/Compiler.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Watchdog.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #ifdef HAVE_CRASHREPORTERCLIENT_H diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index 290c30f4968f045f15008378832780b4d98ba5e5..caec993ee1653e5078d5636ced01febf46b68548 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Process.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" #include "llvm/Support/Program.h" using namespace llvm; diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp index 6c9781c4e2d6d95af19ccea1d91e2275913b438a..83c6d1d52b4c8f44b7541735317114896d35e7af 100644 --- a/lib/Support/RWMutex.cpp +++ b/lib/Support/RWMutex.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" #include "llvm/Support/RWMutex.h" +#include "llvm/Config/config.h" //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 68ba79e11766c35bf802bc486ec4cc364ee2e3c9..b1087fd8853cbf394547920847b2e5d929b3b7e6 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -48,7 +48,7 @@ Regex::~Regex() { } } -bool Regex::isValid(std::string &Error) { +bool Regex::isValid(std::string &Error) const { if (!error) return true; diff --git a/lib/Support/SHA1.cpp b/lib/Support/SHA1.cpp index 0eefd998cd75570f5ec625d36bff815783191e38..20f41c5ff44724b576ca343296e2bfbe1d30d470 100644 --- a/lib/Support/SHA1.cpp +++ b/lib/Support/SHA1.cpp @@ -15,9 +15,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Host.h" #include "llvm/Support/SHA1.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Host.h" using namespace llvm; #include diff --git a/lib/Support/ScopedPrinter.cpp b/lib/Support/ScopedPrinter.cpp index d8ee1efd8f3e6934932e1aa1b6f9bafc47abfa49..537ff62c7b09c14e4f5f63fb100982c06180303b 100644 --- a/lib/Support/ScopedPrinter.cpp +++ b/lib/Support/ScopedPrinter.cpp @@ -21,7 +21,8 @@ const std::string to_hexString(uint64_t Value, bool UpperCase) { } void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str, - ArrayRef Data, bool Block) { + ArrayRef Data, bool Block, + uint32_t StartOffset) { if (Data.size() > 16) Block = true; @@ -31,7 +32,8 @@ void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str, OS << ": " << Str; OS << " (\n"; if (!Data.empty()) - OS << format_bytes_with_ascii(Data, 0, 16, 4, (IndentLevel + 1) * 2, true) + OS << format_bytes_with_ascii(Data, StartOffset, 16, 4, + (IndentLevel + 1) * 2, true) << "\n"; startLine() << ")\n"; } else { diff --git a/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/lib/Support/SearchForAddressOfSpecialSymbol.cpp deleted file mode 100644 index 55f3320f640fd678e2c7d97b0cc708f303606d6f..0000000000000000000000000000000000000000 --- a/lib/Support/SearchForAddressOfSpecialSymbol.cpp +++ /dev/null @@ -1,58 +0,0 @@ -//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file pulls the addresses of certain symbols out of the linker. It must -// include as few header files as possible because it declares the symbols as -// void*, which would conflict with the actual symbol type if any header -// declared it. -// -//===----------------------------------------------------------------------===// - -#include - -// Must declare the symbols in the global namespace. -static void *DoSearch(const char* symbolName) { -#define EXPLICIT_SYMBOL(SYM) \ - extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM - - // If this is darwin, it has some funky issues, try to solve them here. Some - // important symbols are marked 'private external' which doesn't allow - // SearchForAddressOfSymbol to find them. As such, we special case them here, - // there is only a small handful of them. - -#ifdef __APPLE__ - { - // __eprintf is sometimes used for assert() handling on x86. - // - // FIXME: Currently disabled when using Clang, as we don't always have our - // runtime support libraries available. -#ifndef __clang__ -#ifdef __i386__ - EXPLICIT_SYMBOL(__eprintf); -#endif -#endif - } -#endif - -#ifdef __CYGWIN__ - { - EXPLICIT_SYMBOL(_alloca); - EXPLICIT_SYMBOL(__main); - } -#endif - -#undef EXPLICIT_SYMBOL - return nullptr; -} - -namespace llvm { -void *SearchForAddressOfSpecialSymbol(const char* symbolName) { - return DoSearch(symbolName); -} -} // namespace llvm diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp index 57f36bf175b3a80956b53f4d8f7f3522eba53374..256a22dee87b900e5eeab45a70a5dc0d533921ee 100644 --- a/lib/Support/Signals.cpp +++ b/lib/Support/Signals.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Signals.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" @@ -23,18 +24,23 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Program.h" -#include "llvm/Support/Signals.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Options.h" #include -namespace llvm { - //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system //=== independent code. //===----------------------------------------------------------------------===// +using namespace llvm; + +static cl::opt + DisableSymbolication("disable-symbolication", + cl::desc("Disable symbolizing crash backtraces."), + cl::init(false), cl::Hidden); + static ManagedStatic>> CallBacksToRun; void sys::RunSignalHandlers() { @@ -44,9 +50,6 @@ void sys::RunSignalHandlers() { I.first(I.second); CallBacksToRun->clear(); } -} - -using namespace llvm; static bool findModulesAndOffsets(void **StackTrace, int Depth, const char **Modules, intptr_t *Offsets, @@ -70,6 +73,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace, int Depth, llvm::raw_ostream &OS) { + if (DisableSymbolication) + return false; + // Don't recursively invoke the llvm-symbolizer binary. if (Argv0.find("llvm-symbolizer") != std::string::npos) return false; diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index ca2391c10ff1009743668fbd29e55b2683c2162b..b0609d4fe047c1e89b961fab998a57b11f0ca342 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -13,18 +13,18 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/SourceMgr.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/Locale.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -51,9 +51,7 @@ static LineNoCacheTy *getCache(void *Ptr) { } SourceMgr::~SourceMgr() { - // Delete the line # cache if allocated. - if (LineNoCacheTy *Cache = getCache(LineNoCache)) - delete Cache; + delete getCache(LineNoCache); } unsigned SourceMgr::AddIncludeFile(const std::string &Filename, diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index df524b352351ed4c8daa692b2b46fd9f6eb37369..05886eaa8aee722b58748f7a0c95b9df84650780 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -15,12 +15,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/SpecialCaseList.h" -#include "llvm/Support/TrigramIndex.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/TrigramIndex.h" #include #include #include diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 0c50dfd27d61528f1793dfb262fa29673e2e707c..72ca22806c43cc75c8239b3af3f2c5a291fe52df 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -30,8 +30,8 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp index 3e2420f67760513a0adb341118a083126bcdfd2f..b2f42dfcc04d92250fa0d9554242fa26aee14732 100644 --- a/lib/Support/StringExtras.cpp +++ b/lib/Support/StringExtras.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; /// StrInStrNoCase - Portable version of strcasestr. Locates the first diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp index 639d2ece263a173b7a4355bc3d3c7929cc493097..b16351906a4c4dade1c86eaa7b8179dac250d21f 100644 --- a/lib/Support/TargetParser.cpp +++ b/lib/Support/TargetParser.cpp @@ -210,7 +210,7 @@ bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind, else Features.push_back("-hwdiv-arm"); - if (HWDivKind & ARM::AEK_HWDIV) + if (HWDivKind & ARM::AEK_HWDIVTHUMB) Features.push_back("+hwdiv"); else Features.push_back("-hwdiv"); @@ -422,8 +422,10 @@ unsigned llvm::AArch64::getDefaultExtensions(StringRef CPU, unsigned ArchKind) { return AArch64ARCHNames[ArchKind].ArchBaseExtensions; return StringSwitch(CPU) -#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ - .Case(NAME, DEFAULT_EXT) +#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, \ + AArch64ARCHNames[(unsigned)AArch64::ArchKind::ID].ArchBaseExtensions | \ + DEFAULT_EXT) #include "llvm/Support/AArch64TargetParser.def" .Default(AArch64::AEK_INVALID); } diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp index bed9ed64f802b8467da156d82e6b698c56674174..b5c28325311777b6a031b0623bbf71cd53e31286 100644 --- a/lib/Support/TargetRegistry.cpp +++ b/lib/Support/TargetRegistry.cpp @@ -114,7 +114,7 @@ static int TargetArraySortFn(const std::pair *LHS, return LHS->first.compare(RHS->first); } -void TargetRegistry::printRegisteredTargetsForVersion() { +void TargetRegistry::printRegisteredTargetsForVersion(raw_ostream &OS) { std::vector > Targets; size_t Width = 0; for (const auto &T : TargetRegistry::targets()) { @@ -123,7 +123,6 @@ void TargetRegistry::printRegisteredTargetsForVersion() { } array_pod_sort(Targets.begin(), Targets.end(), TargetArraySortFn); - raw_ostream &OS = outs(); OS << " Registered Targets:\n"; for (unsigned i = 0, e = Targets.size(); i != e; ++i) { OS << " " << Targets[i].first; diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp index 9da1603080a2ac50e5cdb051791121d403fba94c..9a75c02b351f884f53862f6b346881ef31ddda1e 100644 --- a/lib/Support/ThreadLocal.cpp +++ b/lib/Support/ThreadLocal.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/ThreadLocal.h" #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/ThreadLocal.h" //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system diff --git a/lib/Support/ThreadPool.cpp b/lib/Support/ThreadPool.cpp index db03a4d6240d262f16ab7d0d6c47bf4b640dda9d..22b7550d497149bbb0292855c36ac7d88799d6d0 100644 --- a/lib/Support/ThreadPool.cpp +++ b/lib/Support/ThreadPool.cpp @@ -53,11 +53,7 @@ ThreadPool::ThreadPool(unsigned ThreadCount) Tasks.pop(); } // Run the task we just grabbed -#ifndef _MSC_VER Task(); -#else - Task(/* unused */ false); -#endif { // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait() @@ -82,7 +78,7 @@ void ThreadPool::wait() { [&] { return !ActiveThreads && Tasks.empty(); }); } -std::shared_future ThreadPool::asyncImpl(TaskTy Task) { +std::shared_future ThreadPool::asyncImpl(TaskTy Task) { /// Wrap the Task in a packaged_task to return a future object. PackagedTaskTy PackagedTask(std::move(Task)); auto Future = PackagedTask.get_future(); @@ -128,25 +124,16 @@ void ThreadPool::wait() { while (!Tasks.empty()) { auto Task = std::move(Tasks.front()); Tasks.pop(); -#ifndef _MSC_VER - Task(); -#else - Task(/* unused */ false); -#endif + Task(); } } -std::shared_future ThreadPool::asyncImpl(TaskTy Task) { -#ifndef _MSC_VER +std::shared_future ThreadPool::asyncImpl(TaskTy Task) { // Get a Future with launch::deferred execution using std::async auto Future = std::async(std::launch::deferred, std::move(Task)).share(); // Wrap the future so that both ThreadPool::wait() can operate and the // returned future can be sync'ed on. PackagedTaskTy PackagedTask([Future]() { Future.get(); }); -#else - auto Future = std::async(std::launch::deferred, std::move(Task), false).share(); - PackagedTaskTy PackagedTask([Future](bool) -> bool { Future.get(); return false; }); -#endif Tasks.push(std::move(PackagedTask)); return Future; } diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 8d68c6ae9682a6f1cb3e167add2cbb9027127475..3386f2660f31a935ffaae877f0e075e622fbc962 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -20,8 +20,8 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Process.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // This ugly hack is brought to you courtesy of constructor/destructor ordering @@ -72,10 +72,15 @@ std::unique_ptr llvm::CreateInfoOutputFile() { return llvm::make_unique(2, false); // stderr. } -static TimerGroup *getDefaultTimerGroup() { - static TimerGroup DefaultTimerGroup("misc", "Miscellaneous Ungrouped Timers"); - return &DefaultTimerGroup; -} +namespace { +struct CreateDefaultTimerGroup { + static void *call() { + return new TimerGroup("misc", "Miscellaneous Ungrouped Timers"); + } +}; +} // namespace +static ManagedStatic DefaultTimerGroup; +static TimerGroup *getDefaultTimerGroup() { return &*DefaultTimerGroup; } //===----------------------------------------------------------------------===// // Timer Implementation diff --git a/lib/Support/TrigramIndex.cpp b/lib/Support/TrigramIndex.cpp index 85ab5287566b3a2ddfb387765755f6e60e6ec8ac..721763c885252e9a18757af18e5ced0dadc982cc 100644 --- a/lib/Support/TrigramIndex.cpp +++ b/lib/Support/TrigramIndex.cpp @@ -18,9 +18,9 @@ #include "llvm/Support/TrigramIndex.h" #include "llvm/ADT/SmallVector.h" -#include #include #include +#include using namespace llvm; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 64d5977e2ebd77f41d2f84ebae60077edac67436..320aede79fbb04acf2eb6699c5a0d7ae39ece3c0 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -12,8 +12,8 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetParser.h" #include "llvm/Support/Host.h" +#include "llvm/Support/TargetParser.h" #include using namespace llvm; @@ -34,6 +34,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) { case mips64: return "mips64"; case mips64el: return "mips64el"; case msp430: return "msp430"; + case nios2: return "nios2"; case ppc64: return "powerpc64"; case ppc64le: return "powerpc64le"; case ppc: return "powerpc"; @@ -98,6 +99,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { case mips64: case mips64el: return "mips"; + case nios2: return "nios2"; + case hexagon: return "hexagon"; case amdgcn: return "amdgcn"; @@ -161,6 +164,7 @@ StringRef Triple::getVendorTypeName(VendorType Kind) { case Myriad: return "myriad"; case AMD: return "amd"; case Mesa: return "mesa"; + case SUSE: return "suse"; } llvm_unreachable("Invalid VendorType!"); @@ -261,6 +265,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("mips64", mips64) .Case("mips64el", mips64el) .Case("msp430", msp430) + .Case("nios2", nios2) .Case("ppc64", ppc64) .Case("ppc32", ppc) .Case("ppc", ppc) @@ -383,6 +388,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Cases("mipsel", "mipsallegrexel", Triple::mipsel) .Cases("mips64", "mips64eb", Triple::mips64) .Case("mips64el", Triple::mips64el) + .Case("nios2", Triple::nios2) .Case("r600", Triple::r600) .Case("amdgcn", Triple::amdgcn) .Case("riscv32", Triple::riscv32) @@ -443,6 +449,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("myriad", Triple::Myriad) .Case("amd", Triple::AMD) .Case("mesa", Triple::Mesa) + .Case("suse", Triple::SUSE) .Default(Triple::UnknownVendor); } @@ -457,7 +464,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("kfreebsd", Triple::KFreeBSD) .StartsWith("linux", Triple::Linux) .StartsWith("lv2", Triple::Lv2) - .StartsWith("macosx", Triple::MacOSX) + .StartsWith("macos", Triple::MacOSX) .StartsWith("netbsd", Triple::NetBSD) .StartsWith("openbsd", Triple::OpenBSD) .StartsWith("solaris", Triple::Solaris) @@ -623,6 +630,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::mips64el: case Triple::mipsel: case Triple::msp430: + case Triple::nios2: case Triple::nvptx: case Triple::nvptx64: case Triple::ppc64le: @@ -869,6 +877,10 @@ std::string Triple::normalize(StringRef Str) { } } + // SUSE uses "gnueabi" to mean "gnueabihf" + if (Vendor == Triple::SUSE && Environment == llvm::Triple::GNUEABI) + Components[3] = "gnueabihf"; + if (OS == Triple::Win32) { Components.resize(4); Components[2] = "windows"; @@ -982,6 +994,8 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor, StringRef OSTypeName = getOSTypeName(getOS()); if (OSName.startswith(OSTypeName)) OSName = OSName.substr(OSTypeName.size()); + else if (getOS() == MacOSX) + OSName.consume_front("macos"); parseVersionFromName(OSName, Major, Minor, Micro); } @@ -1156,6 +1170,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::le32: case llvm::Triple::mips: case llvm::Triple::mipsel: + case llvm::Triple::nios2: case llvm::Triple::nvptx: case llvm::Triple::ppc: case llvm::Triple::r600: @@ -1239,6 +1254,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::le32: case Triple::mips: case Triple::mipsel: + case Triple::nios2: case Triple::nvptx: case Triple::ppc: case Triple::r600: @@ -1286,6 +1302,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::kalimba: case Triple::lanai: case Triple::msp430: + case Triple::nios2: case Triple::r600: case Triple::tce: case Triple::tcele: @@ -1357,6 +1374,7 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::le32: case Triple::le64: case Triple::msp430: + case Triple::nios2: case Triple::nvptx64: case Triple::nvptx: case Triple::r600: @@ -1443,6 +1461,7 @@ bool Triple::isLittleEndian() const { case Triple::mips64el: case Triple::mipsel: case Triple::msp430: + case Triple::nios2: case Triple::nvptx64: case Triple::nvptx: case Triple::ppc64le: @@ -1468,6 +1487,39 @@ bool Triple::isLittleEndian() const { } } +bool Triple::isCompatibleWith(const Triple &Other) const { + // ARM and Thumb triples are compatible, if subarch, vendor and OS match. + if ((getArch() == Triple::thumb && Other.getArch() == Triple::arm) || + (getArch() == Triple::arm && Other.getArch() == Triple::thumb) || + (getArch() == Triple::thumbeb && Other.getArch() == Triple::armeb) || + (getArch() == Triple::armeb && Other.getArch() == Triple::thumbeb)) { + if (getVendor() == Triple::Apple) + return getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS(); + else + return getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS() && + getEnvironment() == Other.getEnvironment() && + getObjectFormat() == Other.getObjectFormat(); + } + + // If vendor is apple, ignore the version number. + if (getVendor() == Triple::Apple) + return getArch() == Other.getArch() && getSubArch() == Other.getSubArch() && + getVendor() == Other.getVendor() && getOS() == Other.getOS(); + + return *this == Other; +} + +std::string Triple::merge(const Triple &Other) const { + // If vendor is apple, pick the triple with the larger version number. + if (getVendor() == Triple::Apple) + if (Other.isOSVersionLT(*this)) + return str(); + + return Other.str(); +} + StringRef Triple::getARMCPUForArch(StringRef MArch) const { if (MArch.empty()) MArch = getArchName(); diff --git a/lib/Support/Unix/DynamicLibrary.inc b/lib/Support/Unix/DynamicLibrary.inc new file mode 100644 index 0000000000000000000000000000000000000000..aad77f19c35a520ffa52beb964ec599fed4347e6 --- /dev/null +++ b/lib/Support/Unix/DynamicLibrary.inc @@ -0,0 +1,132 @@ +//===- Unix/DynamicLibrary.cpp - Unix DL Implementation ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the UNIX specific implementation of DynamicLibrary. +// +//===----------------------------------------------------------------------===// + +#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN) +#include + +DynamicLibrary::HandleSet::~HandleSet() { + // Close the libraries in reverse order. + for (void *Handle : llvm::reverse(Handles)) + ::dlclose(Handle); + if (Process) + ::dlclose(Process); +} + +void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { + void *Handle = ::dlopen(File, RTLD_LAZY|RTLD_GLOBAL); + if (!Handle) { + if (Err) *Err = ::dlerror(); + return &DynamicLibrary::Invalid; + } + +#ifdef __CYGWIN__ + // Cygwin searches symbols only in the main + // with the handle of dlopen(NULL, RTLD_GLOBAL). + if (!File) + Handle = RTLD_DEFAULT; +#endif + + return Handle; +} + +void DynamicLibrary::HandleSet::DLClose(void *Handle) { + ::dlclose(Handle); +} + +void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) { + return ::dlsym(Handle, Symbol); +} + +#else // !HAVE_DLOPEN + +DynamicLibrary::HandleSet::~HandleSet() {} + +void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { + if (Err) *Err = "dlopen() not supported on this platform"; + return &Invalid; +} + +void DynamicLibrary::HandleSet::DLClose(void *Handle) { +} + +void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) { + return nullptr; +} + +#endif + +// Must declare the symbols in the global namespace. +static void *DoSearch(const char* SymbolName) { +#define EXPLICIT_SYMBOL(SYM) \ + extern void *SYM; if (!strcmp(SymbolName, #SYM)) return &SYM + + // If this is darwin, it has some funky issues, try to solve them here. Some + // important symbols are marked 'private external' which doesn't allow + // SearchForAddressOfSymbol to find them. As such, we special case them here, + // there is only a small handful of them. + +#ifdef __APPLE__ + { + // __eprintf is sometimes used for assert() handling on x86. + // + // FIXME: Currently disabled when using Clang, as we don't always have our + // runtime support libraries available. +#ifndef __clang__ +#ifdef __i386__ + EXPLICIT_SYMBOL(__eprintf); +#endif +#endif + } +#endif + +#ifdef __CYGWIN__ + { + EXPLICIT_SYMBOL(_alloca); + EXPLICIT_SYMBOL(__main); + } +#endif + +#undef EXPLICIT_SYMBOL + +// This macro returns the address of a well-known, explicit symbol +#define EXPLICIT_SYMBOL(SYM) \ + if (!strcmp(SymbolName, #SYM)) return &SYM + +// Under glibc we have a weird situation. The stderr/out/in symbols are both +// macros and global variables because of standards requirements. So, we +// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first. +#if defined(__GLIBC__) + { + EXPLICIT_SYMBOL(stderr); + EXPLICIT_SYMBOL(stdout); + EXPLICIT_SYMBOL(stdin); + } +#else + // For everything else, we want to check to make sure the symbol isn't defined + // as a macro before using EXPLICIT_SYMBOL. + { +#ifndef stdin + EXPLICIT_SYMBOL(stdin); +#endif +#ifndef stdout + EXPLICIT_SYMBOL(stdout); +#endif +#ifndef stderr + EXPLICIT_SYMBOL(stderr); +#endif + } +#endif +#undef EXPLICIT_SYMBOL + + return nullptr; +} diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index 457217125a2229a160cf71e7b13bdc44c012f4b4..0ba6a25aa198dcf46ec45bcc984a94bd2e3be46a 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -45,5 +45,11 @@ std::string sys::getDefaultTargetTriple() { TargetTripleString += getOSVersion(); } + // Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. +#if defined(LLVM_TARGET_TRIPLE_ENV) + if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) + TargetTripleString = EnvTriple; +#endif + return Triple::normalize(TargetTripleString); } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 93f8982196b3c5285d2742932a7e3762ed3b2f3d..b6774692595bd1a5892047228c505f7105e7fb98 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -75,8 +75,8 @@ #define STATVFS_F_FRSIZE(vfs) vfs.f_frsize #else #if defined(__OpenBSD__) || defined(__FreeBSD__) -#include #include +#include #elif defined(__linux__) #if defined(HAVE_LINUX_MAGIC_H) #include @@ -381,6 +381,11 @@ static bool is_local_impl(struct STATVFS &Vfs) { #elif defined(__CYGWIN__) // Cygwin doesn't expose this information; would need to use Win32 API. return false; +#elif defined(__sun) + // statvfs::f_basetype contains a null-terminated FSType name of the mounted target + StringRef fstype(Vfs.f_basetype); + // NFS is the only non-local fstype?? + return !fstype.equals("nfs"); #else return !!(STATVFS_F_FLAG(Vfs) & MNT_LOCAL); #endif @@ -421,14 +426,15 @@ std::error_code resize_file(int FD, uint64_t Size) { #if defined(HAVE_POSIX_FALLOCATE) // If we have posix_fallocate use it. Unlike ftruncate it always allocates // space, so we get an error if the disk is full. - if (int Err = ::posix_fallocate(FD, 0, Size)) - return std::error_code(Err, std::generic_category()); -#else + if (int Err = ::posix_fallocate(FD, 0, Size)) { + if (Err != EOPNOTSUPP) + return std::error_code(Err, std::generic_category()); + } +#endif // Use ftruncate as a fallback. It may or may not allocate space. At least on // OS X with HFS+ it does. if (::ftruncate(FD, Size) == -1) return std::error_code(errno, std::generic_category()); -#endif return std::error_code(); } diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 16f8f5a98e5244f05a628ec8aaa0032d3061c28c..1d0143c6716e00edd9d99054644fd139e60ed45f 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -347,7 +347,7 @@ static bool terminalHasColors(int fd) { MutexGuard G(*TermColorMutex); int errret = 0; - if (setupterm((char *)nullptr, fd, &errret) != 0) + if (setupterm(nullptr, fd, &errret) != 0) // Regardless of why, if we can't get terminfo, we shouldn't try to print // colors. return false; @@ -369,7 +369,7 @@ static bool terminalHasColors(int fd) { // Now extract the structure allocated by setupterm and free its memory // through a really silly dance. - struct term *termp = set_curterm((struct term *)nullptr); + struct term *termp = set_curterm(nullptr); (void)del_curterm(termp); // Drop any errors here. // Return true if we found a color capabilities for the current terminal. diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 7d3537e20727e275466d339bc6083451f140cef1..2df0eaff47e52a507e3087aa28deb372d7b91612 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -163,16 +163,6 @@ static void SetMemoryLimits (unsigned size) r.rlim_cur = limit; setrlimit (RLIMIT_RSS, &r); #endif -#ifdef RLIMIT_AS // e.g. NetBSD doesn't have it. - // Don't set virtual memory limit if built with any Sanitizer. They need 80Tb - // of virtual memory for shadow memory mapping. -#if !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_ADDRESS_SANITIZER_BUILD - // Virtual memory. - getrlimit (RLIMIT_AS, &r); - r.rlim_cur = limit; - setrlimit (RLIMIT_AS, &r); -#endif -#endif #endif } diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 88ad21e9806ed7e3282ceb397c2c9355bb6a7d4a..aaf760c5b616667c996a639586ec53b5d0696809 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -15,9 +15,9 @@ #include "Unix.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Demangle/Demangle.h" -#include "llvm/Support/Format.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/Program.h" diff --git a/lib/Support/Unix/Threading.inc b/lib/Support/Unix/Threading.inc index 407b194e1b6ae4fb02c387e4aa7f3d81ca44f474..267af388ecdbc204522ca018df6790712d20a45a 100644 --- a/lib/Support/Unix/Threading.inc +++ b/lib/Support/Unix/Threading.inc @@ -26,19 +26,19 @@ #endif #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include #include #include -#include #include #endif #if defined(__NetBSD__) -#include // For _lwp_self() +#include // For _lwp_self() #endif #if defined(__linux__) -#include // For syscall() -#include // For syscall codes +#include // For syscall codes +#include // For syscall() #endif namespace { diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index 709499deeafa93c0fadbdc555fa48232850b45dc..caf1a0a658de0bc078267ae543ad2d7a31f96767 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -12,98 +12,140 @@ //===----------------------------------------------------------------------===// #include "WindowsSupport.h" +#include "llvm/Support/raw_ostream.h" -#ifdef __MINGW32__ - #include -#else - #include -#endif - -#ifdef _MSC_VER - #include -#endif - -namespace llvm { +#include //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only Win32 specific code //=== and must not be UNIX code. //===----------------------------------------------------------------------===// -typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID); -static fpEnumerateLoadedModules fEnumerateLoadedModules; -static llvm::ManagedStatic > OpenedHandles; -static bool loadDebugHelp(void) { - HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll"); - if (hLib) { - fEnumerateLoadedModules = (fpEnumerateLoadedModules) - ::GetProcAddress(hLib, "EnumerateLoadedModules64"); - } - return fEnumerateLoadedModules != 0; -} +DynamicLibrary::HandleSet::~HandleSet() { + for (void *Handle : llvm::reverse(Handles)) + FreeLibrary(HMODULE(Handle)); -static BOOL CALLBACK -ELM_Callback(PCSTR ModuleName, DWORD64 ModuleBase, - ULONG ModuleSize, PVOID UserContext) { - OpenedHandles->insert((HMODULE)ModuleBase); - return TRUE; + // 'Process' should not be released on Windows. + assert((!Process || Process==this) && "Bad Handle"); } -sys::DynamicLibrary -sys::DynamicLibrary::getPermanentLibrary(const char *filename, - std::string *errMsg) { - SmartScopedLock lock(*SymbolsMutex); - - if (!filename) { - // When no file is specified, enumerate all DLLs and EXEs in the process. - if (!fEnumerateLoadedModules) { - if (!loadDebugHelp()) { - assert(false && "These APIs should always be available"); - return DynamicLibrary(); - } - } +void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { + // Create the instance and return it to be the *Process* handle + // simillar to dlopen(NULL, RTLD_LAZY|RTLD_GLOBAL) + if (!File) + return &(*OpenedHandles); - fEnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0); - // Dummy library that represents "search all handles". - // This is mostly to ensure that the return value still shows up as "valid". - return DynamicLibrary(&OpenedHandles); - } - - SmallVector filenameUnicode; - if (std::error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) { + SmallVector FileUnicode; + if (std::error_code ec = windows::UTF8ToUTF16(File, FileUnicode)) { SetLastError(ec.value()); - MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16"); - return DynamicLibrary(); + MakeErrMsg(Err, std::string(File) + ": Can't convert to UTF-16"); + return &DynamicLibrary::Invalid; } - HMODULE a_handle = LoadLibraryW(filenameUnicode.data()); - - if (a_handle == 0) { - MakeErrMsg(errMsg, std::string(filename) + ": Can't open"); - return DynamicLibrary(); + HMODULE Handle = LoadLibraryW(FileUnicode.data()); + if (Handle == NULL) { + MakeErrMsg(Err, std::string(File) + ": Can't open"); + return &DynamicLibrary::Invalid; } - // If we've already loaded this library, FreeLibrary() the handle in order to - // keep the internal refcount at +1. - if (!OpenedHandles->insert(a_handle).second) - FreeLibrary(a_handle); + return reinterpret_cast(Handle); +} - return DynamicLibrary(a_handle); +static DynamicLibrary::HandleSet *IsOpenedHandlesInstance(void *Handle) { + if (!OpenedHandles.isConstructed()) + return nullptr; + DynamicLibrary::HandleSet &Inst = *OpenedHandles; + return Handle == &Inst ? &Inst : nullptr; } -sys::DynamicLibrary -sys::DynamicLibrary::addPermanentLibrary(void *handle, std::string *errMsg) { - SmartScopedLock lock(*SymbolsMutex); - // If we've already loaded this library, tell the caller. - if (!OpenedHandles->insert((HMODULE)handle).second) { - MakeErrMsg(errMsg, "Library already loaded"); - return DynamicLibrary(); +void DynamicLibrary::HandleSet::DLClose(void *Handle) { + if (HandleSet* HS = IsOpenedHandlesInstance(Handle)) + HS->Process = nullptr; // Just drop the *Process* handle. + else + FreeLibrary((HMODULE)Handle); +} + +static bool GetProcessModules(HANDLE H, DWORD &Bytes, HMODULE *Data = nullptr) { + // EnumProcessModules will fail on Windows 64 while some versions of + // MingW-32 don't have EnumProcessModulesEx. + if ( +#ifdef _WIN64 + !EnumProcessModulesEx(H, Data, Bytes, &Bytes, LIST_MODULES_64BIT) +#else + !EnumProcessModules(H, Data, Bytes, &Bytes) +#endif + ) { + std::string Err; + if (MakeErrMsg(&Err, "EnumProcessModules failure")) + llvm::errs() << Err << "\n"; + return false; } + return true; +} - return DynamicLibrary(handle); +void *DynamicLibrary::HandleSet::DLSym(void *Handle, const char *Symbol) { + HandleSet* HS = IsOpenedHandlesInstance(Handle); + if (!HS) + return (void *)uintptr_t(GetProcAddress((HMODULE)Handle, Symbol)); + + // Could have done a dlclose on the *Process* handle + if (!HS->Process) + return nullptr; + + // Trials indicate EnumProcessModulesEx is consistantly faster than using + // EnumerateLoadedModules64 or CreateToolhelp32Snapshot. + // + // | Handles | DbgHelp.dll | CreateSnapshot | EnumProcessModulesEx + // |=========|=============|======================================== + // | 37 | 0.0000585 * | 0.0003031 | 0.0000152 + // | 1020 | 0.0026310 * | 0.0121598 | 0.0002683 + // | 2084 | 0.0149418 * | 0.0369936 | 0.0005610 + // + // * Not including the load time of Dbghelp.dll (~.005 sec) + // + // There's still a case to somehow cache the result of EnumProcessModulesEx + // across invocations, but the complication of doing that properly... + // Possibly using LdrRegisterDllNotification to invalidate the cache? + + DWORD Bytes = 0; + HMODULE Self = HMODULE(GetCurrentProcess()); + if (!GetProcessModules(Self, Bytes)) + return nullptr; + + // Get the most recent list in case any modules added/removed between calls + // to EnumProcessModulesEx that gets the amount of, then copies the HMODULES. + // MSDN is pretty clear that if the module list changes during the call to + // EnumProcessModulesEx the results should not be used. + std::vector Handles; + do { + assert(Bytes && ((Bytes % sizeof(HMODULE)) == 0) && + "Should have at least one module and be aligned"); + Handles.resize(Bytes / sizeof(HMODULE)); + if (!GetProcessModules(Self, Bytes, Handles.data())) + return nullptr; + } while (Bytes != (Handles.size() * sizeof(HMODULE))); + + // Try EXE first, mirroring what dlsym(dlopen(NULL)) does. + if (FARPROC Ptr = GetProcAddress(HMODULE(Handles.front()), Symbol)) + return (void *) uintptr_t(Ptr); + + if (Handles.size() > 1) { + // This is different behaviour than what Posix dlsym(dlopen(NULL)) does. + // Doing that here is causing real problems for the JIT where msvc.dll + // and ucrt.dll can define the same symbols. The runtime linker will choose + // symbols from ucrt.dll first, but iterating NOT in reverse here would + // mean that the msvc.dll versions would be returned. + + for (auto I = Handles.rbegin(), E = Handles.rend()-1; I != E; ++I) { + if (FARPROC Ptr = GetProcAddress(HMODULE(*I), Symbol)) + return (void *) uintptr_t(Ptr); + } + } + return nullptr; } + // Stack probing routines are in the support library (e.g. libgcc), but we don't // have dynamic linking on windows. Provide a hook. #define EXPLICIT_SYMBOL(SYM) \ @@ -129,38 +171,18 @@ sys::DynamicLibrary::addPermanentLibrary(void *handle, std::string *errMsg) { #undef INLINE_DEF_SYMBOL1 #undef INLINE_DEF_SYMBOL2 -void *sys::DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { - SmartScopedLock Lock(*SymbolsMutex); - - // First check symbols added via AddSymbol(). - if (ExplicitSymbols.isConstructed()) { - StringMap::iterator i = ExplicitSymbols->find(symbolName); - - if (i != ExplicitSymbols->end()) - return i->second; - } - - // Now search the libraries. - if (OpenedHandles.isConstructed()) { - for (DenseSet::iterator I = OpenedHandles->begin(), - E = OpenedHandles->end(); I != E; ++I) { - FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName); - if (ptr) { - return (void *)(intptr_t)ptr; - } - } - } +static void *DoSearch(const char *SymbolName) { #define EXPLICIT_SYMBOL(SYM) \ - if (!strcmp(symbolName, #SYM)) \ + if (!strcmp(SymbolName, #SYM)) \ return (void *)&SYM; #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) \ - if (!strcmp(symbolName, #SYMFROM)) \ + if (!strcmp(SymbolName, #SYMFROM)) \ return (void *)&SYMTO; #ifdef _M_IX86 #define INLINE_DEF_SYMBOL1(TYP, SYM) \ - if (!strcmp(symbolName, #SYM)) \ + if (!strcmp(SymbolName, #SYM)) \ return (void *)&inline_##SYM; #define INLINE_DEF_SYMBOL2(TYP, SYM) INLINE_DEF_SYMBOL1(TYP, SYM) #endif @@ -174,15 +196,5 @@ void *sys::DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { #undef INLINE_DEF_SYMBOL1 #undef INLINE_DEF_SYMBOL2 - return 0; -} - -void *sys::DynamicLibrary::getAddressOfSymbol(const char *symbolName) { - if (!isValid()) - return NULL; - if (Data == &OpenedHandles) - return SearchForAddressOfSymbol(symbolName); - return (void *)(intptr_t)GetProcAddress((HMODULE)Data, symbolName); -} - + return nullptr; } diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc index fe89fe0aad8c475d85d859de47928cf41b80bc0d..7e196cf0ce18a4c8e3c4a3d01d4c77cdd391d7d2 100644 --- a/lib/Support/Windows/Host.inc +++ b/lib/Support/Windows/Host.inc @@ -18,5 +18,13 @@ using namespace llvm; std::string sys::getDefaultTargetTriple() { - return Triple::normalize(LLVM_DEFAULT_TARGET_TRIPLE); + const char *Triple = LLVM_DEFAULT_TARGET_TRIPLE; + + // Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. +#if defined(LLVM_TARGET_TRIPLE_ENV) + if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) + Triple = EnvTriple; +#endif + + return Triple::normalize(Triple); } diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h index c358b99ab96aaa941b14784901ad6b493d10eaa4..d4599dca044e9172ce718f0dd1e727ba38ea7b8c 100644 --- a/lib/Support/Windows/WindowsSupport.h +++ b/lib/Support/Windows/WindowsSupport.h @@ -45,7 +45,9 @@ #include #include #include -#include // Must be included after windows.h + +// Must be included after windows.h +#include /// Determines if the program is running on Windows 8 or newer. This /// reimplements one of the helpers in the Windows 8.1 SDK, which are intended diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index c17a6f6e1ea63ca7ae3d686987ec046502f6da66..01ae3214453dcc460b0f94015edcd7c81440b6b8 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/YAMLParser.h" +#include "llvm/ADT/AllocatorList.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/ADT/AllocatorList.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" @@ -2116,6 +2116,7 @@ void MappingNode::increment() { break; default: setError("Unexpected token. Expected Key or Block End", T); + LLVM_FALLTHROUGH; case Token::TK_Error: IsAtEnd = true; CurrentEntry = nullptr; @@ -2128,6 +2129,7 @@ void MappingNode::increment() { return increment(); case Token::TK_FlowMappingEnd: getNext(); + LLVM_FALLTHROUGH; case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; @@ -2170,6 +2172,7 @@ void SequenceNode::increment() { default: setError( "Unexpected token. Expected Block Entry or Block End." , T); + LLVM_FALLTHROUGH; case Token::TK_Error: IsAtEnd = true; CurrentEntry = nullptr; @@ -2198,6 +2201,7 @@ void SequenceNode::increment() { return increment(); case Token::TK_FlowSequenceEnd: getNext(); + LLVM_FALLTHROUGH; case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 33d3de5daf33f35a802bf19a876afdf494485c41..b2636e1e6cb4d67b6d9ec7f74a5d31cb22ecb861 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -11,20 +11,28 @@ // //===----------------------------------------------------------------------===// -#include "llvm/TableGen/Record.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" #include #include -#include +#include +#include +#include +#include using namespace llvm; @@ -162,7 +170,8 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) { // Initializer implementations //===----------------------------------------------------------------------===// -void Init::anchor() { } +void Init::anchor() {} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); } #endif @@ -219,7 +228,6 @@ ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef Range) { BitsInit *BitsInit::get(ArrayRef Range) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileBitsInit(ID, Range); @@ -234,7 +242,6 @@ BitsInit *BitsInit::get(ArrayRef Range) { std::uninitialized_copy(Range.begin(), Range.end(), I->getTrailingObjects()); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -303,7 +310,6 @@ static Init *fixBitInit(const RecordVal *RV, Init *Before, Init *After) { // resolveReferences - If there are any field references that refer to fields // that have been filled in, we can propagate the values now. -// Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const { bool Changed = false; SmallVector NewBits(getNumBits()); @@ -407,27 +413,21 @@ IntInit::convertInitializerBitRange(ArrayRef Bits) const { } CodeInit *CodeInit::get(StringRef V) { - static DenseMap ThePool; + static StringMap ThePool(Allocator); - auto I = ThePool.insert(std::make_pair(V, nullptr)); - if (I.second) { - StringRef VCopy = V.copy(Allocator); - I.first->first = VCopy; - I.first->second = new(Allocator) CodeInit(VCopy); - } - return I.first->second; + auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first; + if (!Entry.second) + Entry.second = new(Allocator) CodeInit(Entry.getKey()); + return Entry.second; } StringInit *StringInit::get(StringRef V) { - static DenseMap ThePool; + static StringMap ThePool(Allocator); - auto I = ThePool.insert(std::make_pair(V, nullptr)); - if (I.second) { - StringRef VCopy = V.copy(Allocator); - I.first->first = VCopy; - I.first->second = new(Allocator) StringInit(VCopy); - } - return I.first->second; + auto &Entry = *ThePool.insert(std::make_pair(V, nullptr)).first; + if (!Entry.second) + Entry.second = new(Allocator) StringInit(Entry.getKey()); + return Entry.second; } Init *StringInit::convertInitializerTo(RecTy *Ty) const { @@ -456,7 +456,6 @@ static void ProfileListInit(FoldingSetNodeID &ID, ListInit *ListInit::get(ArrayRef Range, RecTy *EltTy) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileListInit(ID, Range, EltTy); @@ -471,7 +470,6 @@ ListInit *ListInit::get(ArrayRef Range, RecTy *EltTy) { std::uninitialized_copy(Range.begin(), Range.end(), I->getTrailingObjects()); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -606,7 +604,6 @@ ProfileUnOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *Op, RecTy *Type) { UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileUnOpInit(ID, Opc, LHS, Type); @@ -617,7 +614,6 @@ UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) { UnOpInit *I = new(Allocator) UnOpInit(Opc, LHS, Type); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -627,7 +623,7 @@ void UnOpInit::Profile(FoldingSetNodeID &ID) const { Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { switch (getOpcode()) { - case CAST: { + case CAST: if (isa(getType())) { if (StringInit *LHSs = dyn_cast(LHS)) return LHSs; @@ -692,15 +688,15 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { } } break; - } - case HEAD: { + + case HEAD: if (ListInit *LHSl = dyn_cast(LHS)) { assert(!LHSl->empty() && "Empty list in head"); return LHSl->getElement(0); } break; - } - case TAIL: { + + case TAIL: if (ListInit *LHSl = dyn_cast(LHS)) { assert(!LHSl->empty() && "Empty list in tail"); // Note the +1. We can't just pass the result of getValues() @@ -708,16 +704,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { return ListInit::get(LHSl->getValues().slice(1), LHSl->getType()); } break; - } - case EMPTY: { + + case EMPTY: if (ListInit *LHSl = dyn_cast(LHS)) return IntInit::get(LHSl->empty()); if (StringInit *LHSs = dyn_cast(LHS)) return IntInit::get(LHSs->getValue().empty()); - break; } - } return const_cast(this); } @@ -752,7 +746,6 @@ ProfileBinOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *RHS, BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, Init *RHS, RecTy *Type) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileBinOpInit(ID, Opc, LHS, RHS, Type); @@ -763,7 +756,6 @@ BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, BinOpInit *I = new(Allocator) BinOpInit(Opc, LHS, RHS, Type); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -910,7 +902,6 @@ ProfileTernOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *MHS, TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS, RecTy *Type) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileTernOpInit(ID, Opc, LHS, MHS, RHS, Type); @@ -921,7 +912,6 @@ TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS, TernOpInit *I = new(Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -964,7 +954,6 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, Record *CurRec, MultiClass *CurMultiClass) { - OpInit *RHSo = dyn_cast(RHS); if (!RHSo) @@ -1261,7 +1250,7 @@ VarInit *VarInit::get(StringRef VN, RecTy *T) { } VarInit *VarInit::get(Init *VN, RecTy *T) { - typedef std::pair Key; + using Key = std::pair; static DenseMap ThePool; Key TheKey(std::make_pair(T, VN)); @@ -1336,7 +1325,7 @@ Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const { } VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) { - typedef std::pair Key; + using Key = std::pair; static DenseMap ThePool; Key TheKey(std::make_pair(T, B)); @@ -1368,7 +1357,7 @@ Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const { VarListElementInit *VarListElementInit::get(TypedInit *T, unsigned E) { - typedef std::pair Key; + using Key = std::pair; static DenseMap ThePool; Key TheKey(std::make_pair(T, E)); @@ -1438,7 +1427,7 @@ std::string DefInit::getAsString() const { } FieldInit *FieldInit::get(Init *R, StringInit *FN) { - typedef std::pair Key; + using Key = std::pair; static DenseMap ThePool; Key TheKey(std::make_pair(R, FN)); @@ -1503,7 +1492,6 @@ DagInit * DagInit::get(Init *V, StringInit *VN, ArrayRef ArgRange, ArrayRef NameRange) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileDagInit(ID, V, VN, ArgRange, NameRange); @@ -1512,9 +1500,13 @@ DagInit::get(Init *V, StringInit *VN, ArrayRef ArgRange, if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP)) return I; - DagInit *I = new(Allocator) DagInit(V, VN, ArgRange, NameRange); + void *Mem = Allocator.Allocate(totalSizeToAlloc(ArgRange.size(), NameRange.size()), alignof(BitsInit)); + DagInit *I = new(Mem) DagInit(V, VN, ArgRange.size(), NameRange.size()); + std::uninitialized_copy(ArgRange.begin(), ArgRange.end(), + I->getTrailingObjects()); + std::uninitialized_copy(NameRange.begin(), NameRange.end(), + I->getTrailingObjects()); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -1533,7 +1525,7 @@ DagInit::get(Init *V, StringInit *VN, } void DagInit::Profile(FoldingSetNodeID &ID) const { - ProfileDagInit(ID, Val, ValName, Args, ArgNames); + ProfileDagInit(ID, Val, ValName, makeArrayRef(getTrailingObjects(), NumArgs), makeArrayRef(getTrailingObjects(), NumArgNames)); } Init *DagInit::convertInitializerTo(RecTy *Ty) const { @@ -1545,9 +1537,9 @@ Init *DagInit::convertInitializerTo(RecTy *Ty) const { Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const { SmallVector NewArgs; - NewArgs.reserve(Args.size()); + NewArgs.reserve(arg_size()); bool ArgsChanged = false; - for (const Init *Arg : Args) { + for (const Init *Arg : getArgs()) { Init *NewArg = Arg->resolveReferences(R, RV); NewArgs.push_back(NewArg); ArgsChanged |= NewArg != Arg; @@ -1555,7 +1547,7 @@ Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const { Init *Op = Val->resolveReferences(R, RV); if (Op != Val || ArgsChanged) - return DagInit::get(Op, ValName, NewArgs, ArgNames); + return DagInit::get(Op, ValName, NewArgs, getArgNames()); return const_cast(this); } @@ -1564,12 +1556,12 @@ std::string DagInit::getAsString() const { std::string Result = "(" + Val->getAsString(); if (ValName) Result += ":" + ValName->getAsUnquotedString(); - if (!Args.empty()) { - Result += " " + Args[0]->getAsString(); - if (ArgNames[0]) Result += ":$" + ArgNames[0]->getAsUnquotedString(); - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - Result += ", " + Args[i]->getAsString(); - if (ArgNames[i]) Result += ":$" + ArgNames[i]->getAsUnquotedString(); + if (!arg_empty()) { + Result += " " + getArg(0)->getAsString(); + if (getArgName(0)) Result += ":$" + getArgName(0)->getAsUnquotedString(); + for (unsigned i = 1, e = getNumArgs(); i != e; ++i) { + Result += ", " + getArg(i)->getAsString(); + if (getArgName(i)) Result += ":$" + getArgName(i)->getAsUnquotedString(); } } return Result + ")"; @@ -1585,12 +1577,6 @@ RecordVal::RecordVal(Init *N, RecTy *T, bool P) assert(Value && "Cannot create unset value for current type!"); } -RecordVal::RecordVal(StringRef N, RecTy *T, bool P) - : Name(StringInit::get(N)), TyAndPrefix(T, P) { - Value = UnsetInit::get()->convertInitializerTo(T); - assert(Value && "Cannot create unset value for current type!"); -} - StringRef RecordVal::getName() const { return cast(getNameInit())->getValue(); } @@ -1616,8 +1602,7 @@ void Record::init() { // Every record potentially has a def at the top. This value is // replaced with the top-level def name at instantiation time. - RecordVal DN("NAME", StringRecTy::get(), false); - addValue(DN); + addValue(RecordVal(StringInit::get("NAME"), StringRecTy::get(), false)); } void Record::checkName() { @@ -1653,10 +1638,6 @@ void Record::setName(Init *NewName) { // this. See TGParser::ParseDef and TGParser::ParseDefm. } -void Record::setName(StringRef Name) { - setName(StringInit::get(Name)); -} - void Record::resolveReferencesTo(const RecordVal *RV) { for (RecordVal &Value : Values) { if (RV == &Value) // Skip resolve the same field as the given one @@ -1727,7 +1708,7 @@ Init *Record::getValueInit(StringRef FieldName) const { return R->getValue(); } -std::string Record::getValueAsString(StringRef FieldName) const { +StringRef Record::getValueAsString(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + @@ -1806,10 +1787,10 @@ Record::getValueAsListOfInts(StringRef FieldName) const { return Ints; } -std::vector +std::vector Record::getValueAsListOfStrings(StringRef FieldName) const { ListInit *List = getValueAsListInit(FieldName); - std::vector Strings; + std::vector Strings; for (Init *I : List->getValues()) { if (StringInit *SI = dyn_cast(I)) Strings.push_back(SI->getValue()); diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp index a4d33051b4f707014cb3024cf926ae9314a2b986..733e0aeef6234814fb430b18327ef3e926533100 100644 --- a/lib/TableGen/SetTheory.cpp +++ b/lib/TableGen/SetTheory.cpp @@ -12,18 +12,29 @@ // //===----------------------------------------------------------------------===// -#include "llvm/TableGen/SetTheory.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Format.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" +#include +#include +#include +#include using namespace llvm; // Define the standard operators. namespace { -typedef SetTheory::RecSet RecSet; -typedef SetTheory::RecVec RecVec; +using RecSet = SetTheory::RecSet; +using RecVec = SetTheory::RecVec; // (add a, b, ...) Evaluate and union all arguments. struct AddOp : public SetTheory::Operator { @@ -237,13 +248,13 @@ struct FieldExpander : public SetTheory::Expander { ST.evaluate(Def->getValueInit(FieldName), Elts, Def->getLoc()); } }; + } // end anonymous namespace // Pin the vtables to this file. void SetTheory::Operator::anchor() {} void SetTheory::Expander::anchor() {} - SetTheory::SetTheory() { addOperator("add", llvm::make_unique()); addOperator("sub", llvm::make_unique()); @@ -321,4 +332,3 @@ const RecVec *SetTheory::expand(Record *Set) { // Set is not expandable. return nullptr; } - diff --git a/lib/TableGen/StringMatcher.cpp b/lib/TableGen/StringMatcher.cpp index 0c83da65e19ed891d0163080f91b3c4cb1ad2d55..7e510f0c2fdc033bc2fe095e07ae24d8c2025871 100644 --- a/lib/TableGen/StringMatcher.cpp +++ b/lib/TableGen/StringMatcher.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/TableGen/StringMatcher.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/TableGen/StringMatcher.h" #include #include #include diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 96015b06d798fe5de3448dc9782b607f79bb3e99..b492cf9495c02a86bb470dd9272a4e4d66ad1091 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -339,7 +339,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ if (!IVal) return Error(Loc, "foreach iterator value is untyped"); - IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false)); + IterRec->addValue(RecordVal(IterVar->getNameInit(), IVal->getType(), false)); if (SetValue(IterRec.get(), Loc, IterVar->getNameInit(), None, IVal)) return Error(Loc, "when instantiating this def"); @@ -378,8 +378,8 @@ static bool isObjectStart(tgtok::TokKind K) { /// GetNewAnonymousName - Generate a unique anonymous name that can be used as /// an identifier. -std::string TGParser::GetNewAnonymousName() { - return "anonymous_" + utostr(AnonCounter++); +Init *TGParser::GetNewAnonymousName() { + return StringInit::get("anonymous_" + utostr(AnonCounter++)); } /// ParseObjectName - If an object name is specified, return it. Otherwise, @@ -2350,7 +2350,7 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, bool IsAnonymous = false; if (!DefmPrefix) { - DefmPrefix = StringInit::get(GetNewAnonymousName()); + DefmPrefix = GetNewAnonymousName(); IsAnonymous = true; } diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index 76f7d8fe5026a3987a1eb8dc785cb274a78687e8..1b2966c9f6c9c950d27686becfec029332013f35 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -110,7 +110,7 @@ private: // Semantic analysis methods. bool AddSubMultiClass(MultiClass *CurMC, SubMultiClassReference &SubMultiClass); - std::string GetNewAnonymousName(); + Init *GetNewAnonymousName(); // IterRecord: Map an iterator name to a value. struct IterRecord { diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h index b44b13e36e15d3f6336f585a6703c06cc0e8c280..3e0e3978b90b5d14cb7fc0b1db774af781be5bd2 100644 --- a/lib/Target/AArch64/AArch64.h +++ b/lib/Target/AArch64/AArch64.h @@ -41,7 +41,6 @@ FunctionPass *createAArch64LoadStoreOptimizationPass(); FunctionPass *createAArch64VectorByElementOptPass(); ModulePass *createAArch64PromoteConstantPass(); FunctionPass *createAArch64ConditionOptimizerPass(); -FunctionPass *createAArch64AddressTypePromotionPass(); FunctionPass *createAArch64A57FPLoadBalancing(); FunctionPass *createAArch64A53Fix835769(); @@ -54,7 +53,6 @@ createAArch64InstructionSelector(const AArch64TargetMachine &, void initializeAArch64A53Fix835769Pass(PassRegistry&); void initializeAArch64A57FPLoadBalancingPass(PassRegistry&); -void initializeAArch64AddressTypePromotionPass(PassRegistry&); void initializeAArch64AdvSIMDScalarPass(PassRegistry&); void initializeAArch64CollectLOHPass(PassRegistry&); void initializeAArch64ConditionalComparesPass(PassRegistry&); diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 519ca28946830be06dc55f6eaff6acdcee3c6e85..53eef79c4df3f4d954c357e20b7ea056e84a357a 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -190,6 +190,7 @@ def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, @@ -216,6 +217,7 @@ def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", FeatureCRC, FeatureCrypto, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon ]>; @@ -225,6 +227,7 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", FeatureCRC, FeatureCrypto, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon ]>; @@ -358,8 +361,8 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", FeatureNEON]>; def : ProcessorModel<"generic", NoSchedModel, [ - FeatureCRC, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp deleted file mode 100644 index e1b8ee6d03c3e36435a5ddef9cc35fba6a131667..0000000000000000000000000000000000000000 --- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp +++ /dev/null @@ -1,493 +0,0 @@ -//===-- AArch64AddressTypePromotion.cpp --- Promote type for addr accesses -==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass tries to promote the computations use to obtained a sign extended -// value used into memory accesses. -// E.g. -// a = add nsw i32 b, 3 -// d = sext i32 a to i64 -// e = getelementptr ..., i64 d -// -// => -// f = sext i32 b to i64 -// a = add nsw i64 f, 3 -// e = getelementptr ..., i64 a -// -// This is legal to do if the computations are marked with either nsw or nuw -// markers. Moreover, the current heuristic is simple: it does not create new -// sext operations, i.e., it gives up when a sext would have forked (e.g., if a -// = add i32 b, c, two sexts are required to promote the computation). -// -// FIXME: This pass may be useful for other targets too. -// ===---------------------------------------------------------------------===// - -#include "AArch64.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Use.h" -#include "llvm/IR/User.h" -#include "llvm/Pass.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include - -using namespace llvm; - -#define DEBUG_TYPE "aarch64-type-promotion" - -static cl::opt -EnableMerge("aarch64-type-promotion-merge", cl::Hidden, - cl::desc("Enable merging of redundant sexts when one is dominating" - " the other."), - cl::init(true)); - -#define AARCH64_TYPE_PROMO_NAME "AArch64 Address Type Promotion" - -//===----------------------------------------------------------------------===// -// AArch64AddressTypePromotion -//===----------------------------------------------------------------------===// - -namespace { - -class AArch64AddressTypePromotion : public FunctionPass { -public: - static char ID; - - AArch64AddressTypePromotion() : FunctionPass(ID) { - initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return AARCH64_TYPE_PROMO_NAME; } - - /// Iterate over the functions and promote the computation of interesting - // sext instructions. - bool runOnFunction(Function &F) override; - -private: - /// The current function. - Function *Func = nullptr; - - /// Filter out all sexts that does not have this type. - /// Currently initialized with Int64Ty. - Type *ConsideredSExtType = nullptr; - - // This transformation requires dominator info. - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - FunctionPass::getAnalysisUsage(AU); - } - - typedef SmallPtrSet SetOfInstructions; - typedef SmallVector Instructions; - typedef DenseMap ValueToInsts; - - /// Check if it is profitable to move a sext through this instruction. - /// Currently, we consider it is profitable if: - /// - Inst is used only once (no need to insert truncate). - /// - Inst has only one operand that will require a sext operation (we do - /// do not create new sext operation). - bool shouldGetThrough(const Instruction *Inst); - - /// Check if it is possible and legal to move a sext through this - /// instruction. - /// Current heuristic considers that we can get through: - /// - Arithmetic operation marked with the nsw or nuw flag. - /// - Other sext operation. - /// - Truncate operation if it was just dropping sign extended bits. - bool canGetThrough(const Instruction *Inst); - - /// Move sext operations through safe to sext instructions. - bool propagateSignExtension(Instructions &SExtInsts); - - /// Is this sext should be considered for code motion. - /// We look for sext with ConsideredSExtType and uses in at least one - // GetElementPtrInst. - bool shouldConsiderSExt(const Instruction *SExt) const; - - /// Collect all interesting sext operations, i.e., the ones with the right - /// type and used in memory accesses. - /// More precisely, a sext instruction is considered as interesting if it - /// is used in a "complex" getelementptr or it exits at least another - /// sext instruction that sign extended the same initial value. - /// A getelementptr is considered as "complex" if it has more than 2 - // operands. - void analyzeSExtension(Instructions &SExtInsts); - - /// Merge redundant sign extension operations in common dominator. - void mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove); -}; - -} // end anonymous namespace - -char AArch64AddressTypePromotion::ID = 0; - -INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion", - AARCH64_TYPE_PROMO_NAME, false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion", - AARCH64_TYPE_PROMO_NAME, false, false) - -FunctionPass *llvm::createAArch64AddressTypePromotionPass() { - return new AArch64AddressTypePromotion(); -} - -bool AArch64AddressTypePromotion::canGetThrough(const Instruction *Inst) { - if (isa(Inst)) - return true; - - const BinaryOperator *BinOp = dyn_cast(Inst); - if (BinOp && isa(BinOp) && - (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap())) - return true; - - // sext(trunc(sext)) --> sext - if (isa(Inst) && isa(Inst->getOperand(0))) { - const Instruction *Opnd = cast(Inst->getOperand(0)); - // Check that the truncate just drop sign extended bits. - if (Inst->getType()->getIntegerBitWidth() >= - Opnd->getOperand(0)->getType()->getIntegerBitWidth() && - Inst->getOperand(0)->getType()->getIntegerBitWidth() <= - ConsideredSExtType->getIntegerBitWidth()) - return true; - } - - return false; -} - -bool AArch64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) { - // If the type of the sext is the same as the considered one, this sext - // will become useless. - // Otherwise, we will have to do something to preserve the original value, - // unless it is used once. - if (isa(Inst) && - (Inst->getType() == ConsideredSExtType || Inst->hasOneUse())) - return true; - - // If the Inst is used more that once, we may need to insert truncate - // operations and we don't do that at the moment. - if (!Inst->hasOneUse()) - return false; - - // This truncate is used only once, thus if we can get thourgh, it will become - // useless. - if (isa(Inst)) - return true; - - // If both operands are not constant, a new sext will be created here. - // Current heuristic is: each step should be profitable. - // Therefore we don't allow to increase the number of sext even if it may - // be profitable later on. - if (isa(Inst) && isa(Inst->getOperand(1))) - return true; - - return false; -} - -static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { - return !(isa(Inst) && OpIdx == 0); -} - -bool -AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { - if (SExt->getType() != ConsideredSExtType) - return false; - - for (const User *U : SExt->users()) { - if (isa(U)) - return true; - } - - return false; -} - -// Input: -// - SExtInsts contains all the sext instructions that are used directly in -// GetElementPtrInst, i.e., access to memory. -// Algorithm: -// - For each sext operation in SExtInsts: -// Let var be the operand of sext. -// while it is profitable (see shouldGetThrough), legal, and safe -// (see canGetThrough) to move sext through var's definition: -// * promote the type of var's definition. -// * fold var into sext uses. -// * move sext above var's definition. -// * update sext operand to use the operand of var that should be sign -// extended (by construction there is only one). -// -// E.g., -// a = ... i32 c, 3 -// b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a' -// ... -// = b -// => Yes, update the code -// b = sext i32 c to i64 -// a = ... i64 b, 3 -// ... -// = a -// Iterate on 'c'. -bool -AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { - DEBUG(dbgs() << "*** Propagate Sign Extension ***\n"); - - bool LocalChange = false; - SetOfInstructions ToRemove; - ValueToInsts ValToSExtendedUses; - while (!SExtInsts.empty()) { - // Get through simple chain. - Instruction *SExt = SExtInsts.pop_back_val(); - - DEBUG(dbgs() << "Consider:\n" << *SExt << '\n'); - - // If this SExt has already been merged continue. - if (SExt->use_empty() && ToRemove.count(SExt)) { - DEBUG(dbgs() << "No uses => marked as delete\n"); - continue; - } - - // Now try to get through the chain of definitions. - while (auto *Inst = dyn_cast(SExt->getOperand(0))) { - DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n'); - if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) { - // We cannot get through something that is not an Instruction - // or not safe to SExt. - DEBUG(dbgs() << "Cannot get through\n"); - break; - } - - LocalChange = true; - // If this is a sign extend, it becomes useless. - if (isa(Inst) || isa(Inst)) { - DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n"); - // We cannot use replaceAllUsesWith here because we may trigger some - // assertion on the type as all involved sext operation may have not - // been moved yet. - while (!Inst->use_empty()) { - Use &U = *Inst->use_begin(); - Instruction *User = dyn_cast(U.getUser()); - assert(User && "User of sext is not an Instruction!"); - User->setOperand(U.getOperandNo(), SExt); - } - ToRemove.insert(Inst); - SExt->setOperand(0, Inst->getOperand(0)); - SExt->moveBefore(Inst); - continue; - } - - // Get through the Instruction: - // 1. Update its type. - // 2. Replace the uses of SExt by Inst. - // 3. Sign extend each operand that needs to be sign extended. - - // Step #1. - Inst->mutateType(SExt->getType()); - // Step #2. - SExt->replaceAllUsesWith(Inst); - // Step #3. - Instruction *SExtForOpnd = SExt; - - DEBUG(dbgs() << "Propagate SExt to operands\n"); - for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx; - ++OpIdx) { - DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n'); - if (Inst->getOperand(OpIdx)->getType() == SExt->getType() || - !shouldSExtOperand(Inst, OpIdx)) { - DEBUG(dbgs() << "No need to propagate\n"); - continue; - } - // Check if we can statically sign extend the operand. - Value *Opnd = Inst->getOperand(OpIdx); - if (const ConstantInt *Cst = dyn_cast(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(), - Cst->getSExtValue())); - continue; - } - // UndefValue are typed, so we have to statically sign extend them. - if (isa(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - Inst->setOperand(OpIdx, UndefValue::get(SExt->getType())); - continue; - } - - // Otherwise we have to explicity sign extend it. - assert(SExtForOpnd && - "Only one operand should have been sign extended"); - - SExtForOpnd->setOperand(0, Opnd); - - DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n"); - // Move the sign extension before the insertion point. - SExtForOpnd->moveBefore(Inst); - Inst->setOperand(OpIdx, SExtForOpnd); - // If more sext are required, new instructions will have to be created. - SExtForOpnd = nullptr; - } - if (SExtForOpnd == SExt) { - DEBUG(dbgs() << "Sign extension is useless now\n"); - ToRemove.insert(SExt); - break; - } - } - - // If the use is already of the right type, connect its uses to its argument - // and delete it. - // This can happen for an Instruction all uses of which are sign extended. - if (!ToRemove.count(SExt) && - SExt->getType() == SExt->getOperand(0)->getType()) { - DEBUG(dbgs() << "Sign extension is useless, attach its use to " - "its argument\n"); - SExt->replaceAllUsesWith(SExt->getOperand(0)); - ToRemove.insert(SExt); - } else - ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt); - } - - if (EnableMerge) - mergeSExts(ValToSExtendedUses, ToRemove); - - // Remove all instructions marked as ToRemove. - for (Instruction *I: ToRemove) - I->eraseFromParent(); - return LocalChange; -} - -void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove) { - DominatorTree &DT = getAnalysis().getDomTree(); - - for (auto &Entry : ValToSExtendedUses) { - Instructions &Insts = Entry.second; - Instructions CurPts; - for (Instruction *Inst : Insts) { - if (ToRemove.count(Inst)) - continue; - bool inserted = false; - for (auto &Pt : CurPts) { - if (DT.dominates(Inst, Pt)) { - DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n" - << *Inst << '\n'); - Pt->replaceAllUsesWith(Inst); - ToRemove.insert(Pt); - Pt = Inst; - inserted = true; - break; - } - if (!DT.dominates(Pt, Inst)) - // Give up if we need to merge in a common dominator as the - // expermients show it is not profitable. - continue; - - DEBUG(dbgs() << "Replace all uses of:\n" << *Inst << "\nwith:\n" - << *Pt << '\n'); - Inst->replaceAllUsesWith(Pt); - ToRemove.insert(Inst); - inserted = true; - break; - } - if (!inserted) - CurPts.push_back(Inst); - } - } -} - -void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { - DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n"); - - DenseMap SeenChains; - - for (auto &BB : *Func) { - for (auto &II : BB) { - Instruction *SExt = &II; - - // Collect all sext operation per type. - if (!isa(SExt) || !shouldConsiderSExt(SExt)) - continue; - - DEBUG(dbgs() << "Found:\n" << (*SExt) << '\n'); - - // Cases where we actually perform the optimization: - // 1. SExt is used in a getelementptr with more than 2 operand => - // likely we can merge some computation if they are done on 64 bits. - // 2. The beginning of the SExt chain is SExt several time. => - // code sharing is possible. - - bool insert = false; - // #1. - for (const User *U : SExt->users()) { - const Instruction *Inst = dyn_cast(U); - if (Inst && Inst->getNumOperands() > 2) { - DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst - << '\n'); - insert = true; - break; - } - } - - // #2. - // Check the head of the chain. - Instruction *Inst = SExt; - Value *Last; - do { - int OpdIdx = 0; - const BinaryOperator *BinOp = dyn_cast(Inst); - if (BinOp && isa(BinOp->getOperand(0))) - OpdIdx = 1; - Last = Inst->getOperand(OpdIdx); - Inst = dyn_cast(Last); - } while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst)); - - DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n'); - DenseMap::iterator AlreadySeen = - SeenChains.find(Last); - if (insert || AlreadySeen != SeenChains.end()) { - DEBUG(dbgs() << "Insert\n"); - SExtInsts.push_back(SExt); - if (AlreadySeen != SeenChains.end() && AlreadySeen->second != nullptr) { - DEBUG(dbgs() << "Insert chain member\n"); - SExtInsts.push_back(AlreadySeen->second); - SeenChains[Last] = nullptr; - } - } else { - DEBUG(dbgs() << "Record its chain membership\n"); - SeenChains[Last] = SExt; - } - } - } -} - -bool AArch64AddressTypePromotion::runOnFunction(Function &F) { - if (skipFunction(F)) - return false; - - if (F.isDeclaration()) - return false; - Func = &F; - ConsideredSExtType = Type::getInt64Ty(Func->getContext()); - - DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n'); - - Instructions SExtInsts; - analyzeSExtension(SExtInsts); - return propagateSignExtension(SExtInsts); -} diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index efc221893782f0ad86ff75f86643359ef5454de7..5ce57926cc0364b244b773f2a89e8a5c52ba986c 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -12,13 +12,13 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64.h" #include "AArch64MCInstLower.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "InstPrinter/AArch64InstPrinter.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" @@ -35,11 +35,11 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCLinkerOptimizationHint.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -320,6 +320,9 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, switch (ExtraCode[0]) { default: return true; // Unknown modifier. + case 'a': // Print 'a' modifier + PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O); + return false; case 'w': // Print W register case 'x': // Print X register if (MO.isReg()) @@ -388,7 +391,7 @@ bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) + if (ExtraCode && ExtraCode[0] && ExtraCode[0] != 'a') return true; // Unknown modifier. const MachineOperand &MO = MI->getOperand(OpNum); @@ -580,8 +583,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO_Sym = MI->getOperand(0); MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym); MCOperand Sym, SymTLSDescLo12, SymTLSDesc; - MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); + MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF); MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE); MCInstLowering.lowerOperand(MO_Sym, Sym); MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12); diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp index b2f55a7e1e09163351917b54da282ff191e1d7a5..29f6d571d6bd8494aadcb74b58f2b1afc10e3a20 100644 --- a/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/lib/Target/AArch64/AArch64CallLowering.cpp @@ -247,7 +247,7 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, unsigned i = 0; for (auto &Arg : F.args()) { ArgInfo OrigArg{VRegs[i], Arg.getType()}; - setArgFlags(OrigArg, i + 1, DL, F); + setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F); bool Split = false; LLT Ty = MRI.getType(VRegs[i]); unsigned Dst = VRegs[i]; @@ -380,7 +380,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MIRBuilder.buildSequence(OrigRet.Reg, SplitRegs, RegOffsets); } - CallSeqStart.addImm(Handler.StackSize); + CallSeqStart.addImm(Handler.StackSize).addImm(0); MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP) .addImm(Handler.StackSize) .addImm(0); diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 30e2b2310456bac17ba4439fbf2780bf4fc6eb2f..544f67433fd538a2d2cee71c4d5984678cbd88da 100644 --- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "aarch64-dead-defs" diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index d0c0956b87ca8fb66e6a2a79d043a525c144be55..160107cd7e2bdbb94d9bb21e7d128f40a3d88e69 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -14,9 +14,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -584,27 +584,21 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, return true; } -static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) { - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - MBB->addLiveIn(*I); -} - bool AArch64ExpandPseudo::expandCMP_SWAP( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp, unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg, MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); - MachineOperand &Dest = MI.getOperand(0); + const MachineOperand &Dest = MI.getOperand(0); unsigned StatusReg = MI.getOperand(1).getReg(); - MachineOperand &Addr = MI.getOperand(2); - MachineOperand &Desired = MI.getOperand(3); - MachineOperand &New = MI.getOperand(4); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + bool StatusDead = MI.getOperand(1).isDead(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; However undef should be replaced by xzr anyway. + assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned DesiredReg = MI.getOperand(3).getReg(); + unsigned NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -616,19 +610,18 @@ bool AArch64ExpandPseudo::expandCMP_SWAP( MF->insert(++StoreBB->getIterator(), DoneBB); // .Lloadcmp: + // mov wStatus, 0 // ldaxr xDest, [xAddr] // cmp xDest, xDesired // b.ne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(Dest.getReg()); - LoadCmpBB->addLiveIn(Desired.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); - + if (!StatusDead) + BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg) + .addImm(0).addImm(0); BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg()) - .addReg(Addr.getReg()); + .addReg(AddrReg); BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg) .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) - .add(Desired) + .addReg(DesiredReg) .addImm(ExtendImm); BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc)) .addImm(AArch64CC::NE) @@ -640,25 +633,35 @@ bool AArch64ExpandPseudo::expandCMP_SWAP( // .Lstore: // stlxr wStatus, xNew, [xAddr] // cbnz wStatus, .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(New.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); - - BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr); + BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg) + .addReg(NewReg) + .addReg(AddrReg); BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW)) - .addReg(StatusReg, RegState::Kill) + .addReg(StatusReg, getKillRegState(StatusDead)) .addMBB(LoadCmpBB); StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(DoneBB); DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute livein lists. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass around the loop to get loop carried registers right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } @@ -671,16 +674,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( MachineOperand &DestLo = MI.getOperand(0); MachineOperand &DestHi = MI.getOperand(1); unsigned StatusReg = MI.getOperand(2).getReg(); - MachineOperand &Addr = MI.getOperand(3); - MachineOperand &DesiredLo = MI.getOperand(4); - MachineOperand &DesiredHi = MI.getOperand(5); - MachineOperand &NewLo = MI.getOperand(6); - MachineOperand &NewHi = MI.getOperand(7); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + bool StatusDead = MI.getOperand(2).isDead(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; However undef should be replaced by xzr anyway. + assert(!MI.getOperand(3).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(3).getReg(); + unsigned DesiredLoReg = MI.getOperand(4).getReg(); + unsigned DesiredHiReg = MI.getOperand(5).getReg(); + unsigned NewLoReg = MI.getOperand(6).getReg(); + unsigned NewHiReg = MI.getOperand(7).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -696,20 +698,13 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( // cmp xDestLo, xDesiredLo // sbcs xDestHi, xDesiredHi // b.ne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(DestLo.getReg()); - LoadCmpBB->addLiveIn(DestHi.getReg()); - LoadCmpBB->addLiveIn(DesiredLo.getReg()); - LoadCmpBB->addLiveIn(DesiredHi.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); - BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX)) .addReg(DestLo.getReg(), RegState::Define) .addReg(DestHi.getReg(), RegState::Define) - .addReg(Addr.getReg()); + .addReg(AddrReg); BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead())) - .add(DesiredLo) + .addReg(DesiredLoReg) .addImm(0); BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg) .addUse(AArch64::WZR) @@ -717,14 +712,14 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( .addImm(AArch64CC::EQ); BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead())) - .add(DesiredHi) + .addReg(DesiredHiReg) .addImm(0); BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg) .addUse(StatusReg, RegState::Kill) .addUse(StatusReg, RegState::Kill) .addImm(AArch64CC::EQ); BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW)) - .addUse(StatusReg, RegState::Kill) + .addUse(StatusReg, getKillRegState(StatusDead)) .addMBB(DoneBB); LoadCmpBB->addSuccessor(DoneBB); LoadCmpBB->addSuccessor(StoreBB); @@ -732,28 +727,36 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( // .Lstore: // stlxp wStatus, xNewLo, xNewHi, [xAddr] // cbnz wStatus, .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(NewLo.getReg()); - StoreBB->addLiveIn(NewHi.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg) - .add(NewLo) - .add(NewHi) - .add(Addr); + .addReg(NewLoReg) + .addReg(NewHiReg) + .addReg(AddrReg); BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW)) - .addReg(StatusReg, RegState::Kill) + .addReg(StatusReg, getKillRegState(StatusDead)) .addMBB(LoadCmpBB); StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(DoneBB); DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute liveness bottom up. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass in the loop to get the loop carried dependencies right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } @@ -942,6 +945,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, AArch64::XZR, NextMBBI); case AArch64::CMP_SWAP_128: return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); + } return false; } diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 4e5e3e43a468798df05cc75bbc2d9eb948093ca7..7bf2097c17cee1ff22d9fd4ac9468682e84707c0 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1282,6 +1282,10 @@ unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, bool WantResult) { assert(LHSReg && RHSReg && "Invalid register number."); + if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || + RHSReg == AArch64::SP || RHSReg == AArch64::WSP) + return 0; + if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; @@ -1362,6 +1366,8 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, uint64_t ShiftImm, bool SetFlags, bool WantResult) { assert(LHSReg && RHSReg && "Invalid register number."); + assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && + RHSReg != AArch64::SP && RHSReg != AArch64::WSP); if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; @@ -1403,6 +1409,8 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, uint64_t ShiftImm, bool SetFlags, bool WantResult) { assert(LHSReg && RHSReg && "Invalid register number."); + assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && + RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; @@ -2827,7 +2835,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { return false; EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); - if (SrcVT == MVT::f128) + if (SrcVT == MVT::f128 || SrcVT == MVT::f16) return false; unsigned Opc; @@ -2854,6 +2862,10 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { MVT DestVT; if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) return false; + // Let regular ISEL handle FP16 + if (DestVT == MVT::f16) + return false; + assert((DestVT == MVT::f32 || DestVT == MVT::f64) && "Unexpected value type."); @@ -2907,16 +2919,13 @@ bool AArch64FastISel::fastLowerArguments() { // Only handle simple cases of up to 8 GPR and FPR each. unsigned GPRCnt = 0; unsigned FPRCnt = 0; - unsigned Idx = 0; for (auto const &Arg : F->args()) { - // The first argument is at index 1. - ++Idx; - if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || - F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || - F->getAttributes().hasAttribute(Idx, Attribute::Nest)) + if (Arg.hasAttribute(Attribute::ByVal) || + Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::Nest)) return false; Type *ArgTy = Arg.getType(); @@ -3017,7 +3026,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(NumBytes); + .addImm(NumBytes).addImm(0); // Process the args. for (CCValAssign &VA : ArgLocs) { diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 550174b22a8960a9f693090e471dcce299cfe131..e96ee7d29b3e85e79eee286c15847aa54f55b274 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -137,6 +137,34 @@ static cl::opt EnableRedZone("aarch64-redzone", STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); +/// Look at each instruction that references stack frames and return the stack +/// size limit beyond which some of these instructions will require a scratch +/// register during their expansion later. +static unsigned estimateRSStackSizeLimit(MachineFunction &MF) { + // FIXME: For now, just conservatively guestimate based on unscaled indexing + // range. We'll end up allocating an unnecessary spill slot a lot, but + // realistically that's not a big deal at this stage of the game. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.isDebugValue() || MI.isPseudo() || + MI.getOpcode() == AArch64::ADDXri || + MI.getOpcode() == AArch64::ADDSXri) + continue; + + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (!MI.getOperand(i).isFI()) + continue; + + int Offset = 0; + if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) == + AArch64FrameOffsetCannotUpdate) + return 0; + } + } + } + return 255; +} + bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -267,12 +295,12 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { return AArch64::X9; const AArch64Subtarget &Subtarget = MF->getSubtarget(); - const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo(); + const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); LivePhysRegs LiveRegs(TRI); LiveRegs.addLiveIns(*MBB); // Mark callee saved registers as used so we will not choose them. - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF); for (unsigned i = 0; CSRegs[i]; ++i) LiveRegs.addReg(CSRegs[i]); @@ -991,6 +1019,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( SmallVector RegPairs; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); + const MachineRegisterInfo &MRI = MF.getRegInfo(); for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { @@ -1022,9 +1051,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( dbgs() << ")\n"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); - MBB.addLiveIn(Reg1); + if (!MRI.isReserved(Reg1)) + MBB.addLiveIn(Reg1); if (RPI.isPaired()) { - MBB.addLiveIn(Reg2); + if (!MRI.isReserved(Reg2)) + MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), @@ -1125,7 +1156,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, if (RegInfo->hasBasePointer(MF)) BasePointerReg = RegInfo->getBaseRegister(); - bool ExtraCSSpill = false; + unsigned ExtraCSSpill = 0; const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Figure out which callee-saved registers to save/restore. for (unsigned i = 0; CSRegs[i]; ++i) { @@ -1153,13 +1184,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(PairedReg); if (AArch64::GPR64RegClass.contains(PairedReg) && !RegInfo->isReservedReg(MF, PairedReg)) - ExtraCSSpill = true; + ExtraCSSpill = PairedReg; } } DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; - for (int Reg = SavedRegs.find_first(); Reg != -1; - Reg = SavedRegs.find_next(Reg)) + for (unsigned Reg : SavedRegs.set_bits()) dbgs() << ' ' << PrintReg(Reg, RegInfo); dbgs() << "\n";); @@ -1167,16 +1197,13 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned NumRegsSpilled = SavedRegs.count(); bool CanEliminateFrame = NumRegsSpilled == 0; - // FIXME: Set BigStack if any stack slot references may be out of range. - // For now, just conservatively guestimate based on unscaled indexing - // range. We'll end up allocating an unnecessary spill slot a lot, but - // realistically that's not a big deal at this stage of the game. // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled; DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); - bool BigStack = (CFSize >= 256); + unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); + bool BigStack = (CFSize > EstimatedStackSizeLimit); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); @@ -1186,8 +1213,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // register scavenging. If we already spilled an extra callee-saved register // above to keep the number of spills even, we don't need to do anything else // here. - if (BigStack && !ExtraCSSpill) { - if (UnspilledCSGPR != AArch64::NoRegister) { + if (BigStack) { + if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) { DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo) << " to get a scratch register.\n"); SavedRegs.set(UnspilledCSGPR); @@ -1196,15 +1223,18 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // store the pair. if (produceCompactUnwindFrame(MF)) SavedRegs.set(UnspilledCSGPRPaired); - ExtraCSSpill = true; + ExtraCSSpill = UnspilledCSGPRPaired; NumRegsSpilled = SavedRegs.count(); } // If we didn't find an extra callee-saved register to spill, create // an emergency spill slot. - if (!ExtraCSSpill) { - const TargetRegisterClass *RC = &AArch64::GPR64RegClass; - int FI = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), false); + if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass &RC = AArch64::GPR64RegClass; + unsigned Size = TRI->getSpillSize(RC); + unsigned Align = TRI->getSpillAlignment(RC); + int FI = MFI.CreateStackObject(Size, Align, false); RS->addScavengingFrameIndex(FI); DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI << " as the emergency spill slot.\n"); diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index ae01ea477bb9a0d4943851972f4c7f3dc59ee7e1..8c2c0a564c30265e05ee31e77856d9539495ffa0 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -1852,20 +1853,20 @@ static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, OpUsefulBits = 1; if (MSB >= Imm) { - OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); + OpUsefulBits <<= MSB - Imm + 1; --OpUsefulBits; // The interesting part will be in the lower part of the result getUsefulBits(Op, OpUsefulBits, Depth + 1); // The interesting part was starting at Imm in the argument - OpUsefulBits = OpUsefulBits.shl(Imm); + OpUsefulBits <<= Imm; } else { - OpUsefulBits = OpUsefulBits.shl(MSB + 1); + OpUsefulBits <<= MSB + 1; --OpUsefulBits; // The interesting part will be shifted in the result - OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); + OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; getUsefulBits(Op, OpUsefulBits, Depth + 1); // The interesting part was at zero in the argument - OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); + OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); } UsefulBits &= OpUsefulBits; @@ -1892,17 +1893,17 @@ static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { // Shift Left uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.shl(ShiftAmt); + Mask <<= ShiftAmt; getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.lshr(ShiftAmt); + Mask.lshrInPlace(ShiftAmt); } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { // Shift Right // We do not handle AArch64_AM::ASR, because the sign will change the // number of useful bits uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.lshr(ShiftAmt); + Mask.lshrInPlace(ShiftAmt); getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.shl(ShiftAmt); + Mask <<= ShiftAmt; } else return; @@ -1930,13 +1931,13 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, uint64_t Width = MSB - Imm + 1; uint64_t LSB = Imm; - OpUsefulBits = OpUsefulBits.shl(Width); + OpUsefulBits <<= Width; --OpUsefulBits; if (Op.getOperand(1) == Orig) { // Copy the low bits from the result to bits starting from LSB. Mask = ResultUsefulBits & OpUsefulBits; - Mask = Mask.shl(LSB); + Mask <<= LSB; } if (Op.getOperand(0) == Orig) @@ -1947,14 +1948,14 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, uint64_t Width = MSB + 1; uint64_t LSB = UsefulBits.getBitWidth() - Imm; - OpUsefulBits = OpUsefulBits.shl(Width); + OpUsefulBits <<= Width; --OpUsefulBits; - OpUsefulBits = OpUsefulBits.shl(LSB); + OpUsefulBits <<= LSB; if (Op.getOperand(1) == Orig) { // Copy the bits from the result to the zero bits. Mask = ResultUsefulBits & OpUsefulBits; - Mask = Mask.lshr(LSB); + Mask.lshrInPlace(LSB); } if (Op.getOperand(0) == Orig) @@ -2078,18 +2079,18 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, (void)BitWidth; assert(BitWidth == 32 || BitWidth == 64); - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(Op, KnownZero, KnownOne); + KnownBits Known; + CurDAG->computeKnownBits(Op, Known); // Non-zero in the sense that they're not provably zero, which is the key // point if we want to use this value - uint64_t NonZeroBits = (~KnownZero).getZExtValue(); + uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); // Discard a constant AND mask if present. It's safe because the node will // already have been factored into the computeKnownBits calculation above. uint64_t AndImm; if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { - assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0); + assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); Op = Op.getOperand(0); } @@ -2158,15 +2159,15 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { // Compute the Known Zero for the AND as this allows us to catch more general // cases than just looking for AND with imm. - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(And, KnownZero, KnownOne); + KnownBits Known; + CurDAG->computeKnownBits(And, Known); // Non-zero in the sense that they're not provably zero, which is the key // point if we want to use this value. - uint64_t NotKnownZero = (~KnownZero).getZExtValue(); + uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). - if (!isShiftedMask(KnownZero.getZExtValue(), VT)) + if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) return false; // The bits being inserted must only set those bits that are known to be zero. @@ -2300,15 +2301,15 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, // This allows to catch more general case than just looking for // AND with imm. Indeed, simplify-demanded-bits may have removed // the AND instruction because it proves it was useless. - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne); + KnownBits Known; + CurDAG->computeKnownBits(OrOpd1Val, Known); // Check if there is enough room for the second operand to appear // in the first one APInt BitsToBeInserted = - APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width); + APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); - if ((BitsToBeInserted & ~KnownZero) != 0) + if ((BitsToBeInserted & ~Known.Zero) != 0) continue; // Set the first operand @@ -2565,7 +2566,7 @@ bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { // pstatefield for the MSR (immediate) instruction, we also require that an // immediate value has been provided as an argument, we know that this is // the case as it has been ensured by semantic checking. - auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());; + auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); if (PMapper) { assert (isa(N->getOperand(2)) && "Expected a constant integer expression."); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 504cb5615b69bd04a13ece712d0fa31feae3677b..083ca2156598f16ff2dfd1938cbd1d56dcc422da 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "AArch64ISelLowering.h" #include "AArch64CallingConvention.h" #include "AArch64MachineFunctionInfo.h" -#include "AArch64ISelLowering.h" #include "AArch64PerfectShuffle.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" @@ -22,9 +22,9 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -51,10 +51,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" @@ -67,6 +67,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetCallingConv.h" @@ -91,6 +92,7 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); +STATISTIC(NumOptimizedImms, "Number of times immediates were optimized"); static cl::opt EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, @@ -105,6 +107,12 @@ cl::opt EnableAArch64ELFLocalDynamicTLSGeneration( cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +static cl::opt +EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, + cl::desc("Enable AArch64 logical imm instruction " + "optimization"), + cl::init(true)); + /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; @@ -373,7 +381,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand); setOperationAction(ISD::FNEG, MVT::v4f16, Expand); setOperationAction(ISD::FPOW, MVT::v4f16, Expand); - setOperationAction(ISD::FPOWI, MVT::v4f16, Expand); setOperationAction(ISD::FREM, MVT::v4f16, Expand); setOperationAction(ISD::FROUND, MVT::v4f16, Expand); setOperationAction(ISD::FRINT, MVT::v4f16, Expand); @@ -405,7 +412,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand); setOperationAction(ISD::FNEG, MVT::v8f16, Expand); setOperationAction(ISD::FPOW, MVT::v8f16, Expand); - setOperationAction(ISD::FPOWI, MVT::v8f16, Expand); setOperationAction(ISD::FREM, MVT::v8f16, Expand); setOperationAction(ISD::FROUND, MVT::v8f16, Expand); setOperationAction(ISD::FRINT, MVT::v8f16, Expand); @@ -545,7 +551,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; @@ -651,6 +656,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); + // Vector reductions + for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + } + for (MVT VT : MVT::fp_valuetypes()) { + setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); + } + setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); // Likewise, narrowing and extending vector loads/stores aren't handled @@ -706,7 +724,6 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); - setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); @@ -750,6 +767,9 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { setOperationAction(ISD::FP_TO_SINT, VT, Custom); setOperationAction(ISD::FP_TO_UINT, VT, Custom); + if (!VT.isFloatingPoint()) + setOperationAction(ISD::ABS, VT, Legal); + // [SU][MIN|MAX] are available for all NEON types apart from i64. if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) @@ -787,21 +807,157 @@ EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, return VT.changeVectorElementTypeToInteger(); } +static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, + const APInt &Demanded, + TargetLowering::TargetLoweringOpt &TLO, + unsigned NewOpc) { + uint64_t OldImm = Imm, NewImm, Enc; + uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; + + // Return if the immediate is already all zeros, all ones, a bimm32 or a + // bimm64. + if (Imm == 0 || Imm == Mask || + AArch64_AM::isLogicalImmediate(Imm & Mask, Size)) + return false; + + unsigned EltSize = Size; + uint64_t DemandedBits = Demanded.getZExtValue(); + + // Clear bits that are not demanded. + Imm &= DemandedBits; + + while (true) { + // The goal here is to set the non-demanded bits in a way that minimizes + // the number of switching between 0 and 1. In order to achieve this goal, + // we set the non-demanded bits to the value of the preceding demanded bits. + // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a + // non-demanded bit), we copy bit0 (1) to the least significant 'x', + // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'. + // The final result is 0b11000011. + uint64_t NonDemandedBits = ~DemandedBits; + uint64_t InvertedImm = ~Imm & DemandedBits; + uint64_t RotatedImm = + ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & + NonDemandedBits; + uint64_t Sum = RotatedImm + NonDemandedBits; + bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); + uint64_t Ones = (Sum + Carry) & NonDemandedBits; + NewImm = (Imm | Ones) & Mask; + + // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate + // or all-ones or all-zeros, in which case we can stop searching. Otherwise, + // we halve the element size and continue the search. + if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask))) + break; + + // We cannot shrink the element size any further if it is 2-bits. + if (EltSize == 2) + return false; + + EltSize /= 2; + Mask >>= EltSize; + uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize; + + // Return if there is mismatch in any of the demanded bits of Imm and Hi. + if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0) + return false; + + // Merge the upper and lower halves of Imm and DemandedBits. + Imm |= Hi; + DemandedBits |= DemandedBitsHi; + } + + ++NumOptimizedImms; + + // Replicate the element across the register width. + while (EltSize < Size) { + NewImm |= NewImm << EltSize; + EltSize *= 2; + } + + (void)OldImm; + assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 && + "demanded bits should never be altered"); + assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm"); + + // Create the new constant immediate node. + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue New; + + // If the new constant immediate is all-zeros or all-ones, let the target + // independent DAG combine optimize this node. + if (NewImm == 0 || NewImm == OrigMask) { + New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), + TLO.DAG.getConstant(NewImm, DL, VT)); + // Otherwise, create a machine node so that target independent DAG combine + // doesn't undo this optimization. + } else { + Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); + SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); + New = SDValue( + TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); + } + + return TLO.CombineTo(Op, New); +} + +bool AArch64TargetLowering::targetShrinkDemandedConstant( + SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const { + // Delay this optimization to as late as possible. + if (!TLO.LegalOps) + return false; + + if (!EnableOptimizeLogicalImm) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector()) + return false; + + unsigned Size = VT.getSizeInBits(); + assert((Size == 32 || Size == 64) && + "i32 or i64 is expected after legalization."); + + // Exit early if we demand all bits. + if (Demanded.countPopulation() == Size) + return false; + + unsigned NewOpc; + switch (Op.getOpcode()) { + default: + return false; + case ISD::AND: + NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri; + break; + case ISD::OR: + NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri; + break; + case ISD::XOR: + NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri; + break; + } + ConstantSDNode *C = dyn_cast(Op.getOperand(1)); + if (!C) + return false; + uint64_t Imm = C->getZExtValue(); + return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc); +} + /// computeKnownBitsForTargetNode - Determine which of the bits specified in -/// Mask are known to be either zero or one and return them in the -/// KnownZero/KnownOne bitsets. +/// Mask are known to be either zero or one and return them Known. void AArch64TargetLowering::computeKnownBitsForTargetNode( - const SDValue Op, APInt &KnownZero, APInt &KnownOne, + const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { switch (Op.getOpcode()) { default: break; case AArch64ISD::CSEL: { - APInt KnownZero2, KnownOne2; - DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1); - DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; + KnownBits Known2; + DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1); + DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; break; } case ISD::INTRINSIC_W_CHAIN: { @@ -811,10 +967,10 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( default: return; case Intrinsic::aarch64_ldaxr: case Intrinsic::aarch64_ldxr: { - unsigned BitWidth = KnownOne.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); EVT VT = cast(Op)->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); return; } } @@ -833,15 +989,15 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( // bits larger than the element datatype. 32-bit or larget doesn't need // this as those are legal types and will be handled by isel directly. MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); if (VT == MVT::v8i8 || VT == MVT::v16i8) { assert(BitWidth >= 8 && "Unexpected width!"); APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); - KnownZero |= Mask; + Known.Zero |= Mask; } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { assert(BitWidth >= 16 && "Unexpected width!"); APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); - KnownZero |= Mask; + Known.Zero |= Mask; } break; } break; @@ -2121,7 +2277,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); - StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); + StructType *RetTy = StructType::get(ArgTy, ArgTy); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) @@ -2341,6 +2497,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT); } + case Intrinsic::aarch64_neon_abs: + return DAG.getNode(ISD::ABS, dl, Op.getValueType(), + Op.getOperand(1)); case Intrinsic::aarch64_neon_smax: return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); @@ -2459,6 +2618,14 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerMUL(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + return LowerVECREDUCE(Op, DAG); } } @@ -3102,9 +3269,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL, - true), - DL); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy(DAG.getDataLayout())); @@ -3239,30 +3404,26 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. - if (getTargetMachine().getCodeModel() == CodeModel::Large && - Subtarget->isTargetMachO()) { - if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + if (auto *G = dyn_cast(Callee)) { + auto GV = G->getGlobal(); + if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) == + AArch64II::MO_GOT) { + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); + } else { const GlobalValue *GV = G->getGlobal(); - bool InternalLinkage = GV->hasInternalLinkage(); - if (InternalLinkage) - Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); - else { - Callee = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT); - Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); - } - } else if (ExternalSymbolSDNode *S = - dyn_cast(Callee)) { + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); + } + } else if (auto *S = dyn_cast(Callee)) { + if (getTargetMachine().getCodeModel() == CodeModel::Large && + Subtarget->isTargetMachO()) { const char *Sym = S->getSymbol(); Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT); Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); + } else { + const char *Sym = S->getSymbol(); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); } - } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - const char *Sym = S->getSymbol(); - Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); } // We don't usually want to end the call-sequence here because we would tidy @@ -3422,11 +3583,75 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Other Lowering Code //===----------------------------------------------------------------------===// +SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); +} + +// (loadGOT sym) +template +SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT); + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes instead of using a wrapper node. + return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr); +} + +// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym)) +template +SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG) + const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const unsigned char MO_NC = AArch64II::MO_NC; + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, Ty, + getTargetNode(N, Ty, DAG, AArch64II::MO_G3), + getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC)); +} + +// (addlow (adrp %hi(sym)) %lo(sym)) +template +SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE); + SDValue Lo = getTargetNode(N, Ty, DAG, + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo); +} + SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); - const GlobalAddressSDNode *GN = cast(Op); + GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); @@ -3434,32 +3659,15 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, assert(cast(Op)->getOffset() == 0 && "unexpected offset in global node"); - // This also catched the large code model case for Darwin. + // This also catches the large code model case for Darwin. if ((OpFlags & AArch64II::MO_GOT) != 0) { - SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes instead of using a wrapper node. - return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(GN, DAG); } if (getTargetMachine().getCodeModel() == CodeModel::Large) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(GN, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and - // the only correct model on Darwin. - SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - OpFlags | AArch64II::MO_PAGE); - unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC; - SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(GN, DAG); } } @@ -4236,90 +4444,37 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. JumpTableSDNode *JT = cast(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(JT, DAG); } - - SDValue Hi = - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(JT, DAG); } SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large) { // Use the GOT for the large code model on iOS. if (Subtarget->isTargetMachO()) { - SDValue GotAddr = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_GOT); - return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(CP, DAG); } - - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G3), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G2 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G1 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(CP, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on - // ELF, the only valid one on Darwin. - SDValue Hi = - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(CP, DAG); } } SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - const BlockAddress *BA = cast(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); + BlockAddressSDNode *BA = cast(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(BA, DAG); } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(BA, DAG); } } @@ -4716,9 +4871,9 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand, // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N) for (int i = ExtraSteps; i > 0; --i) { SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate, - &Flags); - Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, &Flags); - Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags); + Flags); + Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags); + Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags); } if (!Reciprocal) { @@ -4727,7 +4882,7 @@ SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand, SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ); - Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, &Flags); + Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags); // Correct the result if the operand is 0.0. Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, Eq, Operand, Estimate); @@ -4756,8 +4911,8 @@ SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand, // AArch64 reciprocal iteration instruction: (2 - M * N) for (int i = ExtraSteps; i > 0; --i) { SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand, - Estimate, &Flags); - Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags); + Estimate, Flags); + Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags); } ExtraSteps = 0; @@ -6590,14 +6745,19 @@ FailedModImm: if (!isConstant && !usesOnlyOneValue) { SDValue Vec = DAG.getUNDEF(VT); SDValue Op0 = Op.getOperand(0); - unsigned ElemSize = VT.getScalarSizeInBits(); unsigned i = 0; - // For 32 and 64 bit types, use SCALAR_TO_VECTOR for lane zero to + + // Use SCALAR_TO_VECTOR for lane zero to // a) Avoid a RMW dependency on the full vector register, and // b) Allow the register coalescer to fold away the copy if the // value is already in an S or D register, and we're forced to emit an // INSERT_SUBREG that we can't fold anywhere. - if (!Op0.isUndef() && (ElemSize == 32 || ElemSize == 64)) { + // + // We also allow types like i8 and i16 which are illegal scalar but legal + // vector element types. After type-legalization the inserted value is + // extended (i32) and it is safe to cast them to the vector type by ignoring + // the upper bits of the lowest lane (e.g. v8i8, v4i16). + if (!Op0.isUndef()) { Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0); ++i; } @@ -6988,6 +7148,47 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, return Cmp; } +static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, + SelectionDAG &DAG) { + SDValue VecOp = ScalarOp.getOperand(0); + auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx, + DAG.getConstant(0, DL, MVT::i64)); +} + +SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + switch (Op.getOpcode()) { + case ISD::VECREDUCE_ADD: + return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG); + case ISD::VECREDUCE_SMAX: + return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG); + case ISD::VECREDUCE_SMIN: + return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG); + case ISD::VECREDUCE_UMAX: + return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG); + case ISD::VECREDUCE_UMIN: + return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG); + case ISD::VECREDUCE_FMAX: { + assert(Op->getFlags().hasNoNaNs() && "fmax vector reduction needs NoNaN flag"); + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), + DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32), + Op.getOperand(0)); + } + case ISD::VECREDUCE_FMIN: { + assert(Op->getFlags().hasNoNaNs() && "fmin vector reduction needs NoNaN flag"); + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), + DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32), + Op.getOperand(0)); + } + default: + llvm_unreachable("Unhandled reduction"); + } +} + /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. @@ -7125,7 +7326,7 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const { if (I->getOpcode() != Instruction::FMul) return true; - if (I->getNumUses() != 1) + if (!I->hasOneUse()) return true; Instruction *User = I->user_back(); @@ -9018,16 +9219,26 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, // instructions (stp). SDLoc DL(&St); SDValue BasePtr = St.getBasePtr(); + uint64_t BaseOffset = 0; + const MachinePointerInfo &PtrInfo = St.getPointerInfo(); SDValue NewST1 = DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo, OrigAlignment, St.getMemOperand()->getFlags()); + // As this in ISel, we will not merge this add which may degrade results. + if (BasePtr->getOpcode() == ISD::ADD && + isa(BasePtr->getOperand(1))) { + BaseOffset = cast(BasePtr->getOperand(1))->getSExtValue(); + BasePtr = BasePtr->getOperand(0); + } + unsigned Offset = EltOffset; while (--NumVecElts) { unsigned Alignment = MinAlign(OrigAlignment, Offset); - SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, - DAG.getConstant(Offset, DL, MVT::i64)); + SDValue OffsetPtr = + DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, + DAG.getConstant(BaseOffset + Offset, DL, MVT::i64)); NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr, PtrInfo.getWithOffset(Offset), Alignment, St.getMemOperand()->getFlags()); @@ -9155,7 +9366,7 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return SDValue(); StoreSDNode *S = cast(N); - if (S->isVolatile()) + if (S->isVolatile() || S->isIndexed()) return SDValue(); SDValue StVal = S->getValue(); @@ -9325,11 +9536,11 @@ static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { APInt DemandedMask = APInt::getLowBitsSet(64, 56); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) { + if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) { DCI.CommitTargetLoweringOpt(TLO); return true; } @@ -9350,266 +9561,6 @@ static SDValue performSTORECombine(SDNode *N, return SDValue(); } -/// This function handles the log2-shuffle pattern produced by the -/// LoopVectorizer for the across vector reduction. It consists of -/// log2(NumVectorElements) steps and, in each step, 2^(s) elements -/// are reduced, where s is an induction variable from 0 to -/// log2(NumVectorElements). -static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV, - unsigned Op, - SelectionDAG &DAG) { - EVT VTy = OpV->getOperand(0).getValueType(); - if (!VTy.isVector()) - return SDValue(); - - int NumVecElts = VTy.getVectorNumElements(); - if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) { - if (NumVecElts != 4) - return SDValue(); - } else { - if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16) - return SDValue(); - } - - int NumExpectedSteps = APInt(8, NumVecElts).logBase2(); - SDValue PreOp = OpV; - // Iterate over each step of the across vector reduction. - for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) { - SDValue CurOp = PreOp.getOperand(0); - SDValue Shuffle = PreOp.getOperand(1); - if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) { - // Try to swap the 1st and 2nd operand as add and min/max instructions - // are commutative. - CurOp = PreOp.getOperand(1); - Shuffle = PreOp.getOperand(0); - if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) - return SDValue(); - } - - // Check if the input vector is fed by the operator we want to handle, - // except the last step; the very first input vector is not necessarily - // the same operator we are handling. - if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1))) - return SDValue(); - - // Check if it forms one step of the across vector reduction. - // E.g., - // %cur = add %1, %0 - // %shuffle = vector_shuffle %cur, <2, 3, u, u> - // %pre = add %cur, %shuffle - if (Shuffle.getOperand(0) != CurOp) - return SDValue(); - - int NumMaskElts = 1 << CurStep; - ArrayRef Mask = cast(Shuffle)->getMask(); - // Check mask values in each step. - // We expect the shuffle mask in each step follows a specific pattern - // denoted here by the form, where M is a sequence of integers - // starting from NumMaskElts, increasing by 1, and the number integers - // in M should be NumMaskElts. U is a sequence of UNDEFs and the number - // of undef in U should be NumVecElts - NumMaskElts. - // E.g., for <8 x i16>, mask values in each step should be : - // step 0 : <1,u,u,u,u,u,u,u> - // step 1 : <2,3,u,u,u,u,u,u> - // step 2 : <4,5,6,7,u,u,u,u> - for (int i = 0; i < NumVecElts; ++i) - if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) || - (i >= NumMaskElts && !(Mask[i] < 0))) - return SDValue(); - - PreOp = CurOp; - } - unsigned Opcode; - bool IsIntrinsic = false; - - switch (Op) { - default: - llvm_unreachable("Unexpected operator for across vector reduction"); - case ISD::ADD: - Opcode = AArch64ISD::UADDV; - break; - case ISD::SMAX: - Opcode = AArch64ISD::SMAXV; - break; - case ISD::UMAX: - Opcode = AArch64ISD::UMAXV; - break; - case ISD::SMIN: - Opcode = AArch64ISD::SMINV; - break; - case ISD::UMIN: - Opcode = AArch64ISD::UMINV; - break; - case ISD::FMAXNUM: - Opcode = Intrinsic::aarch64_neon_fmaxnmv; - IsIntrinsic = true; - break; - case ISD::FMINNUM: - Opcode = Intrinsic::aarch64_neon_fminnmv; - IsIntrinsic = true; - break; - } - SDLoc DL(N); - - return IsIntrinsic - ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0), - DAG.getConstant(Opcode, DL, MVT::i32), PreOp) - : DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), - DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp), - DAG.getConstant(0, DL, MVT::i64)); -} - -/// Target-specific DAG combine for the across vector min/max reductions. -/// This function specifically handles the final clean-up step of the vector -/// min/max reductions produced by the LoopVectorizer. It is the log2-shuffle -/// pattern, which narrows down and finds the final min/max value from all -/// elements of the vector. -/// For example, for a <16 x i8> vector : -/// svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> -/// %smax0 = smax %arr, svn0 -/// %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u> -/// %smax1 = smax %smax0, %svn1 -/// %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -/// %smax2 = smax %smax1, svn2 -/// %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> -/// %sc = setcc %smax2, %svn3, gt -/// %n0 = extract_vector_elt %sc, #0 -/// %n1 = extract_vector_elt %smax2, #0 -/// %n2 = extract_vector_elt $smax2, #1 -/// %result = select %n0, %n1, n2 -/// becomes : -/// %1 = smaxv %0 -/// %result = extract_vector_elt %1, 0 -static SDValue -performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG, - const AArch64Subtarget *Subtarget) { - if (!Subtarget->hasNEON()) - return SDValue(); - - SDValue N0 = N->getOperand(0); - SDValue IfTrue = N->getOperand(1); - SDValue IfFalse = N->getOperand(2); - - // Check if the SELECT merges up the final result of the min/max - // from a vector. - if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return SDValue(); - - // Expect N0 is fed by SETCC. - SDValue SetCC = N0.getOperand(0); - EVT SetCCVT = SetCC.getValueType(); - if (SetCC.getOpcode() != ISD::SETCC || !SetCCVT.isVector() || - SetCCVT.getVectorElementType() != MVT::i1) - return SDValue(); - - SDValue VectorOp = SetCC.getOperand(0); - unsigned Op = VectorOp->getOpcode(); - // Check if the input vector is fed by the operator we want to handle. - if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && - Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM) - return SDValue(); - - EVT VTy = VectorOp.getValueType(); - if (!VTy.isVector()) - return SDValue(); - - if (VTy.getSizeInBits() < 64) - return SDValue(); - - EVT EltTy = VTy.getVectorElementType(); - if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) { - if (EltTy != MVT::f32) - return SDValue(); - } else { - if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8) - return SDValue(); - } - - // Check if extracting from the same vector. - // For example, - // %sc = setcc %vector, %svn1, gt - // %n0 = extract_vector_elt %sc, #0 - // %n1 = extract_vector_elt %vector, #0 - // %n2 = extract_vector_elt $vector, #1 - if (!(VectorOp == IfTrue->getOperand(0) && - VectorOp == IfFalse->getOperand(0))) - return SDValue(); - - // Check if the condition code is matched with the operator type. - ISD::CondCode CC = cast(SetCC->getOperand(2))->get(); - if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) || - (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) || - (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) || - (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) || - (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE && - CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT && - CC != ISD::SETGE) || - (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE && - CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT && - CC != ISD::SETLE)) - return SDValue(); - - // Expect to check only lane 0 from the vector SETCC. - if (!isNullConstant(N0.getOperand(1))) - return SDValue(); - - // Expect to extract the true value from lane 0. - if (!isNullConstant(IfTrue.getOperand(1))) - return SDValue(); - - // Expect to extract the false value from lane 1. - if (!isOneConstant(IfFalse.getOperand(1))) - return SDValue(); - - return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG); -} - -/// Target-specific DAG combine for the across vector add reduction. -/// This function specifically handles the final clean-up step of the vector -/// add reduction produced by the LoopVectorizer. It is the log2-shuffle -/// pattern, which adds all elements of a vector together. -/// For example, for a <4 x i32> vector : -/// %1 = vector_shuffle %0, <2,3,u,u> -/// %2 = add %0, %1 -/// %3 = vector_shuffle %2, <1,u,u,u> -/// %4 = add %2, %3 -/// %result = extract_vector_elt %4, 0 -/// becomes : -/// %0 = uaddv %0 -/// %result = extract_vector_elt %0, 0 -static SDValue -performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG, - const AArch64Subtarget *Subtarget) { - if (!Subtarget->hasNEON()) - return SDValue(); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - // Check if the input vector is fed by the ADD. - if (N0->getOpcode() != ISD::ADD) - return SDValue(); - - // The vector extract idx must constant zero because we only expect the final - // result of the reduction is placed in lane 0. - if (!isNullConstant(N1)) - return SDValue(); - - EVT VTy = N0.getValueType(); - if (!VTy.isVector()) - return SDValue(); - - EVT EltTy = VTy.getVectorElementType(); - if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8) - return SDValue(); - - if (VTy.getSizeInBits() < 64) - return SDValue(); - - return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG); -} /// Target-specific DAG combine function for NEON load/store intrinsics /// to merge base address updates. @@ -10288,12 +10239,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performBitcastCombine(N, DCI, DAG); case ISD::CONCAT_VECTORS: return performConcatVectorsCombine(N, DCI, DAG); - case ISD::SELECT: { - SDValue RV = performSelectCombine(N, DCI); - if (!RV.getNode()) - RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget); - return RV; - } + case ISD::SELECT: + return performSelectCombine(N, DCI); case ISD::VSELECT: return performVSelectCombine(N, DCI.DAG); case ISD::LOAD: @@ -10315,8 +10262,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performNVCASTCombine(N); case ISD::INSERT_VECTOR_ELT: return performPostLD1Combine(N, DCI, true); - case ISD::EXTRACT_VECTOR_ELT: - return performAcrossLaneAddReductionCombine(N, DAG, Subtarget); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { @@ -10390,7 +10335,7 @@ bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N, // call. This will cause the optimizers to attempt to move, or duplicate, // return instructions to help enable tail call optimizations for this // instruction. -bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return CI->isTailCall(); } @@ -10536,6 +10481,14 @@ void AArch64TargetLowering::ReplaceNodeResults( case ISD::BITCAST: ReplaceBITCASTResults(N, Results, DAG); return; + case ISD::VECREDUCE_ADD: + case ISD::VECREDUCE_SMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_UMIN: + Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG)); + return; + case AArch64ISD::SADDV: ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV); return; diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 2ad6c8b23df8c3c9e510153fd38a9a5191dc372e..ecc2517fb288dfbe410aa2aef2ef0b937dc265c0 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -250,11 +250,14 @@ public: /// Determine which of the bits specified in Mask are known to be either zero /// or one and return them in the KnownZero/KnownOne bitsets. - void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, - APInt &KnownOne, const APInt &DemandedElts, + void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, + const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const override; + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; /// Returns true if the target allows unaligned memory accesses of the @@ -508,6 +511,18 @@ private: const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + template SDValue getGOT(NodeTy *N, SelectionDAG &DAG) const; + template + SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG) const; + template SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; @@ -553,6 +568,7 @@ private: SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const override; @@ -593,7 +609,7 @@ private: } bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, bool &IsInc, SelectionDAG &DAG) const; diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td index 867074c3c37451a9cfc458e86095e03f44e341e7..71826bec6b11f497ffd6f6957e98a95ce9a718f4 100644 --- a/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/lib/Target/AArch64/AArch64InstrAtomics.td @@ -14,6 +14,9 @@ //===---------------------------------- // Atomic fences //===---------------------------------- +let AddedComplexity = 15, Size = 0 in +def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering), + [(atomic_fence imm:$ordering, 0)]>, Sched<[]>; def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>; def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 16be4432b160707e4bbfb8fa74572cbd32488ca9..c44daf306ea937ddd0cf992f5872328e42f668e9 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -693,11 +693,11 @@ def addsub_shifted_imm32_neg : addsub_shifted_imm_neg; def addsub_shifted_imm64_neg : addsub_shifted_imm_neg; def gi_addsub_shifted_imm32 : - GIComplexOperandMatcher, + GIComplexOperandMatcher, GIComplexPatternEquiv; def gi_addsub_shifted_imm64 : - GIComplexOperandMatcher, + GIComplexOperandMatcher, GIComplexPatternEquiv; class neg_addsub_shifted_imm diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 41fc8eceab5c7f67152602fefe1e6eba0560a6b4..eea012382150cafb633c66727827fe6251dd0952 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -17,8 +17,8 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -763,15 +763,126 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { llvm_unreachable("Unknown opcode to check as cheap as a move!"); } -bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const { - if (MI.getNumOperands() < 4) +bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: return false; - unsigned ShOpVal = MI.getOperand(3).getImm(); - unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal); - if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL && - ShImm < 4) - return true; - return false; + + case AArch64::ADDWrs: + case AArch64::ADDXrs: + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + if (ShiftVal == 0) + return true; + return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5; + } + + case AArch64::ADDWrx: + case AArch64::ADDXrx: + case AArch64::ADDXrx64: + case AArch64::ADDSWrx: + case AArch64::ADDSXrx: + case AArch64::ADDSXrx64: { + unsigned Imm = MI.getOperand(3).getImm(); + switch (AArch64_AM::getArithExtendType(Imm)) { + default: + return false; + case AArch64_AM::UXTB: + case AArch64_AM::UXTH: + case AArch64_AM::UXTW: + case AArch64_AM::UXTX: + return AArch64_AM::getArithShiftValue(Imm) <= 4; + } + } + + case AArch64::SUBWrs: + case AArch64::SUBSWrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + return ShiftVal == 0 || + (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31); + } + + case AArch64::SUBXrs: + case AArch64::SUBSXrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + return ShiftVal == 0 || + (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63); + } + + case AArch64::SUBWrx: + case AArch64::SUBXrx: + case AArch64::SUBXrx64: + case AArch64::SUBSWrx: + case AArch64::SUBSXrx: + case AArch64::SUBSXrx64: { + unsigned Imm = MI.getOperand(3).getImm(); + switch (AArch64_AM::getArithExtendType(Imm)) { + default: + return false; + case AArch64_AM::UXTB: + case AArch64_AM::UXTH: + case AArch64_AM::UXTW: + case AArch64_AM::UXTX: + return AArch64_AM::getArithShiftValue(Imm) == 0; + } + } + + case AArch64::LDRBBroW: + case AArch64::LDRBBroX: + case AArch64::LDRBroW: + case AArch64::LDRBroX: + case AArch64::LDRDroW: + case AArch64::LDRDroX: + case AArch64::LDRHHroW: + case AArch64::LDRHHroX: + case AArch64::LDRHroW: + case AArch64::LDRHroX: + case AArch64::LDRQroW: + case AArch64::LDRQroX: + case AArch64::LDRSBWroW: + case AArch64::LDRSBWroX: + case AArch64::LDRSBXroW: + case AArch64::LDRSBXroX: + case AArch64::LDRSHWroW: + case AArch64::LDRSHWroX: + case AArch64::LDRSHXroW: + case AArch64::LDRSHXroX: + case AArch64::LDRSWroW: + case AArch64::LDRSWroX: + case AArch64::LDRSroW: + case AArch64::LDRSroX: + case AArch64::LDRWroW: + case AArch64::LDRWroX: + case AArch64::LDRXroW: + case AArch64::LDRXroX: + case AArch64::PRFMroW: + case AArch64::PRFMroX: + case AArch64::STRBBroW: + case AArch64::STRBBroX: + case AArch64::STRBroW: + case AArch64::STRBroX: + case AArch64::STRDroW: + case AArch64::STRDroX: + case AArch64::STRHHroW: + case AArch64::STRHHroX: + case AArch64::STRHroW: + case AArch64::STRHroX: + case AArch64::STRQroW: + case AArch64::STRQroX: + case AArch64::STRSroW: + case AArch64::STRSroX: + case AArch64::STRWroW: + case AArch64::STRWroX: + case AArch64::STRXroW: + case AArch64::STRXroX: { + unsigned IsSigned = MI.getOperand(3).getImm(); + return !IsSigned; + } + } } bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, @@ -2320,7 +2431,7 @@ void AArch64InstrInfo::storeRegToStackSlot( PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); unsigned Opc = 0; bool Offset = true; - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::STRBui; @@ -2424,7 +2535,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( unsigned Opc = 0; bool Offset = true; - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRBui; @@ -2649,7 +2760,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( }; if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { - assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() && + assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == + TRI.getRegSizeInBits(*getRegClass(SrcReg)) && "Mismatched register size in non subreg COPY"); if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, @@ -2735,7 +2847,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( } if (FillRC) { - assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() && + assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == + TRI.getRegSizeInBits(*FillRC) && "Mismatched regclass size on folded subreg COPY"); loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); MachineInstr &LoadMI = *--InsertPt; @@ -3025,7 +3138,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return false; } -void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +void AArch64InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(AArch64::HINT); NopInst.addOperand(MCOperand::createImm(0)); } @@ -3425,6 +3538,10 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); Found = true; } + if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) { + Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1); + Found = true; + } break; case AArch64::FSUBDrr: if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { @@ -3439,6 +3556,10 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); Found = true; } + if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) { + Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1); + Found = true; + } break; case AArch64::FSUBv2f32: if (canCombineWithFMUL(MBB, Root.getOperand(2), @@ -3493,6 +3614,8 @@ AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { case MachineCombinerPattern::FMULADDD_OP2: case MachineCombinerPattern::FMULSUBD_OP1: case MachineCombinerPattern::FMULSUBD_OP2: + case MachineCombinerPattern::FNMULSUBS_OP1: + case MachineCombinerPattern::FNMULSUBD_OP1: case MachineCombinerPattern::FMLAv1i32_indexed_OP1: case MachineCombinerPattern::FMLAv1i32_indexed_OP2: case MachineCombinerPattern::FMLAv1i64_indexed_OP1: @@ -3994,6 +4117,24 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; } + + case MachineCombinerPattern::FNMULSUBS_OP1: + case MachineCombinerPattern::FNMULSUBD_OP1: { + // FNMUL I=A,B,0 + // FSUB R,I,C + // ==> FNMADD R,A,B,C // = -A*B - C + // --- Create(FNMADD); + if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { + Opc = AArch64::FNMADDSrrr; + RC = &AArch64::FPR32RegClass; + } else { + Opc = AArch64::FNMADDDrrr; + RC = &AArch64::FPR64RegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + case MachineCombinerPattern::FMULSUBS_OP2: case MachineCombinerPattern::FMULSUBD_OP2: { // FMUL I=A,B,0 @@ -4009,6 +4150,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; + } case MachineCombinerPattern::FMLSv1i32_indexed_OP2: Opc = AArch64::FMLSv1i32_indexed; @@ -4065,7 +4207,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Accumulator); } break; - } } // end switch (Pattern) // Record MUL and ADD/SUB for deletion DelInstrs.push_back(MUL); diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index bacce441f6c57d8405d82de0078e4d035296df54..59f3405fe439ae1c0288cbef5048652365f6458d 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -205,7 +205,7 @@ public: const DebugLoc &DL, unsigned DstReg, ArrayRef Cond, unsigned TrueReg, unsigned FalseReg) const override; - void getNoopForMachoTarget(MCInst &NopInst) const override; + void getNoop(MCInst &NopInst) const override; /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2, and the value it compares against in CmpValue. @@ -270,7 +270,7 @@ public: bool IsTailCall) const override; /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. - bool isFalkorLSLFast(const MachineInstr &MI) const; + bool isFalkorShiftExtFast(const MachineInstr &MI) const; private: /// \brief Sets the offsets on outlined instructions in \p MBB which use SP diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 4449412532f30464189b7627c73e326f263eab24..ad24612239fa9bda07a15b615ee0e91ed1602eb6 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -156,7 +156,8 @@ def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", - SDCallSeqStart<[ SDTCisVT<0, i32> ]>, + SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>, [SDNPHasChain, SDNPOutGlue]>; def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDCallSeqEnd<[ SDTCisVT<0, i32>, @@ -314,8 +315,14 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; // AArch64 Instruction Predicate Definitions. def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; -def ForCodeSize : Predicate<"ForCodeSize">; -def NotForCodeSize : Predicate<"!ForCodeSize">; + +// We could compute these on a per-module basis but doing so requires accessing +// the Function object through the Subtarget and objections were raised +// to that (see post-commit review comments for r301750). +let RecomputePerFunction = 1 in { + def ForCodeSize : Predicate<"MF->getFunction()->optForSize()">; + def NotForCodeSize : Predicate<"!MF->getFunction()->optForSize()">; +} include "AArch64InstrFormats.td" @@ -328,8 +335,9 @@ include "AArch64InstrFormats.td" let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { // We set Sched to empty list because we expect these instructions to simply get // removed in most cases. -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - [(AArch64callseq_start timm:$amt)]>, Sched<[]>; +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(AArch64callseq_start timm:$amt1, timm:$amt2)]>, + Sched<[]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), [(AArch64callseq_end timm:$amt1, timm:$amt2)]>, Sched<[]>; @@ -434,7 +442,7 @@ def MSRpstateImm4 : MSRpstateImm0_15; // TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects. let hasSideEffects = 0 in def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), - [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>; + [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; // The cycle counter PMC register is PMCCNTR_EL0. let Predicates = [HasPerfMon] in @@ -2586,6 +2594,11 @@ def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, Sched<[WriteF]>; } +// Similarly add aliases +def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, + Requires<[HasFullFP16]>; +def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; +def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; //===----------------------------------------------------------------------===// // Floating point conversion instruction. @@ -2729,60 +2742,36 @@ defm FMOV : FPMoveImmediate<"fmov">; defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", int_aarch64_neon_uabd>; // Match UABDL in log2-shuffle patterns. +def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), + (zext (v8i8 V64:$opB))))), + (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), (v8i16 (add (sub (zext (v8i8 V64:$opA)), (zext (v8i8 V64:$opB))), (AArch64vashr v8i16:$src, (i32 15))))), (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; +def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)), + (zext (extract_high_v16i8 V128:$opB))))), + (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)), (zext (extract_high_v16i8 V128:$opB))), (AArch64vashr v8i16:$src, (i32 15))))), (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; -def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))), - (v4i32 (add (sub (zext (v4i16 V64:$opA)), - (zext (v4i16 V64:$opB))), - (AArch64vashr v4i32:$src, (i32 31))))), +def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), + (zext (v4i16 V64:$opB))))), (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; -def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))), - (v4i32 (add (sub (zext (extract_high_v8i16 V128:$opA)), - (zext (extract_high_v8i16 V128:$opB))), - (AArch64vashr v4i32:$src, (i32 31))))), +def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)), + (zext (extract_high_v8i16 V128:$opB))))), (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; -def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))), - (v2i64 (add (sub (zext (v2i32 V64:$opA)), - (zext (v2i32 V64:$opB))), - (AArch64vashr v2i64:$src, (i32 63))))), +def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), + (zext (v2i32 V64:$opB))))), (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; -def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))), - (v2i64 (add (sub (zext (extract_high_v4i32 V128:$opA)), - (zext (extract_high_v4i32 V128:$opB))), - (AArch64vashr v2i64:$src, (i32 63))))), +def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)), + (zext (extract_high_v4i32 V128:$opB))))), (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; -defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>; -def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))), - (v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))), - (ABSv8i8 V64:$src)>; -def : Pat<(xor (v4i16 (AArch64vashr V64:$src, (i32 15))), - (v4i16 (add V64:$src, (AArch64vashr V64:$src, (i32 15))))), - (ABSv4i16 V64:$src)>; -def : Pat<(xor (v2i32 (AArch64vashr V64:$src, (i32 31))), - (v2i32 (add V64:$src, (AArch64vashr V64:$src, (i32 31))))), - (ABSv2i32 V64:$src)>; -def : Pat<(xor (v16i8 (AArch64vashr V128:$src, (i32 7))), - (v16i8 (add V128:$src, (AArch64vashr V128:$src, (i32 7))))), - (ABSv16i8 V128:$src)>; -def : Pat<(xor (v8i16 (AArch64vashr V128:$src, (i32 15))), - (v8i16 (add V128:$src, (AArch64vashr V128:$src, (i32 15))))), - (ABSv8i16 V128:$src)>; -def : Pat<(xor (v4i32 (AArch64vashr V128:$src, (i32 31))), - (v4i32 (add V128:$src, (AArch64vashr V128:$src, (i32 31))))), - (ABSv4i32 V128:$src)>; -def : Pat<(xor (v2i64 (AArch64vashr V128:$src, (i32 63))), - (v2i64 (add V128:$src, (AArch64vashr V128:$src, (i32 63))))), - (ABSv2i64 V128:$src)>; - +defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; @@ -3354,7 +3343,7 @@ def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), // Advanced SIMD two scalar instructions. //===----------------------------------------------------------------------===// -defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_aarch64_neon_abs>; +defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs>; defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 878dac6bff1e31002e992421d5e8bee215c8a153..9bfd570e9a82798d17618471345cdf835cb0bffd 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -20,6 +20,7 @@ #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -40,6 +41,10 @@ using namespace llvm; namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "AArch64GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class AArch64InstructionSelector : public InstructionSelector { public: AArch64InstructionSelector(const AArch64TargetMachine &TM, @@ -61,8 +66,7 @@ private: bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - bool selectArithImmed(MachineOperand &Root, MachineOperand &Result1, - MachineOperand &Result2) const; + ComplexRendererFn selectArithImmed(MachineOperand &Root) const; const AArch64TargetMachine &TM; const AArch64Subtarget &STI; @@ -70,6 +74,10 @@ private: const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; +#define GET_GLOBALISEL_PREDICATES_DECL +#include "AArch64GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + // We declare the temporaries used by selectImpl() in the class to minimize the // cost of constructing placeholder values. #define GET_GLOBALISEL_TEMPORARIES_DECL @@ -87,7 +95,10 @@ AArch64InstructionSelector::AArch64InstructionSelector( const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + TRI(*STI.getRegisterInfo()), RBI(RBI), +#define GET_GLOBALISEL_PREDICATES_INIT +#include "AArch64GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT #define GET_GLOBALISEL_TEMPORARIES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -1311,9 +1322,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. -bool AArch64InstructionSelector::selectArithImmed( - MachineOperand &Root, MachineOperand &Result1, - MachineOperand &Result2) const { +InstructionSelector::ComplexRendererFn +AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { MachineInstr &MI = *Root.getParent(); MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); @@ -1332,13 +1342,13 @@ bool AArch64InstructionSelector::selectArithImmed( else if (Root.isReg()) { MachineInstr *Def = MRI.getVRegDef(Root.getReg()); if (Def->getOpcode() != TargetOpcode::G_CONSTANT) - return false; + return nullptr; MachineOperand &Op1 = Def->getOperand(1); if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64) - return false; + return nullptr; Immed = Op1.getCImm()->getZExtValue(); } else - return false; + return nullptr; unsigned ShiftAmt; @@ -1348,14 +1358,10 @@ bool AArch64InstructionSelector::selectArithImmed( ShiftAmt = 12; Immed = Immed >> 12; } else - return false; + return nullptr; unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); - Result1.ChangeToImmediate(Immed); - Result1.clearParent(); - Result2.ChangeToImmediate(ShVal); - Result2.clearParent(); - return true; + return [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed).addImm(ShVal); }; } namespace llvm { diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 6e6daf8122951526b1c0973a964358985ce66f99..01196817f3112a6bd1b370461beda35a266dd3a1 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "AArch64LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" -#include "llvm/IR/Type.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" #include "llvm/Target/TargetOpcodes.h" using namespace llvm; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 976498aa70d6d87d00f6b2d843739925d6246c5d..9243eb91cc1ac708adc3c24619e7a4ac6adb7484 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -16,10 +16,10 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp index a6926a6700e187299d9de480eb9b4a6e1625bdab..3b71d529db59b39747a2be6deedf0adade776155 100644 --- a/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -232,6 +232,19 @@ static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) { dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " << DAG->TII->getName(SecondMI->getOpcode()) << '\n'; ); + if (&SecondSU != &DAG->ExitSU) + // Make instructions dependent on FirstSU also dependent on SecondSU to + // prevent them from being scheduled between FirstSU and and SecondSU. + for (SUnit::const_succ_iterator + SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end(); + SI != SE; ++SI) { + if (!SI->getSUnit() || SI->getSUnit() == &SecondSU) + continue; + DEBUG(dbgs() << " Copy Succ "; + SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';); + DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial)); + } + ++NumFused; return true; } diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp index 038162c6f54a904c04be2f284cc9970adbe702e1..fe4ef4b40ece7bf6b067dd7e75db8fb1fddadc71 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp @@ -17,8 +17,8 @@ #define DEBUG_TYPE "aarch64-pbqp" -#include "AArch64.h" #include "AArch64PBQPRegAlloc.h" +#include "AArch64.h" #include "AArch64RegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h index 4f656f94ea12f771427fbabe008965e748d1a78a..b99c1d1d6b3e2d4e6b4bd148c86832ddc6460cfb 100644 --- a/lib/Target/AArch64/AArch64PBQPRegAlloc.h +++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h @@ -1,4 +1,4 @@ -//===-- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints -------===// +//==- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -15,6 +15,8 @@ namespace llvm { +class TargetRegisterInfo; + /// Add the accumulator chaining constraint to a PBQP graph class A57ChainingConstraint : public PBQPRAConstraint { public: @@ -33,6 +35,7 @@ private: // Add constraints between existing chains void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra); }; -} + +} // end namespace llvm #endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 20a5979f9b4b7115966426587083ea121c11c481..9b3899e0681cf0039b3b3edab46e3fda9db0eecc 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -15,13 +15,13 @@ #include "AArch64RegisterBankInfo.h" #include "AArch64InstrInfo.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/LowLevelType.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -260,15 +260,15 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings( if (MI.getNumOperands() != 3) break; InstructionMappings AltMappings; - InstructionMapping GPRMapping( + const InstructionMapping &GPRMapping = getInstructionMapping( /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size), /*NumOperands*/ 3); - InstructionMapping FPRMapping( + const InstructionMapping &FPRMapping = getInstructionMapping( /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size), /*NumOperands*/ 3); - AltMappings.emplace_back(std::move(GPRMapping)); - AltMappings.emplace_back(std::move(FPRMapping)); + AltMappings.push_back(&GPRMapping); + AltMappings.push_back(&FPRMapping); return AltMappings; } case TargetOpcode::G_BITCAST: { @@ -282,29 +282,29 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings( break; InstructionMappings AltMappings; - InstructionMapping GPRMapping( + const InstructionMapping &GPRMapping = getInstructionMapping( /*ID*/ 1, /*Cost*/ 1, getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size), /*NumOperands*/ 2); - InstructionMapping FPRMapping( + const InstructionMapping &FPRMapping = getInstructionMapping( /*ID*/ 2, /*Cost*/ 1, getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size), /*NumOperands*/ 2); - InstructionMapping GPRToFPRMapping( + const InstructionMapping &GPRToFPRMapping = getInstructionMapping( /*ID*/ 3, /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size), getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size), /*NumOperands*/ 2); - InstructionMapping FPRToGPRMapping( + const InstructionMapping &FPRToGPRMapping = getInstructionMapping( /*ID*/ 3, /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size), getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size), /*NumOperands*/ 2); - AltMappings.emplace_back(std::move(GPRMapping)); - AltMappings.emplace_back(std::move(FPRMapping)); - AltMappings.emplace_back(std::move(GPRToFPRMapping)); - AltMappings.emplace_back(std::move(FPRToGPRMapping)); + AltMappings.push_back(&GPRMapping); + AltMappings.push_back(&FPRMapping); + AltMappings.push_back(&GPRToFPRMapping); + AltMappings.push_back(&FPRToGPRMapping); return AltMappings; } case TargetOpcode::G_LOAD: { @@ -318,21 +318,21 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings( break; InstructionMappings AltMappings; - InstructionMapping GPRMapping( + const InstructionMapping &GPRMapping = getInstructionMapping( /*ID*/ 1, /*Cost*/ 1, getOperandsMapping({getValueMapping(PMI_FirstGPR, Size), // Addresses are GPR 64-bit. getValueMapping(PMI_FirstGPR, 64)}), /*NumOperands*/ 2); - InstructionMapping FPRMapping( + const InstructionMapping &FPRMapping = getInstructionMapping( /*ID*/ 2, /*Cost*/ 1, getOperandsMapping({getValueMapping(PMI_FirstFPR, Size), // Addresses are GPR 64-bit. getValueMapping(PMI_FirstGPR, 64)}), /*NumOperands*/ 2); - AltMappings.emplace_back(std::move(GPRMapping)); - AltMappings.emplace_back(std::move(FPRMapping)); + AltMappings.push_back(&GPRMapping); + AltMappings.push_back(&FPRMapping); return AltMappings; } default: @@ -373,8 +373,9 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) { return false; } -RegisterBankInfo::InstructionMapping -AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) { +const RegisterBankInfo::InstructionMapping & +AArch64RegisterBankInfo::getSameKindOfOperandsMapping( + const MachineInstr &MI) const { const unsigned Opc = MI.getOpcode(); const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -411,11 +412,11 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) { } #endif // End NDEBUG. - return InstructionMapping{DefaultMappingID, 1, getValueMapping(RBIdx, Size), - NumOperands}; + return getInstructionMapping(DefaultMappingID, 1, + getValueMapping(RBIdx, Size), NumOperands); } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const unsigned Opc = MI.getOpcode(); const MachineFunction &MF = *MI.getParent()->getParent(); @@ -424,7 +425,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Try the default logic for non-generic instructions that are either copies // or already have some operands assigned to banks. if (!isPreISelGenericOpcode(Opc)) { - RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); + const RegisterBankInfo::InstructionMapping &Mapping = + getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; } @@ -462,15 +464,15 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; const RegisterBank &SrcRB = SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; - return InstructionMapping{ + return getInstructionMapping( DefaultMappingID, copyCost(DstRB, SrcRB, Size), getCopyMapping(DstRB.getID(), SrcRB.getID(), Size), - /*NumOperands*/ 2}; + /*NumOperands*/ 2); } case TargetOpcode::G_SEQUENCE: // FIXME: support this, but the generic code is really not going to do // anything sane. - return InstructionMapping(); + return getInvalidInstructionMapping(); default: break; } @@ -482,7 +484,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { SmallVector OpRegBankIdx(NumOperands); for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { auto &MO = MI.getOperand(Idx); - if (!MO.isReg()) + if (!MO.isReg() || !MO.getReg()) continue; LLT Ty = MRI.getType(MO.getReg()); @@ -527,25 +529,48 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // for the greedy mode the cost of the cross bank copy will // offset this number. // FIXME: Should be derived from the scheduling model. - if (OpRegBankIdx[0] >= PMI_FirstFPR) + if (OpRegBankIdx[0] != PMI_FirstGPR) Cost = 2; + else + // Check if that load feeds fp instructions. + // In that case, we want the default mapping to be on FPR + // instead of blind map every scalar to GPR. + for (const MachineInstr &UseMI : + MRI.use_instructions(MI.getOperand(0).getReg())) + // If we have at least one direct use in a FP instruction, + // assume this was a floating point load in the IR. + // If it was not, we would have had a bitcast before + // reaching that instruction. + if (isPreISelGenericFloatingPointOpcode(UseMI.getOpcode())) { + OpRegBankIdx[0] = PMI_FirstFPR; + break; + } break; + case TargetOpcode::G_STORE: + // Check if that store is fed by fp instructions. + if (OpRegBankIdx[0] == PMI_FirstGPR) { + unsigned VReg = MI.getOperand(0).getReg(); + if (!VReg) + break; + MachineInstr *DefMI = MRI.getVRegDef(VReg); + if (isPreISelGenericFloatingPointOpcode(DefMI->getOpcode())) + OpRegBankIdx[0] = PMI_FirstFPR; + break; + } } // Finally construct the computed mapping. - RegisterBankInfo::InstructionMapping Mapping = - InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands}; SmallVector OpdsMapping(NumOperands); for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { - if (MI.getOperand(Idx).isReg()) { + if (MI.getOperand(Idx).isReg() && MI.getOperand(Idx).getReg()) { auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]); if (!Mapping->isValid()) - return InstructionMapping(); + return getInvalidInstructionMapping(); OpdsMapping[Idx] = Mapping; } } - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; + return getInstructionMapping(DefaultMappingID, Cost, + getOperandsMapping(OpdsMapping), NumOperands); } diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.h b/lib/Target/AArch64/AArch64RegisterBankInfo.h index 0a795a42c0b1a1824ebc4f397e143f22893e9ef6..6d74a47095a974c14c097cf1398ae7c4d5ab10b7 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.h +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.h @@ -98,8 +98,8 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo { /// /// \return An InstructionMappings with a statically allocated /// OperandsMapping. - static InstructionMapping - getSameKindOfOperandsMapping(const MachineInstr &MI); + const InstructionMapping & + getSameKindOfOperandsMapping(const MachineInstr &MI) const; public: AArch64RegisterBankInfo(const TargetRegisterInfo &TRI); @@ -113,7 +113,8 @@ public: InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override; - InstructionMapping getInstrMapping(const MachineInstr &MI) const override; + const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const override; }; } // End llvm namespace. #endif diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td index eec089087fe0ad6c5d1ca32153418e3f4b085f98..44fd94fc3d48518a427b3e1e5231d18023e87d03 100644 --- a/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/lib/Target/AArch64/AArch64SchedFalkor.td @@ -61,56 +61,42 @@ let SchedModel = FalkorModel in { let SchedModel = FalkorModel in { -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes - { let Latency = 1; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 2; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 2; let NumMicroOps = 2; } -def : WriteRes { let Latency = 1; } -def : WriteRes - { let Latency = 8; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 16; let NumMicroOps = 2; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 5; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 3; } -def : WriteRes - { let Latency = 3; let NumMicroOps = 3; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes { let Latency = 5; } -def : WriteRes { let Latency = 5; } -def : WriteRes - { let Latency = 4; let NumMicroOps = 3; } -def : WriteRes - { let Latency = 3; let NumMicroOps = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes - { let Latency = 6; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1 -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 3; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } - -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { let Latency = 3; } - -def : WriteRes { let Unsupported = 1; } - -// No forwarding logic is modelled yet. +// These WriteRes entries are not used in the Falkor sched model. +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } + +// These ReadAdvance entries are not used in the Falkor sched model. def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td index 6bce4ef6b652bfa13f05f357755b25ae614e51ca..3d737402022d800d0034633b7a37da92452c955d 100644 --- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -12,7 +12,555 @@ // //===----------------------------------------------------------------------===// -include "AArch64SchedFalkorWriteRes.td" +// Contains all of the Falkor specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming conventions is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: FalkorWr +// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD) +// Latency: #cyc +// +// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued +// down one Z pipe, six SD pipes, four VX pipes and the total latency is +// six cycles. +// +// Contains all of the Falkor specific ReadAdvance types for forwarding logic. +// +// Contains all of the Falkor specific WriteVariant types for immediate zero +// and LSLFast. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define 0 micro-op types +def FalkorWr_none_1cyc : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; +} +def FalkorWr_none_3cyc : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} +def FalkorWr_none_4cyc : SchedWriteRes<[]> { + let Latency = 4; + let NumMicroOps = 0; +} + +//===----------------------------------------------------------------------===// +// Define 1 micro-op types + +def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } +def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } +def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; } +def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; } +def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } +def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } +def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; } +def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } +def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } +def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; } +def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } +def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; } + +def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; } +def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } +def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } +def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; } +def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } +def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } + +def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; } +def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; } +def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; } + +def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; } +def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; } +def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; } +def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Define 2 micro-op types + +def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 0; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 1; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} +def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 12; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 2; +} + +def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [2, 8]; +} + +def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [2, 11]; +} + +def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 2; +} + +//===----------------------------------------------------------------------===// +// Define 3 micro-op types + +def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 0; + let NumMicroOps = 3; +} + +def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +//===----------------------------------------------------------------------===// +// Define 4 micro-op types + +def FalkorWr_2VX_2VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 14; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_20cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 20; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 21; + let NumMicroOps = 4; +} + +def FalkorWr_2VX_2VY_24cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 24; + let NumMicroOps = 4; +} + +def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 4; +} + +//===----------------------------------------------------------------------===// +// Define 5 micro-op types + +def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 7; + let NumMicroOps = 5; +} +def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +//===----------------------------------------------------------------------===// +// Define 6 micro-op types + +def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitXYZ, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +//===----------------------------------------------------------------------===// +// Define 8 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 8; +} + +def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 8; +} + +//===----------------------------------------------------------------------===// +// Define 9 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitLD, + FalkorUnitLD, FalkorUnitXYZ, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitXYZ, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +//===----------------------------------------------------------------------===// +// Define 10 micro-op types + +def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 10; +} + +//===----------------------------------------------------------------------===// +// Define 12 micro-op types + +def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 12; +} + +// Forwarding logic is modeled for multiply add/accumulate. +// ----------------------------------------------------------------------------- +def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>; +def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>; +def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>; +def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>; +def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>; + +// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast +// ----------------------------------------------------------------------------- +def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>; +def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR || + + MI->getOperand(1).getReg() == AArch64::XZR}]>; +def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>; + +def FalkorWr_FMOV : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_MOVZ : SchedWriteVariant<[ + SchedVar, + SchedVar]>; // imm fwd + + +def FalkorWr_ADDSUBsx : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_LDRro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_LDRSro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_ORRi : SchedWriteVariant<[ + SchedVar, // imm fwd + SchedVar]>; + +def FalkorWr_PRFMro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRVro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRQro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; //===----------------------------------------------------------------------===// // Specialize the coarse model by associating instruction groups with the @@ -22,63 +570,81 @@ include "AArch64SchedFalkorWriteRes.td" // Miscellaneous // ----------------------------------------------------------------------------- -def : InstRW<[WriteI], (instrs COPY)>; +// FIXME: This could be better modeled by looking at the regclasses of the operands. +def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>; // SIMD Floating-point Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v4f16|v2i16p|v2i32p)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(16|32|64)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(16|32|64|v2f32|v4f16|v2i32|v4i16)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i16|v1i32|v1i64|v2i32|v4i16)rz$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?V(v4i16|v4i32|v8i16)v$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)(v2f32|v4f16)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>; -def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>; -def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FMULX16, FMULX32)>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FMULX32)>; -def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^(FMUL|FMULX)v1i64_indexed$")>; -def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FMULX64)>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^(FMUL|FMULX)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FMULX64)>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v8f16|v2i64p)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32|v8i16)rz$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>; -def : InstRW<[FalkorWr_2VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>; -def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(FMUL|FMULX)v2i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instregex "^(FMUL|FMULX)v2i64_indexed$")>; -def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>; +def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>; -def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>; +def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>; +def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>; +def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>; -def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], + (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32], + (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>; -def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>; -def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v1i64_indexed$")>; -def : InstRW<[FalkorWr_2VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>; -def : InstRW<[FalkorWr_2VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v2i64_indexed$")>; // SIMD Integer Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>; @@ -91,12 +657,14 @@ def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$" def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>; @@ -109,6 +677,8 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN) def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>; @@ -119,10 +689,14 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64) def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQDMULL(i16|i32)$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQRDML(A|S)?H(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQDMULL(i16|i32)$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>; @@ -153,7 +727,7 @@ def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL2?(v8i8|v16i8)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; @@ -164,14 +738,18 @@ def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL2?(v1i64|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQDMULLv.*$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^SQDMULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>; def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>; @@ -185,129 +763,159 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>; def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)(i16|i32)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)v.*$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)v[248].*$")>; + // SIMD Load Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVLD], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; -def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instrs LD2i64)>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instrs LD2i64_POST)>; - -def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>; -def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD3i64_POST)>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD4i64_POST)>; - -def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, WriteAdr], (instregex "^LD2i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>; -def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instrs LD3Threev2d_POST)>; -def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "LD3i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, WriteAdr], (instregex "LD3i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>; -def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instrs LD4Fourv2d_POST)>; -def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, WriteAdr], (instregex "^LD4i(8|16|32)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "LD3Threev(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, WriteAdr],(instregex "LD3Threev(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, WriteAdr],(instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "LD3Threev(16b|8h|4s)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>; - -def : InstRW<[FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, WriteAdr],(instregex "LD3Threev(16b|8h|4s)_POST$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instrs LD2i64)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instrs LD2i64_POST)>; + +def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD3i64_POST)>; +def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD4i64_POST)>; + +def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instrs LD3Threev2d_POST)>; +def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instrs LD4Fourv2d_POST)>; +def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "^LD3Threev(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1none_4cyc], + (instregex "^LD3Threev(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2none_4cyc], + (instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD3Threev(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc], + (instregex "^LD3Threev(16b|8h|4s)_POST$")>; + +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc], + (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; // Arithmetic and Logical Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>; -def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>; // SIMD Miscellaneous Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>; def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>; def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN|XTN2)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>; -def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>; -def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FRECPS64, FRSQRTS64)>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FRECPS64, FRSQRTS64)>; -def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],(instregex "^INSv(i32|i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc], + (instregex "^INSv(i32|i64)(gpr|lane)$")>; def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; +def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>; def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>; def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>; -def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs FRECPSv4f32, FRSQRTSv4f32)>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instrs FRECPSv4f32, FRSQRTSv4f32)>; -def : InstRW<[FalkorWr_2VXVY_6cyc], (instrs FRECPSv2f64, FRSQRTSv2f64)>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instrs FRECPSv2f64, FRSQRTSv2f64)>; def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>; def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>; @@ -320,46 +928,95 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>; // SIMD Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>; - -def : InstRW<[WriteVST, WriteVST], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST3(i8|i16|i32|i64)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST4(i8|i16|i32|i64)$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST3(i8|i16|i32|i64)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST4(i8|i16|i32|i64)_POST$")>; - -def : InstRW<[WriteV, WriteVST, WriteVST], (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>; -def : InstRW<[WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>; - -def : InstRW<[WriteVST, WriteVST, WriteVST], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST], (instrs ST3Threev2d)>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST3Threev2d_POST)>; - -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST], (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>; -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>; - -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instrs ST4Fourv2d)>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST4Fourv2d_POST)>; - -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST3Three(v16b|v8h|v4s)$")>; -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; - -def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST4Four(v16b|v8h|v4s)$")>; -def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; + +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STR(Q|D|S|H|B)ui$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^STR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_STRVro], (instregex "^STR(D|S|H|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^STPQi$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^STPQ(post|pre)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STP(D|S)(i)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^STP(D|S)(post|pre)$")>; +def : InstRW<[FalkorWr_STRQro], (instregex "^STRQro(W|X)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STUR(Q|D|S|B|H)i$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instrs STNPDi, STNPSi)>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instrs STNPQi)>; + +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>; + +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST3(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST4(i8|i16|i32|i64)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^ST3(i8|i16|i32|i64)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^ST4(i8|i16|i32|i64)_POST$")>; + +def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc], + (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc], + (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>; + +def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instrs ST3Threev2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc], + (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc], + (instrs ST3Threev2d_POST)>; + +def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc], + (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc], + (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>; + +def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instrs ST4Fourv2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc], + (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc], + (instrs ST4Fourv2d_POST)>; + +def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc], + (instregex "^ST3Three(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc], + (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; + +def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc], + (instregex "^ST4Four(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc], + (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; // Branch Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1none_0cyc], (instrs B)>; +def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>; def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>; +def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>; def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>; def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>; def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>; @@ -376,85 +1033,105 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>; // FP Load Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; -def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; -def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>; -def : InstRW<[FalkorWr_LDR], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDP(D|S)i$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi, WriteAdr],(instregex "LDP(D|S)(pre|post)$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi, WriteAdr],(instregex "^LDPQ(pre|post)$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], + (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(Q|D|S|H|B)i$")>; +def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc], + (instrs LDNPQi)>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc], + (instrs LDPQi)>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc], + (instregex "LDNP(D|S)i$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc], + (instregex "LDP(D|S)i$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc], + (instregex "LDP(D|S)(pre|post)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc], + (instregex "^LDPQ(pre|post)$")>; // FP Data Processing Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(H|S|D)r(r|i)$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(H|S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(H|S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(H|S|D)rrr$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(S|D)r(r|i)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(16|32|64)p$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTHSr, FCVTHDr)>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(H|S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(16|32|64)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTSHr, FCVTDHr)>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>; -def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^F(N)?MUL(H|S)rr$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^F(N)?MULSrr$")>; + +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^F(N)?MULDrr$")>; -def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>; +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>; +def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>; +def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>; +def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>; -def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], + (instregex "^F(N)?M(ADD|SUB)Srrr$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], + (instregex "^F(N)?M(ADD|SUB)Drrr$")>; -def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>; -def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>; // FP Miscellaneous Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Hi|Hr|S0|Si|Sr|D0|Di|Dr|v.*_ns)$")>; +def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>; +def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>; +def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd +// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr +def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v1i64|v4i16|v2f32|v4f16|d|s)(_shift)?")>; - -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>; // Load Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>; - -def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>; -def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(B|H|W|X)ui$")>; -def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(B|H|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc], + (instregex "^LDNP(W|X)i$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc], + (instregex "^LDP(W|X)i$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc], + (instregex "^LDP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], + (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>; def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>; def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(B|H|W|X)i$")>; - +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(BB|HH|W|X)i$")>; +def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc], + (instrs LDPSWi)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc], + (instregex "^LDPSW(post|pre)$")>; def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc], + (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRSro], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>; def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; -def : InstRW<[FalkorWr_PRFM], (instregex "^PRFMro(W|X)$")>; -def : InstRW<[FalkorWr_LDR], (instregex "^LDR(B|H|W|X)ro(W|X)$")>; - -def : InstRW<[FalkorWr_LDRS], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; - -def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; -def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>; -def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>; // Miscellaneous Data-Processing Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>; @@ -464,23 +1141,35 @@ def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>; // Divide and Multiply Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1X_4cyc], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; -def : InstRW<[FalkorWr_1X_4cyc], (instregex "^M(ADD|SUB)Wrrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; +def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], + (instregex "^M(ADD|SUB)Wrrr$")>; -def : InstRW<[FalkorWr_1X_5cyc], (instregex "^(S|U)MULHrr$")>; -def : InstRW<[FalkorWr_1X_5cyc], (instregex "^M(ADD|SUB)Xrrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^M(ADD|SUB)Xrrr$")>; -def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; -def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>; +def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; +def : InstRW<[FalkorWr_1X_1Z_11cyc], (instregex "^(S|U)DIVXr$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)(MLAL|MLSL|MULL)v.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(S|U)MULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^(S|U)(MLAL|MLSL)v.*$")>; // Move and Shift Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>; -def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>; -def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd +def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>; +def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation) +def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>], + (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>; +def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>], + (instrs LOADgot)>; // Other Instructions // ----------------------------------------------------------------------------- @@ -490,34 +1179,31 @@ def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>; def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>; def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS)>; +def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>; def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>; def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; -def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>; -def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs STNPWi, STNPXi)>; def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; -def : InstRW<[WriteST], (instregex "^LDC.*$")>; -def : InstRW<[WriteST], (instregex "^STLR(B|H|W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXP(W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXP(W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXR(B|H|W|X)$")>; -def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>; // Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>; -def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>; -def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>; -def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>; -def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>; - -def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STP(W|X)i$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc], + (instregex "^STP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc], + (instregex "^STR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_STRro], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STTR(B|H|W|X)i$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STUR(BB|HH|W|X)i$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^STPQ(i|post|pre)$")>; diff --git a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td deleted file mode 100644 index 9cdb4be4246bc8c3daa0cc08a79fb7ac6f4be949..0000000000000000000000000000000000000000 --- a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td +++ /dev/null @@ -1,361 +0,0 @@ -//=- AArch64SchedFalkorWrRes.td - Falkor Write Res ---*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Contains all of the Falkor specific SchedWriteRes types. The approach -// below is to define a generic SchedWriteRes for every combination of -// latency and microOps. The naming conventions is to use a prefix, one field -// for latency, and one or more microOp count/type designators. -// Prefix: FalkorWr -// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD) -// Latency: #cyc -// -// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued -// down one Z pipe, six SD pipes, four VX pipes and the total latency is -// six cycles. -// -// Contains all of the Falkor specific ReadAdvance types for forwarding logic. -// -// Contains all of the Falkor specific WriteVariant types for immediate zero -// and LSLFast. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Define 1 micro-op types - - -def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } -def FalkorWr_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } -def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } -def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; } -def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; } -def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } -def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } -def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } -def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } -def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; } -def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } -def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; } - -def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } -def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } -def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; } -def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } -def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } -def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } - -def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; } -def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; } -def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; } - -def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; } -def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; } -def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; } - -//===----------------------------------------------------------------------===// -// Define 2 micro-op types - -def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 1; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 2; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 3; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 5; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 2; -} -def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { - let Latency = 4; - let NumMicroOps = 2; -} -def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 2; -} - -def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { - let Latency = 10; - let NumMicroOps = 2; -} - -def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { - let Latency = 4; - let NumMicroOps = 2; -} -def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> { - let Latency = 0; - let NumMicroOps = 2; -} - -def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { - let Latency = 8; - let ResourceCycles = [2, 8]; -} - -def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { - let Latency = 16; - let ResourceCycles = [2, 16]; -} - -def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { - let Latency = 3; - let NumMicroOps = 2; -} - -def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 2; -} - -def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { - let Latency = 0; - let NumMicroOps = 2; -} - -//===----------------------------------------------------------------------===// -// Define 3 micro-op types - -def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 3; - let NumMicroOps = 3; -} -def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 3; -} -def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 5; - let NumMicroOps = 3; -} -def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 3; -} -def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 3; -} -def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 3; -} - -def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitZ]> { - let Latency = 3; - let NumMicroOps = 3; -} - -//===----------------------------------------------------------------------===// -// Define 4 micro-op types - -def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, - FalkorUnitVX, FalkorUnitVY]> { - let Latency = 2; - let NumMicroOps = 4; -} - -def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 2; - let NumMicroOps = 4; -} -def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 3; - let NumMicroOps = 4; -} -def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 4; -} -def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 6; - let NumMicroOps = 4; -} - -def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitLD, FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 4; -} - -def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 4; -} - -def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 4; -} - -//===----------------------------------------------------------------------===// -// Define 5 micro-op types - -def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 5; -} -def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 5; -} -def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY]> { - let Latency = 7; - let NumMicroOps = 5; -} - -//===----------------------------------------------------------------------===// -// Define 6 micro-op types - -def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 6; -} - -//===----------------------------------------------------------------------===// -// Define 8 micro-op types - -def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 8; -} - -//===----------------------------------------------------------------------===// -// Define 9 micro-op types - -def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, - FalkorUnitLD, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitLD, - FalkorUnitLD, FalkorUnitXYZ, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 9; -} - -def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, - FalkorUnitLD, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitXYZ, - FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 9; -} - -// Forwarding logic is modeled for vector multiply and accumulate -// ----------------------------------------------------------------------------- -def FalkorReadVMA : SchedReadAdvance<2, [FalkorWr_1VXVY_4cyc, - FalkorWr_2VXVY_4cyc]>; -def FalkorReadFMA : SchedReadAdvance<3, [FalkorWr_1VXVY_5cyc, - FalkorWr_1VXVY_6cyc, - FalkorWr_2VXVY_5cyc, - FalkorWr_2VXVY_6cyc]>; - -// SchedPredicates and WriteVariants for Immediate Zero and LSLFast -// ----------------------------------------------------------------------------- -def FalkorImmZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>; -def FalkorLSLFastPred : SchedPredicate<[{TII->isFalkorLSLFast(*MI)}]>; - -def FalkorWr_FMOV : SchedWriteVariant<[ - SchedVar, - SchedVar]>; - -def FalkorWr_MOVZ : SchedWriteVariant<[ - SchedVar, - SchedVar]>; - -def FalkorWr_LDR : SchedWriteVariant<[ - SchedVar, - SchedVar]>; - -def FalkorWr_ADD : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar]>; - -def FalkorWr_PRFM : SchedWriteVariant<[ - SchedVar, - SchedVar]>; - -def FalkorWr_LDRS : SchedWriteVariant<[ - SchedVar, - SchedVar]>; diff --git a/lib/Target/AArch64/AArch64SchedM1.td b/lib/Target/AArch64/AArch64SchedM1.td index 3fbbc0be682d739c4cf60a633565519fd5d8d1c0..3b71cf8399a0df4e2362869bf400b5ecca8fd170 100644 --- a/lib/Target/AArch64/AArch64SchedM1.td +++ b/lib/Target/AArch64/AArch64SchedM1.td @@ -23,7 +23,7 @@ def ExynosM1Model : SchedMachineModel { let LoopMicroOpBufferSize = 24; // Based on the instruction queue size. let LoadLatency = 4; // Optimistic load cases. let MispredictPenalty = 14; // Minimum branch misprediction penalty. - let CompleteModel = 0; // Use the default model otherwise. + let CompleteModel = 1; // Use the default model otherwise. } //===----------------------------------------------------------------------===// @@ -72,14 +72,14 @@ def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; } def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; } def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; } -def M1WriteLA : SchedWriteVariant<[SchedVar, SchedVar]>; def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; } def M1WriteS2 : SchedWriteRes<[M1UnitS]> { let Latency = 2; } def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; } -def M1WriteSA : SchedWriteVariant<[SchedVar, SchedVar]>; @@ -125,13 +125,13 @@ def : WriteRes { let Latency = 0; } // Load instructions. def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 4; } -def : SchedAlias; +def : SchedAlias; // Store instructions. def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } -def : SchedAlias; +def : SchedAlias; // FP data instructions. def : WriteRes { let Latency = 3; } @@ -231,6 +231,111 @@ def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; } def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; } def M1WriteTB : SchedWriteRes<[M1UnitC, M1UnitALU]> { let Latency = 2; } +def M1WriteVLDA : SchedWriteRes<[M1UnitL, + M1UnitL]> { let Latency = 6; } +def M1WriteVLDB : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 7; } +def M1WriteVLDC : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 8; } +def M1WriteVLDD : SchedWriteRes<[M1UnitL, + M1UnitNALU]> { let Latency = 7; + let ResourceCycles = [2]; } +def M1WriteVLDE : SchedWriteRes<[M1UnitL, + M1UnitNALU]> { let Latency = 6; } +def M1WriteVLDF : SchedWriteRes<[M1UnitL, + M1UnitL]> { let Latency = 10; + let ResourceCycles = [5]; } +def M1WriteVLDG : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let ResourceCycles = [2]; } +def M1WriteVLDH : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU]> { let Latency = 6; } +def M1WriteVLDI : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 12; + let ResourceCycles = [6]; } +def M1WriteVLDJ : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 9; + let ResourceCycles = [4]; } +def M1WriteVLDK : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 9; + let ResourceCycles = [4]; } +def M1WriteVLDL : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let ResourceCycles = [2]; } +def M1WriteVLDM : SchedWriteRes<[M1UnitL, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU, + M1UnitNALU]> { let Latency = 7; + let ResourceCycles = [2]; } +def M1WriteVLDN : SchedWriteRes<[M1UnitL, + M1UnitL, + M1UnitL, + M1UnitL]> { let Latency = 14; + let ResourceCycles = [7]; } + +def M1WriteVSTA : WriteSequence<[WriteVST], 2>; +def M1WriteVSTB : WriteSequence<[WriteVST], 3>; +def M1WriteVSTC : WriteSequence<[WriteVST], 4>; +def M1WriteVSTD : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitFST]> { let Latency = 7; + let ResourceCycles = [7]; } +def M1WriteVSTE : SchedWriteRes<[M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST]> { let Latency = 8; + let ResourceCycles = [8]; } +def M1WriteVSTF : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 15; + let ResourceCycles = [15]; } +def M1WriteVSTG : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 16; + let ResourceCycles = [16]; } +def M1WriteVSTH : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 14; + let ResourceCycles = [14]; } +def M1WriteVSTI : SchedWriteRes<[M1UnitNALU, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitS, + M1UnitFST, + M1UnitFST, + M1UnitFST]> { let Latency = 17; + let ResourceCycles = [17]; } // Branch instructions def : InstRW<[M1WriteB1], (instrs Bcc)>; @@ -360,8 +465,233 @@ def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64 def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>; // ASIMD load instructions. +def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[M1WriteVLDD, + WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>; +def : InstRW<[M1WriteVLDE, + WriteAdr], (instregex "LD1i(64)_POST$")>; + +def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(1d)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteL5, + WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>; +def : InstRW<[M1WriteVLDG, + WriteAdr], (instregex "LD2i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>; +def : InstRW<[M1WriteVLDG, + WriteAdr], (instregex "LD2i(32)_POST$")>; +def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>; +def : InstRW<[M1WriteVLDH, + WriteAdr], (instregex "LD2i(64)_POST$")>; + +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVLDA, + WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>; +def : InstRW<[M1WriteVLDF, + WriteAdr], (instregex "LD2Twov(2d)_POST$")>; + +def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>; +def : InstRW<[M1WriteVLDJ, + WriteAdr], (instregex "LD3i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>; +def : InstRW<[M1WriteVLDJ, + WriteAdr], (instregex "LD3i(32)_POST$")>; +def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>; +def : InstRW<[M1WriteVLDL, + WriteAdr], (instregex "LD3i(64)_POST$")>; + +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>; +def : InstRW<[M1WriteVLDB, + WriteAdr], (instregex "LD3Rv(2d)_POST$")>; + +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>; +def : InstRW<[M1WriteVLDI, + WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>; +def : InstRW<[M1WriteVLDK, + WriteAdr], (instregex "LD4i(8|16)_POST$")>; +def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>; +def : InstRW<[M1WriteVLDK, + WriteAdr], (instregex "LD4i(32)_POST$")>; +def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>; +def : InstRW<[M1WriteVLDM, + WriteAdr], (instregex "LD4i(64)_POST$")>; + +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(1d)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>; +def : InstRW<[M1WriteVLDC, + WriteAdr], (instregex "LD4Rv(2d)_POST$")>; + +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[M1WriteVLDN, + WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; // ASIMD store instructions. +def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST1i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST1i(64)_POST$")>; + +def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST, + WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTA, + WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTA, + WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTB, + WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTB, + WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[M1WriteVSTC, + WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[M1WriteVSTC, + WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2i(8|16|32)_POST$")>; +def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2i(64)_POST$")>; + +def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTD, + WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTE, + WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>; +def : InstRW<[M1WriteVSTE, + WriteAdr], (instregex "ST2Twov(2d)_POST$")>; + +def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST3i(8|16)_POST$")>; +def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST3i(32)_POST$")>; +def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST3i(64)_POST$")>; + +def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTG, + WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>; +def : InstRW<[M1WriteVSTG, + WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST4i(8|16)_POST$")>; +def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>; +def : InstRW<[M1WriteVSTH, + WriteAdr], (instregex "ST4i(32)_POST$")>; +def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST4i(64)_POST$")>; + +def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[M1WriteVSTF, + WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[M1WriteVSTI, + WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[M1WriteVSTI, + WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; // Cryptography instructions. def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index b3aba4781db8954aa33b845cfedf7bb1e30e0b6b..a9a9d5ce842973fab1248a59d20239775219a340 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -12,8 +12,22 @@ //===----------------------------------------------------------------------===// #include "AArch64Subtarget.h" + +#include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64PBQPRegAlloc.h" +#include "AArch64TargetMachine.h" + +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "AArch64CallLowering.h" +#include "AArch64LegalizerInfo.h" +#include "AArch64RegisterBankInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" +#include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#endif #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/TargetRegistry.h" @@ -35,6 +49,11 @@ static cl::opt UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " "an address is ignored"), cl::init(false), cl::Hidden); +static cl::opt + UseNonLazyBind("aarch64-enable-nonlazybind", + cl::desc("Call nonlazybind functions via direct GOT load"), + cl::init(false), cl::Hidden); + AArch64Subtarget & AArch64Subtarget::initializeSubtargetDependencies(StringRef FS, StringRef CPUString) { @@ -71,7 +90,12 @@ void AArch64Subtarget::initializeProperties() { break; case Falkor: MaxInterleaveFactor = 4; - VectorInsertExtractBaseCost = 2; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; + CacheLineSize = 128; + PrefetchDistance = 820; + MinPrefetchStride = 2048; + MaxPrefetchIterationsAhead = 8; break; case Kryo: MaxInterleaveFactor = 4; @@ -80,6 +104,8 @@ void AArch64Subtarget::initializeProperties() { PrefetchDistance = 740; MinPrefetchStride = 1024; MaxPrefetchIterationsAhead = 11; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; break; case ThunderX2T99: CacheLineSize = 64; @@ -89,6 +115,8 @@ void AArch64Subtarget::initializeProperties() { PrefetchDistance = 128; MinPrefetchStride = 1024; MaxPrefetchIterationsAhead = 4; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; break; case ThunderX: case ThunderXT88: @@ -97,6 +125,8 @@ void AArch64Subtarget::initializeProperties() { CacheLineSize = 128; PrefFunctionAlignment = 3; PrefLoopAlignment = 2; + // FIXME: remove this to enable 64-bit SLP if performance looks good. + MinVectorRegisterBitWidth = 128; break; case CortexA35: break; case CortexA53: break; @@ -106,13 +136,62 @@ void AArch64Subtarget::initializeProperties() { } } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct AArch64GISelActualAccessor : public GISelAccessor { + std::unique_ptr CallLoweringInfo; + std::unique_ptr InstSelector; + std::unique_ptr Legalizer; + std::unique_ptr RegBankInfo; + + const CallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()), IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), - TLInfo(TM, *this), GISel() {} + TLInfo(TM, *this), GISel() { +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *AArch64GISel = new GISelAccessor(); +#else + AArch64GISelActualAccessor *AArch64GISel = new AArch64GISelActualAccessor(); + AArch64GISel->CallLoweringInfo.reset( + new AArch64CallLowering(*getTargetLowering())); + AArch64GISel->Legalizer.reset(new AArch64LegalizerInfo()); + + auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo()); + + // FIXME: At this point, we can't rely on Subtarget having RBI. + // It's awkward to mix passing RBI and the Subtarget; should we pass + // TII/TRI as well? + AArch64GISel->InstSelector.reset(createAArch64InstructionSelector( + *static_cast(&TM), *this, *RBI)); + + AArch64GISel->RegBankInfo.reset(RBI); +#endif + setGISelAccessor(*AArch64GISel); +} const CallLowering *AArch64Subtarget::getCallLowering() const { assert(GISel && "Access to GlobalISel APIs not set"); @@ -155,6 +234,23 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, return AArch64II::MO_NO_FLAG; } +unsigned char AArch64Subtarget::classifyGlobalFunctionReference( + const GlobalValue *GV, const TargetMachine &TM) const { + // MachO large model always goes via a GOT, because we don't have the + // relocations available to do anything else.. + if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() && + !GV->hasInternalLinkage()) + return AArch64II::MO_GOT; + + // NonLazyBind goes via GOT unless we know it's available locally. + auto *F = dyn_cast(GV); + if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) && + !TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + return AArch64II::MO_GOT; + + return AArch64II::MO_NO_FLAG; +} + /// This function returns the name of a function which has an interface /// like the non-standard bzero function, if such a function exists on /// the current subtarget and it is considered prefereable over diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 40ad9185012cbfb114835470addd290dc271efaf..7933e58c49eed755619e37800a62596e19ebc4e3 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -83,6 +83,9 @@ protected: // NegativeImmediates - transform instructions with negative immediates bool NegativeImmediates = true; + // Enable 64-bit vectorization in SLP. + unsigned MinVectorRegisterBitWidth = 64; + bool UseAA = false; bool PredictableSelectIsExpensive = false; bool BalanceFPOps = false; @@ -106,6 +109,7 @@ protected: unsigned PrefFunctionAlignment = 0; unsigned PrefLoopAlignment = 0; unsigned MaxJumpTableSize = 0; + unsigned WideningBaseCost = 0; // ReserveX18 - X18 is not available as a general purpose register. bool ReserveX18; @@ -188,6 +192,10 @@ public: bool isXRaySupported() const override { return true; } + unsigned getMinVectorRegisterBitWidth() const { + return MinVectorRegisterBitWidth; + } + bool isX18Reserved() const { return ReserveX18; } bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } @@ -226,6 +234,8 @@ public: unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } + unsigned getWideningBaseCost() const { return WideningBaseCost; } + /// CPU has TBI (top byte of addresses is ignored during HW address /// translation) and OS enables it. bool supportsAddressTopByteIgnored() const; @@ -271,6 +281,9 @@ public: unsigned char ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const; + unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, + const TargetMachine &TM) const; + /// This function returns the name of a function which has an interface /// like the non-standard bzero function, if such a function exists on /// the current subtarget and it is considered prefereable over diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index dcc51bf023299b64231c44ea1a3a4aecf65d6762..d4a8cecdb29f1654ce9beda207659b6956c1bee8 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -10,25 +10,20 @@ // //===----------------------------------------------------------------------===// +#include "AArch64TargetMachine.h" #include "AArch64.h" -#include "AArch64CallLowering.h" -#include "AArch64LegalizerInfo.h" #include "AArch64MacroFusion.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "AArch64RegisterBankInfo.h" -#endif #include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" @@ -114,11 +109,6 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden, cl::desc("Work around Cortex-A53 erratum 835769"), cl::init(false)); -static cl::opt - EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden, - cl::desc("Enable the type promotion pass"), - cl::init(false)); - static cl::opt EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), @@ -152,7 +142,6 @@ extern "C" void LLVMInitializeAArch64Target() { initializeGlobalISel(*PR); initializeAArch64A53Fix835769Pass(*PR); initializeAArch64A57FPLoadBalancingPass(*PR); - initializeAArch64AddressTypePromotionPass(*PR); initializeAArch64AdvSIMDScalarPass(*PR); initializeAArch64CollectLOHPass(*PR); initializeAArch64ConditionalComparesPass(*PR); @@ -222,35 +211,6 @@ AArch64TargetMachine::AArch64TargetMachine( AArch64TargetMachine::~AArch64TargetMachine() = default; -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct AArch64GISelActualAccessor : public GISelAccessor { - std::unique_ptr CallLoweringInfo; - std::unique_ptr InstSelector; - std::unique_ptr Legalizer; - std::unique_ptr RegBankInfo; - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - const AArch64Subtarget * AArch64TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -271,26 +231,6 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const { resetTargetOptions(F); I = llvm::make_unique(TargetTriple, CPU, FS, *this, isLittle); -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - AArch64GISelActualAccessor *GISel = - new AArch64GISelActualAccessor(); - GISel->CallLoweringInfo.reset( - new AArch64CallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new AArch64LegalizerInfo()); - - auto *RBI = new AArch64RegisterBankInfo(*I->getRegisterInfo()); - - // FIXME: At this point, we can't rely on Subtarget having RBI. - // It's awkward to mix passing RBI and the Subtarget; should we pass - // TII/TRI as well? - GISel->InstSelector.reset( - createAArch64InstructionSelector(*this, *I, *RBI)); - - GISel->RegBankInfo.reset(RBI); -#endif - I->setGISelAccessor(*GISel); } return I.get(); } @@ -316,9 +256,9 @@ namespace { /// AArch64 Code Generator Pass Configuration Options. class AArch64PassConfig : public TargetPassConfig { public: - AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM) + AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) { - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM.getOptLevel() != CodeGenOpt::None) substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); } @@ -338,7 +278,7 @@ public: ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget(); - if (ST.hasFuseLiterals()) { + if (ST.hasFuseAES() || ST.hasFuseLiterals()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). ScheduleDAGMI *DAG = createGenericSchedPostRA(C); @@ -356,6 +296,7 @@ public: bool addIRTranslator() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() override; + void addPreGlobalInstructionSelect() override; bool addGlobalInstructionSelect() override; #endif bool addILPOpts() override; @@ -376,13 +317,13 @@ TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() { } TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { - return new AArch64PassConfig(this, PM); + return new AArch64PassConfig(*this, PM); } void AArch64PassConfig::addIRPasses() { // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg // ourselves. - addPass(createAtomicExpandPass(TM)); + addPass(createAtomicExpandPass()); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in @@ -401,7 +342,7 @@ void AArch64PassConfig::addIRPasses() { // Match interleaved memory accesses to ldN/stN intrinsics. if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createInterleavedAccessPass(TM)); + addPass(createInterleavedAccessPass()); if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { // Call SeparateConstOffsetFromGEP pass to extract constants within indices @@ -434,9 +375,6 @@ bool AArch64PassConfig::addPreISel() { addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize)); } - if (TM->getOptLevel() != CodeGenOpt::None && EnableAddressTypePromotion) - addPass(createAArch64AddressTypePromotionPass()); - return false; } @@ -468,6 +406,12 @@ bool AArch64PassConfig::addRegBankSelect() { return false; } +void AArch64PassConfig::addPreGlobalInstructionSelect() { + // Workaround the deficiency of the fast register allocator. + if (TM->getOptLevel() == CodeGenOpt::None) + addPass(new Localizer()); +} + bool AArch64PassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); return false; diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 8875f9b7264754003bcc79fbd7f158089771ef3b..4bc2c060a0684ec2d9b5f263bc7edd1b9d565e7e 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -9,12 +9,12 @@ #include "AArch64TargetObjectFile.h" #include "AArch64TargetMachine.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; using namespace dwarf; @@ -70,3 +70,11 @@ const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel( const MCExpr *PC = MCSymbolRefExpr::create(PCSym, getContext()); return MCBinaryExpr::createSub(Res, PC, getContext()); } + +void AArch64_MachoTargetObjectFile::getNameWithPrefix( + SmallVectorImpl &OutName, const GlobalValue *GV, + const TargetMachine &TM) const { + // AArch64 does not use section-relative relocations so any global symbol must + // be accessed via at least a linker-private symbol. + getMangler().getNameWithPrefix(OutName, GV, /* CannotUsePrivateLabel */ true); +} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 05e1dfa9e6c9bd250517ba7c7a91624e8e3a040a..47e3bce43f6eb3c244602d893822e9697e44c29b 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -40,6 +40,9 @@ public: const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; + + void getNameWithPrefix(SmallVectorImpl &OutName, const GlobalValue *GV, + const TargetMachine &TM) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 4d59da0c646d26b41751f0eaa51cca0f86cf53a1..a4328682b93c37dce9e1ed7f4cf93b174e6db917 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -9,8 +9,8 @@ #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" @@ -176,11 +176,95 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } +bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, + ArrayRef Args) { + + // A helper that returns a vector type from the given type. The number of + // elements in type Ty determine the vector width. + auto toVectorTy = [&](Type *ArgTy) { + return VectorType::get(ArgTy->getScalarType(), + DstTy->getVectorNumElements()); + }; + + // Exit early if DstTy is not a vector type whose elements are at least + // 16-bits wide. + if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16) + return false; + + // Determine if the operation has a widening variant. We consider both the + // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the + // instructions. + // + // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we + // verify that their extending operands are eliminated during code + // generation. + switch (Opcode) { + case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2). + case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2). + break; + default: + return false; + } + + // To be a widening instruction (either the "wide" or "long" versions), the + // second operand must be a sign- or zero extend having a single user. We + // only consider extends having a single user because they may otherwise not + // be eliminated. + if (Args.size() != 2 || + (!isa(Args[1]) && !isa(Args[1])) || + !Args[1]->hasOneUse()) + return false; + auto *Extend = cast(Args[1]); + + // Legalize the destination type and ensure it can be used in a widening + // operation. + auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy); + unsigned DstElTySize = DstTyL.second.getScalarSizeInBits(); + if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits()) + return false; + + // Legalize the source type and ensure it can be used in a widening + // operation. + Type *SrcTy = toVectorTy(Extend->getSrcTy()); + auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy); + unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits(); + if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits()) + return false; + + // Get the total number of vector elements in the legalized types. + unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements(); + unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements(); + + // Return true if the legalized types have the same number of vector elements + // and the destination element type size is twice that of the source type. + return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize; +} + int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + // If the cast is observable, and it is used by a widening instruction (e.g., + // uaddl, saddw, etc.), it may be free. + if (I && I->hasOneUse()) { + auto *SingleUser = cast(*I->user_begin()); + SmallVector Operands(SingleUser->operand_values()); + if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) { + // If the cast is the second operand, it is free. We will generate either + // a "wide" or "long" version of the widening instruction. + if (I == SingleUser->getOperand(1)) + return 0; + // If the cast is not the second operand, it will be free if it looks the + // same as the second operand. In this case, we will generate a "long" + // version of the widening instruction. + if (auto *Cast = dyn_cast(SingleUser->getOperand(1))) + if (I->getOpcode() == Cast->getOpcode() && + cast(I)->getSrcTy() == Cast->getSrcTy()) + return 0; + } + } + EVT SrcTy = TLI->getValueType(DL, Src); EVT DstTy = TLI->getValueType(DL, Dst); @@ -379,6 +463,16 @@ int AArch64TTIImpl::getArithmeticInstrCost( // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.), + // add in the widening overhead specified by the sub-target. Since the + // extends feeding widening instructions are performed automatically, they + // aren't present in the generated code and have a zero cost. By adding a + // widening overhead here, we attach the total cost of the combined operation + // to the widening instruction. + int Cost = 0; + if (isWideningInstruction(Ty, Opcode, Args)) + Cost += ST->getWideningBaseCost(); + int ISD = TLI->InstructionOpcodeToISD(Opcode); if (ISD == ISD::SDIV && @@ -388,9 +482,9 @@ int AArch64TTIImpl::getArithmeticInstrCost( // normally expanded to the sequence ADD + CMP + SELECT + SRA. // The OperandValue properties many not be same as that of previous // operation; conservatively assume OP_None. - int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); + Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); @@ -405,8 +499,8 @@ int AArch64TTIImpl::getArithmeticInstrCost( switch (ISD) { default: - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); case ISD::ADD: case ISD::MUL: case ISD::XOR: @@ -414,7 +508,7 @@ int AArch64TTIImpl::getArithmeticInstrCost( case ISD::AND: // These nodes are marked as 'custom' for combining purposes only. // We know that they are legal. See LowerAdd in ISelLowering. - return 1 * LT.first; + return (Cost + 1) * LT.first; } } @@ -675,3 +769,28 @@ unsigned AArch64TTIImpl::getMinPrefetchStride() { unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() { return ST->getMaxPrefetchIterationsAhead(); } + +bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + assert(isa(Ty) && "Expected Ty to be a vector type"); + unsigned ScalarBits = Ty->getScalarSizeInBits(); + switch (Opcode) { + case Instruction::FAdd: + case Instruction::FMul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Mul: + return false; + case Instruction::Add: + return ScalarBits * Ty->getVectorNumElements() >= 128; + case Instruction::ICmp: + return (ScalarBits < 64) && + (ScalarBits * Ty->getVectorNumElements() >= 128); + case Instruction::FCmp: + return Flags.NoNaN; + default: + llvm_unreachable("Unhandled reduction opcode"); + } + return false; +} diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index e37c003e064c56f548c22790f1dfa51d0c183f70..290a1ca1f24b47b30e8d48f4eaad15fb76056392 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -43,6 +43,9 @@ class AArch64TTIImpl : public BasicTTIImplBase { VECTOR_LDST_FOUR_ELEMENTS }; + bool isWideningInstruction(Type *Ty, unsigned Opcode, + ArrayRef Args); + public: explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), @@ -75,7 +78,7 @@ public: return 31; } - unsigned getRegisterBitWidth(bool Vector) { + unsigned getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasNEON()) return 128; @@ -84,6 +87,10 @@ public: return 64; } + unsigned getMinVectorRegisterBitWidth() { + return ST->getMinVectorRegisterBitWidth(); + } + unsigned getMaxInterleaveFactor(unsigned VF); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, @@ -134,6 +141,13 @@ public: unsigned getMinPrefetchStride(); unsigned getMaxPrefetchIterationsAhead(); + + bool shouldExpandReduction(const IntrinsicInst *II) const { + return false; + } + + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const; /// @} }; diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index cbab68979c56783aa6d55957c4a09430dcd9ad14..e841fb89451911d8e8ac05a5de7417304cdcbb5d 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -2100,27 +2100,9 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { bool isNegative = parseOptionalToken(AsmToken::Minus); const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Real)) { - APFloat RealVal(APFloat::IEEEdouble(), Tok.getString()); - if (isNegative) - RealVal.changeSign(); - - uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); - Parser.Lex(); // Eat the token. - // Check for out of range values. As an exception, we let Zero through, - // as we handle that special case in post-processing before matching in - // order to use the zero register for it. - if (Val == -1 && !RealVal.isPosZero()) { - TokError("expected compatible register or floating-point constant"); - return MatchOperand_ParseFail; - } - Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); - return MatchOperand_Success; - } - if (Tok.is(AsmToken::Integer)) { + if (Tok.is(AsmToken::Real) || Tok.is(AsmToken::Integer)) { int64_t Val; - if (!isNegative && Tok.getString().startswith("0x")) { + if (Tok.is(AsmToken::Integer) && !isNegative && Tok.getString().startswith("0x")) { Val = Tok.getIntVal(); if (Val > 255 || Val < 0) { TokError("encoded floating point value out of range"); @@ -2128,10 +2110,24 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { } } else { APFloat RealVal(APFloat::IEEEdouble(), Tok.getString()); + if (isNegative) + RealVal.changeSign(); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); - // If we had a '-' in front, toggle the sign bit. - IntVal ^= (uint64_t)isNegative << 63; Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); + + // Check for out of range values. As an exception we let Zero through, + // but as tokens instead of an FPImm so that it can be matched by the + // appropriate alias if one exists. + if (RealVal.isPosZero()) { + Parser.Lex(); // Eat the token. + Operands.push_back(AArch64Operand::CreateToken("#0", false, S, getContext())); + Operands.push_back(AArch64Operand::CreateToken(".0", false, S, getContext())); + return MatchOperand_Success; + } else if (Val == -1) { + TokError("expected compatible register or floating-point constant"); + return MatchOperand_ParseFail; + } } Parser.Lex(); // Eat the token. Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); @@ -2477,16 +2473,14 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { return MatchOperand_ParseFail; } - auto DB = AArch64DB::lookupDBByName(Tok.getString()); - if (!DB) { - TokError("invalid barrier option name"); - return MatchOperand_ParseFail; - } - // The only valid named option for ISB is 'sy' - if (Mnemonic == "isb" && DB->Encoding != AArch64DB::sy) { + auto DB = AArch64DB::lookupDBByName(Tok.getString()); + if (Mnemonic == "isb" && (!DB || DB->Encoding != AArch64DB::sy)) { TokError("'sy' or #imm operand expected"); return MatchOperand_ParseFail; + } else if (!DB) { + TokError("invalid barrier option name"); + return MatchOperand_ParseFail; } Operands.push_back(AArch64Operand::CreateBarrier( @@ -3655,21 +3649,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } } - // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR. - if (NumOperands == 3 && Tok == "fmov") { - AArch64Operand &RegOp = static_cast(*Operands[1]); - AArch64Operand &ImmOp = static_cast(*Operands[2]); - if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) { - unsigned zreg = - !AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains( - RegOp.getReg()) - ? AArch64::WZR - : AArch64::XZR; - Operands[2] = AArch64Operand::CreateReg(zreg, false, Op.getStartLoc(), - Op.getEndLoc(), getContext()); - } - } - MCInst Inst; // First try to match against the secondary set of tables containing the // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2"). @@ -3925,10 +3904,14 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) { return false; } +static SMLoc incrementLoc(SMLoc L, int Offset) { + return SMLoc::getFromPointer(L.getPointer() + Offset); +} + /// parseDirectiveCPU /// ::= .cpu id bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { - SMLoc CPULoc = getLoc(); + SMLoc CurLoc = getLoc(); StringRef CPU, ExtensionString; std::tie(CPU, ExtensionString) = @@ -3944,15 +3927,19 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { // FIXME This is using tablegen data, but should be moved to ARMTargetParser // once that is tablegen'ed if (!getSTI().isCPUStringValid(CPU)) { - Error(CPULoc, "unknown CPU name"); + Error(CurLoc, "unknown CPU name"); return false; } MCSubtargetInfo &STI = copySTI(); STI.setDefaultFeatures(CPU, ""); + CurLoc = incrementLoc(CurLoc, CPU.size()); FeatureBitset Features = STI.getFeatureBits(); for (auto Name : RequestedExtensions) { + // Advance source location past '+'. + CurLoc = incrementLoc(CurLoc, 1); + bool EnableFeature = true; if (Name.startswith_lower("no")) { @@ -3960,6 +3947,7 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { Name = Name.substr(2); } + bool FoundExtension = false; for (const auto &Extension : ExtensionMap) { if (Extension.Name != Name) continue; @@ -3973,9 +3961,15 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) { uint64_t Features = ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures)); setAvailableFeatures(Features); + FoundExtension = true; break; } + + if (!FoundExtension) + Error(CurLoc, "unsupported architectural extension"); + + CurLoc = incrementLoc(CurLoc, Name.size()); } return false; } diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt index 6d0930c358f1d60c2781aa5dae7c366804e5515b..f0f50f29be0f3ef83bc8cbf3d9b1cd1c727fa78f 100644 --- a/lib/Target/AArch64/CMakeLists.txt +++ b/lib/Target/AArch64/CMakeLists.txt @@ -39,7 +39,6 @@ endif() add_llvm_target(AArch64CodeGen AArch64A57FPLoadBalancing.cpp - AArch64AddressTypePromotion.cpp AArch64AdvSIMDScalarPass.cpp AArch64AsmPrinter.cpp AArch64CleanupLocalDynamicTLSPass.cpp diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 41ae70f85e584119f118022b3f8aa40642fc119e..fc89657bffd31e018a1d574ff0615c65528c5c96 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" @@ -275,6 +276,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, } } + if (Opcode == AArch64::CompilerBarrier) { + O << '\t' << MAI.getCommentString() << " COMPILER BARRIER"; + printAnnotation(O, Annot); + return; + } + if (!printAliasInstr(MI, STI, O)) printInstruction(MI, STI, O); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index ebf05ae303ddda4e2fa5d9b980085d5757158afb..43a6fa9ce08962df22539e90c6da79f3d3a8a7f0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -11,8 +11,9 @@ #include "AArch64RegisterInfo.h" #include "MCTargetDesc/AArch64FixupKinds.h" #include "llvm/ADT/Triple.h" -#include "llvm/MC/MCAssembler.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -22,7 +23,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; namespace { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index c954c0eb2c6bd35c011d997e98c5fdcda4b777b9..f7dda92fb5514ade91f6e2fda3cffcc33cfa6489 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -15,11 +15,11 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include #include @@ -69,34 +69,34 @@ static bool isNonILP32reloc(const MCFixup &Fixup, return true; case AArch64MCExpr::VK_ABS_G2_S: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_ABS_G2_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_ABS_G1_S: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_ABS_G1_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_DTPREL_G2: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_DTPREL_G1_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_TPREL_G2: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_TPREL_G1_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_GOTTPREL_G1: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1)); - return ELF::R_AARCH64_NONE; + return true; case AArch64MCExpr::VK_GOTTPREL_G0_NC: Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC)); - return ELF::R_AARCH64_NONE; + return true; default: return false; } return false; @@ -141,6 +141,16 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, case AArch64::fixup_aarch64_pcrel_adrp_imm21: if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC) return R_CLS(ADR_PREL_PG_HI21); + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) { + if (IsILP32) { + Ctx.reportError(Fixup.getLoc(), + "invalid fixup for 32-bit pcrel ADRP instruction " + "VK_ABS VK_NC"); + return ELF::R_AARCH64_NONE; + } else { + return ELF::R_AARCH64_ADR_PREL_PG_HI21_NC; + } + } if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC) return R_CLS(ADR_GOT_PAGE); if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC) @@ -179,7 +189,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(ABS32); case FK_Data_8: if (IsILP32) { - Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(ABS64)); + Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte absolute data " + "relocation not supported (LP64 eqv: ABS64)"); return ELF::R_AARCH64_NONE; } else return ELF::R_AARCH64_ABS64; @@ -197,7 +208,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, if (RefKind == AArch64MCExpr::VK_TPREL_LO12) return R_CLS(TLSLE_ADD_TPREL_LO12); if (RefKind == AArch64MCExpr::VK_TLSDESC_LO12) - return R_CLS(TLSDESC_ADD_LO12_NC); + return R_CLS(TLSDESC_ADD_LO12); if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return R_CLS(ADD_ABS_LO12_NC); @@ -245,15 +256,67 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(TLSLE_LDST32_TPREL_LO12); if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) return R_CLS(TLSLE_LDST32_TPREL_LO12_NC); + if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) { + if (IsILP32) { + return ELF::R_AARCH64_P32_LD32_GOT_LO12_NC; + } else { + Ctx.reportError(Fixup.getLoc(), + "LP64 4 byte unchecked GOT load/store relocation " + "not supported (ILP32 eqv: LD32_GOT_LO12_NC"); + return ELF::R_AARCH64_NONE; + } + } + if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC) { + if (IsILP32) { + Ctx.reportError(Fixup.getLoc(), + "ILP32 4 byte checked GOT load/store relocation " + "not supported (unchecked eqv: LD32_GOT_LO12_NC)"); + } else { + Ctx.reportError(Fixup.getLoc(), + "LP64 4 byte checked GOT load/store relocation " + "not supported (unchecked/ILP32 eqv: " + "LD32_GOT_LO12_NC)"); + } + return ELF::R_AARCH64_NONE; + } + if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) { + if (IsILP32) { + return ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC; + } else { + Ctx.reportError(Fixup.getLoc(), "LP64 32-bit load/store " + "relocation not supported (ILP32 eqv: " + "TLSIE_LD32_GOTTPREL_LO12_NC)"); + return ELF::R_AARCH64_NONE; + } + } + if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC) { + if (IsILP32) { + return ELF::R_AARCH64_P32_TLSDESC_LD32_LO12; + } else { + Ctx.reportError(Fixup.getLoc(), + "LP64 4 byte TLSDESC load/store relocation " + "not supported (ILP32 eqv: TLSDESC_LD64_LO12)"); + return ELF::R_AARCH64_NONE; + } + } Ctx.reportError(Fixup.getLoc(), - "invalid fixup for 32-bit load/store instruction"); + "invalid fixup for 32-bit load/store instruction " + "fixup_aarch64_ldst_imm12_scale4"); return ELF::R_AARCH64_NONE; case AArch64::fixup_aarch64_ldst_imm12_scale8: if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return R_CLS(LDST64_ABS_LO12_NC); - if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) - return R_CLS(LD64_GOT_LO12_NC); + if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) { + if (!IsILP32) { + return ELF::R_AARCH64_LD64_GOT_LO12_NC; + } else { + Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " + "relocation not supported (LP64 eqv: " + "LD64_GOT_LO12_NC)"); + return ELF::R_AARCH64_NONE; + } + } if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) return R_CLS(TLSLD_LDST64_DTPREL_LO12); if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) @@ -262,19 +325,40 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, return R_CLS(TLSLE_LDST64_TPREL_LO12); if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) return R_CLS(TLSLE_LDST64_TPREL_LO12_NC); - if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) - return IsILP32 ? ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC - : ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; - if (SymLoc == AArch64MCExpr::VK_TLSDESC && IsNC) - return IsILP32 ? ELF::R_AARCH64_P32_TLSDESC_LD32_LO12_NC - : ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; - + if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) { + if (!IsILP32) { + return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; + } else { + Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " + "relocation not supported (LP64 eqv: " + "TLSIE_LD64_GOTTPREL_LO12_NC)"); + return ELF::R_AARCH64_NONE; + } + } + if (SymLoc == AArch64MCExpr::VK_TLSDESC) { + if (!IsILP32) { + return ELF::R_AARCH64_TLSDESC_LD64_LO12; + } else { + Ctx.reportError(Fixup.getLoc(), "ILP32 64-bit load/store " + "relocation not supported (LP64 eqv: " + "TLSDESC_LD64_LO12)"); + return ELF::R_AARCH64_NONE; + } + } Ctx.reportError(Fixup.getLoc(), "invalid fixup for 64-bit load/store instruction"); return ELF::R_AARCH64_NONE; case AArch64::fixup_aarch64_ldst_imm12_scale16: if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) return R_CLS(LDST128_ABS_LO12_NC); + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) + return R_CLS(TLSLD_LDST128_DTPREL_LO12); + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) + return R_CLS(TLSLD_LDST128_DTPREL_LO12_NC); + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) + return R_CLS(TLSLE_LDST128_TPREL_LO12); + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) + return R_CLS(TLSLE_LDST128_TPREL_LO12_NC); Ctx.reportError(Fixup.getLoc(), "invalid fixup for 128-bit load/store instruction"); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 5903e1e36d453fdec4828e0c708540bb395707d4..031aa8b81e35be3bf0411dd1418a98d61b3cd18e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" @@ -30,7 +31,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" @@ -102,8 +102,8 @@ public: /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to add the appropriate mapping symbol if /// necessary. - void EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) override { + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override { EmitA64MappingSymbol(); MCELFStreamer::EmitInstruction(Inst, STI); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 8fc82232959586e8bcb0c351b30e62b0ed1314b8..1b28df963b40d681ac4f73fee798b4fe1bf2bf65 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -32,14 +32,15 @@ static cl::opt AsmWriterVariant( clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"))); AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { - // We prefer NEON instructions to be printed in the short form. - AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant; + // We prefer NEON instructions to be printed in the short, Apple-specific + // form when targeting Darwin. + AssemblerDialect = AsmWriterVariant == Default ? Apple : AsmWriterVariant; PrivateGlobalPrefix = "L"; PrivateLabelPrefix = "L"; SeparatorString = "%%"; CommentString = ";"; - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; AlignmentIsInBytes = false; UsesELFSectionDirectiveForBSS = true; @@ -68,10 +69,11 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { if (T.getArch() == Triple::aarch64_be) IsLittleEndian = false; - // We prefer NEON instructions to be printed in the short form. - AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant; + // We prefer NEON instructions to be printed in the generic form when + // targeting ELF. + AssemblerDialect = AsmWriterVariant == Default ? Generic : AsmWriterVariant; - PointerSize = 8; + CodePointerSize = 8; // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 62dfa59483ebc09adca87edc743379786ec2f479..33698d2b8c38f0d4d8bdc5ca3bfc43c96b47bffc 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -565,6 +565,9 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call); Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup)); return; + } else if (MI.getOpcode() == AArch64::CompilerBarrier) { + // This just prevents the compiler from reordering accesses, no actual code. + return; } uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index a540f49866a9e0826de08591ecec9e8f1162e304..97c92fa0778dbd441967db1ba3267e18b962eddc 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -62,6 +62,7 @@ StringRef AArch64MCExpr::getVariantKindName() const { case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:"; case VK_TLSDESC_LO12: return ":tlsdesc_lo12:"; case VK_ABS_PAGE: return ""; + case VK_ABS_PAGE_NC: return ":pg_hi21_nc:"; case VK_GOT_PAGE: return ":got:"; case VK_GOT_LO12: return ":got_lo12:"; case VK_GOTTPREL_PAGE: return ":gottprel:"; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index db36a65564ce8db4800d96c423e95424bd2dd547..3dbf0f84a6653142860cc0f06b472136470569c5 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -62,6 +62,7 @@ public: // since a user would write ":lo12:"). VK_CALL = VK_ABS, VK_ABS_PAGE = VK_ABS | VK_PAGE, + VK_ABS_PAGE_NC = VK_ABS | VK_PAGE | VK_NC, VK_ABS_G3 = VK_ABS | VK_G3, VK_ABS_G2 = VK_ABS | VK_G2, VK_ABS_G2_S = VK_SABS | VK_G2, @@ -95,7 +96,7 @@ public: VK_TPREL_HI12 = VK_TPREL | VK_HI12, VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF, VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC, - VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC, + VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF, VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE, VK_INVALID = 0xfff diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 3d296ba4806b90157d48e664ef51523b8d8459c4..19b2576f68951ec93f54393a18e230a5ab3c445b 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -10,6 +10,7 @@ #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -23,7 +24,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/MathExtras.h" #include #include diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 6725fb37cab029945fd151d881081c747a9f3695..55d18c3f3646b3113503241c6c90cbce9494015a 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -27,12 +27,12 @@ class PassRegistry; class Module; // R600 Passes -FunctionPass *createR600VectorRegMerger(TargetMachine &tm); -FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass *createR600VectorRegMerger(); +FunctionPass *createR600ExpandSpecialInstrsPass(); FunctionPass *createR600EmitClauseMarkers(); -FunctionPass *createR600ClauseMergePass(TargetMachine &tm); -FunctionPass *createR600Packetizer(TargetMachine &tm); -FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm); +FunctionPass *createR600ClauseMergePass(); +FunctionPass *createR600Packetizer(); +FunctionPass *createR600ControlFlowFinalizer(); FunctionPass *createAMDGPUCFGStructurizerPass(); // SI Passes @@ -42,16 +42,22 @@ FunctionPass *createSIFoldOperandsPass(); FunctionPass *createSIPeepholeSDWAPass(); FunctionPass *createSILowerI1CopiesPass(); FunctionPass *createSIShrinkInstructionsPass(); -FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm); +FunctionPass *createSILoadStoreOptimizerPass(); FunctionPass *createSIWholeQuadModePass(); FunctionPass *createSIFixControlFlowLiveIntervalsPass(); FunctionPass *createSIFixSGPRCopiesPass(); FunctionPass *createSIDebuggerInsertNopsPass(); FunctionPass *createSIInsertWaitsPass(); FunctionPass *createSIInsertWaitcntsPass(); -FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr); +FunctionPass *createAMDGPUCodeGenPreparePass(); +FunctionPass *createAMDGPUMachineCFGStructurizerPass(); -ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr); +void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); +extern char &AMDGPUMachineCFGStructurizerID; + +void initializeAMDGPUAlwaysInlinePass(PassRegistry&); + +ModulePass *createAMDGPUAnnotateKernelFeaturesPass(); void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); extern char &AMDGPUAnnotateKernelFeaturesID; @@ -93,7 +99,7 @@ void initializeSIOptimizeExecMaskingPass(PassRegistry &); extern char &SIOptimizeExecMaskingID; // Passes common to R600 and SI -FunctionPass *createAMDGPUPromoteAlloca(const TargetMachine *TM = nullptr); +FunctionPass *createAMDGPUPromoteAlloca(); void initializeAMDGPUPromoteAllocaPass(PassRegistry&); extern char &AMDGPUPromoteAllocaID; diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 2c7a2d8962d02067b2e4f0fef40cc711083d743b..6ab2b9ef0459859a5e18401ff9de4f2922aaa07c 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -61,6 +61,24 @@ def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "Support flat address space" >; +def FeatureFlatInstOffsets : SubtargetFeature<"flat-inst-offsets", + "FlatInstOffsets", + "true", + "Flat instructions have immediate offset addressing mode" +>; + +def FeatureFlatGlobalInsts : SubtargetFeature<"flat-global-insts", + "FlatGlobalInsts", + "true", + "Have global_* flat memory instructions" +>; + +def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts", + "FlatScratchInsts", + "true", + "Have scratch_* flat memory instructions" +>; + def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access", "UnalignedBufferAccess", "true", @@ -347,6 +365,13 @@ def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", "Force to generate flat instruction for global" >; +def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature < + "auto-waitcnt-before-barrier", + "AutoWaitcntBeforeBarrier", + "true", + "Hardware automatically inserts waitcnt before barrier" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -406,7 +431,9 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, - FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode + FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, + FeatureFastFMAF32, FeatureSDWA, FeatureDPP, + FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts ] >; @@ -420,6 +447,16 @@ class SubtargetFeatureISAVersion ; +def FeatureISAVersion6_0_0 : SubtargetFeatureISAVersion <6,0,0, + [FeatureSouthernIslands, + FeatureFastFMAF32, + HalfRate64Ops, + FeatureLDSBankCount32]>; + +def FeatureISAVersion6_0_1 : SubtargetFeatureISAVersion <6,0,1, + [FeatureSouthernIslands, + FeatureLDSBankCount32]>; + def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0, [FeatureSeaIslands, FeatureLDSBankCount32]>; @@ -434,6 +471,10 @@ def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2, [FeatureSeaIslands, FeatureLDSBankCount16]>; +def FeatureISAVersion7_0_3 : SubtargetFeatureISAVersion <7,0,3, + [FeatureSeaIslands, + FeatureLDSBankCount16]>; + def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0, [FeatureVolcanicIslands, FeatureLDSBankCount32, @@ -462,8 +503,23 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0, FeatureLDSBankCount16, FeatureXNACK]>; -def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>; -def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>; +def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; + +def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, + [FeatureGFX9, + FeatureLDSBankCount32]>; + +def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3, + [FeatureGFX9, + FeatureLDSBankCount32, + FeatureXNACK]>; //===----------------------------------------------------------------------===// // Debugger related subtarget features. @@ -514,10 +570,12 @@ def AMDGPUAsmVariants { int VOP3_ID = 1; string SDWA = "SDWA"; int SDWA_ID = 2; + string SDWA9 = "SDWA9"; + int SDWA9_ID = 3; string DPP = "DPP"; - int DPP_ID = 3; + int DPP_ID = 4; string Disable = "Disable"; - int Disable_ID = 4; + int Disable_ID = 5; } def DefaultAMDGPUAsmParserVariant : AsmParserVariant { @@ -535,6 +593,12 @@ def SDWAAsmParserVariant : AsmParserVariant { let Name = AMDGPUAsmVariants.SDWA; } +def SDWA9AsmParserVariant : AsmParserVariant { + let Variant = AMDGPUAsmVariants.SDWA9_ID; + let Name = AMDGPUAsmVariants.SDWA9; +} + + def DPPAsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.DPP_ID; let Name = AMDGPUAsmVariants.DPP; @@ -547,6 +611,7 @@ def AMDGPU : Target { let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant, VOP3AsmParserVariant, SDWAAsmParserVariant, + SDWA9AsmParserVariant, DPPAsmParserVariant]; let AssemblyWriters = [AMDGPUAsmWriter]; } @@ -587,7 +652,10 @@ def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, AssemblerPredicate<"FeatureVOP3P">; def HasSDWA : Predicate<"Subtarget->hasSDWA()">, - AssemblerPredicate<"FeatureSDWA">; + AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">; + +def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">, + AssemblerPredicate<"FeatureSDWA,FeatureGFX9">; def HasDPP : Predicate<"Subtarget->hasDPP()">, AssemblerPredicate<"FeatureDPP">; diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index f1fde96eeb69c6b4fec58176b8b959fe33dda5b5..faa424eb0a64a9cbe04f2b07f218b112f60d8f1b 100644 --- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -10,15 +10,15 @@ /// This is the AMGPU address space based alias analysis pass. //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUAliasAnalysis.h" +#include "AMDGPU.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -137,9 +137,9 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc, not dereference that pointer argument, even though it may read or write the memory that the pointer points to if accessed through other pointers. */ - if (F->getAttributes().hasAttribute(ArgNo + 1, Attribute::NoAlias) && - (F->getAttributes().hasAttribute(ArgNo + 1, Attribute::ReadNone) || - F->getAttributes().hasAttribute(ArgNo + 1, Attribute::ReadOnly))) { + if (F->hasParamAttribute(ArgNo, Attribute::NoAlias) && + (F->hasParamAttribute(ArgNo, Attribute::ReadNone) || + F->hasParamAttribute(ArgNo, Attribute::ReadOnly))) { return true; } } diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp index 1d03714874e284277aea2b137a4d4ac3d2dfd49c..6f3742ed039bd9310564d7b57817f754c5e7c6bf 100644 --- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -9,7 +9,7 @@ // /// \file /// This pass marks all internal functions as always_inline and creates -/// duplicates of all other functions a marks the duplicates as always_inline. +/// duplicates of all other functions and marks the duplicates as always_inline. // //===----------------------------------------------------------------------===// @@ -22,18 +22,22 @@ using namespace llvm; namespace { class AMDGPUAlwaysInline : public ModulePass { - static char ID; - bool GlobalOpt; public: - AMDGPUAlwaysInline(bool GlobalOpt) : ModulePass(ID), GlobalOpt(GlobalOpt) { } + static char ID; + + AMDGPUAlwaysInline(bool GlobalOpt = false) : + ModulePass(ID), GlobalOpt(GlobalOpt) { } bool runOnModule(Module &M) override; StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; } }; } // End anonymous namespace +INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", + "AMDGPU Inline All Functions", false, false) + char AMDGPUAlwaysInline::ID = 0; bool AMDGPUAlwaysInline::runOnModule(Module &M) { diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 3d8db7cd8af55a75fe1493cc2a2885fb29ca760c..7235d8fae332701a7c76dfb4ad512ec2bfe94b22 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -15,6 +15,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -27,7 +28,6 @@ namespace { class AMDGPUAnnotateKernelFeatures : public ModulePass { private: - const TargetMachine *TM; AMDGPUAS AS; static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS); @@ -37,8 +37,7 @@ private: public: static char ID; - AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) : - ModulePass(ID), TM(TM_) {} + AMDGPUAnnotateKernelFeatures() : ModulePass(ID) {} bool runOnModule(Module &M) override; StringRef getPassName() const override { return "AMDGPU Annotate Kernel Features"; @@ -221,8 +220,10 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { if (F.hasFnAttribute("amdgpu-queue-ptr")) continue; - bool HasApertureRegs = - TM && TM->getSubtarget(F).hasApertureRegs(); + auto *TPC = getAnalysisIfAvailable(); + bool HasApertureRegs = TPC && TPC->getTM() + .getSubtarget(F) + .hasApertureRegs(); if (!HasApertureRegs && hasAddrSpaceCast(F, AS)) F.addFnAttr("amdgpu-queue-ptr"); } @@ -231,6 +232,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { return Changed; } -ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) { - return new AMDGPUAnnotateKernelFeatures(TM); +ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { + return new AMDGPUAnnotateKernelFeatures(); } diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 91b3649f5c39da89dd579ea9a0a706a912b93b54..6f002860044c007ed747a5c46af67fe1d2998baf 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -19,8 +19,8 @@ #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -107,7 +107,7 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { DFS(Start, Checklist); for (auto &BB : Checklist) { - BasicBlock::iterator StartIt = (BB == Load->getParent()) ? + BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ? BasicBlock::iterator(Load) : BB->end(); if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, StartIt, BB, Load).isClobber()) diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0446655830d1f6a6060d5eb5c55192f2ed887bf5..83ad1a5c6ee3065e2f41a2b4123c4816d0ec23c8 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,25 +17,25 @@ // #include "AMDGPUAsmPrinter.h" -#include "AMDGPUTargetMachine.h" -#include "MCTargetDesc/AMDGPUTargetStreamer.h" -#include "InstPrinter/AMDGPUInstPrinter.h" -#include "Utils/AMDGPUBaseInfo.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" +#include "InstPrinter/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" #include "SIDefines.h" -#include "SIMachineFunctionInfo.h" #include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -144,12 +144,14 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( } void AMDGPUAsmPrinter::EmitFunctionBodyStart() { + const AMDGPUMachineFunction *MFI = MF->getInfo(); + if (!MFI->isEntryFunction()) + return; + const AMDGPUSubtarget &STM = MF->getSubtarget(); - SIProgramInfo KernelInfo; amd_kernel_code_t KernelCode; if (STM.isAmdCodeObjectV2(*MF)) { - getSIProgramInfo(KernelInfo, *MF); - getAmdKernelCode(KernelCode, KernelInfo, *MF); + getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF); OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); getTargetStreamer().EmitAMDKernelCodeT(KernelCode); @@ -183,10 +185,31 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { AsmPrinter::EmitGlobalVariable(GV); } +bool AMDGPUAsmPrinter::doFinalization(Module &M) { + CallGraphResourceInfo.clear(); + return AsmPrinter::doFinalization(M); +} + +// Print comments that apply to both callable functions and entry points. +void AMDGPUAsmPrinter::emitCommonFunctionComments( + uint32_t NumVGPR, + uint32_t NumSGPR, + uint32_t ScratchSize, + uint64_t CodeSize) { + OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false); + OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false); + OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false); + OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false); +} + bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + CurrentProgramInfo = SIProgramInfo(); + + const AMDGPUMachineFunction *MFI = MF.getInfo(); // The starting address of all shader programs must be 256 bytes aligned. - MF.setAlignment(8); + // Regular functions just need the basic required instruction alignment. + MF.setAlignment(MFI->isEntryFunction() ? 8 : 2); SetupMachineFunction(MF); @@ -198,11 +221,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->SwitchSection(ConfigSection); } - SIProgramInfo KernelInfo; if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - getSIProgramInfo(KernelInfo, MF); + if (MFI->isEntryFunction()) { + getSIProgramInfo(CurrentProgramInfo, MF); + } else { + auto I = CallGraphResourceInfo.insert( + std::make_pair(MF.getFunction(), SIFunctionResourceInfo())); + SIFunctionResourceInfo &Info = I.first->second; + assert(I.second && "should only be called once per function"); + Info = analyzeResourceUsage(MF); + } + if (!STM.isAmdHsaOS()) { - EmitProgramInfoSI(MF, KernelInfo); + EmitProgramInfoSI(MF, CurrentProgramInfo); } } else { EmitProgramInfoR600(MF); @@ -220,63 +251,87 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer->SwitchSection(CommentSection); if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + if (!MFI->isEntryFunction()) { + OutStreamer->emitRawComment(" Function info:", false); + SIFunctionResourceInfo &Info = CallGraphResourceInfo[MF.getFunction()]; + emitCommonFunctionComments( + Info.NumVGPR, + Info.getTotalNumSGPRs(MF.getSubtarget()), + Info.PrivateSegmentSize, + getFunctionCodeSize(MF)); + return false; + } + OutStreamer->emitRawComment(" Kernel info:", false); - OutStreamer->emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen), - false); - OutStreamer->emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR), - false); - OutStreamer->emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), - false); - OutStreamer->emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode), - false); - OutStreamer->emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode), - false); - OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize), - false); - OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) + - " bytes/workgroup (compile time only)", false); - - OutStreamer->emitRawComment(" SGPRBlocks: " + - Twine(KernelInfo.SGPRBlocks), false); - OutStreamer->emitRawComment(" VGPRBlocks: " + - Twine(KernelInfo.VGPRBlocks), false); - - OutStreamer->emitRawComment(" NumSGPRsForWavesPerEU: " + - Twine(KernelInfo.NumSGPRsForWavesPerEU), false); - OutStreamer->emitRawComment(" NumVGPRsForWavesPerEU: " + - Twine(KernelInfo.NumVGPRsForWavesPerEU), false); - - OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst), - false); - OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount), - false); + emitCommonFunctionComments(CurrentProgramInfo.NumVGPR, + CurrentProgramInfo.NumSGPR, + CurrentProgramInfo.ScratchSize, + getFunctionCodeSize(MF)); + + OutStreamer->emitRawComment(" codeLenInByte = " + + Twine(getFunctionCodeSize(MF)), false); + OutStreamer->emitRawComment( + " NumSgprs: " + Twine(CurrentProgramInfo.NumSGPR), false); + OutStreamer->emitRawComment( + " NumVgprs: " + Twine(CurrentProgramInfo.NumVGPR), false); + + OutStreamer->emitRawComment( + " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false); + OutStreamer->emitRawComment( + " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false); + OutStreamer->emitRawComment( + " ScratchSize: " + Twine(CurrentProgramInfo.ScratchSize), false); + OutStreamer->emitRawComment( + " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) + + " bytes/workgroup (compile time only)", false); + + OutStreamer->emitRawComment( + " SGPRBlocks: " + Twine(CurrentProgramInfo.SGPRBlocks), false); + OutStreamer->emitRawComment( + " VGPRBlocks: " + Twine(CurrentProgramInfo.VGPRBlocks), false); + + OutStreamer->emitRawComment( + " NumSGPRsForWavesPerEU: " + + Twine(CurrentProgramInfo.NumSGPRsForWavesPerEU), false); + OutStreamer->emitRawComment( + " NumVGPRsForWavesPerEU: " + + Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false); + + OutStreamer->emitRawComment( + " ReservedVGPRFirst: " + Twine(CurrentProgramInfo.ReservedVGPRFirst), + false); + OutStreamer->emitRawComment( + " ReservedVGPRCount: " + Twine(CurrentProgramInfo.ReservedVGPRCount), + false); if (MF.getSubtarget().debuggerEmitPrologue()) { - OutStreamer->emitRawComment(" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" + - Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false); - OutStreamer->emitRawComment(" DebuggerPrivateSegmentBufferSGPR: s" + - Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR), false); + OutStreamer->emitRawComment( + " DebuggerWavefrontPrivateSegmentOffsetSGPR: s" + + Twine(CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false); + OutStreamer->emitRawComment( + " DebuggerPrivateSegmentBufferSGPR: s" + + Twine(CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR), false); } - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " + - Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " + - Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " + - Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " + - Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " + - Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)), - false); - OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " + - Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)), - false); - + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:USER_SGPR: " + + Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " + + Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TGID_X_EN: " + + Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TGID_Y_EN: " + + Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TGID_Z_EN: " + + Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false); + OutStreamer->emitRawComment( + " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " + + Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)), + false); } else { R600MachineFunctionInfo *MFI = MF.getInfo(); OutStreamer->emitRawComment( @@ -317,7 +372,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { const MachineOperand &MO = MI.getOperand(op_idx); if (!MO.isReg()) continue; - unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff; + unsigned HWReg = RI->getHWRegIndex(MO.getReg()); // Register with value > 127 aren't GPR if (HWReg > 127) @@ -360,18 +415,12 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) { } } -void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, - const MachineFunction &MF) const { +uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const { const SISubtarget &STM = MF.getSubtarget(); - const SIMachineFunctionInfo *MFI = MF.getInfo(); - uint64_t CodeSize = 0; - unsigned MaxSGPR = 0; - unsigned MaxVGPR = 0; - bool VCCUsed = false; - bool FlatUsed = false; - const SIRegisterInfo *RI = STM.getRegisterInfo(); const SIInstrInfo *TII = STM.getInstrInfo(); + uint64_t CodeSize = 0; + for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB) { // TODO: CodeSize should account for multiple functions. @@ -380,159 +429,161 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, if (MI.isDebugValue()) continue; - if (isVerbose()) - CodeSize += TII->getInstSizeInBytes(MI); - - unsigned numOperands = MI.getNumOperands(); - for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { - const MachineOperand &MO = MI.getOperand(op_idx); - unsigned width = 0; - bool isSGPR = false; - - if (!MO.isReg()) - continue; - - unsigned reg = MO.getReg(); - switch (reg) { - case AMDGPU::EXEC: - case AMDGPU::EXEC_LO: - case AMDGPU::EXEC_HI: - case AMDGPU::SCC: - case AMDGPU::M0: - case AMDGPU::SRC_SHARED_BASE: - case AMDGPU::SRC_SHARED_LIMIT: - case AMDGPU::SRC_PRIVATE_BASE: - case AMDGPU::SRC_PRIVATE_LIMIT: - continue; - - case AMDGPU::VCC: - case AMDGPU::VCC_LO: - case AMDGPU::VCC_HI: - VCCUsed = true; - continue; + CodeSize += TII->getInstSizeInBytes(MI); + } + } - case AMDGPU::FLAT_SCR: - case AMDGPU::FLAT_SCR_LO: - case AMDGPU::FLAT_SCR_HI: - // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat - // instructions aren't used to access the scratch buffer. - if (MFI->hasFlatScratchInit()) - FlatUsed = true; - continue; + return CodeSize; +} - case AMDGPU::TBA: - case AMDGPU::TBA_LO: - case AMDGPU::TBA_HI: - case AMDGPU::TMA: - case AMDGPU::TMA_LO: - case AMDGPU::TMA_HI: - llvm_unreachable("trap handler registers should not be used"); - - default: - break; - } - - if (AMDGPU::SReg_32RegClass.contains(reg)) { - assert(!AMDGPU::TTMP_32RegClass.contains(reg) && - "trap handler registers should not be used"); - isSGPR = true; - width = 1; - } else if (AMDGPU::VGPR_32RegClass.contains(reg)) { - isSGPR = false; - width = 1; - } else if (AMDGPU::SReg_64RegClass.contains(reg)) { - assert(!AMDGPU::TTMP_64RegClass.contains(reg) && - "trap handler registers should not be used"); - isSGPR = true; - width = 2; - } else if (AMDGPU::VReg_64RegClass.contains(reg)) { - isSGPR = false; - width = 2; - } else if (AMDGPU::VReg_96RegClass.contains(reg)) { - isSGPR = false; - width = 3; - } else if (AMDGPU::SReg_128RegClass.contains(reg)) { - isSGPR = true; - width = 4; - } else if (AMDGPU::VReg_128RegClass.contains(reg)) { - isSGPR = false; - width = 4; - } else if (AMDGPU::SReg_256RegClass.contains(reg)) { - isSGPR = true; - width = 8; - } else if (AMDGPU::VReg_256RegClass.contains(reg)) { - isSGPR = false; - width = 8; - } else if (AMDGPU::SReg_512RegClass.contains(reg)) { - isSGPR = true; - width = 16; - } else if (AMDGPU::VReg_512RegClass.contains(reg)) { - isSGPR = false; - width = 16; - } else { - llvm_unreachable("Unknown register class"); - } - unsigned hwReg = RI->getEncodingValue(reg) & 0xff; - unsigned maxUsed = hwReg + width - 1; - if (isSGPR) { - MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; - } else { - MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR; - } - } - } +static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI, + const SIInstrInfo &TII, + unsigned Reg) { + for (const MachineOperand &UseOp : MRI.reg_operands(Reg)) { + if (!UseOp.isImplicit() || !TII.isFLAT(*UseOp.getParent())) + return true; } - unsigned ExtraSGPRs = 0; + return false; +} +static unsigned getNumExtraSGPRs(const SISubtarget &ST, + bool VCCUsed, + bool FlatScrUsed) { + unsigned ExtraSGPRs = 0; if (VCCUsed) ExtraSGPRs = 2; - if (STM.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) { - if (FlatUsed) + if (ST.getGeneration() < SISubtarget::VOLCANIC_ISLANDS) { + if (FlatScrUsed) ExtraSGPRs = 4; } else { - if (STM.isXNACKEnabled()) + if (ST.isXNACKEnabled()) ExtraSGPRs = 4; - if (FlatUsed) + if (FlatScrUsed) ExtraSGPRs = 6; } + return ExtraSGPRs; +} + +int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( + const SISubtarget &ST) const { + return NumExplicitSGPR + getNumExtraSGPRs(ST, UsesVCC, UsesFlatScratch); +} + +AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( + const MachineFunction &MF) const { + SIFunctionResourceInfo Info; + + const SIMachineFunctionInfo *MFI = MF.getInfo(); + const SISubtarget &ST = MF.getSubtarget(); + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo &TRI = TII->getRegisterInfo(); + + Info.UsesFlatScratch = MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_LO) || + MRI.isPhysRegUsed(AMDGPU::FLAT_SCR_HI); + + // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat + // instructions aren't used to access the scratch buffer. Inline assembly may + // need it though. + // + // If we only have implicit uses of flat_scr on flat instructions, it is not + // really needed. + if (Info.UsesFlatScratch && !MFI->hasFlatScratchInit() && + (!hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR) && + !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_LO) && + !hasAnyNonFlatUseOfReg(MRI, *TII, AMDGPU::FLAT_SCR_HI))) { + Info.UsesFlatScratch = false; + } + + Info.HasDynamicallySizedStack = FrameInfo.hasVarSizedObjects(); + Info.PrivateSegmentSize = FrameInfo.getStackSize(); + + + Info.UsesVCC = MRI.isPhysRegUsed(AMDGPU::VCC_LO) || + MRI.isPhysRegUsed(AMDGPU::VCC_HI); + + // If there are no calls, MachineRegisterInfo can tell us the used register + // count easily. + + MCPhysReg HighestVGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::VGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestVGPRReg = Reg; + break; + } + } + + MCPhysReg HighestSGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::SGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestSGPRReg = Reg; + break; + } + } + + // We found the maximum register index. They start at 0, so add one to get the + // number of registers. + Info.NumVGPR = HighestVGPRReg == AMDGPU::NoRegister ? 0 : + TRI.getHWRegIndex(HighestVGPRReg) + 1; + Info.NumExplicitSGPR = HighestSGPRReg == AMDGPU::NoRegister ? 0 : + TRI.getHWRegIndex(HighestSGPRReg) + 1; + + return Info; +} + +void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, + const MachineFunction &MF) { + SIFunctionResourceInfo Info = analyzeResourceUsage(MF); + + ProgInfo.NumVGPR = Info.NumVGPR; + ProgInfo.NumSGPR = Info.NumExplicitSGPR; + ProgInfo.ScratchSize = Info.PrivateSegmentSize; + ProgInfo.VCCUsed = Info.UsesVCC; + ProgInfo.FlatUsed = Info.UsesFlatScratch; + ProgInfo.DynamicCallStack = Info.HasDynamicallySizedStack || Info.HasRecursion; + + const SISubtarget &STM = MF.getSubtarget(); + const SIMachineFunctionInfo *MFI = MF.getInfo(); + const SIInstrInfo *TII = STM.getInstrInfo(); + const SIRegisterInfo *RI = &TII->getRegisterInfo(); + + unsigned ExtraSGPRs = getNumExtraSGPRs(STM, + ProgInfo.VCCUsed, + ProgInfo.FlatUsed); unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF); // Check the addressable register limit before we add ExtraSGPRs. if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && !STM.hasSGPRInitBug()) { unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs(); - if (MaxSGPR + 1 > MaxAddressableNumSGPRs) { + if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) { // This can happen due to a compiler bug or when using inline asm. LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "addressable scalar registers", - MaxSGPR + 1, DS_Error, + ProgInfo.NumSGPR, DS_Error, DK_ResourceLimit, MaxAddressableNumSGPRs); Ctx.diagnose(Diag); - MaxSGPR = MaxAddressableNumSGPRs - 1; + ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1; } } // Account for extra SGPRs and VGPRs reserved for debugger use. - MaxSGPR += ExtraSGPRs; - MaxVGPR += ExtraVGPRs; - - // We found the maximum register index. They start at 0, so add one to get the - // number of registers. - ProgInfo.NumSGPR = MaxSGPR + 1; - ProgInfo.NumVGPR = MaxVGPR + 1; + ProgInfo.NumSGPR += ExtraSGPRs; + ProgInfo.NumVGPR += ExtraVGPRs; // Adjust number of registers used to meet default/requested minimum/maximum // number of waves per execution unit request. ProgInfo.NumSGPRsForWavesPerEU = std::max( - ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU())); + std::max(ProgInfo.NumSGPR, 1u), STM.getMinNumSGPRs(MFI->getMaxWavesPerEU())); ProgInfo.NumVGPRsForWavesPerEU = std::max( - ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU())); + std::max(ProgInfo.NumVGPR, 1u), STM.getMinNumVGPRs(MFI->getMaxWavesPerEU())); if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS || STM.hasSGPRInitBug()) { @@ -559,10 +610,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; } - if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) { + if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) { LLVMContext &Ctx = MF.getFunction()->getContext(); DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "user SGPRs", - MFI->NumUserSGPRs, DS_Error); + MFI->getNumUserSGPRs(), DS_Error); Ctx.diagnose(Diag); } @@ -584,7 +635,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1; // Record first reserved VGPR and number of reserved VGPRs. - ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0; + ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? ProgInfo.NumVGPR : 0; ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF); // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and @@ -606,13 +657,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, // Make clamp modifier on NaN input returns 0. ProgInfo.DX10Clamp = STM.enableDX10Clamp(); - const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - ProgInfo.ScratchSize = FrameInfo.getStackSize(); - - ProgInfo.FlatUsed = FlatUsed; - ProgInfo.VCCUsed = VCCUsed; - ProgInfo.CodeLen = CodeSize; - unsigned LDSAlignShift; if (STM.getGeneration() < SISubtarget::SEA_ISLANDS) { // LDS is allocated in 64 dword blocks. @@ -623,7 +667,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } unsigned LDSSpillSize = - MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize(); + MFI->getLDSWaveSpillSize() * MFI->getMaxFlatWorkGroupSize(); ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize; ProgInfo.LDSBlocks = @@ -666,7 +710,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) | S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) | S_00B84C_EXCP_EN_MSB(0) | - S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) | + // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP. + S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) | S_00B84C_EXCP_EN(0); } @@ -674,6 +719,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) { switch (CallConv) { default: LLVM_FALLTHROUGH; case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1; + case CallingConv::AMDGPU_HS: return R_00B428_SPI_SHADER_PGM_RSRC1_HS; case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS; case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS; case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS; @@ -681,7 +727,7 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) { } void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, - const SIProgramInfo &KernelInfo) { + const SIProgramInfo &CurrentProgramInfo) { const SISubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned RsrcReg = getRsrcReg(MF.getFunction()->getCallingConv()); @@ -689,29 +735,29 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { OutStreamer->EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4); - OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc1, 4); + OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc1, 4); OutStreamer->EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4); - OutStreamer->EmitIntValue(KernelInfo.ComputePGMRSrc2, 4); + OutStreamer->EmitIntValue(CurrentProgramInfo.ComputePGMRSrc2, 4); OutStreamer->EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4); - OutStreamer->EmitIntValue(S_00B860_WAVESIZE(KernelInfo.ScratchBlocks), 4); + OutStreamer->EmitIntValue(S_00B860_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 = // 0" comment but I don't see a corresponding field in the register spec. } else { OutStreamer->EmitIntValue(RsrcReg, 4); - OutStreamer->EmitIntValue(S_00B028_VGPRS(KernelInfo.VGPRBlocks) | - S_00B028_SGPRS(KernelInfo.SGPRBlocks), 4); + OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | + S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4); if (STM.isVGPRSpillingEnabled(*MF.getFunction())) { OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); - OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(KernelInfo.ScratchBlocks), 4); + OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); } } if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); - OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4); + OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4); OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4); OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); @@ -739,7 +785,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) { } void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, - const SIProgramInfo &KernelInfo, + const SIProgramInfo &CurrentProgramInfo, const MachineFunction &MF) const { const SIMachineFunctionInfo *MFI = MF.getInfo(); const SISubtarget &STM = MF.getSubtarget(); @@ -747,10 +793,13 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits()); Out.compute_pgm_resource_registers = - KernelInfo.ComputePGMRSrc1 | - (KernelInfo.ComputePGMRSrc2 << 32); + CurrentProgramInfo.ComputePGMRSrc1 | + (CurrentProgramInfo.ComputePGMRSrc2 << 32); Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64; + if (CurrentProgramInfo.DynamicCallStack) + Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK; + AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE, getElementByteSizeValue(STM.getMaxPrivateElementSize())); @@ -802,12 +851,12 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, // FIXME: Should use getKernArgSize Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset()); - Out.wavefront_sgpr_count = KernelInfo.NumSGPR; - Out.workitem_vgpr_count = KernelInfo.NumVGPR; - Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize; - Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize; - Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst; - Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount; + Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR; + Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR; + Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize; + Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize; + Out.reserved_vgpr_first = CurrentProgramInfo.ReservedVGPRFirst; + Out.reserved_vgpr_count = CurrentProgramInfo.ReservedVGPRCount; // These alignment values are specified in powers of two, so alignment = // 2^n. The minimum alignment is 2^4 = 16. @@ -816,9 +865,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, if (STM.debuggerEmitPrologue()) { Out.debug_wavefront_private_segment_offset_sgpr = - KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; + CurrentProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; Out.debug_private_segment_buffer_sgpr = - KernelInfo.DebuggerPrivateSegmentBufferSGPR; + CurrentProgramInfo.DebuggerPrivateSegmentBufferSGPR; } } diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 13425c8b2a0f5954617e1abb623aa489139d4e31..0a58ce06704ddd3e7b1edc5e72070745b10cbb15 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -15,8 +15,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H -#include "AMDKernelCodeT.h" #include "AMDGPU.h" +#include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include @@ -30,9 +30,26 @@ namespace llvm { class AMDGPUTargetStreamer; class MCOperand; +class SISubtarget; class AMDGPUAsmPrinter final : public AsmPrinter { private: + // Track resource usage for callee functions. + struct SIFunctionResourceInfo { + // Track the number of explicitly used VGPRs. Special registers reserved at + // the end are tracked separately. + int32_t NumVGPR = 0; + int32_t NumExplicitSGPR = 0; + uint32_t PrivateSegmentSize = 0; + bool UsesVCC = false; + bool UsesFlatScratch = false; + bool HasDynamicallySizedStack = false; + bool HasRecursion = false; + + int32_t getTotalNumSGPRs(const SISubtarget &ST) const; + }; + + // Track resource usage for kernels / entry functions. struct SIProgramInfo { // Fields set in PGM_RSRC1 pm4 packet. uint32_t VGPRBlocks = 0; @@ -55,7 +72,7 @@ private: uint32_t NumVGPR = 0; uint32_t NumSGPR = 0; - uint32_t LDSSize; + uint32_t LDSSize = 0; bool FlatUsed = false; // Number of SGPRs that meets number of waves per execution unit request. @@ -83,14 +100,23 @@ private: uint16_t DebuggerPrivateSegmentBufferSGPR = std::numeric_limits::max(); + // Whether there is recursion, dynamic allocas, indirect calls or some other + // reason there may be statically unknown stack usage. + bool DynamicCallStack = false; + // Bonus information for debugging. bool VCCUsed = false; - uint64_t CodeLen = 0; SIProgramInfo() = default; }; - void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const; + SIProgramInfo CurrentProgramInfo; + DenseMap CallGraphResourceInfo; + + uint64_t getFunctionCodeSize(const MachineFunction &MF) const; + SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const; + + void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF); void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo, const MachineFunction &MF) const; void findNumUsedRegistersSI(const MachineFunction &MF, @@ -101,6 +127,10 @@ private: /// can correctly setup the GPU state. void EmitProgramInfoR600(const MachineFunction &MF); void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo); + void emitCommonFunctionComments(uint32_t NumVGPR, + uint32_t NumSGPR, + uint32_t ScratchSize, + uint64_t CodeSize); public: explicit AMDGPUAsmPrinter(TargetMachine &TM, @@ -112,6 +142,7 @@ public: AMDGPUTargetStreamer& getTargetStreamer() const; + bool doFinalization(Module &M) override; bool runOnMachineFunction(MachineFunction &MF) override; /// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index e67ae092fddae9c5006eae11bcdca356464b22cd..515cc07dd4498f65a552f03530a2f6555466ed16 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -18,8 +18,8 @@ #include "AMDGPUISelLowering.h" #include "AMDGPUSubtarget.h" #include "SIISelLowering.h" -#include "SIRegisterInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h index 09bdf8ffcde7b1e1a7ad123d129c5e925be696d6..251cb7a2c440d68eaf613ff2105b8cc3085756a8 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -38,7 +38,8 @@ class AMDGPUCallLowering: public CallLowering { unsigned VReg) const override; bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef VRegs) const override; - CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; + static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); + static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); }; } // End of namespace llvm; #endif diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td index d308f718aae130f3e4fc5815ba9c86ae7212cb25..4bef7a89bfe34be20b6729ea78fe640a64fc49e9 100644 --- a/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -13,6 +13,8 @@ // Inversion of CCIfInReg class CCIfNotInReg : CCIf<"!ArgFlags.isInReg()", A> {} +class CCIfExtend + : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>; // Calling convention for SI def CC_SI : CallingConv<[ @@ -52,7 +54,7 @@ def CC_SI : CallingConv<[ ]>>> ]>; -def RetCC_SI : CallingConv<[ +def RetCC_SI_Shader : CallingConv<[ CCIfType<[i32] , CCAssignToReg<[ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, @@ -99,6 +101,52 @@ def CC_AMDGPU_Kernel : CallingConv<[ CCCustom<"allocateKernArg"> ]>; +def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs< + (sequence "VGPR%u", 24, 255) +>; + +def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs< + (sequence "VGPR%u", 32, 255) +>; + +def CSR_AMDGPU_SGPRs_32_103 : CalleeSavedRegs< + (sequence "SGPR%u", 32, 103) +>; + +def CSR_AMDGPU_HighRegs : CalleeSavedRegs< + (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103) +>; + +// Calling convention for leaf functions +def CC_AMDGPU_Func : CallingConv<[ + CCIfByVal>, + CCIfType<[i1], CCPromoteToType>, + CCIfType<[i1, i8, i16], CCIfExtend>>, + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[ + VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, + VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, + VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, + VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, + CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>, + CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>, + CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>, + CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>, + CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>> +]>; + +// Calling convention for leaf functions +def RetCC_AMDGPU_Func : CallingConv<[ + CCIfType<[i1], CCPromoteToType>, + CCIfType<[i1, i16], CCIfExtend>>, + CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[ + VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, + VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, + VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, + VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, + CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">> +]>; + def CC_AMDGPU : CallingConv<[ CCIf<"static_cast" "(State.getMachineFunction().getSubtarget()).getGeneration() >=" diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index e19314fe0a6c83f01843f565274f4759021fcc63..b312dbc8d14d65b32b9b651bfe73fec2ef6677c4 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -19,18 +19,19 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -48,7 +49,6 @@ namespace { class AMDGPUCodeGenPrepare : public FunctionPass, public InstVisitor { - const GCNTargetMachine *TM; const SISubtarget *ST = nullptr; DivergenceAnalysis *DA = nullptr; Module *Mod = nullptr; @@ -127,8 +127,7 @@ class AMDGPUCodeGenPrepare : public FunctionPass, public: static char ID; - AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) : - FunctionPass(ID), TM(static_cast(TM)) {} + AMDGPUCodeGenPrepare() : FunctionPass(ID) {} bool visitFDiv(BinaryOperator &I); @@ -487,10 +486,15 @@ bool AMDGPUCodeGenPrepare::doInitialization(Module &M) { } bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) { - if (!TM || skipFunction(F)) + if (skipFunction(F)) + return false; + + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) return false; - ST = &TM->getSubtarget(F); + const TargetMachine &TM = TPC->getTM(); + ST = &TM.getSubtarget(F); DA = &getAnalysis(); HasUnsafeFPMath = hasUnsafeFPMath(F); @@ -507,14 +511,14 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) { return MadeChange; } -INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) -INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, - "AMDGPU IR optimizations", false, false) +INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations", + false, false) char AMDGPUCodeGenPrepare::ID = 0; -FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) { - return new AMDGPUCodeGenPrepare(TM); +FunctionPass *llvm::createAMDGPUCodeGenPreparePass() { + return new AMDGPUCodeGenPrepare(); } diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index fc3ea67fad01a2d5edf07a3b3cc54bddc54fde05..f235313e485357cb17424260c5e3a7d08ea69a43 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -13,15 +13,15 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUInstrInfo.h" #include "AMDGPURegisterInfo.h" -#include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPUSubtarget.h" #include "SIDefines.h" -#include "SIInstrInfo.h" -#include "SIRegisterInfo.h" #include "SIISelLowering.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -82,7 +82,7 @@ public: void PostprocessISelDAG() override; private: - SDValue foldFrameIndex(SDValue N) const; + std::pair foldFrameIndex(SDValue N) const; bool isNoNanSrc(SDValue N) const; bool isInlineImmediate(const SDNode *N) const; bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, @@ -116,8 +116,13 @@ private: bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; - bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr, - SDValue &SOffset, SDValue &ImmOffset) const; + bool SelectMUBUFScratchOffen(SDNode *Root, + SDValue Addr, SDValue &RSrc, SDValue &VAddr, + SDValue &SOffset, SDValue &ImmOffset) const; + bool SelectMUBUFScratchOffset(SDNode *Root, + SDValue Addr, SDValue &SRsrc, SDValue &Soffset, + SDValue &Offset) const; + bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, SDValue &TFE) const; @@ -133,8 +138,10 @@ private: bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, SDValue &ImmOffset, SDValue &VOffset) const; - bool SelectFlat(SDValue Addr, SDValue &VAddr, - SDValue &SLC, SDValue &TFE) const; + bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; + bool SelectFlatOffset(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; @@ -150,14 +157,12 @@ private: bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; - bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, - SDValue &Omod) const; bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; @@ -206,8 +211,8 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { return true; // TODO: Move into isKnownNeverNaN - if (const auto *BO = dyn_cast(N)) - return BO->Flags.hasNoNaNs(); + if (N->getFlags().isDefined()) + return N->getFlags().hasNoNaNs(); return CurDAG->isKnownNeverNaN(N); } @@ -563,7 +568,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::CopyToReg: { const SITargetLowering& Lowering = *static_cast(getTargetLowering()); - Lowering.legalizeTargetIndependentNode(N, *CurDAG); + N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); break; } case ISD::AND: @@ -953,8 +958,12 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, return true; } +static bool isLegalMUBUFImmOffset(unsigned Imm) { + return isUInt<12>(Imm); +} + static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { - return isUInt<12>(Imm->getZExtValue()); + return isLegalMUBUFImmOffset(Imm->getZExtValue()); } bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, @@ -1070,43 +1079,111 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); } -SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { - if (auto FI = dyn_cast(N)) - return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); - return N; +static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { + auto PSV = PtrInfo.V.dyn_cast(); + return PSV && PSV->isStack(); +} + +std::pair AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { + const MachineFunction &MF = CurDAG->getMachineFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo(); + + if (auto FI = dyn_cast(N)) { + SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), + FI->getValueType(0)); + + // If we can resolve this to a frame index access, this is relative to the + // frame pointer SGPR. + return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(), + MVT::i32)); + } + + // If we don't know this private access is a local stack object, it needs to + // be relative to the entry point's scratch wave offset register. + return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(), + MVT::i32)); } -bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, - SDValue &VAddr, SDValue &SOffset, - SDValue &ImmOffset) const { +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root, + SDValue Addr, SDValue &Rsrc, + SDValue &VAddr, SDValue &SOffset, + SDValue &ImmOffset) const { SDLoc DL(Addr); MachineFunction &MF = CurDAG->getMachineFunction(); const SIMachineFunctionInfo *Info = MF.getInfo(); Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); - SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); - // (add n0, c1) + if (ConstantSDNode *CAddr = dyn_cast(Addr)) { + unsigned Imm = CAddr->getZExtValue(); + assert(!isLegalMUBUFImmOffset(Imm) && + "should have been selected by other pattern"); + + SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); + MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, + DL, MVT::i32, HighBits); + VAddr = SDValue(MovHighBits, 0); + + // In a call sequence, stores to the argument stack area are relative to the + // stack pointer. + const MachinePointerInfo &PtrInfo = cast(Root)->getPointerInfo(); + unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? + Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); + + SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); + ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); + return true; + } + if (CurDAG->isBaseWithConstantOffset(Addr)) { + // (add n0, c1) + SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); // Offsets in vaddr must be positive. ConstantSDNode *C1 = cast(N1); if (isLegalMUBUFImmOffset(C1)) { - VAddr = foldFrameIndex(N0); + std::tie(VAddr, SOffset) = foldFrameIndex(N0); ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); return true; } } // (node) - VAddr = foldFrameIndex(Addr); + std::tie(VAddr, SOffset) = foldFrameIndex(Addr); ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); return true; } +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root, + SDValue Addr, + SDValue &SRsrc, + SDValue &SOffset, + SDValue &Offset) const { + ConstantSDNode *CAddr = dyn_cast(Addr); + if (!CAddr || !isLegalMUBUFImmOffset(CAddr)) + return false; + + SDLoc DL(Addr); + MachineFunction &MF = CurDAG->getMachineFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo(); + + SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); + + const MachinePointerInfo &PtrInfo = cast(Root)->getPointerInfo(); + unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? + Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); + + // FIXME: Get from MachinePointerInfo? We should only be using the frame + // offset if we know this is in a call sequence. + SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); + + Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); + return true; +} + bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset, SDValue &GLC, SDValue &SLC, @@ -1239,15 +1316,37 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, return true; } -bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, - SDValue &VAddr, - SDValue &SLC, - SDValue &TFE) const { +bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { + int64_t OffsetVal = 0; + + if (Subtarget->hasFlatInstOffsets() && + CurDAG->isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + uint64_t COffsetVal = cast(N1)->getZExtValue(); + if (isUInt<12>(COffsetVal)) { + Addr = N0; + OffsetVal = COffsetVal; + } + } + VAddr = Addr; - TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); + Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16); + SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); + return true; } +bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { + return SelectFlatOffset(Addr, VAddr, Offset, SLC); +} + bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const { @@ -1628,38 +1727,20 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, return isNoNanSrc(Src); } -bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, - SDValue &SrcMods) const { - bool Res = SelectVOP3Mods(In, Src, SrcMods); - return Res && cast(SrcMods)->isNullValue(); +bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { + if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG) + return false; + + Src = In; + return true; } bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const { SDLoc DL(In); - // FIXME: Handle Clamp and Omod - Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); - Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); - - return SelectVOP3Mods(In, Src, SrcMods); -} - -bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, - SDValue &SrcMods, SDValue &Clamp, - SDValue &Omod) const { - bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); - - return Res && cast(SrcMods)->isNullValue() && - cast(Clamp)->isNullValue() && - cast(Omod)->isNullValue(); -} - -bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, - SDValue &SrcMods, - SDValue &Omod) const { - // FIXME: Handle Omod - Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); + Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); + Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); return SelectVOP3Mods(In, Src, SrcMods); } @@ -1677,28 +1758,95 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, Src = In; SDLoc DL(In); - // FIXME: Handle Clamp and Omod - Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32); - Omod = CurDAG->getTargetConstant(0, DL, MVT::i32); + Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); + Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); return true; } +static SDValue stripBitcast(SDValue Val) { + return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; +} + +// Figure out if this is really an extract of the high 16-bits of a dword. +static bool isExtractHiElt(SDValue In, SDValue &Out) { + In = stripBitcast(In); + if (In.getOpcode() != ISD::TRUNCATE) + return false; + + SDValue Srl = In.getOperand(0); + if (Srl.getOpcode() == ISD::SRL) { + if (ConstantSDNode *ShiftAmt = dyn_cast(Srl.getOperand(1))) { + if (ShiftAmt->getZExtValue() == 16) { + Out = stripBitcast(Srl.getOperand(0)); + return true; + } + } + } + + return false; +} + +// Look through operations that obscure just looking at the low 16-bits of the +// same register. +static SDValue stripExtractLoElt(SDValue In) { + if (In.getOpcode() == ISD::TRUNCATE) { + SDValue Src = In.getOperand(0); + if (Src.getValueType().getSizeInBits() == 32) + return stripBitcast(Src); + } + + return In; +} + bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const { unsigned Mods = 0; Src = In; - // FIXME: Look for on separate components if (Src.getOpcode() == ISD::FNEG) { - Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI); + Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); Src = Src.getOperand(0); } - // Packed instructions do not have abs modifiers. + if (Src.getOpcode() == ISD::BUILD_VECTOR) { + unsigned VecMods = Mods; + + SDValue Lo = stripBitcast(Src.getOperand(0)); + SDValue Hi = stripBitcast(Src.getOperand(1)); + + if (Lo.getOpcode() == ISD::FNEG) { + Lo = stripBitcast(Lo.getOperand(0)); + Mods ^= SISrcMods::NEG; + } + + if (Hi.getOpcode() == ISD::FNEG) { + Hi = stripBitcast(Hi.getOperand(0)); + Mods ^= SISrcMods::NEG_HI; + } + + if (isExtractHiElt(Lo, Lo)) + Mods |= SISrcMods::OP_SEL_0; + + if (isExtractHiElt(Hi, Hi)) + Mods |= SISrcMods::OP_SEL_1; - // FIXME: Handle abs/neg of individual components. - // FIXME: Handle swizzling with op_sel + Lo = stripExtractLoElt(Lo); + Hi = stripExtractLoElt(Hi); + + if (Lo == Hi && !isInlineImmediate(Lo.getNode())) { + // Really a scalar input. Just select from the low half of the register to + // avoid packing. + + Src = Lo; + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; + } + + Mods = VecMods; + } + + // Packed instructions do not have abs modifiers. Mods |= SISrcMods::OP_SEL_1; SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index c0f336e082bd2b71e554ad95ef58f419daace437..5586b513b5fca6cfbe6da11b66cfbf04d4eec202 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -21,6 +21,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "R600MachineFunctionInfo.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" @@ -29,7 +30,7 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" -#include "SIInstrInfo.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT, @@ -75,6 +76,45 @@ static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, } } +// Allocate up to VGPR31. +// +// TODO: Since there are no VGPR alignent requirements would it be better to +// split into individual scalar registers? +static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + switch (LocVT.SimpleTy) { + case MVT::i64: + case MVT::f64: + case MVT::v2i32: + case MVT::v2f32: { + return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, + &AMDGPU::VReg_64RegClass, 31); + } + case MVT::v4i32: + case MVT::v4f32: + case MVT::v2i64: + case MVT::v2f64: { + return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, + &AMDGPU::VReg_128RegClass, 29); + } + case MVT::v8i32: + case MVT::v8f32: { + return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, + &AMDGPU::VReg_256RegClass, 25); + + } + case MVT::v16i32: + case MVT::v16f32: { + return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, + &AMDGPU::VReg_512RegClass, 17); + + } + default: + return false; + } +} + #include "AMDGPUGenCallingConv.inc" // Find a larger type to do a load / store of a vector with. @@ -87,6 +127,29 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); } +bool AMDGPUTargetLowering::isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op) +{ + assert(Op.getOpcode() == ISD::OR); + + SDValue N0 = Op->getOperand(0); + SDValue N1 = Op->getOperand(1); + EVT VT = N0.getValueType(); + + if (VT.isInteger() && !VT.isVector()) { + KnownBits LHSKnown, RHSKnown; + DAG.computeKnownBits(N0, LHSKnown); + + if (LHSKnown.Zero.getBoolValue()) { + DAG.computeKnownBits(N1, RHSKnown); + + if (!(~RHSKnown.Zero & ~LHSKnown.Zero)) + return true; + } + } + + return false; +} + AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -566,13 +629,19 @@ static bool hasSourceMods(const SDNode *N) { case AMDGPUISD::INTERP_P1: case AMDGPUISD::INTERP_P2: case AMDGPUISD::DIV_SCALE: + + // TODO: Should really be looking at the users of the bitcast. These are + // problematic because bitcasts are used to legalize all stores to integer + // types. + case ISD::BITCAST: return false; default: return true; } } -static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold = 4) { +bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N, + unsigned CostThreshold) { // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus // it is truly free to use a source modifier in all cases. If there are // multiple users but for each one will necessitate using VOP3, there will be @@ -766,8 +835,43 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { //===---------------------------------------------------------------------===// CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, - bool IsVarArg) const { - return CC_AMDGPU; + bool IsVarArg) { + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return CC_AMDGPU_Kernel; + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_HS: + return CC_AMDGPU; + case CallingConv::C: + case CallingConv::Fast: + return CC_AMDGPU_Func; + default: + report_fatal_error("Unsupported calling convention."); + } +} + +CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC, + bool IsVarArg) { + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + return CC_AMDGPU_Kernel; + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_HS: + return RetCC_SI_Shader; + case CallingConv::C: + case CallingConv::Fast: + return RetCC_AMDGPU_Func; + default: + report_fatal_error("Unsupported calling convention."); + } } /// The SelectionDAGBuilder will automatically promote function arguments @@ -867,18 +971,15 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State, } } -void AMDGPUTargetLowering::AnalyzeReturn(CCState &State, - const SmallVectorImpl &Outs) const { - - State.AnalyzeReturn(Outs, RetCC_SI); -} - -SDValue -AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SDLoc &DL, SelectionDAG &DAG) const { +SDValue AMDGPUTargetLowering::LowerReturn( + SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &DL, SelectionDAG &DAG) const { + // FIXME: Fails for r600 tests + //assert(!isVarArg && Outs.empty() && OutVals.empty() && + // "wave terminate should not have return values"); return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain); } @@ -889,19 +990,12 @@ AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, /// Selects the correct CCAssignFn for a given CallingConvention value. CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) { - switch (CC) { - case CallingConv::C: - case CallingConv::AMDGPU_KERNEL: - case CallingConv::SPIR_KERNEL: - return CC_AMDGPU_Kernel; - case CallingConv::AMDGPU_VS: - case CallingConv::AMDGPU_GS: - case CallingConv::AMDGPU_PS: - case CallingConv::AMDGPU_CS: - return CC_AMDGPU; - default: - report_fatal_error("Unsupported calling convention."); - } + return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg); +} + +CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, + bool IsVarArg) { + return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg); } SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, @@ -2293,11 +2387,11 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, //===----------------------------------------------------------------------===// static bool isU24(SDValue Op, SelectionDAG &DAG) { - APInt KnownZero, KnownOne; + KnownBits Known; EVT VT = Op.getValueType(); - DAG.computeKnownBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, Known); - return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24; + return (VT.getSizeInBits() - Known.countMinLeadingZeros()) <= 24; } static bool isI24(SDValue Op, SelectionDAG &DAG) { @@ -2315,12 +2409,13 @@ static bool simplifyI24(SDNode *Node24, unsigned OpIdx, SelectionDAG &DAG = DCI.DAG; SDValue Op = Node24->getOperand(OpIdx); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = Op.getValueType(); APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, true, true); - if (TLO.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI)) + if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO)) return true; return false; @@ -2523,7 +2618,54 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const { - if (N->getValueType(0) != MVT::i64) + EVT VT = N->getValueType(0); + + ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); + if (!RHS) + return SDValue(); + + SDValue LHS = N->getOperand(0); + unsigned RHSVal = RHS->getZExtValue(); + if (!RHSVal) + return LHS; + + SDLoc SL(N); + SelectionDAG &DAG = DCI.DAG; + + switch (LHS->getOpcode()) { + default: + break; + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ANY_EXTEND: { + // shl (ext x) => zext (shl x), if shift does not overflow int + if (VT != MVT::i64) + break; + KnownBits Known; + SDValue X = LHS->getOperand(0); + DAG.computeKnownBits(X, Known); + unsigned LZ = Known.countMinLeadingZeros(); + if (LZ < RHSVal) + break; + EVT XVT = X.getValueType(); + SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); + return DAG.getZExtOrTrunc(Shl, SL, VT); + } + case ISD::OR: if (!isOrEquivalentToAdd(DAG, LHS)) break; + case ISD::ADD: { // Fall through from above + // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1) + if (ConstantSDNode *C2 = dyn_cast(LHS->getOperand(1))) { + SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0), + SDValue(RHS, 0)); + SDValue C2V = DAG.getConstant(C2->getAPIntValue() << RHSVal, + SDLoc(C2), VT); + return DAG.getNode(LHS->getOpcode(), SL, VT, Shl, C2V); + } + break; + } + } + + if (VT != MVT::i64) return SDValue(); // i64 (shl x, C) -> (build_pair 0, (shl x, C -32)) @@ -2531,19 +2673,9 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, // On some subtargets, 64-bit shift is a quarter rate instruction. In the // common case, splitting this into a move and a 32-bit shift is faster and // the same code size. - const ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); - if (!RHS) - return SDValue(); - - unsigned RHSVal = RHS->getZExtValue(); if (RHSVal < 32) return SDValue(); - SDValue LHS = N->getOperand(0); - - SDLoc SL(N); - SelectionDAG &DAG = DCI.DAG; - SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32); SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS); @@ -3346,7 +3478,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DL); } - if ((OffsetVal + WidthVal) >= 32) { + if ((OffsetVal + WidthVal) >= 32 && + !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) { SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32); return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32, BitsFrom, ShiftVal); @@ -3357,13 +3490,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, OffsetVal, OffsetVal + WidthVal); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || - TLI.SimplifyDemandedBits(BitsFrom, Demanded, - KnownZero, KnownOne, TLO)) { + if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || + TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) { DCI.CommitTargetLoweringOpt(TLO); } } @@ -3436,6 +3568,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(ELSE) NODE_NAME_CASE(LOOP) NODE_NAME_CASE(CALL) + NODE_NAME_CASE(TRAP) NODE_NAME_CASE(RET_FLAG) NODE_NAME_CASE(RETURN_TO_EPILOG) NODE_NAME_CASE(ENDPGM) @@ -3514,6 +3647,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(KILL) NODE_NAME_CASE(DUMMY_CHAIN) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; + NODE_NAME_CASE(INIT_EXEC) + NODE_NAME_CASE(INIT_EXEC_FROM_INPUT) NODE_NAME_CASE(SENDMSG) NODE_NAME_CASE(SENDMSGHALT) NODE_NAME_CASE(INTERP_MOV) @@ -3572,14 +3707,12 @@ SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand, } void AMDGPUTargetLowering::computeKnownBitsForTargetNode( - const SDValue Op, APInt &KnownZero, APInt &KnownOne, + const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - unsigned BitWidth = KnownZero.getBitWidth(); - KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + Known.resetAll(); // Don't know anything. - APInt KnownZero2; - APInt KnownOne2; + KnownBits Known2; unsigned Opc = Op.getOpcode(); switch (Opc) { @@ -3587,7 +3720,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( break; case AMDGPUISD::CARRY: case AMDGPUISD::BORROW: { - KnownZero = APInt::getHighBitsSet(32, 31); + Known.Zero = APInt::getHighBitsSet(32, 31); break; } @@ -3600,16 +3733,16 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( uint32_t Width = CWidth->getZExtValue() & 0x1f; if (Opc == AMDGPUISD::BFE_U32) - KnownZero = APInt::getHighBitsSet(32, 32 - Width); + Known.Zero = APInt::getHighBitsSet(32, 32 - Width); break; } case AMDGPUISD::FP_TO_FP16: case AMDGPUISD::FP16_ZEXT: { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); // High bits are zero. - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 16); + Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16); break; } } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index d6aa0ba92bf7816e1bfd6f8c502b089d5e557fe0..0d066cdbdff4d8c58a97e34fa6b3135bd2974937 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -34,6 +34,9 @@ private: /// compare. SDValue getFFBH_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL) const; +public: + static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op); + protected: const AMDGPUSubtarget *Subtarget; AMDGPUAS AMDGPUASI; @@ -115,9 +118,6 @@ protected: SmallVectorImpl &Results) const; void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl &Ins) const; - void AnalyzeReturn(CCState &State, - const SmallVectorImpl &Outs) const; - public: AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); @@ -125,12 +125,15 @@ public: if (getTargetMachine().Options.NoSignedZerosFPMath) return true; - if (const auto *BO = dyn_cast(Op)) - return BO->Flags.hasNoSignedZeros(); + const auto Flags = Op.getNode()->getFlags(); + if (Flags.isDefined()) + return Flags.hasNoSignedZeros(); return false; } + static bool allUsesHaveSourceMods(const SDNode *N, + unsigned CostThreshold = 4); bool isFAbsFree(EVT VT) const override; bool isFNegFree(EVT VT) const override; bool isTruncateFree(EVT Src, EVT Dest) const override; @@ -161,6 +164,8 @@ public: bool isCheapToSpeculateCtlz() const override; static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); + static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, @@ -199,8 +204,7 @@ public: /// either zero or one and return them in the \p KnownZero and \p KnownOne /// bitsets. void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; @@ -231,6 +235,10 @@ public: AMDGPUAS getAMDGPUAS() const { return AMDGPUASI; } + + MVT getFenceOperandTy(const DataLayout &DL) const override { + return MVT::i32; + } }; namespace AMDGPUISD { @@ -244,6 +252,7 @@ enum NodeType : unsigned { // Function call. CALL, + TRAP, // Masked control flow nodes. IF, @@ -365,6 +374,8 @@ enum NodeType : unsigned { BUILD_VERTICAL_VECTOR, /// Pointer to the start of the shader's constant data. CONST_DATA_PTR, + INIT_EXEC, + INIT_EXEC_FROM_INPUT, SENDMSG, SENDMSGHALT, INTERP_MOV, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index 12caa5118342a13a74490c808d81667a43cf33bd..41cc7d7093ec11118e34574265758355f888c9a4 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -17,8 +17,8 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H #include "AMDGPU.h" -#include "llvm/Target/TargetInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "AMDGPUGenInstrInfo.inc" diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 56f060984f0840f134652c76bbd547f4265982ab..e286558ce60d7df736a3c77ec165da0295a9e2fd 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -78,6 +78,11 @@ def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>; def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>; def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>; +def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP", + SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>, + [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue] +>; + def AMDGPUconstdata_ptr : SDNode< "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<0, iPTR>]> @@ -294,6 +299,15 @@ def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>; +def AMDGPUinit_exec : SDNode<"AMDGPUISD::INIT_EXEC", + SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPInGlue]>; + +def AMDGPUinit_exec_from_input : SDNode<"AMDGPUISD::INIT_EXEC_FROM_INPUT", + SDTypeProfile<0, 2, + [SDTCisInt<0>, SDTCisInt<1>]>, + [SDNPHasChain, SDNPInGlue]>; + def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", SDTypeProfile<0, 1, [SDTCisInt<0>]>, [SDNPHasChain, SDNPInGlue]>; @@ -366,6 +380,6 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone, def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, +def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] >; diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 8867ed689a31160c5134aef19f169049436158c4..e54c887d609063c8278013fc46792ba945652e88 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -126,9 +126,10 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD)) .add(I.getOperand(1)) .add(I.getOperand(0)) - .addImm(0) - .addImm(0) - .addImm(0); + .addImm(0) // offset + .addImm(0) // glc + .addImm(0); // slc + // Now that we selected an opcode, we need to constrain the register // operands to use appropriate classes. @@ -392,9 +393,9 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) .add(I.getOperand(0)) .addReg(PtrReg) - .addImm(0) - .addImm(0) - .addImm(0); + .addImm(0) // offset + .addImm(0) // glc + .addImm(0); // slc bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); I.eraseFromParent(); diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index c87102e55dfb0d0b15d40ace3586e1f7b2144500..ef845f44d365b2ff97cb7d8c31975289be959505 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -15,9 +15,9 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H #include "AMDGPU.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index b8d681298dee9f7116fc0732b67477423dc8808c..4e688ab0b10504c41b4c4a3bad0f049e51762cca 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -50,6 +50,16 @@ def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; def InstFlag : OperandWithDefaultOps ; def ADDRIndirect : ComplexPattern; +def u16ImmTarget : AsmOperandClass { + let Name = "U16Imm"; + let RenderMethod = "addImmOperands"; +} + +def s16ImmTarget : AsmOperandClass { + let Name = "S16Imm"; + let RenderMethod = "addImmOperands"; +} + let OperandType = "OPERAND_IMMEDIATE" in { def u32imm : Operand { @@ -58,6 +68,12 @@ def u32imm : Operand { def u16imm : Operand { let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = u16ImmTarget; +} + +def s16imm : Operand { + let PrintMethod = "printU16ImmOperand"; + let ParserMatchClass = s16ImmTarget; } def u8imm : Operand { diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index a2567a549028f3301c05054376860929fadacadc..790a69b8439797af92eca10d676beecc6867f802 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -14,10 +14,10 @@ #include "AMDGPULegalizerInfo.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Type.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/Target/TargetOpcodes.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOpcodes.h" using namespace llvm; @@ -28,23 +28,39 @@ using namespace llvm; AMDGPULegalizerInfo::AMDGPULegalizerInfo() { using namespace TargetOpcode; + const LLT S1= LLT::scalar(1); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); const LLT P1 = LLT::pointer(1, 64); const LLT P2 = LLT::pointer(2, 64); + setAction({G_ADD, S32}, Legal); + + // FIXME: i1 operands to intrinsics should always be legal, but other i1 + // values may not be legal. We need to figure out how to distinguish + // between these two scenarios. + setAction({G_CONSTANT, S1}, Legal); + setAction({G_CONSTANT, S32}, Legal); setAction({G_CONSTANT, S64}, Legal); + setAction({G_FCONSTANT, S32}, Legal); + setAction({G_GEP, P1}, Legal); setAction({G_GEP, P2}, Legal); setAction({G_GEP, 1, S64}, Legal); + setAction({G_ICMP, S1}, Legal); + setAction({G_ICMP, 1, S32}, Legal); + setAction({G_LOAD, P1}, Legal); setAction({G_LOAD, P2}, Legal); setAction({G_LOAD, S32}, Legal); setAction({G_LOAD, 1, P1}, Legal); setAction({G_LOAD, 1, P2}, Legal); + setAction({G_SELECT, S32}, Legal); + setAction({G_SELECT, 1, S1}, Legal); + setAction({G_STORE, S32}, Legal); setAction({G_STORE, 1, P1}, Legal); diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp index 5721ea41e3bdba8f8ec09bf40c0d45dab313de8c..846e7dff5f8cc4daecbc2dc5f196d3d1396890f4 100644 --- a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -23,10 +25,14 @@ namespace { const unsigned MaxStaticSize = 1024; class AMDGPULowerIntrinsics : public ModulePass { +private: + bool makeLIDRangeMetadata(Function &F) const; + public: static char ID; - AMDGPULowerIntrinsics() : ModulePass(ID) { } + AMDGPULowerIntrinsics() : ModulePass(ID) {} + bool runOnModule(Module &M) override; StringRef getPassName() const override { return "AMDGPU Lower Intrinsics"; @@ -39,8 +45,8 @@ char AMDGPULowerIntrinsics::ID = 0; char &llvm::AMDGPULowerIntrinsicsID = AMDGPULowerIntrinsics::ID; -INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, - "Lower intrinsics", false, false) +INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false, + false) // TODO: Should refine based on estimated number of accesses (e.g. does it // require splitting based on alignment) @@ -96,6 +102,25 @@ static bool expandMemIntrinsicUses(Function &F) { return Changed; } +bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const { + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + const TargetMachine &TM = TPC->getTM(); + const AMDGPUSubtarget &ST = TM.getSubtarget(F); + bool Changed = false; + + for (auto *U : F.users()) { + auto *CI = dyn_cast(U); + if (!CI) + continue; + + Changed |= ST.makeLIDRangeMetadata(CI); + } + return Changed; +} + bool AMDGPULowerIntrinsics::runOnModule(Module &M) { bool Changed = false; @@ -110,6 +135,19 @@ bool AMDGPULowerIntrinsics::runOnModule(Module &M) { if (expandMemIntrinsicUses(F)) Changed = true; break; + + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::r600_read_tidig_x: + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::r600_read_tidig_y: + case Intrinsic::amdgcn_workitem_id_z: + case Intrinsic::r600_read_tidig_z: + case Intrinsic::r600_read_local_size_x: + case Intrinsic::r600_read_local_size_y: + case Intrinsic::r600_read_local_size_z: + Changed |= makeLIDRangeMetadata(F); + break; + default: break; } diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 14ee1c81f8fa7cfbccff0a4ed6381b9fab0920de..63dd0d726d91d053c732cb13ea174d4f19004c8e 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -38,7 +38,6 @@ using namespace llvm; #include "AMDGPUGenMCPseudoLowering.inc" - AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st, const AsmPrinter &ap): Ctx(ctx), ST(st), AP(ap) { } @@ -126,9 +125,15 @@ bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO, } void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + unsigned Opcode = MI->getOpcode(); - int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode()); + // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We + // need to select it to the subtarget specific version, and there's no way to + // do that with a single pseudo source operation. + if (Opcode == AMDGPU::S_SETPC_B64_return) + Opcode = AMDGPU::S_SETPC_B64; + int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(Opcode); if (MCOpcode == -1) { LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext(); C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have " @@ -225,6 +230,12 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) { + if (isVerbose()) + OutStreamer->emitRawComment(" divergent unreachable"); + return; + } + MCInst TmpInst; MCInstLowering.lower(MI, TmpInst); EmitToStreamer(*OutStreamer, TmpInst); diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2071b6f157cd33d2e9f32134e0ecd19f69fc8e40 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -0,0 +1,2881 @@ +//===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine instruction level CFG structurizer pass. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegionInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "amdgpucfgstructurizer" + +namespace { +class PHILinearizeDestIterator; + +class PHILinearize { + friend class PHILinearizeDestIterator; + +public: + typedef std::pair PHISourceT; + +private: + typedef DenseSet PHISourcesT; + typedef struct { + unsigned DestReg; + DebugLoc DL; + PHISourcesT Sources; + } PHIInfoElementT; + typedef SmallPtrSet PHIInfoT; + PHIInfoT PHIInfo; + + static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); + static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); + static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); + static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg, + MachineBasicBlock *SourceMBB); + static void phiInfoElementRemoveSource(PHIInfoElementT *Info, + unsigned SourceReg, + MachineBasicBlock *SourceMBB); + PHIInfoElementT *findPHIInfoElement(unsigned DestReg); + PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg, + MachineBasicBlock *SourceMBB); + +public: + bool findSourcesFromMBB(MachineBasicBlock *SourceMBB, + SmallVector &Sources); + void addDest(unsigned DestReg, const DebugLoc &DL); + void replaceDef(unsigned OldDestReg, unsigned NewDestReg); + void deleteDef(unsigned DestReg); + void addSource(unsigned DestReg, unsigned SourceReg, + MachineBasicBlock *SourceMBB); + void removeSource(unsigned DestReg, unsigned SourceReg, + MachineBasicBlock *SourceMBB = nullptr); + bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, + unsigned &DestReg); + bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr); + unsigned getNumSources(unsigned DestReg); + void dump(MachineRegisterInfo *MRI); + void clear(); + + typedef PHISourcesT::iterator source_iterator; + typedef PHILinearizeDestIterator dest_iterator; + + dest_iterator dests_begin(); + dest_iterator dests_end(); + + source_iterator sources_begin(unsigned Reg); + source_iterator sources_end(unsigned Reg); +}; + +class PHILinearizeDestIterator { +private: + PHILinearize::PHIInfoT::iterator Iter; + +public: + unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } + PHILinearizeDestIterator &operator++() { + ++Iter; + return *this; + } + bool operator==(const PHILinearizeDestIterator &I) const { + return I.Iter == Iter; + } + bool operator!=(const PHILinearizeDestIterator &I) const { + return I.Iter != Iter; + } + + PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} +}; + +unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { + return Info->DestReg; +} + +void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info, + unsigned NewDef) { + Info->DestReg = NewDef; +} + +PHILinearize::PHISourcesT & +PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) { + return Info->Sources; +} + +void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info, + unsigned SourceReg, + MachineBasicBlock *SourceMBB) { + // Assertion ensures we don't use the same SourceMBB for the + // sources, because we cannot have different registers with + // identical predecessors, but we can have the same register for + // multiple predecessors. +#if !defined(NDEBUG) + for (auto SI : phiInfoElementGetSources(Info)) { + assert((SI.second != SourceMBB || SourceReg == SI.first)); + } +#endif + + phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB)); +} + +void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info, + unsigned SourceReg, + MachineBasicBlock *SourceMBB) { + auto &Sources = phiInfoElementGetSources(Info); + SmallVector ElimiatedSources; + for (auto SI : Sources) { + if (SI.first == SourceReg && + (SI.second == nullptr || SI.second == SourceMBB)) { + ElimiatedSources.push_back(PHISourceT(SI.first, SI.second)); + } + } + + for (auto &Source : ElimiatedSources) { + Sources.erase(Source); + } +} + +PHILinearize::PHIInfoElementT * +PHILinearize::findPHIInfoElement(unsigned DestReg) { + for (auto I : PHIInfo) { + if (phiInfoElementGetDest(I) == DestReg) { + return I; + } + } + return nullptr; +} + +PHILinearize::PHIInfoElementT * +PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg, + MachineBasicBlock *SourceMBB) { + for (auto I : PHIInfo) { + for (auto SI : phiInfoElementGetSources(I)) { + if (SI.first == SourceReg && + (SI.second == nullptr || SI.second == SourceMBB)) { + return I; + } + } + } + return nullptr; +} + +bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB, + SmallVector &Sources) { + bool FoundSource = false; + for (auto I : PHIInfo) { + for (auto SI : phiInfoElementGetSources(I)) { + if (SI.second == SourceMBB) { + FoundSource = true; + Sources.push_back(SI.first); + } + } + } + return FoundSource; +} + +void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) { + assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists"); + PHISourcesT EmptySet; + PHIInfoElementT *NewElement = new PHIInfoElementT(); + NewElement->DestReg = DestReg; + NewElement->DL = DL; + NewElement->Sources = EmptySet; + PHIInfo.insert(NewElement); +} + +void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) { + phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg); +} + +void PHILinearize::deleteDef(unsigned DestReg) { + PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg); + PHIInfo.erase(InfoElement); + delete InfoElement; +} + +void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg, + MachineBasicBlock *SourceMBB) { + phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); +} + +void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg, + MachineBasicBlock *SourceMBB) { + phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); +} + +bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, + unsigned &DestReg) { + PHIInfoElementT *InfoElement = + findPHIInfoElementFromSource(SourceReg, SourceMBB); + if (InfoElement != nullptr) { + DestReg = phiInfoElementGetDest(InfoElement); + return true; + } + return false; +} + +bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) { + unsigned DestReg; + return findDest(Reg, SourceMBB, DestReg); +} + +unsigned PHILinearize::getNumSources(unsigned DestReg) { + return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); +} + +void PHILinearize::dump(MachineRegisterInfo *MRI) { + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + dbgs() << "=PHIInfo Start=\n"; + for (auto PII : this->PHIInfo) { + PHIInfoElementT &Element = *PII; + dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI) + << " Sources: {"; + for (auto &SI : Element.Sources) { + dbgs() << PrintReg(SI.first, TRI) << "(BB#" + << SI.second->getNumber() << "),"; + } + dbgs() << "}\n"; + } + dbgs() << "=PHIInfo End=\n"; +} + +void PHILinearize::clear() { PHIInfo = PHIInfoT(); } + +PHILinearize::dest_iterator PHILinearize::dests_begin() { + return PHILinearizeDestIterator(PHIInfo.begin()); +} + +PHILinearize::dest_iterator PHILinearize::dests_end() { + return PHILinearizeDestIterator(PHIInfo.end()); +} + +PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { + auto InfoElement = findPHIInfoElement(Reg); + return phiInfoElementGetSources(InfoElement).begin(); +} +PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { + auto InfoElement = findPHIInfoElement(Reg); + return phiInfoElementGetSources(InfoElement).end(); +} + +class RegionMRT; +class MBBMRT; + +static unsigned getPHINumInputs(MachineInstr &PHI) { + assert(PHI.isPHI()); + return (PHI.getNumOperands() - 1) / 2; +} + +static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { + assert(PHI.isPHI()); + return PHI.getOperand(Index * 2 + 2).getMBB(); +} + +static void setPhiPred(MachineInstr &PHI, unsigned Index, + MachineBasicBlock *NewPred) { + PHI.getOperand(Index * 2 + 2).setMBB(NewPred); +} + +static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { + assert(PHI.isPHI()); + return PHI.getOperand(Index * 2 + 1).getReg(); +} + +static unsigned getPHIDestReg(MachineInstr &PHI) { + assert(PHI.isPHI()); + return PHI.getOperand(0).getReg(); +} + +class LinearizedRegion { +protected: + MachineBasicBlock *Entry; + // The exit block is part of the region, and is the last + // merge block before exiting the region. + MachineBasicBlock *Exit; + DenseSet LiveOuts; + SmallPtrSet MBBs; + bool HasLoop; + LinearizedRegion *Parent; + RegionMRT *RMRT; + + void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, + MachineInstr *DefInstr, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo); + + void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, + RegionMRT *TopRegion); + + void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, + RegionMRT *TopRegion = nullptr); + +public: + void setRegionMRT(RegionMRT *Region) { RMRT = Region; } + + RegionMRT *getRegionMRT() { return RMRT; } + + void setParent(LinearizedRegion *P) { Parent = P; } + + LinearizedRegion *getParent() { return Parent; } + + void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); + + void setBBSelectRegIn(unsigned Reg); + + unsigned getBBSelectRegIn(); + + void setBBSelectRegOut(unsigned Reg, bool IsLiveOut); + + unsigned getBBSelectRegOut(); + + void setHasLoop(bool Value); + + bool getHasLoop(); + + void addLiveOut(unsigned VReg); + + void removeLiveOut(unsigned Reg); + + void replaceLiveOut(unsigned OldReg, unsigned NewReg); + + void replaceRegister(unsigned Register, unsigned NewRegister, + MachineRegisterInfo *MRI, bool ReplaceInside, + bool ReplaceOutside, bool IncludeLoopPHIs); + + void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI); + + void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI); + + DenseSet *getLiveOuts(); + + void setEntry(MachineBasicBlock *NewEntry); + + MachineBasicBlock *getEntry(); + + void setExit(MachineBasicBlock *NewExit); + + MachineBasicBlock *getExit(); + + void addMBB(MachineBasicBlock *MBB); + + void addMBBs(LinearizedRegion *InnerRegion); + + bool contains(MachineBasicBlock *MBB); + + bool isLiveOut(unsigned Reg); + + bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); + + void removeFalseRegisterKills(MachineRegisterInfo *MRI); + + void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); + + LinearizedRegion(); + + ~LinearizedRegion(); +}; + +class MRT { +protected: + RegionMRT *Parent; + unsigned BBSelectRegIn; + unsigned BBSelectRegOut; + +public: + unsigned getBBSelectRegIn() { return BBSelectRegIn; } + + unsigned getBBSelectRegOut() { return BBSelectRegOut; } + + void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; } + + void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; } + + virtual RegionMRT *getRegionMRT() { return nullptr; } + + virtual MBBMRT *getMBBMRT() { return nullptr; } + + bool isRegion() { return getRegionMRT() != nullptr; } + + bool isMBB() { return getMBBMRT() != nullptr; } + + bool isRoot() { return Parent == nullptr; } + + void setParent(RegionMRT *Region) { Parent = Region; } + + RegionMRT *getParent() { return Parent; } + + static MachineBasicBlock * + initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, + DenseMap &RegionMap); + + static RegionMRT *buildMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo, + const SIInstrInfo *TII, + MachineRegisterInfo *MRI); + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0; + + void dumpDepth(int depth) { + for (int i = depth; i > 0; --i) { + dbgs() << " "; + } + } + + virtual ~MRT() {} +}; + +class MBBMRT : public MRT { + MachineBasicBlock *MBB; + +public: + virtual MBBMRT *getMBBMRT() { return this; } + + MachineBasicBlock *getMBB() { return MBB; } + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + dumpDepth(depth); + dbgs() << "MBB: " << getMBB()->getNumber(); + dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); + dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; + } + + MBBMRT(MachineBasicBlock *BB) : MBB(BB) { + setParent(nullptr); + setBBSelectRegOut(0); + setBBSelectRegIn(0); + } +}; + +class RegionMRT : public MRT { +protected: + MachineRegion *Region; + LinearizedRegion *LRegion; + MachineBasicBlock *Succ; + + SetVector Children; + +public: + virtual RegionMRT *getRegionMRT() { return this; } + + void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { + LRegion = LinearizeRegion; + } + + LinearizedRegion *getLinearizedRegion() { return LRegion; } + + MachineRegion *getMachineRegion() { return Region; } + + unsigned getInnerOutputRegister() { + return (*(Children.begin()))->getBBSelectRegOut(); + } + + void addChild(MRT *Tree) { Children.insert(Tree); } + + SetVector *getChildren() { return &Children; } + + virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { + dumpDepth(depth); + dbgs() << "Region: " << (void *)Region; + dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); + dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; + + dumpDepth(depth); + if (getSucc()) + dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; + else + dbgs() << "Succ: none \n"; + for (auto MRTI : Children) { + MRTI->dump(TRI, depth + 1); + } + } + + MRT *getEntryTree() { return Children.back(); } + + MRT *getExitTree() { return Children.front(); } + + MachineBasicBlock *getEntry() { + MRT *Tree = Children.back(); + return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() + : Tree->getMBBMRT()->getMBB(); + } + + MachineBasicBlock *getExit() { + MRT *Tree = Children.front(); + return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit() + : Tree->getMBBMRT()->getMBB(); + } + + void setSucc(MachineBasicBlock *MBB) { Succ = MBB; } + + MachineBasicBlock *getSucc() { return Succ; } + + bool contains(MachineBasicBlock *MBB) { + for (auto CI : Children) { + if (CI->isMBB()) { + if (MBB == CI->getMBBMRT()->getMBB()) { + return true; + } + } else { + if (CI->getRegionMRT()->contains(MBB)) { + return true; + } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr && + CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) { + return true; + } + } + } + return false; + } + + void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { + LinearizedRegion *LRegion = getLinearizedRegion(); + LRegion->replaceLiveOut(Register, NewRegister); + for (auto &CI : Children) { + if (CI->isRegion()) { + CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); + } + } + } + + RegionMRT(MachineRegion *MachineRegion) + : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) { + setParent(nullptr); + setBBSelectRegOut(0); + setBBSelectRegIn(0); + } + + virtual ~RegionMRT() { + if (LRegion) { + delete LRegion; + } + + for (auto CI : Children) { + delete &(*CI); + } + } +}; + +static unsigned createBBSelectReg(const SIInstrInfo *TII, + MachineRegisterInfo *MRI) { + return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); +} + +MachineBasicBlock * +MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, + DenseMap &RegionMap) { + for (auto &MFI : MF) { + MachineBasicBlock *ExitMBB = &MFI; + if (ExitMBB->succ_size() == 0) { + return ExitMBB; + } + } + llvm_unreachable("CFG has no exit block"); + return nullptr; +} + +RegionMRT *MRT::buildMRT(MachineFunction &MF, + const MachineRegionInfo *RegionInfo, + const SIInstrInfo *TII, MachineRegisterInfo *MRI) { + SmallPtrSet PlacedRegions; + DenseMap RegionMap; + MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); + RegionMRT *Result = new RegionMRT(TopLevelRegion); + RegionMap[TopLevelRegion] = Result; + + // Insert the exit block first, we need it to be the merge node + // for the top level region. + MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); + + unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); + MBBMRT *ExitMRT = new MBBMRT(Exit); + RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); + ExitMRT->setBBSelectRegIn(BBSelectRegIn); + + for (auto MBBI : post_order(&(MF.front()))) { + MachineBasicBlock *MBB = &(*MBBI); + + // Skip Exit since we already added it + if (MBB == Exit) { + continue; + } + + DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); + MBBMRT *NewMBB = new MBBMRT(MBB); + MachineRegion *Region = RegionInfo->getRegionFor(MBB); + + // Ensure we have the MRT region + if (RegionMap.count(Region) == 0) { + RegionMRT *NewMRTRegion = new RegionMRT(Region); + RegionMap[Region] = NewMRTRegion; + + // Ensure all parents are in the RegionMap + MachineRegion *Parent = Region->getParent(); + while (RegionMap.count(Parent) == 0) { + RegionMRT *NewMRTParent = new RegionMRT(Parent); + NewMRTParent->addChild(NewMRTRegion); + NewMRTRegion->setParent(NewMRTParent); + RegionMap[Parent] = NewMRTParent; + NewMRTRegion = NewMRTParent; + Parent = Parent->getParent(); + } + RegionMap[Parent]->addChild(NewMRTRegion); + NewMRTRegion->setParent(RegionMap[Parent]); + } + + // Add MBB to Region MRT + RegionMap[Region]->addChild(NewMBB); + NewMBB->setParent(RegionMap[Region]); + RegionMap[Region]->setSucc(Region->getExit()); + } + return Result; +} + +void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + if (TRI->isVirtualRegister(Reg)) { + DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); + // If this is a source register to a PHI we are chaining, it + // must be live out. + if (PHIInfo.isSource(Reg)) { + DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n"); + addLiveOut(Reg); + } else { + // If this is live out of the MBB + for (auto &UI : MRI->use_operands(Reg)) { + if (UI.getParent()->getParent() != MBB) { + DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber() + << "): " << PrintReg(Reg, TRI) << "\n"); + addLiveOut(Reg); + } else { + // If the use is in the same MBB we have to make sure + // it is after the def, otherwise it is live out in a loop + MachineInstr *UseInstr = UI.getParent(); + for (MachineBasicBlock::instr_iterator + MII = UseInstr->getIterator(), + MIE = UseInstr->getParent()->instr_end(); + MII != MIE; ++MII) { + if ((&(*MII)) == DefInstr) { + DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI) + << "\n"); + addLiveOut(Reg); + } + } + } + } + } + } +} + +void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, + MachineInstr *DefInstr, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + if (TRI->isVirtualRegister(Reg)) { + DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); + for (auto &UI : MRI->use_operands(Reg)) { + if (!Region->contains(UI.getParent()->getParent())) { + DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region + << "): " << PrintReg(Reg, TRI) << "\n"); + addLiveOut(Reg); + } + } + } +} + +void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n"); + for (auto &II : *MBB) { + for (auto &RI : II.defs()) { + storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); + } + for (auto &IRI : II.implicit_operands()) { + if (IRI.isDef()) { + storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo); + } + } + } + + // If we have a successor with a PHI, source coming from this MBB we have to + // add the register as live out + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); + SI != E; ++SI) { + for (auto &II : *(*SI)) { + if (II.isPHI()) { + MachineInstr &PHI = II; + int numPreds = getPHINumInputs(PHI); + for (int i = 0; i < numPreds; ++i) { + if (getPHIPred(PHI, i) == MBB) { + unsigned PHIReg = getPHISourceReg(PHI, i); + DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber() + << " -> BB#" << (*SI)->getNumber() + << "): " << PrintReg(PHIReg, TRI) << "\n"); + addLiveOut(PHIReg); + } + } + } + } + } + + DEBUG(dbgs() << "-Store Live Outs Endn-\n"); +} + +void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo, + RegionMRT *TopRegion) { + for (auto &II : *MBB) { + for (auto &RI : II.defs()) { + storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI, + PHIInfo); + } + for (auto &IRI : II.implicit_operands()) { + if (IRI.isDef()) { + storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI, + TRI, PHIInfo); + } + } + } +} + +void LinearizedRegion::storeLiveOuts(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo, + RegionMRT *CurrentTopRegion) { + MachineBasicBlock *Exit = Region->getSucc(); + + RegionMRT *TopRegion = + CurrentTopRegion == nullptr ? Region : CurrentTopRegion; + + // Check if exit is end of function, if so, no live outs. + if (Exit == nullptr) + return; + + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (CI->isMBB()) { + auto MBB = CI->getMBBMRT()->getMBB(); + storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion); + } else { + LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion(); + // We should be limited to only store registers that are live out from the + // lineaized region + for (auto MBBI : SubRegion->MBBs) { + storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion); + } + } + } + + if (CurrentTopRegion == nullptr) { + auto Succ = Region->getSucc(); + for (auto &II : *Succ) { + if (II.isPHI()) { + MachineInstr &PHI = II; + int numPreds = getPHINumInputs(PHI); + for (int i = 0; i < numPreds; ++i) { + if (Region->contains(getPHIPred(PHI, i))) { + unsigned PHIReg = getPHISourceReg(PHI, i); + DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region + << "): " << PrintReg(PHIReg, TRI) << "\n"); + addLiveOut(PHIReg); + } + } + } + } + } +} + +void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { + OS << "Linearized Region {"; + bool IsFirst = true; + for (const auto &MBB : MBBs) { + if (IsFirst) { + IsFirst = false; + } else { + OS << " ,"; + } + OS << MBB->getNumber(); + } + OS << "} (" << Entry->getNumber() << ", " + << (Exit == nullptr ? -1 : Exit->getNumber()) + << "): In:" << PrintReg(getBBSelectRegIn(), TRI) + << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {"; + for (auto &LI : LiveOuts) { + OS << PrintReg(LI, TRI) << " "; + } + OS << "} \n"; +} + +unsigned LinearizedRegion::getBBSelectRegIn() { + return getRegionMRT()->getBBSelectRegIn(); +} + +unsigned LinearizedRegion::getBBSelectRegOut() { + return getRegionMRT()->getBBSelectRegOut(); +} + +void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; } + +bool LinearizedRegion::getHasLoop() { return HasLoop; } + +void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); } + +void LinearizedRegion::removeLiveOut(unsigned Reg) { + if (isLiveOut(Reg)) + LiveOuts.erase(Reg); +} + +void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) { + if (isLiveOut(OldReg)) { + removeLiveOut(OldReg); + addLiveOut(NewReg); + } +} + +void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, + MachineRegisterInfo *MRI, + bool ReplaceInside, bool ReplaceOutside, + bool IncludeLoopPHI) { + assert(Register != NewRegister && "Cannot replace a reg with itself"); + + DEBUG(dbgs() << "Pepareing to replace register (region): " + << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); + + // If we are replacing outside, we also need to update the LiveOuts + if (ReplaceOutside && + (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) { + LinearizedRegion *Current = this; + while (Current != nullptr && Current->getEntry() != nullptr) { + DEBUG(dbgs() << "Region before register replace\n"); + DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); + Current->replaceLiveOut(Register, NewRegister); + DEBUG(dbgs() << "Region after register replace\n"); + DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); + Current = Current->getParent(); + } + } + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), + E = MRI->reg_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + + // We don't rewrite defs. + if (O.isDef()) + continue; + + bool IsInside = contains(O.getParent()->getParent()); + bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && + O.getParent()->getParent() == getEntry()); + bool ShouldReplace = (IsInside && ReplaceInside) || + (!IsInside && ReplaceOutside) || + (IncludeLoopPHI && IsLoopPHI); + if (ShouldReplace) { + + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + DEBUG(dbgs() << "Trying to substitute physical register: " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + llvm_unreachable("Cannot substitute physical registers"); + } else { + DEBUG(dbgs() << "Replacing register (region): " + << PrintReg(Register, MRI->getTargetRegisterInfo()) + << " with " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + O.setReg(NewRegister); + } + } + } +} + +void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI) { + replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); +} + +void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, + unsigned NewRegister, + bool IncludeLoopPHIs, + MachineRegisterInfo *MRI) { + replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); +} + +DenseSet *LinearizedRegion::getLiveOuts() { return &LiveOuts; } + +void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { + Entry = NewEntry; +} + +MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; } + +void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; } + +MachineBasicBlock *LinearizedRegion::getExit() { return Exit; } + +void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); } + +void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) { + for (const auto &MBB : InnerRegion->MBBs) { + addMBB(MBB); + } +} + +bool LinearizedRegion::contains(MachineBasicBlock *MBB) { + return MBBs.count(MBB) == 1; +} + +bool LinearizedRegion::isLiveOut(unsigned Reg) { + return LiveOuts.count(Reg) == 1; +} + +bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { + return MRI->def_begin(Reg) == MRI->def_end(); +} + +// After the code has been structurized, what was flagged as kills +// before are no longer register kills. +void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { + const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + for (auto MBBI : MBBs) { + MachineBasicBlock *MBB = MBBI; + for (auto &II : *MBB) { + for (auto &RI : II.uses()) { + if (RI.isReg()) { + unsigned Reg = RI.getReg(); + if (TRI->isVirtualRegister(Reg)) { + if (hasNoDef(Reg, MRI)) + continue; + if (!MRI->hasOneDef(Reg)) { + DEBUG(this->getEntry()->getParent()->dump()); + DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n"); + } + + if (MRI->def_begin(Reg) == MRI->def_end()) { + DEBUG(dbgs() << "Register " + << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has NO defs\n"); + } else if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " + << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); + } + + assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); + MachineOperand *Def = &(*(MRI->def_begin(Reg))); + MachineOperand *UseOperand = &(RI); + bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; + if (UseIsOutsideDefMBB && UseOperand->isKill()) { + DEBUG(dbgs() << "Removing kill flag on register: " + << PrintReg(Reg, TRI) << "\n"); + UseOperand->setIsKill(false); + } + } + } + } + } + } +} + +void LinearizedRegion::initLiveOut(RegionMRT *Region, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + storeLiveOuts(Region, MRI, TRI, PHIInfo); +} + +LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, + const MachineRegisterInfo *MRI, + const TargetRegisterInfo *TRI, + PHILinearize &PHIInfo) { + setEntry(MBB); + setExit(MBB); + storeLiveOuts(MBB, MRI, TRI, PHIInfo); + MBBs.insert(MBB); + Parent = nullptr; +} + +LinearizedRegion::LinearizedRegion() { + setEntry(nullptr); + setExit(nullptr); + Parent = nullptr; +} + +LinearizedRegion::~LinearizedRegion() {} + +class AMDGPUMachineCFGStructurizer : public MachineFunctionPass { +private: + const MachineRegionInfo *Regions; + const SIInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + unsigned BBSelectRegister; + PHILinearize PHIInfo; + DenseMap FallthroughMap; + + void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, + SmallVector &RegionIndices); + void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &RegionIndices); + void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &PHINonRegionIndices); + + void storePHILinearizationInfoDest( + unsigned LDestReg, MachineInstr &PHI, + SmallVector *RegionIndices = nullptr); + + unsigned storePHILinearizationInfo(MachineInstr &PHI, + SmallVector *RegionIndices); + + void extractKilledPHIs(MachineBasicBlock *MBB); + + bool shrinkPHI(MachineInstr &PHI, SmallVector &PHIIndices, + unsigned *ReplaceReg); + + bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *SourceMBB, + SmallVector &PHIIndices, unsigned *ReplaceReg); + + void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *LastMerge, + SmallVector &PHIRegionIndices); + void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg, + MachineBasicBlock *IfMBB, + SmallVector &PHIRegionIndices); + void replaceLiveOutRegs(MachineInstr &PHI, + SmallVector &PHIRegionIndices, + unsigned CombinedSourceReg, + LinearizedRegion *LRegion); + void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge, + MachineInstr &PHI, LinearizedRegion *LRegion); + + void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge, + LinearizedRegion *LRegion); + void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB, + MachineInstr &PHI); + void rewriteRegionEntryPHIs(LinearizedRegion *Region, + MachineBasicBlock *IfMBB); + + bool regionIsSimpleIf(RegionMRT *Region); + + void transformSimpleIfRegion(RegionMRT *Region); + + void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); + + void insertUnconditionalBranch(MachineBasicBlock *MBB, + MachineBasicBlock *Dest, + const DebugLoc &DL = DebugLoc()); + + MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); + + void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, unsigned DestRegister, + unsigned IfSourceRegister, unsigned CodeSourceRegister, + bool IsUndefIfSource = false); + + MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBBStart, + MachineBasicBlock *CodeBBEnd, + MachineBasicBlock *SelectBB, unsigned IfReg, + bool InheritPreds); + + void prunePHIInfo(MachineBasicBlock *MBB); + void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg); + + void createEntryPHIs(LinearizedRegion *CurrentRegion); + void resolvePHIInfos(MachineBasicBlock *FunctionEntry); + + void replaceRegisterWith(unsigned Register, unsigned NewRegister); + + MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, + MachineBasicBlock *CodeBB, + LinearizedRegion *LRegion, + unsigned BBSelectRegIn, + unsigned BBSelectRegOut); + + MachineBasicBlock * + createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion, + LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, + unsigned BBSelectRegIn, unsigned BBSelectRegOut); + void ensureCondIsNotKilled(SmallVector Cond); + + void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned BBSelectReg); + + MachineInstr *getDefInstr(unsigned Reg); + void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, unsigned DestReg, + unsigned SourceReg); + bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion, + unsigned Register); + void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + LinearizedRegion *LRegion); + + void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion); + void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion); + + MachineBasicBlock *splitExit(LinearizedRegion *LRegion); + + MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); + + LinearizedRegion *initLinearizedRegion(RegionMRT *Region); + + bool structurizeComplexRegion(RegionMRT *Region); + + bool structurizeRegion(RegionMRT *Region); + + bool structurizeRegions(RegionMRT *Region, bool isTopRegion); + +public: + static char ID; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { + initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); + } + + void initFallthroughMap(MachineFunction &MF); + + void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut); + + unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg, + MachineRegisterInfo *MRI, + const SIInstrInfo *TII); + + RegionMRT *RMRT; + void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; } + + RegionMRT *getRegionMRT() { return RMRT; } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} + +char AMDGPUMachineCFGStructurizer::ID = 0; + +bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + MachineBasicBlock *Succ = Region->getSucc(); + bool FoundBypass = false; + bool FoundIf = false; + + if (Entry->succ_size() != 2) { + return false; + } + + for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), + E = Entry->succ_end(); + SI != E; ++SI) { + MachineBasicBlock *Current = *SI; + + if (Current == Succ) { + FoundBypass = true; + } else if ((Current->succ_size() == 1) && + *(Current->succ_begin()) == Succ) { + FoundIf = true; + } + } + + return FoundIf && FoundBypass; +} + +void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { + MachineBasicBlock *Entry = Region->getEntry(); + MachineBasicBlock *Exit = Region->getExit(); + TII->convertNonUniformIfRegion(Entry, Exit); +} + +static void fixMBBTerminator(MachineBasicBlock *MBB) { + + if (MBB->succ_size() == 1) { + auto *Succ = *(MBB->succ_begin()); + for (auto &TI : MBB->terminators()) { + for (auto &UI : TI.uses()) { + if (UI.isMBB() && UI.getMBB() != Succ) { + UI.setMBB(Succ); + } + } + } + } +} + +static void fixRegionTerminator(RegionMRT *Region) { + MachineBasicBlock *InternalSucc = nullptr; + MachineBasicBlock *ExternalSucc = nullptr; + LinearizedRegion *LRegion = Region->getLinearizedRegion(); + auto Exit = LRegion->getExit(); + + SmallPtrSet Successors; + for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(), + SE = Exit->succ_end(); + SI != SE; ++SI) { + MachineBasicBlock *Succ = *SI; + if (LRegion->contains(Succ)) { + // Do not allow re-assign + assert(InternalSucc == nullptr); + InternalSucc = Succ; + } else { + // Do not allow re-assign + assert(ExternalSucc == nullptr); + ExternalSucc = Succ; + } + } + + for (auto &TI : Exit->terminators()) { + for (auto &UI : TI.uses()) { + if (UI.isMBB()) { + auto Target = UI.getMBB(); + if (Target != InternalSucc && Target != ExternalSucc) { + UI.setMBB(ExternalSucc); + } + } + } + } +} + +// If a region region is just a sequence of regions (and the exit +// block in the case of the top level region), we can simply skip +// linearizing it, because it is already linear +bool regionIsSequence(RegionMRT *Region) { + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (!CI->isRegion()) { + if (CI->getMBBMRT()->getMBB()->succ_size() > 1) { + return false; + } + } + } + return true; +} + +void fixupRegionExits(RegionMRT *Region) { + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (!CI->isRegion()) { + fixMBBTerminator(CI->getMBBMRT()->getMBB()); + } else { + fixRegionTerminator(CI->getRegionMRT()); + } + } +} + +void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( + RegionMRT *Region, MachineInstr &PHI, + SmallVector &PHIRegionIndices) { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + MachineBasicBlock *Pred = getPHIPred(PHI, i); + if (Region->contains(Pred)) { + PHIRegionIndices.push_back(i); + } + } +} + +void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( + LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &PHIRegionIndices) { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + MachineBasicBlock *Pred = getPHIPred(PHI, i); + if (Region->contains(Pred)) { + PHIRegionIndices.push_back(i); + } + } +} + +void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices( + LinearizedRegion *Region, MachineInstr &PHI, + SmallVector &PHINonRegionIndices) { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + MachineBasicBlock *Pred = getPHIPred(PHI, i); + if (!Region->contains(Pred)) { + PHINonRegionIndices.push_back(i); + } + } +} + +void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest( + unsigned LDestReg, MachineInstr &PHI, + SmallVector *RegionIndices) { + if (RegionIndices) { + for (auto i : *RegionIndices) { + PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); + } + } else { + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); + } + } +} + +unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo( + MachineInstr &PHI, SmallVector *RegionIndices) { + unsigned DestReg = getPHIDestReg(PHI); + unsigned LinearizeDestReg = + MRI->createVirtualRegister(MRI->getRegClass(DestReg)); + PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); + storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); + return LinearizeDestReg; +} + +void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { + // We need to create a new chain for the killed phi, but there is no + // need to do the renaming outside or inside the block. + SmallPtrSet PHIs; + for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), + E = MBB->instr_end(); + I != E; ++I) { + MachineInstr &Instr = *I; + if (Instr.isPHI()) { + unsigned PHIDestReg = getPHIDestReg(Instr); + DEBUG(dbgs() << "Extractking killed phi:\n"); + DEBUG(Instr.dump()); + PHIs.insert(&Instr); + PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); + storePHILinearizationInfoDest(PHIDestReg, Instr); + } + } + + for (auto PI : PHIs) { + PI->eraseFromParent(); + } +} + +static bool isPHIRegionIndex(SmallVector PHIRegionIndices, + unsigned Index) { + for (auto i : PHIRegionIndices) { + if (i == Index) + return true; + } + return false; +} + +bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, + SmallVector &PHIIndices, + unsigned *ReplaceReg) { + return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg); +} + +bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, + unsigned CombinedSourceReg, + MachineBasicBlock *SourceMBB, + SmallVector &PHIIndices, + unsigned *ReplaceReg) { + DEBUG(dbgs() << "Shrink PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI) + << " = PHI("); + + bool Replaced = false; + unsigned NumInputs = getPHINumInputs(PHI); + int SingleExternalEntryIndex = -1; + for (unsigned i = 0; i < NumInputs; ++i) { + if (!isPHIRegionIndex(PHIIndices, i)) { + if (SingleExternalEntryIndex == -1) { + // Single entry + SingleExternalEntryIndex = i; + } else { + // Multiple entries + SingleExternalEntryIndex = -2; + } + } + } + + if (SingleExternalEntryIndex > -1) { + *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex); + // We should not rewrite the code, we should only pick up the single value + // that represents the shrunk PHI. + Replaced = true; + } else { + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + if (SourceMBB) { + MIB.addReg(CombinedSourceReg); + MIB.addMBB(SourceMBB); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" + << SourceMBB->getNumber()); + } + + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + } + PHI.eraseFromParent(); + return Replaced; +} + +void AMDGPUMachineCFGStructurizer::replacePHI( + MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, + SmallVector &PHIRegionIndices) { + DEBUG(dbgs() << "Replace PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) + << " = PHI("); + + bool HasExternalEdge = false; + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + if (!isPHIRegionIndex(PHIRegionIndices, i)) { + HasExternalEdge = true; + } + } + + if (HasExternalEdge) { + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + MIB.addReg(CombinedSourceReg); + MIB.addMBB(LastMerge); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" + << LastMerge->getNumber()); + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + } else { + replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); + } + PHI.eraseFromParent(); +} + +void AMDGPUMachineCFGStructurizer::replaceEntryPHI( + MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, + SmallVector &PHIRegionIndices) { + + DEBUG(dbgs() << "Replace entry PHI: "); + DEBUG(PHI.dump()); + DEBUG(dbgs() << " with "); + + unsigned NumInputs = getPHINumInputs(PHI); + unsigned NumNonRegionInputs = NumInputs; + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + NumNonRegionInputs--; + } + } + + if (NumNonRegionInputs == 0) { + auto DestReg = getPHIDestReg(PHI); + replaceRegisterWith(DestReg, CombinedSourceReg); + DEBUG(dbgs() << " register " << PrintReg(CombinedSourceReg, TRI) << "\n"); + PHI.eraseFromParent(); + } else { + DEBUG(dbgs() << PrintReg(getPHIDestReg(PHI), TRI) << " = PHI("); + MachineBasicBlock *MBB = PHI.getParent(); + MachineInstrBuilder MIB = + BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), + getPHIDestReg(PHI)); + MIB.addReg(CombinedSourceReg); + MIB.addMBB(IfMBB); + DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" + << IfMBB->getNumber()); + unsigned NumInputs = getPHINumInputs(PHI); + for (unsigned i = 0; i < NumInputs; ++i) { + if (isPHIRegionIndex(PHIRegionIndices, i)) { + continue; + } + unsigned SourceReg = getPHISourceReg(PHI, i); + MachineBasicBlock *SourcePred = getPHIPred(PHI, i); + MIB.addReg(SourceReg); + MIB.addMBB(SourcePred); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << SourcePred->getNumber()); + } + DEBUG(dbgs() << ")\n"); + PHI.eraseFromParent(); + } +} + +void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs( + MachineInstr &PHI, SmallVector &PHIRegionIndices, + unsigned CombinedSourceReg, LinearizedRegion *LRegion) { + bool WasLiveOut = false; + for (auto PII : PHIRegionIndices) { + unsigned Reg = getPHISourceReg(PHI, PII); + if (LRegion->isLiveOut(Reg)) { + bool IsDead = true; + + // Check if register is live out of the basic block + MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent(); + for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) { + if ((*UI).getParent()->getParent() != DefMBB) { + IsDead = false; + } + } + + DEBUG(dbgs() << "Register " << PrintReg(Reg, TRI) << " is " + << (IsDead ? "dead" : "alive") << " after PHI replace\n"); + if (IsDead) { + LRegion->removeLiveOut(Reg); + } + WasLiveOut = true; + } + } + + if (WasLiveOut) + LRegion->addLiveOut(CombinedSourceReg); +} + +void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, + MachineBasicBlock *LastMerge, + MachineInstr &PHI, + LinearizedRegion *LRegion) { + SmallVector PHIRegionIndices; + getPHIRegionIndices(Region, PHI, PHIRegionIndices); + unsigned LinearizedSourceReg = + storePHILinearizationInfo(PHI, &PHIRegionIndices); + + replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); + replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); +} + +void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, + MachineBasicBlock *IfMBB, + MachineInstr &PHI) { + SmallVector PHINonRegionIndices; + getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); + unsigned LinearizedSourceReg = + storePHILinearizationInfo(PHI, &PHINonRegionIndices); + replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); +} + +static void collectPHIs(MachineBasicBlock *MBB, + SmallVector &PHIs) { + for (auto &BBI : *MBB) { + if (BBI.isPHI()) { + PHIs.push_back(&BBI); + } + } +} + +void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region, + MachineBasicBlock *LastMerge, + LinearizedRegion *LRegion) { + SmallVector PHIs; + auto Exit = Region->getSucc(); + if (Exit == nullptr) + return; + + collectPHIs(Exit, PHIs); + + for (auto PHII : PHIs) { + rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); + } +} + +void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region, + MachineBasicBlock *IfMBB) { + SmallVector PHIs; + auto Entry = Region->getEntry(); + + collectPHIs(Entry, PHIs); + + for (auto PHII : PHIs) { + rewriteRegionEntryPHI(Region, IfMBB, *PHII); + } +} + +void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, + MachineBasicBlock *Dest, + const DebugLoc &DL) { + DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() + << " -> " << Dest->getNumber() << "\n"); + MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); + bool HasTerminator = Terminator != MBB->instr_end(); + if (HasTerminator) { + TII->ReplaceTailWithBranchTo(Terminator, Dest); + } + if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) { + TII->insertUnconditionalBranch(*MBB, Dest, DL); + } +} + +static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { + MachineBasicBlock *result = nullptr; + for (auto &MFI : MF) { + if (MFI.succ_size() == 0) { + if (result == nullptr) { + result = &MFI; + } else { + return nullptr; + } + } + } + + return result; +} + +static bool hasOneExitNode(MachineFunction &MF) { + return getSingleExitNode(MF) != nullptr; +} + +MachineBasicBlock * +AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) { + auto Exit = Region->getSucc(); + + // If the exit is the end of the function, we just use the existing + MachineFunction *MF = Region->getEntry()->getParent(); + if (Exit == nullptr && hasOneExitNode(*MF)) { + return &(*(--(Region->getEntry()->getParent()->end()))); + } + + MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock(); + if (Exit == nullptr) { + MachineFunction::iterator ExitIter = MF->end(); + MF->insert(ExitIter, LastMerge); + } else { + MachineFunction::iterator ExitIter = Exit->getIterator(); + MF->insert(ExitIter, LastMerge); + LastMerge->addSuccessor(Exit); + insertUnconditionalBranch(LastMerge, Exit); + DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n"); + } + return LastMerge; +} + +void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned DestRegister, + unsigned IfSourceRegister, + unsigned CodeSourceRegister, + bool IsUndefIfSource) { + // If this is the function exit block, we don't need a phi. + if (MergeBB->succ_begin() == MergeBB->succ_end()) { + return; + } + DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber() + << "): " << PrintReg(DestRegister, TRI) << " = PHI(" + << PrintReg(IfSourceRegister, TRI) << ", BB#" + << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI) + << ", BB#" << CodeBB->getNumber() << ")\n"); + const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin()); + MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, + TII->get(TargetOpcode::PHI), DestRegister); + if (IsUndefIfSource && false) { + MIB.addReg(IfSourceRegister, RegState::Undef); + } else { + MIB.addReg(IfSourceRegister); + } + MIB.addMBB(IfBB); + MIB.addReg(CodeSourceRegister); + MIB.addMBB(CodeBB); +} + +static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) { + for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), + E = MBB->succ_end(); + PI != E; ++PI) { + if ((*PI) != MBB) { + (MBB)->removeSuccessor(*PI); + } + } +} + +static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, + MachineBasicBlock *EndMBB) { + + // We have to check against the StartMBB successor becasuse a + // structurized region with a loop will have the entry block split, + // and the backedge will go to the entry successor. + DenseSet> Succs; + unsigned SuccSize = StartMBB->succ_size(); + if (SuccSize > 0) { + MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); + for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), + E = EndMBB->succ_end(); + PI != E; ++PI) { + // Either we have a back-edge to the entry block, or a back-edge to the + // succesor of the entry block since the block may be split. + if ((*PI) != StartMBB && + !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { + Succs.insert( + std::pair(EndMBB, *PI)); + } + } + } + + for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), + E = StartMBB->pred_end(); + PI != E; ++PI) { + if ((*PI) != EndMBB) { + Succs.insert( + std::pair(*PI, StartMBB)); + } + } + + for (auto SI : Succs) { + std::pair Edge = SI; + DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#" + << Edge.second->getNumber() << "\n"); + Edge.first->removeSuccessor(Edge.second); + } +} + +MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( + MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart, + MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg, + bool InheritPreds) { + MachineFunction *MF = MergeBB->getParent(); + MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); + + if (InheritPreds) { + for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), + E = CodeBBStart->pred_end(); + PI != E; ++PI) { + if ((*PI) != CodeBBEnd) { + MachineBasicBlock *Pred = (*PI); + Pred->addSuccessor(IfBB); + } + } + } + + removeExternalCFGEdges(CodeBBStart, CodeBBEnd); + + auto CodeBBStartI = CodeBBStart->getIterator(); + auto CodeBBEndI = CodeBBEnd->getIterator(); + auto MergeIter = MergeBB->getIterator(); + MF->insert(MergeIter, IfBB); + MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI); + IfBB->addSuccessor(MergeBB); + IfBB->addSuccessor(CodeBBStart); + + DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); + // Ensure that the MergeBB is a succesor of the CodeEndBB. + if (!CodeBBEnd->isSuccessor(MergeBB)) + CodeBBEnd->addSuccessor(MergeBB); + + DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#" + << CodeBBEnd->getNumber() << "\n"); + + // If we have a single predecessor we can find a reasonable debug location + MachineBasicBlock *SinglePred = + CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr; + const DebugLoc &DL = SinglePred + ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator()) + : DebugLoc(); + + unsigned Reg = + TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg, + SelectBB->getNumber() /* CodeBBStart->getNumber() */); + if (&(*(IfBB->getParent()->begin())) == IfBB) { + TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg, + CodeBBStart->getNumber()); + } + MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); + ArrayRef Cond(RegOp); + TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL); + + return IfBB; +} + +void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled( + SmallVector Cond) { + if (Cond.size() != 1) + return; + if (!Cond[0].isReg()) + return; + + unsigned CondReg = Cond[0].getReg(); + for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { + (*UI).setIsKill(false); + } +} + +void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + unsigned BBSelectReg) { + MachineBasicBlock *TrueBB = nullptr; + MachineBasicBlock *FalseBB = nullptr; + SmallVector Cond; + MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB]; + TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond); + + const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator()); + + if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) { + // This is an exit block, hence no successors. We will assign the + // bb select register to the entry block. + TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, + BBSelectReg, + CodeBB->getParent()->begin()->getNumber()); + insertUnconditionalBranch(CodeBB, MergeBB, DL); + return; + } + + if (FalseBB == nullptr && TrueBB == nullptr) { + TrueBB = FallthroughBB; + } else if (TrueBB != nullptr) { + FalseBB = + (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB; + } + + if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) { + TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, + BBSelectReg, TrueBB->getNumber()); + } else { + const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); + unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); + unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); + TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, + TrueBBReg, TrueBB->getNumber()); + TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, + FalseBBReg, FalseBB->getNumber()); + ensureCondIsNotKilled(Cond); + TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, + BBSelectReg, Cond, TrueBBReg, FalseBBReg); + } + + insertUnconditionalBranch(CodeBB, MergeBB, DL); +} + +MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) { + if (MRI->def_begin(Reg) == MRI->def_end()) { + DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has NO defs\n"); + } else if (!MRI->hasOneDef(Reg)) { + DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) + << " has multiple defs\n"); + DEBUG(dbgs() << "DEFS BEGIN:\n"); + for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { + DEBUG(DI->getParent()->dump()); + } + DEBUG(dbgs() << "DEFS END\n"); + } + + assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); + return (*(MRI->def_begin(Reg))).getParent(); +} + +void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + unsigned DestReg, + unsigned SourceReg) { + // In this function we know we are part of a chain already, so we need + // to add the registers to the existing chain, and rename the register + // inside the region. + bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); + MachineInstr *DefInstr = getDefInstr(SourceReg); + if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) { + // Handle the case where the def is a PHI-def inside a basic + // block, then we only need to do renaming. Special care needs to + // be taken if the PHI-def is part of an existing chain, or if a + // new one needs to be created. + InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); + + // We collect all PHI Information, and if we are at the region entry, + // all PHIs will be removed, and then re-introduced if needed. + storePHILinearizationInfoDest(DestReg, *DefInstr); + // We have picked up all the information we need now and can remove + // the PHI + PHIInfo.removeSource(DestReg, SourceReg, CodeBB); + DefInstr->eraseFromParent(); + } else { + // If this is not a phi-def, or it is a phi-def but from a linearized region + if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) { + // If this is a single BB and the definition is in this block we + // need to replace any uses outside the region. + InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); + } + const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); + unsigned NextDestReg = MRI->createVirtualRegister(RegClass); + bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; + DEBUG(dbgs() << "Insert Chained PHI\n"); + insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, + SourceReg, IsLastDef); + + PHIInfo.removeSource(DestReg, SourceReg, CodeBB); + if (IsLastDef) { + const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator()); + TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL, + NextDestReg, 0); + PHIInfo.deleteDef(DestReg); + } else { + PHIInfo.replaceDef(DestReg, NextDestReg); + } + } +} + +bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB, + LinearizedRegion *InnerRegion, + unsigned Register) { + return getDefInstr(Register)->getParent() == MBB || + InnerRegion->contains(getDefInstr(Register)->getParent()); +} + +void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, + MachineBasicBlock *CodeBB, + MachineBasicBlock *MergeBB, + LinearizedRegion *InnerRegion, + LinearizedRegion *LRegion) { + DenseSet *LiveOuts = InnerRegion->getLiveOuts(); + SmallVector OldLiveOuts; + bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); + for (auto OLI : *LiveOuts) { + OldLiveOuts.push_back(OLI); + } + + for (auto LI : OldLiveOuts) { + DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI)); + if (!containsDef(CodeBB, InnerRegion, LI) || + (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) { + // If the register simly lives through the CodeBB, we don't have + // to rewrite anything since the register is not defined in this + // part of the code. + DEBUG(dbgs() << "- through"); + continue; + } + DEBUG(dbgs() << "\n"); + unsigned Reg = LI; + if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) { + // If the register is live out, we do want to create a phi, + // unless it is from the Exit block, becasuse in that case there + // is already a PHI, and no need to create a new one. + + // If the register is just a live out def and not part of a phi + // chain, we need to create a PHI node to handle the if region, + // and replace all uses outside of the region with the new dest + // register, unless it is the outgoing BB select register. We have + // already creaed phi nodes for these. + const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); + unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); + unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); + // Create initializer, this value is never used, but is needed + // to satisfy SSA. + DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n"); + TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), + IfSourceReg, 0); + + InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); + DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); + insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, + IfSourceReg, Reg, true); + } + } + + // Handle the chained definitions in PHIInfo, checking if this basic block + // is a source block for a definition. + SmallVector Sources; + if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) { + DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber() + << "\n"); + for (auto SI : Sources) { + unsigned DestReg; + PHIInfo.findDest(SI, CodeBB, DestReg); + insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI); + } + DEBUG(dbgs() << "Insertion done.\n"); + } + + DEBUG(PHIInfo.dump(MRI)); +} + +void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Before PHI Prune\n"); + DEBUG(PHIInfo.dump(MRI)); + SmallVector, 4> + ElimiatedSources; + for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; + ++DRI) { + + unsigned DestReg = *DRI; + auto SE = PHIInfo.sources_end(DestReg); + + bool MBBContainsPHISource = false; + // Check if there is a PHI source in this MBB + for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { + unsigned SourceReg = (*SRI).first; + MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); + if (Def->getParent()->getParent() == MBB) { + MBBContainsPHISource = true; + } + } + + // If so, all other sources are useless since we know this block + // is always executed when the region is executed. + if (MBBContainsPHISource) { + for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { + PHILinearize::PHISourceT Source = *SRI; + unsigned SourceReg = Source.first; + MachineBasicBlock *SourceMBB = Source.second; + MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); + if (Def->getParent()->getParent() != MBB) { + ElimiatedSources.push_back( + std::make_tuple(DestReg, SourceReg, SourceMBB)); + } + } + } + } + + // Remove the PHI sources that are in the given MBB + for (auto &SourceInfo : ElimiatedSources) { + PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo), + std::get<2>(SourceInfo)); + } + DEBUG(dbgs() << "After PHI Prune\n"); + DEBUG(PHIInfo.dump(MRI)); +} + +void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion, + unsigned DestReg) { + MachineBasicBlock *Entry = CurrentRegion->getEntry(); + MachineBasicBlock *Exit = CurrentRegion->getExit(); + + DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() + << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n"); + + int NumSources = 0; + auto SE = PHIInfo.sources_end(DestReg); + + for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { + NumSources++; + } + + if (NumSources == 1) { + auto SRI = PHIInfo.sources_begin(DestReg); + unsigned SourceReg = (*SRI).first; + replaceRegisterWith(DestReg, SourceReg); + } else { + const DebugLoc &DL = Entry->findDebugLoc(Entry->begin()); + MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, + TII->get(TargetOpcode::PHI), DestReg); + DEBUG(dbgs() << "Entry PHI " << PrintReg(DestReg, TRI) << " = PHI("); + + unsigned CurrentBackedgeReg = 0; + + for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { + unsigned SourceReg = (*SRI).first; + + if (CurrentRegion->contains((*SRI).second)) { + if (CurrentBackedgeReg == 0) { + CurrentBackedgeReg = SourceReg; + } else { + MachineInstr *PHIDefInstr = getDefInstr(SourceReg); + MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent(); + const TargetRegisterClass *RegClass = + MRI->getRegClass(CurrentBackedgeReg); + unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass); + MachineInstrBuilder BackedgePHI = + BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL, + TII->get(TargetOpcode::PHI), NewBackedgeReg); + BackedgePHI.addReg(CurrentBackedgeReg); + BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0)); + BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1)); + BackedgePHI.addMBB((*SRI).second); + CurrentBackedgeReg = NewBackedgeReg; + DEBUG(dbgs() << "Inserting backedge PHI: " + << PrintReg(NewBackedgeReg, TRI) << " = PHI(" + << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" + << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", " + << PrintReg(getPHISourceReg(*PHIDefInstr, 1), TRI) + << ", BB#" << (*SRI).second->getNumber()); + } + } else { + MIB.addReg(SourceReg); + MIB.addMBB((*SRI).second); + DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" + << (*SRI).second->getNumber() << ", "); + } + } + + // Add the final backedge register source to the entry phi + if (CurrentBackedgeReg != 0) { + MIB.addReg(CurrentBackedgeReg); + MIB.addMBB(Exit); + DEBUG(dbgs() << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" + << Exit->getNumber() << ")\n"); + } else { + DEBUG(dbgs() << ")\n"); + } + } +} + +void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { + DEBUG(PHIInfo.dump(MRI)); + + for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; + ++DRI) { + + unsigned DestReg = *DRI; + createEntryPHI(CurrentRegion, DestReg); + } + PHIInfo.clear(); +} + +void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, + unsigned NewRegister) { + assert(Register != NewRegister && "Cannot replace a reg with itself"); + + for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), + E = MRI->reg_end(); + I != E;) { + MachineOperand &O = *I; + ++I; + if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { + DEBUG(dbgs() << "Trying to substitute physical register: " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + llvm_unreachable("Cannot substitute physical registers"); + // We don't handle physical registers, but if we need to + // in the future This is how we do it: + // O.substPhysReg(NewRegister, *TRI); + } else { + DEBUG(dbgs() << "Replacing register: " + << PrintReg(Register, MRI->getTargetRegisterInfo()) + << " with " + << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) + << "\n"); + O.setReg(NewRegister); + } + } + PHIInfo.deleteDef(Register); + + getRegionMRT()->replaceLiveOutReg(Register, NewRegister); + + DEBUG(PHIInfo.dump(MRI)); +} + +void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { + DEBUG(dbgs() << "Resolve PHI Infos\n"); + DEBUG(PHIInfo.dump(MRI)); + for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; + ++DRI) { + unsigned DestReg = *DRI; + DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n"); + auto SRI = PHIInfo.sources_begin(DestReg); + unsigned SourceReg = (*SRI).first; + DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) + << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n"); + + assert(PHIInfo.sources_end(DestReg) == ++SRI && + "More than one phi source in entry node"); + replaceRegisterWith(DestReg, SourceReg); + } +} + +static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { + return ((&(*(MBB->getParent()->begin()))) == MBB); +} + +MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( + MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB, + LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn, + unsigned BBSelectRegOut) { + if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) { + // Handle non-loop function entry block. + // We need to allow loops to the entry block and then + rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); + resolvePHIInfos(CodeBB); + removeExternalCFGSuccessors(CodeBB); + CodeBB->addSuccessor(MergeBB); + CurrentRegion->addMBB(CodeBB); + return nullptr; + } + if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) { + // Handle non-loop region entry block. + MachineFunction *MF = MergeBB->getParent(); + auto MergeIter = MergeBB->getIterator(); + auto CodeBBStartIter = CodeBB->getIterator(); + auto CodeBBEndIter = ++(CodeBB->getIterator()); + if (CodeBBEndIter != MergeIter) { + MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter); + } + rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); + prunePHIInfo(CodeBB); + createEntryPHIs(CurrentRegion); + removeExternalCFGSuccessors(CodeBB); + CodeBB->addSuccessor(MergeBB); + CurrentRegion->addMBB(CodeBB); + return nullptr; + } else { + // Handle internal block. + const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); + unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); + rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); + bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; + MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, + BBSelectRegIn, IsRegionEntryBB); + CurrentRegion->addMBB(IfBB); + // If this is the entry block we need to make the If block the new + // linearized region entry. + if (IsRegionEntryBB) { + CurrentRegion->setEntry(IfBB); + + if (CurrentRegion->getHasLoop()) { + MachineBasicBlock *RegionExit = CurrentRegion->getExit(); + MachineBasicBlock *ETrueBB = nullptr; + MachineBasicBlock *EFalseBB = nullptr; + SmallVector ECond; + + const DebugLoc &DL = DebugLoc(); + TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); + TII->removeBranch(*RegionExit); + + // We need to create a backedge if there is a loop + unsigned Reg = TII->insertNE( + RegionExit, RegionExit->instr_end(), DL, + CurrentRegion->getRegionMRT()->getInnerOutputRegister(), + CurrentRegion->getRegionMRT()->getEntry()->getNumber()); + MachineOperand RegOp = + MachineOperand::CreateReg(Reg, false, false, true); + ArrayRef Cond(RegOp); + DEBUG(dbgs() << "RegionExitReg: "); + DEBUG(Cond[0].print(dbgs(), TRI)); + DEBUG(dbgs() << "\n"); + TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, + Cond, DebugLoc()); + RegionExit->addSuccessor(CurrentRegion->getEntry()); + } + } + CurrentRegion->addMBB(CodeBB); + LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); + + InnerRegion.setParent(CurrentRegion); + DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); + insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, + CodeBBSelectReg); + InnerRegion.addMBB(MergeBB); + + DEBUG(InnerRegion.print(dbgs(), TRI)); + rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); + extractKilledPHIs(CodeBB); + if (IsRegionEntryBB) { + createEntryPHIs(CurrentRegion); + } + return IfBB; + } +} + +MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( + MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion, + LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, + unsigned BBSelectRegIn, unsigned BBSelectRegOut) { + unsigned CodeBBSelectReg = + InnerRegion->getRegionMRT()->getInnerOutputRegister(); + MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry(); + MachineBasicBlock *CodeExitBB = InnerRegion->getExit(); + MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB, + SelectBB, BBSelectRegIn, true); + CurrentRegion->addMBB(IfBB); + bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry(); + if (isEntry) { + + if (CurrentRegion->getHasLoop()) { + MachineBasicBlock *RegionExit = CurrentRegion->getExit(); + MachineBasicBlock *ETrueBB = nullptr; + MachineBasicBlock *EFalseBB = nullptr; + SmallVector ECond; + + const DebugLoc &DL = DebugLoc(); + TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); + TII->removeBranch(*RegionExit); + + // We need to create a backedge if there is a loop + unsigned Reg = + TII->insertNE(RegionExit, RegionExit->instr_end(), DL, + CurrentRegion->getRegionMRT()->getInnerOutputRegister(), + CurrentRegion->getRegionMRT()->getEntry()->getNumber()); + MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); + ArrayRef Cond(RegOp); + DEBUG(dbgs() << "RegionExitReg: "); + DEBUG(Cond[0].print(dbgs(), TRI)); + DEBUG(dbgs() << "\n"); + TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, + Cond, DebugLoc()); + RegionExit->addSuccessor(IfBB); + } + } + CurrentRegion->addMBBs(InnerRegion); + DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); + insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, + CodeBBSelectReg); + + rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion, + CurrentRegion); + + rewriteRegionEntryPHIs(InnerRegion, IfBB); + + if (isEntry) { + CurrentRegion->setEntry(IfBB); + } + + if (isEntry) { + createEntryPHIs(CurrentRegion); + } + + return IfBB; +} + +void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, + MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion) { + SmallVector PHIRegionIndices; + getPHIRegionIndices(LRegion, PHI, PHIRegionIndices); + + assert(PHIRegionIndices.size() == 1); + + unsigned RegionIndex = PHIRegionIndices[0]; + unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex); + MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex); + unsigned PHIDest = getPHIDestReg(PHI); + unsigned PHISource = PHIDest; + unsigned ReplaceReg; + + if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) { + PHISource = ReplaceReg; + } + + const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); + unsigned NewDestReg = MRI->createVirtualRegister(RegClass); + LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); + MachineInstrBuilder MIB = + BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), + TII->get(TargetOpcode::PHI), NewDestReg); + DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI) + << " = PHI("); + MIB.addReg(PHISource); + MIB.addMBB(Entry); + DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber()); + MIB.addReg(RegionSourceReg); + MIB.addMBB(RegionSourceMBB); + DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << ", BB#" + << RegionSourceMBB->getNumber() << ")\n"); +} + +void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, + MachineBasicBlock *EntrySucc, + LinearizedRegion *LRegion) { + SmallVector PHIs; + collectPHIs(Entry, PHIs); + + for (auto PHII : PHIs) { + splitLoopPHI(*PHII, Entry, EntrySucc, LRegion); + } +} + +// Split the exit block so that we can insert a end control flow +MachineBasicBlock * +AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) { + auto MRTRegion = LRegion->getRegionMRT(); + auto Exit = LRegion->getExit(); + auto MF = Exit->getParent(); + auto Succ = MRTRegion->getSucc(); + + auto NewExit = MF->CreateMachineBasicBlock(); + auto AfterExitIter = Exit->getIterator(); + AfterExitIter++; + MF->insert(AfterExitIter, NewExit); + Exit->removeSuccessor(Succ); + Exit->addSuccessor(NewExit); + NewExit->addSuccessor(Succ); + insertUnconditionalBranch(NewExit, Succ); + LRegion->addMBB(NewExit); + LRegion->setExit(NewExit); + + DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n"); + + // Replace any PHI Predecessors in the successor with NewExit + for (auto &II : *Succ) { + MachineInstr &Instr = II; + + // If we are past the PHI instructions we are done + if (!Instr.isPHI()) + break; + + int numPreds = getPHINumInputs(Instr); + for (int i = 0; i < numPreds; ++i) { + auto Pred = getPHIPred(Instr, i); + if (Pred == Exit) { + setPhiPred(Instr, i, NewExit); + } + } + } + + return NewExit; +} + + +static MachineBasicBlock *split(MachineBasicBlock::iterator I) { + // Create the fall-through block. + MachineBasicBlock *MBB = (*I).getParent(); + MachineFunction *MF = MBB->getParent(); + MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock(); + auto MBBIter = ++(MBB->getIterator()); + MF->insert(MBBIter, SuccMBB); + SuccMBB->transferSuccessorsAndUpdatePHIs(MBB); + MBB->addSuccessor(SuccMBB); + + // Splice the code over. + SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end()); + + return SuccMBB; +} + +// Split the entry block separating PHI-nodes and the rest of the code +// This is needed to insert an initializer for the bb select register +// inloop regions. + +MachineBasicBlock * +AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { + MachineBasicBlock *Entry = LRegion->getEntry(); + MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI()); + MachineBasicBlock *Exit = LRegion->getExit(); + + DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#" + << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber() + << "\n"); + LRegion->addMBB(EntrySucc); + + // Make the backedge go to Entry Succ + if (Exit->isSuccessor(Entry)) { + Exit->removeSuccessor(Entry); + } + Exit->addSuccessor(EntrySucc); + MachineInstr &Branch = *(Exit->instr_rbegin()); + for (auto &UI : Branch.uses()) { + if (UI.isMBB() && UI.getMBB() == Entry) { + UI.setMBB(EntrySucc); + } + } + + splitLoopPHIs(Entry, EntrySucc, LRegion); + + return EntrySucc; +} + +LinearizedRegion * +AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) { + LinearizedRegion *LRegion = Region->getLinearizedRegion(); + LRegion->initLiveOut(Region, MRI, TRI, PHIInfo); + LRegion->setEntry(Region->getEntry()); + return LRegion; +} + +static void removeOldExitPreds(RegionMRT *Region) { + MachineBasicBlock *Exit = Region->getSucc(); + if (Exit == nullptr) { + return; + } + for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), + E = Exit->pred_end(); + PI != E; ++PI) { + if (Region->contains(*PI)) { + (*PI)->removeSuccessor(Exit); + } + } +} + +static bool mbbHasBackEdge(MachineBasicBlock *MBB, + SmallPtrSet &MBBs) { + for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { + if (MBBs.count(*SI) != 0) { + return true; + } + } + return false; +} + +static bool containsNewBackedge(MRT *Tree, + SmallPtrSet &MBBs) { + // Need to traverse this in reverse since it is in post order. + if (Tree == nullptr) + return false; + + if (Tree->isMBB()) { + MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB(); + MBBs.insert(MBB); + if (mbbHasBackEdge(MBB, MBBs)) { + return true; + } + } else { + RegionMRT *Region = Tree->getRegionMRT(); + SetVector *Children = Region->getChildren(); + for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) { + if (containsNewBackedge(*CI, MBBs)) + return true; + } + } + return false; +} + +static bool containsNewBackedge(RegionMRT *Region) { + SmallPtrSet MBBs; + return containsNewBackedge(Region, MBBs); +} + +bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { + auto *LRegion = initLinearizedRegion(Region); + LRegion->setHasLoop(containsNewBackedge(Region)); + MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region); + MachineBasicBlock *CurrentMerge = LastMerge; + LRegion->addMBB(LastMerge); + LRegion->setExit(LastMerge); + + rewriteRegionExitPHIs(Region, LastMerge, LRegion); + removeOldExitPreds(Region); + + DEBUG(PHIInfo.dump(MRI)); + + SetVector *Children = Region->getChildren(); + DEBUG(dbgs() << "===========If Region Start===============\n"); + if (LRegion->getHasLoop()) { + DEBUG(dbgs() << "Has Backedge: Yes\n"); + } else { + DEBUG(dbgs() << "Has Backedge: No\n"); + } + + unsigned BBSelectRegIn; + unsigned BBSelectRegOut; + for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { + DEBUG(dbgs() << "CurrentRegion: \n"); + DEBUG(LRegion->print(dbgs(), TRI)); + + auto CNI = CI; + ++CNI; + + MRT *Child = (*CI); + + if (Child->isRegion()) { + + LinearizedRegion *InnerLRegion = + Child->getRegionMRT()->getLinearizedRegion(); + // We found the block is the exit of an inner region, we need + // to put it in the current linearized region. + + DEBUG(dbgs() << "Linearizing region: "); + DEBUG(InnerLRegion->print(dbgs(), TRI)); + DEBUG(dbgs() << "\n"); + + MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); + if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) { + // Entry has already been linearized, no need to do this region. + unsigned OuterSelect = InnerLRegion->getBBSelectRegOut(); + unsigned InnerSelectReg = + InnerLRegion->getRegionMRT()->getInnerOutputRegister(); + replaceRegisterWith(InnerSelectReg, OuterSelect), + resolvePHIInfos(InnerEntry); + if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge)) + InnerLRegion->getExit()->addSuccessor(CurrentMerge); + continue; + } + + BBSelectRegOut = Child->getBBSelectRegOut(); + BBSelectRegIn = Child->getBBSelectRegIn(); + + DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) + << "\n"); + DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) + << "\n"); + + MachineBasicBlock *IfEnd = CurrentMerge; + CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, + Child->getRegionMRT()->getEntry(), + BBSelectRegIn, BBSelectRegOut); + TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); + } else { + MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); + DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); + + if (MBB == getSingleExitNode(*(MBB->getParent()))) { + // If this is the exit block then we need to skip to the next. + // The "in" register will be transferred to "out" in the next + // iteration. + continue; + } + + BBSelectRegOut = Child->getBBSelectRegOut(); + BBSelectRegIn = Child->getBBSelectRegIn(); + + DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) + << "\n"); + DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) + << "\n"); + + MachineBasicBlock *IfEnd = CurrentMerge; + // This is a basic block that is not part of an inner region, we + // need to put it in the current linearized region. + CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, + BBSelectRegOut); + if (CurrentMerge) { + TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); + } + + DEBUG(PHIInfo.dump(MRI)); + } + } + + LRegion->removeFalseRegisterKills(MRI); + + if (LRegion->getHasLoop()) { + MachineBasicBlock *NewSucc = splitEntry(LRegion); + if (isFunctionEntryBlock(LRegion->getEntry())) { + resolvePHIInfos(LRegion->getEntry()); + } + const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); + unsigned InReg = LRegion->getBBSelectRegIn(); + unsigned InnerSelectReg = + MRI->createVirtualRegister(MRI->getRegClass(InReg)); + unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); + TII->materializeImmediate(*(LRegion->getEntry()), + LRegion->getEntry()->getFirstTerminator(), DL, + NewInReg, Region->getEntry()->getNumber()); + // Need to be careful about updating the registers inside the region. + LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); + DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); + insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, + InnerSelectReg, NewInReg, + LRegion->getRegionMRT()->getInnerOutputRegister()); + splitExit(LRegion); + TII->convertNonUniformLoopRegion(NewSucc, LastMerge); + } + + if (Region->isRoot()) { + TII->insertReturn(*LastMerge); + } + + DEBUG(Region->getEntry()->getParent()->dump()); + DEBUG(LRegion->print(dbgs(), TRI)); + DEBUG(PHIInfo.dump(MRI)); + + DEBUG(dbgs() << "===========If Region End===============\n"); + + Region->setLinearizedRegion(LRegion); + return true; +} + +bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { + if (false && regionIsSimpleIf(Region)) { + transformSimpleIfRegion(Region); + return true; + } else if (regionIsSequence(Region)) { + fixupRegionExits(Region); + return false; + } else { + structurizeComplexRegion(Region); + } + return false; +} + +static int structurize_once = 0; + +bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region, + bool isTopRegion) { + bool Changed = false; + + auto Children = Region->getChildren(); + for (auto CI : *Children) { + if (CI->isRegion()) { + Changed |= structurizeRegions(CI->getRegionMRT(), false); + } + } + + if (structurize_once < 2 || true) { + Changed |= structurizeRegion(Region); + structurize_once++; + } + return Changed; +} + +void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { + DEBUG(dbgs() << "Fallthrough Map:\n"); + for (auto &MBBI : MF) { + MachineBasicBlock *MBB = MBBI.getFallThrough(); + if (MBB != nullptr) { + DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " + << MBB->getNumber() << "\n"); + } + FallthroughMap[&MBBI] = MBB; + } +} + +void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region, + unsigned SelectOut) { + LinearizedRegion *LRegion = new LinearizedRegion(); + if (SelectOut) { + LRegion->addLiveOut(SelectOut); + DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(SelectOut, TRI) + << "\n"); + } + LRegion->setRegionMRT(Region); + Region->setLinearizedRegion(LRegion); + LRegion->setParent(Region->getParent() + ? Region->getParent()->getLinearizedRegion() + : nullptr); +} + +unsigned +AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut, + MachineRegisterInfo *MRI, + const SIInstrInfo *TII) { + if (MRT->isRegion()) { + RegionMRT *Region = MRT->getRegionMRT(); + Region->setBBSelectRegOut(SelectOut); + unsigned InnerSelectOut = createBBSelectReg(TII, MRI); + + // Fixme: Move linearization creation to the original spot + createLinearizedRegion(Region, SelectOut); + + for (auto CI = Region->getChildren()->begin(), + CE = Region->getChildren()->end(); + CI != CE; ++CI) { + InnerSelectOut = + initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII); + } + MRT->setBBSelectRegIn(InnerSelectOut); + return InnerSelectOut; + } else { + MRT->setBBSelectRegOut(SelectOut); + unsigned NewSelectIn = createBBSelectReg(TII, MRI); + MRT->setBBSelectRegIn(NewSelectIn); + return NewSelectIn; + } +} + +static void checkRegOnlyPHIInputs(MachineFunction &MF) { + for (auto &MBBI : MF) { + for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), + E = MBBI.instr_end(); + I != E; ++I) { + MachineInstr &Instr = *I; + if (Instr.isPHI()) { + int numPreds = getPHINumInputs(Instr); + for (int i = 0; i < numPreds; ++i) { + assert(Instr.getOperand(i * 2 + 1).isReg() && + "PHI Operand not a register"); + } + } + } + } +} + + +INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", + "AMDGPU Machine CFG Structurizer", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) +INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", + "AMDGPU Machine CFG Structurizer", false, false) + +char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; + + +bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { + const SISubtarget &ST = MF.getSubtarget(); + const SIInstrInfo *TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &(MF.getRegInfo()); + initFallthroughMap(MF); + + checkRegOnlyPHIInputs(MF); + DEBUG(dbgs() << "----STRUCTURIZER START----\n"); + DEBUG(MF.dump()); + + Regions = &(getAnalysis().getRegionInfo()); + DEBUG(Regions->dump()); + + RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); + setRegionMRT(RTree); + initializeSelectRegisters(RTree, 0, MRI, TII); + DEBUG(RTree->dump(TRI)); + bool result = structurizeRegions(RTree, true); + delete RTree; + DEBUG(dbgs() << "----STRUCTURIZER END----\n"); + initFallthroughMap(MF); + return result; +} + +FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() { + return new AMDGPUMachineCFGStructurizer(); +} diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 27fe639e3d4bb208e61d85633afd17ed5bcf332e..9fb7f5f8892712b85641e6f29cecdac0be753460 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -12,20 +12,6 @@ using namespace llvm; -static bool isEntryFunctionCC(CallingConv::ID CC) { - switch (CC) { - case CallingConv::AMDGPU_KERNEL: - case CallingConv::SPIR_KERNEL: - case CallingConv::AMDGPU_VS: - case CallingConv::AMDGPU_GS: - case CallingConv::AMDGPU_PS: - case CallingConv::AMDGPU_CS: - return true; - default: - return false; - } -} - AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo(), LocalMemoryObjects(), @@ -33,7 +19,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MaxKernArgAlign(0), LDSSize(0), ABIArgOffset(0), - IsEntryFunction(isEntryFunctionCC(MF.getFunction()->getCallingConv())), + IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction()->getCallingConv())), NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) { // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, // except reserved size is not correctly aligned. diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index 8bfeb67ad4ecdcb040df0182ead2f26986ef93c1..99bb61b21db06ea2d44c26257b62aab970155273 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -10,8 +10,8 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" namespace llvm { diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 96bc53d06cd9531b785b41eac7b06a13a0c2e5bd..625c9b77e2dec1bf7ea371ef2027e2ee4a33a399 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -32,13 +33,12 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -71,7 +71,6 @@ private: const TargetMachine *TM; Module *Mod = nullptr; const DataLayout *DL = nullptr; - MDNode *MaxWorkGroupSizeRange = nullptr; AMDGPUAS AS; // FIXME: This should be per-kernel. @@ -98,18 +97,20 @@ private: Instruction *UseInst, int OpIdx0, int OpIdx1) const; + /// Check whether we have enough local memory for promotion. + bool hasSufficientLocalMem(const Function &F); + public: static char ID; - AMDGPUPromoteAlloca(const TargetMachine *TM_ = nullptr) : - FunctionPass(ID), TM(TM_) {} + AMDGPUPromoteAlloca() : FunctionPass(ID) {} bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; StringRef getPassName() const override { return "AMDGPU Promote Alloca"; } - void handleAlloca(AllocaInst &I); + bool handleAlloca(AllocaInst &I, bool SufficientLDS); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -121,143 +122,56 @@ public: char AMDGPUPromoteAlloca::ID = 0; -INITIALIZE_TM_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE, - "AMDGPU promote alloca to vector or LDS", false, false) +INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE, + "AMDGPU promote alloca to vector or LDS", false, false) char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID; bool AMDGPUPromoteAlloca::doInitialization(Module &M) { - if (!TM) - return false; - Mod = &M; DL = &Mod->getDataLayout(); - // The maximum workitem id. - // - // FIXME: Should get as subtarget property. Usually runtime enforced max is - // 256. - MDBuilder MDB(Mod->getContext()); - MaxWorkGroupSizeRange = MDB.createRange(APInt(32, 0), APInt(32, 2048)); - - const Triple &TT = TM->getTargetTriple(); - - IsAMDGCN = TT.getArch() == Triple::amdgcn; - IsAMDHSA = TT.getOS() == Triple::AMDHSA; - return false; } bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { - if (!TM || skipFunction(F)) + if (skipFunction(F)) return false; - const AMDGPUSubtarget &ST = TM->getSubtarget(F); - if (!ST.isPromoteAllocaEnabled()) + if (auto *TPC = getAnalysisIfAvailable()) + TM = &TPC->getTM(); + else return false; - AS = AMDGPU::getAMDGPUAS(*F.getParent()); - - FunctionType *FTy = F.getFunctionType(); - // If the function has any arguments in the local address space, then it's - // possible these arguments require the entire local memory space, so - // we cannot use local memory in the pass. - for (Type *ParamTy : FTy->params()) { - PointerType *PtrTy = dyn_cast(ParamTy); - if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) { - LocalMemLimit = 0; - DEBUG(dbgs() << "Function has local memory argument. Promoting to " - "local memory disabled.\n"); - return false; - } - } - - LocalMemLimit = ST.getLocalMemorySize(); - if (LocalMemLimit == 0) - return false; - - const DataLayout &DL = Mod->getDataLayout(); - - // Check how much local memory is being used by global objects - CurrentLocalMemUsage = 0; - for (GlobalVariable &GV : Mod->globals()) { - if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS) - continue; - - for (const User *U : GV.users()) { - const Instruction *Use = dyn_cast(U); - if (!Use) - continue; - - if (Use->getParent()->getParent() == &F) { - unsigned Align = GV.getAlignment(); - if (Align == 0) - Align = DL.getABITypeAlignment(GV.getValueType()); - - // FIXME: Try to account for padding here. The padding is currently - // determined from the inverse order of uses in the function. I'm not - // sure if the use list order is in any way connected to this, so the - // total reported size is likely incorrect. - uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType()); - CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align); - CurrentLocalMemUsage += AllocSize; - break; - } - } - } - - unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage, - F); - - // Restrict local memory usage so that we don't drastically reduce occupancy, - // unless it is already significantly reduced. - - // TODO: Have some sort of hint or other heuristics to guess occupancy based - // on other factors.. - unsigned OccupancyHint = ST.getWavesPerEU(F).second; - if (OccupancyHint == 0) - OccupancyHint = 7; - - // Clamp to max value. - OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU()); - - // Check the hint but ignore it if it's obviously wrong from the existing LDS - // usage. - MaxOccupancy = std::min(OccupancyHint, MaxOccupancy); - - - // Round up to the next tier of usage. - unsigned MaxSizeWithWaveCount - = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F); + const Triple &TT = TM->getTargetTriple(); + IsAMDGCN = TT.getArch() == Triple::amdgcn; + IsAMDHSA = TT.getOS() == Triple::AMDHSA; - // Program is possibly broken by using more local mem than available. - if (CurrentLocalMemUsage > MaxSizeWithWaveCount) + const AMDGPUSubtarget &ST = TM->getSubtarget(F); + if (!ST.isPromoteAllocaEnabled()) return false; - LocalMemLimit = MaxSizeWithWaveCount; - - DEBUG( - dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n" - << " Rounding size to " << MaxSizeWithWaveCount - << " with a maximum occupancy of " << MaxOccupancy << '\n' - << " and " << (LocalMemLimit - CurrentLocalMemUsage) - << " available for promotion\n" - ); + AS = AMDGPU::getAMDGPUAS(*F.getParent()); + bool SufficientLDS = hasSufficientLocalMem(F); + bool Changed = false; BasicBlock &EntryBB = *F.begin(); for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) { AllocaInst *AI = dyn_cast(I); ++I; if (AI) - handleAlloca(*AI); + Changed |= handleAlloca(*AI, SufficientLDS); } - return true; + return Changed; } std::pair AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { + const AMDGPUSubtarget &ST = TM->getSubtarget( + *Builder.GetInsertBlock()->getParent()); + if (!IsAMDHSA) { Function *LocalSizeYFn = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y); @@ -267,8 +181,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {}); CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {}); - LocalSizeY->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); - LocalSizeZ->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); + ST.makeLIDRangeMetadata(LocalSizeY); + ST.makeLIDRangeMetadata(LocalSizeZ); return std::make_pair(LocalSizeY, LocalSizeZ); } @@ -333,7 +247,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { MDNode *MD = MDNode::get(Mod->getContext(), None); LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD); LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD); - LoadZU->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); + ST.makeLIDRangeMetadata(LoadZU); // Extract y component. Upper half of LoadZU should be zero already. Value *Y = Builder.CreateLShr(LoadXY, 16); @@ -342,6 +256,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { } Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) { + const AMDGPUSubtarget &ST = TM->getSubtarget( + *Builder.GetInsertBlock()->getParent()); Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic; switch (N) { @@ -364,7 +280,7 @@ Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) { Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID); CallInst *CI = Builder.CreateCall(WorkitemIdFn); - CI->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); + ST.makeLIDRangeMetadata(CI); return CI; } @@ -401,14 +317,19 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) { // instructions. static bool canVectorizeInst(Instruction *Inst, User *User) { switch (Inst->getOpcode()) { - case Instruction::Load: + case Instruction::Load: { + LoadInst *LI = cast(Inst); + // Currently only handle the case where the Pointer Operand is a GEP so check for that case. + return isa(LI->getPointerOperand()) && !LI->isVolatile(); + } case Instruction::BitCast: case Instruction::AddrSpaceCast: return true; case Instruction::Store: { - // Must be the stored pointer operand, not a stored value. + // Must be the stored pointer operand, not a stored value, plus + // since it should be canonical form, the User should be a GEP. StoreInst *SI = cast(Inst); - return SI->getPointerOperand() == User; + return (SI->getPointerOperand() == User) && isa(User) && !SI->isVolatile(); } default: return false; @@ -422,8 +343,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { // FIXME: There is no reason why we can't support larger arrays, we // are just being conservative for now. + // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these + // could also be promoted but we don't currently handle this case if (!AllocaTy || AllocaTy->getElementType()->isVectorTy() || + AllocaTy->getElementType()->isArrayTy() || AllocaTy->getNumElements() > 4 || AllocaTy->getNumElements() < 2) { DEBUG(dbgs() << " Cannot convert type to vector\n"); @@ -471,7 +395,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { switch (Inst->getOpcode()) { case Instruction::Load: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); - Value *Ptr = Inst->getOperand(0); + Value *Ptr = cast(Inst)->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); @@ -484,12 +408,13 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { case Instruction::Store: { Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); - Value *Ptr = Inst->getOperand(1); + StoreInst *SI = cast(Inst); + Value *Ptr = SI->getPointerOperand(); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy); Value *VecValue = Builder.CreateLoad(BitCast); Value *NewVecValue = Builder.CreateInsertElement(VecValue, - Inst->getOperand(0), + SI->getValueOperand(), Index); Builder.CreateStore(NewVecValue, BitCast); Inst->eraseFromParent(); @@ -661,12 +586,105 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes( return true; } +bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) { + + FunctionType *FTy = F.getFunctionType(); + const AMDGPUSubtarget &ST = TM->getSubtarget(F); + + // If the function has any arguments in the local address space, then it's + // possible these arguments require the entire local memory space, so + // we cannot use local memory in the pass. + for (Type *ParamTy : FTy->params()) { + PointerType *PtrTy = dyn_cast(ParamTy); + if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) { + LocalMemLimit = 0; + DEBUG(dbgs() << "Function has local memory argument. Promoting to " + "local memory disabled.\n"); + return false; + } + } + + LocalMemLimit = ST.getLocalMemorySize(); + if (LocalMemLimit == 0) + return false; + + const DataLayout &DL = Mod->getDataLayout(); + + // Check how much local memory is being used by global objects + CurrentLocalMemUsage = 0; + for (GlobalVariable &GV : Mod->globals()) { + if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS) + continue; + + for (const User *U : GV.users()) { + const Instruction *Use = dyn_cast(U); + if (!Use) + continue; + + if (Use->getParent()->getParent() == &F) { + unsigned Align = GV.getAlignment(); + if (Align == 0) + Align = DL.getABITypeAlignment(GV.getValueType()); + + // FIXME: Try to account for padding here. The padding is currently + // determined from the inverse order of uses in the function. I'm not + // sure if the use list order is in any way connected to this, so the + // total reported size is likely incorrect. + uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType()); + CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align); + CurrentLocalMemUsage += AllocSize; + break; + } + } + } + + unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage, + F); + + // Restrict local memory usage so that we don't drastically reduce occupancy, + // unless it is already significantly reduced. + + // TODO: Have some sort of hint or other heuristics to guess occupancy based + // on other factors.. + unsigned OccupancyHint = ST.getWavesPerEU(F).second; + if (OccupancyHint == 0) + OccupancyHint = 7; + + // Clamp to max value. + OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU()); + + // Check the hint but ignore it if it's obviously wrong from the existing LDS + // usage. + MaxOccupancy = std::min(OccupancyHint, MaxOccupancy); + + + // Round up to the next tier of usage. + unsigned MaxSizeWithWaveCount + = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F); + + // Program is possibly broken by using more local mem than available. + if (CurrentLocalMemUsage > MaxSizeWithWaveCount) + return false; + + LocalMemLimit = MaxSizeWithWaveCount; + + DEBUG( + dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n" + << " Rounding size to " << MaxSizeWithWaveCount + << " with a maximum occupancy of " << MaxOccupancy << '\n' + << " and " << (LocalMemLimit - CurrentLocalMemUsage) + << " available for promotion\n" + ); + + return true; +} + // FIXME: Should try to pick the most likely to be profitable allocas first. -void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { +bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { // Array allocations are probably not worth handling, since an allocation of // the array type is the canonical form. if (!I.isStaticAlloca() || I.isArrayAllocation()) - return; + return false; IRBuilder<> Builder(&I); @@ -675,23 +693,30 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { DEBUG(dbgs() << "Trying to promote " << I << '\n'); - if (tryPromoteAllocaToVector(&I, AS)) { - DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); - return; - } + if (tryPromoteAllocaToVector(&I, AS)) + return true; // Promoted to vector. const Function &ContainingFunction = *I.getParent()->getParent(); + CallingConv::ID CC = ContainingFunction.getCallingConv(); // Don't promote the alloca to LDS for shader calling conventions as the work // item ID intrinsics are not supported for these calling conventions. // Furthermore not all LDS is available for some of the stages. - if (AMDGPU::isShader(ContainingFunction.getCallingConv())) - return; + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + break; + default: + DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n"); + return false; + } + + // Not likely to have sufficient local memory for promotion. + if (!SufficientLDS) + return false; const AMDGPUSubtarget &ST = TM->getSubtarget(ContainingFunction); - // FIXME: We should also try to get this value from the reqd_work_group_size - // function attribute if it is available. unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second; const DataLayout &DL = Mod->getDataLayout(); @@ -713,7 +738,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { if (NewSize > LocalMemLimit) { DEBUG(dbgs() << " " << AllocSize << " bytes of local memory not available to promote\n"); - return; + return false; } CurrentLocalMemUsage = NewSize; @@ -722,7 +747,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { if (!collectUsesWithPtrTypes(&I, &I, WorkList)) { DEBUG(dbgs() << " Do not know how to convert all uses\n"); - return; + return false; } DEBUG(dbgs() << "Promoting alloca to local memory\n"); @@ -868,8 +893,9 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { llvm_unreachable("Don't know how to promote alloca intrinsic use."); } } + return true; } -FunctionPass *llvm::createAMDGPUPromoteAlloca(const TargetMachine *TM) { - return new AMDGPUPromoteAlloca(TM); +FunctionPass *llvm::createAMDGPUPromoteAlloca() { + return new AMDGPUPromoteAlloca(); } diff --git a/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..36d88f52910d59e05230eba475519bfa2dfa02a0 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp @@ -0,0 +1,353 @@ +//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===// + +#ifdef AMDGPU_REG_ASM_NAMES + +static const char *const VGPR32RegNames[] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", + "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", + "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", + "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", + "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", + "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", + "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", + "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", + "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", + "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", + "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", + "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", + "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125", + "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", + "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", + "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", + "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", + "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", + "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", + "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", + "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", + "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", + "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", + "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", + "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", + "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", + "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", + "v252", "v253", "v254", "v255" +}; + +static const char *const SGPR32RegNames[] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", + "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", + "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", + "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", + "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", + "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59", + "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69", + "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", + "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", + "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", + "s100", "s101", "s102", "s103" +}; + +static const char *const VGPR64RegNames[] = { + "v[0:1]", "v[1:2]", "v[2:3]", "v[3:4]", "v[4:5]", + "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]", + "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]", + "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]", + "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]", + "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]", + "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]", + "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]", + "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]", + "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]", + "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]", + "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]", + "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]", + "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]", + "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]", + "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]", + "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]", + "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]", + "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]", + "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]", + "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]", + "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]", + "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]", + "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]", + "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]", + "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]", + "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]", + "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]", + "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]", + "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]", + "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]", + "v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]", + "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]", + "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]", + "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]", + "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]", + "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]", + "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]", + "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]", + "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]", + "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]", + "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]", + "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]", + "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]", + "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]", + "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]", + "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]", + "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]", + "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]", + "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]", + "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]" +}; + +static const char *const VGPR96RegNames[] = { + "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]", + "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]", + "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]", + "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]", + "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]", + "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]", + "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]", + "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]", + "v[40:42]", "v[41:43]", "v[42:44]", "v[43:45]", "v[44:46]", + "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]", + "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]", + "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]", + "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]", + "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]", + "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]", + "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]", + "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]", + "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]", + "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]", + "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]", + "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]", + "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]", + "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]", + "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]", + "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]", + "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]", + "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]", + "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]", + "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]", + "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]", + "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]", + "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]", + "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]", + "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]", + "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]", + "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]", + "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]", + "v[185:187]", "v[186:188]", "v[187:189]", "v[188:190]", "v[189:191]", + "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]", + "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]", + "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]", + "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]", + "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]", + "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]", + "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]", + "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]", + "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]", + "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]", + "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]", + "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]", + "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]" +}; + +static const char *const VGPR128RegNames[] = { + "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]", + "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]", + "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]", + "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]", + "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]", + "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]", + "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]", + "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]", + "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]", + "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]", + "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]", + "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]", + "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]", + "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]", + "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]", + "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]", + "v[80:83]", "v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]", + "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]", + "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]", + "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]", + "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]", + "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]", + "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]", + "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]", + "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]", + "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]", + "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]", + "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]", + "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]", + "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]", + "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]", + "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]", + "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]", + "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]", + "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]", + "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]", + "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]", + "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]", + "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]", + "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]", + "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]", + "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]", + "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]", + "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]", + "v[220:223]", "v[221:224]", "v[222:225]", "v[223:226]", "v[224:227]", + "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]", + "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]", + "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]", + "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]", + "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]", + "v[250:253]", "v[251:254]", "v[252:255]" +}; + +static const char *const VGPR256RegNames[] = { + "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]", + "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]", + "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]", + "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]", + "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]", + "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]", + "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]", + "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]", + "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]", + "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]", + "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]", + "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]", + "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]", + "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]", + "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]", + "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]", + "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]", + "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]", + "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]", + "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]", + "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]", + "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]", + "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]", + "v[115:122]", "v[116:123]", "v[117:124]", "v[118:125]", "v[119:126]", + "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]", + "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]", + "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]", + "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]", + "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]", + "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]", + "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]", + "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]", + "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]", + "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]", + "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]", + "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]", + "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]", + "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]", + "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]", + "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]", + "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]", + "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]", + "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]", + "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]", + "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]", + "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]", + "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]", + "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]", + "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]", + "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]" +}; + +static const char *const VGPR512RegNames[] = { + "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]", + "v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]", + "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]", + "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]", + "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]", + "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]", + "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]", + "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]", + "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]", + "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]", + "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]", + "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]", + "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]", + "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]", + "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]", + "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]", + "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]", + "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]", + "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]", + "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]", + "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]", + "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]", + "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]", + "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]", + "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]", + "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]", + "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]", + "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]", + "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]", + "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]", + "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]", + "v[155:170]", "v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]", + "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]", + "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]", + "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]", + "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]", + "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]", + "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]", + "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]", + "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]", + "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]", + "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]", + "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]", + "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]", + "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]", + "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]", + "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]", + "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]", + "v[240:255]" +}; + +static const char *const SGPR64RegNames[] = { + "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]", + "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]", + "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]", + "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]", + "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]", + "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]", + "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]", + "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]", + "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]" +}; + +static const char *const SGPR128RegNames[] = { + "s[0:3]", "s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]", + "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]", + "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]", + "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]", + "s[96:99]", "s[100:103]" +}; + +static const char *const SGPR256RegNames[] = { + "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]", + "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]", + "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]", + "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]", + "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]" +}; + +static const char *const SGPR512RegNames[] = { + "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]", + "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]", + "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]", + "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]" +}; + +#endif diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index a5edc0c3b9377a0293c3def2e579379a760764f4..623b2c88ab8f74898db7c9870b4b182364a0e755 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -82,25 +82,28 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: { // FIXME: Should we be hard coding the size for these mappings? - InstructionMapping SSMapping(1, 1, - getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), - 2); // Num Operands - AltMappings.emplace_back(std::move(SSMapping)); - - InstructionMapping VVMapping(2, 1, - getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), - 2); // Num Operands - AltMappings.emplace_back(std::move(VVMapping)); + const InstructionMapping &SSMapping = getInstructionMapping( + 1, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), + 2); // Num Operands + AltMappings.push_back(&SSMapping); + + const InstructionMapping &VVMapping = getInstructionMapping( + 2, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), + 2); // Num Operands + AltMappings.push_back(&VVMapping); // FIXME: Should this be the pointer-size (64-bits) or the size of the // register that will hold the bufffer resourc (128-bits). - InstructionMapping VSMapping(3, 1, - getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), - 2); // Num Operands - AltMappings.emplace_back(std::move(VSMapping)); + const InstructionMapping &VSMapping = getInstructionMapping( + 3, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), + 2); // Num Operands + AltMappings.push_back(&VSMapping); return AltMappings; @@ -124,13 +127,11 @@ static bool isInstrUniform(const MachineInstr &MI) { return AMDGPU::isUniformMMO(MMO); } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - RegisterBankInfo::InstructionMapping Mapping = - InstructionMapping{1, 1, nullptr, MI.getNumOperands()}; SmallVector OpdsMapping(MI.getNumOperands()); unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); @@ -150,32 +151,34 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { OpdsMapping[0] = ValMapping; OpdsMapping[1] = PtrMapping; - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); + const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping( + 1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); return Mapping; // FIXME: Do we want to add a mapping for FLAT load, or should we just // handle that during instruction selection? } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); + const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()}; SmallVector OpdsMapping(MI.getNumOperands()); + bool IsComplete = true; switch (MI.getOpcode()) { - default: break; + default: + IsComplete = false; + break; case AMDGPU::G_CONSTANT: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; + break; } case AMDGPU::G_GEP: { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { @@ -185,8 +188,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits(); OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); } - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; + break; } case AMDGPU::G_STORE: { assert(MI.getOperand(0).isReg()); @@ -203,28 +205,27 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = ValMapping; OpdsMapping[1] = PtrMapping; - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; + break; } case AMDGPU::G_LOAD: return getInstrMappingForLoad(MI); } - unsigned BankID = AMDGPU::SGPRRegBankID; - - Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()}; - unsigned Size = 0; - for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) { - // If the operand is not a register default to the size of the previous - // operand. - // FIXME: Can't we pull the types from the MachineInstr rather than the - // operands. - if (MI.getOperand(Idx).isReg()) - Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI); - OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size)); + if (!IsComplete) { + unsigned BankID = AMDGPU::SGPRRegBankID; + + unsigned Size = 0; + for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) { + // If the operand is not a register default to the size of the previous + // operand. + // FIXME: Can't we pull the types from the MachineInstr rather than the + // operands. + if (MI.getOperand(Idx).isReg()) + Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI); + OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size)); + } } - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - - return Mapping; + return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), + MI.getNumOperands()); } diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index f13bde87ef2d21dac61d33eadcea13d0f48ab6ae..201fdc1974c684fb11e3c9cb3b04421a940ee819 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -36,7 +36,6 @@ protected: #define GET_TARGET_REGBANK_CLASS #include "AMDGPUGenRegisterBank.inc" - }; class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { const SIRegisterInfo *TRI; @@ -44,7 +43,7 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { /// See RegisterBankInfo::applyMapping. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; - RegisterBankInfo::InstructionMapping + const RegisterBankInfo::InstructionMapping & getInstrMappingForLoad(const MachineInstr &MI) const; public: @@ -59,7 +58,8 @@ public: InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override; - InstructionMapping getInstrMapping(const MachineInstr &MI) const override; + const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const override; }; } // End llvm namespace. #endif diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index 941f2d8a468a8732f62a562de2ce9ddde5a3caf5..ff58aa5741a1a4ca150b74950f73c4311a969121 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -14,6 +14,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUTargetMachine.h" +#include "SIRegisterInfo.h" using namespace llvm; @@ -24,18 +25,6 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {} // they are not supported at this time. //===----------------------------------------------------------------------===// -// Dummy to not crash RegisterClassInfo. -static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister; - -const MCPhysReg *AMDGPURegisterInfo::getCalleeSavedRegs( - const MachineFunction *) const { - return &CalleeSavedReg; -} - -unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { - return AMDGPU::NoRegister; -} - unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { static const unsigned SubRegs[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, @@ -50,3 +39,34 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { #define GET_REGINFO_TARGET_DESC #include "AMDGPUGenRegisterInfo.inc" + +// Forced to be here by one .inc +const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( + const MachineFunction *MF) const { + CallingConv::ID CC = MF->getFunction()->getCallingConv(); + switch (CC) { + case CallingConv::C: + case CallingConv::Fast: + return CSR_AMDGPU_HighRegs_SaveList; + default: { + // Dummy to not crash RegisterClassInfo. + static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister; + return &NoCalleeSavedReg; + } + } +} + +const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + switch (CC) { + case CallingConv::C: + case CallingConv::Fast: + return CSR_AMDGPU_HighRegs_RegMask; + default: + return nullptr; + } +} + +unsigned SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return AMDGPU::NoRegister; +} diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h index 22b1663821d96840ed86b1363f1535f8e261e575..d8604d2590f1f76c8d913033291f6cce9d253a1e 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -30,9 +30,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { /// \returns the sub reg enum value for the given \p Channel /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) unsigned getSubRegFromChannel(unsigned Channel) const; - - const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const override; - unsigned getFrameRegister(const MachineFunction &MF) const override; }; } // End namespace llvm diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 695d51a5353264d810a78078a8c7c7ff507b10ef..8d157e2f98f2470210a5876c68110a2b9c0824d8 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -16,6 +16,7 @@ #include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Target/TargetFrameLowering.h" #include @@ -90,6 +91,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FPExceptions(false), DX10Clamp(false), FlatForGlobal(false), + AutoWaitcntBeforeBarrier(false), UnalignedScratchAccess(false), UnalignedBufferAccess(false), @@ -124,6 +126,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasSDWA(false), HasDPP(false), FlatAddressSpace(false), + FlatInstOffsets(false), + FlatGlobalInsts(false), + FlatScratchInsts(false), R600ALUInst(false), CaymanISA(false), @@ -240,6 +245,65 @@ std::pair AMDGPUSubtarget::getWavesPerEU( return Requested; } +bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { + Function *Kernel = I->getParent()->getParent(); + unsigned MinSize = 0; + unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second; + bool IdQuery = false; + + // If reqd_work_group_size is present it narrows value down. + if (auto *CI = dyn_cast(I)) { + const Function *F = CI->getCalledFunction(); + if (F) { + unsigned Dim = UINT_MAX; + switch (F->getIntrinsicID()) { + case Intrinsic::amdgcn_workitem_id_x: + case Intrinsic::r600_read_tidig_x: + IdQuery = true; + case Intrinsic::r600_read_local_size_x: + Dim = 0; + break; + case Intrinsic::amdgcn_workitem_id_y: + case Intrinsic::r600_read_tidig_y: + IdQuery = true; + case Intrinsic::r600_read_local_size_y: + Dim = 1; + break; + case Intrinsic::amdgcn_workitem_id_z: + case Intrinsic::r600_read_tidig_z: + IdQuery = true; + case Intrinsic::r600_read_local_size_z: + Dim = 2; + break; + default: + break; + } + if (Dim <= 3) { + if (auto Node = Kernel->getMetadata("reqd_work_group_size")) + if (Node->getNumOperands() == 3) + MinSize = MaxSize = mdconst::extract( + Node->getOperand(Dim))->getZExtValue(); + } + } + } + + if (!MaxSize) + return false; + + // Range metadata is [Lo, Hi). For ID query we need to pass max size + // as Hi. For size query we need to pass Hi + 1. + if (IdQuery) + MinSize = 0; + else + ++MaxSize; + + MDBuilder MDB(I->getContext()); + MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize), + APInt(32, MaxSize)); + I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange); + return true; +} + R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : AMDGPUSubtarget(TT, GPU, FS, TM), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index c61a2ff818fc13c3623a978de09efbcc5e61c812..5f4f20316a6ba8e1e7d7bbda3e097ac88a755291 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -16,12 +16,12 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H #include "AMDGPU.h" -#include "R600InstrInfo.h" -#include "R600ISelLowering.h" #include "R600FrameLowering.h" -#include "SIInstrInfo.h" -#include "SIISelLowering.h" +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" #include "SIFrameLowering.h" +#include "SIISelLowering.h" +#include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Triple.h" @@ -57,9 +57,12 @@ public: enum { ISAVersion0_0_0, + ISAVersion6_0_0, + ISAVersion6_0_1, ISAVersion7_0_0, ISAVersion7_0_1, ISAVersion7_0_2, + ISAVersion7_0_3, ISAVersion8_0_0, ISAVersion8_0_1, ISAVersion8_0_2, @@ -67,7 +70,9 @@ public: ISAVersion8_0_4, ISAVersion8_1_0, ISAVersion9_0_0, - ISAVersion9_0_1 + ISAVersion9_0_1, + ISAVersion9_0_2, + ISAVersion9_0_3 }; enum TrapHandlerAbi { @@ -110,6 +115,7 @@ protected: bool FPExceptions; bool DX10Clamp; bool FlatForGlobal; + bool AutoWaitcntBeforeBarrier; bool UnalignedScratchAccess; bool UnalignedBufferAccess; bool HasApertureRegs; @@ -145,6 +151,9 @@ protected: bool HasSDWA; bool HasDPP; bool FlatAddressSpace; + bool FlatInstOffsets; + bool FlatGlobalInsts; + bool FlatScratchInsts; bool R600ALUInst; bool CaymanISA; bool CFALUBug; @@ -192,7 +201,8 @@ public: } bool isOpenCLEnv() const { - return TargetTriple.getEnvironment() == Triple::OpenCL; + return TargetTriple.getEnvironment() == Triple::OpenCL || + TargetTriple.getEnvironmentName() == "amdgizcl"; } Generation getGeneration() const { @@ -286,6 +296,10 @@ public: return getGeneration() >= GFX9; } + bool hasMin3Max3_16() const { + return getGeneration() >= GFX9; + } + bool hasCARRY() const { return (getGeneration() >= EVERGREEN); } @@ -356,6 +370,10 @@ public: return FlatForGlobal; } + bool hasAutoWaitcntBeforeBarrier() const { + return AutoWaitcntBeforeBarrier; + } + bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; } @@ -380,6 +398,18 @@ public: return FlatAddressSpace; } + bool hasFlatInstOffsets() const { + return FlatInstOffsets; + } + + bool hasFlatGlobalInsts() const { + return FlatGlobalInsts; + } + + bool hasFlatScratchInsts() const { + return FlatScratchInsts; + } + bool isMesaKernel(const MachineFunction &MF) const { return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv()); } @@ -397,6 +427,10 @@ public: return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; } + bool hasSDWA() const { + return HasSDWA; + } + /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { @@ -415,9 +449,11 @@ public: return 0; } + // Scratch is allocated in 256 dword per wave blocks for the entire + // wavefront. When viewed from the perspecive of an arbitrary workitem, this + // is 4-byte aligned. unsigned getStackAlignment() const { - // Scratch is allocated in 256 dword per wave blocks. - return 4 * 256 / getWavefrontSize(); + return 4; } bool enableMachineScheduler() const override { @@ -512,6 +548,9 @@ public: /// compatible with minimum/maximum number of waves limited by flat work group /// size, register usage, and/or lds usage. std::pair getWavesPerEU(const Function &F) const; + + /// Creates value range metadata on an workitemid.* inrinsic call or load. + bool makeLIDRangeMetadata(Instruction *I) const; }; class R600Subtarget final : public AMDGPUSubtarget { @@ -646,10 +685,6 @@ public: return HasInv2PiInlineImm; } - bool hasSDWA() const { - return HasSDWA; - } - bool hasDPP() const { return HasDPP; } @@ -703,12 +738,6 @@ public: /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; - /// \returns True if waitcnt instruction is needed before barrier instruction, - /// false otherwise. - bool needWaitcntBeforeBarrier() const { - return getGeneration() < GFX9; - } - /// \returns true if the flat_scratch register should be initialized with the /// pointer to the wave's scratch memory rather than a size and offset. bool flatScratchIsPointer() const { @@ -763,7 +792,7 @@ public: /// \returns VGPR allocation granularity supported by the subtarget. unsigned getVGPRAllocGranule() const { - return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());; + return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits()); } /// \returns VGPR encoding granularity supported by the subtarget. diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 1e7ef584d6e29d8180ce9a58ae8273ff99ca5093..b644eba536fa4af2113b5aed324c600d14767029 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -28,26 +28,26 @@ #include "GCNSchedStrategy.h" #include "R600MachineScheduler.h" #include "SIMachineScheduler.h" -#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/IPO/AlwaysInliner.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/GVN.h" -#include "llvm/Transforms/Vectorize.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Vectorize.h" #include using namespace llvm; @@ -116,7 +116,14 @@ static cl::opt EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, static cl::opt EnableSIInsertWaitcntsPass( "enable-si-insert-waitcnts", cl::desc("Use new waitcnt insertion pass"), - cl::init(false)); + cl::init(true)); + +// Option to run late CFG structurizer +static cl::opt LateCFGStructurize( + "amdgpu-late-structurize", + cl::desc("Enable late CFG structurization"), + cl::init(false), + cl::Hidden); extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target @@ -132,6 +139,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() { initializeSIShrinkInstructionsPass(*PR); initializeSIFixControlFlowLiveIntervalsPass(*PR); initializeSILoadStoreOptimizerPass(*PR); + initializeAMDGPUAlwaysInlinePass(*PR); initializeAMDGPUAnnotateKernelFeaturesPass(*PR); initializeAMDGPUAnnotateUniformValuesPass(*PR); initializeAMDGPULowerIntrinsicsPass(*PR); @@ -309,6 +317,7 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { default: return false; case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: @@ -448,7 +457,7 @@ namespace { class AMDGPUPassConfig : public TargetPassConfig { public: - AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) + AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) { // Exceptions and StackMaps are not supported, so these passes will never do // anything. @@ -479,7 +488,7 @@ public: class R600PassConfig final : public AMDGPUPassConfig { public: - R600PassConfig(TargetMachine *TM, PassManagerBase &PM) + R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM) : AMDGPUPassConfig(TM, PM) {} ScheduleDAGInstrs *createMachineScheduler( @@ -495,7 +504,7 @@ public: class GCNPassConfig final : public AMDGPUPassConfig { public: - GCNPassConfig(TargetMachine *TM, PassManagerBase &PM) + GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM) : AMDGPUPassConfig(TM, PM) {} GCNTargetMachine &getGCNTargetMachine() const { @@ -555,6 +564,8 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() { } void AMDGPUPassConfig::addIRPasses() { + const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); + // There is no reason to run these. disablePass(&StackMapLivenessID); disablePass(&FuncletLayoutID); @@ -572,13 +583,10 @@ void AMDGPUPassConfig::addIRPasses() { // without ever running any passes on the second. addPass(createBarrierNoopPass()); - const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); - if (TM.getTargetTriple().getArch() == Triple::amdgcn) { // TODO: May want to move later or split into an early and late one. - addPass(createAMDGPUCodeGenPreparePass( - static_cast(&TM))); + addPass(createAMDGPUCodeGenPreparePass()); } // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. @@ -586,7 +594,7 @@ void AMDGPUPassConfig::addIRPasses() { if (TM.getOptLevel() > CodeGenOpt::None) { addPass(createInferAddressSpacesPass()); - addPass(createAMDGPUPromoteAlloca(&TM)); + addPass(createAMDGPUPromoteAlloca()); if (EnableSROA) addPass(createSROAPass()); @@ -656,26 +664,26 @@ bool R600PassConfig::addPreISel() { } void R600PassConfig::addPreRegAlloc() { - addPass(createR600VectorRegMerger(*TM)); + addPass(createR600VectorRegMerger()); } void R600PassConfig::addPreSched2() { addPass(createR600EmitClauseMarkers(), false); if (EnableR600IfConvert) addPass(&IfConverterID, false); - addPass(createR600ClauseMergePass(*TM), false); + addPass(createR600ClauseMergePass(), false); } void R600PassConfig::addPreEmitPass() { addPass(createAMDGPUCFGStructurizerPass(), false); - addPass(createR600ExpandSpecialInstrsPass(*TM), false); + addPass(createR600ExpandSpecialInstrsPass(), false); addPass(&FinalizeMachineBundlesID, false); - addPass(createR600Packetizer(*TM), false); - addPass(createR600ControlFlowFinalizer(*TM), false); + addPass(createR600Packetizer(), false); + addPass(createR600ControlFlowFinalizer(), false); } TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { - return new R600PassConfig(this, PM); + return new R600PassConfig(*this, PM); } //===----------------------------------------------------------------------===// @@ -695,17 +703,20 @@ bool GCNPassConfig::addPreISel() { // FIXME: We need to run a pass to propagate the attributes when calls are // supported. - const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); - addPass(createAMDGPUAnnotateKernelFeaturesPass(&TM)); + addPass(createAMDGPUAnnotateKernelFeaturesPass()); // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit // regions formed by them. addPass(&AMDGPUUnifyDivergentExitNodesID); - addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions + if (!LateCFGStructurize) { + addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions + } addPass(createSinkingPass()); addPass(createSITypeRewriter()); addPass(createAMDGPUAnnotateUniformValues()); - addPass(createSIAnnotateControlFlowPass()); + if (!LateCFGStructurize) { + addPass(createSIAnnotateControlFlowPass()); + } return false; } @@ -723,11 +734,14 @@ void GCNPassConfig::addMachineSSAOptimization() { addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); addPass(&SILoadStoreOptimizerID); - addPass(createSIShrinkInstructionsPass()); if (EnableSDWAPeephole) { addPass(&SIPeepholeSDWAID); + addPass(&MachineLICMID); + addPass(&MachineCSEID); + addPass(&SIFoldOperandsID); addPass(&DeadMachineInstructionElimID); } + addPass(createSIShrinkInstructionsPass()); } bool GCNPassConfig::addILPOpts() { @@ -769,6 +783,9 @@ bool GCNPassConfig::addGlobalInstructionSelect() { #endif void GCNPassConfig::addPreRegAlloc() { + if (LateCFGStructurize) { + addPass(createAMDGPUMachineCFGStructurizerPass()); + } addPass(createSIWholeQuadModePass()); } @@ -828,6 +845,6 @@ void GCNPassConfig::addPreEmitPass() { } TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { - return new GCNPassConfig(this, PM); + return new GCNPassConfig(*this, PM); } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 934bf7f31bab45353c909d3263c04b4306d2d9cd..a3c7c1982d0a6eb4b0797156b453833d6ea683b8 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -69,7 +69,6 @@ public: return -1; return 0; } - }; //===----------------------------------------------------------------------===// @@ -89,6 +88,10 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; const R600Subtarget *getSubtargetImpl(const Function &) const override; + + bool isMachineVerifierClean() const override { + return false; + } }; //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp index c96761c0b04ecf9bda9306bc3b28255ffd502e91..6c1885e67fcb7043d36c2c6c2040dc80f1e4d102 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUTargetMachine.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" -#include "Utils/AMDGPUBaseInfo.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index f6d33740a4ff5df700c0341f5c26e560a1af6875..0d6689bd04c4e2248c6e27cac9454db3e115b2a7 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -20,8 +20,8 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" @@ -195,7 +195,7 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? } -unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) const { return Vector ? 0 : 32; } @@ -363,13 +363,22 @@ int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index) { switch (Opcode) { case Instruction::ExtractElement: - case Instruction::InsertElement: + case Instruction::InsertElement: { + unsigned EltSize + = DL.getTypeSizeInBits(cast(ValTy)->getElementType()); + if (EltSize < 32) { + if (EltSize == 16 && Index == 0 && ST->has16BitInsts()) + return 0; + return BaseT::getVectorInstrCost(Opcode, ValTy, Index); + } + // Extracts are just reads of a subregister, so are free. Inserts are // considered free because we don't want to have any cost for scalarizing // operations, and we don't have to copy into a different register class. // Dynamic indexing isn't free and is best avoided. return Index == ~0u ? 2 : 0; + } default: return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } @@ -426,16 +435,24 @@ static bool isArgPassedInSGPR(const Argument *A) { const Function *F = A->getParent(); // Arguments to compute shaders are never a source of divergence. - if (!AMDGPU::isShader(F->getCallingConv())) - return true; - - // For non-compute shaders, SGPR inputs are marked with either inreg or byval. - if (F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::InReg) || - F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::ByVal)) + CallingConv::ID CC = F->getCallingConv(); + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: return true; - - // Everything else is in VGPRs. - return false; + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + // For non-compute shaders, SGPR inputs are marked with either inreg or byval. + // Everything else is in VGPRs. + return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) || + F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal); + default: + // TODO: Should calls support inreg for SGPR inputs? + return false; + } } /// @@ -471,3 +488,39 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const { return false; } + +bool AMDGPUTTIImpl::isAlwaysUniform(const Value *V) const { + if (const IntrinsicInst *Intrinsic = dyn_cast(V)) { + switch (Intrinsic->getIntrinsicID()) { + default: + return false; + case Intrinsic::amdgcn_readfirstlane: + case Intrinsic::amdgcn_readlane: + return true; + } + } + return false; +} + +unsigned AMDGPUTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) { + if (ST->hasVOP3PInsts()) { + VectorType *VT = cast(Tp); + if (VT->getNumElements() == 2 && + DL.getTypeSizeInBits(VT->getElementType()) == 16) { + // With op_sel VOP3P instructions freely can access the low half or high + // half of a register, so any swizzle is free. + + switch (Kind) { + case TTI::SK_Broadcast: + case TTI::SK_Reverse: + case TTI::SK_PermuteSingleSrc: + return 0; + default: + break; + } + } + } + + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); +} diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 71d6306bc1a5ce67b4fc5208a38cac9697084d0b..a60b1bb1b59c70713c7776bae235bd45bc3bc9d1 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -76,7 +76,7 @@ public: } unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, @@ -103,6 +103,7 @@ public: int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); bool isSourceOfDivergence(const Value *V) const; + bool isAlwaysUniform(const Value *V) const; unsigned getFlatAddressSpace() const { // Don't bother running InferAddressSpaces pass on graphics shaders which @@ -114,6 +115,9 @@ public: } unsigned getVectorSplitCost() { return 0; } + + unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp); }; } // end namespace llvm diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 961f7186f3731d6b84855edfe57f25a69fcf9aa6..392e9d89bd9ba28fb764a9b9fde3bf0b9ce6c3c3 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -11,18 +11,19 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "SIDefines.h" +#include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" -#include "Utils/AMDGPUAsmUtils.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -40,12 +41,11 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -161,7 +161,8 @@ public: ImmTyOpSel, ImmTyOpSelHi, ImmTyNegLo, - ImmTyNegHi + ImmTyNegHi, + ImmTySwizzle }; struct TokOp { @@ -284,6 +285,9 @@ public: bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<16>(getImm()); } bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } + + bool isOffsetU12() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); } + bool isOffsetS13() const { return isImmTy(ImmTyOffset) && isInt<13>(getImm()); } bool isGDS() const { return isImmTy(ImmTyGDS); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } @@ -474,11 +478,14 @@ public: bool isSWaitCnt() const; bool isHwreg() const; bool isSendMsg() const; + bool isSwizzle() const; bool isSMRDOffset8() const; bool isSMRDOffset20() const; bool isSMRDLiteralOffset() const; bool isDPPCtrl() const; bool isGPRIdxMode() const; + bool isS16Imm() const; + bool isU16Imm() const; StringRef getExpressionAsToken() const { assert(isExpr()); @@ -657,6 +664,7 @@ public: case ImmTyOpSelHi: OS << "OpSelHi"; break; case ImmTyNegLo: OS << "NegLo"; break; case ImmTyNegHi: OS << "NegHi"; break; + case ImmTySwizzle: OS << "Swizzle"; break; } } @@ -809,14 +817,8 @@ private: bool ParseDirectiveCodeObjectMetadata(); bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); bool ParseDirectiveAMDKernelCodeT(); - bool ParseSectionDirectiveHSAText(); bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; bool ParseDirectiveAMDGPUHsaKernel(); - bool ParseDirectiveAMDGPUHsaModuleGlobal(); - bool ParseDirectiveAMDGPUHsaProgramGlobal(); - bool ParseSectionDirectiveHSADataGlobalAgent(); - bool ParseSectionDirectiveHSADataGlobalProgram(); - bool ParseSectionDirectiveHSARodataReadonlyAgent(); bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum); @@ -879,10 +881,18 @@ public: return AMDGPU::isVI(getSTI()); } + bool isGFX9() const { + return AMDGPU::isGFX9(getSTI()); + } + bool hasInv2PiInlineImm() const { return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; } + bool hasFlatOffsets() const { + return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; + } + bool hasSGPR102_SGPR103() const { return !isVI(); } @@ -987,7 +997,12 @@ private: bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; - bool isSGPR(unsigned Reg); + + bool trySkipId(const StringRef Id); + bool trySkipToken(const AsmToken::TokenKind Kind); + bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); + bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); + bool parseExpr(int64_t &Imm); public: OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); @@ -998,6 +1013,19 @@ public: OperandMatchResultTy parseInterpAttr(OperandVector &Operands); OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); + bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, + const unsigned MinVal, + const unsigned MaxVal, + const StringRef ErrMsg); + OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); + bool parseSwizzleOffset(int64_t &Imm); + bool parseSwizzleMacro(int64_t &Imm); + bool parseSwizzleQuadPerm(int64_t &Imm); + bool parseSwizzleBitmaskPerm(int64_t &Imm); + bool parseSwizzleBroadcast(int64_t &Imm); + bool parseSwizzleSwap(int64_t &Imm); + bool parseSwizzleReverse(int64_t &Imm); + void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } @@ -1013,6 +1041,7 @@ public: AMDGPUOperand::Ptr defaultSMRDOffset8() const; AMDGPUOperand::Ptr defaultSMRDOffset20() const; AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; + AMDGPUOperand::Ptr defaultOffsetU12() const; OperandMatchResultTy parseOModOperand(OperandVector &Operands); @@ -1040,9 +1069,10 @@ public: OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); void cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType); + uint64_t BasicInstType, bool skipVcc = false); }; struct OptionalOperand { @@ -1948,6 +1978,15 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } } + if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { + // FIXME: Produces error without correct column reported. + auto OpNum = + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); + const auto &Op = Inst.getOperand(OpNum); + if (Op.getImm() != 0) + return Match_InvalidOperand; + } + return Match_Success; } @@ -1964,7 +2003,8 @@ ArrayRef AMDGPUAsmParser::getMatchedVariants() const { } if (isForcedSDWA()) { - static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA}; + static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, + AMDGPUAsmVariants::SDWA9}; return makeArrayRef(Variants); } @@ -1975,7 +2015,7 @@ ArrayRef AMDGPUAsmParser::getMatchedVariants() const { static const unsigned Variants[] = { AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, - AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP + AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP }; return makeArrayRef(Variants); @@ -1998,14 +2038,6 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { return AMDGPU::NoRegister; } -bool AMDGPUAsmParser::isSGPR(unsigned Reg) { - const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); - const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); - return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || - Reg == AMDGPU::SCC; -} - // NB: This code is correct only when used to check constant // bus limitations because GFX7 support no f16 inline constants. // Note that there are no cases when a GFX7 opcode violates @@ -2047,7 +2079,8 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { if (MO.isImm()) { return !isInlineConstant(Inst, OpIdx); } - return !MO.isReg() || isSGPR(mc2PseudoReg(MO.getReg())); + return !MO.isReg() || + isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); } bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { @@ -2058,7 +2091,8 @@ bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { if (Desc.TSFlags & (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | - SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) { + SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | + SIInstrFlags::SDWA)) { // Check special imm operands (used by madmk, etc) if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { @@ -2342,12 +2376,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { return false; } -bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSATextSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { if (getLexer().isNot(AsmToken::Identifier)) return TokError("expected symbol name"); @@ -2361,46 +2389,6 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { return false; } -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() { - if (getLexer().isNot(AsmToken::Identifier)) - return TokError("expected symbol name"); - - StringRef GlobalName = Parser.getTok().getIdentifier(); - - getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName); - Lex(); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalAgentSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSADataGlobalProgramSection(getContext())); - return false; -} - -bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() { - getParser().getStreamer().SwitchSection( - AMDGPU::getHSARodataReadonlyAgentSection(getContext())); - return false; -} - bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2416,27 +2404,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); - if (IDVal == ".hsatext") - return ParseSectionDirectiveHSAText(); - if (IDVal == ".amdgpu_hsa_kernel") return ParseDirectiveAMDGPUHsaKernel(); - if (IDVal == ".amdgpu_hsa_module_global") - return ParseDirectiveAMDGPUHsaModuleGlobal(); - - if (IDVal == ".amdgpu_hsa_program_global") - return ParseDirectiveAMDGPUHsaProgramGlobal(); - - if (IDVal == ".hsadata_global_agent") - return ParseSectionDirectiveHSADataGlobalAgent(); - - if (IDVal == ".hsadata_global_program") - return ParseSectionDirectiveHSADataGlobalProgram(); - - if (IDVal == ".hsarodata_readonly_agent") - return ParseSectionDirectiveHSARodataReadonlyAgent(); - return true; } @@ -2784,7 +2754,13 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, OptionalIdx[Op.getImmTy()] = i; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); + AMDGPUOperand::ImmTy OffsetType = + (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || + Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle : + AMDGPUOperand::ImmTyOffset; + + addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); + if (!IsGdsHardcoded) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); } @@ -2794,6 +2770,7 @@ void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; + unsigned OperandIdx[4]; unsigned EnMask = 0; int SrcIdx = 0; @@ -2802,15 +2779,18 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { // Add the register arguments if (Op.isReg()) { - EnMask |= (1 << SrcIdx); + assert(SrcIdx < 4); + OperandIdx[SrcIdx] = Inst.size(); Op.addRegOperands(Inst, 1); ++SrcIdx; continue; } if (Op.isOff()) { - ++SrcIdx; + assert(SrcIdx < 4); + OperandIdx[SrcIdx] = Inst.size(); Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); + ++SrcIdx; continue; } @@ -2826,6 +2806,22 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { OptionalIdx[Op.getImmTy()] = i; } + assert(SrcIdx == 4); + + bool Compr = false; + if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { + Compr = true; + Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); + Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); + Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); + } + + for (auto i = 0; i < SrcIdx; ++i) { + if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { + EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); + } + } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); @@ -2836,6 +2832,28 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { // s_waitcnt //===----------------------------------------------------------------------===// +static bool +encodeCnt( + const AMDGPU::IsaInfo::IsaVersion ISA, + int64_t &IntVal, + int64_t CntVal, + bool Saturate, + unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned), + unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned)) +{ + bool Failed = false; + + IntVal = encode(ISA, IntVal, CntVal); + if (CntVal != decode(ISA, IntVal)) { + if (Saturate) { + IntVal = encode(ISA, IntVal, -1); + } else { + Failed = true; + } + } + return Failed; +} + bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { StringRef CntName = Parser.getTok().getString(); int64_t CntVal; @@ -2848,26 +2866,40 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { if (getLexer().isNot(AsmToken::Integer)) return true; + SMLoc ValLoc = Parser.getTok().getLoc(); if (getParser().parseAbsoluteExpression(CntVal)) return true; - if (getLexer().isNot(AsmToken::RParen)) - return true; - - Parser.Lex(); - if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) - Parser.Lex(); - AMDGPU::IsaInfo::IsaVersion ISA = AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()); - if (CntName == "vmcnt") - IntVal = encodeVmcnt(ISA, IntVal, CntVal); - else if (CntName == "expcnt") - IntVal = encodeExpcnt(ISA, IntVal, CntVal); - else if (CntName == "lgkmcnt") - IntVal = encodeLgkmcnt(ISA, IntVal, CntVal); - else + + bool Failed = true; + bool Sat = CntName.endswith("_sat"); + + if (CntName == "vmcnt" || CntName == "vmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); + } else if (CntName == "expcnt" || CntName == "expcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); + } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { + Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); + } + + if (Failed) { + Error(ValLoc, "too large value for " + CntName); + return true; + } + + if (getLexer().isNot(AsmToken::RParen)) { return true; + } + + Parser.Lex(); + if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { + const AsmToken NextToken = getLexer().peekTok(); + if (NextToken.is(AsmToken::Identifier)) { + Parser.Lex(); + } + } return false; } @@ -3330,6 +3362,298 @@ bool AMDGPUOperand::isSendMsg() const { return isImmTy(ImmTySendMsg); } +//===----------------------------------------------------------------------===// +// parser helpers +//===----------------------------------------------------------------------===// + +bool +AMDGPUAsmParser::trySkipId(const StringRef Id) { + if (getLexer().getKind() == AsmToken::Identifier && + Parser.getTok().getString() == Id) { + Parser.Lex(); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { + if (getLexer().getKind() == Kind) { + Parser.Lex(); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, + const StringRef ErrMsg) { + if (!trySkipToken(Kind)) { + Error(Parser.getTok().getLoc(), ErrMsg); + return false; + } + return true; +} + +bool +AMDGPUAsmParser::parseExpr(int64_t &Imm) { + return !getParser().parseAbsoluteExpression(Imm); +} + +bool +AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { + SMLoc S = Parser.getTok().getLoc(); + if (getLexer().getKind() == AsmToken::String) { + Val = Parser.getTok().getStringContents(); + Parser.Lex(); + return true; + } else { + Error(S, ErrMsg); + return false; + } +} + +//===----------------------------------------------------------------------===// +// swizzle +//===----------------------------------------------------------------------===// + +LLVM_READNONE +static unsigned +encodeBitmaskPerm(const unsigned AndMask, + const unsigned OrMask, + const unsigned XorMask) { + using namespace llvm::AMDGPU::Swizzle; + + return BITMASK_PERM_ENC | + (AndMask << BITMASK_AND_SHIFT) | + (OrMask << BITMASK_OR_SHIFT) | + (XorMask << BITMASK_XOR_SHIFT); +} + +bool +AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, + const unsigned MinVal, + const unsigned MaxVal, + const StringRef ErrMsg) { + for (unsigned i = 0; i < OpNum; ++i) { + if (!skipToken(AsmToken::Comma, "expected a comma")){ + return false; + } + SMLoc ExprLoc = Parser.getTok().getLoc(); + if (!parseExpr(Op[i])) { + return false; + } + if (Op[i] < MinVal || Op[i] > MaxVal) { + Error(ExprLoc, ErrMsg); + return false; + } + } + + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + int64_t Lane[LANE_NUM]; + if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, + "expected a 2-bit lane id")) { + Imm = QUAD_PERM_ENC; + for (auto i = 0; i < LANE_NUM; ++i) { + Imm |= Lane[i] << (LANE_SHIFT * i); + } + return true; + } + return false; +} + +bool +AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + int64_t LaneIdx; + + if (!parseSwizzleOperands(1, &GroupSize, + 2, 32, + "group size must be in the interval [2,32]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + if (parseSwizzleOperands(1, &LaneIdx, + 0, GroupSize - 1, + "lane id must be in the interval [0,group size - 1]")) { + Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); + return true; + } + return false; +} + +bool +AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + + if (!parseSwizzleOperands(1, &GroupSize, + 2, 32, "group size must be in the interval [2,32]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + + Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + SMLoc S = Parser.getTok().getLoc(); + int64_t GroupSize; + + if (!parseSwizzleOperands(1, &GroupSize, + 1, 16, "group size must be in the interval [1,16]")) { + return false; + } + if (!isPowerOf2_64(GroupSize)) { + Error(S, "group size must be a power of two"); + return false; + } + + Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + if (!skipToken(AsmToken::Comma, "expected a comma")) { + return false; + } + + StringRef Ctl; + SMLoc StrLoc = Parser.getTok().getLoc(); + if (!parseString(Ctl)) { + return false; + } + if (Ctl.size() != BITMASK_WIDTH) { + Error(StrLoc, "expected a 5-character mask"); + return false; + } + + unsigned AndMask = 0; + unsigned OrMask = 0; + unsigned XorMask = 0; + + for (size_t i = 0; i < Ctl.size(); ++i) { + unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); + switch(Ctl[i]) { + default: + Error(StrLoc, "invalid mask"); + return false; + case '0': + break; + case '1': + OrMask |= Mask; + break; + case 'p': + AndMask |= Mask; + break; + case 'i': + AndMask |= Mask; + XorMask |= Mask; + break; + } + } + + Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { + + SMLoc OffsetLoc = Parser.getTok().getLoc(); + + if (!parseExpr(Imm)) { + return false; + } + if (!isUInt<16>(Imm)) { + Error(OffsetLoc, "expected a 16-bit offset"); + return false; + } + return true; +} + +bool +AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { + using namespace llvm::AMDGPU::Swizzle; + + if (skipToken(AsmToken::LParen, "expected a left parentheses")) { + + SMLoc ModeLoc = Parser.getTok().getLoc(); + bool Ok = false; + + if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { + Ok = parseSwizzleQuadPerm(Imm); + } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { + Ok = parseSwizzleBitmaskPerm(Imm); + } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { + Ok = parseSwizzleBroadcast(Imm); + } else if (trySkipId(IdSymbolic[ID_SWAP])) { + Ok = parseSwizzleSwap(Imm); + } else if (trySkipId(IdSymbolic[ID_REVERSE])) { + Ok = parseSwizzleReverse(Imm); + } else { + Error(ModeLoc, "expected a swizzle mode"); + } + + return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); + } + + return false; +} + +OperandMatchResultTy +AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { + SMLoc S = Parser.getTok().getLoc(); + int64_t Imm = 0; + + if (trySkipId("offset")) { + + bool Ok = false; + if (skipToken(AsmToken::Colon, "expected a colon")) { + if (trySkipId("swizzle")) { + Ok = parseSwizzleMacro(Imm); + } else { + Ok = parseSwizzleOffset(Imm); + } + } + + Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); + + return Ok? MatchOperand_Success : MatchOperand_ParseFail; + } else { + return MatchOperand_NoMatch; + } +} + +bool +AMDGPUOperand::isSwizzle() const { + return isImmTy(ImmTySwizzle); +} + //===----------------------------------------------------------------------===// // sopp branch targets //===----------------------------------------------------------------------===// @@ -3542,6 +3866,10 @@ AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); } +AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { + return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); +} + //===----------------------------------------------------------------------===// // vop3 //===----------------------------------------------------------------------===// @@ -3608,6 +3936,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = { {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, + {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, @@ -3858,6 +4187,14 @@ bool AMDGPUOperand::isGPRIdxMode() const { return isImm() && isUInt<4>(getImm()); } +bool AMDGPUOperand::isS16Imm() const { + return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); +} + +bool AMDGPUOperand::isU16Imm() const { + return isImm() && isUInt<16>(getImm()); +} + OperandMatchResultTy AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); @@ -4088,14 +4425,19 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); } +void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); +} + void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { - cvtSDWA(Inst, Operands, SIInstrFlags::VOPC); + cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); } void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType) { + uint64_t BasicInstType, bool skipVcc) { using namespace llvm::AMDGPU::SDWA; OptionalImmIndexMap OptionalIdx; + bool skippedVcc = false; unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); @@ -4105,15 +4447,22 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - // Add the register arguments - if ((BasicInstType == SIInstrFlags::VOPC || - BasicInstType == SIInstrFlags::VOP2)&& - Op.isReg() && - Op.Reg.RegNo == AMDGPU::VCC) { - // VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. - // Skip it. - continue; - } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { + // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. + // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) + // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. + // Skip VCC only if we didn't skip it on previous iteration. + if (BasicInstType == SIInstrFlags::VOP2 && + (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { + skippedVcc = true; + continue; + } else if (BasicInstType == SIInstrFlags::VOPC && + Inst.getNumOperands() == 0) { + skippedVcc = true; + continue; + } + } + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { Op.addRegWithInputModsOperands(Inst, 2); } else if (Op.isImm()) { // Handle optional arguments @@ -4121,20 +4470,30 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, } else { llvm_unreachable("Invalid operand type"); } + skippedVcc = false; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - - if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { + if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && + Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { // V_NOP_sdwa_vi has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (isGFX9() && + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); break; case SIInstrFlags::VOP2: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (isGFX9() && + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); @@ -4142,6 +4501,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, break; case SIInstrFlags::VOPC: + if (isVI()) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); break; @@ -4157,10 +4519,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { auto it = Inst.begin(); std::advance( - it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); + it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); Inst.insert(it, Inst.getOperand(0)); // src2 = dst } - } /// Force static initialization. diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index a6609f0725ab6a8fea26984ff779c1e6bc679979..2aca65ac843038dd898a108f03769c8f12cc52d5 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -11,7 +11,9 @@ def MUBUFAddr32 : ComplexPattern; def MUBUFAddr64 : ComplexPattern; def MUBUFAddr64Atomic : ComplexPattern; -def MUBUFScratch : ComplexPattern; +def MUBUFScratchOffen : ComplexPattern; +def MUBUFScratchOffset : ComplexPattern; + def MUBUFOffset : ComplexPattern; def MUBUFOffsetNoGLC : ComplexPattern; def MUBUFOffsetAtomic : ComplexPattern; @@ -958,21 +960,30 @@ defm : MUBUFLoad_Pattern ; } // End Predicates = [Has16BitInsts] -class MUBUFScratchLoadPat : Pat < - (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr, - i32:$soffset, u16imm:$offset))), - (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) ->; +multiclass MUBUFScratchLoadPat { + def : Pat < + (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, + i32:$soffset, u16imm:$offset))), + (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) + >; + + def : Pat < + (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), + (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0) + >; +} -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; -def : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; // BUFFER_LOAD_DWORD*, addr64=0 multiclass MUBUF_Load_Dword ; defm : MUBUFStore_Pattern ; -class MUBUFScratchStorePat : Pat < - (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset, - u16imm:$offset)), - (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) ->; +multiclass MUBUFScratchStorePat { + def : Pat < + (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, + i32:$soffset, u16imm:$offset)), + (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) + >; + + def : Pat < + (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, + u16imm:$offset)), + (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0) + >; +} -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; -def : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; //===----------------------------------------------------------------------===// // MTBUF Patterns diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 7c0ef4aeac3c7105937557856cce146bf795ab62..e30844f082cdd8fef0363605458696ae6c3dfe0b 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -48,6 +48,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUISelDAGToDAG.cpp AMDGPULowerIntrinsics.cpp AMDGPUMCInstLower.cpp + AMDGPUMachineCFGStructurizer.cpp AMDGPUMachineFunction.cpp AMDGPUUnifyMetadata.cpp AMDGPUOpenCLImageTypeLoweringPass.cpp @@ -57,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUISelLowering.cpp AMDGPUInstrInfo.cpp AMDGPUPromoteAlloca.cpp + AMDGPURegAsmNames.inc.cpp AMDGPURegisterInfo.cpp AMDGPUUnifyDivergentExitNodes.cpp GCNHazardRecognizer.cpp diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td index 65dcd27ae7a026386fa89de82b2fc932b0a24cfb..fc516c3b39c28b7bb86f783ce9f30bea93c8dd4f 100644 --- a/lib/Target/AMDGPU/DSInstructions.td +++ b/lib/Target/AMDGPU/DSInstructions.td @@ -88,18 +88,6 @@ class DS_1A1D_NORET let has_vdst = 0; } -class DS_1A_Off8_NORET : DS_Pseudo { - - let has_data0 = 0; - let has_data1 = 0; - let has_vdst = 0; - let has_offset = 0; - let AsmMatchConverter = "cvtDSOffset01"; -} - class DS_1A2D_NORET : DS_Pseudo +class DS_1A_RET : DS_Pseudo { let has_data0 = 0; @@ -267,8 +255,6 @@ class DS_1A1D_PERMUTE [(set i32:$vdst, (node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > { - let LGKM_CNT = 0; - let mayLoad = 0; let mayStore = 0; let isConvergent = 1; @@ -450,11 +436,11 @@ def DS_XOR_SRC2_B64 : DS_1A<"ds_xor_src2_b64">; def DS_MIN_SRC2_F64 : DS_1A<"ds_min_src2_f64">; def DS_MAX_SRC2_F64 : DS_1A<"ds_max_src2_f64">; -def DS_WRITE_SRC2_B32 : DS_1A_Off8_NORET<"ds_write_src2_b32">; -def DS_WRITE_SRC2_B64 : DS_1A_Off8_NORET<"ds_write_src2_b64">; +def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">; +def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">; let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in { -def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32">; +def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, SwizzleImm>; } let mayStore = 0 in { diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 4fb03b62bba9a50caf7151875dceed15e5dc4a5a..88c92b9582fd01463a669cea8f299a974b230920 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -20,21 +20,20 @@ #include "AMDGPUDisassembler.h" #include "AMDGPU.h" #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/TargetRegistry.h" - using namespace llvm; #define DEBUG_TYPE "amdgpu-disassembler" @@ -62,32 +61,33 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, return addOperand(Inst, MCOperand::createImm(Imm)); } -#define DECODE_OPERAND2(RegClass, DecName) \ -static DecodeStatus Decode##RegClass##RegisterClass(MCInst &Inst, \ - unsigned Imm, \ - uint64_t /*Addr*/, \ - const void *Decoder) { \ +#define DECODE_OPERAND(StaticDecoderName, DecoderName) \ +static DecodeStatus StaticDecoderName(MCInst &Inst, \ + unsigned Imm, \ + uint64_t /*Addr*/, \ + const void *Decoder) { \ auto DAsm = static_cast(Decoder); \ - return addOperand(Inst, DAsm->decodeOperand_##DecName(Imm)); \ + return addOperand(Inst, DAsm->DecoderName(Imm)); \ } -#define DECODE_OPERAND(RegClass) DECODE_OPERAND2(RegClass, RegClass) +#define DECODE_OPERAND_REG(RegClass) \ +DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) -DECODE_OPERAND(VGPR_32) -DECODE_OPERAND(VS_32) -DECODE_OPERAND(VS_64) +DECODE_OPERAND_REG(VGPR_32) +DECODE_OPERAND_REG(VS_32) +DECODE_OPERAND_REG(VS_64) -DECODE_OPERAND(VReg_64) -DECODE_OPERAND(VReg_96) -DECODE_OPERAND(VReg_128) +DECODE_OPERAND_REG(VReg_64) +DECODE_OPERAND_REG(VReg_96) +DECODE_OPERAND_REG(VReg_128) -DECODE_OPERAND(SReg_32) -DECODE_OPERAND(SReg_32_XM0_XEXEC) -DECODE_OPERAND(SReg_64) -DECODE_OPERAND(SReg_64_XEXEC) -DECODE_OPERAND(SReg_128) -DECODE_OPERAND(SReg_256) -DECODE_OPERAND(SReg_512) +DECODE_OPERAND_REG(SReg_32) +DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) +DECODE_OPERAND_REG(SReg_64) +DECODE_OPERAND_REG(SReg_64_XEXEC) +DECODE_OPERAND_REG(SReg_128) +DECODE_OPERAND_REG(SReg_256) +DECODE_OPERAND_REG(SReg_512) static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, @@ -106,6 +106,13 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); } +#define DECODE_SDWA9(DecName) \ +DECODE_OPERAND(decodeSDWA9##DecName, decodeSDWA9##DecName) + +DECODE_SDWA9(Src32) +DECODE_SDWA9(Src16) +DECODE_SDWA9(VopcDst) + #include "AMDGPUGenDisassemblerTables.inc" //===----------------------------------------------------------------------===// @@ -126,6 +133,7 @@ DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table, assert(MI.getOpcode() == 0); assert(MI.getNumOperands() == 0); MCInst TmpInst; + HasLiteral = false; const auto SavedBytes = Bytes; if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) { MI = TmpInst; @@ -163,6 +171,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address); if (Res) break; + + Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address); + if (Res) break; } // Reinitialize Bytes as DPP64 could have eaten too much @@ -343,10 +354,15 @@ MCOperand AMDGPUDisassembler::decodeLiteralConstant() const { // For now all literal constants are supposed to be unsigned integer // ToDo: deal with signed/unsigned 64-bit integer constants // ToDo: deal with float/double constants - if (Bytes.size() < 4) - return errOperand(0, "cannot read literal, inst bytes left " + - Twine(Bytes.size())); - return MCOperand::createImm(eatBytes(Bytes)); + if (!HasLiteral) { + if (Bytes.size() < 4) { + return errOperand(0, "cannot read literal, inst bytes left " + + Twine(Bytes.size())); + } + HasLiteral = true; + Literal = eatBytes(Bytes); + } + return MCOperand::createImm(Literal); } MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) { @@ -576,6 +592,48 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { return errOperand(Val, "unknown operand encoding " + Twine(Val)); } +MCOperand AMDGPUDisassembler::decodeSDWA9Src(const OpWidthTy Width, + unsigned Val) const { + using namespace AMDGPU::SDWA; + + if (SDWA9EncValues::SRC_VGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_VGPR_MAX) { + return createRegOperand(getVgprClassId(Width), + Val - SDWA9EncValues::SRC_VGPR_MIN); + } + if (SDWA9EncValues::SRC_SGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_SGPR_MAX) { + return createSRegOperand(getSgprClassId(Width), + Val - SDWA9EncValues::SRC_SGPR_MIN); + } + + return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN); +} + +MCOperand AMDGPUDisassembler::decodeSDWA9Src16(unsigned Val) const { + return decodeSDWA9Src(OPW16, Val); +} + +MCOperand AMDGPUDisassembler::decodeSDWA9Src32(unsigned Val) const { + return decodeSDWA9Src(OPW32, Val); +} + + +MCOperand AMDGPUDisassembler::decodeSDWA9VopcDst(unsigned Val) const { + using namespace AMDGPU::SDWA; + + if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { + Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; + if (Val > AMDGPU::EncValues::SGPR_MAX) { + return decodeSpecialReg64(Val); + } else { + return createSRegOperand(getSgprClassId(OPW64), Val); + } + } else { + return createRegOperand(AMDGPU::VCC); + } +} + //===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index d50665187e10ba49aba71a5b8d27c3efeade312c..5fa3cf1a223fac61054b77fd3c07e2274029c6fa 100644 --- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -20,8 +20,8 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" -#include #include +#include #include namespace llvm { @@ -39,6 +39,8 @@ class Twine; class AMDGPUDisassembler : public MCDisassembler { private: mutable ArrayRef Bytes; + mutable uint32_t Literal; + mutable bool HasLiteral; public: AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : @@ -102,6 +104,11 @@ public: MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; + + MCOperand decodeSDWA9Src(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeSDWA9Src16(unsigned Val) const; + MCOperand decodeSDWA9Src32(unsigned Val) const; + MCOperand decodeSDWA9VopcDst(unsigned Val) const; }; //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index b0ac0e689a0b6d8c72fef67188efb0cc5f4e5345..98eda288bcacb90630d9968d6af3bd4dd30a6ef6 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// -def FLATAtomic : ComplexPattern; +def FLATAtomic : ComplexPattern; +def FLATOffset : ComplexPattern; //===----------------------------------------------------------------------===// // FLAT classes @@ -55,6 +56,8 @@ class FLAT_Real op, FLAT_Pseudo ps> : // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; let AsmMatchConverter = ps.AsmMatchConverter; + let TSFlags = ps.TSFlags; + let UseNamedOperandTable = ps.UseNamedOperandTable; // encoding fields bits<8> vaddr; @@ -62,9 +65,24 @@ class FLAT_Real op, FLAT_Pseudo ps> : bits<8> vdst; bits<1> slc; bits<1> glc; - bits<1> tfe; - // 15-0 is reserved. + // Only valid on gfx9 + bits<1> lds = 0; // XXX - What does this actually do? + bits<2> seg; // Segment, 00=flat, 01=scratch, 10=global, 11=reserved + + // Signed offset. Highest bit ignored for flat and treated as 12-bit + // unsigned for flat acceses. + bits<13> offset; + bits<1> nv = 0; // XXX - What does this actually do? + + // We don't use tfe right now, and it was removed in gfx9. + bits<1> tfe = 0; + + // Only valid on GFX9+ + let Inst{12-0} = offset; + let Inst{13} = lds; + let Inst{15-14} = 0; + let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); let Inst{17} = slc; let Inst{24-18} = op; @@ -72,24 +90,30 @@ class FLAT_Real op, FLAT_Pseudo ps> : let Inst{39-32} = vaddr; let Inst{47-40} = !if(ps.has_data, vdata, ?); // 54-48 is reserved. - let Inst{55} = tfe; + let Inst{55} = nv; // nv on GFX9+, TFE before. let Inst{63-56} = !if(ps.has_vdst, vdst, ?); } -class FLAT_Load_Pseudo : FLAT_Pseudo< +class FLAT_Load_Pseudo : FLAT_Pseudo< opName, (outs regClass:$vdst), - (ins VReg_64:$vaddr, GLC:$glc, slc:$slc, tfe:$tfe), - " $vdst, $vaddr$glc$slc$tfe"> { + !if(HasSignedOffset, + (ins VReg_64:$vaddr, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins VReg_64:$vaddr, offset_u12:$offset, GLC:$glc, slc:$slc)), + " $vdst, $vaddr$offset$glc$slc"> { let has_data = 0; let mayLoad = 1; } -class FLAT_Store_Pseudo : FLAT_Pseudo< +class FLAT_Store_Pseudo : FLAT_Pseudo< opName, (outs), - (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc, tfe:$tfe), - " $vaddr, $vdata$glc$slc$tfe"> { + !if(HasSignedOffset, + (ins VReg_64:$vaddr, vdataClass:$vdata, offset_s13:$offset, GLC:$glc, slc:$slc), + (ins VReg_64:$vaddr, vdataClass:$vdata, offset_u12:$offset, GLC:$glc, slc:$slc)), + " $vaddr, $vdata$offset$glc$slc"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -101,12 +125,15 @@ multiclass FLAT_Atomic_Pseudo< ValueType vt, SDPatternOperator atomic = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> { + RegisterClass data_rc = vdst_rc, + bit HasSignedOffset = 0> { def "" : FLAT_Pseudo , AtomicNoRet { let mayLoad = 1; @@ -119,10 +146,12 @@ multiclass FLAT_Atomic_Pseudo< def _RTN : FLAT_Pseudo , + (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>, AtomicNoRet { let mayLoad = 1; let mayStore = 1; @@ -310,31 +339,31 @@ def flat_truncstorei16 : flat_st ; // Patterns for global loads with no offset. class FlatLoadPat : Pat < - (vt (node i64:$addr)), - (inst $addr, 0, 0, 0) + (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), + (inst $vaddr, $offset, 0, $slc) >; class FlatLoadAtomicPat : Pat < - (vt (node i64:$addr)), - (inst $addr, 1, 0, 0) + (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), + (inst $vaddr, $offset, 1, $slc) >; class FlatStorePat : Pat < - (node vt:$data, i64:$addr), - (inst $addr, $data, 0, 0, 0) + (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)), + (inst $vaddr, $data, $offset, 0, $slc) >; class FlatStoreAtomicPat : Pat < // atomic store follows atomic binop convention so the address comes // first. - (node i64:$addr, vt:$data), - (inst $addr, $data, 1, 0, 0) + (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), + (inst $vaddr, $data, $offset, 1, $slc) >; class FlatAtomicPat : Pat < - (vt (node i64:$addr, data_vt:$data)), - (inst $addr, $data, 0, 0) + (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)), + (inst $vaddr, $data, $offset, $slc) >; let Predicates = [isCIVI] in { diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 80fc4ac9d2a3e922a714f437b449a334957c06a5..cd9e7fb04f16b0161c7b596dddf5a198dd8ea682 100644 --- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUSubtarget.h" #include "GCNHazardRecognizer.h" +#include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index 3bb5c9bc22b7dbd1a40ccb0d4a5ca416a665463d..8ead480673363785e5262ad4147731fd70314ee7 100644 --- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -191,6 +191,7 @@ public: } }; +namespace { // just a stub to make base class happy class SchedStrategyStub : public MachineSchedStrategy { public: @@ -202,6 +203,7 @@ public: void releaseTopNode(SUnit *SU) override {} void releaseBottomNode(SUnit *SU) override {} }; +} // namespace GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C, StrategyKind S) diff --git a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp index c6d0f217995089df9becf6b1d4584e6e256ec89a..d378df674be9be536004c0798a07737981ae7571 100644 --- a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp +++ b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp @@ -17,6 +17,7 @@ using namespace llvm; #define DEBUG_TYPE "misched" +namespace { class GCNMinRegScheduler { struct Candidate : ilist_node { const SUnit *SU; @@ -71,6 +72,7 @@ public: std::vector schedule(ArrayRef TopRoots, const ScheduleDAG &DAG); }; +} // namespace void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) { NumPreds.resize(SUnits.size()); diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp index 4ecfa118fb27517e277f974f246a3925d06643da..390a8286c76a8e0261ff1174953046c9cd319c72 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "GCNRegPressure.h" +#include "llvm/CodeGen/RegisterPressure.h" using namespace llvm; @@ -27,7 +28,7 @@ void llvm::printLivesAt(SlotIndex SI, unsigned Num = 0; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { const unsigned Reg = TargetRegisterInfo::index2VirtReg(I); - if (MRI.reg_nodbg_empty(Reg)) + if (!LIS.hasInterval(Reg)) continue; const auto &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { @@ -63,15 +64,6 @@ static bool isEqual(const GCNRPTracker::LiveRegSet &S1, return true; } -static GCNRPTracker::LiveRegSet -stripEmpty(const GCNRPTracker::LiveRegSet &LR) { - GCNRPTracker::LiveRegSet Res; - for (const auto &P : LR) { - if (P.second.any()) - Res.insert(P); - } - return Res; -} #endif /////////////////////////////////////////////////////////////////////////////// @@ -83,8 +75,8 @@ unsigned GCNRegPressure::getRegKind(unsigned Reg, const auto RC = MRI.getRegClass(Reg); auto STI = static_cast(MRI.getTargetRegisterInfo()); return STI->isSGPRClass(RC) ? - (RC->getSize() == 4 ? SGPR32 : SGPR_TUPLE) : - (RC->getSize() == 4 ? VGPR32 : VGPR_TUPLE); + (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) : + (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE); } void GCNRegPressure::inc(unsigned Reg, @@ -131,13 +123,13 @@ bool GCNRegPressure::less(const SISubtarget &ST, const GCNRegPressure& O, unsigned MaxOccupancy) const { const auto SGPROcc = std::min(MaxOccupancy, - ST.getOccupancyWithNumSGPRs(getSGRPNum())); + ST.getOccupancyWithNumSGPRs(getSGPRNum())); const auto VGPROcc = std::min(MaxOccupancy, - ST.getOccupancyWithNumVGPRs(getVGRPNum())); + ST.getOccupancyWithNumVGPRs(getVGPRNum())); const auto OtherSGPROcc = std::min(MaxOccupancy, - ST.getOccupancyWithNumSGPRs(O.getSGRPNum())); + ST.getOccupancyWithNumSGPRs(O.getSGPRNum())); const auto OtherVGPROcc = std::min(MaxOccupancy, - ST.getOccupancyWithNumVGPRs(O.getVGRPNum())); + ST.getOccupancyWithNumVGPRs(O.getVGPRNum())); const auto Occ = std::min(SGPROcc, VGPROcc); const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc); @@ -167,17 +159,17 @@ bool GCNRegPressure::less(const SISubtarget &ST, return VW < OtherVW; } } - return SGPRImportant ? (getSGRPNum() < O.getSGRPNum()): - (getVGRPNum() < O.getVGRPNum()); + return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()): + (getVGPRNum() < O.getVGPRNum()); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const { - OS << "VGPRs: " << getVGRPNum(); - if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGRPNum()) << ')'; - OS << ", SGPRs: " << getSGRPNum(); - if (ST) OS << "(O" << ST->getOccupancyWithNumSGPRs(getSGRPNum()) << ')'; + OS << "VGPRs: " << getVGPRNum(); + if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGPRNum()) << ')'; + OS << ", SGPRs: " << getSGPRNum(); + if (ST) OS << "(O" << ST->getOccupancyWithNumSGPRs(getSGPRNum()) << ')'; OS << ", LVGPR WT: " << getVGPRTuplesWeight() << ", LSGPR WT: " << getSGPRTuplesWeight(); if (ST) OS << " -> Occ: " << getOccupancy(*ST); @@ -185,6 +177,64 @@ void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const { } #endif + +static LaneBitmask getDefRegMask(const MachineOperand &MO, + const MachineRegisterInfo &MRI) { + assert(MO.isDef() && MO.isReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())); + + // We don't rely on read-undef flag because in case of tentative schedule + // tracking it isn't set correctly yet. This works correctly however since + // use mask has been tracked before using LIS. + return MO.getSubReg() == 0 ? + MRI.getMaxLaneMaskForVReg(MO.getReg()) : + MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg()); +} + +static LaneBitmask getUsedRegMask(const MachineOperand &MO, + const MachineRegisterInfo &MRI, + const LiveIntervals &LIS) { + assert(MO.isUse() && MO.isReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())); + + if (auto SubReg = MO.getSubReg()) + return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg); + + auto MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg()); + if (MaxMask.getAsInteger() == 1) // cannot have subregs + return MaxMask; + + // For a tentative schedule LIS isn't updated yet but livemask should remain + // the same on any schedule. Subreg defs can be reordered but they all must + // dominate uses anyway. + auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex(); + return getLiveLaneMask(MO.getReg(), SI, LIS, MRI); +} + +static SmallVector +collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI) { + SmallVector Res; + for (const auto &MO : MI.operands()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + if (!MO.isUse() || !MO.readsReg()) + continue; + + auto const UsedMask = getUsedRegMask(MO, MRI, LIS); + + auto Reg = MO.getReg(); + auto I = std::find_if(Res.begin(), Res.end(), [Reg](const RegisterMaskPair &RM) { + return RM.RegUnit == Reg; + }); + if (I != Res.end()) + I->LaneMask |= UsedMask; + else + Res.push_back(RegisterMaskPair(Reg, UsedMask)); + } + return Res; +} + /////////////////////////////////////////////////////////////////////////////// // GCNRPTracker @@ -192,7 +242,6 @@ LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI, const LiveIntervals &LIS, const MachineRegisterInfo &MRI) { - assert(!MRI.reg_nodbg_empty(Reg)); LaneBitmask LiveMask; const auto &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { @@ -214,7 +263,7 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, GCNRPTracker::LiveRegSet LiveRegs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { auto Reg = TargetRegisterInfo::index2VirtReg(I); - if (MRI.reg_nodbg_empty(Reg)) + if (!LIS.hasInterval(Reg)) continue; auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI); if (LiveMask.any()) @@ -223,42 +272,18 @@ GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI, return LiveRegs; } -void GCNUpwardRPTracker::reset(const MachineInstr &MI) { +void GCNUpwardRPTracker::reset(const MachineInstr &MI, + const LiveRegSet *LiveRegsCopy) { MRI = &MI.getParent()->getParent()->getRegInfo(); - LiveRegs = getLiveRegsAfter(MI, LIS); + if (LiveRegsCopy) { + if (&LiveRegs != LiveRegsCopy) + LiveRegs = *LiveRegsCopy; + } else { + LiveRegs = getLiveRegsAfter(MI, LIS); + } MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); } -LaneBitmask GCNUpwardRPTracker::getDefRegMask(const MachineOperand &MO) const { - assert(MO.isDef() && MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())); - - // We don't rely on read-undef flag because in case of tentative schedule - // tracking it isn't set correctly yet. This works correctly however since - // use mask has been tracked before using LIS. - return MO.getSubReg() == 0 ? - MRI->getMaxLaneMaskForVReg(MO.getReg()) : - MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg()); -} - -LaneBitmask GCNUpwardRPTracker::getUsedRegMask(const MachineOperand &MO) const { - assert(MO.isUse() && MO.isReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())); - - if (auto SubReg = MO.getSubReg()) - return MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg); - - auto MaxMask = MRI->getMaxLaneMaskForVReg(MO.getReg()); - if (MaxMask.getAsInteger() == 1) // cannot have subregs - return MaxMask; - - // For a tentative schedule LIS isn't updated yet but livemask should remain - // the same on any schedule. Subreg defs can be reordered but they all must - // dominate uses anyway. - auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex(); - return getLiveLaneMask(MO.getReg(), SI, LIS, *MRI); -} - void GCNUpwardRPTracker::recede(const MachineInstr &MI) { assert(MRI && "call reset first"); @@ -267,36 +292,136 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { if (MI.isDebugValue()) return; - // process all defs first to ensure early clobbers are handled correctly - // iterating over operands() to catch implicit defs - for (const auto &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + auto const RegUses = collectVirtualRegUses(MI, LIS, *MRI); + + // calc pressure at the MI (defs + uses) + auto AtMIPressure = CurPressure; + for (const auto &U : RegUses) { + auto LiveMask = LiveRegs[U.RegUnit]; + AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI); + } + // update max pressure + MaxPressure = max(AtMIPressure, MaxPressure); + + for (const auto &MO : MI.defs()) { + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()) || + MO.isDead()) continue; auto Reg = MO.getReg(); - auto &LiveMask = LiveRegs[Reg]; + auto I = LiveRegs.find(Reg); + if (I == LiveRegs.end()) + continue; + auto &LiveMask = I->second; auto PrevMask = LiveMask; - LiveMask &= ~getDefRegMask(MO); + LiveMask &= ~getDefRegMask(MO, *MRI); CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); + if (LiveMask.none()) + LiveRegs.erase(I); + } + for (const auto &U : RegUses) { + auto &LiveMask = LiveRegs[U.RegUnit]; + auto PrevMask = LiveMask; + LiveMask |= U.LaneMask; + CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI); + } + assert(CurPressure == getRegPressure(*MRI, LiveRegs)); +} + +bool GCNDownwardRPTracker::reset(const MachineInstr &MI, + const LiveRegSet *LiveRegsCopy) { + MRI = &MI.getParent()->getParent()->getRegInfo(); + LastTrackedMI = nullptr; + MBBEnd = MI.getParent()->end(); + NextMI = &MI; + NextMI = skipDebugInstructionsForward(NextMI, MBBEnd); + if (NextMI == MBBEnd) + return false; + if (LiveRegsCopy) { + if (&LiveRegs != LiveRegsCopy) + LiveRegs = *LiveRegsCopy; + } else { + LiveRegs = getLiveRegsBefore(*NextMI, LIS); } + MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); + return true; +} - // then all uses - for (const auto &MO : MI.uses()) { - if (!MO.isReg() || !MO.readsReg() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; +bool GCNDownwardRPTracker::advanceBeforeNext() { + assert(MRI && "call reset first"); - auto Reg = MO.getReg(); + NextMI = skipDebugInstructionsForward(NextMI, MBBEnd); + if (NextMI == MBBEnd) + return false; + + SlotIndex SI = LIS.getInstructionIndex(*NextMI).getBaseIndex(); + assert(SI.isValid()); + + // Remove dead registers or mask bits. + for (auto &It : LiveRegs) { + const LiveInterval &LI = LIS.getInterval(It.first); + if (LI.hasSubRanges()) { + for (const auto &S : LI.subranges()) { + if (!S.liveAt(SI)) { + auto PrevMask = It.second; + It.second &= ~S.LaneMask; + CurPressure.inc(It.first, PrevMask, It.second, *MRI); + } + } + } else if (!LI.liveAt(SI)) { + auto PrevMask = It.second; + It.second = LaneBitmask::getNone(); + CurPressure.inc(It.first, PrevMask, It.second, *MRI); + } + if (It.second.none()) + LiveRegs.erase(It.first); + } + + MaxPressure = max(MaxPressure, CurPressure); + + return true; +} + +void GCNDownwardRPTracker::advanceToNext() { + LastTrackedMI = &*NextMI++; + + // Add new registers or mask bits. + for (const auto &MO : LastTrackedMI->defs()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; auto &LiveMask = LiveRegs[Reg]; auto PrevMask = LiveMask; - LiveMask |= getUsedRegMask(MO); + LiveMask |= getDefRegMask(MO, *MRI); CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); } MaxPressure = max(MaxPressure, CurPressure); } +bool GCNDownwardRPTracker::advance() { + // If we have just called reset live set is actual. + if ((NextMI == MBBEnd) || (LastTrackedMI && !advanceBeforeNext())) + return false; + advanceToNext(); + return true; +} + +bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator End) { + while (NextMI != End) + if (!advance()) return false; + return true; +} + +bool GCNDownwardRPTracker::advance(MachineBasicBlock::const_iterator Begin, + MachineBasicBlock::const_iterator End, + const LiveRegSet *LiveRegsCopy) { + reset(*Begin, LiveRegsCopy); + return advance(End); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD static void reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, @@ -331,7 +456,7 @@ static void reportMismatch(const GCNRPTracker::LiveRegSet &LISLR, bool GCNUpwardRPTracker::isValid() const { const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex(); const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI); - const auto TrackedLR = stripEmpty(LiveRegs); + const auto &TrackedLR = LiveRegs; if (!isEqual(LISLR, TrackedLR)) { dbgs() << "\nGCNUpwardRPTracker error: Tracked and" @@ -352,4 +477,16 @@ bool GCNUpwardRPTracker::isValid() const { return true; } +void GCNRPTracker::printLiveRegs(raw_ostream &OS, const LiveRegSet& LiveRegs, + const MachineRegisterInfo &MRI) { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + auto It = LiveRegs.find(Reg); + if (It != LiveRegs.end() && It->second.any()) + OS << ' ' << PrintVRegOrUnit(Reg, TRI) << ':' + << PrintLaneMask(It->second); + } + OS << '\n'; +} #endif diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h index 82e76a7bfddccfc5e2e55a29b9c84e11484fb7f7..5dfe44053e72893a8986bfe227b02de53dd79937 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.h +++ b/lib/Target/AMDGPU/GCNRegPressure.h @@ -33,19 +33,19 @@ struct GCNRegPressure { clear(); } - bool empty() const { return getSGRPNum() == 0 && getVGRPNum() == 0; } + bool empty() const { return getSGPRNum() == 0 && getVGPRNum() == 0; } void clear() { std::fill(&Value[0], &Value[TOTAL_KINDS], 0); } - unsigned getSGRPNum() const { return Value[SGPR32]; } - unsigned getVGRPNum() const { return Value[VGPR32]; } + unsigned getSGPRNum() const { return Value[SGPR32]; } + unsigned getVGPRNum() const { return Value[VGPR32]; } unsigned getVGPRTuplesWeight() const { return Value[VGPR_TUPLE]; } unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; } unsigned getOccupancy(const SISubtarget &ST) const { - return std::min(ST.getOccupancyWithNumSGPRs(getSGRPNum()), - ST.getOccupancyWithNumVGPRs(getVGRPNum())); + return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()), + ST.getOccupancyWithNumVGPRs(getVGPRNum())); } void inc(unsigned Reg, @@ -92,16 +92,19 @@ public: typedef DenseMap LiveRegSet; protected: + const LiveIntervals &LIS; LiveRegSet LiveRegs; GCNRegPressure CurPressure, MaxPressure; const MachineInstr *LastTrackedMI = nullptr; mutable const MachineRegisterInfo *MRI = nullptr; - GCNRPTracker() {} + GCNRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {} public: // live regs for the current state const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; } const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; } + void clearMaxPressure() { MaxPressure.clear(); } + // returns MaxPressure, resetting it decltype(MaxPressure) moveMaxPressure() { auto Res = MaxPressure; @@ -111,17 +114,16 @@ public: decltype(LiveRegs) moveLiveRegs() { return std::move(LiveRegs); } + static void printLiveRegs(raw_ostream &OS, const LiveRegSet& LiveRegs, + const MachineRegisterInfo &MRI); }; class GCNUpwardRPTracker : public GCNRPTracker { - const LiveIntervals &LIS; - LaneBitmask getDefRegMask(const MachineOperand &MO) const; - LaneBitmask getUsedRegMask(const MachineOperand &MO) const; public: - GCNUpwardRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {} + GCNUpwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} // reset tracker to the point just below MI // filling live regs upon this point using LIS - void reset(const MachineInstr &MI); + void reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); // move to the state just above the MI void recede(const MachineInstr &MI); @@ -131,6 +133,41 @@ public: bool isValid() const; }; +class GCNDownwardRPTracker : public GCNRPTracker { + // Last position of reset or advanceBeforeNext + MachineBasicBlock::const_iterator NextMI; + + MachineBasicBlock::const_iterator MBBEnd; + +public: + GCNDownwardRPTracker(const LiveIntervals &LIS_) : GCNRPTracker(LIS_) {} + + const MachineBasicBlock::const_iterator getNext() const { return NextMI; } + + // Reset tracker to the point before the MI + // filling live regs upon this point using LIS. + // Returns false if block is empty except debug values. + bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); + + // Move to the state right before the next MI. Returns false if reached + // end of the block. + bool advanceBeforeNext(); + + // Move to the state at the MI, advanceBeforeNext has to be called first. + void advanceToNext(); + + // Move to the state at the next MI. Returns false if reached end of block. + bool advance(); + + // Advance instructions until before End. + bool advance(MachineBasicBlock::const_iterator End); + + // Reset to Begin and advance to End. + bool advance(MachineBasicBlock::const_iterator Begin, + MachineBasicBlock::const_iterator End, + const LiveRegSet *LiveRegsCopy = nullptr); +}; + LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI, const LiveIntervals &LIS, diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp index ea305a92fc60df4829cfc3da558e1c78ca72d8f2..8ec46665daf564140bc0fce4b91d549d75528349 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -316,46 +316,57 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C, MFI(*MF.getInfo()), StartingOccupancy(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(), *MF.getFunction())), - MinOccupancy(StartingOccupancy), Stage(0) { + MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) { DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n"); } void GCNScheduleDAGMILive::schedule() { + if (Stage == 0) { + // Just record regions at the first pass. + Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); + return; + } + std::vector Unsched; Unsched.reserve(NumRegionInstrs); for (auto &I : *this) Unsched.push_back(&I); - std::pair PressureBefore; + GCNRegPressure PressureBefore; if (LIS) { - DEBUG(dbgs() << "Pressure before scheduling:\n"); - discoverLiveIns(); - PressureBefore = getRealRegPressure(); + PressureBefore = Pressure[RegionIdx]; + + DEBUG(dbgs() << "Pressure before scheduling:\nRegion live-ins:"; + GCNRPTracker::printLiveRegs(dbgs(), LiveIns[RegionIdx], MRI); + dbgs() << "Region live-in pressure: "; + llvm::getRegPressure(MRI, LiveIns[RegionIdx]).print(dbgs()); + dbgs() << "Region register pressure: "; + PressureBefore.print(dbgs())); } ScheduleDAGMILive::schedule(); - if (Stage == 0) - Regions.push_back(std::make_pair(RegionBegin, RegionEnd)); + Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); if (!LIS) return; // Check the results of scheduling. GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; - DEBUG(dbgs() << "Pressure after scheduling:\n"); auto PressureAfter = getRealRegPressure(); - LiveIns.clear(); - if (PressureAfter.first <= S.SGPRCriticalLimit && - PressureAfter.second <= S.VGPRCriticalLimit) { + DEBUG(dbgs() << "Pressure after scheduling: "; PressureAfter.print(dbgs())); + + if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && + PressureAfter.getVGPRNum() <= S.VGPRCriticalLimit) { + Pressure[RegionIdx] = PressureAfter; DEBUG(dbgs() << "Pressure in desired limits, done.\n"); return; } - unsigned WavesAfter = getMaxWaves(PressureAfter.first, - PressureAfter.second, MF); - unsigned WavesBefore = getMaxWaves(PressureBefore.first, - PressureBefore.second, MF); + unsigned WavesAfter = getMaxWaves(PressureAfter.getSGPRNum(), + PressureAfter.getVGPRNum(), MF); + unsigned WavesBefore = getMaxWaves(PressureBefore.getSGPRNum(), + PressureBefore.getVGPRNum(), MF); DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore << ", after " << WavesAfter << ".\n"); @@ -368,8 +379,10 @@ void GCNScheduleDAGMILive::schedule() { << MinOccupancy << ".\n"); } - if (WavesAfter >= WavesBefore) + if (WavesAfter >= WavesBefore) { + Pressure[RegionIdx] = PressureAfter; return; + } DEBUG(dbgs() << "Attempting to revert scheduling.\n"); RegionEnd = RegionBegin; @@ -398,165 +411,139 @@ void GCNScheduleDAGMILive::schedule() { DEBUG(dbgs() << "Scheduling " << *MI); } RegionBegin = Unsched.front()->getIterator(); - if (Stage == 0) - Regions.back() = std::make_pair(RegionBegin, RegionEnd); + Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); placeDebugValues(); } -static inline void setMask(const MachineRegisterInfo &MRI, - const SIRegisterInfo *SRI, unsigned Reg, - LaneBitmask &PrevMask, LaneBitmask NewMask, - unsigned &SGPRs, unsigned &VGPRs) { - int NewRegs = countPopulation(NewMask.getAsInteger()) - - countPopulation(PrevMask.getAsInteger()); - if (SRI->isSGPRReg(MRI, Reg)) - SGPRs += NewRegs; - if (SRI->isVGPR(MRI, Reg)) - VGPRs += NewRegs; - assert ((int)SGPRs >= 0 && (int)VGPRs >= 0); - PrevMask = NewMask; +GCNRegPressure GCNScheduleDAGMILive::getRealRegPressure() const { + GCNDownwardRPTracker RPTracker(*LIS); + RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]); + return RPTracker.moveMaxPressure(); } -void GCNScheduleDAGMILive::discoverLiveIns() { - unsigned SGPRs = 0; - unsigned VGPRs = 0; +void GCNScheduleDAGMILive::computeBlockPressure(const MachineBasicBlock *MBB) { + GCNDownwardRPTracker RPTracker(*LIS); + + // If the block has the only successor then live-ins of that successor are + // live-outs of the current block. We can reuse calculated live set if the + // successor will be sent to scheduling past current block. + const MachineBasicBlock *OnlySucc = nullptr; + if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) { + SlotIndexes *Ind = LIS->getSlotIndexes(); + if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin())) + OnlySucc = *MBB->succ_begin(); + } - const SIRegisterInfo *SRI = static_cast(TRI); - SlotIndex SI = LIS->getInstructionIndex(*begin()).getBaseIndex(); - assert (SI.isValid()); - - DEBUG(dbgs() << "Region live-ins:"); - for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(I); - if (MRI.reg_nodbg_empty(Reg)) - continue; - const LiveInterval &LI = LIS->getInterval(Reg); - LaneBitmask LaneMask = LaneBitmask::getNone(); - if (LI.hasSubRanges()) { - for (const auto &S : LI.subranges()) - if (S.liveAt(SI)) - LaneMask |= S.LaneMask; - } else if (LI.liveAt(SI)) { - LaneMask = MRI.getMaxLaneMaskForVReg(Reg); - } + // Scheduler sends regions from the end of the block upwards. + size_t CurRegion = RegionIdx; + for (size_t E = Regions.size(); CurRegion != E; ++CurRegion) + if (Regions[CurRegion].first->getParent() != MBB) + break; + --CurRegion; + + auto I = MBB->begin(); + auto LiveInIt = MBBLiveIns.find(MBB); + if (LiveInIt != MBBLiveIns.end()) { + auto LiveIn = std::move(LiveInIt->second); + RPTracker.reset(*MBB->begin(), &LiveIn); + MBBLiveIns.erase(LiveInIt); + } else { + I = Regions[CurRegion].first; + RPTracker.reset(*I); + } - if (LaneMask.any()) { - setMask(MRI, SRI, Reg, LiveIns[Reg], LaneMask, SGPRs, VGPRs); + for ( ; ; ) { + I = RPTracker.getNext(); - DEBUG(dbgs() << ' ' << PrintVRegOrUnit(Reg, SRI) << ':' - << PrintLaneMask(LiveIns[Reg])); + if (Regions[CurRegion].first == I) { + LiveIns[CurRegion] = RPTracker.getLiveRegs(); + RPTracker.clearMaxPressure(); } - } - LiveInPressure = std::make_pair(SGPRs, VGPRs); + if (Regions[CurRegion].second == I) { + Pressure[CurRegion] = RPTracker.moveMaxPressure(); + if (CurRegion-- == RegionIdx) + break; + } + RPTracker.advanceToNext(); + RPTracker.advanceBeforeNext(); + } - DEBUG(dbgs() << "\nLive-in pressure:\nSGPR = " << SGPRs - << "\nVGPR = " << VGPRs << '\n'); + if (OnlySucc) { + if (I != MBB->end()) { + RPTracker.advanceToNext(); + RPTracker.advance(MBB->end()); + } + RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs()); + RPTracker.advanceBeforeNext(); + MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs(); + } } -std::pair -GCNScheduleDAGMILive::getRealRegPressure() const { - unsigned SGPRs, MaxSGPRs, VGPRs, MaxVGPRs; - SGPRs = MaxSGPRs = LiveInPressure.first; - VGPRs = MaxVGPRs = LiveInPressure.second; - - const SIRegisterInfo *SRI = static_cast(TRI); - DenseMap LiveRegs(LiveIns); +void GCNScheduleDAGMILive::finalizeSchedule() { + GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; + DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); - for (const MachineInstr &MI : *this) { - if (MI.isDebugValue()) - continue; - SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex(); - assert (SI.isValid()); + LiveIns.resize(Regions.size()); + Pressure.resize(Regions.size()); - // Remove dead registers or mask bits. - for (auto &It : LiveRegs) { - if (It.second.none()) - continue; - const LiveInterval &LI = LIS->getInterval(It.first); - if (LI.hasSubRanges()) { - for (const auto &S : LI.subranges()) - if (!S.liveAt(SI)) - setMask(MRI, SRI, It.first, It.second, It.second & ~S.LaneMask, - SGPRs, VGPRs); - } else if (!LI.liveAt(SI)) { - setMask(MRI, SRI, It.first, It.second, LaneBitmask::getNone(), - SGPRs, VGPRs); - } - } + do { + Stage++; + RegionIdx = 0; + MachineBasicBlock *MBB = nullptr; - // Add new registers or mask bits. - for (const auto &MO : MI.defs()) { - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - unsigned SubRegIdx = MO.getSubReg(); - LaneBitmask LaneMask = SubRegIdx != 0 - ? TRI->getSubRegIndexLaneMask(SubRegIdx) - : MRI.getMaxLaneMaskForVReg(Reg); - LaneBitmask &LM = LiveRegs[Reg]; - setMask(MRI, SRI, Reg, LM, LM | LaneMask, SGPRs, VGPRs); - } - MaxSGPRs = std::max(MaxSGPRs, SGPRs); - MaxVGPRs = std::max(MaxVGPRs, VGPRs); - } + if (Stage > 1) { + // Retry function scheduling if we found resulting occupancy and it is + // lower than used for first pass scheduling. This will give more freedom + // to schedule low register pressure blocks. + // Code is partially copied from MachineSchedulerBase::scheduleRegions(). - DEBUG(dbgs() << "Real region's register pressure:\nSGPR = " << MaxSGPRs - << "\nVGPR = " << MaxVGPRs << '\n'); + if (!LIS || StartingOccupancy <= MinOccupancy) + break; - return std::make_pair(MaxSGPRs, MaxVGPRs); -} + DEBUG(dbgs() + << "Retrying function scheduling with lowest recorded occupancy " + << MinOccupancy << ".\n"); -void GCNScheduleDAGMILive::finalizeSchedule() { - // Retry function scheduling if we found resulting occupancy and it is - // lower than used for first pass scheduling. This will give more freedom - // to schedule low register pressure blocks. - // Code is partially copied from MachineSchedulerBase::scheduleRegions(). + S.setTargetOccupancy(MinOccupancy); + } - if (!LIS || StartingOccupancy <= MinOccupancy) - return; + for (auto Region : Regions) { + RegionBegin = Region.first; + RegionEnd = Region.second; - DEBUG(dbgs() << "Retrying function scheduling with lowest recorded occupancy " - << MinOccupancy << ".\n"); + if (RegionBegin->getParent() != MBB) { + if (MBB) finishBlock(); + MBB = RegionBegin->getParent(); + startBlock(MBB); + if (Stage == 1) + computeBlockPressure(MBB); + } - Stage++; - GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; - S.setTargetOccupancy(MinOccupancy); + unsigned NumRegionInstrs = std::distance(begin(), end()); + enterRegion(MBB, begin(), end(), NumRegionInstrs); - MachineBasicBlock *MBB = nullptr; - for (auto Region : Regions) { - RegionBegin = Region.first; - RegionEnd = Region.second; + // Skip empty scheduling regions (0 or 1 schedulable instructions). + if (begin() == end() || begin() == std::prev(end())) { + exitRegion(); + continue; + } - if (RegionBegin->getParent() != MBB) { - if (MBB) finishBlock(); - MBB = RegionBegin->getParent(); - startBlock(MBB); - } + DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(dbgs() << MF.getName() + << ":BB#" << MBB->getNumber() << " " << MBB->getName() + << "\n From: " << *begin() << " To: "; + if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; + else dbgs() << "End"; + dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); - unsigned NumRegionInstrs = std::distance(begin(), end()); - enterRegion(MBB, begin(), end(), NumRegionInstrs); + schedule(); - // Skip empty scheduling regions (0 or 1 schedulable instructions). - if (begin() == end() || begin() == std::prev(end())) { exitRegion(); - continue; + ++RegionIdx; } - DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(dbgs() << MF.getName() - << ":BB#" << MBB->getNumber() << " " << MBB->getName() - << "\n From: " << *begin() << " To: "; - if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; - else dbgs() << "End"; - dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n'); + finishBlock(); - schedule(); - - exitRegion(); - } - finishBlock(); - LiveIns.shrink_and_clear(); + } while (Stage < 2); } diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h index 15af232704ffa091dae67ec27581e8b3a06b7555..3ed3cd5b3b1ce6488d29d09fb8bc38bbbf61b69b 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H +#include "GCNRegPressure.h" #include "llvm/CodeGen/MachineScheduler.h" namespace llvm { @@ -74,21 +75,28 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive { // Scheduling stage number. unsigned Stage; + // Current region index. + size_t RegionIdx; + // Vecor of regions recorder for later rescheduling SmallVector, 32> Regions; - // Region live-ins. - DenseMap LiveIns; + // Region live-in cache. + SmallVector LiveIns; + + // Region pressure cache. + SmallVector Pressure; + + // Temporary basic block live-in cache. + DenseMap MBBLiveIns; - // Number of live-ins to the current region, first SGPR then VGPR. - std::pair LiveInPressure; + // Return current region pressure. + GCNRegPressure getRealRegPressure() const; - // Collect current region live-ins. - void discoverLiveIns(); + // Compute and cache live-ins and pressure for all regions in block. + void computeBlockPressure(const MachineBasicBlock *MBB); - // Return current region pressure. First value is SGPR number, second is VGPR. - std::pair getRealRegPressure() const; public: GCNScheduleDAGMILive(MachineSchedContext *C, diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index a817ff3cbaf09697f35938a8bf5958c878486a17..b84640230eeeb8b271c78a6f9c856939c78b4e37 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -9,8 +9,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUInstPrinter.h" -#include "SIDefines.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIDefines.h" #include "Utils/AMDGPUAsmUtils.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCExpr.h" @@ -1160,6 +1160,112 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo, O << SImm16; // Unknown simm16 code. } +static void printSwizzleBitmask(const uint16_t AndMask, + const uint16_t OrMask, + const uint16_t XorMask, + raw_ostream &O) { + using namespace llvm::AMDGPU::Swizzle; + + uint16_t Probe0 = ((0 & AndMask) | OrMask) ^ XorMask; + uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask; + + O << "\""; + + for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) { + uint16_t p0 = Probe0 & Mask; + uint16_t p1 = Probe1 & Mask; + + if (p0 == p1) { + if (p0 == 0) { + O << "0"; + } else { + O << "1"; + } + } else { + if (p0 == 0) { + O << "p"; + } else { + O << "i"; + } + } + } + + O << "\""; +} + +void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + using namespace llvm::AMDGPU::Swizzle; + + uint16_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm == 0) { + return; + } + + O << " offset:"; + + if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) { + + O << "swizzle(" << IdSymbolic[ID_QUAD_PERM]; + for (auto i = 0; i < LANE_NUM; ++i) { + O << ","; + O << formatDec(Imm & LANE_MASK); + Imm >>= LANE_SHIFT; + } + O << ")"; + + } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) { + + uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK; + uint16_t OrMask = (Imm >> BITMASK_OR_SHIFT) & BITMASK_MASK; + uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK; + + if (AndMask == BITMASK_MAX && + OrMask == 0 && + countPopulation(XorMask) == 1) { + + O << "swizzle(" << IdSymbolic[ID_SWAP]; + O << ","; + O << formatDec(XorMask); + O << ")"; + + } else if (AndMask == BITMASK_MAX && + OrMask == 0 && XorMask > 0 && + isPowerOf2_64(XorMask + 1)) { + + O << "swizzle(" << IdSymbolic[ID_REVERSE]; + O << ","; + O << formatDec(XorMask + 1); + O << ")"; + + } else { + + uint16_t GroupSize = BITMASK_MAX - AndMask + 1; + if (GroupSize > 1 && + isPowerOf2_64(GroupSize) && + OrMask < GroupSize && + XorMask == 0) { + + O << "swizzle(" << IdSymbolic[ID_BROADCAST]; + O << ","; + O << formatDec(GroupSize); + O << ","; + O << formatDec(OrMask); + O << ")"; + + } else { + O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM]; + O << ","; + printSwizzleBitmask(AndMask, OrMask, XorMask, O); + O << ")"; + } + } + } else { + printU16ImmDecOperand(MI, OpNo, O); + } +} + void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h index c0b8e5c510893be223ddf280ffe1fff581b01d7d..c8094c4b840a1e4f5d5e96bfb8e086518d0ec90f 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -193,6 +193,8 @@ private: raw_ostream &O); void printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); + void printSwizzle(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &O); void printWaitFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index f3266fe82955c5a349ea0378a5036643ab46d39b..0a9c2b94c1eee9e34acc5e6a4860bf9af0cbb62b 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -8,8 +8,8 @@ /// \file //===----------------------------------------------------------------------===// -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp index 59571a48a962b4d7ca5aac762a78ebb944de306c..4e828a791e09f74912106a34f784b5d64940b5e5 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -13,20 +13,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUCodeObjectMetadataStreamer.h" +#include "AMDGPU.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" -#include "llvm/Support/YAMLTraits.h" - -using namespace llvm::AMDGPU; -using namespace llvm::AMDGPU::CodeObject; - -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t) -LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) -LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -37,192 +29,7 @@ static cl::opt VerifyCodeObjectMetadata( "amdgpu-verify-comd", cl::desc("Verify AMDGPU Code Object Metadata")); -namespace yaml { - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, AccessQualifier &EN) { - YIO.enumCase(EN, "Default", AccessQualifier::Default); - YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly); - YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly); - YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, AddressSpaceQualifier &EN) { - YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private); - YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global); - YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant); - YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local); - YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic); - YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, ValueKind &EN) { - YIO.enumCase(EN, "ByValue", ValueKind::ByValue); - YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer); - YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer); - YIO.enumCase(EN, "Sampler", ValueKind::Sampler); - YIO.enumCase(EN, "Image", ValueKind::Image); - YIO.enumCase(EN, "Pipe", ValueKind::Pipe); - YIO.enumCase(EN, "Queue", ValueKind::Queue); - YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX); - YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY); - YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ); - YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone); - YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer); - YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); - YIO.enumCase(EN, "HiddenCompletionAction", - ValueKind::HiddenCompletionAction); - } -}; - -template <> -struct ScalarEnumerationTraits { - static void enumeration(IO &YIO, ValueType &EN) { - YIO.enumCase(EN, "Struct", ValueType::Struct); - YIO.enumCase(EN, "I8", ValueType::I8); - YIO.enumCase(EN, "U8", ValueType::U8); - YIO.enumCase(EN, "I16", ValueType::I16); - YIO.enumCase(EN, "U16", ValueType::U16); - YIO.enumCase(EN, "F16", ValueType::F16); - YIO.enumCase(EN, "I32", ValueType::I32); - YIO.enumCase(EN, "U32", ValueType::U32); - YIO.enumCase(EN, "F32", ValueType::F32); - YIO.enumCase(EN, "I64", ValueType::I64); - YIO.enumCase(EN, "U64", ValueType::U64); - YIO.enumCase(EN, "F64", ValueType::F64); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) { - YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize, - MD.mReqdWorkGroupSize, std::vector()); - YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint, - MD.mWorkGroupSizeHint, std::vector()); - YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, - MD.mVecTypeHint, std::string()); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { - YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); - YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); - YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); - YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); - YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, - uint32_t(0)); - YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, - AccessQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, - AddressSpaceQualifier::Unknown); - YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); - YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); - YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); - YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); - YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); - YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, - MD.mKernargSegmentSize, uint64_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, - MD.mWorkgroupGroupSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, - MD.mWorkitemPrivateSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, - MD.mWavefrontNumSGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, - MD.mWorkitemNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, - MD.mKernargSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, - MD.mGroupSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, - MD.mPrivateSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, - MD.mWavefrontSize, uint8_t(0)); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) { - YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion, - MD.mDebuggerABIVersion, std::vector()); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs, - MD.mReservedNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR, - MD.mReservedFirstVGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR, - MD.mPrivateSegmentBufferSGPR, uint16_t(-1)); - YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR, - MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1)); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, Kernel::Metadata &MD) { - YIO.mapRequired(Kernel::Key::Name, MD.mName); - YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); - YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, - std::vector()); - if (!MD.mAttrs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs); - if (!MD.mArgs.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::Args, MD.mArgs); - if (!MD.mCodeProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps); - if (!MD.mDebugProps.empty() || !YIO.outputting()) - YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps); - } -}; - -template <> -struct MappingTraits { - static void mapping(IO &YIO, CodeObject::Metadata &MD) { - YIO.mapRequired(Key::Version, MD.mVersion); - YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector()); - if (!MD.mKernels.empty() || !YIO.outputting()) - YIO.mapOptional(Key::Kernels, MD.mKernels); - } -}; - -} // end namespace yaml - namespace AMDGPU { - -/* static */ -std::error_code CodeObject::Metadata::fromYamlString( - std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) { - yaml::Input YamlInput(YamlString); - YamlInput >> CodeObjectMetadata; - return YamlInput.error(); -} - -/* static */ -std::error_code CodeObject::Metadata::toYamlString( - CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) { - raw_string_ostream YamlStream(YamlString); - yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits::max()); - YamlOutput << CodeObjectMetadata; - return std::error_code(); -} - namespace CodeObject { void MetadataStreamer::dump(StringRef YamlString) const { @@ -286,20 +93,20 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, return ValueKind::Pipe; return StringSwitch(BaseTypeName) + .Case("image1d_t", ValueKind::Image) + .Case("image1d_array_t", ValueKind::Image) + .Case("image1d_buffer_t", ValueKind::Image) + .Case("image2d_t", ValueKind::Image) + .Case("image2d_array_t", ValueKind::Image) + .Case("image2d_array_depth_t", ValueKind::Image) + .Case("image2d_array_msaa_t", ValueKind::Image) + .Case("image2d_array_msaa_depth_t", ValueKind::Image) + .Case("image2d_depth_t", ValueKind::Image) + .Case("image2d_msaa_t", ValueKind::Image) + .Case("image2d_msaa_depth_t", ValueKind::Image) + .Case("image3d_t", ValueKind::Image) .Case("sampler_t", ValueKind::Sampler) .Case("queue_t", ValueKind::Queue) - .Cases("image1d_t", - "image1d_array_t", - "image1d_buffer_t", - "image2d_t" , - "image2d_array_t", - "image2d_array_depth_t", - "image2d_array_msaa_t" - "image2d_array_msaa_depth_t" - "image2d_depth_t", - "image2d_msaa_t", - "image2d_msaa_depth_t", - "image3d_t", ValueKind::Image) .Default(isa(Ty) ? (Ty->getPointerAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ? @@ -478,9 +285,14 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) { BaseTypeName = cast(Node->getOperand(ArgNo))->getString(); StringRef AccQual; - Node = Func->getMetadata("kernel_arg_access_qual"); - if (Node && ArgNo < Node->getNumOperands()) - AccQual = cast(Node->getOperand(ArgNo))->getString(); + if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() && + Arg.hasNoAliasAttr()) { + AccQual = "read_only"; + } else { + Node = Func->getMetadata("kernel_arg_access_qual"); + if (Node && ArgNo < Node->getNumOperands()) + AccQual = cast(Node->getOperand(ArgNo))->getString(); + } StringRef Name; Node = Func->getMetadata("kernel_arg_name"); diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h index 8d4c51763f63d8387e3f23cd0fec9082288d69de..c6681431d74d4d7d8f1e083b788dca53993795f2 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -17,9 +17,9 @@ #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H #include "AMDGPU.h" -#include "AMDGPUCodeObjectMetadata.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/AMDGPUCodeObjectMetadata.h" #include "llvm/Support/ErrorOr.h" namespace llvm { diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 073d19422e863cef76a5ee503028fb8239ecf5e8..6abe7f3d37d5efd390f1c58cdfae405b69ec8d1b 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 1655591abf39084d342590f409d1f1131cc18825..2364e7b7b5fb685e158ca7262e42704fce08c5ba 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -14,6 +14,8 @@ using namespace llvm; AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { + CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 8 : 4; + StackGrowsUp = true; HasSingleParameterDotFile = false; //===------------------------------------------------------------------===// MinInstAlignment = 4; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h index 3d3858ab47ece4652ec6fc48f823a80f8314154c..a856b17a228f0f5204205f817f882bf5a1531901 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -52,6 +52,18 @@ public: return 0; } + virtual unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + + virtual unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + protected: uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; void verifyInstructionPredicates(const MCInst &MI, diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 8dc863f723e2e260ef57f61b888786c124769612..2a0032fc9adcde745f0556ea22ebd7fc46d833ae 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUTargetStreamer.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "Utils/AMDGPUBaseInfo.h" #include "Utils/AMDKernelCodeTUtils.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" @@ -25,7 +26,6 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" namespace llvm { @@ -100,16 +100,6 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, } } -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n'; -} - -void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; -} - bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) @@ -214,24 +204,6 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL); } -void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_LOCAL); -} - -void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal( - StringRef GlobalName) { - - MCSymbolELF *Symbol = cast( - getStreamer().getContext().getOrCreateSymbol(GlobalName)); - Symbol->setType(ELF::STT_OBJECT); - Symbol->setBinding(ELF::STB_GLOBAL); -} - bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) { auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); if (!VerifiedYamlString) diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 5c588bbded9c0b34158ac51f3ea8df7ca89f6d3d..968128e94d0b2b7bd072a5d6f1afae3f0aaec751 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -44,10 +44,6 @@ public: virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0; - virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0; - - virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0; - virtual void EmitStartOfCodeObjectMetadata(const Module &Mod); virtual void EmitKernelCodeObjectMetadata( @@ -74,10 +70,6 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; }; @@ -105,10 +97,6 @@ public: void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; - void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override; - - void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override; - /// \returns True on success, false on failure. bool EmitCodeObjectMetadata(StringRef YamlString) override; }; diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp index 6015ec190fd471d81858208add478f0179000a1e..eab90e1d344ca691a5aa76756a7d40dce04d8d80 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "R600Defines.h" #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCCodeEmitter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "R600Defines.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCFixup.h" diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index bda0928036fdeccea63121580aecf0cb19acd007..e02acf516c0dbfd27d9542707bfc2ef1aeb04043 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -69,6 +69,14 @@ public: unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; + + unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + + unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; }; } // end anonymous namespace @@ -319,6 +327,44 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, return getMachineOpValue(MI, MO, Fixups, STI); } +unsigned +SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + using namespace AMDGPU::SDWA; + + uint64_t RegEnc = 0; + + const MCOperand &MO = MI.getOperand(OpNo); + + unsigned Reg = MO.getReg(); + RegEnc |= MRI.getEncodingValue(Reg); + RegEnc &= SDWA9EncValues::SRC_VGPR_MASK; + if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) { + RegEnc |= SDWA9EncValues::SRC_SGPR_MASK; + } + return RegEnc; +} + +unsigned +SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + using namespace AMDGPU::SDWA; + + uint64_t RegEnc = 0; + + const MCOperand &MO = MI.getOperand(OpNo); + + unsigned Reg = MO.getReg(); + if (Reg != AMDGPU::VCC) { + RegEnc |= MRI.getEncodingValue(Reg); + RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK; + RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK; + } + return RegEnc; +} + uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td index 0e4eda982139d3c5950969a80fb0c26dcdbb9566..f6f2582aa11b3e948fa26407e2348ea45e1f4d9f 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/AMDGPU/Processors.td @@ -80,50 +80,53 @@ def : Proc<"cayman", R600_VLIW4_Itin, // Southern Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"SI", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"gfx600", SIFullSpeedModel, + [FeatureISAVersion6_0_0]>; + +def : ProcessorModel<"SI", SIFullSpeedModel, + [FeatureISAVersion6_0_0] +>; + +def : ProcessorModel<"tahiti", SIFullSpeedModel, + [FeatureISAVersion6_0_0] >; -def : ProcessorModel<"tahiti", SIFullSpeedModel, - [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops] +def : ProcessorModel<"gfx601", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1] >; -def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"verde", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"verde", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"oland", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"oland", SIQuarterSpeedModel, + [FeatureISAVersion6_0_1]>; -def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>; +def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureISAVersion6_0_1]>; //===----------------------------------------------------------------------===// // Sea Islands //===----------------------------------------------------------------------===// -def : ProcessorModel<"bonaire", SIQuarterSpeedModel, +def : ProcessorModel<"gfx700", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"kabini", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2] +def : ProcessorModel<"bonaire", SIQuarterSpeedModel, + [FeatureISAVersion7_0_0] >; def : ProcessorModel<"kaveri", SIQuarterSpeedModel, [FeatureISAVersion7_0_0] >; -def : ProcessorModel<"hawaii", SIFullSpeedModel, +def : ProcessorModel<"gfx701", SIFullSpeedModel, [FeatureISAVersion7_0_1] >; -def : ProcessorModel<"mullins", SIQuarterSpeedModel, - [FeatureISAVersion7_0_2]>; - -def : ProcessorModel<"gfx700", SIQuarterSpeedModel, - [FeatureISAVersion7_0_0] ->; - -def : ProcessorModel<"gfx701", SIFullSpeedModel, +def : ProcessorModel<"hawaii", SIFullSpeedModel, [FeatureISAVersion7_0_1] >; @@ -131,6 +134,17 @@ def : ProcessorModel<"gfx702", SIQuarterSpeedModel, [FeatureISAVersion7_0_2] >; +def : ProcessorModel<"gfx703", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"kabini", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3] +>; + +def : ProcessorModel<"mullins", SIQuarterSpeedModel, + [FeatureISAVersion7_0_3]>; + //===----------------------------------------------------------------------===// // Volcanic Islands //===----------------------------------------------------------------------===// @@ -187,10 +201,23 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel, [FeatureISAVersion8_1_0] >; -def : ProcessorModel<"gfx900", SIQuarterSpeedModel, - [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32] +//===----------------------------------------------------------------------===// +// GFX9 +//===----------------------------------------------------------------------===// + +def : ProcessorModel<"gfx900", SIQuarterSpeedModel, + [FeatureISAVersion9_0_0] +>; + +def : ProcessorModel<"gfx901", SIQuarterSpeedModel, + [FeatureISAVersion9_0_1] +>; + +def : ProcessorModel<"gfx902", SIQuarterSpeedModel, + [FeatureISAVersion9_0_2] >; -def : ProcessorModel<"gfx901", SIQuarterSpeedModel, - [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32] +def : ProcessorModel<"gfx903", SIQuarterSpeedModel, + [FeatureISAVersion9_0_3] >; + diff --git a/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/lib/Target/AMDGPU/R600ClauseMergePass.cpp index d0aba38f786d3bd3175deb6a92d4b69449211207..fbe45cb222d936cbcd4c941f9a3fc88901dbb493 100644 --- a/lib/Target/AMDGPU/R600ClauseMergePass.cpp +++ b/lib/Target/AMDGPU/R600ClauseMergePass.cpp @@ -62,7 +62,7 @@ private: const MachineInstr &LatrCFAlu) const; public: - R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } + R600ClauseMergePass() : MachineFunctionPass(ID) { } bool runOnMachineFunction(MachineFunction &MF) override; @@ -208,6 +208,6 @@ StringRef R600ClauseMergePass::getPassName() const { } // end anonymous namespace -llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) { - return new R600ClauseMergePass(TM); +llvm::FunctionPass *llvm::createR600ClauseMergePass() { + return new R600ClauseMergePass(); } diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 811b905588b4ba709aabc01e9087913bcdec2eb6..6993e8a62a9c2240560320ffabde9355d43a6180 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -12,15 +12,14 @@ /// computing their address on the fly ; it also sets STACK_SIZE info. //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,6 +29,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -499,7 +499,7 @@ private: } public: - R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID) {} + R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override { ST = &MF.getSubtarget(); @@ -706,6 +706,6 @@ char R600ControlFlowFinalizer::ID = 0; } // end anonymous namespace -FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { - return new R600ControlFlowFinalizer(TM); +FunctionPass *llvm::createR600ControlFlowFinalizer() { + return new R600ControlFlowFinalizer(); } diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp index 03fc1aff5ec1596a70de05b413da71939b3f0672..0d8ccd088ec4e1291b9eb6408cf6a9dee7fb7844 100644 --- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp +++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp @@ -15,10 +15,10 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp index 3e46e6387614efea34a1980d4ec791e90e9de2cc..66def2d29caff9669686ace083c73ed03ee78ba7 100644 --- a/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -15,11 +15,11 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -37,7 +37,7 @@ private: unsigned Op); public: - R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), + R600ExpandSpecialInstrsPass() : MachineFunctionPass(ID), TII(nullptr) { } bool runOnMachineFunction(MachineFunction &MF) override; @@ -51,8 +51,8 @@ public: char R600ExpandSpecialInstrsPass::ID = 0; -FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) { - return new R600ExpandSpecialInstrsPass(TM); +FunctionPass *llvm::createR600ExpandSpecialInstrsPass() { + return new R600ExpandSpecialInstrsPass(); } void R600ExpandSpecialInstrsPass::SetFlagInNewMI(MachineInstr *NewMI, diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp index 1f01ad732e00acc05a0cefe3e63fa05a7c578a8c..37787b3c5f7292cbc2a76cb94cb84e4256e4a7e9 100644 --- a/lib/Target/AMDGPU/R600FrameLowering.cpp +++ b/lib/Target/AMDGPU/R600FrameLowering.cpp @@ -10,8 +10,8 @@ #include "R600FrameLowering.h" #include "AMDGPUSubtarget.h" #include "R600RegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/MathExtras.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 3590a9b05e1d04773bc6cb8d08e5cb319801cb52..c55878f8bff0f34b5f76c5e98de5a9991dc66d38 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1120,7 +1120,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, Mask = DAG.getConstant(0xff, DL, MVT::i32); } else if (Store->getMemoryVT() == MVT::i16) { assert(Store->getAlignment() >= 2); - Mask = DAG.getConstant(0xffff, DL, MVT::i32);; + Mask = DAG.getConstant(0xffff, DL, MVT::i32); } else { llvm_unreachable("Unsupported private trunc store"); } @@ -1618,6 +1618,14 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, return VT.changeVectorElementTypeToInteger(); } +bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const { + // Local and Private addresses do not handle vectors. Limit to i32 + if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) { + return (MemVT.getSizeInBits() <= 32); + } + return true; +} + bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index 9700ce14c6f3179595a00d135662e4a34adb4d30..d6a0876a6ee7d9f31295940d84749a8e52ca590e 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -44,6 +44,8 @@ public: EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override; diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 2422d57269eb96500c7a5fda5918fa98a137837b..c5da5e4042004179385400de50e2af421ab690ca 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "R600InstrInfo.h" #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600FrameLowering.h" -#include "R600InstrInfo.h" #include "R600RegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/BitVector.h" @@ -35,8 +35,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include #include -#include #include +#include #include #include #include diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td index a5310e9fd6d0486d3592b5dca1231f9b933dd692..4c9e1e8a5434ec9e60e2b4ff21eec93e97927c08 100644 --- a/lib/Target/AMDGPU/R600Intrinsics.td +++ b/lib/Target/AMDGPU/R600Intrinsics.td @@ -61,7 +61,7 @@ def int_r600_ddx : TextureIntrinsicFloatInput; def int_r600_ddy : TextureIntrinsicFloatInput; def int_r600_dot4 : Intrinsic<[llvm_float_ty], - [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem] + [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable] >; } // End TargetPrefix = "r600", isTarget = 1 diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index db18e5bd1afaeb1005fffcedf096de5fa036737b..47fda1c8fa8275415a18454d79d823657211d6a2 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -13,11 +13,11 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" -#include "R600InstrInfo.h" #include "AMDGPUSubtarget.h" +#include "R600InstrInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Pass.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index d90008a550aeb615838e35a97662c589d208ba77..502dd3bce97e1223c5d969a9905df1db0540d9cb 100644 --- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -124,7 +124,7 @@ private: public: static char ID; - R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), + R600VectorRegMerger() : MachineFunctionPass(ID), TII(nullptr) { } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -396,6 +396,6 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { return false; } -llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { - return new R600VectorRegMerger(tm); +llvm::FunctionPass *llvm::createR600VectorRegMerger() { + return new R600VectorRegMerger(); } diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp index 5b6dd1ed128dc58cd62ca0f9ebc5538270692ad2..1cb40938cee72d78e47f6fd9b2b8930e39bf631e 100644 --- a/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/lib/Target/AMDGPU/R600Packetizer.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "R600InstrInfo.h" @@ -24,6 +23,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -36,7 +36,7 @@ class R600Packetizer : public MachineFunctionPass { public: static char ID; - R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {} + R600Packetizer() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -404,6 +404,6 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { } // end anonymous namespace -llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) { - return new R600Packetizer(tm); +llvm::FunctionPass *llvm::createR600Packetizer() { + return new R600Packetizer(); } diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp index dfdc602b80cdfca4afe37d10612fdff46b7e3bfe..7501facb0cba168d1611f11861d7bed6f96a6818 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -56,6 +56,18 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +// Dummy to not crash RegisterClassInfo. +static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister; + +const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs( + const MachineFunction *) const { + return &CalleeSavedReg; +} + +unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return AMDGPU::NoRegister; +} + unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { return this->getEncodingValue(reg) >> HW_CHAN_SHIFT; } diff --git a/lib/Target/AMDGPU/R600RegisterInfo.h b/lib/Target/AMDGPU/R600RegisterInfo.h index 9dfb3106c6ccbcfa39d1a8b9c27017c989adc50d..f0d9644b02f20ba8dbaaa992e66fce7f6ffd22ae 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.h +++ b/lib/Target/AMDGPU/R600RegisterInfo.h @@ -27,6 +27,8 @@ struct R600RegisterInfo final : public AMDGPURegisterInfo { R600RegisterInfo(); BitVector getReservedRegs(const MachineFunction &MF) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + unsigned getFrameRegister(const MachineFunction &MF) const override; /// \brief get the HW encoding for a register's channel. unsigned getHWRegChan(unsigned reg) const; diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td index cc667d985a82ebddefa61ef2a8937efe1d8129a4..3c1e8527284cf16bf8e25ab2c30bb83e93864db8 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.td +++ b/lib/Target/AMDGPU/R600RegisterInfo.td @@ -226,7 +226,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add R600_Addr, R600_KC0, R600_KC1, ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF, - ALU_CONST, ALU_PARAM, OQAP + ALU_CONST, ALU_PARAM, OQAP, INDIRECT_BASE_ADDR )>; def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index b7e62075244b8147e79b0d760500c06527d86000..8cb35c506135361cabb23912d247d3bef2a63040 100644 --- a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -77,9 +77,10 @@ class SIAnnotateControlFlow : public FunctionPass { void insertElse(BranchInst *Term); - Value *handleLoopCondition(Value *Cond, PHINode *Broken, - llvm::Loop *L, BranchInst *Term, - SmallVectorImpl &LoopPhiConditions); + Value * + handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L, + BranchInst *Term, + SmallVectorImpl &LoopPhiConditions); void handleLoop(BranchInst *Term); @@ -125,7 +126,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) { Void = Type::getVoidTy(Context); Boolean = Type::getInt1Ty(Context); Int64 = Type::getInt64Ty(Context); - ReturnStruct = StructType::get(Boolean, Int64, (Type *)nullptr); + ReturnStruct = StructType::get(Boolean, Int64); BoolTrue = ConstantInt::getTrue(Context); BoolFalse = ConstantInt::getFalse(Context); @@ -212,9 +213,8 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) { /// \brief Recursively handle the condition leading to a loop Value *SIAnnotateControlFlow::handleLoopCondition( - Value *Cond, PHINode *Broken, - llvm::Loop *L, BranchInst *Term, - SmallVectorImpl &LoopPhiConditions) { + Value *Cond, PHINode *Broken, llvm::Loop *L, BranchInst *Term, + SmallVectorImpl &LoopPhiConditions) { // Only search through PHI nodes which are inside the loop. If we try this // with PHI nodes that are outside of the loop, we end up inserting new PHI @@ -281,7 +281,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition( NewPhi->setIncomingValue(i, PhiArg); } - LoopPhiConditions.push_back(WeakVH(Phi)); + LoopPhiConditions.push_back(WeakTrackingVH(Phi)); return Ret; } @@ -323,7 +323,7 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { BasicBlock *Target = Term->getSuccessor(1); PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front()); - SmallVector LoopPhiConditions; + SmallVector LoopPhiConditions; Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); Value *Arg = handleLoopCondition(Cond, Broken, L, Term, LoopPhiConditions); @@ -333,7 +333,7 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { Term->setCondition(CallInst::Create(Loop, Arg, "", Term)); - for (WeakVH Val : reverse(LoopPhiConditions)) { + for (WeakTrackingVH Val : reverse(LoopPhiConditions)) { if (PHINode *Cond = cast_or_null(Val)) eraseIfUnused(Cond); } diff --git a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp index 62ebef8e91af4b674dc6a2de8ec1c9bd9d161c6b..b5c439b21b893057e1511345063c2338196e17f2 100644 --- a/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp +++ b/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp @@ -19,8 +19,8 @@ // //===----------------------------------------------------------------------===// -#include "SIInstrInfo.h" #include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index 3dd372b328668658d0a071871520a0cf0df139e0..5cd90323ff67b4c8e8401ea33ee8ae6a0f4797a1 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -118,6 +118,10 @@ namespace AMDGPU { // Operand for source modifiers for VOP instructions OPERAND_INPUT_MODS, + // Operand for GFX9 SDWA instructions + OPERAND_SDWA9_SRC, + OPERAND_SDWA9_VOPC_DST, + /// Operand with 32-bit immediate that uses the constant bus. OPERAND_KIMM32, OPERAND_KIMM16 @@ -160,7 +164,8 @@ namespace AMDGPUAsmVariants { DEFAULT = 0, VOP3 = 1, SDWA = 2, - DPP = 3 + SDWA9 = 3, + DPP = 4 }; } @@ -276,6 +281,46 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11] } // namespace Hwreg +namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32. + +enum Id { // id of symbolic names + ID_QUAD_PERM = 0, + ID_BITMASK_PERM, + ID_SWAP, + ID_REVERSE, + ID_BROADCAST +}; + +enum EncBits { + + // swizzle mode encodings + + QUAD_PERM_ENC = 0x8000, + QUAD_PERM_ENC_MASK = 0xFF00, + + BITMASK_PERM_ENC = 0x0000, + BITMASK_PERM_ENC_MASK = 0x8000, + + // QUAD_PERM encodings + + LANE_MASK = 0x3, + LANE_MAX = LANE_MASK, + LANE_SHIFT = 2, + LANE_NUM = 4, + + // BITMASK_PERM encodings + + BITMASK_MASK = 0x1F, + BITMASK_MAX = BITMASK_MASK, + BITMASK_WIDTH = 5, + + BITMASK_AND_SHIFT = 0, + BITMASK_OR_SHIFT = 5, + BITMASK_XOR_SHIFT = 10 +}; + +} // namespace Swizzle + namespace SDWA { enum SdwaSel { @@ -294,6 +339,18 @@ enum DstUnused { UNUSED_PRESERVE = 2, }; +enum SDWA9EncValues{ + SRC_SGPR_MASK = 0x100, + SRC_VGPR_MASK = 0xFF, + VOPC_DST_VCC_MASK = 0x80, + VOPC_DST_SGPR_MASK = 0x7F, + + SRC_VGPR_MIN = 0, + SRC_VGPR_MAX = 255, + SRC_SGPR_MIN = 256, + SRC_SGPR_MAX = 357, +}; + } // namespace SDWA } // namespace AMDGPU @@ -302,6 +359,7 @@ enum DstUnused { #define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8) #define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128 #define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228 +#define R_00B428_SPI_SHADER_PGM_RSRC1_HS 0x00B428 #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 #define S_00B028_VGPRS(x) (((x) & 0x3F) << 0) #define S_00B028_SGPRS(x) (((x) & 0x0F) << 6) diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 34cd6f704a12f51e0fb00bb3c8899c6784ce2eff..5f5f25103c027474ed54c396ba6b3ac1586b5e89 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -68,6 +68,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -80,6 +81,11 @@ using namespace llvm; #define DEBUG_TYPE "si-fix-sgpr-copies" +static cl::opt EnableM0Merge( + "amdgpu-enable-merge-m0", + cl::desc("Merge and hoist M0 initializations"), + cl::init(false)); + namespace { class SIFixSGPRCopies : public MachineFunctionPass { @@ -107,7 +113,7 @@ public: INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE, "SI Fix SGPR copies", false, false) @@ -272,8 +278,7 @@ static bool phiHasBreakDef(const MachineInstr &PHI, Visited.insert(Reg); - MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg); - assert(DefInstr); + MachineInstr *DefInstr = MRI.getVRegDef(Reg); switch (DefInstr->getOpcode()) { default: break; @@ -331,6 +336,186 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy, return true; } +template +bool searchPredecessors(const MachineBasicBlock *MBB, + const MachineBasicBlock *CutOff, + UnaryPredicate Predicate) { + + if (MBB == CutOff) + return false; + + DenseSet Visited; + SmallVector Worklist(MBB->pred_begin(), + MBB->pred_end()); + + while (!Worklist.empty()) { + MachineBasicBlock *MBB = Worklist.pop_back_val(); + + if (!Visited.insert(MBB).second) + continue; + if (MBB == CutOff) + continue; + if (Predicate(MBB)) + return true; + + Worklist.append(MBB->pred_begin(), MBB->pred_end()); + } + + return false; +} + +static bool predsHasDivergentTerminator(MachineBasicBlock *MBB, + const TargetRegisterInfo *TRI) { + return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) { + return hasTerminatorThatModifiesExec(*MBB, *TRI); }); +} + +// Checks if there is potential path From instruction To instruction. +// If CutOff is specified and it sits in between of that path we ignore +// a higher portion of the path and report it is not reachable. +static bool isReachable(const MachineInstr *From, + const MachineInstr *To, + const MachineBasicBlock *CutOff, + MachineDominatorTree &MDT) { + // If either From block dominates To block or instructions are in the same + // block and From is higher. + if (MDT.dominates(From, To)) + return true; + + const MachineBasicBlock *MBBFrom = From->getParent(); + const MachineBasicBlock *MBBTo = To->getParent(); + if (MBBFrom == MBBTo) + return false; + + // Instructions are in different blocks, do predecessor search. + // We should almost never get here since we do not usually produce M0 stores + // other than -1. + return searchPredecessors(MBBTo, CutOff, [MBBFrom] + (const MachineBasicBlock *MBB) { return MBB == MBBFrom; }); +} + +// Hoist and merge identical SGPR initializations into a common predecessor. +// This is intended to combine M0 initializations, but can work with any +// SGPR. A VGPR cannot be processed since we cannot guarantee vector +// executioon. +static bool hoistAndMergeSGPRInits(unsigned Reg, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT) { + // List of inits by immediate value. + typedef std::map> InitListMap; + InitListMap Inits; + // List of clobbering instructions. + SmallVector Clobbers; + bool Changed = false; + + for (auto &MI : MRI.def_instructions(Reg)) { + MachineOperand *Imm = nullptr; + for (auto &MO: MI.operands()) { + if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) || + (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) { + Imm = nullptr; + break; + } else if (MO.isImm()) + Imm = &MO; + } + if (Imm) + Inits[Imm->getImm()].push_front(&MI); + else + Clobbers.push_back(&MI); + } + + for (auto &Init : Inits) { + auto &Defs = Init.second; + + for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) { + MachineInstr *MI1 = *I1; + + for (auto I2 = std::next(I1); I2 != E; ) { + MachineInstr *MI2 = *I2; + + // Check any possible interference + auto intereferes = [&](MachineBasicBlock::iterator From, + MachineBasicBlock::iterator To) -> bool { + + assert(MDT.dominates(&*To, &*From)); + + auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool { + const MachineBasicBlock *MBBFrom = From->getParent(); + const MachineBasicBlock *MBBTo = To->getParent(); + bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT); + bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT); + if (!MayClobberFrom && !MayClobberTo) + return false; + if ((MayClobberFrom && !MayClobberTo) || + (!MayClobberFrom && MayClobberTo)) + return true; + // Both can clobber, this is not an interference only if both are + // dominated by Clobber and belong to the same block or if Clobber + // properly dominates To, given that To >> From, so it dominates + // both and located in a common dominator. + return !((MBBFrom == MBBTo && + MDT.dominates(Clobber, &*From) && + MDT.dominates(Clobber, &*To)) || + MDT.properlyDominates(Clobber->getParent(), MBBTo)); + }; + + return (any_of(Clobbers, interferes)) || + (any_of(Inits, [&](InitListMap::value_type &C) { + return C.first != Init.first && any_of(C.second, interferes); + })); + }; + + if (MDT.dominates(MI1, MI2)) { + if (!intereferes(MI2, MI1)) { + DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber() + << " " << *MI2); + MI2->eraseFromParent(); + Defs.erase(I2++); + Changed = true; + continue; + } + } else if (MDT.dominates(MI2, MI1)) { + if (!intereferes(MI1, MI2)) { + DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber() + << " " << *MI1); + MI1->eraseFromParent(); + Defs.erase(I1++); + Changed = true; + break; + } + } else { + auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(), + MI2->getParent()); + if (!MBB) { + ++I2; + continue; + } + + MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); + if (!intereferes(MI1, I) && !intereferes(MI2, I)) { + DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber() + << " " << *MI1 << "and moving from BB#" + << MI2->getParent()->getNumber() << " to BB#" + << I->getParent()->getNumber() << " " << *MI2); + I->getParent()->splice(I, MI2->getParent(), MI2); + MI1->eraseFromParent(); + Defs.erase(I1++); + Changed = true; + break; + } + } + ++I2; + } + ++I1; + } + } + + if (Changed) + MRI.clearKillFlags(Reg); + + return Changed; +} + bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -360,7 +545,13 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterClass *SrcRC, *DstRC; std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI); if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { - MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + TII->moveToVALU(MI); + break; + } + + MachineInstr *DefMI = MRI.getVRegDef(SrcReg); unsigned SMovOp; int64_t Imm; // If we are just copying an immediate, we can replace the copy with @@ -387,8 +578,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB(); MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB(); - MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1); - if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) { + if (!predsHasDivergentTerminator(MBB0, TRI) && + !predsHasDivergentTerminator(MBB1, TRI)) { DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n'); break; } @@ -463,5 +654,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } } + if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge) + hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT); + return true; } diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index d63414735b95a3b7b83110e769ca9d9353ec04c5..e10f1ed3762e84b345b562904c9e5fcd129766db 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -35,9 +35,12 @@ struct FoldCandidate { }; unsigned char UseOpNo; MachineOperand::MachineOperandType Kind; + bool Commuted; - FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) : - UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) { + FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp, + bool Commuted_ = false) : + UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()), + Commuted(Commuted_) { if (FoldOp->isImm()) { ImmToFold = FoldOp->getImm(); } else if (FoldOp->isFI()) { @@ -59,6 +62,10 @@ struct FoldCandidate { bool isReg() const { return Kind == MachineOperand::MO_Register; } + + bool isCommuted() const { + return Commuted; + } }; class SIFoldOperands : public MachineFunctionPass { @@ -237,8 +244,13 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1)) return false; - if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) + if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) { + TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1); return false; + } + + FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true)); + return true; } FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold)); @@ -247,9 +259,10 @@ static bool tryAddToFoldList(SmallVectorImpl &FoldList, // If the use operand doesn't care about the value, this may be an operand only // used for register indexing, in which case it is unsafe to fold. -static bool isUseSafeToFold(const MachineInstr &MI, +static bool isUseSafeToFold(const SIInstrInfo *TII, + const MachineInstr &MI, const MachineOperand &UseMO) { - return !UseMO.isUndef(); + return !UseMO.isUndef() && !TII->isSDWA(MI); //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg()); } @@ -261,7 +274,7 @@ void SIFoldOperands::foldOperand( SmallVectorImpl &CopiesToReplace) const { const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx); - if (!isUseSafeToFold(*UseMI, UseOp)) + if (!isUseSafeToFold(TII, *UseMI, UseOp)) return; // FIXME: Fold operands with subregs. @@ -698,6 +711,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << static_cast(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n'); tryFoldInst(TII, Fold.UseMI); + } else if (Fold.isCommuted()) { + // Restoring instruction's original operand order if fold has failed. + TII->commuteInstruction(*Fold.UseMI, false); } } } @@ -714,7 +730,8 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { // Make sure sources are identical. const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() || + if (!Src0->isReg() || !Src1->isReg() || + Src0->getSubReg() != Src1->getSubReg() || Src0->getSubReg() != AMDGPU::NoSubRegister) return nullptr; diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index abe6af9a6d3fcbbed51c2547b583b72234aa1c21..b1bd14e421f024587bec6a9919fe93e2ce872f11 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -8,10 +8,10 @@ //==-----------------------------------------------------------------------===// #include "SIFrameLowering.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -101,10 +101,12 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); // We need to insert initialization of the scratch resource descriptor. unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); - if (ScratchRsrcReg == AMDGPU::NoRegister) + if (ScratchRsrcReg == AMDGPU::NoRegister || + !MRI.isPhysRegUsed(ScratchRsrcReg)) return AMDGPU::NoRegister; if (ST.hasSGPRInitBug() || @@ -122,8 +124,6 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( // We find the resource first because it has an alignment requirement. - MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4; ArrayRef AllSGPR128s = getAllSGPR128(ST, MF); AllSGPR128s = AllSGPR128s.slice(std::min(static_cast(AllSGPR128s.size()), NumPreloaded)); @@ -143,24 +143,34 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( return ScratchRsrcReg; } -unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( +// Shift down registers reserved for the scratch wave offset and stack pointer +// SGPRs. +std::pair +SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( const SISubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); - if (ST.hasSGPRInitBug() || - ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) - return ScratchWaveOffsetReg; - unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); - MachineRegisterInfo &MRI = MF.getRegInfo(); + // No replacement necessary. + if (ScratchWaveOffsetReg == AMDGPU::NoRegister || + !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) { + assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister); + return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister); + } + + unsigned SPReg = MFI->getStackPtrOffsetReg(); + if (ST.hasSGPRInitBug()) + return std::make_pair(ScratchWaveOffsetReg, SPReg); + unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); ArrayRef AllSGPRs = getAllSGPRs(ST, MF); if (NumPreloaded > AllSGPRs.size()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, SPReg); AllSGPRs = AllSGPRs.slice(NumPreloaded); @@ -175,30 +185,37 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( // register from the list to consider, it means that when this // register is being used for the scratch wave offset and there // are no other free SGPRs, then the value will stay in this register. + // + 1 if stack pointer is used. // ---- - // 13 - if (AllSGPRs.size() < 13) - return ScratchWaveOffsetReg; + // 13 (+1) + unsigned ReservedRegCount = 13; - for (MCPhysReg Reg : AllSGPRs.drop_back(13)) { + if (AllSGPRs.size() < ReservedRegCount) + return std::make_pair(ScratchWaveOffsetReg, SPReg); + + bool HandledScratchWaveOffsetReg = + ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + + for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) { // Pick the first unallocated SGPR. Be careful not to pick an alias of the // scratch descriptor, since we haven’t added its uses yet. - if (!MRI.isPhysRegUsed(Reg)) { - if (!MRI.isAllocatable(Reg) || - TRI->isSubRegisterEq(ScratchRsrcReg, Reg)) - continue; + if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) { + if (!HandledScratchWaveOffsetReg) { + HandledScratchWaveOffsetReg = true; - MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); - MFI->setScratchWaveOffsetReg(Reg); - return Reg; + MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); + MFI->setScratchWaveOffsetReg(Reg); + ScratchWaveOffsetReg = Reg; + break; + } } } - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, SPReg); } -void SIFrameLowering::emitPrologue(MachineFunction &MF, - MachineBasicBlock &MBB) const { +void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was // specified. const SISubtarget &ST = MF.getSubtarget(); @@ -220,18 +237,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, const SIRegisterInfo *TRI = &TII->getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned ScratchRsrcReg - = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); - unsigned ScratchWaveOffsetReg - = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); - - if (ScratchRsrcReg == AMDGPU::NoRegister) { - assert(ScratchWaveOffsetReg == AMDGPU::NoRegister); - return; - } - - assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg)); - // We need to do the replacement of the private segment buffer and wave offset // register even if there are no stack objects. There could be stores to undef // or a constant without an associated object. @@ -244,19 +249,49 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit()) emitFlatScratchInit(ST, MF, MBB); + unsigned SPReg = MFI->getStackPtrOffsetReg(); + if (SPReg != AMDGPU::NoRegister) { + DebugLoc DL; + int64_t StackSize = MF.getFrameInfo().getStackSize(); + + if (StackSize == 0) { + BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg) + .addReg(MFI->getScratchWaveOffsetReg()); + } else { + BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg) + .addReg(MFI->getScratchWaveOffsetReg()) + .addImm(StackSize * ST.getWavefrontSize()); + } + } + + unsigned ScratchRsrcReg + = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); + + unsigned ScratchWaveOffsetReg; + std::tie(ScratchWaveOffsetReg, SPReg) + = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); + + // It's possible to have uses of only ScratchWaveOffsetReg without + // ScratchRsrcReg if it's only used for the initialization of flat_scratch, + // but the inverse is not true. + if (ScratchWaveOffsetReg == AMDGPU::NoRegister) { + assert(ScratchRsrcReg == AMDGPU::NoRegister); + return; + } + // We need to insert initialization of the scratch resource descriptor. unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); - unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) { PreloadedPrivateBufferReg = TRI->getPreloadedValue( MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); } - bool OffsetRegUsed = !MRI.use_empty(ScratchWaveOffsetReg); - bool ResourceRegUsed = !MRI.use_empty(ScratchRsrcReg); + bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg); + bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister && + MRI.isPhysRegUsed(ScratchRsrcReg); // We added live-ins during argument lowering, but since they were not used // they were deleted. We're adding the uses now, so add them back. @@ -309,7 +344,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, if (OffsetRegUsed && PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) { BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) - .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); + .addReg(PreloadedScratchWaveOffsetReg, + MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill); } if (CopyBuffer && !CopyBufferFirst) { @@ -379,6 +415,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, } } +void SIFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + const SIMachineFunctionInfo *MFI = MF.getInfo(); + if (MFI->isEntryFunction()) + emitEntryFunctionPrologue(MF, MBB); +} + void SIFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { @@ -469,7 +512,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( // this also ensures we shouldn't need a register for the offset when // emergency scavenging. int ScavengeFI = MFI.CreateFixedObject( - AMDGPU::SGPR_32RegClass.getSize(), 0, false); + TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false); RS->addScavengingFrameIndex(ScavengeFI); } } diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index 1bfc08093da224d761293e2ebcb520053ed00035..e17adbe273614edcc0e8cb7934d0484a0a203826 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -26,6 +26,8 @@ public: AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} ~SIFrameLowering() override = default; + void emitEntryFunctionPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const; void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, @@ -49,7 +51,7 @@ private: SIMachineFunctionInfo *MFI, MachineFunction &MF) const; - unsigned getReservedPrivateSegmentWaveByteOffsetReg( + std::pair getReservedPrivateSegmentWaveByteOffsetReg( const SISubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index eda825d8c6eee356eda7caf88a6e24b1362c037e..441f1ef4bd04c06b8286a4d2e51d066033c029dd 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -17,12 +17,12 @@ #define _USE_MATH_DEFINES #endif +#include "SIISelLowering.h" #include "AMDGPU.h" #include "AMDGPUIntrinsicInfo.h" -#include "AMDGPUTargetMachine.h" #include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" #include "SIDefines.h" -#include "SIISelLowering.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" @@ -68,6 +68,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetCallingConv.h" #include "llvm/Target/TargetOptions.h" @@ -287,8 +288,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // On SI this is s_memtime and s_memrealtime on VI. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); - setOperationAction(ISD::TRAP, MVT::Other, Legal); - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); + setOperationAction(ISD::TRAP, MVT::Other, Custom); + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -461,6 +462,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand); + } else { + setOperationAction(ISD::SELECT, MVT::v2i16, Custom); + setOperationAction(ISD::SELECT, MVT::v2f16, Custom); + } + + for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8 }) { + setOperationAction(ISD::SELECT, VT, Custom); } setTargetDAGCombine(ISD::FADD); @@ -480,6 +488,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::FCANONICALIZE); setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); // All memory operations. Some folding on the pointer operand is done to help // matching the constant offsets in the addressing modes. @@ -558,9 +567,17 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, } bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const { - // Flat instructions do not have offsets, and only have the register - // address. - return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); + if (!Subtarget->hasFlatInstOffsets()) { + // Flat instructions do not have offsets, and only have the register + // address. + return AM.BaseOffs == 0 && AM.Scale == 0; + } + + // GFX9 added a 13-bit signed offset. When using regular flat instructions, + // the sign bit is ignored and is treated as a 12-bit unsigned offset. + + // Just r + i + return isUInt<12>(AM.BaseOffs) && AM.Scale == 0; } bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const { @@ -689,6 +706,18 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, } } +bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const { + if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) { + return (MemVT.getSizeInBits() <= 4 * 32); + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { + unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize(); + return (MemVT.getSizeInBits() <= MaxPrivateBits); + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { + return (MemVT.getSizeInBits() <= 2 * 32); + } + return true; +} + bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, @@ -905,6 +934,55 @@ SDValue SITargetLowering::lowerKernargMemParameter( return DAG.getMergeValues({ Val, Load.getValue(1) }, SL); } +SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, + const SDLoc &SL, SDValue Chain, + const ISD::InputArg &Arg) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + if (Arg.Flags.isByVal()) { + unsigned Size = Arg.Flags.getByValSize(); + int FrameIdx = MFI.CreateFixedObject(Size, VA.getLocMemOffset(), false); + return DAG.getFrameIndex(FrameIdx, MVT::i32); + } + + unsigned ArgOffset = VA.getLocMemOffset(); + unsigned ArgSize = VA.getValVT().getStoreSize(); + + int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, true); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + SDValue ArgValue; + + // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + MVT MemVT = VA.getValVT(); + + switch (VA.getLocInfo()) { + default: + break; + case CCValAssign::BCvt: + MemVT = VA.getLocVT(); + break; + case CCValAssign::SExt: + ExtType = ISD::SEXTLOAD; + break; + case CCValAssign::ZExt: + ExtType = ISD::ZEXTLOAD; + break; + case CCValAssign::AExt: + ExtType = ISD::EXTLOAD; + break; + } + + ArgValue = DAG.getExtLoad( + ExtType, SL, VA.getLocVT(), Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + MemVT); + return ArgValue; +} + static void processShaderInputArgs(SmallVectorImpl &Splits, CallingConv::ID CallConv, ArrayRef Ins, @@ -1034,6 +1112,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, static void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, + CallingConv::ID CallConv, bool IsShader) { if (Info.hasWorkGroupIDX()) { unsigned Reg = Info.addWorkGroupIDX(); @@ -1064,8 +1143,15 @@ static void allocateSystemSGPRs(CCState &CCInfo, unsigned PrivateSegmentWaveByteOffsetReg; if (IsShader) { - PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo); - Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg); + PrivateSegmentWaveByteOffsetReg = + Info.getPrivateSegmentWaveByteOffsetSystemSGPR(); + + // This is true if the scratch wave byte offset doesn't have a fixed + // location. + if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) { + PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo); + Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg); + } } else PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset(); @@ -1077,10 +1163,12 @@ static void allocateSystemSGPRs(CCState &CCInfo, static void reservePrivateMemoryRegs(const TargetMachine &TM, MachineFunction &MF, const SIRegisterInfo &TRI, - SIMachineFunctionInfo &Info) { + SIMachineFunctionInfo &Info, + bool NeedSP) { // Now that we've figured out where the scratch register inputs are, see if // should reserve the arguments and use them directly. - bool HasStackObjects = MF.getFrameInfo().hasStackObjects(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool HasStackObjects = MFI.hasStackObjects(); // Record that we know we have non-spill stack objects so we don't need to // check all stack objects later. @@ -1138,6 +1226,15 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM, Info.setScratchWaveOffsetReg(ReservedOffsetReg); } } + + if (NeedSP){ + unsigned ReservedStackPtrOffsetReg = TRI.reservedStackPtrOffsetReg(MF); + Info.setStackPtrOffsetReg(ReservedStackPtrOffsetReg); + + assert(Info.getStackPtrOffsetReg() != Info.getFrameOffsetReg()); + assert(!TRI.isSubRegister(Info.getScratchRSrcReg(), + Info.getStackPtrOffsetReg())); + } } SDValue SITargetLowering::LowerFormalArguments( @@ -1206,8 +1303,10 @@ SDValue SITargetLowering::LowerFormalArguments( !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ()); + } else if (IsKernel) { + assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()); } else { - assert(!IsKernel || (Info->hasWorkGroupIDX() && Info->hasWorkItemIDX())); + Splits.append(Ins.begin(), Ins.end()); } if (IsEntryFunc) { @@ -1261,11 +1360,14 @@ SDValue SITargetLowering::LowerFormalArguments( InVals.push_back(Arg); continue; + } else if (!IsEntryFunc && VA.isMemLoc()) { + SDValue Val = lowerStackParameter(DAG, VA, DL, Chain, Arg); + InVals.push_back(Val); + if (!Arg.Flags.isByVal()) + Chains.push_back(Val.getValue(1)); + continue; } - if (VA.isMemLoc()) - report_fatal_error("memloc not supported with calling convention"); - assert(VA.isRegLoc() && "Parameter must be in a register!"); unsigned Reg = VA.getLocReg(); @@ -1274,7 +1376,7 @@ SDValue SITargetLowering::LowerFormalArguments( Reg = MF.addLiveIn(Reg, RC); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); - if (Arg.VT.isVector()) { + if (IsShader && Arg.VT.isVector()) { // Build a vector from the registers Type *ParamType = FType->getParamType(Arg.getOrigArgIndex()); unsigned NumElements = ParamType->getVectorNumElements(); @@ -1300,16 +1402,49 @@ SDValue SITargetLowering::LowerFormalArguments( InVals.push_back(Val); } - // Start adding system SGPRs. - if (IsEntryFunc) - allocateSystemSGPRs(CCInfo, MF, *Info, IsShader); + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); - reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info); + // TODO: Could maybe omit SP if only tail calls? + bool NeedSP = FrameInfo.hasCalls() || FrameInfo.hasVarSizedObjects(); + + // Start adding system SGPRs. + if (IsEntryFunc) { + allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader); + reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info, NeedSP); + } else { + CCInfo.AllocateReg(Info->getScratchRSrcReg()); + CCInfo.AllocateReg(Info->getScratchWaveOffsetReg()); + CCInfo.AllocateReg(Info->getFrameOffsetReg()); + + if (NeedSP) { + unsigned StackPtrReg = findFirstFreeSGPR(CCInfo); + CCInfo.AllocateReg(StackPtrReg); + Info->setStackPtrOffsetReg(StackPtrReg); + } + } return Chains.empty() ? Chain : DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } +// TODO: If return values can't fit in registers, we should return as many as +// possible in registers before passing on stack. +bool SITargetLowering::CanLowerReturn( + CallingConv::ID CallConv, + MachineFunction &MF, bool IsVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const { + // Replacing returns with sret/stack usage doesn't make sense for shaders. + // FIXME: Also sort of a workaround for custom vector splitting in LowerReturn + // for shaders. Vector types should be explicitly handled by CC. + if (AMDGPU::isEntryFunctionCC(CallConv)) + return true; + + SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg)); +} + SDValue SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1319,11 +1454,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, MachineFunction &MF = DAG.getMachineFunction(); SIMachineFunctionInfo *Info = MF.getInfo(); - if (!AMDGPU::isShader(CallConv)) + if (AMDGPU::isKernel(CallConv)) { return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs, OutVals, DL, DAG); + } + + bool IsShader = AMDGPU::isShader(CallConv); Info->setIfReturnsVoid(Outs.size() == 0); + bool IsWaveEnd = Info->returnsVoid() && IsShader; SmallVector Splits; SmallVector SplitVals; @@ -1332,7 +1471,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, for (unsigned i = 0, e = Outs.size(); i != e; ++i) { const ISD::OutputArg &Out = Outs[i]; - if (Out.VT.isVector()) { + if (IsShader && Out.VT.isVector()) { MVT VT = Out.VT.getVectorElementType(); ISD::OutputArg NewOut = Out; NewOut.Flags.setSplit(); @@ -1363,29 +1502,58 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, *DAG.getContext()); // Analyze outgoing return values. - AnalyzeReturn(CCInfo, Splits); + CCInfo.AnalyzeReturn(Splits, CCAssignFnForReturn(CallConv, isVarArg)); SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + // Add return address for callable functions. + if (!Info->isEntryFunction()) { + const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); + SDValue ReturnAddrReg = CreateLiveInRegister( + DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64); + + // FIXME: Should be able to use a vreg here, but need a way to prevent it + // from being allcoated to a CSR. + + SDValue PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF), + MVT::i64); + + Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, Flag); + Flag = Chain.getValue(1); + + RetOps.push_back(PhysReturnAddrReg); + } + // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); + // TODO: Partially return in registers if return values don't fit. SDValue Arg = SplitVals[realRVLocIdx]; // Copied from other backends. switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + default: + llvm_unreachable("Unknown loc info!"); } Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); @@ -1393,12 +1561,16 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + // FIXME: Does sret work properly? + // Update chain and glue. RetOps[0] = Chain; if (Flag.getNode()) RetOps.push_back(Flag); - unsigned Opc = Info->returnsVoid() ? AMDGPUISD::ENDPGM : AMDGPUISD::RETURN_TO_EPILOG; + unsigned Opc = AMDGPUISD::ENDPGM; + if (!IsWaveEnd) + Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG; return DAG.getNode(Opc, DL, MVT::Other, RetOps); } @@ -1637,7 +1809,7 @@ computeIndirectRegAndOffset(const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, unsigned VecReg, int Offset) { - int NumElts = SuperRC->getSize() / 4; + int NumElts = TRI.getRegSizeInBits(*SuperRC) / 32; // Skip out of bounds offsets, or else we would end up using an undefined // register. @@ -1786,17 +1958,18 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI, return LoopBB; } -static unsigned getMOVRELDPseudo(const TargetRegisterClass *VecRC) { - switch (VecRC->getSize()) { - case 4: +static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI, + const TargetRegisterClass *VecRC) { + switch (TRI.getRegSizeInBits(*VecRC)) { + case 32: // 4 bytes return AMDGPU::V_MOVRELD_B32_V1; - case 8: + case 64: // 8 bytes return AMDGPU::V_MOVRELD_B32_V2; - case 16: + case 128: // 16 bytes return AMDGPU::V_MOVRELD_B32_V4; - case 32: + case 256: // 32 bytes return AMDGPU::V_MOVRELD_B32_V8; - case 64: + case 512: // 64 bytes return AMDGPU::V_MOVRELD_B32_V16; default: llvm_unreachable("unsupported size for MOVRELD pseudos"); @@ -1856,7 +2029,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF)); } else { - const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC)); + const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC)); BuildMI(MBB, I, DL, MovRelDesc) .addReg(Dst, RegState::Define) @@ -1900,7 +2073,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI, .addReg(PhiReg, RegState::Implicit) .addReg(AMDGPU::M0, RegState::Implicit); } else { - const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC)); + const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC)); BuildMI(*LoopBB, InsPt, DL, MovRelDesc) .addReg(Dst, RegState::Define) @@ -1941,56 +2114,70 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( } switch (MI.getOpcode()) { - case AMDGPU::S_TRAP_PSEUDO: { - const DebugLoc &DL = MI.getDebugLoc(); - const int TrapType = MI.getOperand(0).getImm(); - - if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa && - Subtarget->isTrapHandlerEnabled()) { + case AMDGPU::SI_INIT_M0: + BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(), + TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .add(MI.getOperand(0)); + MI.eraseFromParent(); + return BB; - MachineFunction *MF = BB->getParent(); - SIMachineFunctionInfo *Info = MF->getInfo(); - unsigned UserSGPR = Info->getQueuePtrUserSGPR(); - assert(UserSGPR != AMDGPU::NoRegister); + case AMDGPU::SI_INIT_EXEC: + // This should be before all vector instructions. + BuildMI(*BB, &*BB->begin(), MI.getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), + AMDGPU::EXEC) + .addImm(MI.getOperand(0).getImm()); + MI.eraseFromParent(); + return BB; - if (!BB->isLiveIn(UserSGPR)) - BB->addLiveIn(UserSGPR); + case AMDGPU::SI_INIT_EXEC_FROM_INPUT: { + // Extract the thread count from an SGPR input and set EXEC accordingly. + // Since BFM can't shift by 64, handle that case with CMP + CMOV. + // + // S_BFE_U32 count, input, {shift, 7} + // S_BFM_B64 exec, count, 0 + // S_CMP_EQ_U32 count, 64 + // S_CMOV_B64 exec, -1 + MachineInstr *FirstMI = &*BB->begin(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + unsigned InputReg = MI.getOperand(0).getReg(); + unsigned CountReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + bool Found = false; + + // Move the COPY of the input reg to the beginning, so that we can use it. + for (auto I = BB->begin(); I != &MI; I++) { + if (I->getOpcode() != TargetOpcode::COPY || + I->getOperand(0).getReg() != InputReg) + continue; - BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1) - .addReg(UserSGPR); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP)) - .addImm(TrapType) - .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit); - } else { - switch (TrapType) { - case SISubtarget::TrapIDLLVMTrap: - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM)); - break; - case SISubtarget::TrapIDLLVMDebugTrap: { - DiagnosticInfoUnsupported NoTrap(*MF->getFunction(), - "debugtrap handler not supported", - DL, - DS_Warning); - LLVMContext &C = MF->getFunction()->getContext(); - C.diagnose(NoTrap); - BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP)) - .addImm(0); - break; - } - default: - llvm_unreachable("unsupported trap handler type!"); + if (I == FirstMI) { + FirstMI = &*++BB->begin(); + } else { + I->removeFromParent(); + BB->insert(FirstMI, &*I); } + Found = true; + break; } - + assert(Found); + (void)Found; + + // This should be before all vector instructions. + BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFE_U32), CountReg) + .addReg(InputReg) + .addImm((MI.getOperand(1).getImm() & 0x7f) | 0x70000); + BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_BFM_B64), + AMDGPU::EXEC) + .addReg(CountReg) + .addImm(0); + BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMP_EQ_U32)) + .addReg(CountReg, RegState::Kill) + .addImm(64); + BuildMI(*BB, FirstMI, DebugLoc(), TII->get(AMDGPU::S_CMOV_B64), + AMDGPU::EXEC) + .addImm(-1); MI.eraseFromParent(); return BB; } - case AMDGPU::SI_INIT_M0: - BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(), - TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) - .add(MI.getOperand(0)); - MI.eraseFromParent(); - return BB; case AMDGPU::GET_GROUPSTATICSIZE: { DebugLoc DL = MI.getDebugLoc(); @@ -2156,6 +2343,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::FP_ROUND: return lowerFP_ROUND(Op, DAG); + + case ISD::TRAP: + case ISD::DEBUGTRAP: + return lowerTRAP(Op, DAG); } return SDValue(); } @@ -2191,6 +2382,28 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, break; } } + case ISD::SELECT: { + SDLoc SL(N); + EVT VT = N->getValueType(0); + EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT); + SDValue LHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(1)); + SDValue RHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(2)); + + EVT SelectVT = NewVT; + if (NewVT.bitsLT(MVT::i32)) { + LHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, LHS); + RHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, RHS); + SelectVT = MVT::i32; + } + + SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, SelectVT, + N->getOperand(0), LHS, RHS); + + if (NewVT != SelectVT) + NewSelect = DAG.getNode(ISD::TRUNCATE, SL, NewVT, NewSelect); + Results.push_back(DAG.getNode(ISD::BITCAST, SL, VT, NewSelect)); + return; + } default: break; } @@ -2399,7 +2612,58 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16); - return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);; + return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc); +} + +SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + MachineFunction &MF = DAG.getMachineFunction(); + SDValue Chain = Op.getOperand(0); + + unsigned TrapID = Op.getOpcode() == ISD::DEBUGTRAP ? + SISubtarget::TrapIDLLVMDebugTrap : SISubtarget::TrapIDLLVMTrap; + + if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa && + Subtarget->isTrapHandlerEnabled()) { + SIMachineFunctionInfo *Info = MF.getInfo(); + unsigned UserSGPR = Info->getQueuePtrUserSGPR(); + assert(UserSGPR != AMDGPU::NoRegister); + + SDValue QueuePtr = CreateLiveInRegister( + DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64); + + SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64); + + SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01, + QueuePtr, SDValue()); + + SDValue Ops[] = { + ToReg, + DAG.getTargetConstant(TrapID, SL, MVT::i16), + SGPR01, + ToReg.getValue(1) + }; + + return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); + } + + switch (TrapID) { + case SISubtarget::TrapIDLLVMTrap: + return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain); + case SISubtarget::TrapIDLLVMDebugTrap: { + DiagnosticInfoUnsupported NoTrap(*MF.getFunction(), + "debugtrap handler not supported", + Op.getDebugLoc(), + DS_Warning); + LLVMContext &Ctx = MF.getFunction()->getContext(); + Ctx.diagnose(NoTrap); + return Chain; + } + default: + llvm_unreachable("unsupported trap handler type!"); + } + + return Chain; } SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, @@ -2551,6 +2815,15 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Vec = Op.getOperand(0); SDValue Idx = Op.getOperand(1); + DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr); + + // Make sure we we do any optimizations that will make it easier to fold + // source modifiers before obscuring it with bit operations. + + // XXX - Why doesn't this get called when vector_shuffle is expanded? + if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI)) + return Combined; + if (const ConstantSDNode *CIdx = dyn_cast(Idx)) { SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec); @@ -3182,6 +3455,14 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, return DAG.getNode(NodeOp, DL, MVT::Other, Chain, Op.getOperand(2), Glue); } + case Intrinsic::amdgcn_init_exec: { + return DAG.getNode(AMDGPUISD::INIT_EXEC, DL, MVT::Other, Chain, + Op.getOperand(2)); + } + case Intrinsic::amdgcn_init_exec_from_input: { + return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain, + Op.getOperand(2), Op.getOperand(3)); + } case AMDGPUIntrinsic::SI_tbuffer_store: { SDValue Ops[] = { Chain, @@ -3298,7 +3579,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && - isMemOpHasNoClobberedMemOperand(Load)) + !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private @@ -3381,9 +3662,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, EVT VT = Op.getValueType(); bool Unsafe = DAG.getTarget().Options.UnsafeFPMath; + if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) + return SDValue(); + if (const ConstantFPSDNode *CLHS = dyn_cast(LHS)) { - if (Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals()) || - VT == MVT::f16) { + if (Unsafe || VT == MVT::f32 || VT == MVT::f16) { if (CLHS->isExactlyValue(1.0)) { // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to // the CI documentation has a worst case error of 1 ulp. @@ -3412,15 +3695,15 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, } } - const SDNodeFlags *Flags = Op->getFlags(); + const SDNodeFlags Flags = Op->getFlags(); - if (Unsafe || Flags->hasAllowReciprocal()) { + if (Unsafe || Flags.hasAllowReciprocal()) { // Turn into multiply by the reciprocal. // x / y -> x * (1.0 / y) - SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + SDNodeFlags NewFlags; + NewFlags.setUnsafeAlgebra(true); SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); - return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, NewFlags); } return SDValue(); @@ -3966,12 +4249,40 @@ SDValue SITargetLowering::performAndCombine(SDNode *N, SDValue RHS = N->getOperand(1); - if (VT == MVT::i64) { - const ConstantSDNode *CRHS = dyn_cast(RHS); - if (CRHS) { - if (SDValue Split - = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS)) - return Split; + const ConstantSDNode *CRHS = dyn_cast(RHS); + if (VT == MVT::i64 && CRHS) { + if (SDValue Split + = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS)) + return Split; + } + + if (CRHS && VT == MVT::i32) { + // and (srl x, c), mask => shl (bfe x, nb + c, mask >> nb), nb + // nb = number of trailing zeroes in mask + // It can be optimized out using SDWA for GFX8+ in the SDWA peephole pass, + // given that we are selecting 8 or 16 bit fields starting at byte boundary. + uint64_t Mask = CRHS->getZExtValue(); + unsigned Bits = countPopulation(Mask); + if (getSubtarget()->hasSDWA() && LHS->getOpcode() == ISD::SRL && + (Bits == 8 || Bits == 16) && isShiftedMask_64(Mask) && !(Mask & 1)) { + if (auto *CShift = dyn_cast(LHS->getOperand(1))) { + unsigned Shift = CShift->getZExtValue(); + unsigned NB = CRHS->getAPIntValue().countTrailingZeros(); + unsigned Offset = NB + Shift; + if ((Offset & (Bits - 1)) == 0) { // Starts at a byte or word boundary. + SDLoc SL(N); + SDValue BFE = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32, + LHS->getOperand(0), + DAG.getConstant(Offset, SL, MVT::i32), + DAG.getConstant(Bits, SL, MVT::i32)); + EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(), Bits); + SDValue Ext = DAG.getNode(ISD::AssertZext, SL, VT, BFE, + DAG.getValueType(NarrowVT)); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(LHS), VT, Ext, + DAG.getConstant(NB, SDLoc(CRHS), MVT::i32)); + return Shl; + } + } } } @@ -4372,7 +4683,8 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N, if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY && - VT != MVT::f64) { + VT != MVT::f64 && + ((VT != MVT::f16 && VT != MVT::i16) || Subtarget->hasMin3Max3_16())) { // max(max(a, b), c) -> max3(a, b, c) // min(min(a, b), c) -> min3(a, b, c) if (Op0.getOpcode() == Opc && Op0.hasOneUse()) { @@ -4487,6 +4799,24 @@ SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N, return SDValue(); } +SDValue SITargetLowering::performExtractVectorEltCombine( + SDNode *N, DAGCombinerInfo &DCI) const { + SDValue Vec = N->getOperand(0); + + SelectionDAG &DAG= DCI.DAG; + if (Vec.getOpcode() == ISD::FNEG && allUsesHaveSourceMods(N)) { + SDLoc SL(N); + EVT EltVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, + Vec.getOperand(0), Idx); + return DAG.getNode(ISD::FNEG, SL, EltVT, Elt); + } + + return SDValue(); +} + + unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, const SDNode *N0, const SDNode *N1) const { @@ -4499,10 +4829,9 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, return ISD::FMAD; const TargetOptions &Options = DAG.getTarget().Options; - if ((Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath || - (cast(N0)->Flags.hasUnsafeAlgebra() && - cast(N1)->Flags.hasUnsafeAlgebra())) && + if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || + (N0->getFlags().hasUnsafeAlgebra() && + N1->getFlags().hasUnsafeAlgebra())) && isFMAFasterThanFMulAndFAdd(VT)) { return ISD::FMA; } @@ -4663,12 +4992,12 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N, APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Src, Demanded) || - TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) { + if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) || + TLI.SimplifyDemandedBits(Src, Demanded, Known, TLO)) { DCI.CommitTargetLoweringOpt(TLO); } @@ -4775,6 +5104,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, break; } + case ISD::EXTRACT_VECTOR_ELT: + return performExtractVectorEltCombine(N, DCI); } return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); } @@ -4885,8 +5216,33 @@ static bool isFrameIndexOp(SDValue Op) { /// \brief Legalize target independent instructions (e.g. INSERT_SUBREG) /// with frame index operands. /// LLVM assumes that inputs are to these instructions are registers. -void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, - SelectionDAG &DAG) const { +SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, + SelectionDAG &DAG) const { + if (Node->getOpcode() == ISD::CopyToReg) { + RegisterSDNode *DestReg = cast(Node->getOperand(1)); + SDValue SrcVal = Node->getOperand(2); + + // Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have + // to try understanding copies to physical registers. + if (SrcVal.getValueType() == MVT::i1 && + TargetRegisterInfo::isPhysicalRegister(DestReg->getReg())) { + SDLoc SL(Node); + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + SDValue VReg = DAG.getRegister( + MRI.createVirtualRegister(&AMDGPU::VReg_1RegClass), MVT::i1); + + SDNode *Glued = Node->getGluedNode(); + SDValue ToVReg + = DAG.getCopyToReg(Node->getOperand(0), SL, VReg, SrcVal, + SDValue(Glued, Glued ? Glued->getNumValues() - 1 : 0)); + SDValue ToResultReg + = DAG.getCopyToReg(ToVReg, SL, SDValue(DestReg, 0), + VReg, ToVReg.getValue(1)); + DAG.ReplaceAllUsesWith(Node, ToResultReg.getNode()); + DAG.RemoveDeadNode(Node); + return ToResultReg.getNode(); + } + } SmallVector Ops; for (unsigned i = 0; i < Node->getNumOperands(); ++i) { @@ -4902,6 +5258,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, } DAG.UpdateNodeOperands(Node, Ops); + return Node; } /// \brief Fold the instructions after selecting them. diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 452ee684ef614750510c3f49a28a82eef0028b05..8e2ec40b224cd63e73e21703c179377bdf1e2187 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -28,6 +28,10 @@ class SITargetLowering final : public AMDGPUTargetLowering { uint64_t Offset, bool Signed, const ISD::InputArg *Arg = nullptr) const; + SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, + const SDLoc &SL, SDValue Chain, + const ISD::InputArg &Arg) const; + SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const override; SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, @@ -100,6 +104,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const; unsigned getFusedOpcode(const SelectionDAG &DAG, const SDNode *N0, const SDNode *N1) const; @@ -145,6 +150,8 @@ public: bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override; @@ -176,7 +183,12 @@ public: const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; - SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + bool CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; @@ -206,7 +218,7 @@ public: SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const override; - void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const; + SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const; MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) const; diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index c2a3e62aa82747124e1424afdbc337fb5e552686..0f009a48754adba6c1dd464d5d22cebb118918f4 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -229,7 +229,7 @@ public: MachineInstr &MI); BlockWaitcntBrackets() - : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false), + : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false), LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { @@ -428,8 +428,8 @@ RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI, const MachineInstr &MIA = *MI; const TargetRegisterClass *RC = TII->getOpRegClass(MIA, OpNo); - unsigned Size = RC->getSize(); - Result.second = Result.first + (Size / 4); + unsigned Size = TRI->getRegSizeInBits(*RC); + Result.second = Result.first + (Size / 32); return Result; } @@ -826,7 +826,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( // NOTE: this could be improved with knowledge of all call sites or // with knowledge of the called routines. if (MI.getOpcode() == AMDGPU::RETURN || - MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) { + MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || + MI.getOpcode() == AMDGPU::S_SETPC_B64_return) { for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) { @@ -1008,7 +1009,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( // occurs before the instruction. Doing it here prevents any additional // S_WAITCNTs from being emitted if the instruction was marked as // requiring a WAITCNT beforehand. - if (MI.getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) { + if (MI.getOpcode() == AMDGPU::S_BARRIER && + !ST->hasAutoWaitcntBeforeBarrier()) { EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT)); EmitSwaitcnt |= ScoreBrackets->updateByWait( @@ -1087,7 +1089,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) { MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent()); if (ContainingLoop) { - MachineBasicBlock *TBB = ContainingLoop->getTopBlock(); + MachineBasicBlock *TBB = ContainingLoop->getHeader(); BlockWaitcntBrackets *ScoreBracket = BlockWaitcntBracketsMap[TBB].get(); if (!ScoreBracket) { @@ -1097,7 +1099,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( } ScoreBracket->setRevisitLoop(true); DEBUG(dbgs() << "set-revisit: block" - << ContainingLoop->getTopBlock()->getNumber() << '\n';); + << ContainingLoop->getHeader()->getNumber() << '\n';); } } @@ -1149,8 +1151,10 @@ void SIInsertWaitcnts::updateEventWaitCntAfter( // instruction, update the upper-bound of the appropriate counter's // bracket and the destination operand scores. // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere. - if (TII->isDS(Inst) && (Inst.mayLoad() || Inst.mayStore())) { - if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) { + uint64_t TSFlags = Inst.getDesc().TSFlags; + if (TII->isDS(Inst) && (TSFlags & SIInstrFlags::LGKM_CNT)) { + if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds) && + TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) { ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst); ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst); } else { @@ -1183,7 +1187,7 @@ void SIInsertWaitcnts::updateEventWaitCntAfter( Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) { ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst); if ( // TODO: assumed yes -- target_info->MemWriteNeedsExpWait() && - (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()))) { + (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) { ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst); } } else if (TII->isSMRD(Inst)) { @@ -1715,6 +1719,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); MLI = &getAnalysis(); IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits()); + const SIMachineFunctionInfo *MFI = MF.getInfo(); AMDGPUASI = ST->getAMDGPUAS(); HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV); @@ -1758,12 +1763,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { // If we are walking into the block from before the loop, then guarantee // at least 1 re-walk over the loop to propagate the information, even if // no S_WAITCNT instructions were generated. - if (ContainingLoop && ContainingLoop->getTopBlock() == &MBB && J < I && + if (ContainingLoop && ContainingLoop->getHeader() == &MBB && J < I && (BlockWaitcntProcessedSet.find(&MBB) == BlockWaitcntProcessedSet.end())) { BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true); DEBUG(dbgs() << "set-revisit: block" - << ContainingLoop->getTopBlock()->getNumber() << '\n';); + << ContainingLoop->getHeader()->getNumber() << '\n';); } // Walk over the instructions. @@ -1774,7 +1779,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { // See if we want to revisit the loop. if (ContainingLoop && loopBottom(ContainingLoop) == &MBB) { - MachineBasicBlock *EntryBB = ContainingLoop->getTopBlock(); + MachineBasicBlock *EntryBB = ContainingLoop->getHeader(); BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get(); if (EntrySB && EntrySB->getRevisitLoop()) { EntrySB->setRevisitLoop(false); @@ -1859,5 +1864,19 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { } } + if (!MFI->isEntryFunction()) { + // Wait for any outstanding memory operations that the input registers may + // depend on. We can't track them and it's better to to the wait after the + // costly call sequence. + + // TODO: Could insert earlier and schedule more liberally with operations + // that only use caller preserved registers. + MachineBasicBlock &EntryBB = MF.front(); + BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) + .addImm(0); + + Modified = true; + } + return Modified; } diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp index 47257ce16ceb3367b88f3a4ab28a1c4cc29d139d..bc86515d8b1fe835dde29a43e37699f2f1299b3a 100644 --- a/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -216,8 +216,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // XXX - What if this is a write into a super register? const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0); - unsigned Size = RC->getSize(); - Result.Named.LGKM = Size > 4 ? 2 : 1; + unsigned Size = TRI->getRegSizeInBits(*RC); + Result.Named.LGKM = Size > 32 ? 2 : 1; } else { // s_dcache_inv etc. do not have a a destination register. Assume we // want a wait on these. @@ -289,12 +289,12 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC, const MachineOperand &Reg) const { - unsigned Size = RC->getSize(); - assert(Size >= 4); + unsigned Size = TRI->getRegSizeInBits(*RC); + assert(Size >= 32); RegInterval Result; Result.first = TRI->getEncodingValue(Reg.getReg()); - Result.second = Result.first + Size / 4; + Result.second = Result.first + Size / 32; return Result; } @@ -630,7 +630,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // but we also want to wait for any other outstanding transfers before // signalling other hardware blocks if ((I->getOpcode() == AMDGPU::S_BARRIER && - ST->needWaitcntBeforeBarrier()) || + !ST->hasAutoWaitcntBeforeBarrier()) || I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT) Required = LastIssued; diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index b83a1fe187eb7523da475887f1ec35b0b2469b61..02c9b4b1f0eeb1de944aa259115686609e0b2939 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -228,10 +228,10 @@ class EXPe : Enc64 { bits<1> compr; bits<1> done; bits<1> vm; - bits<8> vsrc0; - bits<8> vsrc1; - bits<8> vsrc2; - bits<8> vsrc3; + bits<8> src0; + bits<8> src1; + bits<8> src2; + bits<8> src3; let Inst{3-0} = en; let Inst{9-4} = tgt; @@ -239,10 +239,10 @@ class EXPe : Enc64 { let Inst{11} = done; let Inst{12} = vm; let Inst{31-26} = 0x3e; - let Inst{39-32} = vsrc0; - let Inst{47-40} = vsrc1; - let Inst{55-48} = vsrc2; - let Inst{63-56} = vsrc3; + let Inst{39-32} = src0; + let Inst{47-40} = src1; + let Inst{55-48} = src2; + let Inst{63-56} = src3; } let Uses = [EXEC] in { diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index b1e4c9a7aaa226d4060e62c2888fc1d98ccd9cba..1097814e99ce22024e59af99e4969f8b562901b3 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -20,10 +20,10 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" -#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" @@ -138,6 +138,11 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, } if (isSMRD(Opc0) && isSMRD(Opc1)) { + // Skip time and cache invalidation instructions. + if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 || + AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) + return false; + assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); // Check base reg. @@ -245,11 +250,11 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, unsigned EltSize; if (LdSt.mayLoad()) - EltSize = getOpRegClass(LdSt, 0)->getSize() / 2; + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16; else { assert(LdSt.mayStore()); int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); - EltSize = getOpRegClass(LdSt, Data0Idx)->getSize(); + EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8; } if (isStride64(Opc)) @@ -345,7 +350,7 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, FirstLdSt.getParent()->getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); - return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold; + return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold; } static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, @@ -433,7 +438,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned EltSize = 4; unsigned Opcode = AMDGPU::V_MOV_B32_e32; if (RI.isSGPRClass(RC)) { - if (RC->getSize() > 4) { + if (RI.getRegSizeInBits(*RC) > 32) { Opcode = AMDGPU::S_MOV_B64; EltSize = 8; } else { @@ -463,13 +468,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); - if (Idx == SubIndices.size() - 1) - Builder.addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); - if (Idx == 0) Builder.addReg(DestReg, RegState::Define | RegState::Implicit); - Builder.addReg(SrcReg, RegState::Implicit); + bool UseKill = KillSrc && Idx == SubIndices.size() - 1; + Builder.addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); } } @@ -491,13 +494,195 @@ int SIInstrInfo::commuteOpcode(unsigned Opcode) const { return Opcode; } +void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, unsigned DestReg, + int64_t Value) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RegClass = MRI.getRegClass(DestReg); + if (RegClass == &AMDGPU::SReg_32RegClass || + RegClass == &AMDGPU::SGPR_32RegClass || + RegClass == &AMDGPU::SReg_32_XM0RegClass || + RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) + .addImm(Value); + return; + } + + if (RegClass == &AMDGPU::SReg_64RegClass || + RegClass == &AMDGPU::SGPR_64RegClass || + RegClass == &AMDGPU::SReg_64_XEXECRegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) + .addImm(Value); + return; + } + + if (RegClass == &AMDGPU::VGPR_32RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) + .addImm(Value); + return; + } + if (RegClass == &AMDGPU::VReg_64RegClass) { + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg) + .addImm(Value); + return; + } + + unsigned EltSize = 4; + unsigned Opcode = AMDGPU::V_MOV_B32_e32; + if (RI.isSGPRClass(RegClass)) { + if (RI.getRegSizeInBits(*RegClass) > 32) { + Opcode = AMDGPU::S_MOV_B64; + EltSize = 8; + } else { + Opcode = AMDGPU::S_MOV_B32; + EltSize = 4; + } + } + + ArrayRef SubIndices = RI.getRegSplitParts(RegClass, EltSize); + for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { + int64_t IdxValue = Idx == 0 ? Value : 0; + + MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, + get(Opcode), RI.getSubReg(DestReg, Idx)); + Builder.addImm(IdxValue); + } +} + +const TargetRegisterClass * +SIInstrInfo::getPreferredSelectRegClass(unsigned Size) const { + return &AMDGPU::VGPR_32RegClass; +} + +void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DstReg, + ArrayRef Cond, + unsigned TrueReg, + unsigned FalseReg) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass && + "Not a VGPR32 reg"); + + if (Cond.size() == 1) { + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .add(Cond[0]); + } else if (Cond.size() == 2) { + assert(Cond[0].isImm() && "Cond[0] is not an immediate"); + switch (Cond[0].getImm()) { + case SIInstrInfo::SCC_TRUE: { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) + .addImm(-1) + .addImm(0); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addReg(SReg); + break; + } + case SIInstrInfo::SCC_FALSE: { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) + .addImm(0) + .addImm(-1); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addReg(SReg); + break; + } + case SIInstrInfo::VCCNZ: { + MachineOperand RegOp = Cond[1]; + RegOp.setImplicit(false); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .add(RegOp); + break; + } + case SIInstrInfo::VCCZ: { + MachineOperand RegOp = Cond[1]; + RegOp.setImplicit(false); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(TrueReg) + .addReg(FalseReg) + .add(RegOp); + break; + } + case SIInstrInfo::EXECNZ: { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) + .addImm(0); + BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) + .addImm(-1) + .addImm(0); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addReg(SReg); + break; + } + case SIInstrInfo::EXECZ: { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) + .addImm(0); + BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) + .addImm(0) + .addImm(-1); + BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) + .addReg(FalseReg) + .addReg(TrueReg) + .addReg(SReg); + llvm_unreachable("Unhandled branch predicate EXECZ"); + break; + } + default: + llvm_unreachable("invalid branch predicate"); + } + } else { + llvm_unreachable("Can only handle Cond size 1 or 2"); + } +} + +unsigned SIInstrInfo::insertEQ(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + unsigned SrcReg, int Value) const { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg) + .addImm(Value) + .addReg(SrcReg); + + return Reg; +} + +unsigned SIInstrInfo::insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + unsigned SrcReg, int Value) const { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg) + .addImm(Value) + .addReg(SrcReg); + + return Reg; +} + unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { - if (DstRC->getSize() == 4) { + if (RI.getRegSizeInBits(*DstRC) == 32) { return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; - } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { + } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) { return AMDGPU::S_MOV_B64; - } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { + } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) { return AMDGPU::V_MOV_B64_PSEUDO; } return AMDGPU::COPY; @@ -557,17 +742,18 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, Size, Align); + unsigned SpillSize = TRI->getSpillSize(*RC); if (RI.isSGPRClass(RC)) { MFI->setHasSpilledSGPRs(); // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling SGPRs. - const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize())); + const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize)); // The SGPR spill/restore instructions only work on number sgprs, so we need // to make sure we are using the correct register class. - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) { + if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); } @@ -577,7 +763,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) - .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit); + .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); // Add the scratch resource registers as implicit uses because we may end up // needing them, and need to ensure that the reserved registers are // correctly handled. @@ -602,13 +788,13 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); - unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize()); + unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize); MFI->setHasSpilledVGPRs(); BuildMI(MBB, MI, DL, get(Opcode)) .addReg(SrcReg, getKillRegState(isKill)) // data .addFrameIndex(FrameIndex) // addr .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc - .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addReg(MFI->getFrameOffsetReg()) // scratch_offset .addImm(0) // offset .addMemOperand(MMO); } @@ -660,6 +846,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, DebugLoc DL = MBB.findDebugLoc(MI); unsigned Align = FrameInfo.getObjectAlignment(FrameIndex); unsigned Size = FrameInfo.getObjectSize(FrameIndex); + unsigned SpillSize = TRI->getSpillSize(*RC); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(*MF, FrameIndex); @@ -670,8 +857,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (RI.isSGPRClass(RC)) { // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. - const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize())); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) { + const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize)); + if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) { MachineRegisterInfo &MRI = MF->getRegInfo(); MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); } @@ -680,7 +867,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // addr .addMemOperand(MMO) .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) - .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit); + .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); if (ST.hasScalarStores()) { // m0 is used for offset to scalar stores if used to spill. @@ -701,12 +888,12 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); - unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize()); + unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize); BuildMI(MBB, MI, DL, get(Opcode), DestReg) - .addFrameIndex(FrameIndex) // vaddr - .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc - .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset - .addImm(0) // offset + .addFrameIndex(FrameIndex) // vaddr + .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc + .addReg(MFI->getFrameOffsetReg()) // scratch_offset + .addImm(0) // offset .addMemOperand(MMO); } @@ -827,6 +1014,20 @@ void SIInstrInfo::insertNoop(MachineBasicBlock &MBB, insertWaitStates(MBB, MI, 1); } +void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const { + auto MF = MBB.getParent(); + SIMachineFunctionInfo *Info = MF->getInfo(); + + assert(Info->isEntryFunction()); + + if (MBB.succ_empty()) { + bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end(); + if (HasNoTerminator) + BuildMI(MBB, MBB.end(), DebugLoc(), + get(Info->returnsVoid() ? AMDGPU::S_ENDPGM : AMDGPU::SI_RETURN_TO_EPILOG)); + } +} + unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return 1; // FIXME: Do wait states equal cycles? @@ -1234,14 +1435,20 @@ bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB, return false; } - BranchPredicate Pred = getBranchPredicate(I->getOpcode()); - if (Pred == INVALID_BR) - return true; + MachineBasicBlock *CondBB = nullptr; - MachineBasicBlock *CondBB = I->getOperand(0).getMBB(); - Cond.push_back(MachineOperand::CreateImm(Pred)); - Cond.push_back(I->getOperand(1)); // Save the branch register. + if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + CondBB = I->getOperand(1).getMBB(); + Cond.push_back(I->getOperand(0)); + } else { + BranchPredicate Pred = getBranchPredicate(I->getOpcode()); + if (Pred == INVALID_BR) + return true; + CondBB = I->getOperand(0).getMBB(); + Cond.push_back(MachineOperand::CreateImm(Pred)); + Cond.push_back(I->getOperand(1)); // Save the branch register. + } ++I; if (I == MBB.end()) { @@ -1344,6 +1551,13 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, return 1; } + if(Cond.size() == 1 && Cond[0].isReg()) { + BuildMI(&MBB, DL, get(AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO)) + .add(Cond[0]) + .addMBB(TBB); + return 1; + } + assert(TBB && Cond[0].isImm()); unsigned Opcode @@ -1383,9 +1597,16 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, bool SIInstrInfo::reverseBranchCondition( SmallVectorImpl &Cond) const { - assert(Cond.size() == 2); - Cond[0].setImm(-Cond[0].getImm()); - return false; + if (Cond.size() != 2) { + return true; + } + + if (Cond[0].isImm()) { + Cond[0].setImm(-Cond[0].getImm()); + return false; + } + + return true; } bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, @@ -1440,9 +1661,9 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); - unsigned DstSize = DstRC->getSize(); + unsigned DstSize = RI.getRegSizeInBits(*DstRC); - if (DstSize == 4) { + if (DstSize == 32) { unsigned SelOp = Pred == SCC_TRUE ? AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32; @@ -1456,7 +1677,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, return; } - if (DstSize == 8 && Pred == SCC_TRUE) { + if (DstSize == 64 && Pred == SCC_TRUE) { MachineInstr *Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) .addReg(FalseReg) @@ -1483,7 +1704,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB, unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32; const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass; const int16_t *SubIndices = Sub0_15; - int NElts = DstSize / 4; + int NElts = DstSize / 32; // 64-bit select is only avaialble for SALU. if (Pred == SCC_TRUE) { @@ -2108,7 +2329,12 @@ static bool isSubRegOf(const SIRegisterInfo &TRI, bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { uint16_t Opcode = MI.getOpcode(); - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) + return true; + + const MachineFunction *MF = MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); @@ -2338,6 +2564,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, } } + if (isFLAT(MI) && !MF->getSubtarget().hasFlatInstOffsets()) { + const MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); + if (Offset->getImm() != 0) { + ErrInfo = "subtarget does not support offsets in flat instructions"; + return false; + } + } + return true; } @@ -2635,6 +2869,19 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) return; + // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for + // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane + // select is uniform. + if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() && + RI.isVGPR(MRI, Src1.getReg())) { + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + const DebugLoc &DL = MI.getDebugLoc(); + BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) + .add(Src1); + Src1.ChangeToRegister(Reg, false); + return; + } + // We do not use commuteInstruction here because it is too aggressive and will // commute if it is possible. We only want to commute here if it improves // legality. This can be called a fairly large number of times so don't waste @@ -2729,7 +2976,7 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); unsigned DstReg = MRI.createVirtualRegister(SRC); - unsigned SubRegs = VRC->getSize() / 4; + unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32; SmallVector SRegs; for (unsigned i = 0; i < SubRegs; ++i) { @@ -3560,36 +3807,22 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl &Worklist, switch (Inst.getOpcode()) { case AMDGPU::S_PACK_LL_B32_B16: { - // v_pack_b32_f16 flushes denormals if not enabled. Use it if the default - // is to leave them untouched. - // XXX: Does this do anything to NaNs? - if (ST.hasFP16Denormals()) { - BuildMI(*MBB, Inst, DL, get(AMDGPU::V_PACK_B32_F16), ResultReg) - .addImm(0) // src0_modifiers - .add(Src0) // src0 - .addImm(0) // src1_modifiers - .add(Src1) // src2 - .addImm(0) // clamp - .addImm(0); // omod - } else { - unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - - // FIXME: Can do a lot better if we know the high bits of src0 or src1 are - // 0. - BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) - .addImm(0xffff); + unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg) - .addReg(ImmReg, RegState::Kill) - .add(Src0); + // FIXME: Can do a lot better if we know the high bits of src0 or src1 are + // 0. + BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) + .addImm(0xffff); - BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg) - .add(Src1) - .addImm(16) - .addReg(TmpReg, RegState::Kill); - } + BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg) + .addReg(ImmReg, RegState::Kill) + .add(Src0); + BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg) + .add(Src1) + .addImm(16) + .addReg(TmpReg, RegState::Kill); break; } case AMDGPU::S_PACK_LH_B32_B16: { @@ -3609,7 +3842,7 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl &Worklist, .addImm(16) .add(Src0); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg) - .addImm(0xffff); + .addImm(0xffff0000); BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg) .add(Src1) .addReg(ImmReg, RegState::Kill) @@ -3914,6 +4147,82 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { return false; } +bool SIInstrInfo::isNonUniformBranchInstr(MachineInstr &Branch) const { + return Branch.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO; +} + +void SIInstrInfo::convertNonUniformIfRegion(MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const { + MachineBasicBlock::iterator TI = IfEntry->getFirstTerminator(); + assert(TI != IfEntry->end()); + + MachineInstr *Branch = &(*TI); + MachineFunction *MF = IfEntry->getParent(); + MachineRegisterInfo &MRI = IfEntry->getParent()->getRegInfo(); + + if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstr *SIIF = + BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_IF), DstReg) + .add(Branch->getOperand(0)) + .add(Branch->getOperand(1)); + MachineInstr *SIEND = + BuildMI(*MF, Branch->getDebugLoc(), get(AMDGPU::SI_END_CF)) + .addReg(DstReg); + + IfEntry->erase(TI); + IfEntry->insert(IfEntry->end(), SIIF); + IfEnd->insert(IfEnd->getFirstNonPHI(), SIEND); + } +} + +void SIInstrInfo::convertNonUniformLoopRegion( + MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const { + MachineBasicBlock::iterator TI = LoopEnd->getFirstTerminator(); + // We expect 2 terminators, one conditional and one unconditional. + assert(TI != LoopEnd->end()); + + MachineInstr *Branch = &(*TI); + MachineFunction *MF = LoopEnd->getParent(); + MachineRegisterInfo &MRI = LoopEnd->getParent()->getRegInfo(); + + if (Branch->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) { + + unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned BackEdgeReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + MachineInstrBuilder HeaderPHIBuilder = + BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); + for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), + E = LoopEntry->pred_end(); + PI != E; ++PI) { + if (*PI == LoopEnd) { + HeaderPHIBuilder.addReg(BackEdgeReg); + } else { + MachineBasicBlock *PMBB = *PI; + unsigned ZeroReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), + ZeroReg, 0); + HeaderPHIBuilder.addReg(ZeroReg); + } + HeaderPHIBuilder.addMBB(*PI); + } + MachineInstr *HeaderPhi = HeaderPHIBuilder; + MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), + get(AMDGPU::SI_IF_BREAK), BackEdgeReg) + .addReg(DstReg) + .add(Branch->getOperand(0)); + MachineInstr *SILOOP = + BuildMI(*(MF), Branch->getDebugLoc(), get(AMDGPU::SI_LOOP)) + .addReg(BackEdgeReg) + .addMBB(LoopEntry); + + LoopEntry->insert(LoopEntry->begin(), HeaderPhi); + LoopEnd->erase(TI); + LoopEnd->insert(LoopEnd->end(), SIIFBREAK); + LoopEnd->insert(LoopEnd->end(), SILOOP); + } +} + ArrayRef> SIInstrInfo::getSerializableTargetIndices() const { static const std::pair TargetIndices[] = { @@ -3944,3 +4253,16 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI); } + +MachineInstrBuilder +SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + unsigned DestReg) const { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + + return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) + .addReg(UnusedCarry, RegState::Define | RegState::Dead); +} diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 18099abc1019374a9d9f4334d9af28f5f90a9a8b..f6e5e8883f63c8f940022a4571b36ad57a06fc29 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -143,6 +143,23 @@ public: RegScavenger *RS, unsigned TmpReg, unsigned Offset, unsigned Size) const; + void materializeImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const DebugLoc &DL, + unsigned DestReg, + int64_t Value) const; + + const TargetRegisterClass *getPreferredSelectRegClass( + unsigned Size) const; + + unsigned insertNE(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned SrcReg, int Value) const; + + unsigned insertEQ(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned SrcReg, int Value) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -193,7 +210,7 @@ public: bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, - bool AllowModify) const override; + bool AllowModify = false) const override; unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr) const override; @@ -218,6 +235,11 @@ public: unsigned DstReg, ArrayRef Cond, unsigned TrueReg, unsigned FalseReg) const override; + void insertVectorSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DstReg, ArrayRef Cond, + unsigned TrueReg, unsigned FalseReg) const; + bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA = nullptr) const override; @@ -626,13 +648,13 @@ public: return 4; } - return RI.getRegClass(OpInfo.RegClass)->getSize(); + return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8; } /// \brief This form should usually be preferred since it handles operands /// with unknown register classes. unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const { - return getOpRegClass(MI, OpNo)->getSize(); + return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8; } /// \returns true if it is legal for the operand at index \p OpNo @@ -705,6 +727,7 @@ public: void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + void insertReturn(MachineBasicBlock &MBB) const; /// \brief Return the number of wait states that result from executing this /// instruction. unsigned getNumWaitStates(const MachineInstr &MI) const; @@ -750,6 +773,14 @@ public: bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; + bool isNonUniformBranchInstr(MachineInstr &Instr) const; + + void convertNonUniformIfRegion(MachineBasicBlock *IfEntry, + MachineBasicBlock *IfEnd) const; + + void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, + MachineBasicBlock *LoopEnd) const; + ArrayRef> getSerializableTargetIndices() const override; @@ -761,6 +792,15 @@ public: CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override; bool isBasicBlockPrologue(const MachineInstr &MI) const override; + + /// \brief Return a partially built integer add instruction without carry. + /// Caller must add source operands. + /// For pre-GFX9 it will generate unused carry destination operand. + /// TODO: After GFX9 it should return a no-carry operation. + MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const DebugLoc &DL, + unsigned DestReg) const; }; namespace AMDGPU { diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index c6daf743f3ac1e4157b377152fb5b7153013421e..470a47b024433fa5a0669fec5106ba184b2ce906 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -383,6 +383,14 @@ def SendMsgMatchClass : AsmOperandClass { let RenderMethod = "addImmOperands"; } +def SwizzleMatchClass : AsmOperandClass { + let Name = "Swizzle"; + let PredicateMethod = "isSwizzle"; + let ParserMethod = "parseSwizzleOp"; + let RenderMethod = "addImmOperands"; + let IsOptional = 1; +} + def ExpTgtMatchClass : AsmOperandClass { let Name = "ExpTgt"; let PredicateMethod = "isExpTgt"; @@ -395,6 +403,11 @@ def SendMsgImm : Operand { let ParserMatchClass = SendMsgMatchClass; } +def SwizzleImm : Operand { + let PrintMethod = "printSwizzle"; + let ParserMatchClass = SwizzleMatchClass; +} + def SWaitMatchClass : AsmOperandClass { let Name = "SWaitCnt"; let RenderMethod = "addImmOperands"; @@ -439,6 +452,27 @@ def ExpSrc3 : RegisterOperand { let ParserMatchClass = VReg32OrOffClass; } +class SDWA9Src : RegisterOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_SDWA9_SRC"; + let EncoderMethod = "getSDWA9SrcEncoding"; +} + +def SDWA9Src32 : SDWA9Src { + let DecoderMethod = "decodeSDWA9Src32"; +} + +def SDWA9Src16 : SDWA9Src { + let DecoderMethod = "decodeSDWA9Src16"; +} + +def SDWA9VopcDst : VOPDstOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_SDWA9_VOPC_DST"; + let EncoderMethod = "getSDWA9VopcDstEncoding"; + let DecoderMethod = "decodeSDWA9VopcDst"; +} + class NamedMatchClass : AsmOperandClass { let Name = "Imm"#CName; let PredicateMethod = "is"#CName; @@ -458,11 +492,21 @@ class NamedOperandU8 : Operand { let ParserMatchClass = MatchClass; } +class NamedOperandU12 : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + class NamedOperandU16 : Operand { let PrintMethod = "print"#Name; let ParserMatchClass = MatchClass; } +class NamedOperandS13 : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; +} + class NamedOperandU32 : Operand { let PrintMethod = "print"#Name; let ParserMatchClass = MatchClass; @@ -480,6 +524,8 @@ def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>; def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>; def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>; +def offset_u12 : NamedOperandU12<"Offset", NamedMatchClass<"OffsetU12">>; +def offset_s13 : NamedOperandS13<"Offset", NamedMatchClass<"OffsetS13">>; def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>; def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>; def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>; @@ -588,6 +634,16 @@ class IntInputMods : InputMods def Int32InputMods : IntInputMods; def Int64InputMods : IntInputMods; +def FPRegInputModsMatchClass : AsmOperandClass { + let Name = "RegWithFPInputMods"; + let ParserMethod = "parseRegWithFPInputMods"; + let PredicateMethod = "isRegKind"; +} + +def FPRegInputMods : InputMods { + let PrintMethod = "printOperandAndFPInputMods"; +} + def FPVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithFPInputMods"; let ParserMethod = "parseRegWithFPInputMods"; @@ -598,6 +654,17 @@ def FPVRegInputMods : InputMods { let PrintMethod = "printOperandAndFPInputMods"; } + +def IntRegInputModsMatchClass : AsmOperandClass { + let Name = "RegWithIntInputMods"; + let ParserMethod = "parseRegWithIntInputMods"; + let PredicateMethod = "isRegKind"; +} + +def IntRegInputMods : InputMods { + let PrintMethod = "printOperandAndIntInputMods"; +} + def IntVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithIntInputMods"; let ParserMethod = "parseRegWithIntInputMods"; @@ -646,11 +713,10 @@ def DS64Bit4ByteAligned : ComplexPattern; def MOVRELOffset : ComplexPattern; def VOP3Mods0 : ComplexPattern; -def VOP3NoMods0 : ComplexPattern; def VOP3Mods0Clamp : ComplexPattern; def VOP3Mods0Clamp0OMod : ComplexPattern; def VOP3Mods : ComplexPattern; -def VOP3NoMods : ComplexPattern; +def VOP3NoMods : ComplexPattern; // VOP3Mods, but the input source is known to never be NaN. def VOP3Mods_nnan : ComplexPattern; @@ -784,6 +850,14 @@ class getVALUDstForVT { VOPDstOperand)))); // else VT == i1 } +// Returns the register class to use for the destination of VOP[12C] +// instructions with GFX9 SDWA extension +class getSDWA9DstForVT { + RegisterOperand ret = !if(!eq(VT.Size, 1), + SDWA9VopcDst, // VOPC + VOPDstOperand); // VOP1/2 32-bit dst +} + // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT { @@ -824,6 +898,9 @@ class getVregSrcForVT { !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); } +class getSDWA9SrcForVT { + RegisterOperand ret = !if(!eq(VT.Size, 16), SDWA9Src16, SDWA9Src32); +} // Returns the register class to use for sources of VOP3 instructions for the // given VT. @@ -927,6 +1004,15 @@ class getSrcModExt { Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } +// Return type of input modifiers operand specified input operand for SDWA 9 +class getSrcModSDWA9 { + bit isFP = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + !if(!eq(VT.Value, f64.Value), 1, + 0))); + Operand ret = !if(isFP, FPRegInputMods, IntRegInputMods); +} + // Returns the input arguments for VOP[12C] instructions for the given SrcVT. class getIns32 { dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 @@ -1063,6 +1149,7 @@ class getInsSDWA { + + dag ret = !if(!eq(NumSrcArgs, 0), + // VOP1 without input operands (V_NOP) + (ins), + !if(!eq(NumSrcArgs, 1), + // VOP1 + !if(!eq(HasSDWAOMod, 0), + // VOP1_SDWA9 without omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + clampmod:$clamp, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel), + // VOP1_SDWA9 with omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel)), + !if(!eq(NumSrcArgs, 2), + !if(!eq(DstVT.Size, 1), + // VOPC_SDWA9 + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP2_SDWA9 + !if(!eq(HasSDWAOMod, 0), + // VOP2_SDWA9 without omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + clampmod:$clamp, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP1_SDWA9 with omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel))), + (ins)/* endif */))); +} + // Outs for DPP and SDWA -class getOutsExt { +class getOutsExt { dag ret = !if(HasDst, !if(!eq(DstVT.Size, 1), (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions - (outs DstRCDPP:$vdst)), + (outs DstRCExt:$vdst)), + (outs)); // V_NOP +} + +// Outs for GFX9 SDWA +class getOutsSDWA9 { + dag ret = !if(HasDst, + !if(!eq(DstVT.Size, 1), + (outs DstRCSDWA9:$sdst), + (outs DstRCSDWA9:$vdst)), (outs)); // V_NOP } @@ -1154,8 +1294,7 @@ class getAsmDPP { +class getAsmSDWA { string dst = !if(HasDst, !if(!eq(DstVT.Size, 1), " vcc", // use vcc token as dst for VOPC instructioins @@ -1183,6 +1322,35 @@ class getAsmSDWA { + string dst = !if(HasDst, + !if(!eq(DstVT.Size, 1), + "$sdst", // VOPC + "$vdst"), // VOP1/2 + ""); + string src0 = "$src0_modifiers"; + string src1 = "$src1_modifiers"; + string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod"); + string args = !if(!eq(NumSrcArgs, 0), "", + !if(!eq(NumSrcArgs, 1), + ", "#src0, + ", "#src0#", "#src1 + ) + ); + string sdwa = !if(!eq(NumSrcArgs, 0), "", + !if(!eq(NumSrcArgs, 1), + out_mods#" $dst_sel $dst_unused $src0_sel", + !if(!eq(DstVT.Size, 1), + " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC + out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel" + ) + ) + ); + string ret = dst#args#sdwa; +} + + // Function that checks if instruction supports DPP and SDWA class getHasExt { @@ -1220,6 +1388,7 @@ class VOPProfile _ArgVT> { field RegisterOperand DstRC = getVALUDstForVT.ret; field RegisterOperand DstRCDPP = getVALUDstForVT.ret; field RegisterOperand DstRCSDWA = getVALUDstForVT.ret; + field RegisterOperand DstRCSDWA9 = getSDWA9DstForVT.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; field RegisterClass Src1RC32 = getVregSrcForVT.ret; field RegisterOperand Src0RC64 = getVOP3SrcForVT.ret; @@ -1229,6 +1398,8 @@ class VOPProfile _ArgVT> { field RegisterClass Src1DPP = getVregSrcForVT.ret; field RegisterClass Src0SDWA = getVregSrcForVT.ret; field RegisterClass Src1SDWA = getVregSrcForVT.ret; + field RegisterOperand Src0SDWA9 = getSDWA9SrcForVT.ret; + field RegisterOperand Src1SDWA9 = getSDWA9SrcForVT.ret; field Operand Src0Mod = getSrcMod.ret; field Operand Src1Mod = getSrcMod.ret; field Operand Src2Mod = getSrcMod.ret; @@ -1236,6 +1407,8 @@ class VOPProfile _ArgVT> { field Operand Src1ModDPP = getSrcModExt.ret; field Operand Src0ModSDWA = getSrcModExt.ret; field Operand Src1ModSDWA = getSrcModExt.ret; + field Operand Src0ModSDWA9 = getSrcModSDWA9.ret; + field Operand Src1ModSDWA9 = getSrcModSDWA9.ret; field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); @@ -1262,14 +1435,16 @@ class VOPProfile _ArgVT> { field bit HasSrc2Mods = !if(HasModifiers, BitOr.ret, 0); field bit HasClamp = HasModifiers; - field bit HasSDWAClamp = HasSrc0; + field bit HasSDWAClamp = EmitDst; field bit HasFPClamp = BitAnd.ret, HasClamp>.ret; field bit IsPacked = isPackedType.ret; field bit HasOpSel = IsPacked; field bit HasOMod = !if(HasOpSel, 0, HasModifiers); + field bit HasSDWAOMod = isFloatType.ret; field bit HasExt = getHasExt.ret; + field bit HasSDWA9 = HasExt; field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); @@ -1283,6 +1458,7 @@ class VOPProfile _ArgVT> { field dag Outs64 = Outs; field dag OutsDPP = getOutsExt.ret; field dag OutsSDWA = getOutsExt.ret; + field dag OutsSDWA9 = getOutsSDWA9.ret; field dag Ins32 = getIns32.ret; field dag Ins64 = getIns64 _ArgVT> { field dag InsSDWA = getInsSDWA.ret; + field dag InsSDWA9 = getInsSDWA9.ret; field string Asm32 = getAsm32.ret; field string Asm64 = getAsm64.ret; field string AsmVOP3P = getAsmVOP3P.ret; field string AsmDPP = getAsmDPP.ret; - field string AsmSDWA = getAsmSDWA.ret; + field string AsmSDWA = getAsmSDWA.ret; + field string AsmSDWA9 = getAsmSDWA9.ret; } class VOP_NO_EXT : VOPProfile { let HasExt = 0; + let HasSDWA9 = 0; } def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; @@ -1447,6 +1628,15 @@ def getSDWAOp : InstrMapping { let ValueCols = [["SDWA"]]; } +// Maps ordinary instructions to their SDWA GFX9 counterparts +def getSDWA9Op : InstrMapping { + let FilterClass = "VOP"; + let RowFields = ["OpName"]; + let ColFields = ["AsmVariantName"]; + let KeyCol = ["Default"]; + let ValueCols = [["SDWA9"]]; +} + def getMaskedMIMGOp : InstrMapping { let FilterClass = "MIMG_Mask"; let RowFields = ["Op"]; diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 2f89503e129a3abdb1260e1cc33b8feeaca4474e..3b4bdc864253cb2ab646392b1331cee978b38c43 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -94,6 +94,12 @@ defm V_INTERP_MOV_F32 : VINTRP_m < //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// +def ATOMIC_FENCE : SPseudoInstSI< + (outs), (ins i32imm:$ordering, i32imm:$scope), + [(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))], + "ATOMIC_FENCE $ordering, $scope"> { + let hasSideEffects = 1; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { @@ -111,12 +117,6 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_b64:$src0)>; } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] -def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> { - let hasSideEffects = 1; - let SALU = 1; - let usesCustomInserter = 1; -} - let usesCustomInserter = 1, SALU = 1 in { def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins), [(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>; @@ -174,6 +174,13 @@ def SI_MASK_BRANCH : VPseudoInstSI < let isTerminator = 1 in { + def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI < + (outs), + (ins SReg_64:$vcc, brtarget:$target), + [(brcond i1:$vcc, bb:$target)]> { + let Size = 12; +} + def SI_IF: CFPseudoInstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> { @@ -286,6 +293,19 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> { let isReMaterializable = 1; } +def SI_INIT_EXEC : SPseudoInstSI < + (outs), (ins i64imm:$src), []> { + let Defs = [EXEC]; + let usesCustomInserter = 1; + let isAsCheapAsAMove = 1; +} + +def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI < + (outs), (ins SSrc_b32:$input, i32imm:$shift), []> { + let Defs = [EXEC]; + let usesCustomInserter = 1; +} + // Return for returning shaders to a shader variant epilog. def SI_RETURN_TO_EPILOG : SPseudoInstSI < (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> { @@ -399,14 +419,19 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI < } // End SubtargetPredicate = isGCN let Predicates = [isGCN] in { -def : Pat< - (trap), - (S_TRAP_PSEUDO TRAPID.LLVM_TRAP) +def : Pat < + (AMDGPUinit_exec i64:$src), + (SI_INIT_EXEC (as_i64imm $src)) +>; + +def : Pat < + (AMDGPUinit_exec_from_input i32:$input, i32:$shift), + (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift)) >; def : Pat< - (debugtrap), - (S_TRAP_PSEUDO TRAPID.LLVM_DEBUG_TRAP) + (AMDGPUtrap timm:$trapid), + (S_TRAP $trapid) >; def : Pat< @@ -477,8 +502,8 @@ def : Pat < // fp_to_fp16 patterns def : Pat < - (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))), - (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod) + (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), + (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; def : Pat < @@ -507,11 +532,11 @@ def : Pat < multiclass FMADPat { def : Pat < - (vt (fmad (VOP3NoMods0 vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), - (VOP3NoMods vt:$src1, i32:$src1_modifiers), - (VOP3NoMods vt:$src2, i32:$src2_modifiers))), - (inst $src0_modifiers, $src0, $src1_modifiers, $src1, - $src2_modifiers, $src2, $clamp, $omod) + (vt (fmad (VOP3NoMods vt:$src0), + (VOP3NoMods vt:$src1), + (VOP3NoMods vt:$src2))), + (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, + SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE) >; } @@ -681,10 +706,9 @@ def : BitConvert ; // If denormals are not enabled, it only impacts the compare of the // inputs. The output result is not flushed. class ClampPat : Pat < - (vt (AMDGPUclamp - (VOP3Mods0Clamp vt:$src0, i32:$src0_modifiers, i32:$omod))), + (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))), (inst i32:$src0_modifiers, vt:$src0, - i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod) + i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE) >; def : ClampPat; diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 6b0d18efa81566cb08f2bc138bc10a96ec016669..c6ad61a325ccdfc2d2a90b1709df8af755660fd2 100644 --- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -68,39 +68,36 @@ using namespace llvm; namespace { class SILoadStoreOptimizer : public MachineFunctionPass { + + typedef struct { + MachineBasicBlock::iterator I; + MachineBasicBlock::iterator Paired; + unsigned EltSize; + unsigned Offset0; + unsigned Offset1; + unsigned BaseOff; + bool UseST64; + SmallVector InstsToMove; + } CombineInfo; + private: const SIInstrInfo *TII = nullptr; const SIRegisterInfo *TRI = nullptr; MachineRegisterInfo *MRI = nullptr; AliasAnalysis *AA = nullptr; - static bool offsetsCanBeCombined(unsigned Offset0, - unsigned Offset1, - unsigned EltSize); + static bool offsetsCanBeCombined(CombineInfo &CI); - MachineBasicBlock::iterator findMatchingDSInst( - MachineBasicBlock::iterator I, - unsigned EltSize, - SmallVectorImpl &InstsToMove); + bool findMatchingDSInst(CombineInfo &CI); - MachineBasicBlock::iterator mergeRead2Pair( - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - unsigned EltSize, - ArrayRef InstsToMove); + MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI); - MachineBasicBlock::iterator mergeWrite2Pair( - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - unsigned EltSize, - ArrayRef InstsToMove); + MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI); public: static char ID; - SILoadStoreOptimizer() : MachineFunctionPass(ID) {} - - SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) { + SILoadStoreOptimizer() : MachineFunctionPass(ID) { initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); } @@ -130,8 +127,8 @@ char SILoadStoreOptimizer::ID = 0; char &llvm::SILoadStoreOptimizerID = SILoadStoreOptimizer::ID; -FunctionPass *llvm::createSILoadStoreOptimizerPass(TargetMachine &TM) { - return new SILoadStoreOptimizer(TM); +FunctionPass *llvm::createSILoadStoreOptimizerPass() { + return new SILoadStoreOptimizer(); } static void moveInstsAfter(MachineBasicBlock::iterator I, @@ -199,46 +196,68 @@ canMoveInstsAcrossMemOp(MachineInstr &MemOp, return true; } -bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0, - unsigned Offset1, - unsigned Size) { +bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) { // XXX - Would the same offset be OK? Is there any reason this would happen or // be useful? - if (Offset0 == Offset1) + if (CI.Offset0 == CI.Offset1) return false; // This won't be valid if the offset isn't aligned. - if ((Offset0 % Size != 0) || (Offset1 % Size != 0)) + if ((CI.Offset0 % CI.EltSize != 0) || (CI.Offset1 % CI.EltSize != 0)) return false; - unsigned EltOffset0 = Offset0 / Size; - unsigned EltOffset1 = Offset1 / Size; + unsigned EltOffset0 = CI.Offset0 / CI.EltSize; + unsigned EltOffset1 = CI.Offset1 / CI.EltSize; + CI.UseST64 = false; + CI.BaseOff = 0; + + // If the offset in elements doesn't fit in 8-bits, we might be able to use + // the stride 64 versions. + if ((EltOffset0 % 64 == 0) && (EltOffset1 % 64) == 0 && + isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64)) { + CI.Offset0 = EltOffset0 / 64; + CI.Offset1 = EltOffset1 / 64; + CI.UseST64 = true; + return true; + } // Check if the new offsets fit in the reduced 8-bit range. - if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1)) + if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1)) { + CI.Offset0 = EltOffset0; + CI.Offset1 = EltOffset1; return true; + } - // If the offset in elements doesn't fit in 8-bits, we might be able to use - // the stride 64 versions. - if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64) != 0) - return false; + // Try to shift base address to decrease offsets. + unsigned OffsetDiff = std::abs((int)EltOffset1 - (int)EltOffset0); + CI.BaseOff = std::min(CI.Offset0, CI.Offset1); + + if ((OffsetDiff % 64 == 0) && isUInt<8>(OffsetDiff / 64)) { + CI.Offset0 = (EltOffset0 - CI.BaseOff / CI.EltSize) / 64; + CI.Offset1 = (EltOffset1 - CI.BaseOff / CI.EltSize) / 64; + CI.UseST64 = true; + return true; + } + + if (isUInt<8>(OffsetDiff)) { + CI.Offset0 = EltOffset0 - CI.BaseOff / CI.EltSize; + CI.Offset1 = EltOffset1 - CI.BaseOff / CI.EltSize; + return true; + } - return isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64); + return false; } -MachineBasicBlock::iterator -SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I, - unsigned EltSize, - SmallVectorImpl &InstsToMove) { - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineBasicBlock::iterator MBBI = I; +bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) { + MachineBasicBlock::iterator E = CI.I->getParent()->end(); + MachineBasicBlock::iterator MBBI = CI.I; ++MBBI; SmallVector DefsToMove; - addDefsToList(*I, DefsToMove); + addDefsToList(*CI.I, DefsToMove); for ( ; MBBI != E; ++MBBI) { - if (MBBI->getOpcode() != I->getOpcode()) { + if (MBBI->getOpcode() != CI.I->getOpcode()) { // This is not a matching DS instruction, but we can keep looking as // long as one of these conditions are met: @@ -249,14 +268,14 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I, if (MBBI->hasUnmodeledSideEffects()) // We can't re-order this instruction with respect to other memory // opeations, so we fail both conditions mentioned above. - return E; + return false; if (MBBI->mayLoadOrStore() && - !memAccessesCanBeReordered(*I, *MBBI, TII, AA)) { + !memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA)) { // We fail condition #1, but we may still be able to satisfy condition // #2. Add this instruction to the move list and then we will check // if condition #2 holds once we have selected the matching instruction. - InstsToMove.push_back(&*MBBI); + CI.InstsToMove.push_back(&*MBBI); addDefsToList(*MBBI, DefsToMove); continue; } @@ -264,13 +283,13 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I, // When we match I with another DS instruction we will be moving I down // to the location of the matched instruction any uses of I will need to // be moved down as well. - addToListsIfDependent(*MBBI, DefsToMove, InstsToMove); + addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove); continue; } // Don't merge volatiles. if (MBBI->hasOrderedMemoryRef()) - return E; + return false; // Handle a case like // DS_WRITE_B32 addr, v, idx0 @@ -278,77 +297,67 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I, // DS_WRITE_B32 addr, f(w), idx1 // where the DS_READ_B32 ends up in InstsToMove and therefore prevents // merging of the two writes. - if (addToListsIfDependent(*MBBI, DefsToMove, InstsToMove)) + if (addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove)) continue; - int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr); - const MachineOperand &AddrReg0 = I->getOperand(AddrIdx); + int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), + AMDGPU::OpName::addr); + const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx); const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx); // Check same base pointer. Be careful of subregisters, which can occur with // vectors of pointers. if (AddrReg0.getReg() == AddrReg1.getReg() && AddrReg0.getSubReg() == AddrReg1.getSubReg()) { - int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), + int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset); - unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff; - unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff; + CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm() & 0xffff; + CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff; + CI.Paired = MBBI; // Check both offsets fit in the reduced range. // We also need to go through the list of instructions that we plan to // move and make sure they are all safe to move down past the merged // instruction. - if (offsetsCanBeCombined(Offset0, Offset1, EltSize) && - canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA)) - return MBBI; + if (offsetsCanBeCombined(CI)) + if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA)) + return true; } // We've found a load/store that we couldn't merge for some reason. // We could potentially keep looking, but we'd need to make sure that // it was safe to move I and also all the instruction in InstsToMove // down past this instruction. - if (!memAccessesCanBeReordered(*I, *MBBI, TII, AA) || // check if we can move I across MBBI - !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA) // check if we can move all I's users - ) + // check if we can move I across MBBI and if we can move all I's users + if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) || + !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA)) break; } - return E; + return false; } MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - unsigned EltSize, - ArrayRef InstsToMove) { - MachineBasicBlock *MBB = I->getParent(); + CombineInfo &CI) { + MachineBasicBlock *MBB = CI.I->getParent(); // Be careful, since the addresses could be subregisters themselves in weird // cases, like vectors of pointers. - const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr); - - const MachineOperand *Dest0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst); - const MachineOperand *Dest1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst); - - unsigned Offset0 - = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff; - unsigned Offset1 - = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff; - - unsigned NewOffset0 = Offset0 / EltSize; - unsigned NewOffset1 = Offset1 / EltSize; - unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64; - - // Prefer the st64 form if we can use it, even if we can fit the offset in the - // non st64 version. I'm not sure if there's any real reason to do this. - bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0); - if (UseST64) { - NewOffset0 /= 64; - NewOffset1 /= 64; - Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64; - } + const auto *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr); - unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1; - unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3; + const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst); + const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdst); + + unsigned NewOffset0 = CI.Offset0; + unsigned NewOffset1 = CI.Offset1; + unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2_B32 + : AMDGPU::DS_READ2_B64; + + if (CI.UseST64) + Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 + : AMDGPU::DS_READ2ST64_B64; + + unsigned SubRegIdx0 = (CI.EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1; + unsigned SubRegIdx1 = (CI.EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3; if (NewOffset0 > NewOffset1) { // Canonicalize the merged instruction so the smaller offset comes first. @@ -363,71 +372,70 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( const MCInstrDesc &Read2Desc = TII->get(Opc); const TargetRegisterClass *SuperRC - = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass; + = (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass; unsigned DestReg = MRI->createVirtualRegister(SuperRC); - DebugLoc DL = I->getDebugLoc(); - MachineInstrBuilder Read2 = BuildMI(*MBB, Paired, DL, Read2Desc, DestReg) - .add(*AddrReg) // addr - .addImm(NewOffset0) // offset0 - .addImm(NewOffset1) // offset1 - .addImm(0) // gds - .addMemOperand(*I->memoperands_begin()) - .addMemOperand(*Paired->memoperands_begin()); + DebugLoc DL = CI.I->getDebugLoc(); + + unsigned BaseReg = AddrReg->getReg(); + unsigned BaseRegFlags = 0; + if (CI.BaseOff) { + BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BaseRegFlags = RegState::Kill; + BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::V_ADD_I32_e32), BaseReg) + .addImm(CI.BaseOff) + .addReg(AddrReg->getReg()); + } + + MachineInstrBuilder Read2 = + BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg) + .addReg(BaseReg, BaseRegFlags) // addr + .addImm(NewOffset0) // offset0 + .addImm(NewOffset1) // offset1 + .addImm(0) // gds + .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired)); + (void)Read2; const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); // Copy to the old destination registers. - BuildMI(*MBB, Paired, DL, CopyDesc) + BuildMI(*MBB, CI.Paired, DL, CopyDesc) .add(*Dest0) // Copy to same destination including flags and sub reg. .addReg(DestReg, 0, SubRegIdx0); - MachineInstr *Copy1 = BuildMI(*MBB, Paired, DL, CopyDesc) + MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc) .add(*Dest1) .addReg(DestReg, RegState::Kill, SubRegIdx1); - moveInstsAfter(Copy1, InstsToMove); + moveInstsAfter(Copy1, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(I); - I->eraseFromParent(); - Paired->eraseFromParent(); + MachineBasicBlock::iterator Next = std::next(CI.I); + CI.I->eraseFromParent(); + CI.Paired->eraseFromParent(); DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n'); return Next; } MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( - MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - unsigned EltSize, - ArrayRef InstsToMove) { - MachineBasicBlock *MBB = I->getParent(); + CombineInfo &CI) { + MachineBasicBlock *MBB = CI.I->getParent(); // Be sure to use .addOperand(), and not .addReg() with these. We want to be // sure we preserve the subregister index and any register flags set on them. - const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr); - const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0); + const MachineOperand *Addr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr); + const MachineOperand *Data0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0); const MachineOperand *Data1 - = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0); - + = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0); - unsigned Offset0 - = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff; - unsigned Offset1 - = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff; + unsigned NewOffset0 = CI.Offset0; + unsigned NewOffset1 = CI.Offset1; + unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2_B32 + : AMDGPU::DS_WRITE2_B64; - unsigned NewOffset0 = Offset0 / EltSize; - unsigned NewOffset1 = Offset1 / EltSize; - unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64; - - // Prefer the st64 form if we can use it, even if we can fit the offset in the - // non st64 version. I'm not sure if there's any real reason to do this. - bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0); - if (UseST64) { - NewOffset0 /= 64; - NewOffset1 /= 64; - Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64; - } + if (CI.UseST64) + Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 + : AMDGPU::DS_WRITE2ST64_B64; if (NewOffset0 > NewOffset1) { // Canonicalize the merged instruction so the smaller offset comes first. @@ -440,23 +448,33 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair( "Computed offset doesn't fit"); const MCInstrDesc &Write2Desc = TII->get(Opc); - DebugLoc DL = I->getDebugLoc(); + DebugLoc DL = CI.I->getDebugLoc(); + + unsigned BaseReg = Addr->getReg(); + unsigned BaseRegFlags = 0; + if (CI.BaseOff) { + BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + BaseRegFlags = RegState::Kill; + BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::V_ADD_I32_e32), BaseReg) + .addImm(CI.BaseOff) + .addReg(Addr->getReg()); + } - MachineInstrBuilder Write2 = BuildMI(*MBB, Paired, DL, Write2Desc) - .add(*Addr) // addr - .add(*Data0) // data0 - .add(*Data1) // data1 - .addImm(NewOffset0) // offset0 - .addImm(NewOffset1) // offset1 - .addImm(0) // gds - .addMemOperand(*I->memoperands_begin()) - .addMemOperand(*Paired->memoperands_begin()); + MachineInstrBuilder Write2 = + BuildMI(*MBB, CI.Paired, DL, Write2Desc) + .addReg(BaseReg, BaseRegFlags) // addr + .add(*Data0) // data0 + .add(*Data1) // data1 + .addImm(NewOffset0) // offset0 + .addImm(NewOffset1) // offset1 + .addImm(0) // gds + .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired)); - moveInstsAfter(Write2, InstsToMove); + moveInstsAfter(Write2, CI.InstsToMove); - MachineBasicBlock::iterator Next = std::next(I); - I->eraseFromParent(); - Paired->eraseFromParent(); + MachineBasicBlock::iterator Next = std::next(CI.I); + CI.I->eraseFromParent(); + CI.Paired->eraseFromParent(); DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n'); return Next; @@ -477,27 +495,24 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { continue; } - SmallVector InstsToMove; + CombineInfo CI; + CI.I = I; unsigned Opc = MI.getOpcode(); if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) { - unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4; - MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size, - InstsToMove); - if (Match != E) { + CI.EltSize = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4; + if (findMatchingDSInst(CI)) { Modified = true; - I = mergeRead2Pair(I, Match, Size, InstsToMove); + I = mergeRead2Pair(CI); } else { ++I; } continue; } else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) { - unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4; - MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size, - InstsToMove); - if (Match != E) { + CI.EltSize = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4; + if (findMatchingDSInst(CI)) { Modified = true; - I = mergeWrite2Pair(I, Match, Size, InstsToMove); + I = mergeWrite2Pair(CI); } else { ++I; } diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 35d3a93d8710d946be94f2a95c722865cf12904c..5f1c7f1fc42f1d7bc0a5fc5c631c2acb4edf6357 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -60,8 +60,8 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 3680e02da5769d9a548b54868025c8ba66ac2bb7..ba616ada0c9cef63782386bb533cdc142bbabd0d 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -21,8 +21,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 8e612d2ddfdae85f40b3d73712b79465609a38b7..18b197ddb7ae7c04aaf76e5982cb413bafc014ea 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -25,6 +25,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) TIDReg(AMDGPU::NoRegister), ScratchRSrcReg(AMDGPU::NoRegister), ScratchWaveOffsetReg(AMDGPU::NoRegister), + FrameOffsetReg(AMDGPU::NoRegister), + StackPtrOffsetReg(AMDGPU::NoRegister), PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), DispatchPtrUserSGPR(AMDGPU::NoRegister), QueuePtrUserSGPR(AMDGPU::NoRegister), @@ -78,17 +80,22 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); WavesPerEU = ST.getWavesPerEU(*F); - // Non-entry functions have no special inputs for now. - // TODO: Return early for non-entry CCs. + if (!isEntryFunction()) { + // Non-entry functions have no special inputs for now, other registers + // required for scratch access. + ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; + ScratchWaveOffsetReg = AMDGPU::SGPR4; + FrameOffsetReg = AMDGPU::SGPR5; + return; + } CallingConv::ID CC = F->getCallingConv(); - if (CC == CallingConv::AMDGPU_PS) - PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); - - if (AMDGPU::isKernel(CC)) { + if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { KernargSegmentPtr = true; WorkGroupIDX = true; WorkItemIDX = true; + } else if (CC == CallingConv::AMDGPU_PS) { + PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); } if (ST.debuggerEmitPrologue()) { @@ -118,11 +125,17 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); bool MaySpill = ST.isVGPRSpillingEnabled(*F); - bool HasStackObjects = FrameInfo.hasStackObjects(); + bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls(); - if (HasStackObjects || MaySpill) + if (HasStackObjects || MaySpill) { PrivateSegmentWaveByteOffset = true; + // HS and GS always have the scratch wave offset in SGPR5 on GFX9. + if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && + (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) + PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5; + } + if (ST.isAmdCodeObjectV2(MF)) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index a84f3e274f82aa22d5350860a02df2fae9d63ef1..9fdb8caac6f2132a1bc6e64a3dd73c3745c28b92 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -15,8 +15,8 @@ #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H #include "AMDGPUMachineFunction.h" -#include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" @@ -88,6 +88,14 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { unsigned ScratchRSrcReg; unsigned ScratchWaveOffsetReg; + // This is the current function's incremented size from the kernel's scratch + // wave offset register. For an entry function, this is exactly the same as + // the ScratchWaveOffsetReg. + unsigned FrameOffsetReg; + + // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg. + unsigned StackPtrOffsetReg; + // Input registers for non-HSA ABI unsigned PrivateMemoryPtrUserSGPR; @@ -133,14 +141,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { AMDGPUBufferPseudoSourceValue BufferPSV; AMDGPUImagePseudoSourceValue ImagePSV; -public: - // FIXME: Make private +private: unsigned LDSWaveSpillSize; unsigned ScratchOffsetReg; unsigned NumUserSGPRs; unsigned NumSystemSGPRs; -private: bool HasSpilledSGPRs; bool HasSpilledVGPRs; bool HasNonSpillStackObjects; @@ -366,9 +372,24 @@ public: return ScratchWaveOffsetReg; } + unsigned getFrameOffsetReg() const { + return FrameOffsetReg; + } + + void setStackPtrOffsetReg(unsigned Reg) { + assert(Reg != AMDGPU::NoRegister && "Should never be unset"); + StackPtrOffsetReg = Reg; + } + + unsigned getStackPtrOffsetReg() const { + return StackPtrOffsetReg; + } + void setScratchWaveOffsetReg(unsigned Reg) { assert(Reg != AMDGPU::NoRegister && "Should never be unset"); ScratchWaveOffsetReg = Reg; + if (isEntryFunction()) + FrameOffsetReg = ScratchWaveOffsetReg; } unsigned getQueuePtrUserSGPR() const { @@ -535,6 +556,10 @@ public: llvm_unreachable("unexpected dimension"); } + unsigned getLDSWaveSpillSize() const { + return LDSWaveSpillSize; + } + const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { return &BufferPSV; } diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index 9d4e677400e69580e431303c84a03fe6f82ea940..bb17dbbdfbd62225e2d1eec0fa5473f10258ca39 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "SIMachineScheduler.h" #include "AMDGPU.h" #include "SIInstrInfo.h" -#include "SIMachineScheduler.h" #include "SIRegisterInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index e02c2e3240e849409404ca4f0999405b90781776..f4ddf1891683bde512ca99f5b05ee0640dfb67b3 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -20,16 +20,16 @@ /// //===----------------------------------------------------------------------===// - #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include +#include using namespace llvm; @@ -44,27 +44,32 @@ namespace { class SDWAOperand; class SIPeepholeSDWA : public MachineFunctionPass { +public: + typedef SmallVector SDWAOperandsVector; + private: MachineRegisterInfo *MRI; const SIRegisterInfo *TRI; const SIInstrInfo *TII; std::unordered_map> SDWAOperands; + std::unordered_map PotentialMatches; + SmallVector ConvertedInstructions; Optional foldToImm(const MachineOperand &Op) const; public: static char ID; - typedef SmallVector, 4> SDWAOperandsVector; - SIPeepholeSDWA() : MachineFunctionPass(ID) { initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; void matchSDWAOperands(MachineFunction &MF); + bool isConvertibleToSDWA(const MachineInstr &MI) const; bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); + void legalizeScalarOperands(MachineInstr &MI) const; StringRef getPassName() const override { return "SI Peephole SDWA"; } @@ -123,7 +128,8 @@ public: bool getNeg() const { return Neg; } bool getSext() const { return Sext; } - uint64_t getSrcMods() const; + uint64_t getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const; }; class SDWADstOperand : public SDWAOperand { @@ -234,13 +240,24 @@ static bool isSubregOf(const MachineOperand &SubReg, return SuperMask.all(); } -uint64_t SDWASrcOperand::getSrcMods() const { +uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, + const MachineOperand *SrcOp) const { uint64_t Mods = 0; + const auto *MI = SrcOp->getParent(); + if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { + Mods = Mod->getImm(); + } + } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) { + if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) { + Mods = Mod->getImm(); + } + } if (Abs || Neg) { assert(!Sext && "Float and integer src modifiers can't be set simulteniously"); Mods |= Abs ? SISrcMods::ABS : 0; - Mods |= Neg ? SISrcMods::NEG : 0; + Mods ^= Neg ? SISrcMods::NEG : 0; } else if (Sext) { Mods |= SISrcMods::SEXT; } @@ -285,7 +302,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); MachineOperand *SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); - assert(Src && Src->isReg()); + assert(Src && (Src->isReg() || Src->isImm())); if (!isSameReg(*Src, *getReplacedOperand())) { // If this is not src0 then it should be src1 Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1); @@ -306,7 +323,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { } copyRegOperand(*Src, *getTargetOperand()); SrcSel->setImm(getSrcSel()); - SrcMods->setImm(getSrcMods()); + SrcMods->setImm(getSrcMods(TII, Src)); getTargetOperand()->setIsKill(false); return true; } @@ -403,7 +420,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { switch (Opcode) { case AMDGPU::V_LSHRREV_B32_e32: case AMDGPU::V_ASHRREV_I32_e32: - case AMDGPU::V_LSHLREV_B32_e32: { + case AMDGPU::V_LSHLREV_B32_e32: + case AMDGPU::V_LSHRREV_B32_e64: + case AMDGPU::V_ASHRREV_I32_e64: + case AMDGPU::V_LSHLREV_B32_e64: { // from: v_lshrrev_b32_e32 v1, 16/24, v0 // to SDWA src:v0 src_sel:WORD_1/BYTE_3 @@ -426,7 +446,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B32_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || + Opcode == AMDGPU::V_LSHLREV_B32_e64) { auto SDWADst = make_unique( Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); @@ -435,7 +456,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } else { auto SDWASrc = make_unique( Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false, - Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true); + Opcode != AMDGPU::V_LSHRREV_B32_e32 && + Opcode != AMDGPU::V_LSHRREV_B32_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -445,7 +467,10 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { case AMDGPU::V_LSHRREV_B16_e32: case AMDGPU::V_ASHRREV_I16_e32: - case AMDGPU::V_LSHLREV_B16_e32: { + case AMDGPU::V_LSHLREV_B16_e32: + case AMDGPU::V_LSHRREV_B16_e64: + case AMDGPU::V_ASHRREV_I16_e64: + case AMDGPU::V_LSHLREV_B16_e64: { // from: v_lshrrev_b16_e32 v1, 8, v0 // to SDWA src:v0 src_sel:BYTE_1 @@ -466,16 +491,18 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { TRI->isPhysicalRegister(Dst->getReg())) break; - if (Opcode == AMDGPU::V_LSHLREV_B16_e32) { + if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || + Opcode == AMDGPU::V_LSHLREV_B16_e64) { auto SDWADst = - make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); + make_unique(Dst, Src1, BYTE_1, UNUSED_PAD); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); SDWAOperands[&MI] = std::move(SDWADst); ++NumSDWAPatternsFound; } else { auto SDWASrc = make_unique( Src1, Dst, BYTE_1, false, false, - Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true); + Opcode != AMDGPU::V_LSHRREV_B16_e32 && + Opcode != AMDGPU::V_LSHRREV_B16_e64); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -543,20 +570,25 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { ++NumSDWAPatternsFound; break; } - case AMDGPU::V_AND_B32_e32: { + case AMDGPU::V_AND_B32_e32: + case AMDGPU::V_AND_B32_e64: { // e.g.: // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 // to SDWA src:v0 src_sel:WORD_0/BYTE_0 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); + MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); + auto ValSrc = Src1; auto Imm = foldToImm(*Src0); - if (!Imm) - break; - if (*Imm != 0x0000ffff && *Imm != 0x000000ff) + if (!Imm) { + Imm = foldToImm(*Src1); + ValSrc = Src0; + } + + if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) break; - MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (TRI->isPhysicalRegister(Src1->getReg()) || @@ -564,7 +596,7 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { break; auto SDWASrc = make_unique( - Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); + ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); SDWAOperands[&MI] = std::move(SDWASrc); ++NumSDWAPatternsFound; @@ -575,36 +607,40 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { } } +bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const { + // Check if this instruction has opcode that supports SDWA + unsigned Opc = MI.getOpcode(); + if (AMDGPU::getSDWAOp(Opc) != -1) + return true; + int Opc32 = AMDGPU::getVOPe32(Opc); + if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1) + return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) && + !TII->getNamedOperand(MI, AMDGPU::OpName::sdst); + return false; +} + bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands) { - // Check if this instruction can be converted to SDWA: - // 1. Does this opcode support SDWA - if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1) - return false; - - // 2. Are all operands - VGPRs - for (const MachineOperand &Operand : MI.explicit_operands()) { - if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg())) - return false; - } - // Convert to sdwa int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode()); + if (SDWAOpcode == -1) + SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode())); assert(SDWAOpcode != -1); + // Copy dst, if it is present in original then should also be present in SDWA + MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); + if (!Dst && !TII->isVOPC(MI)) + return false; + const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); // Create SDWA version of instruction MI and initialize its operands MachineInstrBuilder SDWAInst = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); - // Copy dst, if it is present in original then should also be present in SDWA - MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (Dst) { assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); SDWAInst.add(*Dst); - } else { - assert(TII->isVOPC(MI)); } // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and @@ -614,7 +650,10 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, Src0 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src0); // Copy src1 if present, initialize src1_modifiers. @@ -623,10 +662,11 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, assert( AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); - SDWAInst.addImm(0); + if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) + SDWAInst.addImm(Mod->getImm()); + else + SDWAInst.addImm(0); SDWAInst.add(*Src1); - } else { - assert(TII->isVOP1(MI)); } if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa || @@ -664,9 +704,22 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Apply all sdwa operand pattenrs bool Converted = false; for (auto &Operand : SDWAOperands) { - Converted |= Operand->convertToSDWA(*SDWAInst, TII); + // There should be no intesection between SDWA operands and potential MIs + // e.g.: + // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0 + // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0 + // v_add_u32 v3, v4, v2 + // + // In that example it is possible that we would fold 2nd instruction into 3rd + // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was + // already destroyed). So if SDWAOperand is also a potential MI then do not + // apply it. + if (PotentialMatches.count(Operand->getParentInst()) == 0) + Converted |= Operand->convertToSDWA(*SDWAInst, TII); } - if (!Converted) { + if (Converted) { + ConvertedInstructions.push_back(SDWAInst); + } else { SDWAInst->eraseFromParent(); return false; } @@ -679,6 +732,29 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, return true; } +// If an instruction was converted to SDWA it should not have immediates or SGPR +// operands. Copy its scalar operands into VGPRs. +void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const { + const MCInstrDesc &Desc = TII->get(MI.getOpcode()); + for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { + MachineOperand &Op = MI.getOperand(I); + if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg()))) + continue; + if (Desc.OpInfo[I].RegClass == -1 || + !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass))) + continue; + unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); + auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), + TII->get(AMDGPU::V_MOV_B32_e32), VGPR); + if (Op.isImm()) + Copy.addImm(Op.getImm()); + else if (Op.isReg()) + Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0, + Op.getSubReg()); + Op.ChangeToRegister(VGPR, false); + } +} + bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget(); @@ -690,16 +766,15 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); TRI = ST.getRegisterInfo(); TII = ST.getInstrInfo(); - - std::unordered_map PotentialMatches; - + + // Find all SDWA operands in MF. matchSDWAOperands(MF); - for (auto &OperandPair : SDWAOperands) { - auto &Operand = OperandPair.second; + for (const auto &OperandPair : SDWAOperands) { + const auto &Operand = OperandPair.second; MachineInstr *PotentialMI = Operand->potentialToConvert(TII); - if (PotentialMI) { - PotentialMatches[PotentialMI].push_back(std::move(Operand)); + if (PotentialMI && isConvertibleToSDWA(*PotentialMI)) { + PotentialMatches[PotentialMI].push_back(Operand.get()); } } @@ -708,6 +783,12 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { convertToSDWA(PotentialMI, PotentialPair.second); } + PotentialMatches.clear(); SDWAOperands.clear(); - return false; + + bool Ret = !ConvertedInstructions.empty(); + while (!ConvertedInstructions.empty()) + legalizeScalarOperands(*ConvertedInstructions.pop_back_val()); + + return Ret; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 39324cbbcc02427a508be61d3607d3d70e2a0b27..b611f28fcabdfc5daa83450b2a8e8163faa69ac2 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "SIRegisterInfo.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" -#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" @@ -117,14 +117,10 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass); } -unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( - const MachineFunction &MF) const { - - const SISubtarget &ST = MF.getSubtarget(); - unsigned RegCount = ST.getMaxNumSGPRs(MF); +static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) { unsigned Reg; - // Try to place it in a hole after PrivateSegmentbufferReg. + // Try to place it in a hole after PrivateSegmentBufferReg. if (RegCount & 3) { // We cannot put the segment buffer in (Idx - 4) ... (Idx - 1) due to // alignment constraints, so we have a hole where can put the wave offset. @@ -134,9 +130,22 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( // wave offset before it. Reg = RegCount - 5; } + + return Reg; +} + +unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( + const MachineFunction &MF) const { + const SISubtarget &ST = MF.getSubtarget(); + unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF)); return AMDGPU::SGPR_32RegClass.getRegister(Reg); } +unsigned SIRegisterInfo::reservedStackPtrOffsetReg( + const MachineFunction &MF) const { + return AMDGPU::SGPR32; +} + BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); @@ -146,6 +155,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::EXEC); reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); + // M0 has to be reserved so that llvm accepts it as a live-in into a block. + reserveRegisterTuples(Reserved, AMDGPU::M0); + // Reserve the memory aperture registers. reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE); reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT); @@ -195,15 +207,33 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg)); } + unsigned StackPtrReg = MFI->getStackPtrOffsetReg(); + if (StackPtrReg != AMDGPU::NoRegister) { + reserveRegisterTuples(Reserved, StackPtrReg); + assert(!isSubRegister(ScratchRSrcReg, StackPtrReg)); + } + + unsigned FrameReg = MFI->getFrameOffsetReg(); + if (FrameReg != AMDGPU::NoRegister) { + reserveRegisterTuples(Reserved, FrameReg); + assert(!isSubRegister(ScratchRSrcReg, FrameReg)); + } + return Reserved; } bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const { - return Fn.getFrameInfo().hasStackObjects(); + const SIMachineFunctionInfo *Info = Fn.getInfo(); + if (Info->isEntryFunction()) { + const MachineFrameInfo &MFI = Fn.getFrameInfo(); + return MFI.hasStackObjects() || MFI.hasCalls(); + } + + // May need scavenger for dealing with callee saved registers. + return true; } -bool -SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const { +bool SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const { return MF.getFrameInfo().hasStackObjects(); } @@ -278,7 +308,6 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, } MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); @@ -288,8 +317,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg) .addFrameIndex(FrameIdx); - BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg) - .addReg(UnusedCarry, RegState::Define | RegState::Dead) + TII->getAddNoCarry(*MBB, Ins, DL, BaseReg) .addReg(OffsetReg, RegState::Kill) .addReg(FIReg); } @@ -317,8 +345,11 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineOperand *FIOp = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr); assert(FIOp && FIOp->isFI() && "frame index must be address operand"); - assert(TII->isMUBUF(MI)); + assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() == + MF->getInfo()->getFrameOffsetReg() && + "should only be seeing frame offset relative FrameIndex"); + MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); int64_t NewOffset = OffsetOp->getImm() + Offset; @@ -617,7 +648,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, if (SpillToSMEM && isSGPRClass(RC)) { // XXX - if private_element_size is larger than 4 it might be useful to be // able to spill wider vmem spills. - std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true); + std::tie(EltSize, ScalarStoreOp) = + getSpillEltSize(getRegSizeInBits(*RC) / 8, true); } ArrayRef SplitParts = getRegSplitParts(RC, EltSize); @@ -652,11 +684,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); if (Offset != 0) { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) - .addReg(MFI->getScratchWaveOffsetReg()) + .addReg(MFI->getFrameOffsetReg()) .addImm(Offset); } else { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) - .addReg(MFI->getScratchWaveOffsetReg()); + .addReg(MFI->getFrameOffsetReg()); } BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp)) @@ -713,11 +745,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, EltSize, MinAlign(Align, EltSize * i)); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) - .addReg(TmpReg, RegState::Kill) // src - .addFrameIndex(Index) // vaddr - .addReg(MFI->getScratchRSrcReg()) // srrsrc - .addReg(MFI->getScratchWaveOffsetReg()) // soffset - .addImm(i * 4) // offset + .addReg(TmpReg, RegState::Kill) // src + .addFrameIndex(Index) // vaddr + .addReg(MFI->getScratchRSrcReg()) // srrsrc + .addReg(MFI->getFrameOffsetReg()) // soffset + .addImm(i * 4) // offset .addMemOperand(MMO); } } @@ -777,7 +809,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, if (SpillToSMEM && isSGPRClass(RC)) { // XXX - if private_element_size is larger than 4 it might be useful to be // able to spill wider vmem spills. - std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false); + std::tie(EltSize, ScalarLoadOp) = + getSpillEltSize(getRegSizeInBits(*RC) / 8, false); } ArrayRef SplitParts = getRegSplitParts(RC, EltSize); @@ -803,11 +836,11 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i); if (Offset != 0) { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) - .addReg(MFI->getScratchWaveOffsetReg()) + .addReg(MFI->getFrameOffsetReg()) .addImm(Offset); } else { BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) - .addReg(MFI->getScratchWaveOffsetReg()); + .addReg(MFI->getFrameOffsetReg()); } auto MIB = @@ -850,10 +883,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, MinAlign(Align, EltSize * i)); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg) - .addFrameIndex(Index) // vaddr - .addReg(MFI->getScratchRSrcReg()) // srsrc - .addReg(MFI->getScratchWaveOffsetReg()) // soffset - .addImm(i * 4) // offset + .addFrameIndex(Index) // vaddr + .addReg(MFI->getScratchRSrcReg()) // srsrc + .addReg(MFI->getFrameOffsetReg()) // soffset + .addImm(i * 4) // offset .addMemOperand(MMO); auto MIB = @@ -978,12 +1011,72 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } default: { - if (TII->isMUBUF(*MI)) { + const DebugLoc &DL = MI->getDebugLoc(); + bool IsMUBUF = TII->isMUBUF(*MI); + + if (!IsMUBUF && + MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) { + // Convert to an absolute stack address by finding the offset from the + // scratch wave base and scaling by the wave size. + // + // In an entry function/kernel the stack address is already the absolute + // address relative to the the scratch wave offset. + + unsigned DiffReg + = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + + bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32; + unsigned ResultReg = IsCopy ? + MI->getOperand(0).getReg() : + MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg) + .addReg(MFI->getFrameOffsetReg()) + .addReg(MFI->getScratchWaveOffsetReg()); + + int64_t Offset = FrameInfo.getObjectOffset(Index); + if (Offset == 0) { + // XXX - This never happens because of emergency scavenging slot at 0? + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg) + .addImm(Log2_32(ST.getWavefrontSize())) + .addReg(DiffReg); + } else { + unsigned CarryOut + = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + unsigned ScaledReg + = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + + // XXX - Should this use a vector shift? + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg) + .addReg(DiffReg, RegState::Kill) + .addImm(Log2_32(ST.getWavefrontSize())); + + // TODO: Fold if use instruction is another add of a constant. + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) + .addReg(CarryOut, RegState::Define | RegState::Dead) + .addImm(Offset) + .addReg(ScaledReg, RegState::Kill); + + MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC); + } + + // Don't introduce an extra copy if we're just materializing in a mov. + if (IsCopy) + MI->eraseFromParent(); + else + FIOp.ChangeToRegister(ResultReg, false, false, true); + return; + } + + if (IsMUBUF) { // Disable offen so we don't need a 0 vgpr base. assert(static_cast(FIOperandNum) == AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr)); + assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() + == MFI->getFrameOffsetReg()); + int64_t Offset = FrameInfo.getObjectOffset(Index); int64_t OldImm = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(); @@ -992,23 +1085,85 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (isUInt<12>(NewOffset) && buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) { MI->eraseFromParent(); - break; + return; } } + // If the offset is simply too big, don't convert to a scratch wave offset + // relative index. + int64_t Offset = FrameInfo.getObjectOffset(Index); FIOp.ChangeToImmediate(Offset); if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) { unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) - .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) + .addImm(Offset); FIOp.ChangeToRegister(TmpReg, false, false, true); } } } } +StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { + #define AMDGPU_REG_ASM_NAMES + #include "AMDGPURegAsmNames.inc.cpp" + + #define REG_RANGE(BeginReg, EndReg, RegTable) \ + if (Reg >= BeginReg && Reg <= EndReg) { \ + unsigned Index = Reg - BeginReg; \ + assert(Index < array_lengthof(RegTable)); \ + return RegTable[Index]; \ + } + + REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames); + REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR103, SGPR32RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR102_SGPR103, SGPR64RegNames); + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255, + VGPR96RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3, + AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255, + VGPR128RegNames); + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, + AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103, + SGPR128RegNames); + + REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7, + AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR256RegNames); + + REG_RANGE( + AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15, + AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, + VGPR512RegNames); + + REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7, + AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR256RegNames); + + REG_RANGE( + AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15, + AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, + SGPR512RegNames + ); + +#undef REG_RANGE + + // FIXME: Rename flat_scr so we don't need to special case this. + switch (Reg) { + case AMDGPU::FLAT_SCR: + return "flat_scratch"; + case AMDGPU::FLAT_SCR_LO: + return "flat_scratch_lo"; + case AMDGPU::FLAT_SCR_HI: + return "flat_scratch_hi"; + default: + // For the special named registers the default is fine. + return TargetRegisterInfo::getRegAsmName(Reg); + } +} + // FIXME: This is very slow. It might be worth creating a map from physreg to // register class. const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { @@ -1040,20 +1195,21 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { // TODO: It might be helpful to have some target specific flags in // TargetRegisterClass to mark which classes are VGPRs to make this trivial. bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { - switch (RC->getSize()) { - case 0: return false; - case 1: return false; - case 4: + unsigned Size = getRegSizeInBits(*RC); + if (Size < 32) + return false; + switch (Size) { + case 32: return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; - case 8: + case 64: return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr; - case 12: + case 96: return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr; - case 16: + case 128: return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr; - case 32: + case 256: return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr; - case 64: + case 512: return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr; default: llvm_unreachable("Invalid register class size"); @@ -1062,18 +1218,18 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( const TargetRegisterClass *SRC) const { - switch (SRC->getSize()) { - case 4: + switch (getRegSizeInBits(*SRC)) { + case 32: return &AMDGPU::VGPR_32RegClass; - case 8: + case 64: return &AMDGPU::VReg_64RegClass; - case 12: + case 96: return &AMDGPU::VReg_96RegClass; - case 16: + case 128: return &AMDGPU::VReg_128RegClass; - case 32: + case 256: return &AMDGPU::VReg_256RegClass; - case 64: + case 512: return &AMDGPU::VReg_512RegClass; default: llvm_unreachable("Invalid register class size"); @@ -1082,16 +1238,16 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( const TargetRegisterClass *VRC) const { - switch (VRC->getSize()) { - case 4: + switch (getRegSizeInBits(*VRC)) { + case 32: return &AMDGPU::SGPR_32RegClass; - case 8: + case 64: return &AMDGPU::SReg_64RegClass; - case 16: + case 128: return &AMDGPU::SReg_128RegClass; - case 32: + case 256: return &AMDGPU::SReg_256RegClass; - case 64: + case 512: return &AMDGPU::SReg_512RegClass; default: llvm_unreachable("Invalid register class size"); @@ -1356,15 +1512,15 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC) const { - unsigned SrcSize = SrcRC->getSize(); - unsigned DstSize = DstRC->getSize(); - unsigned NewSize = NewRC->getSize(); + unsigned SrcSize = getRegSizeInBits(*SrcRC); + unsigned DstSize = getRegSizeInBits(*DstRC); + unsigned NewSize = getRegSizeInBits(*NewRC); // Do not increase size of registers beyond dword, we would need to allocate // adjacent registers and constraint regalloc more than needed. // Always allow dword coalescing. - if (SrcSize <= 4 || DstSize <= 4) + if (SrcSize <= 32 || DstSize <= 32) return true; return NewSize <= DstSize || NewSize <= SrcSize; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index 679ed229758a0eef73f9e928942c43878d9e5706..8fed6d5f9710f84ba2d41bed2af95bc10a961935 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H #include "AMDGPURegisterInfo.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIDefines.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -57,8 +58,22 @@ public: unsigned reservedPrivateSegmentWaveByteOffsetReg( const MachineFunction &MF) const; + unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID) const override; + + // Stack access is very expensive. CSRs are also the high registers, and we + // want to minimize the number of used registers. + unsigned getCSRFirstUseCost() const override { + return 100; + } + + unsigned getFrameRegister(const MachineFunction &MF) const override; + bool requiresRegisterScavenging(const MachineFunction &Fn) const override; bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; @@ -103,6 +118,8 @@ public: bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS) const; + StringRef getRegAsmName(unsigned Reg) const override; + unsigned getHWRegIndex(unsigned Reg) const { return getEncodingValue(Reg) & 0xff; } @@ -228,6 +245,11 @@ public: const int *getRegUnitPressureSets(unsigned RegUnit) const override; + unsigned getReturnAddressReg(const MachineFunction &MF) const { + // Not a callee saved register. + return AMDGPU::SGPR30_SGPR31; + } + private: void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td index 5b840a14dbc337b42048594882121cfc03b84100..73dd8b7daa4ea834c5a9268cfc5f5389b77b5f17 100644 --- a/lib/Target/AMDGPU/SMInstructions.td +++ b/lib/Target/AMDGPU/SMInstructions.td @@ -229,6 +229,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && static_cast(getTargetLowering())->isMemOpUniform(N)) || (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && + !Ld->isVolatile() && static_cast(getTargetLowering())->isMemOpUniform(N) && static_cast(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td index 597d9ba668dc0a65ea158039ffe5023d3dd80f09..ec29a66c8bbbe6bfaf9dd12ce625875be911272d 100644 --- a/lib/Target/AMDGPU/SOPInstructions.td +++ b/lib/Target/AMDGPU/SOPInstructions.td @@ -184,13 +184,27 @@ def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32">; def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64">; def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">; def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">; -def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64">; +def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64", + [(set i64:$sdst, (int_amdgcn_s_getpc))] +>; + +let isTerminator = 1, isBarrier = 1, SchedRW = [WriteBranch] in { -let isTerminator = 1, isBarrier = 1, - isBranch = 1, isIndirectBranch = 1 in { +let isBranch = 1, isIndirectBranch = 1 in { def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">; +} // End isBranch = 1, isIndirectBranch = 1 + +let isReturn = 1 in { +// Define variant marked as return rather than branch. +def S_SETPC_B64_return : SOP1_1<"", [(AMDGPUret_flag i64:$src0)]>; } -def S_SWAPPC_B64 : SOP1_64 <"s_swappc_b64">; +} // End isTerminator = 1, isBarrier = 1 + +let isCall = 1 in { +def S_SWAPPC_B64 : SOP1_64 <"s_swappc_b64" +>; +} + def S_RFE_B64 : SOP1_1 <"s_rfe_b64">; let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in { @@ -434,7 +448,7 @@ def S_BFE_I64 : SOP2_64_32 <"s_bfe_i64">; def S_CBRANCH_G_FORK : SOP2_Pseudo < "s_cbranch_g_fork", (outs), - (ins SReg_64:$src0, SReg_64:$src1), + (ins SCSrc_b64:$src0, SCSrc_b64:$src1), "$src0, $src1" > { let has_sdst = 0; @@ -530,14 +544,16 @@ class SOPKInstTable { class SOPK_32 pattern=[]> : SOPK_Pseudo < opName, (outs SReg_32:$sdst), - (ins u16imm:$simm16), + (ins s16imm:$simm16), "$sdst, $simm16", pattern>; -class SOPK_SCC : SOPK_Pseudo < +class SOPK_SCC : SOPK_Pseudo < opName, (outs), - (ins SReg_32:$sdst, u16imm:$simm16), + !if(isSignExt, + (ins SReg_32:$sdst, s16imm:$simm16), + (ins SReg_32:$sdst, u16imm:$simm16)), "$sdst, $simm16", []>, SOPKInstTable<1, base_op>{ let Defs = [SCC]; @@ -546,7 +562,7 @@ class SOPK_SCC : SOPK_Pseudo < class SOPK_32TIE pattern=[]> : SOPK_Pseudo < opName, (outs SReg_32:$sdst), - (ins SReg_32:$src0, u16imm:$simm16), + (ins SReg_32:$src0, s16imm:$simm16), "$sdst, $simm16", pattern >; @@ -575,20 +591,20 @@ let isCompare = 1 in { // [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))] // >; -def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">; -def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">; -def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">; -def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">; -def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">; -def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">; +def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32", 1>; +def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32", 1>; +def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32", 1>; +def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32", 1>; +def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32", 1>; +def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32", 1>; let SOPKZext = 1 in { -def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">; -def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">; -def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">; -def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">; -def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">; -def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">; +def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32", 0>; +def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32", 0>; +def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32", 0>; +def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32", 0>; +def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32", 0>; +def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32", 0>; } // End SOPKZext = 1 } // End isCompare = 1 @@ -600,7 +616,7 @@ let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0", def S_CBRANCH_I_FORK : SOPK_Pseudo < "s_cbranch_i_fork", - (outs), (ins SReg_64:$sdst, u16imm:$simm16), + (outs), (ins SReg_64:$sdst, s16imm:$simm16), "$sdst, $simm16" >; diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp index b6868de6a74e322d096152c8adbd3dea7fa1ba0e..03b11ae80500efc5698bf22d717acc5e42f26583 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp @@ -65,5 +65,18 @@ const char* const IdSymbolic[] = { }; } // namespace Hwreg + +namespace Swizzle { + +// This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h. +const char* const IdSymbolic[] = { + "QUAD_PERM", + "BITMASK_PERM", + "SWAP", + "REVERSE", + "BROADCAST", +}; + +} // namespace Swizzle } // namespace AMDGPU } // namespace llvm diff --git a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h index b2dc2c0e364cde6688e3aa8d1151c9316e780a4e..ebb2be22b48795ae04134d7ac3d25d05f869d564 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.h @@ -25,6 +25,12 @@ namespace Hwreg { // Symbolic names for the hwreg(...) syntax. extern const char* const IdSymbolic[]; } // namespace Hwreg + +namespace Swizzle { // Symbolic names for the swizzle(...) syntax. + +extern const char* const IdSymbolic[]; + +} // namespace Swizzle } // namespace AMDGPU } // namespace llvm diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 86095a8e1142f2310670508e9cb911cdb14bdb30..f581e69980c7907f96260c0eef98048859bb334b 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -7,11 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPU.h" #include "AMDGPUBaseInfo.h" +#include "AMDGPU.h" #include "SIDefines.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -27,7 +28,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include @@ -38,7 +38,6 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" - #define GET_INSTRINFO_NAMED_OPS #include "AMDGPUGenInstrInfo.inc" #undef GET_INSTRINFO_NAMED_OPS @@ -93,11 +92,22 @@ unsigned getVmcntBitWidthHi() { return 2; } } // end namespace anonymous namespace llvm { + +static cl::opt EnablePackedInlinableLiterals( + "enable-packed-inlinable-literals", + cl::desc("Enable packed inlinable literals (v2f16, v2i16)"), + cl::init(false)); + namespace AMDGPU { namespace IsaInfo { IsaVersion getIsaVersion(const FeatureBitset &Features) { + // SI. + if (Features.test(FeatureISAVersion6_0_0)) + return {6, 0, 0}; + if (Features.test(FeatureISAVersion6_0_1)) + return {6, 0, 1}; // CI. if (Features.test(FeatureISAVersion7_0_0)) return {7, 0, 0}; @@ -105,6 +115,8 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {7, 0, 1}; if (Features.test(FeatureISAVersion7_0_2)) return {7, 0, 2}; + if (Features.test(FeatureISAVersion7_0_3)) + return {7, 0, 3}; // VI. if (Features.test(FeatureISAVersion8_0_0)) @@ -125,6 +137,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {9, 0, 0}; if (Features.test(FeatureISAVersion9_0_1)) return {9, 0, 1}; + if (Features.test(FeatureISAVersion9_0_2)) + return {9, 0, 2}; + if (Features.test(FeatureISAVersion9_0_3)) + return {9, 0, 3}; if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands)) return {0, 0, 0}; @@ -321,33 +337,6 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, Header.private_segment_alignment = 4; } -MCSection *getHSATextSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_EXECINSTR | - ELF::SHF_AMDGPU_HSA_AGENT | - ELF::SHF_AMDGPU_HSA_CODE); -} - -MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL | - ELF::SHF_AMDGPU_HSA_AGENT); -} - -MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_WRITE | - ELF::SHF_AMDGPU_HSA_GLOBAL); -} - -MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { - return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY | - ELF::SHF_AMDGPU_HSA_AGENT); -} - bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; } @@ -497,6 +486,7 @@ unsigned getInitialPSInputAddr(const Function &F) { bool isShader(CallingConv::ID cc) { switch(cc) { case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_HS: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: @@ -511,7 +501,18 @@ bool isCompute(CallingConv::ID cc) { } bool isEntryFunctionCC(CallingConv::ID CC) { - return true; + switch (CC) { + case CallingConv::AMDGPU_KERNEL: + case CallingConv::SPIR_KERNEL: + case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_PS: + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_HS: + return true; + default: + return false; + } } bool isSI(const MCSubtargetInfo &STI) { @@ -526,6 +527,17 @@ bool isVI(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; } +bool isGFX9(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; +} + +bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { + const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); + const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); + return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || + Reg == AMDGPU::SCC; +} + unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { switch(Reg) { @@ -703,6 +715,9 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) { bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) { assert(HasInv2Pi); + if (!EnablePackedInlinableLiterals) + return false; + int16_t Lo16 = static_cast(Literal); int16_t Hi16 = static_cast(Literal >> 16); return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi); diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index d6c836eb748b1521c9edfd24031d76b1c74111e9..eff0230d21f57c909e4eb9515c7378fe33d899b4 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -149,13 +149,6 @@ int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); -MCSection *getHSATextSection(MCContext &Ctx); - -MCSection *getHSADataGlobalAgentSection(MCContext &Ctx); - -MCSection *getHSADataGlobalProgramSection(MCContext &Ctx); - -MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS); bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS); @@ -262,7 +255,6 @@ bool isEntryFunctionCC(CallingConv::ID CC); LLVM_READNONE inline bool isKernel(CallingConv::ID CC) { switch (CC) { - case CallingConv::C: case CallingConv::AMDGPU_KERNEL: case CallingConv::SPIR_KERNEL: return true; @@ -274,6 +266,10 @@ inline bool isKernel(CallingConv::ID CC) { bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); bool isVI(const MCSubtargetInfo &STI); +bool isGFX9(const MCSubtargetInfo &STI); + +/// \brief Is Reg - scalar register +bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); /// If \p Reg is a pseudo reg, return the correct hardware register given /// \p STI otherwise return \p Reg. diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td index 1febc6bf8ec2081bab1c495c2765ebb9138ea52f..95b5ef0a49dba59e7daf6cd5fa325417a9626774 100644 --- a/lib/Target/AMDGPU/VOP1Instructions.td +++ b/lib/Target/AMDGPU/VOP1Instructions.td @@ -30,6 +30,15 @@ class VOP1_SDWAe op, VOPProfile P> : VOP_SDWAe

{ let Inst{31-25} = 0x3f; // encoding } +class VOP1_SDWA9Ae op, VOPProfile P> : VOP_SDWA9Ae

{ + bits<8> vdst; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; // encoding +} + class VOP1_Pseudo pattern=[], bit VOP1Only = 0> : InstSI , VOP , @@ -84,6 +93,11 @@ class VOP1_SDWA_Pseudo pattern=[]> : let AsmMatchConverter = "cvtSdwaVOP1"; } +class VOP1_SDWA9_Pseudo pattern=[]> : + VOP_SDWA9_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP1"; +} + class getVOP1Pat64 : LetDummies { list ret = !if(P.HasModifiers, @@ -103,6 +117,7 @@ multiclass VOP1Inst ; def _e64 : VOP3_Pseudo .ret>; def _sdwa : VOP1_SDWA_Pseudo ; + def _sdwa9 : VOP1_SDWA9_Pseudo ; } // Special profile for instructions which have clamp @@ -243,6 +258,7 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC64 = VRegSrc_32; let HasExt = 0; + let HasSDWA9 = 0; } // Special case because there are no true output operands. Hack vdst @@ -258,16 +274,21 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, VCSrc_b32:$src0, + let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel); + let InsSDWA9 = (ins Src0RC32:$vdst, Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel); let Asm32 = getAsm32<1, 1>.ret; let Asm64 = getAsm64<1, 1, 0, 1>.ret; let AsmDPP = getAsmDPP<1, 1, 0>.ret; - let AsmSDWA = getAsmSDWA<1, 1, 0>.ret; + let AsmSDWA = getAsmSDWA<1, 1>.ret; + let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret; let HasExt = 0; + let HasSDWA9 = 0; let HasDst = 0; let EmitDst = 1; // force vdst emission } @@ -324,7 +345,7 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>; } // End SubtargetPredicate = isCIVI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; @@ -347,7 +368,7 @@ defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { def : Pat< (f32 (f16_to_fp i16:$src)), @@ -523,6 +544,10 @@ multiclass VOP1_Real_vi op> { VOP_SDWA_Real (NAME#"_sdwa")>, VOP1_SDWAe (NAME#"_sdwa").Pfl>; + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa9")>, + VOP1_SDWA9Ae (NAME#"_sdwa9").Pfl>; + // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP1_DPP(NAME#"_e32")>; diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index 2281f338ab45ea571be028682ab0da191acf4be5..657cacaa792ca2dd4fba602a20d507576cc2436d 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -48,6 +48,18 @@ class VOP2_SDWAe op, VOPProfile P> : VOP_SDWAe

{ let Inst{31} = 0x0; // encoding } +class VOP2_SDWA9Ae op, VOPProfile P> : VOP_SDWA9Ae

{ + bits<8> vdst; + bits<9> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding + let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr +} + class VOP2_Pseudo pattern=[], string suffix = "_e32"> : InstSI , VOP , @@ -102,6 +114,11 @@ class VOP2_SDWA_Pseudo pattern=[]> : let AsmMatchConverter = "cvtSdwaVOP2"; } +class VOP2_SDWA9_Pseudo pattern=[]> : + VOP_SDWA9_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2"; +} + class getVOP2Pat64 : LetDummies { list ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, @@ -121,10 +138,10 @@ multiclass VOP2Inst .ret>, Commutable_REV; - def _sdwa : VOP2_SDWA_Pseudo ; + def _sdwa : VOP2_SDWA_Pseudo ; + def _sdwa9 : VOP2_SDWA9_Pseudo ; } -// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst multiclass VOP2bInst , Commutable_REV; - def _sdwa : VOP2_SDWA_Pseudo ; + def _sdwa : VOP2_SDWA_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } + + def _sdwa9 : VOP2_SDWA9_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } } def _e64 : VOP3_Pseudo .ret>, @@ -164,8 +187,11 @@ multiclass VOP2eInst : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm); - field string Asm32 = "$vdst, $src0, $src1, $imm"; field bit HasExt = 0; + + // Hack to stop printing _e64 + let DstRC = RegisterOperand; + field string Asm32 = " $vdst, $src0, $src1, $imm"; } def VOP_MADAK_F16 : VOP_MADAK ; @@ -174,8 +200,11 @@ def VOP_MADAK_F32 : VOP_MADAK ; class VOP_MADMK : VOPProfile <[vt, vt, vt, vt]> { field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1); - field string Asm32 = "$vdst, $src0, $imm, $src1"; field bit HasExt = 0; + + // Hack to stop printing _e64 + let DstRC = RegisterOperand; + field string Asm32 = " $vdst, $src0, $imm, $src1"; } def VOP_MADMK_F16 : VOP_MADMK ; @@ -197,13 +226,21 @@ class VOP_MAC : VOPProfile <[vt, vt, vt, vt]> { VGPR_32:$src2, // stub argument clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, + VGPR_32:$src2, // stub argument + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm32 = getAsm32<1, 2, vt>.ret; let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret; let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret; - let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret; + let AsmSDWA = getAsmSDWA<1, 2, vt>.ret; + let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret; let HasSrc2 = 0; let HasSrc2Mods = 0; let HasExt = 1; + let HasSDWA9 = 0; } def VOP_MAC_F16 : VOP_MAC { @@ -223,6 +260,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { let Asm32 = "$vdst, vcc, $src0, $src1"; let Asm64 = "$vdst, $sdst, $src0, $src1"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); @@ -240,6 +278,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; let Asm64 = "$vdst, $sdst, $src0, $src1, $src2"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); @@ -248,16 +287,23 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { // implicit VCC use. let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); - let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0SDWA:$src0, - Src1Mod:$src1_modifiers, Src1SDWA:$src1, + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, + Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0, Src1Mod:$src1_modifiers, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let HasExt = 1; + let HasSDWA9 = 1; } // Read in from vcc or arbitrary SGPR @@ -298,7 +344,7 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> { let SubtargetPredicate = isGCN in { defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>; -def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32>; +def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, [], "">; let isCommutable = 1 in { defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>; @@ -328,7 +374,7 @@ let Constraints = "$vdst = $src2", DisableEncoding="$src2", defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>; } -def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32>; +def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, [], "">; // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. @@ -381,9 +427,9 @@ defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>; } // End let SubtargetPredicate = SICI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { -def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16>; +def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>; defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>; defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>; @@ -394,7 +440,7 @@ defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>; defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>; defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>; -def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16>; +def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>; defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>; defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">; @@ -412,7 +458,7 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; } } // End isCommutable = 1 -} // End SubtargetPredicate = isVI +} // End SubtargetPredicate = Has16BitInsts // Note: 16-bit instructions produce a 0 result in the high 16-bits. multiclass Arithmetic_i16_Pats { @@ -462,7 +508,7 @@ class ZExt_i16_i1_Pat : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src) >; -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; @@ -507,7 +553,7 @@ def : Pat< (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) >; -} // End Predicates = [isVI] +} // End Predicates = [Has16BitInsts] //===----------------------------------------------------------------------===// // SI @@ -651,6 +697,17 @@ multiclass VOP2_Real_e64_vi op> { VOP3e_vi (NAME#"_e64").Pfl>; } +multiclass VOP2_Real_e64only_vi op> { + def _e64_vi : + VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, + VOP3e_vi (NAME#"_e64").Pfl> { + // Hack to stop printing _e64 + VOP3_Pseudo ps = !cast(NAME#"_e64"); + let OutOperandList = (outs VGPR_32:$vdst); + let AsmString = ps.Mnemonic # " " # ps.AsmOperands; + } +} + multiclass Base_VOP2be_Real_e32e64_vi op> : VOP2_Real_e32_vi { def _e64_vi : VOP3_Real(NAME#"_e64"), SIEncodingFamily.VI>, @@ -669,15 +726,21 @@ multiclass VOP2_SDWA_Real op> { VOP2_SDWAe (NAME#"_sdwa").Pfl>; } +multiclass VOP2_SDWA9_Real op> { + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa9")>, + VOP2_SDWA9Ae (NAME#"_sdwa9").Pfl>; +} + multiclass VOP2be_Real_e32e64_vi op> : - Base_VOP2be_Real_e32e64_vi, VOP2_SDWA_Real { + Base_VOP2be_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real { // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP2_DPP(NAME#"_e32")>; } multiclass VOP2_Real_e32e64_vi op> : - Base_VOP2_Real_e32e64_vi, VOP2_SDWA_Real { + Base_VOP2_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real { // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP2_DPP(NAME#"_e32")>; @@ -718,17 +781,17 @@ defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>; defm V_READLANE_B32 : VOP32_Real_vi <0x289>; defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>; -defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>; -defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>; -defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>; -defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>; -defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>; -defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>; -defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>; -defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>; -defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>; -defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>; -defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>; +defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; +defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; +defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; +defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; +defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; +defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; +defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; +defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; +defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; +defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; +defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 217a0748885320c139ff7ad117b5632a8581ade6..a8ca593f14ed0d44517b3c9e46dfd78631f03f12 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -172,8 +172,8 @@ def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile, def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile, AMDGPUbfe_u32>; def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile, AMDGPUbfe_i32>; def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile, AMDGPUbfi>; -def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile>; -def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile>; +def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile, int_amdgcn_alignbit>; +def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile, int_amdgcn_alignbyte>; def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile, AMDGPUfmin3>; def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile, AMDGPUsmin3>; def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile, AMDGPUumin3>; @@ -209,7 +209,10 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, } def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile, int_amdgcn_msad_u8>; + +let Constraints = "@earlyclobber $vdst" in { def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile, int_amdgcn_mqsad_pk_u16_u8>; +} // End Constraints = "@earlyclobber $vdst" def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile, AMDGPUtrig_preop> { let SchedRW = [WriteDouble]; @@ -232,9 +235,10 @@ def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; let SubtargetPredicate = isCIVI in { -def V_MQSAD_U16_U8 : VOP3Inst <"v_mqsad_u16_u8", VOP3_Profile>; +let Constraints = "@earlyclobber $vdst" in { def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile, int_amdgcn_qsad_pk_u16_u8>; def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile, int_amdgcn_mqsad_u32_u8>; +} // End Constraints = "@earlyclobber $vdst" let isCommutable = 1 in { def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; @@ -244,11 +248,12 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; } // End SubtargetPredicate = isCIVI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { + +def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>; let isCommutable = 1 in { -def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile, AMDGPUdiv_fixup>; def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile, fma>; def V_INTERP_P1LL_F16 : VOP3Inst <"v_interp_p1ll_f16", VOP3_Profile>; def V_INTERP_P1LV_F16 : VOP3Inst <"v_interp_p1lv_f16", VOP3_Profile>; @@ -259,12 +264,13 @@ def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; } // End isCommutable = 1 +} // End SubtargetPredicate = Has16BitInsts +let SubtargetPredicate = isVI in { def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile>; - } // End SubtargetPredicate = isVI -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { multiclass Ternary_i16_Pats { @@ -289,7 +295,7 @@ def : Pat< defm: Ternary_i16_Pats; defm: Ternary_i16_Pats; -} // End Predicates = [isVI] +} // End Predicates = [Has16BitInsts] let SubtargetPredicate = isGFX9 in { def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile>; @@ -301,10 +307,19 @@ def V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile>; def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile>; def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile>; + def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile, AMDGPUfmed3>; def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile, AMDGPUsmed3>; def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile, AMDGPUumed3>; -} + +def V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile, AMDGPUfmin3>; +def V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile, AMDGPUsmin3>; +def V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile, AMDGPUumin3>; + +def V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile, AMDGPUfmax3>; +def V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile, AMDGPUsmax3>; +def V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile, AMDGPUumax3>; +} // End SubtargetPredicate = isGFX9 //===----------------------------------------------------------------------===// @@ -402,7 +417,6 @@ multiclass VOP3be_Real_ci op> { } } -defm V_MQSAD_U16_U8 : VOP3_Real_ci <0x172>; defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>; defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x175>; defm V_MAD_U64_U32 : VOP3be_Real_ci <0x176>; @@ -426,7 +440,6 @@ multiclass VOP3be_Real_vi op> { } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI" -defm V_MQSAD_U16_U8 : VOP3_Real_vi <0x172>; defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>; defm V_MAD_I64_I32 : VOP3be_Real_vi <0x1E9>; @@ -512,6 +525,15 @@ defm V_OR3_B32 : VOP3_Real_vi <0x202>; defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>; defm V_XAD_U32 : VOP3_Real_vi <0x1f3>; + +defm V_MIN3_F16 : VOP3_Real_vi <0x1f4>; +defm V_MIN3_I16 : VOP3_Real_vi <0x1f5>; +defm V_MIN3_U16 : VOP3_Real_vi <0x1f6>; + +defm V_MAX3_F16 : VOP3_Real_vi <0x1f7>; +defm V_MAX3_I16 : VOP3_Real_vi <0x1f8>; +defm V_MAX3_U16 : VOP3_Real_vi <0x1f9>; + defm V_MED3_F16 : VOP3_Real_vi <0x1fa>; defm V_MED3_I16 : VOP3_Real_vi <0x1fb>; defm V_MED3_U16 : VOP3_Real_vi <0x1fc>; diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td index a3550a63677bac9e6ec842c9077fcff6f0b12855..cd347b86d3050ca195d8df97c6489fb428dc3ab2 100644 --- a/lib/Target/AMDGPU/VOPCInstructions.td +++ b/lib/Target/AMDGPU/VOPCInstructions.td @@ -34,6 +34,17 @@ class VOPC_SDWAe op, VOPProfile P> : VOP_SDWAe

{ let Inst{44-43} = SDWA.UNUSED_PRESERVE; } +class VOPC_SDWA9e op, VOPProfile P> : VOP_SDWA9Be

{ + bits<9> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; // encoding + let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr +} + + //===----------------------------------------------------------------------===// // VOPC classes //===----------------------------------------------------------------------===// @@ -102,6 +113,11 @@ class VOPC_SDWA_Pseudo pattern=[]> : let AsmMatchConverter = "cvtSdwaVOPC"; } +class VOPC_SDWA9_Pseudo pattern=[]> : + VOP_SDWA9_Pseudo { + let AsmMatchConverter = "cvtSdwaVOPC"; +} + // This class is used only with VOPC instructions. Use $sdst for out operand class VOPCInstAlias : InstAlias , PredicateControl { @@ -173,6 +189,13 @@ multiclass VOPC_Pseudos { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = P.Schedule; + let isConvergent = DefExec; + let isCompare = 1; + } } def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>; @@ -520,7 +543,11 @@ class VOPC_Class_Profile sched, ValueType vt> : let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, + src0_sel:$src0_sel, src1_sel:$src1_sel); let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel"; + //let AsmSDWA9 = " $sdst, $src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let HasSrc1Mods = 0; let HasClamp = 0; let HasOMod = 0; @@ -553,6 +580,12 @@ multiclass VOPC_Class_Pseudos { let SchedRW = p.Schedule; let isConvergent = DefExec; } + + def _sdwa9 : VOPC_SDWA9_Pseudo { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = p.Schedule; + let isConvergent = DefExec; + } } def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>; @@ -920,6 +953,10 @@ multiclass VOPC_Real_vi op> { VOP_SDWA_Real (NAME#"_sdwa")>, VOPC_SDWAe (NAME#"_sdwa").Pfl>; + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa9")>, + VOPC_SDWA9e (NAME#"_sdwa9").Pfl>; + def : VOPCInstAlias (NAME#"_e64"), !cast(NAME#"_e32_vi")> { let AssemblerPredicate = isVI; diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td index 69906c419db3b9e6773bc57012fe8b5c1422dede..4da654f84f9d15aebd8a3c6ac0894f70933b2d03 100644 --- a/lib/Target/AMDGPU/VOPInstructions.td +++ b/lib/Target/AMDGPU/VOPInstructions.td @@ -293,11 +293,52 @@ class VOP_SDWAe : Enc64 { let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE); let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD); - let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD); + let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); +} + +// gfx9 SDWA basic encoding +class VOP_SDWA9e : Enc64 { + bits<9> src0; // {src0_sgpr{0}, src0{7-0}} + bits<3> src0_sel; + bits<2> src0_modifiers; // float: {abs,neg}, int {sext} + bits<3> src1_sel; + bits<2> src1_modifiers; + bits<1> src1_sgpr; + + let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD); + let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); + let Inst{55} = !if(P.HasSrc0, src0{8}, 0); + let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD); let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); + let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); + let Inst{63} = 0; // src1_sgpr - should be specified in subclass +} + +// gfx9 SDWA-A +class VOP_SDWA9Ae : VOP_SDWA9e

{ + bits<3> dst_sel; + bits<2> dst_unused; + bits<1> clamp; + bits<2> omod; + + let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA.DWORD); + let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE); + let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); + let Inst{47-46} = !if(P.HasSDWAOMod, omod{1-0}, 0); +} + +// gfx9 SDWA-B +class VOP_SDWA9Be : VOP_SDWA9e

{ + bits<8> sdst; // {vcc_sdst{0}, sdst{6-0}} + + let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, 0); + let Inst{47} = !if(P.EmitDst, sdst{7}, 0); } class VOP_SDWA_Pseudo pattern=[]> : @@ -331,6 +372,50 @@ class VOP_SDWA_Pseudo pattern=[]> : VOPProfile Pfl = P; } +// GFX9 adds two features to SDWA: +// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD. +// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather +// than VGPRs (at most 1 can be an SGPR); +// b. OMOD is the standard output modifier (result *2, *4, /2) +// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This +// replaces OMOD and the dest fields with SD and SDST (SGPR destination) +// field. +// a. When SD=1, the SDST is used as the destination for the compare result; +// b.when SD=0, VCC is used. +// +// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA + +class VOP_SDWA9_Pseudo pattern=[]> : + InstSI , + VOP , + SIMCInstr , + MnemonicAlias { + + let isPseudo = 1; + let isCodeGenOnly = 1; + let UseNamedOperandTable = 1; + + string Mnemonic = opName; + string AsmOperands = P.AsmSDWA9; + + let Size = 8; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + + let VALU = 1; + let SDWA = 1; + let Uses = [EXEC]; + + let SubtargetPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); + let AssemblerPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); + let AsmVariantName = !if(P.HasSDWA9, AMDGPUAsmVariants.SDWA9, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "SDWA9"; + + VOPProfile Pfl = P; +} + class VOP_SDWA_Real : InstSI , SIMCInstr { @@ -358,6 +443,33 @@ class VOP_SDWA_Real : let TSFlags = ps.TSFlags; } +class VOP_SDWA9_Real : + InstSI , + SIMCInstr { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + + // Copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let AssemblerPredicate = ps.AssemblerPredicate; + let AsmMatchConverter = ps.AsmMatchConverter; + let AsmVariantName = ps.AsmVariantName; + let UseNamedOperandTable = ps.UseNamedOperandTable; + let DecoderNamespace = ps.DecoderNamespace; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; +} + class VOP_DPPe : Enc64 { bits<2> src0_modifiers; bits<8> src0; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 39f7988200ea8904e09e531dd5c46ae62e8e3ba3..4676226acd9c8404eba866a0728beb56ec37d7d9 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -23,9 +23,12 @@ namespace llvm { class ARMAsmPrinter; class ARMBaseTargetMachine; +class ARMRegisterBankInfo; +class ARMSubtarget; struct BasicBlockInfo; class Function; class FunctionPass; +class InstructionSelector; class MachineBasicBlock; class MachineFunction; class MachineInstr; @@ -43,6 +46,9 @@ FunctionPass *createThumb2ITBlockPass(); FunctionPass *createARMOptimizeBarriersPass(); FunctionPass *createThumb2SizeReductionPass( std::function Ftor = nullptr); +InstructionSelector * +createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 57f9d1c6b6109e297c71f757e1d2504084c09d56..ca68f5d42c32c00b3abe7d56d8819e9e321e4c5a 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -67,8 +67,9 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", [FeatureFPARMv8]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict FP to 16 double registers">; -def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", - "Enable divide instructions">; +def FeatureHWDivThumb : SubtargetFeature<"hwdiv", "HasHardwareDivideInThumb", + "true", + "Enable divide instructions in Thumb">; def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", "HasHardwareDivideInARM", "true", "Enable divide instructions in ARM mode">; @@ -204,6 +205,13 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", "AvoidCPSRPartialUpdate", "true", "Avoid CPSR partial update for OOO execution">; +/// Disable +1 predication cost for instructions updating CPSR. +/// Enabled for Cortex-A57. +def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr", + "CheapPredicableCPSRDef", + "true", + "Disable +1 predication cost for instructions updating CPSR">; + def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", "AvoidMOVsShifterOperand", "true", "Avoid movs instructions with shifter operand">; @@ -225,7 +233,7 @@ def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", def FeatureVirtualization : SubtargetFeature<"virtualization", "HasVirtualization", "true", "Supports Virtualization extension", - [FeatureHWDiv, FeatureHWDivARM]>; + [FeatureHWDivThumb, FeatureHWDivARM]>; // M-series ISA def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", @@ -433,21 +441,21 @@ def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops, def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, FeatureDB, FeatureDSP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureRClass]>; def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureMClass]>; def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureMClass, FeatureDSP]>; @@ -502,7 +510,7 @@ def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", [HasV8MBaselineOps, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, FeatureV7Clrex, Feature8MSecExt, FeatureAcquireRelease, @@ -512,7 +520,7 @@ def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", [HasV8MMainlineOps, FeatureNoARM, FeatureDB, - FeatureHWDiv, + FeatureHWDivThumb, Feature8MSecExt, FeatureAcquireRelease, FeatureMClass]>; @@ -576,6 +584,7 @@ def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"arm1176j-s", ARMV6Itineraries, [ARMv6kz]>; def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, FeatureVFP2, @@ -678,7 +687,7 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, FeatureFP16, FeatureAvoidPartialCPSR, FeatureVFP4, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM]>; def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, @@ -686,7 +695,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, FeatureNEONForFP, FeatureVFP4, FeatureMP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, @@ -768,39 +777,41 @@ def : ProcNoItin<"cortex-m33", [ARMv8mMainline, FeatureVFPOnlySP]>; def : ProcNoItin<"cortex-a32", [ARMv8a, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC, FeatureFPAO]>; -def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, - FeatureHWDiv, - FeatureHWDivARM, - FeatureCrypto, - FeatureCRC, - FeatureFPAO]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureCrypto, + FeatureCRC, + FeatureFPAO, + FeatureAvoidPartialCPSR, + FeatureCheapPredicableCPSR]>; def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; @@ -811,7 +822,7 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureNEONForFP, FeatureVFP4, FeatureMP, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureAvoidPartialCPSR, FeatureAvoidMOVsShOp, @@ -820,25 +831,25 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureZCZeroing]>; def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; def : ProcNoItin<"kryo", [ARMv8a, ProcKryo, - FeatureHWDiv, + FeatureHWDivThumb, FeatureHWDivARM, FeatureCrypto, FeatureCRC]>; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index eb0d410b596be7926fffef317d7b2617aed5f2ae..f9da036c7e468348b7c4f5152d07707d59f10208 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -23,6 +23,8 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" @@ -43,9 +45,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" @@ -589,12 +589,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { ATS.finishAttributeSection(); } -static bool isV8M(const ARMSubtarget *Subtarget) { - // Note that v8M Baseline is a subset of v6T2! - return (Subtarget->hasV8MBaselineOps() && !Subtarget->hasV6T2Ops()) || - Subtarget->hasV8MMainlineOps(); -} - //===----------------------------------------------------------------------===// // Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() // FIXME: @@ -602,39 +596,6 @@ static bool isV8M(const ARMSubtarget *Subtarget) { // to appear in the .ARM.attributes section in ELF. // Instead of subclassing the MCELFStreamer, we do the work here. -static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU, - const ARMSubtarget *Subtarget) { - if (CPU == "xscale") - return ARMBuildAttrs::v5TEJ; - - if (Subtarget->hasV8Ops()) { - if (Subtarget->isRClass()) - return ARMBuildAttrs::v8_R; - return ARMBuildAttrs::v8_A; - } else if (Subtarget->hasV8MMainlineOps()) - return ARMBuildAttrs::v8_M_Main; - else if (Subtarget->hasV7Ops()) { - if (Subtarget->isMClass() && Subtarget->hasDSP()) - return ARMBuildAttrs::v7E_M; - return ARMBuildAttrs::v7; - } else if (Subtarget->hasV6T2Ops()) - return ARMBuildAttrs::v6T2; - else if (Subtarget->hasV8MBaselineOps()) - return ARMBuildAttrs::v8_M_Base; - else if (Subtarget->hasV6MOps()) - return ARMBuildAttrs::v6S_M; - else if (Subtarget->hasV6Ops()) - return ARMBuildAttrs::v6; - else if (Subtarget->hasV5TEOps()) - return ARMBuildAttrs::v5TE; - else if (Subtarget->hasV5TOps()) - return ARMBuildAttrs::v5T; - else if (Subtarget->hasV4TOps()) - return ARMBuildAttrs::v4T; - else - return ARMBuildAttrs::v4; -} - // Returns true if all functions have the same function attribute value. // It also returns true when the module has no functions. static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr, @@ -671,89 +632,8 @@ void ARMAsmPrinter::emitAttributes() { static_cast(TM); const ARMSubtarget STI(TT, CPU, ArchFS, ATM, ATM.isLittleEndian()); - const std::string &CPUString = STI.getCPUString(); - - if (!StringRef(CPUString).startswith("generic")) { - // FIXME: remove krait check when GNU tools support krait cpu - if (STI.isKrait()) { - ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); - // We consider krait as a "cortex-a9" + hwdiv CPU - // Enable hwdiv through ".arch_extension idiv" - if (STI.hasDivide() || STI.hasDivideInARMMode()) - ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM); - } else - ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); - } - - ATS.emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(CPUString, &STI)); - - // Tag_CPU_arch_profile must have the default value of 0 when "Architecture - // profile is not applicable (e.g. pre v7, or cross-profile code)". - if (STI.hasV7Ops() || isV8M(&STI)) { - if (STI.isAClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::ApplicationProfile); - } else if (STI.isRClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::RealTimeProfile); - } else if (STI.isMClass()) { - ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::MicroControllerProfile); - } - } - - ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, - STI.hasARMOps() ? ARMBuildAttrs::Allowed - : ARMBuildAttrs::Not_Allowed); - if (isV8M(&STI)) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumbDerived); - } else if (STI.isThumb1Only()) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); - } else if (STI.hasThumb2()) { - ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumb32); - } - - if (STI.hasNEON()) { - /* NEON is not exactly a VFP architecture, but GAS emit one of - * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (STI.hasFPARMv8()) { - if (STI.hasCrypto()) - ATS.emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8); - else - ATS.emitFPU(ARM::FK_NEON_FP_ARMV8); - } else if (STI.hasVFP4()) - ATS.emitFPU(ARM::FK_NEON_VFPV4); - else - ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON); - // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture - if (STI.hasV8Ops()) - ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a: - ARMBuildAttrs::AllowNeonARMv8); - } else { - if (STI.hasFPARMv8()) - // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one - // FPU, but there are two different names for it depending on the CPU. - ATS.emitFPU(STI.hasD16() - ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16) - : ARM::FK_FP_ARMV8); - else if (STI.hasVFP4()) - ATS.emitFPU(STI.hasD16() - ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16) - : ARM::FK_VFPV4); - else if (STI.hasVFP3()) - ATS.emitFPU(STI.hasD16() - // +d16 - ? (STI.isFPOnlySP() - ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD) - : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16)) - // -d16 - : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3)); - else if (STI.hasVFP2()) - ATS.emitFPU(ARM::FK_VFPV2); - } + // Emit build attributes for the available hardware. + ATS.emitTargetAttributes(STI); // RW data addressing. if (isPositionIndependent()) { @@ -846,32 +726,15 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::AllowIEEE754); - if (STI.allowsUnalignedMem()) - ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, - ARMBuildAttrs::Allowed); - else - ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, - ARMBuildAttrs::Not_Allowed); - // FIXME: add more flags to ARMBuildAttributes.h // 8-bytes alignment stuff. ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1); ATS.emitAttribute(ARMBuildAttrs::ABI_align_preserved, 1); - // ABI_HardFP_use attribute to indicate single precision FP. - if (STI.isFPOnlySP()) - ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use, - ARMBuildAttrs::HardFPSinglePrecision); - // Hard float. Use both S and D registers and conform to AAPCS-VFP. if (STI.isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS); - // FIXME: Should we signal R9 usage? - - if (STI.hasFP16()) - ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); - // FIXME: To support emitting this build attribute as GCC does, the // -mfp16-format option and associated plumbing must be // supported. For now the __fp16 type is exposed by default, so this @@ -879,21 +742,6 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format, ARMBuildAttrs::FP16FormatIEEE); - if (STI.hasMPExtension()) - ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); - - // Hardware divide in ARM mode is part of base arch, starting from ARMv8. - // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). - // It is not possible to produce DisallowDIV: if hwdiv is present in the base - // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. - // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; - // otherwise, the default value (AllowDIVIfExists) applies. - if (STI.hasDivideInARMMode() && !STI.hasV8Ops()) - ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); - - if (STI.hasDSP() && isV8M(&STI)) - ATS.emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed); - if (MMI) { if (const Module *SourceModule = MMI->getModule()) { // ABI_PCS_wchar_t to indicate wchar_t width @@ -930,16 +778,6 @@ void ARMAsmPrinter::emitAttributes() { else ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR); - - if (STI.hasTrustZone() && STI.hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZVirtualization); - else if (STI.hasTrustZone()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowTZ); - else if (STI.hasVirtualization()) - ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, - ARMBuildAttrs::AllowVirtualization); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 4f5711ca9a796bf6a18af072b97175f8f31762f4..8715657ad5e2531218e2b7e489e96770ece2fd8b 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -21,9 +21,9 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -558,13 +558,68 @@ bool ARMBaseInstrInfo::DefinesPredicate( return Found; } -static bool isCPSRDefined(const MachineInstr *MI) { - for (const auto &MO : MI->operands()) +bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) { + for (const auto &MO : MI.operands()) if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) return true; return false; } +bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Offset = MI.getOperand(Op + 1); + return Offset.getReg() != 0; +} + +// Load with negative register offset requires additional 1cyc and +I unit +// for Cortex A57 +bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Offset = MI.getOperand(Op + 1); + const MachineOperand &Opc = MI.getOperand(Op + 2); + assert(Opc.isImm()); + assert(Offset.isReg()); + int64_t OpcImm = Opc.getImm(); + + bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub; + return (isSub && Offset.getReg() != 0); +} + +bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Opc = MI.getOperand(Op + 2); + unsigned OffImm = Opc.getImm(); + return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; +} + +// Load, scaled register offset, not plus LSL2 +bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, + unsigned Op) const { + const MachineOperand &Opc = MI.getOperand(Op + 2); + unsigned OffImm = Opc.getImm(); + + bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add; + unsigned Amt = ARM_AM::getAM2Offset(OffImm); + ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm); + if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled + bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2); + return !SimpleScaled; +} + +// Minus reg for ldstso addr mode +bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI, + unsigned Op) const { + unsigned OffImm = MI.getOperand(Op + 2).getImm(); + return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; +} + +// Load, scaled register offset +bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI, + unsigned Op) const { + unsigned OffImm = MI.getOperand(Op + 2).getImm(); + return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift; +} + static bool isEligibleForITBlock(const MachineInstr *MI) { switch (MI->getOpcode()) { default: return true; @@ -590,7 +645,7 @@ static bool isEligibleForITBlock(const MachineInstr *MI) { case ARM::tSUBi3: // SUB (immediate) T1 case ARM::tSUBi8: // SUB (immediate) T2 case ARM::tSUBrr: // SUB (register) T1 - return !isCPSRDefined(MI); + return !ARMBaseInstrInfo::isCPSRDefined(*MI); } } @@ -905,7 +960,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(ARM::STRi12)) @@ -1103,7 +1158,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 4: if (ARM::GPRRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) @@ -3349,6 +3404,22 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, return DefCycle; } +bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const { + unsigned BaseReg = MI.getOperand(0).getReg(); + for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) { + const auto &Op = MI.getOperand(i); + if (Op.isReg() && Op.getReg() == BaseReg) + return true; + } + return false; +} +unsigned +ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const { + // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops + // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops) + return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands(); +} + int ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID, @@ -4119,7 +4190,8 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const { const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { + if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && + !Subtarget.cheapPredicableCPSRDef())) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. return 1; @@ -4148,7 +4220,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, } const MCInstrDesc &MCID = MI.getDesc(); - if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) { + if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) && + !Subtarget.cheapPredicableCPSRDef()))) { // When predicated, CPSR is an additional source operand for CPSR updating // instructions, this apparently increases their latencies. *PredCost = 1; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 23777b821f9f360703bb452e646e85cd583a0287..c52e572786d4823d3e22e157f9ba1c5a5d3bd9ec 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -105,10 +105,6 @@ public: // Return whether the target has an explicit NOP encoding. bool hasNOP() const; - virtual void getNoopForElfTarget(MCInst &NopInst) const { - getNoopForMachoTarget(NopInst); - } - // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0; @@ -163,6 +159,24 @@ public: bool isPredicable(const MachineInstr &MI) const override; + // CPSR defined in instruction + static bool isCPSRDefined(const MachineInstr &MI); + bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const; + bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const; + + // Load, scaled register offset + bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const; + // Load, scaled register offset, not plus LSL2 + bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const; + // Minus reg for ldstso addr mode + bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const; + // Scaled register offset in address mode 2 + bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const; + // Load multiple, base reg in list + bool isLDMBaseRegInList(const MachineInstr &MI) const; + // get LDM variable defs size + unsigned getLDMVariableDefsSize(const MachineInstr &MI) const; + /// GetInstSize - Returns the size of the specified MachineInstr. /// unsigned getInstSizeInBytes(const MachineInstr &MI) const override; @@ -404,6 +418,19 @@ public: /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. bool isSwiftFastImmShift(const MachineInstr *MI) const; + + /// Returns predicate register associated with the given frame instruction. + unsigned getFramePred(const MachineInstr &MI) const { + assert(isFrameInstr(MI)); + // Operands of ADJCALLSTACKDOWN/ADJCALLSTACKUP: + // - argument declared in the pattern: + // 0 - frame size + // 1 - arg of CALLSEQ_START/CALLSEQ_END + // 2 - predicate code (like ARMCC::AL) + // - added by predOps: + // 3 - predicate reg + return MI.getOperand(3).getReg(); + } }; /// Get the operands corresponding to the given \p Pred value. By default, the diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 70a44eaaceb8ed31764aa7d70dea55c3ce8d5993..b4fb292c0116ded762358225e626efde69eb3ec5 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// +#include "ARMBaseRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -245,11 +245,18 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; - case ARM::tGPRRegClassID: - return TFI->hasFP(MF) ? 4 : 5; + case ARM::tGPRRegClassID: { + // hasFP ends up calling getMaxCallFrameComputed() which may not be + // available when getPressureLimit() is called as part of + // ScheduleDAGRRList. + bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() + ? TFI->hasFP(MF) : true; + return 5 - HasFP; + } case ARM::GPRRegClassID: { - unsigned FP = TFI->hasFP(MF) ? 1 : 0; - return 10 - FP - (STI.isR9Reserved() ? 1 : 0); + bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() + ? TFI->hasFP(MF) : true; + return 10 - HasFP - (STI.isR9Reserved() ? 1 : 0); } case ARM::SPRRegClassID: // Currently not used as 'rep' register class. case ARM::DPRRegClassID: @@ -806,7 +813,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, if (!DstSubReg) return true; // Small registers don't frequently cause a problem, so we can coalesce them. - if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32) + if (getRegSizeInBits(*NewRC) < 256 && getRegSizeInBits(*DstRC) < 256 && + getRegSizeInBits(*SrcRC) < 256) return true; auto NewRCWeight = diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index 94b317a8f9863ea419714011e0642b96f722f436..e498f70b820dbcbd5b403a4ef8fb2039772dbf72 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -34,8 +35,22 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { + if (T->isArrayTy()) + return true; + + if (T->isStructTy()) { + // For now we only allow homogeneous structs that we can manipulate with + // G_MERGE_VALUES and G_UNMERGE_VALUES + auto StructT = cast(T); + for (unsigned i = 1, e = StructT->getNumElements(); i != e; ++i) + if (StructT->getElementType(i) != StructT->getElementType(0)) + return false; + return true; + } + EVT VT = TLI.getValueType(DL, T, true); - if (!VT.isSimple() || VT.isVector()) + if (!VT.isSimple() || VT.isVector() || + !(VT.isInteger() || VT.isFloatingPoint())) return false; unsigned VTSize = VT.getSimpleVT().getSizeInBits(); @@ -118,8 +133,7 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), MRI.createGenericVirtualRegister(LLT::scalar(32))}; - MIRBuilder.buildExtract(NewRegs[0], Arg.Reg, 0); - MIRBuilder.buildExtract(NewRegs[1], Arg.Reg, 32); + MIRBuilder.buildUnmerge(NewRegs, Arg.Reg); bool IsLittle = MIRBuilder.getMF().getSubtarget().isLittle(); if (!IsLittle) @@ -147,23 +161,55 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { }; } // End anonymous namespace. -void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, - SmallVectorImpl &SplitArgs, - const DataLayout &DL, - MachineRegisterInfo &MRI) const { +void ARMCallLowering::splitToValueTypes( + const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, + MachineFunction &MF, const SplitArgTy &PerformArgSplit) const { const ARMTargetLowering &TLI = *getTLI(); LLVMContext &Ctx = OrigArg.Ty->getContext(); + const DataLayout &DL = MF.getDataLayout(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const Function *F = MF.getFunction(); SmallVector SplitVTs; SmallVector Offsets; ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); - assert(SplitVTs.size() == 1 && "Unsupported type"); + if (SplitVTs.size() == 1) { + // Even if there is no splitting to do, we still want to replace the + // original type (e.g. pointer type -> integer). + auto Flags = OrigArg.Flags; + unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); + Flags.setOrigAlign(OriginalAlignment); + SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), Flags, + OrigArg.IsFixed); + return; + } + + unsigned FirstRegIdx = SplitArgs.size(); + for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { + EVT SplitVT = SplitVTs[i]; + Type *SplitTy = SplitVT.getTypeForEVT(Ctx); + auto Flags = OrigArg.Flags; + + unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); + Flags.setOrigAlign(OriginalAlignment); + + bool NeedsConsecutiveRegisters = + TLI.functionArgumentNeedsConsecutiveRegisters( + SplitTy, F->getCallingConv(), F->isVarArg()); + if (NeedsConsecutiveRegisters) { + Flags.setInConsecutiveRegs(); + if (i == e - 1) + Flags.setInConsecutiveRegsLast(); + } + + SplitArgs.push_back( + ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)), + SplitTy, Flags, OrigArg.IsFixed}); + } - // Even if there is no splitting to do, we still want to replace the original - // type (e.g. pointer type -> integer). - SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), - OrigArg.Flags, OrigArg.IsFixed); + for (unsigned i = 0; i < Offsets.size(); ++i) + PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8); } /// Lower the return value for the already existing \p Ret. This assumes that @@ -184,9 +230,15 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, return false; SmallVector SplitVTs; + SmallVector Regs; ArgInfo RetInfo(VReg, Val->getType()); setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F); - splitToValueTypes(RetInfo, SplitVTs, DL, MF.getRegInfo()); + splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) { + Regs.push_back(Reg); + }); + + if (Regs.size() > 1) + MIRBuilder.buildUnmerge(Regs, VReg); CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg()); @@ -244,12 +296,21 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { // that's what we should load. Size = 4; assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm"); - MRI.setType(ValVReg, LLT::scalar(32)); + + auto LoadVReg = MRI.createGenericVirtualRegister(LLT::scalar(32)); + buildLoad(LoadVReg, Addr, Size, /* Alignment */ 0, MPO); + MIRBuilder.buildTrunc(ValVReg, LoadVReg); + } else { + // If the value is not extended, a simple load will suffice. + buildLoad(ValVReg, Addr, Size, /* Alignment */ 0, MPO); } + } + void buildLoad(unsigned Val, unsigned Addr, uint64_t Size, unsigned Alignment, + MachinePointerInfo &MPO) { auto MMO = MIRBuilder.getMF().getMachineMemOperand( - MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0); - MIRBuilder.buildLoad(ValVReg, Addr, *MMO); + MPO, MachineMemOperand::MOLoad, Size, Alignment); + MIRBuilder.buildLoad(Val, Addr, *MMO); } void assignValueToReg(unsigned ValVReg, unsigned PhysReg, @@ -292,7 +353,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { if (!IsLittle) std::swap(NewRegs[0], NewRegs[1]); - MIRBuilder.buildSequence(Arg.Reg, NewRegs, {0, 32}); + MIRBuilder.buildMerge(Arg.Reg, NewRegs); return 1; } @@ -325,6 +386,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, return false; auto &MF = MIRBuilder.getMF(); + auto &MBB = MIRBuilder.getMBB(); auto DL = MF.getDataLayout(); auto &TLI = *getTLI(); @@ -340,17 +402,31 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg()); + FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(), + AssignFn); + SmallVector ArgInfos; + SmallVector SplitRegs; unsigned Idx = 0; for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[Idx], Arg.getType()); - setArgFlags(AInfo, Idx + 1, DL, F); - splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo()); + setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F); + + SplitRegs.clear(); + + splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { + SplitRegs.push_back(Reg); + }); + + if (!SplitRegs.empty()) + MIRBuilder.buildMerge(VRegs[Idx], SplitRegs); + Idx++; } - FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(), - AssignFn); + if (!MBB.empty()) + MIRBuilder.setInstr(*MBB.begin()); + return handleAssignments(MIRBuilder, ArgInfos, ArgHandler); } @@ -376,7 +452,8 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const auto &TLI = *getTLI(); const auto &DL = MF.getDataLayout(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const auto &STI = MF.getSubtarget(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); if (MF.getSubtarget().genLongCalls()) @@ -388,6 +465,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // registers, but don't insert it yet. auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask( TRI->getCallPreservedMask(MF, CallConv)); + if (Callee.isReg()) { + auto CalleeReg = Callee.getReg(); + if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) + MIB->getOperand(0).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *STI.getInstrInfo(), *STI.getRegBankInfo(), + *MIB.getInstr(), MIB->getDesc(), CalleeReg, 0)); + } SmallVector ArgInfos; for (auto Arg : OrigArgs) { @@ -397,7 +481,13 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!Arg.IsFixed) return false; - splitToValueTypes(Arg, ArgInfos, DL, MRI); + SmallVector Regs; + splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { + Regs.push_back(Reg); + }); + + if (Regs.size() > 1) + MIRBuilder.buildUnmerge(Regs, Arg.Reg); } auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false); @@ -413,17 +503,27 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; ArgInfos.clear(); - splitToValueTypes(OrigRet, ArgInfos, DL, MRI); + SmallVector SplitRegs; + splitToValueTypes(OrigRet, ArgInfos, MF, + [&](unsigned Reg, uint64_t Offset) { + SplitRegs.push_back(Reg); + }); auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false); CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) return false; + + if (!SplitRegs.empty()) { + // We have split the value and allocated each individual piece, now build + // it up again. + MIRBuilder.buildMerge(OrigRet.Reg, SplitRegs); + } } // We now know the size of the stack - update the ADJCALLSTACKDOWN // accordingly. - CallSeqStart.addImm(ArgHandler.StackSize).add(predOps(ARMCC::AL)); + CallSeqStart.addImm(ArgHandler.StackSize).addImm(0).add(predOps(ARMCC::AL)); MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP) .addImm(ArgHandler.StackSize) diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h index 6404c7a2689eedc60ab8ca6de8f67428827f6573..f5a6872336f60f72f840170653784e4f61dba838 100644 --- a/lib/Target/ARM/ARMCallLowering.h +++ b/lib/Target/ARM/ARMCallLowering.h @@ -42,11 +42,14 @@ private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg, MachineInstrBuilder &Ret) const; + typedef std::function SplitArgTy; + /// Split an argument into one or more arguments that the CC lowering can cope /// with (e.g. replace pointers with integers). void splitToValueTypes(const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, - const DataLayout &DL, MachineRegisterInfo &MRI) const; + MachineFunction &MF, + const SplitArgTy &PerformArgSplit) const; }; } // End of namespace llvm #endif diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 7a7b7fede7c832fbe1c00ea17ce07378526d6fb9..bc7afdb7f1c9f9e0c11c50b2bb076c85a456694c 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -273,9 +273,9 @@ def CSR_iOS_SwiftError : CalleeSavedRegs<(sub CSR_iOS, R8)>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; -def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP, - (sequence "R%u", 12, 1), - (sequence "D%u", 31, 0))>; +def CSR_iOS_TLSCall + : CalleeSavedRegs<(add LR, SP, (sub(sequence "R%u", 12, 1), R9, R12), + (sequence "D%u", 31, 0))>; // C++ TLS access function saves all registers except SP. Try to match // the order of CSRs in CSR_iOS. diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 23722f1b7f3ff50b0abb398e33aff04a26760aa0..667337dc9267f71b5b661d08eb25a0687bbb946b 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -21,10 +21,10 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -1741,10 +1741,9 @@ bool ARMConstantIslands::undoLRSpillRestore() { .add(MI->getOperand(1)); MI->eraseFromParent(); MadeChange = true; - } - if (MI->getOpcode() == ARM::tPUSH && - MI->getOperand(2).getReg() == ARM::LR && - MI->getNumExplicitOperands() == 3) { + } else if (MI->getOpcode() == ARM::tPUSH && + MI->getOperand(2).getReg() == ARM::LR && + MI->getNumExplicitOperands() == 3) { // Just remove the push. MI->eraseFromParent(); MadeChange = true; @@ -2158,6 +2157,15 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // If we're in PIC mode, there should be another ADD following. auto *TRI = STI->getRegisterInfo(); + + // %base cannot be redefined after the load as it will appear before + // TBB/TBH like: + // %base = + // %base = + // tBB %base, %idx + if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI)) + continue; + if (isPositionIndependentOrROPI) { MachineInstr *Add = Load->getNextNode(); if (Add->getOpcode() != ARM::tADDrr || diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index e0aecff2633b4fddf1111e1f8fea508a7e22ab8b..ec49f0d37af44107cb16186a92905cc87f9eef44 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -661,7 +661,6 @@ static bool IsAnAddressOperand(const MachineOperand &MO) { return false; case MachineOperand::MO_IntrinsicID: case MachineOperand::MO_Predicate: - case MachineOperand::MO_Placeholder: llvm_unreachable("should not exist post-isel"); } llvm_unreachable("unhandled machine operand type"); @@ -758,14 +757,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MI.eraseFromParent(); } -static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) { - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - MBB->addLiveIn(*I); -} - /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as -/// possible. This only gets used at -O0 so we don't care about efficiency of the -/// generated code. +/// possible. This only gets used at -O0 so we don't care about efficiency of +/// the generated code. bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdrexOp, unsigned StrexOp, @@ -774,16 +768,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, bool IsThumb = STI->isThumb(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); - MachineOperand &Dest = MI.getOperand(0); + const MachineOperand &Dest = MI.getOperand(0); unsigned StatusReg = MI.getOperand(1).getReg(); - MachineOperand &Addr = MI.getOperand(2); - MachineOperand &Desired = MI.getOperand(3); - MachineOperand &New = MI.getOperand(4); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + bool StatusDead = MI.getOperand(1).isDead(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; However undef should be replaced by xzr anyway. + assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned DesiredReg = MI.getOperand(3).getReg(); + unsigned NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -796,25 +789,35 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, if (UxtOp) { MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg()) - .addReg(Desired.getReg(), RegState::Kill); + BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg) + .addReg(DesiredReg, RegState::Kill); if (!IsThumb) MIB.addImm(0); MIB.add(predOps(ARMCC::AL)); } // .Lloadcmp: + // mov wStatus, #0 // ldrex rDest, [rAddr] // cmp rDest, rDesired // bne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(Dest.getReg()); - LoadCmpBB->addLiveIn(Desired.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); + if (!StatusDead) { + if (IsThumb) { + BuildMI(LoadCmpBB, DL, TII->get(ARM::tMOVi8), StatusReg) + .addDef(ARM::CPSR, RegState::Dead) + .addImm(0) + .add(predOps(ARMCC::AL)); + } else { + BuildMI(LoadCmpBB, DL, TII->get(ARM::MOVi), StatusReg) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + } + } MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg()); - MIB.addReg(Addr.getReg()); + MIB.addReg(AddrReg); if (LdrexOp == ARM::t2LDREX) MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset. MIB.add(predOps(ARMCC::AL)); @@ -822,7 +825,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) - .add(Desired) + .addReg(DesiredReg) .add(predOps(ARMCC::AL)); unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; BuildMI(LoadCmpBB, DL, TII->get(Bcc)) @@ -836,21 +839,16 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, // strex rStatus, rNew, [rAddr] // cmp rStatus, #0 // bne .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(New.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); - - - MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg); - MIB.add(New); - MIB.add(Addr); + MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg) + .addReg(NewReg) + .addReg(AddrReg); if (StrexOp == ARM::t2STREX) MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset. MIB.add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, RegState::Kill) + .addReg(StatusReg, getKillRegState(StatusDead)) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) @@ -862,12 +860,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute livein lists. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass around the loop to get loop carried registers right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } @@ -895,19 +905,19 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, DebugLoc DL = MI.getDebugLoc(); MachineOperand &Dest = MI.getOperand(0); unsigned StatusReg = MI.getOperand(1).getReg(); - MachineOperand &Addr = MI.getOperand(2); - MachineOperand &Desired = MI.getOperand(3); - MachineOperand &New = MI.getOperand(4); + bool StatusDead = MI.getOperand(1).isDead(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; However undef should be replaced by xzr anyway. + assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned DesiredReg = MI.getOperand(3).getReg(); + MachineOperand New = MI.getOperand(4); + New.setIsKill(false); unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0); unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1); - unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0); - unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + unsigned DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0); + unsigned DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -923,26 +933,21 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, // cmp rDestLo, rDesiredLo // sbcs rStatus, rDestHi, rDesiredHi // bne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(Dest.getReg()); - LoadCmpBB->addLiveIn(Desired.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); - unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; MachineInstrBuilder MIB; MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD)); addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI); - MIB.addReg(Addr.getReg()).add(predOps(ARMCC::AL)); + MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr; BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) .addReg(DestLo, getKillRegState(Dest.isDead())) - .addReg(DesiredLo, getKillRegState(Desired.isDead())) + .addReg(DesiredLo) .add(predOps(ARMCC::AL)); BuildMI(LoadCmpBB, DL, TII->get(CMPrr)) .addReg(DestHi, getKillRegState(Dest.isDead())) - .addReg(DesiredHi, getKillRegState(Desired.isDead())) + .addReg(DesiredHi) .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill); unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc; @@ -957,18 +962,14 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, // strexd rStatus, rNewLo, rNewHi, [rAddr] // cmp rStatus, #0 // bne .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(New.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); - unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD; MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg); addExclusiveRegPair(MIB, New, 0, IsThumb, TRI); - MIB.add(Addr).add(predOps(ARMCC::AL)); + MIB.addReg(AddrReg).add(predOps(ARMCC::AL)); unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri; BuildMI(StoreBB, DL, TII->get(CMPri)) - .addReg(StatusReg, RegState::Kill) + .addReg(StatusReg, getKillRegState(StatusDead)) .addImm(0) .add(predOps(ARMCC::AL)); BuildMI(StoreBB, DL, TII->get(Bcc)) @@ -980,12 +981,24 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute livein lists. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass around the loop to get loop carried registers right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 6638edfa05c3b523baf47acfb81a931372899c8f..384f80356cc841502a34ce1d641a5ac373ddba1c 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -26,8 +26,8 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -1702,7 +1702,8 @@ bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) { // If we have integer div support we should have selected this automagically. // In case we have a real miss go ahead and return false and we'll pick // it up later. - if (Subtarget->hasDivide()) return false; + if (Subtarget->hasDivideInThumbMode()) + return false; // Otherwise emit a libcall. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1948,7 +1949,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(NumBytes)); + .addImm(NumBytes).addImm(0)); // Process the args. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -2351,19 +2352,19 @@ bool ARMFastISel::SelectCall(const Instruction *I, break; ISD::ArgFlagsTy Flags; - unsigned AttrInd = i - CS.arg_begin() + 1; - if (CS.paramHasAttr(AttrInd, Attribute::SExt)) + unsigned ArgIdx = i - CS.arg_begin(); + if (CS.paramHasAttr(ArgIdx, Attribute::SExt)) Flags.setSExt(); - if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) + if (CS.paramHasAttr(ArgIdx, Attribute::ZExt)) Flags.setZExt(); // FIXME: Only handle *easy* calls for now. - if (CS.paramHasAttr(AttrInd, Attribute::InReg) || - CS.paramHasAttr(AttrInd, Attribute::StructRet) || - CS.paramHasAttr(AttrInd, Attribute::SwiftSelf) || - CS.paramHasAttr(AttrInd, Attribute::SwiftError) || - CS.paramHasAttr(AttrInd, Attribute::Nest) || - CS.paramHasAttr(AttrInd, Attribute::ByVal)) + if (CS.paramHasAttr(ArgIdx, Attribute::InReg) || + CS.paramHasAttr(ArgIdx, Attribute::StructRet) || + CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) || + CS.paramHasAttr(ArgIdx, Attribute::SwiftError) || + CS.paramHasAttr(ArgIdx, Attribute::Nest) || + CS.paramHasAttr(ArgIdx, Attribute::ByVal)) return false; Type *ArgTy = (*i)->getType(); @@ -3024,20 +3025,18 @@ bool ARMFastISel::fastLowerArguments() { // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments // which are passed in r0 - r3. - unsigned Idx = 1; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++Idx) { - if (Idx > 4) + for (const Argument &Arg : F->args()) { + if (Arg.getArgNo() >= 4) return false; - if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || - F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::ByVal)) return false; - Type *ArgTy = I->getType(); + Type *ArgTy = Arg.getType(); if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; @@ -3058,10 +3057,10 @@ bool ARMFastISel::fastLowerArguments() { }; const TargetRegisterClass *RC = &ARM::rGPRRegClass; - Idx = 0; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++Idx) { - unsigned SrcReg = GPRArgRegs[Idx]; + I != E; ++I) { + unsigned ArgNo = I->getArgNo(); + unsigned SrcReg = GPRArgRegs[ArgNo]; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. // Without this, EmitLiveInCopies may eliminate the livein if its only diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 37be22bed54087b05a36a3c47e470c3b4f6d6e97..16b54e8848c23328b9555a4e582aa8921608fbf1 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -20,9 +20,9 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -322,6 +322,18 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, } } +/// We need the offset of the frame pointer relative to other MachineFrameInfo +/// offsets which are encoded relative to SP at function begin. +/// See also emitPrologue() for how the FP is set up. +/// Unfortunately we cannot determine this value in determineCalleeSaves() yet +/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use +/// this to produce a conservative estimate that we check in an assert() later. +static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) { + // This is a conservative estimation: Assume the frame pointer being r7 and + // pc("r15") up to r8 getting spilled before (= 8 registers). + return -AFI.getArgRegsSaveSize() - (8 * 4); +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -432,8 +444,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = - MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; + int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); + assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset && + "Max FP estimation is wrong"); + FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -954,8 +968,9 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - bool isLiveIn = MF.getRegInfo().isLiveIn(Reg); - if (!isLiveIn) + const MachineRegisterInfo &MRI = MF.getRegInfo(); + bool isLiveIn = MRI.isLiveIn(Reg); + if (!isLiveIn && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); // If NoGap is true, push consecutive registers and then leave the rest // for other instructions. e.g. @@ -1700,6 +1715,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // worth the effort and added fragility? unsigned EstimatedStackSize = MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills); + + // Determine biggest (positive) SP offset in MachineFrameInfo. + int MaxFixedOffset = 0; + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I); + MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset); + } + bool HasFP = hasFP(MF); if (HasFP) { if (AFI->hasStackFrame()) @@ -1707,15 +1730,20 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } else { // If FP is not used, SP will be used to access arguments, so count the // size of arguments into the estimation. - EstimatedStackSize += AFI->getArgumentStackSize(); + EstimatedStackSize += MaxFixedOffset; } EstimatedStackSize += 16; // For possible paddings. - bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) || - MFI.hasVarSizedObjects() || - (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)); + unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this); + int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI); + bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit || + MFI.hasVarSizedObjects() || + (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) || + // For large argument stacks fp relative addressed may overflow. + (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit); bool ExtraCSSpill = false; - if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { + if (BigFrameOffsets || + !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); if (HasFP) { @@ -1899,7 +1927,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer // adjustments also, even when the frame itself is small. - if (BigStack && !ExtraCSSpill) { + if (BigFrameOffsets && !ExtraCSSpill) { // If any non-reserved CS register isn't spilled, just spill one or two // extra. That should take care of it! unsigned NumExtras = TargetAlign / 4; @@ -1933,10 +1961,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. assert(RS && "Register scavenging not provided"); - const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + const TargetRegisterClass &RC = ARM::GPRRegClass; + unsigned Size = TRI->getSpillSize(RC); + unsigned Align = TRI->getSpillAlignment(RC); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } } } @@ -1958,7 +1986,7 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( // ADJCALLSTACKUP -> add, sp, sp, amount MachineInstr &Old = *I; DebugLoc dl = Old.getDebugLoc(); - unsigned Amount = Old.getOperand(0).getImm(); + unsigned Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next @@ -1976,14 +2004,11 @@ MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr( ARMCC::CondCodes Pred = (PIdx == -1) ? ARMCC::AL : (ARMCC::CondCodes)Old.getOperand(PIdx).getImm(); + unsigned PredReg = TII.getFramePred(Old); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old.getOperand(2).getReg(); emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, Pred, PredReg); } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. - unsigned PredReg = Old.getOperand(3).getReg(); assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, Pred, PredReg); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index b07b4e1f5cfbdc43df0c4d4ab4c1892ca4f0555f..7f9fe55a5c38b54c26d7cf984e6bf2115a1c1887 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -228,11 +228,6 @@ private: const uint16_t *DOpcodes, const uint16_t *QOpcodes = nullptr); - /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, - /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be - /// generated to force the table registers to be consecutive. - void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); - /// Try to select SBFX/UBFX instructions for ARM. bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); @@ -544,11 +539,11 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, SDValue NewMulConst; if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { HandleSDNode Handle(N); + SDLoc Loc(N); replaceDAGValue(N.getOperand(1), NewMulConst); BaseReg = Handle.getValue(); - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl, - PowerOfTwo), - SDLoc(N), MVT::i32); + Opc = CurDAG->getTargetConstant( + ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); return true; } } @@ -745,7 +740,9 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { + HandleSDNode Handle(Offset); replaceDAGValue(Offset.getOperand(1), NewMulConst); + Offset = Handle.getValue(); ShAmt = PowerOfTwo; ShOpcVal = ARM_AM::lsl; } @@ -1425,7 +1422,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { + HandleSDNode Handle(OffReg); replaceDAGValue(OffReg.getOperand(1), NewMulConst); + OffReg = Handle.getValue(); ShAmt = PowerOfTwo; } } @@ -1859,6 +1858,14 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { return Opc; // If not one we handle, return it unchanged. } +/// Returns true if the given increment is a Constant known to be equal to the +/// access size performed by a NEON load/store. This means the "[rN]!" form can +/// be used. +static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { + auto C = dyn_cast(Inc); + return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; +} + void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *DOpcodes, const uint16_t *QOpcodes0, @@ -1926,13 +1933,13 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if ((NumVecs <= 2) && !isa(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if ((NumVecs <= 2) && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && !isVLDfixed(Opc)) || - !isa(Inc.getNode())) - Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2080,11 +2087,12 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs <= 2 && !isa(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if (!isa(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); else if (NumVecs > 2 && !isVSTfixed(Opc)) Ops.push_back(Reg0); @@ -2214,7 +2222,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } SDValue SuperReg; @@ -2318,9 +2328,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, // fixed-stride update instructions don't have an explicit writeback // operand. It's implicit in the opcode itself. SDValue Inc = N->getOperand(2); - if (NumVecs <= 2 && !isa(Inc.getNode())) + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); - if (!isa(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); // FIXME: VLD3 and VLD4 haven't been updated to that form yet. else if (NumVecs > 2) @@ -2356,39 +2368,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } -void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, - unsigned Opc) { - assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); - SDLoc dl(N); - EVT VT = N->getValueType(0); - unsigned FirstTblReg = IsExt ? 2 : 1; - - // Form a REG_SEQUENCE to force register allocation. - SDValue RegSeq; - SDValue V0 = N->getOperand(FirstTblReg + 0); - SDValue V1 = N->getOperand(FirstTblReg + 1); - if (NumVecs == 2) - RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); - else { - SDValue V2 = N->getOperand(FirstTblReg + 2); - // If it's a vtbl3, form a quad D-register and leave the last part as - // an undef. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) - : N->getOperand(FirstTblReg + 3); - RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); - } - - SmallVector Ops; - if (IsExt) - Ops.push_back(N->getOperand(1)); - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); - Ops.push_back(getAL(CurDAG, dl)); // predicate - Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register - ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); -} - bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) return false; @@ -3730,59 +3709,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { break; } - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - - case Intrinsic::arm_neon_vtbl2: - SelectVTBL(N, false, 2, ARM::VTBL2); - return; - case Intrinsic::arm_neon_vtbl3: - SelectVTBL(N, false, 3, ARM::VTBL3Pseudo); - return; - case Intrinsic::arm_neon_vtbl4: - SelectVTBL(N, false, 4, ARM::VTBL4Pseudo); - return; - - case Intrinsic::arm_neon_vtbx2: - SelectVTBL(N, true, 2, ARM::VTBX2); - return; - case Intrinsic::arm_neon_vtbx3: - SelectVTBL(N, true, 3, ARM::VTBX3Pseudo); - return; - case Intrinsic::arm_neon_vtbx4: - SelectVTBL(N, true, 4, ARM::VTBX4Pseudo); - return; - } - break; - } - - case ARMISD::VTBL1: { - SDLoc dl(N); - EVT VT = N->getValueType(0); - SDValue Ops[] = {N->getOperand(0), N->getOperand(1), - getAL(CurDAG, dl), // Predicate - CurDAG->getRegister(0, MVT::i32)}; // Predicate Register - ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops)); - return; - } - case ARMISD::VTBL2: { - SDLoc dl(N); - EVT VT = N->getValueType(0); - - // Form a REG_SEQUENCE to force register allocation. - SDValue V0 = N->getOperand(0); - SDValue V1 = N->getOperand(1); - SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0); - - SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate - CurDAG->getRegister(0, MVT::i32)}; // Predicate Register - ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops)); - return; - } - case ISD::ATOMIC_CMP_SWAP: SelectCMP_SWAP(N); return; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e697c8ca5339e873f3e55d7f0fb378f138317c0e..5b2d093e8f0da235df02a58bf294330746027fe3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "ARMISelLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" -#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" @@ -29,13 +29,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/VectorUtils.h" @@ -61,7 +61,6 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" @@ -91,6 +90,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -102,8 +102,8 @@ #include #include #include -#include #include +#include #include #include @@ -126,7 +126,7 @@ static cl::opt EnableConstpoolPromotion( "arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), - cl::init(true)); + cl::init(false)); // FIXME: set to true by default once PR32780 is fixed static cl::opt ConstpoolPromotionMaxSize( "arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), @@ -201,7 +201,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) - for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) + for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); } @@ -472,9 +472,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } // Use divmod compiler-rt calls for iOS 5.0 and later. - if (Subtarget->isTargetWatchOS() || - (Subtarget->isTargetIOS() && - !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { + if (Subtarget->isTargetMachO() && + !(Subtarget->isTargetIOS() && + Subtarget->getTargetTriple().isOSVersionLT(5, 0))) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } @@ -584,7 +584,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); - setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); @@ -602,7 +601,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); - setOperationAction(ISD::FPOWI, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); @@ -619,7 +617,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::v2f32, Expand); - setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, Expand); setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); @@ -742,7 +739,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOWI, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f64, Expand); setOperationAction(ISD::FLOG2, MVT::f64, Expand); @@ -821,6 +817,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); setOperationAction(ISD::ADDC, MVT::i32, Custom); setOperationAction(ISD::ADDE, MVT::i32, Custom); @@ -852,7 +849,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); - bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide() + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() : Subtarget->hasDivideInARMMode(); if (!hasDivide) { // These are expanded into libcalls if the cpu doesn't have HW divider. @@ -860,7 +857,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i32, LibCall); } - if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) { + if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) { setOperationAction(ISD::SDIV, MVT::i32, Custom); setOperationAction(ISD::UDIV, MVT::i32, Custom); @@ -1343,6 +1340,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SMLALTT: return "ARMISD::SMLALTT"; case ARMISD::SMULWB: return "ARMISD::SMULWB"; case ARMISD::SMULWT: return "ARMISD::SMULWT"; + case ARMISD::SMLALD: return "ARMISD::SMLALD"; + case ARMISD::SMLALDX: return "ARMISD::SMLALDX"; + case ARMISD::SMLSLD: return "ARMISD::SMLSLD"; + case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; @@ -1811,8 +1812,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!isSibCall) - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getIntPtrConstant(NumBytes, dl, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); @@ -2633,7 +2633,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { return true; } -bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { if (!Subtarget->supportsTailCall()) return false; @@ -3310,6 +3310,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } return Result; } + case Intrinsic::arm_neon_vabs: + return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(), + Op.getOperand(1)); case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) @@ -3347,13 +3350,23 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::arm_neon_vtbl1: + return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::arm_neon_vtbl2: + return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } } static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { - // FIXME: handle "fence singlethread" more efficiently. SDLoc dl(Op); + ConstantSDNode *ScopeN = cast(Op.getOperand(2)); + auto Scope = static_cast(ScopeN->getZExtValue()); + if (Scope == SynchronizationScope::SingleThread) + return Op; + if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get @@ -7346,7 +7359,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. - Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr); + Type *RetTy = StructType::get(ArgTy, ArgTy); auto &DL = DAG.getDataLayout(); ArgListTy Args; @@ -7711,6 +7724,37 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { } } +static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) { + unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); + unsigned Opc = 0; + if (IntNo == Intrinsic::arm_smlald) + Opc = ARMISD::SMLALD; + else if (IntNo == Intrinsic::arm_smlaldx) + Opc = ARMISD::SMLALDX; + else if (IntNo == Intrinsic::arm_smlsld) + Opc = ARMISD::SMLSLD; + else if (IntNo == Intrinsic::arm_smlsldx) + Opc = ARMISD::SMLSLDX; + else + return; + + SDLoc dl(N); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + N->getOperand(3), + DAG.getConstant(0, dl, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, + N->getOperand(3), + DAG.getConstant(1, dl, MVT::i32)); + + SDValue LongMul = DAG.getNode(Opc, dl, + DAG.getVTList(MVT::i32, MVT::i32), + N->getOperand(1), N->getOperand(2), + Lo, Hi); + Results.push_back(LongMul.getValue(0)); + Results.push_back(LongMul.getValue(1)); +} + /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. void ARMTargetLowering::ReplaceNodeResults(SDNode *N, @@ -7752,6 +7796,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_64Results(N, Results, DAG); return; + case ISD::INTRINSIC_WO_CHAIN: + return ReplaceLongIntrinsic(N, Results, DAG); } if (Res.getNode()) Results.push_back(Res); @@ -9470,8 +9516,11 @@ AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } - // Don't generate vpaddl+vmovn; we'll match it to vpadd later. - if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) + // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure + // we're using the entire input vector, otherwise there's a size/legality + // mismatch somewhere. + if (nextIndex != Vec.getValueType().getVectorNumElements() || + Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) return SDValue(); // Create VPADDL node. @@ -10867,11 +10916,8 @@ static SDValue CombineBaseUpdate(SDNode *N, // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); - if (ConstantSDNode *CInc = dyn_cast(Inc.getNode())) { - uint64_t IncVal = CInc->getZExtValue(); - if (IncVal != NumBytes) - continue; - } else if (NumBytes >= 3 * 16) { + ConstantSDNode *CInc = dyn_cast(Inc.getNode()); + if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) { // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two // separate instructions that make it harder to use a non-constant update. continue; @@ -11688,34 +11734,6 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, - APInt &KnownOne) { - if (Op.getOpcode() == ARMISD::BFI) { - // Conservatively, we can recurse down the first operand - // and just mask out all affected bits. - computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); - - // The operand to BFI is already a mask suitable for removing the bits it - // sets. - ConstantSDNode *CI = cast(Op.getOperand(2)); - const APInt &Mask = CI->getAPIntValue(); - KnownZero &= Mask; - KnownOne &= Mask; - return; - } - if (Op.getOpcode() == ARMISD::CMOV) { - APInt KZ2(KnownZero.getBitWidth(), 0); - APInt KO2(KnownOne.getBitWidth(), 0); - computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); - computeKnownBits(DAG, Op.getOperand(1), KZ2, KO2); - - KnownZero &= KZ2; - KnownOne &= KO2; - return; - } - return DAG.computeKnownBits(Op, KnownZero, KnownOne); -} - SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) @@ -11776,9 +11794,9 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D // Lastly, can we determine that the bits defined by OrCI // are zero in Y? - APInt KnownZero, KnownOne; - computeKnownBits(DAG, Y, KnownZero, KnownOne); - if ((OrCI & KnownZero) != OrCI) + KnownBits Known; + DAG.computeKnownBits(Y, Known); + if ((OrCI & Known.Zero) != OrCI) return SDValue(); // OK, we can do the combine. @@ -11916,16 +11934,16 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { } if (Res.getNode()) { - APInt KnownZero, KnownOne; - DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); + KnownBits Known; + DAG.computeKnownBits(SDValue(N,0), Known); // Capture demanded bits information that would be otherwise lost. - if (KnownZero == 0xfffffffe) + if (Known.Zero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i1)); - else if (KnownZero == 0xffffff00) + else if (Known.Zero == 0xffffff00) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i8)); - else if (KnownZero == 0xffff0000) + else if (Known.Zero == 0xffff0000) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, DAG.getValueType(MVT::i16)); } @@ -12124,12 +12142,6 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, } } - // Lowering to i32/i16 if the size permits. - if (Size >= 4) - return MVT::i32; - else if (Size >= 2) - return MVT::i16; - // Let the target-independent logic figure it out. return MVT::Other; } @@ -12614,13 +12626,12 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, } void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - unsigned BitWidth = KnownOne.getBitWidth(); - KnownZero = KnownOne = APInt(BitWidth, 0); + unsigned BitWidth = Known.getBitWidth(); + Known.resetAll(); switch (Op.getOpcode()) { default: break; case ARMISD::ADDC: @@ -12630,17 +12641,18 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // These nodes' second result is a boolean if (Op.getResNo() == 0) break; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - if (KnownZero == 0 && KnownOne == 0) return; + DAG.computeKnownBits(Op.getOperand(0), Known, Depth+1); + if (Known.isUnknown()) + return; - APInt KnownZeroRHS, KnownOneRHS; - DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); - KnownZero &= KnownZeroRHS; - KnownOne &= KnownOneRHS; + KnownBits KnownRHS; + DAG.computeKnownBits(Op.getOperand(1), KnownRHS, Depth+1); + Known.Zero &= KnownRHS.Zero; + Known.One &= KnownRHS.One; return; } case ISD::INTRINSIC_W_CHAIN: { @@ -12652,11 +12664,24 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::arm_ldrex: { EVT VT = cast(Op)->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); return; } } } + case ARMISD::BFI: { + // Conservatively, we can recurse down the first operand + // and just mask out all affected bits. + DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1); + + // The operand to BFI is already a mask suitable for removing the bits it + // sets. + ConstantSDNode *CI = cast(Op.getOperand(2)); + const APInt &Mask = CI->getAPIntValue(); + Known.Zero &= Mask; + Known.One &= Mask; + return; + } } } @@ -13052,7 +13077,9 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { // rem = a - b * div // return {div, rem} // This should be lowered into UDIV/SDIV + MLS later on. - if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() && + bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode() + : Subtarget->hasDivideInARMMode(); + if (hasDivide && Op->getValueType(0).isSimple() && Op->getSimpleValueType(0) == MVT::i32) { unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; const SDValue Dividend = Op->getOperand(0); @@ -13076,7 +13103,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); - Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); + Type *RetTy = StructType::get(Ty, Ty); if (Subtarget->isTargetWindows()) InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain); @@ -13378,9 +13405,9 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, } // Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html -Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, - AtomicOrdering Ord, bool IsStore, - bool IsLoad) const { +Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: @@ -13389,7 +13416,7 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, case AtomicOrdering::Acquire: return nullptr; // Nothing to do case AtomicOrdering::SequentiallyConsistent: - if (!IsStore) + if (!Inst->hasAtomicStore()) return nullptr; // Nothing to do /*FALLTHROUGH*/ case AtomicOrdering::Release: @@ -13403,9 +13430,9 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, llvm_unreachable("Unknown fence ordering in emitLeadingFence"); } -Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, - AtomicOrdering Ord, bool IsStore, - bool IsLoad) const { +Instruction *ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { switch (Ord) { case AtomicOrdering::NotAtomic: case AtomicOrdering::Unordered: @@ -14018,3 +14045,8 @@ void ARMTargetLowering::insertCopiesSplitCSR( .addReg(NewVR); } } + +void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const { + MF.getFrameInfo().computeMaxCallFrameSize(MF); + TargetLoweringBase::finalizeLowering(MF); +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 70a0b1380ec98bfdfb637ea2769cbfbaa5965877..26da528c19e6da3f85b3f9c1a2af09b740aee190 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -184,6 +184,10 @@ class InstrItineraryData; SMLALBT, // 64-bit signed accumulate multiply bottom, top 16 SMLALTB, // 64-bit signed accumulate multiply top, bottom 16 SMLALTT, // 64-bit signed accumulate multiply top, top 16 + SMLALD, // Signed multiply accumulate long dual + SMLALDX, // Signed multiply accumulate long dual exchange + SMLSLD, // Signed multiply subtract long dual + SMLSLDX, // Signed multiply subtract long dual exchange // Operands of the standard BUILD_VECTOR node are not legalized, which // is fine if BUILD_VECTORs are always lowered to shuffles or other @@ -350,8 +354,7 @@ class InstrItineraryData; SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; - void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, - APInt &KnownOne, + void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override; @@ -480,10 +483,10 @@ class InstrItineraryData; void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override; - Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, - bool IsStore, bool IsLoad) const override; - Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, - bool IsStore, bool IsLoad) const override; + Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; unsigned getMaxSupportedInterleaveFactor() const override { return 4; } @@ -507,7 +510,7 @@ class InstrItineraryData; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; - bool canMergeStoresTo(EVT MemVT) const override { + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const override { // Do not merge to larger than i32. return (MemVT.getSizeInBits() <= 32); } @@ -541,6 +544,8 @@ class InstrItineraryData; unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const; + void finalizeLowering(MachineFunction &MF) const override; + protected: std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, @@ -717,7 +722,7 @@ class InstrItineraryData; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue CCR, SDValue Cmp, diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 3b3606ef462a962011e25c4850016a2f35e1f732..a0e2ac4cbc6f7c62d76b3a448d42b015f56b2bd8 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -32,8 +32,8 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void ARMInstrInfo::getNoop(MCInst &NopInst) const { if (hasNOP()) { NopInst.setOpcode(ARM::HINT); NopInst.addOperand(MCOperand::createImm(0)); diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 4b1b7097b18d9e0e3b5e39e82ba9adad9235425e..c87fb97448c91358673adf0354a37b133bf4140e 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -25,8 +25,8 @@ class ARMInstrInfo : public ARMBaseInstrInfo { public: explicit ARMInstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cc0e7d4d9c3595fb4c3d5f21b9c792246e3cd8f7..d06b7d0896f1605b67d6f81ce529bff31a291210 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -16,7 +16,8 @@ // // Type profiles. -def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; def SDT_ARMStructByVal : SDTypeProfile<0, 4, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, @@ -99,6 +100,11 @@ def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>, SDTCisSameAs<0, 4>, SDTCisSameAs<0, 5>]>; +def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>; +def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; +def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; +def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; @@ -259,8 +265,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float conversions">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, AssemblerPredicate<"FeatureFullFP16","full half-float">; -def HasDivide : Predicate<"Subtarget->hasDivide()">, - AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">; +def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">, + AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">; def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">; def HasDSP : Predicate<"Subtarget->hasDSP()">, @@ -317,8 +323,10 @@ def UseNegativeImmediates : "NegativeImmediates">; // FIXME: Eventually this will be just "hasV6T2Ops". -def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; -def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; +let RecomputePerFunction = 1 in { + def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; + def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; +} def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; def UseMulOps : Predicate<"Subtarget->useMulOps()">; @@ -345,8 +353,10 @@ def UseVMOVSR : Predicate<"Subtarget->preferVMOVSR() ||" def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&" "Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; -def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; +let RecomputePerFunction = 1 in { + def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; + def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; +} def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">; @@ -866,7 +876,9 @@ def imm1_16_XFORM: SDNodeXForm; def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; } -def imm1_16 : Operand, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }], +def imm1_16 : Operand, ImmLeaf 0 && Imm <= 16; + }], imm1_16_XFORM> { let PrintMethod = "printImmPlusOneOperand"; let ParserMatchClass = Imm1_16AsmOperand; @@ -1957,8 +1969,8 @@ PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKDOWN : -PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, - [(ARMcallseq_start timm:$amt)]>; +PseudoInst<(outs), (ins i32imm:$amt, i32imm:$amt2, pred:$p), NoItinerary, + [(ARMcallseq_start timm:$amt, timm:$amt2)]>; } def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, @@ -1979,7 +1991,9 @@ def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; def : InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", - "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, + Requires<[IsARM, HasV6]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -3468,8 +3482,12 @@ def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), (SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; +def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src), + (SXTB16 GPR:$Src, 0)>; def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; +def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS), + (SXTAB16 GPR:$LHS, GPR:$RHS, 0)>; // Zero extenders @@ -3489,6 +3507,8 @@ def UXTB16 : AI_ext_rrot<0b01101100, // (UXTB16r_rot GPR:$Src, 3)>; def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (UXTB16 GPR:$Src, 1)>; +def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src), + (UXTB16 GPR:$Src, 0)>; def UXTAB : AI_exta_rrot<0b01101110, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -3503,6 +3523,8 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; +def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS), + (UXTAB16 GPR:$LHS, GPR:$RHS, 0)>; def SBFX : I<(outs GPRnopc:$Rd), @@ -3629,71 +3651,85 @@ class AAI op27_20, bits<8> op11_4, string opc, let Unpredictable{11-8} = 0b1111; } -// Saturating add/subtract +// Wrappers around the AAI class +class AAIRevOpr op27_20, bits<8> op11_4, string opc, + list pattern = []> + : AAI; + +class AAIIntrinsic op27_20, bits<8> op11_4, string opc, + Intrinsic intrinsic> + : AAI; +// Saturating add/subtract +let hasSideEffects = 1 in { +def QADD8 : AAIIntrinsic<0b01100010, 0b11111001, "qadd8", int_arm_qadd8>; +def QADD16 : AAIIntrinsic<0b01100010, 0b11110001, "qadd16", int_arm_qadd16>; +def QSUB16 : AAIIntrinsic<0b01100010, 0b11110111, "qsub16", int_arm_qsub16>; +def QSUB8 : AAIIntrinsic<0b01100010, 0b11111111, "qsub8", int_arm_qsub8>; + +def QDADD : AAIRevOpr<0b00010100, 0b00000101, "qdadd", + [(set GPRnopc:$Rd, (int_arm_qadd (int_arm_qadd GPRnopc:$Rm, + GPRnopc:$Rm), + GPRnopc:$Rn))]>; +def QDSUB : AAIRevOpr<0b00010110, 0b00000101, "qdsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, + (int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>; +def QSUB : AAIRevOpr<0b00010010, 0b00000101, "qsub", + [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))]>; let DecoderMethod = "DecodeQADDInstruction" in -def QADD : AAI<0b00010000, 0b00000101, "qadd", - [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))], - (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; - -def QSUB : AAI<0b00010010, 0b00000101, "qsub", - [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))], - (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; -def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], - (ins GPRnopc:$Rm, GPRnopc:$Rn), - "\t$Rd, $Rm, $Rn">; -def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], - (ins GPRnopc:$Rm, GPRnopc:$Rn), - "\t$Rd, $Rm, $Rn">; - -def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">; -def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">; -def QASX : AAI<0b01100010, 0b11110011, "qasx">; -def QSAX : AAI<0b01100010, 0b11110101, "qsax">; -def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">; -def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">; -def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">; -def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">; -def UQASX : AAI<0b01100110, 0b11110011, "uqasx">; -def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">; -def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">; -def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">; + def QADD : AAIRevOpr<0b00010000, 0b00000101, "qadd", + [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>; +} + +def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>; +def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>; +def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>; +def UQSUB8 : AAIIntrinsic<0b01100110, 0b11111111, "uqsub8", int_arm_uqsub8>; +def QASX : AAIIntrinsic<0b01100010, 0b11110011, "qasx", int_arm_qasx>; +def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>; +def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>; +def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>; // Signed/Unsigned add/subtract -def SASX : AAI<0b01100001, 0b11110011, "sasx">; -def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">; -def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">; -def SSAX : AAI<0b01100001, 0b11110101, "ssax">; -def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">; -def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">; -def UASX : AAI<0b01100101, 0b11110011, "uasx">; -def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">; -def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">; -def USAX : AAI<0b01100101, 0b11110101, "usax">; -def USUB16 : AAI<0b01100101, 0b11110111, "usub16">; -def USUB8 : AAI<0b01100101, 0b11111111, "usub8">; +def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>; +def SADD16 : AAIIntrinsic<0b01100001, 0b11110001, "sadd16", int_arm_sadd16>; +def SADD8 : AAIIntrinsic<0b01100001, 0b11111001, "sadd8", int_arm_sadd8>; +def SSAX : AAIIntrinsic<0b01100001, 0b11110101, "ssax", int_arm_ssax>; +def SSUB16 : AAIIntrinsic<0b01100001, 0b11110111, "ssub16", int_arm_ssub16>; +def SSUB8 : AAIIntrinsic<0b01100001, 0b11111111, "ssub8", int_arm_ssub8>; +def UASX : AAIIntrinsic<0b01100101, 0b11110011, "uasx", int_arm_uasx>; +def UADD16 : AAIIntrinsic<0b01100101, 0b11110001, "uadd16", int_arm_uadd16>; +def UADD8 : AAIIntrinsic<0b01100101, 0b11111001, "uadd8", int_arm_uadd8>; +def USAX : AAIIntrinsic<0b01100101, 0b11110101, "usax", int_arm_usax>; +def USUB16 : AAIIntrinsic<0b01100101, 0b11110111, "usub16", int_arm_usub16>; +def USUB8 : AAIIntrinsic<0b01100101, 0b11111111, "usub8", int_arm_usub8>; // Signed/Unsigned halving add/subtract -def SHASX : AAI<0b01100011, 0b11110011, "shasx">; -def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">; -def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">; -def SHSAX : AAI<0b01100011, 0b11110101, "shsax">; -def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">; -def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">; -def UHASX : AAI<0b01100111, 0b11110011, "uhasx">; -def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">; -def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">; -def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">; -def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">; -def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">; +def SHASX : AAIIntrinsic<0b01100011, 0b11110011, "shasx", int_arm_shasx>; +def SHADD16 : AAIIntrinsic<0b01100011, 0b11110001, "shadd16", int_arm_shadd16>; +def SHADD8 : AAIIntrinsic<0b01100011, 0b11111001, "shadd8", int_arm_shadd8>; +def SHSAX : AAIIntrinsic<0b01100011, 0b11110101, "shsax", int_arm_shsax>; +def SHSUB16 : AAIIntrinsic<0b01100011, 0b11110111, "shsub16", int_arm_shsub16>; +def SHSUB8 : AAIIntrinsic<0b01100011, 0b11111111, "shsub8", int_arm_shsub8>; +def UHASX : AAIIntrinsic<0b01100111, 0b11110011, "uhasx", int_arm_uhasx>; +def UHADD16 : AAIIntrinsic<0b01100111, 0b11110001, "uhadd16", int_arm_uhadd16>; +def UHADD8 : AAIIntrinsic<0b01100111, 0b11111001, "uhadd8", int_arm_uhadd8>; +def UHSAX : AAIIntrinsic<0b01100111, 0b11110101, "uhsax", int_arm_uhsax>; +def UHSUB16 : AAIIntrinsic<0b01100111, 0b11110111, "uhsub16", int_arm_uhsub16>; +def UHSUB8 : AAIIntrinsic<0b01100111, 0b11111111, "uhsub8", int_arm_uhsub8>; // Unsigned Sum of Absolute Differences [and Accumulate]. def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), MulFrm /* for convenience */, NoItinerary, "usad8", - "\t$Rd, $Rn, $Rm", []>, + "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_usad8 GPR:$Rn, GPR:$Rm))]>, Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; @@ -3707,7 +3743,8 @@ def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), } def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), MulFrm /* for convenience */, NoItinerary, "usada8", - "\t$Rd, $Rn, $Rm, $Ra", []>, + "\t$Rd, $Rn, $Rm, $Ra", + [(set GPR:$Rd, (int_arm_usada8 GPR:$Rn, GPR:$Rm, GPR:$Ra))]>, Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{ bits<4> Rd; bits<4> Rn; @@ -3722,7 +3759,6 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), } // Signed/Unsigned saturate - def SSAT : AI<(outs GPRnopc:$Rd), (ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh), SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>, @@ -3791,6 +3827,10 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos), (USAT imm0_31:$pos, GPRnopc:$a, 0)>; def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), + (SSAT16 imm1_16:$pos, GPRnopc:$a)>; +def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), + (USAT16 imm0_15:$pos, GPRnopc:$a)>; //===----------------------------------------------------------------------===// // Bitwise Instructions. @@ -4216,8 +4256,8 @@ multiclass AI_smla { IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; } } @@ -4251,7 +4291,8 @@ def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), // Helper class for AI_smld. class AMulDualIbase - : AI, Requires<[IsARM, HasV6]> { + : AI, + Requires<[IsARM, HasV6]> { bits<4> Rn; bits<4> Rm; let Inst{27-23} = 0b01110; @@ -4301,20 +4342,40 @@ multiclass AI_smld { Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), - (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, + (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + NoItinerary, !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">, + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; - } defm SMLA : AI_smld<0, "smla">; defm SMLS : AI_smld<1, "smls">; +def : ARMV6Pat<(int_arm_smlad GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLAD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smladx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLADX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smlsd GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(int_arm_smlsdx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), + (SMLSDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>; +def : ARMV6Pat<(ARMSmlald GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlaldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLALDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlsld GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; +def : ARMV6Pat<(ARMSmlsldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi), + (SMLSLDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>; + multiclass AI_sdml { def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), @@ -4328,6 +4389,15 @@ multiclass AI_sdml { defm SMUA : AI_sdml<0, "smua">; defm SMUS : AI_sdml<1, "smus">; +def : ARMV6Pat<(int_arm_smuad GPRnopc:$Rn, GPRnopc:$Rm), + (SMUAD GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smuadx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUADX GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smusd GPRnopc:$Rn, GPRnopc:$Rm), + (SMUSD GPRnopc:$Rn, GPRnopc:$Rm)>; +def : ARMV6Pat<(int_arm_smusdx GPRnopc:$Rn, GPRnopc:$Rm), + (SMUSDX GPRnopc:$Rn, GPRnopc:$Rm)>; + //===----------------------------------------------------------------------===// // Division Instructions (ARMv7-A with virtualization extension) // @@ -5644,6 +5714,32 @@ def : ARMV5MOPat<(add GPR:$acc, (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, Sched<[WriteMUL32, ReadMUL, ReadMUL]>; +def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulbt GPR:$a, GPR:$b), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultb GPR:$a, GPR:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smultt GPR:$a, GPR:$b), + (SMULTT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwb GPR:$a, GPR:$b), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(int_arm_smulwt GPR:$a, GPR:$b), + (SMULWT GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc), + (SMLATT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc), + (SMLAWT GPR:$a, GPR:$b, GPR:$acc)>; + // Pre-v7 uses MCR for synchronization barriers. def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, Requires<[IsARM, HasV6]>; @@ -5975,3 +6071,10 @@ def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status), (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } + +def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, + [(atomic_fence imm:$ordering, 0)]> { + let hasSideEffects = 1; + let Size = 0; + let AsmString = "@ COMPILER BARRIER"; +} diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 681e235d78f08026631425e741afd3d1a8fb646e..858136a82078442101eb57655999eb55e151d0b6 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -587,6 +587,14 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; +def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>]>; +def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, + SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; +def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; +def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; + + def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast(N->getOperand(0)); unsigned EltBits = 0; @@ -666,7 +674,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { class VLD1D op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), (ins AddrMode:$Rn), IIC_VLD1, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -674,7 +682,7 @@ class VLD1D op7_4, string Dt, Operand AddrMode> class VLD1Q op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins AddrMode:$Rn), IIC_VLD1x2, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -695,7 +703,7 @@ multiclass VLD1DWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -703,7 +711,7 @@ multiclass VLD1DWB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -712,7 +720,7 @@ multiclass VLD1QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -720,7 +728,7 @@ multiclass VLD1QWB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -739,7 +747,7 @@ defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; class VLD1D3 op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, - "$Vd, $Rn", "", []> { + "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -748,7 +756,7 @@ multiclass VLD1D3WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -756,7 +764,7 @@ multiclass VLD1D3WB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -772,15 +780,15 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; -def VLD1d64TPseudo : VLDQQPseudo; -def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo; +def VLD1d64TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD3]>; // ...with 4 registers class VLD1D4 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, - "$Vd, $Rn", "", []> { + "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -789,7 +797,7 @@ multiclass VLD1D4WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -797,7 +805,7 @@ multiclass VLD1D4WB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -813,9 +821,9 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; -def VLD1d64QPseudo : VLDQQPseudo; -def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo; +def VLD1d64QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; // VLD2 : Vector Load (multiple 2-element structures) class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -829,22 +837,22 @@ class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q8Pseudo : VLDQQPseudo; -def VLD2q16Pseudo : VLDQQPseudo; -def VLD2q32Pseudo : VLDQQPseudo; +def VLD2q8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD2q16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD2q32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; // ...with address register writeback: multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, @@ -867,45 +875,45 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, } defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo; -def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo; -def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo; +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; // ...with double-spaced registers def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6:$Rn), IIC_VLD3, - "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; @@ -915,9 +923,9 @@ def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; -def VLD3d8Pseudo : VLDQQPseudo; -def VLD3d16Pseudo : VLDQQPseudo; -def VLD3d32Pseudo : VLDQQPseudo; +def VLD3d8Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD3d16Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD3d32Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; // ...with address register writeback: class VLD3DWB op11_8, bits<4> op7_4, string Dt> @@ -925,7 +933,7 @@ class VLD3DWB op11_8, bits<4> op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; } @@ -934,9 +942,9 @@ def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; -def VLD3d8Pseudo_UPD : VLDQQWBPseudo; -def VLD3d16Pseudo_UPD : VLDQQWBPseudo; -def VLD3d32Pseudo_UPD : VLDQQWBPseudo; +def VLD3d8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; // ...with double-spaced registers: def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; @@ -946,25 +954,26 @@ def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; -def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; -def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; -def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; // ...alternate versions to be allocated odd register numbers: -def VLD3q8oddPseudo : VLDQQQQPseudo; -def VLD3q16oddPseudo : VLDQQQQPseudo; -def VLD3q32oddPseudo : VLDQQQQPseudo; +def VLD3q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; -def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$Rn), IIC_VLD4, - "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, + Sched<[WriteVLD4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; @@ -974,9 +983,9 @@ def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; -def VLD4d8Pseudo : VLDQQPseudo; -def VLD4d16Pseudo : VLDQQPseudo; -def VLD4d32Pseudo : VLDQQPseudo; +def VLD4d8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD4d16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD4d32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; // ...with address register writeback: class VLD4DWB op11_8, bits<4> op7_4, string Dt> @@ -984,7 +993,7 @@ class VLD4DWB op11_8, bits<4> op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; } @@ -993,9 +1002,9 @@ def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; -def VLD4d8Pseudo_UPD : VLDQQWBPseudo; -def VLD4d16Pseudo_UPD : VLDQQWBPseudo; -def VLD4d32Pseudo_UPD : VLDQQWBPseudo; +def VLD4d8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; // ...with double-spaced registers: def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; @@ -1005,18 +1014,18 @@ def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; -def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; -def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; -def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; // ...alternate versions to be allocated odd register numbers: -def VLD4q8oddPseudo : VLDQQQQPseudo; -def VLD4q16oddPseudo : VLDQQQQPseudo; -def VLD4q32oddPseudo : VLDQQQQPseudo; +def VLD4q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; -def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1068,11 +1077,12 @@ class VLD1LN32 op11_8, bits<4> op7_4, string Dt, ValueType Ty, "$src = $Vd", [(set DPR:$Vd, (vector_insert (Ty DPR:$src), (i32 (LoadOp addrmode6oneL32:$Rn)), - imm:$lane))]> { + imm:$lane))]>, Sched<[WriteVLD1]> { let Rm = 0b1111; let DecoderMethod = "DecodeVLD1LN"; } -class VLD1QLNPseudo : VLDQLNPseudo { +class VLD1QLNPseudo : VLDQLNPseudo, + Sched<[WriteVLD1]> { let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), (i32 (LoadOp addrmode6:$addr)), imm:$lane))]; @@ -1109,7 +1119,7 @@ class VLD1LNWB op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", - "$src = $Vd, $Rn.addr = $wb", []> { + "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let DecoderMethod = "DecodeVLD1LN"; } @@ -1126,16 +1136,16 @@ def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { let Inst{4} = Rn{4}; } -def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo; -def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo; -def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo; +def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; // VLD2LN : Vector Load (single 2-element structure to one lane) class VLD2LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2", []> { + "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2LN"; @@ -1151,9 +1161,9 @@ def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNd8Pseudo : VLDQLNPseudo; -def VLD2LNd16Pseudo : VLDQLNPseudo; -def VLD2LNd32Pseudo : VLDQLNPseudo; +def VLD2LNd8Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNd16Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNd32Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; // ...with double-spaced registers: def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { @@ -1163,8 +1173,8 @@ def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNq16Pseudo : VLDQQLNPseudo; -def VLD2LNq32Pseudo : VLDQQLNPseudo; +def VLD2LNq16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNq32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; // ...with address register writeback: class VLD2LNWB op11_8, bits<4> op7_4, string Dt> @@ -1187,9 +1197,9 @@ def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo; -def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo; -def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo; +def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -1198,8 +1208,8 @@ def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; // VLD3LN : Vector Load (single 3-element structure to one lane) class VLD3LN op11_8, bits<4> op7_4, string Dt> @@ -1207,7 +1217,7 @@ class VLD3LN op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> { let Rm = 0b1111; let DecoderMethod = "DecodeVLD3LN"; } @@ -1222,9 +1232,9 @@ def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNd8Pseudo : VLDQQLNPseudo; -def VLD3LNd16Pseudo : VLDQQLNPseudo; -def VLD3LNd32Pseudo : VLDQQLNPseudo; +def VLD3LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers: def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { @@ -1234,8 +1244,8 @@ def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNq16Pseudo : VLDQQQQLNPseudo; -def VLD3LNq32Pseudo : VLDQQQQLNPseudo; +def VLD3LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; // ...with address register writeback: class VLD3LNWB op11_8, bits<4> op7_4, string Dt> @@ -1246,7 +1256,7 @@ class VLD3LNWB op11_8, bits<4> op7_4, string Dt> IIC_VLD3lnu, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", - []> { + []>, Sched<[WriteVLD2]> { let DecoderMethod = "DecodeVLD3LN"; } @@ -1260,9 +1270,9 @@ def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo; -def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; @@ -1271,8 +1281,8 @@ def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; -def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; // VLD4LN : Vector Load (single 4-element structure to one lane) class VLD4LN op11_8, bits<4> op7_4, string Dt> @@ -1281,7 +1291,8 @@ class VLD4LN op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>, + Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4LN"; @@ -1298,9 +1309,9 @@ def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNd8Pseudo : VLDQQLNPseudo; -def VLD4LNd16Pseudo : VLDQQLNPseudo; -def VLD4LNd32Pseudo : VLDQQLNPseudo; +def VLD4LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers: def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { @@ -1311,8 +1322,8 @@ def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNq16Pseudo : VLDQQQQLNPseudo; -def VLD4LNq32Pseudo : VLDQQQQLNPseudo; +def VLD4LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; // ...with address register writeback: class VLD4LNWB op11_8, bits<4> op7_4, string Dt> @@ -1339,9 +1350,9 @@ def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo; -def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -1351,8 +1362,8 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; -def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1363,7 +1374,8 @@ class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp, (ins AddrMode:$Rn), IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListOneDAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>, + Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1426,7 +1438,7 @@ multiclass VLD1QDUPWB op7_4, string Dt, Operand AddrMode> { (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1483,7 +1495,7 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy, (outs VdTy:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; @@ -1492,7 +1504,7 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy, (outs VdTy:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } @@ -1516,7 +1528,8 @@ defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, class VLD3DUP op7_4, string Dt> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6dup:$Rn), IIC_VLD3dup, - "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> { + "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>, + Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; @@ -1526,9 +1539,9 @@ def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">; def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">; -def VLD3DUPd8Pseudo : VLDQQPseudo; -def VLD3DUPd16Pseudo : VLDQQPseudo; -def VLD3DUPd32Pseudo : VLDQQPseudo; +def VLD3DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers (not used for codegen): def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; @@ -1540,7 +1553,7 @@ class VLD3DUPWB op7_4, string Dt, Operand AddrMode> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; } @@ -1553,9 +1566,9 @@ def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; -def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo; -def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo; -def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo; +def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; // VLD4DUP : Vector Load (single 4-element structure to all lanes) class VLD4DUP op7_4, string Dt> @@ -1572,9 +1585,9 @@ def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8Pseudo : VLDQQPseudo; -def VLD4DUPd16Pseudo : VLDQQPseudo; -def VLD4DUPd32Pseudo : VLDQQPseudo; +def VLD4DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers (not used for codegen): def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; @@ -1587,7 +1600,7 @@ class VLD4DUPWB op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4DupInstruction"; } @@ -1600,9 +1613,9 @@ def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo; -def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo; -def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo; +def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1649,14 +1662,14 @@ class VSTQQQQWBPseudo // VST1 : Vector Store (multiple single elements) class VST1D op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), - IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { + IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } class VST1Q op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), - IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { + IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1677,7 +1690,7 @@ multiclass VST1DWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1686,7 +1699,7 @@ multiclass VST1DWB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1695,7 +1708,7 @@ multiclass VST1QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1704,7 +1717,7 @@ multiclass VST1QWB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1724,7 +1737,7 @@ defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; class VST1D3 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), (ins AddrMode:$Rn, VecListThreeD:$Vd), - IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { + IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1733,7 +1746,7 @@ multiclass VST1D3WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1742,7 +1755,7 @@ multiclass VST1D3WB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1758,16 +1771,16 @@ defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; -def VST1d64TPseudo : VSTQQPseudo; -def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo; -def VST1d64TPseudoWB_register : VSTQQWBPseudo; +def VST1d64TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST3]>; +def VST1d64TPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST3]>; // ...with 4 registers class VST1D4 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", - []> { + []>, Sched<[WriteVST4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1776,7 +1789,7 @@ multiclass VST1D4WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1785,7 +1798,7 @@ multiclass VST1D4WB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1801,9 +1814,9 @@ defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; -def VST1d64QPseudo : VSTQQPseudo; -def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo; -def VST1d64QPseudoWB_register : VSTQQWBPseudo; +def VST1d64QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST1d64QPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST4]>; // VST2 : Vector Store (multiple 2-element structures) class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -1816,22 +1829,22 @@ class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, } def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVST2]>; def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVST2]>; def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVST2]>; def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVST4]>; def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVST4]>; def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVST4]>; -def VST2q8Pseudo : VSTQQPseudo; -def VST2q16Pseudo : VSTQQPseudo; -def VST2q32Pseudo : VSTQQPseudo; +def VST2q8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST2q16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST2q32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; // ...with address register writeback: multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, @@ -1839,7 +1852,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; @@ -1847,7 +1860,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } @@ -1856,7 +1869,7 @@ multiclass VST2QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; @@ -1865,7 +1878,7 @@ multiclass VST2QWB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } @@ -1882,12 +1895,12 @@ defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; -def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo; -def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo; -def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo; -def VST2q8PseudoWB_register : VSTQQWBregisterPseudo; -def VST2q16PseudoWB_register : VSTQQWBregisterPseudo; -def VST2q32PseudoWB_register : VSTQQWBregisterPseudo; +def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q8PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; +def VST2q16PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; +def VST2q32PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; // ...with double-spaced registers def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, @@ -1907,7 +1920,7 @@ defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, class VST3D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, - "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; @@ -1917,9 +1930,9 @@ def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; -def VST3d8Pseudo : VSTQQPseudo; -def VST3d16Pseudo : VSTQQPseudo; -def VST3d32Pseudo : VSTQQPseudo; +def VST3d8Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST3d16Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST3d32Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; // ...with address register writeback: class VST3DWB op11_8, bits<4> op7_4, string Dt> @@ -1927,7 +1940,7 @@ class VST3DWB op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; } @@ -1936,9 +1949,9 @@ def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; -def VST3d8Pseudo_UPD : VSTQQWBPseudo; -def VST3d16Pseudo_UPD : VSTQQWBPseudo; -def VST3d32Pseudo_UPD : VSTQQWBPseudo; +def VST3d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; +def VST3d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; +def VST3d32Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; // ...with double-spaced registers: def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; @@ -1948,25 +1961,25 @@ def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; -def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; -def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; // ...alternate versions to be allocated odd register numbers: -def VST3q8oddPseudo : VSTQQQQPseudo; -def VST3q16oddPseudo : VSTQQQQPseudo; -def VST3q32oddPseudo : VSTQQQQPseudo; +def VST3q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST3q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST3q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; -def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; // VST4 : Vector Store (multiple 4-element structures) class VST4D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", - "", []> { + "", []>, Sched<[WriteVST4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; @@ -1976,9 +1989,9 @@ def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; -def VST4d8Pseudo : VSTQQPseudo; -def VST4d16Pseudo : VSTQQPseudo; -def VST4d32Pseudo : VSTQQPseudo; +def VST4d8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST4d16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST4d32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; // ...with address register writeback: class VST4DWB op11_8, bits<4> op7_4, string Dt> @@ -1986,7 +1999,7 @@ class VST4DWB op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; } @@ -1995,9 +2008,9 @@ def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; -def VST4d8Pseudo_UPD : VSTQQWBPseudo; -def VST4d16Pseudo_UPD : VSTQQWBPseudo; -def VST4d32Pseudo_UPD : VSTQQWBPseudo; +def VST4d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; +def VST4d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; +def VST4d32Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; // ...with double-spaced registers: def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; @@ -2007,18 +2020,18 @@ def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; -def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q16Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; // ...alternate versions to be allocated odd register numbers: -def VST4q8oddPseudo : VSTQQQQPseudo; -def VST4q16oddPseudo : VSTQQQQPseudo; -def VST4q32oddPseudo : VSTQQQQPseudo; +def VST4q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST4q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST4q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; -def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 @@ -2052,12 +2065,13 @@ class VST1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>, + Sched<[WriteVST1]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } class VST1QLNPseudo - : VSTQLNPseudo { + : VSTQLNPseudo, Sched<[WriteVST1]> { let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr)]; } @@ -2096,11 +2110,12 @@ class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - AdrMode:$Rn, am6offset:$Rm))]> { + AdrMode:$Rn, am6offset:$Rm))]>, + Sched<[WriteVST1]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo - : VSTQLNWBPseudo { + : VSTQLNWBPseudo, Sched<[WriteVST1]> { let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr, am6offset:$offset))]; } @@ -2131,7 +2146,7 @@ class VST2LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", - "", []> { + "", []>, Sched<[WriteVST1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; @@ -2147,9 +2162,9 @@ def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VST2LNd8Pseudo : VSTQLNPseudo; -def VST2LNd16Pseudo : VSTQLNPseudo; -def VST2LNd32Pseudo : VSTQLNPseudo; +def VST2LNd8Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; +def VST2LNd16Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; +def VST2LNd32Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; // ...with double-spaced registers: def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { @@ -2161,8 +2176,8 @@ def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { let Inst{4} = Rn{4}; } -def VST2LNq16Pseudo : VSTQQLNPseudo; -def VST2LNq32Pseudo : VSTQQLNPseudo; +def VST2LNq16Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; +def VST2LNq32Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; // ...with address register writeback: class VST2LNWB op11_8, bits<4> op7_4, string Dt> @@ -2185,9 +2200,9 @@ def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo; -def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo; -def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo; +def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -2196,15 +2211,16 @@ def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } -def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo; -def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo; +def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST1]>; // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, - "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { + "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>, + Sched<[WriteVST2]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST3LN"; } @@ -2219,9 +2235,9 @@ def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VST3LNd8Pseudo : VSTQQLNPseudo; -def VST3LNd16Pseudo : VSTQQLNPseudo; -def VST3LNd32Pseudo : VSTQQLNPseudo; +def VST3LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST3LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST3LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; // ...with double-spaced registers: def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { @@ -2255,9 +2271,9 @@ def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo; -def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo; -def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo; +def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; @@ -2266,8 +2282,8 @@ def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } -def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; -def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; // VST4LN : Vector Store (single 4-element structure from one lane) class VST4LN op11_8, bits<4> op7_4, string Dt> @@ -2275,7 +2291,7 @@ class VST4LN op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", - "", []> { + "", []>, Sched<[WriteVST2]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST4LN"; @@ -2292,9 +2308,9 @@ def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNd8Pseudo : VSTQQLNPseudo; -def VST4LNd16Pseudo : VSTQQLNPseudo; -def VST4LNd32Pseudo : VSTQQLNPseudo; +def VST4LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; // ...with double-spaced registers: def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { @@ -2305,8 +2321,8 @@ def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNq16Pseudo : VSTQQQQLNPseudo; -def VST4LNq32Pseudo : VSTQQQQLNPseudo; +def VST4LNq16Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNq32Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; // ...with address register writeback: class VST4LNWB op11_8, bits<4> op7_4, string Dt> @@ -2331,9 +2347,9 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo; -def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo; -def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo; +def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -2343,8 +2359,8 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; -def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 @@ -5550,8 +5566,7 @@ defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; // VABS : Vector Absolute Value defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, - IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", - int_arm_neon_vabs>; + IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>; def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs", "f32", v2f32, v2f32, fabs>; @@ -5567,29 +5582,6 @@ def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0, v8f16, v8f16, fabs>, Requires<[HasNEON, HasFullFP16]>; -def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), - (v2i32 (bitconvert (v8i8 (add DPR:$src, - (NEONvshrs DPR:$src, (i32 7))))))), - (VABSv8i8 DPR:$src)>; -def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), - (v2i32 (bitconvert (v4i16 (add DPR:$src, - (NEONvshrs DPR:$src, (i32 15))))))), - (VABSv4i16 DPR:$src)>; -def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), - (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), - (VABSv2i32 DPR:$src)>; -def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), - (v4i32 (bitconvert (v16i8 (add QPR:$src, - (NEONvshrs QPR:$src, (i32 7))))))), - (VABSv16i8 QPR:$src)>; -def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), - (v4i32 (bitconvert (v8i16 (add QPR:$src, - (NEONvshrs QPR:$src, (i32 15))))))), - (VABSv8i16 QPR:$src)>; -def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), - (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), - (VABSv4i32 QPR:$src)>; - // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", @@ -6443,7 +6435,8 @@ def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd), (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1, "vtbl", "8", "$Vd, $Vn, $Vm", "", - [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; + [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>; + let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), @@ -6498,6 +6491,49 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)), + (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vm)), + (v8i8 (VTBX2 v8i8:$orig, + (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1), + v8i8:$Vm))>; + +def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vm)), + (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + (v8i8 (IMPLICIT_DEF)), dsub_3), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vm)), + (v8i8 (VTBX3Pseudo v8i8:$orig, + (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + (v8i8 (IMPLICIT_DEF)), dsub_3), + v8i8:$Vm))>; + +def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), + (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + v8i8:$Vn3, dsub_3), + v8i8:$Vm))>; +def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1, + v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)), + (v8i8 (VTBX4Pseudo v8i8:$orig, + (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0, + v8i8:$Vn1, dsub_1, + v8i8:$Vn2, dsub_2, + v8i8:$Vn3, dsub_3), + v8i8:$Vm))>; + // VRINT : Vector Rounding multiclass VRINT_FPI op9_7, SDPatternOperator Int> { let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index f2f426e867014f040840b9f4331459a28a7dc9e8..423f97ccacd64f978508c64875e78c957ca8dd9a 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -284,8 +284,8 @@ def tADJCALLSTACKUP : Requires<[IsThumb, IsThumb1Only]>; def tADJCALLSTACKDOWN : - PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, - [(ARMcallseq_start imm:$amt)]>, + PseudoInst<(outs), (ins i32imm:$amt, i32imm:$amt2), NoItinerary, + [(ARMcallseq_start imm:$amt, imm:$amt2)]>, Requires<[IsThumb, IsThumb1Only]>; } @@ -953,7 +953,7 @@ let isAdd = 1 in { /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. let hasPostISelHook = 1, Defs = [CPSR] in { - let isCommutable = 1 in + let isCommutable = 1, Uses = [CPSR] in def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm, @@ -1292,6 +1292,7 @@ def tSUBrr : // A8.6.212 /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. let hasPostISelHook = 1, Defs = [CPSR] in { + let Uses = [CPSR] in def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm, @@ -1412,7 +1413,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them // and make use of the same compressed jump table format as Thumb-2. -let Size = 2 in { +let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1, + isIndirectBranch = 1 in { def tTBB_JT : tPseudoInst<(outs), (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, Sched<[WriteBr]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index f5b673b78ad711aba64fd668d6adef10bda62a17..45471a4e95b395a50b5c7170bf82df24546fc0e8 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1993,6 +1993,10 @@ def : Thumb2DSPPat<(add rGPR:$Rn, def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i16)), (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_sxtb16 rGPR:$Rn), + (t2SXTB16 rGPR:$Rn, 0)>; +def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, rGPR:$Rm), + (t2SXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>; // A simple right-shift can also be used in most cases (the exception is the @@ -2026,6 +2030,9 @@ def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x0000FFFF), def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF), (t2UXTB16 rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_uxtb16 rGPR:$Rm), + (t2UXTB16 rGPR:$Rm, 0)>; + // FIXME: This pattern incorrectly assumes the shl operator is a rotate. // The transformation should probably be done as a combiner action // instead so we can include a check for masking back in the upper @@ -2053,6 +2060,8 @@ def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>; +def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, rGPR:$Rm), + (t2UXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>; } @@ -2137,10 +2146,9 @@ def : T2Pat<(ARMadde rGPR:$src, t2_so_imm_not:$imm, CPSR), def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), (t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>; -// Select Bytes -- for disassembly only - def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, + NoItinerary, "sel", "\t$Rd, $Rn, $Rm", + [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>, Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b010; @@ -2154,9 +2162,7 @@ def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), // A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) // And Miscellaneous operations -- for disassembly only class T2I_pam op22_20, bits<4> op7_4, string opc, - list pat = [/* For disassembly only; pattern left blank */], - dag iops = (ins rGPR:$Rn, rGPR:$Rm), - string asm = "\t$Rd, $Rn, $Rm"> + list pat, dag iops, string asm> : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>, Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; @@ -2174,60 +2180,72 @@ class T2I_pam op22_20, bits<4> op7_4, string opc, let Inst{3-0} = Rm; } -// Saturating add/subtract -- for disassembly only - -def t2QADD : T2I_pam<0b000, 0b1000, "qadd", - [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; -def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; -def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; -def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; -def t2QSUB : T2I_pam<0b000, 0b1010, "qsub", - [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))], - (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">; -def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; -def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; -def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; -def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">; -def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">; -def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">; -def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">; -def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">; - -// Signed/Unsigned add/subtract -- for disassembly only - -def t2SASX : T2I_pam<0b010, 0b0000, "sasx">; -def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">; -def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">; -def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">; -def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">; -def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">; -def t2UASX : T2I_pam<0b010, 0b0100, "uasx">; -def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">; -def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">; -def t2USAX : T2I_pam<0b110, 0b0100, "usax">; -def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">; -def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">; - -// Signed/Unsigned halving add/subtract -- for disassembly only - -def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">; -def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">; -def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">; -def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">; -def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">; -def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">; -def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">; -def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">; -def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">; -def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">; -def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">; -def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; +class T2I_pam_intrinsics op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> + : T2I_pam; + +class T2I_pam_intrinsics_rev op22_20, bits<4> op7_4, string opc> + : T2I_pam; + +// Saturating add/subtract +def t2QADD16 : T2I_pam_intrinsics<0b001, 0b0001, "qadd16", int_arm_qadd16>; +def t2QADD8 : T2I_pam_intrinsics<0b000, 0b0001, "qadd8", int_arm_qadd8>; +def t2QASX : T2I_pam_intrinsics<0b010, 0b0001, "qasx", int_arm_qasx>; +def t2UQSUB8 : T2I_pam_intrinsics<0b100, 0b0101, "uqsub8", int_arm_uqsub8>; +def t2QSAX : T2I_pam_intrinsics<0b110, 0b0001, "qsax", int_arm_qsax>; +def t2QSUB16 : T2I_pam_intrinsics<0b101, 0b0001, "qsub16", int_arm_qsub16>; +def t2QSUB8 : T2I_pam_intrinsics<0b100, 0b0001, "qsub8", int_arm_qsub8>; +def t2UQADD16 : T2I_pam_intrinsics<0b001, 0b0101, "uqadd16", int_arm_uqadd16>; +def t2UQADD8 : T2I_pam_intrinsics<0b000, 0b0101, "uqadd8", int_arm_uqadd8>; +def t2UQASX : T2I_pam_intrinsics<0b010, 0b0101, "uqasx", int_arm_uqasx>; +def t2UQSAX : T2I_pam_intrinsics<0b110, 0b0101, "uqsax", int_arm_uqsax>; +def t2UQSUB16 : T2I_pam_intrinsics<0b101, 0b0101, "uqsub16", int_arm_uqsub16>; +def t2QADD : T2I_pam_intrinsics_rev<0b000, 0b1000, "qadd">; +def t2QSUB : T2I_pam_intrinsics_rev<0b000, 0b1010, "qsub">; +def t2QDADD : T2I_pam_intrinsics_rev<0b000, 0b1001, "qdadd">; +def t2QDSUB : T2I_pam_intrinsics_rev<0b000, 0b1011, "qdsub">; + +def : Thumb2DSPPat<(int_arm_qadd rGPR:$Rm, rGPR:$Rn), + (t2QADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, rGPR:$Rn), + (t2QSUB rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(int_arm_qadd(int_arm_qadd rGPR:$Rm, rGPR:$Rm), rGPR:$Rn), + (t2QDADD rGPR:$Rm, rGPR:$Rn)>; +def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)), + (t2QDSUB rGPR:$Rm, rGPR:$Rn)>; + +// Signed/Unsigned add/subtract + +def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>; +def t2SADD16 : T2I_pam_intrinsics<0b001, 0b0000, "sadd16", int_arm_sadd16>; +def t2SADD8 : T2I_pam_intrinsics<0b000, 0b0000, "sadd8", int_arm_sadd8>; +def t2SSAX : T2I_pam_intrinsics<0b110, 0b0000, "ssax", int_arm_ssax>; +def t2SSUB16 : T2I_pam_intrinsics<0b101, 0b0000, "ssub16", int_arm_ssub16>; +def t2SSUB8 : T2I_pam_intrinsics<0b100, 0b0000, "ssub8", int_arm_ssub8>; +def t2UASX : T2I_pam_intrinsics<0b010, 0b0100, "uasx", int_arm_uasx>; +def t2UADD16 : T2I_pam_intrinsics<0b001, 0b0100, "uadd16", int_arm_uadd16>; +def t2UADD8 : T2I_pam_intrinsics<0b000, 0b0100, "uadd8", int_arm_uadd8>; +def t2USAX : T2I_pam_intrinsics<0b110, 0b0100, "usax", int_arm_usax>; +def t2USUB16 : T2I_pam_intrinsics<0b101, 0b0100, "usub16", int_arm_usub16>; +def t2USUB8 : T2I_pam_intrinsics<0b100, 0b0100, "usub8", int_arm_usub8>; + +// Signed/Unsigned halving add/subtract + +def t2SHASX : T2I_pam_intrinsics<0b010, 0b0010, "shasx", int_arm_shasx>; +def t2SHADD16 : T2I_pam_intrinsics<0b001, 0b0010, "shadd16", int_arm_shadd16>; +def t2SHADD8 : T2I_pam_intrinsics<0b000, 0b0010, "shadd8", int_arm_shadd8>; +def t2SHSAX : T2I_pam_intrinsics<0b110, 0b0010, "shsax", int_arm_shsax>; +def t2SHSUB16 : T2I_pam_intrinsics<0b101, 0b0010, "shsub16", int_arm_shsub16>; +def t2SHSUB8 : T2I_pam_intrinsics<0b100, 0b0010, "shsub8", int_arm_shsub8>; +def t2UHASX : T2I_pam_intrinsics<0b010, 0b0110, "uhasx", int_arm_uhasx>; +def t2UHADD16 : T2I_pam_intrinsics<0b001, 0b0110, "uhadd16", int_arm_uhadd16>; +def t2UHADD8 : T2I_pam_intrinsics<0b000, 0b0110, "uhadd8", int_arm_uhadd8>; +def t2UHSAX : T2I_pam_intrinsics<0b110, 0b0110, "uhsax", int_arm_uhsax>; +def t2UHSUB16 : T2I_pam_intrinsics<0b101, 0b0110, "uhsub16", int_arm_uhsub16>; +def t2UHSUB8 : T2I_pam_intrinsics<0b100, 0b0110, "uhsub8", int_arm_uhsub8>; // Helper class for disassembly only // A6.3.16 & A6.3.17 @@ -2255,16 +2273,19 @@ class T2FourReg_mac op22_20, bits<4> op7_4, dag oops, // Unsigned Sum of Absolute Differences [and Accumulate]. def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, + NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (int_arm_usad8 rGPR:$Rn, rGPR:$Rm))]>, Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary, - "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, + "usada8", "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (int_arm_usada8 rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>, Requires<[IsThumb2, HasDSP]>; // Signed/Unsigned saturate. +let hasSideEffects = 1 in class T2SatI : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> { bits<4> Rd; @@ -2313,10 +2334,16 @@ def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), let Inst{4} = 0; } -def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; -def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>; def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), + (t2SSAT imm1_32:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), + (t2USAT imm0_31:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos), + (t2SSAT16 imm1_16:$pos, GPR:$a)>; +def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos), + (t2USAT16 imm0_15:$pos, GPR:$a)>; //===----------------------------------------------------------------------===// // Shift and rotate Instructions. @@ -2689,6 +2716,18 @@ def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))), (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm), (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm), + (t2SMULBB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm), + (t2SMULBT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smultb rGPR:$Rn, rGPR:$Rm), + (t2SMULTB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smultt rGPR:$Rn, rGPR:$Rm), + (t2SMULTT rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulwb rGPR:$Rn, rGPR:$Rm), + (t2SMULWB rGPR:$Rn, rGPR:$Rm)>; +def : Thumb2DSPPat<(int_arm_smulwt rGPR:$Rn, rGPR:$Rm), + (t2SMULWT rGPR:$Rn, rGPR:$Rm)>; class T2FourRegSMLA op22_20, bits<2> op5_4, string opc, list pattern> @@ -2730,6 +2769,19 @@ def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)), (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>; +def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc), + (t2SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc), + (t2SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc), + (t2SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc), + (t2SMLATT GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc), + (t2SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : Thumb2DSPPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc), + (t2SMLAWT GPR:$a, GPR:$b, GPR:$acc)>; + // Halfword multiple accumulate long: SMLAL def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">, Requires<[IsThumb2, HasDSP]>; @@ -2749,39 +2801,44 @@ def : Thumb2DSPPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), def : Thumb2DSPPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), (t2SMLALTT $Rn, $Rm, $RLo, $RHi)>; -class T2DualHalfMul op22_20, bits<4> op7_4, string opc> +class T2DualHalfMul op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> : T2ThreeReg_mac<0, op22_20, op7_4, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>, + IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", + [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm))]>, Requires<[IsThumb2, HasDSP]>, Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { let Inst{15-12} = 0b1111; } // Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD -def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad">; -def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx">; -def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd">; -def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx">; +def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad", int_arm_smuad>; +def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx", int_arm_smuadx>; +def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd", int_arm_smusd>; +def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx", int_arm_smusdx>; -class T2DualHalfMulAdd op22_20, bits<4> op7_4, string opc> +class T2DualHalfMulAdd op22_20, bits<4> op7_4, string opc, + Intrinsic intrinsic> : T2FourReg_mac<0, op22_20, op7_4, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), - IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", []>, + IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", + [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>, Requires<[IsThumb2, HasDSP]>; -def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad">; -def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx">; -def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd">; -def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx">; +def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad", int_arm_smlad>; +def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx", int_arm_smladx>; +def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd", int_arm_smlsd>; +def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx", int_arm_smlsdx>; class T2DualHalfMulAddLong op22_20, bits<4> op7_4, string opc> : T2FourReg_mac<1, op22_20, op7_4, (outs rGPR:$Ra, rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm), + (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>, + RegConstraint<"$Ra = $RLo, $Rd = $RHi">, Requires<[IsThumb2, HasDSP]>, Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; @@ -2790,6 +2847,15 @@ def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">; def t2SMLSLD : T2DualHalfMulAddLong<0b101, 0b1100, "smlsld">; def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; +def : Thumb2DSPPat<(ARMSmlald rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLALD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; +def : Thumb2DSPPat<(ARMSmlaldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLALDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; +def : Thumb2DSPPat<(ARMSmlsld rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLSLD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; +def : Thumb2DSPPat<(ARMSmlsldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), + (t2SMLSLDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>; + //===----------------------------------------------------------------------===// // Division Instructions. // Signed and unsigned division on v7-M @@ -2797,7 +2863,7 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">; def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>, Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011100; @@ -2809,7 +2875,7 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>, - Requires<[HasDivide, IsThumb, HasV8MBaseline]>, + Requires<[HasDivideInThumb, IsThumb, HasV8MBaseline]>, Sched<[WriteDIV]> { let Inst{31-27} = 0b11111; let Inst{26-21} = 0b011101; @@ -3428,7 +3494,8 @@ def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br, let AsmMatchConverter = "cvtThumbBranches"; } -let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in { +let Size = 4, isNotDuplicable = 1, isBranch = 1, isTerminator = 1, + isBarrier = 1, isIndirectBranch = 1 in { // available in both v8-M.Baseline and Thumb2 targets def t2BR_JT : t2basePseudoInst<(outs), @@ -4640,6 +4707,19 @@ def : t2InstSubst<"and${s}${p} $Rd, $Rn, $imm", def : t2InstSubst<"and${s}${p} $Rdn, $imm", (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, pred:$p, cc_out:$s)>; +// And ORR <--> ORN +def : t2InstSubst<"orn${s}${p} $Rd, $Rn, $imm", + (t2ORRri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"orn${s}${p} $Rdn, $imm", + (t2ORRri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"orr${s}${p} $Rd, $Rn, $imm", + (t2ORNri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"orr${s}${p} $Rdn, $imm", + (t2ORNri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm, + pred:$p, cc_out:$s)>; // Likewise, "add Rd, t2_so_imm_neg" -> sub def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 0f225156d4cac4b4d51b50a0bfd4bc2010a4a7b5..5d887c4fcbf24fb341614e2c3b8f421d5c4a3291 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1958,7 +1958,8 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1966,7 +1967,8 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -1976,7 +1978,8 @@ def VFMSH : AHbI<0b11101, 0b10, 1, 0, IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -2007,7 +2010,8 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -2015,7 +2019,8 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -2025,7 +2030,8 @@ def VFNMAH : AHbI<0b11101, 0b01, 1, 0, IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, @@ -2056,14 +2062,16 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; + Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>, + Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } @@ -2073,7 +2081,8 @@ def VFNMSH : AHbI<0b11101, 0b01, 0, 0, IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm", []>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFusedMAC]>; + Requires<[HasFullFP16,UseFusedMAC]>, + Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 8d224d6a70fa8ecb0a8c2940d4ad8c8e929eb8a6..2ae3bad4076b0bc726219385c86c7df4b0984a4d 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -11,10 +11,10 @@ /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// -#include "ARMInstructionSelector.h" #include "ARMRegisterBankInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" @@ -26,10 +26,68 @@ using namespace llvm; #error "You shouldn't build this" #endif -ARMInstructionSelector::ARMInstructionSelector(const ARMSubtarget &STI, +namespace { + +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + +class ARMInstructionSelector : public InstructionSelector { +public: + ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI); + + bool select(MachineInstr &I) const override; + +private: + bool selectImpl(MachineInstr &I) const; + + const ARMBaseInstrInfo &TII; + const ARMBaseRegisterInfo &TRI; + const ARMBaseTargetMachine &TM; + const ARMRegisterBankInfo &RBI; + const ARMSubtarget &STI; + +#define GET_GLOBALISEL_PREDICATES_DECL +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + +// We declare the temporaries used by selectImpl() in the class to minimize the +// cost of constructing placeholder values. +#define GET_GLOBALISEL_TEMPORARIES_DECL +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_DECL +}; +} // end anonymous namespace + +namespace llvm { +InstructionSelector * +createARMInstructionSelector(const ARMBaseTargetMachine &TM, + const ARMSubtarget &STI, + const ARMRegisterBankInfo &RBI) { + return new ARMInstructionSelector(TM, STI, RBI); +} +} + +unsigned zero_reg = 0; + +#define GET_GLOBALISEL_IMPL +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_IMPL + +ARMInstructionSelector::ARMInstructionSelector(const ARMBaseTargetMachine &TM, + const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI) : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) {} + TRI(*STI.getRegisterInfo()), TM(TM), RBI(RBI), STI(STI), +#define GET_GLOBALISEL_PREDICATES_INIT +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT +#define GET_GLOBALISEL_TEMPORARIES_INIT +#include "ARMGenGlobalISel.inc" +#undef GET_GLOBALISEL_TEMPORARIES_INIT +{ +} static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, @@ -43,17 +101,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, assert(RegBank && "Can't get reg bank for virtual register"); const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); - (void)DstSize; - unsigned SrcReg = I.getOperand(1).getReg(); - const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); - (void)SrcSize; - assert((DstSize == SrcSize || - // Copies are a means to setup initial types, the number of - // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - DstSize <= SrcSize)) && - "Copy with different width?!"); - assert((RegBank->getID() == ARM::GPRRegBankID || RegBank->getID() == ARM::FPRRegBankID) && "Unsupported reg bank"); @@ -80,56 +127,30 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, return true; } -static bool selectFAdd(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select fp add without vfp"); - - LLT Ty = MRI.getType(MIB->getOperand(0).getReg()); - unsigned ValSize = Ty.getSizeInBits(); - - if (ValSize == 32) { - if (TII.getSubtarget().useNEONForSinglePrecisionFP()) - return false; - MIB->setDesc(TII.get(ARM::VADDS)); - } else { - assert(ValSize == 64 && "Unsupported size for floating point value"); - if (TII.getSubtarget().isFPOnlySP()) - return false; - MIB->setDesc(TII.get(ARM::VADDD)); - } - MIB.add(predOps(ARMCC::AL)); - - return true; -} - -static bool selectSequence(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select sequence without VFP"); +static bool selectMergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select merge without VFP"); - // We only support G_SEQUENCE as a way to stick together two scalar GPRs + // We only support G_MERGE_VALUES as a way to stick together two scalar GPRs // into one DPR. unsigned VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 64 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID && - "Unsupported operand for G_SEQUENCE"); + "Unsupported operand for G_MERGE_VALUES"); unsigned VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; assert(MRI.getType(VReg1).getSizeInBits() == 32 && RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_SEQUENCE"); - unsigned VReg2 = MIB->getOperand(3).getReg(); + "Unsupported operand for G_MERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); (void)VReg2; assert(MRI.getType(VReg2).getSizeInBits() == 32 && RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_SEQUENCE"); - - // Remove the operands corresponding to the offsets. - MIB->RemoveOperand(4); - MIB->RemoveOperand(2); + "Unsupported operand for G_MERGE_VALUES"); MIB->setDesc(TII.get(ARM::VMOVDRR)); MIB.add(predOps(ARMCC::AL)); @@ -137,30 +158,32 @@ static bool selectSequence(MachineInstrBuilder &MIB, return true; } -static bool selectExtract(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) { - assert(TII.getSubtarget().hasVFP2() && "Can't select extract without VFP"); +static bool selectUnmergeValues(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(TII.getSubtarget().hasVFP2() && "Can't select unmerge without VFP"); - // We only support G_EXTRACT as a way to break up one DPR into two GPRs. + // We only support G_UNMERGE_VALUES as a way to break up one DPR into two + // GPRs. unsigned VReg0 = MIB->getOperand(0).getReg(); (void)VReg0; assert(MRI.getType(VReg0).getSizeInBits() == 32 && RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Unsupported operand for G_EXTRACT"); + "Unsupported operand for G_UNMERGE_VALUES"); unsigned VReg1 = MIB->getOperand(1).getReg(); (void)VReg1; - assert(MRI.getType(VReg1).getSizeInBits() == 64 && - RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::FPRRegBankID && - "Unsupported operand for G_EXTRACT"); - assert(MIB->getOperand(2).getImm() % 32 == 0 && - "Unsupported operand for G_EXTRACT"); - - // Remove the operands corresponding to the offsets. - MIB->getOperand(2).setImm(MIB->getOperand(2).getImm() / 32); + assert(MRI.getType(VReg1).getSizeInBits() == 32 && + RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); + unsigned VReg2 = MIB->getOperand(2).getReg(); + (void)VReg2; + assert(MRI.getType(VReg2).getSizeInBits() == 64 && + RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::FPRRegBankID && + "Unsupported operand for G_UNMERGE_VALUES"); - MIB->setDesc(TII.get(ARM::VGETLNi32)); + MIB->setDesc(TII.get(ARM::VMOVRRD)); MIB.add(predOps(ARMCC::AL)); return true; @@ -235,6 +258,9 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { return true; } + if (selectImpl(I)) + return true; + MachineInstrBuilder MIB{MF, I}; bool isSExt = false; @@ -294,15 +320,33 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { } break; } - case G_ADD: + case G_ANYEXT: + case G_TRUNC: { + // The high bits are undefined, so there's nothing special to do, just + // treat it as a copy. + auto SrcReg = I.getOperand(1).getReg(); + auto DstReg = I.getOperand(0).getReg(); + + const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + if (SrcRegBank.getID() != DstRegBank.getID()) { + DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n"); + return false; + } + + if (SrcRegBank.getID() != ARM::GPRRegBankID) { + DEBUG(dbgs() << "G_TRUNC/G_ANYEXT on non-GPR not supported yet\n"); + return false; + } + + I.setDesc(TII.get(COPY)); + return selectCopy(I, TII, MRI, TRI, RBI); + } case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); break; - case G_FADD: - if (!selectFAdd(MIB, TII, MRI)) - return false; - break; case G_FRAME_INDEX: // Add 0 to the given frame index and hope it will eventually be folded into // the user(s). @@ -318,6 +362,18 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { "Expected constant to live in a GPR"); I.setDesc(TII.get(ARM::MOVi)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + + auto &Val = I.getOperand(1); + if (Val.isCImm()) { + if (Val.getCImm()->getBitWidth() > 32) + return false; + Val.ChangeToImmediate(Val.getCImm()->getZExtValue()); + } + + if (!Val.isImm()) { + return false; + } + break; } case G_STORE: @@ -349,13 +405,13 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { MIB.addImm(0).add(predOps(ARMCC::AL)); break; } - case G_SEQUENCE: { - if (!selectSequence(MIB, TII, MRI, TRI, RBI)) + case G_MERGE_VALUES: { + if (!selectMergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } - case G_EXTRACT: { - if (!selectExtract(MIB, TII, MRI, TRI, RBI)) + case G_UNMERGE_VALUES: { + if (!selectUnmergeValues(MIB, TII, MRI, TRI, RBI)) return false; break; } diff --git a/lib/Target/ARM/ARMInstructionSelector.h b/lib/Target/ARM/ARMInstructionSelector.h deleted file mode 100644 index 530141d92c2c6f99b69aaab69d93616f1dc1c974..0000000000000000000000000000000000000000 --- a/lib/Target/ARM/ARMInstructionSelector.h +++ /dev/null @@ -1,42 +0,0 @@ -//===- ARMInstructionSelector -----------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This file declares the targeting of the InstructionSelector class for ARM. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H -#define LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H - -#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" - -namespace llvm { - -class ARMBaseInstrInfo; -class ARMBaseRegisterInfo; -class ARMRegisterBankInfo; -class ARMSubtarget; - -class ARMInstructionSelector : public InstructionSelector { -public: - ARMInstructionSelector(const ARMSubtarget &STI, - const ARMRegisterBankInfo &RBI); - - bool select(MachineInstr &I) const override; - -private: - const ARMBaseInstrInfo &TII; - const ARMBaseRegisterInfo &TRI; - const ARMRegisterBankInfo &RBI; -}; - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 994bbd673dd87145354cab57de0a65ade1fc59cc..a706079d986627300692884d351787bfd1862576 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -12,7 +12,11 @@ //===----------------------------------------------------------------------===// #include "ARMLegalizerInfo.h" +#include "ARMCallLowering.h" #include "ARMSubtarget.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" @@ -43,8 +47,33 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, 1, p0}, Legal); } - for (auto Ty : {s1, s8, s16, s32}) - setAction({G_ADD, Ty}, Legal); + for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) { + for (auto Ty : {s1, s8, s16}) + setAction({Op, Ty}, WidenScalar); + setAction({Op, s32}, Legal); + } + + for (unsigned Op : {G_SDIV, G_UDIV}) { + for (auto Ty : {s8, s16}) + // FIXME: We need WidenScalar here, but in the case of targets with + // software division we'll also need Libcall afterwards. Treat as Custom + // until we have better support for chaining legalization actions. + setAction({Op, Ty}, Custom); + if (ST.hasDivideInARMMode()) + setAction({Op, s32}, Legal); + else + setAction({Op, s32}, Libcall); + } + + // FIXME: Support s8 and s16 as well + for (unsigned Op : {G_SREM, G_UREM}) + if (ST.hasDivideInARMMode()) + setAction({Op, s32}, Lower); + else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() || + ST.isTargetMuslAEABI()) + setAction({Op, s32}, Custom); + else + setAction({Op, s32}, Libcall); for (unsigned Op : {G_SEXT, G_ZEXT}) { setAction({Op, s32}, Legal); @@ -74,3 +103,81 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { computeTables(); } + +bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const { + using namespace TargetOpcode; + + switch (MI.getOpcode()) { + default: + return false; + case G_SDIV: + case G_UDIV: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (Ty != LLT::scalar(16) && Ty != LLT::scalar(8)) + return false; + + // We need to widen to 32 bits and then maybe, if the target requires, + // transform into a libcall. + LegalizerHelper Helper(MIRBuilder.getMF()); + + MachineInstr *NewMI = nullptr; + Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { + // Store the new, 32-bit div instruction. + if (MI->getOpcode() == G_SDIV || MI->getOpcode() == G_UDIV) + NewMI = MI; + }); + + auto Result = Helper.widenScalar(MI, 0, LLT::scalar(32)); + Helper.MIRBuilder.stopRecordingInsertions(); + if (Result == LegalizerHelper::UnableToLegalize) { + return false; + } + assert(NewMI && "Couldn't find widened instruction"); + assert((NewMI->getOpcode() == G_SDIV || NewMI->getOpcode() == G_UDIV) && + "Unexpected widened instruction"); + assert(MRI.getType(NewMI->getOperand(0).getReg()).getSizeInBits() == 32 && + "Unexpected type for the widened instruction"); + + Result = Helper.legalizeInstrStep(*NewMI); + if (Result == LegalizerHelper::UnableToLegalize) { + return false; + } + return true; + } + case G_SREM: + case G_UREM: { + unsigned OriginalResult = MI.getOperand(0).getReg(); + auto Size = MRI.getType(OriginalResult).getSizeInBits(); + if (Size != 32) + return false; + + auto Libcall = + MI.getOpcode() == G_SREM ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; + + // Our divmod libcalls return a struct containing the quotient and the + // remainder. We need to create a virtual register for it. + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + Type *ArgTy = Type::getInt32Ty(Ctx); + StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true); + auto RetVal = MRI.createGenericVirtualRegister( + getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout())); + + auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy}, + {{MI.getOperand(1).getReg(), ArgTy}, + {MI.getOperand(2).getReg(), ArgTy}}); + if (Status != LegalizerHelper::Legalized) + return false; + + // The remainder is the second result of divmod. Split the return value into + // a new, unused register for the quotient and the destination of the + // original instruction for the remainder. + MIRBuilder.buildUnmerge( + {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult}, + RetVal); + + return LegalizerHelper::Legalized; + } + } +} diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h index 0b8a608a6bdea077b811bb9c5d13d747bcc026cc..a9bdd367737e50d3c7ff90e3c5c2b730e7cb9644 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.h +++ b/lib/Target/ARM/ARMLegalizerInfo.h @@ -24,6 +24,9 @@ class ARMSubtarget; class ARMLegalizerInfo : public LegalizerInfo { public: ARMLegalizerInfo(const ARMSubtarget &ST); + + bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const override; }; } // End llvm namespace. #endif diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 72fcf7cd6a4fdd967c46d726065f5aa785670592..7a452d4a209521b4e6f302ccb379b46699b7ec9c 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -33,7 +34,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 0fd98268723ab49c4823b5148ebd3cb35c4ea1d8..13acea3c28a9ef56664132b9628831e51e44ba46 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -25,9 +25,9 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/ErrorHandling.h" @@ -211,11 +211,9 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) .addImm(ARMCC::AL).addReg(0)); MCInst Noop; - Subtarget->getInstrInfo()->getNoopForElfTarget(Noop); + Subtarget->getInstrInfo()->getNoop(Noop); for (int8_t I = 0; I < NoopsInSledCount; I++) - { OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); - } OutStreamer->EmitLabel(Target); recordSled(CurSled, MI, Kind); diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 581d5fe159fd3a748fcbec8885029a37d45f6cdd..7e4d598a6e0beb80a2ee400db92be077ee838665 100644 --- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -88,13 +88,15 @@ bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) { } } } + bool Changed = false; // Remove the tagged DMB for (auto MI : ToRemove) { MI->eraseFromParent(); ++NumDMBsRemoved; + Changed = true; } - return NumDMBsRemoved > 0; + return Changed; } /// createARMOptimizeBarriersPass - Returns an instance of the remove double diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index 08f3da7388684cdebe20e1d285817719d432bad7..f59b075e6dd9a6fdd6fe1990534e4aa7d832ba89 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -196,14 +196,14 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass( llvm_unreachable("Switch should handle all register classes"); } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { auto Opc = MI.getOpcode(); // Try the default logic for non-generic instructions that are either copies // or already have some operands assigned to banks. if (!isPreISelGenericOpcode(Opc)) { - InstructionMapping Mapping = getInstrMappingImpl(MI); + const InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; } @@ -219,8 +219,17 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { switch (Opc) { case G_ADD: + case G_SUB: + case G_MUL: + case G_AND: + case G_OR: + case G_XOR: + case G_SDIV: + case G_UDIV: case G_SEXT: case G_ZEXT: + case G_ANYEXT: + case G_TRUNC: case G_GEP: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we would use different mappings. @@ -246,34 +255,36 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; - case G_SEQUENCE: { - // We only support G_SEQUENCE for creating a double precision floating point - // value out of two GPRs. + case G_MERGE_VALUES: { + // We only support G_MERGE_VALUES for creating a double precision floating + // point value out of two GPRs. LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); - LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || Ty2.getSizeInBits() != 32) - return InstructionMapping{}; + return getInvalidInstructionMapping(); OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], - &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, - &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); break; } - case G_EXTRACT: { - // We only support G_EXTRACT for splitting a double precision floating point - // value into two GPRs. + case G_UNMERGE_VALUES: { + // We only support G_UNMERGE_VALUES for splitting a double precision + // floating point value into two GPRs. LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); - if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 || - MI.getOperand(2).getImm() % 32 != 0) - return InstructionMapping{}; - OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], - &ARM::ValueMappings[ARM::DPR3OpsIdx], - nullptr, nullptr}); + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); + if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 || + Ty2.getSizeInBits() != 64) + return getInvalidInstructionMapping(); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx]}); break; } default: - return InstructionMapping{}; + return getInvalidInstructionMapping(); } #ifndef NDEBUG @@ -287,6 +298,6 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } #endif - return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping, - NumOperands}; + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping, + NumOperands); } diff --git a/lib/Target/ARM/ARMRegisterBankInfo.h b/lib/Target/ARM/ARMRegisterBankInfo.h index 5222c1e6389f054b1d10dbfd4d1dcf59f805b05a..9650b358f3194bb66f6407000770cce7027d7832 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.h +++ b/lib/Target/ARM/ARMRegisterBankInfo.h @@ -36,7 +36,8 @@ public: const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override; - InstructionMapping getInstrMapping(const MachineInstr &MI) const override; + const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const override; }; } // End llvm namespace. #endif diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 87eb4c2b9074d308fba1747d993ebb1ce2704152..1c7902520f2d4cc3233ccb733b86ab0d7790aa9e 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -131,11 +131,25 @@ def WriteFPDIV64 : SchedWrite; def WriteFPSQRT32 : SchedWrite; def WriteFPSQRT64 : SchedWrite; +// Vector load and stores +def WriteVLD1 : SchedWrite; +def WriteVLD2 : SchedWrite; +def WriteVLD3 : SchedWrite; +def WriteVLD4 : SchedWrite; +def WriteVST1 : SchedWrite; +def WriteVST2 : SchedWrite; +def WriteVST3 : SchedWrite; +def WriteVST4 : SchedWrite; + + // Define TII for use in SchedVariant Predicates. def : PredicateProlog<[{ const ARMBaseInstrInfo *TII = static_cast(SchedModel->getInstrInfo()); (void)TII; + const ARMSubtarget *STI = + static_cast(SchedModel->getSubtargetInfo()); + (void)STI; }]>; def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>; @@ -409,3 +423,4 @@ include "ARMScheduleA8.td" include "ARMScheduleA9.td" include "ARMScheduleSwift.td" include "ARMScheduleR52.td" +include "ARMScheduleA57.td" diff --git a/lib/Target/ARM/ARMScheduleA57.td b/lib/Target/ARM/ARMScheduleA57.td new file mode 100644 index 0000000000000000000000000000000000000000..525079d12d516fac9474c2b82f5e5b423ca0a3b5 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA57.td @@ -0,0 +1,1471 @@ +//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for ARM Cortex-A57 to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// *** Common description and scheduling model parameters taken from AArch64 *** +// The Cortex-A57 is a traditional superscalar microprocessor with a +// conservative 3-wide in-order stage for decode and dispatch. Combined with the +// much wider out-of-order issue stage, this produced a need to carefully +// schedule micro-ops so that all three decoded each cycle are successfully +// issued as the reservation station(s) simply don't stay occupied for long. +// Therefore, IssueWidth is set to the narrower of the two at three, while still +// modeling the machine as out-of-order. + +def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>; +def IsCPSRDefinedAndPredicatedPred : + SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>; + +// Cortex A57 rev. r1p0 or later (false = r0px) +def IsR1P0AndLaterPred : SchedPredicate<[{false}]>; + +// If Addrmode3 contains register offset (not immediate) +def IsLdrAm3RegOffPred : + SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>; +// The same predicate with operand offset 2 and 3: +def IsLdrAm3RegOffPredX2 : + SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>; +def IsLdrAm3RegOffPredX3 : + SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>; + +// If Addrmode3 contains "minus register" +def IsLdrAm3NegRegOffPred : + SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>; +// The same predicate with operand offset 2 and 3: +def IsLdrAm3NegRegOffPredX2 : + SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>; +def IsLdrAm3NegRegOffPredX3 : + SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>; + +// Load, scaled register offset, not plus LSL2 +def IsLdstsoScaledNotOptimalPredX0 : + SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>; +def IsLdstsoScaledNotOptimalPred : + SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>; +def IsLdstsoScaledNotOptimalPredX2 : + SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>; + +// Load, scaled register offset +def IsLdstsoScaledPred : + SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>; +def IsLdstsoScaledPredX2 : + SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>; + +def IsLdstsoMinusRegPredX0 : + SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>; +def IsLdstsoMinusRegPred : + SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>; +def IsLdstsoMinusRegPredX2 : + SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>; + +// Load, scaled register offset +def IsLdrAm2ScaledPred : + SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>; + +// LDM, base reg in list +def IsLdmBaseRegInList : + SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>; + +class A57WriteLMOpsListType writes> { + list Writes = writes; + SchedMachineModel SchedModel = ?; +} + +// *** Common description and scheduling model parameters taken from AArch64 *** +// (AArch64SchedA57.td) +def CortexA57Model : SchedMachineModel { + let IssueWidth = 3; // 3-way decode and dispatch + let MicroOpBufferSize = 128; // 128 micro-op re-order buffer + let LoadLatency = 4; // Optimistic load latency + let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch + + // Enable partial & runtime unrolling. + let LoopMicroOpBufferSize = 16; + let CompleteModel = 1; +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cortex-A57. +// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where +// micro-ops wait for their operands and then issue out-of-order. + +def A57UnitB : ProcResource<1>; // Type B micro-ops +def A57UnitI : ProcResource<2>; // Type I micro-ops +def A57UnitM : ProcResource<1>; // Type M micro-ops +def A57UnitL : ProcResource<1>; // Type L micro-ops +def A57UnitS : ProcResource<1>; // Type S micro-ops + +def A57UnitX : ProcResource<1>; // Type X micro-ops (F1) +def A57UnitW : ProcResource<1>; // Type W micro-ops (F0) + +let SchedModel = CortexA57Model in { + def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops +} + +let SchedModel = CortexA57Model in { + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Cortex-A57. + +include "ARMScheduleA57WriteRes.td" + +// To have "CompleteModel = 1", support of pseudos and special instructions +def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$", + "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$", + "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$", + "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$", + "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE", + "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "UDF$", "t2DCPS", "t2SG", + "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier")>; + +def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>; + +// Specific memory instrs +def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC", + "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>; + +// coprocessor moves +def : InstRW<[WriteNoop, WriteNoop], (instregex + "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$", + "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$", + "(t2)?MSR(banked|i|_AR|_M)?$")>; + +// Deprecated instructions +def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>; + +// Pseudos +def : InstRW<[WriteNoop], (instregex "(t2)?ABS$", + "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj", + "tLDRpci_pic", "t2SUBS_PC_LR", + "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp", + "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", + "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", + "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", + "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", + "WIN__CHKSTK", "WIN__DBZCHK")>; + +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>; + +// --- 3.2 Branch Instructions --- +// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ + +def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$", + "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>; +def : InstRW<[A57Write_1cyc_1B_1I], + (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>; +def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>; +// Pseudos +def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>; +def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr", + "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>; +def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>; + +// --- 3.3 Arithmetic and Logical Instructions --- +// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S}, +// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST + +def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>; + +// shift by register, conditional or unconditional +// TODO: according to the doc, conditional uses I0/I1, unconditional uses M +// Why more complex instruction uses more simple pipeline? +// May be an error in doc. +def A57WriteALUsi : SchedWriteVariant<[ + // lsl #2, lsl #1, or lsr #1. + SchedVar, + SchedVar +]>; +def A57WriteALUsr : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57WriteALUSsr : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57ReadALUsr : SchedReadVariant<[ + SchedVar, + SchedVar +]>; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +def A57WriteCMPsr : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +// --- 3.4 Move and Shift Instructions --- +// Move, basic +// MOV{S}, MOVW, MVN{S} +def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)", + "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL", + "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>; + +// Move, shift by immed, setflags/no setflags +// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN +// setflags = isCPSRDefined +def A57WriteMOVsi : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi", + "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi", + "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>; + +// shift by register, conditional or unconditional, setflags/no setflags +def A57WriteMOVsr : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs", + "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr", + "(t2|t)RORrr")>; + +// Move, top +// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later +def A57WriteMOVT : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>; + +def A57WriteI2pc : + WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>; +def A57WriteI2ld : + WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>; +def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>; +def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>; + +// +2cyc for branch forms +def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>; + +// --- 3.5 Divide and Multiply Instructions --- +// Divide: SDIV, UDIV +// latency from documentration: 4 ­‐ 20, maximum taken +def : SchedAlias; +// Multiply: tMul not bound to common WriteRes types +def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>; +def : SchedAlias; +def : SchedAlias; +def : ReadAdvance; + +// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, +// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R} +// Multiply-accumulate pipelines support late-forwarding of accumulate operands +// from similar μops, allowing a typical sequence of multiply-accumulate μops +// to issue one every 1 cycle (sched advance = 2). +def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; } +def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; + +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +def : SchedAlias; +def : SchedAlias; + +// Multiply long: SMULL, UMULL +def : SchedAlias; +def : SchedAlias; + +// --- 3.6 Saturating and Parallel Arithmetic Instructions --- +// Parallel arith +// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8 +// Conditional GE-setting instructions require three extra μops +// and two additional cycles to conditionally update the GE field. +def A57WriteParArith : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW< [A57WriteParArith], (instregex + "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)", + "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>; + +// Parallel arith with exchange: SASX, SSAX, UASX, USAX +def A57WriteParArithExch : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteParArithExch], + (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>; + +// Parallel halving arith +// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8 +def : InstRW<[A57Write_2cyc_1M], (instregex + "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)", + "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>; + +// Parallel halving arith with exchange +// SHASX, SHSAX, UHASX, UHSAX +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX", + "(t2)?UHASX", "(t2)?UHSAX")>; + +// Parallel saturating arith +// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8 +def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)", + "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>; + +// Parallel saturating arith with exchange +// QASX, QSAX, UQASX, UQSAX +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX", + "(t2)?UQASX", "(t2)?UQSAX")>; + +// Saturate: SSAT, SSAT16, USAT, USAT16 +def : InstRW<[A57Write_2cyc_1M], + (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>; + +// Saturating arith: QADD, QSUB +def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>; + +// Saturating doubling arith: QDADD, QDSUB +def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>; + +// --- 3.7 Miscellaneous Data-Processing Instructions --- +// Bit field extract: SBFX, UBFX +def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>; + +// Bit field insert/clear: BFI, BFC +def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>; + +// Select bytes, conditional/unconditional +def A57WriteSEL : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>; + +// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH +def : InstRW<[A57Write_1cyc_1I], + (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>; + +// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH +def : InstRW<[A57Write_2cyc_1M], + (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>; + +// Sign/zero extend and add, parallel: SXTAB16, UXTAB16 +def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>; + +// Sum of absolute differences: USAD8, USADA8 +def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>; + +// --- 3.8 Load Instructions --- + +// Load, immed offset +// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate +def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12", + "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)", + "PICLDR", "tLDR")>; + +def : InstRW<[A57Write_4cyc_1L], + (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>; + +// For "Load, register offset, minus" we need +1cyc, +1I +def A57WriteLdrAm3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>; +def A57WriteLdrAm3X2 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>; +def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>; + +def A57WriteLdrAmLDSTSO : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>; + +def A57WrBackOne : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; +} +def A57WrBackTwo : SchedWriteRes<[]> { + let Latency = 2; + let NumMicroOps = 0; +} +def A57WrBackThree : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} + +// --- LDR pre-indexed --- +// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update) +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM", + "LDRB_PRE_IMM", "t2LDRB_PRE")>; + +// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update) +// (5 cyc load result for not-lsl2 scaled) +def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo], + (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>; + +def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack], + (instregex "LDR(H|SH|SB)_PRE")>; +def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], + (instregex "t2LDR(H|SH|SB)?_PRE")>; + +// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm. +def A57WriteLdrDAm3Pre : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack], + (instregex "LDRD_PRE")>; +def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], + (instregex "t2LDRD_PRE")>; + +// --- LDR post-indexed --- +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM", + "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>; + +def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack], + (instregex "LDR(H|SH|SB)_POST")>; +def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], + (instregex "t2LDR(H|SH|SB)?_POST")>; + +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG", + "LDRB_POST_REG", "LDR(B?)T_POST$")>; + +def A57WriteLdrTRegPost : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L" +def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack], + (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>; + +def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>; + +def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm. +def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>; +def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], + (instregex "t2LDRD_POST")>; + +// --- Preload instructions --- +// Preload, immed offset +def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12", + "t2PLDW?(i8|pci|s)", "(t2)?PLI")>; + +// Preload, register offset, +// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2 +// otherwise 4cyc "L" +def A57WritePLD : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar +]>; +def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>; + +// --- Load multiple instructions --- +foreach NumAddr = 1-8 in { + def A57LMAddrPred#NumAddr : + SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>; +} + +def A57LDMOpsListNoregin : A57WriteLMOpsListType< + [A57Write_3cyc_1L, A57Write_3cyc_1L, + A57Write_4cyc_1L, A57Write_4cyc_1L, + A57Write_5cyc_1L, A57Write_5cyc_1L, + A57Write_6cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_7cyc_1L, + A57Write_8cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_9cyc_1L, + A57Write_10cyc_1L, A57Write_10cyc_1L]>; +def A57WriteLDMnoreginlist : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57LDMOpsListRegin : A57WriteLMOpsListType< + [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>; +def A57WriteLDMreginlist : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57LDMOpsList_Upd : A57WriteLMOpsListType< + [A57WrBackOne, + A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I, + A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, + A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>; +def A57WriteLDM_Upd : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57WriteLDM : SchedWriteVariant<[ + SchedVar, + SchedVar +]> { let Variadic=1; } + +def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>; + +// TODO: no writeback latency defined in documentation (implemented as 1 cyc) +def : InstRW<[A57WriteLDM_Upd], + (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>; + +// --- 3.9 Store Instructions --- + +// Store, immed offset +def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR", + "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>; + +// Store, register offset +// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S", +// otherwise 1cyc S. +def A57WriteStrAmLDSTSO : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>; + +// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg. +def A57WriteStrAm3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>; +def A57WriteStrAm3X2 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>; + +// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback) +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM", + "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)", + "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>; + +// Store, register pre-indexed: +// 1(1) "S, I0/I1" for plus reg +// 3(2) "I0/I1, S" for minus reg +// 1(2) "S, M" for scaled plus lsl2 +// 3(2) "I0/I1, S" for other scaled +def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre], + (instregex "STR_PRE_REG", "STRB_PRE_REG")>; + +// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE) +// 1(1) "S, I0/I1" for imm or reg plus +// 3(2) "I0/I1, S" for reg minus +def A57WriteStrAm3PreX2 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2], + (instregex "STRH_PRE")>; + +def A57WriteStrAm3PreX3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3], + (instregex "STRD_PRE")>; + +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM", + "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>; + +// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not) +def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG", + "STRB(T?)_POST_REG", "STR(B?)T_POST$")>; + +// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr +// 1(1) "S, I0/I1" both for reg or imm +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], + (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>; + +// --- Store multiple instructions --- +// TODO: no writeback latency defined in documentation +def A57WriteSTM : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteSTM_Upd : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; + +def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>; +def : InstRW<[A57WrBackOne, A57WriteSTM_Upd], + (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>; + +// --- 3.10 FP Data Processing Instructions --- +def : SchedAlias; +def : SchedAlias; + +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>; + +// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional +def A57WriteVcmp : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; +def : InstRW<[A57WriteVcmp], + (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>; + +// fp convert +def : InstRW<[A57Write_5cyc_1V], (instregex + "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>; + +def : SchedAlias; + +// FP round to integral +def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>; + +// FP divide, FP square root +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +// FP max/min +def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>; + +// FP multiply-accumulate pipelines support late forwarding of the result +// from FP multiply μops to the accumulate operands of an +// FP multiply-accumulate μop. The latter can potentially be issued 1 cycle +// after the FP multiply μop has been issued +// FP multiply, FZ +def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; } + +def : SchedAlias; +def : SchedAlias; +def : ReadAdvance; + +// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate +// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS +def A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; } + +// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.) +// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.) +// Currently, there is no way to define different read advances for VFMA operand +// from VFMA or from VMUL, so there will be 5 read advance. +// Zero latency (instead of one) for VMUL->VFMA shouldn't break something. +// The same situation with ASIMD VMUL/VFMA instructions +// def A57ReadVFMA : SchedRead; +// def : ReadAdvance; +// def : ReadAdvance; +def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>; + +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>; +def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>; + +// --- 3.11 FP Miscellaneous Instructions --- +// VMOV: 3cyc "F0/F1" for imm/reg +def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>; +def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>; + +// 5cyc L for FP transfer, vfp to core reg, +// 5cyc L for FP transfer, core reg to vfp +def : SchedAlias; +// VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2). +def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L], (instregex "VMOV(RRS|RRD)")>; + +// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg +def : InstRW<[A57Write_8cyc_1L_1I], (instregex "VMOVDRR")>; + +// --- 3.12 FP Load Instructions --- +def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>; + +def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>; + +// FP load multiple (VLDM) + +def A57VLDMOpsListUncond : A57WriteLMOpsListType< + [A57Write_5cyc_1L, A57Write_5cyc_1L, + A57Write_6cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_7cyc_1L, + A57Write_8cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_9cyc_1L, + A57Write_10cyc_1L, A57Write_10cyc_1L, + A57Write_11cyc_1L, A57Write_11cyc_1L, + A57Write_12cyc_1L, A57Write_12cyc_1L]>; +def A57WriteVLDMuncond : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57VLDMOpsListCond : A57WriteLMOpsListType< + [A57Write_5cyc_1L, A57Write_6cyc_1L, + A57Write_7cyc_1L, A57Write_8cyc_1L, + A57Write_9cyc_1L, A57Write_10cyc_1L, + A57Write_11cyc_1L, A57Write_12cyc_1L, + A57Write_13cyc_1L, A57Write_14cyc_1L, + A57Write_15cyc_1L, A57Write_16cyc_1L, + A57Write_17cyc_1L, A57Write_18cyc_1L, + A57Write_19cyc_1L, A57Write_20cyc_1L]>; +def A57WriteVLDMcond : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57WriteVLDM : SchedWriteVariant<[ + SchedVar, + SchedVar +]> { let Variadic=1; } + +def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>; + +def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType< + [A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, + A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, + A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, + A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I, + A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>; +def A57WriteVLDMuncond_UPD : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType< + [A57Write_5cyc_1L_1I, A57Write_6cyc_1L_1I, + A57Write_7cyc_1L_1I, A57Write_8cyc_1L_1I, + A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I, + A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I, + A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I, + A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I, + A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I, + A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>; +def A57WriteVLDMcond_UPD : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]> { let Variadic=1; } + +def A57WriteVLDM_UPD : SchedWriteVariant<[ + SchedVar, + SchedVar +]> { let Variadic=1; } + +def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD], + (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>; + +// --- 3.13 FP Store Instructions --- +def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>; + +def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>; + +def A57WriteVSTMs : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteVSTMd : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteVSTMs_Upd : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; +def A57WriteVSTMd_Upd : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar +]>; + +def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>; +def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>; +def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd], + (instregex "VSTM(SIA_UPD|SDB_UPD)")>; +def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd], + (instregex "VSTM(DIA_UPD|DDB_UPD)")>; + +// --- 3.14 ASIMD Integer Instructions --- + +// ASIMD absolute diff, 3cyc F0/F1 for integer VABD +def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>; + +// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form +def A57WriteVABAD : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>; +def : InstRW<[A57WriteVABAD, A57ReadVABAD], + (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>; +def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> { let Latency = 5; } +def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>; +def : InstRW<[A57WriteVABAQ, A57ReadVABAQ], + (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>; + +// ASIMD absolute diff accum long: 4(1) F1 for VABAL +def A57WriteVABAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>; +def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>; + +// ASIMD absolute diff long: 3cyc F0/F1 for VABDL +def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>; + +// ASIMD arith, basic +def : InstRW<[A57Write_3cyc_1V], (instregex "VADD", "VADDL", "VADDW", + "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)", + "VPADDi", "VPADDL", "VSUB", "VSUBL", "VSUBW")>; + +// ASIMD arith, complex +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS", "VADDHN", "VHADD", "VHSUB", + "VQABS", "VQADD", "VQNEG", "VQSUB", + "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>; + +// ASIMD compare +def : InstRW<[A57Write_3cyc_1V], + (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>; + +// ASIMD logical +def : InstRW<[A57Write_3cyc_1V], + (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>; + +// ASIMD max/min +def : InstRW<[A57Write_3cyc_1V], + (instregex "(VMAX|VMIN)(s|u)", "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>; + +// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later +// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply +// and multiply-with-accumulate instructions relative to r0pX. +def A57WriteVMULD_VecInt : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[A57WriteVMULD_VecInt], (instregex + "VMUL(v8i8|v4i16|v2i32|pd)", "VMULsl(v4i16|v2i32)", + "VQDMULH(sl)?(v4i16|v2i32)", "VQRDMULH(sl)?(v4i16|v2i32)")>; + +// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later +def A57WriteVMULQ_VecInt : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[A57WriteVMULQ_VecInt], (instregex + "VMUL(v16i8|v8i16|v4i32|pq)", "VMULsl(v8i16|v4i32)", + "VQDMULH(sl)?(v8i16|v4i32)", "VQRDMULH(sl)?(v8i16|v4i32)")>; + +// ASIMD multiply accumulate, D-form +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence +// (4 or 3 ReadAdvance) +def A57WriteVMLAD_VecInt : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def A57ReadVMLAD_VecInt : SchedReadVariant<[ + SchedVar]>, + SchedVar]> +]>; +def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt], + (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)", "VMLS(sl)?(v8i8|v4i16|v2i32)")>; + +// ASIMD multiply accumulate, Q-form +// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence +// (4 or 3 ReadAdvance) +def A57WriteVMLAQ_VecInt : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def A57ReadVMLAQ_VecInt : SchedReadVariant<[ + SchedVar]>, + SchedVar]> +]>; +def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt], + (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)", "VMLS(sl)?(v16i8|v8i16|v4i32)")>; + +// ASIMD multiply accumulate long +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence +// (4 or 3 ReadAdvance) +def A57WriteVMLAL_VecInt : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def A57ReadVMLAL_VecInt : SchedReadVariant<[ + SchedVar]>, + SchedVar]> +]>; +def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt], + (instregex "VMLAL(s|u)", "VMLSL(s|u)")>; + +// ASIMD multiply accumulate saturating long +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence +// (3 or 2 ReadAdvance) +def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def A57ReadVQDMLAL_VecInt : SchedReadVariant<[ + SchedVar]>, + SchedVar]> +]>; +def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt], + (instregex "VQDMLAL", "VQDMLSL")>; + +// ASIMD multiply long +// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later +def A57WriteVMULL_VecInt : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[A57WriteVMULL_VecInt], + (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>; + +// ASIMD pairwise add and accumulate +// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance) +def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>; +def : InstRW<[A57WriteVPADAL, A57ReadVPADAL], (instregex "VPADAL(s|u)")>; + +// ASIMD shift accumulate +// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance) +def A57WriteVSRA : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>; +def : InstRW<[A57WriteVSRA, A57ReadVSRA], (instregex "VSRA", "VRSRA")>; + +// ASIMD shift by immed, basic +def : InstRW<[A57Write_3cyc_1X], + (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>; + +// ASIMD shift by immed, complex +def : InstRW<[A57Write_4cyc_1X], (instregex + "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN", "VRSHR(s|u)", + "VRSHRN")>; + +// ASIMD shift by immed and insert, basic, D-form +def : InstRW<[A57Write_4cyc_1X], (instregex + "VSLI(v8i8|v4i16|v2i32|v1i64)", "VSRI(v8i8|v4i16|v2i32|v1i64)")>; + +// ASIMD shift by immed and insert, basic, Q-form +def : InstRW<[A57Write_5cyc_1X], (instregex + "VSLI(v16i8|v8i16|v4i32|v2i64)", "VSRI(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD shift by register, basic, D-form +def : InstRW<[A57Write_3cyc_1X], (instregex + "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>; + +// ASIMD shift by register, basic, Q-form +def : InstRW<[A57Write_4cyc_1X], (instregex + "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>; + +// ASIMD shift by register, complex, D-form +// VQRSHL, VQSHL, VRSHL +def : InstRW<[A57Write_4cyc_1X], (instregex + "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", + "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>; + +// ASIMD shift by register, complex, Q-form +def : InstRW<[A57Write_5cyc_1X], (instregex + "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", + "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>; + +// --- 3.15 ASIMD Floating-Point Instructions --- +// ASIMD FP absolute value +def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>; + +// ASIMD FP arith +def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)", + "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>; + +// ASIMD FP compare +def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)", + "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>; + +// ASIMD FP convert, integer +def : InstRW<[A57Write_5cyc_1V], (instregex + "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)", + "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)", + "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>; + +// ASIMD FP convert, half-precision: 8cyc F0/F1 +def : InstRW<[A57Write_8cyc_1V], (instregex + "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)", + "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)", + "VCVT(f2h|h2f)")>; + +// ASIMD FP max/min +def : InstRW<[A57Write_5cyc_1V], (instregex + "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>; + +// ASIMD FP multiply +def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>; + +// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence +def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57ReadVMLA_VecFP : + SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>; +def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP], + (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)", "(VFMA|VFMS)(fd|fq|hd|hq)")>; + +// ASIMD FP negate +def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>; + +// ASIMD FP round to integral +def : InstRW<[A57Write_5cyc_1V], (instregex + "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>; + +// --- 3.16 ASIMD Miscellaneous Instructions --- + +// ASIMD bitwise insert +def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>; + +// ASIMD count +def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>; + +// ASIMD duplicate, core reg: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VDUP(8|16|32)(d|q)")>; + +// ASIMD duplicate, scalar: 3cyc "F0/F1" +def : InstRW<[A57Write_3cyc_1V], (instregex "VDUPLN(8|16|32)(d|q)")>; + +// ASIMD extract +def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>; + +// ASIMD move, immed +def : InstRW<[A57Write_3cyc_1V], (instregex + "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)", + "VMOVQ0")>; + +// ASIMD move, narrowing +def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>; + +// ASIMD move, saturating +def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>; + +// ASIMD reciprocal estimate +def : InstRW<[A57Write_5cyc_1V], (instregex "VRECPE", "VRSQRTE")>; + +// ASIMD reciprocal step, FZ +def : InstRW<[A57Write_9cyc_1V], (instregex "VRECPS", "VRSQRTS")>; + +// ASIMD reverse, swap, table lookup (1-2 reg) +def : InstRW<[A57Write_3cyc_1V], (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>; + +// ASIMD table lookup (3-4 reg) +def : InstRW<[A57Write_6cyc_1V], (instregex "VTBL(3|4)", "VTBX(3|4)")>; + +// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1" +def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>; + +// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>; + +// ASIMD transpose +def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], (instregex "VTRN")>; + +// ASIMD unzip/zip, D-form +def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], + (instregex "VUZPd", "VZIPd")>; + +// ASIMD unzip/zip, Q-form +def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V], + (instregex "VUZPq", "VZIPq")>; + +// --- 3.17 ASIMD Load Instructions --- + +// Overriden via InstRW for this processor. +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency +def : InstRW<[A57Write_5cyc_1L], (instregex "VLD1(d|q)(8|16|32|64)$")>; +def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne], + (instregex "VLD1(d|q)(8|16|32|64)wb")>; + +// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency +def : InstRW<[A57Write_6cyc_1L], + (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$", "VLD1d64(T|Q)Pseudo")>; + +def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne], + (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>; + +// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], (instregex + "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], (instregex + "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", "VLD1LNq(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V], + (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>; + +// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V], (instregex "VLD2b(8|16|32)$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD2b(8|16|32)wb")>; + +// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$", + "VLD2LN(d|q)(8|16|32)Pseudo$")>; +// 2 results + wb result +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne], + (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>; +// 1 result + wb result +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb", + "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1" +// 3 results +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V], + (instregex "VLD3(d|q)(8|16|32)$")>; +// 1 result +def : InstRW<[A57Write_9cyc_1L_1V], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>; +// 3 results + wb +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3(d|q)(8|16|32)_UPD$")>; +// 1 result + wb +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD3LN(d|q)32$", + "VLD3LN(d|q)32Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)32_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)32Pseudo_UPD")>; + +// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V], + (instregex "VLD3LN(d|q)(8|16)$", + "VLD3LN(d|q)(8|16)Pseudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)(8|16)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>; + +// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], + (instregex "VLD3DUP(d|q)(8|16|32)$", + "VLD3DUP(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>; + +// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, + A57Write_9cyc_1L_1V], + (instregex "VLD4(d|q)(8|16|32)$")>; +def : InstRW<[A57Write_9cyc_1L_1V], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; + +// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, + A57Write_8cyc_1L_1V], + (instregex "VLD4LN(d|q)32$", + "VLD4LN(d|q)32Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4LN(d|q)32_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4LN(d|q)32Pseudo_UPD")>; + +// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1" +def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, + A57Write_9cyc_1L_1V], + (instregex "VLD4LN(d|q)(8|16)$", + "VLD4LN(d|q)(8|16)Pseudo$")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4LN(d|q)(8|16)_UPD")>; +def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>; + +// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1" +def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, + A57Write_8cyc_1L_1V], + (instregex "VLD4DUP(d|q)(8|16|32)$", + "VLD4DUP(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, + A57WrBackOne], + (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>; +def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], + (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>; + +// --- 3.18 ASIMD Store Instructions --- + +// ASIMD store, 1 element, multiple, 1 reg: 1cyc S +def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>; +def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], + (instregex "VST1d(8|16|32|64)wb")>; +// ASIMD store, 1 element, multiple, 2 reg: 2cyc S +def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>; +def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I], + (instregex "VST1q(8|16|32|64)wb")>; +// ASIMD store, 1 element, multiple, 3 reg: 3cyc S +def : InstRW<[A57Write_3cyc_1S], + (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I], + (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>; +// ASIMD store, 1 element, multiple, 4 reg: 4cyc S +def : InstRW<[A57Write_4cyc_1S], + (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I], + (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>; +// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>; +// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST2(d|b)(8|16|32)$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST2(b|d)(8|16|32)wb")>; +// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S" +def : InstRW<[A57Write_4cyc_1S_1V], + (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], + (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>; +// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S" +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST2LN(d|q)(8|16|32)_UPD", + "VST2LN(d|q)(8|16|32)Pseudo_UPD")>; +// ASIMD store, 3 element, multiple, 3 reg +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST3(d|q)(8|16|32)_UPD", + "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; +// ASIMD store, 3 element, one lane +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST3LN(d|q)(8|16|32)_UPD", + "VST3LN(d|q)(8|16|32)Pseudo_UPD")>; +// ASIMD store, 4 element, multiple, 4 reg +def : InstRW<[A57Write_4cyc_1S_1V], + (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>; +def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], + (instregex "VST4(d|q)(8|16|32)_UPD", + "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; +// ASIMD store, 4 element, one lane +def : InstRW<[A57Write_3cyc_1S_1V], + (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>; +def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], + (instregex "VST4LN(d|q)(8|16|32)_UPD", + "VST4LN(d|q)(8|16|32)Pseudo_UPD")>; + +// --- 3.19 Cryptography Extensions --- +// Crypto AES ops +// AESD, AESE, AESIMC, AESMC: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>; +// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>; +// Crypto SHA1 xor ops: 6cyc F0/F1 +def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; +// Crypto SHA1 fast ops: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>; +// Crypto SHA1 slow ops: 6cyc F0 +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>; +// Crypto SHA256 fast ops: 3cyc F0 +def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>; +// Crypto SHA256 slow ops: 6cyc F0 +def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>; + +// --- 3.20 CRC --- +def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>; + +// ----------------------------------------------------------------------------- +// Common definitions +def : WriteRes { let Latency = 0; let NumMicroOps = 0; } +def : SchedAlias; + +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +def : SchedAlias; +def : SchedAlias; +def : ReadAdvance; + +} // SchedModel = CortexA57Model + diff --git a/lib/Target/ARM/ARMScheduleA57WriteRes.td b/lib/Target/ARM/ARMScheduleA57WriteRes.td new file mode 100644 index 0000000000000000000000000000000000000000..670717dc7c13886c3378528e793c7853c5612f7b --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA57WriteRes.td @@ -0,0 +1,323 @@ +//=- ARMScheduleA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming conventions is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: A57Write +// Latency: #cyc +// MicroOp Count/Types: #(B|I|M|L|S|X|W|V) +// +// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are +// 11 micro-ops to be issued as follows: one to I pipe, six to S pipes and +// four to V pipes. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define Generic 1 micro-op types + +def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } +def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } +def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17; + let ResourceCycles = [17]; } +def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; + let ResourceCycles = [18]; } +def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; + let ResourceCycles = [19]; } +def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20; + let ResourceCycles = [20]; } +def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } +def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } +def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; } +def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; } +def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } +def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; } +def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; } +def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } +def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32; + let ResourceCycles = [32]; } +def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; + let ResourceCycles = [32]; } +def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; + let ResourceCycles = [35]; } +def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; } +def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; } +def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; } + +// A57Write_3cyc_1L - A57Write_20cyc_1L +foreach Lat = 3-20 in { + def A57Write_#Lat#cyc_1L : SchedWriteRes<[A57UnitL]> { + let Latency = Lat; + } +} + +// A57Write_4cyc_1S - A57Write_16cyc_1S +foreach Lat = 4-16 in { + def A57Write_#Lat#cyc_1S : SchedWriteRes<[A57UnitS]> { + let Latency = Lat; + } +} + +def A57Write_4cyc_1M : SchedWriteRes<[A57UnitL]> { let Latency = 4; } +def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; } +def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; } +def A57Write_6cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 6; } +def A57Write_6cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 6; } +def A57Write_8cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 8; } +def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; } +def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } + + +//===----------------------------------------------------------------------===// +// Define Generic 2 micro-op types + +def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 64; + let NumMicroOps = 2; + let ResourceCycles = [32, 32]; +} +def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 7; + let NumMicroOps = 2; +} +def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_9cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_1cyc_1S_1I : SchedWriteRes<[A57UnitS, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1S_1I : SchedWriteRes<[A57UnitS, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1I : SchedWriteRes<[A57UnitS, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_1cyc_1S_1M : SchedWriteRes<[A57UnitS, + A57UnitM]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_3cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_6cyc_1B_1L : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_2cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 36; + let NumMicroOps = 2; + let ResourceCycles = [18, 18]; +} +def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 4; + let NumMicroOps = 2; +} + +// A57Write_3cyc_1L_1I - A57Write_20cyc_1L_1I +foreach Lat = 3-20 in { + def A57Write_#Lat#cyc_1L_1I : SchedWriteRes<[A57UnitL, A57UnitI]> { + let Latency = Lat; let NumMicroOps = 2; + } +} + +def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 4; + let NumMicroOps = 2; +} +def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} + +// A57Write_4cyc_1S_1I - A57Write_16cyc_1S_1I +foreach Lat = 4-16 in { + def A57Write_#Lat#cyc_1S_1I : SchedWriteRes<[A57UnitS, A57UnitI]> { + let Latency = Lat; let NumMicroOps = 2; + } +} + +def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 4; + let NumMicroOps = 2; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 3 micro-op types + +def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 3; +} +def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 3; +} +def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_1S_1V_1I : SchedWriteRes<[A57UnitS, + A57UnitV, + A57UnitI]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_4cyc_1S_1V_1I : SchedWriteRes<[A57UnitS, + A57UnitV, + A57UnitI]> { + let Latency = 4; + let NumMicroOps = 3; +} +def A57Write_4cyc_1I_1L_1M : SchedWriteRes<[A57UnitI, A57UnitL, A57UnitM]> { + let Latency = 4; + let NumMicroOps = 3; +} +def A57Write_8cyc_1L_1V_1I : SchedWriteRes<[A57UnitL, + A57UnitV, + A57UnitI]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_9cyc_1L_1V_1I : SchedWriteRes<[A57UnitL, + A57UnitV, + A57UnitI]> { + let Latency = 9; + let NumMicroOps = 3; +} diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 8fb8a2a3b6d2df6ad971b7a21a76ff3fbf4a2b83..4e72b13d94cbcf6118566f71bd70d329b5078689 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1981,6 +1981,15 @@ def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; } def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; } +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + // Reserve A9UnitFP for 2 consecutive cycles. def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; diff --git a/lib/Target/ARM/ARMScheduleR52.td b/lib/Target/ARM/ARMScheduleR52.td index 537e5da9669f39f873be0c562eeb06dc4e143b58..782be9b60a7ae4d607fe1a51e5fe7481698a0b11 100644 --- a/lib/Target/ARM/ARMScheduleR52.td +++ b/lib/Target/ARM/ARMScheduleR52.td @@ -120,6 +120,12 @@ def : WriteRes { def : WriteRes { let Latency = 7; } def : WriteRes { let Latency = 17; } +// Overriden via InstRW for this processor. +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + def : ReadAdvance; // mul operand read in F1 def : ReadAdvance; // fp-mac operand read in F1 @@ -712,20 +718,20 @@ def R52WriteSTM : SchedWriteVariant<[ // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with // another instruction in slot-1, but only in the last issue. -def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;} -def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes { let Latency = 5;} +def : WriteRes { let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2]; let SingleIssue = 1; } -def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes { let Latency = 7; let NumMicroOps = 5; let ResourceCycles = [3]; let SingleIssue = 1; } -def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes { let Latency = 8; let NumMicroOps = 7; let ResourceCycles = [4]; @@ -828,95 +834,6 @@ def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>; def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>; -//--- -// VLDx. Vector Loads -//--- -// 1-element structure load -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>; - -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>; - -// 2-element structure load -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>; - -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>; - -// 3-element structure load -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; - -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>; - -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; - -// 4-element structure load -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; - - -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; - //--- // VSTx. Vector Stores //--- diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index dc041c6c6006bd3dd82f0c5c467bcee987468c4f..b838688c6f04e638917a8c24a85e527a1f7bd6c8 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -1070,6 +1070,16 @@ let SchedModel = SwiftModel in { def : ReadAdvance; def : ReadAdvance; + // Overriden via InstRW for this processor. + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + // Not specified. def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>; // Preload. diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index b8a708a20a9556343956de5dbfe51ab1ba84d0f2..d9d0c27c6304de834252d4840249988ba1b49e17 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -28,10 +28,10 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Target/TargetOptions.h" #include #include diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 40993fc0aa8acdea1ca8d93c4eb7a0b587dbcfd0..af682dd8321cfa8de8514712bda2cb3a735778f6 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -208,8 +208,8 @@ protected: /// FP registers for VFPv3. bool HasD16 = false; - /// HasHardwareDivide - True if subtarget supports [su]div - bool HasHardwareDivide = false; + /// HasHardwareDivide - True if subtarget supports [su]div in Thumb mode + bool HasHardwareDivideInThumb = false; /// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode bool HasHardwareDivideInARM = false; @@ -234,6 +234,10 @@ protected: /// CPSR setting instruction. bool AvoidCPSRPartialUpdate = false; + /// CheapPredicableCPSRDef - If true, disable +1 predication cost + /// for instructions updating CPSR. Enabled for Cortex-A57. + bool CheapPredicableCPSRDef = false; + /// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting /// movs with shifter operand (i.e. asr, lsl, lsr). bool AvoidMOVsShifterOperand = false; @@ -507,7 +511,7 @@ public: return hasNEON() && UseNEONForSinglePrecisionFP; } - bool hasDivide() const { return HasHardwareDivide; } + bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasDataBarrier() const { return HasDataBarrier; } bool hasV7Clrex() const { return HasV7Clrex; } @@ -543,6 +547,7 @@ public: bool nonpipelinedVFP() const { return NonpipelinedVFP; } bool prefers32BitThumb() const { return Pref32BitThumb; } bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } + bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; } bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRetAddrStack() const { return HasRetAddrStack; } bool hasMPExtension() const { return HasMPExtension; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index b8dadb331ecf6033e4436036d796f155452ae3be..c0506cfda61299b4c743adaaa0a58bd60a2a204e 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -12,9 +12,10 @@ #include "ARM.h" #include "ARMCallLowering.h" -#include "ARMInstructionSelector.h" #include "ARMLegalizerInfo.h" +#ifdef LLVM_BUILD_GLOBAL_ISEL #include "ARMRegisterBankInfo.h" +#endif #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" @@ -36,6 +37,7 @@ #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" @@ -84,9 +86,9 @@ namespace llvm { extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine X(getTheARMLETarget()); + RegisterTargetMachine A(getTheThumbLETarget()); RegisterTargetMachine Y(getTheARMBETarget()); - RegisterTargetMachine A(getTheThumbLETarget()); - RegisterTargetMachine B(getTheThumbBETarget()); + RegisterTargetMachine B(getTheThumbBETarget()); PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeGlobalISel(Registry); @@ -262,6 +264,11 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, else this->Options.EABIVersion = EABI::EABI5; } + + initAsmInfo(); + if (!Subtarget.isThumb() && !Subtarget.hasARMOps()) + report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " + "support ARM mode execution!"); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; @@ -339,7 +346,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // FIXME: At this point, we can't rely on Subtarget having RBI. // It's awkward to mix passing RBI and the Subtarget; should we pass // TII/TRI as well? - GISel->InstSelector.reset(new ARMInstructionSelector(*I, *RBI)); + GISel->InstSelector.reset(createARMInstructionSelector(*this, *I, *RBI)); GISel->RegBankInfo.reset(RBI); #endif @@ -354,22 +361,6 @@ TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { }); } -void ARMTargetMachine::anchor() {} - -ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, CodeGenOpt::Level OL, - bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { - initAsmInfo(); - if (!Subtarget.hasARMOps()) - report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " - "support ARM mode execution!"); -} - -void ARMLETargetMachine::anchor() {} ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -377,9 +368,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - -void ARMBETargetMachine::anchor() {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -387,52 +376,34 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} - -void ThumbTargetMachine::anchor() {} - -ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { - initAsmInfo(); -} - -void ThumbLETargetMachine::anchor() {} - -ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - -void ThumbBETargetMachine::anchor() {} - -ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { /// ARM Code Generator Pass Configuration Options. class ARMPassConfig : public TargetPassConfig { public: - ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM) + ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} ARMBaseTargetMachine &getARMTargetMachine() const { return getTM(); } + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMILive *DAG = createGenericSchedLive(C); + // add DAG Mutations here. + return DAG; + } + + ScheduleDAGInstrs * + createPostMachineScheduler(MachineSchedContext *C) const override { + ScheduleDAGMI *DAG = createGenericSchedPostRA(C); + // add DAG Mutations here. + return DAG; + } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; @@ -463,14 +434,14 @@ INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix", "ARM Execution Dependency Fix", false, false) TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { - return new ARMPassConfig(this, PM); + return new ARMPassConfig(*this, PM); } void ARMPassConfig::addIRPasses() { if (TM->Options.ThreadModel == ThreadModel::Single) addPass(createLowerAtomicPass()); else - addPass(createAtomicExpandPass(TM)); + addPass(createAtomicExpandPass()); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in @@ -485,7 +456,7 @@ void ARMPassConfig::addIRPasses() { // Match interleaved memory accesses to ldN/stN intrinsics. if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createInterleavedAccessPass(TM)); + addPass(createInterleavedAccessPass()); } bool ARMPassConfig::addPreISel() { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index f0ca9427d9fb08c6394ce4f12f182628d29c2101..2fcee73228fe7a33e98559dea344513682f71781 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -60,25 +60,15 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } -}; - -/// ARM target machine. -/// -class ARMTargetMachine : public ARMBaseTargetMachine { - virtual void anchor(); -public: - ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle); + bool isMachineVerifierClean() const override { + return false; + } }; -/// ARM little endian target machine. +/// ARM/Thumb little endian target machine. /// -class ARMLETargetMachine : public ARMTargetMachine { - void anchor() override; - +class ARMLETargetMachine : public ARMBaseTargetMachine { public: ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -86,11 +76,9 @@ public: CodeGenOpt::Level OL); }; -/// ARM big endian target machine. +/// ARM/Thumb big endian target machine. /// -class ARMBETargetMachine : public ARMTargetMachine { - void anchor() override; - +class ARMBETargetMachine : public ARMBaseTargetMachine { public: ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -98,44 +86,6 @@ public: CodeGenOpt::Level OL); }; -/// Thumb target machine. -/// Due to the way architectures are handled, this represents both -/// Thumb-1 and Thumb-2. -/// -class ThumbTargetMachine : public ARMBaseTargetMachine { - virtual void anchor(); - -public: - ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle); -}; - -/// Thumb little endian target machine. -/// -class ThumbLETargetMachine : public ThumbTargetMachine { - void anchor() override; - -public: - ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -/// Thumb big endian target machine. -/// -class ThumbBETargetMachine : public ThumbTargetMachine { - void anchor() override; - -public: - ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 94f9e8dfebbf8b49547897e74b5b5311db34d7fb..a5b27abeb27fe1573bd542172f2696f462358736 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" -#include "ARMTargetObjectFile.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" #include @@ -30,8 +30,8 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { - const ARMTargetMachine &ARM_TM = static_cast(TM); - bool isAAPCS_ABI = ARM_TM.TargetABI == ARMTargetMachine::ARMABI::ARM_ABI_AAPCS; + const ARMBaseTargetMachine &ARM_TM = static_cast(TM); + bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS; genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly(); TargetLoweringObjectFileELF::Initialize(Ctx, TM); diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 7de0543dfa5e0f438f34750ecb78da02e56c3c19..8a1a3786387793832925c5f2e8790ead207b91ce 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -78,7 +78,7 @@ public: return 13; } - unsigned getRegisterBitWidth(bool Vector) { + unsigned getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasNEON()) return 128; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f421d3ac1693b03d67ef2e1e97d4e986e4f007b0..19fba3033bb2b41c2ea36243314b68dbe27500bc 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -39,10 +41,8 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetParser.h" @@ -67,6 +67,9 @@ static cl::opt ImplicitItMode( clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb", "Warn in ARM, emit implicit ITs in Thumb"))); +static cl::opt AddBuildAttributes("arm-add-build-attributes", + cl::init(false)); + class ARMOperand; enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; @@ -540,6 +543,10 @@ public: // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + // Add build attributes based on the selected target. + if (AddBuildAttributes) + getTargetStreamer().emitTargetAttributes(STI); + // Not in an ITBlock to start with. ITState.CurPosition = ~0U; @@ -1019,6 +1026,15 @@ public: ARM_AM::getSOImmVal(-Value) != -1); } bool isT2SOImm() const { + // If we have an immediate that's not a constant, treat it as an expression + // needing a fixup. + if (isImm() && !isa(getImm())) { + // We want to avoid matching :upper16: and :lower16: as we want these + // expressions to match in isImm0_65535Expr() + const ARMMCExpr *ARM16Expr = dyn_cast(getImm()); + return (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 && + ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)); + } if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast(getImm()); if (!CE) return false; @@ -8397,7 +8413,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // wide encoding wasn't explicit. if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || !isARMLowRegister(Inst.getOperand(0).getReg()) || - (unsigned)Inst.getOperand(2).getImm() > 255 || + (Inst.getOperand(2).isImm() && + (unsigned)Inst.getOperand(2).getImm() > 255) || ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || (inITBlock() && Inst.getOperand(5).getReg() != 0)) || (static_cast(*Operands[3]).isToken() && @@ -8549,7 +8566,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && - (unsigned)Inst.getOperand(1).getImm() <= 255 && + (Inst.getOperand(1).isImm() && + (unsigned)Inst.getOperand(1).getImm() <= 255) && ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR) || (inITBlock() && Inst.getOperand(4).getReg() == 0)) && @@ -10189,8 +10207,8 @@ static const struct { { ARM::AEK_CRYPTO, Feature_HasV8, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} }, - { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, - {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} }, + { (ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, + {ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM} }, { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} }, diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 1062c79432011a431f5aa1956ac2131c01aa721f..3cde43967568b69971b6c64069f217e37e48320c 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -1,6 +1,9 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) -tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank) +if(LLVM_BUILD_GLOBAL_ISEL) + tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank) + tablegen(LLVM ARMGenGlobalISel.inc -gen-global-isel) +endif() tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info) tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index e812d32cc76f6f6928b574ad9b91862aa22df5d9..585726208a8d603d364c3501ce681b1aaf9adaa9 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -20,8 +20,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 40bf545e83224ab32e3218b1bfef8abc343b682b..716492ea256627b4ad954ad71069dbd3030c9853 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" -#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackend.h" +#include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMAsmBackendDarwin.h" #include "MCTargetDesc/ARMAsmBackendELF.h" #include "MCTargetDesc/ARMAsmBackendWinCOFF.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -31,10 +33,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -98,6 +98,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 0, 20, 0}, {"fixup_t2_movw_lo16", 0, 20, 0}, {"fixup_arm_mod_imm", 0, 12, 0}, + {"fixup_t2_so_imm", 0, 26, 0}, }; const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { // This table *must* be in the order that the fixup_* kinds are defined in @@ -148,6 +149,7 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_movt_hi16", 12, 20, 0}, {"fixup_t2_movw_lo16", 12, 20, 0}, {"fixup_arm_mod_imm", 20, 12, 0}, + {"fixup_t2_so_imm", 26, 6, 0}, }; if (Kind < FirstTargetFixupKind) @@ -693,6 +695,23 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return 0; } return Value; + case ARM::fixup_t2_so_imm: { + Value = ARM_AM::getT2SOImmVal(Value); + if ((int64_t)Value < 0) { + Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value"); + return 0; + } + // Value will contain a 12-bit value broken up into a 4-bit shift in bits + // 11:8 and the 8-bit immediate in 0:7. The instruction has the immediate + // in 0:7. The 4-bit shift is split up into i:imm3 where i is placed at bit + // 10 of the upper half-word and imm3 is placed at 14:12 of the lower + // half-word. + uint64_t EncValue = 0; + EncValue |= (Value & 0x800) << 15; + EncValue |= (Value & 0x700) << 4; + EncValue |= (Value & 0xff); + return swapHalfWords(EncValue, IsLittleEndian); + } } } @@ -704,16 +723,17 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, bool &IsResolved) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; + const unsigned FixupKind = Fixup.getKind() ; // MachO (the only user of "Value") tries to make .o files that look vaguely // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit // into the addend if possible. Other relocation types don't want this bit // though (branches couldn't encode it if it *was* present, and no other // relocations exist) and it can interfere with checking valid expressions. - if ((unsigned)Fixup.getKind() == FK_Data_4 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 || - (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) { + if (FixupKind == FK_Data_4 || + FixupKind == ARM::fixup_arm_movw_lo16 || + FixupKind == ARM::fixup_arm_movt_hi16 || + FixupKind == ARM::fixup_t2_movw_lo16 || + FixupKind == ARM::fixup_t2_movt_hi16) { if (Sym) { if (Asm.isThumbFunc(Sym)) Value |= 1; @@ -730,13 +750,26 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, if (Sym->isExternal() || Value >= 0x400004) IsResolved = false; } + // Create relocations for unconditional branches to function symbols with + // different execution mode in ELF binaries. + if (Sym && Sym->isELF()) { + unsigned Type = dyn_cast(Sym)->getType(); + if ((Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)) { + if (Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_uncondbranch)) + IsResolved = false; + if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || + FixupKind == ARM::fixup_arm_thumb_bl || + FixupKind == ARM::fixup_t2_uncondbranch)) + IsResolved = false; + } + } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination // symbol's thumb-ness to get interworking right. - if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || - (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) + if (A && (FixupKind == ARM::fixup_arm_thumb_blx || + FixupKind == ARM::fixup_arm_blx || + FixupKind == ARM::fixup_arm_uncondbl || + FixupKind == ARM::fixup_arm_condbl)) IsResolved = false; } @@ -783,6 +816,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_movw_lo16: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_so_imm: return 4; case FK_SecRel_2: @@ -835,6 +869,7 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: case ARM::fixup_arm_mod_imm: + case ARM::fixup_t2_so_imm: // Instruction size is 4 bytes. return 4; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index 09dc0173ade6537d22c7995095c322ed286999df..bd729fabedf5ac1e6a33c4db9e92b110d3d04bcd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -11,7 +11,7 @@ #define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H #include "ARMAsmBackend.h" -#include "llvm/Support/MachO.h" +#include "llvm/BinaryFormat/MachO.h" namespace llvm { class ARMAsmBackendDarwin : public ARMAsmBackend { diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index e1fa2457182024b4c50d2b38856c72dedd6e05e3..59f31be69d58c2201338acc9a66c934eff23075d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -9,12 +9,12 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 774a0b3771b14a6266016e912851514710369bfb..93f4006cee87653dabc701385511d9d70507fa96 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -43,12 +44,11 @@ #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetParser.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -464,7 +464,7 @@ public: void emitUnwindRaw(int64_t Offset, const SmallVectorImpl &Opcodes); void ChangeSection(MCSection *Section, const MCExpr *Subsection) override { - LastMappingSymbols[getPreviousSection().first] = std::move(LastEMSInfo); + LastMappingSymbols[getCurrentSection().first] = std::move(LastEMSInfo); MCELFStreamer::ChangeSection(Section, Subsection); auto LastMappingSymbol = LastMappingSymbols.find(Section); if (LastMappingSymbol != LastMappingSymbols.end()) { @@ -477,8 +477,8 @@ public: /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to add the appropriate mapping symbol if /// necessary. - void EmitInstruction(const MCInst& Inst, - const MCSubtargetInfo &STI) override { + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override { if (IsThumb) EmitThumbMappingSymbol(); else diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 3fe2302bdd37270069c82d181f14357ca390a2c2..9f6c5d7bf920ab652356dca28f9e2e2c87fc0106 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -110,6 +110,9 @@ enum Fixups { // fixup_arm_mod_imm - Fixup for mod_imm fixup_arm_mod_imm, + // fixup_t2_so_imm - Fixup for Thumb2 8-bit rotated operand + fixup_t2_so_imm, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index d9df2c6da7ec4d2c6afc694c936e7472a556d0a3..f1f35f409900de6601c472d0c10b8852392e7fd9 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -339,7 +339,17 @@ public: unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { - unsigned SoImm = MI.getOperand(Op).getImm(); + const MCOperand &MO = MI.getOperand(Op); + + // Support for fixups (MCFixup) + if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + // Fixups resolve to plain values that need to be encoded. + MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_so_imm); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); + return 0; + } + unsigned SoImm = MO.getImm(); unsigned Encoded = ARM_AM::getT2SOImmVal(SoImm); assert(Encoded != ~0U && "Not a Thumb2 so_imm value?"); return Encoded; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 477755157040da50190aef773312706cd41338bc..b8a8b1f7619a069c1bf6fe17235dd15a1ba9dc06 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMMCTargetDesc.h" #include "ARMBaseInfo.h" #include "ARMMCAsmInfo.h" -#include "ARMMCTargetDesc.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCELFStreamer.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 34c770440e1ba5799b71a2a70c1dc54c5fc72b77..5516a1bdb03da996809aae4abaee3956b631ab5b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -9,10 +9,10 @@ #include "ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm-c/Disassembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm-c/Disassembler.h" using namespace llvm; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index b77181f29b2d09f5b47f54f60e9abae806ea5bcb..4a8139dea668261960ce23c759c7779a39dbff1a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -21,7 +22,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" using namespace llvm; namespace { diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 73e563890dd9ffc5431938fb3a10cfefeb531d12..4a943187ab6da611825e796c569ae18e2bd5ce59 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -11,9 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "ARMTargetMachine.h" #include "llvm/MC/ConstantPools.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/TargetParser.h" using namespace llvm; @@ -34,6 +38,7 @@ const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc void ARMTargetStreamer::emitCurrentConstantPool() { ConstantPools->emitForCurrentSection(Streamer); + ConstantPools->clearCacheForCurrentSection(Streamer); } // finish() - write out any non-empty assembler constant pools. @@ -75,3 +80,179 @@ void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} void ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} + +static ARMBuildAttrs::CPUArch getArchForCPU(const MCSubtargetInfo &STI) { + if (STI.getCPU() == "xscale") + return ARMBuildAttrs::v5TEJ; + + if (STI.hasFeature(ARM::HasV8Ops)) { + if (STI.hasFeature(ARM::FeatureRClass)) + return ARMBuildAttrs::v8_R; + return ARMBuildAttrs::v8_A; + } else if (STI.hasFeature(ARM::HasV8MMainlineOps)) + return ARMBuildAttrs::v8_M_Main; + else if (STI.hasFeature(ARM::HasV7Ops)) { + if (STI.hasFeature(ARM::FeatureMClass) && STI.hasFeature(ARM::FeatureDSP)) + return ARMBuildAttrs::v7E_M; + return ARMBuildAttrs::v7; + } else if (STI.hasFeature(ARM::HasV6T2Ops)) + return ARMBuildAttrs::v6T2; + else if (STI.hasFeature(ARM::HasV8MBaselineOps)) + return ARMBuildAttrs::v8_M_Base; + else if (STI.hasFeature(ARM::HasV6MOps)) + return ARMBuildAttrs::v6S_M; + else if (STI.hasFeature(ARM::HasV6Ops)) + return ARMBuildAttrs::v6; + else if (STI.hasFeature(ARM::HasV5TEOps)) + return ARMBuildAttrs::v5TE; + else if (STI.hasFeature(ARM::HasV5TOps)) + return ARMBuildAttrs::v5T; + else if (STI.hasFeature(ARM::HasV4TOps)) + return ARMBuildAttrs::v4T; + else + return ARMBuildAttrs::v4; +} + +static bool isV8M(const MCSubtargetInfo &STI) { + // Note that v8M Baseline is a subset of v6T2! + return (STI.hasFeature(ARM::HasV8MBaselineOps) && + !STI.hasFeature(ARM::HasV6T2Ops)) || + STI.hasFeature(ARM::HasV8MMainlineOps); +} + +/// Emit the build attributes that only depend on the hardware that we expect +// /to be available, and not on the ABI, or any source-language choices. +void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { + switchVendor("aeabi"); + + const StringRef CPUString = STI.getCPU(); + if (!CPUString.empty() && !CPUString.startswith("generic")) { + // FIXME: remove krait check when GNU tools support krait cpu + if (STI.hasFeature(ARM::ProcKrait)) { + emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); + // We consider krait as a "cortex-a9" + hwdiv CPU + // Enable hwdiv through ".arch_extension idiv" + if (STI.hasFeature(ARM::FeatureHWDivThumb) || + STI.hasFeature(ARM::FeatureHWDivARM)) + emitArchExtension(ARM::AEK_HWDIVTHUMB | ARM::AEK_HWDIVARM); + } else { + emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); + } + } + + emitAttribute(ARMBuildAttrs::CPU_arch, getArchForCPU(STI)); + + if (STI.hasFeature(ARM::FeatureAClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); + } else if (STI.hasFeature(ARM::FeatureRClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::RealTimeProfile); + } else if (STI.hasFeature(ARM::FeatureMClass)) { + emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::MicroControllerProfile); + } + + emitAttribute(ARMBuildAttrs::ARM_ISA_use, STI.hasFeature(ARM::FeatureNoARM) + ? ARMBuildAttrs::Not_Allowed + : ARMBuildAttrs::Allowed); + + if (isV8M(STI)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumbDerived); + } else if (STI.hasFeature(ARM::FeatureThumb2)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + } else if (STI.hasFeature(ARM::HasV4TOps)) { + emitAttribute(ARMBuildAttrs::THUMB_ISA_use, ARMBuildAttrs::Allowed); + } + + if (STI.hasFeature(ARM::FeatureNEON)) { + /* NEON is not exactly a VFP architecture, but GAS emit one of + * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ + if (STI.hasFeature(ARM::FeatureFPARMv8)) { + if (STI.hasFeature(ARM::FeatureCrypto)) + emitFPU(ARM::FK_CRYPTO_NEON_FP_ARMV8); + else + emitFPU(ARM::FK_NEON_FP_ARMV8); + } else if (STI.hasFeature(ARM::FeatureVFP4)) + emitFPU(ARM::FK_NEON_VFPV4); + else + emitFPU(STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_NEON_FP16 + : ARM::FK_NEON); + // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture + if (STI.hasFeature(ARM::HasV8Ops)) + emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + STI.hasFeature(ARM::HasV8_1aOps) + ? ARMBuildAttrs::AllowNeonARMv8_1a + : ARMBuildAttrs::AllowNeonARMv8); + } else { + if (STI.hasFeature(ARM::FeatureFPARMv8)) + // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one + // FPU, but there are two different names for it depending on the CPU. + emitFPU(STI.hasFeature(ARM::FeatureD16) + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV5_SP_D16 + : ARM::FK_FPV5_D16) + : ARM::FK_FP_ARMV8); + else if (STI.hasFeature(ARM::FeatureVFP4)) + emitFPU(STI.hasFeature(ARM::FeatureD16) + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) ? ARM::FK_FPV4_SP_D16 + : ARM::FK_VFPV4_D16) + : ARM::FK_VFPV4); + else if (STI.hasFeature(ARM::FeatureVFP3)) + emitFPU( + STI.hasFeature(ARM::FeatureD16) + // +d16 + ? (STI.hasFeature(ARM::FeatureVFPOnlySP) + ? (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3XD_FP16 + : ARM::FK_VFPV3XD) + : (STI.hasFeature(ARM::FeatureFP16) + ? ARM::FK_VFPV3_D16_FP16 + : ARM::FK_VFPV3_D16)) + // -d16 + : (STI.hasFeature(ARM::FeatureFP16) ? ARM::FK_VFPV3_FP16 + : ARM::FK_VFPV3)); + else if (STI.hasFeature(ARM::FeatureVFP2)) + emitFPU(ARM::FK_VFPV2); + } + + // ABI_HardFP_use attribute to indicate single precision FP. + if (STI.hasFeature(ARM::FeatureVFPOnlySP)) + emitAttribute(ARMBuildAttrs::ABI_HardFP_use, + ARMBuildAttrs::HardFPSinglePrecision); + + if (STI.hasFeature(ARM::FeatureFP16)) + emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); + + if (STI.hasFeature(ARM::FeatureMP)) + emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); + + // Hardware divide in ARM mode is part of base arch, starting from ARMv8. + // If only Thumb hwdiv is present, it must also be in base arch (ARMv7-R/M). + // It is not possible to produce DisallowDIV: if hwdiv is present in the base + // arch, supplying -hwdiv downgrades the effective arch, via ClearImpliedBits. + // AllowDIVExt is only emitted if hwdiv isn't available in the base arch; + // otherwise, the default value (AllowDIVIfExists) applies. + if (STI.hasFeature(ARM::FeatureHWDivARM) && !STI.hasFeature(ARM::HasV8Ops)) + emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + + if (STI.hasFeature(ARM::FeatureDSP) && isV8M(STI)) + emitAttribute(ARMBuildAttrs::DSP_extension, ARMBuildAttrs::Allowed); + + if (STI.hasFeature(ARM::FeatureStrictAlign)) + emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Not_Allowed); + else + emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Allowed); + + if (STI.hasFeature(ARM::FeatureTrustZone) && + STI.hasFeature(ARM::FeatureVirtualization)) + emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowTZVirtualization); + else if (STI.hasFeature(ARM::FeatureTrustZone)) + emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZ); + else if (STI.hasFeature(ARM::FeatureVirtualization)) + emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowVirtualization); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 7ae2f864d79dcffb462e5900133bc5e9126b0ecc..00505a103e00ff9c851dee3da7c38dee17731d1c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -9,13 +9,13 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index fc083b98395b00ae9ea22ff127921c490b53abbf..0b6574c37de1234a32017c411e2cf1ebbfd5ea64 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -11,26 +11,26 @@ // //===----------------------------------------------------------------------===// +#include "Thumb1FrameLowering.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMBaseInfo.h" -#include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "ThumbRegisterInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCDwarf.h" @@ -83,13 +83,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // ADJCALLSTACKUP -> add, sp, sp, amount MachineInstr &Old = *I; DebugLoc dl = Old.getDebugLoc(); - unsigned Amount = Old.getOperand(0).getImm(); + unsigned Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned Align = getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; + Amount = alignTo(Amount, getStackAlignment()); // Replace the pseudo instruction with a new instruction... unsigned Opc = Old.getOpcode(); @@ -536,14 +535,14 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // Look for a temporary register to use. // First, compute the liveness information. - LivePhysRegs UsedRegs(STI.getRegisterInfo()); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + LivePhysRegs UsedRegs(TRI); UsedRegs.addLiveOuts(MBB); // The semantic of pristines changed recently and now, // the callee-saved registers that are touched in the function // are not part of the pristines set anymore. // Add those callee-saved now. - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) UsedRegs.addReg(CSRegs[i]); @@ -562,18 +561,17 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // And some temporary register, just in case. unsigned TemporaryReg = 0; BitVector PopFriendly = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); + TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed // form the GPR reg class for thumb1. BitVector GPRsNoLRSP = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); + TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); GPRsNoLRSP |= PopFriendly; GPRsNoLRSP.reset(ARM::LR); GPRsNoLRSP.reset(ARM::SP); GPRsNoLRSP.reset(ARM::PC); - for (int Register = GPRsNoLRSP.find_first(); Register != -1; - Register = GPRsNoLRSP.find_next(Register)) { + for (unsigned Register : GPRsNoLRSP.set_bits()) { if (!UsedRegs.contains(Register)) { // Remember the first pop-friendly register and exit. if (PopFriendly.test(Register)) { @@ -700,13 +698,14 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, CopyRegs.insert(ArgReg); // Push the low registers and lr + const MachineRegisterInfo &MRI = MF.getRegInfo(); if (!LoRegsToSave.empty()) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { if (LoRegsToSave.count(Reg)) { - bool isKill = !MF.getRegInfo().isLiveIn(Reg); - if (isKill) + bool isKill = !MRI.isLiveIn(Reg); + if (isKill && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); MIB.addReg(Reg, getKillRegState(isKill)); @@ -748,8 +747,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, SmallVector RegsToPush; while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { if (HiRegsToSave.count(*HiRegToSave)) { - bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave); - if (isKill) + bool isKill = !MRI.isLiveIn(*HiRegToSave); + if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); // Emit a MOV from the high reg to the low reg. diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 27bff4d75acf4c2724a1e03509c12e9271526102..3a3920a2db32766afd66070508dd3973a51ecc5c 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -24,8 +24,8 @@ using namespace llvm; Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI() {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void Thumb1InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(ARM::tMOVr); NopInst.addOperand(MCOperand::createReg(ARM::R8)); NopInst.addOperand(MCOperand::createReg(ARM::R8)); diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 931914ad2799c1bc682da5d4083da8ca2221cbd5..e8d9a9c4ff1445130e356a39e20ae80eff4557f8 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -25,8 +25,8 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb1InstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 818ba85c7d4083e408f3788181197a2ff41ee319..9125be96a07b487657ea60ef4b68185bc97763f5 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===// +//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------------===// // // The LLVM Compiler Infrastructure // @@ -11,16 +11,26 @@ // //===----------------------------------------------------------------------===// -#include "Thumb2InstrInfo.h" -#include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include using namespace llvm; @@ -30,10 +40,10 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden, cl::init(false)); Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI() {} + : ARMBaseInstrInfo(STI) {} -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void Thumb2InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(ARM::tHINT); NopInst.addOperand(MCOperand::createImm(0)); NopInst.addOperand(MCOperand::createImm(ARMCC::AL)); @@ -539,9 +549,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Add cc_out operand if the original instruction did not have one. if (!HasCCOut) MI.addOperand(MachineOperand::CreateReg(0, false)); - } else { - // AddrMode4 and AddrMode6 cannot handle any offset. if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) return false; diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 15d63300b6a295cff8e367e9f40f41de9104ef14..c834ba73bfea7392fb7dddf3a2186e72743fb0ca 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -26,8 +26,8 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb2InstrInfo(const ARMSubtarget &STI); - /// getNoopForMachoTarget - Return the noop instruction to use for a noop. - void getNoopForMachoTarget(MCInst &NopInst) const override; + /// Return the noop instruction to use for a noop. + void getNoop(MCInst &NopInst) const override; // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index c90475c28db77995916f8f13bd8c301c6463155e..d911dd97b1ac7e6a7d7628af576c0ecdb8fb9fb8 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -14,10 +14,10 @@ #include "Thumb2InstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/AVR/AVR.h b/lib/Target/AVR/AVR.h index 8e5cc5360ad430792bd7833cd9343d0869424113..5eadf7bdcef6a4b803c159bb8b7aa868a869b2e2 100644 --- a/lib/Target/AVR/AVR.h +++ b/lib/Target/AVR/AVR.h @@ -15,8 +15,8 @@ #ifndef LLVM_AVR_H #define LLVM_AVR_H -#include "llvm/Target/TargetMachine.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp index 50bb50b44f27b80dad552d22bdd88852c6e32db7..f0c7b11895b4a08f0129dca9f7ed5330bd94c281 100644 --- a/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/lib/Target/AVR/AVRAsmPrinter.cpp @@ -18,8 +18,8 @@ #include "InstPrinter/AVRInstPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" @@ -112,7 +112,8 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const AVRSubtarget &STI = MF->getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); - unsigned BytesPerReg = TRI.getMinimalPhysRegClass(Reg)->getSize(); + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg); + unsigned BytesPerReg = TRI.getRegSizeInBits(*RC) / 8; assert(BytesPerReg <= 2 && "Only 8 and 16 bit regs are supported."); unsigned RegIdx = ByteNumber / BytesPerReg; diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 13080a5d72f0485a5027fdb18db15608d369680d..540e05a929971f818d4c4f9b2fe762d94c6df6aa 100644 --- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -88,6 +88,9 @@ private: unsigned ArithOpcode, Block &MBB, BlockIt MBBI); + + /// Scavenges a free GPR8 register for use. + unsigned scavengeGPR8(MachineInstr &MI); }; char AVRExpandPseudo::ID = 0; @@ -577,24 +580,43 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned OpLo, OpHi, DstLoReg, DstHiReg; unsigned DstReg = MI.getOperand(0).getReg(); + unsigned TmpReg = 0; // 0 for no temporary register unsigned SrcReg = MI.getOperand(1).getReg(); - bool DstIsDead = MI.getOperand(0).isDead(); bool SrcIsKill = MI.getOperand(1).isKill(); OpLo = AVR::LDRdPtr; OpHi = AVR::LDDRdPtrQ; TRI->splitReg(DstReg, DstLoReg, DstHiReg); - assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same"); + // Use a temporary register if src and dst registers are the same. + if (DstReg == SrcReg) + TmpReg = scavengeGPR8(MI); + + unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; + unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; + // Load low byte. auto MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(CurDstLoReg, RegState::Define) .addReg(SrcReg); + // Push low byte onto stack if necessary. + if (TmpReg) + buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); + + // Load high byte. auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(CurDstHiReg, RegState::Define) .addReg(SrcReg, getKillRegState(SrcIsKill)) .addImm(1); + if (TmpReg) { + // Move the high byte into the final destination. + buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg); + + // Move the low byte from the scratch space into the final destination. + buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg); + } + MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -669,9 +691,9 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { MachineInstr &MI = *MBBI; unsigned OpLo, OpHi, DstLoReg, DstHiReg; unsigned DstReg = MI.getOperand(0).getReg(); + unsigned TmpReg = 0; // 0 for no temporary register unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Imm = MI.getOperand(2).getImm(); - bool DstIsDead = MI.getOperand(0).isDead(); bool SrcIsKill = MI.getOperand(1).isKill(); OpLo = AVR::LDDRdPtrQ; OpHi = AVR::LDDRdPtrQ; @@ -679,60 +701,35 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { assert(Imm <= 63 && "Offset is out of range"); - MachineInstr *MIBLO, *MIBHI; - - // HACK: We shouldn't have instances of this instruction - // where src==dest because the instruction itself is - // marked earlyclobber. We do however get this instruction when - // loading from stack slots where the earlyclobber isn't useful. - // - // In this case, just use a temporary register. - if (DstReg == SrcReg) { - RegScavenger RS; - - RS.enterBasicBlock(MBB); - RS.forward(MBBI); - - BitVector Candidates = - TRI->getAllocatableSet - (*MBB.getParent(), &AVR::GPR8RegClass); - - // Exclude all the registers being used by the instruction. - for (MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - Candidates.reset(MO.getReg()); - } - - BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass); - Available &= Candidates; + // Use a temporary register if src and dst registers are the same. + if (DstReg == SrcReg) + TmpReg = scavengeGPR8(MI); - signed TmpReg = Available.find_first(); - assert(TmpReg != -1 && "ran out of registers"); + unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg; + unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg; - MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(TmpReg, RegState::Define) - .addReg(SrcReg) - .addImm(Imm); + // Load low byte. + auto MIBLO = buildMI(MBB, MBBI, OpLo) + .addReg(CurDstLoReg, RegState::Define) + .addReg(SrcReg) + .addImm(Imm); - buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstLoReg).addReg(TmpReg); + // Push low byte onto stack if necessary. + if (TmpReg) + buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg); - MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(TmpReg, RegState::Define) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(Imm + 1); + // Load high byte. + auto MIBHI = buildMI(MBB, MBBI, OpHi) + .addReg(CurDstHiReg, RegState::Define) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(Imm + 1); + if (TmpReg) { + // Move the high byte into the final destination. buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg); - } else { - MIBLO = buildMI(MBB, MBBI, OpLo) - .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(SrcReg) - .addImm(Imm); - MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(Imm + 1); + // Move the low byte from the scratch space into the final destination. + buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg); } MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -819,6 +816,32 @@ bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width, }); } +unsigned AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) { + MachineBasicBlock &MBB = *MI.getParent(); + RegScavenger RS; + + RS.enterBasicBlock(MBB); + RS.forward(MI); + + BitVector Candidates = + TRI->getAllocatableSet + (*MBB.getParent(), &AVR::GPR8RegClass); + + // Exclude all the registers being used by the instruction. + for (MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + Candidates.reset(MO.getReg()); + } + + BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass); + Available &= Candidates; + + signed Reg = Available.find_first(); + assert(Reg != -1 && "ran out of registers"); + return Reg; +} + template<> bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return expandAtomicBinaryOp(AVR::LDRdPtr, MBB, MBBI); @@ -948,7 +971,6 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { unsigned OpLo, OpHi, SrcLoReg, SrcHiReg; unsigned DstReg = MI.getOperand(0).getReg(); unsigned SrcReg = MI.getOperand(1).getReg(); - bool DstIsKill = MI.getOperand(0).isKill(); bool SrcIsKill = MI.getOperand(1).isKill(); OpLo = AVR::STPtrRr; OpHi = AVR::STDPtrQRr; @@ -960,7 +982,7 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { .addReg(SrcLoReg, getKillRegState(SrcIsKill)); auto MIBHI = buildMI(MBB, MBBI, OpHi) - .addReg(DstReg, getKillRegState(DstIsKill)) + .addReg(DstReg) .addImm(1) .addReg(SrcHiReg, getKillRegState(SrcIsKill)); diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp index b8cb2215ddb4035bb124b0a0f1c6f30c5ec13eea..0ec8e8b08ceb451d4e870ac91a3e5f85973cf8ec 100644 --- a/lib/Target/AVR/AVRFrameLowering.cpp +++ b/lib/Target/AVR/AVRFrameLowering.cpp @@ -57,6 +57,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, DebugLoc DL = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc(); const AVRSubtarget &STI = MF.getSubtarget(); const AVRInstrInfo &TII = *STI.getInstrInfo(); + bool HasFP = hasFP(MF); // Interrupt handlers re-enable interrupts in function entry. if (CallConv == CallingConv::AVR_INTR) { @@ -65,6 +66,13 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + // Save the frame pointer if we have one. + if (HasFP) { + BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) + .addReg(AVR::R29R28, RegState::Kill) + .setMIFlag(MachineInstr::FrameSetup); + } + // Emit special prologue code to save R1, R0 and SREG in interrupt/signal // handlers before saving any other registers. if (CallConv == CallingConv::AVR_INTR || @@ -72,6 +80,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr)) .addReg(AVR::R1R0, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0) .addImm(0x3f) .setMIFlag(MachineInstr::FrameSetup); @@ -86,7 +95,7 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF, } // Early exit if the frame pointer is not needed in this function. - if (!hasFP(MF)) { + if (!HasFP) { return; } @@ -165,6 +174,9 @@ void AVRFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0); } + if (hasFP(MF)) + BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R29R28); + // Early exit if there is no need to restore the frame pointer. if (!FrameSize) { return; @@ -239,7 +251,7 @@ bool AVRFrameLowering::spillCalleeSavedRegisters( unsigned Reg = CSI[i - 1].getReg(); bool IsNotLiveIn = !MBB.isLiveIn(Reg); - assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 && + assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 && "Invalid register size"); // Add the callee-saved register as live-in only if it is not already a @@ -277,7 +289,7 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters( for (const CalleeSavedInfo &CCSI : CSI) { unsigned Reg = CCSI.getReg(); - assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 && + assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 && "Invalid register size"); BuildMI(MBB, MI, DL, TII.get(AVR::POPRd), Reg); @@ -363,7 +375,7 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr( DebugLoc DL = MI->getDebugLoc(); unsigned int Opcode = MI->getOpcode(); - int Amount = MI->getOperand(0).getImm(); + int Amount = TII.getFrameSize(*MI); // Adjcallstackup does not need to allocate stack space for the call, instead // we insert push instructions that will allocate the necessary stack. @@ -407,12 +419,9 @@ void AVRFrameLowering::determineCalleeSaves(MachineFunction &MF, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - // Spill register Y when it is used as the frame pointer. - if (hasFP(MF)) { - SavedRegs.set(AVR::R29R28); - SavedRegs.set(AVR::R29); - SavedRegs.set(AVR::R28); - } + // If we have a frame pointer, the Y register needs to be saved as well. + // We don't do that here however - the prologue and epilogue generation + // code will handle it specially. } /// The frame analyzer pass. /// diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp index 0b95d3819399c11b660d841d13e686ef4fe1448f..7d3faac1dcc201bdee6bb11c7543c60694f3d51c 100644 --- a/lib/Target/AVR/AVRISelLowering.cpp +++ b/lib/Target/AVR/AVRISelLowering.cpp @@ -79,6 +79,11 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm) setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand); + setOperationAction(ISD::ROTL, MVT::i8, Custom); + setOperationAction(ISD::ROTL, MVT::i16, Custom); + setOperationAction(ISD::ROTR, MVT::i8, Custom); + setOperationAction(ISD::ROTR, MVT::i16, Custom); + setOperationAction(ISD::BR_CC, MVT::i8, Custom); setOperationAction(ISD::BR_CC, MVT::i16, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Custom); @@ -273,6 +278,12 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { case ISD::SRL: return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0), N->getOperand(1)); + case ISD::ROTL: + return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), + N->getOperand(1)); + case ISD::ROTR: + return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), + N->getOperand(1)); case ISD::SRA: return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0), N->getOperand(1)); @@ -350,7 +361,7 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy(DAG.getDataLayout())); - Type *RetTy = (Type *)StructType::get(Ty, Ty, nullptr); + Type *RetTy = (Type *)StructType::get(Ty, Ty); SDLoc dl(Op); TargetLowering::CallLoweringInfo CLI(DAG); @@ -1155,8 +1166,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL, true), - DL); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); SmallVector, 8> RegsToPass; @@ -1440,6 +1450,22 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, Opc = AVR::LSRWRd; RC = &AVR::DREGSRegClass; break; + case AVR::Rol8: + Opc = AVR::ROLRd; + RC = &AVR::GPR8RegClass; + break; + case AVR::Rol16: + Opc = AVR::ROLWRd; + RC = &AVR::DREGSRegClass; + break; + case AVR::Ror8: + Opc = AVR::RORRd; + RC = &AVR::GPR8RegClass; + break; + case AVR::Ror16: + Opc = AVR::RORWRd; + RC = &AVR::DREGSRegClass; + break; } const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -1474,9 +1500,9 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, unsigned DstReg = MI.getOperand(0).getReg(); // BB: - // cp 0, N + // cpi N, 0 // breq RemBB - BuildMI(BB, dl, TII.get(AVR::CPRdRr)).addReg(ShiftAmtSrcReg).addReg(AVR::R0); + BuildMI(BB, dl, TII.get(AVR::CPIRdK)).addReg(ShiftAmtSrcReg).addImm(0); BuildMI(BB, dl, TII.get(AVR::BREQk)).addMBB(RemBB); // LoopBB: @@ -1552,6 +1578,10 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case AVR::Lsl16: case AVR::Lsr8: case AVR::Lsr16: + case AVR::Rol8: + case AVR::Rol16: + case AVR::Ror8: + case AVR::Ror16: case AVR::Asr8: case AVR::Asr16: return insertShift(MI, MBB); @@ -1580,8 +1610,9 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator I = MBB->getParent()->begin(); - ++I; + MachineFunction::iterator I; + for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I); + if (I != MF->end()) ++I; MF->insert(I, trueMBB); MF->insert(I, falseMBB); diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h index a8cdc4e7ae234f4c53c047df19ecd8c223b90a31..b44c62a21ac3833000dfee8d4937a164ccf84454 100644 --- a/lib/Target/AVR/AVRISelLowering.h +++ b/lib/Target/AVR/AVRISelLowering.h @@ -43,6 +43,8 @@ enum NodeType { ROL, ///< Bit rotate left. LSLLOOP, ///< A loop of single logical shift left instructions. LSRLOOP, ///< A loop of single logical shift right instructions. + ROLLOOP, ///< A loop of single left bit rotate instructions. + RORLOOP, ///< A loop of single right bit rotate instructions. ASRLOOP, ///< A loop of single arithmetic shift right instructions. /// AVR conditional branches. Operand 0 is the chain operand, operand 1 /// is the block to branch if condition is true, operand 2 is the diff --git a/lib/Target/AVR/AVRInstrInfo.cpp b/lib/Target/AVR/AVRInstrInfo.cpp index 88f889260cce381f5709608e05abc992716002aa..afba66b2e69bb5d8eb6d8df0e9671c0894a41919 100644 --- a/lib/Target/AVR/AVRInstrInfo.cpp +++ b/lib/Target/AVR/AVRInstrInfo.cpp @@ -142,9 +142,9 @@ void AVRInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MFI.getObjectAlignment(FrameIndex)); unsigned Opcode = 0; - if (RC->hasType(MVT::i8)) { + if (TRI->isTypeLegalForClass(*RC, MVT::i8)) { Opcode = AVR::STDPtrQRr; - } else if (RC->hasType(MVT::i16)) { + } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) { Opcode = AVR::STDWPtrQRr; } else { llvm_unreachable("Cannot store this register into a stack slot!"); @@ -176,9 +176,9 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MFI.getObjectAlignment(FrameIndex)); unsigned Opcode = 0; - if (RC->hasType(MVT::i8)) { + if (TRI->isTypeLegalForClass(*RC, MVT::i8)) { Opcode = AVR::LDDRdPtrQ; - } else if (RC->hasType(MVT::i16)) { + } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) { // Opcode = AVR::LDDWRdPtrQ; //:FIXME: remove this once PR13375 gets fixed Opcode = AVR::LDDWRdYQ; diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td index 693d80a1c06fbf95d915d46b903e08f65f9027be..5dd8b2c27b212e2585e8bb65799e0824d56b0527 100644 --- a/lib/Target/AVR/AVRInstrInfo.td +++ b/lib/Target/AVR/AVRInstrInfo.td @@ -17,7 +17,7 @@ include "AVRInstrFormats.td" // AVR Type Profiles //===----------------------------------------------------------------------===// -def SDT_AVRCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>; +def SDT_AVRCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>; def SDT_AVRCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>; def SDT_AVRCall : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; def SDT_AVRWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -64,6 +64,8 @@ def AVRasr : SDNode<"AVRISD::ASR", SDTIntUnaryOp>; // Pseudo shift nodes for non-constant shift amounts. def AVRlslLoop : SDNode<"AVRISD::LSLLOOP", SDTIntShiftOp>; def AVRlsrLoop : SDNode<"AVRISD::LSRLOOP", SDTIntShiftOp>; +def AVRrolLoop : SDNode<"AVRISD::ROLLOOP", SDTIntShiftOp>; +def AVRrorLoop : SDNode<"AVRISD::RORLOOP", SDTIntShiftOp>; def AVRasrLoop : SDNode<"AVRISD::ASRLOOP", SDTIntShiftOp>; //===----------------------------------------------------------------------===// @@ -183,33 +185,33 @@ def call_target : Operand // A 16-bit address (which can lead to an R_AVR_16 relocation). def imm16 : Operand { - let EncoderMethod = "encodeImm"; + let EncoderMethod = "encodeImm"; } /// A 6-bit immediate used in the ADIW/SBIW instructions. def imm_arith6 : Operand { - let EncoderMethod = "encodeImm"; + let EncoderMethod = "encodeImm"; } /// An 8-bit immediate inside an instruction with the same format /// as the `LDI` instruction (the `FRdK` format). def imm_ldi8 : Operand { - let EncoderMethod = "encodeImm"; + let EncoderMethod = "encodeImm"; } /// A 5-bit port number used in SBIC and friends (the `FIOBIT` format). def imm_port5 : Operand { - let EncoderMethod = "encodeImm"; + let EncoderMethod = "encodeImm"; } /// A 6-bit port number used in the `IN` instruction and friends (the /// `FIORdA` format. def imm_port6 : Operand { - let EncoderMethod = "encodeImm"; + let EncoderMethod = "encodeImm"; } // Addressing mode pattern reg+imm6 @@ -331,9 +333,9 @@ let Defs = [SP, SREG], Uses = [SP] in { def ADJCALLSTACKDOWN : Pseudo<(outs), - (ins i16imm:$amt), + (ins i16imm:$amt, i16imm:$amt2), "#ADJCALLSTACKDOWN", - [(AVRcallseq_start timm:$amt)]>; + [(AVRcallseq_start timm:$amt, timm:$amt2)]>; // R31R30 is used to update SP, since it is a scratch reg and this instruction // is placed after the function call then R31R30 should be always free. @@ -900,10 +902,9 @@ let Defs = [SREG] in // CPI Rd, K // Compares a register with an 8 bit immediate. - let Uses = [SREG] in def CPIRdK : FRdK<0b0011, (outs), - (ins GPR8:$rd, imm_ldi8:$k), + (ins LD8:$rd, imm_ldi8:$k), "cpi\t$rd, $k", [(AVRcmp i8:$rd, imm:$k), (implicit SREG)]>; } @@ -1932,7 +1933,6 @@ def Lsr8 : ShiftPseudo< [(set i8:$dst, (AVRlsrLoop i8:$src, i8:$cnt))] >; - def Lsr16 : ShiftPseudo< (outs DREGS:$dst), (ins DREGS:$src, GPR8:$cnt), @@ -1940,6 +1940,34 @@ def Lsr16 : ShiftPseudo< [(set i16:$dst, (AVRlsrLoop i16:$src, i8:$cnt))] >; +def Rol8 : ShiftPseudo< + (outs GPR8:$dst), + (ins GPR8:$src, GPR8:$cnt), + "# Rol8 PSEUDO", + [(set i8:$dst, (AVRrolLoop i8:$src, i8:$cnt))] +>; + +def Rol16 : ShiftPseudo< + (outs DREGS:$dst), + (ins DREGS:$src, GPR8:$cnt), + "# Rol16 PSEUDO", + [(set i16:$dst, (AVRrolLoop i16:$src, i8:$cnt))] +>; + +def Ror8 : ShiftPseudo< + (outs GPR8:$dst), + (ins GPR8:$src, GPR8:$cnt), + "# Ror8 PSEUDO", + [(set i8:$dst, (AVRrorLoop i8:$src, i8:$cnt))] +>; + +def Ror16 : ShiftPseudo< + (outs DREGS:$dst), + (ins DREGS:$src, GPR8:$cnt), + "# Ror16 PSEUDO", + [(set i16:$dst, (AVRrorLoop i16:$src, i8:$cnt))] +>; + def Asr8 : ShiftPseudo< (outs GPR8:$dst), (ins GPR8:$src, GPR8:$cnt), diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index 48798bd4a1da868f799f56ca7ade6251cfe0221e..55f3f5cf428acdee6ebc4ff819ede03f6b955e21 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -51,8 +51,6 @@ AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF, BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const AVRTargetMachine &TM = static_cast(MF.getTarget()); - const TargetFrameLowering *TFI = TM.getSubtargetImpl()->getFrameLowering(); // Reserve the intermediate result registers r1 and r2 // The result of instructions like 'mul' is always stored here. @@ -65,12 +63,18 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AVR::SPH); Reserved.set(AVR::SP); - // Reserve the frame pointer registers r28 and r29 if the function requires one. - if (TFI->hasFP(MF)) { - Reserved.set(AVR::R28); - Reserved.set(AVR::R29); - Reserved.set(AVR::R29R28); - } + // We tenatively reserve the frame pointer register r29:r28 because the + // function may require one, but we cannot tell until register allocation + // is complete, which can be too late. + // + // Instead we just unconditionally reserve the Y register. + // + // TODO: Write a pass to enumerate functions which reserved the Y register + // but didn't end up needing a frame pointer. In these, we can + // convert one or two of the spills inside to use the Y register. + Reserved.set(AVR::R28); + Reserved.set(AVR::R29); + Reserved.set(AVR::R29R28); return Reserved; } @@ -78,11 +82,12 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const { const TargetRegisterClass * AVRRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const { - if (RC->hasType(MVT::i16)) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (TRI->isTypeLegalForClass(*RC, MVT::i16)) { return &AVR::DREGSRegClass; } - if (RC->hasType(MVT::i8)) { + if (TRI->isTypeLegalForClass(*RC, MVT::i8)) { return &AVR::GPR8RegClass; } @@ -263,4 +268,3 @@ void AVRRegisterInfo::splitReg(unsigned Reg, } } // end of namespace llvm - diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp index c228d051d7719d5757e01ccbf9a302cebd0cc660..556d69ec523413ef2e9926142cac1c7f3b73387d 100644 --- a/lib/Target/AVR/AVRSubtarget.cpp +++ b/lib/Target/AVR/AVRSubtarget.cpp @@ -13,7 +13,7 @@ #include "AVRSubtarget.h" -#include "llvm/Support/ELF.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "AVR.h" diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h index a37849c3f3f77efc6a595b32be9d024f07e0c360..b0e634f8616878141544dbc2043734013ef0dbc1 100644 --- a/lib/Target/AVR/AVRSubtarget.h +++ b/lib/Target/AVR/AVRSubtarget.h @@ -14,10 +14,9 @@ #ifndef LLVM_AVR_SUBTARGET_H #define LLVM_AVR_SUBTARGET_H -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "AVRFrameLowering.h" #include "AVRISelLowering.h" diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp index fb3262916b4fdba641c09d1c5fc0f50ec4463021..91d2a8737b8701aaf94c9344ae267b03a34bf966 100644 --- a/lib/Target/AVR/AVRTargetMachine.cpp +++ b/lib/Target/AVR/AVRTargetMachine.cpp @@ -15,12 +15,12 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/Support/TargetRegistry.h" -#include "AVRTargetObjectFile.h" #include "AVR.h" +#include "AVRTargetObjectFile.h" #include "MCTargetDesc/AVRMCTargetDesc.h" namespace llvm { @@ -57,7 +57,7 @@ namespace { /// AVR Code Generator Pass Configuration Options. class AVRPassConfig : public TargetPassConfig { public: - AVRPassConfig(AVRTargetMachine *TM, PassManagerBase &PM) + AVRPassConfig(AVRTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} AVRTargetMachine &getAVRTargetMachine() const { @@ -71,7 +71,7 @@ public: } // namespace TargetPassConfig *AVRTargetMachine::createPassConfig(PassManagerBase &PM) { - return new AVRPassConfig(this, PM); + return new AVRPassConfig(*this, PM); } extern "C" void LLVMInitializeAVRTarget() { diff --git a/lib/Target/AVR/AVRTargetMachine.h b/lib/Target/AVR/AVRTargetMachine.h index 10345193d14af6744d453ec78be0bd53cf296ca5..795e94e6af03a869f796a72b5ec154b5eb9bfea2 100644 --- a/lib/Target/AVR/AVRTargetMachine.h +++ b/lib/Target/AVR/AVRTargetMachine.h @@ -41,6 +41,10 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool isMachineVerifierClean() const override { + return false; + } + private: std::unique_ptr TLOF; AVRSubtarget SubTarget; diff --git a/lib/Target/AVR/AVRTargetObjectFile.cpp b/lib/Target/AVR/AVRTargetObjectFile.cpp index af14d9292f2760562079d1272fca3bd2418a0d94..0cebb0f043f927fd3a2db12cf3856d05e2e64cb2 100644 --- a/lib/Target/AVR/AVRTargetObjectFile.cpp +++ b/lib/Target/AVR/AVRTargetObjectFile.cpp @@ -9,12 +9,12 @@ #include "AVRTargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "AVR.h" diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 5b0398c0ca34bc176ecb8336a44bc08135412ecf..cf52e552978f1303334ef510251d77d546f93da7 100644 --- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -18,12 +18,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" diff --git a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp index d2a21fb64635488bf7682c060db858fbfdd4c89c..e69accfa9393b15116892bbc3c92e4fe87135e39 100644 --- a/lib/Target/AVR/Disassembler/AVRDisassembler.cpp +++ b/lib/Target/AVR/Disassembler/AVRDisassembler.cpp @@ -16,11 +16,11 @@ #include "AVRSubtarget.h" #include "MCTargetDesc/AVRMCTargetDesc.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp index 7137548210058ff00fb6273406588e604671b33e..1e61eccf775f55695bd7c81a65e5cccdffebf056 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp @@ -1,8 +1,8 @@ #include "AVRELFStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/FormattedStream.h" #include "AVRMCTargetDesc.h" diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp index 9f2ee8cf80356599eb9031c4d3bfa673c5ee5fcf..535bb012eb07cae173e943f84bdb367f7d3b1613 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp @@ -18,7 +18,7 @@ namespace llvm { AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT) { - PointerSize = 2; + CodePointerSize = 2; CalleeSaveStackSlotSize = 2; CommentString = ";"; PrivateGlobalPrefix = ".L"; diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp index c3d43ebb407ecde1817ad874e58af77a85aff009..4dbbce8c205e21cf5acb4039b81060d83606b111 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp @@ -177,7 +177,7 @@ unsigned AVRMCCodeEmitter::encodeComplement(const MCInst &MI, unsigned OpNo, return (~0) - Imm; } -template +template unsigned AVRMCCodeEmitter::encodeImm(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { @@ -193,7 +193,7 @@ unsigned AVRMCCodeEmitter::encodeImm(const MCInst &MI, unsigned OpNo, } MCFixupKind FixupKind = static_cast(Fixup); - Fixups.push_back(MCFixup::create(0, MO.getExpr(), FixupKind, MI.getLoc())); + Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), FixupKind, MI.getLoc())); return 0; } diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h index 4cee8d904c9d16bcc15d37053a5f105910926c80..883abf8db78a93406f01346a8193165691d44db2 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h +++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h @@ -69,7 +69,8 @@ private: const MCSubtargetInfo &STI) const; /// Encodes an immediate value with a given fixup. - template + /// \tparam Offset The offset into the instruction for the fixup. + template unsigned encodeImm(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp index 400296b8409b49c6718496d920b5c3567fc60830..085afd23a83ce42c99f2faec7e8e8c64c80dc685 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp @@ -9,11 +9,11 @@ #include "AVRMCExpr.h" +#include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCValue.h" -#include "llvm/MC/MCAsmLayout.h" namespace llvm { diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp index a4fa5c0a931089c908ef3621c7d7990b7a64c0b7..826430e94b9c2c3a7904699f07827b4c60e68773 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "AVRMCTargetDesc.h" #include "AVRELFStreamer.h" #include "AVRMCAsmInfo.h" -#include "AVRMCTargetDesc.h" #include "AVRTargetStreamer.h" #include "InstPrinter/AVRInstPrinter.h" diff --git a/lib/Target/BPF/BPFAsmPrinter.cpp b/lib/Target/BPF/BPFAsmPrinter.cpp index c5201465e07439bd3e65dd4fd206ab7b9ad76840..9397c78f3dff82c579a170f6d2d590b9b19fc673 100644 --- a/lib/Target/BPF/BPFAsmPrinter.cpp +++ b/lib/Target/BPF/BPFAsmPrinter.cpp @@ -18,10 +18,10 @@ #include "BPFTargetMachine.h" #include "InstPrinter/BPFInstPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" @@ -35,14 +35,15 @@ using namespace llvm; namespace { class BPFAsmPrinter : public AsmPrinter { public: - explicit BPFAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) + explicit BPFAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)) {} StringRef getPassName() const override { return "BPF Assembly Printer"; } void EmitInstruction(const MachineInstr *MI) override; }; -} +} // namespace void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) { diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp index 8772be9bba9e22e3681714651d24ceea2e00edb8..c6ddd6bdad5e639fb70ab372be4443d8410593a6 100644 --- a/lib/Target/BPF/BPFISelDAGToDAG.cpp +++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp @@ -22,11 +22,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" + using namespace llvm; #define DEBUG_TYPE "bpf-isel" @@ -42,6 +45,8 @@ public: return "BPF DAG->DAG Pattern Instruction Selection"; } + void PreprocessISelDAG() override; + private: // Include the pieces autogenerated from the target description. #include "BPFGenDAGISel.inc" @@ -51,15 +56,31 @@ private: // Complex Pattern for address selection. bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + + // Find constants from a constant structure + typedef std::vector val_vec_type; + bool fillGenericConstant(const DataLayout &DL, const Constant *CV, + val_vec_type &Vals, uint64_t Offset); + bool fillConstantDataArray(const DataLayout &DL, const ConstantDataArray *CDA, + val_vec_type &Vals, int Offset); + bool fillConstantArray(const DataLayout &DL, const ConstantArray *CA, + val_vec_type &Vals, int Offset); + bool fillConstantStruct(const DataLayout &DL, const ConstantStruct *CS, + val_vec_type &Vals, int Offset); + bool getConstantFieldValue(const GlobalAddressSDNode *Node, uint64_t Offset, + uint64_t Size, unsigned char *ByteSeq); + + // Mapping from ConstantStruct global value to corresponding byte-list values + std::map cs_vals_; }; -} +} // namespace // ComplexPattern used on BPF Load/Store instructions bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { // if Address is FI, get the TargetFrameIndex. SDLoc DL(Addr); if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); return true; } @@ -71,7 +92,7 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { // Addresses of the form Addr+const or Addr|const if (CurDAG->isBaseWithConstantOffset(Addr)) { ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); - if (isInt<32>(CN->getSExtValue())) { + if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node if (FrameIndexSDNode *FIN = @@ -85,13 +106,14 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { } } - Base = Addr; + Base = Addr; Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); return true; } // ComplexPattern used on BPF FI instruction -bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { +bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) { SDLoc DL(Addr); if (!CurDAG->isBaseWithConstantOffset(Addr)) @@ -99,11 +121,10 @@ bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) // Addresses of the form Addr+const or Addr|const ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); - if (isInt<32>(CN->getSExtValue())) { + if (isInt<16>(CN->getSExtValue())) { // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = - dyn_cast(Addr.getOperand(0))) + if (FrameIndexSDNode *FIN = dyn_cast(Addr.getOperand(0))) Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); else return false; @@ -129,7 +150,8 @@ void BPFDAGToDAGISel::Select(SDNode *Node) { // tablegen selection should be handled here. switch (Opcode) { - default: break; + default: + break; case ISD::SDIV: { DebugLoc Empty; const DebugLoc &DL = Node->getDebugLoc(); @@ -181,6 +203,218 @@ void BPFDAGToDAGISel::Select(SDNode *Node) { SelectCode(Node); } +void BPFDAGToDAGISel::PreprocessISelDAG() { + // Iterate through all nodes, only interested in loads from ConstantStruct + // ConstantArray should have converted by IR->DAG processing + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); + I != E;) { + SDNode *Node = &*I++; + unsigned Opcode = Node->getOpcode(); + if (Opcode != ISD::LOAD) + continue; + + union { + uint8_t c[8]; + uint16_t s; + uint32_t i; + uint64_t d; + } new_val; // hold up the constant values replacing loads. + bool to_replace = false; + SDLoc DL(Node); + const LoadSDNode *LD = cast(Node); + uint64_t size = LD->getMemOperand()->getSize(); + if (!size || size > 8 || (size & (size - 1))) + continue; + + SDNode *LDAddrNode = LD->getOperand(1).getNode(); + // Match LDAddr against either global_addr or (global_addr + offset) + unsigned opcode = LDAddrNode->getOpcode(); + if (opcode == ISD::ADD) { + SDValue OP1 = LDAddrNode->getOperand(0); + SDValue OP2 = LDAddrNode->getOperand(1); + + // We want to find the pattern global_addr + offset + SDNode *OP1N = OP1.getNode(); + if (OP1N->getOpcode() <= ISD::BUILTIN_OP_END || + OP1N->getNumOperands() == 0) + continue; + + DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); + + const GlobalAddressSDNode *GADN = + dyn_cast(OP1N->getOperand(0).getNode()); + const ConstantSDNode *CDN = dyn_cast(OP2.getNode()); + if (GADN && CDN) + to_replace = + getConstantFieldValue(GADN, CDN->getZExtValue(), size, new_val.c); + } else if (LDAddrNode->getOpcode() > ISD::BUILTIN_OP_END && + LDAddrNode->getNumOperands() > 0) { + DEBUG(dbgs() << "Check candidate load: "; LD->dump(); dbgs() << '\n'); + + SDValue OP1 = LDAddrNode->getOperand(0); + if (const GlobalAddressSDNode *GADN = + dyn_cast(OP1.getNode())) + to_replace = getConstantFieldValue(GADN, 0, size, new_val.c); + } + + if (!to_replace) + continue; + + // replacing the old with a new value + uint64_t val; + if (size == 1) + val = new_val.c[0]; + else if (size == 2) + val = new_val.s; + else if (size == 4) + val = new_val.i; + else { + val = new_val.d; + } + + DEBUG(dbgs() << "Replacing load of size " << size << " with constant " + << val << '\n'); + SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64); + + // After replacement, the current node is dead, we need to + // go backward one step to make iterator still work + I--; + SDValue From[] = {SDValue(Node, 0), SDValue(Node, 1)}; + SDValue To[] = {NVal, NVal}; + CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2); + I++; + // It is safe to delete node now + CurDAG->DeleteNode(Node); + } +} + +bool BPFDAGToDAGISel::getConstantFieldValue(const GlobalAddressSDNode *Node, + uint64_t Offset, uint64_t Size, + unsigned char *ByteSeq) { + const GlobalVariable *V = dyn_cast(Node->getGlobal()); + + if (!V || !V->hasInitializer()) + return false; + + const Constant *Init = V->getInitializer(); + const DataLayout &DL = CurDAG->getDataLayout(); + val_vec_type TmpVal; + + auto it = cs_vals_.find(static_cast(Init)); + if (it != cs_vals_.end()) { + TmpVal = it->second; + } else { + uint64_t total_size = 0; + if (const ConstantStruct *CS = dyn_cast(Init)) + total_size = + DL.getStructLayout(cast(CS->getType()))->getSizeInBytes(); + else if (const ConstantArray *CA = dyn_cast(Init)) + total_size = DL.getTypeAllocSize(CA->getType()->getElementType()) * + CA->getNumOperands(); + else + return false; + + val_vec_type Vals(total_size, 0); + if (fillGenericConstant(DL, Init, Vals, 0) == false) + return false; + cs_vals_[static_cast(Init)] = Vals; + TmpVal = std::move(Vals); + } + + // test whether host endianness matches target + union { + uint8_t c[2]; + uint16_t s; + } test_buf; + uint16_t test_val = 0x2345; + if (DL.isLittleEndian()) + support::endian::write16le(test_buf.c, test_val); + else + support::endian::write16be(test_buf.c, test_val); + + bool endian_match = test_buf.s == test_val; + for (uint64_t i = Offset, j = 0; i < Offset + Size; i++, j++) + ByteSeq[j] = endian_match ? TmpVal[i] : TmpVal[Offset + Size - 1 - j]; + + return true; +} + +bool BPFDAGToDAGISel::fillGenericConstant(const DataLayout &DL, + const Constant *CV, + val_vec_type &Vals, uint64_t Offset) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); + + if (isa(CV) || isa(CV)) + return true; // already done + + if (const ConstantInt *CI = dyn_cast(CV)) { + uint64_t val = CI->getZExtValue(); + DEBUG(dbgs() << "Byte array at offset " << Offset << " with value " << val + << '\n'); + + if (Size > 8 || (Size & (Size - 1))) + return false; + + // Store based on target endian + for (uint64_t i = 0; i < Size; ++i) { + Vals[Offset + i] = DL.isLittleEndian() + ? ((val >> (i * 8)) & 0xFF) + : ((val >> ((Size - i - 1) * 8)) & 0xFF); + } + return true; + } + + if (const ConstantDataArray *CDA = dyn_cast(CV)) + return fillConstantDataArray(DL, CDA, Vals, Offset); + + if (const ConstantArray *CA = dyn_cast(CV)) + return fillConstantArray(DL, CA, Vals, Offset); + + if (const ConstantStruct *CVS = dyn_cast(CV)) + return fillConstantStruct(DL, CVS, Vals, Offset); + + return false; +} + +bool BPFDAGToDAGISel::fillConstantDataArray(const DataLayout &DL, + const ConstantDataArray *CDA, + val_vec_type &Vals, int Offset) { + for (unsigned i = 0, e = CDA->getNumElements(); i != e; ++i) { + if (fillGenericConstant(DL, CDA->getElementAsConstant(i), Vals, Offset) == + false) + return false; + Offset += DL.getTypeAllocSize(CDA->getElementAsConstant(i)->getType()); + } + + return true; +} + +bool BPFDAGToDAGISel::fillConstantArray(const DataLayout &DL, + const ConstantArray *CA, + val_vec_type &Vals, int Offset) { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + if (fillGenericConstant(DL, CA->getOperand(i), Vals, Offset) == false) + return false; + Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } + + return true; +} + +bool BPFDAGToDAGISel::fillConstantStruct(const DataLayout &DL, + const ConstantStruct *CS, + val_vec_type &Vals, int Offset) { + const StructLayout *Layout = DL.getStructLayout(CS->getType()); + for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { + const Constant *Field = CS->getOperand(i); + uint64_t SizeSoFar = Layout->getElementOffset(i); + if (fillGenericConstant(DL, Field, Vals, Offset + SizeSoFar) == false) + return false; + } + return true; +} + FunctionPass *llvm::createBPFISelDag(BPFTargetMachine &TM) { return new BPFDAGToDAGISel(TM); } diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index b9b3dff95c0aea5d3b5abfcb5585ce3a07b2f8b6..cc7a7c3849bca9dade42784c4cfdc4b0c3d0b016 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -132,6 +132,10 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 128; } +bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + return false; +} + SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::BR_CC: @@ -257,8 +261,7 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } auto PtrVT = getPointerTy(MF.getDataLayout()); - Chain = DAG.getCALLSEQ_START( - Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), CLI.DL); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); SmallVector, MaxArgs> RegsToPass; @@ -497,8 +500,11 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const { SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { + auto N = cast(Op); + assert(N->getOffset() == 0 && "Invalid offset for global address"); + SDLoc DL(Op); - const GlobalValue *GV = cast(Op)->getGlobal(); + const GlobalValue *GV = N->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64); return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA); diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h index 3d1726be286eb59c6d260dd519bddfbf5aeb2b37..0b8a8ca20c3b3057b56a1b8e1ec5cff8bc90c5b6 100644 --- a/lib/Target/BPF/BPFISelLowering.h +++ b/lib/Target/BPF/BPFISelLowering.h @@ -42,6 +42,10 @@ public: // This method returns the name of a target specific DAG node. const char *getTargetNodeName(unsigned Opcode) const override; + // This method decides whether folding a constant offset + // with the given GlobalAddress is legal. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; diff --git a/lib/Target/BPF/BPFInstrInfo.cpp b/lib/Target/BPF/BPFInstrInfo.cpp index e38facead9228ac31e268d289a0f598fff867162..5351cfa95020e66c7fc41d9ed33e243b76cacfcc 100644 --- a/lib/Target/BPF/BPFInstrInfo.cpp +++ b/lib/Target/BPF/BPFInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFInstrInfo.h" +#include "BPF.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td index 93ee24371c4d92bd077f305ba548e89a6d57c7f9..5ad77726820843bd6ec5e8d6114733820c407d6e 100644 --- a/lib/Target/BPF/BPFInstrInfo.td +++ b/lib/Target/BPF/BPFInstrInfo.td @@ -16,7 +16,8 @@ include "BPFInstrFormats.td" // Instruction Operands and Patterns // These are target-independent nodes, but have target-specific formats. -def SDT_BPFCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>]>; +def SDT_BPFCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>, + SDTCisVT<1, iPTR>]>; def SDT_BPFCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_BPFCall : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; def SDT_BPFSetFlag : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>]>; @@ -50,7 +51,7 @@ def u64imm : Operand { let PrintMethod = "printImm64Operand"; } -def i64immSExt32 : PatLeaf<(imm), +def i64immSExt32 : PatLeaf<(i64 imm), [{return isInt<32>(N->getSExtValue()); }]>; // Addressing modes. @@ -66,17 +67,17 @@ def MEMri : Operand { } // Conditional code predicates - used for pattern matching for jump instructions -def BPF_CC_EQ : PatLeaf<(imm), +def BPF_CC_EQ : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETEQ);}]>; -def BPF_CC_NE : PatLeaf<(imm), +def BPF_CC_NE : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETNE);}]>; -def BPF_CC_GE : PatLeaf<(imm), +def BPF_CC_GE : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETGE);}]>; -def BPF_CC_GT : PatLeaf<(imm), +def BPF_CC_GT : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETGT);}]>; -def BPF_CC_GTU : PatLeaf<(imm), +def BPF_CC_GTU : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETUGT);}]>; -def BPF_CC_GEU : PatLeaf<(imm), +def BPF_CC_GEU : PatLeaf<(i64 imm), [{return (N->getZExtValue() == ISD::SETUGE);}]>; // jump instructions @@ -445,9 +446,9 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1, // ADJCALLSTACKDOWN/UP pseudo insns let Defs = [R11], Uses = [R11] in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt), - "#ADJCALLSTACKDOWN $amt", - [(BPFcallseq_start timm:$amt)]>; +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), + "#ADJCALLSTACKDOWN $amt1 $amt2", + [(BPFcallseq_start timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2", [(BPFcallseq_end timm:$amt1, timm:$amt2)]>; diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp index 7925bee9c5879098196e1ba006f5953ac3a6f9a5..273843e92701e582488f648d219cbd8e7f6d8810 100644 --- a/lib/Target/BPF/BPFRegisterInfo.cpp +++ b/lib/Target/BPF/BPFRegisterInfo.cpp @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFRegisterInfo.h" +#include "BPF.h" #include "BPFSubtarget.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/IR/DiagnosticInfo.h" #define GET_REGINFO_TARGET_DESC #include "BPFGenRegisterInfo.inc" diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp index 897695633e46bb1247deee959ad21a78eb6ee047..d84b0a80fc0cc6b7addd765acf8a344d7a073508 100644 --- a/lib/Target/BPF/BPFTargetMachine.cpp +++ b/lib/Target/BPF/BPFTargetMachine.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFTargetMachine.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/LegacyPassManager.h" +#include "BPF.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" @@ -58,7 +58,7 @@ namespace { // BPF Code Generator Pass Configuration Options. class BPFPassConfig : public TargetPassConfig { public: - BPFPassConfig(BPFTargetMachine *TM, PassManagerBase &PM) + BPFPassConfig(BPFTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} BPFTargetMachine &getBPFTargetMachine() const { @@ -70,7 +70,7 @@ public: } TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) { - return new BPFPassConfig(this, PM); + return new BPFPassConfig(*this, PM); } // Install an instruction selector pass using diff --git a/lib/Target/BPF/CMakeLists.txt b/lib/Target/BPF/CMakeLists.txt index e2654b0465df139edf7ccb6e8cf865c984cc5022..4918653ff19dabc3096e492afeb2882f09900c19 100644 --- a/lib/Target/BPF/CMakeLists.txt +++ b/lib/Target/BPF/CMakeLists.txt @@ -4,7 +4,7 @@ tablegen(LLVM BPFGenRegisterInfo.inc -gen-register-info) tablegen(LLVM BPFGenInstrInfo.inc -gen-instr-info) tablegen(LLVM BPFGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM BPFGenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM X86GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM BPFGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM BPFGenDAGISel.inc -gen-dag-isel) tablegen(LLVM BPFGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM BPFGenCallingConv.inc -gen-callingconv) diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp index 9beefcdcc1d5d9d124ba03d2c17f9a3a8dd9ec59..a1d732c339e5fb2c808b29e97322f59e6e91e01b 100644 --- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -15,6 +15,8 @@ #include "BPFSubtarget.h" #include "MCTargetDesc/BPFMCTargetDesc.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" @@ -88,9 +90,9 @@ static DecodeStatus decodeMemoryOpValue(MCInst &Inst, unsigned Insn, } #include "BPFGenDisassemblerTables.inc" - static DecodeStatus readInstruction64(ArrayRef Bytes, uint64_t Address, - uint64_t &Size, uint64_t &Insn) { + uint64_t &Size, uint64_t &Insn, + bool IsLittleEndian) { uint64_t Lo, Hi; if (Bytes.size() < 8) { @@ -99,8 +101,14 @@ static DecodeStatus readInstruction64(ArrayRef Bytes, uint64_t Address, } Size = 8; - Hi = (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 0) | (Bytes[3] << 8); - Lo = (Bytes[4] << 0) | (Bytes[5] << 8) | (Bytes[6] << 16) | (Bytes[7] << 24); + if (IsLittleEndian) { + Hi = (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 0) | (Bytes[3] << 8); + Lo = (Bytes[4] << 0) | (Bytes[5] << 8) | (Bytes[6] << 16) | (Bytes[7] << 24); + } else { + Hi = (Bytes[0] << 24) | ((Bytes[1] & 0x0F) << 20) | ((Bytes[1] & 0xF0) << 12) | + (Bytes[2] << 8) | (Bytes[3] << 0); + Lo = (Bytes[4] << 24) | (Bytes[5] << 16) | (Bytes[6] << 8) | (Bytes[7] << 0); + } Insn = Make_64(Hi, Lo); return MCDisassembler::Success; @@ -111,10 +119,11 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const { - uint64_t Insn; + bool IsLittleEndian = getContext().getAsmInfo()->isLittleEndian(); + uint64_t Insn, Hi; DecodeStatus Result; - Result = readInstruction64(Bytes, Address, Size, Insn); + Result = readInstruction64(Bytes, Address, Size, Insn, IsLittleEndian); if (Result == MCDisassembler::Fail) return MCDisassembler::Fail; Result = decodeInstruction(DecoderTableBPF64, Instr, Insn, @@ -128,7 +137,10 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, return MCDisassembler::Fail; } Size = 16; - uint64_t Hi = (Bytes[12] << 0) | (Bytes[13] << 8) | (Bytes[14] << 16) | (Bytes[15] << 24); + if (IsLittleEndian) + Hi = (Bytes[12] << 0) | (Bytes[13] << 8) | (Bytes[14] << 16) | (Bytes[15] << 24); + else + Hi = (Bytes[12] << 24) | (Bytes[13] << 16) | (Bytes[14] << 8) | (Bytes[15] << 0); auto& Op = Instr.getOperand(1); Op.setImm(Make_64(Hi, Op.getImm())); break; diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp index ffd29f3ea9915b44c0bd595c82246827aaa2976a..64e986fe0f04693205c026aff9f66fbcdc2c93a8 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "BPF.h" #include "BPFInstPrinter.h" +#include "BPF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 1f355171ebd3f5b0207b4433f59f5e23d5c66813..80357a63a4e12f5f31833207a894e04180c60a52 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -70,7 +70,7 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned Size = Fixup.getKind() == FK_Data_4 ? 4 : 8; for (unsigned i = 0; i != Size; ++i) { - unsigned Idx = IsLittleEndian ? i : Size - i; + unsigned Idx = IsLittleEndian ? i : Size - i - 1; Data[Fixup.getOffset() + Idx] = uint8_t(Value >> (i * 8)); } } else { diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index ebe9abd8ffac4ba4bdd700cf3de9c00c2da79c77..d5e1d7706edc00c1d6d38014b9d4237d06234de6 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/BPFMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h index 559ac291a79e19f9bbb7ccb08a41aa91a0fea8b3..fd7c97bf1f0a44cd1c0a9f27a08631f5dce74f06 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h @@ -42,7 +42,7 @@ public: // messed up in random places by 4 bytes. .debug_line // section will be parsable, but with odd offsets and // line numbers, etc. - PointerSize = 8; + CodePointerSize = 8; } }; } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index b58409730de049d3b18d6cdf41a5862b06825e3f..797904e1c97687650db6fdbc7fb9e9f1d10de6e6 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/BPFMCTargetDesc.h" #include "BPF.h" #include "InstPrinter/BPFInstPrinter.h" -#include "MCTargetDesc/BPFMCTargetDesc.h" #include "MCTargetDesc/BPFMCAsmInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index 3df673eaeb4b4e8ed16b502a29c441d601d51443..d1c97c9987e1bac1f9cff9ebd8dfc73b01e8a32c 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -14,8 +14,8 @@ #ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H #define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCTARGETDESC_H -#include "llvm/Support/DataTypes.h" #include "llvm/Config/config.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class MCAsmBackend; diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 4bbc36a86e5b76950baca77e4d77ac5779ca45cf..c19e636d79ca219ee9133cd91cb7558ea49f30b5 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -17,11 +17,12 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonShuffler.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -42,13 +43,12 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -307,7 +307,7 @@ public: bool iss31_1Imm() const { return true; } bool iss30_2Imm() const { return true; } bool iss29_3Imm() const { return true; } - bool iss23_2Imm() const { return CheckImmRange(23, 2, true, true, false); } + bool iss27_2Imm() const { return CheckImmRange(27, 2, true, true, false); } bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); } bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); } bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); } @@ -459,94 +459,16 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { DEBUG(MCB.dump_pretty(dbgs())); DEBUG(dbgs() << "--\n"); + MCB.setLoc(IDLoc); // Check the bundle for errors. const MCRegisterInfo *RI = getContext().getRegisterInfo(); - HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI); + HexagonMCChecker Check(getContext(), MCII, getSTI(), MCB, *RI); bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(), getContext(), MCB, &Check); - while (Check.getNextErrInfo()) { - unsigned Reg = Check.getErrRegister(); - Twine R(RI->getName(Reg)); - - uint64_t Err = Check.getError(); - if (Err != HexagonMCErrInfo::CHECK_SUCCESS) { - if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err) - return Error( - IDLoc, - "unconditional branch cannot precede another branch in packet"); - - if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err || - HexagonMCErrInfo::CHECK_ERROR_NEWV & Err) - return Error(IDLoc, "register `" + R + - "' used with `.new' " - "but not validly modified in the same packet"); - - if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err) - return Error(IDLoc, "register `" + R + "' modified more than once"); - - if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err) - return Error(IDLoc, "cannot write to read-only register `" + R + "'"); - - if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err) - return Error(IDLoc, "loop-setup and some branch instructions " - "cannot be in the same packet"); - - if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) { - Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1'); - return Error(IDLoc, - "packet marked with `:endloop" + N + "' " + - "cannot contain instructions that modify register " + - "`" + R + "'"); - } - - if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err) - return Error( - IDLoc, - "instruction cannot appear in packet with other instructions"); - - if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err) - return Error(IDLoc, "too many slots used in packet"); - - if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) { - uint64_t Erm = Check.getShuffleError(); - - if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm) - return Error(IDLoc, "invalid instruction packet"); - else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm) - return Error(IDLoc, "invalid instruction packet: too many stores"); - else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm) - return Error(IDLoc, "invalid instruction packet: too many loads"); - else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm) - return Error(IDLoc, "too many branches in packet"); - else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm) - return Error(IDLoc, "invalid instruction packet: out of slots"); - else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm) - return Error(IDLoc, "invalid instruction packet: slot error"); - else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm) - return Error(IDLoc, "v60 packet violation"); - else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm) - return Error(IDLoc, "slot 0 instruction does not allow slot 1 store"); - else - return Error(IDLoc, "unknown error in instruction packet"); - } - } - - unsigned Warn = Check.getWarning(); - if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) { - if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn) - Warning(IDLoc, "register `" + R + "' used with `.cur' " - "but not used in the same packet"); - else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn) - Warning(IDLoc, "register `" + R + "' used with `.tmp' " - "but not used in the same packet"); - } - } - if (CheckOk) { - MCB.setLoc(IDLoc); if (HexagonMCInstrInfo::bundleSize(MCB) == 0) { assert(!HexagonMCInstrInfo::isInnerLoop(MCB)); assert(!HexagonMCInstrInfo::isOuterLoop(MCB)); @@ -1370,13 +1292,13 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, case Hexagon::A2_iconst: { Inst.setOpcode(Hexagon::A2_addi); MCOperand Reg = Inst.getOperand(0); - MCOperand S16 = Inst.getOperand(1); - HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr()); - HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr()); + MCOperand S27 = Inst.getOperand(1); + HexagonMCInstrInfo::setMustNotExtend(*S27.getExpr()); + HexagonMCInstrInfo::setS27_2_reloc(*S27.getExpr()); Inst.clear(); Inst.addOperand(Reg); Inst.addOperand(MCOperand::createReg(Hexagon::R0)); - Inst.addOperand(S16); + Inst.addOperand(S27); break; } case Hexagon::M4_mpyrr_addr: diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index 61d3630ac095a5ed1c1ae24f734addec36bc302e..5b02aa3ca3ae9ded1d5a860d9fd21833e9136308 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -65,9 +65,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" -#include #include #include +#include using namespace llvm; @@ -347,7 +347,7 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub); const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS); - uint16_t BW = RC->getSize()*8; + uint16_t BW = TRI.getRegSizeInBits(*RC); return BW; } @@ -1011,12 +1011,7 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { bool BT::reached(const MachineBasicBlock *B) const { int BN = B->getNumber(); assert(BN >= 0); - for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end(); - I != E; ++I) { - if (I->second == BN) - return true; - } - return false; + return ReachedBB.count(BN); } // Visit an individual instruction. This could be a newly added instruction, @@ -1036,6 +1031,8 @@ void BT::reset() { EdgeExec.clear(); InstrExec.clear(); Map.clear(); + ReachedBB.clear(); + ReachedBB.reserve(MF.size()); } void BT::run() { @@ -1068,6 +1065,7 @@ void BT::run() { if (EdgeExec.count(Edge)) continue; EdgeExec.insert(Edge); + ReachedBB.insert(Edge.second); const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second); MachineBasicBlock::const_iterator It = B.begin(), End = B.end(); diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h index a547b34e852f6e28268f7d0255389dd7879b48ce..7f49f430382d890e695e1f26bed44ecfb42b8c77 100644 --- a/lib/Target/Hexagon/BitTracker.h +++ b/lib/Target/Hexagon/BitTracker.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H #define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" @@ -68,10 +69,11 @@ private: typedef std::set InstrSetType; typedef std::queue EdgeQueueType; - EdgeSetType EdgeExec; // Executable flow graph edges. - InstrSetType InstrExec; // Executable instructions. - EdgeQueueType FlowQ; // Work queue of CFG edges. - bool Trace; // Enable tracing for debugging. + EdgeSetType EdgeExec; // Executable flow graph edges. + InstrSetType InstrExec; // Executable instructions. + EdgeQueueType FlowQ; // Work queue of CFG edges. + DenseSet ReachedBB; // Cache of reached blocks. + bool Trace; // Enable tracing for debugging. const MachineEvaluator &ME; MachineFunction &MF; diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index ae15ed0e924055c73b6666d0a00a42e6946c8172..586220dfec262bfe124334d9b85fca7399e40a19 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -12,12 +12,12 @@ #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" @@ -25,8 +25,8 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -191,7 +191,8 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; if (Size > HEXAGON_MAX_PACKET_SIZE) return MCDisassembler::Fail; - HexagonMCChecker Checker(*MCII, STI, MI, MI, *getContext().getRegisterInfo()); + HexagonMCChecker Checker(getContext(), *MCII, STI, MI, + *getContext().getRegisterInfo(), false); if (!Checker.check()) return MCDisassembler::Fail; return MCDisassembler::Success; @@ -552,7 +553,7 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, using namespace Hexagon; static const MCPhysReg CtrlRegDecoderTable[] = { /* 0 */ SA0, LC0, SA1, LC1, - /* 4 */ P3_0, C5, C6, C7, + /* 4 */ P3_0, C5, M0, M1, /* 8 */ USR, PC, UGP, GP, /* 12 */ CS0, CS1, UPCYCLELO, UPCYCLEHI, /* 16 */ FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index fda23f8f6b05f17a443090e38bb261d0e04470a8..e689483a099933e72ed865c6895e57fc4a2e8e36 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonAsmPrinter.h" +#include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" @@ -23,6 +23,7 @@ #include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -43,7 +44,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" @@ -286,9 +286,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo(); const MachineFunction &MF = *MI.getParent()->getParent(); const auto &HST = MF.getSubtarget(); - unsigned VectorSize = HST.useHVXSglOps() - ? Hexagon::VectorRegsRegClass.getSize() - : Hexagon::VectorRegs128BRegClass.getSize(); + const auto &VecRC = HST.useHVXSglOps() ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned VectorSize = HST.getRegisterInfo()->getSpillSize(VecRC); switch (Inst.getOpcode()) { default: return; @@ -298,7 +298,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MCOperand Reg = Inst.getOperand(0); MCOperand S16 = Inst.getOperand(1); HexagonMCInstrInfo::setMustNotExtend(*S16.getExpr()); - HexagonMCInstrInfo::setS23_2_reloc(*S16.getExpr()); + HexagonMCInstrInfo::setS27_2_reloc(*S16.getExpr()); Inst.clear(); Inst.addOperand(Reg); Inst.addOperand(MCOperand::createReg(Hexagon::R0)); diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index 61f290ca98d70beb18a22ad530ecc0b4aa9cbdcb..14c682c6df4bcbc2f8b109d2f778d3554128a1db 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -13,8 +13,8 @@ #include "HexagonTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" @@ -407,7 +407,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); if (RR.Sub == 0) { Begin = 0; - Width = RC->getSize()*8; + Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC); return true; } @@ -417,7 +417,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, case Hexagon::DoubleRegsRegClassID: case Hexagon::VecDblRegsRegClassID: case Hexagon::VecDblRegs128BRegClassID: - Width = RC->getSize()*8 / 2; + Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2; if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi) Begin = Width; break; @@ -1054,8 +1054,8 @@ namespace { class RedundantInstrElimination : public Transformation { public: RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, - MachineRegisterInfo &mri) - : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + const HexagonRegisterInfo &hri, MachineRegisterInfo &mri) + : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {} bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; @@ -1070,6 +1070,7 @@ namespace { bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); const HexagonInstrInfo &HII; + const HexagonRegisterInfo &HRI; MachineRegisterInfo &MRI; BitTracker &BT; }; @@ -1262,7 +1263,7 @@ bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI, assert(MI.getOperand(OpN).isReg()); BitTracker::RegisterRef RR = MI.getOperand(OpN); const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI); - uint16_t Width = RC->getSize()*8; + uint16_t Width = HRI.getRegSizeInBits(*RC); if (!GotBits) T.set(Begin, Begin+Width); @@ -2173,8 +2174,10 @@ bool BitSimplification::genBitSplit(MachineInstr *MI, const RegisterSet &AVs) { if (!GenBitSplit) return false; - if (CountBitSplit >= MaxBitSplit) - return false; + if (MaxBitSplit.getNumOccurrences()) { + if (CountBitSplit >= MaxBitSplit) + return false; + } unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -2253,7 +2256,8 @@ bool BitSimplification::genBitSplit(MachineInstr *MI, continue; // Generate bitsplit where S is defined. - CountBitSplit++; + if (MaxBitSplit.getNumOccurrences()) + CountBitSplit++; MachineInstr *DefS = MRI.getVRegDef(S); assert(DefS != nullptr); DebugLoc DL = DefS->getDebugLoc(); @@ -2379,9 +2383,11 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI, const RegisterSet &AVs) { if (!GenExtract) return false; - if (CountExtract >= MaxExtract) - return false; - CountExtract++; + if (MaxExtract.getNumOccurrences()) { + if (CountExtract >= MaxExtract) + return false; + CountExtract++; + } unsigned W = RC.width(); unsigned RW = W; @@ -2651,7 +2657,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { Changed |= visitBlock(Entry, ImmG, AIG); RegisterSet ARE; // Available registers for RIE. - RedundantInstrElimination RIE(BT, HII, MRI); + RedundantInstrElimination RIE(BT, HII, HRI, MRI); bool Ried = visitBlock(Entry, RIE, ARE); if (Ried) { Changed = true; diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index 90ccecb6629acbd0713e5d00206d834201f735b0..730026121d3beec677d9001ea6ae060bcaa5c841 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonBitTracker.h" +#include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" @@ -57,12 +57,10 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, // tion). To avoid the complications with in-memory arguments, only consi- // der the initial sequence of formal parameters that are known to be // passed via registers. - unsigned AttrIdx = 0; unsigned InVirtReg, InPhysReg = 0; const Function &F = *MF.getFunction(); typedef Function::const_arg_iterator arg_iterator; for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { - AttrIdx++; const Argument &Arg = *I; Type *ATy = Arg.getType(); unsigned Width = 0; @@ -74,8 +72,7 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, // Module::AnyPointerSize. if (Width == 0 || Width > 64) break; - AttributeList Attrs = F.getAttributes(); - if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal)) + if (Arg.hasAttribute(Attribute::ByVal)) continue; InPhysReg = getNextPhysReg(InPhysReg, Width); if (!InPhysReg) @@ -83,9 +80,9 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, InVirtReg = getVirtRegFor(InPhysReg); if (!InVirtReg) continue; - if (Attrs.hasAttribute(AttrIdx, Attribute::SExt)) + if (Arg.hasAttribute(Attribute::SExt)) VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width))); - else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt)) + else if (Arg.hasAttribute(Attribute::ZExt)) VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width))); } } diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp index 721cf0417289b93cc394a7df1f1b570744afaeeb..1640b40c164f3ad315c68c2a6eaf43def3898977 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -232,14 +232,16 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns( const TargetRegisterInfo &TRI) { RegisterSet LiveIns; RegisterSet Tmp; + for (auto I : B.liveins()) { - if (I.LaneMask.all()) { - Tmp.insert({I.PhysReg,0}); + MCSubRegIndexIterator S(I.PhysReg, &TRI); + if (I.LaneMask.all() || (I.LaneMask.any() && !S.isValid())) { + Tmp.insert({I.PhysReg, 0}); continue; } - for (MCSubRegIndexIterator S(I.PhysReg, &TRI); S.isValid(); ++S) { - LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex()); - if ((M & I.LaneMask).any()) + for (; S.isValid(); ++S) { + unsigned SI = S.getSubRegIndex(); + if ((I.LaneMask & TRI.getSubRegIndexLaneMask(SI)).any()) Tmp.insert({S.getSubReg(), 0}); } } diff --git a/lib/Target/Hexagon/HexagonBlockRanges.h b/lib/Target/Hexagon/HexagonBlockRanges.h index 717480314d1606723914f1adc4e5e49e07401a08..769ec7044a0ee66df9ffb9ea2a2dafcba6d831f3 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.h +++ b/lib/Target/Hexagon/HexagonBlockRanges.h @@ -14,8 +14,8 @@ #include #include #include -#include #include +#include namespace llvm { diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 2f8fe6e087f587fe3d6716758d09eaa518128054..c7b422e7efd09838c6c1b5b1d76f3e7d5dcb5469 100644 --- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -38,6 +38,7 @@ class HexagonCFGOptimizer : public MachineFunctionPass { private: void InvertAndChangeJumpTarget(MachineInstr &, MachineBasicBlock *); + bool isOnFallThroughPath(MachineBasicBlock *MBB); public: static char ID; @@ -106,6 +107,14 @@ void HexagonCFGOptimizer::InvertAndChangeJumpTarget( MI.getOperand(1).setMBB(NewTarget); } +bool HexagonCFGOptimizer::isOnFallThroughPath(MachineBasicBlock *MBB) { + if (MBB->canFallThrough()) + return true; + for (MachineBasicBlock *PB : MBB->predecessors()) + if (PB->isLayoutSuccessor(MBB) && PB->canFallThrough()) + return true; + return false; +} bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { if (skipFunction(*Fn.getFunction())) @@ -182,7 +191,6 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { } if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) { - // Ensure that BB2 has one instruction -- an unconditional jump. if ((LayoutSucc->size() == 1) && IsUnconditionalJump(LayoutSucc->front().getOpcode())) { @@ -211,9 +219,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { JumpAroundTarget->moveAfter(LayoutSucc); // only move a block if it doesn't have a fall-thru. otherwise // the CFG will be incorrect. - if (!UncondTarget->canFallThrough()) { + if (!isOnFallThroughPath(UncondTarget)) UncondTarget->moveAfter(JumpAroundTarget); - } } // diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp index a07ba77e6f3e1b33d724bbeafb72df9b56a5d6f1..b5b46f2b7d199752f899ca07d575f74ff689e3fc 100644 --- a/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -175,7 +175,8 @@ namespace { None = 0, Root = 0x01, Internal = 0x02, - Used = 0x04 + Used = 0x04, + InBounds = 0x08 }; uint32_t Flags; @@ -231,6 +232,11 @@ namespace { OS << ','; OS << "used"; } + if (GN.Flags & GepNode::InBounds) { + if (Comma) + OS << ','; + OS << "inbounds"; + } OS << "} "; if (GN.Flags & GepNode::Root) OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')'; @@ -334,10 +340,11 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); GepNode *N = new (*Mem) GepNode; Value *PtrOp = GepI->getPointerOperand(); + uint32_t InBounds = GepI->isInBounds() ? GepNode::InBounds : 0; ValueToNodeMap::iterator F = NM.find(PtrOp); if (F == NM.end()) { N->BaseVal = PtrOp; - N->Flags |= GepNode::Root; + N->Flags |= GepNode::Root | InBounds; } else { // If PtrOp was a GEP instruction, it must have already been processed. // The ValueToNodeMap entry for it is the last gep node in the generated @@ -373,7 +380,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, Value *Op = *OI; GepNode *Nx = new (*Mem) GepNode; Nx->Parent = PN; // Link Nx to the previous node. - Nx->Flags |= GepNode::Internal; + Nx->Flags |= GepNode::Internal | InBounds; Nx->PTy = PtrTy; Nx->Idx = Op; Nodes.push_back(Nx); @@ -1081,7 +1088,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, GepNode *RN = NA[0]; assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); - Value *NewInst = nullptr; + GetElementPtrInst *NewInst = nullptr; Value *Input = RN->BaseVal; Value **IdxList = new Value*[Num+1]; unsigned nax = 0; @@ -1112,6 +1119,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, Type *InpTy = Input->getType(); Type *ElTy = cast(InpTy->getScalarType())->getElementType(); NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At); + NewInst->setIsInBounds(RN->Flags & GepNode::InBounds); DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); Input = NewInst; } while (nax <= Num); diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp index 783b916e04b05fb99d0e478199c18867068d9b61..aa68f6cfdfc1115ec30476883a371311a5ce44c3 100644 --- a/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -2276,7 +2276,7 @@ Undetermined: goto Undetermined; uint32_t Props = PredC.properties(); - bool CTrue = false, CFalse = false;; + bool CTrue = false, CFalse = false; if (Props & ConstantProperties::Zero) CFalse = true; else if (Props & ConstantProperties::NonZero) diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 5f375f8dc74284b3e2c2f276eb1612115823aa95..6b4f53428256818fe855dd24d47730beb5e844ca 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" -#include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/PassSupport.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -460,6 +460,8 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { } bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; if (IsCombinesDisabled) return false; diff --git a/lib/Target/Hexagon/HexagonDepIICHVX.td b/lib/Target/Hexagon/HexagonDepIICHVX.td new file mode 100644 index 0000000000000000000000000000000000000000..1c1788264c66eed4f7d06b17d68e71836de82328 --- /dev/null +++ b/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -0,0 +1,1143 @@ +//===--- HexagonDepIICHVX.td ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def tc_0317c6ca : InstrItinClass; +def tc_1b93bdc6 : InstrItinClass; +def tc_2171ebae : InstrItinClass; +def tc_28978789 : InstrItinClass; +def tc_316c637c : InstrItinClass; +def tc_354299ad : InstrItinClass; +def tc_35e92f8e : InstrItinClass; +def tc_38208312 : InstrItinClass; +def tc_4105d6b5 : InstrItinClass; +def tc_41f4b64e : InstrItinClass; +def tc_41f99e1c : InstrItinClass; +def tc_45453b98 : InstrItinClass; +def tc_4e2a5159 : InstrItinClass; +def tc_4fd8566e : InstrItinClass; +def tc_51cd3aab : InstrItinClass; +def tc_5a9fc4ec : InstrItinClass; +def tc_5c120602 : InstrItinClass; +def tc_5cbf490b : InstrItinClass; +def tc_644584f8 : InstrItinClass; +def tc_69b6dd20 : InstrItinClass; +def tc_6b78cf13 : InstrItinClass; +def tc_6fd9ad30 : InstrItinClass; +def tc_71337255 : InstrItinClass; +def tc_72ad7b54 : InstrItinClass; +def tc_77a4c701 : InstrItinClass; +def tc_7c3f55c4 : InstrItinClass; +def tc_7e9f581b : InstrItinClass; +def tc_7fa82b08 : InstrItinClass; +def tc_7fa8b40f : InstrItinClass; +def tc_85d237e3 : InstrItinClass; +def tc_8b6a873f : InstrItinClass; +def tc_908a4c8c : InstrItinClass; +def tc_9311da3f : InstrItinClass; +def tc_9777e6bf : InstrItinClass; +def tc_97c165b9 : InstrItinClass; +def tc_99093773 : InstrItinClass; +def tc_9b9642a1 : InstrItinClass; +def tc_9c267309 : InstrItinClass; +def tc_a3127e12 : InstrItinClass; +def tc_a4c9df3b : InstrItinClass; +def tc_aedb9f9e : InstrItinClass; +def tc_b06ab583 : InstrItinClass; +def tc_b712833a : InstrItinClass; +def tc_b77635b4 : InstrItinClass; +def tc_bbaf280e : InstrItinClass; +def tc_bf142ae2 : InstrItinClass; +def tc_c00bf9c9 : InstrItinClass; +def tc_c4b515c5 : InstrItinClass; +def tc_cbf6d1dc : InstrItinClass; +def tc_cedf314b : InstrItinClass; +def tc_d2cb81ea : InstrItinClass; +def tc_d5090f3e : InstrItinClass; +def tc_d642eff3 : InstrItinClass; +def tc_d725e5b0 : InstrItinClass; +def tc_d7bea0ec : InstrItinClass; +def tc_d98f4d63 : InstrItinClass; +def tc_da979fb3 : InstrItinClass; +def tc_db5b9e2f : InstrItinClass; +def tc_e172d86a : InstrItinClass; +def tc_e231aa4f : InstrItinClass; +def tc_e3748cdf : InstrItinClass; +def tc_e5053c8f : InstrItinClass; +def tc_e6299d16 : InstrItinClass; +def tc_eb669007 : InstrItinClass; +def tc_eda67dcd : InstrItinClass; +def tc_f3fc3f83 : InstrItinClass; + +class DepHVXItinV55 { + list DepHVXItinV55_list = [ + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]> + ]; +} + +class DepHVXItinV60 { + list DepHVXItinV60_list = [ + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]> + ]; +} + +class DepHVXItinV62 { + list DepHVXItinV62_list = [ + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]> + ]; +} diff --git a/lib/Target/Hexagon/HexagonDepIICScalar.td b/lib/Target/Hexagon/HexagonDepIICScalar.td new file mode 100644 index 0000000000000000000000000000000000000000..261778bda724395cb689ac69affba8f9c7b8c89d --- /dev/null +++ b/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -0,0 +1,2504 @@ +//===--- HexagonDepIICScalar.td -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +def tc_049dfb74 : InstrItinClass; +def tc_0767081f : InstrItinClass; +def tc_07ac815d : InstrItinClass; +def tc_090485bb : InstrItinClass; +def tc_09c86199 : InstrItinClass; +def tc_09faec3b : InstrItinClass; +def tc_0cb867f2 : InstrItinClass; +def tc_1000eb10 : InstrItinClass; +def tc_128719e8 : InstrItinClass; +def tc_136c4786 : InstrItinClass; +def tc_14da557c : InstrItinClass; +def tc_1b6011fb : InstrItinClass; +def tc_1b834fe7 : InstrItinClass; +def tc_1e062b18 : InstrItinClass; +def tc_1e69aa99 : InstrItinClass; +def tc_1f9668cc : InstrItinClass; +def tc_1fe8323c : InstrItinClass; +def tc_20a8e109 : InstrItinClass; +def tc_210b2456 : InstrItinClass; +def tc_251c87b2 : InstrItinClass; +def tc_261d9b78 : InstrItinClass; +def tc_28d296df : InstrItinClass; +def tc_29c14515 : InstrItinClass; +def tc_2aaab1e0 : InstrItinClass; +def tc_2c8fe5ae : InstrItinClass; +def tc_2d1e6f5c : InstrItinClass; +def tc_2e55aa16 : InstrItinClass; +def tc_30665cb0 : InstrItinClass; +def tc_336e698c : InstrItinClass; +def tc_34e882a4 : InstrItinClass; +def tc_35fb9d13 : InstrItinClass; +def tc_37326008 : InstrItinClass; +def tc_3993c58b : InstrItinClass; +def tc_3b4892c6 : InstrItinClass; +def tc_3bea1824 : InstrItinClass; +def tc_3c10f809 : InstrItinClass; +def tc_3d905451 : InstrItinClass; +def tc_3e61d314 : InstrItinClass; +def tc_3eab77bd : InstrItinClass; +def tc_43068634 : InstrItinClass; +def tc_45631a8d : InstrItinClass; +def tc_47ab9233 : InstrItinClass; +def tc_47f0b7ad : InstrItinClass; +def tc_485bb57c : InstrItinClass; +def tc_4997da4a : InstrItinClass; +def tc_511f28f6 : InstrItinClass; +def tc_537e2013 : InstrItinClass; +def tc_53ee6546 : InstrItinClass; +def tc_548f402d : InstrItinClass; +def tc_5625c6c1 : InstrItinClass; +def tc_580a779c : InstrItinClass; +def tc_583510c7 : InstrItinClass; +def tc_5d806107 : InstrItinClass; +def tc_5fa2857c : InstrItinClass; +def tc_5fe9fcd0 : InstrItinClass; +def tc_6264c5e0 : InstrItinClass; +def tc_639d93ee : InstrItinClass; +def tc_63cd9d2d : InstrItinClass; +def tc_65dc7cc4 : InstrItinClass; +def tc_69bb508b : InstrItinClass; +def tc_6c52d277 : InstrItinClass; +def tc_6c576d46 : InstrItinClass; +def tc_70cabf66 : InstrItinClass; +def tc_7639d4b0 : InstrItinClass; +def tc_7675c0e9 : InstrItinClass; +def tc_76c4c5ef : InstrItinClass; +def tc_77781686 : InstrItinClass; +def tc_78b3c689 : InstrItinClass; +def tc_7986ba30 : InstrItinClass; +def tc_7bc567a7 : InstrItinClass; +def tc_7c2dcd4d : InstrItinClass; +def tc_7ca2ea10 : InstrItinClass; +def tc_7d01cbdc : InstrItinClass; +def tc_7d9a56cd : InstrItinClass; +def tc_81a23d44 : InstrItinClass; +def tc_821c4233 : InstrItinClass; +def tc_82f0f122 : InstrItinClass; +def tc_84630363 : InstrItinClass; +def tc_86442910 : InstrItinClass; +def tc_87601822 : InstrItinClass; +def tc_88fa2da6 : InstrItinClass; +def tc_8c8041e6 : InstrItinClass; +def tc_8cb685d9 : InstrItinClass; +def tc_8def9c57 : InstrItinClass; +def tc_8f0a6bad : InstrItinClass; +def tc_8fab9ac3 : InstrItinClass; +def tc_92d1833c : InstrItinClass; +def tc_94e6ffd9 : InstrItinClass; +def tc_95c54f8b : InstrItinClass; +def tc_9a13af9d : InstrItinClass; +def tc_9b73d261 : InstrItinClass; +def tc_9c18c9a5 : InstrItinClass; +def tc_9c68db63 : InstrItinClass; +def tc_9ce7a5ab : InstrItinClass; +def tc_9da3628f : InstrItinClass; +def tc_9dafb7d3 : InstrItinClass; +def tc_9df8b0dc : InstrItinClass; +def tc_9e86015f : InstrItinClass; +def tc_9f518242 : InstrItinClass; +def tc_a12a5971 : InstrItinClass; +def tc_a1fb80e1 : InstrItinClass; +def tc_a333d2a9 : InstrItinClass; +def tc_a4567c39 : InstrItinClass; +def tc_a87879e8 : InstrItinClass; +def tc_a9c993d9 : InstrItinClass; +def tc_aad55963 : InstrItinClass; +def tc_ab1b5e74 : InstrItinClass; +def tc_ae0722f7 : InstrItinClass; +def tc_ae2c2dc2 : InstrItinClass; +def tc_ae762521 : InstrItinClass; +def tc_b08b653e : InstrItinClass; +def tc_b08be45e : InstrItinClass; +def tc_b0f50e3c : InstrItinClass; +def tc_b189ad4c : InstrItinClass; +def tc_b324366f : InstrItinClass; +def tc_b5bfaa60 : InstrItinClass; +def tc_b5f5a094 : InstrItinClass; +def tc_b86c7e8b : InstrItinClass; +def tc_baccf077 : InstrItinClass; +def tc_bc5561d8 : InstrItinClass; +def tc_bcf0e36e : InstrItinClass; +def tc_bd16579e : InstrItinClass; +def tc_be995eaf : InstrItinClass; +def tc_bf6fa601 : InstrItinClass; +def tc_c0cd91a8 : InstrItinClass; +def tc_c14739d5 : InstrItinClass; +def tc_c1dbc916 : InstrItinClass; +def tc_c58f771a : InstrItinClass; +def tc_c85212ca : InstrItinClass; +def tc_c8f9a6f6 : InstrItinClass; +def tc_ca280e8b : InstrItinClass; +def tc_cbe45117 : InstrItinClass; +def tc_cd321066 : InstrItinClass; +def tc_d108a090 : InstrItinClass; +def tc_d1b5a4b6 : InstrItinClass; +def tc_d2609065 : InstrItinClass; +def tc_d267fa19 : InstrItinClass; +def tc_d2a33af5 : InstrItinClass; +def tc_d63b71d1 : InstrItinClass; +def tc_d6a805a8 : InstrItinClass; +def tc_d95f4e98 : InstrItinClass; +def tc_da79106e : InstrItinClass; +def tc_dbe218dd : InstrItinClass; +def tc_dcfee7ae : InstrItinClass; +def tc_e17ce9ad : InstrItinClass; +def tc_e2480a7f : InstrItinClass; +def tc_e2c08bb4 : InstrItinClass; +def tc_e2c31426 : InstrItinClass; +def tc_e578178f : InstrItinClass; +def tc_e836c161 : InstrItinClass; +def tc_e8c7a357 : InstrItinClass; +def tc_eb07ef6f : InstrItinClass; +def tc_ecfaae86 : InstrItinClass; +def tc_ef0ebaaa : InstrItinClass; +def tc_ef2676fd : InstrItinClass; +def tc_f027ebe9 : InstrItinClass; +def tc_f055fbb6 : InstrItinClass; +def tc_f1240c08 : InstrItinClass; +def tc_f16d5b17 : InstrItinClass; +def tc_f1aa2cdb : InstrItinClass; +def tc_f26aa619 : InstrItinClass; +def tc_f4608adc : InstrItinClass; +def tc_faab1248 : InstrItinClass; +def tc_fcee8723 : InstrItinClass; +def tc_feb4974b : InstrItinClass; + +class DepScalarItinV4 { + list DepScalarItinV4_list = [ + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]> ]; +} + +class DepScalarItinV5 { + list DepScalarItinV5_list = [ + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]>, + InstrItinData ]> ]; +} + +class DepScalarItinV55 { + list DepScalarItinV55_list = [ + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 1, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 3, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]> + ]; +} + +class DepScalarItinV60 { + list DepScalarItinV60_list = [ + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 1, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 3, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]> + ]; +} + +class DepScalarItinV62 { + list DepScalarItinV62_list = [ + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 3, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]> + ]; +} diff --git a/lib/Target/Hexagon/HexagonDepITypes.h b/lib/Target/Hexagon/HexagonDepITypes.h index f8ae39a379942446717fa73c3194d5ed5ab769c6..be831b9501ea3ef6092b22eaf04dece0d7714f43 100644 --- a/lib/Target/Hexagon/HexagonDepITypes.h +++ b/lib/Target/Hexagon/HexagonDepITypes.h @@ -15,39 +15,38 @@ enum Type { TypeALU32_ADDI = 2, TypeALU64 = 3, TypeCJ = 4, - TypeCOPROC_VMEM = 5, - TypeCR = 7, + TypeCR = 6, TypeCVI_HIST = 10, TypeCVI_VA = 16, TypeCVI_VA_DV = 17, TypeCVI_VINLANESAT = 18, - TypeCVI_VM_CUR_LD = 19, - TypeCVI_VM_LD = 20, - TypeCVI_VM_NEW_ST = 21, - TypeCVI_VM_ST = 22, - TypeCVI_VM_STU = 23, - TypeCVI_VM_TMP_LD = 24, - TypeCVI_VM_VP_LDU = 25, - TypeCVI_VP = 26, - TypeCVI_VP_VS = 27, - TypeCVI_VS = 28, - TypeCVI_VX = 30, - TypeCVI_VX_DV = 31, - TypeDUPLEX = 32, - TypeENDLOOP = 33, - TypeEXTENDER = 34, - TypeJ = 35, - TypeLD = 36, - TypeM = 37, - TypeMAPPING = 38, - TypeNCJ = 39, - TypePSEUDO = 40, - TypeST = 41, - TypeSUBINSN = 42, - TypeS_2op = 43, - TypeS_3op = 44, - TypeV2LDST = 47, - TypeV4LDST = 48 + TypeCVI_VM_LD = 19, + TypeCVI_VM_NEW_ST = 20, + TypeCVI_VM_ST = 21, + TypeCVI_VM_STU = 22, + TypeCVI_VM_TMP_LD = 23, + TypeCVI_VM_VP_LDU = 24, + TypeCVI_VP = 25, + TypeCVI_VP_VS = 26, + TypeCVI_VS = 27, + TypeCVI_VX = 29, + TypeCVI_VX_DV = 30, + TypeCVI_VX_LATE = 31, + TypeDUPLEX = 33, + TypeENDLOOP = 34, + TypeEXTENDER = 35, + TypeJ = 36, + TypeLD = 37, + TypeM = 38, + TypeMAPPING = 39, + TypeNCJ = 40, + TypePSEUDO = 41, + TypeST = 42, + TypeSUBINSN = 43, + TypeS_2op = 44, + TypeS_3op = 45, + TypeV2LDST = 48, + TypeV4LDST = 49 }; } } diff --git a/lib/Target/Hexagon/HexagonDepITypes.td b/lib/Target/Hexagon/HexagonDepITypes.td index f1d689ce12f4319d73725c6eb7254dfe99b5b8fa..ac1989e4dd82fd569128d03a036651e0a7ccd344 100644 --- a/lib/Target/Hexagon/HexagonDepITypes.td +++ b/lib/Target/Hexagon/HexagonDepITypes.td @@ -13,36 +13,35 @@ def TypeALU32_3op : IType<1>; def TypeALU32_ADDI : IType<2>; def TypeALU64 : IType<3>; def TypeCJ : IType<4>; -def TypeCOPROC_VMEM : IType<5>; -def TypeCR : IType<7>; +def TypeCR : IType<6>; def TypeCVI_HIST : IType<10>; def TypeCVI_VA : IType<16>; def TypeCVI_VA_DV : IType<17>; def TypeCVI_VINLANESAT : IType<18>; -def TypeCVI_VM_CUR_LD : IType<19>; -def TypeCVI_VM_LD : IType<20>; -def TypeCVI_VM_NEW_ST : IType<21>; -def TypeCVI_VM_ST : IType<22>; -def TypeCVI_VM_STU : IType<23>; -def TypeCVI_VM_TMP_LD : IType<24>; -def TypeCVI_VM_VP_LDU : IType<25>; -def TypeCVI_VP : IType<26>; -def TypeCVI_VP_VS : IType<27>; -def TypeCVI_VS : IType<28>; -def TypeCVI_VX : IType<30>; -def TypeCVI_VX_DV : IType<31>; -def TypeDUPLEX : IType<32>; -def TypeENDLOOP : IType<33>; -def TypeEXTENDER : IType<34>; -def TypeJ : IType<35>; -def TypeLD : IType<36>; -def TypeM : IType<37>; -def TypeMAPPING : IType<38>; -def TypeNCJ : IType<39>; -def TypePSEUDO : IType<40>; -def TypeST : IType<41>; -def TypeSUBINSN : IType<42>; -def TypeS_2op : IType<43>; -def TypeS_3op : IType<44>; -def TypeV2LDST : IType<47>; -def TypeV4LDST : IType<48>; +def TypeCVI_VM_LD : IType<19>; +def TypeCVI_VM_NEW_ST : IType<20>; +def TypeCVI_VM_ST : IType<21>; +def TypeCVI_VM_STU : IType<22>; +def TypeCVI_VM_TMP_LD : IType<23>; +def TypeCVI_VM_VP_LDU : IType<24>; +def TypeCVI_VP : IType<25>; +def TypeCVI_VP_VS : IType<26>; +def TypeCVI_VS : IType<27>; +def TypeCVI_VX : IType<29>; +def TypeCVI_VX_DV : IType<30>; +def TypeCVI_VX_LATE : IType<31>; +def TypeDUPLEX : IType<33>; +def TypeENDLOOP : IType<34>; +def TypeEXTENDER : IType<35>; +def TypeJ : IType<36>; +def TypeLD : IType<37>; +def TypeM : IType<38>; +def TypeMAPPING : IType<39>; +def TypeNCJ : IType<40>; +def TypePSEUDO : IType<41>; +def TypeST : IType<42>; +def TypeSUBINSN : IType<43>; +def TypeS_2op : IType<44>; +def TypeS_3op : IType<45>; +def TypeV2LDST : IType<48>; +def TypeV4LDST : IType<49>; diff --git a/lib/Target/Hexagon/HexagonDepInstrFormats.td b/lib/Target/Hexagon/HexagonDepInstrFormats.td index d7a99f48803bf728df9061384b76b327af4c0e02..1b24be47715871d0f47018908e410ce020c9297c 100644 --- a/lib/Target/Hexagon/HexagonDepInstrFormats.td +++ b/lib/Target/Hexagon/HexagonDepInstrFormats.td @@ -7,233 +7,140 @@ // //===----------------------------------------------------------------------===// -class Enc_12122225 : OpcodeHexagon { - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vx32; - let Inst{7-3} = Vx32{4-0}; - bits <3> Qd8; - let Inst{2-0} = Qd8{2-0}; -} -class Enc_16626097 : OpcodeHexagon { - bits <2> Qs4; - let Inst{6-5} = Qs4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vw32; - let Inst{4-0} = Vw32{4-0}; -} -class Enc_13397056 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <2> Qv4; - let Inst{12-11} = Qv4{1-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_7315939 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <6> n1; - let Inst{28-28} = n1{5-5}; - let Inst{24-22} = n1{4-2}; - let Inst{13-13} = n1{1-1}; - let Inst{8-8} = n1{0-0}; -} -class Enc_15275738 : OpcodeHexagon { - bits <12> Ii; - let Inst{26-25} = Ii{11-10}; - let Inst{13-5} = Ii{9-1}; +class Enc_890909 : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; -} -class Enc_12822813 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rxx32; - let Inst{4-0} = Rxx32{4-0}; bits <2> Pe4; let Inst{6-5} = Pe4{1-0}; } -class Enc_10282127 : OpcodeHexagon { - bits <7> Ii; - let Inst{12-7} = Ii{6-1}; - bits <8> II; - let Inst{13-13} = II{7-7}; - let Inst{6-0} = II{6-0}; +class Enc_527412 : OpcodeHexagon { + bits <2> Ps4; + let Inst{17-16} = Ps4{1-0}; + bits <2> Pt4; + let Inst{9-8} = Pt4{1-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_efaed8 : OpcodeHexagon { + bits <1> Ii; + let Inst{8-8} = Ii{0-0}; +} +class Enc_a568d4 : OpcodeHexagon { + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; + bits <5> Rx32; + let Inst{4-0} = Rx32{4-0}; +} +class Enc_27b757 : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <2> Pv4; + let Inst{12-11} = Pv4{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vs32; + let Inst{4-0} = Vs32{4-0}; } -class Enc_14264243 : OpcodeHexagon { +class Enc_5de85f : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <4> Rt16; - let Inst{11-8} = Rt16{3-0}; -} -class Enc_6778937 : OpcodeHexagon { - bits <5> Rxx32; - let Inst{20-16} = Rxx32{4-0}; - bits <0> sgp10; -} -class Enc_5480539 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <3> Rt8; - let Inst{2-0} = Rt8{2-0}; - bits <5> Vxx32; - let Inst{7-3} = Vxx32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; } -class Enc_11422009 : OpcodeHexagon { +class Enc_0e41fa : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <5> Vy32; - let Inst{12-8} = Vy32{4-0}; - bits <5> Vx32; - let Inst{4-0} = Vx32{4-0}; -} -class Enc_16357011 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{8-4} = Vv32{4-0}; - bits <5> Vt32; - let Inst{13-9} = Vt32{4-0}; - bits <4> Vdd16; - let Inst{3-0} = Vdd16{3-0}; -} -class Enc_4975051 : OpcodeHexagon { - bits <19> Ii; - let Inst{26-25} = Ii{18-17}; - let Inst{20-16} = Ii{16-12}; - let Inst{13-5} = Ii{11-3}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_14786238 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rtt32; - let Inst{20-16} = Rtt32{4-0}; - bits <5> Vx32; - let Inst{7-3} = Vx32{4-0}; -} -class Enc_15472748 : OpcodeHexagon { - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_6773159 : OpcodeHexagon { - bits <6> Ii; - let Inst{12-7} = Ii{5-0}; - bits <5> II; - let Inst{4-0} = II{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; } -class Enc_12535811 : OpcodeHexagon { +class Enc_802dc0 : OpcodeHexagon { + bits <1> Ii; + let Inst{8-8} = Ii{0-0}; bits <2> Qv4; let Inst{23-22} = Qv4{1-0}; - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vx32; - let Inst{4-0} = Vx32{4-0}; } -class Enc_14007201 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-5} = Ii{7-0}; - bits <8> II; - let Inst{22-16} = II{7-1}; - let Inst{13-13} = II{0-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; +class Enc_6b197f : OpcodeHexagon { + bits <4> Ii; + let Inst{8-5} = Ii{3-0}; + bits <5> Ryy32; + let Inst{4-0} = Ryy32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_2577026 : OpcodeHexagon { - bits <3> Qt8; - let Inst{2-0} = Qt8{2-0}; - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; +class Enc_1f5d8f : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Ryy32; + let Inst{4-0} = Ryy32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_7305764 : OpcodeHexagon { - bits <5> II; - let Inst{12-8} = II{4-0}; - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; +class Enc_51436c : OpcodeHexagon { + bits <16> Ii; + let Inst{23-22} = Ii{15-14}; + let Inst{13-0} = Ii{13-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_11682941 : OpcodeHexagon { - bits <19> Ii; - let Inst{26-25} = Ii{18-17}; - let Inst{20-16} = Ii{16-12}; - let Inst{13-13} = Ii{11-11}; - let Inst{7-0} = Ii{10-3}; +class Enc_c7a204 : OpcodeHexagon { + bits <6> II; + let Inst{5-0} = II{5-0}; bits <5> Rtt32; let Inst{12-8} = Rtt32{4-0}; + bits <5> Re32; + let Inst{20-16} = Re32{4-0}; } -class Enc_16376009 : OpcodeHexagon { +class Enc_db40cd : OpcodeHexagon { bits <6> Ii; - let Inst{8-5} = Ii{5-2}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; + let Inst{6-3} = Ii{5-2}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_13249928 : OpcodeHexagon { - bits <9> Ii; - let Inst{13-5} = Ii{8-0}; +class Enc_a1e29d : OpcodeHexagon { + bits <5> Ii; + let Inst{12-8} = Ii{4-0}; + bits <5> II; + let Inst{22-21} = II{4-3}; + let Inst{7-5} = II{2-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; + bits <5> Rx32; + let Inst{4-0} = Rx32{4-0}; } -class Enc_1971351 : OpcodeHexagon { - bits <5> Ii; - let Inst{8-5} = Ii{4-1}; +class Enc_d15d19 : OpcodeHexagon { bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Ryy32; - let Inst{4-0} = Ryy32{4-0}; + bits <5> Vs32; + let Inst{4-0} = Vs32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_13715847 : OpcodeHexagon { - bits <6> Ii; - let Inst{17-16} = Ii{5-4}; - let Inst{6-3} = Ii{3-0}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; +class Enc_e90a15 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; + bits <4> n1; + let Inst{29-29} = n1{3-3}; + let Inst{26-25} = n1{2-1}; + let Inst{22-22} = n1{0-0}; } -class Enc_13303422 : OpcodeHexagon { - bits <5> Ii; - let Inst{8-5} = Ii{4-1}; +class Enc_e0a47a : OpcodeHexagon { + bits <4> Ii; + let Inst{8-5} = Ii{3-0}; bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; bits <5> Rd32; @@ -241,29 +148,32 @@ class Enc_13303422 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_14574598 : OpcodeHexagon { - bits <6> Ii; - let Inst{13-8} = Ii{5-0}; +class Enc_140c83 : OpcodeHexagon { + bits <10> Ii; + let Inst{21-21} = Ii{9-9}; + let Inst{13-5} = Ii{8-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_13094118 : OpcodeHexagon { - bits <5> Css32; - let Inst{20-16} = Css32{4-0}; +class Enc_7eee72 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_4231995 : OpcodeHexagon { - bits <6> Ii; - let Inst{13-8} = Ii{5-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; +class Enc_d7dc10 : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; } -class Enc_844699 : OpcodeHexagon { +class Enc_736575 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; @@ -271,74 +181,87 @@ class Enc_844699 : OpcodeHexagon { let Inst{19-16} = Rs16{3-0}; bits <4> n1; let Inst{28-28} = n1{3-3}; - let Inst{24-22} = n1{2-0}; + let Inst{25-23} = n1{2-0}; } -class Enc_8752140 : OpcodeHexagon { - bits <6> Ii; - let Inst{8-5} = Ii{5-2}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_8dec2e : OpcodeHexagon { + bits <5> Ii; + let Inst{12-8} = Ii{4-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_7978128 : OpcodeHexagon { - bits <1> Ii; - let Inst{8-8} = Ii{0-0}; - bits <2> Qv4; - let Inst{23-22} = Qv4{1-0}; +class Enc_eaa9f8 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <2> Qx4; + let Inst{1-0} = Qx4{1-0}; } -class Enc_10492541 : OpcodeHexagon { - bits <6> Ii; - let Inst{6-3} = Ii{5-2}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_509701 : OpcodeHexagon { + bits <19> Ii; + let Inst{26-25} = Ii{18-17}; + let Inst{20-16} = Ii{16-12}; + let Inst{13-5} = Ii{11-3}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_0 : OpcodeHexagon { +class Enc_830e5d : OpcodeHexagon { + bits <8> Ii; + let Inst{12-5} = Ii{7-0}; + bits <8> II; + let Inst{22-16} = II{7-1}; + let Inst{13-13} = II{0-0}; + bits <2> Pu4; + let Inst{24-23} = Pu4{1-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_15733946 : OpcodeHexagon { - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; +class Enc_79b8c8 : OpcodeHexagon { + bits <6> Ii; + let Inst{6-3} = Ii{5-2}; bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_738356 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; +class Enc_58a8bf : OpcodeHexagon { + bits <3> Ii; let Inst{10-8} = Ii{2-0}; bits <2> Pv4; let Inst{12-11} = Pv4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; bits <5> Vd32; let Inst{4-0} = Vd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_14400220 : OpcodeHexagon { - bits <5> Ii; - let Inst{9-5} = Ii{4-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; +class Enc_041d7b : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <5> n1; + let Inst{28-28} = n1{4-4}; + let Inst{24-23} = n1{3-2}; + let Inst{13-13} = n1{1-1}; + let Inst{8-8} = n1{0-0}; } -class Enc_15194851 : OpcodeHexagon { +class Enc_f44229 : OpcodeHexagon { + bits <7> Ii; + let Inst{13-13} = Ii{6-6}; + let Inst{7-3} = Ii{5-1}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <2> Pu4; - let Inst{6-5} = Pu4{1-0}; - bits <5> Rx32; - let Inst{4-0} = Rx32{4-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; } -class Enc_14172170 : OpcodeHexagon { - bits <1> Ii; - let Inst{5-5} = Ii{0-0}; +class Enc_aad80c : OpcodeHexagon { bits <5> Vuu32; let Inst{12-8} = Vuu32{4-0}; bits <5> Rt32; @@ -346,413 +269,269 @@ class Enc_14172170 : OpcodeHexagon { bits <5> Vdd32; let Inst{4-0} = Vdd32{4-0}; } -class Enc_10065510 : OpcodeHexagon { - bits <6> Ii; - let Inst{6-3} = Ii{5-2}; +class Enc_87c142 : OpcodeHexagon { + bits <7> Ii; + let Inst{8-4} = Ii{6-2}; + bits <4> Rt16; + let Inst{3-0} = Rt16{3-0}; +} +class Enc_86a14b : OpcodeHexagon { + bits <8> Ii; + let Inst{7-3} = Ii{7-3}; + bits <3> Rdd8; + let Inst{2-0} = Rdd8{2-0}; +} +class Enc_9a33d5 : OpcodeHexagon { + bits <7> Ii; + let Inst{6-3} = Ii{6-3}; bits <2> Pv4; let Inst{1-0} = Pv4{1-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_14998517 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; - bits <3> n1; - let Inst{29-29} = n1{2-2}; - let Inst{26-25} = n1{1-0}; +class Enc_a56825 : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_16657398 : OpcodeHexagon { - bits <6> Ii; - let Inst{17-16} = Ii{5-4}; - let Inst{6-3} = Ii{3-0}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; +class Enc_9ea4cf : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{6-6} = Ii{0-0}; + bits <6> II; + let Inst{5-0} = II{5-0}; + bits <5> Ru32; + let Inst{20-16} = Ru32{4-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; } -class Enc_14620934 : OpcodeHexagon { - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; +class Enc_ee5ed0 : OpcodeHexagon { + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; + bits <4> Rd16; + let Inst{3-0} = Rd16{3-0}; + bits <2> n1; + let Inst{9-8} = n1{1-0}; } -class Enc_10075393 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; +class Enc_935d9b : OpcodeHexagon { + bits <5> Ii; + let Inst{6-3} = Ii{4-1}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; -} -class Enc_8638014 : OpcodeHexagon { - bits <16> Ii; - let Inst{21-21} = Ii{15-15}; - let Inst{13-8} = Ii{14-9}; - let Inst{2-0} = Ii{8-6}; - bits <5> Vss32; - let Inst{7-3} = Vss32{4-0}; + let Inst{12-8} = Rt32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_13261538 : OpcodeHexagon { - bits <3> Ii; - let Inst{7-5} = Ii{2-0}; - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <5> Vdd32; - let Inst{4-0} = Vdd32{4-0}; +class Enc_61f0b0 : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rxx32; + let Inst{4-0} = Rxx32{4-0}; } -class Enc_8990840 : OpcodeHexagon { - bits <13> Ii; - let Inst{26-25} = Ii{12-11}; - let Inst{13-5} = Ii{10-2}; +class Enc_bd6011 : OpcodeHexagon { + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_5974204 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vvv32; - let Inst{12-8} = Vvv32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; +class Enc_65d691 : OpcodeHexagon { + bits <2> Ps4; + let Inst{17-16} = Ps4{1-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; } -class Enc_4711514 : OpcodeHexagon { - bits <2> Qu4; - let Inst{9-8} = Qu4{1-0}; +class Enc_e8c45e : OpcodeHexagon { + bits <7> Ii; + let Inst{13-13} = Ii{6-6}; + let Inst{7-3} = Ii{5-1}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; + let Inst{12-8} = Rt32{4-0}; } -class Enc_11492529 : OpcodeHexagon { - bits <5> Ii; - let Inst{6-3} = Ii{4-1}; +class Enc_ca3887 : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; } -class Enc_9277990 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; +class Enc_a94f3b : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; + bits <2> Pe4; + let Inst{6-5} = Pe4{1-0}; } -class Enc_6690615 : OpcodeHexagon { - bits <7> Ii; - let Inst{8-4} = Ii{6-2}; +class Enc_625deb : OpcodeHexagon { + bits <4> Ii; + let Inst{10-8} = Ii{3-1}; + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; bits <4> Rt16; let Inst{3-0} = Rt16{3-0}; } -class Enc_1220199 : OpcodeHexagon { - bits <2> Qv4; - let Inst{23-22} = Qv4{1-0}; - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_7785569 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <6> n1; - let Inst{28-28} = n1{5-5}; - let Inst{25-22} = n1{4-1}; - let Inst{8-8} = n1{0-0}; +class Enc_1f5ba6 : OpcodeHexagon { + bits <4> Rd16; + let Inst{3-0} = Rd16{3-0}; } -class Enc_2880796 : OpcodeHexagon { - bits <5> Ii; - let Inst{12-8} = Ii{4-0}; - bits <5> II; - let Inst{22-21} = II{4-3}; - let Inst{7-5} = II{2-0}; +class Enc_cd82bc : OpcodeHexagon { + bits <4> Ii; + let Inst{21-21} = Ii{3-3}; + let Inst{7-5} = Ii{2-0}; + bits <6> II; + let Inst{13-8} = II{5-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rx32; let Inst{4-0} = Rx32{4-0}; } -class Enc_6858527 : OpcodeHexagon { - bits <2> Qs4; - let Inst{6-5} = Qs4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vv32; - let Inst{4-0} = Vv32{4-0}; -} -class Enc_11863656 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rtt32; - let Inst{20-16} = Rtt32{4-0}; - bits <5> Vx32; - let Inst{4-0} = Vx32{4-0}; +class Enc_399e12 : OpcodeHexagon { + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; + bits <3> Rdd8; + let Inst{2-0} = Rdd8{2-0}; } -class Enc_151014 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; - bits <2> Px4; - let Inst{6-5} = Px4{1-0}; -} -class Enc_10333841 : OpcodeHexagon { - bits <16> Ii; - let Inst{21-21} = Ii{15-15}; - let Inst{13-8} = Ii{14-9}; - let Inst{2-0} = Ii{8-6}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; -} -class Enc_14044877 : OpcodeHexagon { +class Enc_d7a65e : OpcodeHexagon { bits <6> Ii; - let Inst{13-13} = Ii{5-5}; - let Inst{7-3} = Ii{4-0}; + let Inst{12-7} = Ii{5-0}; + bits <6> II; + let Inst{13-13} = II{5-5}; + let Inst{4-0} = II{4-0}; bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; + let Inst{6-5} = Pv4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; -} -class Enc_13691337 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; - bits <2> Qx4; - let Inst{6-5} = Qx4{1-0}; } -class Enc_3817033 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{20-16} = Vuu32{4-0}; - bits <3> Qt8; - let Inst{10-8} = Qt8{2-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; +class Enc_607661 : OpcodeHexagon { + bits <6> Ii; + let Inst{12-7} = Ii{5-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_3540372 : OpcodeHexagon { - bits <5> Rtt32; - let Inst{20-16} = Rtt32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; +class Enc_6a5972 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <4> Rt16; + let Inst{11-8} = Rt16{3-0}; } -class Enc_5200852 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_53dca9 : OpcodeHexagon { + bits <6> Ii; + let Inst{11-8} = Ii{5-2}; + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; + bits <4> Rd16; + let Inst{3-0} = Rd16{3-0}; } -class Enc_15949334 : OpcodeHexagon { +class Enc_27fd0e : OpcodeHexagon { + bits <6> Ii; + let Inst{8-5} = Ii{5-2}; bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_3831744 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; -} -class Enc_8280533 : OpcodeHexagon { - bits <3> Ii; - let Inst{7-5} = Ii{2-0}; - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <5> Vx32; - let Inst{4-0} = Vx32{4-0}; -} -class Enc_10969213 : OpcodeHexagon { - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vvv32; - let Inst{12-8} = Vvv32{4-0}; - bits <5> Vw32; - let Inst{4-0} = Vw32{4-0}; -} -class Enc_3974695 : OpcodeHexagon { +class Enc_93af4c : OpcodeHexagon { bits <7> Ii; let Inst{10-4} = Ii{6-0}; bits <4> Rx16; let Inst{3-0} = Rx16{3-0}; } -class Enc_7255914 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; +class Enc_5bdd42 : OpcodeHexagon { + bits <7> Ii; + let Inst{8-5} = Ii{6-3}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_7212930 : OpcodeHexagon { - bits <5> Ii; - let Inst{8-5} = Ii{4-1}; - bits <2> Pt4; - let Inst{10-9} = Pt4{1-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_71f1b4 : OpcodeHexagon { + bits <6> Ii; + let Inst{8-5} = Ii{5-2}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_12781442 : OpcodeHexagon { - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <2> Qd4; - let Inst{1-0} = Qd4{1-0}; -} -class Enc_799555 : OpcodeHexagon { - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; -} -class Enc_11083408 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{23-19} = Vv32{4-0}; - bits <3> Rt8; - let Inst{18-16} = Rt8{2-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_900013 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_9487067 : OpcodeHexagon { - bits <12> Ii; - let Inst{19-16} = Ii{11-8}; - let Inst{12-5} = Ii{7-0}; - bits <2> Pu4; - let Inst{22-21} = Pu4{1-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_16014536 : OpcodeHexagon { - bits <10> Ii; - let Inst{21-21} = Ii{9-9}; - let Inst{13-5} = Ii{8-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; -} -class Enc_12419313 : OpcodeHexagon { +class Enc_14640c : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; bits <4> Rs16; let Inst{19-16} = Rs16{3-0}; - bits <4> n1; - let Inst{28-28} = n1{3-3}; - let Inst{24-23} = n1{2-1}; + bits <5> n1; + let Inst{28-28} = n1{4-4}; + let Inst{24-22} = n1{3-1}; let Inst{13-13} = n1{0-0}; } -class Enc_5503430 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{12-8} = Vuu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; -} -class Enc_14767681 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{23-19} = Vv32{4-0}; - bits <3> Rt8; - let Inst{18-16} = Rt8{2-0}; - bits <5> Vdd32; - let Inst{4-0} = Vdd32{4-0}; -} -class Enc_9093094 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-5} = Ii{7-0}; - bits <8> II; - let Inst{22-16} = II{7-1}; - let Inst{13-13} = II{0-0}; - bits <2> Pu4; - let Inst{24-23} = Pu4{1-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_11542684 : OpcodeHexagon { - bits <16> Ii; - let Inst{27-21} = Ii{15-9}; - let Inst{13-5} = Ii{8-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_8877260 : OpcodeHexagon { +class Enc_31db33 : OpcodeHexagon { + bits <2> Qt4; + let Inst{6-5} = Qt4{1-0}; bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; bits <5> Vv32; - let Inst{23-19} = Vv32{4-0}; - bits <3> Rt8; - let Inst{18-16} = Rt8{2-0}; - bits <5> Vx32; - let Inst{4-0} = Vx32{4-0}; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; } -class Enc_1737833 : OpcodeHexagon { +class Enc_65f095 : OpcodeHexagon { bits <6> Ii; - let Inst{13-13} = Ii{5-5}; - let Inst{7-3} = Ii{4-0}; + let Inst{6-3} = Ii{5-2}; bits <2> Pv4; let Inst{1-0} = Pv4{1-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; bits <3> Nt8; let Inst{10-8} = Nt8{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_255516 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{20-16} = Vuu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; +class Enc_784502 : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <2> Pv4; + let Inst{12-11} = Pv4{1-0}; + bits <3> Os8; + let Inst{2-0} = Os8{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_6413b6 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; + bits <5> n1; + let Inst{29-29} = n1{4-4}; + let Inst{26-25} = n1{3-2}; + let Inst{23-23} = n1{1-1}; + let Inst{13-13} = n1{0-0}; +} +class Enc_7a0ea6 : OpcodeHexagon { + bits <4> Rd16; + let Inst{3-0} = Rd16{3-0}; + bits <1> n1; + let Inst{9-9} = n1{0-0}; } -class Enc_10721363 : OpcodeHexagon { +class Enc_84bff1 : OpcodeHexagon { bits <2> Ii; let Inst{13-13} = Ii{1-1}; let Inst{7-7} = Ii{0-0}; @@ -760,90 +539,138 @@ class Enc_10721363 : OpcodeHexagon { let Inst{20-16} = Rs32{4-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_7076358 : OpcodeHexagon { - bits <5> Zdd8; - let Inst{4-0} = Zdd8{4-0}; +class Enc_74aef2 : OpcodeHexagon { + bits <4> Ii; + let Inst{8-5} = Ii{3-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Ryy32; + let Inst{4-0} = Ryy32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_11930928 : OpcodeHexagon { - bits <5> Ii; - let Inst{12-8} = Ii{4-0}; - bits <5> II; - let Inst{22-21} = II{4-3}; - let Inst{7-5} = II{2-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_78e566 : OpcodeHexagon { + bits <2> Pt4; + let Inst{9-8} = Pt4{1-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_2410156 : OpcodeHexagon { - bits <5> Ii; - let Inst{12-8} = Ii{4-0}; +class Enc_437f33 : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <2> Pu4; + let Inst{6-5} = Pu4{1-0}; bits <5> Rx32; let Inst{4-0} = Rx32{4-0}; } -class Enc_6735062 : OpcodeHexagon { - bits <2> Ps4; - let Inst{17-16} = Ps4{1-0}; - bits <2> Pt4; - let Inst{9-8} = Pt4{1-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_7965855 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; -} -class Enc_5202340 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vyy32; - let Inst{4-0} = Vyy32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_0527db : OpcodeHexagon { + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; + bits <4> Rx16; + let Inst{3-0} = Rx16{3-0}; } -class Enc_10568534 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-5} = Ii{7-0}; - bits <2> Pu4; - let Inst{22-21} = Pu4{1-0}; +class Enc_420cf3 : OpcodeHexagon { + bits <6> Ii; + let Inst{22-21} = Ii{5-4}; + let Inst{13-13} = Ii{3-3}; + let Inst{7-5} = Ii{2-0}; + bits <5> Ru32; + let Inst{4-0} = Ru32{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; + let Inst{12-8} = Rd32{4-0}; } -class Enc_16730127 : OpcodeHexagon { - bits <3> Ii; - let Inst{7-5} = Ii{2-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; +class Enc_e39bb2 : OpcodeHexagon { + bits <6> Ii; + let Inst{9-4} = Ii{5-0}; + bits <4> Rd16; + let Inst{3-0} = Rd16{3-0}; } -class Enc_11224149 : OpcodeHexagon { - bits <8> Ii; - let Inst{13-13} = Ii{7-7}; - let Inst{7-3} = Ii{6-2}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; +class Enc_1b64fb : OpcodeHexagon { + bits <16> Ii; + let Inst{26-25} = Ii{15-14}; + let Inst{20-16} = Ii{13-9}; + let Inst{13-13} = Ii{8-8}; + let Inst{7-0} = Ii{7-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; +} +class Enc_c6220b : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{7-7} = Ii{0-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Ru32; + let Inst{12-8} = Ru32{4-0}; + bits <3> Nt8; + let Inst{2-0} = Nt8{2-0}; +} +class Enc_322e1b : OpcodeHexagon { + bits <6> Ii; + let Inst{22-21} = Ii{5-4}; + let Inst{13-13} = Ii{3-3}; + let Inst{7-5} = Ii{2-0}; + bits <6> II; + let Inst{23-23} = II{5-5}; + let Inst{4-0} = II{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{12-8} = Rd32{4-0}; +} +class Enc_989021 : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vy32; + let Inst{12-8} = Vy32{4-0}; + bits <5> Vx32; + let Inst{4-0} = Vx32{4-0}; +} +class Enc_178717 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <6> n1; + let Inst{28-28} = n1{5-5}; + let Inst{25-23} = n1{4-2}; + let Inst{13-13} = n1{1-1}; + let Inst{8-8} = n1{0-0}; +} +class Enc_78cbf0 : OpcodeHexagon { + bits <18> Ii; + let Inst{26-25} = Ii{17-16}; + let Inst{20-16} = Ii{15-11}; + let Inst{13-13} = Ii{10-10}; + let Inst{7-0} = Ii{9-2}; bits <3> Nt8; let Inst{10-8} = Nt8{2-0}; } -class Enc_9772987 : OpcodeHexagon { +class Enc_052c7d : OpcodeHexagon { + bits <5> Ii; + let Inst{6-3} = Ii{4-1}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_fcf7a7 : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; +} +class Enc_55355c : OpcodeHexagon { bits <2> Ii; let Inst{13-13} = Ii{1-1}; let Inst{7-7} = Ii{0-0}; @@ -854,76 +681,105 @@ class Enc_9772987 : OpcodeHexagon { bits <5> Rtt32; let Inst{4-0} = Rtt32{4-0}; } -class Enc_9238139 : OpcodeHexagon { +class Enc_211aaa : OpcodeHexagon { + bits <11> Ii; + let Inst{26-25} = Ii{10-9}; + let Inst{13-5} = Ii{8-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_6185fe : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{7-7} = Ii{0-0}; + bits <6> II; + let Inst{11-8} = II{5-2}; + let Inst{6-5} = II{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_cd4705 : OpcodeHexagon { + bits <3> Ii; + let Inst{7-5} = Ii{2-0}; + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vx32; + let Inst{4-0} = Vx32{4-0}; +} +class Enc_2ebe3b : OpcodeHexagon { bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Zdd8; - let Inst{4-0} = Zdd8{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_2082775 : OpcodeHexagon { - bits <4> Ii; - let Inst{11-8} = Ii{3-0}; +class Enc_3d5b28 : OpcodeHexagon { bits <5> Rss32; let Inst{20-16} = Rss32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_5790679 : OpcodeHexagon { - bits <9> Ii; - let Inst{12-8} = Ii{8-4}; - let Inst{4-3} = Ii{3-2}; +class Enc_5ab2be : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_9305257 : OpcodeHexagon { - bits <5> Zu8; - let Inst{12-8} = Zu8{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; +class Enc_fef969 : OpcodeHexagon { + bits <6> Ii; + let Inst{20-16} = Ii{5-1}; + let Inst{5-5} = Ii{0-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_3735566 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; +class Enc_63eaeb : OpcodeHexagon { + bits <2> Ii; + let Inst{1-0} = Ii{1-0}; + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; +} +class Enc_95441f : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <2> Qd4; + let Inst{1-0} = Qd4{1-0}; +} +class Enc_372c9d : OpcodeHexagon { bits <2> Pv4; let Inst{12-11} = Pv4{1-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; bits <3> Os8; let Inst{2-0} = Os8{2-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_12654528 : OpcodeHexagon { - bits <2> Qs4; - let Inst{6-5} = Qs4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; +class Enc_4dff07 : OpcodeHexagon { + bits <2> Qv4; + let Inst{12-11} = Qv4{1-0}; bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Vvv32; - let Inst{4-0} = Vvv32{4-0}; -} -class Enc_15290236 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <5> Vdd32; - let Inst{4-0} = Vdd32{4-0}; -} -class Enc_11139981 : OpcodeHexagon { - bits <2> Ps4; - let Inst{17-16} = Ps4{1-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_15546666 : OpcodeHexagon { - bits <9> Ii; - let Inst{10-8} = Ii{8-6}; + bits <5> Vs32; + let Inst{4-0} = Vs32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_486163 : OpcodeHexagon { +class Enc_04c959 : OpcodeHexagon { bits <2> Ii; let Inst{13-13} = Ii{1-1}; let Inst{7-7} = Ii{0-0}; @@ -932,143 +788,231 @@ class Enc_486163 : OpcodeHexagon { let Inst{6-5} = II{1-0}; bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_2079016 : OpcodeHexagon { - bits <2> Ii; - let Inst{1-0} = Ii{1-0}; - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; + bits <5> Ryy32; + let Inst{4-0} = Ryy32{4-0}; } -class Enc_10095813 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rtt32; - let Inst{20-16} = Rtt32{4-0}; - bits <5> Vdd32; - let Inst{4-0} = Vdd32{4-0}; +class Enc_b62ef7 : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <5> Vs32; + let Inst{4-0} = Vs32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_13133322 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vx32; - let Inst{7-3} = Vx32{4-0}; +class Enc_2b518f : OpcodeHexagon { + bits <32> Ii; + let Inst{27-16} = Ii{31-20}; + let Inst{13-0} = Ii{19-6}; } -class Enc_9422954 : OpcodeHexagon { - bits <2> Pu4; - let Inst{9-8} = Pu4{1-0}; +class Enc_b388cf : OpcodeHexagon { + bits <5> Ii; + let Inst{12-8} = Ii{4-0}; + bits <5> II; + let Inst{22-21} = II{4-3}; + let Inst{7-5} = II{2-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_10642833 : OpcodeHexagon { +class Enc_ad1c74 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; +} +class Enc_74d4e5 : OpcodeHexagon { bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Vs32; - let Inst{7-3} = Vs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_14989332 : OpcodeHexagon { - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vv32; - let Inst{4-0} = Vv32{4-0}; -} -class Enc_10263630 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <3> Rt8; - let Inst{2-0} = Rt8{2-0}; - bits <5> Vx32; - let Inst{7-3} = Vx32{4-0}; +class Enc_c90aca : OpcodeHexagon { + bits <8> Ii; + let Inst{12-5} = Ii{7-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rx32; + let Inst{4-0} = Rx32{4-0}; } -class Enc_13937564 : OpcodeHexagon { +class Enc_222336 : OpcodeHexagon { bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <3> Os8; - let Inst{2-0} = Os8{2-0}; + let Inst{8-5} = Ii{3-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_7171569 : OpcodeHexagon { - bits <3> Ii; - let Inst{7-5} = Ii{2-0}; +class Enc_5e87ce : OpcodeHexagon { + bits <16> Ii; + let Inst{23-22} = Ii{15-14}; + let Inst{20-16} = Ii{13-9}; + let Inst{13-5} = Ii{8-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_f7ea77 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; + bits <4> n1; + let Inst{29-29} = n1{3-3}; + let Inst{26-25} = n1{2-1}; + let Inst{13-13} = n1{0-0}; +} +class Enc_245865 : OpcodeHexagon { bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_2702036 : OpcodeHexagon { - bits <10> Ii; - let Inst{21-21} = Ii{9-9}; - let Inst{13-5} = Ii{8-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; + let Inst{23-19} = Vv32{4-0}; + bits <3> Rt8; + let Inst{18-16} = Rt8{2-0}; + bits <5> Vx32; + let Inst{4-0} = Vx32{4-0}; } -class Enc_1928953 : OpcodeHexagon { +class Enc_88d4d9 : OpcodeHexagon { bits <2> Pu4; let Inst{9-8} = Pu4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; } -class Enc_5853469 : OpcodeHexagon { +class Enc_c0cdde : OpcodeHexagon { + bits <9> Ii; + let Inst{13-5} = Ii{8-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; +} +class Enc_226535 : OpcodeHexagon { + bits <8> Ii; + let Inst{12-7} = Ii{7-2}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{4-0} = Rt32{4-0}; +} +class Enc_31aa6a : OpcodeHexagon { + bits <5> Ii; + let Inst{6-3} = Ii{4-1}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_397f23 : OpcodeHexagon { + bits <8> Ii; + let Inst{13-13} = Ii{7-7}; + let Inst{7-3} = Ii{6-2}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; +} +class Enc_865390 : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <2> Pv4; + let Inst{12-11} = Pv4{1-0}; + bits <5> Vs32; + let Inst{4-0} = Vs32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_98c0b8 : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{7-7} = Ii{0-0}; + bits <2> Pv4; + let Inst{6-5} = Pv4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_bfbf03 : OpcodeHexagon { + bits <2> Qs4; + let Inst{9-8} = Qs4{1-0}; + bits <2> Qd4; + let Inst{1-0} = Qd4{1-0}; +} +class Enc_ecbcc8 : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; +} +class Enc_f5e933 : OpcodeHexagon { + bits <2> Ps4; + let Inst{17-16} = Ps4{1-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; - bits <2> Pe4; - let Inst{6-5} = Pe4{1-0}; } -class Enc_7692963 : OpcodeHexagon { +class Enc_3fc427 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vxx32; + let Inst{4-0} = Vxx32{4-0}; +} +class Enc_01d3d0 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; +} +class Enc_b0e9d8 : OpcodeHexagon { + bits <10> Ii; + let Inst{21-21} = Ii{9-9}; + let Inst{13-5} = Ii{8-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rx32; let Inst{4-0} = Rx32{4-0}; } -class Enc_15140689 : OpcodeHexagon { +class Enc_3694bd : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; bits <3> Ns8; let Inst{18-16} = Ns8{2-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; + bits <5> n1; + let Inst{29-29} = n1{4-4}; + let Inst{26-25} = n1{3-2}; + let Inst{23-22} = n1{1-0}; } -class Enc_748676 : OpcodeHexagon { - bits <12> Ii; - let Inst{26-25} = Ii{11-10}; - let Inst{13-13} = Ii{9-9}; - let Inst{7-0} = Ii{8-1}; +class Enc_a42857 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <5> n1; + let Inst{28-28} = n1{4-4}; + let Inst{24-22} = n1{3-1}; + let Inst{8-8} = n1{0-0}; +} +class Enc_b7fad3 : OpcodeHexagon { + bits <2> Pv4; + let Inst{9-8} = Pv4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; -} -class Enc_3372766 : OpcodeHexagon { - bits <5> Ii; - let Inst{8-5} = Ii{4-1}; - bits <5> Ryy32; - let Inst{4-0} = Ryy32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_7900405 : OpcodeHexagon { +class Enc_223005 : OpcodeHexagon { bits <6> Ii; let Inst{6-3} = Ii{5-2}; bits <3> Nt8; @@ -1076,180 +1020,206 @@ class Enc_7900405 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_11930027 : OpcodeHexagon { - bits <12> Ii; - let Inst{26-25} = Ii{11-10}; - let Inst{13-5} = Ii{9-1}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Ryy32; - let Inst{4-0} = Ryy32{4-0}; +class Enc_9e4c3f : OpcodeHexagon { + bits <6> II; + let Inst{13-8} = II{5-0}; + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rd16; + let Inst{19-16} = Rd16{3-0}; } -class Enc_971574 : OpcodeHexagon { +class Enc_8b8d61 : OpcodeHexagon { bits <6> Ii; let Inst{22-21} = Ii{5-4}; let Inst{13-13} = Ii{3-3}; let Inst{7-5} = Ii{2-0}; - bits <6> II; - let Inst{23-23} = II{5-5}; - let Inst{4-0} = II{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; + bits <5> Ru32; + let Inst{4-0} = Ru32{4-0}; bits <5> Rd32; let Inst{12-8} = Rd32{4-0}; } -class Enc_13453446 : OpcodeHexagon { - bits <24> Ii; - let Inst{24-16} = Ii{23-15}; - let Inst{13-1} = Ii{14-2}; +class Enc_88c16c : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rxx32; + let Inst{4-0} = Rxx32{4-0}; } -class Enc_6356866 : OpcodeHexagon { - bits <10> Ii; - let Inst{21-21} = Ii{9-9}; - let Inst{13-5} = Ii{8-0}; +class Enc_770858 : OpcodeHexagon { + bits <2> Ps4; + let Inst{6-5} = Ps4{1-0}; + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} +class Enc_bd811a : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rx32; - let Inst{4-0} = Rx32{4-0}; -} -class Enc_16246706 : OpcodeHexagon { - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; + bits <5> Cd32; + let Inst{4-0} = Cd32{4-0}; } -class Enc_5326450 : OpcodeHexagon { - bits <4> Ii; - let Inst{6-3} = Ii{3-0}; +class Enc_b05839 : OpcodeHexagon { + bits <7> Ii; + let Inst{8-5} = Ii{6-3}; bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_11687333 : OpcodeHexagon { - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; +class Enc_bc03e5 : OpcodeHexagon { + bits <17> Ii; + let Inst{26-25} = Ii{16-15}; + let Inst{20-16} = Ii{14-10}; + let Inst{13-13} = Ii{9-9}; + let Inst{7-0} = Ii{8-1}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; +} +class Enc_412ff0 : OpcodeHexagon { bits <5> Rss32; let Inst{20-16} = Rss32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; + bits <5> Ru32; + let Inst{4-0} = Ru32{4-0}; + bits <5> Rxx32; + let Inst{12-8} = Rxx32{4-0}; } -class Enc_2771456 : OpcodeHexagon { - bits <5> Ii; - let Inst{12-8} = Ii{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_c9a18e : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; } -class Enc_11282123 : OpcodeHexagon { - bits <6> Ii; - let Inst{12-7} = Ii{5-0}; - bits <8> II; - let Inst{13-13} = II{7-7}; - let Inst{6-0} = II{6-0}; +class Enc_be32a5 : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; -} -class Enc_518319 : OpcodeHexagon { - bits <6> Ii; - let Inst{20-16} = Ii{5-1}; - let Inst{5-5} = Ii{0-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_16104442 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; +class Enc_e6abcf : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; bits <5> Rtt32; - let Inst{20-16} = Rtt32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; + let Inst{12-8} = Rtt32{4-0}; } -class Enc_7912540 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rxx32; - let Inst{4-0} = Rxx32{4-0}; -} -class Enc_15560488 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_7581852 : OpcodeHexagon { +class Enc_6339d5 : OpcodeHexagon { bits <2> Ii; let Inst{13-13} = Ii{1-1}; let Inst{7-7} = Ii{0-0}; + bits <2> Pv4; + let Inst{6-5} = Pv4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; + bits <5> Ru32; + let Inst{12-8} = Ru32{4-0}; bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; + let Inst{4-0} = Rt32{4-0}; } -class Enc_10030031 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; +class Enc_d6990d : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; + bits <5> Vxx32; + let Inst{4-0} = Vxx32{4-0}; } -class Enc_3915770 : OpcodeHexagon { - bits <4> Ii; +class Enc_6c9440 : OpcodeHexagon { + bits <10> Ii; + let Inst{21-21} = Ii{9-9}; + let Inst{13-5} = Ii{8-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_0d8adb : OpcodeHexagon { + bits <8> Ii; + let Inst{12-5} = Ii{7-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; +} +class Enc_50e578 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_1cf4ca : OpcodeHexagon { + bits <6> Ii; + let Inst{17-16} = Ii{5-4}; let Inst{6-3} = Ii{3-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; } -class Enc_4075554 : OpcodeHexagon { +class Enc_48b75f : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; } -class Enc_11326438 : OpcodeHexagon { +class Enc_b97f71 : OpcodeHexagon { bits <6> Ii; - let Inst{6-3} = Ii{5-2}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; + let Inst{8-5} = Ii{5-2}; + bits <2> Pt4; + let Inst{10-9} = Pt4{1-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_4050532 : OpcodeHexagon { - bits <16> Ii; - let Inst{26-25} = Ii{15-14}; - let Inst{20-16} = Ii{13-9}; - let Inst{13-13} = Ii{8-8}; - let Inst{7-0} = Ii{7-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; +class Enc_9d1247 : OpcodeHexagon { + bits <7> Ii; + let Inst{8-5} = Ii{6-3}; + bits <2> Pt4; + let Inst{10-9} = Pt4{1-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_14461004 : OpcodeHexagon { - bits <11> Ii; - let Inst{26-25} = Ii{10-9}; - let Inst{13-5} = Ii{8-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; +class Enc_f4413a : OpcodeHexagon { + bits <4> Ii; + let Inst{8-5} = Ii{3-0}; + bits <2> Pt4; + let Inst{10-9} = Pt4{1-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_f7430e : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <2> Pv4; + let Inst{12-11} = Pv4{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <3> Os8; + let Inst{2-0} = Os8{2-0}; +} +class Enc_e7581c : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; } -class Enc_13344657 : OpcodeHexagon { +class Enc_2301d6 : OpcodeHexagon { bits <6> Ii; let Inst{20-16} = Ii{5-1}; let Inst{8-8} = Ii{0-0}; @@ -1258,32 +1228,122 @@ class Enc_13344657 : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_13114546 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; +class Enc_c31910 : OpcodeHexagon { + bits <8> Ii; + let Inst{23-21} = Ii{7-5}; + let Inst{13-13} = Ii{4-4}; + let Inst{7-5} = Ii{3-1}; + let Inst{3-3} = Ii{0-0}; + bits <5> II; + let Inst{12-8} = II{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_2f2f04 : OpcodeHexagon { + bits <1> Ii; let Inst{5-5} = Ii{0-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rxx32; - let Inst{4-0} = Rxx32{4-0}; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; +} +class Enc_8d8a30 : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <2> Pv4; + let Inst{12-11} = Pv4{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} +class Enc_2d7491 : OpcodeHexagon { + bits <13> Ii; + let Inst{26-25} = Ii{12-11}; + let Inst{13-5} = Ii{10-2}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_a803e0 : OpcodeHexagon { + bits <7> Ii; + let Inst{12-7} = Ii{6-1}; + bits <8> II; + let Inst{13-13} = II{7-7}; + let Inst{6-0} = II{6-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; +} +class Enc_45364e : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; } -class Enc_14530015 : OpcodeHexagon { +class Enc_b909d2 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; bits <4> Rs16; let Inst{19-16} = Rs16{3-0}; - bits <6> n1; - let Inst{28-28} = n1{5-5}; - let Inst{25-23} = n1{4-2}; + bits <7> n1; + let Inst{28-28} = n1{6-6}; + let Inst{25-22} = n1{5-2}; let Inst{13-13} = n1{1-1}; let Inst{8-8} = n1{0-0}; } -class Enc_5967898 : OpcodeHexagon { +class Enc_e6c957 : OpcodeHexagon { + bits <10> Ii; + let Inst{21-21} = Ii{9-9}; + let Inst{13-5} = Ii{8-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_fa3ba4 : OpcodeHexagon { + bits <14> Ii; + let Inst{26-25} = Ii{13-12}; + let Inst{13-5} = Ii{11-3}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_0d8870 : OpcodeHexagon { + bits <12> Ii; + let Inst{26-25} = Ii{11-10}; + let Inst{13-13} = Ii{9-9}; + let Inst{7-0} = Ii{8-1}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; +} +class Enc_9fae8a : OpcodeHexagon { bits <6> Ii; - let Inst{12-7} = Ii{5-0}; + let Inst{13-8} = Ii{5-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_18c338 : OpcodeHexagon { + bits <8> Ii; + let Inst{12-5} = Ii{7-0}; + bits <8> II; + let Inst{22-16} = II{7-1}; + let Inst{13-13} = II{0-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_5ccba9 : OpcodeHexagon { + bits <8> Ii; + let Inst{12-7} = Ii{7-2}; bits <6> II; let Inst{13-13} = II{5-5}; let Inst{4-0} = II{4-0}; @@ -1292,7 +1352,29 @@ class Enc_5967898 : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; } -class Enc_15450971 : OpcodeHexagon { +class Enc_0ed752 : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Cdd32; + let Inst{4-0} = Cdd32{4-0}; +} +class Enc_143445 : OpcodeHexagon { + bits <13> Ii; + let Inst{26-25} = Ii{12-11}; + let Inst{13-13} = Ii{10-10}; + let Inst{7-0} = Ii{9-2}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; +} +class Enc_3a3d62 : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_3e3989 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; @@ -1301,1981 +1383,915 @@ class Enc_15450971 : OpcodeHexagon { bits <6> n1; let Inst{28-28} = n1{5-5}; let Inst{25-22} = n1{4-1}; - let Inst{13-13} = n1{0-0}; -} -class Enc_15536400 : OpcodeHexagon { - bits <6> Ii; - let Inst{3-0} = Ii{5-2}; - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; -} -class Enc_1291652 : OpcodeHexagon { - bits <1> Ii; - let Inst{8-8} = Ii{0-0}; + let Inst{8-8} = n1{0-0}; } -class Enc_5636753 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; +class Enc_152467 : OpcodeHexagon { + bits <5> Ii; + let Inst{8-5} = Ii{4-1}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_5757366 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; -} -class Enc_9752128 : OpcodeHexagon { - bits <7> Ii; - let Inst{8-5} = Ii{6-3}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_13618890 : OpcodeHexagon { +class Enc_daea09 : OpcodeHexagon { bits <17> Ii; - let Inst{26-25} = Ii{16-15}; + let Inst{23-22} = Ii{16-15}; let Inst{20-16} = Ii{14-10}; let Inst{13-13} = Ii{9-9}; - let Inst{7-0} = Ii{8-1}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; -} -class Enc_5890213 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{12-8} = Vuu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vx32; - let Inst{4-0} = Vx32{4-0}; -} -class Enc_5582416 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{7-7} = Ii{0-0}; - bits <6> II; - let Inst{11-8} = II{5-2}; - let Inst{6-5} = II{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_13536408 : OpcodeHexagon { - bits <4> Ii; - let Inst{3-0} = Ii{3-0}; - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; -} -class Enc_9773189 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Ru32; - let Inst{4-0} = Ru32{4-0}; - bits <5> Rxx32; - let Inst{12-8} = Rxx32{4-0}; -} -class Enc_2152247 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <3> Os8; - let Inst{2-0} = Os8{2-0}; -} -class Enc_12848507 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{6-6} = Ii{0-0}; - bits <6> II; - let Inst{5-0} = II{5-0}; - bits <5> Ru32; - let Inst{20-16} = Ru32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; -} -class Enc_16279406 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <2> Qv4; - let Inst{12-11} = Qv4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; -} -class Enc_1734121 : OpcodeHexagon { - bits <4> Ii; - let Inst{10-8} = Ii{3-1}; - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; - bits <4> Rt16; - let Inst{3-0} = Rt16{3-0}; -} -class Enc_766909 : OpcodeHexagon { - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; - bits <2> Pe4; - let Inst{6-5} = Pe4{1-0}; -} -class Enc_4527648 : OpcodeHexagon { - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; + let Inst{7-1} = Ii{8-2}; + bits <2> Pu4; + let Inst{9-8} = Pu4{1-0}; } -class Enc_8849208 : OpcodeHexagon { - bits <7> Ii; - let Inst{12-7} = Ii{6-1}; +class Enc_f37377 : OpcodeHexagon { + bits <8> Ii; + let Inst{12-7} = Ii{7-2}; + bits <8> II; + let Inst{13-13} = II{7-7}; + let Inst{6-0} = II{6-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{4-0} = Rt32{4-0}; -} -class Enc_9894557 : OpcodeHexagon { - bits <6> Ii; - let Inst{13-8} = Ii{5-0}; - bits <6> II; - let Inst{23-21} = II{5-3}; - let Inst{7-5} = II{2-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_4109168 : OpcodeHexagon { - bits <2> Qv4; - let Inst{23-22} = Qv4{1-0}; -} -class Enc_14560494 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; } -class Enc_9773167 : OpcodeHexagon { +class Enc_a198f6 : OpcodeHexagon { bits <7> Ii; - let Inst{12-7} = Ii{6-1}; - bits <5> II; - let Inst{4-0} = II{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; -} -class Enc_1898420 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; -} -class Enc_11498120 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <2> Qd4; - let Inst{1-0} = Qd4{1-0}; -} -class Enc_15459921 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_10058269 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vx32; - let Inst{4-0} = Vx32{4-0}; -} -class Enc_10197700 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{20-16} = Vuu32{4-0}; - bits <5> Vvv32; - let Inst{12-8} = Vvv32{4-0}; - bits <3> Rt8; - let Inst{2-0} = Rt8{2-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; -} -class Enc_12608570 : OpcodeHexagon { - bits <17> Ii; - let Inst{26-25} = Ii{16-15}; - let Inst{20-16} = Ii{14-10}; - let Inst{13-5} = Ii{9-1}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_4804090 : OpcodeHexagon { - bits <6> Ss64; - let Inst{21-16} = Ss64{5-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_14973146 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <3> Qd8; - let Inst{5-3} = Qd8{2-0}; -} -class Enc_5718302 : OpcodeHexagon { + let Inst{10-5} = Ii{6-1}; + bits <2> Pt4; + let Inst{12-11} = Pt4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; - bits <2> Pe4; - let Inst{6-5} = Pe4{1-0}; -} -class Enc_2103742 : OpcodeHexagon { - bits <5> Ii; - let Inst{12-8} = Ii{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; } -class Enc_7564330 : OpcodeHexagon { +class Enc_3dac0b : OpcodeHexagon { + bits <2> Qt4; + let Inst{6-5} = Qt4{1-0}; bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; + let Inst{12-8} = Vu32{4-0}; bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <3> Rt8; - let Inst{2-0} = Rt8{2-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; -} -class Enc_2176383 : OpcodeHexagon { - bits <6> Ii; - let Inst{9-4} = Ii{5-0}; - bits <4> Rd16; - let Inst{3-0} = Rd16{3-0}; -} -class Enc_7736768 : OpcodeHexagon { - bits <12> Ii; - let Inst{26-25} = Ii{11-10}; - let Inst{13-13} = Ii{9-9}; - let Inst{7-0} = Ii{8-1}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; -} -class Enc_13189194 : OpcodeHexagon { - bits <1> Ii; - let Inst{5-5} = Ii{0-0}; - bits <5> Vuu32; - let Inst{12-8} = Vuu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vxx32; - let Inst{4-0} = Vxx32{4-0}; -} -class Enc_5154851 : OpcodeHexagon { - bits <5> Rtt32; - let Inst{20-16} = Rtt32{4-0}; + let Inst{20-16} = Vv32{4-0}; bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; -} -class Enc_1329520 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Cdd32; - let Inst{4-0} = Cdd32{4-0}; -} -class Enc_14057553 : OpcodeHexagon { - bits <16> Ii; - let Inst{21-21} = Ii{15-15}; - let Inst{13-8} = Ii{14-9}; - let Inst{2-0} = Ii{8-6}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_9223889 : OpcodeHexagon { - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rx32; - let Inst{4-0} = Rx32{4-0}; -} -class Enc_10979813 : OpcodeHexagon { - bits <7> Ii; - let Inst{13-13} = Ii{6-6}; - let Inst{7-3} = Ii{5-1}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; + let Inst{4-0} = Vdd32{4-0}; } -class Enc_13490067 : OpcodeHexagon { - bits <3> Qt8; - let Inst{2-0} = Qt8{2-0}; - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; -} -class Enc_10076500 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{6-6} = Ii{0-0}; - bits <6> II; - let Inst{5-0} = II{5-0}; - bits <5> Ru32; - let Inst{20-16} = Ru32{4-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; -} -class Enc_163381 : OpcodeHexagon { - bits <14> Ii; - let Inst{26-25} = Ii{13-12}; - let Inst{13-5} = Ii{11-3}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_10328975 : OpcodeHexagon { - bits <2> Pt4; - let Inst{9-8} = Pt4{1-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_14939491 : OpcodeHexagon { - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; - bits <4> Rd16; - let Inst{3-0} = Rd16{3-0}; -} -class Enc_8891794 : OpcodeHexagon { - bits <2> Pt4; - let Inst{9-8} = Pt4{1-0}; - bits <2> Ps4; - let Inst{17-16} = Ps4{1-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; -} -class Enc_7723767 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{12-8} = Vuu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; -} -class Enc_2639299 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <4> Rd16; - let Inst{11-8} = Rd16{3-0}; -} -class Enc_11552785 : OpcodeHexagon { - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <2> Pu4; - let Inst{6-5} = Pu4{1-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_11849200 : OpcodeHexagon { - bits <6> Ii; - let Inst{12-7} = Ii{5-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{4-0} = Rt32{4-0}; -} -class Enc_14868535 : OpcodeHexagon { - bits <17> Ii; - let Inst{23-22} = Ii{16-15}; - let Inst{20-16} = Ii{14-10}; - let Inst{13-13} = Ii{9-9}; - let Inst{7-1} = Ii{8-2}; - bits <2> Pu4; - let Inst{9-8} = Pu4{1-0}; -} -class Enc_48594 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_6608821 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <3> Os8; - let Inst{2-0} = Os8{2-0}; -} -class Enc_11049656 : OpcodeHexagon { - bits <9> Ii; - let Inst{13-13} = Ii{8-8}; - let Inst{7-3} = Ii{7-3}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; -} -class Enc_117962 : OpcodeHexagon { - bits <8> Ii; - let Inst{23-21} = Ii{7-5}; - let Inst{13-13} = Ii{4-4}; - let Inst{7-5} = Ii{3-1}; - let Inst{3-3} = Ii{0-0}; - bits <5> II; - let Inst{12-8} = II{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_5900401 : OpcodeHexagon { - bits <4> Ii; - let Inst{6-3} = Ii{3-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_36641 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{12-8} = Vuu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_9626139 : OpcodeHexagon { - bits <2> Pu4; - let Inst{6-5} = Pu4{1-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_11971407 : OpcodeHexagon { - bits <3> Ii; - let Inst{7-5} = Ii{2-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_9852473 : OpcodeHexagon { - bits <13> Ii; - let Inst{26-25} = Ii{12-11}; - let Inst{13-5} = Ii{10-2}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_6495334 : OpcodeHexagon { - bits <6> Ii; - let Inst{22-21} = Ii{5-4}; - let Inst{13-13} = Ii{3-3}; - let Inst{7-5} = Ii{2-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Ru32; - let Inst{4-0} = Ru32{4-0}; - bits <5> Rd32; - let Inst{12-8} = Rd32{4-0}; -} -class Enc_1186018 : OpcodeHexagon { - bits <17> Ii; - let Inst{26-25} = Ii{16-15}; - let Inst{20-16} = Ii{14-10}; - let Inst{13-13} = Ii{9-9}; - let Inst{7-0} = Ii{8-1}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; -} -class Enc_15999208 : OpcodeHexagon { - bits <18> Ii; - let Inst{26-25} = Ii{17-16}; - let Inst{20-16} = Ii{15-11}; - let Inst{13-13} = Ii{10-10}; - let Inst{7-0} = Ii{9-2}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; -} -class Enc_11477246 : OpcodeHexagon { - bits <6> II; - let Inst{5-0} = II{5-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Re32; - let Inst{20-16} = Re32{4-0}; -} -class Enc_7971062 : OpcodeHexagon { - bits <16> Ii; - let Inst{23-22} = Ii{15-14}; - let Inst{20-16} = Ii{13-9}; - let Inst{13-5} = Ii{8-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_4327792 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{12-8} = Vuu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vxx32; - let Inst{4-0} = Vxx32{4-0}; -} -class Enc_10326434 : OpcodeHexagon { - bits <5> Ii; - let Inst{6-3} = Ii{4-1}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_1572239 : OpcodeHexagon { - bits <2> Qt4; - let Inst{6-5} = Qt4{1-0}; - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_6372758 : OpcodeHexagon { - bits <4> Ii; - let Inst{8-5} = Ii{3-0}; - bits <5> Ryy32; - let Inst{4-0} = Ryy32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_15793331 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vx32; - let Inst{7-3} = Vx32{4-0}; -} -class Enc_11424254 : OpcodeHexagon { - bits <2> Qt4; - let Inst{6-5} = Qt4{1-0}; - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <5> Vdd32; - let Inst{4-0} = Vdd32{4-0}; -} -class Enc_4983213 : OpcodeHexagon { - bits <14> Ii; - let Inst{10-0} = Ii{13-3}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; -} -class Enc_16035138 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; -} -class Enc_8225953 : OpcodeHexagon { - bits <8> Ii; - let Inst{13-13} = Ii{7-7}; - let Inst{7-3} = Ii{6-2}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; -} -class Enc_4397470 : OpcodeHexagon { - bits <5> II; - let Inst{12-8} = II{4-0}; - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; -} -class Enc_1004392 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vxx32; - let Inst{7-3} = Vxx32{4-0}; -} -class Enc_16319737 : OpcodeHexagon { - bits <14> Ii; - let Inst{26-25} = Ii{13-12}; - let Inst{13-13} = Ii{11-11}; - let Inst{7-0} = Ii{10-3}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; -} -class Enc_2296022 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_9664427 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{20-16} = Vuu32{4-0}; - bits <5> Vvv32; - let Inst{12-8} = Vvv32{4-0}; - bits <3> Qss8; - let Inst{2-0} = Qss8{2-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; -} -class Enc_877823 : OpcodeHexagon { - bits <6> II; - let Inst{11-8} = II{5-2}; - let Inst{6-5} = II{1-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; - bits <5> Re32; - let Inst{20-16} = Re32{4-0}; -} -class Enc_1589406 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <3> Os8; - let Inst{2-0} = Os8{2-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_6900405 : OpcodeHexagon { - bits <5> Ii; - let Inst{6-3} = Ii{4-1}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_14150875 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <5> n1; - let Inst{28-28} = n1{4-4}; - let Inst{25-22} = n1{3-0}; -} -class Enc_15707793 : OpcodeHexagon { - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Gd32; - let Inst{4-0} = Gd32{4-0}; -} -class Enc_14689096 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{6-6} = Ii{0-0}; - bits <6> II; - let Inst{5-0} = II{5-0}; - bits <5> Ru32; - let Inst{20-16} = Ru32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; -} -class Enc_9915754 : OpcodeHexagon { - bits <6> Ii; - let Inst{6-3} = Ii{5-2}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_7470998 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <2> Qx4; - let Inst{1-0} = Qx4{1-0}; -} -class Enc_11471622 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vdd32; - let Inst{4-0} = Vdd32{4-0}; -} -class Enc_14363183 : OpcodeHexagon { - bits <2> Qv4; - let Inst{23-22} = Qv4{1-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_15816255 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_5321335 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <3> Rt8; - let Inst{2-0} = Rt8{2-0}; - bits <4> Vdd16; - let Inst{7-4} = Vdd16{3-0}; -} -class Enc_12702821 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rxx32; - let Inst{4-0} = Rxx32{4-0}; -} -class Enc_449439 : OpcodeHexagon { - bits <11> Ii; - let Inst{26-25} = Ii{10-9}; - let Inst{13-5} = Ii{8-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Ryy32; - let Inst{4-0} = Ryy32{4-0}; -} -class Enc_2054304 : OpcodeHexagon { - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <6> Sd64; - let Inst{5-0} = Sd64{5-0}; -} -class Enc_236434 : OpcodeHexagon { - bits <6> Ii; - let Inst{22-21} = Ii{5-4}; - let Inst{13-13} = Ii{3-3}; - let Inst{7-5} = Ii{2-0}; - bits <5> Ru32; - let Inst{4-0} = Ru32{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{12-8} = Rd32{4-0}; -} -class Enc_5598813 : OpcodeHexagon { - bits <4> Ii; - let Inst{8-5} = Ii{3-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_8409782 : OpcodeHexagon { - bits <13> Ii; - let Inst{26-25} = Ii{12-11}; - let Inst{13-13} = Ii{10-10}; - let Inst{7-0} = Ii{9-2}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; -} -class Enc_15182416 : OpcodeHexagon { - bits <6> Ii; - let Inst{20-16} = Ii{5-1}; - let Inst{8-8} = Ii{0-0}; - bits <2> Pt4; - let Inst{10-9} = Pt4{1-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_4501395 : OpcodeHexagon { - bits <7> Ii; - let Inst{6-3} = Ii{6-3}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_6039436 : OpcodeHexagon { - bits <3> Qtt8; - let Inst{2-0} = Qtt8{2-0}; - bits <5> Vuu32; - let Inst{20-16} = Vuu32{4-0}; - bits <5> Vvv32; - let Inst{12-8} = Vvv32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; -} -class Enc_476163 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <3> Rt8; - let Inst{2-0} = Rt8{2-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; - bits <5> Vy32; - let Inst{12-8} = Vy32{4-0}; -} -class Enc_11281763 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_9929262 : OpcodeHexagon { - bits <16> Ii; - let Inst{21-21} = Ii{15-15}; - let Inst{13-8} = Ii{14-9}; - let Inst{2-0} = Ii{8-6}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vs32; - let Inst{7-3} = Vs32{4-0}; -} -class Enc_13174858 : OpcodeHexagon { - bits <16> Ii; - let Inst{21-21} = Ii{15-15}; - let Inst{13-8} = Ii{14-9}; - let Inst{2-0} = Ii{8-6}; - bits <5> Vs32; - let Inst{7-3} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_8437395 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_16578332 : OpcodeHexagon { - bits <9> Ii; - let Inst{10-8} = Ii{8-6}; - bits <5> Zdd8; - let Inst{4-0} = Zdd8{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_12829314 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; -} -class Enc_9744403 : OpcodeHexagon { - bits <5> Vu32; - let Inst{13-9} = Vu32{4-0}; - bits <5> Vv32; - let Inst{8-4} = Vv32{4-0}; - bits <4> Vdd16; - let Inst{3-0} = Vdd16{3-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_10968391 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <7> n1; - let Inst{28-28} = n1{6-6}; - let Inst{25-22} = n1{5-2}; - let Inst{13-13} = n1{1-1}; - let Inst{8-8} = n1{0-0}; -} -class Enc_64199 : OpcodeHexagon { - bits <7> Ii; - let Inst{8-4} = Ii{6-2}; - bits <4> Rd16; - let Inst{3-0} = Rd16{3-0}; -} -class Enc_11039423 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_6730375 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; -} -class Enc_16213761 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{23-19} = Vv32{4-0}; - bits <3> Rt8; - let Inst{18-16} = Rt8{2-0}; - bits <5> Vxx32; - let Inst{4-0} = Vxx32{4-0}; -} -class Enc_13204995 : OpcodeHexagon { - bits <4> Ii; - let Inst{11-8} = Ii{3-0}; - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; - bits <4> Rt16; - let Inst{3-0} = Rt16{3-0}; -} -class Enc_13338314 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_9920336 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{7-7} = Ii{0-0}; - bits <2> Pv4; - let Inst{6-5} = Pv4{1-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Ru32; - let Inst{12-8} = Ru32{4-0}; - bits <5> Rtt32; - let Inst{4-0} = Rtt32{4-0}; -} -class Enc_15380240 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <3> Rt8; - let Inst{2-0} = Rt8{2-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; - bits <5> Vy32; - let Inst{12-8} = Vy32{4-0}; -} -class Enc_3296020 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_2428539 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <4> n1; - let Inst{28-28} = n1{3-3}; - let Inst{24-23} = n1{2-1}; - let Inst{8-8} = n1{0-0}; -} -class Enc_10039393 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_9372046 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <3> Os8; - let Inst{2-0} = Os8{2-0}; -} -class Enc_2901241 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_16145290 : OpcodeHexagon { - bits <2> Ps4; - let Inst{6-5} = Ps4{1-0}; - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <5> Vdd32; - let Inst{4-0} = Vdd32{4-0}; -} -class Enc_13783220 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rtt32; - let Inst{20-16} = Rtt32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_12261611 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Ryy32; - let Inst{4-0} = Ryy32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_6135183 : OpcodeHexagon { - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; - bits <4> Rx16; - let Inst{3-0} = Rx16{3-0}; -} -class Enc_5523416 : OpcodeHexagon { - bits <6> Ii; - let Inst{13-8} = Ii{5-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_13472494 : OpcodeHexagon { - bits <10> Ii; - let Inst{21-21} = Ii{9-9}; - let Inst{13-5} = Ii{8-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_16303398 : OpcodeHexagon { - bits <4> Ii; - let Inst{8-5} = Ii{3-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_3494181 : OpcodeHexagon { - bits <3> Ii; - let Inst{7-5} = Ii{2-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_13983714 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <2> Qd4; - let Inst{1-0} = Qd4{1-0}; -} -class Enc_931653 : OpcodeHexagon { - bits <7> Ii; - let Inst{8-5} = Ii{6-3}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_7622936 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <3> Rt8; - let Inst{2-0} = Rt8{2-0}; - bits <5> Vxx32; - let Inst{7-3} = Vxx32{4-0}; - bits <5> Vy32; - let Inst{12-8} = Vy32{4-0}; -} -class Enc_8773155 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-7} = Ii{7-2}; - bits <5> II; - let Inst{4-0} = II{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; -} -class Enc_5401217 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <3> n1; - let Inst{28-28} = n1{2-2}; - let Inst{24-23} = n1{1-0}; -} -class Enc_6736678 : OpcodeHexagon { +class Enc_e38e1f : OpcodeHexagon { bits <8> Ii; - let Inst{12-5} = Ii{7-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; -} -class Enc_3457570 : OpcodeHexagon { - bits <3> Ii; - let Inst{7-5} = Ii{2-0}; - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; - bits <5> Vxx32; - let Inst{4-0} = Vxx32{4-0}; -} -class Enc_3813442 : OpcodeHexagon { - bits <5> Ii; - let Inst{6-3} = Ii{4-1}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_3135259 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; - bits <4> Rd16; - let Inst{3-0} = Rd16{3-0}; -} -class Enc_5486172 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{7-7} = Ii{0-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Ru32; - let Inst{12-8} = Ru32{4-0}; - bits <3> Nt8; - let Inst{2-0} = Nt8{2-0}; -} -class Enc_11081334 : OpcodeHexagon { - bits <16> Ii; - let Inst{21-21} = Ii{15-15}; - let Inst{13-8} = Ii{14-9}; - let Inst{2-0} = Ii{8-6}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vss32; - let Inst{7-3} = Vss32{4-0}; -} -class Enc_9470751 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; -} -class Enc_2683366 : OpcodeHexagon { - bits <3> Quu8; - let Inst{10-8} = Quu8{2-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <3> Qdd8; - let Inst{5-3} = Qdd8{2-0}; + let Inst{12-5} = Ii{7-0}; + bits <2> Pu4; + let Inst{22-21} = Pu4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_15830826 : OpcodeHexagon { - bits <14> Ii; - let Inst{10-0} = Ii{13-3}; +class Enc_f8ecf9 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; + bits <5> Vvv32; + let Inst{20-16} = Vvv32{4-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; } -class Enc_4967902 : OpcodeHexagon { - bits <7> Ii; - let Inst{12-7} = Ii{6-1}; - bits <6> II; - let Inst{13-13} = II{5-5}; - let Inst{4-0} = II{4-0}; - bits <2> Pv4; - let Inst{6-5} = Pv4{1-0}; +class Enc_7f1a05 : OpcodeHexagon { + bits <5> Ru32; + let Inst{4-0} = Ru32{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; + bits <5> Ry32; + let Inst{12-8} = Ry32{4-0}; } -class Enc_14287645 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; +class Enc_2df31d : OpcodeHexagon { + bits <8> Ii; + let Inst{9-4} = Ii{7-2}; + bits <4> Rd16; + let Inst{3-0} = Rd16{3-0}; +} +class Enc_25bef0 : OpcodeHexagon { + bits <16> Ii; + let Inst{26-25} = Ii{15-14}; + let Inst{20-16} = Ii{13-9}; + let Inst{13-5} = Ii{8-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_8324216 : OpcodeHexagon { - bits <2> Ps4; - let Inst{17-16} = Ps4{1-0}; - bits <2> Pt4; - let Inst{9-8} = Pt4{1-0}; +class Enc_f82302 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; + bits <4> n1; + let Inst{29-29} = n1{3-3}; + let Inst{26-25} = n1{2-1}; + let Inst{23-23} = n1{0-0}; +} +class Enc_83ee64 : OpcodeHexagon { + bits <5> Ii; + let Inst{12-8} = Ii{4-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; bits <2> Pd4; let Inst{1-0} = Pd4{1-0}; } -class Enc_913538 : OpcodeHexagon { +class Enc_adf111 : OpcodeHexagon { bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <3> Qd8; - let Inst{5-3} = Qd8{2-0}; + bits <2> Qx4; + let Inst{1-0} = Qx4{1-0}; } -class Enc_16311032 : OpcodeHexagon { +class Enc_46c951 : OpcodeHexagon { + bits <6> Ii; + let Inst{12-7} = Ii{5-0}; + bits <5> II; + let Inst{4-0} = II{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rx32; - let Inst{4-0} = Rx32{4-0}; -} -class Enc_9864697 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-5} = Ii{7-0}; - bits <6> II; - let Inst{20-16} = II{5-1}; - let Inst{13-13} = II{0-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; } -class Enc_11205051 : OpcodeHexagon { +class Enc_5d6c34 : OpcodeHexagon { bits <6> Ii; - let Inst{11-8} = Ii{5-2}; - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; - bits <4> Rt16; - let Inst{3-0} = Rt16{3-0}; + let Inst{13-8} = Ii{5-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; } -class Enc_5611087 : OpcodeHexagon { - bits <7> Ii; - let Inst{8-5} = Ii{6-3}; - bits <2> Pt4; - let Inst{10-9} = Pt4{1-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_4df4e9 : OpcodeHexagon { + bits <11> Ii; + let Inst{26-25} = Ii{10-9}; + let Inst{13-13} = Ii{8-8}; + let Inst{7-0} = Ii{7-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; } -class Enc_10915758 : OpcodeHexagon { +class Enc_91b9fe : OpcodeHexagon { bits <5> Ii; let Inst{6-3} = Ii{4-1}; bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_8943121 : OpcodeHexagon { +class Enc_a7b8e8 : OpcodeHexagon { + bits <6> Ii; + let Inst{22-21} = Ii{5-4}; + let Inst{13-13} = Ii{3-3}; + let Inst{7-5} = Ii{2-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_2b3f60 : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; bits <5> Rtt32; let Inst{12-8} = Rtt32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; + bits <2> Px4; + let Inst{6-5} = Px4{1-0}; } -class Enc_1539665 : OpcodeHexagon { - bits <5> Cs32; - let Inst{20-16} = Cs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_bd1cbc : OpcodeHexagon { + bits <5> Ii; + let Inst{8-5} = Ii{4-1}; + bits <5> Ryy32; + let Inst{4-0} = Ryy32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_8479583 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; - bits <5> n1; - let Inst{29-29} = n1{4-4}; - let Inst{26-25} = n1{3-2}; - let Inst{23-23} = n1{1-1}; - let Inst{13-13} = n1{0-0}; +class Enc_a30110 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{23-19} = Vv32{4-0}; + bits <3> Rt8; + let Inst{18-16} = Rt8{2-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; } -class Enc_313333 : OpcodeHexagon { +class Enc_f3f408 : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <5> Vx32; - let Inst{4-0} = Vx32{4-0}; -} -class Enc_11544269 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; - bits <4> n1; - let Inst{29-29} = n1{3-3}; - let Inst{26-25} = n1{2-1}; - let Inst{13-13} = n1{0-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; } -class Enc_9018141 : OpcodeHexagon { +class Enc_690862 : OpcodeHexagon { + bits <13> Ii; + let Inst{26-25} = Ii{12-11}; + let Inst{13-13} = Ii{10-10}; + let Inst{7-0} = Ii{9-2}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Cd32; - let Inst{4-0} = Cd32{4-0}; -} -class Enc_6152036 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Gdd32; - let Inst{4-0} = Gdd32{4-0}; -} -class Enc_1954437 : OpcodeHexagon { - bits <6> Sss64; - let Inst{21-16} = Sss64{5-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_3742184 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; } -class Enc_1835415 : OpcodeHexagon { - bits <7> Ii; - let Inst{10-5} = Ii{6-1}; - bits <2> Pt4; - let Inst{12-11} = Pt4{1-0}; +class Enc_2a3787 : OpcodeHexagon { + bits <13> Ii; + let Inst{26-25} = Ii{12-11}; + let Inst{13-5} = Ii{10-2}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_1085466 : OpcodeHexagon { - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; -} -class Enc_13150110 : OpcodeHexagon { - bits <11> Ii; - let Inst{26-25} = Ii{10-9}; - let Inst{13-13} = Ii{8-8}; - let Inst{7-0} = Ii{7-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; +class Enc_d5c73f : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_6772177 : OpcodeHexagon { - bits <5> Zu8; - let Inst{12-8} = Zu8{4-0}; - bits <5> Zd8; - let Inst{4-0} = Zd8{4-0}; +class Enc_3f97c8 : OpcodeHexagon { + bits <6> Ii; + let Inst{6-3} = Ii{5-2}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_6616512 : OpcodeHexagon { - bits <16> Ii; - let Inst{21-21} = Ii{15-15}; - let Inst{13-8} = Ii{14-9}; - let Inst{2-0} = Ii{8-6}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; +class Enc_d50cd3 : OpcodeHexagon { + bits <3> Ii; + let Inst{7-5} = Ii{2-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_1886960 : OpcodeHexagon { - bits <16> Ii; - let Inst{26-25} = Ii{15-14}; - let Inst{20-16} = Ii{13-9}; - let Inst{13-5} = Ii{8-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_729ff7 : OpcodeHexagon { + bits <3> Ii; + let Inst{7-5} = Ii{2-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_2835415 : OpcodeHexagon { - bits <8> Ii; - let Inst{10-5} = Ii{7-2}; - bits <2> Pt4; - let Inst{12-11} = Pt4{1-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; +class Enc_217147 : OpcodeHexagon { + bits <2> Qv4; + let Inst{23-22} = Qv4{1-0}; +} +class Enc_b9c5fb : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_f394d3 : OpcodeHexagon { + bits <6> II; + let Inst{11-8} = II{5-2}; + let Inst{6-5} = II{1-0}; + bits <5> Ryy32; + let Inst{4-0} = Ryy32{4-0}; + bits <5> Re32; + let Inst{20-16} = Re32{4-0}; +} +class Enc_0cb018 : OpcodeHexagon { + bits <5> Cs32; + let Inst{20-16} = Cs32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_14024197 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rtt32; - let Inst{20-16} = Rtt32{4-0}; - bits <5> Vxx32; - let Inst{4-0} = Vxx32{4-0}; -} -class Enc_12297800 : OpcodeHexagon { +class Enc_541f26 : OpcodeHexagon { bits <18> Ii; let Inst{26-25} = Ii{17-16}; let Inst{20-16} = Ii{15-11}; let Inst{13-13} = Ii{10-10}; let Inst{7-0} = Ii{9-2}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; +} +class Enc_724154 : OpcodeHexagon { + bits <6> II; + let Inst{5-0} = II{5-0}; bits <3> Nt8; let Inst{10-8} = Nt8{2-0}; + bits <5> Re32; + let Inst{20-16} = Re32{4-0}; } -class Enc_7254313 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{7-7} = Ii{0-0}; +class Enc_179b35 : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rx32; + let Inst{4-0} = Rx32{4-0}; +} +class Enc_585242 : OpcodeHexagon { + bits <6> Ii; + let Inst{13-13} = Ii{5-5}; + let Inst{7-3} = Ii{4-0}; bits <2> Pv4; - let Inst{6-5} = Pv4{1-0}; + let Inst{1-0} = Pv4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; +} +class Enc_cf1927 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <3> Os8; + let Inst{2-0} = Os8{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_b84c4c : OpcodeHexagon { + bits <6> Ii; + let Inst{13-8} = Ii{5-0}; + bits <6> II; + let Inst{23-21} = II{5-3}; + let Inst{7-5} = II{2-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } -class Enc_677558 : OpcodeHexagon { - bits <9> Ii; - let Inst{10-5} = Ii{8-3}; +class Enc_9ac432 : OpcodeHexagon { + bits <2> Ps4; + let Inst{17-16} = Ps4{1-0}; bits <2> Pt4; - let Inst{12-11} = Pt4{1-0}; + let Inst{9-8} = Pt4{1-0}; + bits <2> Pu4; + let Inst{7-6} = Pu4{1-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; +} +class Enc_8203bb : OpcodeHexagon { + bits <6> Ii; + let Inst{12-7} = Ii{5-0}; + bits <8> II; + let Inst{13-13} = II{7-7}; + let Inst{6-0} = II{6-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; } -class Enc_6223403 : OpcodeHexagon { +class Enc_e66a97 : OpcodeHexagon { + bits <7> Ii; + let Inst{12-7} = Ii{6-1}; + bits <5> II; + let Inst{4-0} = II{4-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; +} +class Enc_8c2412 : OpcodeHexagon { + bits <2> Ps4; + let Inst{6-5} = Ps4{1-0}; bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; bits <5> Vv32; let Inst{20-16} = Vv32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_674613 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{20-16} = Vuu32{4-0}; bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; -} -class Enc_16479122 : OpcodeHexagon { - bits <8> Ii; - let Inst{7-3} = Ii{7-3}; - bits <3> Rdd8; - let Inst{2-0} = Rdd8{2-0}; -} -class Enc_11704059 : OpcodeHexagon { - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; + let Inst{4-0} = Vdd32{4-0}; } -class Enc_9165078 : OpcodeHexagon { - bits <9> Ii; - let Inst{8-3} = Ii{8-3}; - bits <3> Rtt8; - let Inst{2-0} = Rtt8{2-0}; +class Enc_284ebb : OpcodeHexagon { + bits <2> Ps4; + let Inst{17-16} = Ps4{1-0}; + bits <2> Pt4; + let Inst{9-8} = Pt4{1-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; } -class Enc_15376009 : OpcodeHexagon { +class Enc_733b27 : OpcodeHexagon { bits <5> Ii; let Inst{8-5} = Ii{4-1}; + bits <2> Pt4; + let Inst{10-9} = Pt4{1-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_8838398 : OpcodeHexagon { - bits <4> Ii; - let Inst{21-21} = Ii{3-3}; - let Inst{7-5} = Ii{2-0}; - bits <6> II; - let Inst{13-8} = II{5-0}; +class Enc_22c845 : OpcodeHexagon { + bits <14> Ii; + let Inst{10-0} = Ii{13-3}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_9b0bc1 : OpcodeHexagon { + bits <2> Pu4; + let Inst{6-5} = Pu4{1-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rx32; - let Inst{4-0} = Rx32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_2328527 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vv32; - let Inst{20-16} = Vv32{4-0}; +class Enc_ea4c54 : OpcodeHexagon { + bits <2> Pu4; + let Inst{6-5} = Pu4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_b72622 : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{5-5} = Ii{0-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rxx32; + let Inst{4-0} = Rxx32{4-0}; +} +class Enc_569cfe : OpcodeHexagon { + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; bits <5> Vx32; let Inst{4-0} = Vx32{4-0}; } -class Enc_1451363 : OpcodeHexagon { - bits <4> Rd16; - let Inst{3-0} = Rd16{3-0}; +class Enc_96ce4f : OpcodeHexagon { + bits <4> Ii; + let Inst{6-3} = Ii{3-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} +class Enc_143a3c : OpcodeHexagon { + bits <6> Ii; + let Inst{13-8} = Ii{5-0}; + bits <6> II; + let Inst{23-21} = II{5-3}; + let Inst{7-5} = II{2-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rxx32; + let Inst{4-0} = Rxx32{4-0}; } -class Enc_4030179 : OpcodeHexagon { +class Enc_57a33e : OpcodeHexagon { + bits <9> Ii; + let Inst{13-13} = Ii{8-8}; + let Inst{7-3} = Ii{7-3}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; } -class Enc_13770697 : OpcodeHexagon { - bits <5> Ru32; - let Inst{4-0} = Ru32{4-0}; +class Enc_311abd : OpcodeHexagon { + bits <5> Ii; + let Inst{12-8} = Ii{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Ry32; - let Inst{12-8} = Ry32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_12212978 : OpcodeHexagon { - bits <4> Ii; - let Inst{8-5} = Ii{3-0}; - bits <2> Pt4; - let Inst{10-9} = Pt4{1-0}; +class Enc_a1640c : OpcodeHexagon { + bits <6> Ii; + let Inst{13-8} = Ii{5-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; } -class Enc_12665927 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_de0214 : OpcodeHexagon { + bits <12> Ii; + let Inst{26-25} = Ii{11-10}; + let Inst{13-5} = Ii{9-1}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_2082956 : OpcodeHexagon { - bits <32> Ii; - let Inst{27-16} = Ii{31-20}; - let Inst{13-0} = Ii{19-6}; +class Enc_a90628 : OpcodeHexagon { + bits <2> Qv4; + let Inst{23-22} = Qv4{1-0}; + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vx32; + let Inst{4-0} = Vx32{4-0}; } -class Enc_220949 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <5> n1; - let Inst{28-28} = n1{4-4}; - let Inst{25-23} = n1{3-1}; - let Inst{13-13} = n1{0-0}; +class Enc_fda92c : OpcodeHexagon { + bits <17> Ii; + let Inst{26-25} = Ii{16-15}; + let Inst{20-16} = Ii{14-10}; + let Inst{13-13} = Ii{9-9}; + let Inst{7-0} = Ii{8-1}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; } -class Enc_9939385 : OpcodeHexagon { - bits <9> Ii; - let Inst{12-8} = Ii{8-4}; - let Inst{4-3} = Ii{3-2}; - bits <10> II; - let Inst{20-16} = II{9-5}; - let Inst{7-5} = II{4-2}; - let Inst{1-0} = II{1-0}; +class Enc_831a7d : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rxx32; + let Inst{4-0} = Rxx32{4-0}; + bits <2> Pe4; + let Inst{6-5} = Pe4{1-0}; } -class Enc_2117024 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-8} = Ii{7-3}; - let Inst{4-2} = Ii{2-0}; +class Enc_11a146 : OpcodeHexagon { + bits <4> Ii; + let Inst{11-8} = Ii{3-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_b15941 : OpcodeHexagon { + bits <4> Ii; + let Inst{6-3} = Ii{3-0}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_8390029 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{20-16} = Vuu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; +class Enc_b78edd : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <4> n1; + let Inst{28-28} = n1{3-3}; + let Inst{24-23} = n1{2-1}; + let Inst{8-8} = n1{0-0}; } -class Enc_10989558 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; +class Enc_a27588 : OpcodeHexagon { + bits <11> Ii; + let Inst{26-25} = Ii{10-9}; + let Inst{13-5} = Ii{8-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Ryy32; + let Inst{4-0} = Ryy32{4-0}; +} +class Enc_2a7b91 : OpcodeHexagon { + bits <6> Ii; + let Inst{20-16} = Ii{5-1}; + let Inst{8-8} = Ii{0-0}; + bits <2> Pt4; + let Inst{10-9} = Pt4{1-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_5972412 : OpcodeHexagon { +class Enc_b43b67 : OpcodeHexagon { bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; bits <5> Vv32; let Inst{20-16} = Vv32{4-0}; - bits <5> Vxx32; - let Inst{4-0} = Vxx32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; + bits <2> Qx4; + let Inst{6-5} = Qx4{1-0}; } -class Enc_12851489 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vss32; - let Inst{7-3} = Vss32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_4aca3a : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; + bits <3> n1; + let Inst{29-29} = n1{2-2}; + let Inst{26-25} = n1{1-0}; } -class Enc_9554661 : OpcodeHexagon { - bits <6> Ii; - let Inst{12-7} = Ii{5-0}; +class Enc_b38ffc : OpcodeHexagon { + bits <4> Ii; + let Inst{11-8} = Ii{3-0}; + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; + bits <4> Rt16; + let Inst{3-0} = Rt16{3-0}; +} +class Enc_cda00a : OpcodeHexagon { + bits <12> Ii; + let Inst{19-16} = Ii{11-8}; + let Inst{12-5} = Ii{7-0}; + bits <2> Pu4; + let Inst{22-21} = Pu4{1-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_4202401 : OpcodeHexagon { +class Enc_2fbf3c : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; + bits <4> Rd16; + let Inst{3-0} = Rd16{3-0}; +} +class Enc_70b24b : OpcodeHexagon { + bits <6> Ii; + let Inst{8-5} = Ii{5-2}; bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_6091631 : OpcodeHexagon { - bits <2> Qs4; - let Inst{9-8} = Qs4{1-0}; - bits <2> Qt4; - let Inst{23-22} = Qt4{1-0}; - bits <2> Qd4; - let Inst{1-0} = Qd4{1-0}; -} -class Enc_10157519 : OpcodeHexagon { +class Enc_2ae154 : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; + bits <5> Rx32; + let Inst{4-0} = Rx32{4-0}; } -class Enc_4835423 : OpcodeHexagon { +class Enc_50b5ac : OpcodeHexagon { bits <6> Ii; - let Inst{10-5} = Ii{5-0}; - bits <2> Pt4; - let Inst{12-11} = Pt4{1-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; + let Inst{17-16} = Ii{5-4}; + let Inst{6-3} = Ii{3-0}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; } -class Enc_14046916 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{7-7} = Ii{0-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Ru32; - let Inst{12-8} = Ru32{4-0}; +class Enc_2ea740 : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <2> Qv4; + let Inst{12-11} = Qv4{1-0}; bits <5> Rt32; - let Inst{4-0} = Rt32{4-0}; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vs32; + let Inst{4-0} = Vs32{4-0}; } -class Enc_2921694 : OpcodeHexagon { +class Enc_08d755 : OpcodeHexagon { + bits <8> Ii; + let Inst{12-5} = Ii{7-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; bits <2> Pd4; let Inst{1-0} = Pd4{1-0}; } -class Enc_8732960 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-8} = Ii{7-3}; - let Inst{4-2} = Ii{2-0}; +class Enc_1178da : OpcodeHexagon { + bits <3> Ii; + let Inst{7-5} = Ii{2-0}; + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vxx32; + let Inst{4-0} = Vxx32{4-0}; +} +class Enc_8dbe85 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_5338033 : OpcodeHexagon { +class Enc_5a18b3 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; bits <5> n1; - let Inst{28-28} = n1{4-4}; - let Inst{24-22} = n1{3-1}; + let Inst{29-29} = n1{4-4}; + let Inst{26-25} = n1{3-2}; + let Inst{22-22} = n1{1-1}; let Inst{13-13} = n1{0-0}; } -class Enc_6956613 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_14d27a : OpcodeHexagon { + bits <5> II; + let Inst{12-8} = II{4-0}; + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; +} +class Enc_a05677 : OpcodeHexagon { + bits <5> Ii; + let Inst{12-8} = Ii{4-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_f0cca7 : OpcodeHexagon { + bits <8> Ii; + let Inst{12-5} = Ii{7-0}; + bits <6> II; + let Inst{20-16} = II{5-1}; + let Inst{13-13} = II{0-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_2153798 : OpcodeHexagon { +class Enc_500cb0 : OpcodeHexagon { bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; bits <5> Vxx32; let Inst{4-0} = Vxx32{4-0}; } -class Enc_16210172 : OpcodeHexagon { - bits <3> Qt8; - let Inst{10-8} = Qt8{2-0}; - bits <3> Qd8; - let Inst{5-3} = Qd8{2-0}; +class Enc_7e5a82 : OpcodeHexagon { + bits <5> Ii; + let Inst{12-8} = Ii{4-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_5023792 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{12-8} = Vuu32{4-0}; +class Enc_12b6e9 : OpcodeHexagon { + bits <4> Ii; + let Inst{11-8} = Ii{3-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_6f70ca : OpcodeHexagon { + bits <8> Ii; + let Inst{8-4} = Ii{7-3}; +} +class Enc_7222b7 : OpcodeHexagon { bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <5> Vdd32; - let Inst{4-0} = Vdd32{4-0}; + bits <2> Qd4; + let Inst{1-0} = Qd4{1-0}; } -class Enc_1244745 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; +class Enc_e3b0c4 : OpcodeHexagon { +} +class Enc_a255dc : OpcodeHexagon { + bits <3> Ii; let Inst{10-8} = Ii{2-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; bits <5> Vd32; let Inst{4-0} = Vd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_10002182 : OpcodeHexagon { - bits <11> Ii; - let Inst{26-25} = Ii{10-9}; - let Inst{13-13} = Ii{8-8}; - let Inst{7-0} = Ii{7-0}; +class Enc_cb4b4e : OpcodeHexagon { + bits <2> Pu4; + let Inst{6-5} = Pu4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; -} -class Enc_12492533 : OpcodeHexagon { - bits <4> Ii; - let Inst{6-3} = Ii{3-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_1774350 : OpcodeHexagon { - bits <6> Ii; - let Inst{17-16} = Ii{5-4}; - let Inst{6-3} = Ii{3-0}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; +class Enc_9cdba7 : OpcodeHexagon { + bits <8> Ii; + let Inst{12-5} = Ii{7-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_2703240 : OpcodeHexagon { - bits <4> Ii; - let Inst{13-13} = Ii{3-3}; - let Inst{10-8} = Ii{2-0}; - bits <2> Qv4; - let Inst{12-11} = Qv4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; +class Enc_5cd7e9 : OpcodeHexagon { + bits <12> Ii; + let Inst{26-25} = Ii{11-10}; + let Inst{13-5} = Ii{9-1}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Ryy32; + let Inst{4-0} = Ryy32{4-0}; } -class Enc_6975103 : OpcodeHexagon { +class Enc_454a26 : OpcodeHexagon { + bits <2> Pt4; + let Inst{9-8} = Pt4{1-0}; bits <2> Ps4; let Inst{17-16} = Ps4{1-0}; bits <2> Pd4; let Inst{1-0} = Pd4{1-0}; } -class Enc_9789480 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; -} -class Enc_12244921 : OpcodeHexagon { - bits <6> Ii; - let Inst{10-8} = Ii{2-0}; - bits <3> Os8; - let Inst{2-0} = Os8{2-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_8674673 : OpcodeHexagon { +class Enc_a6853f : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; bits <3> Ns8; let Inst{18-16} = Ns8{2-0}; - bits <5> n1; - let Inst{29-29} = n1{4-4}; - let Inst{26-25} = n1{3-2}; - let Inst{23-22} = n1{1-0}; -} -class Enc_8514936 : OpcodeHexagon { - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_13455308 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-5} = Ii{7-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; -} -class Enc_10188026 : OpcodeHexagon { - bits <6> Ii; - let Inst{13-8} = Ii{5-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_3158657 : OpcodeHexagon { - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; + bits <6> n1; + let Inst{29-29} = n1{5-5}; + let Inst{26-25} = n1{4-3}; + let Inst{23-22} = n1{2-1}; + let Inst{13-13} = n1{0-0}; } -class Enc_10597934 : OpcodeHexagon { +class Enc_c175d0 : OpcodeHexagon { + bits <4> Ii; + let Inst{11-8} = Ii{3-0}; bits <4> Rs16; let Inst{7-4} = Rs16{3-0}; bits <4> Rd16; let Inst{3-0} = Rd16{3-0}; - bits <2> n1; - let Inst{9-8} = n1{1-0}; } -class Enc_10612292 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; +class Enc_895bd9 : OpcodeHexagon { + bits <2> Qu4; + let Inst{9-8} = Qu4{1-0}; bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <2> Qx4; - let Inst{1-0} = Qx4{1-0}; + bits <5> Vx32; + let Inst{4-0} = Vx32{4-0}; } -class Enc_5178985 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; +class Enc_ea23e4 : OpcodeHexagon { bits <5> Rtt32; let Inst{12-8} = Rtt32{4-0}; - bits <2> Pu4; - let Inst{6-5} = Pu4{1-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } -class Enc_3967902 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-7} = Ii{7-2}; - bits <6> II; - let Inst{13-13} = II{5-5}; - let Inst{4-0} = II{4-0}; - bits <2> Pv4; - let Inst{6-5} = Pv4{1-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; -} -class Enc_2462143 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-5} = Ii{7-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; +class Enc_4dc228 : OpcodeHexagon { + bits <9> Ii; + let Inst{12-8} = Ii{8-4}; + let Inst{4-3} = Ii{3-2}; + bits <10> II; + let Inst{20-16} = II{9-5}; + let Inst{7-5} = II{4-2}; + let Inst{1-0} = II{1-0}; } -class Enc_9849208 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-7} = Ii{7-2}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; +class Enc_10bc21 : OpcodeHexagon { + bits <4> Ii; + let Inst{6-3} = Ii{3-0}; bits <5> Rt32; - let Inst{4-0} = Rt32{4-0}; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_12618352 : OpcodeHexagon { - bits <5> Rtt32; - let Inst{20-16} = Rtt32{4-0}; - bits <5> Vx32; - let Inst{7-3} = Vx32{4-0}; +class Enc_1aaec1 : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <3> Os8; + let Inst{2-0} = Os8{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_7303598 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{7-7} = Ii{0-0}; - bits <6> II; - let Inst{11-8} = II{5-2}; - let Inst{6-5} = II{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Ryy32; - let Inst{4-0} = Ryy32{4-0}; +class Enc_329361 : OpcodeHexagon { + bits <2> Pu4; + let Inst{6-5} = Pu4{1-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_13823098 : OpcodeHexagon { - bits <5> Gss32; - let Inst{20-16} = Gss32{4-0}; +class Enc_d2c7f1 : OpcodeHexagon { + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; + bits <2> Pe4; + let Inst{6-5} = Pe4{1-0}; } -class Enc_16388420 : OpcodeHexagon { - bits <2> Qs4; - let Inst{6-5} = Qs4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; +class Enc_3680c2 : OpcodeHexagon { + bits <7> Ii; + let Inst{11-5} = Ii{6-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; +} +class Enc_1ef990 : OpcodeHexagon { + bits <2> Pv4; + let Inst{12-11} = Pv4{1-0}; bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Vvv32; - let Inst{12-8} = Vvv32{4-0}; - bits <5> Vw32; - let Inst{4-0} = Vw32{4-0}; -} -class Enc_8328140 : OpcodeHexagon { - bits <16> Ii; - let Inst{21-21} = Ii{15-15}; - let Inst{13-8} = Ii{14-9}; - let Inst{2-0} = Ii{8-6}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; + bits <5> Vs32; + let Inst{4-0} = Vs32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_1793896 : OpcodeHexagon { +class Enc_e957fb : OpcodeHexagon { + bits <12> Ii; + let Inst{26-25} = Ii{11-10}; + let Inst{13-13} = Ii{9-9}; + let Inst{7-0} = Ii{8-1}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; +} +class Enc_c9e3bc : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vs32; + let Inst{4-0} = Vs32{4-0}; +} +class Enc_2e1979 : OpcodeHexagon { bits <2> Ii; let Inst{13-13} = Ii{1-1}; let Inst{7-7} = Ii{0-0}; @@ -3288,102 +2304,97 @@ class Enc_1793896 : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_4944558 : OpcodeHexagon { - bits <2> Qu4; - let Inst{9-8} = Qu4{1-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vx32; - let Inst{4-0} = Vx32{4-0}; +class Enc_0b2e5b : OpcodeHexagon { + bits <3> Ii; + let Inst{7-5} = Ii{2-0}; + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; } -class Enc_13211717 : OpcodeHexagon { +class Enc_d483b9 : OpcodeHexagon { + bits <1> Ii; + let Inst{5-5} = Ii{0-0}; bits <5> Vuu32; let Inst{12-8} = Vuu32{4-0}; - bits <5> Vvv32; - let Inst{20-16} = Vvv32{4-0}; - bits <5> Vdd32; - let Inst{4-0} = Vdd32{4-0}; -} -class Enc_8170340 : OpcodeHexagon { bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <5> Vx32; - let Inst{7-3} = Vx32{4-0}; - bits <3> Qdd8; - let Inst{2-0} = Qdd8{2-0}; + bits <5> Vxx32; + let Inst{4-0} = Vxx32{4-0}; } -class Enc_14071773 : OpcodeHexagon { - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_51635c : OpcodeHexagon { + bits <7> Ii; + let Inst{8-4} = Ii{6-2}; + bits <4> Rd16; + let Inst{3-0} = Rd16{3-0}; } -class Enc_8605375 : OpcodeHexagon { - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_e26546 : OpcodeHexagon { + bits <5> Ii; + let Inst{6-3} = Ii{4-1}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_12711252 : OpcodeHexagon { - bits <2> Pv4; - let Inst{9-8} = Pv4{1-0}; +class Enc_70fb07 : OpcodeHexagon { + bits <6> Ii; + let Inst{13-8} = Ii{5-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rxx32; + let Inst{4-0} = Rxx32{4-0}; } -class Enc_8202458 : OpcodeHexagon { - bits <2> Pu4; - let Inst{6-5} = Pu4{1-0}; +class Enc_277737 : OpcodeHexagon { + bits <8> Ii; + let Inst{22-21} = Ii{7-6}; + let Inst{13-13} = Ii{5-5}; + let Inst{7-5} = Ii{4-2}; + bits <5> Ru32; + let Inst{4-0} = Ru32{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; + bits <5> Rd32; + let Inst{12-8} = Rd32{4-0}; } -class Enc_8577055 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <5> n1; - let Inst{28-28} = n1{4-4}; - let Inst{25-23} = n1{3-1}; - let Inst{8-8} = n1{0-0}; +class Enc_5c124a : OpcodeHexagon { + bits <19> Ii; + let Inst{26-25} = Ii{18-17}; + let Inst{20-16} = Ii{16-12}; + let Inst{13-13} = Ii{11-11}; + let Inst{7-0} = Ii{10-3}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; +} +class Enc_928ca1 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_1409050 : OpcodeHexagon { +class Enc_da664b : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{7-7} = Ii{0-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; - bits <5> Rxx32; - let Inst{4-0} = Rxx32{4-0}; -} -class Enc_7466005 : OpcodeHexagon { - bits <5> Gs32; - let Inst{20-16} = Gs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_2380082 : OpcodeHexagon { - bits <5> Ii; - let Inst{12-8} = Ii{4-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_10067774 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_7b7ba8 : OpcodeHexagon { + bits <2> Qu4; + let Inst{9-8} = Qu4{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; } -class Enc_11000933 : OpcodeHexagon { +class Enc_47ee5e : OpcodeHexagon { bits <2> Ii; let Inst{13-13} = Ii{1-1}; let Inst{7-7} = Ii{0-0}; @@ -3396,50 +2407,96 @@ class Enc_11000933 : OpcodeHexagon { bits <3> Nt8; let Inst{2-0} = Nt8{2-0}; } -class Enc_13201267 : OpcodeHexagon { - bits <5> Ii; - let Inst{12-8} = Ii{4-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; +class Enc_8bcba4 : OpcodeHexagon { + bits <6> II; + let Inst{5-0} = II{5-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Re32; + let Inst{20-16} = Re32{4-0}; +} +class Enc_3a2484 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <4> n1; + let Inst{28-28} = n1{3-3}; + let Inst{24-23} = n1{2-1}; + let Inst{13-13} = n1{0-0}; } -class Enc_1989309 : OpcodeHexagon { +class Enc_a5ed8a : OpcodeHexagon { bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vvv32; - let Inst{4-0} = Vvv32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; } -class Enc_9082775 : OpcodeHexagon { - bits <10> Ii; - let Inst{21-21} = Ii{9-9}; +class Enc_cb9321 : OpcodeHexagon { + bits <16> Ii; + let Inst{27-21} = Ii{15-9}; let Inst{13-5} = Ii{8-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_8065534 : OpcodeHexagon { - bits <4> Ii; - let Inst{6-3} = Ii{3-0}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_668704 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <5> n1; + let Inst{28-28} = n1{4-4}; + let Inst{25-22} = n1{3-0}; } -class Enc_4631106 : OpcodeHexagon { - bits <2> Ps4; - let Inst{17-16} = Ps4{1-0}; - bits <2> Pt4; - let Inst{9-8} = Pt4{1-0}; - bits <2> Pu4; - let Inst{7-6} = Pu4{1-0}; +class Enc_a7341a : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vx32; + let Inst{4-0} = Vx32{4-0}; +} +class Enc_5eac98 : OpcodeHexagon { + bits <6> Ii; + let Inst{13-8} = Ii{5-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_02553a : OpcodeHexagon { + bits <7> Ii; + let Inst{11-5} = Ii{6-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; bits <2> Pd4; let Inst{1-0} = Pd4{1-0}; } -class Enc_11065510 : OpcodeHexagon { +class Enc_acd6ed : OpcodeHexagon { + bits <9> Ii; + let Inst{10-5} = Ii{8-3}; + bits <2> Pt4; + let Inst{12-11} = Pt4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_8e583a : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <5> n1; + let Inst{28-28} = n1{4-4}; + let Inst{25-23} = n1{3-1}; + let Inst{13-13} = n1{0-0}; +} +class Enc_b886fd : OpcodeHexagon { bits <5> Ii; let Inst{6-3} = Ii{4-1}; bits <2> Pv4; @@ -3449,204 +2506,144 @@ class Enc_11065510 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_6673186 : OpcodeHexagon { - bits <13> Ii; - let Inst{26-25} = Ii{12-11}; - let Inst{13-13} = Ii{10-10}; - let Inst{7-0} = Ii{9-2}; +class Enc_24a7dc : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{23-19} = Vv32{4-0}; + bits <3> Rt8; + let Inst{18-16} = Rt8{2-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; +} +class Enc_2d829e : OpcodeHexagon { + bits <14> Ii; + let Inst{10-0} = Ii{13-3}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; -} -class Enc_8498433 : OpcodeHexagon { - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <3> Os8; - let Inst{2-0} = Os8{2-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; } -class Enc_4395009 : OpcodeHexagon { - bits <7> Ii; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_10926598 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{12-8} = Vuu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vxx32; - let Inst{7-3} = Vxx32{4-0}; -} -class Enc_7606379 : OpcodeHexagon { - bits <2> Pu4; - let Inst{6-5} = Pu4{1-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; +class Enc_4f4ed7 : OpcodeHexagon { + bits <18> Ii; + let Inst{26-25} = Ii{17-16}; + let Inst{20-16} = Ii{15-11}; + let Inst{13-5} = Ii{10-2}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_8131399 : OpcodeHexagon { - bits <6> II; - let Inst{5-0} = II{5-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Re32; - let Inst{20-16} = Re32{4-0}; +class Enc_84b2cd : OpcodeHexagon { + bits <8> Ii; + let Inst{12-7} = Ii{7-2}; + bits <5> II; + let Inst{4-0} = II{4-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; } -class Enc_11522288 : OpcodeHexagon { +class Enc_8dbdfe : OpcodeHexagon { bits <8> Ii; - let Inst{12-5} = Ii{7-0}; + let Inst{13-13} = Ii{7-7}; + let Inst{7-3} = Ii{6-2}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rx32; - let Inst{4-0} = Rx32{4-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; } -class Enc_114098 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{5-5} = Ii{0-0}; +class Enc_90cd8b : OpcodeHexagon { bits <5> Rss32; let Inst{20-16} = Rss32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_5654851 : OpcodeHexagon { - bits <5> Ii; - let Inst{12-8} = Ii{4-0}; +class Enc_bd0b33 : OpcodeHexagon { + bits <10> Ii; + let Inst{21-21} = Ii{9-9}; + let Inst{13-5} = Ii{8-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_12023037 : OpcodeHexagon { - bits <2> Ps4; - let Inst{6-5} = Ps4{1-0}; - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_176263 : OpcodeHexagon { - bits <8> Ii; - let Inst{9-4} = Ii{7-2}; - bits <4> Rd16; - let Inst{3-0} = Rd16{3-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; } -class Enc_6130414 : OpcodeHexagon { - bits <16> Ii; - let Inst{23-22} = Ii{15-14}; - let Inst{13-0} = Ii{13-0}; +class Enc_c7cd90 : OpcodeHexagon { + bits <4> Ii; + let Inst{6-3} = Ii{3-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_631197 : OpcodeHexagon { - bits <6> Ii; - let Inst{13-8} = Ii{5-0}; - bits <6> II; - let Inst{23-21} = II{5-3}; - let Inst{7-5} = II{2-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rxx32; - let Inst{4-0} = Rxx32{4-0}; +class Enc_405228 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <3> n1; + let Inst{28-28} = n1{2-2}; + let Inst{24-23} = n1{1-0}; } -class Enc_16214129 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; +class Enc_81ac1d : OpcodeHexagon { + bits <24> Ii; + let Inst{24-16} = Ii{23-15}; + let Inst{13-1} = Ii{14-2}; } -class Enc_8333157 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; +class Enc_395cc4 : OpcodeHexagon { + bits <7> Ii; + let Inst{6-3} = Ii{6-3}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; bits <5> Rtt32; let Inst{12-8} = Rtt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_4834775 : OpcodeHexagon { - bits <6> II; - let Inst{13-8} = II{5-0}; - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rd16; - let Inst{19-16} = Rd16{3-0}; +class Enc_a51a9a : OpcodeHexagon { + bits <8> Ii; + let Inst{12-8} = Ii{7-3}; + let Inst{4-2} = Ii{2-0}; } -class Enc_16601956 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; +class Enc_d44e31 : OpcodeHexagon { + bits <6> Ii; + let Inst{12-7} = Ii{5-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; -} -class Enc_15946706 : OpcodeHexagon { - bits <2> Ii; - let Inst{6-5} = Ii{1-0}; - bits <3> Rdd8; - let Inst{2-0} = Rdd8{2-0}; + bits <5> Rt32; + let Inst{4-0} = Rt32{4-0}; } -class Enc_6923828 : OpcodeHexagon { +class Enc_f77fbc : OpcodeHexagon { bits <4> Ii; let Inst{13-13} = Ii{3-3}; let Inst{10-8} = Ii{2-0}; bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; -} -class Enc_1332717 : OpcodeHexagon { - bits <2> Pu4; - let Inst{6-5} = Pu4{1-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; + bits <3> Os8; + let Inst{2-0} = Os8{2-0}; } -class Enc_1786883 : OpcodeHexagon { +class Enc_d2216a : OpcodeHexagon { bits <5> Rss32; let Inst{20-16} = Rss32{4-0}; - bits <6> Sdd64; - let Inst{5-0} = Sdd64{5-0}; -} -class Enc_14303394 : OpcodeHexagon { - bits <6> Ii; - let Inst{8-5} = Ii{5-2}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; +} +class Enc_85bf58 : OpcodeHexagon { + bits <7> Ii; + let Inst{6-3} = Ii{6-3}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_9282127 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-7} = Ii{7-2}; - bits <8> II; - let Inst{13-13} = II{7-7}; - let Inst{6-0} = II{6-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; +class Enc_71bb9b : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; } -class Enc_2813446 : OpcodeHexagon { +class Enc_52a5dd : OpcodeHexagon { bits <4> Ii; let Inst{6-3} = Ii{3-0}; bits <2> Pv4; @@ -3656,527 +2653,589 @@ class Enc_2813446 : OpcodeHexagon { bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_364753 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; - bits <4> n1; - let Inst{29-29} = n1{3-3}; - let Inst{26-25} = n1{2-1}; - let Inst{23-23} = n1{0-0}; +class Enc_5e2823 : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_12477789 : OpcodeHexagon { - bits <15> Ii; - let Inst{21-21} = Ii{14-14}; - let Inst{13-13} = Ii{13-13}; - let Inst{11-1} = Ii{12-2}; +class Enc_28a2dc : OpcodeHexagon { + bits <5> Ii; + let Inst{12-8} = Ii{4-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; + bits <5> Rx32; + let Inst{4-0} = Rx32{4-0}; } -class Enc_44555 : OpcodeHexagon { +class Enc_5138b3 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; bits <5> Rt32; let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; + bits <5> Vx32; + let Inst{4-0} = Vx32{4-0}; } -class Enc_8497723 : OpcodeHexagon { - bits <6> Ii; - let Inst{13-8} = Ii{5-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rxx32; - let Inst{4-0} = Rxx32{4-0}; +class Enc_84d359 : OpcodeHexagon { + bits <4> Ii; + let Inst{3-0} = Ii{3-0}; + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; +} +class Enc_e07374 : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_323f2d : OpcodeHexagon { + bits <6> II; + let Inst{11-8} = II{5-2}; + let Inst{6-5} = II{1-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; + bits <5> Re32; + let Inst{20-16} = Re32{4-0}; +} +class Enc_1a9974 : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{7-7} = Ii{0-0}; + bits <2> Pv4; + let Inst{6-5} = Pv4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Ru32; + let Inst{12-8} = Ru32{4-0}; + bits <5> Rtt32; + let Inst{4-0} = Rtt32{4-0}; } -class Enc_4359901 : OpcodeHexagon { +class Enc_1de724 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; bits <4> n1; - let Inst{29-29} = n1{3-3}; - let Inst{26-25} = n1{2-1}; - let Inst{22-22} = n1{0-0}; + let Inst{28-28} = n1{3-3}; + let Inst{24-22} = n1{2-0}; } -class Enc_11271630 : OpcodeHexagon { - bits <7> Ii; - let Inst{6-3} = Ii{6-3}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; +class Enc_dd766a : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; +} +class Enc_0b51ce : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <2> Qv4; + let Inst{12-11} = Qv4{1-0}; + bits <5> Vs32; + let Inst{4-0} = Vs32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_10501894 : OpcodeHexagon { - bits <4> Rs16; - let Inst{7-4} = Rs16{3-0}; - bits <3> Rdd8; - let Inst{2-0} = Rdd8{2-0}; -} -class Enc_9768377 : OpcodeHexagon { - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{4-0} = Vd32{4-0}; -} -class Enc_16268019 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{20-16} = Vuu32{4-0}; - bits <5> Vvv32; - let Inst{12-8} = Vvv32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; +class Enc_b4e6cf : OpcodeHexagon { + bits <10> Ii; + let Inst{21-21} = Ii{9-9}; + let Inst{13-5} = Ii{8-0}; + bits <5> Ru32; + let Inst{4-0} = Ru32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; } -class Enc_8814718 : OpcodeHexagon { - bits <18> Ii; - let Inst{26-25} = Ii{17-16}; - let Inst{20-16} = Ii{15-11}; - let Inst{13-5} = Ii{10-2}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_44215c : OpcodeHexagon { + bits <6> Ii; + let Inst{17-16} = Ii{5-4}; + let Inst{6-3} = Ii{3-0}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; } -class Enc_6212930 : OpcodeHexagon { +class Enc_a21d47 : OpcodeHexagon { bits <6> Ii; - let Inst{8-5} = Ii{5-2}; + let Inst{10-5} = Ii{5-0}; bits <2> Pt4; - let Inst{10-9} = Pt4{1-0}; + let Inst{12-11} = Pt4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; +} +class Enc_cc449f : OpcodeHexagon { + bits <4> Ii; + let Inst{6-3} = Ii{3-0}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_5462762 : OpcodeHexagon { +class Enc_645d54 : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{5-5} = Ii{0-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vv32; - let Inst{12-8} = Vv32{4-0}; - bits <5> Vw32; - let Inst{4-0} = Vw32{4-0}; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_667b39 : OpcodeHexagon { + bits <5> Css32; + let Inst{20-16} = Css32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; +} +class Enc_927852 : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_6154421 : OpcodeHexagon { +class Enc_163a3c : OpcodeHexagon { bits <7> Ii; - let Inst{13-13} = Ii{6-6}; - let Inst{7-3} = Ii{5-1}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; + let Inst{12-7} = Ii{6-1}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; + bits <5> Rt32; + let Inst{4-0} = Rt32{4-0}; +} +class Enc_b087ac : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} +class Enc_b1e1fb : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <5> n1; + let Inst{28-28} = n1{4-4}; + let Inst{25-23} = n1{3-1}; + let Inst{8-8} = n1{0-0}; } -class Enc_8940892 : OpcodeHexagon { +class Enc_1f19b5 : OpcodeHexagon { + bits <5> Ii; + let Inst{9-5} = Ii{4-0}; bits <5> Rss32; let Inst{20-16} = Rss32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_3531000 : OpcodeHexagon { - bits <7> Ii; - let Inst{11-5} = Ii{6-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; bits <2> Pd4; let Inst{1-0} = Pd4{1-0}; } -class Enc_14311138 : OpcodeHexagon { - bits <5> Vuu32; - let Inst{20-16} = Vuu32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; -} -class Enc_2216485 : OpcodeHexagon { - bits <6> Ii; - let Inst{22-21} = Ii{5-4}; - let Inst{13-13} = Ii{3-3}; - let Inst{7-5} = Ii{2-0}; +class Enc_b8c967 : OpcodeHexagon { + bits <8> Ii; + let Inst{12-5} = Ii{7-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } -class Enc_12395768 : OpcodeHexagon { - bits <16> Ii; - let Inst{26-25} = Ii{15-14}; - let Inst{20-16} = Ii{13-9}; - let Inst{13-13} = Ii{8-8}; - let Inst{7-0} = Ii{7-0}; - bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; +class Enc_fb6577 : OpcodeHexagon { + bits <2> Pu4; + let Inst{9-8} = Pu4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_11047413 : OpcodeHexagon { - bits <6> II; - let Inst{11-8} = II{5-2}; - let Inst{6-5} = II{1-0}; - bits <5> Ryy32; - let Inst{4-0} = Ryy32{4-0}; - bits <5> Re32; - let Inst{20-16} = Re32{4-0}; +class Enc_2bae10 : OpcodeHexagon { + bits <4> Ii; + let Inst{10-8} = Ii{3-1}; + bits <4> Rs16; + let Inst{7-4} = Rs16{3-0}; + bits <4> Rd16; + let Inst{3-0} = Rd16{3-0}; } -class Enc_1256611 : OpcodeHexagon { +class Enc_c4dc92 : OpcodeHexagon { + bits <2> Qv4; + let Inst{23-22} = Qv4{1-0}; bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} +class Enc_03833b : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <2> Pd4; + let Inst{1-0} = Pd4{1-0}; +} +class Enc_dbd70c : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <2> Pu4; + let Inst{6-5} = Pu4{1-0}; bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } -class Enc_7884306 : OpcodeHexagon { - bits <8> Ii; - let Inst{8-4} = Ii{7-3}; +class Enc_f6fe0b : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <6> n1; + let Inst{28-28} = n1{5-5}; + let Inst{24-22} = n1{4-2}; + let Inst{13-13} = n1{1-1}; + let Inst{8-8} = n1{0-0}; } -class Enc_11244923 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <3> Os8; - let Inst{2-0} = Os8{2-0}; +class Enc_9e2e1c : OpcodeHexagon { + bits <5> Ii; + let Inst{8-5} = Ii{4-1}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Ryy32; + let Inst{4-0} = Ryy32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_8612939 : OpcodeHexagon { +class Enc_8df4be : OpcodeHexagon { + bits <17> Ii; + let Inst{26-25} = Ii{16-15}; + let Inst{20-16} = Ii{14-10}; + let Inst{13-5} = Ii{9-1}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_66bce1 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; let Inst{7-1} = Ii{8-2}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; - bits <5> n1; - let Inst{29-29} = n1{4-4}; - let Inst{26-25} = n1{3-2}; - let Inst{22-22} = n1{1-1}; - let Inst{13-13} = n1{0-0}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <4> Rd16; + let Inst{11-8} = Rd16{3-0}; } -class Enc_16355964 : OpcodeHexagon { - bits <8> Ii; - let Inst{12-5} = Ii{7-0}; - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_b8309d : OpcodeHexagon { + bits <9> Ii; + let Inst{8-3} = Ii{8-3}; + bits <3> Rtt8; + let Inst{2-0} = Rtt8{2-0}; +} +class Enc_5e8512 : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vxx32; + let Inst{4-0} = Vxx32{4-0}; } -class Enc_12616482 : OpcodeHexagon { +class Enc_4f677b : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{7-7} = Ii{0-0}; bits <6> II; let Inst{11-8} = II{5-2}; let Inst{6-5} = II{1-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; - bits <5> Re32; - let Inst{20-16} = Re32{4-0}; -} -class Enc_5915771 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <5> n1; - let Inst{28-28} = n1{4-4}; - let Inst{24-22} = n1{3-1}; - let Inst{8-8} = n1{0-0}; } -class Enc_14459927 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; +class Enc_3d920a : OpcodeHexagon { + bits <6> Ii; + let Inst{8-5} = Ii{5-2}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_7504828 : OpcodeHexagon { - bits <10> Ii; - let Inst{21-21} = Ii{9-9}; - let Inst{13-5} = Ii{8-0}; - bits <5> Ru32; - let Inst{4-0} = Ru32{4-0}; +class Enc_e83554 : OpcodeHexagon { + bits <5> Ii; + let Inst{8-5} = Ii{4-1}; + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_14209223 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; +class Enc_ed48be : OpcodeHexagon { + bits <2> Ii; + let Inst{6-5} = Ii{1-0}; + bits <3> Rdd8; + let Inst{2-0} = Rdd8{2-0}; } -class Enc_3931661 : OpcodeHexagon { - bits <6> Ii; - let Inst{8-5} = Ii{5-2}; +class Enc_f8c1c4 : OpcodeHexagon { + bits <2> Pv4; + let Inst{12-11} = Pv4{1-0}; bits <1> Mu2; let Inst{13-13} = Mu2{0-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_13606251 : OpcodeHexagon { - bits <6> Ii; - let Inst{11-8} = Ii{5-2}; +class Enc_1aa186 : OpcodeHexagon { + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; + bits <5> Rxx32; + let Inst{4-0} = Rxx32{4-0}; +} +class Enc_134437 : OpcodeHexagon { + bits <2> Qs4; + let Inst{9-8} = Qs4{1-0}; + bits <2> Qt4; + let Inst{23-22} = Qt4{1-0}; + bits <2> Qd4; + let Inst{1-0} = Qd4{1-0}; +} +class Enc_97d666 : OpcodeHexagon { bits <4> Rs16; let Inst{7-4} = Rs16{3-0}; bits <4> Rd16; let Inst{3-0} = Rd16{3-0}; } -class Enc_11475992 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vdd32; - let Inst{7-3} = Vdd32{4-0}; +class Enc_f82eaf : OpcodeHexagon { + bits <8> Ii; + let Inst{10-5} = Ii{7-2}; + bits <2> Pt4; + let Inst{12-11} = Pt4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; } -class Enc_13133231 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; +class Enc_69d63b : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; } -class Enc_9959498 : OpcodeHexagon { - bits <8> Ii; - let Inst{22-21} = Ii{7-6}; - let Inst{13-13} = Ii{5-5}; - let Inst{7-5} = Ii{4-2}; +class Enc_f79415 : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{6-6} = Ii{0-0}; + bits <6> II; + let Inst{5-0} = II{5-0}; bits <5> Ru32; - let Inst{4-0} = Ru32{4-0}; + let Inst{20-16} = Ru32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; +} +class Enc_ce6828 : OpcodeHexagon { + bits <14> Ii; + let Inst{26-25} = Ii{13-12}; + let Inst{13-13} = Ii{11-11}; + let Inst{7-0} = Ii{10-3}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; +} +class Enc_800e04 : OpcodeHexagon { + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <4> Rs16; + let Inst{19-16} = Rs16{3-0}; + bits <6> n1; + let Inst{28-28} = n1{5-5}; + let Inst{25-22} = n1{4-1}; + let Inst{13-13} = n1{0-0}; +} +class Enc_ad1831 : OpcodeHexagon { + bits <16> Ii; + let Inst{26-25} = Ii{15-14}; + let Inst{20-16} = Ii{13-9}; + let Inst{13-13} = Ii{8-8}; + let Inst{7-0} = Ii{7-0}; + bits <3> Nt8; + let Inst{10-8} = Nt8{2-0}; +} +class Enc_0fa531 : OpcodeHexagon { + bits <15> Ii; + let Inst{21-21} = Ii{14-14}; + let Inst{13-13} = Ii{13-13}; + let Inst{11-1} = Ii{12-2}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Rd32; - let Inst{12-8} = Rd32{4-0}; -} -class Enc_8919369 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <5> n1; - let Inst{28-28} = n1{4-4}; - let Inst{24-23} = n1{3-2}; - let Inst{13-13} = n1{1-1}; - let Inst{8-8} = n1{0-0}; -} -class Enc_2968094 : OpcodeHexagon { - bits <7> Ii; - let Inst{11-5} = Ii{6-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <2> Pd4; - let Inst{1-0} = Pd4{1-0}; } -class Enc_4813442 : OpcodeHexagon { +class Enc_7eaeb6 : OpcodeHexagon { bits <6> Ii; let Inst{6-3} = Ii{5-2}; bits <2> Pv4; let Inst{1-0} = Pv4{1-0}; - bits <3> Nt8; - let Inst{10-8} = Nt8{2-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } -class Enc_4684887 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <4> Rs16; - let Inst{19-16} = Rs16{3-0}; - bits <4> n1; - let Inst{28-28} = n1{3-3}; - let Inst{25-23} = n1{2-0}; -} -class Enc_15606259 : OpcodeHexagon { - bits <4> Ii; - let Inst{11-8} = Ii{3-0}; +class Enc_f55a0c : OpcodeHexagon { + bits <6> Ii; + let Inst{11-8} = Ii{5-2}; bits <4> Rs16; let Inst{7-4} = Rs16{3-0}; - bits <4> Rd16; - let Inst{3-0} = Rd16{3-0}; -} -class Enc_2268028 : OpcodeHexagon { - bits <3> Qtt8; - let Inst{10-8} = Qtt8{2-0}; - bits <3> Qdd8; - let Inst{5-3} = Qdd8{2-0}; -} -class Enc_13430430 : OpcodeHexagon { - bits <5> Vu32; - let Inst{12-8} = Vu32{4-0}; - bits <5> Rt32; - let Inst{20-16} = Rt32{4-0}; - bits <5> Vd32; - let Inst{7-3} = Vd32{4-0}; - bits <3> Qxx8; - let Inst{2-0} = Qxx8{2-0}; -} -class Enc_13336212 : OpcodeHexagon { - bits <4> Rd16; - let Inst{3-0} = Rd16{3-0}; - bits <1> n1; - let Inst{9-9} = n1{0-0}; -} -class Enc_15008287 : OpcodeHexagon { - bits <5> Vu32; - let Inst{20-16} = Vu32{4-0}; - bits <3> Rt8; - let Inst{2-0} = Rt8{2-0}; - bits <5> Vx32; - let Inst{7-3} = Vx32{4-0}; - bits <5> Vy32; - let Inst{12-8} = Vy32{4-0}; + bits <4> Rt16; + let Inst{3-0} = Rt16{3-0}; } -class Enc_4897205 : OpcodeHexagon { - bits <2> Qs4; - let Inst{9-8} = Qs4{1-0}; - bits <2> Qd4; - let Inst{1-0} = Qd4{1-0}; +class Enc_f20719 : OpcodeHexagon { + bits <7> Ii; + let Inst{12-7} = Ii{6-1}; + bits <6> II; + let Inst{13-13} = II{5-5}; + let Inst{4-0} = II{4-0}; + bits <2> Pv4; + let Inst{6-5} = Pv4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; } -class Enc_8038806 : OpcodeHexagon { - bits <4> Ii; - let Inst{11-8} = Ii{3-0}; - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; +class Enc_eafd18 : OpcodeHexagon { + bits <5> II; + let Inst{12-8} = II{4-0}; + bits <11> Ii; + let Inst{21-20} = Ii{10-9}; + let Inst{7-1} = Ii{8-2}; + bits <3> Ns8; + let Inst{18-16} = Ns8{2-0}; } -class Enc_12669374 : OpcodeHexagon { +class Enc_7b523d : OpcodeHexagon { bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{23-19} = Vv32{4-0}; + bits <3> Rt8; + let Inst{18-16} = Rt8{2-0}; bits <5> Vxx32; let Inst{4-0} = Vxx32{4-0}; } -class Enc_971347 : OpcodeHexagon { - bits <4> Ii; - let Inst{8-5} = Ii{3-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Ryy32; - let Inst{4-0} = Ryy32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_1997594 : OpcodeHexagon { - bits <5> Rs32; - let Inst{20-16} = Rs32{4-0}; +class Enc_47ef61 : OpcodeHexagon { + bits <3> Ii; + let Inst{7-5} = Ii{2-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; - bits <5> Rdd32; - let Inst{4-0} = Rdd32{4-0}; -} -class Enc_11940513 : OpcodeHexagon { - bits <2> Ii; - let Inst{13-13} = Ii{1-1}; - let Inst{7-7} = Ii{0-0}; - bits <2> Pv4; - let Inst{6-5} = Pv4{1-0}; bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; - bits <5> Ru32; - let Inst{12-8} = Ru32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} +class Enc_cc857d : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; bits <5> Rt32; - let Inst{4-0} = Rt32{4-0}; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vx32; + let Inst{4-0} = Vx32{4-0}; } -class Enc_2735552 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <2> Pv4; - let Inst{12-11} = Pv4{1-0}; - bits <3> Os8; - let Inst{2-0} = Os8{2-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_7fa7f6 : OpcodeHexagon { + bits <6> II; + let Inst{11-8} = II{5-2}; + let Inst{6-5} = II{1-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; + bits <5> Re32; + let Inst{20-16} = Re32{4-0}; } -class Enc_16410950 : OpcodeHexagon { - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; +class Enc_0f8bab : OpcodeHexagon { + bits <5> Vu32; + let Inst{12-8} = Vu32{4-0}; bits <5> Rt32; - let Inst{12-8} = Rt32{4-0}; - bits <5> Vs32; - let Inst{7-3} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_6226085 : OpcodeHexagon { - bits <5> Ii; - let Inst{12-8} = Ii{4-0}; - bits <5> II; - let Inst{22-21} = II{4-3}; - let Inst{7-5} = II{2-0}; - bits <5> Rd32; - let Inst{4-0} = Rd32{4-0}; + let Inst{20-16} = Rt32{4-0}; + bits <2> Qd4; + let Inst{1-0} = Qd4{1-0}; } -class Enc_14193700 : OpcodeHexagon { +class Enc_7eb485 : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{6-6} = Ii{0-0}; bits <6> II; let Inst{5-0} = II{5-0}; + bits <5> Ru32; + let Inst{20-16} = Ru32{4-0}; bits <3> Nt8; let Inst{10-8} = Nt8{2-0}; - bits <5> Re32; - let Inst{20-16} = Re32{4-0}; } -class Enc_15763937 : OpcodeHexagon { - bits <11> Ii; - let Inst{21-20} = Ii{10-9}; - let Inst{7-1} = Ii{8-2}; - bits <3> Ns8; - let Inst{18-16} = Ns8{2-0}; - bits <6> n1; - let Inst{29-29} = n1{5-5}; - let Inst{26-25} = n1{4-3}; - let Inst{23-22} = n1{2-1}; - let Inst{13-13} = n1{0-0}; +class Enc_864a5a : OpcodeHexagon { + bits <9> Ii; + let Inst{12-8} = Ii{8-4}; + let Inst{4-3} = Ii{3-2}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; } -class Enc_2492727 : OpcodeHexagon { - bits <5> Rss32; - let Inst{20-16} = Rss32{4-0}; +class Enc_c2b48e : OpcodeHexagon { + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; bits <5> Rt32; let Inst{12-8} = Rt32{4-0}; bits <2> Pd4; let Inst{1-0} = Pd4{1-0}; } -class Enc_13425035 : OpcodeHexagon { - bits <2> Qv4; - let Inst{12-11} = Qv4{1-0}; - bits <1> Mu2; - let Inst{13-13} = Mu2{0-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; +class Enc_8c6530 : OpcodeHexagon { + bits <5> Rtt32; + let Inst{12-8} = Rtt32{4-0}; + bits <5> Rss32; + let Inst{20-16} = Rss32{4-0}; + bits <2> Pu4; + let Inst{6-5} = Pu4{1-0}; + bits <5> Rdd32; + let Inst{4-0} = Rdd32{4-0}; } -class Enc_4135257 : OpcodeHexagon { - bits <4> Ii; - let Inst{10-8} = Ii{3-1}; +class Enc_448f7f : OpcodeHexagon { + bits <11> Ii; + let Inst{26-25} = Ii{10-9}; + let Inst{13-13} = Ii{8-8}; + let Inst{7-0} = Ii{7-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; +} +class Enc_da8d43 : OpcodeHexagon { + bits <6> Ii; + let Inst{13-13} = Ii{5-5}; + let Inst{7-3} = Ii{4-0}; + bits <2> Pv4; + let Inst{1-0} = Pv4{1-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Rt32; + let Inst{12-8} = Rt32{4-0}; +} +class Enc_a6ce9c : OpcodeHexagon { + bits <6> Ii; + let Inst{3-0} = Ii{5-2}; bits <4> Rs16; let Inst{7-4} = Rs16{3-0}; - bits <4> Rd16; - let Inst{3-0} = Rd16{3-0}; } -class Enc_14631806 : OpcodeHexagon { +class Enc_eca7c8 : OpcodeHexagon { + bits <2> Ii; + let Inst{13-13} = Ii{1-1}; + let Inst{7-7} = Ii{0-0}; + bits <5> Rs32; + let Inst{20-16} = Rs32{4-0}; + bits <5> Ru32; + let Inst{12-8} = Ru32{4-0}; + bits <5> Rt32; + let Inst{4-0} = Rt32{4-0}; +} +class Enc_4b39e4 : OpcodeHexagon { + bits <3> Ii; + let Inst{7-5} = Ii{2-0}; bits <5> Vu32; let Inst{12-8} = Vu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; bits <5> Vdd32; let Inst{4-0} = Vdd32{4-0}; } -class Enc_12397062 : OpcodeHexagon { - bits <3> Ii; - let Inst{10-8} = Ii{2-0}; - bits <2> Qv4; - let Inst{12-11} = Qv4{1-0}; - bits <5> Vs32; - let Inst{4-0} = Vs32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} -class Enc_11959851 : OpcodeHexagon { - bits <7> Ii; - let Inst{6-3} = Ii{6-3}; - bits <2> Pv4; - let Inst{1-0} = Pv4{1-0}; - bits <5> Rtt32; - let Inst{12-8} = Rtt32{4-0}; - bits <5> Rx32; - let Inst{20-16} = Rx32{4-0}; -} diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td index 2bfde9acaea9cd7f426ea4775ce645ade49753dd..2dc74632e9be2818ae46971d58cfbbf231ced51e 100644 --- a/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -11,36 +11,39 @@ def A2_abs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = abs($Rs32)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 { +tc_94e6ffd9, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_absp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = abs($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000100; +let prefersSlot3 = 1; } def A2_abssat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = abs($Rs32):sat", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 { +tc_94e6ffd9, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_add : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel, ImmRegRel { +tc_548f402d, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011000; @@ -56,145 +59,157 @@ def A2_addh_h16_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.h,$Rs32.h):<<16", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_bd16579e, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_addh_h16_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.h,$Rs32.l):<<16", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_bd16579e, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_addh_h16_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.h):<<16", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_bd16579e, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_addh_h16_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.l):<<16", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_bd16579e, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_addh_h16_sat_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.h,$Rs32.h):sat:<<16", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_addh_h16_sat_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.h,$Rs32.l):sat:<<16", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_addh_h16_sat_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.h):sat:<<16", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_addh_h16_sat_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.l):sat:<<16", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_addh_l16_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.h)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_7ca2ea10, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_addh_l16_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.l)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_7ca2ea10, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_addh_l16_sat_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.h):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_addh_l16_sat_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = add($Rt32.l,$Rs32.l):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_addi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = add($Rs32,#$Ii)", -ALU32_ADDI_tc_1_SLOT0123, TypeALU32_ADDI>, Enc_11542684, PredNewRel, ImmRegRel { +tc_548f402d, TypeALU32_ADDI>, Enc_cb9321, PredNewRel, ImmRegRel { let Inst{31-28} = 0b1011; let hasNewValue = 1; let opNewValue = 0; @@ -213,7 +228,7 @@ def A2_addp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_9c18c9a5, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -224,10 +239,11 @@ def A2_addpsat : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rss32,$Rtt32):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_47ab9233, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; +let prefersSlot3 = 1; let Defs = [USR_OVF]; let isCommutable = 1; } @@ -235,12 +251,13 @@ def A2_addsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32):sat", -ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 { +tc_b0f50e3c, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; let InputType = "reg"; let isCommutable = 1; @@ -249,32 +266,34 @@ def A2_addsp : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rs32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64> { +tc_bd16579e, TypeALU64> { let isPseudo = 1; } def A2_addsph : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rss32,$Rtt32):raw:hi", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_bd16579e, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; +let prefersSlot3 = 1; } def A2_addspl : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = add($Rss32,$Rtt32):raw:lo", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_bd16579e, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; +let prefersSlot3 = 1; } def A2_and : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = and($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel, ImmRegRel { +tc_548f402d, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001000; @@ -290,7 +309,7 @@ def A2_andir : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = and($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_13472494, ImmRegRel { +tc_548f402d, TypeALU32_2op>, Enc_140c83, ImmRegRel { let Inst{31-22} = 0b0111011000; let hasNewValue = 1; let opNewValue = 0; @@ -306,7 +325,7 @@ def A2_andp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = and($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_9c18c9a5, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -316,7 +335,7 @@ def A2_aslh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = aslh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel { +tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000000; let hasNewValue = 1; @@ -328,7 +347,7 @@ def A2_asrh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = asrh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel { +tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000001; let hasNewValue = 1; @@ -340,7 +359,7 @@ def A2_combine_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = combine($Rt32.h,$Rs32.h)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_548f402d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011100; @@ -352,7 +371,7 @@ def A2_combine_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = combine($Rt32.h,$Rs32.l)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_548f402d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011101; @@ -364,7 +383,7 @@ def A2_combine_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = combine($Rt32.l,$Rs32.h)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_548f402d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011110; @@ -376,7 +395,7 @@ def A2_combine_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = combine($Rt32.l,$Rs32.l)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_548f402d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011111; @@ -388,7 +407,7 @@ def A2_combineii : HInst< (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, s8_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_14007201 { +tc_548f402d, TypeALU32_2op>, Enc_18c338 { let Inst{31-23} = 0b011111000; let isReMaterializable = 1; let isAsCheapAsAMove = 1; @@ -403,7 +422,7 @@ def A2_combinew : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = combine($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1997594, PredNewRel { +tc_548f402d, TypeALU32_3op>, Enc_be32a5, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110101000; @@ -415,87 +434,95 @@ def A2_max : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = max($Rs32,$Rt32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 { +tc_47ab9233, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101110; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_maxp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = max($Rss32,$Rtt32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_47ab9233, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; +let prefersSlot3 = 1; } def A2_maxu : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = maxu($Rs32,$Rt32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 { +tc_47ab9233, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101110; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_maxup : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = maxu($Rss32,$Rtt32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_47ab9233, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; +let prefersSlot3 = 1; } def A2_min : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = min($Rt32,$Rs32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101101; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_minp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = min($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; +let prefersSlot3 = 1; } def A2_minu : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = minu($Rt32,$Rs32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101101; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_minup : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = minu($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; +let prefersSlot3 = 1; } def A2_neg : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = neg($Rs32)", -PSEUDO, TypeALU32_2op> { +tc_f16d5b17, TypeALU32_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -505,7 +532,7 @@ def A2_negp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = neg($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000000100; } @@ -513,18 +540,19 @@ def A2_negsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = neg($Rs32):sat", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 { +tc_94e6ffd9, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_nop : HInst< (outs), (ins), "nop", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_0 { +tc_e2c31426, TypeALU32_2op>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b0111111100000000; } @@ -532,7 +560,7 @@ def A2_not : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = not($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op> { +tc_f16d5b17, TypeALU32_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -542,7 +570,7 @@ def A2_notp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = not($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000000100; } @@ -550,7 +578,7 @@ def A2_or : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = or($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel, ImmRegRel { +tc_548f402d, TypeALU32_3op>, Enc_5ab2be, PredNewRel, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001001; @@ -566,7 +594,7 @@ def A2_orir : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = or($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_13472494, ImmRegRel { +tc_548f402d, TypeALU32_2op>, Enc_140c83, ImmRegRel { let Inst{31-22} = 0b0111011010; let hasNewValue = 1; let opNewValue = 0; @@ -582,7 +610,7 @@ def A2_orp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = or($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_9c18c9a5, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -592,7 +620,7 @@ def A2_paddf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rd32 = add($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel { +tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011000; @@ -608,7 +636,7 @@ def A2_paddfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rd32 = add($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel { +tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111011000; @@ -625,7 +653,7 @@ def A2_paddif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), "if (!$Pu4) $Rd32 = add($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel { +tc_1b6011fb, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { let Inst{13-13} = 0b0; let Inst{31-23} = 0b011101001; let isPredicated = 1; @@ -645,7 +673,7 @@ def A2_paddifnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), "if (!$Pu4.new) $Rd32 = add($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel { +tc_28d296df, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { let Inst{13-13} = 0b1; let Inst{31-23} = 0b011101001; let isPredicated = 1; @@ -666,7 +694,7 @@ def A2_paddit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), "if ($Pu4) $Rd32 = add($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel { +tc_1b6011fb, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { let Inst{13-13} = 0b0; let Inst{31-23} = 0b011101000; let isPredicated = 1; @@ -685,7 +713,7 @@ def A2_padditnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), "if ($Pu4.new) $Rd32 = add($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel { +tc_28d296df, TypeALU32_2op>, Enc_e38e1f, PredNewRel, ImmRegRel { let Inst{13-13} = 0b1; let Inst{31-23} = 0b011101000; let isPredicated = 1; @@ -705,7 +733,7 @@ def A2_paddt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rd32 = add($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel { +tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011000; @@ -720,7 +748,7 @@ def A2_paddtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rd32 = add($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel { +tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel, ImmRegRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111011000; @@ -736,7 +764,7 @@ def A2_pandf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rd32 = and($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001000; @@ -750,7 +778,7 @@ def A2_pandfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rd32 = and($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001000; @@ -765,7 +793,7 @@ def A2_pandt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rd32 = and($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001000; @@ -778,7 +806,7 @@ def A2_pandtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rd32 = and($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001000; @@ -792,7 +820,7 @@ def A2_porf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rd32 = or($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001001; @@ -806,7 +834,7 @@ def A2_porfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rd32 = or($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001001; @@ -821,7 +849,7 @@ def A2_port : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rd32 = or($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001001; @@ -834,7 +862,7 @@ def A2_portnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rd32 = or($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001001; @@ -848,7 +876,7 @@ def A2_psubf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = sub($Rt32,$Rs32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011001; @@ -862,7 +890,7 @@ def A2_psubfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = sub($Rt32,$Rs32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111011001; @@ -877,7 +905,7 @@ def A2_psubt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32), "if ($Pu4) $Rd32 = sub($Rt32,$Rs32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111011001; @@ -890,7 +918,7 @@ def A2_psubtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = sub($Rt32,$Rs32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_9b0bc1, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111011001; @@ -904,7 +932,7 @@ def A2_pxorf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rd32 = xor($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001011; @@ -918,7 +946,7 @@ def A2_pxorfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rd32 = xor($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001011; @@ -933,7 +961,7 @@ def A2_pxort : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rd32 = xor($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111001011; @@ -946,7 +974,7 @@ def A2_pxortnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rd32 = xor($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_ea4c54, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111001011; @@ -960,18 +988,19 @@ def A2_roundsat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = round($Rss32):sat", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> { +tc_94e6ffd9, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000110; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_sat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = sat($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000110; let hasNewValue = 1; @@ -982,7 +1011,7 @@ def A2_satb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = satb($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10001100110; let hasNewValue = 1; @@ -993,7 +1022,7 @@ def A2_sath : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sath($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001100110; let hasNewValue = 1; @@ -1004,7 +1033,7 @@ def A2_satub : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = satub($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001100110; let hasNewValue = 1; @@ -1015,7 +1044,7 @@ def A2_satuh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = satuh($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10001100110; let hasNewValue = 1; @@ -1026,7 +1055,7 @@ def A2_sub : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32,$Rs32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375, PredNewRel, ImmRegRel { +tc_548f402d, TypeALU32_3op>, Enc_bd6011, PredNewRel, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011001; @@ -1041,145 +1070,157 @@ def A2_subh_h16_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.h,$Rs32.h):<<16", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_bd16579e, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_subh_h16_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.h,$Rs32.l):<<16", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_bd16579e, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_subh_h16_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.h):<<16", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_bd16579e, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_subh_h16_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.l):<<16", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_bd16579e, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_subh_h16_sat_hh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.h,$Rs32.h):sat:<<16", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_subh_h16_sat_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.h,$Rs32.l):sat:<<16", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_subh_h16_sat_lh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.h):sat:<<16", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_subh_h16_sat_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.l):sat:<<16", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_subh_l16_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.h)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_7ca2ea10, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101001; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_subh_l16_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.l)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 { +tc_7ca2ea10, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101001; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def A2_subh_l16_sat_hl : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.h):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101001; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_subh_l16_sat_ll : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32.l,$Rs32.l):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101001; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_subp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = sub($Rtt32,$Rss32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 { +tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -1188,7 +1229,7 @@ def A2_subri : HInst< (outs IntRegs:$Rd32), (ins s32_0Imm:$Ii, IntRegs:$Rs32), "$Rd32 = sub(#$Ii,$Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_13472494, PredNewRel, ImmRegRel { +tc_548f402d, TypeALU32_2op>, Enc_140c83, PredNewRel, ImmRegRel { let Inst{31-22} = 0b0111011001; let hasNewValue = 1; let opNewValue = 0; @@ -1204,12 +1245,13 @@ def A2_subsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32,$Rs32):sat", -ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_b0f50e3c, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110110; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; let InputType = "reg"; } @@ -1217,7 +1259,7 @@ def A2_svaddh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vaddh($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773 { +tc_548f402d, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110000; @@ -1230,12 +1272,13 @@ def A2_svaddhs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vaddh($Rs32,$Rt32):sat", -ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 { +tc_b0f50e3c, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110001; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; let InputType = "reg"; let isCommutable = 1; @@ -1244,12 +1287,13 @@ def A2_svadduhs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vadduh($Rs32,$Rt32):sat", -ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 { +tc_b0f50e3c, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; let InputType = "reg"; let isCommutable = 1; @@ -1258,12 +1302,13 @@ def A2_svavgh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vavgh($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773 { +tc_511f28f6, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110111000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let InputType = "reg"; let isCommutable = 1; } @@ -1271,12 +1316,13 @@ def A2_svavghs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vavgh($Rs32,$Rt32):rnd", -ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 { +tc_76c4c5ef, TypeALU32_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110111001; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let InputType = "reg"; let isCommutable = 1; } @@ -1284,19 +1330,20 @@ def A2_svnavgh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = vnavgh($Rt32,$Rs32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_511f28f6, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110111011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let InputType = "reg"; } def A2_svsubh : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = vsubh($Rt32,$Rs32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_548f402d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110100; @@ -1308,12 +1355,13 @@ def A2_svsubhs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = vsubh($Rt32,$Rs32):sat", -ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_b0f50e3c, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110101; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; let InputType = "reg"; } @@ -1321,12 +1369,13 @@ def A2_svsubuhs : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = vsubuh($Rt32,$Rs32):sat", -ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_b0f50e3c, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110110111; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; let InputType = "reg"; } @@ -1334,7 +1383,7 @@ def A2_swiz : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = swiz($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; @@ -1344,7 +1393,7 @@ def A2_sxtb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel { +tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000101; let hasNewValue = 1; @@ -1356,7 +1405,7 @@ def A2_sxth : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel { +tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000111; let hasNewValue = 1; @@ -1368,7 +1417,7 @@ def A2_sxtw : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = sxtw($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 { +tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000100010; } @@ -1376,7 +1425,7 @@ def A2_tfr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = $Rs32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel { +tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000011; let hasNewValue = 1; @@ -1389,7 +1438,7 @@ def A2_tfrcrr : HInst< (outs IntRegs:$Rd32), (ins CtrRegs:$Cs32), "$Rd32 = $Cs32", -CR_tc_3x_SLOT3, TypeCR>, Enc_1539665 { +tc_3b4892c6, TypeCR>, Enc_0cb018 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01101010000; let hasNewValue = 1; @@ -1399,7 +1448,7 @@ def A2_tfrf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = $Rs32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel { +tc_1b6011fb, TypeALU32_2op>, PredNewRel, ImmRegRel { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -1414,7 +1463,7 @@ def A2_tfrfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = $Rs32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel { +tc_28d296df, TypeALU32_2op>, PredNewRel, ImmRegRel { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -1430,7 +1479,7 @@ def A2_tfrih : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, u16_0Imm:$Ii), "$Rx32.h = #$Ii", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_6130414 { +tc_548f402d, TypeALU32_2op>, Enc_51436c { let Inst{21-21} = 0b1; let Inst{31-24} = 0b01110010; let hasNewValue = 1; @@ -1441,7 +1490,7 @@ def A2_tfril : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, u16_0Imm:$Ii), "$Rx32.l = #$Ii", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_6130414 { +tc_548f402d, TypeALU32_2op>, Enc_51436c { let Inst{21-21} = 0b1; let Inst{31-24} = 0b01110001; let hasNewValue = 1; @@ -1452,7 +1501,7 @@ def A2_tfrp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = $Rss32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel { +tc_548f402d, TypeALU32_2op>, PredNewRel { let BaseOpcode = "A2_tfrp"; let isPredicable = 1; let isPseudo = 1; @@ -1461,7 +1510,7 @@ def A2_tfrpf : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32), "if (!$Pu4) $Rdd32 = $Rss32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel { +tc_548f402d, TypeALU32_2op>, PredNewRel { let isPredicated = 1; let isPredicatedFalse = 1; let BaseOpcode = "A2_tfrp"; @@ -1471,7 +1520,7 @@ def A2_tfrpfnew : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32), "if (!$Pu4.new) $Rdd32 = $Rss32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel { +tc_b08be45e, TypeALU32_2op>, PredNewRel { let isPredicated = 1; let isPredicatedFalse = 1; let isPredicatedNew = 1; @@ -1482,7 +1531,7 @@ def A2_tfrpi : HInst< (outs DoubleRegs:$Rdd32), (ins s8_0Imm:$Ii), "$Rdd32 = #$Ii", -ALU64_tc_1_SLOT23, TypeALU64> { +tc_548f402d, TypeALU64> { let isReMaterializable = 1; let isAsCheapAsAMove = 1; let isMoveImm = 1; @@ -1492,7 +1541,7 @@ def A2_tfrpt : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32), "if ($Pu4) $Rdd32 = $Rss32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel { +tc_548f402d, TypeALU32_2op>, PredNewRel { let isPredicated = 1; let BaseOpcode = "A2_tfrp"; let isPseudo = 1; @@ -1501,7 +1550,7 @@ def A2_tfrptnew : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32), "if ($Pu4.new) $Rdd32 = $Rss32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel { +tc_b08be45e, TypeALU32_2op>, PredNewRel { let isPredicated = 1; let isPredicatedNew = 1; let BaseOpcode = "A2_tfrp"; @@ -1511,7 +1560,7 @@ def A2_tfrrcr : HInst< (outs CtrRegs:$Cd32), (ins IntRegs:$Rs32), "$Cd32 = $Rs32", -CR_tc_3x_SLOT3, TypeCR>, Enc_9018141 { +tc_82f0f122, TypeCR>, Enc_bd811a { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01100010001; let hasNewValue = 1; @@ -1521,7 +1570,7 @@ def A2_tfrsi : HInst< (outs IntRegs:$Rd32), (ins s32_0Imm:$Ii), "$Rd32 = #$Ii", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_7971062, PredNewRel, ImmRegRel { +tc_f16d5b17, TypeALU32_2op>, Enc_5e87ce, PredNewRel, ImmRegRel { let Inst{21-21} = 0b0; let Inst{31-24} = 0b01111000; let hasNewValue = 1; @@ -1543,7 +1592,7 @@ def A2_tfrt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = $Rs32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel { +tc_1b6011fb, TypeALU32_2op>, PredNewRel, ImmRegRel { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -1557,7 +1606,7 @@ def A2_tfrtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = $Rs32", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel { +tc_28d296df, TypeALU32_2op>, PredNewRel, ImmRegRel { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -1572,41 +1621,45 @@ def A2_vabsh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vabsh($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000000010; +let prefersSlot3 = 1; } def A2_vabshsat : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vabsh($Rss32):sat", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000000010; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vabsw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vabsw($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000010; +let prefersSlot3 = 1; } def A2_vabswsat : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vabsw($Rss32):sat", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10000000010; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vaddb_map : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddb($Rss32,$Rtt32)", -PSEUDO, TypeMAPPING> { +tc_9c18c9a5, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -1614,7 +1667,7 @@ def A2_vaddh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddh($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_9c18c9a5, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1623,17 +1676,18 @@ def A2_vaddhs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddh($Rss32,$Rtt32):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_47ab9233, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vaddub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddub($Rss32,$Rtt32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_9c18c9a5, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1642,27 +1696,29 @@ def A2_vaddubs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddub($Rss32,$Rtt32):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_47ab9233, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vadduhs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vadduh($Rss32,$Rtt32):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_47ab9233, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vaddw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddw($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_9c18c9a5, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; @@ -1671,26 +1727,28 @@ def A2_vaddws : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vaddw($Rss32,$Rtt32):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_47ab9233, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011000; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vavgh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgh($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_cd321066, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; +let prefersSlot3 = 1; } def A2_vavghcr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgh($Rss32,$Rtt32):crnd", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_63cd9d2d, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; @@ -1700,79 +1758,87 @@ def A2_vavghr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgh($Rss32,$Rtt32):rnd", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_37326008, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; +let prefersSlot3 = 1; } def A2_vavgub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgub($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_cd321066, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; +let prefersSlot3 = 1; } def A2_vavgubr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgub($Rss32,$Rtt32):rnd", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_37326008, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; +let prefersSlot3 = 1; } def A2_vavguh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavguh($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_cd321066, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; +let prefersSlot3 = 1; } def A2_vavguhr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavguh($Rss32,$Rtt32):rnd", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_37326008, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011010; +let prefersSlot3 = 1; } def A2_vavguw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavguw($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_cd321066, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; +let prefersSlot3 = 1; } def A2_vavguwr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavguw($Rss32,$Rtt32):rnd", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_37326008, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; +let prefersSlot3 = 1; } def A2_vavgw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgw($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_cd321066, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; +let prefersSlot3 = 1; } def A2_vavgwcr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgw($Rss32,$Rtt32):crnd", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_63cd9d2d, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; @@ -1782,16 +1848,17 @@ def A2_vavgwr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vavgw($Rss32,$Rtt32):rnd", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 { +tc_37326008, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011011; +let prefersSlot3 = 1; } def A2_vcmpbeq : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpb.eq($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b110000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1800,7 +1867,7 @@ def A2_vcmpbgtu : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpb.gtu($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b111000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1809,7 +1876,7 @@ def A2_vcmpheq : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmph.eq($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1818,7 +1885,7 @@ def A2_vcmphgt : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmph.gt($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1827,7 +1894,7 @@ def A2_vcmphgtu : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmph.gtu($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b101000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1836,7 +1903,7 @@ def A2_vcmpweq : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpw.eq($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1845,7 +1912,7 @@ def A2_vcmpwgt : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpw.gt($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1854,7 +1921,7 @@ def A2_vcmpwgtu : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpw.gtu($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010000; @@ -1863,133 +1930,147 @@ def A2_vconj : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vconj($Rss32):sat", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_94e6ffd9, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10000000100; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vmaxb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxb($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; +let prefersSlot3 = 1; } def A2_vmaxh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxh($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; +let prefersSlot3 = 1; } def A2_vmaxub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxub($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; +let prefersSlot3 = 1; } def A2_vmaxuh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxuh($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; +let prefersSlot3 = 1; } def A2_vmaxuw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxuw($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; +let prefersSlot3 = 1; } def A2_vmaxw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vmaxw($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; +let prefersSlot3 = 1; } def A2_vminb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminb($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011110; +let prefersSlot3 = 1; } def A2_vminh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminh($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; +let prefersSlot3 = 1; } def A2_vminub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminub($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; +let prefersSlot3 = 1; } def A2_vminuh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminuh($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; +let prefersSlot3 = 1; } def A2_vminuw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminuw($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; +let prefersSlot3 = 1; } def A2_vminw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vminw($Rtt32,$Rss32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011101; +let prefersSlot3 = 1; } def A2_vnavgh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgh($Rtt32,$Rss32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 { +tc_cd321066, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; +let prefersSlot3 = 1; } def A2_vnavghcr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgh($Rtt32,$Rss32):crnd:sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_63cd9d2d, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; @@ -2000,7 +2081,7 @@ def A2_vnavghr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgh($Rtt32,$Rss32):rnd:sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_63cd9d2d, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; @@ -2011,16 +2092,17 @@ def A2_vnavgw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgw($Rtt32,$Rss32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 { +tc_cd321066, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; +let prefersSlot3 = 1; } def A2_vnavgwcr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgw($Rtt32,$Rss32):crnd:sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_63cd9d2d, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; @@ -2031,7 +2113,7 @@ def A2_vnavgwr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vnavgw($Rtt32,$Rss32):rnd:sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_63cd9d2d, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011100; @@ -2042,7 +2124,7 @@ def A2_vraddub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vraddub($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -2052,7 +2134,7 @@ def A2_vraddub_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vraddub($Rss32,$Rtt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -2063,7 +2145,7 @@ def A2_vrsadub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrsadub($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -2073,7 +2155,7 @@ def A2_vrsadub_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrsadub($Rss32,$Rtt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -2084,7 +2166,7 @@ def A2_vsubb_map : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vsubb($Rss32,$Rtt32)", -PSEUDO, TypeMAPPING> { +tc_9c18c9a5, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -2092,7 +2174,7 @@ def A2_vsubh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubh($Rtt32,$Rss32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 { +tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2101,17 +2183,18 @@ def A2_vsubhs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubh($Rtt32,$Rss32):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vsubub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubub($Rtt32,$Rss32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 { +tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2120,27 +2203,29 @@ def A2_vsububs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubub($Rtt32,$Rss32):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vsubuhs : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubuh($Rtt32,$Rss32):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_vsubw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubw($Rtt32,$Rss32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 { +tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; @@ -2149,17 +2234,18 @@ def A2_vsubws : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vsubw($Rtt32,$Rss32):sat", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 { +tc_47ab9233, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011001; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def A2_xor : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = xor($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel { +tc_548f402d, TypeALU32_3op>, Enc_5ab2be, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001011; @@ -2174,7 +2260,7 @@ def A2_xorp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = xor($Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 { +tc_9c18c9a5, TypeALU64>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -2184,7 +2270,7 @@ def A2_zxtb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel { +tc_548f402d, TypeALU32_2op>, PredNewRel { let hasNewValue = 1; let opNewValue = 0; let BaseOpcode = "A2_zxtb"; @@ -2196,7 +2282,7 @@ def A2_zxth : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel { +tc_f16d5b17, TypeALU32_2op>, Enc_5e2823, PredNewRel { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01110000110; let hasNewValue = 1; @@ -2208,7 +2294,7 @@ def A4_addp_c : HInst< (outs DoubleRegs:$Rdd32, PredRegs:$Px4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in), "$Rdd32 = add($Rss32,$Rtt32,$Px4):carry", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_151014 { +tc_a87879e8, TypeS_3op>, Enc_2b3f60 { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000010110; @@ -2219,7 +2305,7 @@ def A4_andn : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = and($Rt32,~$Rs32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_548f402d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001100; @@ -2231,7 +2317,7 @@ def A4_andnp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = and($Rtt32,~$Rss32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 { +tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -2240,32 +2326,34 @@ def A4_bitsplit : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = bitsplit($Rs32,$Rt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_1997594 { +tc_7ca2ea10, TypeALU64>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010100001; +let prefersSlot3 = 1; } def A4_bitspliti : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rdd32 = bitsplit($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_5654851 { +tc_7ca2ea10, TypeS_2op>, Enc_311abd { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001000110; +let prefersSlot3 = 1; } def A4_boundscheck : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Pd4 = boundscheck($Rs32,$Rtt32)", -M_tc_3x_SLOT23, TypeALU64> { +tc_c58f771a, TypeALU64> { let isPseudo = 1; } def A4_boundscheck_hi : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = boundscheck($Rss32,$Rtt32):raw:hi", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b101000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -2274,7 +2362,7 @@ def A4_boundscheck_lo : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = boundscheck($Rss32,$Rtt32):raw:lo", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -2283,7 +2371,7 @@ def A4_cmpbeq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmpb.eq($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b110000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2296,7 +2384,7 @@ def A4_cmpbeqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u8_0Imm:$Ii), "$Pd4 = cmpb.eq($Rs32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel { +tc_5fa2857c, TypeALU64>, Enc_08d755, ImmRegRel { let Inst{4-2} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011101000; @@ -2309,7 +2397,7 @@ def A4_cmpbgt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmpb.gt($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2321,7 +2409,7 @@ def A4_cmpbgti : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s8_0Imm:$Ii), "$Pd4 = cmpb.gt($Rs32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel { +tc_5fa2857c, TypeALU64>, Enc_08d755, ImmRegRel { let Inst{4-2} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011101001; @@ -2333,7 +2421,7 @@ def A4_cmpbgtu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmpb.gtu($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b111000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2345,7 +2433,7 @@ def A4_cmpbgtui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Pd4 = cmpb.gtu($Rs32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3531000, ImmRegRel { +tc_5fa2857c, TypeALU64>, Enc_02553a, ImmRegRel { let Inst{4-2} = 0b000; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011101010; @@ -2362,7 +2450,7 @@ def A4_cmpheq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmph.eq($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2375,7 +2463,7 @@ def A4_cmpheqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmph.eq($Rs32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel { +tc_5fa2857c, TypeALU64>, Enc_08d755, ImmRegRel { let Inst{4-2} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011101000; @@ -2393,7 +2481,7 @@ def A4_cmphgt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmph.gt($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2405,7 +2493,7 @@ def A4_cmphgti : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmph.gt($Rs32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel { +tc_5fa2857c, TypeALU64>, Enc_08d755, ImmRegRel { let Inst{4-2} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011101001; @@ -2422,7 +2510,7 @@ def A4_cmphgtu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmph.gtu($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b101000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111110; @@ -2434,7 +2522,7 @@ def A4_cmphgtui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Pd4 = cmph.gtu($Rs32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3531000, ImmRegRel { +tc_5fa2857c, TypeALU64>, Enc_02553a, ImmRegRel { let Inst{4-2} = 0b010; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011101010; @@ -2451,7 +2539,7 @@ def A4_combineii : HInst< (outs DoubleRegs:$Rdd32), (ins s8_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9864697 { +tc_548f402d, TypeALU32_2op>, Enc_f0cca7 { let Inst{31-21} = 0b01111100100; let isExtendable = 1; let opExtendable = 2; @@ -2463,7 +2551,7 @@ def A4_combineir : HInst< (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, IntRegs:$Rs32), "$Rdd32 = combine(#$Ii,$Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_2462143 { +tc_548f402d, TypeALU32_2op>, Enc_9cdba7 { let Inst{13-13} = 0b1; let Inst{31-21} = 0b01110011001; let isExtendable = 1; @@ -2476,7 +2564,7 @@ def A4_combineri : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rdd32 = combine($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_2462143 { +tc_548f402d, TypeALU32_2op>, Enc_9cdba7 { let Inst{13-13} = 0b1; let Inst{31-21} = 0b01110011000; let isExtendable = 1; @@ -2489,7 +2577,7 @@ def A4_cround_ri : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = cround($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 { +tc_63cd9d2d, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100111; @@ -2501,7 +2589,7 @@ def A4_cround_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cround($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 { +tc_63cd9d2d, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110110; @@ -2513,14 +2601,14 @@ def A4_ext : HInst< (outs), (ins u26_6Imm:$Ii), "immext(#$Ii)", -EXTENDER_tc_1_SLOT0123, TypeEXTENDER>, Enc_2082956 { +tc_9a13af9d, TypeEXTENDER>, Enc_2b518f { let Inst{31-28} = 0b0000; } def A4_modwrapu : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = modwrap($Rs32,$Rt32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 { +tc_47ab9233, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -2532,7 +2620,7 @@ def A4_orn : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = or($Rt32,~$Rs32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 { +tc_548f402d, TypeALU32_3op>, Enc_bd6011 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110001101; @@ -2544,7 +2632,7 @@ def A4_ornp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = or($Rtt32,~$Rss32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 { +tc_9c18c9a5, TypeALU64>, Enc_ea23e4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010011111; @@ -2553,7 +2641,7 @@ def A4_paslhf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = aslh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000000; @@ -2567,7 +2655,7 @@ def A4_paslhfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = aslh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000000; @@ -2582,7 +2670,7 @@ def A4_paslht : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = aslh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000000; @@ -2595,7 +2683,7 @@ def A4_paslhtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = aslh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000000; @@ -2609,7 +2697,7 @@ def A4_pasrhf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = asrh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000001; @@ -2623,7 +2711,7 @@ def A4_pasrhfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = asrh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000001; @@ -2638,7 +2726,7 @@ def A4_pasrht : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = asrh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000001; @@ -2651,7 +2739,7 @@ def A4_pasrhtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = asrh($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000001; @@ -2665,7 +2753,7 @@ def A4_psxtbf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = sxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000101; @@ -2679,7 +2767,7 @@ def A4_psxtbfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = sxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000101; @@ -2694,7 +2782,7 @@ def A4_psxtbt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = sxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000101; @@ -2707,7 +2795,7 @@ def A4_psxtbtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = sxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000101; @@ -2721,7 +2809,7 @@ def A4_psxthf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = sxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000111; @@ -2735,7 +2823,7 @@ def A4_psxthfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = sxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000111; @@ -2750,7 +2838,7 @@ def A4_psxtht : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = sxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000111; @@ -2763,7 +2851,7 @@ def A4_psxthtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = sxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000111; @@ -2777,7 +2865,7 @@ def A4_pzxtbf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = zxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000100; @@ -2791,7 +2879,7 @@ def A4_pzxtbfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = zxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000100; @@ -2806,7 +2894,7 @@ def A4_pzxtbt : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = zxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000100; @@ -2819,7 +2907,7 @@ def A4_pzxtbtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = zxtb($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000100; @@ -2833,7 +2921,7 @@ def A4_pzxthf : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) $Rd32 = zxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; let Inst{31-21} = 0b01110000110; @@ -2847,7 +2935,7 @@ def A4_pzxthfnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) $Rd32 = zxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1011; let Inst{31-21} = 0b01110000110; @@ -2862,7 +2950,7 @@ def A4_pzxtht : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) $Rd32 = zxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_548f402d, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b01110000110; @@ -2875,7 +2963,7 @@ def A4_pzxthtnew : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) $Rd32 = zxth($Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel { +tc_b08be45e, TypeALU32_2op>, Enc_fb6577, PredNewRel { let Inst{7-5} = 0b000; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b01110000110; @@ -2889,7 +2977,7 @@ def A4_rcmpeq : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmp.eq($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, ImmRegRel { +tc_548f402d, TypeALU32_3op>, Enc_5ab2be, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011010; @@ -2903,7 +2991,7 @@ def A4_rcmpeqi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = cmp.eq($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_16355964, ImmRegRel { +tc_548f402d, TypeALU32_2op>, Enc_b8c967, ImmRegRel { let Inst{13-13} = 0b1; let Inst{31-21} = 0b01110011010; let hasNewValue = 1; @@ -2920,7 +3008,7 @@ def A4_rcmpneq : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = !cmp.eq($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, ImmRegRel { +tc_548f402d, TypeALU32_3op>, Enc_5ab2be, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110011011; @@ -2934,7 +3022,7 @@ def A4_rcmpneqi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = !cmp.eq($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_16355964, ImmRegRel { +tc_548f402d, TypeALU32_2op>, Enc_b8c967, ImmRegRel { let Inst{13-13} = 0b1; let Inst{31-21} = 0b01110011011; let hasNewValue = 1; @@ -2951,7 +3039,7 @@ def A4_round_ri : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = round($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 { +tc_63cd9d2d, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100111; @@ -2963,7 +3051,7 @@ def A4_round_ri_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = round($Rs32,#$Ii):sat", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 { +tc_63cd9d2d, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100111; @@ -2976,7 +3064,7 @@ def A4_round_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = round($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 { +tc_63cd9d2d, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110110; @@ -2988,7 +3076,7 @@ def A4_round_rr_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = round($Rs32,$Rt32):sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 { +tc_63cd9d2d, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110110; @@ -3001,7 +3089,7 @@ def A4_subp_c : HInst< (outs DoubleRegs:$Rdd32, PredRegs:$Px4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in), "$Rdd32 = sub($Rss32,$Rtt32,$Px4):carry", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_151014 { +tc_a87879e8, TypeS_3op>, Enc_2b3f60 { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000010111; @@ -3012,7 +3100,7 @@ def A4_tfrcpp : HInst< (outs DoubleRegs:$Rdd32), (ins CtrRegs64:$Css32), "$Rdd32 = $Css32", -CR_tc_3x_SLOT3, TypeCR>, Enc_13094118 { +tc_3b4892c6, TypeCR>, Enc_667b39 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01101000000; } @@ -3020,7 +3108,7 @@ def A4_tfrpcp : HInst< (outs CtrRegs64:$Cdd32), (ins DoubleRegs:$Rss32), "$Cdd32 = $Rss32", -CR_tc_3x_SLOT3, TypeCR>, Enc_1329520 { +tc_82f0f122, TypeCR>, Enc_0ed752 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b01100011001; } @@ -3028,7 +3116,7 @@ def A4_tlbmatch : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Pd4 = tlbmatch($Rss32,$Rt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2492727 { +tc_e2c08bb4, TypeALU64>, Enc_03833b { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -3038,7 +3126,7 @@ def A4_vcmpbeq_any : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = any8(vcmpb.eq($Rss32,$Rtt32))", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -3047,7 +3135,7 @@ def A4_vcmpbeqi : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u8_0Imm:$Ii), "$Pd4 = vcmpb.eq($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 { +tc_5fa2857c, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100000; @@ -3056,7 +3144,7 @@ def A4_vcmpbgt : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = vcmpb.gt($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -3065,7 +3153,7 @@ def A4_vcmpbgti : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmpb.gt($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 { +tc_5fa2857c, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100001; @@ -3074,7 +3162,7 @@ def A4_vcmpbgtui : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u7_0Imm:$Ii), "$Pd4 = vcmpb.gtu($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2968094 { +tc_5fa2857c, TypeALU64>, Enc_3680c2 { let Inst{4-2} = 0b000; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011100010; @@ -3083,7 +3171,7 @@ def A4_vcmpheqi : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmph.eq($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 { +tc_5fa2857c, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100000; @@ -3092,7 +3180,7 @@ def A4_vcmphgti : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmph.gt($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 { +tc_5fa2857c, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100001; @@ -3101,7 +3189,7 @@ def A4_vcmphgtui : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u7_0Imm:$Ii), "$Pd4 = vcmph.gtu($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2968094 { +tc_5fa2857c, TypeALU64>, Enc_3680c2 { let Inst{4-2} = 0b010; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011100010; @@ -3110,7 +3198,7 @@ def A4_vcmpweqi : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmpw.eq($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 { +tc_5fa2857c, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100000; @@ -3119,7 +3207,7 @@ def A4_vcmpwgti : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, s8_0Imm:$Ii), "$Pd4 = vcmpw.gt($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 { +tc_5fa2857c, TypeALU64>, Enc_0d8adb { let Inst{4-2} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11011100001; @@ -3128,7 +3216,7 @@ def A4_vcmpwgtui : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u7_0Imm:$Ii), "$Pd4 = vcmpw.gtu($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2968094 { +tc_5fa2857c, TypeALU64>, Enc_3680c2 { let Inst{4-2} = 0b100; let Inst{13-12} = 0b00; let Inst{31-21} = 0b11011100010; @@ -3137,7 +3225,7 @@ def A4_vrmaxh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrmaxh($Rss32,$Ru32)", -S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 { +tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011001; @@ -3148,7 +3236,7 @@ def A4_vrmaxuh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrmaxuh($Rss32,$Ru32)", -S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 { +tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -3159,7 +3247,7 @@ def A4_vrmaxuw : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrmaxuw($Rss32,$Ru32)", -S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 { +tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -3170,7 +3258,7 @@ def A4_vrmaxw : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrmaxw($Rss32,$Ru32)", -S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 { +tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011001; @@ -3181,7 +3269,7 @@ def A4_vrminh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrminh($Rss32,$Ru32)", -S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 { +tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011001; @@ -3192,7 +3280,7 @@ def A4_vrminuh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrminuh($Rss32,$Ru32)", -S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 { +tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -3203,7 +3291,7 @@ def A4_vrminuw : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrminuw($Rss32,$Ru32)", -S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 { +tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -3214,7 +3302,7 @@ def A4_vrminw : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32), "$Rxx32 = vrminw($Rss32,$Ru32)", -S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 { +tc_2aaab1e0, TypeS_3op>, Enc_412ff0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011001; @@ -3225,7 +3313,7 @@ def A5_ACS : HInst< (outs DoubleRegs:$Rxx32, PredRegs:$Pe4), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)", -M_tc_3stall_SLOT23, TypeM>, Enc_12822813, Requires<[HasV55T]> { +tc_ae0722f7, TypeM>, Enc_831a7d, Requires<[HasV55T]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -3238,7 +3326,7 @@ def A5_vaddhubs : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vaddhub($Rss32,$Rtt32):sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9277990, Requires<[HasV5T]> { +tc_63cd9d2d, TypeS_3op>, Enc_d2216a, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; @@ -3251,7 +3339,7 @@ def A6_vminub_RdP : HInst< (outs DoubleRegs:$Rdd32, PredRegs:$Pe4), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)", -M_tc_2_SLOT23, TypeM>, Enc_766909, Requires<[HasV62T]> { +tc_583510c7, TypeM>, Enc_d2c7f1, Requires<[HasV62T]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -3262,7 +3350,7 @@ def C2_all8 : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4), "$Pd4 = all8($Ps4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_6975103 { +tc_81a23d44, TypeCR>, Enc_65d691 { let Inst{13-2} = 0b000000000000; let Inst{31-18} = 0b01101011101000; } @@ -3270,7 +3358,7 @@ def C2_and : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Pt4, PredRegs:$Ps4), "$Pd4 = and($Pt4,$Ps4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 { +tc_d63b71d1, TypeCR>, Enc_454a26 { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011000000; @@ -3279,7 +3367,7 @@ def C2_andn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Pt4, PredRegs:$Ps4), "$Pd4 = and($Pt4,!$Ps4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 { +tc_d63b71d1, TypeCR>, Enc_454a26 { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011011000; @@ -3288,7 +3376,7 @@ def C2_any8 : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4), "$Pd4 = any8($Ps4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_6975103 { +tc_81a23d44, TypeCR>, Enc_65d691 { let Inst{13-2} = 0b000000000000; let Inst{31-18} = 0b01101011100000; } @@ -3296,7 +3384,7 @@ def C2_bitsclr : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = bitsclr($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 { +tc_c58f771a, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111100; @@ -3305,7 +3393,7 @@ def C2_bitsclri : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u6_0Imm:$Ii), "$Pd4 = bitsclr($Rs32,#$Ii)", -S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_14574598 { +tc_5fa2857c, TypeS_2op>, Enc_5d6c34 { let Inst{7-2} = 0b000000; let Inst{31-21} = 0b10000101100; } @@ -3313,7 +3401,7 @@ def C2_bitsset : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = bitsset($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 { +tc_c58f771a, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111010; @@ -3322,7 +3410,7 @@ def C2_ccombinewf : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4) $Rdd32 = combine($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111101000; @@ -3334,7 +3422,7 @@ def C2_ccombinewnewf : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pu4.new) $Rdd32 = combine($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111101000; @@ -3347,7 +3435,7 @@ def C2_ccombinewnewt : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4.new) $Rdd32 = combine($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel { +tc_28d296df, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11111101000; @@ -3359,7 +3447,7 @@ def C2_ccombinewt : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pu4) $Rdd32 = combine($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel { +tc_1b6011fb, TypeALU32_3op>, Enc_cb4b4e, PredNewRel { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11111101000; @@ -3370,7 +3458,7 @@ def C2_cmoveif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4) $Rd32 = #$Ii", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel { +tc_548f402d, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { let Inst{13-13} = 0b0; let Inst{20-20} = 0b0; let Inst{31-23} = 0b011111101; @@ -3392,7 +3480,7 @@ def C2_cmoveit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4) $Rd32 = #$Ii", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel { +tc_548f402d, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { let Inst{13-13} = 0b0; let Inst{20-20} = 0b0; let Inst{31-23} = 0b011111100; @@ -3413,7 +3501,7 @@ def C2_cmovenewif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4.new) $Rd32 = #$Ii", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel { +tc_b08be45e, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { let Inst{13-13} = 0b1; let Inst{20-20} = 0b0; let Inst{31-23} = 0b011111101; @@ -3436,7 +3524,7 @@ def C2_cmovenewit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4.new) $Rd32 = #$Ii", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel { +tc_b08be45e, TypeALU32_2op>, Enc_cda00a, PredNewRel, ImmRegRel { let Inst{13-13} = 0b1; let Inst{20-20} = 0b0; let Inst{31-23} = 0b011111100; @@ -3458,7 +3546,7 @@ def C2_cmpeq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmp.eq($Rs32,$Rt32)", -ALU32_3op_tc_2early_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel { +tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010000; @@ -3471,7 +3559,7 @@ def C2_cmpeqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmp.eq($Rs32,#$Ii)", -ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel { +tc_9df8b0dc, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { let Inst{4-2} = 0b000; let Inst{31-22} = 0b0111010100; let CextOpcode = "C2_cmpeq"; @@ -3487,7 +3575,7 @@ def C2_cmpeqp : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = cmp.eq($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010100; @@ -3498,7 +3586,7 @@ def C2_cmpgei : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s8_0Imm:$Ii), "$Pd4 = cmp.ge($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op> { +tc_9df8b0dc, TypeALU32_2op> { let isCompare = 1; let isPseudo = 1; } @@ -3506,7 +3594,7 @@ def C2_cmpgeui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u8_0Imm:$Ii), "$Pd4 = cmp.geu($Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op> { +tc_9df8b0dc, TypeALU32_2op> { let isCompare = 1; let isPseudo = 1; } @@ -3514,7 +3602,7 @@ def C2_cmpgt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmp.gt($Rs32,$Rt32)", -ALU32_3op_tc_2early_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel { +tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010010; @@ -3526,7 +3614,7 @@ def C2_cmpgti : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmp.gt($Rs32,#$Ii)", -ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel { +tc_9df8b0dc, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { let Inst{4-2} = 0b000; let Inst{31-22} = 0b0111010101; let CextOpcode = "C2_cmpgt"; @@ -3542,7 +3630,7 @@ def C2_cmpgtp : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = cmp.gt($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010100; @@ -3552,7 +3640,7 @@ def C2_cmpgtu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmp.gtu($Rs32,$Rt32)", -ALU32_3op_tc_2early_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel { +tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010011; @@ -3564,7 +3652,7 @@ def C2_cmpgtui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Pd4 = cmp.gtu($Rs32,#$Ii)", -ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_13249928, ImmRegRel { +tc_9df8b0dc, TypeALU32_2op>, Enc_c0cdde, ImmRegRel { let Inst{4-2} = 0b000; let Inst{31-21} = 0b01110101100; let CextOpcode = "C2_cmpgtu"; @@ -3580,7 +3668,7 @@ def C2_cmpgtup : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = cmp.gtu($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 { +tc_c58f771a, TypeALU64>, Enc_fcf7a7 { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010100; @@ -3590,7 +3678,7 @@ def C2_cmplt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmp.lt($Rs32,$Rt32)", -PSEUDO, TypeALU32_3op> { +tc_9df8b0dc, TypeALU32_3op> { let isCompare = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -3599,7 +3687,7 @@ def C2_cmpltu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = cmp.ltu($Rs32,$Rt32)", -PSEUDO, TypeALU32_3op> { +tc_9df8b0dc, TypeALU32_3op> { let isCompare = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -3608,7 +3696,7 @@ def C2_mask : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4), "$Rdd32 = mask($Pt4)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_10328975 { +tc_b86c7e8b, TypeS_2op>, Enc_78e566 { let Inst{7-5} = 0b000; let Inst{13-10} = 0b0000; let Inst{31-16} = 0b1000011000000000; @@ -3617,7 +3705,7 @@ def C2_mux : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mux($Pu4,$Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139 { +tc_1b6011fb, TypeALU32_3op>, Enc_ea4c54 { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110100000; @@ -3629,7 +3717,7 @@ def C2_muxii : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii, s8_0Imm:$II), "$Rd32 = mux($Pu4,#$Ii,#$II)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9093094 { +tc_1b6011fb, TypeALU32_2op>, Enc_830e5d { let Inst{31-25} = 0b0111101; let hasNewValue = 1; let opNewValue = 0; @@ -3643,7 +3731,7 @@ def C2_muxir : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = mux($Pu4,$Rs32,#$Ii)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534 { +tc_1b6011fb, TypeALU32_2op>, Enc_e38e1f { let Inst{13-13} = 0b0; let Inst{31-23} = 0b011100110; let hasNewValue = 1; @@ -3659,7 +3747,7 @@ def C2_muxri : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii, IntRegs:$Rs32), "$Rd32 = mux($Pu4,#$Ii,$Rs32)", -ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534 { +tc_1b6011fb, TypeALU32_2op>, Enc_e38e1f { let Inst{13-13} = 0b0; let Inst{31-23} = 0b011100111; let hasNewValue = 1; @@ -3675,7 +3763,7 @@ def C2_not : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4), "$Pd4 = not($Ps4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_6975103 { +tc_81a23d44, TypeCR>, Enc_65d691 { let Inst{13-2} = 0b000000000000; let Inst{31-18} = 0b01101011110000; } @@ -3683,7 +3771,7 @@ def C2_or : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Pt4, PredRegs:$Ps4), "$Pd4 = or($Pt4,$Ps4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 { +tc_d63b71d1, TypeCR>, Enc_454a26 { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011001000; @@ -3692,7 +3780,7 @@ def C2_orn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Pt4, PredRegs:$Ps4), "$Pd4 = or($Pt4,!$Ps4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 { +tc_d63b71d1, TypeCR>, Enc_454a26 { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011111000; @@ -3701,7 +3789,7 @@ def C2_pxfer_map : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4), "$Pd4 = $Ps4", -S_2op_tc_1_SLOT23, TypeMAPPING> { +tc_d63b71d1, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -3709,7 +3797,7 @@ def C2_tfrpr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Ps4), "$Rd32 = $Ps4", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_11139981 { +tc_b86c7e8b, TypeS_2op>, Enc_f5e933 { let Inst{13-5} = 0b000000000; let Inst{31-18} = 0b10001001010000; let hasNewValue = 1; @@ -3719,7 +3807,7 @@ def C2_tfrrp : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32), "$Pd4 = $Rs32", -S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_4527648 { +tc_47f0b7ad, TypeS_2op>, Enc_48b75f { let Inst{13-2} = 0b000000000000; let Inst{31-21} = 0b10000101010; } @@ -3727,18 +3815,19 @@ def C2_vitpack : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Ps4, PredRegs:$Pt4), "$Rd32 = vitpack($Ps4,$Pt4)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_6735062 { +tc_7ca2ea10, TypeS_2op>, Enc_527412 { let Inst{7-5} = 0b000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b10001001000000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def C2_vmux : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pu4, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmux($Pu4,$Rss32,$Rtt32)", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_7606379 { +tc_d1b5a4b6, TypeALU64>, Enc_329361 { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010001000; @@ -3747,7 +3836,7 @@ def C2_xor : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4), "$Pd4 = xor($Ps4,$Pt4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_8324216 { +tc_d63b71d1, TypeCR>, Enc_284ebb { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011010000; @@ -3756,7 +3845,7 @@ def C4_addipc : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = add(pc,#$Ii)", -CR_tc_2_SLOT3, TypeCR>, Enc_9554661 { +tc_1fe8323c, TypeCR>, Enc_607661 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0110101001001001; @@ -3772,7 +3861,7 @@ def C4_and_and : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = and($Ps4,and($Pt4,$Pu4))", -CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 { +tc_43068634, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011000100; @@ -3781,7 +3870,7 @@ def C4_and_andn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = and($Ps4,and($Pt4,!$Pu4))", -CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 { +tc_43068634, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011100100; @@ -3790,7 +3879,7 @@ def C4_and_or : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = and($Ps4,or($Pt4,$Pu4))", -CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 { +tc_43068634, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011001100; @@ -3799,7 +3888,7 @@ def C4_and_orn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = and($Ps4,or($Pt4,!$Pu4))", -CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 { +tc_43068634, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011101100; @@ -3808,7 +3897,7 @@ def C4_cmplte : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !cmp.gt($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel { +tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010010; @@ -3820,7 +3909,7 @@ def C4_cmpltei : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = !cmp.gt($Rs32,#$Ii)", -ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel { +tc_9df8b0dc, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { let Inst{4-2} = 0b100; let Inst{31-22} = 0b0111010101; let CextOpcode = "C4_cmplte"; @@ -3836,7 +3925,7 @@ def C4_cmplteu : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !cmp.gtu($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel { +tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010011; @@ -3848,7 +3937,7 @@ def C4_cmplteui : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Pd4 = !cmp.gtu($Rs32,#$Ii)", -ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_13249928, ImmRegRel { +tc_9df8b0dc, TypeALU32_2op>, Enc_c0cdde, ImmRegRel { let Inst{4-2} = 0b100; let Inst{31-21} = 0b01110101100; let CextOpcode = "C4_cmplteu"; @@ -3864,7 +3953,7 @@ def C4_cmpneq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !cmp.eq($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel { +tc_5fe9fcd0, TypeALU32_3op>, Enc_c2b48e, ImmRegRel { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110010000; @@ -3877,7 +3966,7 @@ def C4_cmpneqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = !cmp.eq($Rs32,#$Ii)", -ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel { +tc_9df8b0dc, TypeALU32_2op>, Enc_bd0b33, ImmRegRel { let Inst{4-2} = 0b100; let Inst{31-22} = 0b0111010100; let CextOpcode = "C4_cmpneq"; @@ -3893,7 +3982,7 @@ def C4_fastcorner9 : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4), "$Pd4 = fastcorner9($Ps4,$Pt4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_8324216 { +tc_d63b71d1, TypeCR>, Enc_284ebb { let Inst{7-2} = 0b100100; let Inst{13-10} = 0b1000; let Inst{31-18} = 0b01101011000000; @@ -3902,7 +3991,7 @@ def C4_fastcorner9_not : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4), "$Pd4 = !fastcorner9($Ps4,$Pt4)", -CR_tc_2early_SLOT23, TypeCR>, Enc_8324216 { +tc_d63b71d1, TypeCR>, Enc_284ebb { let Inst{7-2} = 0b100100; let Inst{13-10} = 0b1000; let Inst{31-18} = 0b01101011000100; @@ -3911,7 +4000,7 @@ def C4_nbitsclr : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !bitsclr($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 { +tc_c58f771a, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111101; @@ -3920,7 +4009,7 @@ def C4_nbitsclri : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u6_0Imm:$Ii), "$Pd4 = !bitsclr($Rs32,#$Ii)", -S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_14574598 { +tc_5fa2857c, TypeS_2op>, Enc_5d6c34 { let Inst{7-2} = 0b000000; let Inst{31-21} = 0b10000101101; } @@ -3928,7 +4017,7 @@ def C4_nbitsset : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !bitsset($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 { +tc_c58f771a, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111011; @@ -3937,7 +4026,7 @@ def C4_or_and : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = or($Ps4,and($Pt4,$Pu4))", -CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 { +tc_43068634, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011010100; @@ -3946,7 +4035,7 @@ def C4_or_andn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = or($Ps4,and($Pt4,!$Pu4))", -CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 { +tc_43068634, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011110100; @@ -3955,7 +4044,7 @@ def C4_or_or : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = or($Ps4,or($Pt4,$Pu4))", -CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 { +tc_43068634, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011011100; @@ -3964,7 +4053,7 @@ def C4_or_orn : HInst< (outs PredRegs:$Pd4), (ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4), "$Pd4 = or($Ps4,or($Pt4,!$Pu4))", -CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 { +tc_43068634, TypeCR>, Enc_9ac432 { let Inst{5-2} = 0b0000; let Inst{13-10} = 0b0000; let Inst{31-18} = 0b01101011111100; @@ -3973,319 +4062,293 @@ def F2_conv_d2df : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_d2df($Rss32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000011; let Inst{31-21} = 0b10000000111; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_d2sf : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_d2sf($Rss32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000010; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_df2d : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2d($Rss32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000000111; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_df2d_chop : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2d($Rss32):chop", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000111; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_df2sf : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2sf($Rss32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_df2ud : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2ud($Rss32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10000000111; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_df2ud_chop : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2ud($Rss32):chop", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10000000111; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_df2uw : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2uw($Rss32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_df2uw_chop : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2uw($Rss32):chop", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000101; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_df2w : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2w($Rss32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_df2w_chop : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2w($Rss32):chop", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000111; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_sf2d : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2d($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000100100; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_sf2d_chop : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2d($Rs32):chop", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000100100; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_sf2df : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2df($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000100100; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_sf2ud : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2ud($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000011; let Inst{31-21} = 0b10000100100; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_sf2ud_chop : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2ud($Rs32):chop", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000100100; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_sf2uw : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2uw($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011011; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_sf2uw_chop : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2uw($Rs32):chop", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001011011; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_sf2w : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2w($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011100; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_sf2w_chop : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2w($Rs32):chop", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001011100; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_ud2df : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_ud2df($Rss32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000000111; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_ud2sf : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_ud2sf($Rss32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000001; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_uw2df : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_uw2df($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10000100100; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_uw2sf : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_uw2sf($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011001; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_w2df : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_w2df($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000100100; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_conv_w2sf : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_w2sf($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011010; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def F2_dfclass : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Pd4 = dfclass($Rss32,#$Ii)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_14400220, Requires<[HasV5T]> { +tc_5fa2857c, TypeALU64>, Enc_1f19b5, Requires<[HasV5T]> { let Inst{4-2} = 0b100; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b11011100100; @@ -4296,7 +4359,7 @@ def F2_dfcmpeq : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.eq($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> { +tc_c58f771a, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4308,7 +4371,7 @@ def F2_dfcmpge : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.ge($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> { +tc_c58f771a, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4320,7 +4383,7 @@ def F2_dfcmpgt : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.gt($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> { +tc_c58f771a, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4332,7 +4395,7 @@ def F2_dfcmpuo : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.uo($Rss32,$Rtt32)", -ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> { +tc_c58f771a, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4344,7 +4407,7 @@ def F2_dfimm_n : HInst< (outs DoubleRegs:$Rdd32), (ins u10_0Imm:$Ii), "$Rdd32 = dfmake(#$Ii):neg", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_2702036, Requires<[HasV5T]> { +tc_485bb57c, TypeALU64>, Enc_e6c957, Requires<[HasV5T]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101100101; let prefersSlot3 = 1; @@ -4353,7 +4416,7 @@ def F2_dfimm_p : HInst< (outs DoubleRegs:$Rdd32), (ins u10_0Imm:$Ii), "$Rdd32 = dfmake(#$Ii):pos", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_2702036, Requires<[HasV5T]> { +tc_485bb57c, TypeALU64>, Enc_e6c957, Requires<[HasV5T]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101100100; let prefersSlot3 = 1; @@ -4362,14 +4425,13 @@ def F2_sfadd : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfadd($Rs32,$Rt32)", -M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> { +tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011000; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; let isCommutable = 1; } @@ -4377,7 +4439,7 @@ def F2_sfclass : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Pd4 = sfclass($Rs32,#$Ii)", -S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_2103742, Requires<[HasV5T]> { +tc_5fa2857c, TypeS_2op>, Enc_83ee64, Requires<[HasV5T]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000101111; @@ -4388,7 +4450,7 @@ def F2_sfcmpeq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.eq($Rs32,$Rt32)", -ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4400,7 +4462,7 @@ def F2_sfcmpge : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.ge($Rs32,$Rt32)", -ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4412,7 +4474,7 @@ def F2_sfcmpgt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.gt($Rs32,$Rt32)", -ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4424,7 +4486,7 @@ def F2_sfcmpuo : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.uo($Rs32,$Rt32)", -ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> { +tc_c58f771a, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4436,52 +4498,48 @@ def F2_sffixupd : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sffixupd($Rs32,$Rt32)", -M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> { +tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011110; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; } def F2_sffixupn : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sffixupn($Rs32,$Rt32)", -M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> { +tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011110; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; } def F2_sffixupr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sffixupr($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> { +tc_e836c161, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011101; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; } def F2_sffma : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += sfmpy($Rs32,$Rt32)", -M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> { +tc_2d1e6f5c, TypeM>, Enc_2ae154, Requires<[HasV5T]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; let Constraints = "$Rx32 = $Rx32in"; } @@ -4489,14 +4547,13 @@ def F2_sffma_lib : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += sfmpy($Rs32,$Rt32):lib", -M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> { +tc_2d1e6f5c, TypeM>, Enc_2ae154, Requires<[HasV5T]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; let Constraints = "$Rx32 = $Rx32in"; } @@ -4504,14 +4561,13 @@ def F2_sffma_sc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4), "$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale", -M_tc_3or4x_acc_SLOT23, TypeM>, Enc_15194851, Requires<[HasV5T]> { +tc_2e55aa16, TypeM>, Enc_437f33, Requires<[HasV5T]> { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111011; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; let Constraints = "$Rx32 = $Rx32in"; } @@ -4519,14 +4575,13 @@ def F2_sffms : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= sfmpy($Rs32,$Rt32)", -M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> { +tc_2d1e6f5c, TypeM>, Enc_2ae154, Requires<[HasV5T]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; let Constraints = "$Rx32 = $Rx32in"; } @@ -4534,14 +4589,13 @@ def F2_sffms_lib : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= sfmpy($Rs32,$Rt32):lib", -M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> { +tc_2d1e6f5c, TypeM>, Enc_2ae154, Requires<[HasV5T]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; let Constraints = "$Rx32 = $Rx32in"; } @@ -4549,7 +4603,7 @@ def F2_sfimm_n : HInst< (outs IntRegs:$Rd32), (ins u10_0Imm:$Ii), "$Rd32 = sfmake(#$Ii):neg", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_9082775, Requires<[HasV5T]> { +tc_485bb57c, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101011001; let hasNewValue = 1; @@ -4560,7 +4614,7 @@ def F2_sfimm_p : HInst< (outs IntRegs:$Rd32), (ins u10_0Imm:$Ii), "$Rd32 = sfmake(#$Ii):pos", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_9082775, Requires<[HasV5T]> { +tc_485bb57c, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101011000; let hasNewValue = 1; @@ -4571,20 +4625,19 @@ def F2_sfinvsqrta : HInst< (outs IntRegs:$Rd32, PredRegs:$Pe4), (ins IntRegs:$Rs32), "$Rd32,$Pe4 = sfinvsqrta($Rs32)", -S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_5718302, Requires<[HasV5T]> { +tc_f1aa2cdb, TypeS_2op>, Enc_890909, Requires<[HasV5T]> { let Inst{13-7} = 0b0000000; let Inst{31-21} = 0b10001011111; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; let isPredicateLate = 1; -let prefersSlot3 = 1; } def F2_sfmax : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfmax($Rs32,$Rt32)", -M_tc_2_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> { +tc_f1240c08, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011100; @@ -4598,7 +4651,7 @@ def F2_sfmin : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfmin($Rs32,$Rt32)", -M_tc_2_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> { +tc_f1240c08, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011100; @@ -4612,14 +4665,13 @@ def F2_sfmpy : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfmpy($Rs32,$Rt32)", -M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> { +tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011010; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; let isCommutable = 1; } @@ -4627,7 +4679,7 @@ def F2_sfrecipa : HInst< (outs IntRegs:$Rd32, PredRegs:$Pe4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)", -M_tc_3or4x_SLOT23, TypeM>, Enc_5853469, Requires<[HasV5T]> { +tc_09c86199, TypeM>, Enc_a94f3b, Requires<[HasV5T]> { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011111; @@ -4635,27 +4687,25 @@ let hasNewValue = 1; let opNewValue = 0; let isFP = 1; let isPredicateLate = 1; -let prefersSlot3 = 1; } def F2_sfsub : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfsub($Rs32,$Rt32)", -M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> { +tc_3bea1824, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011000; let hasNewValue = 1; let opNewValue = 0; let isFP = 1; -let prefersSlot3 = 1; let Uses = [USR]; } def J2_call : HInst< (outs), (ins a30_2Imm:$Ii), "call $Ii", -J_tc_2early_SLOT23, TypeJ>, Enc_13453446, PredRel { +tc_639d93ee, TypeJ>, Enc_81ac1d, PredRel { let Inst{0-0} = 0b0; let Inst{31-25} = 0b0101101; let isCall = 1; @@ -4675,7 +4725,7 @@ def J2_callf : HInst< (outs), (ins PredRegs:$Pu4, a30_2Imm:$Ii), "if (!$Pu4) call $Ii", -J_tc_2early_SLOT23, TypeJ>, Enc_14868535, PredRel { +tc_0767081f, TypeJ>, Enc_daea09, PredRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b000; let Inst{21-21} = 0b1; @@ -4699,7 +4749,7 @@ def J2_callr : HInst< (outs), (ins IntRegs:$Rs32), "callr $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_11704059 { +tc_ecfaae86, TypeJ>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01010000101; let cofMax1 = 1; @@ -4713,7 +4763,7 @@ def J2_callrf : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) callr $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953 { +tc_84630363, TypeJ>, Enc_88d4d9 { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b01010001001; @@ -4731,7 +4781,7 @@ def J2_callrt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) callr $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953 { +tc_84630363, TypeJ>, Enc_88d4d9 { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b01010001000; @@ -4748,7 +4798,7 @@ def J2_callt : HInst< (outs), (ins PredRegs:$Pu4, a30_2Imm:$Ii), "if ($Pu4) call $Ii", -J_tc_2early_SLOT23, TypeJ>, Enc_14868535, PredRel { +tc_0767081f, TypeJ>, Enc_daea09, PredRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b000; let Inst{21-21} = 0b0; @@ -4771,16 +4821,18 @@ def J2_endloop0 : HInst< (outs), (ins), "endloop0", -PSEUDO, TypeJ> { +tc_aad55963, TypeJ> { let Uses = [LC0, SA0]; let Defs = [LC0, P3, PC, USR]; +let isBranch = 1; +let isTerminator = 1; let isPseudo = 1; } def J2_endloop01 : HInst< (outs), (ins), "endloop01", -PSEUDO, TypeJ> { +tc_aad55963, TypeJ> { let Uses = [LC0, LC1, SA0, SA1]; let Defs = [LC0, LC1, P3, PC, USR]; let isPseudo = 1; @@ -4789,16 +4841,18 @@ def J2_endloop1 : HInst< (outs), (ins), "endloop1", -PSEUDO, TypeJ> { +tc_aad55963, TypeJ> { let Uses = [LC1, SA1]; let Defs = [LC1, PC]; +let isBranch = 1; +let isTerminator = 1; let isPseudo = 1; } def J2_jump : HInst< (outs), (ins b30_2Imm:$Ii), "jump $Ii", -J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_13453446, PredNewRel { +tc_a333d2a9, TypeJ>, Enc_81ac1d, PredNewRel { let Inst{0-0} = 0b0; let Inst{31-25} = 0b0101100; let isTerminator = 1; @@ -4818,7 +4872,7 @@ def J2_jumpf : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if (!$Pu4) jump:nt $Ii", -J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel { +tc_1b834fe7, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b000; let Inst{21-21} = 0b1; @@ -4841,7 +4895,7 @@ def J2_jumpf_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, b15_2Imm:$Ii), "if (!$Pu4) jump $Ii", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_1b834fe7, TypeMAPPING>, Requires<[HasV60T]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -4849,7 +4903,7 @@ def J2_jumpfnew : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if (!$Pu4.new) jump:nt $Ii", -J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel { +tc_537e2013, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b010; let Inst{21-21} = 0b1; @@ -4873,7 +4927,7 @@ def J2_jumpfnewpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if (!$Pu4.new) jump:t $Ii", -J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel { +tc_537e2013, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b110; let Inst{21-21} = 0b1; @@ -4897,7 +4951,7 @@ def J2_jumpfpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if (!$Pu4) jump:t $Ii", -J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, Requires<[HasV60T]>, PredNewRel { +tc_b5bfaa60, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b100; let Inst{21-21} = 0b1; @@ -4920,7 +4974,7 @@ def J2_jumpr : HInst< (outs), (ins IntRegs:$Rs32), "jumpr $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_11704059, PredNewRel { +tc_b08b653e, TypeJ>, Enc_ecbcc8, PredNewRel { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01010010100; let isTerminator = 1; @@ -4937,7 +4991,7 @@ def J2_jumprf : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) jumpr:nt $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel { +tc_07ac815d, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b01010011011; @@ -4956,7 +5010,7 @@ def J2_jumprf_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) jumpr $Rs32", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_07ac815d, TypeMAPPING>, Requires<[HasV60T]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -4964,7 +5018,7 @@ def J2_jumprfnew : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) jumpr:nt $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel { +tc_1f9668cc, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0010; let Inst{31-21} = 0b01010011011; @@ -4984,7 +5038,7 @@ def J2_jumprfnewpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4.new) jumpr:t $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel { +tc_1f9668cc, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0110; let Inst{31-21} = 0b01010011011; @@ -5004,7 +5058,7 @@ def J2_jumprfpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) jumpr:t $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953, Requires<[HasV60T]>, PredNewRel { +tc_a1fb80e1, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0100; let Inst{31-21} = 0b01010011011; @@ -5023,7 +5077,7 @@ def J2_jumprgtez : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32>=#0) jump:nt $Ii", -CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 { +tc_b324366f, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b0; let Inst{31-22} = 0b0110000101; @@ -5038,7 +5092,7 @@ def J2_jumprgtezpt : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32>=#0) jump:t $Ii", -CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 { +tc_b324366f, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b1; let Inst{31-22} = 0b0110000101; @@ -5053,7 +5107,7 @@ def J2_jumprltez : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32<=#0) jump:nt $Ii", -CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 { +tc_b324366f, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b0; let Inst{31-22} = 0b0110000111; @@ -5068,7 +5122,7 @@ def J2_jumprltezpt : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32<=#0) jump:t $Ii", -CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 { +tc_b324366f, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b1; let Inst{31-22} = 0b0110000111; @@ -5083,7 +5137,7 @@ def J2_jumprnz : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32==#0) jump:nt $Ii", -CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 { +tc_b324366f, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b0; let Inst{31-22} = 0b0110000110; @@ -5098,7 +5152,7 @@ def J2_jumprnzpt : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32==#0) jump:t $Ii", -CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 { +tc_b324366f, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b1; let Inst{31-22} = 0b0110000110; @@ -5113,7 +5167,7 @@ def J2_jumprt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) jumpr:nt $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel { +tc_07ac815d, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b01010011010; @@ -5131,7 +5185,7 @@ def J2_jumprt_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) jumpr $Rs32", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_07ac815d, TypeMAPPING>, Requires<[HasV60T]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -5139,7 +5193,7 @@ def J2_jumprtnew : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) jumpr:nt $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel { +tc_1f9668cc, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0010; let Inst{31-21} = 0b01010011010; @@ -5158,7 +5212,7 @@ def J2_jumprtnewpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4.new) jumpr:t $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel { +tc_1f9668cc, TypeJ>, Enc_88d4d9, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0110; let Inst{31-21} = 0b01010011010; @@ -5177,7 +5231,7 @@ def J2_jumprtpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) jumpr:t $Rs32", -J_tc_2early_SLOT2, TypeJ>, Enc_1928953, Requires<[HasV60T]>, PredNewRel { +tc_a1fb80e1, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0100; let Inst{31-21} = 0b01010011010; @@ -5195,7 +5249,7 @@ def J2_jumprz : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32!=#0) jump:nt $Ii", -CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 { +tc_b324366f, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b0; let Inst{31-22} = 0b0110000100; @@ -5210,7 +5264,7 @@ def J2_jumprzpt : HInst< (outs), (ins IntRegs:$Rs32, b13_2Imm:$Ii), "if ($Rs32!=#0) jump:t $Ii", -CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 { +tc_b324366f, TypeCR>, Enc_0fa531 { let Inst{0-0} = 0b0; let Inst{12-12} = 0b1; let Inst{31-22} = 0b0110000100; @@ -5225,7 +5279,7 @@ def J2_jumpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if ($Pu4) jump:nt $Ii", -J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel { +tc_1b834fe7, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b000; let Inst{21-21} = 0b0; @@ -5247,7 +5301,7 @@ def J2_jumpt_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, b15_2Imm:$Ii), "if ($Pu4) jump $Ii", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_1b834fe7, TypeMAPPING>, Requires<[HasV60T]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -5255,7 +5309,7 @@ def J2_jumptnew : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if ($Pu4.new) jump:nt $Ii", -J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel { +tc_537e2013, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b010; let Inst{21-21} = 0b0; @@ -5278,7 +5332,7 @@ def J2_jumptnewpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if ($Pu4.new) jump:t $Ii", -J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel { +tc_537e2013, TypeJ>, Enc_daea09, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b110; let Inst{21-21} = 0b0; @@ -5301,7 +5355,7 @@ def J2_jumptpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if ($Pu4) jump:t $Ii", -J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, Requires<[HasV60T]>, PredNewRel { +tc_b5bfaa60, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b100; let Inst{21-21} = 0b0; @@ -5323,7 +5377,7 @@ def J2_loop0i : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "loop0($Ii,#$II)", -CR_tc_3x_SLOT3, TypeCR>, Enc_9939385 { +tc_1000eb10, TypeCR>, Enc_4dc228 { let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001000; @@ -5338,7 +5392,7 @@ def J2_loop0r : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "loop0($Ii,$Rs32)", -CR_tc_3x_SLOT3, TypeCR>, Enc_5790679 { +tc_f055fbb6, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; @@ -5354,7 +5408,7 @@ def J2_loop1i : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "loop1($Ii,#$II)", -CR_tc_3x_SLOT3, TypeCR>, Enc_9939385 { +tc_1000eb10, TypeCR>, Enc_4dc228 { let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001001; @@ -5369,7 +5423,7 @@ def J2_loop1r : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "loop1($Ii,$Rs32)", -CR_tc_3x_SLOT3, TypeCR>, Enc_5790679 { +tc_f055fbb6, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; @@ -5385,7 +5439,7 @@ def J2_pause : HInst< (outs), (ins u8_0Imm:$Ii), "pause(#$Ii)", -J_tc_2early_SLOT2, TypeJ>, Enc_8732960 { +tc_b189ad4c, TypeJ>, Enc_a51a9a { let Inst{1-0} = 0b00; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; @@ -5396,7 +5450,7 @@ def J2_ploop1si : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "p3 = sp1loop0($Ii,#$II)", -CR_tc_2early_SLOT3, TypeCR>, Enc_9939385 { +tc_feb4974b, TypeCR>, Enc_4dc228 { let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001101; @@ -5412,7 +5466,7 @@ def J2_ploop1sr : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "p3 = sp1loop0($Ii,$Rs32)", -CR_tc_2early_SLOT3, TypeCR>, Enc_5790679 { +tc_d6a805a8, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; @@ -5429,7 +5483,7 @@ def J2_ploop2si : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "p3 = sp2loop0($Ii,#$II)", -CR_tc_2early_SLOT3, TypeCR>, Enc_9939385 { +tc_feb4974b, TypeCR>, Enc_4dc228 { let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001110; @@ -5445,7 +5499,7 @@ def J2_ploop2sr : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "p3 = sp2loop0($Ii,$Rs32)", -CR_tc_2early_SLOT3, TypeCR>, Enc_5790679 { +tc_d6a805a8, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; @@ -5462,7 +5516,7 @@ def J2_ploop3si : HInst< (outs), (ins b30_2Imm:$Ii, u10_0Imm:$II), "p3 = sp3loop0($Ii,#$II)", -CR_tc_2early_SLOT3, TypeCR>, Enc_9939385 { +tc_feb4974b, TypeCR>, Enc_4dc228 { let Inst{2-2} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b01101001111; @@ -5478,7 +5532,7 @@ def J2_ploop3sr : HInst< (outs), (ins b30_2Imm:$Ii, IntRegs:$Rs32), "p3 = sp3loop0($Ii,$Rs32)", -CR_tc_2early_SLOT3, TypeCR>, Enc_5790679 { +tc_d6a805a8, TypeCR>, Enc_864a5a { let Inst{2-0} = 0b000; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; @@ -5495,7 +5549,7 @@ def J2_trap0 : HInst< (outs), (ins u8_0Imm:$Ii), "trap0(#$Ii)", -J_tc_2early_SLOT2, TypeJ>, Enc_8732960 { +tc_cbe45117, TypeJ>, Enc_a51a9a { let Inst{1-0} = 0b00; let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; @@ -5506,7 +5560,7 @@ def J4_cmpeq_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -5531,7 +5585,7 @@ def J4_cmpeq_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,$Rt32)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -5556,7 +5610,7 @@ def J4_cmpeq_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010001; @@ -5579,7 +5633,7 @@ def J4_cmpeq_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010001; @@ -5602,7 +5656,7 @@ def J4_cmpeq_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010001; @@ -5625,7 +5679,7 @@ def J4_cmpeq_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010001; @@ -5648,7 +5702,7 @@ def J4_cmpeq_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -5672,7 +5726,7 @@ def J4_cmpeq_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,$Rt32)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -5696,7 +5750,7 @@ def J4_cmpeq_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010000; @@ -5718,7 +5772,7 @@ def J4_cmpeq_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010000; @@ -5740,7 +5794,7 @@ def J4_cmpeq_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010000; @@ -5762,7 +5816,7 @@ def J4_cmpeq_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010000; @@ -5784,7 +5838,7 @@ def J4_cmpeqi_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,#$II)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -5809,7 +5863,7 @@ def J4_cmpeqi_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,#$II)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -5834,7 +5888,7 @@ def J4_cmpeqi_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000001; @@ -5857,7 +5911,7 @@ def J4_cmpeqi_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000001; @@ -5880,7 +5934,7 @@ def J4_cmpeqi_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001001; @@ -5903,7 +5957,7 @@ def J4_cmpeqi_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001001; @@ -5926,7 +5980,7 @@ def J4_cmpeqi_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,#$II)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -5950,7 +6004,7 @@ def J4_cmpeqi_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,#$II)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -5974,7 +6028,7 @@ def J4_cmpeqi_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000000; @@ -5996,7 +6050,7 @@ def J4_cmpeqi_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000000; @@ -6018,7 +6072,7 @@ def J4_cmpeqi_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001000; @@ -6040,7 +6094,7 @@ def J4_cmpeqi_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001000; @@ -6062,7 +6116,7 @@ def J4_cmpeqn1_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,#$n1)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4359901, PredRel { +tc_09faec3b, TypeNCJ>, Enc_e90a15, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -6087,7 +6141,7 @@ def J4_cmpeqn1_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (!cmp.eq($Ns8.new,#$n1)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_8612939, PredRel { +tc_09faec3b, TypeNCJ>, Enc_5a18b3, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -6112,7 +6166,7 @@ def J4_cmpeqn1_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_844699, PredRel { +tc_d108a090, TypeCJ>, Enc_1de724, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{31-22} = 0b0001000111; @@ -6135,7 +6189,7 @@ def J4_cmpeqn1_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_5338033, PredRel { +tc_d108a090, TypeCJ>, Enc_14640c, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{31-22} = 0b0001000111; @@ -6158,7 +6212,7 @@ def J4_cmpeqn1_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14150875, PredRel { +tc_d108a090, TypeCJ>, Enc_668704, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{31-22} = 0b0001001111; @@ -6181,7 +6235,7 @@ def J4_cmpeqn1_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_15450971, PredRel { +tc_d108a090, TypeCJ>, Enc_800e04, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{31-22} = 0b0001001111; @@ -6204,7 +6258,7 @@ def J4_cmpeqn1_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,#$n1)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_14998517, PredRel { +tc_09faec3b, TypeNCJ>, Enc_4aca3a, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -6228,7 +6282,7 @@ def J4_cmpeqn1_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (cmp.eq($Ns8.new,#$n1)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_11544269, PredRel { +tc_09faec3b, TypeNCJ>, Enc_f7ea77, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -6252,7 +6306,7 @@ def J4_cmpeqn1_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_5401217, PredRel { +tc_d108a090, TypeCJ>, Enc_405228, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{31-22} = 0b0001000110; @@ -6274,7 +6328,7 @@ def J4_cmpeqn1_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12419313, PredRel { +tc_d108a090, TypeCJ>, Enc_3a2484, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{31-22} = 0b0001000110; @@ -6296,7 +6350,7 @@ def J4_cmpeqn1_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_4684887, PredRel { +tc_d108a090, TypeCJ>, Enc_736575, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{31-22} = 0b0001001110; @@ -6318,7 +6372,7 @@ def J4_cmpeqn1_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_220949, PredRel { +tc_d108a090, TypeCJ>, Enc_8e583a, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{31-22} = 0b0001001110; @@ -6340,7 +6394,7 @@ def J4_cmpgt_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -6365,7 +6419,7 @@ def J4_cmpgt_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,$Rt32)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -6390,7 +6444,7 @@ def J4_cmpgt_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010011; @@ -6413,7 +6467,7 @@ def J4_cmpgt_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010011; @@ -6436,7 +6490,7 @@ def J4_cmpgt_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010011; @@ -6459,7 +6513,7 @@ def J4_cmpgt_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010011; @@ -6482,7 +6536,7 @@ def J4_cmpgt_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -6506,7 +6560,7 @@ def J4_cmpgt_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,$Rt32)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -6530,7 +6584,7 @@ def J4_cmpgt_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010010; @@ -6552,7 +6606,7 @@ def J4_cmpgt_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010010; @@ -6574,7 +6628,7 @@ def J4_cmpgt_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010010; @@ -6596,7 +6650,7 @@ def J4_cmpgt_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010010; @@ -6618,7 +6672,7 @@ def J4_cmpgti_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,#$II)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -6643,7 +6697,7 @@ def J4_cmpgti_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,#$II)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -6668,7 +6722,7 @@ def J4_cmpgti_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000011; @@ -6691,7 +6745,7 @@ def J4_cmpgti_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000011; @@ -6714,7 +6768,7 @@ def J4_cmpgti_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001011; @@ -6737,7 +6791,7 @@ def J4_cmpgti_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001011; @@ -6760,7 +6814,7 @@ def J4_cmpgti_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,#$II)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -6784,7 +6838,7 @@ def J4_cmpgti_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,#$II)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -6808,7 +6862,7 @@ def J4_cmpgti_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000010; @@ -6830,7 +6884,7 @@ def J4_cmpgti_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000010; @@ -6852,7 +6906,7 @@ def J4_cmpgti_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001010; @@ -6874,7 +6928,7 @@ def J4_cmpgti_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001010; @@ -6896,7 +6950,7 @@ def J4_cmpgtn1_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,#$n1)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_8674673, PredRel { +tc_09faec3b, TypeNCJ>, Enc_3694bd, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -6921,7 +6975,7 @@ def J4_cmpgtn1_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (!cmp.gt($Ns8.new,#$n1)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15763937, PredRel { +tc_09faec3b, TypeNCJ>, Enc_a6853f, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -6946,7 +7000,7 @@ def J4_cmpgtn1_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_5915771, PredRel { +tc_d108a090, TypeCJ>, Enc_a42857, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000001; let Inst{31-22} = 0b0001000111; @@ -6969,7 +7023,7 @@ def J4_cmpgtn1_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7315939, PredRel { +tc_d108a090, TypeCJ>, Enc_f6fe0b, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100001; let Inst{31-22} = 0b0001000111; @@ -6992,7 +7046,7 @@ def J4_cmpgtn1_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7785569, PredRel { +tc_d108a090, TypeCJ>, Enc_3e3989, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000001; let Inst{31-22} = 0b0001001111; @@ -7015,7 +7069,7 @@ def J4_cmpgtn1_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_10968391, PredRel { +tc_d108a090, TypeCJ>, Enc_b909d2, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100001; let Inst{31-22} = 0b0001001111; @@ -7038,7 +7092,7 @@ def J4_cmpgtn1_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,#$n1)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_364753, PredRel { +tc_09faec3b, TypeNCJ>, Enc_f82302, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -7062,7 +7116,7 @@ def J4_cmpgtn1_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii), "if (cmp.gt($Ns8.new,#$n1)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_8479583, PredRel { +tc_09faec3b, TypeNCJ>, Enc_6413b6, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -7086,7 +7140,7 @@ def J4_cmpgtn1_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_2428539, PredRel { +tc_d108a090, TypeCJ>, Enc_b78edd, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000001; let Inst{31-22} = 0b0001000110; @@ -7108,7 +7162,7 @@ def J4_cmpgtn1_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_8919369, PredRel { +tc_d108a090, TypeCJ>, Enc_041d7b, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100001; let Inst{31-22} = 0b0001000110; @@ -7130,7 +7184,7 @@ def J4_cmpgtn1_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_8577055, PredRel { +tc_d108a090, TypeCJ>, Enc_b1e1fb, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000001; let Inst{31-22} = 0b0001001110; @@ -7152,7 +7206,7 @@ def J4_cmpgtn1_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii), "p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14530015, PredRel { +tc_d108a090, TypeCJ>, Enc_178717, PredRel { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100001; let Inst{31-22} = 0b0001001110; @@ -7174,7 +7228,7 @@ def J4_cmpgtu_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7199,7 +7253,7 @@ def J4_cmpgtu_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (!cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7224,7 +7278,7 @@ def J4_cmpgtu_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010101; @@ -7247,7 +7301,7 @@ def J4_cmpgtu_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010101; @@ -7270,7 +7324,7 @@ def J4_cmpgtu_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010101; @@ -7293,7 +7347,7 @@ def J4_cmpgtu_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010101; @@ -7316,7 +7370,7 @@ def J4_cmpgtu_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7340,7 +7394,7 @@ def J4_cmpgtu_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), "if (cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel { +tc_580a779c, TypeNCJ>, Enc_c9a18e, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7364,7 +7418,7 @@ def J4_cmpgtu_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001010100; @@ -7386,7 +7440,7 @@ def J4_cmpgtu_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b10; let Inst{31-22} = 0b0001010100; @@ -7408,7 +7462,7 @@ def J4_cmpgtu_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b01; let Inst{31-22} = 0b0001010100; @@ -7430,7 +7484,7 @@ def J4_cmpgtu_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel { +tc_92d1833c, TypeCJ>, Enc_6a5972, PredRel { let Inst{0-0} = 0b0; let Inst{13-12} = 0b11; let Inst{31-22} = 0b0001010100; @@ -7452,7 +7506,7 @@ def J4_cmpgtui_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.gtu($Ns8.new,#$II)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7477,7 +7531,7 @@ def J4_cmpgtui_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (!cmp.gtu($Ns8.new,#$II)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7502,7 +7556,7 @@ def J4_cmpgtui_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000101; @@ -7525,7 +7579,7 @@ def J4_cmpgtui_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000101; @@ -7548,7 +7602,7 @@ def J4_cmpgtui_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001101; @@ -7571,7 +7625,7 @@ def J4_cmpgtui_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001101; @@ -7594,7 +7648,7 @@ def J4_cmpgtui_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.gtu($Ns8.new,#$II)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7618,7 +7672,7 @@ def J4_cmpgtui_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii), "if (cmp.gtu($Ns8.new,#$II)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel { +tc_09faec3b, TypeNCJ>, Enc_eafd18, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7642,7 +7696,7 @@ def J4_cmpgtui_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001000100; @@ -7664,7 +7718,7 @@ def J4_cmpgtui_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001000100; @@ -7686,7 +7740,7 @@ def J4_cmpgtui_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{31-22} = 0b0001001100; @@ -7708,7 +7762,7 @@ def J4_cmpgtui_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii), "p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel { +tc_d108a090, TypeCJ>, Enc_14d27a, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{31-22} = 0b0001001100; @@ -7730,7 +7784,7 @@ def J4_cmplt_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (!cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel { +tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7755,7 +7809,7 @@ def J4_cmplt_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (!cmp.gt($Rt32,$Ns8.new)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel { +tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7780,7 +7834,7 @@ def J4_cmplt_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel { +tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7804,7 +7858,7 @@ def J4_cmplt_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (cmp.gt($Rt32,$Ns8.new)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel { +tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7828,7 +7882,7 @@ def J4_cmpltu_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (!cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel { +tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7853,7 +7907,7 @@ def J4_cmpltu_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (!cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel { +tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7878,7 +7932,7 @@ def J4_cmpltu_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel { +tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b0; let Inst{19-19} = 0b0; @@ -7902,7 +7956,7 @@ def J4_cmpltu_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii), "if (cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel { +tc_3e61d314, TypeNCJ>, Enc_5de85f, PredRel { let Inst{0-0} = 0b0; let Inst{13-13} = 0b1; let Inst{19-19} = 0b0; @@ -7926,7 +7980,7 @@ def J4_hintjumpr : HInst< (outs), (ins IntRegs:$Rs32), "hintjr($Rs32)", -J_tc_2early_SLOT2, TypeJ>, Enc_11704059 { +tc_b08b653e, TypeJ>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01010010101; let isTerminator = 1; @@ -7938,7 +7992,7 @@ def J4_jumpseti : HInst< (outs GeneralSubRegs:$Rd16), (ins u6_0Imm:$II, b30_2Imm:$Ii), "$Rd16 = #$II ; jump $Ii", -COMPOUND, TypeCJ>, Enc_4834775 { +tc_1e062b18, TypeCJ>, Enc_9e4c3f { let Inst{0-0} = 0b0; let Inst{31-22} = 0b0001011000; let hasNewValue = 1; @@ -7956,7 +8010,7 @@ def J4_jumpsetr : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "$Rd16 = $Rs16 ; jump $Ii", -COMPOUND, TypeCJ>, Enc_2639299 { +tc_1e062b18, TypeCJ>, Enc_66bce1 { let Inst{0-0} = 0b0; let Inst{13-12} = 0b00; let Inst{31-22} = 0b0001011100; @@ -7975,7 +8029,7 @@ def J4_tstbit0_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, b30_2Imm:$Ii), "if (!tstbit($Ns8.new,#0)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 { +tc_dbe218dd, TypeNCJ>, Enc_69d63b { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -7999,7 +8053,7 @@ def J4_tstbit0_f_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, b30_2Imm:$Ii), "if (!tstbit($Ns8.new,#0)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 { +tc_dbe218dd, TypeNCJ>, Enc_69d63b { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -8023,7 +8077,7 @@ def J4_tstbit0_fp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p0 = tstbit($Rs16,#0); if (!p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 { +tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000011; let Inst{31-22} = 0b0001000111; @@ -8045,7 +8099,7 @@ def J4_tstbit0_fp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p0 = tstbit($Rs16,#0); if (!p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 { +tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100011; let Inst{31-22} = 0b0001000111; @@ -8067,7 +8121,7 @@ def J4_tstbit0_fp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p1 = tstbit($Rs16,#0); if (!p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 { +tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000011; let Inst{31-22} = 0b0001001111; @@ -8089,7 +8143,7 @@ def J4_tstbit0_fp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p1 = tstbit($Rs16,#0); if (!p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 { +tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100011; let Inst{31-22} = 0b0001001111; @@ -8111,7 +8165,7 @@ def J4_tstbit0_t_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, b30_2Imm:$Ii), "if (tstbit($Ns8.new,#0)) jump:nt $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 { +tc_dbe218dd, TypeNCJ>, Enc_69d63b { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000000; let Inst{19-19} = 0b0; @@ -8134,7 +8188,7 @@ def J4_tstbit0_t_jumpnv_t : HInst< (outs), (ins IntRegs:$Ns8, b30_2Imm:$Ii), "if (tstbit($Ns8.new,#0)) jump:t $Ii", -NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 { +tc_dbe218dd, TypeNCJ>, Enc_69d63b { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100000; let Inst{19-19} = 0b0; @@ -8157,7 +8211,7 @@ def J4_tstbit0_tp0_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p0 = tstbit($Rs16,#0); if (p0.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 { +tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000011; let Inst{31-22} = 0b0001000110; @@ -8178,7 +8232,7 @@ def J4_tstbit0_tp0_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p0 = tstbit($Rs16,#0); if (p0.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 { +tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100011; let Inst{31-22} = 0b0001000110; @@ -8199,7 +8253,7 @@ def J4_tstbit0_tp1_jump_nt : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p1 = tstbit($Rs16,#0); if (p1.new) jump:nt $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 { +tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b000011; let Inst{31-22} = 0b0001001110; @@ -8220,7 +8274,7 @@ def J4_tstbit0_tp1_jump_t : HInst< (outs), (ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii), "p1 = tstbit($Rs16,#0); if (p1.new) jump:t $Ii", -COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 { +tc_eb07ef6f, TypeCJ>, Enc_ad1c74 { let Inst{0-0} = 0b0; let Inst{13-8} = 0b100011; let Inst{31-22} = 0b0001001110; @@ -8241,7 +8295,7 @@ def L2_deallocframe : HInst< (outs), (ins), "deallocframe", -LD_tc_ld_SLOT01, TypeLD>, Enc_0 { +tc_c1dbc916, TypeLD>, Enc_3a3d62 { let Inst{4-0} = 0b11110; let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10010000000; @@ -8255,7 +8309,7 @@ def L2_loadalignb_io : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Ryy32 = memb_fifo($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_449439 { +tc_14da557c, TypeLD>, Enc_a27588 { let Inst{24-21} = 0b0100; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8272,7 +8326,7 @@ def L2_loadalignb_pbr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memb_fifo($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 { +tc_ae762521, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110100; let accessSize = ByteAccess; @@ -8283,7 +8337,7 @@ def L2_loadalignb_pci : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2), "$Ryy32 = memb_fifo($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_971347 { +tc_d2a33af5, TypeLD>, Enc_74aef2 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000100; let addrMode = PostInc; @@ -8296,7 +8350,7 @@ def L2_loadalignb_pcr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memb_fifo($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 { +tc_ae762521, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000100; let addrMode = PostInc; @@ -8309,7 +8363,7 @@ def L2_loadalignb_pi : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii), "$Ryy32 = memb_fifo($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6372758 { +tc_ae762521, TypeLD>, Enc_6b197f { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010100; let addrMode = PostInc; @@ -8321,7 +8375,7 @@ def L2_loadalignb_pr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memb_fifo($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 { +tc_ae762521, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100100; let addrMode = PostInc; @@ -8333,7 +8387,7 @@ def L2_loadalignb_zomap : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rs32), "$Ryy32 = memb_fifo($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let Constraints = "$Ryy32 = $Ryy32in"; @@ -8342,7 +8396,7 @@ def L2_loadalignh_io : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s31_1Imm:$Ii), "$Ryy32 = memh_fifo($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_11930027 { +tc_14da557c, TypeLD>, Enc_5cd7e9 { let Inst{24-21} = 0b0010; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8359,7 +8413,7 @@ def L2_loadalignh_pbr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memh_fifo($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 { +tc_ae762521, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110010; let accessSize = HalfWordAccess; @@ -8370,7 +8424,7 @@ def L2_loadalignh_pci : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Ryy32 = memh_fifo($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_1971351 { +tc_d2a33af5, TypeLD>, Enc_9e2e1c { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000010; let addrMode = PostInc; @@ -8383,7 +8437,7 @@ def L2_loadalignh_pcr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memh_fifo($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 { +tc_ae762521, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000010; let addrMode = PostInc; @@ -8396,7 +8450,7 @@ def L2_loadalignh_pi : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii), "$Ryy32 = memh_fifo($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_3372766 { +tc_ae762521, TypeLD>, Enc_bd1cbc { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010010; let addrMode = PostInc; @@ -8408,7 +8462,7 @@ def L2_loadalignh_pr : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Rx32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2), "$Ryy32 = memh_fifo($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 { +tc_ae762521, TypeLD>, Enc_1f5d8f { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100010; let addrMode = PostInc; @@ -8420,7 +8474,7 @@ def L2_loadalignh_zomap : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rs32), "$Ryy32 = memh_fifo($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let Constraints = "$Ryy32 = $Ryy32in"; @@ -8429,7 +8483,7 @@ def L2_loadbsw2_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = membh($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_15275738 { +tc_bf6fa601, TypeLD>, Enc_de0214 { let Inst{24-21} = 0b0001; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -8447,7 +8501,7 @@ def L2_loadbsw2_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = membh($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110001; let hasNewValue = 1; @@ -8460,7 +8514,7 @@ def L2_loadbsw2_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Rd32 = membh($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 { +tc_3eab77bd, TypeLD>, Enc_e83554 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000001; let hasNewValue = 1; @@ -8475,7 +8529,7 @@ def L2_loadbsw2_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = membh($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000001; let hasNewValue = 1; @@ -8490,7 +8544,7 @@ def L2_loadbsw2_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii), "$Rd32 = membh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009 { +tc_65dc7cc4, TypeLD>, Enc_152467 { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010001; let hasNewValue = 1; @@ -8504,7 +8558,7 @@ def L2_loadbsw2_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = membh($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100001; let hasNewValue = 1; @@ -8518,7 +8572,7 @@ def L2_loadbsw2_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = membh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -8528,7 +8582,7 @@ def L2_loadbsw4_io : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rdd32 = membh($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_9852473 { +tc_bf6fa601, TypeLD>, Enc_2d7491 { let Inst{24-21} = 0b0111; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8544,7 +8598,7 @@ def L2_loadbsw4_pbr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = membh($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 { +tc_65dc7cc4, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110111; let accessSize = WordAccess; @@ -8555,7 +8609,7 @@ def L2_loadbsw4_pci : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2), "$Rdd32 = membh($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_3931661 { +tc_3eab77bd, TypeLD>, Enc_70b24b { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000111; let addrMode = PostInc; @@ -8568,7 +8622,7 @@ def L2_loadbsw4_pcr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = membh($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 { +tc_65dc7cc4, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000111; let addrMode = PostInc; @@ -8581,7 +8635,7 @@ def L2_loadbsw4_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii), "$Rdd32 = membh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_8752140 { +tc_65dc7cc4, TypeLD>, Enc_71f1b4 { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010111; let addrMode = PostInc; @@ -8593,7 +8647,7 @@ def L2_loadbsw4_pr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = membh($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 { +tc_65dc7cc4, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100111; let addrMode = PostInc; @@ -8605,7 +8659,7 @@ def L2_loadbsw4_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = membh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -8613,7 +8667,7 @@ def L2_loadbzw2_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memubh($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_15275738 { +tc_bf6fa601, TypeLD>, Enc_de0214 { let Inst{24-21} = 0b0011; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -8631,7 +8685,7 @@ def L2_loadbzw2_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memubh($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110011; let hasNewValue = 1; @@ -8644,7 +8698,7 @@ def L2_loadbzw2_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memubh($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 { +tc_3eab77bd, TypeLD>, Enc_e83554 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000011; let hasNewValue = 1; @@ -8659,7 +8713,7 @@ def L2_loadbzw2_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memubh($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000011; let hasNewValue = 1; @@ -8674,7 +8728,7 @@ def L2_loadbzw2_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii), "$Rd32 = memubh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009 { +tc_65dc7cc4, TypeLD>, Enc_152467 { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010011; let hasNewValue = 1; @@ -8688,7 +8742,7 @@ def L2_loadbzw2_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memubh($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100011; let hasNewValue = 1; @@ -8702,7 +8756,7 @@ def L2_loadbzw2_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memubh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -8712,7 +8766,7 @@ def L2_loadbzw4_io : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rdd32 = memubh($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_9852473 { +tc_bf6fa601, TypeLD>, Enc_2d7491 { let Inst{24-21} = 0b0101; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8728,7 +8782,7 @@ def L2_loadbzw4_pbr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memubh($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 { +tc_65dc7cc4, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011110101; let accessSize = WordAccess; @@ -8739,7 +8793,7 @@ def L2_loadbzw4_pci : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2), "$Rdd32 = memubh($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_3931661 { +tc_3eab77bd, TypeLD>, Enc_70b24b { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011000101; let addrMode = PostInc; @@ -8752,7 +8806,7 @@ def L2_loadbzw4_pcr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memubh($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 { +tc_65dc7cc4, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011000101; let addrMode = PostInc; @@ -8765,7 +8819,7 @@ def L2_loadbzw4_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii), "$Rdd32 = memubh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_8752140 { +tc_65dc7cc4, TypeLD>, Enc_71f1b4 { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011010101; let addrMode = PostInc; @@ -8777,7 +8831,7 @@ def L2_loadbzw4_pr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memubh($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 { +tc_65dc7cc4, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011100101; let addrMode = PostInc; @@ -8789,7 +8843,7 @@ def L2_loadbzw4_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = memubh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -8797,7 +8851,7 @@ def L2_loadrb_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memb($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_14461004, AddrModeRel { +tc_bf6fa601, TypeLD>, Enc_211aaa, AddrModeRel { let Inst{24-21} = 0b1000; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -8818,7 +8872,7 @@ def L2_loadrb_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memb($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111000; let hasNewValue = 1; @@ -8831,7 +8885,7 @@ def L2_loadrb_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memb($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_16303398 { +tc_3eab77bd, TypeLD>, Enc_e0a47a { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001000; let hasNewValue = 1; @@ -8846,7 +8900,7 @@ def L2_loadrb_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memb($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001000; let hasNewValue = 1; @@ -8861,7 +8915,7 @@ def L2_loadrb_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii), "$Rd32 = memb($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5598813, PredNewRel { +tc_65dc7cc4, TypeLD>, Enc_222336, PredNewRel { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011000; let hasNewValue = 1; @@ -8877,7 +8931,7 @@ def L2_loadrb_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memb($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101000; let hasNewValue = 1; @@ -8891,7 +8945,7 @@ def L2_loadrb_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memb($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -8901,7 +8955,7 @@ def L2_loadrbgp : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = memb(gp+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_25bef0, AddrModeRel { let Inst{24-21} = 0b1000; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -8920,7 +8974,7 @@ def L2_loadrd_io : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s29_3Imm:$Ii), "$Rdd32 = memd($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_163381, AddrModeRel { +tc_bf6fa601, TypeLD>, Enc_fa3ba4, AddrModeRel { let Inst{24-21} = 0b1110; let Inst{31-27} = 0b10010; let addrMode = BaseImmOffset; @@ -8939,7 +8993,7 @@ def L2_loadrd_pbr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memd($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 { +tc_65dc7cc4, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111110; let accessSize = DoubleWordAccess; @@ -8950,7 +9004,7 @@ def L2_loadrd_pci : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2), "$Rdd32 = memd($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_931653 { +tc_3eab77bd, TypeLD>, Enc_b05839 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001110; let addrMode = PostInc; @@ -8963,7 +9017,7 @@ def L2_loadrd_pcr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memd($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 { +tc_65dc7cc4, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001110; let addrMode = PostInc; @@ -8976,7 +9030,7 @@ def L2_loadrd_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_3Imm:$Ii), "$Rdd32 = memd($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_9752128, PredNewRel { +tc_65dc7cc4, TypeLD>, Enc_5bdd42, PredNewRel { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011110; let addrMode = PostInc; @@ -8990,7 +9044,7 @@ def L2_loadrd_pr : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rdd32 = memd($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 { +tc_65dc7cc4, TypeLD>, Enc_7eee72 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101110; let addrMode = PostInc; @@ -9002,7 +9056,7 @@ def L2_loadrd_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = memd($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9010,7 +9064,7 @@ def L2_loadrdgp : HInst< (outs DoubleRegs:$Rdd32), (ins u29_3Imm:$Ii), "$Rdd32 = memd(gp+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4975051, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_509701, AddrModeRel { let Inst{24-21} = 0b1110; let Inst{31-27} = 0b01001; let accessSize = DoubleWordAccess; @@ -9027,7 +9081,7 @@ def L2_loadrh_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memh($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_15275738, AddrModeRel { +tc_bf6fa601, TypeLD>, Enc_de0214, AddrModeRel { let Inst{24-21} = 0b1010; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -9048,7 +9102,7 @@ def L2_loadrh_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memh($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111010; let hasNewValue = 1; @@ -9061,7 +9115,7 @@ def L2_loadrh_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memh($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 { +tc_3eab77bd, TypeLD>, Enc_e83554 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001010; let hasNewValue = 1; @@ -9076,7 +9130,7 @@ def L2_loadrh_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memh($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001010; let hasNewValue = 1; @@ -9091,7 +9145,7 @@ def L2_loadrh_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii), "$Rd32 = memh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009, PredNewRel { +tc_65dc7cc4, TypeLD>, Enc_152467, PredNewRel { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011010; let hasNewValue = 1; @@ -9107,7 +9161,7 @@ def L2_loadrh_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memh($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101010; let hasNewValue = 1; @@ -9121,7 +9175,7 @@ def L2_loadrh_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9131,7 +9185,7 @@ def L2_loadrhgp : HInst< (outs IntRegs:$Rd32), (ins u31_1Imm:$Ii), "$Rd32 = memh(gp+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_8df4be, AddrModeRel { let Inst{24-21} = 0b1010; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -9150,7 +9204,7 @@ def L2_loadri_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rd32 = memw($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_8990840, AddrModeRel { +tc_bf6fa601, TypeLD>, Enc_2a3787, AddrModeRel { let Inst{24-21} = 0b1100; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -9171,7 +9225,7 @@ def L2_loadri_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memw($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111100; let hasNewValue = 1; @@ -9184,7 +9238,7 @@ def L2_loadri_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memw($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_14303394 { +tc_3eab77bd, TypeLD>, Enc_27fd0e { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001100; let hasNewValue = 1; @@ -9199,7 +9253,7 @@ def L2_loadri_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memw($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001100; let hasNewValue = 1; @@ -9214,7 +9268,7 @@ def L2_loadri_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii), "$Rd32 = memw($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_16376009, PredNewRel { +tc_65dc7cc4, TypeLD>, Enc_3d920a, PredNewRel { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011100; let hasNewValue = 1; @@ -9230,7 +9284,7 @@ def L2_loadri_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memw($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101100; let hasNewValue = 1; @@ -9244,7 +9298,7 @@ def L2_loadri_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memw($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9254,7 +9308,7 @@ def L2_loadrigp : HInst< (outs IntRegs:$Rd32), (ins u30_2Imm:$Ii), "$Rd32 = memw(gp+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_8814718, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_4f4ed7, AddrModeRel { let Inst{24-21} = 0b1100; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -9273,7 +9327,7 @@ def L2_loadrub_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memub($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_14461004, AddrModeRel { +tc_bf6fa601, TypeLD>, Enc_211aaa, AddrModeRel { let Inst{24-21} = 0b1001; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -9294,7 +9348,7 @@ def L2_loadrub_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memub($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111001; let hasNewValue = 1; @@ -9307,7 +9361,7 @@ def L2_loadrub_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memub($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_16303398 { +tc_3eab77bd, TypeLD>, Enc_e0a47a { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001001; let hasNewValue = 1; @@ -9322,7 +9376,7 @@ def L2_loadrub_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memub($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001001; let hasNewValue = 1; @@ -9337,7 +9391,7 @@ def L2_loadrub_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii), "$Rd32 = memub($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5598813, PredNewRel { +tc_65dc7cc4, TypeLD>, Enc_222336, PredNewRel { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011001; let hasNewValue = 1; @@ -9353,7 +9407,7 @@ def L2_loadrub_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memub($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101001; let hasNewValue = 1; @@ -9367,7 +9421,7 @@ def L2_loadrub_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memub($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9377,7 +9431,7 @@ def L2_loadrubgp : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = memub(gp+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_25bef0, AddrModeRel { let Inst{24-21} = 0b1001; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -9396,7 +9450,7 @@ def L2_loadruh_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memuh($Rs32+#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_15275738, AddrModeRel { +tc_bf6fa601, TypeLD>, Enc_de0214, AddrModeRel { let Inst{24-21} = 0b1011; let Inst{31-27} = 0b10010; let hasNewValue = 1; @@ -9417,7 +9471,7 @@ def L2_loadruh_pbr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memuh($Rx32++$Mu2:brev)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011111011; let hasNewValue = 1; @@ -9430,7 +9484,7 @@ def L2_loadruh_pci : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2), "$Rd32 = memuh($Rx32++#$Ii:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 { +tc_3eab77bd, TypeLD>, Enc_e83554 { let Inst{12-9} = 0b0000; let Inst{31-21} = 0b10011001011; let hasNewValue = 1; @@ -9445,7 +9499,7 @@ def L2_loadruh_pcr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memuh($Rx32++I:circ($Mu2))", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00010000; let Inst{31-21} = 0b10011001011; let hasNewValue = 1; @@ -9460,7 +9514,7 @@ def L2_loadruh_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii), "$Rd32 = memuh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009, PredNewRel { +tc_65dc7cc4, TypeLD>, Enc_152467, PredNewRel { let Inst{13-9} = 0b00000; let Inst{31-21} = 0b10011011011; let hasNewValue = 1; @@ -9476,7 +9530,7 @@ def L2_loadruh_pr : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Rd32 = memuh($Rx32++$Mu2)", -LD_tc_ld_SLOT01, TypeLD>, Enc_48594 { +tc_65dc7cc4, TypeLD>, Enc_74d4e5 { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b10011101011; let hasNewValue = 1; @@ -9490,7 +9544,7 @@ def L2_loadruh_zomap : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memuh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_bf6fa601, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9500,7 +9554,7 @@ def L2_loadruhgp : HInst< (outs IntRegs:$Rd32), (ins u31_1Imm:$Ii), "$Rd32 = memuh(gp+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_8df4be, AddrModeRel { let Inst{24-21} = 0b1011; let Inst{31-27} = 0b01001; let hasNewValue = 1; @@ -9519,20 +9573,20 @@ def L2_loadw_locked : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = memw_locked($Rs32)", -LD_tc_ld_SLOT0, TypeLD>, Enc_4075554 { +tc_29c14515, TypeLD>, Enc_5e2823 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10010010000; let hasNewValue = 1; let opNewValue = 0; let accessSize = WordAccess; -let isSoloAX = 1; let mayLoad = 1; +let isSoloAX = 1; } def L2_ploadrbf_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memb($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101000; let isPredicated = 1; @@ -9554,7 +9608,7 @@ def L2_ploadrbf_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if (!$Pt4) $Rd32 = memb($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel { +tc_ae762521, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011000; let isPredicated = 1; @@ -9571,7 +9625,7 @@ def L2_ploadrbf_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memb($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9581,7 +9635,7 @@ def L2_ploadrbfnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memb($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111000; let isPredicated = 1; @@ -9604,7 +9658,7 @@ def L2_ploadrbfnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memb($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel { +tc_e578178f, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011000; let isPredicated = 1; @@ -9622,7 +9676,7 @@ def L2_ploadrbfnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memb($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9632,7 +9686,7 @@ def L2_ploadrbt_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memb($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001000; let isPredicated = 1; @@ -9653,7 +9707,7 @@ def L2_ploadrbt_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if ($Pt4) $Rd32 = memb($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel { +tc_ae762521, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011000; let isPredicated = 1; @@ -9669,7 +9723,7 @@ def L2_ploadrbt_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memb($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9679,7 +9733,7 @@ def L2_ploadrbtnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memb($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011000; let isPredicated = 1; @@ -9701,7 +9755,7 @@ def L2_ploadrbtnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memb($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel { +tc_e578178f, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011000; let isPredicated = 1; @@ -9718,7 +9772,7 @@ def L2_ploadrbtnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memb($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9728,7 +9782,7 @@ def L2_ploadrdf_io : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii), "if (!$Pt4) $Rdd32 = memd($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_acd6ed, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101110; let isPredicated = 1; @@ -9748,7 +9802,7 @@ def L2_ploadrdf_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii), "if (!$Pt4) $Rdd32 = memd($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel { +tc_ae762521, TypeLD>, Enc_9d1247, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011110; let isPredicated = 1; @@ -9763,7 +9817,7 @@ def L2_ploadrdf_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rdd32 = memd($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9771,7 +9825,7 @@ def L2_ploadrdfnew_io : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii), "if (!$Pt4.new) $Rdd32 = memd($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111110; let isPredicated = 1; @@ -9792,7 +9846,7 @@ def L2_ploadrdfnew_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii), "if (!$Pt4.new) $Rdd32 = memd($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel { +tc_e578178f, TypeLD>, Enc_9d1247, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011110; let isPredicated = 1; @@ -9808,7 +9862,7 @@ def L2_ploadrdfnew_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rdd32 = memd($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9816,7 +9870,7 @@ def L2_ploadrdt_io : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii), "if ($Pt4) $Rdd32 = memd($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_acd6ed, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001110; let isPredicated = 1; @@ -9835,7 +9889,7 @@ def L2_ploadrdt_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii), "if ($Pt4) $Rdd32 = memd($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel { +tc_ae762521, TypeLD>, Enc_9d1247, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011110; let isPredicated = 1; @@ -9849,7 +9903,7 @@ def L2_ploadrdt_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rdd32 = memd($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9857,7 +9911,7 @@ def L2_ploadrdtnew_io : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii), "if ($Pt4.new) $Rdd32 = memd($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_acd6ed, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011110; let isPredicated = 1; @@ -9877,7 +9931,7 @@ def L2_ploadrdtnew_pi : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii), "if ($Pt4.new) $Rdd32 = memd($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel { +tc_e578178f, TypeLD>, Enc_9d1247, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011110; let isPredicated = 1; @@ -9892,7 +9946,7 @@ def L2_ploadrdtnew_zomap : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rdd32 = memd($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -9900,7 +9954,7 @@ def L2_ploadrhf_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if (!$Pt4) $Rd32 = memh($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101010; let isPredicated = 1; @@ -9922,7 +9976,7 @@ def L2_ploadrhf_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if (!$Pt4) $Rd32 = memh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel { +tc_ae762521, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011010; let isPredicated = 1; @@ -9939,7 +9993,7 @@ def L2_ploadrhf_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -9949,7 +10003,7 @@ def L2_ploadrhfnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if (!$Pt4.new) $Rd32 = memh($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111010; let isPredicated = 1; @@ -9972,7 +10026,7 @@ def L2_ploadrhfnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if (!$Pt4.new) $Rd32 = memh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel { +tc_e578178f, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011010; let isPredicated = 1; @@ -9990,7 +10044,7 @@ def L2_ploadrhfnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10000,7 +10054,7 @@ def L2_ploadrht_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if ($Pt4) $Rd32 = memh($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001010; let isPredicated = 1; @@ -10021,7 +10075,7 @@ def L2_ploadrht_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if ($Pt4) $Rd32 = memh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel { +tc_ae762521, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011010; let isPredicated = 1; @@ -10037,7 +10091,7 @@ def L2_ploadrht_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10047,7 +10101,7 @@ def L2_ploadrhtnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if ($Pt4.new) $Rd32 = memh($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011010; let isPredicated = 1; @@ -10069,7 +10123,7 @@ def L2_ploadrhtnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if ($Pt4.new) $Rd32 = memh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel { +tc_e578178f, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011010; let isPredicated = 1; @@ -10086,7 +10140,7 @@ def L2_ploadrhtnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10096,7 +10150,7 @@ def L2_ploadrif_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii), "if (!$Pt4) $Rd32 = memw($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_f82eaf, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101100; let isPredicated = 1; @@ -10118,7 +10172,7 @@ def L2_ploadrif_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii), "if (!$Pt4) $Rd32 = memw($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel { +tc_ae762521, TypeLD>, Enc_b97f71, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011100; let isPredicated = 1; @@ -10135,7 +10189,7 @@ def L2_ploadrif_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memw($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10145,7 +10199,7 @@ def L2_ploadrifnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii), "if (!$Pt4.new) $Rd32 = memw($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111100; let isPredicated = 1; @@ -10168,7 +10222,7 @@ def L2_ploadrifnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii), "if (!$Pt4.new) $Rd32 = memw($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel { +tc_e578178f, TypeLD>, Enc_b97f71, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011100; let isPredicated = 1; @@ -10186,7 +10240,7 @@ def L2_ploadrifnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memw($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10196,7 +10250,7 @@ def L2_ploadrit_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii), "if ($Pt4) $Rd32 = memw($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_f82eaf, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001100; let isPredicated = 1; @@ -10217,7 +10271,7 @@ def L2_ploadrit_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii), "if ($Pt4) $Rd32 = memw($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel { +tc_ae762521, TypeLD>, Enc_b97f71, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011100; let isPredicated = 1; @@ -10233,7 +10287,7 @@ def L2_ploadrit_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memw($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10243,7 +10297,7 @@ def L2_ploadritnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii), "if ($Pt4.new) $Rd32 = memw($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_f82eaf, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011100; let isPredicated = 1; @@ -10265,7 +10319,7 @@ def L2_ploadritnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii), "if ($Pt4.new) $Rd32 = memw($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel { +tc_e578178f, TypeLD>, Enc_b97f71, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011100; let isPredicated = 1; @@ -10282,7 +10336,7 @@ def L2_ploadritnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memw($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10292,7 +10346,7 @@ def L2_ploadrubf_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memub($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101001; let isPredicated = 1; @@ -10314,7 +10368,7 @@ def L2_ploadrubf_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if (!$Pt4) $Rd32 = memub($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel { +tc_ae762521, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011001; let isPredicated = 1; @@ -10331,7 +10385,7 @@ def L2_ploadrubf_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memub($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10341,7 +10395,7 @@ def L2_ploadrubfnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memub($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111001; let isPredicated = 1; @@ -10364,7 +10418,7 @@ def L2_ploadrubfnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memub($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel { +tc_e578178f, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011001; let isPredicated = 1; @@ -10382,7 +10436,7 @@ def L2_ploadrubfnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memub($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10392,7 +10446,7 @@ def L2_ploadrubt_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memub($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001001; let isPredicated = 1; @@ -10413,7 +10467,7 @@ def L2_ploadrubt_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if ($Pt4) $Rd32 = memub($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel { +tc_ae762521, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011001; let isPredicated = 1; @@ -10429,7 +10483,7 @@ def L2_ploadrubt_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memub($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10439,7 +10493,7 @@ def L2_ploadrubtnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memub($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_a21d47, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011001; let isPredicated = 1; @@ -10461,7 +10515,7 @@ def L2_ploadrubtnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memub($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel { +tc_e578178f, TypeLD>, Enc_f4413a, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011001; let isPredicated = 1; @@ -10478,7 +10532,7 @@ def L2_ploadrubtnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memub($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10488,7 +10542,7 @@ def L2_ploadruhf_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if (!$Pt4) $Rd32 = memuh($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000101011; let isPredicated = 1; @@ -10510,7 +10564,7 @@ def L2_ploadruhf_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if (!$Pt4) $Rd32 = memuh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel { +tc_ae762521, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011011011; let isPredicated = 1; @@ -10527,7 +10581,7 @@ def L2_ploadruhf_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4) $Rd32 = memuh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10537,7 +10591,7 @@ def L2_ploadruhfnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if (!$Pt4.new) $Rd32 = memuh($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000111011; let isPredicated = 1; @@ -10560,7 +10614,7 @@ def L2_ploadruhfnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if (!$Pt4.new) $Rd32 = memuh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel { +tc_e578178f, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011011011; let isPredicated = 1; @@ -10578,7 +10632,7 @@ def L2_ploadruhfnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if (!$Pt4.new) $Rd32 = memuh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10588,7 +10642,7 @@ def L2_ploadruht_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if ($Pt4) $Rd32 = memuh($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel { +tc_14da557c, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000001011; let isPredicated = 1; @@ -10609,7 +10663,7 @@ def L2_ploadruht_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if ($Pt4) $Rd32 = memuh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel { +tc_ae762521, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011011011; let isPredicated = 1; @@ -10625,7 +10679,7 @@ def L2_ploadruht_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4) $Rd32 = memuh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_14da557c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10635,7 +10689,7 @@ def L2_ploadruhtnew_io : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii), "if ($Pt4.new) $Rd32 = memuh($Rs32+#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel { +tc_65dc7cc4, TypeV2LDST>, Enc_a198f6, AddrModeRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b01000011011; let isPredicated = 1; @@ -10657,7 +10711,7 @@ def L2_ploadruhtnew_pi : HInst< (outs IntRegs:$Rd32, IntRegs:$Rx32), (ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii), "if ($Pt4.new) $Rd32 = memuh($Rx32++#$Ii)", -LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel { +tc_e578178f, TypeLD>, Enc_733b27, PredNewRel { let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011011011; let isPredicated = 1; @@ -10674,7 +10728,7 @@ def L2_ploadruhtnew_zomap : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, IntRegs:$Rs32), "if ($Pt4.new) $Rd32 = memuh($Rs32)", -PSEUDO, TypeMAPPING> { +tc_65dc7cc4, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -10684,14 +10738,14 @@ def L4_add_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) += $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 { +tc_a9c993d9, TypeV4LDST>, Enc_d44e31 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10702,7 +10756,7 @@ def L4_add_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) += $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10710,14 +10764,14 @@ def L4_add_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) += $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 { +tc_a9c993d9, TypeV4LDST>, Enc_163a3c { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10728,7 +10782,7 @@ def L4_add_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) += $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10736,14 +10790,14 @@ def L4_add_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) += $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 { +tc_a9c993d9, TypeV4LDST>, Enc_226535 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110010; let addrMode = BaseImmOffset; let accessSize = WordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10754,7 +10808,7 @@ def L4_add_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) += $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10762,14 +10816,14 @@ def L4_and_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) &= $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 { +tc_a9c993d9, TypeV4LDST>, Enc_d44e31 { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10780,7 +10834,7 @@ def L4_and_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) &= $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10788,14 +10842,14 @@ def L4_and_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) &= $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 { +tc_a9c993d9, TypeV4LDST>, Enc_163a3c { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10806,7 +10860,7 @@ def L4_and_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) &= $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10814,14 +10868,14 @@ def L4_and_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) &= $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 { +tc_a9c993d9, TypeV4LDST>, Enc_226535 { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110010; let addrMode = BaseImmOffset; let accessSize = WordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10832,7 +10886,7 @@ def L4_and_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) &= $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10840,14 +10894,14 @@ def L4_iadd_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II), "memb($Rs32+#$Ii) += #$II", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 { +tc_da79106e, TypeV4LDST>, Enc_46c951 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10858,7 +10912,7 @@ def L4_iadd_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memb($Rs32) += #$II", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10866,14 +10920,14 @@ def L4_iadd_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II), "memh($Rs32+#$Ii) += #$II", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 { +tc_da79106e, TypeV4LDST>, Enc_e66a97 { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10884,7 +10938,7 @@ def L4_iadd_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memh($Rs32) += #$II", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10892,14 +10946,14 @@ def L4_iadd_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II), "memw($Rs32+#$Ii) += #$II", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 { +tc_da79106e, TypeV4LDST>, Enc_84b2cd { let Inst{6-5} = 0b00; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111010; let addrMode = BaseImmOffset; let accessSize = WordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10910,7 +10964,7 @@ def L4_iadd_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memw($Rs32) += #$II", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10918,14 +10972,14 @@ def L4_iand_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II), "memb($Rs32+#$Ii) = clrbit(#$II)", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 { +tc_da79106e, TypeV4LDST>, Enc_46c951 { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10936,7 +10990,7 @@ def L4_iand_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memb($Rs32) = clrbit(#$II)", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10944,14 +10998,14 @@ def L4_iand_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II), "memh($Rs32+#$Ii) = clrbit(#$II)", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 { +tc_da79106e, TypeV4LDST>, Enc_e66a97 { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10962,7 +11016,7 @@ def L4_iand_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memh($Rs32) = clrbit(#$II)", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10970,14 +11024,14 @@ def L4_iand_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II), "memw($Rs32+#$Ii) = clrbit(#$II)", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 { +tc_da79106e, TypeV4LDST>, Enc_84b2cd { let Inst{6-5} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111010; let addrMode = BaseImmOffset; let accessSize = WordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -10988,7 +11042,7 @@ def L4_iand_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memw($Rs32) = clrbit(#$II)", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -10996,14 +11050,14 @@ def L4_ior_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II), "memb($Rs32+#$Ii) = setbit(#$II)", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 { +tc_da79106e, TypeV4LDST>, Enc_46c951 { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -11014,7 +11068,7 @@ def L4_ior_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memb($Rs32) = setbit(#$II)", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11022,14 +11076,14 @@ def L4_ior_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II), "memh($Rs32+#$Ii) = setbit(#$II)", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 { +tc_da79106e, TypeV4LDST>, Enc_e66a97 { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -11040,7 +11094,7 @@ def L4_ior_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memh($Rs32) = setbit(#$II)", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11048,14 +11102,14 @@ def L4_ior_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II), "memw($Rs32+#$Ii) = setbit(#$II)", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 { +tc_da79106e, TypeV4LDST>, Enc_84b2cd { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111010; let addrMode = BaseImmOffset; let accessSize = WordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -11066,7 +11120,7 @@ def L4_ior_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memw($Rs32) = setbit(#$II)", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11074,14 +11128,14 @@ def L4_isub_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II), "memb($Rs32+#$Ii) -= #$II", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 { +tc_da79106e, TypeV4LDST>, Enc_46c951 { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -11092,7 +11146,7 @@ def L4_isub_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memb($Rs32) -= #$II", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11100,14 +11154,14 @@ def L4_isub_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II), "memh($Rs32+#$Ii) -= #$II", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 { +tc_da79106e, TypeV4LDST>, Enc_e66a97 { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -11118,7 +11172,7 @@ def L4_isub_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memh($Rs32) -= #$II", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11126,14 +11180,14 @@ def L4_isub_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II), "memw($Rs32+#$Ii) -= #$II", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 { +tc_da79106e, TypeV4LDST>, Enc_84b2cd { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111111010; let addrMode = BaseImmOffset; let accessSize = WordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -11144,7 +11198,7 @@ def L4_isub_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, u5_0Imm:$II), "memw($Rs32) -= #$II", -PSEUDO, TypeMAPPING> { +tc_da79106e, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11152,7 +11206,7 @@ def L4_loadalignb_ap : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Re32), (ins DoubleRegs:$Ryy32in, u32_0Imm:$II), "$Ryy32 = memb_fifo($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_11047413 { +tc_261d9b78, TypeLD>, Enc_f394d3 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010100; @@ -11160,8 +11214,8 @@ let hasNewValue = 1; let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 3; @@ -11174,13 +11228,13 @@ def L4_loadalignb_ur : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Ryy32 = memb_fifo($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_7303598 { +tc_baccf077, TypeLD>, Enc_04c959 { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100100; let addrMode = BaseLongOffset; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let InputType = "imm"; let DecoderNamespace = "MustExtend"; let isExtendable = 1; @@ -11194,7 +11248,7 @@ def L4_loadalignh_ap : HInst< (outs DoubleRegs:$Ryy32, IntRegs:$Re32), (ins DoubleRegs:$Ryy32in, u32_0Imm:$II), "$Ryy32 = memh_fifo($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_11047413 { +tc_261d9b78, TypeLD>, Enc_f394d3 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010010; @@ -11202,8 +11256,8 @@ let hasNewValue = 1; let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 3; @@ -11216,13 +11270,13 @@ def L4_loadalignh_ur : HInst< (outs DoubleRegs:$Ryy32), (ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Ryy32 = memh_fifo($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_7303598 { +tc_baccf077, TypeLD>, Enc_04c959 { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100010; let addrMode = BaseLongOffset; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let InputType = "imm"; let DecoderNamespace = "MustExtend"; let isExtendable = 1; @@ -11236,7 +11290,7 @@ def L4_loadbsw2_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = membh($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 { +tc_b5f5a094, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010001; @@ -11246,8 +11300,8 @@ let hasNewValue2 = 1; let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11259,15 +11313,15 @@ def L4_loadbsw2_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = membh($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_486163 { +tc_7d9a56cd, TypeLD>, Enc_4f677b { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100001; let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseLongOffset; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let InputType = "imm"; let DecoderNamespace = "MustExtend"; let isExtendable = 1; @@ -11280,7 +11334,7 @@ def L4_loadbsw4_ap : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rdd32 = membh($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_877823 { +tc_b5f5a094, TypeLD>, Enc_7fa7f6 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010111; @@ -11288,8 +11342,8 @@ let hasNewValue = 1; let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = WordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11301,13 +11355,13 @@ def L4_loadbsw4_ur : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = membh($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_5582416 { +tc_7d9a56cd, TypeLD>, Enc_6185fe { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100111; let addrMode = BaseLongOffset; let accessSize = WordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let InputType = "imm"; let DecoderNamespace = "MustExtend"; let isExtendable = 1; @@ -11320,7 +11374,7 @@ def L4_loadbzw2_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memubh($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 { +tc_b5f5a094, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010011; @@ -11330,8 +11384,8 @@ let hasNewValue2 = 1; let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11343,15 +11397,15 @@ def L4_loadbzw2_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memubh($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_486163 { +tc_7d9a56cd, TypeLD>, Enc_4f677b { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100011; let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseLongOffset; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let InputType = "imm"; let DecoderNamespace = "MustExtend"; let isExtendable = 1; @@ -11364,7 +11418,7 @@ def L4_loadbzw4_ap : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rdd32 = memubh($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_877823 { +tc_b5f5a094, TypeLD>, Enc_7fa7f6 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011010101; @@ -11372,8 +11426,8 @@ let hasNewValue = 1; let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = WordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11385,13 +11439,13 @@ def L4_loadbzw4_ur : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = memubh($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_5582416 { +tc_7d9a56cd, TypeLD>, Enc_6185fe { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011100101; let addrMode = BaseLongOffset; let accessSize = WordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let InputType = "imm"; let DecoderNamespace = "MustExtend"; let isExtendable = 1; @@ -11404,18 +11458,18 @@ def L4_loadd_locked : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = memd_locked($Rs32)", -LD_tc_ld_SLOT0, TypeLD>, Enc_4030179 { +tc_29c14515, TypeLD>, Enc_3a3d62 { let Inst{13-5} = 0b010000000; let Inst{31-21} = 0b10010010000; let accessSize = DoubleWordAccess; -let isSoloAX = 1; let mayLoad = 1; +let isSoloAX = 1; } def L4_loadrb_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memb($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 { +tc_b5f5a094, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011000; @@ -11425,8 +11479,8 @@ let hasNewValue2 = 1; let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11438,7 +11492,7 @@ def L4_loadrb_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memb($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl { +tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010000; let hasNewValue = 1; @@ -11455,15 +11509,15 @@ def L4_loadrb_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memb($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl { +tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101000; let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseLongOffset; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrb"; let InputType = "imm"; let DecoderNamespace = "MustExtend"; @@ -11477,7 +11531,7 @@ def L4_loadrd_ap : HInst< (outs DoubleRegs:$Rdd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rdd32 = memd($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_877823 { +tc_b5f5a094, TypeLD>, Enc_7fa7f6 { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011110; @@ -11485,8 +11539,8 @@ let hasNewValue = 1; let opNewValue = 1; let addrMode = AbsoluteSet; let accessSize = DoubleWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11498,7 +11552,7 @@ def L4_loadrd_rr : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7581852, AddrModeRel, ImmRegShl { +tc_5625c6c1, TypeLD>, Enc_84bff1, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010110; let addrMode = BaseRegOffset; @@ -11513,13 +11567,13 @@ def L4_loadrd_ur : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = memd($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_5582416, AddrModeRel, ImmRegShl { +tc_7d9a56cd, TypeLD>, Enc_6185fe, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101110; let addrMode = BaseLongOffset; let accessSize = DoubleWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrd"; let InputType = "imm"; let DecoderNamespace = "MustExtend"; @@ -11533,7 +11587,7 @@ def L4_loadrh_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memh($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 { +tc_b5f5a094, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011010; @@ -11543,8 +11597,8 @@ let hasNewValue2 = 1; let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11556,7 +11610,7 @@ def L4_loadrh_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl { +tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010010; let hasNewValue = 1; @@ -11573,15 +11627,15 @@ def L4_loadrh_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memh($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl { +tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101010; let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseLongOffset; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrh"; let InputType = "imm"; let DecoderNamespace = "MustExtend"; @@ -11595,7 +11649,7 @@ def L4_loadri_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memw($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 { +tc_b5f5a094, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011100; @@ -11605,8 +11659,8 @@ let hasNewValue2 = 1; let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = WordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11618,7 +11672,7 @@ def L4_loadri_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memw($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl { +tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010100; let hasNewValue = 1; @@ -11635,15 +11689,15 @@ def L4_loadri_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memw($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl { +tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101100; let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseLongOffset; let accessSize = WordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadri"; let InputType = "imm"; let DecoderNamespace = "MustExtend"; @@ -11657,7 +11711,7 @@ def L4_loadrub_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memub($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 { +tc_b5f5a094, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011001; @@ -11667,8 +11721,8 @@ let hasNewValue2 = 1; let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11680,7 +11734,7 @@ def L4_loadrub_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memub($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl { +tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010001; let hasNewValue = 1; @@ -11697,15 +11751,15 @@ def L4_loadrub_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memub($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl { +tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101001; let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseLongOffset; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrub"; let InputType = "imm"; let DecoderNamespace = "MustExtend"; @@ -11719,7 +11773,7 @@ def L4_loadruh_ap : HInst< (outs IntRegs:$Rd32, IntRegs:$Re32), (ins u32_0Imm:$II), "$Rd32 = memuh($Re32=#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 { +tc_b5f5a094, TypeLD>, Enc_323f2d { let Inst{7-7} = 0b0; let Inst{13-12} = 0b01; let Inst{31-21} = 0b10011011011; @@ -11729,8 +11783,8 @@ let hasNewValue2 = 1; let opNewValue2 = 1; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let DecoderNamespace = "MustExtend"; let isExtendable = 1; let opExtendable = 2; @@ -11742,7 +11796,7 @@ def L4_loadruh_rr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl { +tc_5625c6c1, TypeLD>, Enc_da664b, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111010011; let hasNewValue = 1; @@ -11759,15 +11813,15 @@ def L4_loadruh_ur : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II), "$Rd32 = memuh($Rt32<<#$Ii+#$II)", -LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl { +tc_7d9a56cd, TypeLD>, Enc_4f677b, AddrModeRel, ImmRegShl { let Inst{12-12} = 0b1; let Inst{31-21} = 0b10011101011; let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseLongOffset; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadruh"; let InputType = "imm"; let DecoderNamespace = "MustExtend"; @@ -11781,14 +11835,14 @@ def L4_or_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) |= $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 { +tc_a9c993d9, TypeV4LDST>, Enc_d44e31 { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -11799,7 +11853,7 @@ def L4_or_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) |= $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11807,14 +11861,14 @@ def L4_or_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) |= $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 { +tc_a9c993d9, TypeV4LDST>, Enc_163a3c { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -11825,7 +11879,7 @@ def L4_or_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) |= $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11833,14 +11887,14 @@ def L4_or_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) |= $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 { +tc_a9c993d9, TypeV4LDST>, Enc_226535 { let Inst{6-5} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110010; let addrMode = BaseImmOffset; let accessSize = WordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -11851,7 +11905,7 @@ def L4_or_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) |= $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -11859,7 +11913,7 @@ def L4_ploadrbf_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memb(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111000; @@ -11869,8 +11923,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrb"; let BaseOpcode = "L4_loadrb_abs"; let DecoderNamespace = "MustExtend"; @@ -11884,7 +11938,7 @@ def L4_ploadrbf_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001000; let isPredicated = 1; let isPredicatedFalse = 1; @@ -11901,7 +11955,7 @@ def L4_ploadrbfnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memb(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111000; @@ -11911,9 +11965,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrb"; let BaseOpcode = "L4_loadrb_abs"; let DecoderNamespace = "MustExtend"; @@ -11927,7 +11981,7 @@ def L4_ploadrbfnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011000; let isPredicated = 1; let isPredicatedFalse = 1; @@ -11945,7 +11999,7 @@ def L4_ploadrbt_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memb(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111000; @@ -11954,8 +12008,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrb"; let BaseOpcode = "L4_loadrb_abs"; let DecoderNamespace = "MustExtend"; @@ -11969,7 +12023,7 @@ def L4_ploadrbt_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000000; let isPredicated = 1; let hasNewValue = 1; @@ -11985,7 +12039,7 @@ def L4_ploadrbtnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memb(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111000; @@ -11994,9 +12048,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrb"; let BaseOpcode = "L4_loadrb_abs"; let DecoderNamespace = "MustExtend"; @@ -12010,7 +12064,7 @@ def L4_ploadrbtnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010000; let isPredicated = 1; let hasNewValue = 1; @@ -12027,7 +12081,7 @@ def L4_ploadrdf_abs : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rdd32 = memd(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2a7b91, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111110; @@ -12035,8 +12089,8 @@ let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = DoubleWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrd"; let BaseOpcode = "L4_loadrd_abs"; let DecoderNamespace = "MustExtend"; @@ -12050,7 +12104,7 @@ def L4_ploadrdf_rr : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_98c0b8, AddrModeRel { let Inst{31-21} = 0b00110001110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12065,7 +12119,7 @@ def L4_ploadrdfnew_abs : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rdd32 = memd(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2a7b91, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111110; @@ -12073,9 +12127,9 @@ let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = DoubleWordAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrd"; let BaseOpcode = "L4_loadrd_abs"; let DecoderNamespace = "MustExtend"; @@ -12089,7 +12143,7 @@ def L4_ploadrdfnew_rr : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel { +tc_128719e8, TypeLD>, Enc_98c0b8, AddrModeRel { let Inst{31-21} = 0b00110011110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12105,15 +12159,15 @@ def L4_ploadrdt_abs : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rdd32 = memd(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2a7b91, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111110; let isPredicated = 1; let addrMode = Absolute; let accessSize = DoubleWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrd"; let BaseOpcode = "L4_loadrd_abs"; let DecoderNamespace = "MustExtend"; @@ -12127,7 +12181,7 @@ def L4_ploadrdt_rr : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_98c0b8, AddrModeRel { let Inst{31-21} = 0b00110000110; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -12141,16 +12195,16 @@ def L4_ploadrdtnew_abs : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rdd32 = memd(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2a7b91, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111110; let isPredicated = 1; let addrMode = Absolute; let accessSize = DoubleWordAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrd"; let BaseOpcode = "L4_loadrd_abs"; let DecoderNamespace = "MustExtend"; @@ -12164,7 +12218,7 @@ def L4_ploadrdtnew_rr : HInst< (outs DoubleRegs:$Rdd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel { +tc_128719e8, TypeLD>, Enc_98c0b8, AddrModeRel { let Inst{31-21} = 0b00110010110; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -12179,7 +12233,7 @@ def L4_ploadrhf_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memh(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111010; @@ -12189,8 +12243,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrh"; let BaseOpcode = "L4_loadrh_abs"; let DecoderNamespace = "MustExtend"; @@ -12204,7 +12258,7 @@ def L4_ploadrhf_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001010; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12221,7 +12275,7 @@ def L4_ploadrhfnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memh(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111010; @@ -12231,9 +12285,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrh"; let BaseOpcode = "L4_loadrh_abs"; let DecoderNamespace = "MustExtend"; @@ -12247,7 +12301,7 @@ def L4_ploadrhfnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011010; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12265,7 +12319,7 @@ def L4_ploadrht_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memh(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111010; @@ -12274,8 +12328,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrh"; let BaseOpcode = "L4_loadrh_abs"; let DecoderNamespace = "MustExtend"; @@ -12289,7 +12343,7 @@ def L4_ploadrht_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000010; let isPredicated = 1; let hasNewValue = 1; @@ -12305,7 +12359,7 @@ def L4_ploadrhtnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memh(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111010; @@ -12314,9 +12368,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrh"; let BaseOpcode = "L4_loadrh_abs"; let DecoderNamespace = "MustExtend"; @@ -12330,7 +12384,7 @@ def L4_ploadrhtnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010010; let isPredicated = 1; let hasNewValue = 1; @@ -12347,7 +12401,7 @@ def L4_ploadrif_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memw(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111100; @@ -12357,8 +12411,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = WordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadri"; let BaseOpcode = "L4_loadri_abs"; let DecoderNamespace = "MustExtend"; @@ -12372,7 +12426,7 @@ def L4_ploadrif_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12389,7 +12443,7 @@ def L4_ploadrifnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memw(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111100; @@ -12399,9 +12453,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = WordAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadri"; let BaseOpcode = "L4_loadri_abs"; let DecoderNamespace = "MustExtend"; @@ -12415,7 +12469,7 @@ def L4_ploadrifnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12433,7 +12487,7 @@ def L4_ploadrit_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memw(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111100; @@ -12442,8 +12496,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = WordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadri"; let BaseOpcode = "L4_loadri_abs"; let DecoderNamespace = "MustExtend"; @@ -12457,7 +12511,7 @@ def L4_ploadrit_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000100; let isPredicated = 1; let hasNewValue = 1; @@ -12473,7 +12527,7 @@ def L4_ploadritnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memw(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111100; @@ -12482,9 +12536,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = WordAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadri"; let BaseOpcode = "L4_loadri_abs"; let DecoderNamespace = "MustExtend"; @@ -12498,7 +12552,7 @@ def L4_ploadritnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010100; let isPredicated = 1; let hasNewValue = 1; @@ -12515,7 +12569,7 @@ def L4_ploadrubf_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memub(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111001; @@ -12525,8 +12579,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrub"; let BaseOpcode = "L4_loadrub_abs"; let DecoderNamespace = "MustExtend"; @@ -12540,7 +12594,7 @@ def L4_ploadrubf_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001001; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12557,7 +12611,7 @@ def L4_ploadrubfnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memub(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111001; @@ -12567,9 +12621,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrub"; let BaseOpcode = "L4_loadrub_abs"; let DecoderNamespace = "MustExtend"; @@ -12583,7 +12637,7 @@ def L4_ploadrubfnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011001; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12601,7 +12655,7 @@ def L4_ploadrubt_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memub(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111001; @@ -12610,8 +12664,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrub"; let BaseOpcode = "L4_loadrub_abs"; let DecoderNamespace = "MustExtend"; @@ -12625,7 +12679,7 @@ def L4_ploadrubt_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000001; let isPredicated = 1; let hasNewValue = 1; @@ -12641,7 +12695,7 @@ def L4_ploadrubtnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memub(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111001; @@ -12650,9 +12704,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrub"; let BaseOpcode = "L4_loadrub_abs"; let DecoderNamespace = "MustExtend"; @@ -12666,7 +12720,7 @@ def L4_ploadrubtnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010001; let isPredicated = 1; let hasNewValue = 1; @@ -12683,7 +12737,7 @@ def L4_ploadruhf_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4) $Rd32 = memuh(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b101; let Inst{31-21} = 0b10011111011; @@ -12693,8 +12747,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadruh"; let BaseOpcode = "L4_loadruh_abs"; let DecoderNamespace = "MustExtend"; @@ -12708,7 +12762,7 @@ def L4_ploadruhf_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110001011; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12725,7 +12779,7 @@ def L4_ploadruhfnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if (!$Pt4.new) $Rd32 = memuh(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b111; let Inst{31-21} = 0b10011111011; @@ -12735,9 +12789,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadruh"; let BaseOpcode = "L4_loadruh_abs"; let DecoderNamespace = "MustExtend"; @@ -12751,7 +12805,7 @@ def L4_ploadruhfnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if (!$Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110011011; let isPredicated = 1; let isPredicatedFalse = 1; @@ -12769,7 +12823,7 @@ def L4_ploadruht_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4) $Rd32 = memuh(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_136c4786, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b100; let Inst{31-21} = 0b10011111011; @@ -12778,8 +12832,8 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadruh"; let BaseOpcode = "L4_loadruh_abs"; let DecoderNamespace = "MustExtend"; @@ -12793,7 +12847,7 @@ def L4_ploadruht_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_9dafb7d3, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110000011; let isPredicated = 1; let hasNewValue = 1; @@ -12809,7 +12863,7 @@ def L4_ploadruhtnew_abs : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pt4, u32_0Imm:$Ii), "if ($Pt4.new) $Rd32 = memuh(#$Ii)", -LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel { +tc_b5f5a094, TypeLD>, Enc_2301d6, AddrModeRel { let Inst{7-5} = 0b100; let Inst{13-11} = 0b110; let Inst{31-21} = 0b10011111011; @@ -12818,9 +12872,9 @@ let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadruh"; let BaseOpcode = "L4_loadruh_abs"; let DecoderNamespace = "MustExtend"; @@ -12834,7 +12888,7 @@ def L4_ploadruhtnew_rr : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii), "if ($Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)", -V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel { +tc_128719e8, TypeLD>, Enc_2e1979, AddrModeRel { let Inst{31-21} = 0b00110010011; let isPredicated = 1; let hasNewValue = 1; @@ -12851,7 +12905,7 @@ def L4_return : HInst< (outs), (ins), "dealloc_return", -LD_tc_3or4stall_SLOT0, TypeLD>, Enc_0, PredNewRel { +tc_dcfee7ae, TypeLD>, Enc_3a3d62, PredNewRel { let Inst{4-0} = 0b11110; let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10010110000; @@ -12873,7 +12927,7 @@ def L4_return_f : HInst< (outs), (ins PredRegs:$Pv4), "if (!$Pv4) dealloc_return", -LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel { +tc_9ce7a5ab, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{4-0} = 0b11110; let Inst{7-5} = 0b000; let Inst{13-10} = 0b1100; @@ -12885,8 +12939,8 @@ let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; -let isReturn = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [R30]; let Defs = [PC, R29, R30, R31]; let BaseOpcode = "L4_return"; @@ -12896,7 +12950,7 @@ def L4_return_fnew_pnt : HInst< (outs), (ins PredRegs:$Pv4), "if (!$Pv4.new) dealloc_return:nt", -LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel { +tc_3993c58b, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{4-0} = 0b11110; let Inst{7-5} = 0b000; let Inst{13-10} = 0b1010; @@ -12908,9 +12962,9 @@ let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; -let isReturn = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [R30]; let Defs = [PC, R29, R30, R31]; let BaseOpcode = "L4_return"; @@ -12920,7 +12974,7 @@ def L4_return_fnew_pt : HInst< (outs), (ins PredRegs:$Pv4), "if (!$Pv4.new) dealloc_return:t", -LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel { +tc_3993c58b, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{4-0} = 0b11110; let Inst{7-5} = 0b000; let Inst{13-10} = 0b1110; @@ -12932,9 +12986,9 @@ let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; -let isReturn = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [R30]; let Defs = [PC, R29, R30, R31]; let BaseOpcode = "L4_return"; @@ -12944,7 +12998,7 @@ def L4_return_t : HInst< (outs), (ins PredRegs:$Pv4), "if ($Pv4) dealloc_return", -LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel { +tc_9ce7a5ab, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{4-0} = 0b11110; let Inst{7-5} = 0b000; let Inst{13-10} = 0b0100; @@ -12955,8 +13009,8 @@ let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; -let isReturn = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [R30]; let Defs = [PC, R29, R30, R31]; let BaseOpcode = "L4_return"; @@ -12966,7 +13020,7 @@ def L4_return_tnew_pnt : HInst< (outs), (ins PredRegs:$Pv4), "if ($Pv4.new) dealloc_return:nt", -LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel { +tc_3993c58b, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{4-0} = 0b11110; let Inst{7-5} = 0b000; let Inst{13-10} = 0b0010; @@ -12977,9 +13031,9 @@ let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; -let isReturn = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [R30]; let Defs = [PC, R29, R30, R31]; let BaseOpcode = "L4_return"; @@ -12989,7 +13043,7 @@ def L4_return_tnew_pt : HInst< (outs), (ins PredRegs:$Pv4), "if ($Pv4.new) dealloc_return:t", -LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel { +tc_3993c58b, TypeLD>, Enc_b7fad3, PredNewRel { let Inst{4-0} = 0b11110; let Inst{7-5} = 0b000; let Inst{13-10} = 0b0110; @@ -13000,9 +13054,9 @@ let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; -let isReturn = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [R30]; let Defs = [PC, R29, R30, R31]; let BaseOpcode = "L4_return"; @@ -13012,14 +13066,14 @@ def L4_sub_memopb_io : HInst< (outs), (ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) -= $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 { +tc_a9c993d9, TypeV4LDST>, Enc_d44e31 { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -13030,7 +13084,7 @@ def L4_sub_memopb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) -= $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13038,14 +13092,14 @@ def L4_sub_memoph_io : HInst< (outs), (ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) -= $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 { +tc_a9c993d9, TypeV4LDST>, Enc_163a3c { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -13056,7 +13110,7 @@ def L4_sub_memoph_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) -= $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13064,14 +13118,14 @@ def L4_sub_memopw_io : HInst< (outs), (ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) -= $Rt32", -V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 { +tc_a9c993d9, TypeV4LDST>, Enc_226535 { let Inst{6-5} = 0b01; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00111110010; let addrMode = BaseImmOffset; let accessSize = WordAccess; -let mayStore = 1; let mayLoad = 1; +let mayStore = 1; let isExtendable = 1; let opExtendable = 1; let isExtentSigned = 0; @@ -13082,7 +13136,7 @@ def L4_sub_memopw_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) -= $Rt32", -PSEUDO, TypeMAPPING> { +tc_a9c993d9, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13090,7 +13144,7 @@ def M2_acci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += add($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889, ImmRegRel { +tc_c0cd91a8, TypeM>, Enc_2ae154, ImmRegRel { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -13105,7 +13159,7 @@ def M2_accii : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rx32 += add($Rs32,#$Ii)", -M_tc_2_acc_SLOT23, TypeM>, Enc_11522288, ImmRegRel { +tc_c0cd91a8, TypeM>, Enc_c90aca, ImmRegRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100010000; let hasNewValue = 1; @@ -13124,7 +13178,7 @@ def M2_cmaci_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpyi($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13135,7 +13189,7 @@ def M2_cmacr_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpyr($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13146,7 +13200,7 @@ def M2_cmacs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpy($Rs32,$Rt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13158,7 +13212,7 @@ def M2_cmacs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpy($Rs32,$Rt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111100; @@ -13170,7 +13224,7 @@ def M2_cmacsc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpy($Rs32,$Rt32*):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111010; @@ -13182,7 +13236,7 @@ def M2_cmacsc_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += cmpy($Rs32,$Rt32*):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111110; @@ -13194,7 +13248,7 @@ def M2_cmpyi_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpyi($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -13204,7 +13258,7 @@ def M2_cmpyr_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpyr($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -13214,7 +13268,7 @@ def M2_cmpyrs_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmpy($Rs32,$Rt32):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101001; @@ -13227,7 +13281,7 @@ def M2_cmpyrs_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmpy($Rs32,$Rt32):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -13240,7 +13294,7 @@ def M2_cmpyrsc_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmpy($Rs32,$Rt32*):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101011; @@ -13253,7 +13307,7 @@ def M2_cmpyrsc_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = cmpy($Rs32,$Rt32*):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101111; @@ -13266,7 +13320,7 @@ def M2_cmpys_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpy($Rs32,$Rt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -13277,7 +13331,7 @@ def M2_cmpys_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpy($Rs32,$Rt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101100; @@ -13288,7 +13342,7 @@ def M2_cmpysc_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpy($Rs32,$Rt32*):sat", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101010; @@ -13299,7 +13353,7 @@ def M2_cmpysc_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = cmpy($Rs32,$Rt32*):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101110; @@ -13310,7 +13364,7 @@ def M2_cnacs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= cmpy($Rs32,$Rt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13322,7 +13376,7 @@ def M2_cnacs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= cmpy($Rs32,$Rt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111100; @@ -13334,7 +13388,7 @@ def M2_cnacsc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= cmpy($Rs32,$Rt32*):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111010; @@ -13346,7 +13400,7 @@ def M2_cnacsc_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= cmpy($Rs32,$Rt32*):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111110; @@ -13358,7 +13412,7 @@ def M2_dpmpyss_acc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -13369,7 +13423,7 @@ def M2_dpmpyss_nac_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111001; @@ -13380,7 +13434,7 @@ def M2_dpmpyss_rnd_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32):rnd", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101001; @@ -13392,7 +13446,7 @@ def M2_dpmpyss_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -13402,7 +13456,7 @@ def M2_dpmpyuu_acc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111010; @@ -13413,7 +13467,7 @@ def M2_dpmpyuu_nac_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111011; @@ -13424,7 +13478,7 @@ def M2_dpmpyuu_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101010; @@ -13434,7 +13488,7 @@ def M2_hmmpyh_rs1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32.h):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -13447,7 +13501,7 @@ def M2_hmmpyh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32.h):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -13460,7 +13514,7 @@ def M2_hmmpyl_rs1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32.l):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101111; @@ -13473,7 +13527,7 @@ def M2_hmmpyl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32.l):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -13486,7 +13540,7 @@ def M2_maci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyi($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889, ImmRegRel { +tc_8cb685d9, TypeM>, Enc_2ae154, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -13501,7 +13555,7 @@ def M2_macsin : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii), "$Rx32 -= mpyi($Rs32,#$Ii)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_11522288 { +tc_a12a5971, TypeM>, Enc_c90aca { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100001100; let hasNewValue = 1; @@ -13519,7 +13573,7 @@ def M2_macsip : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii), "$Rx32 += mpyi($Rs32,#$Ii)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_11522288, ImmRegRel { +tc_a12a5971, TypeM>, Enc_c90aca, ImmRegRel { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100001000; let hasNewValue = 1; @@ -13538,7 +13592,7 @@ def M2_mmachs_rs0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywoh($Rss32,$Rtt32):rnd:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -13550,7 +13604,7 @@ def M2_mmachs_rs1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:rnd:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -13562,7 +13616,7 @@ def M2_mmachs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywoh($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -13574,7 +13628,7 @@ def M2_mmachs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -13586,7 +13640,7 @@ def M2_mmacls_rs0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweh($Rss32,$Rtt32):rnd:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -13598,7 +13652,7 @@ def M2_mmacls_rs1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:rnd:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -13610,7 +13664,7 @@ def M2_mmacls_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweh($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -13622,7 +13676,7 @@ def M2_mmacls_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -13634,7 +13688,7 @@ def M2_mmacuhs_rs0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywouh($Rss32,$Rtt32):rnd:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010011; @@ -13646,7 +13700,7 @@ def M2_mmacuhs_rs1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:rnd:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -13658,7 +13712,7 @@ def M2_mmacuhs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywouh($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -13670,7 +13724,7 @@ def M2_mmacuhs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010110; @@ -13682,7 +13736,7 @@ def M2_mmaculs_rs0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweuh($Rss32,$Rtt32):rnd:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010011; @@ -13694,7 +13748,7 @@ def M2_mmaculs_rs1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -13706,7 +13760,7 @@ def M2_mmaculs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweuh($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -13718,7 +13772,7 @@ def M2_mmaculs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010110; @@ -13730,7 +13784,7 @@ def M2_mmpyh_rs0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywoh($Rss32,$Rtt32):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -13741,7 +13795,7 @@ def M2_mmpyh_rs1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -13752,7 +13806,7 @@ def M2_mmpyh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywoh($Rss32,$Rtt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -13763,7 +13817,7 @@ def M2_mmpyh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -13774,7 +13828,7 @@ def M2_mmpyl_rs0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweh($Rss32,$Rtt32):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -13785,7 +13839,7 @@ def M2_mmpyl_rs1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -13796,7 +13850,7 @@ def M2_mmpyl_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweh($Rss32,$Rtt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -13807,7 +13861,7 @@ def M2_mmpyl_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -13818,7 +13872,7 @@ def M2_mmpyuh_rs0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywouh($Rss32,$Rtt32):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000011; @@ -13829,7 +13883,7 @@ def M2_mmpyuh_rs1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000111; @@ -13840,7 +13894,7 @@ def M2_mmpyuh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywouh($Rss32,$Rtt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -13851,7 +13905,7 @@ def M2_mmpyuh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -13862,7 +13916,7 @@ def M2_mmpyul_rs0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweuh($Rss32,$Rtt32):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000011; @@ -13873,7 +13927,7 @@ def M2_mmpyul_rs1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000111; @@ -13884,7 +13938,7 @@ def M2_mmpyul_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweuh($Rss32,$Rtt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -13895,7 +13949,7 @@ def M2_mmpyul_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -13906,7 +13960,7 @@ def M2_mpy_acc_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -13919,7 +13973,7 @@ def M2_mpy_acc_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -13932,7 +13986,7 @@ def M2_mpy_acc_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -13945,7 +13999,7 @@ def M2_mpy_acc_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -13958,7 +14012,7 @@ def M2_mpy_acc_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -13971,7 +14025,7 @@ def M2_mpy_acc_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -13984,7 +14038,7 @@ def M2_mpy_acc_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -13997,7 +14051,7 @@ def M2_mpy_acc_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14010,7 +14064,7 @@ def M2_mpy_acc_sat_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.h):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14024,7 +14078,7 @@ def M2_mpy_acc_sat_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.h):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14038,7 +14092,7 @@ def M2_mpy_acc_sat_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.l):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14052,7 +14106,7 @@ def M2_mpy_acc_sat_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.h,$Rt32.l):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14066,7 +14120,7 @@ def M2_mpy_acc_sat_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.h):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14080,7 +14134,7 @@ def M2_mpy_acc_sat_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.h):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14094,7 +14148,7 @@ def M2_mpy_acc_sat_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.l):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110000; @@ -14108,7 +14162,7 @@ def M2_mpy_acc_sat_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32.l,$Rt32.l):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110100; @@ -14122,7 +14176,7 @@ def M2_mpy_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14134,7 +14188,7 @@ def M2_mpy_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14146,7 +14200,7 @@ def M2_mpy_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14158,7 +14212,7 @@ def M2_mpy_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14170,7 +14224,7 @@ def M2_mpy_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14182,7 +14236,7 @@ def M2_mpy_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14194,7 +14248,7 @@ def M2_mpy_ll_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14206,7 +14260,7 @@ def M2_mpy_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14218,7 +14272,7 @@ def M2_mpy_nac_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14231,7 +14285,7 @@ def M2_mpy_nac_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14244,7 +14298,7 @@ def M2_mpy_nac_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14257,7 +14311,7 @@ def M2_mpy_nac_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14270,7 +14324,7 @@ def M2_mpy_nac_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14283,7 +14337,7 @@ def M2_mpy_nac_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14296,7 +14350,7 @@ def M2_mpy_nac_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14309,7 +14363,7 @@ def M2_mpy_nac_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14322,7 +14376,7 @@ def M2_mpy_nac_sat_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.h):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14336,7 +14390,7 @@ def M2_mpy_nac_sat_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14350,7 +14404,7 @@ def M2_mpy_nac_sat_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.l):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14364,7 +14418,7 @@ def M2_mpy_nac_sat_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14378,7 +14432,7 @@ def M2_mpy_nac_sat_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.h):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14392,7 +14446,7 @@ def M2_mpy_nac_sat_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14406,7 +14460,7 @@ def M2_mpy_nac_sat_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.l):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110001; @@ -14420,7 +14474,7 @@ def M2_mpy_nac_sat_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110101; @@ -14434,7 +14488,7 @@ def M2_mpy_rnd_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):rnd", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14446,7 +14500,7 @@ def M2_mpy_rnd_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14458,7 +14512,7 @@ def M2_mpy_rnd_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):rnd", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14470,7 +14524,7 @@ def M2_mpy_rnd_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14482,7 +14536,7 @@ def M2_mpy_rnd_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):rnd", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14494,7 +14548,7 @@ def M2_mpy_rnd_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14506,7 +14560,7 @@ def M2_mpy_rnd_ll_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):rnd", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14518,7 +14572,7 @@ def M2_mpy_rnd_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14530,7 +14584,7 @@ def M2_mpy_sat_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14543,7 +14597,7 @@ def M2_mpy_sat_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14556,7 +14610,7 @@ def M2_mpy_sat_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14569,7 +14623,7 @@ def M2_mpy_sat_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14582,7 +14636,7 @@ def M2_mpy_sat_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14595,7 +14649,7 @@ def M2_mpy_sat_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14608,7 +14662,7 @@ def M2_mpy_sat_ll_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100000; @@ -14621,7 +14675,7 @@ def M2_mpy_sat_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100100; @@ -14634,7 +14688,7 @@ def M2_mpy_sat_rnd_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14647,7 +14701,7 @@ def M2_mpy_sat_rnd_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14660,7 +14714,7 @@ def M2_mpy_sat_rnd_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14673,7 +14727,7 @@ def M2_mpy_sat_rnd_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14686,7 +14740,7 @@ def M2_mpy_sat_rnd_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14699,7 +14753,7 @@ def M2_mpy_sat_rnd_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14712,7 +14766,7 @@ def M2_mpy_sat_rnd_ll_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100001; @@ -14725,7 +14779,7 @@ def M2_mpy_sat_rnd_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100101; @@ -14738,7 +14792,7 @@ def M2_mpy_up : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101000; @@ -14750,7 +14804,7 @@ def M2_mpy_up_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -14762,7 +14816,7 @@ def M2_mpy_up_s1_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpy($Rs32,$Rt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101111; @@ -14775,7 +14829,7 @@ def M2_mpyd_acc_hh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.h,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110000; @@ -14786,7 +14840,7 @@ def M2_mpyd_acc_hh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.h,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110100; @@ -14797,7 +14851,7 @@ def M2_mpyd_acc_hl_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.h,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110000; @@ -14808,7 +14862,7 @@ def M2_mpyd_acc_hl_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.h,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110100; @@ -14819,7 +14873,7 @@ def M2_mpyd_acc_lh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.l,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110000; @@ -14830,7 +14884,7 @@ def M2_mpyd_acc_lh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.l,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110100; @@ -14841,7 +14895,7 @@ def M2_mpyd_acc_ll_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.l,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110000; @@ -14852,7 +14906,7 @@ def M2_mpyd_acc_ll_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpy($Rs32.l,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110100; @@ -14863,7 +14917,7 @@ def M2_mpyd_hh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.h)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100000; @@ -14873,7 +14927,7 @@ def M2_mpyd_hh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100100; @@ -14883,7 +14937,7 @@ def M2_mpyd_hl_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.l)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100000; @@ -14893,7 +14947,7 @@ def M2_mpyd_hl_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100100; @@ -14903,7 +14957,7 @@ def M2_mpyd_lh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.h)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100000; @@ -14913,7 +14967,7 @@ def M2_mpyd_lh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100100; @@ -14923,7 +14977,7 @@ def M2_mpyd_ll_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.l)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100000; @@ -14933,7 +14987,7 @@ def M2_mpyd_ll_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100100; @@ -14943,7 +14997,7 @@ def M2_mpyd_nac_hh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.h,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110001; @@ -14954,7 +15008,7 @@ def M2_mpyd_nac_hh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.h,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110101; @@ -14965,7 +15019,7 @@ def M2_mpyd_nac_hl_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.h,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110001; @@ -14976,7 +15030,7 @@ def M2_mpyd_nac_hl_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.h,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110101; @@ -14987,7 +15041,7 @@ def M2_mpyd_nac_lh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.l,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110001; @@ -14998,7 +15052,7 @@ def M2_mpyd_nac_lh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.l,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110101; @@ -15009,7 +15063,7 @@ def M2_mpyd_nac_ll_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.l,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110001; @@ -15020,7 +15074,7 @@ def M2_mpyd_nac_ll_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpy($Rs32.l,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110101; @@ -15031,7 +15085,7 @@ def M2_mpyd_rnd_hh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.h):rnd", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100001; @@ -15041,7 +15095,7 @@ def M2_mpyd_rnd_hh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100101; @@ -15051,7 +15105,7 @@ def M2_mpyd_rnd_hl_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.l):rnd", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100001; @@ -15061,7 +15115,7 @@ def M2_mpyd_rnd_hl_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100101; @@ -15071,7 +15125,7 @@ def M2_mpyd_rnd_lh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.h):rnd", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100001; @@ -15081,7 +15135,7 @@ def M2_mpyd_rnd_lh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100101; @@ -15091,7 +15145,7 @@ def M2_mpyd_rnd_ll_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.l):rnd", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100001; @@ -15101,7 +15155,7 @@ def M2_mpyd_rnd_ll_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100101; @@ -15111,7 +15165,7 @@ def M2_mpyi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyi($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773, ImmRegRel { +tc_8c8041e6, TypeM>, Enc_5ab2be, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101000; @@ -15125,7 +15179,7 @@ def M2_mpysin : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u8_0Imm:$Ii), "$Rd32 = -mpyi($Rs32,#$Ii)", -M_tc_3x_SLOT23, TypeM>, Enc_16355964 { +tc_ae2c2dc2, TypeM>, Enc_b8c967 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100000100; let hasNewValue = 1; @@ -15136,7 +15190,7 @@ def M2_mpysip : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u32_0Imm:$Ii), "$Rd32 = +mpyi($Rs32,#$Ii)", -M_tc_3x_SLOT23, TypeM>, Enc_16355964 { +tc_ae2c2dc2, TypeM>, Enc_b8c967 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100000000; let hasNewValue = 1; @@ -15152,7 +15206,7 @@ def M2_mpysmi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, m32_0Imm:$Ii), "$Rd32 = mpyi($Rs32,#$Ii)", -M_tc_3x_SLOT23, TypeM>, ImmRegRel { +tc_ae2c2dc2, TypeM>, ImmRegRel { let hasNewValue = 1; let opNewValue = 0; let CextOpcode = "M2_mpyi"; @@ -15168,7 +15222,7 @@ def M2_mpysu_up : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpysu($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101011; @@ -15180,7 +15234,7 @@ def M2_mpyu_acc_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.h,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110010; @@ -15193,7 +15247,7 @@ def M2_mpyu_acc_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.h,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110110; @@ -15206,7 +15260,7 @@ def M2_mpyu_acc_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.h,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110010; @@ -15219,7 +15273,7 @@ def M2_mpyu_acc_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.h,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110110; @@ -15232,7 +15286,7 @@ def M2_mpyu_acc_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.l,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110010; @@ -15245,7 +15299,7 @@ def M2_mpyu_acc_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.l,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110110; @@ -15258,7 +15312,7 @@ def M2_mpyu_acc_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.l,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110010; @@ -15271,7 +15325,7 @@ def M2_mpyu_acc_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpyu($Rs32.l,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110110; @@ -15284,7 +15338,7 @@ def M2_mpyu_hh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.h,$Rt32.h)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100010; @@ -15296,7 +15350,7 @@ def M2_mpyu_hh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.h,$Rt32.h):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100110; @@ -15308,7 +15362,7 @@ def M2_mpyu_hl_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.h,$Rt32.l)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100010; @@ -15320,7 +15374,7 @@ def M2_mpyu_hl_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.h,$Rt32.l):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100110; @@ -15332,7 +15386,7 @@ def M2_mpyu_lh_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.l,$Rt32.h)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100010; @@ -15344,7 +15398,7 @@ def M2_mpyu_lh_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.l,$Rt32.h):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100110; @@ -15356,7 +15410,7 @@ def M2_mpyu_ll_s0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.l,$Rt32.l)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100010; @@ -15368,7 +15422,7 @@ def M2_mpyu_ll_s1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32.l,$Rt32.l):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101100110; @@ -15380,7 +15434,7 @@ def M2_mpyu_nac_hh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.h,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110011; @@ -15393,7 +15447,7 @@ def M2_mpyu_nac_hh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.h,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110111; @@ -15406,7 +15460,7 @@ def M2_mpyu_nac_hl_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.h,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110011; @@ -15419,7 +15473,7 @@ def M2_mpyu_nac_hl_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.h,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110111; @@ -15432,7 +15486,7 @@ def M2_mpyu_nac_lh_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.l,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110011; @@ -15445,7 +15499,7 @@ def M2_mpyu_nac_lh_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.l,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110111; @@ -15458,7 +15512,7 @@ def M2_mpyu_nac_ll_s0 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.l,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110011; @@ -15471,7 +15525,7 @@ def M2_mpyu_nac_ll_s1 : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpyu($Rs32.l,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101110111; @@ -15484,7 +15538,7 @@ def M2_mpyu_up : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyu($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101010; @@ -15496,7 +15550,7 @@ def M2_mpyud_acc_hh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.h,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110010; @@ -15507,7 +15561,7 @@ def M2_mpyud_acc_hh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.h,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110110; @@ -15518,7 +15572,7 @@ def M2_mpyud_acc_hl_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.h,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110010; @@ -15529,7 +15583,7 @@ def M2_mpyud_acc_hl_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.h,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110110; @@ -15540,7 +15594,7 @@ def M2_mpyud_acc_lh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.l,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110010; @@ -15551,7 +15605,7 @@ def M2_mpyud_acc_lh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.l,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110110; @@ -15562,7 +15616,7 @@ def M2_mpyud_acc_ll_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.l,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110010; @@ -15573,7 +15627,7 @@ def M2_mpyud_acc_ll_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += mpyu($Rs32.l,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110110; @@ -15584,7 +15638,7 @@ def M2_mpyud_hh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.h,$Rt32.h)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100010; @@ -15594,7 +15648,7 @@ def M2_mpyud_hh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.h,$Rt32.h):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100110; @@ -15604,7 +15658,7 @@ def M2_mpyud_hl_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.h,$Rt32.l)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100010; @@ -15614,7 +15668,7 @@ def M2_mpyud_hl_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.h,$Rt32.l):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100110; @@ -15624,7 +15678,7 @@ def M2_mpyud_lh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.l,$Rt32.h)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100010; @@ -15634,7 +15688,7 @@ def M2_mpyud_lh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.l,$Rt32.h):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100110; @@ -15644,7 +15698,7 @@ def M2_mpyud_ll_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.l,$Rt32.l)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100010; @@ -15654,7 +15708,7 @@ def M2_mpyud_ll_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = mpyu($Rs32.l,$Rt32.l):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100100110; @@ -15664,7 +15718,7 @@ def M2_mpyud_nac_hh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.h,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110011; @@ -15675,7 +15729,7 @@ def M2_mpyud_nac_hh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.h,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110111; @@ -15686,7 +15740,7 @@ def M2_mpyud_nac_hl_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.h,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110011; @@ -15697,7 +15751,7 @@ def M2_mpyud_nac_hl_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.h,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110111; @@ -15708,7 +15762,7 @@ def M2_mpyud_nac_lh_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.l,$Rt32.h)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110011; @@ -15719,7 +15773,7 @@ def M2_mpyud_nac_lh_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.l,$Rt32.h):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110111; @@ -15730,7 +15784,7 @@ def M2_mpyud_nac_ll_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.l,$Rt32.l)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110011; @@ -15741,7 +15795,7 @@ def M2_mpyud_nac_ll_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 -= mpyu($Rs32.l,$Rt32.l):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100110111; @@ -15752,7 +15806,7 @@ def M2_mpyui : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = mpyui($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM> { +tc_8c8041e6, TypeM> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -15762,7 +15816,7 @@ def M2_nacci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= add($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_c0cd91a8, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111100; @@ -15776,7 +15830,7 @@ def M2_naccii : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rx32 -= add($Rs32,#$Ii)", -M_tc_2_acc_SLOT23, TypeM>, Enc_11522288 { +tc_c0cd91a8, TypeM>, Enc_c90aca { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100010100; let hasNewValue = 1; @@ -15794,7 +15848,7 @@ def M2_subacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rt32, IntRegs:$Rs32), "$Rx32 += sub($Rt32,$Rs32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_7692963 { +tc_c0cd91a8, TypeM>, Enc_a568d4 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -15808,7 +15862,7 @@ def M2_vabsdiffh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffh($Rtt32,$Rss32)", -M_tc_2_SLOT23, TypeM>, Enc_11687333 { +tc_63cd9d2d, TypeM>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000011; @@ -15818,7 +15872,7 @@ def M2_vabsdiffw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffw($Rtt32,$Rss32)", -M_tc_2_SLOT23, TypeM>, Enc_11687333 { +tc_63cd9d2d, TypeM>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -15828,7 +15882,7 @@ def M2_vcmac_s0_sat_i : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vcmpyi($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -15840,7 +15894,7 @@ def M2_vcmac_s0_sat_r : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vcmpyr($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -15852,7 +15906,7 @@ def M2_vcmpy_s0_sat_i : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vcmpyi($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -15863,7 +15917,7 @@ def M2_vcmpy_s0_sat_r : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vcmpyr($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -15874,7 +15928,7 @@ def M2_vcmpy_s1_sat_i : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vcmpyi($Rss32,$Rtt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -15885,7 +15939,7 @@ def M2_vcmpy_s1_sat_r : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vcmpyr($Rss32,$Rtt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -15896,7 +15950,7 @@ def M2_vdmacs_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vdmpy($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -15908,7 +15962,7 @@ def M2_vdmacs_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vdmpy($Rss32,$Rtt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -15920,7 +15974,7 @@ def M2_vdmpyrs_s0 : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vdmpy($Rss32,$Rtt32):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_9277990 { +tc_8c8041e6, TypeM>, Enc_d2216a { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001000; @@ -15933,7 +15987,7 @@ def M2_vdmpyrs_s1 : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vdmpy($Rss32,$Rtt32):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_9277990 { +tc_8c8041e6, TypeM>, Enc_d2216a { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001100; @@ -15946,7 +16000,7 @@ def M2_vdmpys_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vdmpy($Rss32,$Rtt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -15957,7 +16011,7 @@ def M2_vdmpys_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vdmpy($Rss32,$Rtt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -15968,7 +16022,7 @@ def M2_vmac2 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyh($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111001; @@ -15979,7 +16033,7 @@ def M2_vmac2es : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyeh($Rss32,$Rtt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -15990,7 +16044,7 @@ def M2_vmac2es_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyeh($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -16002,7 +16056,7 @@ def M2_vmac2es_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vmpyeh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -16014,7 +16068,7 @@ def M2_vmac2s_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyh($Rs32,$Rt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111000; @@ -16026,7 +16080,7 @@ def M2_vmac2s_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyh($Rs32,$Rt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111100; @@ -16038,7 +16092,7 @@ def M2_vmac2su_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyhsu($Rs32,$Rt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111011; @@ -16050,7 +16104,7 @@ def M2_vmac2su_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpyhsu($Rs32,$Rt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111111; @@ -16062,7 +16116,7 @@ def M2_vmpy2es_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyeh($Rss32,$Rtt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -16073,7 +16127,7 @@ def M2_vmpy2es_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vmpyeh($Rss32,$Rtt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -16084,7 +16138,7 @@ def M2_vmpy2s_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpyh($Rs32,$Rt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -16095,7 +16149,7 @@ def M2_vmpy2s_s0pack : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vmpyh($Rs32,$Rt32):rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101001; @@ -16108,7 +16162,7 @@ def M2_vmpy2s_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpyh($Rs32,$Rt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101100; @@ -16119,7 +16173,7 @@ def M2_vmpy2s_s1pack : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = vmpyh($Rs32,$Rt32):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM>, Enc_14071773 { +tc_8c8041e6, TypeM>, Enc_5ab2be { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101101101; @@ -16132,7 +16186,7 @@ def M2_vmpy2su_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpyhsu($Rs32,$Rt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101000; @@ -16143,7 +16197,7 @@ def M2_vmpy2su_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpyhsu($Rs32,$Rt32):<<1:sat", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101100; @@ -16154,7 +16208,7 @@ def M2_vraddh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vraddh($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_9277990 { +tc_8c8041e6, TypeM>, Enc_d2216a { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001001; @@ -16166,7 +16220,7 @@ def M2_vradduh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vradduh($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_9277990 { +tc_8c8041e6, TypeM>, Enc_d2216a { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001000; @@ -16178,7 +16232,7 @@ def M2_vrcmaci_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpyi($Rss32,$Rtt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -16189,7 +16243,7 @@ def M2_vrcmaci_s0c : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpyi($Rss32,$Rtt32*)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010010; @@ -16200,7 +16254,7 @@ def M2_vrcmacr_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpyr($Rss32,$Rtt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -16211,7 +16265,7 @@ def M2_vrcmacr_s0c : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpyr($Rss32,$Rtt32*)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010011; @@ -16222,7 +16276,7 @@ def M2_vrcmpyi_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpyi($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -16232,7 +16286,7 @@ def M2_vrcmpyi_s0c : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpyi($Rss32,$Rtt32*)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -16242,7 +16296,7 @@ def M2_vrcmpyr_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpyr($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -16252,7 +16306,7 @@ def M2_vrcmpyr_s0c : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpyr($Rss32,$Rtt32*)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000011; @@ -16262,7 +16316,7 @@ def M2_vrcmpys_acc_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += vrcmpys($Rss32,$Rt32):<<1:sat", -M_tc_3x_SLOT23, TypeM> { +tc_8cb685d9, TypeM> { let isPseudo = 1; let Constraints = "$Rxx32 = $Rxx32in"; } @@ -16270,7 +16324,7 @@ def M2_vrcmpys_acc_s1_h : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi", -M_tc_3x_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -16282,7 +16336,7 @@ def M2_vrcmpys_acc_s1_l : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo", -M_tc_3x_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -16294,14 +16348,14 @@ def M2_vrcmpys_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vrcmpys($Rss32,$Rt32):<<1:sat", -M_tc_3x_SLOT23, TypeM> { +tc_8c8041e6, TypeM> { let isPseudo = 1; } def M2_vrcmpys_s1_h : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -16312,7 +16366,7 @@ def M2_vrcmpys_s1_l : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000111; @@ -16323,7 +16377,7 @@ def M2_vrcmpys_s1rp : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = vrcmpys($Rss32,$Rt32):<<1:rnd:sat", -M_tc_3x_SLOT23, TypeM> { +tc_8c8041e6, TypeM> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -16332,7 +16386,7 @@ def M2_vrcmpys_s1rp_h : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:hi", -M_tc_3x_SLOT23, TypeM>, Enc_9277990 { +tc_8c8041e6, TypeM>, Enc_d2216a { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001101; @@ -16345,7 +16399,7 @@ def M2_vrcmpys_s1rp_l : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:lo", -M_tc_3x_SLOT23, TypeM>, Enc_9277990 { +tc_8c8041e6, TypeM>, Enc_d2216a { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101001101; @@ -16358,7 +16412,7 @@ def M2_vrmac_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpyh($Rss32,$Rtt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010000; @@ -16369,7 +16423,7 @@ def M2_vrmpy_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpyh($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000000; @@ -16379,7 +16433,7 @@ def M2_xor_xacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 ^= xor($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111100; @@ -16393,7 +16447,7 @@ def M4_and_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= and($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111010; @@ -16407,7 +16461,7 @@ def M4_and_andn : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= and($Rs32,~$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111001; @@ -16421,7 +16475,7 @@ def M4_and_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= or($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111010; @@ -16435,7 +16489,7 @@ def M4_and_xor : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= xor($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111010; @@ -16449,7 +16503,7 @@ def M4_cmpyi_wh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyiwh($Rss32,$Rt32):<<1:rnd:sat", -S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645 { +tc_8c8041e6, TypeS_3op>, Enc_3d5b28 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -16462,7 +16516,7 @@ def M4_cmpyi_whc : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat", -S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645, Requires<[HasV5T]> { +tc_8c8041e6, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -16475,7 +16529,7 @@ def M4_cmpyr_wh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyrwh($Rss32,$Rt32):<<1:rnd:sat", -S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645 { +tc_8c8041e6, TypeS_3op>, Enc_3d5b28 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -16488,7 +16542,7 @@ def M4_cmpyr_whc : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat", -S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645, Requires<[HasV5T]> { +tc_8c8041e6, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -16501,7 +16555,7 @@ def M4_mac_up_s1_sat : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += mpy($Rs32,$Rt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111011; @@ -16516,7 +16570,7 @@ def M4_mpyri_addi : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii, IntRegs:$Rs32, u6_0Imm:$II), "$Rd32 = add(#$Ii,mpyi($Rs32,#$II))", -ALU64_tc_3x_SLOT23, TypeALU64>, Enc_971574, ImmRegRel { +tc_a12a5971, TypeALU64>, Enc_322e1b, ImmRegRel { let Inst{31-24} = 0b11011000; let hasNewValue = 1; let opNewValue = 0; @@ -16532,7 +16586,7 @@ def M4_mpyri_addr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Ru32, IntRegs:$Rs32, u32_0Imm:$Ii), "$Rd32 = add($Ru32,mpyi($Rs32,#$Ii))", -ALU64_tc_3x_SLOT23, TypeALU64>, Enc_236434, ImmRegRel { +tc_a12a5971, TypeALU64>, Enc_420cf3, ImmRegRel { let Inst{31-23} = 0b110111111; let hasNewValue = 1; let opNewValue = 0; @@ -16549,7 +16603,7 @@ def M4_mpyri_addr_u2 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Ru32, u6_2Imm:$Ii, IntRegs:$Rs32), "$Rd32 = add($Ru32,mpyi(#$Ii,$Rs32))", -ALU64_tc_3x_SLOT23, TypeALU64>, Enc_9959498 { +tc_69bb508b, TypeALU64>, Enc_277737 { let Inst{31-23} = 0b110111110; let hasNewValue = 1; let opNewValue = 0; @@ -16559,7 +16613,7 @@ def M4_mpyrr_addi : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii, IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add(#$Ii,mpyi($Rs32,$Rt32))", -ALU64_tc_3x_SLOT23, TypeALU64>, Enc_2216485, ImmRegRel { +tc_8cb685d9, TypeALU64>, Enc_a7b8e8, ImmRegRel { let Inst{31-23} = 0b110101110; let hasNewValue = 1; let opNewValue = 0; @@ -16576,7 +16630,7 @@ def M4_mpyrr_addr : HInst< (outs IntRegs:$Ry32), (ins IntRegs:$Ru32, IntRegs:$Ry32in, IntRegs:$Rs32), "$Ry32 = add($Ru32,mpyi($Ry32in,$Rs32))", -M_tc_3x_SLOT23, TypeM>, Enc_13770697, ImmRegRel { +tc_8cb685d9, TypeM>, Enc_7f1a05, ImmRegRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100011000; @@ -16591,7 +16645,7 @@ def M4_nac_up_s1_sat : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= mpy($Rs32,$Rt32):<<1:sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 { +tc_8cb685d9, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111011; @@ -16606,7 +16660,7 @@ def M4_or_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= and($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111010; @@ -16620,7 +16674,7 @@ def M4_or_andn : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= and($Rs32,~$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111001; @@ -16634,7 +16688,7 @@ def M4_or_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= or($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111110; @@ -16648,7 +16702,7 @@ def M4_or_xor : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= xor($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111110; @@ -16662,7 +16716,7 @@ def M4_pmpyw : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = pmpyw($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101010; @@ -16672,7 +16726,7 @@ def M4_pmpyw_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 ^= pmpyw($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111001; @@ -16683,7 +16737,7 @@ def M4_vpmpyh : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vpmpyh($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101110; @@ -16693,7 +16747,7 @@ def M4_vpmpyh_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 ^= vpmpyh($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111101; @@ -16704,7 +16758,7 @@ def M4_vrmpyeh_acc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpyweh($Rss32,$Rtt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -16715,7 +16769,7 @@ def M4_vrmpyeh_acc_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpyweh($Rss32,$Rtt32):<<1", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -16726,7 +16780,7 @@ def M4_vrmpyeh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpyweh($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000010; @@ -16736,7 +16790,7 @@ def M4_vrmpyeh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpyweh($Rss32,$Rtt32):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -16746,7 +16800,7 @@ def M4_vrmpyoh_acc_s0 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpywoh($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010011; @@ -16757,7 +16811,7 @@ def M4_vrmpyoh_acc_s1 : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpywoh($Rss32,$Rtt32):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -16768,7 +16822,7 @@ def M4_vrmpyoh_s0 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpywoh($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000001; @@ -16778,7 +16832,7 @@ def M4_vrmpyoh_s1 : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpywoh($Rss32,$Rtt32):<<1", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -16788,7 +16842,7 @@ def M4_xor_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 ^= and($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111110; @@ -16802,7 +16856,7 @@ def M4_xor_andn : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 ^= and($Rs32,~$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111001; @@ -16816,7 +16870,7 @@ def M4_xor_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 ^= or($Rs32,$Rt32)", -M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 { +tc_3c10f809, TypeM>, Enc_2ae154 { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111110; @@ -16830,7 +16884,7 @@ def M4_xor_xacc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 ^= xor($Rss32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_12702821 { +tc_3c10f809, TypeS_3op>, Enc_88c16c { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001010100; @@ -16841,7 +16895,7 @@ def M5_vdmacbsu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821, Requires<[HasV5T]> { +tc_8cb685d9, TypeM>, Enc_88c16c, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -16853,7 +16907,7 @@ def M5_vdmpybsu : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat", -M_tc_3x_SLOT23, TypeM>, Enc_8333157, Requires<[HasV5T]> { +tc_8c8041e6, TypeM>, Enc_a56825, Requires<[HasV5T]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -16864,7 +16918,7 @@ def M5_vmacbsu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpybsu($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111110; @@ -16875,7 +16929,7 @@ def M5_vmacbuu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rxx32 += vmpybu($Rs32,$Rt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 { +tc_8cb685d9, TypeM>, Enc_61f0b0 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100111100; @@ -16886,7 +16940,7 @@ def M5_vmpybsu : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpybsu($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101010; @@ -16896,7 +16950,7 @@ def M5_vmpybuu : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = vmpybu($Rs32,$Rt32)", -M_tc_3x_SLOT23, TypeM>, Enc_1997594 { +tc_8c8041e6, TypeM>, Enc_be32a5 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11100101100; @@ -16906,7 +16960,7 @@ def M5_vrmacbsu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpybsu($Rss32,$Rtt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010110; @@ -16917,7 +16971,7 @@ def M5_vrmacbuu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vrmpybu($Rss32,$Rtt32)", -M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 { +tc_8cb685d9, TypeM>, Enc_88c16c { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010100; @@ -16928,7 +16982,7 @@ def M5_vrmpybsu : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpybsu($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000110; @@ -16938,7 +16992,7 @@ def M5_vrmpybuu : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vrmpybu($Rss32,$Rtt32)", -M_tc_3x_SLOT23, TypeM>, Enc_8333157 { +tc_8c8041e6, TypeM>, Enc_a56825 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000100; @@ -16948,7 +17002,7 @@ def M6_vabsdiffb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffb($Rtt32,$Rss32)", -M_tc_2_SLOT23, TypeM>, Enc_11687333, Requires<[HasV62T]> { +tc_faab1248, TypeM>, Enc_ea23e4, Requires<[HasV62T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000111; @@ -16958,7 +17012,7 @@ def M6_vabsdiffub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffub($Rtt32,$Rss32)", -M_tc_2_SLOT23, TypeM>, Enc_11687333, Requires<[HasV62T]> { +tc_faab1248, TypeM>, Enc_ea23e4, Requires<[HasV62T]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -16968,15 +17022,15 @@ def PS_loadrbabs : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = memb(#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_25bef0, AddrModeRel { let Inst{24-21} = 0b1000; let Inst{31-27} = 0b01001; let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrb"; let BaseOpcode = "L4_loadrb_abs"; let isPredicable = 1; @@ -16991,13 +17045,13 @@ def PS_loadrdabs : HInst< (outs DoubleRegs:$Rdd32), (ins u29_3Imm:$Ii), "$Rdd32 = memd(#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4975051, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_509701, AddrModeRel { let Inst{24-21} = 0b1110; let Inst{31-27} = 0b01001; let addrMode = Absolute; let accessSize = DoubleWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrd"; let BaseOpcode = "L4_loadrd_abs"; let isPredicable = 1; @@ -17012,15 +17066,15 @@ def PS_loadrhabs : HInst< (outs IntRegs:$Rd32), (ins u31_1Imm:$Ii), "$Rd32 = memh(#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_8df4be, AddrModeRel { let Inst{24-21} = 0b1010; let Inst{31-27} = 0b01001; let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrh"; let BaseOpcode = "L4_loadrh_abs"; let isPredicable = 1; @@ -17035,15 +17089,15 @@ def PS_loadriabs : HInst< (outs IntRegs:$Rd32), (ins u30_2Imm:$Ii), "$Rd32 = memw(#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_8814718, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_4f4ed7, AddrModeRel { let Inst{24-21} = 0b1100; let Inst{31-27} = 0b01001; let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = WordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadri"; let BaseOpcode = "L4_loadri_abs"; let isPredicable = 1; @@ -17058,15 +17112,15 @@ def PS_loadrubabs : HInst< (outs IntRegs:$Rd32), (ins u32_0Imm:$Ii), "$Rd32 = memub(#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_25bef0, AddrModeRel { let Inst{24-21} = 0b1001; let Inst{31-27} = 0b01001; let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadrub"; let BaseOpcode = "L4_loadrub_abs"; let isPredicable = 1; @@ -17081,15 +17135,15 @@ def PS_loadruhabs : HInst< (outs IntRegs:$Rd32), (ins u31_1Imm:$Ii), "$Rd32 = memuh(#$Ii)", -V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel { +tc_70cabf66, TypeV2LDST>, Enc_8df4be, AddrModeRel { let Inst{24-21} = 0b1011; let Inst{31-27} = 0b01001; let hasNewValue = 1; let opNewValue = 0; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let mayLoad = 1; +let isExtended = 1; let CextOpcode = "L2_loadruh"; let BaseOpcode = "L4_loadruh_abs"; let isPredicable = 1; @@ -17104,7 +17158,7 @@ def PS_storerbabs : HInst< (outs), (ins u32_0Imm:$Ii, IntRegs:$Rt32), "memb(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeV2LDST>, Enc_12395768, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_1b64fb, AddrModeRel { let Inst{24-21} = 0b0000; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17126,16 +17180,16 @@ def PS_storerbnewabs : HInst< (outs), (ins u32_0Imm:$Ii, IntRegs:$Nt8), "memb(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeV2LDST>, Enc_4050532, AddrModeRel { +tc_9e86015f, TypeV2LDST>, Enc_ad1831, AddrModeRel { let Inst{12-11} = 0b00; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; let addrMode = Absolute; let accessSize = ByteAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; let isPredicable = 1; @@ -17151,7 +17205,7 @@ def PS_storerdabs : HInst< (outs), (ins u29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd(#$Ii) = $Rtt32", -ST_tc_st_SLOT01, TypeV2LDST>, Enc_11682941, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_5c124a, AddrModeRel { let Inst{24-21} = 0b0110; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17172,7 +17226,7 @@ def PS_storerfabs : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Rt32), "memh(#$Ii) = $Rt32.h", -ST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_fda92c, AddrModeRel { let Inst{24-21} = 0b0011; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17193,7 +17247,7 @@ def PS_storerhabs : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Rt32), "memh(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_fda92c, AddrModeRel { let Inst{24-21} = 0b0010; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17215,16 +17269,16 @@ def PS_storerhnewabs : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Nt8), "memh(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeV2LDST>, Enc_13618890, AddrModeRel { +tc_9e86015f, TypeV2LDST>, Enc_bc03e5, AddrModeRel { let Inst{12-11} = 0b01; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; let addrMode = Absolute; let accessSize = HalfWordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; let isPredicable = 1; @@ -17240,7 +17294,7 @@ def PS_storeriabs : HInst< (outs), (ins u30_2Imm:$Ii, IntRegs:$Rt32), "memw(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeV2LDST>, Enc_15999208, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_541f26, AddrModeRel { let Inst{24-21} = 0b0100; let Inst{31-27} = 0b01001; let addrMode = Absolute; @@ -17262,16 +17316,16 @@ def PS_storerinewabs : HInst< (outs), (ins u30_2Imm:$Ii, IntRegs:$Nt8), "memw(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeV2LDST>, Enc_12297800, AddrModeRel { +tc_9e86015f, TypeV2LDST>, Enc_78cbf0, AddrModeRel { let Inst{12-11} = 0b10; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; let addrMode = Absolute; let accessSize = WordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; let isPredicable = 1; @@ -17287,7 +17341,7 @@ def S2_addasl_rrri : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32, u3_0Imm:$Ii), "$Rd32 = addasl($Rt32,$Rs32,#$Ii)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_3494181 { +tc_090485bb, TypeS_3op>, Enc_47ef61 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000100000; let hasNewValue = 1; @@ -17298,7 +17352,7 @@ def S2_allocframe : HInst< (outs), (ins u11_3Imm:$Ii), "allocframe(#$Ii)", -ST_tc_ld_SLOT0, TypeST>, Enc_15830826 { +tc_0cb867f2, TypeST>, Enc_22c845 { let Inst{13-11} = 0b000; let Inst{31-21} = 0b10100000100; let Inst{20-16} = 0b11101; @@ -17312,7 +17366,7 @@ def S2_asl_i_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asl($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995 { +tc_9c18c9a5, TypeS_2op>, Enc_5eac98 { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10000000000; } @@ -17320,7 +17374,7 @@ def S2_asl_i_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 += asl($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b110; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -17330,7 +17384,7 @@ def S2_asl_i_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 &= asl($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_3c10f809, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -17340,7 +17394,7 @@ def S2_asl_i_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 -= asl($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -17350,7 +17404,7 @@ def S2_asl_i_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 |= asl($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_3c10f809, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b110; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -17360,7 +17414,7 @@ def S2_asl_i_p_xacc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 ^= asl($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_3c10f809, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10000010100; let prefersSlot3 = 1; @@ -17370,7 +17424,7 @@ def S2_asl_i_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asl($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 { +tc_9c18c9a5, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100000; @@ -17381,7 +17435,7 @@ def S2_asl_i_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 += asl($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -17394,7 +17448,7 @@ def S2_asl_i_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 &= asl($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_3c10f809, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -17407,7 +17461,7 @@ def S2_asl_i_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 -= asl($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -17420,7 +17474,7 @@ def S2_asl_i_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 |= asl($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_3c10f809, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -17433,19 +17487,20 @@ def S2_asl_i_r_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asl($Rs32,#$Ii):sat", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 { +tc_47ab9233, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def S2_asl_i_r_xacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 ^= asl($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_3c10f809, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110100; @@ -17458,7 +17513,7 @@ def S2_asl_i_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vaslh($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775 { +tc_9c18c9a5, TypeS_2op>, Enc_12b6e9 { let Inst{7-5} = 0b010; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10000000100; @@ -17467,7 +17522,7 @@ def S2_asl_i_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Rdd32 = vaslw($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13201267 { +tc_9c18c9a5, TypeS_2op>, Enc_7e5a82 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000000010; @@ -17476,7 +17531,7 @@ def S2_asl_r_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = asl($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011100; @@ -17485,7 +17540,7 @@ def S2_asl_r_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += asl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011110; @@ -17496,7 +17551,7 @@ def S2_asl_r_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 &= asl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011010; @@ -17507,7 +17562,7 @@ def S2_asl_r_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 -= asl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011100; @@ -17518,7 +17573,7 @@ def S2_asl_r_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 |= asl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011000; @@ -17529,7 +17584,7 @@ def S2_asl_r_p_xor : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 ^= asl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011011; @@ -17540,7 +17595,7 @@ def S2_asl_r_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = asl($Rs32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 { +tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110010; @@ -17551,7 +17606,7 @@ def S2_asl_r_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += asl($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100110; @@ -17564,7 +17619,7 @@ def S2_asl_r_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= asl($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_3c10f809, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100010; @@ -17577,7 +17632,7 @@ def S2_asl_r_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= asl($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100100; @@ -17590,7 +17645,7 @@ def S2_asl_r_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= asl($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_3c10f809, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100000; @@ -17603,19 +17658,20 @@ def S2_asl_r_r_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = asl($Rs32,$Rt32):sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 { +tc_47ab9233, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def S2_asl_r_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vaslh($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011010; @@ -17624,7 +17680,7 @@ def S2_asl_r_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vaslw($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011000; @@ -17633,7 +17689,7 @@ def S2_asr_i_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asr($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995 { +tc_9c18c9a5, TypeS_2op>, Enc_5eac98 { let Inst{7-5} = 0b000; let Inst{31-21} = 0b10000000000; } @@ -17641,7 +17697,7 @@ def S2_asr_i_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 += asr($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b100; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -17651,7 +17707,7 @@ def S2_asr_i_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 &= asr($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_3c10f809, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b000; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -17661,7 +17717,7 @@ def S2_asr_i_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 -= asr($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b000; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -17671,7 +17727,7 @@ def S2_asr_i_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 |= asr($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_3c10f809, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b100; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -17681,7 +17737,7 @@ def S2_asr_i_p_rnd : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asr($Rss32,#$Ii):rnd", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995, Requires<[HasV5T]> { +tc_63cd9d2d, TypeS_2op>, Enc_5eac98, Requires<[HasV5T]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b10000000110; let prefersSlot3 = 1; @@ -17690,14 +17746,14 @@ def S2_asr_i_p_rnd_goodsyntax : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asrrnd($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Requires<[HasV5T]> { +tc_63cd9d2d, TypeS_2op>, Requires<[HasV5T]> { let isPseudo = 1; } def S2_asr_i_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asr($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 { +tc_9c18c9a5, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100000; @@ -17708,7 +17764,7 @@ def S2_asr_i_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 += asr($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -17721,7 +17777,7 @@ def S2_asr_i_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 &= asr($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_3c10f809, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -17734,7 +17790,7 @@ def S2_asr_i_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 -= asr($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -17747,7 +17803,7 @@ def S2_asr_i_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 |= asr($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_3c10f809, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -17760,7 +17816,7 @@ def S2_asr_i_r_rnd : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asr($Rs32,#$Ii):rnd", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 { +tc_63cd9d2d, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100010; @@ -17772,7 +17828,7 @@ def S2_asr_i_r_rnd_goodsyntax : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = asrrnd($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op> { +tc_63cd9d2d, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -17781,18 +17837,19 @@ def S2_asr_i_svw_trun : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Rd32 = vasrw($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2380082 { +tc_7ca2ea10, TypeS_2op>, Enc_8dec2e { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001000110; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_asr_i_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vasrh($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775 { +tc_9c18c9a5, TypeS_2op>, Enc_12b6e9 { let Inst{7-5} = 0b000; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10000000100; @@ -17801,7 +17858,7 @@ def S2_asr_i_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Rdd32 = vasrw($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13201267 { +tc_9c18c9a5, TypeS_2op>, Enc_7e5a82 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000000010; @@ -17810,7 +17867,7 @@ def S2_asr_r_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = asr($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011100; @@ -17819,7 +17876,7 @@ def S2_asr_r_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += asr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011110; @@ -17830,7 +17887,7 @@ def S2_asr_r_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 &= asr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011010; @@ -17841,7 +17898,7 @@ def S2_asr_r_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 -= asr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011100; @@ -17852,7 +17909,7 @@ def S2_asr_r_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 |= asr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011000; @@ -17863,7 +17920,7 @@ def S2_asr_r_p_xor : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 ^= asr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011011; @@ -17874,7 +17931,7 @@ def S2_asr_r_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = asr($Rs32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 { +tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110010; @@ -17885,7 +17942,7 @@ def S2_asr_r_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += asr($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100110; @@ -17898,7 +17955,7 @@ def S2_asr_r_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= asr($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_3c10f809, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100010; @@ -17911,7 +17968,7 @@ def S2_asr_r_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= asr($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100100; @@ -17924,7 +17981,7 @@ def S2_asr_r_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= asr($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_3c10f809, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100000; @@ -17937,30 +17994,32 @@ def S2_asr_r_r_sat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = asr($Rs32,$Rt32):sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 { +tc_47ab9233, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def S2_asr_r_svw_trun : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = vasrw($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14287645 { +tc_7ca2ea10, TypeS_3op>, Enc_3d5b28 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_asr_r_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vasrh($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011010; @@ -17969,7 +18028,7 @@ def S2_asr_r_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vasrw($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011000; @@ -17978,25 +18037,27 @@ def S2_brev : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = brev($Rs32)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 { +tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001100010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_brevp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = brev($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_ab1b5e74, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000110; +let prefersSlot3 = 1; } def S2_cabacdecbin : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = decbin($Rss32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 { +tc_5d806107, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -18008,77 +18069,84 @@ def S2_cl0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = cl0($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10001100000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_cl0p : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = cl0($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001000010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_cl1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = cl1($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001100000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_cl1p : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = cl1($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001000010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_clb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = clb($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001100000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_clbnorm : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = normamt($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10001100000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_clbp : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = clb($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_clrbit_i : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = clrbit($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 { +tc_9c18c9a5, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100110; @@ -18089,7 +18157,7 @@ def S2_clrbit_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = clrbit($Rs32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 { +tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110100; @@ -18100,55 +18168,60 @@ def S2_ct0 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = ct0($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001100010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_ct0p : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = ct0($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001000111; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_ct1 : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = ct1($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_ab1b5e74, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10001100010; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_ct1p : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = ct1($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001000111; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_deinterleave : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = deinterleave($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_ab1b5e74, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000000110; +let prefersSlot3 = 1; } def S2_extractu : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II), "$Rd32 = extractu($Rs32,#$Ii,#$II)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_11930928 { +tc_c0cd91a8, TypeS_2op>, Enc_b388cf { let Inst{13-13} = 0b0; let Inst{31-23} = 0b100011010; let hasNewValue = 1; @@ -18159,7 +18232,7 @@ def S2_extractu_rp : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Rd32 = extractu($Rs32,$Rtt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_15472748 { +tc_87601822, TypeS_3op>, Enc_e07374 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001001000; @@ -18171,7 +18244,7 @@ def S2_extractup : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II), "$Rdd32 = extractu($Rss32,#$Ii,#$II)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_9894557 { +tc_c0cd91a8, TypeS_2op>, Enc_b84c4c { let Inst{31-24} = 0b10000001; let prefersSlot3 = 1; } @@ -18179,7 +18252,7 @@ def S2_extractup_rp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = extractu($Rss32,$Rtt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 { +tc_87601822, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001000; @@ -18189,56 +18262,61 @@ def S2_insert : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = insert($Rs32,#$Ii,#$II)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2880796 { +tc_d95f4e98, TypeS_2op>, Enc_a1e29d { let Inst{13-13} = 0b0; let Inst{31-23} = 0b100011110; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Constraints = "$Rx32 = $Rx32in"; } def S2_insert_rp : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Rx32 = insert($Rs32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_16311032 { +tc_3c10f809, TypeS_3op>, Enc_179b35 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001000000; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Constraints = "$Rx32 = $Rx32in"; } def S2_insertp : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II), "$Rxx32 = insert($Rss32,#$Ii,#$II)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_631197 { +tc_d95f4e98, TypeS_2op>, Enc_143a3c { let Inst{31-24} = 0b10000011; +let prefersSlot3 = 1; let Constraints = "$Rxx32 = $Rxx32in"; } def S2_insertp_rp : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 = insert($Rss32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_12702821 { +tc_3c10f809, TypeS_3op>, Enc_88c16c { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001010000; +let prefersSlot3 = 1; let Constraints = "$Rxx32 = $Rxx32in"; } def S2_interleave : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = interleave($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_ab1b5e74, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000000110; +let prefersSlot3 = 1; } def S2_lfsp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = lfs($Rss32,$Rtt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 { +tc_87601822, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -18248,7 +18326,7 @@ def S2_lsl_r_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = lsl($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011100; @@ -18257,7 +18335,7 @@ def S2_lsl_r_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += lsl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011110; @@ -18268,7 +18346,7 @@ def S2_lsl_r_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 &= lsl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011010; @@ -18279,7 +18357,7 @@ def S2_lsl_r_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 -= lsl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011100; @@ -18290,7 +18368,7 @@ def S2_lsl_r_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 |= lsl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011000; @@ -18301,7 +18379,7 @@ def S2_lsl_r_p_xor : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 ^= lsl($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011011; @@ -18312,7 +18390,7 @@ def S2_lsl_r_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = lsl($Rs32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 { +tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110010; @@ -18323,7 +18401,7 @@ def S2_lsl_r_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += lsl($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100110; @@ -18336,7 +18414,7 @@ def S2_lsl_r_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= lsl($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_3c10f809, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100010; @@ -18349,7 +18427,7 @@ def S2_lsl_r_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= lsl($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100100; @@ -18362,7 +18440,7 @@ def S2_lsl_r_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= lsl($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_3c10f809, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100000; @@ -18375,7 +18453,7 @@ def S2_lsl_r_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vlslh($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011010; @@ -18384,7 +18462,7 @@ def S2_lsl_r_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vlslw($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011000; @@ -18393,7 +18471,7 @@ def S2_lsr_i_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = lsr($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995 { +tc_9c18c9a5, TypeS_2op>, Enc_5eac98 { let Inst{7-5} = 0b001; let Inst{31-21} = 0b10000000000; } @@ -18401,7 +18479,7 @@ def S2_lsr_i_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 += lsr($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b101; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -18411,7 +18489,7 @@ def S2_lsr_i_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 &= lsr($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_3c10f809, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b001; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -18421,7 +18499,7 @@ def S2_lsr_i_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 -= lsr($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_c0cd91a8, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b001; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -18431,7 +18509,7 @@ def S2_lsr_i_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 |= lsr($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_3c10f809, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b101; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -18441,7 +18519,7 @@ def S2_lsr_i_p_xacc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 ^= lsr($Rss32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 { +tc_3c10f809, TypeS_2op>, Enc_70fb07 { let Inst{7-5} = 0b001; let Inst{31-21} = 0b10000010100; let prefersSlot3 = 1; @@ -18451,7 +18529,7 @@ def S2_lsr_i_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = lsr($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 { +tc_9c18c9a5, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100000; @@ -18462,7 +18540,7 @@ def S2_lsr_i_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 += lsr($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -18475,7 +18553,7 @@ def S2_lsr_i_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 &= lsr($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_3c10f809, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -18488,7 +18566,7 @@ def S2_lsr_i_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 -= lsr($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_c0cd91a8, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -18501,7 +18579,7 @@ def S2_lsr_i_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 |= lsr($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_3c10f809, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -18514,7 +18592,7 @@ def S2_lsr_i_r_xacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 ^= lsr($Rs32,#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 { +tc_3c10f809, TypeS_2op>, Enc_28a2dc { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110100; @@ -18527,7 +18605,7 @@ def S2_lsr_i_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vlsrh($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775 { +tc_9c18c9a5, TypeS_2op>, Enc_12b6e9 { let Inst{7-5} = 0b001; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10000000100; @@ -18536,7 +18614,7 @@ def S2_lsr_i_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Rdd32 = vlsrw($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13201267 { +tc_9c18c9a5, TypeS_2op>, Enc_7e5a82 { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000000010; @@ -18545,7 +18623,7 @@ def S2_lsr_r_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = lsr($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011100; @@ -18554,7 +18632,7 @@ def S2_lsr_r_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += lsr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011110; @@ -18565,7 +18643,7 @@ def S2_lsr_r_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 &= lsr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011010; @@ -18576,7 +18654,7 @@ def S2_lsr_r_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 -= lsr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_c0cd91a8, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011100; @@ -18587,7 +18665,7 @@ def S2_lsr_r_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 |= lsr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011000; @@ -18598,7 +18676,7 @@ def S2_lsr_r_p_xor : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 ^= lsr($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 { +tc_3c10f809, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001011011; @@ -18609,7 +18687,7 @@ def S2_lsr_r_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = lsr($Rs32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 { +tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110010; @@ -18620,7 +18698,7 @@ def S2_lsr_r_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += lsr($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100110; @@ -18633,7 +18711,7 @@ def S2_lsr_r_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 &= lsr($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_3c10f809, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100010; @@ -18646,7 +18724,7 @@ def S2_lsr_r_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= lsr($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_c0cd91a8, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100100; @@ -18659,7 +18737,7 @@ def S2_lsr_r_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 |= lsr($Rs32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 { +tc_3c10f809, TypeS_3op>, Enc_2ae154 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001100000; @@ -18672,7 +18750,7 @@ def S2_lsr_r_vh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vlsrh($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011010; @@ -18681,7 +18759,7 @@ def S2_lsr_r_vw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vlsrw($Rss32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 { +tc_9c18c9a5, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011000; @@ -18690,7 +18768,7 @@ def S2_packhl : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = packhl($Rs32,$Rt32)", -ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1997594 { +tc_548f402d, TypeALU32_3op>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11110101100; @@ -18700,7 +18778,7 @@ def S2_parityp : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = parity($Rss32,$Rtt32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_9277990 { +tc_87601822, TypeALU64>, Enc_d2216a { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010000000; @@ -18712,7 +18790,7 @@ def S2_pstorerbf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memb($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_da8d43, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100000; let isPredicated = 1; @@ -18734,7 +18812,7 @@ def S2_pstorerbf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memb($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel { +tc_9b73d261, TypeST>, Enc_cc449f, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -18752,7 +18830,7 @@ def S2_pstorerbf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4) memb($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -18760,7 +18838,7 @@ def S2_pstorerbfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memb($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_cc449f, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -18779,7 +18857,7 @@ def S2_pstorerbnewf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memb($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel { +tc_9da3628f, TypeV2LDST>, Enc_585242, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b00; let Inst{31-21} = 0b01000100101; @@ -18788,8 +18866,8 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; let BaseOpcode = "S2_storerb_io"; @@ -18804,7 +18882,7 @@ def S2_pstorerbnewf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memb($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel { +tc_e2480a7f, TypeST>, Enc_52a5dd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-11} = 0b100; @@ -18814,8 +18892,8 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerb_pi"; let opNewValue = 4; @@ -18825,7 +18903,7 @@ def S2_pstorerbnewf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4) memb($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_9da3628f, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -18834,7 +18912,7 @@ def S2_pstorerbnewfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memb($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel { +tc_8fab9ac3, TypeST>, Enc_52a5dd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b100; @@ -18845,8 +18923,8 @@ let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerb_pi"; let opNewValue = 4; @@ -18856,7 +18934,7 @@ def S2_pstorerbnewt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memb($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel { +tc_9da3628f, TypeV2LDST>, Enc_585242, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b00; let Inst{31-21} = 0b01000000101; @@ -18864,8 +18942,8 @@ let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; let BaseOpcode = "S2_storerb_io"; @@ -18880,7 +18958,7 @@ def S2_pstorerbnewt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memb($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel { +tc_e2480a7f, TypeST>, Enc_52a5dd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-11} = 0b100; @@ -18889,8 +18967,8 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerb_pi"; let opNewValue = 4; @@ -18900,7 +18978,7 @@ def S2_pstorerbnewt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4) memb($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_9da3628f, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -18909,7 +18987,7 @@ def S2_pstorerbnewtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memb($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel { +tc_8fab9ac3, TypeST>, Enc_52a5dd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b100; @@ -18919,8 +18997,8 @@ let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerb_pi"; let opNewValue = 4; @@ -18930,7 +19008,7 @@ def S2_pstorerbt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memb($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_da8d43, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000000; let isPredicated = 1; @@ -18951,7 +19029,7 @@ def S2_pstorerbt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memb($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel { +tc_9b73d261, TypeST>, Enc_cc449f, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -18968,7 +19046,7 @@ def S2_pstorerbt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4) memb($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -18976,7 +19054,7 @@ def S2_pstorerbtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memb($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_cc449f, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -18994,7 +19072,7 @@ def S2_pstorerdf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4) memd($Rs32+#$Ii) = $Rtt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_57a33e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100110; let isPredicated = 1; @@ -19015,7 +19093,7 @@ def S2_pstorerdf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4) memd($Rx32++#$Ii) = $Rtt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel { +tc_9b73d261, TypeST>, Enc_9a33d5, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19033,7 +19111,7 @@ def S2_pstorerdf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32), "if (!$Pv4) memd($Rs32) = $Rtt32", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19041,7 +19119,7 @@ def S2_pstorerdfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd($Rx32++#$Ii) = $Rtt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_9a33d5, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19060,7 +19138,7 @@ def S2_pstorerdt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4) memd($Rs32+#$Ii) = $Rtt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_57a33e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000110; let isPredicated = 1; @@ -19080,7 +19158,7 @@ def S2_pstorerdt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4) memd($Rx32++#$Ii) = $Rtt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel { +tc_9b73d261, TypeST>, Enc_9a33d5, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19097,7 +19175,7 @@ def S2_pstorerdt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32), "if ($Pv4) memd($Rs32) = $Rtt32", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19105,7 +19183,7 @@ def S2_pstorerdtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4.new) memd($Rx32++#$Ii) = $Rtt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_9a33d5, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19123,7 +19201,7 @@ def S2_pstorerff_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32+#$Ii) = $Rt32.h", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100011; let isPredicated = 1; @@ -19144,7 +19222,7 @@ def S2_pstorerff_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rx32++#$Ii) = $Rt32.h", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel { +tc_9b73d261, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19162,7 +19240,7 @@ def S2_pstorerff_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32) = $Rt32.h", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19170,7 +19248,7 @@ def S2_pstorerffnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32.h", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19189,7 +19267,7 @@ def S2_pstorerft_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rs32+#$Ii) = $Rt32.h", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000011; let isPredicated = 1; @@ -19209,7 +19287,7 @@ def S2_pstorerft_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rx32++#$Ii) = $Rt32.h", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel { +tc_9b73d261, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19226,7 +19304,7 @@ def S2_pstorerft_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4) memh($Rs32) = $Rt32.h", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19234,7 +19312,7 @@ def S2_pstorerftnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32.h", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19252,7 +19330,7 @@ def S2_pstorerhf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100010; let isPredicated = 1; @@ -19274,7 +19352,7 @@ def S2_pstorerhf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel { +tc_9b73d261, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19292,7 +19370,7 @@ def S2_pstorerhf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19300,7 +19378,7 @@ def S2_pstorerhfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19319,7 +19397,7 @@ def S2_pstorerhnewf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memh($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel { +tc_9da3628f, TypeV2LDST>, Enc_f44229, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b01; let Inst{31-21} = 0b01000100101; @@ -19328,8 +19406,8 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; let BaseOpcode = "S2_storerh_io"; @@ -19344,7 +19422,7 @@ def S2_pstorerhnewf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memh($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel { +tc_e2480a7f, TypeST>, Enc_31aa6a, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-11} = 0b101; @@ -19354,8 +19432,8 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_pi"; let opNewValue = 4; @@ -19365,7 +19443,7 @@ def S2_pstorerhnewf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4) memh($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_9da3628f, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -19374,7 +19452,7 @@ def S2_pstorerhnewfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memh($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel { +tc_8fab9ac3, TypeST>, Enc_31aa6a, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b101; @@ -19385,8 +19463,8 @@ let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_pi"; let opNewValue = 4; @@ -19396,7 +19474,7 @@ def S2_pstorerhnewt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memh($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel { +tc_9da3628f, TypeV2LDST>, Enc_f44229, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b01; let Inst{31-21} = 0b01000000101; @@ -19404,8 +19482,8 @@ let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; let BaseOpcode = "S2_storerh_io"; @@ -19420,7 +19498,7 @@ def S2_pstorerhnewt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memh($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel { +tc_e2480a7f, TypeST>, Enc_31aa6a, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-11} = 0b101; @@ -19429,8 +19507,8 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_pi"; let opNewValue = 4; @@ -19440,7 +19518,7 @@ def S2_pstorerhnewt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4) memh($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_9da3628f, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -19449,7 +19527,7 @@ def S2_pstorerhnewtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memh($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel { +tc_8fab9ac3, TypeST>, Enc_31aa6a, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b101; @@ -19459,8 +19537,8 @@ let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_pi"; let opNewValue = 4; @@ -19470,7 +19548,7 @@ def S2_pstorerht_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000010; let isPredicated = 1; @@ -19491,7 +19569,7 @@ def S2_pstorerht_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel { +tc_9b73d261, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19508,7 +19586,7 @@ def S2_pstorerht_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4) memh($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19516,7 +19594,7 @@ def S2_pstorerhtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_b886fd, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19534,7 +19612,7 @@ def S2_pstorerif_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memw($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_397f23, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000100100; let isPredicated = 1; @@ -19556,7 +19634,7 @@ def S2_pstorerif_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memw($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel { +tc_9b73d261, TypeST>, Enc_7eaeb6, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19574,7 +19652,7 @@ def S2_pstorerif_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4) memw($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19582,7 +19660,7 @@ def S2_pstorerifnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memw($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_7eaeb6, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19602,7 +19680,7 @@ def S2_pstorerinewf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memw($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel { +tc_9da3628f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b10; let Inst{31-21} = 0b01000100101; @@ -19611,8 +19689,8 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; let BaseOpcode = "S2_storeri_io"; @@ -19627,7 +19705,7 @@ def S2_pstorerinewf_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memw($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel { +tc_e2480a7f, TypeST>, Enc_65f095, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b0; let Inst{13-11} = 0b110; @@ -19637,8 +19715,8 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_pi"; let opNewValue = 4; @@ -19648,7 +19726,7 @@ def S2_pstorerinewf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4) memw($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_9da3628f, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -19657,7 +19735,7 @@ def S2_pstorerinewfnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memw($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel { +tc_8fab9ac3, TypeST>, Enc_65f095, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b110; @@ -19668,8 +19746,8 @@ let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_pi"; let opNewValue = 4; @@ -19679,7 +19757,7 @@ def S2_pstorerinewt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memw($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel { +tc_9da3628f, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b10; let Inst{31-21} = 0b01000000101; @@ -19687,8 +19765,8 @@ let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; let BaseOpcode = "S2_storeri_io"; @@ -19703,7 +19781,7 @@ def S2_pstorerinewt_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memw($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel { +tc_e2480a7f, TypeST>, Enc_65f095, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-11} = 0b110; @@ -19712,8 +19790,8 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_pi"; let opNewValue = 4; @@ -19723,7 +19801,7 @@ def S2_pstorerinewt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4) memw($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_9da3628f, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -19732,7 +19810,7 @@ def S2_pstorerinewtnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memw($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel { +tc_8fab9ac3, TypeST>, Enc_65f095, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b110; @@ -19742,8 +19820,8 @@ let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_pi"; let opNewValue = 4; @@ -19753,7 +19831,7 @@ def S2_pstorerit_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memw($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel { +tc_3d905451, TypeV2LDST>, Enc_397f23, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000000100; let isPredicated = 1; @@ -19774,7 +19852,7 @@ def S2_pstorerit_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memw($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel { +tc_9b73d261, TypeST>, Enc_7eaeb6, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; @@ -19791,7 +19869,7 @@ def S2_pstorerit_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4) memw($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_3d905451, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19799,7 +19877,7 @@ def S2_pstoreritnew_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memw($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel { +tc_7675c0e9, TypeST>, Enc_7eaeb6, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -19817,7 +19895,7 @@ def S2_setbit_i : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = setbit($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 { +tc_9c18c9a5, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100110; @@ -19828,7 +19906,7 @@ def S2_setbit_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = setbit($Rs32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 { +tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110100; @@ -19839,7 +19917,7 @@ def S2_shuffeb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = shuffeb($Rss32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 { +tc_9c18c9a5, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001000; @@ -19848,7 +19926,7 @@ def S2_shuffeh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = shuffeh($Rss32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 { +tc_9c18c9a5, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001000; @@ -19857,7 +19935,7 @@ def S2_shuffob : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = shuffob($Rtt32,$Rss32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11687333 { +tc_9c18c9a5, TypeS_3op>, Enc_ea23e4 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001000; @@ -19866,7 +19944,7 @@ def S2_shuffoh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = shuffoh($Rtt32,$Rss32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11687333 { +tc_9c18c9a5, TypeS_3op>, Enc_ea23e4 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -19875,7 +19953,7 @@ def S2_storerb_io : HInst< (outs), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_13150110, AddrModeRel { +tc_53ee6546, TypeST>, Enc_448f7f, AddrModeRel { let Inst{24-21} = 0b1000; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -19896,7 +19974,7 @@ def S2_storerb_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memb($Rx32++$Mu2:brev) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7255914, AddrModeRel { +tc_20a8e109, TypeST>, Enc_d5c73f, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111000; let accessSize = ByteAccess; @@ -19909,7 +19987,7 @@ def S2_storerb_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32), "memb($Rx32++#$Ii:circ($Mu2)) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_3915770 { +tc_251c87b2, TypeST>, Enc_b15941 { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001000; @@ -19924,7 +20002,7 @@ def S2_storerb_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memb($Rx32++I:circ($Mu2)) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7255914 { +tc_20a8e109, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001000; let addrMode = PostInc; @@ -19938,7 +20016,7 @@ def S2_storerb_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32), "memb($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_12492533, AddrModeRel { +tc_20a8e109, TypeST>, Enc_10bc21, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -19955,7 +20033,7 @@ def S2_storerb_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memb($Rx32++$Mu2) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7255914 { +tc_20a8e109, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101000; let addrMode = PostInc; @@ -19968,7 +20046,7 @@ def S2_storerb_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memb($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_53ee6546, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -19976,7 +20054,7 @@ def S2_storerbgp : HInst< (outs), (ins u32_0Imm:$Ii, IntRegs:$Rt32), "memb(gp+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_12395768, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_1b64fb, AddrModeRel { let Inst{24-21} = 0b0000; let Inst{31-27} = 0b01001; let accessSize = ByteAccess; @@ -19994,15 +20072,15 @@ def S2_storerbnew_io : HInst< (outs), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Nt8), "memb($Rs32+#$Ii) = $Nt8.new", -ST_tc_st_SLOT0, TypeST>, Enc_10002182, AddrModeRel { +tc_6c576d46, TypeST>, Enc_4df4e9, AddrModeRel { let Inst{12-11} = 0b00; let Inst{24-21} = 0b1101; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; let BaseOpcode = "S2_storerb_io"; @@ -20018,14 +20096,14 @@ def S2_storerbnew_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memb($Rx32++$Mu2:brev) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774, AddrModeRel { +tc_c8f9a6f6, TypeST>, Enc_8dbe85, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b10101111101; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "S2_storerb_pbr"; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; @@ -20034,7 +20112,7 @@ def S2_storerbnew_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8), "memb($Rx32++#$Ii:circ($Mu2)) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_5326450 { +tc_9c68db63, TypeST>, Enc_96ce4f { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{12-11} = 0b00; @@ -20042,8 +20120,8 @@ let Inst{31-21} = 0b10101001101; let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let Uses = [CS]; let opNewValue = 4; let Constraints = "$Rx32 = $Rx32in"; @@ -20052,15 +20130,15 @@ def S2_storerbnew_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memb($Rx32++I:circ($Mu2)) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774 { +tc_c8f9a6f6, TypeST>, Enc_8dbe85 { let Inst{7-0} = 0b00000010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b10101001101; let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let Uses = [CS]; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; @@ -20069,7 +20147,7 @@ def S2_storerbnew_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8), "memb($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_5900401, AddrModeRel { +tc_c8f9a6f6, TypeST>, Enc_c7cd90, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-11} = 0b000; @@ -20077,8 +20155,8 @@ let Inst{31-21} = 0b10101011101; let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "S2_storerb_pi"; let isPredicable = 1; let isNVStorable = 1; @@ -20089,15 +20167,15 @@ def S2_storerbnew_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memb($Rx32++$Mu2) = $Nt8.new", -ST_tc_st_SLOT0, TypeST>, Enc_10067774 { +tc_c8f9a6f6, TypeST>, Enc_8dbe85 { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b10101101101; let addrMode = PostInc; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; } @@ -20105,7 +20183,7 @@ def S2_storerbnew_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Nt8), "memb($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_6c576d46, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 1; @@ -20114,14 +20192,14 @@ def S2_storerbnewgp : HInst< (outs), (ins u32_0Imm:$Ii, IntRegs:$Nt8), "memb(gp+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_4050532, AddrModeRel { +tc_9e86015f, TypeV2LDST>, Enc_ad1831, AddrModeRel { let Inst{12-11} = 0b00; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let Uses = [GP]; let BaseOpcode = "S2_storerbabs"; let isPredicable = 1; @@ -20135,7 +20213,7 @@ def S2_storerd_io : HInst< (outs), (ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rs32+#$Ii) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_16319737, AddrModeRel { +tc_53ee6546, TypeST>, Enc_ce6828, AddrModeRel { let Inst{24-21} = 0b1110; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -20155,7 +20233,7 @@ def S2_storerd_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32), "memd($Rx32++$Mu2:brev) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_15816255 { +tc_20a8e109, TypeST>, Enc_928ca1 { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111110; let accessSize = DoubleWordAccess; @@ -20166,7 +20244,7 @@ def S2_storerd_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2, DoubleRegs:$Rtt32), "memd($Rx32++#$Ii:circ($Mu2)) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_4501395 { +tc_251c87b2, TypeST>, Enc_395cc4 { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001110; @@ -20180,7 +20258,7 @@ def S2_storerd_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32), "memd($Rx32++I:circ($Mu2)) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_15816255 { +tc_20a8e109, TypeST>, Enc_928ca1 { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001110; let addrMode = PostInc; @@ -20193,7 +20271,7 @@ def S2_storerd_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rx32++#$Ii) = $Rtt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11271630, AddrModeRel { +tc_20a8e109, TypeST>, Enc_85bf58, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -20210,7 +20288,7 @@ def S2_storerd_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32), "memd($Rx32++$Mu2) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_15816255 { +tc_20a8e109, TypeST>, Enc_928ca1 { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101110; let addrMode = PostInc; @@ -20222,7 +20300,7 @@ def S2_storerd_zomap : HInst< (outs), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "memd($Rs32) = $Rtt32", -PSEUDO, TypeMAPPING> { +tc_53ee6546, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -20230,7 +20308,7 @@ def S2_storerdgp : HInst< (outs), (ins u29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd(gp+#$Ii) = $Rtt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11682941, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_5c124a, AddrModeRel { let Inst{24-21} = 0b0110; let Inst{31-27} = 0b01001; let accessSize = DoubleWordAccess; @@ -20247,7 +20325,7 @@ def S2_storerf_io : HInst< (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_7736768, AddrModeRel { +tc_53ee6546, TypeST>, Enc_e957fb, AddrModeRel { let Inst{24-21} = 0b1011; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -20267,7 +20345,7 @@ def S2_storerf_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++$Mu2:brev) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_7255914 { +tc_20a8e109, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111011; let accessSize = HalfWordAccess; @@ -20278,7 +20356,7 @@ def S2_storerf_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++#$Ii:circ($Mu2)) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_10915758 { +tc_251c87b2, TypeST>, Enc_935d9b { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001011; @@ -20292,7 +20370,7 @@ def S2_storerf_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++I:circ($Mu2)) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_7255914 { +tc_20a8e109, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001011; let addrMode = PostInc; @@ -20305,7 +20383,7 @@ def S2_storerf_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "memh($Rx32++#$Ii) = $Rt32.h", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11492529, AddrModeRel { +tc_20a8e109, TypeST>, Enc_052c7d, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -20322,7 +20400,7 @@ def S2_storerf_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++$Mu2) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_7255914 { +tc_20a8e109, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101011; let addrMode = PostInc; @@ -20334,7 +20412,7 @@ def S2_storerf_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) = $Rt32.h", -PSEUDO, TypeMAPPING> { +tc_53ee6546, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -20342,7 +20420,7 @@ def S2_storerfgp : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Rt32), "memh(gp+#$Ii) = $Rt32.h", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_fda92c, AddrModeRel { let Inst{24-21} = 0b0011; let Inst{31-27} = 0b01001; let accessSize = HalfWordAccess; @@ -20359,7 +20437,7 @@ def S2_storerh_io : HInst< (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7736768, AddrModeRel { +tc_53ee6546, TypeST>, Enc_e957fb, AddrModeRel { let Inst{24-21} = 0b1010; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -20380,7 +20458,7 @@ def S2_storerh_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++$Mu2:brev) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7255914, AddrModeRel { +tc_20a8e109, TypeST>, Enc_d5c73f, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111010; let accessSize = HalfWordAccess; @@ -20393,7 +20471,7 @@ def S2_storerh_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++#$Ii:circ($Mu2)) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_10915758 { +tc_251c87b2, TypeST>, Enc_935d9b { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001010; @@ -20408,7 +20486,7 @@ def S2_storerh_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++I:circ($Mu2)) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7255914 { +tc_20a8e109, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001010; let addrMode = PostInc; @@ -20422,7 +20500,7 @@ def S2_storerh_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32), "memh($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_11492529, AddrModeRel { +tc_20a8e109, TypeST>, Enc_052c7d, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -20439,7 +20517,7 @@ def S2_storerh_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memh($Rx32++$Mu2) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7255914 { +tc_20a8e109, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101010; let addrMode = PostInc; @@ -20452,7 +20530,7 @@ def S2_storerh_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memh($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_53ee6546, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -20460,7 +20538,7 @@ def S2_storerhgp : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Rt32), "memh(gp+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_fda92c, AddrModeRel { let Inst{24-21} = 0b0010; let Inst{31-27} = 0b01001; let accessSize = HalfWordAccess; @@ -20478,15 +20556,15 @@ def S2_storerhnew_io : HInst< (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Nt8), "memh($Rs32+#$Ii) = $Nt8.new", -ST_tc_st_SLOT0, TypeST>, Enc_748676, AddrModeRel { +tc_6c576d46, TypeST>, Enc_0d8870, AddrModeRel { let Inst{12-11} = 0b01; let Inst{24-21} = 0b1101; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; let BaseOpcode = "S2_storerh_io"; @@ -20502,14 +20580,14 @@ def S2_storerhnew_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memh($Rx32++$Mu2:brev) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774, AddrModeRel { +tc_c8f9a6f6, TypeST>, Enc_8dbe85, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b01; let Inst{31-21} = 0b10101111101; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "S2_storerh_pbr"; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; @@ -20518,7 +20596,7 @@ def S2_storerhnew_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8), "memh($Rx32++#$Ii:circ($Mu2)) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10326434 { +tc_9c68db63, TypeST>, Enc_91b9fe { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{12-11} = 0b01; @@ -20526,8 +20604,8 @@ let Inst{31-21} = 0b10101001101; let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let Uses = [CS]; let opNewValue = 4; let Constraints = "$Rx32 = $Rx32in"; @@ -20536,15 +20614,15 @@ def S2_storerhnew_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memh($Rx32++I:circ($Mu2)) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774 { +tc_c8f9a6f6, TypeST>, Enc_8dbe85 { let Inst{7-0} = 0b00000010; let Inst{12-11} = 0b01; let Inst{31-21} = 0b10101001101; let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let Uses = [CS]; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; @@ -20553,7 +20631,7 @@ def S2_storerhnew_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8), "memh($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_6900405, AddrModeRel { +tc_c8f9a6f6, TypeST>, Enc_e26546, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-11} = 0b001; @@ -20561,8 +20639,8 @@ let Inst{31-21} = 0b10101011101; let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "S2_storerh_pi"; let isNVStorable = 1; let isPredicable = 1; @@ -20573,15 +20651,15 @@ def S2_storerhnew_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memh($Rx32++$Mu2) = $Nt8.new", -ST_tc_st_SLOT0, TypeST>, Enc_10067774 { +tc_c8f9a6f6, TypeST>, Enc_8dbe85 { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b01; let Inst{31-21} = 0b10101101101; let addrMode = PostInc; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; } @@ -20589,7 +20667,7 @@ def S2_storerhnew_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Nt8), "memh($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_6c576d46, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 1; @@ -20598,14 +20676,14 @@ def S2_storerhnewgp : HInst< (outs), (ins u31_1Imm:$Ii, IntRegs:$Nt8), "memh(gp+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_13618890, AddrModeRel { +tc_9e86015f, TypeV2LDST>, Enc_bc03e5, AddrModeRel { let Inst{12-11} = 0b01; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let Uses = [GP]; let BaseOpcode = "S2_storerhabs"; let isPredicable = 1; @@ -20619,7 +20697,7 @@ def S2_storeri_io : HInst< (outs), (ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_6673186, AddrModeRel { +tc_53ee6546, TypeST>, Enc_143445, AddrModeRel { let Inst{24-21} = 0b1100; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; @@ -20640,7 +20718,7 @@ def S2_storeri_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memw($Rx32++$Mu2:brev) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7255914, AddrModeRel { +tc_20a8e109, TypeST>, Enc_d5c73f, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101111100; let accessSize = WordAccess; @@ -20653,7 +20731,7 @@ def S2_storeri_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32), "memw($Rx32++#$Ii:circ($Mu2)) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_9915754 { +tc_251c87b2, TypeST>, Enc_79b8c8 { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{31-21} = 0b10101001100; @@ -20668,7 +20746,7 @@ def S2_storeri_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memw($Rx32++I:circ($Mu2)) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7255914 { +tc_20a8e109, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000010; let Inst{31-21} = 0b10101001100; let addrMode = PostInc; @@ -20682,7 +20760,7 @@ def S2_storeri_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32), "memw($Rx32++#$Ii) = $Rt32", -ST_tc_st_pi_SLOT01, TypeST>, Enc_10492541, AddrModeRel { +tc_20a8e109, TypeST>, Enc_db40cd, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; @@ -20699,7 +20777,7 @@ def S2_storeri_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32), "memw($Rx32++$Mu2) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_7255914 { +tc_20a8e109, TypeST>, Enc_d5c73f { let Inst{7-0} = 0b00000000; let Inst{31-21} = 0b10101101100; let addrMode = PostInc; @@ -20712,7 +20790,7 @@ def S2_storeri_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_53ee6546, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -20720,7 +20798,7 @@ def S2_storerigp : HInst< (outs), (ins u30_2Imm:$Ii, IntRegs:$Rt32), "memw(gp+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_15999208, AddrModeRel { +tc_c14739d5, TypeV2LDST>, Enc_541f26, AddrModeRel { let Inst{24-21} = 0b0100; let Inst{31-27} = 0b01001; let accessSize = WordAccess; @@ -20738,15 +20816,15 @@ def S2_storerinew_io : HInst< (outs), (ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Nt8), "memw($Rs32+#$Ii) = $Nt8.new", -ST_tc_st_SLOT0, TypeST>, Enc_8409782, AddrModeRel { +tc_6c576d46, TypeST>, Enc_690862, AddrModeRel { let Inst{12-11} = 0b10; let Inst{24-21} = 0b1101; let Inst{31-27} = 0b10100; let addrMode = BaseImmOffset; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; let BaseOpcode = "S2_storeri_io"; @@ -20762,14 +20840,14 @@ def S2_storerinew_pbr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memw($Rx32++$Mu2:brev) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774, AddrModeRel { +tc_c8f9a6f6, TypeST>, Enc_8dbe85, AddrModeRel { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b10; let Inst{31-21} = 0b10101111101; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "S2_storeri_pbr"; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; @@ -20778,7 +20856,7 @@ def S2_storerinew_pci : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8), "memw($Rx32++#$Ii:circ($Mu2)) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_11326438 { +tc_9c68db63, TypeST>, Enc_3f97c8 { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{12-11} = 0b10; @@ -20786,8 +20864,8 @@ let Inst{31-21} = 0b10101001101; let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let Uses = [CS]; let opNewValue = 4; let Constraints = "$Rx32 = $Rx32in"; @@ -20796,15 +20874,15 @@ def S2_storerinew_pcr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memw($Rx32++I:circ($Mu2)) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774 { +tc_c8f9a6f6, TypeST>, Enc_8dbe85 { let Inst{7-0} = 0b00000010; let Inst{12-11} = 0b10; let Inst{31-21} = 0b10101001101; let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let Uses = [CS]; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; @@ -20813,7 +20891,7 @@ def S2_storerinew_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8), "memw($Rx32++#$Ii) = $Nt8.new", -ST_tc_st_pi_SLOT0, TypeST>, Enc_7900405, AddrModeRel { +tc_c8f9a6f6, TypeST>, Enc_223005, AddrModeRel { let Inst{2-0} = 0b000; let Inst{7-7} = 0b0; let Inst{13-11} = 0b010; @@ -20821,8 +20899,8 @@ let Inst{31-21} = 0b10101011101; let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "S2_storeri_pi"; let isPredicable = 1; let opNewValue = 3; @@ -20832,15 +20910,15 @@ def S2_storerinew_pr : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8), "memw($Rx32++$Mu2) = $Nt8.new", -ST_tc_st_SLOT0, TypeST>, Enc_10067774 { +tc_c8f9a6f6, TypeST>, Enc_8dbe85 { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b10; let Inst{31-21} = 0b10101101101; let addrMode = PostInc; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let opNewValue = 3; let Constraints = "$Rx32 = $Rx32in"; } @@ -20848,7 +20926,7 @@ def S2_storerinew_zomap : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Nt8), "memw($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_6c576d46, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 1; @@ -20857,14 +20935,14 @@ def S2_storerinewgp : HInst< (outs), (ins u30_2Imm:$Ii, IntRegs:$Nt8), "memw(gp+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_12297800, AddrModeRel { +tc_9e86015f, TypeV2LDST>, Enc_78cbf0, AddrModeRel { let Inst{12-11} = 0b10; let Inst{24-21} = 0b0101; let Inst{31-27} = 0b01001; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let Uses = [GP]; let BaseOpcode = "S2_storeriabs"; let isPredicable = 1; @@ -20878,20 +20956,20 @@ def S2_storew_locked : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "memw_locked($Rs32,$Pd4) = $Rt32", -ST_tc_ld_SLOT0, TypeST>, Enc_10157519 { +tc_7d01cbdc, TypeST>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10100000101; let accessSize = WordAccess; +let isPredicateLate = 1; let isSoloAX = 1; let mayStore = 1; -let isPredicateLate = 1; } def S2_svsathb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = vsathb($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; @@ -20902,7 +20980,7 @@ def S2_svsathub : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = vsathub($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001100100; let hasNewValue = 1; @@ -20913,7 +20991,7 @@ def S2_tableidxb : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II), "$Rx32 = tableidxb($Rs32,#$Ii,#$II):raw", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 { +tc_d95f4e98, TypeS_2op>, Enc_cd82bc { let Inst{31-22} = 0b1000011100; let hasNewValue = 1; let opNewValue = 0; @@ -20924,7 +21002,7 @@ def S2_tableidxb_goodsyntax : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = tableidxb($Rs32,#$Ii,#$II)", -S_2op_tc_1_SLOT23, TypeS_2op> { +tc_d95f4e98, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -20935,7 +21013,7 @@ def S2_tableidxd : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II), "$Rx32 = tableidxd($Rs32,#$Ii,#$II):raw", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 { +tc_d95f4e98, TypeS_2op>, Enc_cd82bc { let Inst{31-22} = 0b1000011111; let hasNewValue = 1; let opNewValue = 0; @@ -20946,7 +21024,7 @@ def S2_tableidxd_goodsyntax : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = tableidxd($Rs32,#$Ii,#$II)", -S_2op_tc_1_SLOT23, TypeS_2op> { +tc_d95f4e98, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -20956,7 +21034,7 @@ def S2_tableidxh : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II), "$Rx32 = tableidxh($Rs32,#$Ii,#$II):raw", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 { +tc_d95f4e98, TypeS_2op>, Enc_cd82bc { let Inst{31-22} = 0b1000011101; let hasNewValue = 1; let opNewValue = 0; @@ -20967,7 +21045,7 @@ def S2_tableidxh_goodsyntax : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = tableidxh($Rs32,#$Ii,#$II)", -S_2op_tc_1_SLOT23, TypeS_2op> { +tc_d95f4e98, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -20977,7 +21055,7 @@ def S2_tableidxw : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II), "$Rx32 = tableidxw($Rs32,#$Ii,#$II):raw", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 { +tc_d95f4e98, TypeS_2op>, Enc_cd82bc { let Inst{31-22} = 0b1000011110; let hasNewValue = 1; let opNewValue = 0; @@ -20988,7 +21066,7 @@ def S2_tableidxw_goodsyntax : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II), "$Rx32 = tableidxw($Rs32,#$Ii,#$II)", -S_2op_tc_1_SLOT23, TypeS_2op> { +tc_d95f4e98, TypeS_2op> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -20998,7 +21076,7 @@ def S2_togglebit_i : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = togglebit($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 { +tc_9c18c9a5, TypeS_2op>, Enc_a05677 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100110; @@ -21009,7 +21087,7 @@ def S2_togglebit_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = togglebit($Rs32,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 { +tc_9c18c9a5, TypeS_3op>, Enc_5ab2be { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110100; @@ -21020,7 +21098,7 @@ def S2_tstbit_i : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Pd4 = tstbit($Rs32,#$Ii)", -S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_2103742 { +tc_5fa2857c, TypeS_2op>, Enc_83ee64 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000101000; @@ -21029,7 +21107,7 @@ def S2_tstbit_r : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = tstbit($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 { +tc_c58f771a, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111000; @@ -21038,7 +21116,7 @@ def S2_valignib : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, u3_0Imm:$Ii), "$Rdd32 = valignb($Rtt32,$Rss32,#$Ii)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11971407 { +tc_d1b5a4b6, TypeS_3op>, Enc_729ff7 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000000000; } @@ -21046,7 +21124,7 @@ def S2_valignrb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, PredRegs:$Pu4), "$Rdd32 = valignb($Rtt32,$Rss32,$Pu4)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11552785 { +tc_d1b5a4b6, TypeS_3op>, Enc_8c6530 { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000010000; @@ -21055,7 +21133,7 @@ def S2_vcnegh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vcnegh($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8940892 { +tc_47ab9233, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011110; @@ -21066,7 +21144,7 @@ def S2_vcrotate : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rdd32 = vcrotate($Rss32,$Rt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8940892 { +tc_63cd9d2d, TypeS_3op>, Enc_927852 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000011110; @@ -21077,7 +21155,7 @@ def S2_vrcnegh : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rxx32 += vrcnegh($Rss32,$Rt32)", -S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_7912540 { +tc_8cb685d9, TypeS_3op>, Enc_1aa186 { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11001011001; @@ -21088,28 +21166,30 @@ def S2_vrndpackwh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vrndwh($Rss32)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_3742184 { +tc_88fa2da6, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S2_vrndpackwhs : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vrndwh($Rss32):sat", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_3742184 { +tc_94e6ffd9, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def S2_vsathb : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vsathb($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -21120,7 +21200,7 @@ def S2_vsathb_nopack : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vsathb($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10000000000; let Defs = [USR_OVF]; @@ -21129,7 +21209,7 @@ def S2_vsathub : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vsathub($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -21140,7 +21220,7 @@ def S2_vsathub_nopack : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vsathub($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000000000; let Defs = [USR_OVF]; @@ -21149,7 +21229,7 @@ def S2_vsatwh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vsatwh($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -21160,7 +21240,7 @@ def S2_vsatwh_nopack : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vsatwh($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000000; let Defs = [USR_OVF]; @@ -21169,7 +21249,7 @@ def S2_vsatwuh : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vsatwuh($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -21180,7 +21260,7 @@ def S2_vsatwuh_nopack : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = vsatwuh($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 { +tc_b86c7e8b, TypeS_2op>, Enc_b9c5fb { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000000000; let Defs = [USR_OVF]; @@ -21189,7 +21269,7 @@ def S2_vsplatrb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = vsplatb($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 { +tc_b86c7e8b, TypeS_2op>, Enc_5e2823 { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10001100010; let hasNewValue = 1; @@ -21201,7 +21281,7 @@ def S2_vsplatrh : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vsplath($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 { +tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000100010; let isReMaterializable = 1; @@ -21211,7 +21291,7 @@ def S2_vspliceib : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, u3_0Imm:$Ii), "$Rdd32 = vspliceb($Rss32,$Rtt32,#$Ii)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_16730127 { +tc_d1b5a4b6, TypeS_3op>, Enc_d50cd3 { let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000000100; } @@ -21219,7 +21299,7 @@ def S2_vsplicerb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Pu4), "$Rdd32 = vspliceb($Rss32,$Rtt32,$Pu4)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_5178985 { +tc_d1b5a4b6, TypeS_3op>, Enc_dbd70c { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000010100; @@ -21228,7 +21308,7 @@ def S2_vsxtbh : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vsxtbh($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 { +tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000100000; let isReMaterializable = 1; @@ -21238,7 +21318,7 @@ def S2_vsxthw : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vsxthw($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 { +tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000100000; let isReMaterializable = 1; @@ -21248,7 +21328,7 @@ def S2_vtrunehb : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vtrunehb($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; @@ -21258,7 +21338,7 @@ def S2_vtrunewh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunewh($Rss32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 { +tc_9c18c9a5, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -21267,7 +21347,7 @@ def S2_vtrunohb : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = vtrunohb($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_b86c7e8b, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; @@ -21277,7 +21357,7 @@ def S2_vtrunowh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunowh($Rss32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 { +tc_9c18c9a5, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -21286,7 +21366,7 @@ def S2_vzxtbh : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vzxtbh($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 { +tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000100000; let isReMaterializable = 1; @@ -21296,7 +21376,7 @@ def S2_vzxthw : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vzxthw($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 { +tc_b86c7e8b, TypeS_2op>, Enc_3a3d62 { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000100000; let isReMaterializable = 1; @@ -21306,7 +21386,7 @@ def S4_addaddi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Ru32, s32_0Imm:$Ii), "$Rd32 = add($Rs32,add($Ru32,#$Ii))", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_6495334 { +tc_090485bb, TypeALU64>, Enc_8b8d61 { let Inst{31-23} = 0b110110110; let hasNewValue = 1; let opNewValue = 0; @@ -21321,7 +21401,7 @@ def S4_addi_asl_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = add(#$Ii,asl($Rx32in,#$II))", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 { +tc_c0cd91a8, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b100; let Inst{4-4} = 0b0; let Inst{31-24} = 0b11011110; @@ -21339,7 +21419,7 @@ def S4_addi_lsr_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = add(#$Ii,lsr($Rx32in,#$II))", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 { +tc_c0cd91a8, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b100; let Inst{4-4} = 0b1; let Inst{31-24} = 0b11011110; @@ -21357,7 +21437,7 @@ def S4_andi_asl_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = and(#$Ii,asl($Rx32in,#$II))", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 { +tc_3c10f809, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b000; let Inst{4-4} = 0b0; let Inst{31-24} = 0b11011110; @@ -21375,7 +21455,7 @@ def S4_andi_lsr_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = and(#$Ii,lsr($Rx32in,#$II))", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 { +tc_3c10f809, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b000; let Inst{4-4} = 0b1; let Inst{31-24} = 0b11011110; @@ -21393,7 +21473,7 @@ def S4_clbaddi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s6_0Imm:$Ii), "$Rd32 = add(clb($Rs32),#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_5523416 { +tc_87601822, TypeS_2op>, Enc_9fae8a { let Inst{7-5} = 0b000; let Inst{31-21} = 0b10001100001; let hasNewValue = 1; @@ -21404,7 +21484,7 @@ def S4_clbpaddi : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, s6_0Imm:$Ii), "$Rd32 = add(clb($Rss32),#$Ii)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_10188026 { +tc_87601822, TypeS_2op>, Enc_a1640c { let Inst{7-5} = 0b010; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; @@ -21415,17 +21495,18 @@ def S4_clbpnorm : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = normamt($Rss32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 { +tc_ab1b5e74, TypeS_2op>, Enc_90cd8b { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; } def S4_extract : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II), "$Rd32 = extract($Rs32,#$Ii,#$II)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_11930928 { +tc_c0cd91a8, TypeS_2op>, Enc_b388cf { let Inst{13-13} = 0b0; let Inst{31-23} = 0b100011011; let hasNewValue = 1; @@ -21436,7 +21517,7 @@ def S4_extract_rp : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "$Rd32 = extract($Rs32,$Rtt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_15472748 { +tc_87601822, TypeS_3op>, Enc_e07374 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11001001000; @@ -21448,7 +21529,7 @@ def S4_extractp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II), "$Rdd32 = extract($Rss32,#$Ii,#$II)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_9894557 { +tc_c0cd91a8, TypeS_2op>, Enc_b84c4c { let Inst{31-24} = 0b10001010; let prefersSlot3 = 1; } @@ -21456,7 +21537,7 @@ def S4_extractp_rp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = extract($Rss32,$Rtt32)", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 { +tc_87601822, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -21466,7 +21547,7 @@ def S4_lsli : HInst< (outs IntRegs:$Rd32), (ins s6_0Imm:$Ii, IntRegs:$Rt32), "$Rd32 = lsl(#$Ii,$Rt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_518319 { +tc_9c18c9a5, TypeS_3op>, Enc_fef969 { let Inst{7-6} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000110100; @@ -21477,7 +21558,7 @@ def S4_ntstbit_i : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Pd4 = !tstbit($Rs32,#$Ii)", -S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_2103742 { +tc_5fa2857c, TypeS_2op>, Enc_83ee64 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000101001; @@ -21486,7 +21567,7 @@ def S4_ntstbit_r : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = !tstbit($Rs32,$Rt32)", -S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 { +tc_c58f771a, TypeS_3op>, Enc_c2b48e { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111001; @@ -21495,7 +21576,7 @@ def S4_or_andi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rx32 |= and($Rs32,#$Ii)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_6356866 { +tc_3c10f809, TypeALU64>, Enc_b0e9d8 { let Inst{31-22} = 0b1101101000; let hasNewValue = 1; let opNewValue = 0; @@ -21512,7 +21593,7 @@ def S4_or_andix : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Ru32, IntRegs:$Rx32in, s32_0Imm:$Ii), "$Rx32 = or($Ru32,and($Rx32in,#$Ii))", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_7504828 { +tc_3c10f809, TypeALU64>, Enc_b4e6cf { let Inst{31-22} = 0b1101101001; let hasNewValue = 1; let opNewValue = 0; @@ -21528,7 +21609,7 @@ def S4_or_ori : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii), "$Rx32 |= or($Rs32,#$Ii)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_6356866 { +tc_3c10f809, TypeALU64>, Enc_b0e9d8 { let Inst{31-22} = 0b1101101010; let hasNewValue = 1; let opNewValue = 0; @@ -21545,7 +21626,7 @@ def S4_ori_asl_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = or(#$Ii,asl($Rx32in,#$II))", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 { +tc_3c10f809, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b010; let Inst{4-4} = 0b0; let Inst{31-24} = 0b11011110; @@ -21563,7 +21644,7 @@ def S4_ori_lsr_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = or(#$Ii,lsr($Rx32in,#$II))", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 { +tc_3c10f809, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b010; let Inst{4-4} = 0b1; let Inst{31-24} = 0b11011110; @@ -21581,7 +21662,7 @@ def S4_parity : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = parity($Rs32,$Rt32)", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 { +tc_87601822, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101111; @@ -21593,7 +21674,7 @@ def S4_pstorerbf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memb(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -21618,7 +21699,7 @@ def S4_pstorerbf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110101000; let isPredicated = 1; let isPredicatedFalse = 1; @@ -21634,7 +21715,7 @@ def S4_pstorerbfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memb(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -21643,8 +21724,8 @@ let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; @@ -21660,7 +21741,7 @@ def S4_pstorerbfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memb($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_da8d43, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110000; let isPredicated = 1; @@ -21683,7 +21764,7 @@ def S4_pstorerbfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110111000; let isPredicated = 1; let isPredicatedFalse = 1; @@ -21700,7 +21781,7 @@ def S4_pstorerbfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4.new) memb($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -21708,7 +21789,7 @@ def S4_pstorerbnewf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memb(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b000; @@ -21718,9 +21799,9 @@ let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = ByteAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; let DecoderNamespace = "MustExtend"; @@ -21735,7 +21816,7 @@ def S4_pstorerbnewf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b00; let Inst{31-21} = 0b00110101101; let isPredicated = 1; @@ -21743,8 +21824,8 @@ let isPredicatedFalse = 1; let addrMode = BaseRegOffset; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; let BaseOpcode = "S4_storerb_rr"; @@ -21754,7 +21835,7 @@ def S4_pstorerbnewfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memb(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b100; @@ -21764,10 +21845,10 @@ let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = ByteAccess; let isNVStore = 1; -let isExtended = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let isExtended = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; let DecoderNamespace = "MustExtend"; @@ -21782,7 +21863,7 @@ def S4_pstorerbnewfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memb($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel { +tc_c8f9a6f6, TypeV2LDST>, Enc_585242, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b00; let Inst{31-21} = 0b01000110101; @@ -21792,8 +21873,8 @@ let addrMode = BaseImmOffset; let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; let BaseOpcode = "S2_storerb_io"; @@ -21808,7 +21889,7 @@ def S4_pstorerbnewfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b00; let Inst{31-21} = 0b00110111101; let isPredicated = 1; @@ -21817,8 +21898,8 @@ let addrMode = BaseRegOffset; let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; let BaseOpcode = "S4_storerb_rr"; @@ -21828,7 +21909,7 @@ def S4_pstorerbnewfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4.new) memb($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_c8f9a6f6, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -21837,7 +21918,7 @@ def S4_pstorerbnewt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memb(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b000; @@ -21846,9 +21927,9 @@ let isPredicated = 1; let addrMode = Absolute; let accessSize = ByteAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; let DecoderNamespace = "MustExtend"; @@ -21863,15 +21944,15 @@ def S4_pstorerbnewt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b00; let Inst{31-21} = 0b00110100101; let isPredicated = 1; let addrMode = BaseRegOffset; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; let BaseOpcode = "S4_storerb_rr"; @@ -21881,7 +21962,7 @@ def S4_pstorerbnewtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memb(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b100; @@ -21890,10 +21971,10 @@ let isPredicated = 1; let addrMode = Absolute; let accessSize = ByteAccess; let isNVStore = 1; -let isExtended = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let isExtended = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; let DecoderNamespace = "MustExtend"; @@ -21908,7 +21989,7 @@ def S4_pstorerbnewtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memb($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel { +tc_c8f9a6f6, TypeV2LDST>, Enc_585242, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b00; let Inst{31-21} = 0b01000010101; @@ -21917,8 +21998,8 @@ let addrMode = BaseImmOffset; let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "imm"; let BaseOpcode = "S2_storerb_io"; @@ -21933,7 +22014,7 @@ def S4_pstorerbnewtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b00; let Inst{31-21} = 0b00110110101; let isPredicated = 1; @@ -21941,8 +22022,8 @@ let addrMode = BaseRegOffset; let accessSize = ByteAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; let BaseOpcode = "S4_storerb_rr"; @@ -21952,7 +22033,7 @@ def S4_pstorerbnewtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4.new) memb($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_c8f9a6f6, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -21961,7 +22042,7 @@ def S4_pstorerbt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memb(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -21985,7 +22066,7 @@ def S4_pstorerbt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110100000; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22000,7 +22081,7 @@ def S4_pstorerbtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memb(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22008,8 +22089,8 @@ let Inst{31-18} = 0b10101111000000; let isPredicated = 1; let addrMode = Absolute; let accessSize = ByteAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S2_storerbabs"; @@ -22025,7 +22106,7 @@ def S4_pstorerbtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memb($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_da8d43, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010000; let isPredicated = 1; @@ -22047,7 +22128,7 @@ def S4_pstorerbtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110110000; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22063,7 +22144,7 @@ def S4_pstorerbtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4.new) memb($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22071,7 +22152,7 @@ def S4_pstorerdf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4) memd(#$Ii) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel { +tc_c85212ca, TypeST>, Enc_50b5ac, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22095,7 +22176,7 @@ def S4_pstorerdf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_1a9974, AddrModeRel { let Inst{31-21} = 0b00110101110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22110,7 +22191,7 @@ def S4_pstorerdfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd(#$Ii) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel { +tc_336e698c, TypeST>, Enc_50b5ac, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22119,8 +22200,8 @@ let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = DoubleWordAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storerd"; let BaseOpcode = "S2_storerdabs"; @@ -22135,7 +22216,7 @@ def S4_pstorerdfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd($Rs32+#$Ii) = $Rtt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_57a33e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110110; let isPredicated = 1; @@ -22157,7 +22238,7 @@ def S4_pstorerdfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_1a9974, AddrModeRel { let Inst{31-21} = 0b00110111110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22173,7 +22254,7 @@ def S4_pstorerdfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32), "if (!$Pv4.new) memd($Rs32) = $Rtt32", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22181,7 +22262,7 @@ def S4_pstorerdt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4) memd(#$Ii) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel { +tc_c85212ca, TypeST>, Enc_50b5ac, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22204,7 +22285,7 @@ def S4_pstorerdt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_1a9974, AddrModeRel { let Inst{31-21} = 0b00110100110; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22218,7 +22299,7 @@ def S4_pstorerdtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4.new) memd(#$Ii) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel { +tc_336e698c, TypeST>, Enc_50b5ac, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22226,8 +22307,8 @@ let Inst{31-18} = 0b10101111110000; let isPredicated = 1; let addrMode = Absolute; let accessSize = DoubleWordAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storerd"; let BaseOpcode = "S2_storerdabs"; @@ -22242,7 +22323,7 @@ def S4_pstorerdtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4.new) memd($Rs32+#$Ii) = $Rtt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_57a33e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010110; let isPredicated = 1; @@ -22263,7 +22344,7 @@ def S4_pstorerdtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "if ($Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_1a9974, AddrModeRel { let Inst{31-21} = 0b00110110110; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22278,7 +22359,7 @@ def S4_pstorerdtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32), "if ($Pv4.new) memd($Rs32) = $Rtt32", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22286,7 +22367,7 @@ def S4_pstorerff_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh(#$Ii) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22310,7 +22391,7 @@ def S4_pstorerff_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110101011; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22325,7 +22406,7 @@ def S4_pstorerffnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh(#$Ii) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22334,8 +22415,8 @@ let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storerf"; let BaseOpcode = "S2_storerfabs"; @@ -22350,7 +22431,7 @@ def S4_pstorerffnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32.h", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110011; let isPredicated = 1; @@ -22372,7 +22453,7 @@ def S4_pstorerffnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110111011; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22388,7 +22469,7 @@ def S4_pstorerffnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32) = $Rt32.h", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22396,7 +22477,7 @@ def S4_pstorerft_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh(#$Ii) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22419,7 +22500,7 @@ def S4_pstorerft_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110100011; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22433,7 +22514,7 @@ def S4_pstorerftnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh(#$Ii) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22441,8 +22522,8 @@ let Inst{31-18} = 0b10101111011000; let isPredicated = 1; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storerf"; let BaseOpcode = "S2_storerfabs"; @@ -22457,7 +22538,7 @@ def S4_pstorerftnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32.h", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010011; let isPredicated = 1; @@ -22478,7 +22559,7 @@ def S4_pstorerftnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110110011; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22493,7 +22574,7 @@ def S4_pstorerftnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32) = $Rt32.h", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22501,7 +22582,7 @@ def S4_pstorerhf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22526,7 +22607,7 @@ def S4_pstorerhf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110101010; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22542,7 +22623,7 @@ def S4_pstorerhfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22551,8 +22632,8 @@ let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; @@ -22568,7 +22649,7 @@ def S4_pstorerhfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110010; let isPredicated = 1; @@ -22591,7 +22672,7 @@ def S4_pstorerhfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110111010; let isPredicated = 1; let isPredicatedFalse = 1; @@ -22608,7 +22689,7 @@ def S4_pstorerhfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4.new) memh($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22616,7 +22697,7 @@ def S4_pstorerhnewf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memh(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b001; @@ -22626,9 +22707,9 @@ let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = HalfWordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; let DecoderNamespace = "MustExtend"; @@ -22643,7 +22724,7 @@ def S4_pstorerhnewf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b01; let Inst{31-21} = 0b00110101101; let isPredicated = 1; @@ -22651,8 +22732,8 @@ let isPredicatedFalse = 1; let addrMode = BaseRegOffset; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; let BaseOpcode = "S2_storerh_rr"; @@ -22662,7 +22743,7 @@ def S4_pstorerhnewfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memh(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b101; @@ -22672,10 +22753,10 @@ let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = HalfWordAccess; let isNVStore = 1; -let isExtended = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let isExtended = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; let DecoderNamespace = "MustExtend"; @@ -22690,7 +22771,7 @@ def S4_pstorerhnewfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memh($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel { +tc_c8f9a6f6, TypeV2LDST>, Enc_f44229, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b01; let Inst{31-21} = 0b01000110101; @@ -22700,8 +22781,8 @@ let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; let BaseOpcode = "S2_storerh_io"; @@ -22716,7 +22797,7 @@ def S4_pstorerhnewfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b01; let Inst{31-21} = 0b00110111101; let isPredicated = 1; @@ -22725,8 +22806,8 @@ let addrMode = BaseRegOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; let BaseOpcode = "S2_storerh_rr"; @@ -22736,7 +22817,7 @@ def S4_pstorerhnewfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4.new) memh($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_c8f9a6f6, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -22745,7 +22826,7 @@ def S4_pstorerhnewt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memh(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b001; @@ -22754,9 +22835,9 @@ let isPredicated = 1; let addrMode = Absolute; let accessSize = HalfWordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; let DecoderNamespace = "MustExtend"; @@ -22771,15 +22852,15 @@ def S4_pstorerhnewt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b01; let Inst{31-21} = 0b00110100101; let isPredicated = 1; let addrMode = BaseRegOffset; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; let BaseOpcode = "S2_storerh_rr"; @@ -22789,7 +22870,7 @@ def S4_pstorerhnewtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memh(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b101; @@ -22798,10 +22879,10 @@ let isPredicated = 1; let addrMode = Absolute; let accessSize = HalfWordAccess; let isNVStore = 1; -let isExtended = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let isExtended = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; let DecoderNamespace = "MustExtend"; @@ -22816,7 +22897,7 @@ def S4_pstorerhnewtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memh($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel { +tc_c8f9a6f6, TypeV2LDST>, Enc_f44229, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b01; let Inst{31-21} = 0b01000010101; @@ -22825,8 +22906,8 @@ let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "imm"; let BaseOpcode = "S2_storerh_io"; @@ -22841,7 +22922,7 @@ def S4_pstorerhnewtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b01; let Inst{31-21} = 0b00110110101; let isPredicated = 1; @@ -22849,8 +22930,8 @@ let addrMode = BaseRegOffset; let accessSize = HalfWordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; let BaseOpcode = "S2_storerh_rr"; @@ -22860,7 +22941,7 @@ def S4_pstorerhnewtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4.new) memh($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_c8f9a6f6, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -22869,7 +22950,7 @@ def S4_pstorerht_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -22893,7 +22974,7 @@ def S4_pstorerht_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110100010; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22908,7 +22989,7 @@ def S4_pstorerhtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -22916,8 +22997,8 @@ let Inst{31-18} = 0b10101111010000; let isPredicated = 1; let addrMode = Absolute; let accessSize = HalfWordAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerhabs"; @@ -22933,7 +23014,7 @@ def S4_pstorerhtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_e8c45e, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010010; let isPredicated = 1; @@ -22955,7 +23036,7 @@ def S4_pstorerhtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110110010; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -22971,7 +23052,7 @@ def S4_pstorerhtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4.new) memh($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -22979,7 +23060,7 @@ def S4_pstorerif_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memw(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -23004,7 +23085,7 @@ def S4_pstorerif_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110101100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23020,7 +23101,7 @@ def S4_pstorerifnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memw(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -23029,8 +23110,8 @@ let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = WordAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; @@ -23046,7 +23127,7 @@ def S4_pstorerifnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memw($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_397f23, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000110100; let isPredicated = 1; @@ -23069,7 +23150,7 @@ def S4_pstorerifnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110111100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23086,7 +23167,7 @@ def S4_pstorerifnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if (!$Pv4.new) memw($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23094,7 +23175,7 @@ def S4_pstorerinewf_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memw(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b010; @@ -23104,9 +23185,9 @@ let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = WordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; let DecoderNamespace = "MustExtend"; @@ -23121,7 +23202,7 @@ def S4_pstorerinewf_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b10; let Inst{31-21} = 0b00110101101; let isPredicated = 1; @@ -23129,8 +23210,8 @@ let isPredicatedFalse = 1; let addrMode = BaseRegOffset; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; let BaseOpcode = "S2_storeri_rr"; @@ -23140,7 +23221,7 @@ def S4_pstorerinewfnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memw(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b1; let Inst{7-7} = 0b1; let Inst{13-11} = 0b110; @@ -23150,10 +23231,10 @@ let isPredicatedFalse = 1; let addrMode = Absolute; let accessSize = WordAccess; let isNVStore = 1; -let isExtended = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let isExtended = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; let DecoderNamespace = "MustExtend"; @@ -23168,7 +23249,7 @@ def S4_pstorerinewfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memw($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel { +tc_c8f9a6f6, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b10; let Inst{31-21} = 0b01000110101; @@ -23178,8 +23259,8 @@ let addrMode = BaseImmOffset; let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; let BaseOpcode = "S2_storeri_io"; @@ -23194,7 +23275,7 @@ def S4_pstorerinewfnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b10; let Inst{31-21} = 0b00110111101; let isPredicated = 1; @@ -23203,8 +23284,8 @@ let addrMode = BaseRegOffset; let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; let BaseOpcode = "S2_storeri_rr"; @@ -23214,7 +23295,7 @@ def S4_pstorerinewfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if (!$Pv4.new) memw($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_c8f9a6f6, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -23223,7 +23304,7 @@ def S4_pstorerinewt_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memw(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_2c8fe5ae, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b010; @@ -23232,9 +23313,9 @@ let isPredicated = 1; let addrMode = Absolute; let accessSize = WordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; let DecoderNamespace = "MustExtend"; @@ -23249,15 +23330,15 @@ def S4_pstorerinewt_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_77781686, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b10; let Inst{31-21} = 0b00110100101; let isPredicated = 1; let addrMode = BaseRegOffset; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; let BaseOpcode = "S2_storeri_rr"; @@ -23267,7 +23348,7 @@ def S4_pstorerinewtnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memw(#$Ii) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel { +tc_7986ba30, TypeST>, Enc_44215c, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-11} = 0b110; @@ -23276,10 +23357,10 @@ let isPredicated = 1; let addrMode = Absolute; let accessSize = WordAccess; let isNVStore = 1; -let isExtended = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let isExtended = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; let DecoderNamespace = "MustExtend"; @@ -23294,7 +23375,7 @@ def S4_pstorerinewtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memw($Rs32+#$Ii) = $Nt8.new", -V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel { +tc_c8f9a6f6, TypeV2LDST>, Enc_8dbdfe, AddrModeRel { let Inst{2-2} = 0b0; let Inst{12-11} = 0b10; let Inst{31-21} = 0b01000010101; @@ -23303,8 +23384,8 @@ let addrMode = BaseImmOffset; let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "imm"; let BaseOpcode = "S2_storeri_io"; @@ -23319,7 +23400,7 @@ def S4_pstorerinewtnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel { +tc_8def9c57, TypeST>, Enc_47ee5e, AddrModeRel { let Inst{4-3} = 0b10; let Inst{31-21} = 0b00110110101; let isPredicated = 1; @@ -23327,8 +23408,8 @@ let addrMode = BaseRegOffset; let accessSize = WordAccess; let isNVStore = 1; let isPredicatedNew = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; let BaseOpcode = "S2_storeri_rr"; @@ -23338,7 +23419,7 @@ def S4_pstorerinewtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8), "if ($Pv4.new) memw($Rs32) = $Nt8.new", -PSEUDO, TypeMAPPING> { +tc_c8f9a6f6, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; let opNewValue = 2; @@ -23347,7 +23428,7 @@ def S4_pstorerit_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memw(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_c85212ca, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; @@ -23371,7 +23452,7 @@ def S4_pstorerit_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7bc567a7, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110100100; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -23386,7 +23467,7 @@ def S4_pstoreritnew_abs : HInst< (outs), (ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memw(#$Ii) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel { +tc_336e698c, TypeST>, Enc_1cf4ca, AddrModeRel { let Inst{2-2} = 0b0; let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; @@ -23394,8 +23475,8 @@ let Inst{31-18} = 0b10101111100000; let isPredicated = 1; let addrMode = Absolute; let accessSize = WordAccess; -let isExtended = 1; let isPredicatedNew = 1; +let isExtended = 1; let mayStore = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeriabs"; @@ -23411,7 +23492,7 @@ def S4_pstoreritnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memw($Rs32+#$Ii) = $Rt32", -V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel { +tc_20a8e109, TypeV2LDST>, Enc_397f23, AddrModeRel { let Inst{2-2} = 0b0; let Inst{31-21} = 0b01000010100; let isPredicated = 1; @@ -23433,7 +23514,7 @@ def S4_pstoreritnew_rr : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel { +tc_7639d4b0, TypeST>, Enc_6339d5, AddrModeRel { let Inst{31-21} = 0b00110110100; let isPredicated = 1; let addrMode = BaseRegOffset; @@ -23449,7 +23530,7 @@ def S4_pstoreritnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32), "if ($Pv4.new) memw($Rs32) = $Rt32", -PSEUDO, TypeMAPPING> { +tc_20a8e109, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23457,20 +23538,20 @@ def S4_stored_locked : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "memd_locked($Rs32,$Pd4) = $Rtt32", -ST_tc_ld_SLOT0, TypeST>, Enc_2921694 { +tc_7d01cbdc, TypeST>, Enc_d7dc10 { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10100000111; let accessSize = DoubleWordAccess; +let isPredicateLate = 1; let isSoloAX = 1; let mayStore = 1; -let isPredicateLate = 1; } def S4_storeirb_io : HInst< (outs), (ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "memb($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_11282123, PredNewRel { +tc_fcee8723, TypeST>, Enc_8203bb, PredNewRel { let Inst{31-21} = 0b00111100000; let addrMode = BaseImmOffset; let accessSize = ByteAccess; @@ -23489,7 +23570,7 @@ def S4_storeirb_zomap : HInst< (outs), (ins IntRegs:$Rs32, s8_0Imm:$II), "memb($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_fcee8723, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23497,7 +23578,7 @@ def S4_storeirbf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "if (!$Pv4) memb($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel { +tc_1e69aa99, TypeST>, Enc_d7a65e, PredNewRel { let Inst{31-21} = 0b00111000100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23517,7 +23598,7 @@ def S4_storeirbf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4) memb($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_1e69aa99, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23525,7 +23606,7 @@ def S4_storeirbfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "if (!$Pv4.new) memb($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel { +tc_8f0a6bad, TypeST>, Enc_d7a65e, PredNewRel { let Inst{31-21} = 0b00111001100; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23546,7 +23627,7 @@ def S4_storeirbfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4.new) memb($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_8f0a6bad, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23554,7 +23635,7 @@ def S4_storeirbt_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "if ($Pv4) memb($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel { +tc_1e69aa99, TypeST>, Enc_d7a65e, PredNewRel { let Inst{31-21} = 0b00111000000; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23573,7 +23654,7 @@ def S4_storeirbt_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4) memb($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_1e69aa99, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23581,7 +23662,7 @@ def S4_storeirbtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "if ($Pv4.new) memb($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel { +tc_8f0a6bad, TypeST>, Enc_d7a65e, PredNewRel { let Inst{31-21} = 0b00111001000; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23601,7 +23682,7 @@ def S4_storeirbtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4.new) memb($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_8f0a6bad, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23609,7 +23690,7 @@ def S4_storeirh_io : HInst< (outs), (ins IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "memh($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_10282127, PredNewRel { +tc_fcee8723, TypeST>, Enc_a803e0, PredNewRel { let Inst{31-21} = 0b00111100001; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; @@ -23628,7 +23709,7 @@ def S4_storeirh_zomap : HInst< (outs), (ins IntRegs:$Rs32, s8_0Imm:$II), "memh($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_fcee8723, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23636,7 +23717,7 @@ def S4_storeirhf_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "if (!$Pv4) memh($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel { +tc_1e69aa99, TypeST>, Enc_f20719, PredNewRel { let Inst{31-21} = 0b00111000101; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23656,7 +23737,7 @@ def S4_storeirhf_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4) memh($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_1e69aa99, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23664,7 +23745,7 @@ def S4_storeirhfnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "if (!$Pv4.new) memh($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel { +tc_8f0a6bad, TypeST>, Enc_f20719, PredNewRel { let Inst{31-21} = 0b00111001101; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23685,7 +23766,7 @@ def S4_storeirhfnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4.new) memh($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_8f0a6bad, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23693,7 +23774,7 @@ def S4_storeirht_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "if ($Pv4) memh($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel { +tc_1e69aa99, TypeST>, Enc_f20719, PredNewRel { let Inst{31-21} = 0b00111000001; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23712,7 +23793,7 @@ def S4_storeirht_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4) memh($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_1e69aa99, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23720,7 +23801,7 @@ def S4_storeirhtnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II), "if ($Pv4.new) memh($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel { +tc_8f0a6bad, TypeST>, Enc_f20719, PredNewRel { let Inst{31-21} = 0b00111001001; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23740,7 +23821,7 @@ def S4_storeirhtnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4.new) memh($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_8f0a6bad, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23748,7 +23829,7 @@ def S4_storeiri_io : HInst< (outs), (ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "memw($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_9282127, PredNewRel { +tc_fcee8723, TypeST>, Enc_f37377, PredNewRel { let Inst{31-21} = 0b00111100010; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -23767,7 +23848,7 @@ def S4_storeiri_zomap : HInst< (outs), (ins IntRegs:$Rs32, s8_0Imm:$II), "memw($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_fcee8723, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23775,7 +23856,7 @@ def S4_storeirif_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "if (!$Pv4) memw($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel { +tc_1e69aa99, TypeST>, Enc_5ccba9, PredNewRel { let Inst{31-21} = 0b00111000110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23795,7 +23876,7 @@ def S4_storeirif_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4) memw($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_1e69aa99, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23803,7 +23884,7 @@ def S4_storeirifnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "if (!$Pv4.new) memw($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel { +tc_8f0a6bad, TypeST>, Enc_5ccba9, PredNewRel { let Inst{31-21} = 0b00111001110; let isPredicated = 1; let isPredicatedFalse = 1; @@ -23824,7 +23905,7 @@ def S4_storeirifnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if (!$Pv4.new) memw($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_8f0a6bad, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23832,7 +23913,7 @@ def S4_storeirit_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "if ($Pv4) memw($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel { +tc_1e69aa99, TypeST>, Enc_5ccba9, PredNewRel { let Inst{31-21} = 0b00111000010; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23851,7 +23932,7 @@ def S4_storeirit_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4) memw($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_1e69aa99, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23859,7 +23940,7 @@ def S4_storeiritnew_io : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "if ($Pv4.new) memw($Rs32+#$Ii) = #$II", -V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel { +tc_8f0a6bad, TypeST>, Enc_5ccba9, PredNewRel { let Inst{31-21} = 0b00111001010; let isPredicated = 1; let addrMode = BaseImmOffset; @@ -23879,7 +23960,7 @@ def S4_storeiritnew_zomap : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II), "if ($Pv4.new) memw($Rs32) = #$II", -PSEUDO, TypeMAPPING> { +tc_8f0a6bad, TypeMAPPING> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -23887,7 +23968,7 @@ def S4_storerb_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Rt32), "memb($Re32=#$II) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_11477246, AddrModeRel { +tc_336e698c, TypeST>, Enc_8bcba4, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011000; @@ -23910,7 +23991,7 @@ def S4_storerb_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl { +tc_45631a8d, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111011000; let addrMode = BaseRegOffset; @@ -23926,7 +24007,7 @@ def S4_storerb_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32), "memb($Ru32<<#$Ii+#$II) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl { +tc_a4567c39, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101000; let addrMode = BaseLongOffset; @@ -23948,7 +24029,7 @@ def S4_storerbnew_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Nt8), "memb($Re32=#$II) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_14193700, AddrModeRel { +tc_7986ba30, TypeST>, Enc_724154, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-11} = 0b000; let Inst{31-21} = 0b10101011101; @@ -23957,9 +24038,9 @@ let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = ByteAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let BaseOpcode = "S2_storerb_ap"; let DecoderNamespace = "MustExtend"; let isExtendable = 1; @@ -23973,14 +24054,14 @@ def S4_storerbnew_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "memb($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_5486172, AddrModeRel { +tc_be995eaf, TypeST>, Enc_c6220b, AddrModeRel { let Inst{6-3} = 0b0000; let Inst{31-21} = 0b00111011101; let addrMode = BaseRegOffset; let accessSize = ByteAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerb"; let InputType = "reg"; let BaseOpcode = "S4_storerb_rr"; @@ -23991,16 +24072,16 @@ def S4_storerbnew_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8), "memb($Ru32<<#$Ii+#$II) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10076500, AddrModeRel { +tc_210b2456, TypeST>, Enc_7eb485, AddrModeRel { let Inst{7-7} = 0b1; let Inst{12-11} = 0b00; let Inst{31-21} = 0b10101101101; let addrMode = BaseLongOffset; let accessSize = ByteAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storerb"; let BaseOpcode = "S4_storerb_ur"; let DecoderNamespace = "MustExtend"; @@ -24015,7 +24096,7 @@ def S4_storerd_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, DoubleRegs:$Rtt32), "memd($Re32=#$II) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_8131399 { +tc_336e698c, TypeST>, Enc_c7a204 { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011110; @@ -24037,7 +24118,7 @@ def S4_storerd_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rs32+$Ru32<<#$Ii) = $Rtt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_9772987, AddrModeRel, ImmRegShl { +tc_45631a8d, TypeST>, Enc_55355c, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111011110; let addrMode = BaseRegOffset; @@ -24052,7 +24133,7 @@ def S4_storerd_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, DoubleRegs:$Rtt32), "memd($Ru32<<#$Ii+#$II) = $Rtt32", -ST_tc_st_SLOT01, TypeST>, Enc_12848507, AddrModeRel, ImmRegShl { +tc_a4567c39, TypeST>, Enc_f79415, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101110; let addrMode = BaseLongOffset; @@ -24073,7 +24154,7 @@ def S4_storerf_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Rt32), "memh($Re32=#$II) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_11477246 { +tc_336e698c, TypeST>, Enc_8bcba4 { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011011; @@ -24095,7 +24176,7 @@ def S4_storerf_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+$Ru32<<#$Ii) = $Rt32.h", -V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl { +tc_45631a8d, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111011011; let addrMode = BaseRegOffset; @@ -24110,7 +24191,7 @@ def S4_storerf_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32), "memh($Ru32<<#$Ii+#$II) = $Rt32.h", -ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl { +tc_a4567c39, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101011; let addrMode = BaseLongOffset; @@ -24131,7 +24212,7 @@ def S4_storerh_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Rt32), "memh($Re32=#$II) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_11477246, AddrModeRel { +tc_336e698c, TypeST>, Enc_8bcba4, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011010; @@ -24154,7 +24235,7 @@ def S4_storerh_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl { +tc_45631a8d, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111011010; let addrMode = BaseRegOffset; @@ -24170,7 +24251,7 @@ def S4_storerh_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32), "memh($Ru32<<#$Ii+#$II) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl { +tc_a4567c39, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101010; let addrMode = BaseLongOffset; @@ -24192,7 +24273,7 @@ def S4_storerhnew_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Nt8), "memh($Re32=#$II) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_14193700, AddrModeRel { +tc_7986ba30, TypeST>, Enc_724154, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-11} = 0b001; let Inst{31-21} = 0b10101011101; @@ -24201,9 +24282,9 @@ let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = HalfWordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let BaseOpcode = "S2_storerh_ap"; let DecoderNamespace = "MustExtend"; let isExtendable = 1; @@ -24217,14 +24298,14 @@ def S4_storerhnew_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "memh($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_5486172, AddrModeRel { +tc_be995eaf, TypeST>, Enc_c6220b, AddrModeRel { let Inst{6-3} = 0b0001; let Inst{31-21} = 0b00111011101; let addrMode = BaseRegOffset; let accessSize = HalfWordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storerh"; let InputType = "reg"; let BaseOpcode = "S2_storerh_rr"; @@ -24235,16 +24316,16 @@ def S4_storerhnew_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8), "memh($Ru32<<#$Ii+#$II) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10076500, AddrModeRel { +tc_210b2456, TypeST>, Enc_7eb485, AddrModeRel { let Inst{7-7} = 0b1; let Inst{12-11} = 0b01; let Inst{31-21} = 0b10101101101; let addrMode = BaseLongOffset; let accessSize = HalfWordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storerh"; let BaseOpcode = "S2_storerh_ur"; let DecoderNamespace = "MustExtend"; @@ -24259,7 +24340,7 @@ def S4_storeri_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Rt32), "memw($Re32=#$II) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_11477246, AddrModeRel { +tc_336e698c, TypeST>, Enc_8bcba4, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10101011100; @@ -24282,7 +24363,7 @@ def S4_storeri_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+$Ru32<<#$Ii) = $Rt32", -V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl { +tc_45631a8d, TypeST>, Enc_eca7c8, AddrModeRel, ImmRegShl { let Inst{6-5} = 0b00; let Inst{31-21} = 0b00111011100; let addrMode = BaseRegOffset; @@ -24298,7 +24379,7 @@ def S4_storeri_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32), "memw($Ru32<<#$Ii+#$II) = $Rt32", -ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl { +tc_a4567c39, TypeST>, Enc_9ea4cf, AddrModeRel, ImmRegShl { let Inst{7-7} = 0b1; let Inst{31-21} = 0b10101101100; let addrMode = BaseLongOffset; @@ -24320,7 +24401,7 @@ def S4_storerinew_ap : HInst< (outs IntRegs:$Re32), (ins u32_0Imm:$II, IntRegs:$Nt8), "memw($Re32=#$II) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_14193700, AddrModeRel { +tc_7986ba30, TypeST>, Enc_724154, AddrModeRel { let Inst{7-6} = 0b10; let Inst{13-11} = 0b010; let Inst{31-21} = 0b10101011101; @@ -24329,9 +24410,9 @@ let opNewValue = 0; let addrMode = AbsoluteSet; let accessSize = WordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let BaseOpcode = "S2_storeri_ap"; let DecoderNamespace = "MustExtend"; let isExtendable = 1; @@ -24345,14 +24426,14 @@ def S4_storerinew_rr : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8), "memw($Rs32+$Ru32<<#$Ii) = $Nt8.new", -V4LDST_tc_st_SLOT0, TypeST>, Enc_5486172, AddrModeRel { +tc_be995eaf, TypeST>, Enc_c6220b, AddrModeRel { let Inst{6-3} = 0b0010; let Inst{31-21} = 0b00111011101; let addrMode = BaseRegOffset; let accessSize = WordAccess; let isNVStore = 1; -let mayStore = 1; let isNewValue = 1; +let mayStore = 1; let CextOpcode = "S2_storeri"; let InputType = "reg"; let BaseOpcode = "S2_storeri_rr"; @@ -24363,16 +24444,16 @@ def S4_storerinew_ur : HInst< (outs), (ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8), "memw($Ru32<<#$Ii+#$II) = $Nt8.new", -NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10076500, AddrModeRel { +tc_210b2456, TypeST>, Enc_7eb485, AddrModeRel { let Inst{7-7} = 0b1; let Inst{12-11} = 0b10; let Inst{31-21} = 0b10101101101; let addrMode = BaseLongOffset; let accessSize = WordAccess; let isNVStore = 1; +let isNewValue = 1; let isExtended = 1; let mayStore = 1; -let isNewValue = 1; let CextOpcode = "S2_storeri"; let BaseOpcode = "S2_storeri_ur"; let DecoderNamespace = "MustExtend"; @@ -24387,7 +24468,7 @@ def S4_subaddi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Ru32), "$Rd32 = add($Rs32,sub(#$Ii,$Ru32))", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_6495334 { +tc_090485bb, TypeALU64>, Enc_8b8d61 { let Inst{31-23} = 0b110110111; let hasNewValue = 1; let opNewValue = 0; @@ -24402,7 +24483,7 @@ def S4_subi_asl_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = sub(#$Ii,asl($Rx32in,#$II))", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 { +tc_c0cd91a8, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b110; let Inst{4-4} = 0b0; let Inst{31-24} = 0b11011110; @@ -24420,7 +24501,7 @@ def S4_subi_lsr_ri : HInst< (outs IntRegs:$Rx32), (ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II), "$Rx32 = sub(#$Ii,lsr($Rx32in,#$II))", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 { +tc_c0cd91a8, TypeALU64>, Enc_c31910 { let Inst{2-0} = 0b110; let Inst{4-4} = 0b1; let Inst{31-24} = 0b11011110; @@ -24438,7 +24519,7 @@ def S4_vrcrotate : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rdd32 = vrcrotate($Rss32,$Rt32,#$Ii)", -S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_114098 { +tc_6264c5e0, TypeS_3op>, Enc_645d54 { let Inst{7-6} = 0b11; let Inst{31-21} = 0b11000011110; let prefersSlot3 = 1; @@ -24447,7 +24528,7 @@ def S4_vrcrotate_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii), "$Rxx32 += vrcrotate($Rss32,$Rt32,#$Ii)", -S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_13114546 { +tc_bc5561d8, TypeS_3op>, Enc_b72622 { let Inst{7-6} = 0b00; let Inst{31-21} = 0b11001011101; let prefersSlot3 = 1; @@ -24457,17 +24538,18 @@ def S4_vxaddsubh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxaddsubh($Rss32,$Rtt32):sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 { +tc_47ab9233, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def S4_vxaddsubhr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxaddsubh($Rss32,$Rtt32):rnd:>>1:sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 { +tc_63cd9d2d, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -24478,27 +24560,29 @@ def S4_vxaddsubw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxaddsubw($Rss32,$Rtt32):sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 { +tc_47ab9233, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def S4_vxsubaddh : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxsubaddh($Rss32,$Rtt32):sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 { +tc_47ab9233, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def S4_vxsubaddhr : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxsubaddh($Rss32,$Rtt32):rnd:>>1:sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 { +tc_63cd9d2d, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -24509,17 +24593,18 @@ def S4_vxsubaddw : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vxsubaddw($Rss32,$Rtt32):sat", -S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 { +tc_47ab9233, TypeS_3op>, Enc_a56825 { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def S5_asrhub_rnd_sat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rd32 = vasrhub($Rss32,#$Ii):raw", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8038806, Requires<[HasV5T]> { +tc_63cd9d2d, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> { let Inst{7-5} = 0b100; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10001000011; @@ -24532,7 +24617,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat", -S_2op_tc_2_SLOT23, TypeS_2op>, Requires<[HasV5T]> { +tc_63cd9d2d, TypeS_2op>, Requires<[HasV5T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -24541,7 +24626,7 @@ def S5_asrhub_sat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rd32 = vasrhub($Rss32,#$Ii):sat", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8038806, Requires<[HasV5T]> { +tc_63cd9d2d, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> { let Inst{7-5} = 0b101; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10001000011; @@ -24554,7 +24639,7 @@ def S5_popcountp : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = popcount($Rss32)", -S_2op_tc_2_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> { +tc_ca280e8b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { let Inst{13-5} = 0b000000011; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; @@ -24565,7 +24650,7 @@ def S5_vasrhrnd : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vasrh($Rss32,#$Ii):raw", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775, Requires<[HasV5T]> { +tc_63cd9d2d, TypeS_2op>, Enc_12b6e9, Requires<[HasV5T]> { let Inst{7-5} = 0b000; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10000000001; @@ -24575,14 +24660,14 @@ def S5_vasrhrnd_goodsyntax : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vasrh($Rss32,#$Ii):rnd", -S_2op_tc_1_SLOT23, TypeS_2op>, Requires<[HasV5T]> { +tc_63cd9d2d, TypeS_2op>, Requires<[HasV5T]> { let isPseudo = 1; } def S6_rol_i_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = rol($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995, Requires<[HasV60T]> { +tc_9f518242, TypeS_2op>, Enc_5eac98, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000000000; } @@ -24590,7 +24675,7 @@ def S6_rol_i_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 += rol($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -24600,7 +24685,7 @@ def S6_rol_i_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 &= rol($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -24610,7 +24695,7 @@ def S6_rol_i_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 -= rol($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -24620,7 +24705,7 @@ def S6_rol_i_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 |= rol($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -24630,7 +24715,7 @@ def S6_rol_i_p_xacc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 ^= rol($Rss32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000010100; let prefersSlot3 = 1; @@ -24640,7 +24725,7 @@ def S6_rol_i_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = rol($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456, Requires<[HasV60T]> { +tc_9f518242, TypeS_2op>, Enc_a05677, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100000; @@ -24651,7 +24736,7 @@ def S6_rol_i_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 += rol($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -24664,7 +24749,7 @@ def S6_rol_i_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 &= rol($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -24677,7 +24762,7 @@ def S6_rol_i_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 -= rol($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -24690,7 +24775,7 @@ def S6_rol_i_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 |= rol($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -24703,7 +24788,7 @@ def S6_rol_i_r_xacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 ^= rol($Rs32,#$Ii)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> { +tc_e17ce9ad, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110100; @@ -24716,7 +24801,7 @@ def S6_vsplatrbp : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vsplatb($Rs32)", -S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV62T]> { +tc_78b3c689, TypeS_2op>, Enc_3a3d62, Requires<[HasV62T]> { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000100010; } @@ -24724,7 +24809,7 @@ def S6_vtrunehb_ppp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunehb($Rss32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157, Requires<[HasV62T]> { +tc_9f518242, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -24733,7 +24818,7 @@ def S6_vtrunohb_ppp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunohb($Rss32,$Rtt32)", -S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157, Requires<[HasV62T]> { +tc_9f518242, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -24742,7 +24827,7 @@ def SA1_addi : HInst< (outs GeneralSubRegs:$Rx16), (ins IntRegs:$Rx16in, s32_0Imm:$Ii), "$Rx16 = add($Rx16in,#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_3974695 { +tc_821c4233, TypeSUBINSN>, Enc_93af4c { let Inst{12-11} = 0b00; let hasNewValue = 1; let opNewValue = 0; @@ -24759,7 +24844,7 @@ def SA1_addrx : HInst< (outs GeneralSubRegs:$Rx16), (ins IntRegs:$Rx16in, GeneralSubRegs:$Rs16), "$Rx16 = add($Rx16in,$Rs16)", -PSEUDO, TypeSUBINSN>, Enc_6135183 { +tc_821c4233, TypeSUBINSN>, Enc_0527db { let Inst{12-8} = 0b11000; let hasNewValue = 1; let opNewValue = 0; @@ -24771,7 +24856,7 @@ def SA1_addsp : HInst< (outs GeneralSubRegs:$Rd16), (ins u6_2Imm:$Ii), "$Rd16 = add(r29,#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_176263 { +tc_d2609065, TypeSUBINSN>, Enc_2df31d { let Inst{12-10} = 0b011; let hasNewValue = 1; let opNewValue = 0; @@ -24783,7 +24868,7 @@ def SA1_and1 : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = and($Rs16,#1)", -PSEUDO, TypeSUBINSN>, Enc_14939491 { +tc_d2609065, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10010; let hasNewValue = 1; let opNewValue = 0; @@ -24794,7 +24879,7 @@ def SA1_clrf : HInst< (outs GeneralSubRegs:$Rd16), (ins), "if (!p0) $Rd16 = #0", -PSEUDO, TypeSUBINSN>, Enc_1451363 { +tc_7c2dcd4d, TypeSUBINSN>, Enc_1f5ba6 { let Inst{12-4} = 0b110100111; let isPredicated = 1; let isPredicatedFalse = 1; @@ -24808,7 +24893,7 @@ def SA1_clrfnew : HInst< (outs GeneralSubRegs:$Rd16), (ins), "if (!p0.new) $Rd16 = #0", -PSEUDO, TypeSUBINSN>, Enc_1451363 { +tc_f26aa619, TypeSUBINSN>, Enc_1f5ba6 { let Inst{12-4} = 0b110100101; let isPredicated = 1; let isPredicatedFalse = 1; @@ -24823,7 +24908,7 @@ def SA1_clrt : HInst< (outs GeneralSubRegs:$Rd16), (ins), "if (p0) $Rd16 = #0", -PSEUDO, TypeSUBINSN>, Enc_1451363 { +tc_7c2dcd4d, TypeSUBINSN>, Enc_1f5ba6 { let Inst{12-4} = 0b110100110; let isPredicated = 1; let hasNewValue = 1; @@ -24836,7 +24921,7 @@ def SA1_clrtnew : HInst< (outs GeneralSubRegs:$Rd16), (ins), "if (p0.new) $Rd16 = #0", -PSEUDO, TypeSUBINSN>, Enc_1451363 { +tc_f26aa619, TypeSUBINSN>, Enc_1f5ba6 { let Inst{12-4} = 0b110100100; let isPredicated = 1; let hasNewValue = 1; @@ -24850,7 +24935,7 @@ def SA1_cmpeqi : HInst< (outs), (ins GeneralSubRegs:$Rs16, u2_0Imm:$Ii), "p0 = cmp.eq($Rs16,#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_2079016 { +tc_e8c7a357, TypeSUBINSN>, Enc_63eaeb { let Inst{3-2} = 0b00; let Inst{12-8} = 0b11001; let AsmVariantName = "NonParsable"; @@ -24861,7 +24946,7 @@ def SA1_combine0i : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u2_0Imm:$Ii), "$Rdd8 = combine(#0,#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_15946706 { +tc_d2609065, TypeSUBINSN>, Enc_ed48be { let Inst{4-3} = 0b00; let Inst{12-7} = 0b111000; let hasNewValue = 1; @@ -24873,7 +24958,7 @@ def SA1_combine1i : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u2_0Imm:$Ii), "$Rdd8 = combine(#1,#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_15946706 { +tc_d2609065, TypeSUBINSN>, Enc_ed48be { let Inst{4-3} = 0b01; let Inst{12-7} = 0b111000; let hasNewValue = 1; @@ -24885,7 +24970,7 @@ def SA1_combine2i : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u2_0Imm:$Ii), "$Rdd8 = combine(#2,#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_15946706 { +tc_d2609065, TypeSUBINSN>, Enc_ed48be { let Inst{4-3} = 0b10; let Inst{12-7} = 0b111000; let hasNewValue = 1; @@ -24897,7 +24982,7 @@ def SA1_combine3i : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u2_0Imm:$Ii), "$Rdd8 = combine(#3,#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_15946706 { +tc_d2609065, TypeSUBINSN>, Enc_ed48be { let Inst{4-3} = 0b11; let Inst{12-7} = 0b111000; let hasNewValue = 1; @@ -24909,7 +24994,7 @@ def SA1_combinerz : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins GeneralSubRegs:$Rs16), "$Rdd8 = combine($Rs16,#0)", -PSEUDO, TypeSUBINSN>, Enc_10501894 { +tc_d2609065, TypeSUBINSN>, Enc_399e12 { let Inst{3-3} = 0b1; let Inst{12-8} = 0b11101; let hasNewValue = 1; @@ -24921,7 +25006,7 @@ def SA1_combinezr : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins GeneralSubRegs:$Rs16), "$Rdd8 = combine(#0,$Rs16)", -PSEUDO, TypeSUBINSN>, Enc_10501894 { +tc_d2609065, TypeSUBINSN>, Enc_399e12 { let Inst{3-3} = 0b0; let Inst{12-8} = 0b11101; let hasNewValue = 1; @@ -24933,7 +25018,7 @@ def SA1_dec : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, n1Const:$n1), "$Rd16 = add($Rs16,#$n1)", -PSEUDO, TypeSUBINSN>, Enc_10597934 { +tc_821c4233, TypeSUBINSN>, Enc_ee5ed0 { let Inst{12-8} = 0b10011; let hasNewValue = 1; let opNewValue = 0; @@ -24944,7 +25029,7 @@ def SA1_inc : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = add($Rs16,#1)", -PSEUDO, TypeSUBINSN>, Enc_14939491 { +tc_d2609065, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10001; let hasNewValue = 1; let opNewValue = 0; @@ -24955,7 +25040,7 @@ def SA1_seti : HInst< (outs GeneralSubRegs:$Rd16), (ins u32_0Imm:$Ii), "$Rd16 = #$Ii", -PSEUDO, TypeSUBINSN>, Enc_2176383 { +tc_d2609065, TypeSUBINSN>, Enc_e39bb2 { let Inst{12-10} = 0b010; let hasNewValue = 1; let opNewValue = 0; @@ -24971,7 +25056,7 @@ def SA1_setin1 : HInst< (outs GeneralSubRegs:$Rd16), (ins n1Const:$n1), "$Rd16 = #$n1", -PSEUDO, TypeSUBINSN>, Enc_13336212 { +tc_d2609065, TypeSUBINSN>, Enc_7a0ea6 { let Inst{12-4} = 0b110100000; let hasNewValue = 1; let opNewValue = 0; @@ -24982,7 +25067,7 @@ def SA1_sxtb : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = sxtb($Rs16)", -PSEUDO, TypeSUBINSN>, Enc_14939491 { +tc_d2609065, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10101; let hasNewValue = 1; let opNewValue = 0; @@ -24993,7 +25078,7 @@ def SA1_sxth : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = sxth($Rs16)", -PSEUDO, TypeSUBINSN>, Enc_14939491 { +tc_d2609065, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10100; let hasNewValue = 1; let opNewValue = 0; @@ -25004,7 +25089,7 @@ def SA1_tfr : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = $Rs16", -PSEUDO, TypeSUBINSN>, Enc_14939491 { +tc_d2609065, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10000; let hasNewValue = 1; let opNewValue = 0; @@ -25015,7 +25100,7 @@ def SA1_zxtb : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = and($Rs16,#255)", -PSEUDO, TypeSUBINSN>, Enc_14939491 { +tc_d2609065, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10111; let hasNewValue = 1; let opNewValue = 0; @@ -25026,7 +25111,7 @@ def SA1_zxth : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16), "$Rd16 = zxth($Rs16)", -PSEUDO, TypeSUBINSN>, Enc_14939491 { +tc_d2609065, TypeSUBINSN>, Enc_97d666 { let Inst{12-8} = 0b10110; let hasNewValue = 1; let opNewValue = 0; @@ -25037,7 +25122,7 @@ def SL1_loadri_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii), "$Rd16 = memw($Rs16+#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_13606251 { +tc_bf6fa601, TypeSUBINSN>, Enc_53dca9 { let Inst{12-12} = 0b0; let hasNewValue = 1; let opNewValue = 0; @@ -25051,7 +25136,7 @@ def SL1_loadrub_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii), "$Rd16 = memub($Rs16+#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_15606259 { +tc_bf6fa601, TypeSUBINSN>, Enc_c175d0 { let Inst{12-12} = 0b1; let hasNewValue = 1; let opNewValue = 0; @@ -25065,7 +25150,7 @@ def SL2_deallocframe : HInst< (outs), (ins), "deallocframe", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_86442910, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111100000000; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; @@ -25078,7 +25163,7 @@ def SL2_jumpr31 : HInst< (outs), (ins), "jumpr r31", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000000; let isTerminator = 1; let isIndirectBranch = 1; @@ -25093,7 +25178,7 @@ def SL2_jumpr31_f : HInst< (outs), (ins), "if (!p0) jumpr r31", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000101; let isPredicated = 1; let isPredicatedFalse = 1; @@ -25111,7 +25196,7 @@ def SL2_jumpr31_fnew : HInst< (outs), (ins), "if (!p0.new) jumpr:nt r31", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000111; let isPredicated = 1; let isPredicatedFalse = 1; @@ -25119,8 +25204,8 @@ let isTerminator = 1; let isIndirectBranch = 1; let cofMax1 = 1; let AsmVariantName = "NonParsable"; -let isReturn = 1; let isPredicatedNew = 1; +let isReturn = 1; let Uses = [P0, R31]; let Defs = [PC]; let isTaken = Inst{4}; @@ -25130,7 +25215,7 @@ def SL2_jumpr31_t : HInst< (outs), (ins), "if (p0) jumpr r31", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000100; let isPredicated = 1; let isTerminator = 1; @@ -25147,15 +25232,15 @@ def SL2_jumpr31_tnew : HInst< (outs), (ins), "if (p0.new) jumpr:nt r31", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_35fb9d13, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111111000110; let isPredicated = 1; let isTerminator = 1; let isIndirectBranch = 1; let cofMax1 = 1; let AsmVariantName = "NonParsable"; -let isReturn = 1; let isPredicatedNew = 1; +let isReturn = 1; let Uses = [P0, R31]; let Defs = [PC]; let isTaken = Inst{4}; @@ -25165,7 +25250,7 @@ def SL2_loadrb_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u3_0Imm:$Ii), "$Rd16 = memb($Rs16+#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_3135259 { +tc_bf6fa601, TypeSUBINSN>, Enc_2fbf3c { let Inst{12-11} = 0b10; let hasNewValue = 1; let opNewValue = 0; @@ -25179,7 +25264,7 @@ def SL2_loadrd_sp : HInst< (outs GeneralDoubleLow8Regs:$Rdd8), (ins u5_3Imm:$Ii), "$Rdd8 = memd(r29+#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_16479122 { +tc_70cabf66, TypeSUBINSN>, Enc_86a14b { let Inst{12-8} = 0b11110; let hasNewValue = 1; let opNewValue = 0; @@ -25194,7 +25279,7 @@ def SL2_loadrh_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii), "$Rd16 = memh($Rs16+#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_4135257 { +tc_bf6fa601, TypeSUBINSN>, Enc_2bae10 { let Inst{12-11} = 0b00; let hasNewValue = 1; let opNewValue = 0; @@ -25208,7 +25293,7 @@ def SL2_loadri_sp : HInst< (outs GeneralSubRegs:$Rd16), (ins u5_2Imm:$Ii), "$Rd16 = memw(r29+#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_64199 { +tc_70cabf66, TypeSUBINSN>, Enc_51635c { let Inst{12-9} = 0b1110; let hasNewValue = 1; let opNewValue = 0; @@ -25223,7 +25308,7 @@ def SL2_loadruh_io : HInst< (outs GeneralSubRegs:$Rd16), (ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii), "$Rd16 = memuh($Rs16+#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_4135257 { +tc_bf6fa601, TypeSUBINSN>, Enc_2bae10 { let Inst{12-11} = 0b01; let hasNewValue = 1; let opNewValue = 0; @@ -25237,15 +25322,15 @@ def SL2_return : HInst< (outs), (ins), "dealloc_return", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000000; let isTerminator = 1; let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; let AsmVariantName = "NonParsable"; -let isReturn = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [R30]; let Defs = [PC, R30, R29, R31]; let DecoderNamespace = "SUBINSN_L2"; @@ -25254,7 +25339,7 @@ def SL2_return_f : HInst< (outs), (ins), "if (!p0) dealloc_return", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000101; let isPredicated = 1; let isPredicatedFalse = 1; @@ -25263,8 +25348,8 @@ let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; let AsmVariantName = "NonParsable"; -let isReturn = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [P0, R30]; let Defs = [PC, R30, R29, R31]; let isTaken = Inst{4}; @@ -25274,7 +25359,7 @@ def SL2_return_fnew : HInst< (outs), (ins), "if (!p0.new) dealloc_return:nt", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000111; let isPredicated = 1; let isPredicatedFalse = 1; @@ -25283,9 +25368,9 @@ let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; let AsmVariantName = "NonParsable"; -let isReturn = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [P0, R30]; let Defs = [PC, R30, R29, R31]; let isTaken = Inst{4}; @@ -25295,7 +25380,7 @@ def SL2_return_t : HInst< (outs), (ins), "if (p0) dealloc_return", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000100; let isPredicated = 1; let isTerminator = 1; @@ -25303,8 +25388,8 @@ let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; let AsmVariantName = "NonParsable"; -let isReturn = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [P0, R30]; let Defs = [PC, R30, R29, R31]; let isTaken = Inst{4}; @@ -25314,7 +25399,7 @@ def SL2_return_tnew : HInst< (outs), (ins), "if (p0.new) dealloc_return:nt", -PSEUDO, TypeSUBINSN>, Enc_0 { +tc_95c54f8b, TypeSUBINSN>, Enc_e3b0c4 { let Inst{12-0} = 0b1111101000110; let isPredicated = 1; let isTerminator = 1; @@ -25322,9 +25407,9 @@ let isIndirectBranch = 1; let accessSize = DoubleWordAccess; let cofMax1 = 1; let AsmVariantName = "NonParsable"; -let isReturn = 1; let isPredicatedNew = 1; let mayLoad = 1; +let isReturn = 1; let Uses = [P0, R30]; let Defs = [PC, R30, R29, R31]; let isTaken = Inst{4}; @@ -25334,7 +25419,7 @@ def SS1_storeb_io : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii, GeneralSubRegs:$Rt16), "memb($Rs16+#$Ii) = $Rt16", -PSEUDO, TypeSUBINSN>, Enc_13204995 { +tc_53ee6546, TypeSUBINSN>, Enc_b38ffc { let Inst{12-12} = 0b1; let addrMode = BaseImmOffset; let accessSize = ByteAccess; @@ -25346,7 +25431,7 @@ def SS1_storew_io : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii, GeneralSubRegs:$Rt16), "memw($Rs16+#$Ii) = $Rt16", -PSEUDO, TypeSUBINSN>, Enc_11205051 { +tc_53ee6546, TypeSUBINSN>, Enc_f55a0c { let Inst{12-12} = 0b0; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -25358,7 +25443,7 @@ def SS2_allocframe : HInst< (outs), (ins u5_3Imm:$Ii), "allocframe(#$Ii)", -PSEUDO, TypeSUBINSN>, Enc_7884306 { +tc_f027ebe9, TypeSUBINSN>, Enc_6f70ca { let Inst{3-0} = 0b0000; let Inst{12-9} = 0b1110; let addrMode = BaseImmOffset; @@ -25373,7 +25458,7 @@ def SS2_storebi0 : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii), "memb($Rs16+#$Ii) = #0", -PSEUDO, TypeSUBINSN>, Enc_13536408 { +tc_6c52d277, TypeSUBINSN>, Enc_84d359 { let Inst{12-8} = 0b10010; let addrMode = BaseImmOffset; let accessSize = ByteAccess; @@ -25385,7 +25470,7 @@ def SS2_storebi1 : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii), "memb($Rs16+#$Ii) = #1", -PSEUDO, TypeSUBINSN>, Enc_13536408 { +tc_6c52d277, TypeSUBINSN>, Enc_84d359 { let Inst{12-8} = 0b10011; let addrMode = BaseImmOffset; let accessSize = ByteAccess; @@ -25397,7 +25482,7 @@ def SS2_stored_sp : HInst< (outs), (ins s6_3Imm:$Ii, GeneralDoubleLow8Regs:$Rtt8), "memd(r29+#$Ii) = $Rtt8", -PSEUDO, TypeSUBINSN>, Enc_9165078 { +tc_c14739d5, TypeSUBINSN>, Enc_b8309d { let Inst{12-9} = 0b0101; let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; @@ -25410,7 +25495,7 @@ def SS2_storeh_io : HInst< (outs), (ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii, GeneralSubRegs:$Rt16), "memh($Rs16+#$Ii) = $Rt16", -PSEUDO, TypeSUBINSN>, Enc_1734121 { +tc_53ee6546, TypeSUBINSN>, Enc_625deb { let Inst{12-11} = 0b00; let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; @@ -25422,7 +25507,7 @@ def SS2_storew_sp : HInst< (outs), (ins u5_2Imm:$Ii, GeneralSubRegs:$Rt16), "memw(r29+#$Ii) = $Rt16", -PSEUDO, TypeSUBINSN>, Enc_6690615 { +tc_c14739d5, TypeSUBINSN>, Enc_87c142 { let Inst{12-9} = 0b0100; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -25435,7 +25520,7 @@ def SS2_storewi0 : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii), "memw($Rs16+#$Ii) = #0", -PSEUDO, TypeSUBINSN>, Enc_15536400 { +tc_6c52d277, TypeSUBINSN>, Enc_a6ce9c { let Inst{12-8} = 0b10000; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -25447,7 +25532,7 @@ def SS2_storewi1 : HInst< (outs), (ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii), "memw($Rs16+#$Ii) = #1", -PSEUDO, TypeSUBINSN>, Enc_15536400 { +tc_6c52d277, TypeSUBINSN>, Enc_a6ce9c { let Inst{12-8} = 0b10001; let addrMode = BaseImmOffset; let accessSize = WordAccess; @@ -25759,7 +25844,7 @@ def V6_extractw : HInst< (outs IntRegs:$Rd32), (ins VectorRegs:$Vu32, IntRegs:$Rs32), "$Rd32 = vextract($Vu32,$Rs32)", -LD_tc_ld_SLOT0, TypeLD>, Enc_16601956, Requires<[HasV60T,UseHVX]> { +tc_9777e6bf, TypeLD>, Enc_50e578, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10010010000; @@ -25773,7 +25858,7 @@ def V6_extractw_128B : HInst< (outs IntRegs:$Rd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rs32), "$Rd32 = vextract($Vu32,$Rs32)", -LD_tc_ld_SLOT0, TypeLD>, Enc_16601956, Requires<[HasV60T,UseHVX]> { +tc_9777e6bf, TypeLD>, Enc_50e578, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10010010000; @@ -25851,6 +25936,144 @@ let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } +def V6_ldcnp0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32.cur = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldcnp0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32.cur = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldcnpnt0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32.cur = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldcnpnt0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32.cur = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldcp0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32.cur = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldcp0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32.cur = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldcpnt0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32.cur = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldcpnt0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32.cur = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldnp0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32 = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldnp0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32 = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldnpnt0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32 = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldnpnt0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32 = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} def V6_ldnt0 : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32), @@ -25874,6 +26097,144 @@ let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } +def V6_ldp0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32 = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldp0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32 = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldpnt0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32 = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldpnt0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32 = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldtnp0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32.tmp = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldtnp0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32.tmp = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldtnpnt0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32.tmp = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldtnpnt0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if (!$Pv4) $Vd32.tmp = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldtp0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32.tmp = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldtp0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32.tmp = vmem($Rt32)", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} +def V6_ldtpnt0 : HInst< +(outs VectorRegs:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32.tmp = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_ldtpnt0_128B : HInst< +(outs VectorRegs128B:$Vd32), +(ins PredRegs:$Pv4, IntRegs:$Rt32), +"if ($Pv4) $Vd32.tmp = vmem($Rt32):nt", +PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let DecoderNamespace = "EXT_mmvec"; +let isCodeGenOnly = 1; +} def V6_ldu0 : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32), @@ -25922,7 +26283,7 @@ def V6_lvsplatb : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32), "$Vd32.b = vsplat($Rt32)", -CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> { +tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[HasV62T,UseHVX]> { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b00011001110; let hasNewValue = 1; @@ -25933,7 +26294,7 @@ def V6_lvsplatb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32), "$Vd32.b = vsplat($Rt32)", -CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> { +tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[HasV62T,UseHVX]> { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b00011001110; let hasNewValue = 1; @@ -25945,7 +26306,7 @@ def V6_lvsplath : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32), "$Vd32.h = vsplat($Rt32)", -CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> { +tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[HasV62T,UseHVX]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b00011001110; let hasNewValue = 1; @@ -25956,7 +26317,7 @@ def V6_lvsplath_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32), "$Vd32.h = vsplat($Rt32)", -CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> { +tc_6b78cf13, TypeCVI_VX>, Enc_a5ed8a, Requires<[HasV62T,UseHVX]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b00011001110; let hasNewValue = 1; @@ -25968,7 +26329,7 @@ def V6_lvsplatw : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32), "$Vd32 = vsplat($Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_9768377, Requires<[HasV60T,UseHVX]> { +tc_6b78cf13, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[HasV60T,UseHVX]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -25979,7 +26340,7 @@ def V6_lvsplatw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32), "$Vd32 = vsplat($Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_9768377, Requires<[HasV60T,UseHVX]> { +tc_6b78cf13, TypeCVI_VX_LATE>, Enc_a5ed8a, Requires<[HasV60T,UseHVX]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -25991,7 +26352,7 @@ def V6_pred_and : HInst< (outs VecPredRegs:$Qd4), (ins VecPredRegs:$Qs4, VecPredRegs:$Qt4), "$Qd4 = and($Qs4,$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26004,7 +26365,7 @@ def V6_pred_and_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4), "$Qd4 = and($Qs4,$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000000; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26018,7 +26379,7 @@ def V6_pred_and_n : HInst< (outs VecPredRegs:$Qd4), (ins VecPredRegs:$Qs4, VecPredRegs:$Qt4), "$Qd4 = and($Qs4,!$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000101; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26031,7 +26392,7 @@ def V6_pred_and_n_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4), "$Qd4 = and($Qs4,!$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000101; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26045,7 +26406,7 @@ def V6_pred_not : HInst< (outs VecPredRegs:$Qd4), (ins VecPredRegs:$Qs4), "$Qd4 = not($Qs4)", -CVI_VA, TypeCVI_VA>, Enc_4897205, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_bfbf03, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000010; let Inst{13-10} = 0b0000; let Inst{31-16} = 0b0001111000000011; @@ -26057,7 +26418,7 @@ def V6_pred_not_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VecPredRegs128B:$Qs4), "$Qd4 = not($Qs4)", -CVI_VA, TypeCVI_VA>, Enc_4897205, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_bfbf03, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000010; let Inst{13-10} = 0b0000; let Inst{31-16} = 0b0001111000000011; @@ -26070,7 +26431,7 @@ def V6_pred_or : HInst< (outs VecPredRegs:$Qd4), (ins VecPredRegs:$Qs4, VecPredRegs:$Qt4), "$Qd4 = or($Qs4,$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000001; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26083,7 +26444,7 @@ def V6_pred_or_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4), "$Qd4 = or($Qs4,$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000001; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26097,7 +26458,7 @@ def V6_pred_or_n : HInst< (outs VecPredRegs:$Qd4), (ins VecPredRegs:$Qs4, VecPredRegs:$Qt4), "$Qd4 = or($Qs4,!$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000100; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26110,7 +26471,7 @@ def V6_pred_or_n_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4), "$Qd4 = or($Qs4,!$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000100; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26124,7 +26485,7 @@ def V6_pred_scalar2 : HInst< (outs VecPredRegs:$Qd4), (ins IntRegs:$Rt32), "$Qd4 = vsetq($Rt32)", -CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV60T,UseHVX]> { +tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[HasV60T,UseHVX]> { let Inst{13-2} = 0b000000010001; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -26135,7 +26496,7 @@ def V6_pred_scalar2_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins IntRegs:$Rt32), "$Qd4 = vsetq($Rt32)", -CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV60T,UseHVX]> { +tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[HasV60T,UseHVX]> { let Inst{13-2} = 0b000000010001; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -26147,7 +26508,7 @@ def V6_pred_scalar2v2 : HInst< (outs VecPredRegs:$Qd4), (ins IntRegs:$Rt32), "$Qd4 = vsetq2($Rt32)", -CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV62T,UseHVX]> { +tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[HasV62T,UseHVX]> { let Inst{13-2} = 0b000000010011; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -26158,7 +26519,7 @@ def V6_pred_scalar2v2_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins IntRegs:$Rt32), "$Qd4 = vsetq2($Rt32)", -CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV62T,UseHVX]> { +tc_4105d6b5, TypeCVI_VP>, Enc_7222b7, Requires<[HasV62T,UseHVX]> { let Inst{13-2} = 0b000000010011; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -26170,7 +26531,7 @@ def V6_pred_xor : HInst< (outs VecPredRegs:$Qd4), (ins VecPredRegs:$Qs4, VecPredRegs:$Qt4), "$Qd4 = xor($Qs4,$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000011; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26183,7 +26544,7 @@ def V6_pred_xor_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4), "$Qd4 = xor($Qs4,$Qt4)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000011; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26197,7 +26558,7 @@ def V6_shuffeqh : HInst< (outs VecPredRegs:$Qd4), (ins VecPredRegs:$Qs4, VecPredRegs:$Qt4), "$Qd4.b = vshuffe($Qs4.h,$Qt4.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV62T,UseHVX]> { let Inst{7-2} = 0b000110; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26210,7 +26571,7 @@ def V6_shuffeqh_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4), "$Qd4.b = vshuffe($Qs4.h,$Qt4.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV62T,UseHVX]> { let Inst{7-2} = 0b000110; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26224,7 +26585,7 @@ def V6_shuffeqw : HInst< (outs VecPredRegs:$Qd4), (ins VecPredRegs:$Qs4, VecPredRegs:$Qt4), "$Qd4.h = vshuffe($Qs4.w,$Qt4.w)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV62T,UseHVX]> { let Inst{7-2} = 0b000111; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26237,7 +26598,7 @@ def V6_shuffeqw_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4), "$Qd4.h = vshuffe($Qs4.w,$Qt4.w)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_134437, Requires<[HasV62T,UseHVX]> { let Inst{7-2} = 0b000111; let Inst{13-10} = 0b0000; let Inst{21-16} = 0b000011; @@ -26540,7 +26901,7 @@ def V6_vL32Ub_ai : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmemu($Rt32+#$Ii)", -CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_1244745, Requires<[HasV60T,UseHVX]> { +tc_35e92f8e, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26556,7 +26917,7 @@ def V6_vL32Ub_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmemu($Rt32+#$Ii)", -CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_8437395, Requires<[HasV60T,UseHVX]> { +tc_35e92f8e, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26573,7 +26934,7 @@ def V6_vL32Ub_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmemu($Rx32++#$Ii)", -CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_10039393, Requires<[HasV60T,UseHVX]> { +tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -26590,7 +26951,7 @@ def V6_vL32Ub_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmemu($Rx32++#$Ii)", -CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_11039423, Requires<[HasV60T,UseHVX]> { +tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -26608,7 +26969,7 @@ def V6_vL32Ub_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmemu($Rx32++$Mu2)", -CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000111; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -26624,7 +26985,7 @@ def V6_vL32Ub_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmemu($Rx32++$Mu2)", -CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_4fd8566e, TypeCVI_VM_VP_LDU>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000111; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -26641,7 +27002,7 @@ def V6_vL32b_ai : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26658,7 +27019,7 @@ def V6_vL32b_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26676,7 +27037,7 @@ def V6_vL32b_cur_ai : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26685,6 +27046,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -26692,7 +27054,7 @@ def V6_vL32b_cur_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26701,6 +27063,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26709,7 +27072,7 @@ def V6_vL32b_cur_npred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26719,6 +27082,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -26726,7 +27090,7 @@ def V6_vL32b_cur_npred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26736,6 +27100,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26744,7 +27109,7 @@ def V6_vL32b_cur_npred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26755,6 +27120,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26763,7 +27129,7 @@ def V6_vL32b_cur_npred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26774,6 +27140,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26783,7 +27150,7 @@ def V6_vL32b_cur_npred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26793,6 +27160,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26801,7 +27169,7 @@ def V6_vL32b_cur_npred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26811,6 +27179,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26820,7 +27189,7 @@ def V6_vL32b_cur_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -26829,6 +27198,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26837,7 +27207,7 @@ def V6_vL32b_cur_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -26846,6 +27216,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26855,7 +27226,7 @@ def V6_vL32b_cur_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -26863,6 +27234,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26871,7 +27243,7 @@ def V6_vL32b_cur_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -26879,6 +27251,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26888,7 +27261,7 @@ def V6_vL32b_cur_pred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26897,6 +27270,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -26904,7 +27278,7 @@ def V6_vL32b_cur_pred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26913,6 +27287,7 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26921,7 +27296,7 @@ def V6_vL32b_cur_pred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26931,6 +27306,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26939,7 +27315,7 @@ def V6_vL32b_cur_pred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26949,6 +27325,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26958,7 +27335,7 @@ def V6_vL32b_cur_pred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26967,6 +27344,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -26975,7 +27353,7 @@ def V6_vL32b_cur_pred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26984,6 +27362,7 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; +let CVINew = 1; let mayLoad = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -26993,7 +27372,7 @@ def V6_vL32b_npred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -27010,7 +27389,7 @@ def V6_vL32b_npred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -27028,7 +27407,7 @@ def V6_vL32b_npred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -27047,7 +27426,7 @@ def V6_vL32b_npred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -27067,7 +27446,7 @@ def V6_vL32b_npred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000011; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -27085,7 +27464,7 @@ def V6_vL32b_npred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000011; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -27104,7 +27483,7 @@ def V6_vL32b_nt_ai : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27113,8 +27492,8 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let isCVLoadable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -27122,7 +27501,7 @@ def V6_vL32b_nt_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27131,8 +27510,8 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let isCVLoadable = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -27141,7 +27520,7 @@ def V6_vL32b_nt_cur_ai : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27150,15 +27529,16 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27167,8 +27547,9 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27176,7 +27557,7 @@ def V6_vL32b_nt_cur_npred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27186,15 +27567,16 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_npred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27204,8 +27586,9 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27213,7 +27596,7 @@ def V6_vL32b_nt_cur_npred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27224,8 +27607,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27233,7 +27617,7 @@ def V6_vL32b_nt_cur_npred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27244,8 +27628,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27254,7 +27639,7 @@ def V6_vL32b_nt_cur_npred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27264,8 +27649,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27273,7 +27659,7 @@ def V6_vL32b_nt_cur_npred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27283,8 +27669,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27293,7 +27680,7 @@ def V6_vL32b_nt_cur_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -27302,8 +27689,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27311,7 +27699,7 @@ def V6_vL32b_nt_cur_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -27320,8 +27708,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27330,7 +27719,7 @@ def V6_vL32b_nt_cur_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -27338,8 +27727,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27347,7 +27737,7 @@ def V6_vL32b_nt_cur_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -27355,8 +27745,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27365,7 +27756,7 @@ def V6_vL32b_nt_cur_pred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27374,15 +27765,16 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_pred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27391,8 +27783,9 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27400,7 +27793,7 @@ def V6_vL32b_nt_cur_pred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27410,8 +27803,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27419,7 +27813,7 @@ def V6_vL32b_nt_cur_pred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27429,8 +27823,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27439,7 +27834,7 @@ def V6_vL32b_nt_cur_pred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27448,8 +27843,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27457,7 +27853,7 @@ def V6_vL32b_nt_cur_pred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -CVI_VM_CUR_LD, TypeCOPROC_VMEM>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27466,8 +27862,9 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; +let CVINew = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27476,7 +27873,7 @@ def V6_vL32b_nt_npred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27486,15 +27883,15 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_npred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27504,8 +27901,8 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27513,7 +27910,7 @@ def V6_vL32b_nt_npred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27524,8 +27921,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27533,7 +27930,7 @@ def V6_vL32b_nt_npred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27544,8 +27941,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27554,7 +27951,7 @@ def V6_vL32b_nt_npred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000011; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27564,8 +27961,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27573,7 +27970,7 @@ def V6_vL32b_nt_npred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000011; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27583,8 +27980,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27593,7 +27990,7 @@ def V6_vL32b_nt_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -27602,8 +27999,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let isCVLoadable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27612,7 +28009,7 @@ def V6_vL32b_nt_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -27621,8 +28018,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let isCVLoadable = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -27632,7 +28029,7 @@ def V6_vL32b_nt_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmem($Rx32++$Mu2):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -27640,8 +28037,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let isCVLoadable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27650,7 +28047,7 @@ def V6_vL32b_nt_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmem($Rx32++$Mu2):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -27658,8 +28055,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let isCVLoadable = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -27669,7 +28066,7 @@ def V6_vL32b_nt_pred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27678,15 +28075,15 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_pred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27695,8 +28092,8 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27704,7 +28101,7 @@ def V6_vL32b_nt_pred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27714,8 +28111,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27723,7 +28120,7 @@ def V6_vL32b_nt_pred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27733,8 +28130,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27743,7 +28140,7 @@ def V6_vL32b_nt_pred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000010; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27752,8 +28149,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27761,7 +28158,7 @@ def V6_vL32b_nt_pred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000010; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27770,8 +28167,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27780,7 +28177,7 @@ def V6_vL32b_nt_tmp_ai : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> { +tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27789,15 +28186,15 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_tmp_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> { +tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27806,8 +28203,8 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27815,7 +28212,7 @@ def V6_vL32b_nt_tmp_npred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27825,15 +28222,15 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_tmp_npred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -27843,8 +28240,8 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -27852,7 +28249,7 @@ def V6_vL32b_nt_tmp_npred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27863,8 +28260,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27872,7 +28269,7 @@ def V6_vL32b_nt_tmp_npred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -27883,8 +28280,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27893,7 +28290,7 @@ def V6_vL32b_nt_tmp_npred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27903,8 +28300,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27912,7 +28309,7 @@ def V6_vL32b_nt_tmp_npred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -27922,8 +28319,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27932,7 +28329,7 @@ def V6_vL32b_nt_tmp_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -27941,8 +28338,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27950,7 +28347,7 @@ def V6_vL32b_nt_tmp_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -27959,8 +28356,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -27969,7 +28366,7 @@ def V6_vL32b_nt_tmp_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.tmp = vmem($Rx32++$Mu2):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000010; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -27977,8 +28374,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27986,7 +28383,7 @@ def V6_vL32b_nt_tmp_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.tmp = vmem($Rx32++$Mu2):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000010; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -27994,8 +28391,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -28004,7 +28401,7 @@ def V6_vL32b_nt_tmp_pred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -28013,15 +28410,15 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_tmp_pred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -28030,8 +28427,8 @@ let opNewValue = 0; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -28039,7 +28436,7 @@ def V6_vL32b_nt_tmp_pred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -28049,8 +28446,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -28058,7 +28455,7 @@ def V6_vL32b_nt_tmp_pred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -28068,8 +28465,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -28078,7 +28475,7 @@ def V6_vL32b_nt_tmp_pred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -28087,8 +28484,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector64Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -28096,7 +28493,7 @@ def V6_vL32b_nt_tmp_pred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -28105,8 +28502,8 @@ let opNewValue = 0; let addrMode = PostInc; let accessSize = Vector128Access; let isCVLoad = 1; -let isNonTemporal = 1; let mayLoad = 1; +let isNonTemporal = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -28115,7 +28512,7 @@ def V6_vL32b_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -28133,7 +28530,7 @@ def V6_vL32b_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -28152,7 +28549,7 @@ def V6_vL32b_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmem($Rx32++$Mu2)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -28169,7 +28566,7 @@ def V6_vL32b_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmem($Rx32++$Mu2)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -28187,7 +28584,7 @@ def V6_vL32b_pred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -28203,7 +28600,7 @@ def V6_vL32b_pred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -28220,7 +28617,7 @@ def V6_vL32b_pred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -28238,7 +28635,7 @@ def V6_vL32b_pred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -28257,7 +28654,7 @@ def V6_vL32b_pred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000010; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -28274,7 +28671,7 @@ def V6_vL32b_pred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)", -CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000010; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -28292,7 +28689,7 @@ def V6_vL32b_tmp_ai : HInst< (outs VectorRegs:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> { +tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -28308,7 +28705,7 @@ def V6_vL32b_tmp_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> { +tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -28325,7 +28722,7 @@ def V6_vL32b_tmp_npred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -28342,7 +28739,7 @@ def V6_vL32b_tmp_npred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -28360,7 +28757,7 @@ def V6_vL32b_tmp_npred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -28379,7 +28776,7 @@ def V6_vL32b_tmp_npred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -28399,7 +28796,7 @@ def V6_vL32b_tmp_npred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -28417,7 +28814,7 @@ def V6_vL32b_tmp_npred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -28436,7 +28833,7 @@ def V6_vL32b_tmp_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -28453,7 +28850,7 @@ def V6_vL32b_tmp_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -28471,7 +28868,7 @@ def V6_vL32b_tmp_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.tmp = vmem($Rx32++$Mu2)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000010; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -28487,7 +28884,7 @@ def V6_vL32b_tmp_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.tmp = vmem($Rx32++$Mu2)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { let Inst{12-5} = 0b00000010; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -28504,7 +28901,7 @@ def V6_vL32b_tmp_pred_ai : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -28520,7 +28917,7 @@ def V6_vL32b_tmp_pred_ai_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -28537,7 +28934,7 @@ def V6_vL32b_tmp_pred_pi : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -28555,7 +28952,7 @@ def V6_vL32b_tmp_pred_pi_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -28574,7 +28971,7 @@ def V6_vL32b_tmp_pred_ppu : HInst< (outs VectorRegs:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -28591,7 +28988,7 @@ def V6_vL32b_tmp_pred_ppu_128B : HInst< (outs VectorRegs128B:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)", -CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -28609,7 +29006,7 @@ def V6_vS32Ub_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "vmemu($Rt32+#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_6923828, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_354299ad, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -28624,7 +29021,7 @@ def V6_vS32Ub_ai_128B : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "vmemu($Rt32+#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_5757366, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_354299ad, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -28640,7 +29037,7 @@ def V6_vS32Ub_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -28655,7 +29052,7 @@ def V6_vS32Ub_npred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -28671,7 +29068,7 @@ def V6_vS32Ub_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -28688,7 +29085,7 @@ def V6_vS32Ub_npred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -28706,7 +29103,7 @@ def V6_vS32Ub_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -28722,7 +29119,7 @@ def V6_vS32Ub_npred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -28739,7 +29136,7 @@ def V6_vS32Ub_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "vmemu($Rx32++#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_3296020, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_7fa82b08, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -28755,7 +29152,7 @@ def V6_vS32Ub_pi_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "vmemu($Rx32++#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_2296022, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_7fa82b08, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -28772,7 +29169,7 @@ def V6_vS32Ub_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "vmemu($Rx32++$Mu2) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_7fa82b08, TypeCVI_VM_STU>, Enc_d15d19, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-5} = 0b00000111; let Inst{31-21} = 0b00101011001; let addrMode = PostInc; @@ -28787,7 +29184,7 @@ def V6_vS32Ub_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "vmemu($Rx32++$Mu2) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_7fa82b08, TypeCVI_VM_STU>, Enc_d15d19, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-5} = 0b00000111; let Inst{31-21} = 0b00101011001; let addrMode = PostInc; @@ -28803,7 +29200,7 @@ def V6_vS32Ub_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -28817,7 +29214,7 @@ def V6_vS32Ub_pred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d642eff3, TypeCVI_VM_STU>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -28832,7 +29229,7 @@ def V6_vS32Ub_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -28848,7 +29245,7 @@ def V6_vS32Ub_pred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -28865,7 +29262,7 @@ def V6_vS32Ub_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -28880,7 +29277,7 @@ def V6_vS32Ub_pred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32", -CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_6fd9ad30, TypeCVI_VM_STU>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -28896,7 +29293,7 @@ def V6_vS32b_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_6923828, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -28912,7 +29309,7 @@ def V6_vS32b_ai_128B : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_5757366, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -28929,15 +29326,16 @@ def V6_vS32b_new_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8), "vmem($Rt32+#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_6608821, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28947,15 +29345,16 @@ def V6_vS32b_new_ai_128B : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8), "vmem($Rt32+#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2152247, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28966,7 +29365,7 @@ def V6_vS32b_new_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8), "if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01101; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -28974,8 +29373,9 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -28984,7 +29384,7 @@ def V6_vS32b_new_npred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8), "if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01101; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -28992,8 +29392,9 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29003,7 +29404,7 @@ def V6_vS32b_new_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8), "if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -29012,8 +29413,9 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29023,7 +29425,7 @@ def V6_vS32b_new_npred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8), "if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -29032,8 +29434,9 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29044,7 +29447,7 @@ def V6_vS32b_new_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8), "if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-3} = 0b00001101; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -29052,8 +29455,9 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29063,7 +29467,7 @@ def V6_vS32b_new_npred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8), "if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-3} = 0b00001101; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -29071,8 +29475,9 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29083,15 +29488,16 @@ def V6_vS32b_new_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8), "vmem($Rx32++#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_12244921, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29102,15 +29508,16 @@ def V6_vS32b_new_pi_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8), "vmem($Rx32++#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_11244923, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29122,14 +29529,15 @@ def V6_vS32b_new_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8), "vmem($Rx32++$Mu2) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-3} = 0b0000000100; let Inst{31-21} = 0b00101011001; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29140,14 +29548,15 @@ def V6_vS32b_new_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8), "vmem($Rx32++$Mu2) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-3} = 0b0000000100; let Inst{31-21} = 0b00101011001; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29159,15 +29568,16 @@ def V6_vS32b_new_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8), "if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01000; let Inst{31-21} = 0b00101000101; let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29176,15 +29586,16 @@ def V6_vS32b_new_pred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8), "if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01000; let Inst{31-21} = 0b00101000101; let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29194,7 +29605,7 @@ def V6_vS32b_new_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8), "if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -29202,8 +29613,9 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29213,7 +29625,7 @@ def V6_vS32b_new_pred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8), "if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -29221,8 +29633,9 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29233,15 +29646,16 @@ def V6_vS32b_new_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8), "if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-3} = 0b00001000; let Inst{31-21} = 0b00101011101; let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29251,15 +29665,16 @@ def V6_vS32b_new_pred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8), "if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-3} = 0b00001000; let Inst{31-21} = 0b00101011101; let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; +let CVINew = 1; let isNewValue = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29270,7 +29685,7 @@ def V6_vS32b_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -29286,7 +29701,7 @@ def V6_vS32b_npred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -29303,7 +29718,7 @@ def V6_vS32b_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -29321,7 +29736,7 @@ def V6_vS32b_npred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -29340,7 +29755,7 @@ def V6_vS32b_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -29357,7 +29772,7 @@ def V6_vS32b_npred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -29375,7 +29790,7 @@ def V6_vS32b_nqpred_ai : HInst< (outs), (ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000100; let addrMode = BaseImmOffset; @@ -29387,7 +29802,7 @@ def V6_vS32b_nqpred_ai_128B : HInst< (outs), (ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000100; let addrMode = BaseImmOffset; @@ -29400,7 +29815,7 @@ def V6_vS32b_nqpred_pi : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -29414,7 +29829,7 @@ def V6_vS32b_nqpred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -29429,7 +29844,7 @@ def V6_vS32b_nqpred_ppu : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011100; let addrMode = PostInc; @@ -29442,7 +29857,7 @@ def V6_vS32b_nqpred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011100; let addrMode = PostInc; @@ -29456,14 +29871,14 @@ def V6_vS32b_nt_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_6923828, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; let addrMode = BaseImmOffset; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isNVStorable = 1; let isPredicable = 1; @@ -29473,14 +29888,14 @@ def V6_vS32b_nt_ai_128B : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_5757366, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_e3748cdf, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; let addrMode = BaseImmOffset; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let isNVStorable = 1; let isPredicable = 1; @@ -29491,16 +29906,17 @@ def V6_vS32b_nt_new_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8), "vmem($Rt32+#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_6608821, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29510,16 +29926,17 @@ def V6_vS32b_nt_new_ai_128B : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8), "vmem($Rt32+#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2152247, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_1b93bdc6, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29530,7 +29947,7 @@ def V6_vS32b_nt_new_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8), "if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01111; let Inst{31-21} = 0b00101000111; let isPredicated = 1; @@ -29538,9 +29955,10 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29549,7 +29967,7 @@ def V6_vS32b_nt_new_npred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8), "if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01111; let Inst{31-21} = 0b00101000111; let isPredicated = 1; @@ -29557,9 +29975,10 @@ let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29569,7 +29988,7 @@ def V6_vS32b_nt_new_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8), "if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -29578,9 +29997,10 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29590,7 +30010,7 @@ def V6_vS32b_nt_new_npred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8), "if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -29599,9 +30019,10 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29612,7 +30033,7 @@ def V6_vS32b_nt_new_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8), "if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-3} = 0b00001111; let Inst{31-21} = 0b00101011111; let isPredicated = 1; @@ -29620,9 +30041,10 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29632,7 +30054,7 @@ def V6_vS32b_nt_new_npred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8), "if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-3} = 0b00001111; let Inst{31-21} = 0b00101011111; let isPredicated = 1; @@ -29640,9 +30062,10 @@ let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29653,16 +30076,17 @@ def V6_vS32b_nt_new_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8), "vmem($Rx32++#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_12244921, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29673,16 +30097,17 @@ def V6_vS32b_nt_new_pi_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8), "vmem($Rx32++#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_11244923, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29694,15 +30119,16 @@ def V6_vS32b_nt_new_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8), "vmem($Rx32++$Mu2):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-3} = 0b0000000100; let Inst{31-21} = 0b00101011011; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29713,15 +30139,16 @@ def V6_vS32b_nt_new_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8), "vmem($Rx32++$Mu2):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_db5b9e2f, TypeCVI_VM_NEW_ST>, Enc_cf1927, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-3} = 0b0000000100; let Inst{31-21} = 0b00101011011; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29733,16 +30160,17 @@ def V6_vS32b_nt_new_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8), "if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01010; let Inst{31-21} = 0b00101000111; let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29751,16 +30179,17 @@ def V6_vS32b_nt_new_pred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8), "if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_d5090f3e, TypeCVI_VM_NEW_ST>, Enc_f7430e, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01010; let Inst{31-21} = 0b00101000111; let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29770,7 +30199,7 @@ def V6_vS32b_nt_new_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8), "if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -29778,9 +30207,10 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29790,7 +30220,7 @@ def V6_vS32b_nt_new_pred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8), "if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_784502, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-3} = 0b01010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -29798,9 +30228,10 @@ let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29811,16 +30242,17 @@ def V6_vS32b_nt_new_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8), "if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-3} = 0b00001010; let Inst{31-21} = 0b00101011111; let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 4; @@ -29830,16 +30262,17 @@ def V6_vS32b_nt_new_pred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8), "if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new", -CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_8b6a873f, TypeCVI_VM_NEW_ST>, Enc_372c9d, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-3} = 0b00001010; let Inst{31-21} = 0b00101011111; let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; let isNVStore = 1; -let mayStore = 1; -let isNonTemporal = 1; +let CVINew = 1; let isNewValue = 1; +let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; @@ -29850,15 +30283,15 @@ def V6_vS32b_nt_npred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000111; let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29867,15 +30300,15 @@ def V6_vS32b_nt_npred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000111; let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29885,7 +30318,7 @@ def V6_vS32b_nt_npred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -29893,8 +30326,8 @@ let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29904,7 +30337,7 @@ def V6_vS32b_nt_npred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; @@ -29912,8 +30345,8 @@ let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29924,15 +30357,15 @@ def V6_vS32b_nt_npred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011111; let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29942,15 +30375,15 @@ def V6_vS32b_nt_npred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011111; let isPredicated = 1; let isPredicatedFalse = 1; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29961,26 +30394,26 @@ def V6_vS32b_nt_nqpred_ai : HInst< (outs), (ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000110; let addrMode = BaseImmOffset; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vS32b_nt_nqpred_ai_128B : HInst< (outs), (ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{31-21} = 0b00101000110; let addrMode = BaseImmOffset; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -29988,14 +30421,14 @@ def V6_vS32b_nt_nqpred_pi : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -30003,14 +30436,14 @@ def V6_vS32b_nt_nqpred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -30019,13 +30452,13 @@ def V6_vS32b_nt_nqpred_ppu : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011110; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -30033,13 +30466,13 @@ def V6_vS32b_nt_nqpred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000001; let Inst{31-21} = 0b00101011110; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -30048,14 +30481,14 @@ def V6_vS32b_nt_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_3296020, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isNVStorable = 1; let isPredicable = 1; @@ -30066,14 +30499,14 @@ def V6_vS32b_nt_pi_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_2296022, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let isNVStorable = 1; let isPredicable = 1; @@ -30085,13 +30518,13 @@ def V6_vS32b_nt_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011011; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; let isPredicable = 1; @@ -30102,13 +30535,13 @@ def V6_vS32b_nt_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011011; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let isNVStorable = 1; let isPredicable = 1; @@ -30120,14 +30553,14 @@ def V6_vS32b_nt_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000111; let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -30136,14 +30569,14 @@ def V6_vS32b_nt_pred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000111; let isPredicated = 1; let addrMode = BaseImmOffset; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ai_128B"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -30153,15 +30586,15 @@ def V6_vS32b_nt_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -30171,15 +30604,15 @@ def V6_vS32b_nt_pred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001111; let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_pi_128B"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -30190,14 +30623,14 @@ def V6_vS32b_nt_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if ($Pv4) vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011111; let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -30207,14 +30640,14 @@ def V6_vS32b_nt_pred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if ($Pv4) vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011111; let isPredicated = 1; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu_128B"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -30225,26 +30658,26 @@ def V6_vS32b_nt_qpred_ai : HInst< (outs), (ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000110; let addrMode = BaseImmOffset; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vS32b_nt_qpred_ai_128B : HInst< (outs), (ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000110; let addrMode = BaseImmOffset; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; } @@ -30252,14 +30685,14 @@ def V6_vS32b_nt_qpred_pi : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -30267,14 +30700,14 @@ def V6_vS32b_nt_qpred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -30283,13 +30716,13 @@ def V6_vS32b_nt_qpred_ppu : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011110; let addrMode = PostInc; let accessSize = Vector64Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -30297,13 +30730,13 @@ def V6_vS32b_nt_qpred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011110; let addrMode = PostInc; let accessSize = Vector128Access; -let mayStore = 1; let isNonTemporal = 1; +let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let isCodeGenOnly = 1; let Constraints = "$Rx32 = $Rx32in"; @@ -30312,7 +30745,7 @@ def V6_vS32b_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_3296020, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -30329,7 +30762,7 @@ def V6_vS32b_pi_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_2296022, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -30347,7 +30780,7 @@ def V6_vS32b_ppu : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011001; let addrMode = PostInc; @@ -30362,7 +30795,7 @@ def V6_vS32b_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_a4c9df3b, TypeCVI_VM_ST>, Enc_d15d19, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011001; let addrMode = PostInc; @@ -30378,7 +30811,7 @@ def V6_vS32b_pred_ai : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if ($Pv4) vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -30393,7 +30826,7 @@ def V6_vS32b_pred_ai_128B : HInst< (outs), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Pv4) vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel { +tc_85d237e3, TypeCVI_VM_ST>, Enc_27b757, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000101; let isPredicated = 1; @@ -30409,7 +30842,7 @@ def V6_vS32b_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if ($Pv4) vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]> { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -30426,7 +30859,7 @@ def V6_vS32b_pred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Pv4) vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]> { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -30444,7 +30877,7 @@ def V6_vS32b_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if ($Pv4) vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]> { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -30459,7 +30892,7 @@ def V6_vS32b_pred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if ($Pv4) vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]> { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011101; let isPredicated = 1; @@ -30475,7 +30908,7 @@ def V6_vS32b_qpred_ai : HInst< (outs), (ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32), "if ($Qv4) vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000100; let addrMode = BaseImmOffset; @@ -30487,7 +30920,7 @@ def V6_vS32b_qpred_ai_128B : HInst< (outs), (ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Qv4) vmem($Rt32+#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> { +tc_aedb9f9e, TypeCVI_VM_ST>, Enc_2ea740, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{31-21} = 0b00101000100; let addrMode = BaseImmOffset; @@ -30500,7 +30933,7 @@ def V6_vS32b_qpred_pi : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32), "if ($Qv4) vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -30514,7 +30947,7 @@ def V6_vS32b_qpred_pi_128B : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32), "if ($Qv4) vmem($Rx32++#$Ii) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_0b51ce, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -30529,7 +30962,7 @@ def V6_vS32b_qpred_ppu : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32), "if ($Qv4) vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011100; let addrMode = PostInc; @@ -30542,7 +30975,7 @@ def V6_vS32b_qpred_ppu_128B : HInst< (outs IntRegs:$Rx32), (ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32), "if ($Qv4) vmem($Rx32++$Mu2) = $Vs32", -CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> { +tc_99093773, TypeCVI_VM_ST>, Enc_4dff07, Requires<[HasV60T,UseHVX]> { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011100; let addrMode = PostInc; @@ -30556,7 +30989,7 @@ def V6_vabsdiffh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -30568,7 +31001,7 @@ def V6_vabsdiffh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -30604,7 +31037,7 @@ def V6_vabsdiffub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -30616,7 +31049,7 @@ def V6_vabsdiffub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -30652,7 +31085,7 @@ def V6_vabsdiffuh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -30664,7 +31097,7 @@ def V6_vabsdiffuh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -30700,7 +31133,7 @@ def V6_vabsdiffw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -30712,7 +31145,7 @@ def V6_vabsdiffw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -30748,7 +31181,7 @@ def V6_vabsh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.h = vabs($Vu32.h)", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30760,7 +31193,7 @@ def V6_vabsh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.h = vabs($Vu32.h)", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30796,7 +31229,7 @@ def V6_vabsh_sat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.h = vabs($Vu32.h):sat", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30808,7 +31241,7 @@ def V6_vabsh_sat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.h = vabs($Vu32.h):sat", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30844,7 +31277,7 @@ def V6_vabsw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.w = vabs($Vu32.w)", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30856,7 +31289,7 @@ def V6_vabsw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.w = vabs($Vu32.w)", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30892,7 +31325,7 @@ def V6_vabsw_sat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.w = vabs($Vu32.w):sat", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30904,7 +31337,7 @@ def V6_vabsw_sat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.w = vabs($Vu32.w):sat", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -30940,7 +31373,7 @@ def V6_vaddb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vadd($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -30952,7 +31385,7 @@ def V6_vaddb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vadd($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -30988,7 +31421,7 @@ def V6_vaddb_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -31000,7 +31433,7 @@ def V6_vaddb_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -31036,7 +31469,7 @@ def V6_vaddbnq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if (!$Qv4) $Vx32.b += $Vu32.b", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -31051,7 +31484,7 @@ def V6_vaddbnq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if (!$Qv4) $Vx32.b += $Vu32.b", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -31094,7 +31527,7 @@ def V6_vaddbq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if ($Qv4) $Vx32.b += $Vu32.b", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -31109,7 +31542,7 @@ def V6_vaddbq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if ($Qv4) $Vx32.b += $Vu32.b", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -31152,7 +31585,7 @@ def V6_vaddbsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vadd($Vu32.b,$Vv32.b):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -31164,7 +31597,7 @@ def V6_vaddbsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vadd($Vu32.b,$Vv32.b):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -31200,7 +31633,7 @@ def V6_vaddbsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -31212,7 +31645,7 @@ def V6_vaddbsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -31248,7 +31681,7 @@ def V6_vaddcarry : HInst< (outs VectorRegs:$Vd32, VecPredRegs:$Qx4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, VecPredRegs:$Qx4in), "$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry", -CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> { +tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[HasV62T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100101; @@ -31263,7 +31696,7 @@ def V6_vaddcarry_128B : HInst< (outs VectorRegs128B:$Vd32, VecPredRegs128B:$Qx4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, VecPredRegs128B:$Qx4in), "$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry", -CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> { +tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[HasV62T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100101; @@ -31279,7 +31712,7 @@ def V6_vaddclbh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011111000; @@ -31291,7 +31724,7 @@ def V6_vaddclbh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011111000; @@ -31304,7 +31737,7 @@ def V6_vaddclbw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011111000; @@ -31316,7 +31749,7 @@ def V6_vaddclbw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011111000; @@ -31329,7 +31762,7 @@ def V6_vaddh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vadd($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -31341,7 +31774,7 @@ def V6_vaddh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vadd($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -31377,7 +31810,7 @@ def V6_vaddh_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -31389,7 +31822,7 @@ def V6_vaddh_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -31425,7 +31858,7 @@ def V6_vaddhnq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if (!$Qv4) $Vx32.h += $Vu32.h", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -31440,7 +31873,7 @@ def V6_vaddhnq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if (!$Qv4) $Vx32.h += $Vu32.h", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -31483,7 +31916,7 @@ def V6_vaddhq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if ($Qv4) $Vx32.h += $Vu32.h", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -31498,7 +31931,7 @@ def V6_vaddhq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if ($Qv4) $Vx32.h += $Vu32.h", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -31541,7 +31974,7 @@ def V6_vaddhsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vadd($Vu32.h,$Vv32.h):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -31553,7 +31986,7 @@ def V6_vaddhsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vadd($Vu32.h,$Vv32.h):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -31589,7 +32022,7 @@ def V6_vaddhsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -31601,7 +32034,7 @@ def V6_vaddhsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -31637,7 +32070,7 @@ def V6_vaddhw : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.w = vadd($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -31649,7 +32082,7 @@ def V6_vaddhw_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.w = vadd($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -31662,7 +32095,7 @@ def V6_vaddhw_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32.w += vadd($Vu32.h,$Vv32.h)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -31676,7 +32109,7 @@ def V6_vaddhw_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32.w += vadd($Vu32.h,$Vv32.h)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -31741,7 +32174,7 @@ def V6_vaddubh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -31753,7 +32186,7 @@ def V6_vaddubh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -31766,7 +32199,7 @@ def V6_vaddubh_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100010; @@ -31780,7 +32213,7 @@ def V6_vaddubh_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100010; @@ -31845,7 +32278,7 @@ def V6_vaddubsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -31857,7 +32290,7 @@ def V6_vaddubsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -31893,7 +32326,7 @@ def V6_vaddubsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -31905,7 +32338,7 @@ def V6_vaddubsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -31941,7 +32374,7 @@ def V6_vaddububb_sat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -31953,7 +32386,7 @@ def V6_vaddububb_sat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -31966,7 +32399,7 @@ def V6_vadduhsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -31978,7 +32411,7 @@ def V6_vadduhsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -32014,7 +32447,7 @@ def V6_vadduhsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -32026,7 +32459,7 @@ def V6_vadduhsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -32062,7 +32495,7 @@ def V6_vadduhw : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -32074,7 +32507,7 @@ def V6_vadduhw_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -32087,7 +32520,7 @@ def V6_vadduhw_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100010; @@ -32101,7 +32534,7 @@ def V6_vadduhw_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100010; @@ -32166,7 +32599,7 @@ def V6_vadduwsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -32178,7 +32611,7 @@ def V6_vadduwsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -32214,7 +32647,7 @@ def V6_vadduwsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -32226,7 +32659,7 @@ def V6_vadduwsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -32262,7 +32695,7 @@ def V6_vaddw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vadd($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -32274,7 +32707,7 @@ def V6_vaddw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vadd($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -32310,7 +32743,7 @@ def V6_vaddw_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -32322,7 +32755,7 @@ def V6_vaddw_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -32358,7 +32791,7 @@ def V6_vaddwnq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if (!$Qv4) $Vx32.w += $Vu32.w", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -32373,7 +32806,7 @@ def V6_vaddwnq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if (!$Qv4) $Vx32.w += $Vu32.w", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -32416,7 +32849,7 @@ def V6_vaddwq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if ($Qv4) $Vx32.w += $Vu32.w", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -32431,7 +32864,7 @@ def V6_vaddwq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if ($Qv4) $Vx32.w += $Vu32.w", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -32474,7 +32907,7 @@ def V6_vaddwsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vadd($Vu32.w,$Vv32.w):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -32486,7 +32919,7 @@ def V6_vaddwsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vadd($Vu32.w,$Vv32.w):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -32522,7 +32955,7 @@ def V6_vaddwsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -32534,7 +32967,7 @@ def V6_vaddwsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -32570,7 +33003,7 @@ def V6_valignb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = valign($Vu32,$Vv32,$Rt8)", -CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -32582,7 +33015,7 @@ def V6_valignb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = valign($Vu32,$Vv32,$Rt8)", -CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -32595,7 +33028,7 @@ def V6_valignbi : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii), "$Vd32 = valign($Vu32,$Vv32,#$Ii)", -CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[HasV60T,UseHVX]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110001; let hasNewValue = 1; @@ -32606,7 +33039,7 @@ def V6_valignbi_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii), "$Vd32 = valign($Vu32,$Vv32,#$Ii)", -CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[HasV60T,UseHVX]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110001; let hasNewValue = 1; @@ -32618,7 +33051,7 @@ def V6_vand : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32 = vand($Vu32,$Vv32)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -32630,7 +33063,7 @@ def V6_vand_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32 = vand($Vu32,$Vv32)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -32643,7 +33076,7 @@ def V6_vandnqrt : HInst< (outs VectorRegs:$Vd32), (ins VecPredRegs:$Qu4, IntRegs:$Rt32), "$Vd32 = vand(!$Qu4,$Rt32)", -CVI_VX, TypeCVI_VX>, Enc_4711514, Requires<[HasV62T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX>, Enc_7b7ba8, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-10} = 0b0001; let Inst{31-21} = 0b00011001101; @@ -32655,7 +33088,7 @@ def V6_vandnqrt_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VecPredRegs128B:$Qu4, IntRegs:$Rt32), "$Vd32 = vand(!$Qu4,$Rt32)", -CVI_VX, TypeCVI_VX>, Enc_4711514, Requires<[HasV62T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX>, Enc_7b7ba8, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-10} = 0b0001; let Inst{31-21} = 0b00011001101; @@ -32668,7 +33101,7 @@ def V6_vandnqrt_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32), "$Vx32 |= vand(!$Qu4,$Rt32)", -CVI_VX, TypeCVI_VX>, Enc_4944558, Requires<[HasV62T,UseHVX]> { +tc_9311da3f, TypeCVI_VX>, Enc_895bd9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b00011001011; @@ -32682,7 +33115,7 @@ def V6_vandnqrt_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32), "$Vx32 |= vand(!$Qu4,$Rt32)", -CVI_VX, TypeCVI_VX>, Enc_4944558, Requires<[HasV62T,UseHVX]> { +tc_9311da3f, TypeCVI_VX>, Enc_895bd9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-10} = 0b1001; let Inst{31-21} = 0b00011001011; @@ -32747,7 +33180,7 @@ def V6_vandqrt : HInst< (outs VectorRegs:$Vd32), (ins VecPredRegs:$Qu4, IntRegs:$Rt32), "$Vd32 = vand($Qu4,$Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_4711514, Requires<[HasV60T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b00011001101; @@ -32759,7 +33192,7 @@ def V6_vandqrt_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VecPredRegs128B:$Qu4, IntRegs:$Rt32), "$Vd32 = vand($Qu4,$Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_4711514, Requires<[HasV60T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX_LATE>, Enc_7b7ba8, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b00011001101; @@ -32772,7 +33205,7 @@ def V6_vandqrt_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32), "$Vx32 |= vand($Qu4,$Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_4944558, Requires<[HasV60T,UseHVX]> { +tc_9311da3f, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b00011001011; @@ -32786,7 +33219,7 @@ def V6_vandqrt_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32), "$Vx32 |= vand($Qu4,$Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_4944558, Requires<[HasV60T,UseHVX]> { +tc_9311da3f, TypeCVI_VX_LATE>, Enc_895bd9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-10} = 0b1000; let Inst{31-21} = 0b00011001011; @@ -32851,7 +33284,7 @@ def V6_vandvnqv : HInst< (outs VectorRegs:$Vd32), (ins VecPredRegs:$Qv4, VectorRegs:$Vu32), "$Vd32 = vand(!$Qv4,$Vu32)", -CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000011; @@ -32864,7 +33297,7 @@ def V6_vandvnqv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vu32), "$Vd32 = vand(!$Qv4,$Vu32)", -CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000011; @@ -32878,7 +33311,7 @@ def V6_vandvqv : HInst< (outs VectorRegs:$Vd32), (ins VecPredRegs:$Qv4, VectorRegs:$Vu32), "$Vd32 = vand($Qv4,$Vu32)", -CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000011; @@ -32891,7 +33324,7 @@ def V6_vandvqv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vu32), "$Vd32 = vand($Qv4,$Vu32)", -CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_c4dc92, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000011; @@ -32905,7 +33338,7 @@ def V6_vandvrt : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Qd4 = vand($Vu32,$Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_11498120, Requires<[HasV60T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -32917,7 +33350,7 @@ def V6_vandvrt_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Qd4 = vand($Vu32,$Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_11498120, Requires<[HasV60T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX_LATE>, Enc_0f8bab, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -32930,7 +33363,7 @@ def V6_vandvrt_acc : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Qx4 |= vand($Vu32,$Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_10612292, Requires<[HasV60T,UseHVX]> { +tc_9311da3f, TypeCVI_VX_LATE>, Enc_adf111, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -32944,7 +33377,7 @@ def V6_vandvrt_acc_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Qx4 |= vand($Vu32,$Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_10612292, Requires<[HasV60T,UseHVX]> { +tc_9311da3f, TypeCVI_VX_LATE>, Enc_adf111, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -33009,7 +33442,7 @@ def V6_vaslh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.h = vasl($Vu32.h,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -33021,7 +33454,7 @@ def V6_vaslh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.h = vasl($Vu32.h,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -33057,7 +33490,7 @@ def V6_vaslhv : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vasl($Vu32.h,$Vv32.h)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -33069,7 +33502,7 @@ def V6_vaslhv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vasl($Vu32.h,$Vv32.h)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -33105,7 +33538,7 @@ def V6_vaslw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.w = vasl($Vu32.w,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -33117,7 +33550,7 @@ def V6_vaslw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.w = vasl($Vu32.w,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -33130,7 +33563,7 @@ def V6_vaslw_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.w += vasl($Vu32.w,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -33144,7 +33577,7 @@ def V6_vaslw_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.w += vasl($Vu32.w,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -33209,7 +33642,7 @@ def V6_vaslwv : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vasl($Vu32.w,$Vv32.w)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -33221,7 +33654,7 @@ def V6_vaslwv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vasl($Vu32.w,$Vv32.w)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -33257,7 +33690,7 @@ def V6_vasrh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.h = vasr($Vu32.h,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -33269,7 +33702,7 @@ def V6_vasrh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.h = vasr($Vu32.h,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -33305,7 +33738,7 @@ def V6_vasrhbrndsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -33317,7 +33750,7 @@ def V6_vasrhbrndsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -33330,7 +33763,7 @@ def V6_vasrhbrndsat_alt : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33340,7 +33773,7 @@ def V6_vasrhbsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -33352,7 +33785,7 @@ def V6_vasrhbsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -33365,7 +33798,7 @@ def V6_vasrhubrndsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33377,7 +33810,7 @@ def V6_vasrhubrndsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33390,7 +33823,7 @@ def V6_vasrhubrndsat_alt : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33400,7 +33833,7 @@ def V6_vasrhubsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33412,7 +33845,7 @@ def V6_vasrhubsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33425,7 +33858,7 @@ def V6_vasrhubsat_alt : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33435,7 +33868,7 @@ def V6_vasrhv : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vasr($Vu32.h,$Vv32.h)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -33447,7 +33880,7 @@ def V6_vasrhv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vasr($Vu32.h,$Vv32.h)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -33483,7 +33916,7 @@ def V6_vasruwuhrndsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -33495,7 +33928,7 @@ def V6_vasruwuhrndsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -33508,7 +33941,7 @@ def V6_vasrw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.w = vasr($Vu32.w,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -33520,7 +33953,7 @@ def V6_vasrw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.w = vasr($Vu32.w,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -33533,7 +33966,7 @@ def V6_vasrw_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.w += vasr($Vu32.w,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -33547,7 +33980,7 @@ def V6_vasrw_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.w += vasr($Vu32.w,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_c00bf9c9, TypeCVI_VS>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -33612,7 +34045,7 @@ def V6_vasrwh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33624,7 +34057,7 @@ def V6_vasrwh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33637,7 +34070,7 @@ def V6_vasrwh_alt : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33647,7 +34080,7 @@ def V6_vasrwhrndsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33659,7 +34092,7 @@ def V6_vasrwhrndsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33672,7 +34105,7 @@ def V6_vasrwhrndsat_alt : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33682,7 +34115,7 @@ def V6_vasrwhsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33694,7 +34127,7 @@ def V6_vasrwhsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33707,7 +34140,7 @@ def V6_vasrwhsat_alt : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33717,7 +34150,7 @@ def V6_vasrwuhrndsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -33729,7 +34162,7 @@ def V6_vasrwuhrndsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -33742,7 +34175,7 @@ def V6_vasrwuhsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33754,7 +34187,7 @@ def V6_vasrwuhsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat", -CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_7fa8b40f, TypeCVI_VS>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -33767,7 +34200,7 @@ def V6_vasrwuhsat_alt : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat", -PSEUDO, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33777,7 +34210,7 @@ def V6_vasrwv : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vasr($Vu32.w,$Vv32.w)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -33789,7 +34222,7 @@ def V6_vasrwv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vasr($Vu32.w,$Vv32.w)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -33825,7 +34258,7 @@ def V6_vassign : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32 = $Vu32", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-16} = 0b0001111000000011; @@ -33837,7 +34270,7 @@ def V6_vassign_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32 = $Vu32", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-16} = 0b0001111000000011; @@ -33850,7 +34283,7 @@ def V6_vassignp : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32), "$Vdd32 = $Vuu32", -CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> { +CVI_VA, TypeCVI_VA_DV>, Requires<[HasV60T,UseHVX]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33860,7 +34293,7 @@ def V6_vassignp_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32), "$Vdd32 = $Vuu32", -CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> { +CVI_VA, TypeCVI_VA_DV>, Requires<[HasV60T,UseHVX]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -33871,7 +34304,7 @@ def V6_vavgh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vavg($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -33883,7 +34316,7 @@ def V6_vavgh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vavg($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -33919,7 +34352,7 @@ def V6_vavghrnd : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -33931,7 +34364,7 @@ def V6_vavghrnd_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -33967,7 +34400,7 @@ def V6_vavgub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -33979,7 +34412,7 @@ def V6_vavgub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -34015,7 +34448,7 @@ def V6_vavgubrnd : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -34027,7 +34460,7 @@ def V6_vavgubrnd_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -34063,7 +34496,7 @@ def V6_vavguh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -34075,7 +34508,7 @@ def V6_vavguh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -34111,7 +34544,7 @@ def V6_vavguhrnd : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -34123,7 +34556,7 @@ def V6_vavguhrnd_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -34159,7 +34592,7 @@ def V6_vavgw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vavg($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -34171,7 +34604,7 @@ def V6_vavgw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vavg($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100110; @@ -34207,7 +34640,7 @@ def V6_vavgwrnd : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -34219,7 +34652,7 @@ def V6_vavgwrnd_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -34255,7 +34688,7 @@ def V6_vccombine : HInst< (outs VecDblRegs:$Vdd32), (ins PredRegs:$Ps4, VectorRegs:$Vu32, VectorRegs:$Vv32), "if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> { +tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011010011; @@ -34268,7 +34701,7 @@ def V6_vccombine_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins PredRegs:$Ps4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> { +tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011010011; @@ -34282,7 +34715,7 @@ def V6_vcl0h : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.uh = vcl0($Vu32.uh)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -34294,7 +34727,7 @@ def V6_vcl0h_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.uh = vcl0($Vu32.uh)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -34330,7 +34763,7 @@ def V6_vcl0w : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.uw = vcl0($Vu32.uw)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -34342,7 +34775,7 @@ def V6_vcl0w_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.uw = vcl0($Vu32.uw)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -34378,7 +34811,7 @@ def V6_vcmov : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Ps4, VectorRegs:$Vu32), "if ($Ps4) $Vd32 = $Vu32", -CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> { +tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001101000000000; @@ -34391,7 +34824,7 @@ def V6_vcmov_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Ps4, VectorRegs128B:$Vu32), "if ($Ps4) $Vd32 = $Vu32", -CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> { +tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001101000000000; @@ -34405,7 +34838,7 @@ def V6_vcombine : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32 = vcombine($Vu32,$Vv32)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -34418,7 +34851,7 @@ def V6_vcombine_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32 = vcombine($Vu32,$Vv32)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -34455,7 +34888,7 @@ def V6_vdeal : HInst< (outs VectorRegs:$Vy32, VectorRegs:$Vx32), (ins VectorRegs:$Vy32in, VectorRegs:$Vx32in, IntRegs:$Rt32), "vdeal($Vy32,$Vx32,$Rt32)", -CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> { +tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001111; @@ -34470,7 +34903,7 @@ def V6_vdeal_128B : HInst< (outs VectorRegs128B:$Vy32, VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vy32in, VectorRegs128B:$Vx32in, IntRegs:$Rt32), "vdeal($Vy32,$Vx32,$Rt32)", -CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> { +tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001111; @@ -34486,7 +34919,7 @@ def V6_vdealb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.b = vdeal($Vu32.b)", -CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -34498,7 +34931,7 @@ def V6_vdealb4w : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vdeale($Vu32.b,$Vv32.b)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -34510,7 +34943,7 @@ def V6_vdealb4w_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vdeale($Vu32.b,$Vv32.b)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -34546,7 +34979,7 @@ def V6_vdealb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.b = vdeal($Vu32.b)", -CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -34582,7 +35015,7 @@ def V6_vdealh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.h = vdeal($Vu32.h)", -CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -34594,7 +35027,7 @@ def V6_vdealh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.h = vdeal($Vu32.h)", -CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -34630,7 +35063,7 @@ def V6_vdealvdd : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -34642,7 +35075,7 @@ def V6_vdealvdd_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -34655,7 +35088,7 @@ def V6_vdelta : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32 = vdelta($Vu32,$Vv32)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -34667,7 +35100,7 @@ def V6_vdelta_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32 = vdelta($Vu32,$Vv32)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -34680,7 +35113,7 @@ def V6_vdmpybus : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -34692,7 +35125,7 @@ def V6_vdmpybus_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -34705,7 +35138,7 @@ def V6_vdmpybus_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -34719,7 +35152,7 @@ def V6_vdmpybus_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -34784,7 +35217,7 @@ def V6_vdmpybus_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -34796,7 +35229,7 @@ def V6_vdmpybus_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -34809,7 +35242,7 @@ def V6_vdmpybus_dv_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -34823,7 +35256,7 @@ def V6_vdmpybus_dv_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -34888,7 +35321,7 @@ def V6_vdmpyhb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -34900,7 +35333,7 @@ def V6_vdmpyhb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -34913,7 +35346,7 @@ def V6_vdmpyhb_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -34927,7 +35360,7 @@ def V6_vdmpyhb_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -34992,7 +35425,7 @@ def V6_vdmpyhb_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35004,7 +35437,7 @@ def V6_vdmpyhb_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35017,7 +35450,7 @@ def V6_vdmpyhb_dv_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35031,7 +35464,7 @@ def V6_vdmpyhb_dv_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35096,7 +35529,7 @@ def V6_vdmpyhisat : HInst< (outs VectorRegs:$Vd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35108,7 +35541,7 @@ def V6_vdmpyhisat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35121,7 +35554,7 @@ def V6_vdmpyhisat_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35135,7 +35568,7 @@ def V6_vdmpyhisat_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35200,7 +35633,7 @@ def V6_vdmpyhsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35212,7 +35645,7 @@ def V6_vdmpyhsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35225,7 +35658,7 @@ def V6_vdmpyhsat_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35239,7 +35672,7 @@ def V6_vdmpyhsat_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35304,7 +35737,7 @@ def V6_vdmpyhsuisat : HInst< (outs VectorRegs:$Vd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35316,7 +35749,7 @@ def V6_vdmpyhsuisat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_0e41fa, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35329,7 +35762,7 @@ def V6_vdmpyhsuisat_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35343,7 +35776,7 @@ def V6_vdmpyhsuisat_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_cc857d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35408,7 +35841,7 @@ def V6_vdmpyhsusat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35420,7 +35853,7 @@ def V6_vdmpyhsusat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -35433,7 +35866,7 @@ def V6_vdmpyhsusat_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35447,7 +35880,7 @@ def V6_vdmpyhsusat_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -35512,7 +35945,7 @@ def V6_vdmpyhvsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -35524,7 +35957,7 @@ def V6_vdmpyhvsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -35537,7 +35970,7 @@ def V6_vdmpyhvsat_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -35551,7 +35984,7 @@ def V6_vdmpyhvsat_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -35616,7 +36049,7 @@ def V6_vdsaduh : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -35628,7 +36061,7 @@ def V6_vdsaduh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -35641,7 +36074,7 @@ def V6_vdsaduh_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -35655,7 +36088,7 @@ def V6_vdsaduh_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -35720,7 +36153,7 @@ def V6_veqb : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -35732,7 +36165,7 @@ def V6_veqb_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -35745,7 +36178,7 @@ def V6_veqb_and : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35758,7 +36191,7 @@ def V6_veqb_and_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35772,7 +36205,7 @@ def V6_veqb_or : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35786,7 +36219,7 @@ def V6_veqb_or_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35801,7 +36234,7 @@ def V6_veqb_xor : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35814,7 +36247,7 @@ def V6_veqb_xor_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35828,7 +36261,7 @@ def V6_veqh : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -35840,7 +36273,7 @@ def V6_veqh_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -35853,7 +36286,7 @@ def V6_veqh_and : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35866,7 +36299,7 @@ def V6_veqh_and_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35880,7 +36313,7 @@ def V6_veqh_or : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35894,7 +36327,7 @@ def V6_veqh_or_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35909,7 +36342,7 @@ def V6_veqh_xor : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35922,7 +36355,7 @@ def V6_veqh_xor_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35936,7 +36369,7 @@ def V6_veqw : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -35948,7 +36381,7 @@ def V6_veqw_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -35961,7 +36394,7 @@ def V6_veqw_and : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35974,7 +36407,7 @@ def V6_veqw_and_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -35988,7 +36421,7 @@ def V6_veqw_or : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36002,7 +36435,7 @@ def V6_veqw_or_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36017,7 +36450,7 @@ def V6_veqw_xor : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36030,7 +36463,7 @@ def V6_veqw_xor_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36044,7 +36477,7 @@ def V6_vgtb : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36056,7 +36489,7 @@ def V6_vgtb_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36069,7 +36502,7 @@ def V6_vgtb_and : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36082,7 +36515,7 @@ def V6_vgtb_and_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36096,7 +36529,7 @@ def V6_vgtb_or : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36110,7 +36543,7 @@ def V6_vgtb_or_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36125,7 +36558,7 @@ def V6_vgtb_xor : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36138,7 +36571,7 @@ def V6_vgtb_xor_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36152,7 +36585,7 @@ def V6_vgth : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36164,7 +36597,7 @@ def V6_vgth_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36177,7 +36610,7 @@ def V6_vgth_and : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36190,7 +36623,7 @@ def V6_vgth_and_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36204,7 +36637,7 @@ def V6_vgth_or : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36218,7 +36651,7 @@ def V6_vgth_or_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36233,7 +36666,7 @@ def V6_vgth_xor : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36246,7 +36679,7 @@ def V6_vgth_xor_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36260,7 +36693,7 @@ def V6_vgtub : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36272,7 +36705,7 @@ def V6_vgtub_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36285,7 +36718,7 @@ def V6_vgtub_and : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36298,7 +36731,7 @@ def V6_vgtub_and_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36312,7 +36745,7 @@ def V6_vgtub_or : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36326,7 +36759,7 @@ def V6_vgtub_or_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36341,7 +36774,7 @@ def V6_vgtub_xor : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b101000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36354,7 +36787,7 @@ def V6_vgtub_xor_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b101000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36368,7 +36801,7 @@ def V6_vgtuh : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36380,7 +36813,7 @@ def V6_vgtuh_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36393,7 +36826,7 @@ def V6_vgtuh_and : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36406,7 +36839,7 @@ def V6_vgtuh_and_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36420,7 +36853,7 @@ def V6_vgtuh_or : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b011001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36434,7 +36867,7 @@ def V6_vgtuh_or_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b011001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36449,7 +36882,7 @@ def V6_vgtuh_xor : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b101001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36462,7 +36895,7 @@ def V6_vgtuh_xor_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b101001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36476,7 +36909,7 @@ def V6_vgtuw : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36488,7 +36921,7 @@ def V6_vgtuw_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36501,7 +36934,7 @@ def V6_vgtuw_and : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36514,7 +36947,7 @@ def V6_vgtuw_and_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b001010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36528,7 +36961,7 @@ def V6_vgtuw_or : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b011010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36542,7 +36975,7 @@ def V6_vgtuw_or_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b011010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36557,7 +36990,7 @@ def V6_vgtuw_xor : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b101010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36570,7 +37003,7 @@ def V6_vgtuw_xor_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b101010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36584,7 +37017,7 @@ def V6_vgtw : HInst< (outs VecPredRegs:$Qd4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36596,7 +37029,7 @@ def V6_vgtw_128B : HInst< (outs VecPredRegs128B:$Qd4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_95441f, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111100; @@ -36609,7 +37042,7 @@ def V6_vgtw_and : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36622,7 +37055,7 @@ def V6_vgtw_and_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b000110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36636,7 +37069,7 @@ def V6_vgtw_or : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36650,7 +37083,7 @@ def V6_vgtw_or_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b010110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36665,7 +37098,7 @@ def V6_vgtw_xor : HInst< (outs VecPredRegs:$Qx4), (ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36678,7 +37111,7 @@ def V6_vgtw_xor_128B : HInst< (outs VecPredRegs128B:$Qx4), (ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_eaa9f8, Requires<[HasV60T,UseHVX]> { let Inst{7-2} = 0b100110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; @@ -36692,7 +37125,7 @@ def V6_vhist : HInst< (outs), (ins), "vhist", -CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV60T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV60T,UseHVX]> { let Inst{13-0} = 0b10000010000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -36701,7 +37134,7 @@ def V6_vhist_128B : HInst< (outs), (ins), "vhist", -CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV60T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV60T,UseHVX]> { let Inst{13-0} = 0b10000010000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -36711,7 +37144,7 @@ def V6_vhistq : HInst< (outs), (ins VecPredRegs:$Qv4), "vhist($Qv4)", -CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV60T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV60T,UseHVX]> { let Inst{13-0} = 0b10000010000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -36721,7 +37154,7 @@ def V6_vhistq_128B : HInst< (outs), (ins VecPredRegs128B:$Qv4), "vhist($Qv4)", -CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV60T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV60T,UseHVX]> { let Inst{13-0} = 0b10000010000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -36732,7 +37165,7 @@ def V6_vinsertwr : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, IntRegs:$Rt32), "$Vx32.w = vinsert($Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_313333, Requires<[HasV60T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[HasV60T,UseHVX]> { let Inst{13-5} = 0b100000001; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -36744,7 +37177,7 @@ def V6_vinsertwr_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, IntRegs:$Rt32), "$Vx32.w = vinsert($Rt32)", -CVI_VX_LATE, TypeCVI_VX>, Enc_313333, Requires<[HasV60T,UseHVX]> { +tc_e231aa4f, TypeCVI_VX_LATE>, Enc_569cfe, Requires<[HasV60T,UseHVX]> { let Inst{13-5} = 0b100000001; let Inst{31-21} = 0b00011001101; let hasNewValue = 1; @@ -36757,7 +37190,7 @@ def V6_vlalignb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vlalign($Vu32,$Vv32,$Rt8)", -CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -36769,7 +37202,7 @@ def V6_vlalignb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vlalign($Vu32,$Vv32,$Rt8)", -CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011011; @@ -36782,7 +37215,7 @@ def V6_vlalignbi : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii), "$Vd32 = vlalign($Vu32,$Vv32,#$Ii)", -CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[HasV60T,UseHVX]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110011; let hasNewValue = 1; @@ -36793,7 +37226,7 @@ def V6_vlalignbi_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii), "$Vd32 = vlalign($Vu32,$Vv32,#$Ii)", -CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[HasV60T,UseHVX]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110011; let hasNewValue = 1; @@ -36805,7 +37238,7 @@ def V6_vlsrb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.ub = vlsr($Vu32.ub,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV62T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -36817,7 +37250,7 @@ def V6_vlsrb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.ub = vlsr($Vu32.ub,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV62T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -36830,7 +37263,7 @@ def V6_vlsrh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.uh = vlsr($Vu32.uh,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -36842,7 +37275,7 @@ def V6_vlsrh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.uh = vlsr($Vu32.uh,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -36878,7 +37311,7 @@ def V6_vlsrhv : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vlsr($Vu32.h,$Vv32.h)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -36890,7 +37323,7 @@ def V6_vlsrhv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vlsr($Vu32.h,$Vv32.h)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -36926,7 +37359,7 @@ def V6_vlsrw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.uw = vlsr($Vu32.uw,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -36938,7 +37371,7 @@ def V6_vlsrw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.uw = vlsr($Vu32.uw,$Rt32)", -CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_41f4b64e, TypeCVI_VS>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -36974,7 +37407,7 @@ def V6_vlsrwv : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vlsr($Vu32.w,$Vv32.w)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -36986,7 +37419,7 @@ def V6_vlsrwv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vlsr($Vu32.w,$Vv32.w)", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111101; @@ -37022,7 +37455,7 @@ def V6_vlutvvb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)", -CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -37034,7 +37467,7 @@ def V6_vlutvvb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)", -CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -37047,7 +37480,7 @@ def V6_vlutvvb_nm : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch", -CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV62T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -37059,7 +37492,7 @@ def V6_vlutvvb_nm_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch", -CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV62T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_a30110, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -37072,7 +37505,7 @@ def V6_vlutvvb_oracc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8877260, Requires<[HasV60T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_245865, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -37086,7 +37519,7 @@ def V6_vlutvvb_oracc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8877260, Requires<[HasV60T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_245865, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -37101,7 +37534,7 @@ def V6_vlutvvb_oracci : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii), "$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8280533, Requires<[HasV62T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_cd4705, Requires<[HasV62T,UseHVX]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100110; let hasNewValue = 1; @@ -37114,7 +37547,7 @@ def V6_vlutvvb_oracci_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii), "$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8280533, Requires<[HasV62T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_cd4705, Requires<[HasV62T,UseHVX]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100110; let hasNewValue = 1; @@ -37128,7 +37561,7 @@ def V6_vlutvvbi : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii), "$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)", -CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV62T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[HasV62T,UseHVX]> { let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110001; let hasNewValue = 1; @@ -37139,7 +37572,7 @@ def V6_vlutvvbi_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii), "$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)", -CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV62T,UseHVX]> { +tc_c4b515c5, TypeCVI_VP>, Enc_0b2e5b, Requires<[HasV62T,UseHVX]> { let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110001; let hasNewValue = 1; @@ -37151,7 +37584,7 @@ def V6_vlutvwh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -37163,7 +37596,7 @@ def V6_vlutvwh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -37176,7 +37609,7 @@ def V6_vlutvwh_nm : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV62T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -37188,7 +37621,7 @@ def V6_vlutvwh_nm_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV62T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-24} = 0b00011000; @@ -37201,7 +37634,7 @@ def V6_vlutvwh_oracc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_16213761, Requires<[HasV60T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_7b523d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -37215,7 +37648,7 @@ def V6_vlutvwh_oracc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_16213761, Requires<[HasV60T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_7b523d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -37230,7 +37663,7 @@ def V6_vlutvwh_oracci : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii), "$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_3457570, Requires<[HasV62T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_1178da, Requires<[HasV62T,UseHVX]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100111; let hasNewValue = 1; @@ -37243,7 +37676,7 @@ def V6_vlutvwh_oracci_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii), "$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_3457570, Requires<[HasV62T,UseHVX]> { +tc_cbf6d1dc, TypeCVI_VP_VS>, Enc_1178da, Requires<[HasV62T,UseHVX]> { let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100111; let hasNewValue = 1; @@ -37257,7 +37690,7 @@ def V6_vlutvwhi : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii), "$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_13261538, Requires<[HasV62T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[HasV62T,UseHVX]> { let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110011; let hasNewValue = 1; @@ -37268,7 +37701,7 @@ def V6_vlutvwhi_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii), "$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_13261538, Requires<[HasV62T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_4b39e4, Requires<[HasV62T,UseHVX]> { let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110011; let hasNewValue = 1; @@ -37280,7 +37713,7 @@ def V6_vmaxb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vmax($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -37292,7 +37725,7 @@ def V6_vmaxb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vmax($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -37328,7 +37761,7 @@ def V6_vmaxh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vmax($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37340,7 +37773,7 @@ def V6_vmaxh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vmax($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37376,7 +37809,7 @@ def V6_vmaxub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37388,7 +37821,7 @@ def V6_vmaxub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37424,7 +37857,7 @@ def V6_vmaxuh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37436,7 +37869,7 @@ def V6_vmaxuh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37472,7 +37905,7 @@ def V6_vmaxw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vmax($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -37484,7 +37917,7 @@ def V6_vmaxw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vmax($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -37520,7 +37953,7 @@ def V6_vminb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vmin($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -37532,7 +37965,7 @@ def V6_vminb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vmin($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -37568,7 +38001,7 @@ def V6_vminh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vmin($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37580,7 +38013,7 @@ def V6_vminh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vmin($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37616,7 +38049,7 @@ def V6_vminub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37628,7 +38061,7 @@ def V6_vminub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37664,7 +38097,7 @@ def V6_vminuh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37676,7 +38109,7 @@ def V6_vminuh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37712,7 +38145,7 @@ def V6_vminw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vmin($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37724,7 +38157,7 @@ def V6_vminw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vmin($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111000; @@ -37760,7 +38193,7 @@ def V6_vmpabus : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -37772,7 +38205,7 @@ def V6_vmpabus_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -37785,7 +38218,7 @@ def V6_vmpabus_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -37799,7 +38232,7 @@ def V6_vmpabus_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -37864,7 +38297,7 @@ def V6_vmpabusv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.b)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -37876,7 +38309,7 @@ def V6_vmpabusv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.b)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -37912,7 +38345,7 @@ def V6_vmpabuuv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -37924,7 +38357,7 @@ def V6_vmpabuuv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -37960,7 +38393,7 @@ def V6_vmpahb : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -37972,7 +38405,7 @@ def V6_vmpahb_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -37985,7 +38418,7 @@ def V6_vmpahb_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -37999,7 +38432,7 @@ def V6_vmpahb_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -38064,7 +38497,7 @@ def V6_vmpauhb : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV62T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -38076,7 +38509,7 @@ def V6_vmpauhb_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV62T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -38089,7 +38522,7 @@ def V6_vmpauhb_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV62T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001100; @@ -38103,7 +38536,7 @@ def V6_vmpauhb_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV62T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001100; @@ -38168,7 +38601,7 @@ def V6_vmpybus : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -38180,7 +38613,7 @@ def V6_vmpybus_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -38193,7 +38626,7 @@ def V6_vmpybus_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -38207,7 +38640,7 @@ def V6_vmpybus_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -38272,7 +38705,7 @@ def V6_vmpybusv : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -38284,7 +38717,7 @@ def V6_vmpybusv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -38297,7 +38730,7 @@ def V6_vmpybusv_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -38311,7 +38744,7 @@ def V6_vmpybusv_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -38376,7 +38809,7 @@ def V6_vmpybv : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.h = vmpy($Vu32.b,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -38388,7 +38821,7 @@ def V6_vmpybv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.h = vmpy($Vu32.b,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -38401,7 +38834,7 @@ def V6_vmpybv_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32.h += vmpy($Vu32.b,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -38415,7 +38848,7 @@ def V6_vmpybv_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32.h += vmpy($Vu32.b,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -38480,7 +38913,7 @@ def V6_vmpyewuh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vmpye($Vu32.w,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -38492,7 +38925,7 @@ def V6_vmpyewuh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vmpye($Vu32.w,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -38505,7 +38938,7 @@ def V6_vmpyewuh_64 : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32 = vmpye($Vu32.w,$Vv32.uh)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV62T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -38517,7 +38950,7 @@ def V6_vmpyewuh_64_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32 = vmpye($Vu32.w,$Vv32.uh)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV62T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -38553,7 +38986,7 @@ def V6_vmpyh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vdd32.w = vmpy($Vu32.h,$Rt32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -38565,7 +38998,7 @@ def V6_vmpyh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vdd32.w = vmpy($Vu32.h,$Rt32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -38601,7 +39034,7 @@ def V6_vmpyhsat_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -38615,7 +39048,7 @@ def V6_vmpyhsat_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -38657,7 +39090,7 @@ def V6_vmpyhsrs : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -38669,7 +39102,7 @@ def V6_vmpyhsrs_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -38705,7 +39138,7 @@ def V6_vmpyhss : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -38717,7 +39150,7 @@ def V6_vmpyhss_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -38753,7 +39186,7 @@ def V6_vmpyhus : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -38765,7 +39198,7 @@ def V6_vmpyhus_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -38778,7 +39211,7 @@ def V6_vmpyhus_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -38792,7 +39225,7 @@ def V6_vmpyhus_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -38857,7 +39290,7 @@ def V6_vmpyhv : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.w = vmpy($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -38869,7 +39302,7 @@ def V6_vmpyhv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.w = vmpy($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -38882,7 +39315,7 @@ def V6_vmpyhv_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32.w += vmpy($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -38896,7 +39329,7 @@ def V6_vmpyhv_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32.w += vmpy($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -38961,7 +39394,7 @@ def V6_vmpyhvsrs : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -38973,7 +39406,7 @@ def V6_vmpyhvsrs_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -39009,7 +39442,7 @@ def V6_vmpyieoh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -39021,7 +39454,7 @@ def V6_vmpyieoh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -39034,7 +39467,7 @@ def V6_vmpyiewh_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vx32.w += vmpyie($Vu32.w,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100010; @@ -39048,7 +39481,7 @@ def V6_vmpyiewh_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vx32.w += vmpyie($Vu32.w,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100010; @@ -39090,7 +39523,7 @@ def V6_vmpyiewuh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -39102,7 +39535,7 @@ def V6_vmpyiewuh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -39115,7 +39548,7 @@ def V6_vmpyiewuh_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39129,7 +39562,7 @@ def V6_vmpyiewuh_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39194,7 +39627,7 @@ def V6_vmpyih : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vmpyi($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -39206,7 +39639,7 @@ def V6_vmpyih_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vmpyi($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -39219,7 +39652,7 @@ def V6_vmpyih_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vx32.h += vmpyi($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39233,7 +39666,7 @@ def V6_vmpyih_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vx32.h += vmpyi($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39298,7 +39731,7 @@ def V6_vmpyihb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpyi($Vu32.h,$Rt32.b)", -CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -39310,7 +39743,7 @@ def V6_vmpyihb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpyi($Vu32.h,$Rt32.b)", -CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -39323,7 +39756,7 @@ def V6_vmpyihb_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.h += vmpyi($Vu32.h,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -39337,7 +39770,7 @@ def V6_vmpyihb_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.h += vmpyi($Vu32.h,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -39402,7 +39835,7 @@ def V6_vmpyiowh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vmpyio($Vu32.w,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -39414,7 +39847,7 @@ def V6_vmpyiowh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vmpyio($Vu32.w,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -39450,7 +39883,7 @@ def V6_vmpyiwb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.w = vmpyi($Vu32.w,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -39462,7 +39895,7 @@ def V6_vmpyiwb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.w = vmpyi($Vu32.w,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -39475,7 +39908,7 @@ def V6_vmpyiwb_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.w += vmpyi($Vu32.w,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -39489,7 +39922,7 @@ def V6_vmpyiwb_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.w += vmpyi($Vu32.w,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -39554,7 +39987,7 @@ def V6_vmpyiwh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.w = vmpyi($Vu32.w,$Rt32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -39566,7 +39999,7 @@ def V6_vmpyiwh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.w = vmpyi($Vu32.w,$Rt32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -39579,7 +40012,7 @@ def V6_vmpyiwh_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.w += vmpyi($Vu32.w,$Rt32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -39593,7 +40026,7 @@ def V6_vmpyiwh_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.w += vmpyi($Vu32.w,$Rt32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -39658,7 +40091,7 @@ def V6_vmpyiwub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)", -CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV62T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -39670,7 +40103,7 @@ def V6_vmpyiwub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)", -CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV62T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001100; @@ -39683,7 +40116,7 @@ def V6_vmpyiwub_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)", -CVI_VX_LONG, TypeCVI_VX>, Enc_10058269, Requires<[HasV62T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001100; @@ -39697,7 +40130,7 @@ def V6_vmpyiwub_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)", -CVI_VX_LONG, TypeCVI_VX>, Enc_10058269, Requires<[HasV62T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001100; @@ -39762,7 +40195,7 @@ def V6_vmpyowh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -39774,7 +40207,7 @@ def V6_vmpyowh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -39787,7 +40220,7 @@ def V6_vmpyowh_64_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32 += vmpyo($Vu32.w,$Vv32.h)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39801,7 +40234,7 @@ def V6_vmpyowh_64_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32 += vmpyo($Vu32.w,$Vv32.h)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39839,7 +40272,7 @@ def V6_vmpyowh_rnd : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -39851,7 +40284,7 @@ def V6_vmpyowh_rnd_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -39887,7 +40320,7 @@ def V6_vmpyowh_rnd_sacc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39901,7 +40334,7 @@ def V6_vmpyowh_rnd_sacc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39941,7 +40374,7 @@ def V6_vmpyowh_sacc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39955,7 +40388,7 @@ def V6_vmpyowh_sacc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -39995,7 +40428,7 @@ def V6_vmpyub : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001110; @@ -40007,7 +40440,7 @@ def V6_vmpyub_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001110; @@ -40020,7 +40453,7 @@ def V6_vmpyub_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001100; @@ -40034,7 +40467,7 @@ def V6_vmpyub_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001100; @@ -40099,7 +40532,7 @@ def V6_vmpyubv : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -40111,7 +40544,7 @@ def V6_vmpyubv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -40124,7 +40557,7 @@ def V6_vmpyubv_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -40138,7 +40571,7 @@ def V6_vmpyubv_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -40203,7 +40636,7 @@ def V6_vmpyuh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -40215,7 +40648,7 @@ def V6_vmpyuh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_01d3d0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -40228,7 +40661,7 @@ def V6_vmpyuh_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -40242,7 +40675,7 @@ def V6_vmpyuh_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_5e8512, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -40307,7 +40740,7 @@ def V6_vmpyuhv : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -40319,7 +40752,7 @@ def V6_vmpyuhv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -40332,7 +40765,7 @@ def V6_vmpyuhv_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -40346,7 +40779,7 @@ def V6_vmpyuhv_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_3fc427, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100001; @@ -40411,7 +40844,7 @@ def V6_vmux : HInst< (outs VectorRegs:$Vd32), (ins VecPredRegs:$Qt4, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32 = vmux($Qt4,$Vu32,$Vv32)", -CVI_VA, TypeCVI_VA>, Enc_1572239, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_31db33, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110111; @@ -40423,7 +40856,7 @@ def V6_vmux_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VecPredRegs128B:$Qt4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32 = vmux($Qt4,$Vu32,$Vv32)", -CVI_VA, TypeCVI_VA>, Enc_1572239, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_31db33, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110111; @@ -40436,7 +40869,7 @@ def V6_vnavgh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vnavg($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -40448,7 +40881,7 @@ def V6_vnavgh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vnavg($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -40484,7 +40917,7 @@ def V6_vnavgub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -40496,7 +40929,7 @@ def V6_vnavgub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -40532,7 +40965,7 @@ def V6_vnavgw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vnavg($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -40544,7 +40977,7 @@ def V6_vnavgw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vnavg($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100111; @@ -40580,7 +41013,7 @@ def V6_vnccombine : HInst< (outs VecDblRegs:$Vdd32), (ins PredRegs:$Ps4, VectorRegs:$Vu32, VectorRegs:$Vv32), "if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> { +tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011010010; @@ -40594,7 +41027,7 @@ def V6_vnccombine_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins PredRegs:$Ps4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> { +tc_2171ebae, TypeCVI_VA_DV>, Enc_8c2412, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011010010; @@ -40609,7 +41042,7 @@ def V6_vncmov : HInst< (outs VectorRegs:$Vd32), (ins PredRegs:$Ps4, VectorRegs:$Vu32), "if (!$Ps4) $Vd32 = $Vu32", -CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> { +tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001101000100000; @@ -40623,7 +41056,7 @@ def V6_vncmov_128B : HInst< (outs VectorRegs128B:$Vd32), (ins PredRegs:$Ps4, VectorRegs128B:$Vu32), "if (!$Ps4) $Vd32 = $Vu32", -CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> { +tc_b06ab583, TypeCVI_VA>, Enc_770858, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001101000100000; @@ -40638,7 +41071,7 @@ def V6_vnormamth : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.h = vnormamt($Vu32.h)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000011; @@ -40650,7 +41083,7 @@ def V6_vnormamth_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.h = vnormamt($Vu32.h)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000011; @@ -40686,7 +41119,7 @@ def V6_vnormamtw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.w = vnormamt($Vu32.w)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000011; @@ -40698,7 +41131,7 @@ def V6_vnormamtw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.w = vnormamt($Vu32.w)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000011; @@ -40734,7 +41167,7 @@ def V6_vnot : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32 = vnot($Vu32)", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -40746,7 +41179,7 @@ def V6_vnot_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32 = vnot($Vu32)", -CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_71337255, TypeCVI_VA>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000000; @@ -40759,7 +41192,7 @@ def V6_vor : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32 = vor($Vu32,$Vv32)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -40771,7 +41204,7 @@ def V6_vor_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32 = vor($Vu32,$Vv32)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -40784,7 +41217,7 @@ def V6_vpackeb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vpacke($Vu32.h,$Vv32.h)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -40796,7 +41229,7 @@ def V6_vpackeb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vpacke($Vu32.h,$Vv32.h)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -40832,7 +41265,7 @@ def V6_vpackeh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vpacke($Vu32.w,$Vv32.w)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -40844,7 +41277,7 @@ def V6_vpackeh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vpacke($Vu32.w,$Vv32.w)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -40880,7 +41313,7 @@ def V6_vpackhb_sat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vpack($Vu32.h,$Vv32.h):sat", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -40892,7 +41325,7 @@ def V6_vpackhb_sat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vpack($Vu32.h,$Vv32.h):sat", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -40928,7 +41361,7 @@ def V6_vpackhub_sat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -40940,7 +41373,7 @@ def V6_vpackhub_sat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -40976,7 +41409,7 @@ def V6_vpackob : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vpacko($Vu32.h,$Vv32.h)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -40988,7 +41421,7 @@ def V6_vpackob_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vpacko($Vu32.h,$Vv32.h)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -41024,7 +41457,7 @@ def V6_vpackoh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vpacko($Vu32.w,$Vv32.w)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -41036,7 +41469,7 @@ def V6_vpackoh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vpacko($Vu32.w,$Vv32.w)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -41072,7 +41505,7 @@ def V6_vpackwh_sat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vpack($Vu32.w,$Vv32.w):sat", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -41084,7 +41517,7 @@ def V6_vpackwh_sat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vpack($Vu32.w,$Vv32.w):sat", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -41120,7 +41553,7 @@ def V6_vpackwuh_sat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -41132,7 +41565,7 @@ def V6_vpackwuh_sat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -41168,7 +41601,7 @@ def V6_vpopcounth : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.h = vpopcount($Vu32.h)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -41180,7 +41613,7 @@ def V6_vpopcounth_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.h = vpopcount($Vu32.h)", -CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_d2cb81ea, TypeCVI_VS>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -41216,7 +41649,7 @@ def V6_vrdelta : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32 = vrdelta($Vu32,$Vv32)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -41228,7 +41661,7 @@ def V6_vrdelta_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32 = vrdelta($Vu32,$Vv32)", -CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_f3fc3f83, TypeCVI_VP>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -41241,7 +41674,7 @@ def V6_vrmpybus : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -41253,7 +41686,7 @@ def V6_vrmpybus_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -41266,7 +41699,7 @@ def V6_vrmpybus_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -41280,7 +41713,7 @@ def V6_vrmpybus_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -41345,7 +41778,7 @@ def V6_vrmpybusi : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> { +tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -41357,7 +41790,7 @@ def V6_vrmpybusi_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> { +tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b10; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -41370,7 +41803,7 @@ def V6_vrmpybusi_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> { +tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b10; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -41384,7 +41817,7 @@ def V6_vrmpybusi_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> { +tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b10; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -41449,7 +41882,7 @@ def V6_vrmpybusv : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -41461,7 +41894,7 @@ def V6_vrmpybusv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -41474,7 +41907,7 @@ def V6_vrmpybusv_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -41488,7 +41921,7 @@ def V6_vrmpybusv_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -41553,7 +41986,7 @@ def V6_vrmpybv : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vrmpy($Vu32.b,$Vv32.b)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -41565,7 +41998,7 @@ def V6_vrmpybv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vrmpy($Vu32.b,$Vv32.b)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -41578,7 +42011,7 @@ def V6_vrmpybv_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vx32.w += vrmpy($Vu32.b,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -41592,7 +42025,7 @@ def V6_vrmpybv_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vx32.w += vrmpy($Vu32.b,$Vv32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -41657,7 +42090,7 @@ def V6_vrmpyub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -41669,7 +42102,7 @@ def V6_vrmpyub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)", -CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_69b6dd20, TypeCVI_VX>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -41682,7 +42115,7 @@ def V6_vrmpyub_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32), "$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -41696,7 +42129,7 @@ def V6_vrmpyub_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)", -CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> { +tc_d725e5b0, TypeCVI_VX>, Enc_5138b3, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -41761,7 +42194,7 @@ def V6_vrmpyubi : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> { +tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -41773,7 +42206,7 @@ def V6_vrmpyubi_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> { +tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -41786,7 +42219,7 @@ def V6_vrmpyubi_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> { +tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -41800,7 +42233,7 @@ def V6_vrmpyubi_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> { +tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001011; @@ -41865,7 +42298,7 @@ def V6_vrmpyubv : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -41877,7 +42310,7 @@ def V6_vrmpyubv_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)", -CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_908a4c8c, TypeCVI_VX>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -41890,7 +42323,7 @@ def V6_vrmpyubv_acc : HInst< (outs VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -41904,7 +42337,7 @@ def V6_vrmpyubv_acc_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> { +tc_e172d86a, TypeCVI_VX_DV>, Enc_a7341a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -41969,7 +42402,7 @@ def V6_vror : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, IntRegs:$Rt32), "$Vd32 = vror($Vu32,$Rt32)", -CVI_VP, TypeCVI_VP>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_bf142ae2, TypeCVI_VP>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -41981,7 +42414,7 @@ def V6_vror_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, IntRegs:$Rt32), "$Vd32 = vror($Vu32,$Rt32)", -CVI_VP, TypeCVI_VP>, Enc_16214129, Requires<[HasV60T,UseHVX]> { +tc_bf142ae2, TypeCVI_VP>, Enc_b087ac, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001011; @@ -41994,7 +42427,7 @@ def V6_vroundhb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vround($Vu32.h,$Vv32.h):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42006,7 +42439,7 @@ def V6_vroundhb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vround($Vu32.h,$Vv32.h):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42042,7 +42475,7 @@ def V6_vroundhub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vround($Vu32.h,$Vv32.h):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42054,7 +42487,7 @@ def V6_vroundhub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vround($Vu32.h,$Vv32.h):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42090,7 +42523,7 @@ def V6_vrounduhub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -42102,7 +42535,7 @@ def V6_vrounduhub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -42138,7 +42571,7 @@ def V6_vrounduwuh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -42150,7 +42583,7 @@ def V6_vrounduwuh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111111; @@ -42186,7 +42619,7 @@ def V6_vroundwh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vround($Vu32.w,$Vv32.w):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42198,7 +42631,7 @@ def V6_vroundwh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vround($Vu32.w,$Vv32.w):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42234,7 +42667,7 @@ def V6_vroundwuh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vround($Vu32.w,$Vv32.w):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42246,7 +42679,7 @@ def V6_vroundwuh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vround($Vu32.w,$Vv32.w):sat", -CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_45453b98, TypeCVI_VS>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42282,7 +42715,7 @@ def V6_vrsadubi : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> { +tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -42294,7 +42727,7 @@ def V6_vrsadubi_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> { +tc_7e9f581b, TypeCVI_VX_DV>, Enc_2f2f04, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -42307,7 +42740,7 @@ def V6_vrsadubi_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> { +tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -42321,7 +42754,7 @@ def V6_vrsadubi_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii), "$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)", -CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> { +tc_41f99e1c, TypeCVI_VX_DV>, Enc_d483b9, Requires<[HasV60T,UseHVX]> { let Inst{7-6} = 0b11; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001010; @@ -42386,7 +42819,7 @@ def V6_vsathub : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vsat($Vu32.h,$Vv32.h)", -CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42398,7 +42831,7 @@ def V6_vsathub_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vsat($Vu32.h,$Vv32.h)", -CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42434,7 +42867,7 @@ def V6_vsatuwuh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -42446,7 +42879,7 @@ def V6_vsatuwuh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -42482,7 +42915,7 @@ def V6_vsatwh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vsat($Vu32.w,$Vv32.w)", -CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42494,7 +42927,7 @@ def V6_vsatwh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vsat($Vu32.w,$Vv32.w)", -CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_9b9642a1, TypeCVI_VINLANESAT>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111011; @@ -42530,7 +42963,7 @@ def V6_vsb : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32), "$Vdd32.h = vsxt($Vu32.b)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -42542,7 +42975,7 @@ def V6_vsb_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32), "$Vdd32.h = vsxt($Vu32.b)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -42578,7 +43011,7 @@ def V6_vsh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32), "$Vdd32.w = vsxt($Vu32.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -42590,7 +43023,7 @@ def V6_vsh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32), "$Vdd32.w = vsxt($Vu32.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -42626,7 +43059,7 @@ def V6_vshufeh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vshuffe($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -42638,7 +43071,7 @@ def V6_vshufeh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vshuffe($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -42674,7 +43107,7 @@ def V6_vshuff : HInst< (outs VectorRegs:$Vy32, VectorRegs:$Vx32), (ins VectorRegs:$Vy32in, VectorRegs:$Vx32in, IntRegs:$Rt32), "vshuff($Vy32,$Vx32,$Rt32)", -CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> { +tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001111; @@ -42689,7 +43122,7 @@ def V6_vshuff_128B : HInst< (outs VectorRegs128B:$Vy32, VectorRegs128B:$Vx32), (ins VectorRegs128B:$Vy32in, VectorRegs128B:$Vx32in, IntRegs:$Rt32), "vshuff($Vy32,$Vx32,$Rt32)", -CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> { +tc_5c120602, TypeCVI_VP_VS>, Enc_989021, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001111; @@ -42705,7 +43138,7 @@ def V6_vshuffb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.b = vshuff($Vu32.b)", -CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -42717,7 +43150,7 @@ def V6_vshuffb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.b = vshuff($Vu32.b)", -CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -42753,7 +43186,7 @@ def V6_vshuffeb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vshuffe($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -42765,7 +43198,7 @@ def V6_vshuffeb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vshuffe($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -42801,7 +43234,7 @@ def V6_vshuffh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32), "$Vd32.h = vshuff($Vu32.h)", -CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -42813,7 +43246,7 @@ def V6_vshuffh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32), "$Vd32.h = vshuff($Vu32.h)", -CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> { +tc_e6299d16, TypeCVI_VP>, Enc_e7581c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -42849,7 +43282,7 @@ def V6_vshuffob : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vshuffo($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -42861,7 +43294,7 @@ def V6_vshuffob_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vshuffo($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -42897,7 +43330,7 @@ def V6_vshuffvdd : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8), "$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -42909,7 +43342,7 @@ def V6_vshuffvdd_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8), "$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> { +tc_4e2a5159, TypeCVI_VP_VS>, Enc_24a7dc, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-24} = 0b00011011; @@ -42922,7 +43355,7 @@ def V6_vshufoeb : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -42934,7 +43367,7 @@ def V6_vshufoeb_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -42970,7 +43403,7 @@ def V6_vshufoeh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -42982,7 +43415,7 @@ def V6_vshufoeh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -43018,7 +43451,7 @@ def V6_vshufoh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vshuffo($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -43030,7 +43463,7 @@ def V6_vshufoh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vshuffo($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111010; @@ -43066,7 +43499,7 @@ def V6_vsubb : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vsub($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -43078,7 +43511,7 @@ def V6_vsubb_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vsub($Vu32.b,$Vv32.b)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -43114,7 +43547,7 @@ def V6_vsubb_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -43126,7 +43559,7 @@ def V6_vsubb_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -43162,7 +43595,7 @@ def V6_vsubbnq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if (!$Qv4) $Vx32.b -= $Vu32.b", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -43176,7 +43609,7 @@ def V6_vsubbnq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if (!$Qv4) $Vx32.b -= $Vu32.b", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -43216,7 +43649,7 @@ def V6_vsubbq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if ($Qv4) $Vx32.b -= $Vu32.b", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -43230,7 +43663,7 @@ def V6_vsubbq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if ($Qv4) $Vx32.b -= $Vu32.b", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -43270,7 +43703,7 @@ def V6_vsubbsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.b = vsub($Vu32.b,$Vv32.b):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -43282,7 +43715,7 @@ def V6_vsubbsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.b = vsub($Vu32.b,$Vv32.b):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111001; @@ -43318,7 +43751,7 @@ def V6_vsubbsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -43330,7 +43763,7 @@ def V6_vsubbsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -43366,7 +43799,7 @@ def V6_vsubcarry : HInst< (outs VectorRegs:$Vd32, VecPredRegs:$Qx4), (ins VectorRegs:$Vu32, VectorRegs:$Vv32, VecPredRegs:$Qx4in), "$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry", -CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> { +tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[HasV62T,UseHVX]> { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100101; @@ -43381,7 +43814,7 @@ def V6_vsubcarry_128B : HInst< (outs VectorRegs128B:$Vd32, VecPredRegs128B:$Qx4), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, VecPredRegs128B:$Qx4in), "$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry", -CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> { +tc_5a9fc4ec, TypeCVI_VA>, Enc_b43b67, Requires<[HasV62T,UseHVX]> { let Inst{7-7} = 0b1; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100101; @@ -43397,7 +43830,7 @@ def V6_vsubh : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vsub($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -43409,7 +43842,7 @@ def V6_vsubh_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vsub($Vu32.h,$Vv32.h)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -43445,7 +43878,7 @@ def V6_vsubh_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -43457,7 +43890,7 @@ def V6_vsubh_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -43493,7 +43926,7 @@ def V6_vsubhnq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if (!$Qv4) $Vx32.h -= $Vu32.h", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -43507,7 +43940,7 @@ def V6_vsubhnq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if (!$Qv4) $Vx32.h -= $Vu32.h", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -43547,7 +43980,7 @@ def V6_vsubhq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if ($Qv4) $Vx32.h -= $Vu32.h", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -43561,7 +43994,7 @@ def V6_vsubhq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if ($Qv4) $Vx32.h -= $Vu32.h", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000001; @@ -43601,7 +44034,7 @@ def V6_vsubhsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.h = vsub($Vu32.h,$Vv32.h):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -43613,7 +44046,7 @@ def V6_vsubhsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.h = vsub($Vu32.h,$Vv32.h):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -43649,7 +44082,7 @@ def V6_vsubhsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -43661,7 +44094,7 @@ def V6_vsubhsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -43697,7 +44130,7 @@ def V6_vsubhw : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.w = vsub($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -43709,7 +44142,7 @@ def V6_vsubhw_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.w = vsub($Vu32.h,$Vv32.h)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -43745,7 +44178,7 @@ def V6_vsububh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -43757,7 +44190,7 @@ def V6_vsububh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -43793,7 +44226,7 @@ def V6_vsububsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -43805,7 +44238,7 @@ def V6_vsububsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -43841,7 +44274,7 @@ def V6_vsububsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -43853,7 +44286,7 @@ def V6_vsububsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -43889,7 +44322,7 @@ def V6_vsubububb_sat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -43901,7 +44334,7 @@ def V6_vsubububb_sat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -43914,7 +44347,7 @@ def V6_vsubuhsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -43926,7 +44359,7 @@ def V6_vsubuhsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -43962,7 +44395,7 @@ def V6_vsubuhsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -43974,7 +44407,7 @@ def V6_vsubuhsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -44010,7 +44443,7 @@ def V6_vsubuhw : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -44022,7 +44455,7 @@ def V6_vsubuhw_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> { +tc_eda67dcd, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -44058,7 +44491,7 @@ def V6_vsubuwsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -44070,7 +44503,7 @@ def V6_vsubuwsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011111110; @@ -44106,7 +44539,7 @@ def V6_vsubuwsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -44118,7 +44551,7 @@ def V6_vsubuwsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV62T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -44154,7 +44587,7 @@ def V6_vsubw : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vsub($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -44166,7 +44599,7 @@ def V6_vsubw_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vsub($Vu32.w,$Vv32.w)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100010; @@ -44202,7 +44635,7 @@ def V6_vsubw_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -44214,7 +44647,7 @@ def V6_vsubw_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100100; @@ -44250,7 +44683,7 @@ def V6_vsubwnq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if (!$Qv4) $Vx32.w -= $Vu32.w", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -44264,7 +44697,7 @@ def V6_vsubwnq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if (!$Qv4) $Vx32.w -= $Vu32.w", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -44304,7 +44737,7 @@ def V6_vsubwq : HInst< (outs VectorRegs:$Vx32), (ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32), "if ($Qv4) $Vx32.w -= $Vu32.w", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -44318,7 +44751,7 @@ def V6_vsubwq_128B : HInst< (outs VectorRegs128B:$Vx32), (ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32), "if ($Qv4) $Vx32.w -= $Vu32.w", -CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> { +tc_a3127e12, TypeCVI_VA>, Enc_a90628, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{21-16} = 0b000010; @@ -44358,7 +44791,7 @@ def V6_vsubwsat : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32.w = vsub($Vu32.w,$Vv32.w):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -44370,7 +44803,7 @@ def V6_vsubwsat_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32.w = vsub($Vu32.w,$Vv32.w):sat", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100011; @@ -44406,7 +44839,7 @@ def V6_vsubwsat_dv : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32), "$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -44418,7 +44851,7 @@ def V6_vsubwsat_dv_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32), "$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> { +tc_97c165b9, TypeCVI_VA_DV>, Enc_f8ecf9, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100101; @@ -44454,7 +44887,7 @@ def V6_vswap : HInst< (outs VecDblRegs:$Vdd32), (ins VecPredRegs:$Qt4, VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vdd32 = vswap($Qt4,$Vu32,$Vv32)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_11424254, Requires<[HasV60T,UseHVX]> { +tc_316c637c, TypeCVI_VA_DV>, Enc_3dac0b, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110101; @@ -44466,7 +44899,7 @@ def V6_vswap_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecPredRegs128B:$Qt4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vdd32 = vswap($Qt4,$Vu32,$Vv32)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_11424254, Requires<[HasV60T,UseHVX]> { +tc_316c637c, TypeCVI_VA_DV>, Enc_3dac0b, Requires<[HasV60T,UseHVX]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011110101; @@ -44479,7 +44912,7 @@ def V6_vtmpyb : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -44491,7 +44924,7 @@ def V6_vtmpyb_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -44504,7 +44937,7 @@ def V6_vtmpyb_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -44518,7 +44951,7 @@ def V6_vtmpyb_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -44583,7 +45016,7 @@ def V6_vtmpybus : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -44595,7 +45028,7 @@ def V6_vtmpybus_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001000; @@ -44608,7 +45041,7 @@ def V6_vtmpybus_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -44622,7 +45055,7 @@ def V6_vtmpybus_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -44687,7 +45120,7 @@ def V6_vtmpyhb : HInst< (outs VecDblRegs:$Vdd32), (ins VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -44699,7 +45132,7 @@ def V6_vtmpyhb_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> { +tc_7c3f55c4, TypeCVI_VX_DV>, Enc_aad80c, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001101; @@ -44712,7 +45145,7 @@ def V6_vtmpyhb_acc : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -44726,7 +45159,7 @@ def V6_vtmpyhb_acc_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32), "$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)", -CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> { +tc_d98f4d63, TypeCVI_VX_DV>, Enc_d6990d, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001000; @@ -44820,7 +45253,7 @@ def V6_vunpackb : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32), "$Vdd32.h = vunpack($Vu32.b)", -CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -44832,7 +45265,7 @@ def V6_vunpackb_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32), "$Vdd32.h = vunpack($Vu32.b)", -CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -44868,7 +45301,7 @@ def V6_vunpackh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32), "$Vdd32.w = vunpack($Vu32.h)", -CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -44880,7 +45313,7 @@ def V6_vunpackh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32), "$Vdd32.w = vunpack($Vu32.h)", -CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -44916,7 +45349,7 @@ def V6_vunpackob : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32), "$Vxx32.h |= vunpacko($Vu32.b)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> { +tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-16} = 0b0001111000000000; @@ -44930,7 +45363,7 @@ def V6_vunpackob_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32), "$Vxx32.h |= vunpacko($Vu32.b)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> { +tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-16} = 0b0001111000000000; @@ -44970,7 +45403,7 @@ def V6_vunpackoh : HInst< (outs VecDblRegs:$Vxx32), (ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32), "$Vxx32.w |= vunpacko($Vu32.h)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> { +tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-16} = 0b0001111000000000; @@ -44984,7 +45417,7 @@ def V6_vunpackoh_128B : HInst< (outs VecDblRegs128B:$Vxx32), (ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32), "$Vxx32.w |= vunpacko($Vu32.h)", -CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> { +tc_72ad7b54, TypeCVI_VP_VS>, Enc_500cb0, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-16} = 0b0001111000000000; @@ -45026,7 +45459,7 @@ def V6_vunpackub : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32), "$Vdd32.uh = vunpack($Vu32.ub)", -CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -45038,7 +45471,7 @@ def V6_vunpackub_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32), "$Vdd32.uh = vunpack($Vu32.ub)", -CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -45074,7 +45507,7 @@ def V6_vunpackuh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32), "$Vdd32.uw = vunpack($Vu32.uh)", -CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -45086,7 +45519,7 @@ def V6_vunpackuh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32), "$Vdd32.uw = vunpack($Vu32.uh)", -CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_d7bea0ec, TypeCVI_VP_VS>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -45122,7 +45555,7 @@ def V6_vwhist128 : HInst< (outs), (ins), "vwhist128", -CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10010010000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -45131,7 +45564,7 @@ def V6_vwhist128_128B : HInst< (outs), (ins), "vwhist128", -CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10010010000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -45141,7 +45574,7 @@ def V6_vwhist128m : HInst< (outs), (ins u1_0Imm:$Ii), "vwhist128(#$Ii)", -CVI_HIST, TypeCVI_HIST>, Enc_1291652, Requires<[HasV62T,UseHVX]> { +tc_b77635b4, TypeCVI_HIST>, Enc_efaed8, Requires<[HasV62T,UseHVX]> { let Inst{7-0} = 0b10000000; let Inst{13-9} = 0b10011; let Inst{31-16} = 0b0001111000000000; @@ -45151,7 +45584,7 @@ def V6_vwhist128m_128B : HInst< (outs), (ins u1_0Imm:$Ii), "vwhist128(#$Ii)", -CVI_HIST, TypeCVI_HIST>, Enc_1291652, Requires<[HasV62T,UseHVX]> { +tc_b77635b4, TypeCVI_HIST>, Enc_efaed8, Requires<[HasV62T,UseHVX]> { let Inst{7-0} = 0b10000000; let Inst{13-9} = 0b10011; let Inst{31-16} = 0b0001111000000000; @@ -45162,7 +45595,7 @@ def V6_vwhist128q : HInst< (outs), (ins VecPredRegs:$Qv4), "vwhist128($Qv4)", -CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10010010000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -45172,7 +45605,7 @@ def V6_vwhist128q_128B : HInst< (outs), (ins VecPredRegs128B:$Qv4), "vwhist128($Qv4)", -CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10010010000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -45183,7 +45616,7 @@ def V6_vwhist128qm : HInst< (outs), (ins VecPredRegs:$Qv4, u1_0Imm:$Ii), "vwhist128($Qv4,#$Ii)", -CVI_HIST, TypeCVI_HIST>, Enc_7978128, Requires<[HasV62T,UseHVX]> { +tc_28978789, TypeCVI_HIST>, Enc_802dc0, Requires<[HasV62T,UseHVX]> { let Inst{7-0} = 0b10000000; let Inst{13-9} = 0b10011; let Inst{21-16} = 0b000010; @@ -45194,7 +45627,7 @@ def V6_vwhist128qm_128B : HInst< (outs), (ins VecPredRegs128B:$Qv4, u1_0Imm:$Ii), "vwhist128($Qv4,#$Ii)", -CVI_HIST, TypeCVI_HIST>, Enc_7978128, Requires<[HasV62T,UseHVX]> { +tc_28978789, TypeCVI_HIST>, Enc_802dc0, Requires<[HasV62T,UseHVX]> { let Inst{7-0} = 0b10000000; let Inst{13-9} = 0b10011; let Inst{21-16} = 0b000010; @@ -45206,7 +45639,7 @@ def V6_vwhist256 : HInst< (outs), (ins), "vwhist256", -CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10001010000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -45215,7 +45648,7 @@ def V6_vwhist256_128B : HInst< (outs), (ins), "vwhist256", -CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10001010000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -45225,7 +45658,7 @@ def V6_vwhist256_sat : HInst< (outs), (ins), "vwhist256:sat", -CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10001110000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -45234,7 +45667,7 @@ def V6_vwhist256_sat_128B : HInst< (outs), (ins), "vwhist256:sat", -CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> { +tc_e5053c8f, TypeCVI_HIST>, Enc_e3b0c4, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10001110000000; let Inst{31-16} = 0b0001111000000000; let DecoderNamespace = "EXT_mmvec"; @@ -45244,7 +45677,7 @@ def V6_vwhist256q : HInst< (outs), (ins VecPredRegs:$Qv4), "vwhist256($Qv4)", -CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10001010000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -45254,7 +45687,7 @@ def V6_vwhist256q_128B : HInst< (outs), (ins VecPredRegs128B:$Qv4), "vwhist256($Qv4)", -CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10001010000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -45265,7 +45698,7 @@ def V6_vwhist256q_sat : HInst< (outs), (ins VecPredRegs:$Qv4), "vwhist256($Qv4):sat", -CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10001110000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -45275,7 +45708,7 @@ def V6_vwhist256q_sat_128B : HInst< (outs), (ins VecPredRegs128B:$Qv4), "vwhist256($Qv4):sat", -CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> { +tc_cedf314b, TypeCVI_HIST>, Enc_217147, Requires<[HasV62T,UseHVX]> { let Inst{13-0} = 0b10001110000000; let Inst{21-16} = 0b000010; let Inst{31-24} = 0b00011110; @@ -45286,7 +45719,7 @@ def V6_vxor : HInst< (outs VectorRegs:$Vd32), (ins VectorRegs:$Vu32, VectorRegs:$Vv32), "$Vd32 = vxor($Vu32,$Vv32)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -45298,7 +45731,7 @@ def V6_vxor_128B : HInst< (outs VectorRegs128B:$Vd32), (ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32), "$Vd32 = vxor($Vu32,$Vv32)", -CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> { +tc_bbaf280e, TypeCVI_VA>, Enc_45364e, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -45311,7 +45744,7 @@ def V6_vzb : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32), "$Vdd32.uh = vzxt($Vu32.ub)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -45323,7 +45756,7 @@ def V6_vzb_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32), "$Vdd32.uh = vzxt($Vu32.ub)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -45359,7 +45792,7 @@ def V6_vzh : HInst< (outs VecDblRegs:$Vdd32), (ins VectorRegs:$Vu32), "$Vdd32.uw = vzxt($Vu32.uh)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -45371,7 +45804,7 @@ def V6_vzh_128B : HInst< (outs VecDblRegs128B:$Vdd32), (ins VectorRegs128B:$Vu32), "$Vdd32.uw = vzxt($Vu32.uh)", -CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> { +tc_644584f8, TypeCVI_VA_DV>, Enc_dd766a, Requires<[HasV60T,UseHVX]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000010; @@ -45407,7 +45840,7 @@ def Y2_barrier : HInst< (outs), (ins), "barrier", -ST_tc_3stall_SLOT0, TypeST>, Enc_0 { +tc_ef2676fd, TypeST>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b1010100000000000; let isSoloAX = 1; @@ -45417,7 +45850,7 @@ def Y2_break : HInst< (outs), (ins), "brkpt", -CR_tc_3x_SLOT3, TypeCR>, Enc_0 { +tc_bcf0e36e, TypeCR>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b0110110000100000; let isSolo = 1; @@ -45426,7 +45859,7 @@ def Y2_dccleana : HInst< (outs), (ins IntRegs:$Rs32), "dccleana($Rs32)", -ST_tc_ld_SLOT0, TypeST>, Enc_11704059 { +tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000000; let isSoloAin1 = 1; @@ -45435,7 +45868,7 @@ def Y2_dccleaninva : HInst< (outs), (ins IntRegs:$Rs32), "dccleaninva($Rs32)", -ST_tc_ld_SLOT0, TypeST>, Enc_11704059 { +tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000010; let isSoloAin1 = 1; @@ -45444,7 +45877,7 @@ def Y2_dcfetch : HInst< (outs), (ins IntRegs:$Rs32), "dcfetch($Rs32)", -PSEUDO, TypeMAPPING> { +tc_34e882a4, TypeMAPPING> { let hasSideEffects = 1; let isPseudo = 1; let isCodeGenOnly = 1; @@ -45453,7 +45886,7 @@ def Y2_dcfetchbo : HInst< (outs), (ins IntRegs:$Rs32, u11_3Imm:$Ii), "dcfetch($Rs32+#$Ii)", -LD_tc_ld_SLOT0, TypeLD>, Enc_4983213 { +tc_ef0ebaaa, TypeLD>, Enc_2d829e { let Inst{13-11} = 0b000; let Inst{31-21} = 0b10010100000; let addrMode = BaseImmOffset; @@ -45463,7 +45896,7 @@ def Y2_dcinva : HInst< (outs), (ins IntRegs:$Rs32), "dcinva($Rs32)", -ST_tc_ld_SLOT0, TypeST>, Enc_11704059 { +tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000001; let isSoloAin1 = 1; @@ -45472,17 +45905,17 @@ def Y2_dczeroa : HInst< (outs), (ins IntRegs:$Rs32), "dczeroa($Rs32)", -ST_tc_ld_SLOT0, TypeST>, Enc_11704059 { +tc_30665cb0, TypeST>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b10100000110; -let mayStore = 1; let isSoloAin1 = 1; +let mayStore = 1; } def Y2_icinva : HInst< (outs), (ins IntRegs:$Rs32), "icinva($Rs32)", -J_tc_2early_SLOT2, TypeJ>, Enc_11704059 { +tc_049dfb74, TypeJ>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01010110110; let isSolo = 1; @@ -45491,7 +45924,7 @@ def Y2_isync : HInst< (outs), (ins), "isync", -J_tc_2early_SLOT2, TypeJ>, Enc_0 { +tc_d267fa19, TypeJ>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000010; let Inst{31-16} = 0b0101011111000000; let isSolo = 1; @@ -45500,7 +45933,7 @@ def Y2_syncht : HInst< (outs), (ins), "syncht", -ST_tc_ld_SLOT0, TypeST>, Enc_0 { +tc_ef2676fd, TypeST>, Enc_e3b0c4 { let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b1010100001000000; let isSolo = 1; @@ -45509,7 +45942,7 @@ def Y4_l2fetch : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), "l2fetch($Rs32,$Rt32)", -ST_tc_3stall_SLOT0, TypeST>, Enc_14620934 { +tc_f4608adc, TypeST>, Enc_ca3887 { let Inst{7-0} = 0b00000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10100110000; @@ -45521,7 +45954,7 @@ def Y4_trace : HInst< (outs), (ins IntRegs:$Rs32), "trace($Rs32)", -CR_tc_2early_SLOT3, TypeCR>, Enc_11704059 { +tc_4997da4a, TypeCR>, Enc_ecbcc8 { let Inst{13-0} = 0b00000000000000; let Inst{31-21} = 0b01100010010; let isSoloAX = 1; @@ -45530,7 +45963,7 @@ def Y5_l2fetch : HInst< (outs), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "l2fetch($Rs32,$Rtt32)", -ST_tc_3stall_SLOT0, TypeST>, Enc_8943121, Requires<[HasV5T]> { +tc_f4608adc, TypeST>, Enc_e6abcf, Requires<[HasV5T]> { let Inst{7-0} = 0b00000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10100110100; @@ -45542,31 +45975,33 @@ def dep_A2_addsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32):sat:deprecated", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 { +tc_47ab9233, TypeALU64>, Enc_5ab2be { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101100; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def dep_A2_subsat : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rt32, IntRegs:$Rs32), "$Rd32 = sub($Rt32,$Rs32):sat:deprecated", -ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 { +tc_47ab9233, TypeALU64>, Enc_bd6011 { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010101100; let hasNewValue = 1; let opNewValue = 0; +let prefersSlot3 = 1; let Defs = [USR_OVF]; } def dep_S2_packhl : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rdd32 = packhl($Rs32,$Rt32):deprecated", -ALU64_tc_1_SLOT23, TypeALU64>, Enc_1997594 { +tc_9c18c9a5, TypeALU64>, Enc_be32a5 { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010100000; diff --git a/lib/Target/Hexagon/HexagonDepTimingClasses.h b/lib/Target/Hexagon/HexagonDepTimingClasses.h new file mode 100644 index 0000000000000000000000000000000000000000..52963034543d838d202fbb4af4da29ac417ef760 --- /dev/null +++ b/lib/Target/Hexagon/HexagonDepTimingClasses.h @@ -0,0 +1,132 @@ +//===--- HexagonDepTimingClasses.h ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +static bool is_TC3x(unsigned SchedClass) { + switch (SchedClass) { + case Hexagon::Sched::tc_1000eb10: + case Hexagon::Sched::tc_2aaab1e0: + case Hexagon::Sched::tc_4997da4a: + case Hexagon::Sched::tc_5d806107: + case Hexagon::Sched::tc_6264c5e0: + case Hexagon::Sched::tc_69bb508b: + case Hexagon::Sched::tc_8c8041e6: + case Hexagon::Sched::tc_8cb685d9: + case Hexagon::Sched::tc_a12a5971: + case Hexagon::Sched::tc_ae0722f7: + case Hexagon::Sched::tc_ae2c2dc2: + case Hexagon::Sched::tc_bc5561d8: + case Hexagon::Sched::tc_d6a805a8: + case Hexagon::Sched::tc_f055fbb6: + case Hexagon::Sched::tc_feb4974b: + return true; + default: + return false; + } +} + +static bool is_TC2early(unsigned SchedClass) { + switch (SchedClass) { + case Hexagon::Sched::tc_35fb9d13: + case Hexagon::Sched::tc_cbe45117: + return true; + default: + return false; + } +} + +static bool is_TC4x(unsigned SchedClass) { + switch (SchedClass) { + case Hexagon::Sched::tc_09c86199: + case Hexagon::Sched::tc_2d1e6f5c: + case Hexagon::Sched::tc_2e55aa16: + case Hexagon::Sched::tc_3bea1824: + case Hexagon::Sched::tc_e836c161: + case Hexagon::Sched::tc_f1aa2cdb: + return true; + default: + return false; + } +} + +static bool is_TC2(unsigned SchedClass) { + switch (SchedClass) { + case Hexagon::Sched::tc_090485bb: + case Hexagon::Sched::tc_1fe8323c: + case Hexagon::Sched::tc_37326008: + case Hexagon::Sched::tc_3c10f809: + case Hexagon::Sched::tc_47ab9233: + case Hexagon::Sched::tc_485bb57c: + case Hexagon::Sched::tc_511f28f6: + case Hexagon::Sched::tc_583510c7: + case Hexagon::Sched::tc_63cd9d2d: + case Hexagon::Sched::tc_76c4c5ef: + case Hexagon::Sched::tc_7ca2ea10: + case Hexagon::Sched::tc_87601822: + case Hexagon::Sched::tc_88fa2da6: + case Hexagon::Sched::tc_94e6ffd9: + case Hexagon::Sched::tc_ab1b5e74: + case Hexagon::Sched::tc_b0f50e3c: + case Hexagon::Sched::tc_bd16579e: + case Hexagon::Sched::tc_c0cd91a8: + case Hexagon::Sched::tc_ca280e8b: + case Hexagon::Sched::tc_cd321066: + case Hexagon::Sched::tc_d95f4e98: + case Hexagon::Sched::tc_e17ce9ad: + case Hexagon::Sched::tc_f1240c08: + case Hexagon::Sched::tc_faab1248: + return true; + default: + return false; + } +} + +static bool is_TC1(unsigned SchedClass) { + switch (SchedClass) { + case Hexagon::Sched::tc_07ac815d: + case Hexagon::Sched::tc_1b6011fb: + case Hexagon::Sched::tc_1b834fe7: + case Hexagon::Sched::tc_1e062b18: + case Hexagon::Sched::tc_1f9668cc: + case Hexagon::Sched::tc_43068634: + case Hexagon::Sched::tc_47f0b7ad: + case Hexagon::Sched::tc_537e2013: + case Hexagon::Sched::tc_548f402d: + case Hexagon::Sched::tc_5fa2857c: + case Hexagon::Sched::tc_5fe9fcd0: + case Hexagon::Sched::tc_78b3c689: + case Hexagon::Sched::tc_7c2dcd4d: + case Hexagon::Sched::tc_81a23d44: + case Hexagon::Sched::tc_821c4233: + case Hexagon::Sched::tc_92d1833c: + case Hexagon::Sched::tc_9a13af9d: + case Hexagon::Sched::tc_9c18c9a5: + case Hexagon::Sched::tc_9df8b0dc: + case Hexagon::Sched::tc_9f518242: + case Hexagon::Sched::tc_a1fb80e1: + case Hexagon::Sched::tc_a333d2a9: + case Hexagon::Sched::tc_a87879e8: + case Hexagon::Sched::tc_aad55963: + case Hexagon::Sched::tc_b08b653e: + case Hexagon::Sched::tc_b324366f: + case Hexagon::Sched::tc_b5bfaa60: + case Hexagon::Sched::tc_b86c7e8b: + case Hexagon::Sched::tc_c58f771a: + case Hexagon::Sched::tc_d108a090: + case Hexagon::Sched::tc_d1b5a4b6: + case Hexagon::Sched::tc_d2609065: + case Hexagon::Sched::tc_d63b71d1: + case Hexagon::Sched::tc_e2c31426: + case Hexagon::Sched::tc_e8c7a357: + case Hexagon::Sched::tc_eb07ef6f: + case Hexagon::Sched::tc_f16d5b17: + return true; + default: + return false; + } +} diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 67af947e089dd6db718ce2a1da577315d861d099..03c4a83594b335460b729f0168f6f4dbec002ad6 100644 --- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -65,9 +65,9 @@ #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index d8ba5dcd35ad06f6df7155557f8f8143113ec128..9f8c9ded8127b5a7b998d3431e8708fa02ecfd35 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -559,10 +559,10 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO, } unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS); - switch (RC->getSize()) { - case 4: + switch (TRI->getRegSizeInBits(*RC)) { + case 32: return IfTrue ? A2_tfrt : A2_tfrf; - case 8: + case 64: return IfTrue ? A2_tfrpt : A2_tfrpf; } llvm_unreachable("Invalid register operand"); diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index 015d3b840e6fe63982206a19ebf4bdf2d18db353..23d4e2610d9a58045ff521f6acc642256c85704d 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -12,10 +12,9 @@ // form. //===----------------------------------------------------------------------===// - -#include "llvm/ADT/DenseMap.h" #include "Hexagon.h" #include "HexagonTargetMachine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 0e2380f4316a9c24994230285eb4c2915072c694..18e49c69b8e36245ec601e73856e307ad53e861b 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -10,8 +10,8 @@ #define DEBUG_TYPE "hexagon-pei" -#include "HexagonBlockRanges.h" #include "HexagonFrameLowering.h" +#include "HexagonBlockRanges.h" #include "HexagonInstrInfo.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" @@ -1425,7 +1425,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, if (!SRegs[S->Reg]) continue; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); - int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), S->Offset); + int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset); MinOffset = std::min(MinOffset, S->Offset); CSI.push_back(CalleeSavedInfo(S->Reg, FI)); SRegs[S->Reg] = false; @@ -1437,11 +1437,12 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { unsigned R = x; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R); - int Off = MinOffset - RC->getSize(); - unsigned Align = std::min(RC->getAlignment(), getStackAlignment()); + unsigned Size = TRI->getSpillSize(*RC); + int Off = MinOffset - Size; + unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment()); assert(isPowerOf2_32(Align)); Off &= -Align; - int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), Off); + int FI = MFI.CreateFixedSpillStackObject(Size, Off); MinOffset = std::min(MinOffset, Off); CSI.push_back(CalleeSavedInfo(R, FI)); SRegs[R] = false; @@ -1656,7 +1657,7 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, // defined. From the point of view of the liveness tracking, it is ok to // store it as a whole, but if we break it up we may end up storing a // register that is entirely undefined. - LivePhysRegs LPR(&HRI); + LivePhysRegs LPR(HRI); LPR.addLiveIns(B); SmallVector,2> Clobbers; for (auto R = B.begin(); R != It; ++R) { @@ -1677,10 +1678,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, int FI = MI->getOperand(0).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - unsigned Size = RC->getSize(); - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned Size = HRI.getSpillSize(RC); + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned StoreOpc; @@ -1734,10 +1735,10 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, int FI = MI->getOperand(1).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - unsigned Size = RC->getSize(); - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned Size = HRI.getSpillSize(RC); + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned LoadOpc; @@ -1777,16 +1778,16 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, if (!MI->getOperand(0).isFI()) return false; + auto &HRI = *HST.getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned SrcR = MI->getOperand(2).getReg(); bool IsKill = MI->getOperand(2).isKill(); int FI = MI->getOperand(0).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned StoreOpc; @@ -1815,15 +1816,15 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, if (!MI->getOperand(1).isFI()) return false; + auto &HRI = *HST.getRegisterInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned DstR = MI->getOperand(0).getReg(); int FI = MI->getOperand(1).getIndex(); bool Is128B = HST.useHVXDblOps(); - auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass - : &Hexagon::VectorRegs128BRegClass; - - unsigned NeedAlign = RC->getAlignment(); + const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass + : Hexagon::VectorRegs128BRegClass; + unsigned NeedAlign = HRI.getSpillAlignment(RC); unsigned HasAlign = MFI.getObjectAlignment(FI); unsigned LoadOpc; @@ -1932,7 +1933,7 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, if (!needToReserveScavengingSpillSlots(MF, HRI, RC)) continue; unsigned Num = RC == &Hexagon::IntRegsRegClass ? NumberScavengerSlots : 1; - unsigned S = RC->getSize(), A = RC->getAlignment(); + unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC); for (unsigned i = 0; i < Num; i++) { int NewFI = MFI.CreateSpillStackObject(S, A); RS->addScavengingFrameIndex(NewFI); diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp index c99ad5130aef5ae03f643bdea3d131b94ed861da..7c6de6d513e8d1ad5365411a1cfe5ec63500437f 100644 --- a/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -14,10 +14,10 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index 54d99d399f88571b81939475d13db3d7c47bd196..bf31e1699284081fae21817ab3ede1adf73ad6f0 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" @@ -34,8 +34,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include #include diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index 85222944c77caa39b2d2e429d74a8a8540d0a7c1..11ac5454f604323672cc74c48a2cc4cac7d9c1b4 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -40,8 +40,8 @@ #include "llvm/Pass.h" #include "llvm/Support/MathExtras.h" #include -#include #include +#include #include using namespace llvm; @@ -59,9 +59,7 @@ namespace { public: static char ID; - HexagonGenMux() : MachineFunctionPass(ID), HII(nullptr), HRI(nullptr) { - initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry()); - } + HexagonGenMux() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon generate mux instructions"; @@ -79,8 +77,8 @@ namespace { } private: - const HexagonInstrInfo *HII; - const HexagonRegisterInfo *HRI; + const HexagonInstrInfo *HII = nullptr; + const HexagonRegisterInfo *HRI = nullptr; struct CondsetInfo { unsigned PredR = 0; @@ -134,7 +132,7 @@ namespace { } // end anonymous namespace -INITIALIZE_PASS(HexagonGenMux, "hexagon-mux", +INITIALIZE_PASS(HexagonGenMux, "hexagon-gen-mux", "Hexagon generate mux instructions", false, false) void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const { @@ -235,8 +233,11 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { unsigned DR = MI->getOperand(0).getReg(); if (isRegPair(DR)) continue; + MachineOperand &PredOp = MI->getOperand(1); + if (PredOp.isUndef()) + continue; - unsigned PR = MI->getOperand(1).getReg(); + unsigned PR = PredOp.getReg(); unsigned Idx = I2X.lookup(MI); CondsetMap::iterator F = CM.find(DR); bool IfTrue = HII->isPredicatedTrue(Opc); @@ -294,12 +295,15 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0; unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0; bool Failure = false, CanUp = true, CanDown = true; + bool Used1 = false, Used2 = false; for (unsigned X = MinX+1; X < MaxX; X++) { const DefUseInfo &DU = DUM.lookup(X); if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) { Failure = true; break; } + Used1 |= DU.Uses[SR1]; + Used2 |= DU.Uses[SR2]; if (CanDown && DU.Defs[SR1]) CanDown = false; if (CanUp && DU.Defs[SR2]) @@ -313,6 +317,45 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { // Prefer "down", since this will move the MUX farther away from the // predicate definition. MachineBasicBlock::iterator At = CanDown ? Def2 : Def1; + if (CanDown) { + // If the MUX is placed "down", we need to make sure that there aren't + // any kills of the source registers between the two defs. + if (Used1 || Used2) { + auto ResetKill = [this] (unsigned Reg, MachineInstr &MI) -> bool { + if (MachineOperand *Op = MI.findRegisterUseOperand(Reg, true, HRI)) { + Op->setIsKill(false); + return true; + } + return false; + }; + bool KilledSR1 = false, KilledSR2 = false; + for (MachineInstr &MJ : make_range(std::next(It1), It2)) { + if (SR1) + KilledSR1 |= ResetKill(SR1, MJ); + if (SR2) + KilledSR2 |= ResetKill(SR1, MJ); + } + // If any of the source registers were killed in this range, transfer + // the kills to the source operands: they will me "moved" to the + // resulting MUX and their parent instructions will be deleted. + if (KilledSR1) { + assert(Src1->isReg()); + Src1->setIsKill(true); + } + if (KilledSR2) { + assert(Src2->isReg()); + Src2->setIsKill(true); + } + } + } else { + // If the MUX is placed "up", it shouldn't kill any source registers + // that are still used afterwards. We can reset the kill flags directly + // on the operands, because the source instructions will be erased. + if (Used1 && Src1->isReg()) + Src1->setIsKill(false); + if (Used2 && Src2->isReg()) + Src2->setIsKill(false); + } ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2)); } diff --git a/lib/Target/Hexagon/HexagonIICHVX.td b/lib/Target/Hexagon/HexagonIICHVX.td index 4081a225832b0b94da36205086062b99ebb79e44..1493d52f08e856a9058fc8ed2e7777c8fa2e00ab 100644 --- a/lib/Target/Hexagon/HexagonIICHVX.td +++ b/lib/Target/Hexagon/HexagonIICHVX.td @@ -7,96 +7,12 @@ // //===----------------------------------------------------------------------===// -// -// Though all these itinerary classes exist for V60 onwards, they are being -// listed here as 'HVXV62Itin' because itinerary class description prior to V62 -// doesn't include operand cycle info. In future, I plan to merge them -// together and call it 'HVXItin'. -// -class HVXV62Itin { - list HVXV62Itin_list = [ - InstrItinData], - [3, 1, 1, 1]>, - InstrItinData], - [3, 1, 1, 1]>, - InstrItinData], - [3, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLANE,CVI_SHIFT, - CVI_MPY0, CVI_MPY1]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF, CVI_MPY01]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_MPY0, CVI_MPY1]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_MPY0, CVI_MPY1]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_MPY0, CVI_MPY1]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_SHIFT]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_XLANE, CVI_SHIFT, - CVI_MPY0, CVI_MPY1]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_LD], 0>, - InstrStage<1, [CVI_XLANE, CVI_SHIFT, - CVI_MPY0, CVI_MPY1]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_LD]>],[1, 1, 1, 1, 10]>, - InstrItinData, - InstrStage<1, [CVI_LD], 0>, - InstrStage<1, [CVI_XLANE, CVI_SHIFT, - CVI_MPY0, CVI_MPY1]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SLOT1], 0>, - InstrStage<1, [CVI_LD], 0>, - InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_ST], 0>, - InstrStage<1, [CVI_XLANE, CVI_SHIFT, - CVI_MPY0, CVI_MPY1]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_ST]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SLOT1], 0>, - InstrStage<1, [CVI_ST], 0>, - InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [CVI_ALL]>], [1, 1, 1, 1]>]; +def CVI_VA : InstrItinClass; + +class HVXItin { + list HVXItin_list = [ + InstrItinData, + InstrStage<1, [CVI_XLANE,CVI_SHIFT, CVI_MPY0, CVI_MPY1]>], + [9, 7, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>]; } diff --git a/lib/Target/Hexagon/HexagonIICScalar.td b/lib/Target/Hexagon/HexagonIICScalar.td index e69cfbdad68888d41b7509f67cb4b7894269ee90..5fe713346e38984058e25eb7ed0358251f0f8d3a 100644 --- a/lib/Target/Hexagon/HexagonIICScalar.td +++ b/lib/Target/Hexagon/HexagonIICScalar.td @@ -11,154 +11,22 @@ // classes as per V62. Curretnly, they are just extracted from // HexagonScheduleV62.td but will soon be auto-generated by HexagonGen.py. +class PseudoItin { + list PseudoItin_list = [ + InstrItinData], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 1]>, + InstrItinData], [1, 1, 1]>, + InstrItinData], [2]> + ]; +} + class ScalarItin { list ScalarItin_list = [ - InstrItinData], [1, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [1, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [1, 1, 1]>, - - // ALU64 - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - - // CR -> System - InstrItinData], [2, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [3, 1, 1]>, - - // Jump (conditional/unconditional/return etc) - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], - [3, 1, 1, 1]>, - InstrItinData], - [1, 1, 1, 1]>, - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], [2, 1, 1, 1]>, - - // JR - InstrItinData], [2, 1, 1]>, - InstrItinData], [3, 1, 1]>, - - // Extender - InstrItinData], [2, 1, 1, 1]>, - - // Load - InstrItinData], - [3, 1]>, - InstrItinData], - [3, 1]>, - InstrItinData], [4, 1]>, - InstrItinData], [3, 1]>, - - // M - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [3, 1, 1, 1]>, - InstrItinData], - [4, 1, 1]>, - InstrItinData], - [4, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - - // Store - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], [3, 1, 1]>, - InstrItinData], [3, 1, 1]>, - InstrItinData], [1, 1, 1]>, - InstrItinData], [1, 1, 1]>, - - // S - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60. - InstrItinData], - [4, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - - // New Value Compare Jump - InstrItinData], - [3, 1, 1, 1]>, - - // Mem ops - InstrItinData], - [1, 1, 1, 1]>, - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], - [1, 1, 1, 1]>, - InstrItinData], - [1, 1, 1, 1]>, - InstrItinData], - [3, 1, 1, 1]>, - InstrItinData], - [1, 1, 1, 1]>, - - // Endloop - InstrItinData], - [2]>, - InstrItinData], - [1, 1, 1, 1]>, - - // Duplex and Compound - InstrItinData], [1, 1, 1]>, - InstrItinData], [1, 1, 1]>, - InstrItinData], [1, 1, 1]>, - // Misc - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 1]>]; + InstrItinData], + [3, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData], + [1, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]> + ]; } diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 8e10c521a77d38154b729b9d29bcc158a9c52623..e5f49ca77a91263907c0c5d2031120922c114227 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -71,6 +71,9 @@ public: return true; } + bool ComplexPatternFuncMutatesDAG() const override { + return true; + } void PreprocessISelDAG() override; void EmitFunctionEntryCode() override; @@ -81,6 +84,7 @@ public: inline bool SelectAddrGP(SDValue &N, SDValue &R); bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP); bool SelectAddrFI(SDValue &N, SDValue &R); + bool DetectUseSxtw(SDValue &N, SDValue &R); StringRef getPassName() const override { return "Hexagon DAG->DAG Pattern Instruction Selection"; @@ -106,7 +110,6 @@ public: void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl); void SelectStore(SDNode *N); void SelectSHL(SDNode *N); - void SelectMul(SDNode *N); void SelectZeroExtend(SDNode *N); void SelectIntrinsicWChain(SDNode *N); void SelectIntrinsicWOChain(SDNode *N); @@ -118,9 +121,10 @@ public: #include "HexagonGenDAGISel.inc" private: - bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src); + bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src); bool isOrEquivalentToAdd(const SDNode *N) const; bool isAlignedMemNode(const MemSDNode *N) const; + bool isSmallStackStore(const StoreSDNode *N) const; bool isPositiveHalfWord(const SDNode *N) const; // DAG preprocessing functions. @@ -597,90 +601,6 @@ void HexagonDAGToDAGISel::SelectStore(SDNode *N) { SelectCode(ST); } -void HexagonDAGToDAGISel::SelectMul(SDNode *N) { - SDLoc dl(N); - - // %conv.i = sext i32 %tmp1 to i64 - // %conv2.i = sext i32 %add to i64 - // %mul.i = mul nsw i64 %conv2.i, %conv.i - // - // --- match with the following --- - // - // %mul.i = mpy (%tmp1, %add) - // - - if (N->getValueType(0) == MVT::i64) { - // Shifting a i64 signed multiply. - SDValue MulOp0 = N->getOperand(0); - SDValue MulOp1 = N->getOperand(1); - - SDValue OP0; - SDValue OP1; - - // Handle sign_extend and sextload. - if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext0 = MulOp0.getOperand(0); - if (Sext0.getNode()->getValueType(0) != MVT::i32) { - SelectCode(N); - return; - } - OP0 = Sext0; - } else if (MulOp0.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast(MulOp0.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - SelectCode(N); - return; - } - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); - OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), TargetConst0, - Chain), 0); - } else { - SelectCode(N); - return; - } - - // Same goes for the second operand. - if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) { - SDValue Sext1 = MulOp1.getOperand(0); - if (Sext1.getNode()->getValueType(0) != MVT::i32) { - SelectCode(N); - return; - } - OP1 = Sext1; - } else if (MulOp1.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast(MulOp1.getNode()); - if (LD->getMemoryVT() != MVT::i32 || - LD->getExtensionType() != ISD::SEXTLOAD || - LD->getAddressingMode() != ISD::UNINDEXED) { - SelectCode(N); - return; - } - SDValue Chain = LD->getChain(); - SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); - OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32, - MVT::Other, - LD->getBasePtr(), TargetConst0, - Chain), 0); - } else { - SelectCode(N); - return; - } - - // Generate a mpy instruction. - SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, - MVT::i64, OP0, OP1); - ReplaceNode(N, Result); - return; - } - - SelectCode(N); -} - void HexagonDAGToDAGISel::SelectSHL(SDNode *N) { SDLoc dl(N); SDValue Shl_0 = N->getOperand(0); @@ -843,7 +763,7 @@ void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { SDValue V = N->getOperand(1); SDValue U; - if (isValueExtension(V, Bits, U)) { + if (keepsLowBits(V, Bits, U)) { SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), N->getOperand(0), U); ReplaceNode(N, R.getNode()); @@ -949,7 +869,6 @@ void HexagonDAGToDAGISel::Select(SDNode *N) { case ISD::SHL: return SelectSHL(N); case ISD::LOAD: return SelectLoad(N); case ISD::STORE: return SelectStore(N); - case ISD::MUL: return SelectMul(N); case ISD::ZERO_EXTEND: return SelectZeroExtend(N); case ISD::INTRINSIC_W_CHAIN: return SelectIntrinsicWChain(N); case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N); @@ -1327,7 +1246,7 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() { } // Match a frame index that can be used in an addressing mode. -bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) { +bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) { if (N.getOpcode() != ISD::FrameIndex) return false; auto &HFI = *HST->getFrameLowering(); @@ -1388,16 +1307,83 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, return false; } -bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, - unsigned FromBits, SDValue &Src) { +bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) { + // This (complex pattern) function is meant to detect a sign-extension + // i32->i64 on a per-operand basis. This would allow writing single + // patterns that would cover a number of combinations of different ways + // a sign-extensions could be written. For example: + // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y) + // could match either one of these: + // (mul (sext x) (sext_inreg y)) + // (mul (sext-load *p) (sext_inreg y)) + // (mul (sext_inreg x) (sext y)) + // etc. + // + // The returned value will have type i64 and its low word will + // contain the value being extended. The high bits are not specified. + // The returned type is i64 because the original type of N was i64, + // but the users of this function should only use the low-word of the + // result, e.g. + // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y)) + + if (N.getValueType() != MVT::i64) + return false; + EVT SrcVT; + unsigned Opc = N.getOpcode(); + switch (Opc) { + case ISD::SIGN_EXTEND: + case ISD::SIGN_EXTEND_INREG: { + // sext_inreg has the source type as a separate operand. + EVT T = Opc == ISD::SIGN_EXTEND + ? N.getOperand(0).getValueType() + : cast(N.getOperand(1))->getVT(); + if (T.getSizeInBits() != 32) + return false; + R = N.getOperand(0); + break; + } + case ISD::LOAD: { + LoadSDNode *L = cast(N); + if (L->getExtensionType() != ISD::SEXTLOAD) + return false; + // All extending loads extend to i32, so even if the value in + // memory is shorter than 32 bits, it will be i32 after the load. + if (L->getMemoryVT().getSizeInBits() > 32) + return false; + R = N; + break; + } + default: + return false; + } + EVT RT = R.getValueType(); + if (RT == MVT::i64) + return true; + assert(RT == MVT::i32); + // This is only to produce a value of type i64. Do not rely on the + // high bits produced by this. + const SDLoc &dl(N); + SDValue Ops[] = { + CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32), + R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32), + R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32) + }; + SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, + MVT::i64, Ops); + R = SDValue(T, 0); + return true; +} + +bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, + SDValue &Src) { unsigned Opc = Val.getOpcode(); switch (Opc) { case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: { - SDValue const &Op0 = Val.getOperand(0); + const SDValue &Op0 = Val.getOperand(0); EVT T = Op0.getValueType(); - if (T.isInteger() && T.getSizeInBits() == FromBits) { + if (T.isInteger() && T.getSizeInBits() == NumBits) { Src = Op0; return true; } @@ -1408,23 +1394,23 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, case ISD::AssertZext: if (Val.getOperand(0).getValueType().isInteger()) { VTSDNode *T = cast(Val.getOperand(1)); - if (T->getVT().getSizeInBits() == FromBits) { + if (T->getVT().getSizeInBits() == NumBits) { Src = Val.getOperand(0); return true; } } break; case ISD::AND: { - // Check if this is an AND with "FromBits" of lower bits set to 1. - uint64_t FromMask = (1 << FromBits) - 1; + // Check if this is an AND with NumBits of lower bits set to 1. + uint64_t Mask = (1 << NumBits) - 1; if (ConstantSDNode *C = dyn_cast(Val.getOperand(0))) { - if (C->getZExtValue() == FromMask) { + if (C->getZExtValue() == Mask) { Src = Val.getOperand(1); return true; } } if (ConstantSDNode *C = dyn_cast(Val.getOperand(1))) { - if (C->getZExtValue() == FromMask) { + if (C->getZExtValue() == Mask) { Src = Val.getOperand(0); return true; } @@ -1433,16 +1419,16 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val, } case ISD::OR: case ISD::XOR: { - // OR/XOR with the lower "FromBits" bits set to 0. - uint64_t FromMask = (1 << FromBits) - 1; + // OR/XOR with the lower NumBits bits set to 0. + uint64_t Mask = (1 << NumBits) - 1; if (ConstantSDNode *C = dyn_cast(Val.getOperand(0))) { - if ((C->getZExtValue() & FromMask) == 0) { + if ((C->getZExtValue() & Mask) == 0) { Src = Val.getOperand(1); return true; } } if (ConstantSDNode *C = dyn_cast(Val.getOperand(1))) { - if ((C->getZExtValue() & FromMask) == 0) { + if ((C->getZExtValue() & Mask) == 0) { Src = Val.getOperand(0); return true; } @@ -1477,6 +1463,20 @@ bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const { return N->getAlignment() >= N->getMemoryVT().getStoreSize(); } +bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const { + unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF); + switch (N->getMemoryVT().getStoreSize()) { + case 1: + return StackSize <= 56; // 1*2^6 - 8 + case 2: + return StackSize <= 120; // 2*2^6 - 8 + case 4: + return StackSize <= 248; // 4*2^6 - 8 + default: + return false; + } +} + // Return true when the given node fits in a positive half word. bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const { if (const ConstantSDNode *CN = dyn_cast(N)) { diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 0a5e9aed4f1315f90ec416aceeb1e09d57980a1e..afed894cfb9acc601cea8ed9fbcf351fa64c60d3 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonISelLowering.h" +#include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" @@ -26,8 +26,8 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" @@ -485,9 +485,7 @@ static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, } } - unsigned Offset = State.AllocateStack(4, 4); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; + return true; } static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, @@ -500,9 +498,7 @@ static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, } } - unsigned Offset = State.AllocateStack(8, 8); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; + return true; } static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, @@ -513,7 +509,6 @@ static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, bool UseHVX = HST.useHVXOps(); bool UseHVXDbl = HST.useHVXDblOps(); - unsigned OffSiz = 64; if (LocVT == MVT::v16i32) { if (unsigned Reg = State.AllocateReg(Hexagon::V0)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); @@ -525,18 +520,14 @@ static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } - OffSiz = 128; } else if (LocVT == MVT::v64i32) { if (unsigned Reg = State.AllocateReg(Hexagon::W0)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } - OffSiz = 256; } - unsigned Offset = State.AllocateStack(OffSiz, OffSiz); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; + return true; } void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) { @@ -592,6 +583,16 @@ static bool isHvxVectorType(MVT Ty) { } } +bool +HexagonTargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const { + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_Hexagon); +} + // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is // passed by value, the function prototype is modified to return void and // the value is stored in memory pointed by a pointer passed by caller. @@ -634,7 +635,7 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps); } -bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // If either no tail call or told not to tail call at all, don't. auto Attr = CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); @@ -847,8 +848,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue Glue; if (!IsTailCall) { - SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true); - Chain = DAG.getCALLSEQ_START(Chain, C, dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); Glue = Chain.getValue(1); } @@ -1254,7 +1254,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments( InVals.push_back(FIN); } else { InVals.push_back( - DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo())); + DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); } } } @@ -1719,8 +1719,13 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag); InFlag = Chain.getValue(1); + unsigned Flags = + static_cast(DAG.getSubtarget()).useLongCalls() + ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended + : HexagonII::MO_GDPLT; + return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT, - Hexagon::R0, HexagonII::MO_GDPLT); + Hexagon::R0, Flags); } // @@ -1923,11 +1928,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); setOperationAction(ISD::BSWAP, MVT::i32, Legal); setOperationAction(ISD::BSWAP, MVT::i64, Legal); - - // We custom lower i64 to i64 mul, so that it is not considered as a legal - // operation. There is a pattern that will match i64 mul and transform it - // to a series of instructions. - setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MUL, MVT::i64, Legal); for (unsigned IntExpOp : { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, @@ -1998,7 +1999,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Floating point arithmetic/math functions: ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN, - ISD::FCOS, ISD::FPOWI, ISD::FPOW, ISD::FLOG, ISD::FLOG2, + ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index aa0f00cd5bbf484fff609d9003558625d5226ce0..1415156487c072b8cb66657085c84b0c83cc239a 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -185,12 +185,17 @@ namespace HexagonISD { SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + bool CanLowerReturn(CallingConv::ID CallConv, + MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index 39c2a6e4f5a54c94d1568f0c21bfe67d404779b7..636a439ba6a9a9ecb88bdff7e8a3b4c1d848dc99 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -7,16 +7,6 @@ // //===----------------------------------------------------------------------===// -// Maintain list of valid subtargets for each instruction. -class SubTarget value> { - bits<6> Value = value; -} - -def HasAnySubT : SubTarget<0x3f>; // 111111 -def HasV5SubT : SubTarget<0x3e>; // 111110 -def HasV55SubT : SubTarget<0x3c>; // 111100 -def HasV60SubT : SubTarget<0x38>; // 111000 - // Addressing modes for load/store instructions class AddrModeType value> { bits<3> Value = value; @@ -131,12 +121,6 @@ class InstHexagon pattern, bits<2> opExtentAlign = 0; let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending. - // If an instruction is valid on a subtarget, set the corresponding - // bit from validSubTargets. - // By default, instruction is valid on all subtargets. - SubTarget validSubTargets = HasAnySubT; - let TSFlags{40-35} = validSubTargets.Value; - // Addressing mode for load/store instructions. AddrModeType addrMode = NoAddrMode; let TSFlags{43-41} = addrMode.Value; @@ -165,6 +149,9 @@ class InstHexagon pattern, bit cofMax1 = 0; let TSFlags{60} = cofMax1; + bit CVINew = 0; + let TSFlags{61} = CVINew; + // Fields used for relation models. bit isNonTemporal = 0; string isNT = ""; // set to "true" for non-temporal vector stores. @@ -201,30 +188,10 @@ class LDInst pattern = [], string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> : InstHexagon, OpcodeHexagon; -class PseudoLDInst pattern = [], - string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> - : InstHexagon, OpcodeHexagon; - class CONSTLDInst pattern = [], - string cstr = ""> - : PseudoLDInst; - -// LD Instruction Class in V2/V3/V4. -// Definition of the instruction class NOT CHANGED. -class LDInstPost pattern = [], - string cstr = ""> - : LDInst; - -let mayLoad = 1 in -class LD0Inst pattern = [], - string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> + string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> : InstHexagon, OpcodeHexagon; -let mayLoad = 1 in -class LD1Inst pattern = [], - string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> - : InstHexagon; - // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. @@ -233,124 +200,9 @@ class STInst pattern = [], string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> : InstHexagon, OpcodeHexagon; -let mayStore = 1 in -class STInst_NoOpcode pattern = [], - string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> - : InstHexagon; - -class STInst2 pattern = [], - string cstr = ""> - : STInst; - -let mayStore = 1 in -class ST0Inst pattern = [], - string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0> - : InstHexagon, OpcodeHexagon; - -// Same as ST0Inst but doesn't derive from OpcodeHexagon. -let mayStore = 1 in -class ST1Inst pattern = [], - string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> - : InstHexagon; - -// ST Instruction Class in V2/V3 can take SLOT0 only. -// ST Instruction Class in V4 can take SLOT0 & SLOT1. -// Definition of the instruction class CHANGED from V2/V3 to V4. -class STInstPost pattern = [], - string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> - : STInst; - -// ALU64 Instruction Class in V2/V3. -// XTYPE Instruction Class in V4. -// Definition of the instruction class NOT CHANGED. -// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. -class ALU64Inst pattern = [], - string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> - : InstHexagon, - OpcodeHexagon; - -// ALU64 Instruction Class in V2/V3. -// XTYPE Instruction Class in V4. -// Definition of the instruction class NOT CHANGED. -// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. -class ALU64Inst_NoOpcode pattern = [], - string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> - : InstHexagon; - - -class ALU64_acc pattern = [], - string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> - : ALU64Inst; - - -// M Instruction Class in V2/V3. -// XTYPE Instruction Class in V4. -// Definition of the instruction class NOT CHANGED. -// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. -class MInst pattern = [], - string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> - : InstHexagon, - OpcodeHexagon; - -// Same as above but doesn't derive from OpcodeHexagon -class MInst2 pattern = [], - string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> - : InstHexagon; - -// M Instruction Class in V2/V3. -// XTYPE Instruction Class in V4. -// Definition of the instruction class NOT CHANGED. -// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. -class MInst_acc pattern = [], - string cstr = "", InstrItinClass itin = M_tc_2_SLOT23> - : MInst; - -// S Instruction Class in V2/V3. -// XTYPE Instruction Class in V4. -// Definition of the instruction class NOT CHANGED. -// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. -class SInst pattern = [], - string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> - : InstHexagon, - OpcodeHexagon; - -class SInst_NoOpcode pattern = [], - string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> - : InstHexagon; - -class SInst2 pattern = [], - string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> - : InstHexagon; - -// S Instruction Class in V2/V3. -// XTYPE Instruction Class in V4. -// Definition of the instruction class NOT CHANGED. -// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. -class SInst_acc pattern = [], - string cstr = "", InstrItinClass itin = S_3op_tc_1_SLOT23> - : SInst { - let Type = TypeS_3op; -} - -// J Instruction Class in V2/V3/V4. -// Definition of the instruction class NOT CHANGED. -class JInst pattern = [], - string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23> - : InstHexagon, OpcodeHexagon; - -class JInst_CJUMP_UCJUMP pattern = [], - string cstr = "", InstrItinClass itin = J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT> - : InstHexagon, OpcodeHexagon; - -// CR Instruction Class in V2/V3/V4. -// Definition of the instruction class NOT CHANGED. -class CRInst pattern = [], - string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3> - : InstHexagon, OpcodeHexagon; - let isCodeGenOnly = 1, isPseudo = 1 in class Endloop pattern = [], - string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123> + string cstr = "", InstrItinClass itin = tc_ENDLOOP> : InstHexagon, OpcodeHexagon; @@ -370,27 +222,6 @@ class PseudoM pattern = [], // Instruction Classes Definitions - //===----------------------------------------------------------------------===// -// -// ALU64 patterns. -// -class ALU64_rr pattern = [], - string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23> - : ALU64Inst; - -class ALU64_ri pattern = [], - string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23> - : ALU64Inst; - -// Post increment ST Instruction. -class STInstPI pattern = [], - string cstr = ""> - : STInst; - -// Post increment LD Instruction. -class LDInstPI pattern = [], - string cstr = ""> - : LDInst; - //===----------------------------------------------------------------------===// // V4 Instruction Format Definitions + //===----------------------------------------------------------------------===// @@ -398,7 +229,7 @@ class LDInstPI pattern = [], include "HexagonInstrFormatsV4.td" //===----------------------------------------------------------------------===// -// V4 Instruction Format Definitions + +// V55 Instruction Format Definitions + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -408,5 +239,5 @@ include "HexagonInstrFormatsV4.td" include "HexagonInstrFormatsV60.td" //===----------------------------------------------------------------------===// -// V60 Instruction Format Definitions + +// V62 Instruction Format Definitions + //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 1fdf930c62fdb4502ec01cdaaf2d32abdf248823..c5fa25995212d08c43079e2092c864bf72f4ee80 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -1,4 +1,4 @@ -//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==// +//==- HexagonInstrFormatsV4.td - Hexagon Instruction Formats --*- tablegen -==// // // The LLVM Compiler Infrastructure // @@ -85,64 +85,3 @@ class InstDuplex iClass, list pattern = [], bits<2> opExtentAlign = 0; let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending. } - -//----------------------------------------------------------------------------// -// Instruction Classes Definitions -//----------------------------------------------------------------------------// - -// -// NV type instructions. -// -class NVInst pattern = [], - string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> - : InstHexagon, OpcodeHexagon; - -class NVInst_V4 pattern = [], - string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> - : NVInst; - -// Definition of Post increment new value store. -class NVInstPost_V4 pattern = [], - string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> - : NVInst; - -// Post increment ST Instruction. -let mayStore = 1 in -class NVInstPI_V4 pattern = [], - string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> - : NVInst; - -// New-value conditional branch. -class NCJInst pattern = [], - string cstr = ""> - : NVInst; - -let mayLoad = 1, mayStore = 1 in -class MEMInst pattern = [], - string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> - : InstHexagon, - OpcodeHexagon; - -class MEMInst_V4 pattern = [], - string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> - : MEMInst; - -class EXTENDERInst pattern = []> - : InstHexagon, OpcodeHexagon; - -class SUBInst pattern = [], - string cstr = ""> - : InstHexagon, - OpcodeHexagon; - -class CJInst pattern = [], - string cstr = ""> - : InstHexagon, - OpcodeHexagon; - -class CJInst_JMPSET pattern = [], - string cstr = ""> - : InstHexagon, - OpcodeHexagon; - diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td index c8a7faea5ed5130b3888666ec2b93725ac08c455..14bda0e0107d425e950fcdc5a42dc6e8d8418a79 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td @@ -15,202 +15,8 @@ // Instruction Classes Definitions + //----------------------------------------------------------------------------// -let validSubTargets = HasV60SubT in -{ class CVI_VA_Resource pattern = [], string cstr = "", InstrItinClass itin = CVI_VA> : InstHexagon, OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VA_DV_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VA_DV> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VX_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VX_LONG> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VX_Resource_late pattern = [], string cstr = "", - InstrItinClass itin = CVI_VX_LATE> - : InstHexagon, - Requires<[HasV60T, UseHVX]>; - -class CVI_VX_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VX> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VX_DV_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VX_DV> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VX_DV_Slot2_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VX_DV_SLOT2> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VX_DV_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VX_DV_LONG> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VP_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VP_LONG> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VP_VS_Resource_early pattern = [], string cstr = "", - InstrItinClass itin = CVI_VP_VS_EARLY> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VP_VS_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VP_VS_LONG> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VP_VS_Resource_long_early pattern = [], string cstr = "", - InstrItinClass itin = CVI_VP_VS_LONG_EARLY> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VS_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VS> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VINLANESAT_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VINLANESAT> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VS_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VS> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_LD_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_LD> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_LD_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_LD> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_TMP_LD_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_TMP_LD> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_TMP_LD_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_TMP_LD> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_CUR_LD_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_CUR_LD> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_VP_LDU_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_VP_LDU> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_VP_LDU_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_VP_LDU> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_ST_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_ST> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_ST_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_ST> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_NEW_ST_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_NEW_ST> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_NEW_ST_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_NEW_ST> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_STU_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_STU> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_VM_STU_Resource_long pattern = [], string cstr = "", - InstrItinClass itin = CVI_VM_STU> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; - -class CVI_HIST_Resource pattern = [], string cstr = "", - InstrItinClass itin = CVI_HIST> - : InstHexagon, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; -} - -let validSubTargets = HasV60SubT in -{ -class CVI_VA_Resource1 pattern = [], string cstr = "", - InstrItinClass itin = CVI_VA> - : InstHexagon, - Requires<[HasV60T, UseHVX]>; - -class CVI_VX_DV_Resource1 pattern = [], string cstr = "", - InstrItinClass itin = CVI_VX_DV> - : InstHexagon, - Requires<[HasV60T, UseHVX]>; - -class CVI_HIST_Resource1 pattern = [], string cstr = "", - InstrItinClass itin = CVI_HIST> - : InstHexagon, - Requires<[HasV60T, UseHVX]>; -} - - diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index b265a883da5c47589dd4012736c07ef30753a8f4..f43101fa456d5b3e32beccb565d9dbc9353cb18c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "HexagonInstrInfo.h" #include "Hexagon.h" #include "HexagonHazardRecognizer.h" -#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/SmallPtrSet.h" @@ -57,8 +57,9 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #define GET_INSTRMAP_INFO -#include "HexagonGenInstrInfo.inc" +#include "HexagonDepTimingClasses.h" #include "HexagonGenDFAPacketizer.inc" +#include "HexagonGenInstrInfo.inc" cl::opt ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden, cl::init(false), cl::desc("Do not consider inline-asm a scheduling/" @@ -869,6 +870,9 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); unsigned KillFlag = getKillRegState(isKill); + bool HasAlloca = MFI.hasVarSizedObjects(); + const auto &HST = MF.getSubtarget(); + const HexagonFrameLowering &HFI = *HST.getFrameLowering(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, @@ -899,24 +903,36 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::V6_vS32Ub_ai_128B : Hexagon::V6_vS32b_ai_128B; BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32b_ai; BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::PS_vstorerwu_ai : Hexagon::PS_vstorerw_ai; BuildMI(MBB, I, DL, get(Opc)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::PS_vstorerwu_ai_128B : Hexagon::PS_vstorerw_ai_128B; BuildMI(MBB, I, DL, get(Opc)) @@ -935,6 +951,9 @@ void HexagonInstrInfo::loadRegFromStackSlot( MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); + bool HasAlloca = MFI.hasVarSizedObjects(); + const auto &HST = MF.getSubtarget(); + const HexagonFrameLowering &HFI = *HST.getFrameLowering(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, @@ -959,21 +978,33 @@ void HexagonInstrInfo::loadRegFromStackSlot( BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::PS_vloadrwu_ai_128B : Hexagon::PS_vloadrw_ai_128B; BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 128 ? Hexagon::V6_vL32Ub_ai_128B : Hexagon::V6_vL32b_ai_128B; BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::V6_vL32Ub_ai : Hexagon::V6_vL32b_ai; BuildMI(MBB, I, DL, get(Opc), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) { + // If there are variable-sized objects, spills will not be aligned. + if (HasAlloca) + Align = HFI.getStackAlignment(); unsigned Opc = Align < 64 ? Hexagon::PS_vloadrwu_ai : Hexagon::PS_vloadrw_ai; BuildMI(MBB, I, DL, get(Opc), DestReg) @@ -1110,8 +1141,9 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6; MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_lo)) - .add(MI.getOperand(1)) - .addImm(MI.getOperand(2).getImm()); + .add(MI.getOperand(1)) + .addImm(MI.getOperand(2).getImm()) + .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MI1New->getOperand(1).setIsKill(false); BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi)) .add(MI.getOperand(1)) @@ -1222,7 +1254,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { const MachineOperand &Op1 = MI.getOperand(1); const MachineOperand &Op2 = MI.getOperand(2); const MachineOperand &Op3 = MI.getOperand(3); - LivePhysRegs LiveAtMI(&HRI); + LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); if (Op0.getReg() != Op2.getReg()) { @@ -1251,7 +1283,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineOperand &Op1 = MI.getOperand(1); MachineOperand &Op2 = MI.getOperand(2); MachineOperand &Op3 = MI.getOperand(3); - LivePhysRegs LiveAtMI(&HRI); + LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); @@ -1435,7 +1467,15 @@ bool HexagonInstrInfo::DefinesPredicate( } bool HexagonInstrInfo::isPredicable(const MachineInstr &MI) const { - return MI.getDesc().isPredicable(); + if (!MI.getDesc().isPredicable()) + return false; + + if (MI.isCall() || isTailCall(MI)) { + const MachineFunction &MF = *MI.getParent()->getParent(); + if (!MF.getSubtarget().usePredicatedCalls()) + return false; + } + return true; } bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, @@ -1612,6 +1652,7 @@ unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return getInstrTimingClassLatency(ItinData, MI); } + DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( const TargetSubtargetInfo &STI) const { const InstrItineraryData *II = STI.getInstrItineraryData(); @@ -1728,161 +1769,6 @@ bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const { return getType(MI) == HexagonII::TypeCJ && MI.isBranch(); } -bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { - return (MI.isBranch() && isPredicated(MI)) || - isConditionalTransfer(MI) || - isConditionalALU32(MI) || - isConditionalLoad(MI) || - // Predicated stores which don't have a .new on any operands. - (MI.mayStore() && isPredicated(MI) && !isNewValueStore(MI) && - !isPredicatedNew(MI)); -} - -bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { - switch (MI.getOpcode()) { - case Hexagon::A2_paddf: - case Hexagon::A2_paddfnew: - case Hexagon::A2_paddif: - case Hexagon::A2_paddifnew: - case Hexagon::A2_paddit: - case Hexagon::A2_padditnew: - case Hexagon::A2_paddt: - case Hexagon::A2_paddtnew: - case Hexagon::A2_pandf: - case Hexagon::A2_pandfnew: - case Hexagon::A2_pandt: - case Hexagon::A2_pandtnew: - case Hexagon::A2_porf: - case Hexagon::A2_porfnew: - case Hexagon::A2_port: - case Hexagon::A2_portnew: - case Hexagon::A2_psubf: - case Hexagon::A2_psubfnew: - case Hexagon::A2_psubt: - case Hexagon::A2_psubtnew: - case Hexagon::A2_pxorf: - case Hexagon::A2_pxorfnew: - case Hexagon::A2_pxort: - case Hexagon::A2_pxortnew: - case Hexagon::A4_paslhf: - case Hexagon::A4_paslhfnew: - case Hexagon::A4_paslht: - case Hexagon::A4_paslhtnew: - case Hexagon::A4_pasrhf: - case Hexagon::A4_pasrhfnew: - case Hexagon::A4_pasrht: - case Hexagon::A4_pasrhtnew: - case Hexagon::A4_psxtbf: - case Hexagon::A4_psxtbfnew: - case Hexagon::A4_psxtbt: - case Hexagon::A4_psxtbtnew: - case Hexagon::A4_psxthf: - case Hexagon::A4_psxthfnew: - case Hexagon::A4_psxtht: - case Hexagon::A4_psxthtnew: - case Hexagon::A4_pzxtbf: - case Hexagon::A4_pzxtbfnew: - case Hexagon::A4_pzxtbt: - case Hexagon::A4_pzxtbtnew: - case Hexagon::A4_pzxthf: - case Hexagon::A4_pzxthfnew: - case Hexagon::A4_pzxtht: - case Hexagon::A4_pzxthtnew: - case Hexagon::C2_ccombinewf: - case Hexagon::C2_ccombinewt: - return true; - } - return false; -} - -// FIXME - Function name and it's functionality don't match. -// It should be renamed to hasPredNewOpcode() -bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { - if (!MI.getDesc().mayLoad() || !isPredicated(MI)) - return false; - - int PNewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode()); - // Instruction with valid predicated-new opcode can be promoted to .new. - return PNewOpcode >= 0; -} - -// Returns true if an instruction is a conditional store. -// -// Note: It doesn't include conditional new-value stores as they can't be -// converted to .new predicate. -bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const { - switch (MI.getOpcode()) { - default: return false; - case Hexagon::S4_storeirbt_io: - case Hexagon::S4_storeirbf_io: - case Hexagon::S4_pstorerbt_rr: - case Hexagon::S4_pstorerbf_rr: - case Hexagon::S2_pstorerbt_io: - case Hexagon::S2_pstorerbf_io: - case Hexagon::S2_pstorerbt_pi: - case Hexagon::S2_pstorerbf_pi: - case Hexagon::S2_pstorerdt_io: - case Hexagon::S2_pstorerdf_io: - case Hexagon::S4_pstorerdt_rr: - case Hexagon::S4_pstorerdf_rr: - case Hexagon::S2_pstorerdt_pi: - case Hexagon::S2_pstorerdf_pi: - case Hexagon::S2_pstorerht_io: - case Hexagon::S2_pstorerhf_io: - case Hexagon::S4_storeirht_io: - case Hexagon::S4_storeirhf_io: - case Hexagon::S4_pstorerht_rr: - case Hexagon::S4_pstorerhf_rr: - case Hexagon::S2_pstorerht_pi: - case Hexagon::S2_pstorerhf_pi: - case Hexagon::S2_pstorerit_io: - case Hexagon::S2_pstorerif_io: - case Hexagon::S4_storeirit_io: - case Hexagon::S4_storeirif_io: - case Hexagon::S4_pstorerit_rr: - case Hexagon::S4_pstorerif_rr: - case Hexagon::S2_pstorerit_pi: - case Hexagon::S2_pstorerif_pi: - - // V4 global address store before promoting to dot new. - case Hexagon::S4_pstorerdt_abs: - case Hexagon::S4_pstorerdf_abs: - case Hexagon::S4_pstorerbt_abs: - case Hexagon::S4_pstorerbf_abs: - case Hexagon::S4_pstorerht_abs: - case Hexagon::S4_pstorerhf_abs: - case Hexagon::S4_pstorerit_abs: - case Hexagon::S4_pstorerif_abs: - return true; - - // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded - // from the "Conditional Store" list. Because a predicated new value store - // would NOT be promoted to a double dot new store. - // This function returns yes for those stores that are predicated but not - // yet promoted to predicate dot new instructions. - } -} - -bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { - switch (MI.getOpcode()) { - case Hexagon::A2_tfrt: - case Hexagon::A2_tfrf: - case Hexagon::C2_cmoveit: - case Hexagon::C2_cmoveif: - case Hexagon::A2_tfrtnew: - case Hexagon::A2_tfrfnew: - case Hexagon::C2_cmovenewit: - case Hexagon::C2_cmovenewif: - case Hexagon::A2_tfrpt: - case Hexagon::A2_tfrpf: - return true; - - default: - return false; - } - return false; -} - // TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle // isFPImm and later getFPImm as well. bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { @@ -1940,7 +1826,7 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { case Hexagon::L4_return_fnew_pnt : case Hexagon::L4_return_tnew_pt : case Hexagon::L4_return_fnew_pt : - return true; + return true; } return false; } @@ -1967,12 +1853,12 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, if (RegA == RegB) return true; - if (Hexagon::DoubleRegsRegClass.contains(RegA)) + if (TargetRegisterInfo::isPhysicalRegister(RegA)) for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs) if (RegB == *SubRegs) return true; - if (Hexagon::DoubleRegsRegClass.contains(RegB)) + if (TargetRegisterInfo::isPhysicalRegister(RegB)) for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs) if (RegA == *SubRegs) return true; @@ -2016,9 +1902,7 @@ bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { // Multiply unsigned SchedClass = MI.getDesc().getSchedClass(); - if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23) - return true; - return false; + return is_TC4x(SchedClass) || is_TC3x(SchedClass); } bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const { @@ -2086,7 +1970,7 @@ bool HexagonInstrInfo::isFloat(const MachineInstr &MI) const { // No V60 HVX VMEM with A_INDIRECT. bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, const MachineInstr &J) const { - if (!isV60VectorInstruction(I)) + if (!isHVXVec(I)) return false; if (!I.mayLoad() && !I.mayStore()) return false; @@ -2139,7 +2023,7 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI, unsigned offset) const { // This selection of jump instructions matches to that what - // AnalyzeBranch can parse, plus NVJ. + // analyzeBranch can parse, plus NVJ. if (isNewValueJump(MI)) // r9:2 return isInt<11>(offset); @@ -2210,30 +2094,13 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { } unsigned SchedClass = MI.getDesc().getSchedClass(); - - switch (SchedClass) { - case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: - case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: - case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: - case Hexagon::Sched::ALU64_tc_1_SLOT23: - case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: - case Hexagon::Sched::S_2op_tc_1_SLOT23: - case Hexagon::Sched::S_3op_tc_1_SLOT23: - case Hexagon::Sched::V2LDST_tc_ld_SLOT01: - case Hexagon::Sched::V2LDST_tc_st_SLOT0: - case Hexagon::Sched::V2LDST_tc_st_SLOT01: - case Hexagon::Sched::V4LDST_tc_ld_SLOT01: - case Hexagon::Sched::V4LDST_tc_st_SLOT0: - case Hexagon::Sched::V4LDST_tc_st_SLOT01: - return false; - } - return true; + return !is_TC1(SchedClass); } bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const { // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply // resource, but all operands can be received late like an ALU instruction. - return MI.getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; + return getType(MI) == HexagonII::TypeCVI_VX_LATE; } bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { @@ -2476,61 +2343,22 @@ bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { // Returns true when SU has a timing class TC1. bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); - switch (SchedClass) { - case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: - case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: - case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: - case Hexagon::Sched::ALU64_tc_1_SLOT23: - case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: - //case Hexagon::Sched::M_tc_1_SLOT23: - case Hexagon::Sched::S_2op_tc_1_SLOT23: - case Hexagon::Sched::S_3op_tc_1_SLOT23: - return true; - - default: - return false; - } + return is_TC1(SchedClass); } bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); - switch (SchedClass) { - case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: - case Hexagon::Sched::ALU64_tc_2_SLOT23: - case Hexagon::Sched::CR_tc_2_SLOT3: - case Hexagon::Sched::M_tc_2_SLOT23: - case Hexagon::Sched::S_2op_tc_2_SLOT23: - case Hexagon::Sched::S_3op_tc_2_SLOT23: - return true; - - default: - return false; - } + return is_TC2(SchedClass); } bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); - switch (SchedClass) { - case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123: - case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123: - case Hexagon::Sched::ALU64_tc_2early_SLOT23: - case Hexagon::Sched::CR_tc_2early_SLOT23: - case Hexagon::Sched::CR_tc_2early_SLOT3: - case Hexagon::Sched::J_tc_2early_SLOT0123: - case Hexagon::Sched::J_tc_2early_SLOT2: - case Hexagon::Sched::J_tc_2early_SLOT23: - case Hexagon::Sched::S_2op_tc_2early_SLOT23: - case Hexagon::Sched::S_3op_tc_2early_SLOT23: - return true; - - default: - return false; - } + return is_TC2early(SchedClass); } bool HexagonInstrInfo::isTC4x(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); - return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; + return is_TC4x(SchedClass); } // Schedule this ASAP. @@ -2552,7 +2380,7 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, return false; } -bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr &MI) const { +bool HexagonInstrInfo::isHVXVec(const MachineInstr &MI) const { const uint64_t V = getType(MI); return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; } @@ -2666,6 +2494,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, case Hexagon::L2_loadrh_io: case Hexagon::L2_loadruh_io: case Hexagon::S2_storerh_io: + case Hexagon::S2_storerf_io: return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); @@ -2750,7 +2579,7 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, } bool HexagonInstrInfo::isVecAcc(const MachineInstr &MI) const { - return isV60VectorInstruction(MI) && isAccumulator(MI); + return isHVXVec(MI) && isAccumulator(MI); } bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { @@ -2856,7 +2685,7 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const { // Add latency to instruction. bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, const MachineInstr &MI2) const { - if (isV60VectorInstruction(MI1) && isV60VectorInstruction(MI2)) + if (isHVXVec(MI1) && isHVXVec(MI2)) if (!isVecUsableNextPacket(MI1, MI2)) return true; return false; @@ -2876,6 +2705,11 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, /// \brief Can these instructions execute at the same time in a bundle. bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const { + if (Second.mayStore() && First.getOpcode() == Hexagon::S2_allocframe) { + const MachineOperand &Op = Second.getOperand(0); + if (Op.isReg() && Op.isUse() && Op.getReg() == Hexagon::R29) + return true; + } if (DisableNVSchedule) return false; if (mayBeNewStore(Second)) { @@ -2976,7 +2810,7 @@ bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const { bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { // There is no stall when ProdMI is not a V60 vector. - if (!isV60VectorInstruction(ProdMI)) + if (!isHVXVec(ProdMI)) return false; // There is no stall when ProdMI and ConsMI are not dependent. @@ -2994,19 +2828,15 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, bool HexagonInstrInfo::producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator BII) const { // There is no stall when I is not a V60 vector. - if (!isV60VectorInstruction(MI)) + if (!isHVXVec(MI)) return false; MachineBasicBlock::const_instr_iterator MII = BII; MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); - if (!MII->isBundle()) { + if (!(*MII).isBundle()) { const MachineInstr &J = *MII; - if (!isV60VectorInstruction(J)) - return false; - else if (isVecUsableNextPacket(J, MI)) - return false; - return true; + return producesStall(J, MI); } for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { @@ -3034,12 +2864,14 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, } bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { - return (Opcode == Hexagon::J2_jumpt) || - (Opcode == Hexagon::J2_jumpf) || - (Opcode == Hexagon::J2_jumptnew) || - (Opcode == Hexagon::J2_jumpfnew) || - (Opcode == Hexagon::J2_jumptnewpt) || - (Opcode == Hexagon::J2_jumpfnewpt); + return Opcode == Hexagon::J2_jumpt || + Opcode == Hexagon::J2_jumptpt || + Opcode == Hexagon::J2_jumpf || + Opcode == Hexagon::J2_jumpfpt || + Opcode == Hexagon::J2_jumptnew || + Opcode == Hexagon::J2_jumpfnew || + Opcode == Hexagon::J2_jumptnewpt || + Opcode == Hexagon::J2_jumpfnewpt; } bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { @@ -3341,9 +3173,30 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return 0; } +// Return the regular version of the .cur instruction. +int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: llvm_unreachable("Unknown .cur type"); + case Hexagon::V6_vL32b_cur_pi: + return Hexagon::V6_vL32b_pi; + case Hexagon::V6_vL32b_cur_ai: + return Hexagon::V6_vL32b_ai; + //128B + case Hexagon::V6_vL32b_cur_pi_128B: + return Hexagon::V6_vL32b_pi_128B; + case Hexagon::V6_vL32b_cur_ai_128B: + return Hexagon::V6_vL32b_ai_128B; + } + return 0; +} + + // The diagram below shows the steps involved in the conversion of a predicated // store instruction to its .new predicated new-value form. // +// Note: It doesn't include conditional new-value stores as they can't be +// converted to .new predicate. +// // p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ] // ^ ^ // / \ (not OK. it will cause new-value store to be @@ -3359,7 +3212,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { // p.old store // [if (p0)memw(R0+#0)=R2] // -// // The following set of instructions further explains the scenario where // conditional new-value store becomes invalid when promoted to .new predicate // form. @@ -3467,6 +3319,8 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. +// If MBPI is null, all edges will be treated as equally likely for the +// purposes of establishing a predication hint. int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const { // We assume that block can have at most two successors. @@ -3475,9 +3329,16 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, bool Taken = false; const BranchProbability OneHalf(1, 2); + auto getEdgeProbability = [MBPI] (const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) { + if (MBPI) + return MBPI->getEdgeProbability(Src, Dst); + return BranchProbability(1, Src->succ_size()); + }; + if (BrTarget.isMBB()) { const MachineBasicBlock *Dst = BrTarget.getMBB(); - Taken = MBPI->getEdgeProbability(Src, Dst) >= OneHalf; + Taken = getEdgeProbability(Src, Dst) >= OneHalf; } else { // The branch target is not a basic block (most likely a function). // Since BPI only gives probabilities for targets that are basic blocks, @@ -3514,7 +3375,7 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, for (const MachineBasicBlock *SB : B.successors()) { if (!B.isLayoutSuccessor(SB)) continue; - Taken = MBPI->getEdgeProbability(Src, SB) < OneHalf; + Taken = getEdgeProbability(Src, SB) < OneHalf; break; } } else { @@ -3527,7 +3388,7 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, BT = Op.getMBB(); break; } - Taken = BT && MBPI->getEdgeProbability(Src, BT) < OneHalf; + Taken = BT && getEdgeProbability(Src, BT) < OneHalf; } } // if (!Bad) } @@ -3558,17 +3419,15 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode()); if (NewOpcode >= 0) return NewOpcode; - - dbgs() << "Cannot convert to .new: " << getName(MI.getOpcode()) << '\n'; - llvm_unreachable(nullptr); + return 0; } int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { + const MachineFunction &MF = *MI.getParent()->getParent(); + const HexagonSubtarget &HST = MF.getSubtarget(); int NewOp = MI.getOpcode(); if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form NewOp = Hexagon::getPredOldOpcode(NewOp); - const MachineFunction &MF = *MI.getParent()->getParent(); - const HexagonSubtarget &HST = MF.getSubtarget(); // All Hexagon architectures have prediction bits on dot-new branches, // but only Hexagon V60+ has prediction bits on dot-old ones. Make sure // to pick the right opcode when converting back to dot-old. @@ -3596,6 +3455,21 @@ int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { NewOp = Hexagon::getNonNVStore(NewOp); assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); } + + if (HST.hasV60TOps()) + return NewOp; + + // Subtargets prior to V60 didn't support 'taken' forms of predicated jumps. + switch (NewOp) { + case Hexagon::J2_jumpfpt: + return Hexagon::J2_jumpf; + case Hexagon::J2_jumptpt: + return Hexagon::J2_jumpt; + case Hexagon::J2_jumprfpt: + return Hexagon::J2_jumprf; + case Hexagon::J2_jumprtpt: + return Hexagon::J2_jumprt; + } return NewOp; } @@ -3947,18 +3821,6 @@ short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real); } -// Return first non-debug instruction in the basic block. -MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) - const { - for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) { - MachineInstr &MI = *MII; - if (MI.isDebugValue()) - continue; - return &MI; - } - return nullptr; -} - unsigned HexagonInstrInfo::getInstrTimingClassLatency( const InstrItineraryData *ItinData, const MachineInstr &MI) const { // Default to one cycle for no itinerary. However, an "empty" itinerary may @@ -3966,18 +3828,53 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency( if (!ItinData) return getInstrLatency(ItinData, MI); - // Get the latency embedded in the itinerary. If we're not using timing class - // latencies or if we using BSB scheduling, then restrict the maximum latency - // to 1 (that is, either 0 or 1). if (MI.isTransient()) return 0; - unsigned Latency = ItinData->getStageLatency(MI.getDesc().getSchedClass()); - if (!EnableTimingClassLatency || - MI.getParent()->getParent()->getSubtarget(). - useBSBScheduling()) - if (Latency > 1) - Latency = 1; - return Latency; + return ItinData->getStageLatency(MI.getDesc().getSchedClass()); +} + +/// getOperandLatency - Compute and return the use operand latency of a given +/// pair of def and use. +/// In most cases, the static scheduling itinerary was enough to determine the +/// operand latency. But it may not be possible for instructions with variable +/// number of defs / uses. +/// +/// This is a raw interface to the itinerary that may be directly overriden by +/// a target. Use computeOperandLatency to get the best estimate of latency. +int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, + unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const { + auto &RI = getRegisterInfo(); + // Get DefIdx and UseIdx for super registers. + MachineOperand DefMO = DefMI.getOperand(DefIdx); + + if (RI.isPhysicalRegister(DefMO.getReg())) { + if (DefMO.isImplicit()) { + for (MCSuperRegIterator SR(DefMO.getReg(), &RI); SR.isValid(); ++SR) { + int Idx = DefMI.findRegisterDefOperandIdx(*SR, false, false, &RI); + if (Idx != -1) { + DefIdx = Idx; + break; + } + } + } + + MachineOperand UseMO = UseMI.getOperand(UseIdx); + if (UseMO.isImplicit()) { + for (MCSuperRegIterator SR(UseMO.getReg(), &RI); SR.isValid(); ++SR) { + int Idx = UseMI.findRegisterUseOperandIdx(*SR, false, &RI); + if (Idx != -1) { + UseIdx = Idx; + break; + } + } + } + } + + return TargetInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, + UseMI, UseIdx); } // inverts the predication logic. @@ -4139,11 +4036,6 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { return IS.getUnits(); } -unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; -} - // Calculate size of the basic block without debug instructions. unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { return nonDbgMICount(BB->instr_begin(), BB->instr_end()); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index b268c7a28171e2fedd4d314b89af8e93360d2bd4..944d0161a7c8e08157f8135f703209ced61b3c28 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -288,6 +288,19 @@ public: /// If the instruction is an increment of a constant value, return the amount. bool getIncrementValue(const MachineInstr &MI, int &Value) const override; + /// getOperandLatency - Compute and return the use operand latency of a given + /// pair of def and use. + /// In most cases, the static scheduling itinerary was enough to determine the + /// operand latency. But it may not be possible for instructions with variable + /// number of defs / uses. + /// + /// This is a raw interface to the itinerary that may be directly overriden by + /// a target. Use computeOperandLatency to get the best estimate of latency. + int getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr &DefMI, unsigned DefIdx, + const MachineInstr &UseMI, + unsigned UseIdx) const override; + bool isTailCall(const MachineInstr &MI) const override; /// HexagonInstrInfo specifics. @@ -301,11 +314,6 @@ public: bool isAccumulator(const MachineInstr &MI) const; bool isComplex(const MachineInstr &MI) const; bool isCompoundBranchInstr(const MachineInstr &MI) const; - bool isCondInst(const MachineInstr &MI) const; - bool isConditionalALU32 (const MachineInstr &MI) const; - bool isConditionalLoad(const MachineInstr &MI) const; - bool isConditionalStore(const MachineInstr &MI) const; - bool isConditionalTransfer(const MachineInstr &MI) const; bool isConstExtended(const MachineInstr &MI) const; bool isDeallocRet(const MachineInstr &MI) const; bool isDependent(const MachineInstr &ProdMI, @@ -356,7 +364,7 @@ public: bool isTC4x(const MachineInstr &MI) const; bool isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const; - bool isV60VectorInstruction(const MachineInstr &MI) const; + bool isHVXVec(const MachineInstr &MI) const; bool isValidAutoIncImm(const EVT VT, const int Offset) const; bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; bool isVecAcc(const MachineInstr &MI) const; @@ -399,6 +407,7 @@ public: const MachineInstr &GB) const; int getCondOpcode(int Opc, bool sense) const; int getDotCurOp(const MachineInstr &MI) const; + int getNonDotCurOp(const MachineInstr &MI) const; int getDotNewOp(const MachineInstr &MI) const; int getDotNewPredJumpOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const; @@ -424,7 +433,6 @@ public: unsigned getSize(const MachineInstr &MI) const; uint64_t getType(const MachineInstr &MI) const; unsigned getUnits(const MachineInstr &MI) const; - unsigned getValidSubTargets(const unsigned Opcode) const; /// getInstrTimingClassLatency - Compute the instruction latency of a given /// instruction using Timing Class information, if available. diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index b5948475e1f76449dfd174cb95d6eb4f621c4542..f82ad6cb3da6ad8b9a598f5bfdec2091202535ec 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -23,10 +23,11 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include #include @@ -58,6 +59,9 @@ cl::opt HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); +static cl::opt SimplifyLimit("hlir-simplify-limit", cl::init(10000), + cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR")); + static const char *HexagonVolatileMemcpyName = "hexagon_memcpy_forward_vp4cp4n2"; @@ -398,7 +402,7 @@ void Simplifier::Context::cleanup() { for (Value *V : Clones) { Instruction *U = cast(V); if (!U->getParent()) - delete U; + U->deleteValue(); } } @@ -476,7 +480,7 @@ Value *Simplifier::simplify(Context &C) { WorkListType Q; Q.push_back(C.Root); unsigned Count = 0; - const unsigned Limit = 100000; + const unsigned Limit = SimplifyLimit; while (!Q.empty()) { if (Count++ >= Limit) @@ -500,8 +504,7 @@ Value *Simplifier::simplify(Context &C) { Q.push_back(Op); } } - assert(Count < Limit && "Infinite loop in HLIR/simplify?"); - return C.Root; + return Count < Limit ? C.Root : nullptr; } @@ -1206,10 +1209,9 @@ bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, if (!T) return false; - unsigned BW = T->getBitWidth(); - APInt K0(BW, 0), K1(BW, 0); - computeKnownBits(V, K0, K1, DL); - return K0.countLeadingOnes() >= IterCount; + KnownBits Known(T->getBitWidth()); + computeKnownBits(V, Known, DL); + return Known.countMinLeadingZeros() >= IterCount; } @@ -1420,7 +1422,7 @@ bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB, void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) { for (auto &I : *LoopB) - if (Value *SV = SimplifyInstruction(&I, DL, &TLI, &DT)) + if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT})) I.replaceAllUsesWith(SV); for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) { @@ -1742,7 +1744,8 @@ bool PolynomialMultiplyRecognize::recognize() { // wide as the target's pmpy instruction. if (!promoteTypes(LoopB, ExitB)) return false; - convertShiftsToLeft(LoopB, ExitB, IterCount); + if (!convertShiftsToLeft(LoopB, ExitB, IterCount)) + return false; cleanupLoopBody(LoopB); } @@ -2044,7 +2047,7 @@ CleanupAndExit: SCEV::FlagNUW); Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt); if (Instruction *In = dyn_cast(NumBytes)) - if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT)) + if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) NumBytes = Simp; CallInst *NewCall; @@ -2156,7 +2159,7 @@ CleanupAndExit: Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty, MemmoveB->getTerminator()); if (Instruction *In = dyn_cast(NumWords)) - if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT)) + if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) NumWords = Simp; Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy) diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 7189b5a52c4242cc15a46fea0167e80c60bf669f..072501d8260d6f97c07a42ebceb99c298907973a 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -39,7 +39,7 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, // Populate the relocation type based on Hexagon target flags // set on an operand MCSymbolRefExpr::VariantKind RelocationType; - switch (MO.getTargetFlags()) { + switch (MO.getTargetFlags() & ~HexagonII::HMOTF_ConstExtended) { default: RelocationType = MCSymbolRefExpr::VK_None; break; diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 20dc9b0da1dba84b9e403e0dd2e03dd62982108f..4602de979024aff5361cdb8d56b7761cea716300 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -563,40 +563,33 @@ void ConvergingVLIWScheduler::readyQueueVerboseDump( } #endif -/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor -/// of SU, return it, otherwise return null. -static SUnit *getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = nullptr; - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - SUnit &Pred = *I->getSUnit(); - if (!Pred.isScheduled) { - // We found an available, but not scheduled, predecessor. If it's the - // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return nullptr; - OnlyAvailablePred = &Pred; - } +/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { + if (SU->NumPredsLeft == 0) + return false; + + for (auto &Pred : SU->Preds) { + // We found an available, but not scheduled, predecessor. + if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) + return false; } - return OnlyAvailablePred; + + return true; } -/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor -/// of SU, return it, otherwise return null. -static SUnit *getSingleUnscheduledSucc(SUnit *SU) { - SUnit *OnlyAvailableSucc = nullptr; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - SUnit &Succ = *I->getSUnit(); - if (!Succ.isScheduled) { - // We found an available, but not scheduled, successor. If it's the - // only one we have found, keep track of it... otherwise give up. - if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) - return nullptr; - OnlyAvailableSucc = &Succ; - } +/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor +/// of SU, return true (we may have duplicates) +static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { + if (SU->NumSuccsLeft == 0) + return false; + + for (auto &Succ : SU->Succs) { + // We found an available, but not scheduled, successor. + if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) + return false; } - return OnlyAvailableSucc; + return true; } // Constants used to denote relative importance of @@ -673,12 +666,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Count the number of nodes that // this node is the sole unscheduled node for. for (const SDep &SI : SU->Succs) - if (getSingleUnscheduledPred(SI.getSUnit()) == SU) + if (isSingleUnscheduledPred(SI.getSUnit(), SU)) ++NumNodesBlocking; } else { // How many unscheduled predecessors block this node? for (const SDep &PI : SU->Preds) - if (getSingleUnscheduledSucc(PI.getSUnit()) == SU) + if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) ++NumNodesBlocking; } ResCount += (NumNodesBlocking * ScaleTwo); @@ -744,7 +737,7 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Give less preference to an instruction that will cause a stall with // an instruction in the previous packet. - if (QII.isV60VectorInstruction(Instr)) { + if (QII.isHVXVec(Instr)) { // Check for stalls in the previous packet. if (Q.getID() == TopQID) { for (auto J : Top.ResourceModel->OldPacket) diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index dc10028c0424446791b0be529dbaa08361fbf116..810abf38863dddc60e187c7924b9d714c352ff1d 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -32,14 +32,10 @@ using namespace llvm; namespace llvm { -//===----------------------------------------------------------------------===// -// ConvergingVLIWScheduler - Implementation of the standard -// MachineSchedStrategy. -//===----------------------------------------------------------------------===// class VLIWResourceModel { /// ResourcesModel - Represents VLIW state. - /// Not limited to VLIW targets per say, but assumes + /// Not limited to VLIW targets per se, but assumes /// definition of DFA by a target. DFAPacketizer *ResourcesModel; @@ -110,6 +106,11 @@ public: void schedule() override; }; +//===----------------------------------------------------------------------===// +// ConvergingVLIWScheduler - Implementation of the standard +// MachineSchedStrategy. +//===----------------------------------------------------------------------===// + /// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics /// to balance the schedule. class ConvergingVLIWScheduler : public MachineSchedStrategy { diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index f87a1b8e424dca2da8b0fca689907fb6d3b2b98f..f80e0ef9e39fd71184947e2ef1c80f784041b180 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -14,8 +14,8 @@ def f64Imm : Operand { let ParserMatchClass = f64ImmOperand; } def s8_0Imm64Pred : PatLeaf<(i64 imm), [{ return isInt<8>(N->getSExtValue()); }]>; def s9_0ImmOperand : AsmOperandClass { let Name = "s9_0Imm"; } def s9_0Imm : Operand { let ParserMatchClass = s9_0ImmOperand; } -def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; let RenderMethod = "addSignedImmOperands"; } -def s23_2Imm : Operand { let ParserMatchClass = s23_2ImmOperand; } +def s27_2ImmOperand : AsmOperandClass { let Name = "s27_2Imm"; let RenderMethod = "addSignedImmOperands"; } +def s27_2Imm : Operand { let ParserMatchClass = s27_2ImmOperand; } def r32_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<32>(v); diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp index be50288849ca7fbc6f1d2b243750aa1d60d165ae..27b40f134b1f41c8ea8f3b958aee01e86e66ca60 100644 --- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -35,7 +35,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include static cl::opt CodeGrowthLimit("hexagon-amode-growth-limit", cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode " @@ -45,10 +44,8 @@ using namespace llvm; using namespace rdf; namespace llvm { - FunctionPass *createHexagonOptAddrMode(); - void initializeHexagonOptAddrModePass(PassRegistry &); - + void initializeHexagonOptAddrModePass(PassRegistry&); } // end namespace llvm namespace { @@ -59,10 +56,7 @@ public: HexagonOptAddrMode() : MachineFunctionPass(ID), HII(nullptr), MDT(nullptr), DFG(nullptr), - LV(nullptr) { - PassRegistry &R = *PassRegistry::getPassRegistry(); - initializeHexagonOptAddrModePass(R); - } + LV(nullptr) {} StringRef getPassName() const override { return "Optimize addressing mode of load/store"; @@ -84,7 +78,6 @@ private: MachineDominatorTree *MDT; DataFlowGraph *DFG; DataFlowGraph::DefStackMap DefM; - std::map> RDefMap; Liveness *LV; MISetType Deleted; @@ -99,8 +92,6 @@ private: void getAllRealUses(NodeAddr SN, NodeList &UNodeList); bool allValidCandidates(NodeAddr SA, NodeList &UNodeList); short getBaseWithLongOffset(const MachineInstr &MI) const; - void updateMap(NodeAddr IA); - bool constructDefMap(MachineBasicBlock *B); bool changeStore(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum); bool changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum); @@ -112,11 +103,11 @@ private: char HexagonOptAddrMode::ID = 0; -INITIALIZE_PASS_BEGIN(HexagonOptAddrMode, "opt-amode", +INITIALIZE_PASS_BEGIN(HexagonOptAddrMode, "amode-opt", "Optimize addressing mode", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) -INITIALIZE_PASS_END(HexagonOptAddrMode, "opt-amode", "Optimize addressing mode", +INITIALIZE_PASS_END(HexagonOptAddrMode, "amode-opt", "Optimize addressing mode", false, false) bool HexagonOptAddrMode::hasRepForm(MachineInstr &MI, unsigned TfrDefR) { @@ -173,8 +164,11 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr AddAslSN, for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { NodeAddr UA = *I; NodeAddr IA = UA.Addr->getOwner(*DFG); - if ((UA.Addr->getFlags() & NodeAttrs::PhiRef) || - RDefMap[OffsetRR][IA.Id] != OffsetRegRD) + if (UA.Addr->getFlags() & NodeAttrs::PhiRef) + return false; + NodeAddr AA = LV->getNearestAliasedRef(OffsetRR, IA); + if ((DFG->IsDef(AA) && AA.Id != OffsetRegRD) || + AA.Addr->getReachingDef() != OffsetRegRD) return false; MachineInstr &UseMI = *NodeAddr(IA).Addr->getCode(); @@ -259,7 +253,7 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr SA, << Print(phiUse, *DFG) << "\n"); if (!phiUse.empty()) { for (auto I : phiUse) { - if (DR.Reg != I.first) + if (!DFG->getPRI().alias(RegisterRef(I.first), DR)) continue; auto phiUseSet = I.second; for (auto phiUI : phiUseSet) { @@ -486,14 +480,14 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr AddAslUN, MIB.add(AddAslMI->getOperand(2)); MIB.add(AddAslMI->getOperand(3)); const GlobalValue *GV = ImmOp.getGlobal(); - MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm(), + MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm()+ImmOp.getOffset(), ImmOp.getTargetFlags()); OpStart = 3; } else if (UseMID.mayStore()) { MIB.add(AddAslMI->getOperand(2)); MIB.add(AddAslMI->getOperand(3)); const GlobalValue *GV = ImmOp.getGlobal(); - MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm(), + MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm()+ImmOp.getOffset(), ImmOp.getTargetFlags()); MIB.add(UseMI->getOperand(2)); OpStart = 3; @@ -597,47 +591,10 @@ bool HexagonOptAddrMode::processBlock(NodeAddr BA) { return Changed; } -void HexagonOptAddrMode::updateMap(NodeAddr IA) { - RegisterSet RRs; - for (NodeAddr RA : IA.Addr->members(*DFG)) - RRs.insert(RA.Addr->getRegRef(*DFG)); - bool Common = false; - for (auto &R : RDefMap) { - if (!RRs.count(R.first)) - continue; - Common = true; - break; - } - if (!Common) - return; - - for (auto &R : RDefMap) { - auto F = DefM.find(R.first.Reg); - if (F == DefM.end() || F->second.empty()) - continue; - R.second[IA.Id] = F->second.top()->Id; - } -} - -bool HexagonOptAddrMode::constructDefMap(MachineBasicBlock *B) { - bool Changed = false; - auto BA = DFG->getFunc().Addr->findBlock(B, *DFG); - DFG->markBlock(BA.Id, DefM); - - for (NodeAddr IA : BA.Addr->members(*DFG)) { - updateMap(IA); - DFG->pushAllDefs(IA, DefM); - } - - MachineDomTreeNode *N = MDT->getNode(B); - for (auto I : *N) - Changed |= constructDefMap(I->getBlock()); - - DFG->releaseBlock(BA.Id, DefM); - return Changed; -} - bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + bool Changed = false; auto &HST = MF.getSubtarget(); auto &MRI = MF.getRegInfo(); @@ -655,8 +612,6 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { L.computePhiInfo(); LV = &L; - constructDefMap(&DFG->getMF().front()); - Deleted.clear(); NodeAddr FA = DFG->getFunc(); DEBUG(dbgs() << "==== [RefMap#]=====:\n " diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index b8c3bf0745cee53214d54d81754f0e2fa86e6d49..689419638f54677852b6799ddace3b09a6644102 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -1,13 +1,14 @@ +//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + // Pattern fragment that combines the value type and the register class // into a single parameter. -// The pat frags in the definitions below need to have a named register, -// otherwise i32 will be assumed regardless of the register class. The -// name of the register does not matter. -def I1 : PatLeaf<(i1 PredRegs:$R)>; -def I32 : PatLeaf<(i32 IntRegs:$R)>; -def I64 : PatLeaf<(i64 DoubleRegs:$R)>; -def F32 : PatLeaf<(f32 IntRegs:$R)>; -def F64 : PatLeaf<(f64 DoubleRegs:$R)>; // Pattern fragments to extract the low and high subregisters from a // 64-bit value. @@ -353,7 +354,7 @@ def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1), (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; -def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), +def: Pat<(add (add IntRegs:$src2, s32_0ImmPred:$src3), IntRegs:$src1), (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>; def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1), (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; @@ -390,48 +391,47 @@ def: T_MType_acc_pat3 ; def: T_MType_acc_pat3 ; def: T_MType_acc_pat3 ; +// This complex pattern is really only to detect various forms of +// sign-extension i32->i64. The selected value will be of type i64 +// whose low word is the value being extended. The high word is +// unspecified. +def Usxtw : ComplexPattern; + def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; -def Sext64: PatFrag<(ops node:$Rs), (i64 (sext node:$Rs))>; def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; +def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; -// Return true if for a 32 to 64-bit sign-extended load. -def Sext64Ld : PatLeaf<(i64 DoubleRegs:$src1), [{ - LoadSDNode *LD = dyn_cast(N); - if (!LD) - return false; - return LD->getExtensionType() == ISD::SEXTLOAD && - LD->getMemoryVT().getScalarType() == MVT::i32; -}]>; - -def: Pat<(mul (Aext64 I32:$src1), (Aext64 I32:$src2)), - (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(i32 (trunc (sra (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +def: Pat<(i32 (trunc (srl (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(mul (Sext64 I32:$src1), (Sext64 I32:$src2)), - (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)), + (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; -def: Pat<(mul Sext64Ld:$src1, Sext64Ld:$src2), - (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>; +def: Pat<(mul Sext64:$Rs, Sext64:$Rt), + (M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; // Multiply and accumulate, use full result. // Rxx[+-]=mpy(Rs,Rt) -def: Pat<(add I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))), - (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), + (M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(sub I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))), - (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(sub I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), + (M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(add I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), + (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -def: Pat<(add I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))), - (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(add I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), + (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -def: Pat<(sub I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(sub I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), + (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -def: Pat<(sub I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))), - (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; +def: Pat<(sub I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), + (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; class Storepi_pat @@ -553,7 +553,8 @@ def: Storexm_simple_pat; def: Storexm_simple_pat; def: Storexm_simple_pat; -def: Pat <(Sext64 I32:$src), (A2_sxtw I32:$src)>; +def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; +def: Pat <(i64 (sext_inreg I64:$src, i32)), (A2_sxtw (LoReg I64:$src))>; def: Pat<(select (i1 (setlt I32:$src, 0)), (sub 0, I32:$src), I32:$src), (A2_abs IntRegs:$src)>; @@ -687,6 +688,8 @@ def I32toI1: OutPatFrag<(ops node:$Rs), defm: Storexm_pat; def: Storexm_simple_pat; +def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)), + (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, Requires<[HasV5T]>; def: Pat<(sra I64:$src, u6_0ImmPred:$u6), (S2_asr_i_p DoubleRegs:$src, imm:$u6)>; def: Pat<(srl I64:$src, u6_0ImmPred:$u6), @@ -722,7 +725,8 @@ def: Pat<(i1 0), (PS_false)>; def: Pat<(i1 1), (PS_true)>; // Pseudo instructions. -def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -740,8 +744,8 @@ def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def: Pat<(callseq_start timm:$amt), - (ADJCALLSTACKDOWN imm:$amt)>; +def: Pat<(callseq_start timm:$amt, timm:$amt2), + (ADJCALLSTACKDOWN imm:$amt, imm:$amt2)>; def: Pat<(callseq_end timm:$amt1, timm:$amt2), (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>; @@ -798,27 +802,19 @@ def: Pat<(i64 (sext_inreg I64:$src1, i16)), def: Pat<(i64 (sext_inreg I64:$src1, i8)), (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; -// We want to prevent emitting pnot's as much as possible. -// Map brcond with an unsupported setcc to a J2_jumpf. -def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2), - bb:$offset)>; - -def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)), - bb:$offset), - (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset), - (J2_jumpt PredRegs:$src1, bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, I32:$Rt)), bb:$offset), + (J2_jumpf (C2_cmpeq I32:$Rs, I32:$Rt), bb:$offset)>; +def: Pat<(brcond (i1 (setne I32:$Rs, s10_0ImmPred:$s10)), bb:$offset), + (J2_jumpf (C2_cmpeqi I32:$Rs, imm:$s10), bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 -1))), bb:$offset), + (J2_jumpf PredRegs:$Pu, bb:$offset)>; +def: Pat<(brcond (i1 (setne I1:$Pu, (i1 0))), bb:$offset), + (J2_jumpt PredRegs:$Pu, bb:$offset)>; // cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset), - (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)), - bb:$offset)>; +def: Pat<(brcond (i1 (setlt I32:$Rs, s8_0ImmPred:$s8)), bb:$offset), + (J2_jumpf (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s8)), bb:$offset)>; + // Map from a 64-bit select to an emulated 64-bit mux. // Hexagon does not support 64-bit MUXes; so emulate with combines. @@ -872,15 +868,13 @@ def: Pat<(i1 (setne I1:$src1, I1:$src2)), def: Pat<(i1 (setne I64:$src1, I64:$src2)), (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; -// Map cmpge(Rs, Rt) -> !cmpgt(Rs, Rt). -// rs >= rt -> !(rt > rs). -def : Pat <(i1 (setge I32:$src1, I32:$src2)), - (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>; +// rs >= rt -> rt <= rs +def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), + (C4_cmplte I32:$Rt, I32:$Rs)>; -// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1) let AddedComplexity = 30 in -def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)), - (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>; +def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), + (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s10))>; // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). // rss >= rtt -> !(rtt > rss). @@ -1166,8 +1160,8 @@ multiclass MinMax_pats_p { defm: T_MinMax_pats; } -def: Pat<(add (Sext64 I32:$Rs), I64:$Rt), - (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>; +def: Pat<(add Sext64:$Rs, I64:$Rt), + (A2_addsp (LoReg Sext64:$Rs), DoubleRegs:$Rt)>; let AddedComplexity = 200 in { defm: MinMax_pats_p; @@ -1481,16 +1475,22 @@ def i32in8ImmPred: PatLeaf<(i32 imm), [{ return v == (int64_t)(int8_t)v; }]>; +class SmallStackStore + : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ + return isSmallStackStore(cast(N)); +}]>; let AddedComplexity = 40 in { // Even though the offset is not extendable in the store-immediate, we // can still generate the fi# in the base address. If the final offset // is not valid for the instruction, we will replace it with a scratch // register. -// def: Storexm_fi_pat ; -// def: Storexm_fi_pat ; -// def: Storexm_fi_pat ; + def: Storexm_fi_pat , s32_0ImmPred, + ToImmByte, S4_storeirb_io>; + def: Storexm_fi_pat , i16in8ImmPred, + ToImmHalf, S4_storeirh_io>; + def: Storexm_fi_pat , i32in8ImmPred, + ToImmWord, S4_storeiri_io>; // defm: Storexm_fi_add_pat ; @@ -1646,9 +1646,14 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6), (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; +def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyri_addi tglobaladdr:$global, IntRegs:$Rs, imm:$U6)>; def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6), (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; - +def: Pat<(add (mul I32:$Rs, I32:$Rt), + (HexagonCONST32 tglobaladdr:$global)), + (M4_mpyrr_addi tglobaladdr:$global, IntRegs:$Rs, IntRegs:$Rt)>; def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)), (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>; def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)), @@ -2141,6 +2146,11 @@ let AddedComplexity = 30 in { def: Storea_pat; def: Storea_pat; def: Storea_pat; + def: Storea_pat; + + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; } let AddedComplexity = 30 in { @@ -2149,6 +2159,19 @@ let AddedComplexity = 30 in { def: Loada_pat; def: Loada_pat; def: Loada_pat; + def: Loada_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; } // Indexed store word - global address. @@ -2719,6 +2742,15 @@ def: Pat<(fneg F64:$Rs), (S2_togglebit_i (HiReg $Rs), 31), isub_hi, (i32 (LoReg $Rs)), isub_lo)>; +def: Pat<(mul I64:$Rss, I64:$Rtt), + (A2_combinew + (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), + (LoReg $Rss), + (HiReg $Rtt)), + (LoReg $Rtt), + (HiReg $Rss)), + (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))))>; + def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ return isAlignedMemNode(dyn_cast(N)); }]>; diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td index 5a720e794562d6a05e5eed5bad66bdff248f8ec6..93fb688fc1c0aea25ad84253a6b1d63c329daa7c 100644 --- a/lib/Target/Hexagon/HexagonPseudo.td +++ b/lib/Target/Hexagon/HexagonPseudo.td @@ -7,6 +7,15 @@ // //===----------------------------------------------------------------------===// +// The pat frags in the definitions below need to have a named register, +// otherwise i32 will be assumed regardless of the register class. The +// name of the register does not matter. +def I1 : PatLeaf<(i1 PredRegs:$R)>; +def I32 : PatLeaf<(i32 IntRegs:$R)>; +def I64 : PatLeaf<(i64 DoubleRegs:$R)>; +def F32 : PatLeaf<(f32 IntRegs:$R)>; +def F64 : PatLeaf<(f64 DoubleRegs:$R)>; + let PrintMethod = "printGlobalOperand" in { def globaladdress : Operand; def globaladdressExt : Operand; @@ -14,23 +23,29 @@ let PrintMethod = "printGlobalOperand" in { let isPseudo = 1 in { let isCodeGenOnly = 0 in -def A2_iconst : Pseudo<(outs IntRegs:$Rd32), (ins s23_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">; -def DUPLEX_Pseudo : InstHexagon<(outs), (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>; +def A2_iconst : Pseudo<(outs IntRegs:$Rd32), + (ins s27_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">; + +def DUPLEX_Pseudo : InstHexagon<(outs), + (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>; } let isExtendable = 1, opExtendable = 1, opExtentBits = 6, isAsmParserOnly = 1 in -def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst), - (ins s32_0Imm:$src1, s8_0Imm:$src2), - "$dst=combine(#$src1,#$src2)">; +def TFRI64_V2_ext : InstHexagon<(outs DoubleRegs:$dst), + (ins s32_0Imm:$src1, s8_0Imm:$src2), + "$dst=combine(#$src1,#$src2)", [], "", + A2_combineii.Itinerary, TypeALU32_2op>, OpcodeHexagon; // HI/LO Instructions let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in -class REG_IMMED MajOp, bit MinOp> +class REG_IMMED MajOp, bit MinOp, + InstHexagon rootInst> : InstHexagon<(outs IntRegs:$dst), - (ins u16_0Imm:$imm_value), - "$dst"#RegHalf#"=#$imm_value", [], "", ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, OpcodeHexagon { + (ins u16_0Imm:$imm_value), + "$dst"#RegHalf#"=#$imm_value", [], "", + rootInst.Itinerary, rootInst.Type>, OpcodeHexagon { bits<5> dst; bits<32> imm_value; @@ -43,8 +58,8 @@ class REG_IMMED MajOp, bit MinOp> } let isAsmParserOnly = 1 in { - def LO : REG_IMMED<".l", 0b0, 0b001, 0b1>; - def HI : REG_IMMED<".h", 0b0, 0b010, 0b1>; + def LO : REG_IMMED<".l", 0b0, 0b001, 0b1, A2_tfril>; + def HI : REG_IMMED<".h", 0b0, 0b010, 0b1, A2_tfrih>; } let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in { @@ -56,14 +71,16 @@ let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in { let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, isCodeGenOnly = 1 in -def PS_true : SInst<(outs PredRegs:$dst), (ins), "", []>; +def PS_true : InstHexagon<(outs PredRegs:$dst), (ins), "", + [(set I1:$dst, 1)], "", C2_orn.Itinerary, TypeCR>; let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, isCodeGenOnly = 1 in -def PS_false : SInst<(outs PredRegs:$dst), (ins), "", []>; +def PS_false : InstHexagon<(outs PredRegs:$dst), (ins), "", + [(set I1:$dst, 0)], "", C2_andn.Itinerary, TypeCR>; let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), ".error \"should not emit\" ", []>; let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in @@ -87,10 +104,10 @@ def ENDLOOP1 : Endloop<(outs), (ins b30_2Imm:$offset), let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, opExtendable = 0, hasSideEffects = 0 in -class LOOP_iBase - : CRInst<(outs), (ins brOp:$offset, u10_0Imm:$src2), +class LOOP_iBase + : InstHexagon <(outs), (ins b30_2Imm:$offset, u10_0Imm:$src2), #mnemonic#"($offset,#$src2)", - [], "" , CR_tc_3x_SLOT3> { + [], "", rootInst.Itinerary, rootInst.Type>, OpcodeHexagon { bits<9> offset; bits<10> src2; @@ -107,10 +124,10 @@ class LOOP_iBase let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, opExtendable = 0, hasSideEffects = 0 in -class LOOP_rBase - : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2), +class LOOP_rBase + : InstHexagon<(outs), (ins b30_2Imm:$offset, IntRegs:$src2), #mnemonic#"($offset,$src2)", - [], "" ,CR_tc_3x_SLOT3> { + [], "", rootInst.Itinerary, rootInst.Type>, OpcodeHexagon { bits<9> offset; bits<5> src2; @@ -123,27 +140,25 @@ class LOOP_rBase let Inst{4-3} = offset{3-2}; } -multiclass LOOP_ri { - let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in { - def iext: LOOP_iBase; - def rext: LOOP_rBase; - } +let Defs = [SA0, LC0, USR], isCodeGenOnly = 1, isExtended = 1, + opExtendable = 0 in { + def J2_loop0iext : LOOP_iBase<"loop0", J2_loop0i>; + def J2_loop1iext : LOOP_iBase<"loop1", J2_loop1i>; } - -let Defs = [SA0, LC0, USR] in -defm J2_loop0 : LOOP_ri<"loop0">; - // Interestingly only loop0's appear to set usr.lpcfg -let Defs = [SA1, LC1] in -defm J2_loop1 : LOOP_ri<"loop1">; +let Defs = [SA1, LC1], isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in { + def J2_loop0rext : LOOP_rBase<"loop0", J2_loop0r>; + def J2_loop1rext : LOOP_rBase<"loop1", J2_loop1r>; +} let isCall = 1, hasSideEffects = 1, isPredicable = 0, isExtended = 0, isExtendable = 1, opExtendable = 0, isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in class T_Call - : JInst<(outs), (ins a30_2Imm:$dst), - "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> { + : InstHexagon<(outs), (ins a30_2Imm:$dst), + "call " # ExtStr # "$dst", [], "", J2_call.Itinerary, TypeJ>, + OpcodeHexagon { let BaseOpcode = "call"; bits<24> dst; @@ -161,38 +176,24 @@ let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = [PC, R31, R6, R7, P0] in def PS_call_stk : T_Call<"">; -let isCall = 1, hasSideEffects = 1, cofMax1 = 1 in -class JUMPR_MISC_CALLR - : JInst<(outs), InputDag, - !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs", - "if ($Pu) callr $Rs"), - "callr $Rs"), - [], "", J_tc_2early_SLOT2> { +// Call, no return. +let isCall = 1, hasSideEffects = 1, cofMax1 = 1, isCodeGenOnly = 1 in +def PS_callr_nr: InstHexagon<(outs), (ins IntRegs:$Rs), + "callr $Rs", [], "", J2_callr.Itinerary, TypeJ>, OpcodeHexagon { bits<5> Rs; bits<2> Pu; - let isPredicated = isPred; - let isPredicatedFalse = isPredNot; + let isPredicatedFalse = 1; let IClass = 0b0101; - let Inst{27-25} = 0b000; - let Inst{24-23} = !if (isPred, 0b10, 0b01); - let Inst{22} = 0; - let Inst{21} = isPredNot; - let Inst{9-8} = !if (isPred, Pu, 0b00); + let Inst{27-21} = 0b0000101; let Inst{20-16} = Rs; - } -let isCodeGenOnly = 1 in { - def PS_callr_nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return. -} - let isCall = 1, hasSideEffects = 1, isExtended = 0, isExtendable = 1, opExtendable = 0, isCodeGenOnly = 1, - BaseOpcode = "PS_call_nr", isExtentSigned = 1, opExtentAlign = 2, - Itinerary = J_tc_2early_SLOT23 in -class Call_nr nbits, bit isPred, bit isFalse, dag iops> + BaseOpcode = "PS_call_nr", isExtentSigned = 1, opExtentAlign = 2 in +class Call_nr nbits, bit isPred, bit isFalse, dag iops, + InstrItinClass itin> : Pseudo<(outs), iops, "">, PredRel { bits<2> Pu; bits<17> dst; @@ -202,16 +203,18 @@ class Call_nr nbits, bit isPred, bit isFalse, dag iops> let isPredicatedFalse = isFalse; } -def PS_call_nr : Call_nr<24, 0, 0, (ins s32_0Imm:$Ii)>; -//def PS_call_nrt: Call_nr<17, 1, 0, (ins PredRegs:$Pu, s32_0Imm:$dst)>; -//def PS_call_nrf: Call_nr<17, 1, 1, (ins PredRegs:$Pu, s32_0Imm:$dst)>; +def PS_call_nr : Call_nr<24, 0, 0, (ins s32_0Imm:$Ii), J2_call.Itinerary>; +//def PS_call_nrt: Call_nr<17, 1, 0, (ins PredRegs:$Pu, s32_0Imm:$dst), +// J2_callt.Itinerary>; +//def PS_call_nrf: Call_nr<17, 1, 1, (ins PredRegs:$Pu, s32_0Imm:$dst), +// J2_callf.Itinerary>; let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC], isPredicable = 1, hasSideEffects = 0, InputType = "reg", cofMax1 = 1 in -class T_JMPr +class T_JMPr : InstHexagon<(outs), (ins IntRegs:$dst), "jumpr $dst", [], - "", J_tc_2early_SLOT2, TypeJ>, OpcodeHexagon { + "", rootInst.Itinerary, rootInst.Type>, OpcodeHexagon { bits<5> dst; let IClass = 0b0101; @@ -222,12 +225,12 @@ class T_JMPr // A return through builtin_eh_return. let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0, isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in -def EH_RETURN_JMPR : T_JMPr; +def EH_RETURN_JMPR : T_JMPr; // Indirect tail-call. let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0, isTerminator = 1, isCodeGenOnly = 1 in -def PS_tailcall_r : T_JMPr; +def PS_tailcall_r : T_JMPr; // // Direct tail-calls. @@ -259,11 +262,11 @@ class JumpOpcStr { } let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1, hasSideEffects = 0, InputType = "reg", cofMax1 = 1 in -class T_JMPr_c +class T_JMPr_c : InstHexagon<(outs), (ins PredRegs:$src, IntRegs:$dst), CondStr<"$src", !if(PredNot,0,1), isPredNew>.S # JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst", - [], "", J_tc_2early_SLOT2, TypeJ>, OpcodeHexagon { + [], "", rootInst.Itinerary, rootInst.Type>, OpcodeHexagon { let isTaken = isTak; let isPredicatedFalse = PredNot; @@ -280,30 +283,25 @@ class T_JMPr_c let Inst{11} = isPredNew; let Inst{9-8} = src; } -multiclass JMPR_Pred { - def NAME : T_JMPr_c; // not taken - // Predicate new - def NAME#newpt : T_JMPr_c; // taken - def NAME#new : T_JMPr_c; // not taken -} -multiclass JMPR_base { - let BaseOpcode = BaseOp in { - def NAME : T_JMPr; - defm t : JMPR_Pred<0>; - defm f : JMPR_Pred<1>; - } + +let isTerminator = 1, hasSideEffects = 0, isReturn = 1, isCodeGenOnly = 1, + isBarrier = 1, BaseOpcode = "JMPret" in { + def PS_jmpret : T_JMPr, PredNewRel; + def PS_jmprett : T_JMPr_c<0, 0, 0, J2_jumprt>, PredNewRel; + def PS_jmpretf : T_JMPr_c<1, 0, 0, J2_jumprf>, PredNewRel; + def PS_jmprettnew : T_JMPr_c<0, 1, 0, J2_jumprtnew>, PredNewRel; + def PS_jmpretfnew : T_JMPr_c<1, 1, 0, J2_jumprfnew>, PredNewRel; + def PS_jmprettnewpt : T_JMPr_c<0, 1, 1, J2_jumprtnewpt>, PredNewRel; + def PS_jmpretfnewpt : T_JMPr_c<1, 1, 1, J2_jumprfnewpt>, PredNewRel; } -let isTerminator = 1, hasSideEffects = 0, isReturn = 1, isCodeGenOnly = 1, isBarrier = 1 in -defm PS_jmpret : JMPR_base<"JMPret">, PredNewRel; //defm V6_vtran2x2_map : HexagonMapping<(outs VectorRegs:$Vy32, VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, IntRegs:$Rt32), "vtrans2x2(${Vy32},${Vx32},${Rt32})", (V6_vshuff VectorRegs:$Vy32, VectorRegs:$Vx32, VectorRegs:$Vx32in, IntRegs:$Rt32)>; // The reason for the custom inserter is to record all ALLOCA instructions // in MachineFunctionInfo. -let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1 in -def PS_alloca: InstHexagon<(outs IntRegs:$Rd), - (ins IntRegs:$Rs, u32_0Imm:$A), "", - [], "", ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>; +let Defs = [R29], hasSideEffects = 1 in +def PS_alloca: Pseudo <(outs IntRegs:$Rd), + (ins IntRegs:$Rs, u32_0Imm:$A), "", []>; // Load predicate. let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, @@ -319,35 +317,19 @@ def LDriw_mod : LDInst<(outs ModRegs:$dst), (ins IntRegs:$addr, s32_0Imm:$off), ".error \"should not emit\"", []>; -// Vector load -let Predicates = [HasV60T, UseHVX] in -let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in - class V6_LDInst pattern = [], - string cstr = "", InstrItinClass itin = CVI_VM_LD, - IType type = TypeCVI_VM_LD> - : InstHexagon; - -// Vector store -let Predicates = [HasV60T, UseHVX] in -let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in -class V6_STInst pattern = [], - string cstr = "", InstrItinClass itin = CVI_VM_ST, - IType type = TypeCVI_VM_ST> -: InstHexagon; let isCodeGenOnly = 1, isPseudo = 1 in -def PS_pselect : ALU64_rr<(outs DoubleRegs:$Rd), +def PS_pselect: InstHexagon<(outs DoubleRegs:$Rd), (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt), - ".error \"should not emit\" ", []>; + ".error \"should not emit\" ", [], "", A2_tfrpt.Itinerary, TypeALU32_2op>; let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0, isPredicable = 1, isExtendable = 1, opExtendable = 0, isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in -class T_JMP - : JInst_CJUMP_UCJUMP<(outs), (ins b30_2Imm:$dst), - "jump " # ExtStr # "$dst", - [], "", J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT> { +class T_JMP: InstHexagon<(outs), (ins b30_2Imm:$dst), + "jump $dst", + [], "", J2_jump.Itinerary, TypeJ>, OpcodeHexagon { bits<24> dst; let IClass = 0b0101; @@ -359,16 +341,16 @@ class T_JMP // Restore registers and dealloc return function call. let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { - def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; + def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP; let isExtended = 1, opExtendable = 0 in - def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">; + def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP; let Defs = [R14, R15, R28, R29, R30, R31, PC] in { - def RESTORE_DEALLOC_RET_JMP_V4_PIC : T_JMP<"">; + def RESTORE_DEALLOC_RET_JMP_V4_PIC : T_JMP; let isExtended = 1, opExtendable = 0 in - def RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC : T_JMP<"">; + def RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC : T_JMP; } } @@ -413,78 +395,76 @@ let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { def SAVE_REGISTERS_CALL_V4STK_EXT_PIC : T_Call<"">, PredRel; } -// Vector load/store pseudos - -let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in -class STrivv_template - : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>; +// Vector store pseudos +let Predicates = [HasV60T, UseHVX], isPseudo = 1, isCodeGenOnly = 1, + mayStore = 1, hasSideEffects = 0 in +class STrivv_template + : InstHexagon<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), + "", [], "", rootInst.Itinerary, rootInst.Type>; -def PS_vstorerw_ai: STrivv_template, - Requires<[HasV60T,UseHVXSgl]>; -def PS_vstorerwu_ai: STrivv_template, +def PS_vstorerw_ai: STrivv_template, Requires<[HasV60T,UseHVXSgl]>; -def PS_vstorerw_ai_128B: STrivv_template, +def PS_vstorerw_ai_128B: STrivv_template, Requires<[HasV60T,UseHVXDbl]>; -def PS_vstorerwu_ai_128B: STrivv_template, + +def PS_vstorerwu_ai: STrivv_template, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vstorerwu_ai_128B: STrivv_template, Requires<[HasV60T,UseHVXDbl]>; +let isPseudo = 1, isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0 in { + def PS_vstorerq_ai: Pseudo<(outs), + (ins IntRegs:$Rs, s32_0Imm:$Off, VecPredRegs:$Qt), "", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_vstorerq_ai_128B: Pseudo<(outs), + (ins IntRegs:$Rs, s32_0Imm:$Off, VecPredRegs128B:$Qt), "", []>, + Requires<[HasV60T,UseHVXDbl]>; +} -let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in -class LDrivv_template - : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>; +// Vector load pseudos +let Predicates = [HasV60T, UseHVX], isPseudo = 1, isCodeGenOnly = 1, + mayLoad = 1, hasSideEffects = 0 in +class LDrivv_template + : InstHexagon<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), + "", [], "", rootInst.Itinerary, rootInst.Type>; -def PS_vloadrw_ai: LDrivv_template, +def PS_vloadrw_ai: LDrivv_template, Requires<[HasV60T,UseHVXSgl]>; -def PS_vloadrwu_ai: LDrivv_template, - Requires<[HasV60T,UseHVXSgl]>; -def PS_vloadrw_ai_128B: LDrivv_template, +def PS_vloadrw_ai_128B: LDrivv_template, Requires<[HasV60T,UseHVXDbl]>; -def PS_vloadrwu_ai_128B: LDrivv_template, + +def PS_vloadrwu_ai: LDrivv_template, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vloadrwu_ai_128B: LDrivv_template, Requires<[HasV60T,UseHVXDbl]>; -// Store vector predicate pseudo. -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, - isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { - def PS_vstorerq_ai : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXSgl]>; - - def PS_vstorerq_ai_128B : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VectorRegs:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXSgl]>; - - def PS_vloadrq_ai : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXDbl]>; - - def PS_vloadrq_ai_128B : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXDbl]>; +let isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in { + def PS_vloadrq_ai: Pseudo<(outs VecPredRegs:$Qd), + (ins IntRegs:$Rs, s32_0Imm:$Off), "", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_vloadrq_ai_128B: Pseudo<(outs VecPredRegs128B:$Qd), + (ins IntRegs:$Rs, s32_0Imm:$Off), "", []>, + Requires<[HasV60T,UseHVXDbl]>; } -class VSELInst pattern = [], - string cstr = "", InstrItinClass itin = CVI_VA_DV, - IType type = TypeCVI_VA_DV> - : InstHexagon; -let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { - def PS_vselect: VSELInst<(outs VectorRegs:$dst), - (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), "", []>, - Requires<[HasV60T,UseHVXSgl]>; - def PS_vselect_128B: VSELInst<(outs VectorRegs128B:$dst), - (ins PredRegs:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3), - "", []>, Requires<[HasV60T,UseHVXDbl]>; - def PS_wselect: VSELInst<(outs VecDblRegs:$dst), - (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), "", []>, - Requires<[HasV60T,UseHVXSgl]>; - def PS_wselect_128B: VSELInst<(outs VecDblRegs128B:$dst), - (ins PredRegs:$src1, VecDblRegs128B:$src2, VecDblRegs128B:$src3), - "", []>, Requires<[HasV60T,UseHVXDbl]>; -} +let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in +class VSELInst + : InstHexagon; + +def PS_vselect: VSELInst<(outs VectorRegs:$dst), + (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + V6_vcmov>, Requires<[HasV60T,UseHVXSgl]>; +def PS_vselect_128B: VSELInst<(outs VectorRegs128B:$dst), + (ins PredRegs:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3), + V6_vcmov>, Requires<[HasV60T,UseHVXDbl]>; + +def PS_wselect: VSELInst<(outs VecDblRegs:$dst), + (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), + V6_vccombine>, Requires<[HasV60T,UseHVXSgl]>; +def PS_wselect_128B: VSELInst<(outs VecDblRegs128B:$dst), + (ins PredRegs:$src1, VecDblRegs128B:$src2, VecDblRegs128B:$src3), + V6_vccombine>, Requires<[HasV60T,UseHVXDbl]>; // Store predicate. let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, @@ -501,8 +481,10 @@ def STriw_mod : STInst<(outs), let isExtendable = 1, opExtendable = 1, opExtentBits = 6, isAsmParserOnly = 1 in -def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64_0Imm:$src1), - "$dst = #$src1">; +def TFRI64_V4 : InstHexagon<(outs DoubleRegs:$dst), + (ins u64_0Imm:$src1), + "$dst = #$src1", [], "", + A2_combineii.Itinerary, TypeALU32_2op>, OpcodeHexagon; // Hexagon doesn't have a vector multiply with C semantics. // Instead, generate a pseudo instruction that gets expaneded into two diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 2a1bb63af78924bb81d2321ab4d533671cf8d1c1..1fc157900ed5d6193ec5e99fa911a1416863c09b 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -50,11 +50,6 @@ bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const { R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1; } -bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const { - return Hexagon::R16 <= Reg && Reg <= Hexagon::R27; -} - - const MCPhysReg * HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF, const TargetRegisterClass *RC) const { diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 8a3f175b84881c7f25479566f7f04fffe85aff8a..5f65fad2cc0424390e9e71db96f8537389fca638 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -77,7 +77,6 @@ public: unsigned getFirstCallerSavedNonParamReg() const; bool isEHReturnCalleeSaveReg(unsigned Reg) const; - bool isCalleeSaveReg(unsigned Reg) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index 93ab2f73120716a433c4a9886cfdd71d9ba9a141..45dbb3a6d2184b7ac1d925afe957988a2c21d431 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -122,12 +122,6 @@ let Namespace = "Hexagon" in { def P2 : Rp<2, "p2">, DwarfRegNum<[65]>; def P3 : Rp<3, "p3">, DwarfRegNum<[66]>; - // Modifier registers. - // C6 and C7 can also be M0 and M1, but register names must be unique, even - // if belonging to different register classes. - def M0 : Mx<0, "m0">, DwarfRegNum<[72]>; - def M1 : Mx<1, "m1">, DwarfRegNum<[73]>; - // Fake register to represent USR.OVF bit. Artihmetic/saturating instruc- // tions modify this bit, and multiple such instructions are allowed in the // same packet. We need to ignore output dependencies on this bit, but not @@ -149,15 +143,15 @@ let Namespace = "Hexagon" in { // When defining more Cn registers, make sure to explicitly mark them // as reserved in HexagonRegisterInfo.cpp. def C5: Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; - def C6: Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>; - def C7: Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>; + def M0: Rc<6, "m0", ["c6"]>, DwarfRegNum<[73]>; + def M1: Rc<7, "m1", ["c7"]>, DwarfRegNum<[74]>; // Define C8 separately and make it aliased with USR. // The problem is that USR has subregisters (e.g. overflow). If USR was // specified as a subregister of C9_8, it would imply that subreg_overflow // and isub_lo can be composed, which leads to all kinds of issues // with lane masks. def C8: Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>; - def PC: Rc<9, "pc">, DwarfRegNum<[76]>; + def PC: Rc<9, "pc", ["c9"]>, DwarfRegNum<[76]>; def UGP: Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; def GP: Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>; def CS0: Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; @@ -177,7 +171,7 @@ let Namespace = "Hexagon" in { def C1_0: Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>; def C3_2: Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>; def C5_4: Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>; - def C7_6: Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>; + def C7_6: Rcc<6, "c7:6", [M0, M1], ["m1:0"]>, DwarfRegNum<[72]>; // Use C8 instead of USR as a subregister of C9_8. def C9_8: Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>; def C11_10: Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>; @@ -280,8 +274,8 @@ def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>; let Size = 32, isAllocatable = 0 in def CtrRegs : RegisterClass<"Hexagon", [i32], 32, - (add LC0, SA0, LC1, SA1, P3_0, C5, C6, C7, - C8, PC, UGP, GP, CS0, CS1, UPCYCLELO, UPCYCLEHI, + (add LC0, SA0, LC1, SA1, P3_0, C5, C8, PC, UGP, GP, CS0, CS1, + UPCYCLELO, UPCYCLEHI, FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI, M0, M1, USR)>; diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index 9b5fbea04d18b3c6687a064a7515a7a8ad1a5406..ffee03e726397344a502b5b40593e74107bb8212 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -7,6 +7,55 @@ // //===----------------------------------------------------------------------===// +def Hex_FWD : Bypass; +def HVX_FWD : Bypass; + +// Functional Units. +def SLOT0 : FuncUnit; +def SLOT1 : FuncUnit; +def SLOT2 : FuncUnit; +def SLOT3 : FuncUnit; +// Endloop is a pseudo instruction that is encoded with 2 bits in a packet +// rather than taking an execution slot. This special unit is needed +// to schedule an ENDLOOP with 4 other instructions. +def SLOT_ENDLOOP: FuncUnit; + +// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec". +def CVI_ST : FuncUnit; +def CVI_XLANE : FuncUnit; +def CVI_SHIFT : FuncUnit; +def CVI_MPY0 : FuncUnit; +def CVI_MPY1 : FuncUnit; +def CVI_LD : FuncUnit; + +// Combined functional units. +def CVI_XLSHF : FuncUnit; +def CVI_MPY01 : FuncUnit; +def CVI_ALL : FuncUnit; +def CVI_ALL_NOMEM : FuncUnit; + +// Combined functional unit data. +def HexagonComboFuncsV60 : + ComboFuncUnits<[ + ComboFuncData, + ComboFuncData, + ComboFuncData, + ComboFuncData + ]>; + +// Itinerary classes. +def PSEUDO : InstrItinClass; +def PSEUDOM : InstrItinClass; +def DUPLEX : InstrItinClass; +def tc_ENDLOOP : InstrItinClass; + +//===----------------------------------------------------------------------===// +// Auto-generated itinerary classes +//===----------------------------------------------------------------------===// +include "HexagonDepIICScalar.td" +include "HexagonDepIICHVX.td" + //===----------------------------------------------------------------------===// // V4 Machine Info + //===----------------------------------------------------------------------===// @@ -20,9 +69,9 @@ include "HexagonScheduleV55.td" // V60 Machine Info - //===----------------------------------------------------------------------===// -include "HexagonScheduleV60.td" include "HexagonIICScalar.td" include "HexagonIICHVX.td" +include "HexagonScheduleV60.td" //===----------------------------------------------------------------------===// // V62 Machine Info + diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index 880cc0a02b6a570fbe238a3092c2072c87f0aa93..69b704a805b82d5ba85906c429ed8e09f9a8428b 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -7,200 +7,31 @@ // //===----------------------------------------------------------------------===// -// There are four SLOTS (four parallel pipelines) in Hexagon V4 machine. -// This file describes that machine information. - -// -// |===========|==================================================| -// | PIPELINE | Instruction Classes | -// |===========|==================================================| -// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | -// |-----------|--------------------------------------------------| -// | SLOT1 | LD ST ALU32 | -// |-----------|--------------------------------------------------| -// | SLOT2 | XTYPE ALU32 J JR | -// |-----------|--------------------------------------------------| -// | SLOT3 | XTYPE ALU32 J CR | -// |===========|==================================================| - -// Functional Units. -def SLOT0 : FuncUnit; -def SLOT1 : FuncUnit; -def SLOT2 : FuncUnit; -def SLOT3 : FuncUnit; -// Endloop is a pseudo instruction that is encoded with 2 bits in a packet -// rather than taking an execution slot. This special unit is needed -// to schedule an ENDLOOP with 4 other instructions. -def SLOT_ENDLOOP: FuncUnit; - -// Itinerary classes. -def PSEUDO : InstrItinClass; -def PSEUDOM : InstrItinClass; -// ALU64/M/S Instruction classes of V2 are collectively knownn as XTYPE in V4. -def DUPLEX : InstrItinClass; -def PREFIX : InstrItinClass; -def COMPOUND_CJ_ARCHDEPSLOT : InstrItinClass; -def COMPOUND : InstrItinClass; +def LD_tc_ld_SLOT01 : InstrItinClass; +def ST_tc_st_SLOT01 : InstrItinClass; + +class HexagonV4PseudoItin { + list V4PseudoItin_list = [ + InstrItinData]>, + InstrItinData, + InstrStage<1, [SLOT2, SLOT3]>]>, + InstrItinData]>, + InstrItinData]> + ]; +} -def ALU32_2op_tc_1_SLOT0123 : InstrItinClass; -def ALU32_2op_tc_2early_SLOT0123 : InstrItinClass; -def ALU32_3op_tc_2early_SLOT0123 : InstrItinClass; -def ALU32_3op_tc_1_SLOT0123 : InstrItinClass; -def ALU32_3op_tc_2_SLOT0123 : InstrItinClass; -def ALU32_ADDI_tc_1_SLOT0123 : InstrItinClass; -def ALU64_tc_1_SLOT23 : InstrItinClass; -def ALU64_tc_2_SLOT23 : InstrItinClass; -def ALU64_tc_2early_SLOT23 : InstrItinClass; -def ALU64_tc_3x_SLOT23 : InstrItinClass; -def CR_tc_2_SLOT3 : InstrItinClass; -def CR_tc_2early_SLOT23 : InstrItinClass; -def CR_tc_2early_SLOT3 : InstrItinClass; -def CR_tc_3x_SLOT23 : InstrItinClass; -def CR_tc_3x_SLOT3 : InstrItinClass; -def J_tc_2early_SLOT23 : InstrItinClass; -def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass; -def J_tc_2early_SLOT2 : InstrItinClass; -def LD_tc_ld_SLOT01 : InstrItinClass; -def LD_tc_ld_pi_SLOT01 : InstrItinClass; -def LD_tc_ld_SLOT0 : InstrItinClass; -def LD_tc_3or4stall_SLOT0 : InstrItinClass; -def M_tc_2_SLOT23 : InstrItinClass; -def M_tc_2_acc_SLOT23 : InstrItinClass; -def M_tc_3_SLOT23 : InstrItinClass; -def M_tc_1_SLOT23 : InstrItinClass; -def M_tc_3x_SLOT23 : InstrItinClass; -def M_tc_3x_acc_SLOT23 : InstrItinClass; -def M_tc_3or4x_SLOT23 : InstrItinClass; -def M_tc_3or4x_acc_SLOT23 : InstrItinClass; -def ST_tc_st_SLOT01 : InstrItinClass; -def ST_tc_st_pi_SLOT01 : InstrItinClass; -def ST_tc_st_SLOT0 : InstrItinClass; -def ST_tc_st_pi_SLOT0 : InstrItinClass; -def ST_tc_ld_SLOT0 : InstrItinClass; -def ST_tc_3stall_SLOT0 : InstrItinClass; -def S_2op_tc_1_SLOT23 : InstrItinClass; -def S_2op_tc_2_SLOT23 : InstrItinClass; -def S_2op_tc_2early_SLOT23 : InstrItinClass; -def S_2op_tc_3or4x_SLOT23 : InstrItinClass; -def S_3op_tc_1_SLOT23 : InstrItinClass; -def S_3op_tc_2_SLOT23 : InstrItinClass; -def S_3op_tc_2early_SLOT23 : InstrItinClass; -def S_3op_tc_3_SLOT23 : InstrItinClass; -def S_3op_tc_3x_SLOT23 : InstrItinClass; -def NCJ_tc_3or4stall_SLOT0 : InstrItinClass; -def V2LDST_tc_ld_SLOT01 : InstrItinClass; -def V2LDST_tc_st_SLOT0 : InstrItinClass; -def V2LDST_tc_st_SLOT01 : InstrItinClass; -def V4LDST_tc_ld_SLOT01 : InstrItinClass; -def V4LDST_tc_st_SLOT0 : InstrItinClass; -def V4LDST_tc_st_SLOT01 : InstrItinClass; -def J_tc_2early_SLOT0123 : InstrItinClass; -def EXTENDER_tc_1_SLOT0123 : InstrItinClass; -def S_3op_tc_3stall_SLOT23 : InstrItinClass; +def HexagonV4ItinList : DepScalarItinV4, HexagonV4PseudoItin { + list V4Itin_list = [ + InstrItinData]>, + InstrItinData]> + ]; + list ItinList = + !listconcat(V4Itin_list, DepScalarItinV4_list, V4PseudoItin_list); +} def HexagonItinerariesV4 : - ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [ - // ALU32 - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // ALU64 - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // CR -> System - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // Jump (conditional/unconditional/return etc) - // CR - InstrItinData]>, - InstrItinData]>, - // J - InstrItinData]>, - InstrItinData]>, - // JR - InstrItinData]>, - - //Load - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // M - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // Store - // ST - InstrItinData]>, - InstrItinData]>, - // ST0 - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // S - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // SYS - InstrItinData]>, - - // New Value Compare Jump - InstrItinData]>, - - // Mem ops - MEM_V4 - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - InstrItinData]>, - - // ENDLOOP - InstrItinData]>, - - // Extender/PREFIX - InstrItinData]>, - - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData, - InstrStage<1, [SLOT2, SLOT3]>]> - ]>; + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], + [Hex_FWD], HexagonV4ItinList.ItinList>; def HexagonModelV4 : SchedMachineModel { // Max issue per cycle == bundle width. diff --git a/lib/Target/Hexagon/HexagonScheduleV55.td b/lib/Target/Hexagon/HexagonScheduleV55.td index 06cbcb16abb7ba87b9cb6b07a31fc01ec5d5dd1c..ca738be5d6ef6266dc017221f55670dbf9f432b7 100644 --- a/lib/Target/Hexagon/HexagonScheduleV55.td +++ b/lib/Target/Hexagon/HexagonScheduleV55.td @@ -1,4 +1,4 @@ -//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=// +//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions -*- tablegen -*=// // // The LLVM Compiler Infrastructure // @@ -7,190 +7,33 @@ // //===----------------------------------------------------------------------===// -// There are four SLOTS (four parallel pipelines) in Hexagon V4 machine. -// This file describes that machine information. -// -// |===========|==================================================| -// | PIPELINE | Instruction Classes | -// |===========|==================================================| -// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | -// |-----------|--------------------------------------------------| -// | SLOT1 | LD ST ALU32 | -// |-----------|--------------------------------------------------| -// | SLOT2 | XTYPE ALU32 J JR | -// |-----------|--------------------------------------------------| -// | SLOT3 | XTYPE ALU32 J CR | -// |===========|==================================================| +class HexagonV55PseudoItin { + list V55PseudoItin_list = [ + InstrItinData], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 1]>, + InstrItinData], [1, 1, 1]>, + InstrItinData], [2]> + ]; +} -def CJ_tc_1_SLOT23 : InstrItinClass; -def CJ_tc_2early_SLOT23 : InstrItinClass; -def COPROC_VMEM_vtc_long_SLOT01 : InstrItinClass; -def COPROC_VX_vtc_long_SLOT23 : InstrItinClass; -def COPROC_VX_vtc_SLOT23 : InstrItinClass; -def J_tc_3stall_SLOT2 : InstrItinClass; -def MAPPING_tc_1_SLOT0123 : InstrItinClass; -def M_tc_3stall_SLOT23 : InstrItinClass; +def HexagonV55ItinList : DepScalarItinV55, + HexagonV55PseudoItin { + list V55Itin_list = [ + InstrItinData], [2, 1]>, + InstrItinData], + [1, 1, 1]> + ]; + list ItinList = + !listconcat(V55Itin_list, DepScalarItinV55_list, + V55PseudoItin_list); +} def HexagonItinerariesV55 : - ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [ - // ALU32 - InstrItinData], [1, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [1, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [1, 1, 1]>, - - // ALU64 - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - - // CR -> System - InstrItinData], [2, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [3, 1, 1]>, - - // Jump (conditional/unconditional/return etc) - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], - [3, 1, 1, 1]>, - InstrItinData], - [1, 1, 1, 1]>, - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], [2, 1, 1, 1]>, - - // JR - InstrItinData], [2, 1, 1]>, - InstrItinData], [3, 1, 1]>, - - // Extender - InstrItinData], [1, 1, 1]>, - - // Load - InstrItinData], - [2, 1]>, - InstrItinData], - [2, 1]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1]>, - - // M - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [3, 1, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - - // Store - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [2, 1, 1]>, - InstrItinData], [1, 1, 1]>, - InstrItinData], [1, 1, 1]>, - - // S - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [2, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - InstrItinData], - [3, 1, 1]>, - - // New Value Compare Jump - InstrItinData], - [3, 1, 1, 1]>, - - // Mem ops - InstrItinData], - [1, 1, 1, 1]>, - InstrItinData], - [2, 1, 1, 1]>, - InstrItinData], - [1, 1, 1, 1]>, - InstrItinData], - [1, 1, 1, 1]>, - InstrItinData], - [3, 1, 1, 1]>, - InstrItinData], - [1, 1, 1, 1]>, - - // Endloop - InstrItinData], - [2]>, - - // Vector - InstrItinData], [2, 1, 1, 1]>, - InstrItinData], [3, 1, 1, 1]>, - InstrItinData], [3, 1, 1, 1]>, - InstrItinData], - [1, 1, 1, 1]>, - - // Misc - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], [1, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 1]> - ]>; + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], + [Hex_FWD], HexagonV55ItinList.ItinList>; def HexagonModelV55 : SchedMachineModel { // Max issue per cycle == bundle width. @@ -201,5 +44,5 @@ def HexagonModelV55 : SchedMachineModel { } //===----------------------------------------------------------------------===// -// Hexagon V4 Resource Definitions - +// Hexagon V55 Resource Definitions - //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV60.td b/lib/Target/Hexagon/HexagonScheduleV60.td index 63784710f52b684a6b83c7b160aaabc34401c3a2..a2544c92a72c05299b1fca4363bba1a2c0f95ab1 100644 --- a/lib/Target/Hexagon/HexagonScheduleV60.td +++ b/lib/Target/Hexagon/HexagonScheduleV60.td @@ -7,61 +7,6 @@ // //===----------------------------------------------------------------------===// -// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec". -def CVI_ST : FuncUnit; -def CVI_XLANE : FuncUnit; -def CVI_SHIFT : FuncUnit; -def CVI_MPY0 : FuncUnit; -def CVI_MPY1 : FuncUnit; -def CVI_LD : FuncUnit; - -// Combined functional units. -def CVI_XLSHF : FuncUnit; -def CVI_MPY01 : FuncUnit; -def CVI_ALL : FuncUnit; -def CVI_XLMPY0 : FuncUnit; -def CVI_SHFMPY1: FuncUnit; - -// Combined functional unit data. -def HexagonComboFuncsV60 : - ComboFuncUnits<[ - ComboFuncData, - ComboFuncData, - ComboFuncData, - ComboFuncData, - ComboFuncData - ]>; - -// Note: When adding additional vector scheduling classes, add the -// corresponding methods to the class HexagonInstrInfo. -def CVI_VA : InstrItinClass; -def CVI_VA_DV : InstrItinClass; -def CVI_VX_LONG : InstrItinClass; -def CVI_VX_LATE : InstrItinClass; -def CVI_VX : InstrItinClass; -def CVI_VX_DV_LONG : InstrItinClass; -def CVI_VX_DV : InstrItinClass; -def CVI_VX_DV_SLOT2 : InstrItinClass; -def CVI_VX_DV_SLOT2_LONG_EARLY : InstrItinClass; -def CVI_VP : InstrItinClass; -def CVI_VP_LONG : InstrItinClass; -def CVI_VP_VS_EARLY : InstrItinClass; -def CVI_VP_VS_LONG_EARLY : InstrItinClass; -def CVI_VP_VS_LONG : InstrItinClass; -def CVI_VP_VS : InstrItinClass; -def CVI_VP_DV : InstrItinClass; -def CVI_VS : InstrItinClass; -def CVI_VINLANESAT : InstrItinClass; -def CVI_VM_LD : InstrItinClass; -def CVI_VM_TMP_LD : InstrItinClass; -def CVI_VM_CUR_LD : InstrItinClass; -def CVI_VM_VP_LDU : InstrItinClass; -def CVI_VM_ST : InstrItinClass; -def CVI_VM_NEW_ST : InstrItinClass; -def CVI_VM_STU : InstrItinClass; -def CVI_HIST : InstrItinClass; -def CVI_VA_EXT : InstrItinClass; // There are four SLOTS (four parallel pipelines) in Hexagon V60 machine. // This file describes that machine information. @@ -108,196 +53,20 @@ def CVI_VA_EXT : InstrItinClass; // S0123| CVI_VA_EXT Extract | // |=====================================================================| +def HexagonV60ItinList : DepScalarItinV60, ScalarItin, + DepHVXItinV60, + HVXItin, PseudoItin { + list ItinList = + !listconcat(DepScalarItinV60_list, ScalarItin_list, + DepHVXItinV60_list, HVXItin_list, PseudoItin_list); +} + def HexagonItinerariesV60 : ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, - CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], [], [ - // ALU32 - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // ALU64 - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // CR -> System - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // Jump (conditional/unconditional/return etc) - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // JR - InstrItinData]>, - InstrItinData]>, - - // Extender - InstrItinData]>, - - // Load - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // M - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // Store - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // S - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60. - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // New Value Compare Jump - InstrItinData]>, - - // Mem ops - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // Endloop - InstrItinData]>, - - // Vector - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - - // Duplex and Compound - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - // Misc - InstrItinData]>, - InstrItinData]>, - InstrItinData, - InstrStage<1, [SLOT2, SLOT3]>]>, - - // Latest CVI spec definitions. - InstrItinData, - InstrStage<1, [CVI_XLANE,CVI_SHIFT, - CVI_MPY0, CVI_MPY1]>]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF, CVI_MPY01]>]>, - InstrItinData, - InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, - InstrItinData, - InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, - InstrItinData, - InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, - InstrItinData, - InstrStage<1, [CVI_MPY01]>]>, - InstrItinData, - InstrStage<1, [CVI_MPY01]>]>, - InstrItinData, - InstrStage<1, [CVI_MPY01]>]>, - InstrItinData, - InstrStage<1, [CVI_XLANE]>]>, - InstrItinData, - InstrStage<1, [CVI_XLANE]>]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>]>, - InstrItinData, - InstrStage<1, [CVI_XLSHF]>]>, - InstrItinData, - InstrStage<1, [CVI_SHIFT]>]>, - InstrItinData, - InstrStage<1, [CVI_SHIFT]>]>, - InstrItinData, - InstrStage<1, [CVI_LD], 0>, - InstrStage<1, [CVI_XLANE, CVI_SHIFT, - CVI_MPY0, CVI_MPY1]>]>, - InstrItinData, - InstrStage<1, [CVI_LD]>]>, - InstrItinData, - InstrStage<1, [CVI_LD], 0>, - InstrStage<1, [CVI_XLANE, CVI_SHIFT, - CVI_MPY0, CVI_MPY1]>]>, - InstrItinData, - InstrStage<1, [SLOT1], 0>, - InstrStage<1, [CVI_LD], 0>, - InstrStage<1, [CVI_XLANE]>]>, - InstrItinData, - InstrStage<1, [CVI_ST], 0>, - InstrStage<1, [CVI_XLANE, CVI_SHIFT, - CVI_MPY0, CVI_MPY1]>]>, - InstrItinData, - InstrStage<1, [CVI_ST]>]>, - InstrItinData, - InstrStage<1, [SLOT1], 0>, - InstrStage<1, [CVI_ST], 0>, - InstrStage<1, [CVI_XLANE]>]>, - InstrItinData, - InstrStage<1, [CVI_ALL]>]> - ]>; + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM], + [Hex_FWD, HVX_FWD], HexagonV60ItinList.ItinList>; def HexagonModelV60 : SchedMachineModel { // Max issue per cycle == bundle width. diff --git a/lib/Target/Hexagon/HexagonScheduleV62.td b/lib/Target/Hexagon/HexagonScheduleV62.td index 0758788a600be4da43298b0972b2b4187c00a635..a0a8595f185fb0fdd9da145b90aeaa624b53ff75 100644 --- a/lib/Target/Hexagon/HexagonScheduleV62.td +++ b/lib/Target/Hexagon/HexagonScheduleV62.td @@ -6,115 +6,23 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// ScalarItin contains some old itineraries still used by a +// handful of instructions. Hopefully, we will be able to get rid of them soon. -// V62 follows the same schedule as V60 with following exceptions: -// Following instructions are permissible on any slot on V62: -// V4_J4_cmpeq_fp0_jump_nt -// V4_J4_cmpeq_fp0_jump_t -// V4_J4_cmpeq_fp1_jump_nt -// V4_J4_cmpeq_fp1_jump_t -// V4_J4_cmpeq_tp0_jump_nt -// V4_J4_cmpeq_tp0_jump_t -// V4_J4_cmpeq_tp1_jump_nt -// V4_J4_cmpeq_tp1_jump_t -// V4_J4_cmpeqi_fp0_jump_nt -// V4_J4_cmpeqi_fp0_jump_t -// V4_J4_cmpeqi_fp1_jump_nt -// V4_J4_cmpeqi_fp1_jump_t -// V4_J4_cmpeqi_tp0_jump_nt -// V4_J4_cmpeqi_tp0_jump_t -// V4_J4_cmpeqi_tp1_jump_nt -// V4_J4_cmpeqi_tp1_jump_t -// V4_J4_cmpeqn1_fp0_jump_nt -// V4_J4_cmpeqn1_fp0_jump_t -// V4_J4_cmpeqn1_fp1_jump_nt -// V4_J4_cmpeqn1_fp1_jump_t -// V4_J4_cmpeqn1_tp0_jump_nt -// V4_J4_cmpeqn1_tp0_jump_t -// V4_J4_cmpeqn1_tp1_jump_nt -// V4_J4_cmpeqn1_tp1_jump_t -// V4_J4_cmpgt_fp0_jump_nt -// V4_J4_cmpgt_fp0_jump_t -// V4_J4_cmpgt_fp1_jump_nt -// V4_J4_cmpgt_fp1_jump_t -// V4_J4_cmpgt_tp0_jump_nt -// V4_J4_cmpgt_tp0_jump_t -// V4_J4_cmpgt_tp1_jump_nt -// V4_J4_cmpgt_tp1_jump_t -// V4_J4_cmpgti_fp0_jump_nt -// V4_J4_cmpgti_fp0_jump_t -// V4_J4_cmpgti_fp1_jump_nt -// V4_J4_cmpgti_fp1_jump_t -// V4_J4_cmpgti_tp0_jump_nt -// V4_J4_cmpgti_tp0_jump_t -// V4_J4_cmpgti_tp1_jump_nt -// V4_J4_cmpgti_tp1_jump_t -// V4_J4_cmpgtn1_fp0_jump_nt -// V4_J4_cmpgtn1_fp0_jump_t -// V4_J4_cmpgtn1_fp1_jump_nt -// V4_J4_cmpgtn1_fp1_jump_t -// V4_J4_cmpgtn1_tp0_jump_nt -// V4_J4_cmpgtn1_tp0_jump_t -// V4_J4_cmpgtn1_tp1_jump_nt -// V4_J4_cmpgtn1_tp1_jump_t -// V4_J4_cmpgtu_fp0_jump_nt -// V4_J4_cmpgtu_fp0_jump_t -// V4_J4_cmpgtu_fp1_jump_nt -// V4_J4_cmpgtu_fp1_jump_t -// V4_J4_cmpgtu_tp0_jump_nt -// V4_J4_cmpgtu_tp0_jump_t -// V4_J4_cmpgtu_tp1_jump_nt -// V4_J4_cmpgtu_tp1_jump_t -// V4_J4_cmpgtui_fp0_jump_nt -// V4_J4_cmpgtui_fp0_jump_t -// V4_J4_cmpgtui_fp1_jump_nt -// V4_J4_cmpgtui_fp1_jump_t -// V4_J4_cmpgtui_tp0_jump_nt -// V4_J4_cmpgtui_tp0_jump_t -// V4_J4_cmpgtui_tp1_jump_nt -// V4_J4_cmpgtui_tp1_jump_t -// V4_J4_tstbit0_fp0_jump_nt -// V4_J4_tstbit0_fp0_jump_t -// V4_J4_tstbit0_fp1_jump_nt -// V4_J4_tstbit0_fp1_jump_t -// V4_J4_tstbit0_tp0_jump_nt -// V4_J4_tstbit0_tp0_jump_t -// V4_J4_tstbit0_tp1_jump_nt -// V4_J4_tstbit0_tp1_jump_t -// JMP -// JMPEXT -// JMPEXT_f -// JMPEXT_fnew_nt -// JMPEXT_fnew_t -// JMPEXT_t -// JMPEXT_tnew_nt -// JMPEXT_tnew_t -// JMPNOTEXT -// JMPNOTEXT_f -// JMPNOTEXT_fnew_nt -// JMPNOTEXT_fnew_t -// JMPNOTEXT_t -// JMPNOTEXT_tnew_nt -// JMPNOTEXT_tnew_t -// JMP_f -// JMP_fnew_nt -// JMP_fnew_t -// JMP_t -// JMP_tnew_nt -// JMP_tnew_t -// RESTORE_DEALLOC_RET_JMP_V4 -// RESTORE_DEALLOC_RET_JMP_V4_EXT - -def HexagonV62ItinList : ScalarItin, HVXV62Itin { +def HexagonV62ItinList : DepScalarItinV62, ScalarItin, + DepHVXItinV62, HVXItin, PseudoItin { list ItinList = - !listconcat(ScalarItin_list, HVXV62Itin_list); + !listconcat(DepScalarItinV62_list, ScalarItin_list, + DepHVXItinV62_list, HVXItin_list, PseudoItin_list); } def HexagonItinerariesV62 : ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, - CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], - [], HexagonV62ItinList.ItinList>; + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM], + [Hex_FWD, HVX_FWD], HexagonV62ItinList.ItinList>; def HexagonModelV62 : SchedMachineModel { // Max issue per cycle == bundle width. diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index 471e32221b2925fc9caf4f76f74d04ca762ef782..db268b78cd73f3279ef3bbb064a68f4af956134f 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -13,8 +13,8 @@ #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 033b93fc910aac07e94a94983e96a9d8bd017e8d..8851a23ae8acee154fe70ecd3a9a9bd014d4af04 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -73,6 +73,10 @@ static cl::opt OverrideLongCalls("hexagon-long-calls", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("If present, forces/disables the use of long calls")); +static cl::opt EnablePredicatedCalls("hexagon-pred-calls", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Consider calls to be predicable")); + void HexagonSubtarget::initializeEnvironment() { UseMemOps = false; ModeIEEERndNear = false; @@ -139,6 +143,59 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, UseBSBScheduling = hasV60TOps() && EnableBSBSched; } +/// \brief Perform target specific adjustments to the latency of a schedule +/// dependency. +void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, + SDep &Dep) const { + MachineInstr *SrcInst = Src->getInstr(); + MachineInstr *DstInst = Dst->getInstr(); + if (!Src->isInstr() || !Dst->isInstr()) + return; + + const HexagonInstrInfo *QII = getInstrInfo(); + + // Instructions with .new operands have zero latency. + SmallSet ExclSrc; + SmallSet ExclDst; + if (QII->canExecuteInBundle(*SrcInst, *DstInst) && + isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) { + Dep.setLatency(0); + return; + } + + if (!hasV60TOps()) + return; + + // If it's a REG_SEQUENCE, use its destination instruction to determine + // the correct latency. + if (DstInst->isRegSequence() && Dst->NumSuccs == 1) { + unsigned RSeqReg = DstInst->getOperand(0).getReg(); + MachineInstr *RSeqDst = Dst->Succs[0].getSUnit()->getInstr(); + unsigned UseIdx = -1; + for (unsigned OpNum = 0; OpNum < RSeqDst->getNumOperands(); OpNum++) { + const MachineOperand &MO = RSeqDst->getOperand(OpNum); + if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == RSeqReg) { + UseIdx = OpNum; + break; + } + } + unsigned RSeqLatency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, + 0, *RSeqDst, UseIdx)); + Dep.setLatency(RSeqLatency); + } + + // Try to schedule uses near definitions to generate .cur. + ExclSrc.clear(); + ExclDst.clear(); + if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) && + isBestZeroLatency(Src, Dst, QII, ExclSrc, ExclDst)) { + Dep.setLatency(0); + return; + } + + updateLatency(*SrcInst, *DstInst, Dep); +} + void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { for (auto &SU : DAG->SUnits) { @@ -154,19 +211,19 @@ void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) { for (auto &SU : DAG->SUnits) { // Update the latency of chain edges between v60 vector load or store - // instructions to be 1. These instructions cannot be scheduled in the + // instructions to be 1. These instruction cannot be scheduled in the // same packet. MachineInstr &MI1 = *SU.getInstr(); auto *QII = static_cast(DAG->TII); bool IsStoreMI1 = MI1.mayStore(); bool IsLoadMI1 = MI1.mayLoad(); - if (!QII->isV60VectorInstruction(MI1) || !(IsStoreMI1 || IsLoadMI1)) + if (!QII->isHVXVec(MI1) || !(IsStoreMI1 || IsLoadMI1)) continue; for (auto &SI : SU.Succs) { if (SI.getKind() != SDep::Order || SI.getLatency() != 0) continue; MachineInstr &MI2 = *SI.getSUnit()->getInstr(); - if (!QII->isV60VectorInstruction(MI2)) + if (!QII->isHVXVec(MI2)) continue; if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) { SI.setLatency(1); @@ -204,69 +261,99 @@ bool HexagonSubtarget::enableMachineScheduler() const { return true; } -bool HexagonSubtarget::enableSubRegLiveness() const { - return EnableSubregLiveness; +bool HexagonSubtarget::usePredicatedCalls() const { + return EnablePredicatedCalls; } -// This helper function is responsible for increasing the latency only. void HexagonSubtarget::updateLatency(MachineInstr &SrcInst, MachineInstr &DstInst, SDep &Dep) const { + if (Dep.isArtificial()) { + Dep.setLatency(1); + return; + } + if (!hasV60TOps()) return; auto &QII = static_cast(*getInstrInfo()); - if (EnableVecFrwdSched && QII.addLatencyToSchedule(SrcInst, DstInst)) { - // Vec frwd scheduling. - Dep.setLatency(Dep.getLatency() + 1); - } else if (useBSBScheduling() && - QII.isLateInstrFeedsEarlyInstr(SrcInst, DstInst)) { - // BSB scheduling. - Dep.setLatency(Dep.getLatency() + 1); - } else if (EnableTCLatencySched) { - // TClass latency scheduling. - // Check if SrcInst produces in 2C an operand of DstInst taken in stage 2B. - if (QII.isTC1(SrcInst) || QII.isTC2(SrcInst)) - if (!QII.isTC1(DstInst) && !QII.isTC2(DstInst)) - Dep.setLatency(Dep.getLatency() + 1); - } + // BSB scheduling. + if (QII.isHVXVec(SrcInst) || useBSBScheduling()) + Dep.setLatency((Dep.getLatency() + 1) >> 1); } -/// If the SUnit has a zero latency edge, return the other SUnit. -static SUnit *getZeroLatency(SUnit *N, SmallVector &Deps) { - for (auto &I : Deps) - if (I.isAssignedRegDep() && I.getLatency() == 0 && - !I.getSUnit()->getInstr()->isPseudo()) - return I.getSUnit(); - return nullptr; +void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const { + MachineInstr *SrcI = Src->getInstr(); + for (auto &I : Src->Succs) { + if (!I.isAssignedRegDep() || I.getSUnit() != Dst) + continue; + unsigned DepR = I.getReg(); + int DefIdx = -1; + for (unsigned OpNum = 0; OpNum < SrcI->getNumOperands(); OpNum++) { + const MachineOperand &MO = SrcI->getOperand(OpNum); + if (MO.isReg() && MO.isDef() && MO.getReg() == DepR) + DefIdx = OpNum; + } + assert(DefIdx >= 0 && "Def Reg not found in Src MI"); + MachineInstr *DstI = Dst->getInstr(); + for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) { + const MachineOperand &MO = DstI->getOperand(OpNum); + if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) { + int Latency = (InstrInfo.getOperandLatency(&InstrItins, *SrcI, + DefIdx, *DstI, OpNum)); + + // For some instructions (ex: COPY), we might end up with < 0 latency + // as they don't have any Itinerary class associated with them. + if (Latency <= 0) + Latency = 1; + + I.setLatency(Latency); + updateLatency(*SrcI, *DstI, I); + } + } + + // Update the latency of opposite edge too. + for (auto &J : Dst->Preds) { + if (J.getSUnit() != Src) + continue; + J.setLatency(I.getLatency()); + } + } } /// Change the latency between the two SUnits. -void HexagonSubtarget::changeLatency(SUnit *Src, SmallVector &Deps, - SUnit *Dst, unsigned Lat) const { - MachineInstr &SrcI = *Src->getInstr(); - for (auto &I : Deps) { +void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat) + const { + for (auto &I : Src->Succs) { if (I.getSUnit() != Dst) continue; + SDep T = I; I.setLatency(Lat); - SUnit *UpdateDst = I.getSUnit(); - updateLatency(SrcI, *UpdateDst->getInstr(), I); + // Update the latency of opposite edge too. - for (auto &PI : UpdateDst->Preds) { - if (PI.getSUnit() != Src || !PI.isAssignedRegDep()) - continue; - PI.setLatency(Lat); - updateLatency(SrcI, *UpdateDst->getInstr(), PI); - } + T.setSUnit(Src); + auto F = std::find(Dst->Preds.begin(), Dst->Preds.end(), T); + assert(F != Dst->Preds.end()); + F->setLatency(I.getLatency()); } } +/// If the SUnit has a zero latency edge, return the other SUnit. +static SUnit *getZeroLatency(SUnit *N, SmallVector &Deps) { + for (auto &I : Deps) + if (I.isAssignedRegDep() && I.getLatency() == 0 && + !I.getSUnit()->getInstr()->isPseudo()) + return I.getSUnit(); + return nullptr; +} + // Return true if these are the best two instructions to schedule // together with a zero latency. Only one dependence should have a zero // latency. If there are multiple choices, choose the best, and change -// ther others, if needed. +// the others, if needed. bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst, - const HexagonInstrInfo *TII) const { + const HexagonInstrInfo *TII, SmallSet &ExclSrc, + SmallSet &ExclDst) const { MachineInstr &SrcInst = *Src->getInstr(); MachineInstr &DstInst = *Dst->getInstr(); @@ -277,6 +364,16 @@ bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst, if (SrcInst.isPHI() || DstInst.isPHI()) return false; + if (!TII->isToBeScheduledASAP(SrcInst, DstInst) && + !TII->canExecuteInBundle(SrcInst, DstInst)) + return false; + + // The architecture doesn't allow three dependent instructions in the same + // packet. So, if the destination has a zero latency successor, then it's + // not a candidate for a zero latency predecessor. + if (getZeroLatency(Dst, Dst->Succs) != nullptr) + return false; + // Check if the Dst instruction is the best candidate first. SUnit *Best = nullptr; SUnit *DstBest = nullptr; @@ -290,98 +387,53 @@ bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst, if (Best != Dst) return false; - // The caller frequents adds the same dependence twice. If so, then + // The caller frequently adds the same dependence twice. If so, then // return true for this case too. - if (Src == SrcBest && Dst == DstBest) + if ((Src == SrcBest && Dst == DstBest ) || + (SrcBest == nullptr && Dst == DstBest) || + (Src == SrcBest && Dst == nullptr)) return true; // Reassign the latency for the previous bests, which requires setting // the dependence edge in both directions. - if (SrcBest != nullptr) - changeLatency(SrcBest, SrcBest->Succs, Dst, 1); - if (DstBest != nullptr) - changeLatency(Src, Src->Succs, DstBest, 1); - // If there is an edge from SrcBest to DstBst, then try to change that - // to 0 now. - if (SrcBest && DstBest) - changeLatency(SrcBest, SrcBest->Succs, DstBest, 0); - - return true; -} - -// Update the latency of a Phi when the Phi bridges two instructions that -// require a multi-cycle latency. -void HexagonSubtarget::changePhiLatency(MachineInstr &SrcInst, SUnit *Dst, - SDep &Dep) const { - if (!SrcInst.isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0) - return; - - for (const SDep &PI : Dst->Preds) { - if (PI.getLatency() != 0) - continue; - Dep.setLatency(2); - break; - } -} - -/// \brief Perform target specific adjustments to the latency of a schedule -/// dependency. -void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, - SDep &Dep) const { - MachineInstr *SrcInst = Src->getInstr(); - MachineInstr *DstInst = Dst->getInstr(); - if (!Src->isInstr() || !Dst->isInstr()) - return; - - const HexagonInstrInfo *QII = static_cast(getInstrInfo()); - - // Instructions with .new operands have zero latency. - if (QII->canExecuteInBundle(*SrcInst, *DstInst) && - isBestZeroLatency(Src, Dst, QII)) { - Dep.setLatency(0); - return; + if (SrcBest != nullptr) { + if (!hasV60TOps()) + changeLatency(SrcBest, Dst, 1); + else + restoreLatency(SrcBest, Dst); } - - if (!hasV60TOps()) - return; - - // Don't adjust the latency of post-increment part of the instruction. - if (QII->isPostIncrement(*SrcInst) && Dep.isAssignedRegDep()) { - if (SrcInst->mayStore()) - return; - if (Dep.getReg() != SrcInst->getOperand(0).getReg()) - return; - } else if (QII->isPostIncrement(*DstInst) && Dep.getKind() == SDep::Anti) { - if (DstInst->mayStore()) - return; - if (Dep.getReg() != DstInst->getOperand(0).getReg()) - return; - } else if (QII->isPostIncrement(*DstInst) && DstInst->mayStore() && - Dep.isAssignedRegDep()) { - MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1); - if (Op.isReg() && Dep.getReg() != Op.getReg()) - return; - } - - // Check if we need to change any the latency values when Phis are added. - if (useBSBScheduling() && SrcInst->isPHI()) { - changePhiLatency(*SrcInst, Dst, Dep); - return; + if (DstBest != nullptr) { + if (!hasV60TOps()) + changeLatency(Src, DstBest, 1); + else + restoreLatency(Src, DstBest); } - // If it's a REG_SEQUENCE, use its destination instruction to determine - // the correct latency. - if (DstInst->isRegSequence() && Dst->NumSuccs == 1) - DstInst = Dst->Succs[0].getSUnit()->getInstr(); - - // Try to schedule uses near definitions to generate .cur. - if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) && - isBestZeroLatency(Src, Dst, QII)) { - Dep.setLatency(0); - return; + // Attempt to find another opprotunity for zero latency in a different + // dependence. + if (SrcBest && DstBest) + // If there is an edge from SrcBest to DstBst, then try to change that + // to 0 now. + changeLatency(SrcBest, DstBest, 0); + else if (DstBest) { + // Check if the previous best destination instruction has a new zero + // latency dependence opportunity. + ExclSrc.insert(Src); + for (auto &I : DstBest->Preds) + if (ExclSrc.count(I.getSUnit()) == 0 && + isBestZeroLatency(I.getSUnit(), DstBest, TII, ExclSrc, ExclDst)) + changeLatency(I.getSUnit(), DstBest, 0); + } else if (SrcBest) { + // Check if previous best source instruction has a new zero latency + // dependence opportunity. + ExclDst.insert(Dst); + for (auto &I : SrcBest->Succs) + if (ExclDst.count(I.getSUnit()) == 0 && + isBestZeroLatency(SrcBest, I.getSUnit(), TII, ExclSrc, ExclDst)) + changeLatency(SrcBest, I.getSUnit(), 0); } - updateLatency(*SrcInst, *DstInst, Dep); + return true; } unsigned HexagonSubtarget::getL1CacheLineSize() const { @@ -392,3 +444,7 @@ unsigned HexagonSubtarget::getL1PrefetchDistance() const { return 32; } +bool HexagonSubtarget::enableSubRegLiveness() const { + return EnableSubregLiveness; +} + diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 6a3e7f13be4c4a8d3d2bdd52d3bcabd5d9f168d9..4379efa79c9cd80acf33e74eac59cc617720499b 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -104,6 +104,7 @@ public: bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; } bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; } bool useLongCalls() const { return UseLongCalls; } + bool usePredicatedCalls() const; bool useBSBScheduling() const { return UseBSBScheduling; } bool enableMachineScheduler() const override; @@ -146,11 +147,10 @@ private: // Helper function responsible for increasing the latency only. void updateLatency(MachineInstr &SrcInst, MachineInstr &DstInst, SDep &Dep) const; - void changeLatency(SUnit *Src, SmallVector &Deps, SUnit *Dst, - unsigned Lat) const; - bool isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII) - const; - void changePhiLatency(MachineInstr &SrcInst, SUnit *Dst, SDep &Dep) const; + void restoreLatency(SUnit *Src, SUnit *Dst) const; + void changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat) const; + bool isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII, + SmallSet &ExclSrc, SmallSet &ExclDst) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 06fc9195fa677e70929cc9c981966c7e03499e6e..e507a797871fcdd0c62593914658a91bd2203b40 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -23,8 +23,8 @@ #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -111,6 +111,8 @@ namespace llvm { extern char &HexagonExpandCondsetsID; void initializeHexagonExpandCondsetsPass(PassRegistry&); void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); + void initializeHexagonGenMuxPass(PassRegistry&); + void initializeHexagonOptAddrModePass(PassRegistry&); Pass *createHexagonLoopIdiomPass(); FunctionPass *createHexagonBitSimplify(); @@ -151,7 +153,11 @@ static Reloc::Model getEffectiveRelocModel(Optional RM) { extern "C" void LLVMInitializeHexagonTarget() { // Register the target. RegisterTargetMachine X(getTheHexagonTarget()); - initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); + + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeHexagonLoopIdiomRecognizePass(PR); + initializeHexagonGenMuxPass(PR); + initializeHexagonOptAddrModePass(PR); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, @@ -221,7 +227,7 @@ namespace { /// Hexagon Code Generator Pass Configuration Options. class HexagonPassConfig : public TargetPassConfig { public: - HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM) + HexagonPassConfig(HexagonTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} HexagonTargetMachine &getHexagonTargetMachine() const { @@ -243,14 +249,14 @@ public: } // namespace TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { - return new HexagonPassConfig(this, PM); + return new HexagonPassConfig(*this, PM); } void HexagonPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); bool NoOpt = (getOptLevel() == CodeGenOpt::None); - addPass(createAtomicExpandPass(TM)); + addPass(createAtomicExpandPass()); if (!NoOpt) { if (EnableLoopPrefetch) addPass(createLoopDataPrefetchPass()); @@ -274,27 +280,27 @@ bool HexagonPassConfig::addInstSelector() { if (!NoOpt) { // Create logical operations on predicate registers. if (EnableGenPred) - addPass(createHexagonGenPredicate(), false); + addPass(createHexagonGenPredicate()); // Rotate loops to expose bit-simplification opportunities. if (EnableLoopResched) - addPass(createHexagonLoopRescheduling(), false); + addPass(createHexagonLoopRescheduling()); // Split double registers. if (!DisableHSDR) addPass(createHexagonSplitDoubleRegs()); // Bit simplification. if (EnableBitSimplify) - addPass(createHexagonBitSimplify(), false); + addPass(createHexagonBitSimplify()); addPass(createHexagonPeephole()); printAndVerify("After hexagon peephole pass"); // Constant propagation. if (!DisableHCP) { - addPass(createHexagonConstPropagationPass(), false); - addPass(&UnreachableMachineBlockElimID, false); + addPass(createHexagonConstPropagationPass()); + addPass(&UnreachableMachineBlockElimID); } if (EnableGenInsert) - addPass(createHexagonGenInsert(), false); + addPass(createHexagonGenInsert()); if (EnableEarlyIf) - addPass(createHexagonEarlyIfConversion(), false); + addPass(createHexagonEarlyIfConversion()); } return false; @@ -305,9 +311,9 @@ void HexagonPassConfig::addPreRegAlloc() { if (EnableExpandCondsets) insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID); if (!DisableStoreWidening) - addPass(createHexagonStoreWidening(), false); + addPass(createHexagonStoreWidening()); if (!DisableHardwareLoops) - addPass(createHexagonHardwareLoops(), false); + addPass(createHexagonHardwareLoops()); } if (TM->getOptLevel() >= CodeGenOpt::Default) addPass(&MachinePipelinerID); @@ -318,16 +324,16 @@ void HexagonPassConfig::addPostRegAlloc() { if (EnableRDFOpt) addPass(createHexagonRDFOpt()); if (!DisableHexagonCFGOpt) - addPass(createHexagonCFGOptimizer(), false); + addPass(createHexagonCFGOptimizer()); if (!DisableAModeOpt) - addPass(createHexagonOptAddrMode(), false); + addPass(createHexagonOptAddrMode()); } } void HexagonPassConfig::addPreSched2() { - addPass(createHexagonCopyToCombine(), false); + addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) - addPass(&IfConverterID, false); + addPass(&IfConverterID); addPass(createHexagonSplitConst32AndConst64()); } @@ -335,17 +341,17 @@ void HexagonPassConfig::addPreEmitPass() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) - addPass(createHexagonNewValueJump(), false); + addPass(createHexagonNewValueJump()); - addPass(createHexagonBranchRelaxation(), false); + addPass(createHexagonBranchRelaxation()); // Create Packets. if (!NoOpt) { if (!DisableHardwareLoops) - addPass(createHexagonFixupHwLoops(), false); + addPass(createHexagonFixupHwLoops()); // Generate MUX from pairs of conditional transfers. if (EnableGenMux) - addPass(createHexagonGenMux(), false); + addPass(createHexagonGenMux()); addPass(createHexagonPacketizer(), false); } diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index c9c4f95dbaaa5818d8da25296659ce0cca7ceac1..4dacb1501392d69026de2820aa4542224c91e80e 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalObject.h" @@ -28,7 +29,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 3a789a5f7e0b22b85ad02273795003ffeb19b665..7667bfb7a0eb4e7bc3d383c5419c69d20350110a 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -16,10 +16,10 @@ // prune the dependence. // //===----------------------------------------------------------------------===// +#include "HexagonVLIWPacketizer.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "HexagonVLIWPacketizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -214,12 +214,12 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { for (auto &MB : MF) { auto Begin = MB.begin(), End = MB.end(); while (Begin != End) { - // First the first non-boundary starting from the end of the last + // Find the first non-boundary starting from the end of the last // scheduling region. MachineBasicBlock::iterator RB = Begin; while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF)) ++RB; - // First the first boundary starting from the beginning of the new + // Find the first boundary starting from the beginning of the new // region. MachineBasicBlock::iterator RE = RB; while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF)) @@ -273,25 +273,17 @@ bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI, if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) return true; - // Check if this is a predicate dependence. - const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); - if (RC == &Hexagon::PredRegsRegClass) - return true; - - // Assumes that the first operand of the CALLr is the function address. - if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { - const MachineOperand MO = MI.getOperand(0); - if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) - return true; + // Call-like instructions can be packetized with preceding instructions + // that define registers implicitly used or modified by the call. Explicit + // uses are still prohibited, as in the case of indirect calls: + // r0 = ... + // J2_jumpr r0 + if (DepType == SDep::Data) { + for (const MachineOperand MO : MI.operands()) + if (MO.isReg() && MO.getReg() == DepReg && !MO.isImplicit()) + return true; } - if (HII->isJumpR(MI)) { - const MachineOperand &MO = HII->isPredicated(MI) ? MI.getOperand(1) - : MI.getOperand(0); - assert(MO.isReg() && MO.isUse()); - if (MO.getReg() == DepReg) - return true; - } return false; } @@ -333,11 +325,13 @@ bool HexagonPacketizerList::isNewifiable(const MachineInstr &MI, const TargetRegisterClass *NewRC) { // Vector stores can be predicated, and can be new-value stores, but // they cannot be predicated on a .new predicate value. - if (NewRC == &Hexagon::PredRegsRegClass) - if (HII->isV60VectorInstruction(MI) && MI.mayStore()) + if (NewRC == &Hexagon::PredRegsRegClass) { + if (HII->isHVXVec(MI) && MI.mayStore()) return false; - return HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn() || - HII->mayBeNewStore(MI); + return HII->isPredicated(MI) && HII->getDotNewPredOp(MI, nullptr) > 0; + } + // If the class is not PredRegs, it could only apply to new-value stores. + return HII->mayBeNewStore(MI); } // Promote an instructiont to its .cur form. @@ -356,7 +350,7 @@ void HexagonPacketizerList::cleanUpDotCur() { MachineInstr *MI = nullptr; for (auto BI : CurrentPacketMIs) { DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); - if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) { + if (HII->isDotCurInst(*BI)) { MI = BI; continue; } @@ -369,7 +363,7 @@ void HexagonPacketizerList::cleanUpDotCur() { if (!MI) return; // We did not find a use of the CUR, so de-cur it. - MI->setDesc(HII->get(Hexagon::V6_vL32b_ai)); + MI->setDesc(HII->get(HII->getNonDotCurOp(*MI))); DEBUG(dbgs() << "Demoted CUR "; MI->dump();); } @@ -377,9 +371,9 @@ void HexagonPacketizerList::cleanUpDotCur() { bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI, const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) { - if (!HII->isV60VectorInstruction(MI)) + if (!HII->isHVXVec(MI)) return false; - if (!HII->isV60VectorInstruction(*MII)) + if (!HII->isHVXVec(*MII)) return false; // Already a dot new instruction. @@ -760,11 +754,14 @@ bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI, return false; } -static bool isImplicitDependency(const MachineInstr &I, unsigned DepReg) { +static bool isImplicitDependency(const MachineInstr &I, bool CheckDef, + unsigned DepReg) { for (auto &MO : I.operands()) { - if (MO.isRegMask() && MO.clobbersPhysReg(DepReg)) + if (CheckDef && MO.isRegMask() && MO.clobbersPhysReg(DepReg)) return true; - if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) + if (!MO.isReg() || MO.getReg() != DepReg || !MO.isImplicit()) + continue; + if (CheckDef == MO.isDef()) return true; } return false; @@ -798,7 +795,8 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, // If dependency is trough an implicitly defined register, we should not // newify the use. - if (isImplicitDependency(PI, DepReg)) + if (isImplicitDependency(PI, true, DepReg) || + isImplicitDependency(MI, false, DepReg)) return false; const MCInstrDesc& MCID = PI.getDesc(); @@ -808,8 +806,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI, // predicate .new if (RC == &Hexagon::PredRegsRegClass) - if (HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn()) - return HII->predCanBeUsedAsDotNew(PI, DepReg); + return HII->predCanBeUsedAsDotNew(PI, DepReg); if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) return false; @@ -1365,7 +1362,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // Data dpendence ok if we have load.cur. if (DepType == SDep::Data && HII->isDotCurInst(J)) { - if (HII->isV60VectorInstruction(I)) + if (HII->isHVXVec(I)) continue; } @@ -1374,6 +1371,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) { if (promoteToDotNew(I, DepType, II, RC)) { PromotedToDotNew = true; + if (cannotCoexist(I, J)) + FoundSequentialDependence = true; continue; } } @@ -1418,26 +1417,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { DepType != SDep::Output) continue; - // Ignore output dependences due to superregs. We can write to two - // different subregisters of R1:0 for instance in the same cycle. - - // If neither I nor J defines DepReg, then this is a superfluous output - // dependence. The dependence must be of the form: - // R0 = ... - // R1 = ... - // and there is an output dependence between the two instructions with - // DepReg = D0. - // We want to ignore these dependences. Ideally, the dependence - // constructor should annotate such dependences. We can then avoid this - // relatively expensive check. - // if (DepType == SDep::Output) { - // DepReg is the register that's responsible for the dependence. - unsigned DepReg = SUJ->Succs[i].getReg(); - - // Check if I and J really defines DepReg. - if (!I.definesRegister(DepReg) && !J.definesRegister(DepReg)) - continue; FoundSequentialDependence = true; break; } @@ -1553,10 +1533,9 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { MachineInstr &I = *SUI->getInstr(); MachineInstr &J = *SUJ->getInstr(); - if (cannotCoexist(I, J)) - return false; + bool Coexist = !cannotCoexist(I, J); - if (!Dependence) + if (Coexist && !Dependence) return true; // Check if the instruction was promoted to a dot-new. If so, demote it @@ -1579,14 +1558,13 @@ MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr &MI) { MachineBasicBlock::iterator MII = MI.getIterator(); MachineBasicBlock *MBB = MI.getParent(); - if (MI.isImplicitDef()) { - unsigned R = MI.getOperand(0).getReg(); - if (Hexagon::IntRegsRegClass.contains(R)) { - MCSuperRegIterator S(R, HRI, false); - MI.addOperand(MachineOperand::CreateReg(*S, true, true)); - } + + if (CurrentPacketMIs.size() == 0) + PacketStalls = false; + PacketStalls |= producesStall(MI); + + if (MI.isImplicitDef()) return MII; - } assert(ResourceTracker->canReserveResources(MI)); bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); @@ -1660,23 +1638,13 @@ bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { } -// Return true when ConsMI uses a register defined by ProdMI. -static bool isDependent(const MachineInstr &ProdMI, - const MachineInstr &ConsMI) { - if (!ProdMI.getOperand(0).isReg()) - return false; - unsigned DstReg = ProdMI.getOperand(0).getReg(); - - for (auto &Op : ConsMI.operands()) - if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) - // The MIs depend on each other. - return true; - - return false; -} - // V60 forward scheduling. bool HexagonPacketizerList::producesStall(const MachineInstr &I) { + // If the packet already stalls, then ignore the stall from a subsequent + // instruction in the same packet. + if (PacketStalls) + return false; + // Check whether the previous packet is in a different loop. If this is the // case, there is little point in trying to avoid a stall because that would // favor the rare case (loop entry) over the common case (loop iteration). @@ -1691,40 +1659,58 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { return false; } - // Check for stall between two vector instructions. - if (HII->isV60VectorInstruction(I)) { - for (auto J : OldPacketMIs) { - if (!HII->isV60VectorInstruction(*J)) - continue; - if (isDependent(*J, I) && !HII->isVecUsableNextPacket(*J, I)) - return true; - } - return false; + SUnit *SUI = MIToSUnit[const_cast(&I)]; + + // Check if the latency is 0 between this instruction and any instruction + // in the current packet. If so, we disregard any potential stalls due to + // the instructions in the previous packet. Most of the instruction pairs + // that can go together in the same packet have 0 latency between them. + // Only exceptions are newValueJumps as they're generated much later and + // the latencies can't be changed at that point. Another is .cur + // instructions if its consumer has a 0 latency successor (such as .new). + // In this case, the latency between .cur and the consumer stays non-zero + // even though we can have both .cur and .new in the same packet. Changing + // the latency to 0 is not an option as it causes software pipeliner to + // not pipeline in some cases. + + // For Example: + // { + // I1: v6.cur = vmem(r0++#1) + // I2: v7 = valign(v6,v4,r2) + // I3: vmem(r5++#1) = v7.new + // } + // Here I2 and I3 has 0 cycle latency, but I1 and I2 has 2. + + for (auto J : CurrentPacketMIs) { + SUnit *SUJ = MIToSUnit[J]; + for (auto &Pred : SUI->Preds) + if (Pred.getSUnit() == SUJ && + (Pred.getLatency() == 0 || HII->isNewValueJump(I) || + HII->isToBeScheduledASAP(*J, I))) + return false; } - // Check for stall between two scalar instructions. First, check that - // there is no definition of a use in the current packet, because it - // may be a candidate for .new. - for (auto J : CurrentPacketMIs) - if (!HII->isV60VectorInstruction(*J) && isDependent(*J, I)) - return false; + // Check if the latency is greater than one between this instruction and any + // instruction in the previous packet. + for (auto J : OldPacketMIs) { + SUnit *SUJ = MIToSUnit[J]; + for (auto &Pred : SUI->Preds) + if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1) + return true; + } - // Check for stall between I and instructions in the previous packet. - if (MF.getSubtarget().useBSBScheduling()) { - for (auto J : OldPacketMIs) { - if (HII->isV60VectorInstruction(*J)) - continue; - if (!HII->isLateInstrFeedsEarlyInstr(*J, I)) - continue; - if (isDependent(*J, I) && !HII->canExecuteInBundle(*J, I)) + // Check if the latency is greater than one between this instruction and any + // instruction in the previous packet. + for (auto J : OldPacketMIs) { + SUnit *SUJ = MIToSUnit[J]; + for (auto &Pred : SUI->Preds) + if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1) return true; - } } return false; } - //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 3f28dc5b79cec78582211f86103e513837de4577..adb92b6dc8557c04e80d28ee9270b3e8eb15b3e9 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -34,6 +34,10 @@ class HexagonPacketizerList : public VLIWPacketizerList { // Track MIs with ignored dependence. std::vector IgnoreDepMIs; + // Set to true if the packet contains an instruction that stalls with an + // instruction from the previous packet. + bool PacketStalls = false; + protected: /// \brief A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 337af294eb861cb0e710355c9aa0844a2f418830..545c8b6b2acdefff3fb76032f2fad47d2b7e829b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -12,9 +12,9 @@ #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCChecker.h" #include "MCTargetDesc/HexagonMCCodeEmitter.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -58,6 +58,7 @@ class HexagonAsmBackend : public MCAsmBackend { RF.getContents() = Code; RF.getFixups() = Fixups; } + public: HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI, StringRef CPU) : @@ -183,7 +184,11 @@ public: { "fixup_Hexagon_IE_GOT_11_X", 0, 32, 0 }, { "fixup_Hexagon_TPREL_32_6_X", 0, 32, 0 }, { "fixup_Hexagon_TPREL_16_X", 0, 32, 0 }, - { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 } + { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 }, + { "fixup_Hexagon_GD_PLT_B22_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_GD_PLT_B32_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_LD_PLT_B22_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Hexagon_LD_PLT_B32_PCREL_X",0, 32, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) @@ -290,6 +295,11 @@ public: case fixup_Hexagon_32_PCREL: case fixup_Hexagon_6_PCREL_X: case fixup_Hexagon_23_REG: + case fixup_Hexagon_27_REG: + case fixup_Hexagon_GD_PLT_B22_PCREL_X: + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + case fixup_Hexagon_LD_PLT_B22_PCREL_X: + case fixup_Hexagon_LD_PLT_B32_PCREL_X: // These relocations should always have a relocation recorded IsResolved = false; return; @@ -346,6 +356,8 @@ public: case fixup_Hexagon_B9_PCREL_X: case fixup_Hexagon_B7_PCREL: case fixup_Hexagon_B7_PCREL_X: + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + case fixup_Hexagon_LD_PLT_B32_PCREL_X: return 4; } } @@ -373,6 +385,8 @@ public: break; case fixup_Hexagon_B32_PCREL_X: + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + case fixup_Hexagon_LD_PLT_B32_PCREL_X: Value >>= 6; break; } @@ -711,22 +725,24 @@ public: break; } case MCFragment::FT_Relaxable: { + MCContext &Context = Asm.getContext(); auto &RF = cast(*K); auto &Inst = const_cast(RF.getInst()); while (Size > 0 && HexagonMCInstrInfo::bundleSize(Inst) < 4) { - MCInst *Nop = new (Asm.getContext()) MCInst; + MCInst *Nop = new (Context) MCInst; Nop->setOpcode(Hexagon::A2_nop); Inst.addOperand(MCOperand::createInst(Nop)); Size -= 4; if (!HexagonMCChecker( - *MCII, RF.getSubtargetInfo(), Inst, Inst, - *Asm.getContext().getRegisterInfo()).check()) { + Context, *MCII, RF.getSubtargetInfo(), Inst, + *Context.getRegisterInfo(), false) + .check()) { Inst.erase(Inst.end() - 1); Size = 0; } } - bool Error = HexagonMCShuffle(true, *MCII, RF.getSubtargetInfo(), - Inst); + bool Error = HexagonMCShuffle(Context, true, *MCII, + RF.getSubtargetInfo(), Inst); //assert(!Error); (void)Error; ReplaceInstruction(Asm.getEmitter(), RF, Inst); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 9c80312b790da159f156b360a92aad89e33df9d2..d8009c5da08eefe750539679ac51ec9948f07c0a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -29,7 +29,7 @@ namespace llvm { /// namespace HexagonII { unsigned const TypeCVI_FIRST = TypeCVI_HIST; - unsigned const TypeCVI_LAST = TypeCVI_VX_DV; + unsigned const TypeCVI_LAST = TypeCVI_VX_LATE; enum SubTarget { HasV4SubT = 0x3f, @@ -128,10 +128,6 @@ namespace HexagonII { ExtentAlignPos = 33, ExtentAlignMask = 0x3, - // Valid subtargets - validSubTargetPos = 35, - validSubTargetMask = 0x3f, - // Addressing mode for load/store instructions. AddrModePos = 41, AddrModeMask = 0x7, @@ -163,7 +159,10 @@ namespace HexagonII { PrefersSlot3Mask = 0x1, CofMax1Pos = 60, - CofMax1Mask = 0x1 + CofMax1Mask = 0x1, + + CVINewPos = 61, + CVINewMask = 0x1 }; // *** The code above must match HexagonInstrFormat*.td *** // diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 944e235e72f2127bb89b7833f7ce1396c5848618..b975e31310946a91216de723c3baffbc5efea6b8 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -284,6 +284,16 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_HEX_TPREL_11_X; case fixup_Hexagon_23_REG: return ELF::R_HEX_23_REG; + case fixup_Hexagon_27_REG: + return ELF::R_HEX_27_REG; + case fixup_Hexagon_GD_PLT_B22_PCREL_X: + return ELF::R_HEX_GD_PLT_B22_PCREL_X; + case fixup_Hexagon_GD_PLT_B32_PCREL_X: + return ELF::R_HEX_GD_PLT_B32_PCREL_X; + case fixup_Hexagon_LD_PLT_B22_PCREL_X: + return ELF::R_HEX_LD_PLT_B22_PCREL_X; + case fixup_Hexagon_LD_PLT_B32_PCREL_X: + return ELF::R_HEX_LD_PLT_B32_PCREL_X; } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h index 4c97ebbdd346c321b6768177d153b1b74cfa11fb..347327669ad9cf9fee092fe884c2071f95ea6b3b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h @@ -111,6 +111,11 @@ enum Fixups { fixup_Hexagon_TPREL_16_X, fixup_Hexagon_TPREL_11_X, fixup_Hexagon_23_REG, + fixup_Hexagon_27_REG, + fixup_Hexagon_GD_PLT_B22_PCREL_X, + fixup_Hexagon_GD_PLT_B32_PCREL_X, + fixup_Hexagon_LD_PLT_B22_PCREL_X, + fixup_Hexagon_LD_PLT_B32_PCREL_X, LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index dd790fd41257d40b4fc4ee4f5a769189c01fd95b..1929152129fa70bd3531cf28c527424d49c18368 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "HexagonAsmPrinter.h" #include "HexagonInstPrinter.h" +#include "HexagonAsmPrinter.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCAsmInfo.h" diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index 62b21c419f30334bf75d890b63b0fbcc5e94f94a..3bb658b844516a70236465357c555673dc73bd4a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -16,23 +16,27 @@ #include "HexagonBaseInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt RelaxNVChecks("relax-nv-checks", cl::init(false), - cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity")); +static cl::opt + RelaxNVChecks("relax-nv-checks", cl::init(false), cl::ZeroOrMore, + cl::Hidden, cl::desc("Relax checks of new-value validity")); const HexagonMCChecker::PredSense - HexagonMCChecker::Unconditional(Hexagon::NoRegister, false); + HexagonMCChecker::Unconditional(Hexagon::NoRegister, false); void HexagonMCChecker::init() { // Initialize read-only registers set. ReadOnly.insert(Hexagon::PC); + ReadOnly.insert(Hexagon::C9_8); // Figure out the loop-registers definitions. if (HexagonMCInstrInfo::isInnerLoop(MCB)) { @@ -46,13 +50,12 @@ void HexagonMCChecker::init() { if (HexagonMCInstrInfo::isBundle(MCB)) // Unfurl a bundle. - for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { MCInst const &Inst = *I.getInst(); if (HexagonMCInstrInfo::isDuplex(MCII, Inst)) { init(*Inst.getOperand(0).getInst()); init(*Inst.getOperand(1).getInst()); - } - else + } else init(Inst); } else @@ -69,20 +72,18 @@ void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg, // Note use of new predicate register. if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) NewPreds.insert(PredReg); - } - else + } else // Note register use. Super-registers are not tracked directly, // but their components. - for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); - SRI.isValid(); - ++SRI) + for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); ++SRI) if (!MCSubRegIterator(*SRI, &RI).isValid()) // Skip super-registers used indirectly. Uses.insert(*SRI); } -void HexagonMCChecker::init(MCInst const& MCI) { - const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI); +void HexagonMCChecker::init(MCInst const &MCI) { + const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MCI); unsigned PredReg = Hexagon::NoRegister; bool isTrue = false; @@ -109,10 +110,10 @@ void HexagonMCChecker::init(MCInst const& MCI) { if (Hexagon::USR_OVF == R) // Many insns change the USR implicitly, but only one or another flag. - // The instruction table models the USR.OVF flag, which can be implicitly - // modified more than once, but cannot be modified in the same packet - // with an instruction that modifies is explicitly. Deal with such situ- - // ations individually. + // The instruction table models the USR.OVF flag, which can be + // implicitly modified more than once, but cannot be modified in the + // same packet with an instruction that modifies is explicitly. Deal + // with such situations individually. SoftDefs.insert(R); else if (isPredicateRegister(R) && HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) @@ -124,8 +125,7 @@ void HexagonMCChecker::init(MCInst const& MCI) { // Figure out explicit register definitions. for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { - unsigned R = MCI.getOperand(i).getReg(), - S = Hexagon::NoRegister; + unsigned R = MCI.getOperand(i).getReg(), S = Hexagon::NoRegister; // USR has subregisters (while C8 does not for technical reasons), so // reset R to USR, since we know how to handle multiple defs of USR, // taking into account its subregisters. @@ -134,9 +134,8 @@ void HexagonMCChecker::init(MCInst const& MCI) { // Note register definitions, direct ones as well as indirect side-effects. // Super-registers are not tracked directly, but their components. - for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); - SRI.isValid(); - ++SRI) { + for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); ++SRI) { if (MCSubRegIterator(*SRI, &RI).isValid()) // Skip super-registers defined indirectly. continue; @@ -156,22 +155,19 @@ void HexagonMCChecker::init(MCInst const& MCI) { // Only an explicit definition of P3:0 is noted as such; if a // side-effect, then note as a soft definition. SoftDefs.insert(*SRI); - else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI)) + else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && + isPredicateRegister(*SRI)) // Some insns produce predicates too late to be used in the same packet. LatePreds.insert(*SRI); - else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD) - // Current loads should be used in the same packet. - // TODO: relies on the impossibility of a current and a temporary loads - // in the same packet. - CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue)); - else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD) + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == + HexagonII::TypeCVI_VM_TMP_LD) // Temporary loads should be used in the same packet, but don't commit // results, so it should be disregarded if another insn changes the same // register. // TODO: relies on the impossibility of a current and a temporary loads // in the same packet. TmpDefs.insert(*SRI); - else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) ) + else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and // destination registers with this instruction. same for vdeal(Vx,Vy,Rx) Uses.insert(*SRI); @@ -187,25 +183,25 @@ void HexagonMCChecker::init(MCInst const& MCI) { if (HexagonMCInstrInfo::isCompound(MCII, MCI)) compoundRegisterMap(R); // Compound insns have a limited register range. - for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); - SRI.isValid(); - ++SRI) + for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); ++SRI) if (!MCSubRegIterator(*SRI, &RI).isValid()) // No super-registers defined indirectly. - NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), - HexagonMCInstrInfo::isFloat(MCII, MCI))); + NewDefs[*SRI].push_back(NewSense::Def( + PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); // For fairly unique 2-dot-new producers, example: // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers. if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) { unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg(); - for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid()); - SRI.isValid(); - ++SRI) + bool HasSubRegs = MCSubRegIterator(R2, &RI).isValid(); + for (MCRegAliasIterator SRI(R2, &RI, !HasSubRegs); SRI.isValid(); ++SRI) if (!MCSubRegIterator(*SRI, &RI).isValid()) - NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), - HexagonMCInstrInfo::isFloat(MCII, MCI))); + NewDefs[*SRI].push_back(NewSense::Def( + PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); } } @@ -227,18 +223,19 @@ void HexagonMCChecker::init(MCInst const& MCI) { // Super-registers cannot use new values. if (MCID.isBranch()) NewUses[N] = NewSense::Jmp( - llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ); + llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ); else NewUses[N] = NewSense::Use( - PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); + PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); } } } -HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx, - MCRegisterInfo const &ri) - : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI), - bLoadErrInfo(false) { +HexagonMCChecker::HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &mcb, + MCRegisterInfo const &ri, bool ReportErrors) + : Context(Context), MCB(mcb), RI(ri), MCII(MCII), STI(STI), + ReportErrors(ReportErrors) { init(); } @@ -247,24 +244,120 @@ bool HexagonMCChecker::check(bool FullCheck) { bool chkP = checkPredicates(); bool chkNV = checkNewValues(); bool chkR = checkRegisters(); + bool chkRRO = checkRegistersReadOnly(); + bool chkELB = checkEndloopBranches(); + checkRegisterCurDefs(); bool chkS = checkSolo(); bool chkSh = true; if (FullCheck) - chkSh = checkShuffle(); + chkSh = checkShuffle(); bool chkSl = true; if (FullCheck) - chkSl = checkSlots(); - bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl; + chkSl = checkSlots(); + bool chkAXOK = checkAXOK(); + bool chk = chkB && chkP && chkNV && chkR && chkRRO && chkELB && chkS && + chkSh && chkSl && chkAXOK; return chk; } -bool HexagonMCChecker::checkSlots() +bool HexagonMCChecker::checkEndloopBranches() { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, I); + if (Desc.isBranch() || Desc.isCall()) { + auto Inner = HexagonMCInstrInfo::isInnerLoop(MCB); + if (Inner || HexagonMCInstrInfo::isOuterLoop(MCB)) { + reportError(I.getLoc(), + llvm::Twine("packet marked with `:endloop") + + (Inner ? "0" : "1") + "' " + + "cannot contain instructions that modify register " + + "`" + llvm::Twine(RI.getName(Hexagon::PC)) + "'"); + return false; + } + } + } + return true; +} + +namespace { +bool isDuplexAGroup(unsigned Opcode) { + switch (Opcode) { + case Hexagon::SA1_addi: + case Hexagon::SA1_addrx: + case Hexagon::SA1_addsp: + case Hexagon::SA1_and1: + case Hexagon::SA1_clrf: + case Hexagon::SA1_clrfnew: + case Hexagon::SA1_clrt: + case Hexagon::SA1_clrtnew: + case Hexagon::SA1_cmpeqi: + case Hexagon::SA1_combine0i: + case Hexagon::SA1_combine1i: + case Hexagon::SA1_combine2i: + case Hexagon::SA1_combine3i: + case Hexagon::SA1_combinerz: + case Hexagon::SA1_combinezr: + case Hexagon::SA1_dec: + case Hexagon::SA1_inc: + case Hexagon::SA1_seti: + case Hexagon::SA1_setin1: + case Hexagon::SA1_sxtb: + case Hexagon::SA1_sxth: + case Hexagon::SA1_tfr: + case Hexagon::SA1_zxtb: + case Hexagon::SA1_zxth: + return true; + break; + default: + return false; + } +} + +bool isNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) { + unsigned Result = 0; + unsigned Type = HexagonMCInstrInfo::getType(MCII, ID); + if (Type == HexagonII::TypeDUPLEX) { + unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode(); + unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode(); + Result += !isDuplexAGroup(subInst0Opcode); + Result += !isDuplexAGroup(subInst1Opcode); + } else + Result += + Type != HexagonII::TypeALU32_2op && Type != HexagonII::TypeALU32_3op && + Type != HexagonII::TypeALU32_ADDI && Type != HexagonII::TypeS_2op && + Type != HexagonII::TypeS_3op && + (Type != HexagonII::TypeALU64 || HexagonMCInstrInfo::isFloat(MCII, ID)); + return Result != 0; +} +} // namespace + +bool HexagonMCChecker::checkAXOK() { + MCInst const *HasSoloAXInst = nullptr; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (HexagonMCInstrInfo::isSoloAX(MCII, I)) { + HasSoloAXInst = &I; + } + } + if (!HasSoloAXInst) + return true; + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (&I != HasSoloAXInst && isNeitherAnorX(MCII, I)) { + reportError( + HasSoloAXInst->getLoc(), + llvm::Twine("Instruction can only be in a packet with ALU or " + "non-FPU XTYPE instructions")); + reportError(I.getLoc(), + llvm::Twine("Not an ALU or non-FPU XTYPE instruction")); + return false; + } + } + return true; +} -{ +bool HexagonMCChecker::checkSlots() { unsigned slotsUsed = 0; - for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) { - MCInst const& MCI = *HMI.getInst(); + for (auto HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { + MCInst const &MCI = *HMI.getInst(); if (HexagonMCInstrInfo::isImmext(MCI)) continue; if (HexagonMCInstrInfo::isDuplex(MCII, MCI)) @@ -274,9 +367,7 @@ bool HexagonMCChecker::checkSlots() } if (slotsUsed > HEXAGON_PACKET_SIZE) { - HexagonMCErrInfo errInfo; - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS); - addErrInfo(errInfo); + reportError("invalid instruction packet: out of slots"); return false; } return true; @@ -284,11 +375,9 @@ bool HexagonMCChecker::checkSlots() // Check legal use of branches. bool HexagonMCChecker::checkBranches() { - HexagonMCErrInfo errInfo; if (HexagonMCInstrInfo::isBundle(MCB)) { bool hasConditional = false; - unsigned Branches = 0, - Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, + unsigned Branches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE; for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset; @@ -310,20 +399,12 @@ bool HexagonMCChecker::checkBranches() { } } - if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too? - if (HexagonMCInstrInfo::isInnerLoop(MCB) || - HexagonMCInstrInfo::isOuterLoop(MCB)) { - // Error out if there's any branch in a loop-end packet. - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC); - addErrInfo(errInfo); - return false; - } if (Branches > 1) if (!hasConditional || Conditional > Unconditional) { // Error out if more than one unconditional branch or // the conditional branch appears after the unconditional one. - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES); - addErrInfo(errInfo); + reportError( + "unconditional branch cannot precede another branch in packet"); return false; } } @@ -333,31 +414,28 @@ bool HexagonMCChecker::checkBranches() { // Check legal use of predicate registers. bool HexagonMCChecker::checkPredicates() { - HexagonMCErrInfo errInfo; // Check for proper use of new predicate registers. - for (const auto& I : NewPreds) { + for (const auto &I : NewPreds) { unsigned P = I; if (!Defs.count(P) || LatePreds.count(P)) { // Error out if the new predicate register is not defined, // or defined "late" // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }"). - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P); - addErrInfo(errInfo); + reportErrorNewValue(P); return false; } } // Check for proper use of auto-anded of predicate registers. - for (const auto& I : LatePreds) { + for (const auto &I : LatePreds) { unsigned P = I; if (LatePreds.count(P) > 1 || Defs.count(P)) { // Error out if predicate register defined "late" multiple times or // defined late and regularly defined // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }". - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P); - addErrInfo(errInfo); + reportErrorRegisters(P); return false; } } @@ -367,15 +445,12 @@ bool HexagonMCChecker::checkPredicates() { // Check legal use of new values. bool HexagonMCChecker::checkNewValues() { - HexagonMCErrInfo errInfo; - memset(&errInfo, 0, sizeof(errInfo)); - for (auto& I : NewUses) { + for (auto &I : NewUses) { unsigned R = I.first; NewSense &US = I.second; if (!hasValidNewValueDef(US, NewDefs[R])) { - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R); - addErrInfo(errInfo); + reportErrorNewValue(R); return false; } } @@ -383,25 +458,61 @@ bool HexagonMCChecker::checkNewValues() { return true; } +bool HexagonMCChecker::checkRegistersReadOnly() { + for (auto I : HexagonMCInstrInfo::bundleInstructions(MCB)) { + MCInst const &Inst = *I.getInst(); + unsigned Defs = HexagonMCInstrInfo::getDesc(MCII, Inst).getNumDefs(); + for (unsigned j = 0; j < Defs; ++j) { + MCOperand const &Operand = Inst.getOperand(j); + assert(Operand.isReg() && "Def is not a register"); + unsigned Register = Operand.getReg(); + if (ReadOnly.find(Register) != ReadOnly.end()) { + reportError(Inst.getLoc(), "Cannot write to read-only register `" + + llvm::Twine(RI.getName(Register)) + "'"); + return false; + } + } + } + return true; +} + +bool HexagonMCChecker::registerUsed(unsigned Register) { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) + for (unsigned j = HexagonMCInstrInfo::getDesc(MCII, I).getNumDefs(), + n = I.getNumOperands(); + j < n; ++j) { + MCOperand const &Operand = I.getOperand(j); + if (Operand.isReg() && Operand.getReg() == Register) + return true; + } + return false; +} + +void HexagonMCChecker::checkRegisterCurDefs() { + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (HexagonMCInstrInfo::isCVINew(MCII, I) && + HexagonMCInstrInfo::getDesc(MCII, I).mayLoad()) { + unsigned Register = I.getOperand(0).getReg(); + if (!registerUsed(Register)) + reportWarning("Register `" + llvm::Twine(RI.getName(Register)) + + "' used with `.cur' " + "but not used in the same packet"); + } + } +} + // Check for legal register uses and definitions. bool HexagonMCChecker::checkRegisters() { - HexagonMCErrInfo errInfo; // Check for proper register definitions. - for (const auto& I : Defs) { + for (const auto &I : Defs) { unsigned R = I.first; - if (ReadOnly.count(R)) { - // Error out for definitions of read-only registers. - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R); - addErrInfo(errInfo); - return false; - } if (isLoopRegister(R) && Defs.count(R) > 1 && (HexagonMCInstrInfo::isInnerLoop(MCB) || HexagonMCInstrInfo::isOuterLoop(MCB))) { // Error out for definitions of loop registers at the end of a loop. - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R); - addErrInfo(errInfo); + reportError("loop-setup and some branch instructions " + "cannot be in the same packet"); return false; } if (SoftDefs.count(R)) { @@ -409,8 +520,7 @@ bool HexagonMCChecker::checkRegisters() { // (e.g., "{ usr = r0; r0 = sfadd(...) }"). unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:. unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); - addErrInfo(errInfo); + reportErrorRegisters(BadR); return false; } if (!isPredicateRegister(R) && Defs[R].size() > 1) { @@ -423,20 +533,18 @@ bool HexagonMCChecker::checkRegisters() { // changes, conditional or not. unsigned UsrR = Hexagon::USR; unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R; - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR); - addErrInfo(errInfo); + reportErrorRegisters(BadR); return false; } // Check for multiple conditional register definitions. - for (const auto& J : PM) { + for (const auto &J : PM) { PredSense P = J; // Check for multiple uses of the same condition. if (PM.count(P) > 1) { // Error out on conditional changes based on the same predicate // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }"). - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); - addErrInfo(errInfo); + reportErrorRegisters(R); return false; } // Check for the use of the complementary condition. @@ -444,44 +552,33 @@ bool HexagonMCChecker::checkRegisters() { if (PM.count(P) && PM.size() > 2) { // Error out on conditional changes based on the same predicate // multiple times - // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =... }"). - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R); - addErrInfo(errInfo); + // (e.g., "if (p0) r0 =...; if (!p0) r0 =... }; if (!p0) r0 =..."). + reportErrorRegisters(R); return false; } } } } - // Check for use of current definitions. - for (const auto& I : CurDefs) { - unsigned R = I; - - if (!Uses.count(R)) { - // Warn on an unused current definition. - errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R); - addErrInfo(errInfo); - return true; - } - } - // Check for use of temporary definitions. - for (const auto& I : TmpDefs) { + for (const auto &I : TmpDefs) { unsigned R = I; if (!Uses.count(R)) { // special case for vhist bool vHistFound = false; - for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { - if(llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) { - vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp + for (auto const &HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) { + if (llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == + HexagonII::TypeCVI_HIST) { + vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp break; } } // Warn on an unused temporary definition. if (vHistFound == false) { - errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R); - addErrInfo(errInfo); + reportWarning("register `" + llvm::Twine(RI.getName(R)) + + "' used with `.tmp' " + "but not used in the same packet"); return true; } } @@ -492,45 +589,25 @@ bool HexagonMCChecker::checkRegisters() { // Check for legal use of solo insns. bool HexagonMCChecker::checkSolo() { - HexagonMCErrInfo errInfo; - if (HexagonMCInstrInfo::isBundle(MCB) && - HexagonMCInstrInfo::bundleSize(MCB) > 1) { - for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) { - if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) { - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO); - addErrInfo(errInfo); + if (HexagonMCInstrInfo::bundleSize(MCB) > 1) + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) { + if (llvm::HexagonMCInstrInfo::isSolo(MCII, I)) { + reportError(I.getLoc(), "Instruction is marked `isSolo' and " + "cannot have other instructions in " + "the same packet"); return false; } } - } return true; } bool HexagonMCChecker::checkShuffle() { - HexagonMCErrInfo errInfo; - // Branch info is lost when duplexing. The unduplexed insns must be - // checked and only branch errors matter for this case. - HexagonMCShuffler MCS(true, MCII, STI, MCB); - if (!MCS.check()) { - if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) { - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); - errInfo.setShuffleError(MCS.getError()); - addErrInfo(errInfo); - return false; - } - } - HexagonMCShuffler MCSDX(true, MCII, STI, MCBDX); - if (!MCSDX.check()) { - errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE); - errInfo.setShuffleError(MCSDX.getError()); - addErrInfo(errInfo); - return false; - } - return true; + HexagonMCShuffler MCSDX(Context, ReportErrors, MCII, STI, MCB); + return MCSDX.check(); } -void HexagonMCChecker::compoundRegisterMap(unsigned& Register) { +void HexagonMCChecker::compoundRegisterMap(unsigned &Register) { switch (Register) { default: break; @@ -562,7 +639,7 @@ void HexagonMCChecker::compoundRegisterMap(unsigned& Register) { } bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use, - const NewSenseList &Defs) const { + const NewSenseList &Defs) const { bool Strict = !RelaxNVChecks; for (unsigned i = 0, n = Defs.size(); i < n; ++i) { @@ -590,3 +667,30 @@ bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use, return false; } +void HexagonMCChecker::reportErrorRegisters(unsigned Register) { + reportError("register `" + llvm::Twine(RI.getName(Register)) + + "' modified more than once"); +} + +void HexagonMCChecker::reportErrorNewValue(unsigned Register) { + reportError("register `" + llvm::Twine(RI.getName(Register)) + + "' used with `.new' " + "but not validly modified in the same packet"); +} + +void HexagonMCChecker::reportError(llvm::Twine const &Msg) { + reportError(MCB.getLoc(), Msg); +} + +void HexagonMCChecker::reportError(SMLoc Loc, llvm::Twine const &Msg) { + if (ReportErrors) + Context.reportError(Loc, Msg); +} + +void HexagonMCChecker::reportWarning(llvm::Twine const &Msg) { + if (ReportErrors) { + auto SM = Context.getSourceManager(); + if (SM) + SM->PrintMessage(MCB.getLoc(), SourceMgr::DK_Warning, Msg); + } +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index c3b3d4c14c8812089a0769a60e1c4b8710981e41..027f78b4899ce55d72579fdf7e0896fa6058ebb8 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -24,59 +24,14 @@ using namespace llvm; namespace llvm { class MCOperandInfo; -typedef struct { - unsigned Error, Warning, ShuffleError; - unsigned Register; -} ErrInfo_T; - -class HexagonMCErrInfo { -public: - enum { - CHECK_SUCCESS = 0, - // Errors. - CHECK_ERROR_BRANCHES = 0x00001, - CHECK_ERROR_NEWP = 0x00002, - CHECK_ERROR_NEWV = 0x00004, - CHECK_ERROR_REGISTERS = 0x00008, - CHECK_ERROR_READONLY = 0x00010, - CHECK_ERROR_LOOP = 0x00020, - CHECK_ERROR_ENDLOOP = 0x00040, - CHECK_ERROR_SOLO = 0x00080, - CHECK_ERROR_SHUFFLE = 0x00100, - CHECK_ERROR_NOSLOTS = 0x00200, - CHECK_ERROR_UNKNOWN = 0x00400, - // Warnings. - CHECK_WARN_CURRENT = 0x10000, - CHECK_WARN_TEMPORARY = 0x20000 - }; - ErrInfo_T s; - - void reset() { - s.Error = CHECK_SUCCESS; - s.Warning = CHECK_SUCCESS; - s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS; - s.Register = Hexagon::NoRegister; - }; - HexagonMCErrInfo() { - reset(); - }; - - void setError(unsigned e, unsigned r = Hexagon::NoRegister) - { s.Error = e; s.Register = r; }; - void setWarning(unsigned w, unsigned r = Hexagon::NoRegister) - { s.Warning = w; s.Register = r; }; - void setShuffleError(unsigned e) { s.ShuffleError = e; }; -}; - /// Check for a valid bundle. class HexagonMCChecker { - /// Insn bundle. - MCInst& MCB; - MCInst& MCBDX; - const MCRegisterInfo& RI; + MCContext &Context; + MCInst &MCB; + const MCRegisterInfo &RI; MCInstrInfo const &MCII; MCSubtargetInfo const &STI; - bool bLoadErrInfo; + bool ReportErrors; /// Set of definitions: register #, if predicated, if predicated true. typedef std::pair PredSense; @@ -99,23 +54,23 @@ class HexagonMCChecker { bool IsFloat, IsNVJ, Cond; // The special-case "constructors": static NewSense Jmp(bool isNVJ) { - NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ, - /*Cond=*/ false }; + NewSense NS = {/*PredReg=*/0, /*IsFloat=*/false, /*IsNVJ=*/isNVJ, + /*Cond=*/false}; return NS; } static NewSense Use(unsigned PR, bool True) { - NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false, - /*Cond=*/ True }; + NewSense NS = {/*PredReg=*/PR, /*IsFloat=*/false, /*IsNVJ=*/false, + /*Cond=*/True}; return NS; } static NewSense Def(unsigned PR, bool True, bool Float) { - NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false, - /*Cond=*/ True }; + NewSense NS = {/*PredReg=*/PR, /*IsFloat=*/Float, /*IsNVJ=*/false, + /*Cond=*/True}; return NS; } }; /// Set of definitions that produce new register: - typedef llvm::SmallVector NewSenseList; + typedef llvm::SmallVector NewSenseList; typedef llvm::DenseMap::iterator NewDefsIterator; llvm::DenseMap NewDefs; @@ -123,10 +78,6 @@ class HexagonMCChecker { typedef std::set::iterator SoftDefsIterator; std::set SoftDefs; - /// Set of current definitions committed to the register file. - typedef std::set::iterator CurDefsIterator; - std::set CurDefs; - /// Set of temporary definitions not committed to the register file. typedef std::set::iterator TmpDefsIterator; std::set TmpDefs; @@ -151,69 +102,51 @@ class HexagonMCChecker { typedef std::set::iterator ReadOnlyIterator; std::set ReadOnly; - std::queue ErrInfoQ; - HexagonMCErrInfo CrntErrInfo; - - void getErrInfo() { - if (bLoadErrInfo == true) { - if (ErrInfoQ.empty()) { - CrntErrInfo.reset(); - } else { - CrntErrInfo.s = ErrInfoQ.front(); - ErrInfoQ.pop(); - } - } - bLoadErrInfo = false; - } - void init(); - void init(MCInst const&); + void init(MCInst const &); void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue); + bool registerUsed(unsigned Register); + // Checks performed. bool checkBranches(); bool checkPredicates(); bool checkNewValues(); bool checkRegisters(); + bool checkRegistersReadOnly(); + bool checkEndloopBranches(); + void checkRegisterCurDefs(); bool checkSolo(); bool checkShuffle(); bool checkSlots(); - bool checkSize(); + bool checkAXOK(); - static void compoundRegisterMap(unsigned&); + static void compoundRegisterMap(unsigned &); bool isPredicateRegister(unsigned R) const { - return (Hexagon::P0 == R || Hexagon::P1 == R || - Hexagon::P2 == R || Hexagon::P3 == R); + return (Hexagon::P0 == R || Hexagon::P1 == R || Hexagon::P2 == R || + Hexagon::P3 == R); }; bool isLoopRegister(unsigned R) const { - return (Hexagon::SA0 == R || Hexagon::LC0 == R || - Hexagon::SA1 == R || Hexagon::LC1 == R); + return (Hexagon::SA0 == R || Hexagon::LC0 == R || Hexagon::SA1 == R || + Hexagon::LC1 == R); }; - bool hasValidNewValueDef(const NewSense &Use, - const NewSenseList &Defs) const; + bool hasValidNewValueDef(const NewSense &Use, const NewSenseList &Defs) const; - public: - explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx, - const MCRegisterInfo& ri); +public: + explicit HexagonMCChecker(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &mcb, + const MCRegisterInfo &ri, bool ReportErrors = true); bool check(bool FullCheck = true); - - /// add a new error/warning - void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); }; - - /// Return the error code for the last operation in the insn bundle. - unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; }; - unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; }; - unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; }; - unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; }; - bool getNextErrInfo() { - bLoadErrInfo = true; - return (ErrInfoQ.empty()) ? false : (getErrInfo(), true); - } + void reportErrorRegisters(unsigned Register); + void reportErrorNewValue(unsigned Register); + void reportError(SMLoc Loc, llvm::Twine const &Msg); + void reportError(llvm::Twine const &Msg); + void reportWarning(llvm::Twine const &Msg); }; -} +} // namespace llvm #endif // HEXAGONMCCHECKER_H diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index c0956520de738a6b6eacbc010bc8a0dd1c6c5032..50f00d1aaeacf21438ee04af3bcab564c67acec5 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/HexagonMCCodeEmitter.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonFixupKinds.h" -#include "MCTargetDesc/HexagonMCCodeEmitter.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/Statistic.h" @@ -199,6 +199,11 @@ Hexagon::Fixups HexagonMCCodeEmitter::getFixupNoBits( return Hexagon::fixup_Hexagon_IE_GOT_32_6_X; case MCSymbolRefExpr::VK_Hexagon_PCREL: return Hexagon::fixup_Hexagon_B32_PCREL_X; + case MCSymbolRefExpr::VK_Hexagon_GD_PLT: + return Hexagon::fixup_Hexagon_GD_PLT_B32_PCREL_X; + case MCSymbolRefExpr::VK_Hexagon_LD_PLT: + return Hexagon::fixup_Hexagon_LD_PLT_B32_PCREL_X; + case MCSymbolRefExpr::VK_None: { auto Insts = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); for (auto I = Insts.begin(), N = Insts.end(); I != N; ++I) { @@ -318,6 +323,8 @@ namespace { case fixup_Hexagon_PLT_B22_PCREL: case fixup_Hexagon_GD_PLT_B22_PCREL: case fixup_Hexagon_LD_PLT_B22_PCREL: + case fixup_Hexagon_GD_PLT_B22_PCREL_X: + case fixup_Hexagon_LD_PLT_B22_PCREL_X: case fixup_Hexagon_6_PCREL_X: return true; default: @@ -414,10 +421,12 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, case 22: switch (kind) { case MCSymbolRefExpr::VK_Hexagon_GD_PLT: - FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; + FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X + : Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; break; case MCSymbolRefExpr::VK_Hexagon_LD_PLT: - FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; + FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X + : Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; break; case MCSymbolRefExpr::VK_None: FixupKind = *Extended ? Hexagon::fixup_Hexagon_B22_PCREL_X @@ -467,8 +476,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, } else switch (kind) { case MCSymbolRefExpr::VK_None: { - if (HexagonMCInstrInfo::s23_2_reloc(*MO.getExpr())) - FixupKind = Hexagon::fixup_Hexagon_23_REG; + if (HexagonMCInstrInfo::s27_2_reloc(*MO.getExpr())) + FixupKind = Hexagon::fixup_Hexagon_27_REG; else if (MCID.mayStore() || MCID.mayLoad()) { for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; @@ -593,6 +602,12 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, case MCSymbolRefExpr::VK_Hexagon_LD_GOT: FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X; break; + case MCSymbolRefExpr::VK_Hexagon_GD_PLT: + FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X; + break; + case MCSymbolRefExpr::VK_Hexagon_LD_PLT: + FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X; + break; case MCSymbolRefExpr::VK_None: FixupKind = Hexagon::fixup_Hexagon_11_X; break; @@ -773,14 +788,6 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, if (HexagonMCInstrInfo::isSubInstruction(MI) || llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCJ) return HexagonMCInstrInfo::getDuplexRegisterNumbering(Reg); - switch(MI.getOpcode()){ - case Hexagon::A2_tfrrcr: - case Hexagon::A2_tfrcrr: - if(Reg == Hexagon::M0) - Reg = Hexagon::C6; - if(Reg == Hexagon::M1) - Reg = Hexagon::C7; - } return MCT.getRegisterInfo()->getEncodingValue(Reg); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp index ffa980ca6563cd6c9d975c44ecee24dbc7e8b06d..127c97e342dce5f3eb4f1b6e48fb98b372e2c311 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -406,7 +406,7 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo co if (MCI.size() < 2) return; - bool StartedValid = llvm::HexagonMCShuffle(false, MCII, STI, MCI); + bool StartedValid = llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI); // Create a vector, needed to keep the order of jump instructions. MCInst CheckList(MCI); @@ -420,8 +420,9 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo co // Need to update the bundle. MCI = CheckList; - if (StartedValid && !llvm::HexagonMCShuffle(false, MCII, STI, MCI)) { - DEBUG(dbgs() << "Found ERROR\n"); + if (StartedValid && + !llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI)) { + DEBUG(dbgs() << "Found ERROR\n"); MCI = OriginalBundle; } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 09819ccedd8fb4007f57fe33448d8a2913a97cdb..47007e08a2ff94ec4c1da597e64c89eee8998958 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -29,7 +30,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include @@ -44,7 +44,7 @@ static cl::opt GPSize cl::init(8)); void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, bool) { assert(MCB.getOpcode() == Hexagon::BUNDLE); assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE); assert(HexagonMCInstrInfo::bundleSize(MCB) > 0); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h index 5cb84a48a3136dbb2c7d7b8a72314d8c8cb63311..024dff1a2f97c3876e4605bf975e780afffcb842 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h @@ -34,7 +34,8 @@ public: MCELFStreamer(Context, TAB, OS, Emitter), MCII (createHexagonMCInstrInfo()) {} - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override; void EmitSymbol(const MCInst &Inst); void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp index 14300edc7e1b295bed426649c96d9b49ba2f3c68..9fbe299d7d520a4bfdcc286fc343671e7b8cfe18 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp @@ -94,9 +94,9 @@ void HexagonMCExpr::setMustNotExtend(bool Val) { } bool HexagonMCExpr::mustNotExtend() const { return MustNotExtend; } -bool HexagonMCExpr::s23_2_reloc() const { return S23_2_reloc; } -void HexagonMCExpr::setS23_2_reloc(bool Val) { - S23_2_reloc = Val; +bool HexagonMCExpr::s27_2_reloc() const { return S27_2_reloc; } +void HexagonMCExpr::setS27_2_reloc(bool Val) { + S27_2_reloc = Val; } bool HexagonMCExpr::classof(MCExpr const *E) { @@ -104,7 +104,7 @@ bool HexagonMCExpr::classof(MCExpr const *E) { } HexagonMCExpr::HexagonMCExpr(MCExpr const *Expr) - : Expr(Expr), MustNotExtend(false), MustExtend(false), S23_2_reloc(false), + : Expr(Expr), MustNotExtend(false), MustExtend(false), S27_2_reloc(false), SignMismatch(false) {} void HexagonMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h index bca40cfaf6f4dfb717b1c193642821b63b9b525b..acfd996ccf82c74dd8634a51075797ae21a013af 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h @@ -29,8 +29,8 @@ public: bool mustExtend() const; void setMustNotExtend(bool Val = true); bool mustNotExtend() const; - void setS23_2_reloc(bool Val = true); - bool s23_2_reloc() const; + void setS27_2_reloc(bool Val = true); + bool s27_2_reloc() const; void setSignMismatch(bool Val = true); bool signMismatch() const; @@ -39,7 +39,7 @@ private: MCExpr const *Expr; bool MustNotExtend; bool MustExtend; - bool S23_2_reloc; + bool S27_2_reloc; bool SignMismatch; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 553ffba508a1ffffff5c23e6a28078cf368eed53..5fe638a9996b08af9181a7dc59bce6ac8aa2d5e1 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -22,6 +22,49 @@ #include "llvm/MC/MCSubtargetInfo.h" namespace llvm { + +Hexagon::PacketIterator::PacketIterator(MCInstrInfo const &MCII, + MCInst const &Inst) + : MCII(MCII), BundleCurrent(Inst.begin() + + HexagonMCInstrInfo::bundleInstructionsOffset), + BundleEnd(Inst.end()), DuplexCurrent(Inst.end()), DuplexEnd(Inst.end()) {} + +Hexagon::PacketIterator::PacketIterator(MCInstrInfo const &MCII, + MCInst const &Inst, std::nullptr_t) + : MCII(MCII), BundleCurrent(Inst.end()), BundleEnd(Inst.end()), + DuplexCurrent(Inst.end()), DuplexEnd(Inst.end()) {} + +Hexagon::PacketIterator &Hexagon::PacketIterator::operator++() { + if (DuplexCurrent != DuplexEnd) { + ++DuplexCurrent; + if (DuplexCurrent == DuplexEnd) { + DuplexCurrent = BundleEnd; + DuplexEnd = BundleEnd; + } + return *this; + } + ++BundleCurrent; + if (BundleCurrent != BundleEnd) { + MCInst const &Inst = *BundleCurrent->getInst(); + if (HexagonMCInstrInfo::isDuplex(MCII, Inst)) { + DuplexCurrent = Inst.begin(); + DuplexEnd = Inst.end(); + } + } + return *this; +} + +MCInst const &Hexagon::PacketIterator::operator*() const { + if (DuplexCurrent != DuplexEnd) + return *DuplexCurrent->getInst(); + return *BundleCurrent->getInst(); +} + +bool Hexagon::PacketIterator::operator==(PacketIterator const &Other) const { + return BundleCurrent == Other.BundleCurrent && BundleEnd == Other.BundleEnd && + DuplexCurrent == Other.DuplexCurrent && DuplexEnd == Other.DuplexEnd; +} + void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value, MCContext &Context) { MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context))); @@ -41,6 +84,14 @@ void HexagonMCInstrInfo::addConstExtender(MCContext &Context, MCB.addOperand(MCOperand::createInst(XMCI)); } +iterator_range +HexagonMCInstrInfo::bundleInstructions(MCInstrInfo const &MCII, + MCInst const &MCI) { + assert(isBundle(MCI)); + return make_range(Hexagon::PacketIterator(MCII, MCI), + Hexagon::PacketIterator(MCII, MCI, nullptr)); +} + iterator_range HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { assert(isBundle(MCI)); @@ -66,7 +117,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, // instructions when possible. if (!HexagonDisableCompound) HexagonMCInstrInfo::tryCompound(MCII, STI, Context, MCB); - HexagonMCShuffle(false, MCII, STI, MCB); + HexagonMCShuffle(Context, false, MCII, STI, MCB); // Examine the packet and convert pairs of instructions to duplex // instructions when possible. MCInst InstBundlePreDuplex = MCInst(MCB); @@ -74,7 +125,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, SmallVector possibleDuplexes; possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, STI, MCB); - HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes); + HexagonMCShuffle(Context, MCII, STI, MCB, possibleDuplexes); } // Examines packet and pad the packet, if needed, when an // end-loop is in the bundle. @@ -87,7 +138,7 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, CheckOk = Check ? Check->check(true) : true; if (!CheckOk) return false; - HexagonMCShuffle(true, MCII, STI, MCB); + HexagonMCShuffle(Context, true, MCII, STI, MCB); return true; } @@ -292,7 +343,7 @@ int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII, } StringRef HexagonMCInstrInfo::getName(MCInstrInfo const &MCII, - MCInst const &MCI) { + MCInst const &MCI) { return MCII.getName(MCI.getOpcode()); } @@ -339,25 +390,6 @@ unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII, return ((F >> HexagonII::TypePos) & HexagonII::TypeMask); } -int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, - MCInst const &MCI) { - const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; - - HexagonII::SubTarget Target = static_cast( - (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask); - - switch (Target) { - default: - return Hexagon::ArchV4; - case HexagonII::HasV5SubT: - return Hexagon::ArchV5; - case HexagonII::HasV55SubT: - return Hexagon::ArchV55; - case HexagonII::HasV60SubT: - return Hexagon::ArchV60; - } -} - /// Return the slots this instruction can execute out of unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, @@ -397,9 +429,8 @@ bool HexagonMCInstrInfo::hasDuplex(MCInstrInfo const &MCII, MCInst const &MCI) { if (!HexagonMCInstrInfo::isBundle(MCI)) return false; - for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { - auto MI = I.getInst(); - if (HexagonMCInstrInfo::isDuplex(MCII, *MI)) + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCI)) { + if (HexagonMCInstrInfo::isDuplex(MCII, I)) return true; } @@ -410,13 +441,12 @@ bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) { return extenderForIndex(MCB, Index) != nullptr; } -bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) { +bool HexagonMCInstrInfo::hasImmExt( MCInst const &MCI) { if (!HexagonMCInstrInfo::isBundle(MCI)) return false; for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) { - auto MI = I.getInst(); - if (isImmext(*MI)) + if (isImmext(*I.getInst())) return true; } @@ -505,6 +535,11 @@ bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII, return (getType(MCII, MCI) == HexagonII::TypeCJ); } +bool HexagonMCInstrInfo::isCVINew(MCInstrInfo const &MCII, MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::CVINewPos) & HexagonII::CVINewMask); +} + bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) { return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) || (Reg >= Hexagon::D8 && Reg <= Hexagon::D11)); @@ -732,16 +767,16 @@ bool HexagonMCInstrInfo::mustNotExtend(MCExpr const &Expr) { HexagonMCExpr const &HExpr = cast(Expr); return HExpr.mustNotExtend(); } -void HexagonMCInstrInfo::setS23_2_reloc(MCExpr const &Expr, bool Val) { +void HexagonMCInstrInfo::setS27_2_reloc(MCExpr const &Expr, bool Val) { HexagonMCExpr &HExpr = const_cast(*llvm::cast(&Expr)); - HExpr.setS23_2_reloc(Val); + HExpr.setS27_2_reloc(Val); } -bool HexagonMCInstrInfo::s23_2_reloc(MCExpr const &Expr) { +bool HexagonMCInstrInfo::s27_2_reloc(MCExpr const &Expr) { HexagonMCExpr const *HExpr = llvm::dyn_cast(&Expr); if (!HExpr) return false; - return HExpr->s23_2_reloc(); + return HExpr->s27_2_reloc(); } void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) { @@ -813,4 +848,4 @@ unsigned HexagonMCInstrInfo::SubregisterBit(unsigned Consumer, return 0x1; return 0; } -} +} // namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 2e989adb5ccbf0830b73a5128934edec325b7110..ca44c3a11ba7ff78b8ed4f2de256ce5b5a3c04bd 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -31,6 +31,25 @@ public: DuplexCandidate(unsigned i, unsigned j, unsigned iClass) : packetIndexI(i), packetIndexJ(j), iClass(iClass) {} }; +namespace Hexagon { +class PacketIterator { + MCInstrInfo const &MCII; + MCInst::const_iterator BundleCurrent; + MCInst::const_iterator BundleEnd; + MCInst::const_iterator DuplexCurrent; + MCInst::const_iterator DuplexEnd; + +public: + PacketIterator(MCInstrInfo const &MCII, MCInst const &Inst); + PacketIterator(MCInstrInfo const &MCII, MCInst const &Inst, std::nullptr_t); + PacketIterator &operator++(); + MCInst const &operator*() const; + bool operator==(PacketIterator const &Other) const; + bool operator!=(PacketIterator const &Other) const { + return !(*this == Other); + } +}; +} // namespace Hexagon namespace HexagonMCInstrInfo { size_t const innerLoopOffset = 0; int64_t const innerLoopMask = 1 << innerLoopOffset; @@ -54,6 +73,8 @@ void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, MCInst const &MCI); // Returns a iterator range of instructions in this bundle +iterator_range +bundleInstructions(MCInstrInfo const &MCII, MCInst const &MCI); iterator_range bundleInstructions(MCInst const &MCI); // Returns the number of instructions in the bundle @@ -131,7 +152,6 @@ MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI); unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI); MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII, MCInst const &MCI); -int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI); // Return the Hexagon ISA class for the insn. unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI); @@ -180,6 +200,7 @@ bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI); // Return whether the instruction needs to be constant extended. bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI); +bool isCVINew(MCInstrInfo const &MCII, MCInst const &MCI); // Is this double register suitable for use in a duplex subinst bool isDblRegForSubInst(unsigned Reg); @@ -262,14 +283,14 @@ bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI); // Replace the instructions inside MCB, represented by Candidate void replaceDuplex(MCContext &Context, MCInst &MCI, DuplexCandidate Candidate); -bool s23_2_reloc(MCExpr const &Expr); +bool s27_2_reloc(MCExpr const &Expr); // Marks a bundle as endloop0 void setInnerLoop(MCInst &MCI); void setMemReorderDisabled(MCInst &MCI); void setMemStoreReorderEnabled(MCInst &MCI); void setMustExtend(MCExpr const &Expr, bool Val = true); void setMustNotExtend(MCExpr const &Expr, bool Val = true); -void setS23_2_reloc(MCExpr const &Expr, bool Val = true); +void setS27_2_reloc(MCExpr const &Expr, bool Val = true); // Marks a bundle as endloop1 void setOuterLoop(MCInst &MCI); @@ -282,7 +303,7 @@ unsigned SubregisterBit(unsigned Consumer, unsigned Producer, // Attempt to find and replace compound pairs void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCContext &Context, MCInst &MCI); -} -} +} // namespace HexagonMCInstrInfo +} // namespace llvm #endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp index 529a5fd5ed82c09606d6b90b52eaafc80c53c36b..b2c7f1569380cf01f68d74cd496882d3bd25f1a2 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "hexagon-shuffle" +#include "MCTargetDesc/HexagonMCShuffler.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "MCTargetDesc/HexagonMCShuffler.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -45,6 +45,7 @@ void HexagonMCShuffler::init(MCInst &MCB) { } } + Loc = MCB.getLoc(); BundleFlags = MCB.getOperand(0).getImm(); } @@ -68,12 +69,14 @@ void HexagonMCShuffler::init(MCInst &MCB, MCInst const &AddMI, append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, AddMI)); } + Loc = MCB.getLoc(); BundleFlags = MCB.getOperand(0).getImm(); } void HexagonMCShuffler::copyTo(MCInst &MCB) { MCB.clear(); MCB.addOperand(MCOperand::createImm(BundleFlags)); + MCB.setLoc(Loc); // Copy the results into the bundle. for (HexagonShuffler::iterator I = begin(); I != end(); ++I) { @@ -89,15 +92,16 @@ bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) { if (shuffle()) { // Copy the results into the bundle. copyTo(MCB); - } else - DEBUG(MCB.dump()); - - return (!getError()); + return true; + } + DEBUG(MCB.dump()); + return false; } -bool llvm::HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII, - MCSubtargetInfo const &STI, MCInst &MCB) { - HexagonMCShuffler MCS(true, MCII, STI, MCB); +bool llvm::HexagonMCShuffle(MCContext &Context, bool Fatal, + MCInstrInfo const &MCII, MCSubtargetInfo const &STI, + MCInst &MCB) { + HexagonMCShuffler MCS(Context, Fatal, MCII, STI, MCB); if (DisableShuffle) // Ignore if user chose so. @@ -117,52 +121,16 @@ bool llvm::HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII, return false; } - // Reorder the bundle and copy the result. - if (!MCS.reshuffleTo(MCB)) { - // Unless there is any error, which should not happen at this point. - unsigned shuffleError = MCS.getError(); - - if (!Fatal && (shuffleError != HexagonShuffler::SHUFFLE_SUCCESS)) - return false; - if (shuffleError != HexagonShuffler::SHUFFLE_SUCCESS) { - errs() << "\nFailing packet:\n"; - for (const auto& I : HexagonMCInstrInfo::bundleInstructions(MCB)) { - MCInst *MI = const_cast(I.getInst()); - errs() << HexagonMCInstrInfo::getName(MCII, *MI) << ' ' << HexagonMCInstrInfo::getDesc(MCII, *MI).getOpcode() << '\n'; - } - errs() << '\n'; - } - - switch (shuffleError) { - default: - llvm_unreachable("unknown error"); - case HexagonShuffler::SHUFFLE_ERROR_INVALID: - llvm_unreachable("invalid packet"); - case HexagonShuffler::SHUFFLE_ERROR_STORES: - llvm_unreachable("too many stores"); - case HexagonShuffler::SHUFFLE_ERROR_LOADS: - llvm_unreachable("too many loads"); - case HexagonShuffler::SHUFFLE_ERROR_BRANCHES: - llvm_unreachable("too many branches"); - case HexagonShuffler::SHUFFLE_ERROR_NOSLOTS: - llvm_unreachable("no suitable slot"); - case HexagonShuffler::SHUFFLE_ERROR_SLOTS: - llvm_unreachable("over-subscribed slots"); - case HexagonShuffler::SHUFFLE_SUCCESS: // Single instruction case. - return true; - } - } - - return true; + return MCS.reshuffleTo(MCB); } -unsigned -llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCContext &Context, MCInst &MCB, +bool +llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &MCB, SmallVector possibleDuplexes) { if (DisableShuffle) - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; if (!HexagonMCInstrInfo::bundleSize(MCB)) { // There once was a bundle: @@ -172,46 +140,44 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, // After the IMPLICIT_DEFs were removed by the asm printer, the bundle // became empty. DEBUG(dbgs() << "Skipping empty bundle"); - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; } else if (!HexagonMCInstrInfo::isBundle(MCB)) { DEBUG(dbgs() << "Skipping stand-alone insn"); - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; } bool doneShuffling = false; - unsigned shuffleError; while (possibleDuplexes.size() > 0 && (!doneShuffling)) { // case of Duplex Found DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val(); MCInst Attempt(MCB); HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry); - HexagonMCShuffler MCS(true, MCII, STI, Attempt); // copy packet to the shuffler + HexagonMCShuffler MCS(Context, false, MCII, STI, Attempt); // copy packet to the shuffler if (MCS.size() == 1) { // case of one duplex // copy the created duplex in the shuffler to the bundle MCS.copyTo(MCB); - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; } // try shuffle with this duplex doneShuffling = MCS.reshuffleTo(MCB); - shuffleError = MCS.getError(); if (doneShuffling) break; } if (doneShuffling == false) { - HexagonMCShuffler MCS(true, MCII, STI, MCB); + HexagonMCShuffler MCS(Context, false, MCII, STI, MCB); doneShuffling = MCS.reshuffleTo(MCB); // shuffle - shuffleError = MCS.getError(); } if (!doneShuffling) - return shuffleError; + return true; - return HexagonShuffler::SHUFFLE_SUCCESS; + return false; } -bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCInst &MCB, MCInst const &AddMI, int fixupCount) { +bool llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &MCB, + MCInst const &AddMI, int fixupCount) { if (!HexagonMCInstrInfo::isBundle(MCB)) return false; @@ -246,16 +212,6 @@ bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, if (bhasDuplex && bundleSize >= maxBundleSize) return false; - HexagonMCShuffler MCS(MCII, STI, MCB, AddMI, false); - if (!MCS.reshuffleTo(MCB)) { - unsigned shuffleError = MCS.getError(); - switch (shuffleError) { - default: - return false; - case HexagonShuffler::SHUFFLE_SUCCESS: // single instruction case - return true; - } - } - - return true; + HexagonMCShuffler MCS(Context, false, MCII, STI, MCB, AddMI, false); + return MCS.reshuffleTo(MCB); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h index 14bbfda4c914a8ab191effae0ca4b9d9f9929e20..dbe85b434dc4d759269e65b78caf0af3ea1af26f 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h @@ -18,24 +18,19 @@ #include "MCTargetDesc/HexagonShuffler.h" namespace llvm { - class MCInst; - // Insn bundle shuffler. class HexagonMCShuffler : public HexagonShuffler { - bool immext_present; - bool duplex_present; - public: - HexagonMCShuffler(bool Fatal, MCInstrInfo const &MCII, + HexagonMCShuffler(MCContext &Context, bool Fatal, MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &MCB) - : HexagonShuffler(MCII, STI) { + : HexagonShuffler(Context, Fatal, MCII, STI) { init(MCB); }; - HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCInst &MCB, MCInst const &AddMI, - bool InsertAtFront) - : HexagonShuffler(MCII, STI) { + HexagonMCShuffler(MCContext &Context, bool Fatal, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &MCB, + MCInst const &AddMI, bool InsertAtFront) + : HexagonShuffler(Context, Fatal, MCII, STI) { init(MCB, AddMI, InsertAtFront); }; @@ -44,22 +39,20 @@ public: // Reorder and copy result to another. bool reshuffleTo(MCInst &MCB); - bool immextPresent() const { return immext_present; }; - bool duplexPresent() const { return duplex_present; }; - private: void init(MCInst &MCB); void init(MCInst &MCB, MCInst const &AddMI, bool InsertAtFront); }; // Invocation of the shuffler. -bool HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII, +bool HexagonMCShuffle(MCContext &Context, bool Fatal, MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &); -bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCInst &, MCInst const &, int); -unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, - MCContext &Context, MCInst &, - SmallVector); -} +bool HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &, MCInst const &, + int); +bool HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, + MCSubtargetInfo const &STI, MCInst &, + SmallVector); +} // namespace llvm #endif // HEXAGONMCSHUFFLER_H diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index bb98c2bbef6d7134a28547264550e27a1a0c528e..1a361548f93861970e351fcc09edeefc9cc67e1e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "Hexagon.h" #include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "MCTargetDesc/HexagonMCAsmInfo.h" #include "MCTargetDesc/HexagonMCELFStreamer.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFStreamer.h" @@ -27,10 +28,9 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 853f76213d38dc5a3ef0c26208e6da3264ae4d98..564d43b45cb87b583fd8ad51903ff5bca4d7b65b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -14,17 +14,18 @@ #define DEBUG_TYPE "hexagon-shuffle" -#include -#include +#include "HexagonShuffler.h" #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "HexagonShuffler.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" +#include "llvm/MC/MCContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include +#include using namespace llvm; @@ -38,7 +39,7 @@ class HexagonBid { unsigned Bid; public: - HexagonBid() : Bid(0){} + HexagonBid() : Bid(0) {} HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; } // Check if the insn priority is overflowed. @@ -87,7 +88,7 @@ unsigned HexagonResource::setWeight(unsigned s) { // Calculate relative weight of the insn for the given slot, weighing it the // heavier the more restrictive the insn is and the lowest the slots that the // insn may be executed in. - if (Key == 0 || Units == 0 || (SlotWeight*s >= 32)) + if (Key == 0 || Units == 0 || (SlotWeight * s >= 32)) return Weight = 0; unsigned Ctpop = countPopulation(Units); @@ -101,19 +102,18 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) { UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VX_LATE] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2); (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); (*TUL)[HexagonII::TypeCVI_VINLANESAT] = - (CPU == "hexagonv60" || CPU == "hexagonv61" || CPU == "hexagonv61v1") ? - UnitsAndLanes(CVI_SHIFT, 1) : - UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (CPU == "hexagonv60") + ? UnitsAndLanes(CVI_SHIFT, 1) + : UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VM_LD] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); - (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] = - UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_VM_ST] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); @@ -154,18 +154,19 @@ typedef SmallVector HVXInstsT; static unsigned makeAllBits(unsigned startBit, unsigned Lanes) { - for (unsigned i = 1 ; i < Lanes ; ++i) + for (unsigned i = 1; i < Lanes; ++i) startBit = (startBit << 1) | startBit; return startBit; } -static bool checkHVXPipes(const HVXInstsT& hvxInsts, unsigned startIdx, unsigned usedUnits) +static bool checkHVXPipes(const HVXInstsT &hvxInsts, unsigned startIdx, + unsigned usedUnits) { if (startIdx < hvxInsts.size()) { if (!hvxInsts[startIdx].Units) return checkHVXPipes(hvxInsts, startIdx + 1, usedUnits); - for (unsigned b = 0x1 ; b <= 0x8 ; b <<= 1) { + for (unsigned b = 0x1; b <= 0x8; b <<= 1) { if ((hvxInsts[startIdx].Units & b) == 0) continue; unsigned allBits = makeAllBits(b, hvxInsts[startIdx].Lanes); @@ -179,9 +180,10 @@ static bool checkHVXPipes(const HVXInstsT& hvxInsts, unsigned startIdx, unsigned return true; } -HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, +HexagonShuffler::HexagonShuffler(MCContext &Context, bool ReportErrors, + MCInstrInfo const &MCII, MCSubtargetInfo const &STI) - : MCII(MCII), STI(STI) { + : Context(Context), MCII(MCII), STI(STI), ReportErrors(ReportErrors) { reset(); HexagonCVIResource::SetupTUL(&TUL, STI.getCPU()); } @@ -189,7 +191,6 @@ HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, void HexagonShuffler::reset() { Packet.clear(); BundleFlags = 0; - Error = SHUFFLE_SUCCESS; } void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender, @@ -202,91 +203,31 @@ void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender, static struct { unsigned first; unsigned second; -} jumpSlots[] = { {8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1} }; -#define MAX_JUMP_SLOTS (sizeof(jumpSlots)/sizeof(jumpSlots[0])) - -namespace { -bool isDuplexAGroup(unsigned Opcode) { - switch (Opcode) { - case Hexagon::SA1_addi: - case Hexagon::SA1_addrx: - case Hexagon::SA1_addsp: - case Hexagon::SA1_and1: - case Hexagon::SA1_clrf: - case Hexagon::SA1_clrfnew: - case Hexagon::SA1_clrt: - case Hexagon::SA1_clrtnew: - case Hexagon::SA1_cmpeqi: - case Hexagon::SA1_combine0i: - case Hexagon::SA1_combine1i: - case Hexagon::SA1_combine2i: - case Hexagon::SA1_combine3i: - case Hexagon::SA1_combinerz: - case Hexagon::SA1_combinezr: - case Hexagon::SA1_dec: - case Hexagon::SA1_inc: - case Hexagon::SA1_seti: - case Hexagon::SA1_setin1: - case Hexagon::SA1_sxtb: - case Hexagon::SA1_sxth: - case Hexagon::SA1_tfr: - case Hexagon::SA1_zxtb: - case Hexagon::SA1_zxth: - return true; - break; - default: - return false; - } -} - -unsigned countNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) { - unsigned Result = 0; - unsigned Type = HexagonMCInstrInfo::getType(MCII, ID); - if (Type == HexagonII::TypeDUPLEX) { - unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode(); - unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode(); - Result += !isDuplexAGroup(subInst0Opcode); - Result += !isDuplexAGroup(subInst1Opcode); - } else - Result += Type != HexagonII::TypeALU32_2op && - Type != HexagonII::TypeALU32_3op && - Type != HexagonII::TypeALU32_ADDI && - Type != HexagonII::TypeS_2op && - Type != HexagonII::TypeS_3op && - Type != HexagonII::TypeALU64 && - (Type != HexagonII::TypeM || - HexagonMCInstrInfo::isFloat(MCII, ID)); - return Result; -} -} +} jumpSlots[] = {{8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1}}; +#define MAX_JUMP_SLOTS (sizeof(jumpSlots) / sizeof(jumpSlots[0])) /// Check that the packet is legal and enforce relative insn order. bool HexagonShuffler::check() { // Descriptive slot masks. const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2, - slotThree = 0x8, //slotFirstJump = 0x8, + slotThree = 0x8, // slotFirstJump = 0x8, slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1; // Highest slots for branches and stores used to keep their original order. - //unsigned slotJump = slotFirstJump; + // unsigned slotJump = slotFirstJump; unsigned slotLoadStore = slotFirstLoadStore; // Number of branches, solo branches, indirect branches. unsigned jumps = 0, jump1 = 0; // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; - // Number of HVX loads, HVX stores. - unsigned CVIloads = 0, CVIstores = 0; - // Number of duplex insns, solo insns. - unsigned duplex = 0, solo = 0; - // Number of insns restricting other insns in the packet to A and X types, - // which is neither A or X types. - unsigned onlyAX = 0, neitherAnorX = 0; + // Number of duplex insns + unsigned duplex = 0; // Number of insns restricting other insns in slot #1 to A type. unsigned onlyAin1 = 0; // Number of insns restricting any insn in slot #1, except A2_nop. unsigned onlyNo1 = 0; - unsigned xtypeFloat = 0; unsigned pSlot3Cnt = 0; + unsigned nvstores = 0; unsigned memops = 0; unsigned deallocs = 0; iterator slot3ISJ = end(); @@ -297,13 +238,8 @@ bool HexagonShuffler::check() { for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { MCInst const &ID = ISJ->getDesc(); - if (HexagonMCInstrInfo::isSolo(MCII, ID)) - solo++; - else if (HexagonMCInstrInfo::isSoloAX(MCII, ID)) - onlyAX++; - else if (HexagonMCInstrInfo::isSoloAin1(MCII, ID)) - onlyAin1++; - neitherAnorX += countNeitherAnorX(MCII, ID); + if (HexagonMCInstrInfo::isSoloAin1(MCII, ID)) + ++onlyAin1; if (HexagonMCInstrInfo::prefersSlot3(MCII, ID)) { ++pSlot3Cnt; slot3ISJ = ISJ; @@ -316,8 +252,6 @@ bool HexagonShuffler::check() { case HexagonII::TypeS_2op: case HexagonII::TypeS_3op: case HexagonII::TypeALU64: - if (HexagonMCInstrInfo::isFloat(MCII, ID)) - ++xtypeFloat; break; case HexagonII::TypeJ: ++jumps; @@ -327,14 +261,11 @@ bool HexagonShuffler::check() { ++onlyNo1; case HexagonII::TypeCVI_VM_LD: case HexagonII::TypeCVI_VM_TMP_LD: - case HexagonII::TypeCVI_VM_CUR_LD: - ++CVIloads; case HexagonII::TypeLD: ++loads; ++memory; if (ISJ->Core.getUnits() == slotSingleLoad || - HexagonMCInstrInfo::getType(MCII, ID) == - HexagonII::TypeCVI_VM_VP_LDU) + HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_VP_LDU) ++load0; if (HexagonMCInstrInfo::getDesc(MCII, ID).isReturn()) { ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. @@ -345,7 +276,6 @@ bool HexagonShuffler::check() { ++onlyNo1; case HexagonII::TypeCVI_VM_ST: case HexagonII::TypeCVI_VM_NEW_ST: - ++CVIstores; case HexagonII::TypeST: ++stores; ++memory; @@ -362,24 +292,23 @@ bool HexagonShuffler::check() { break; case HexagonII::TypeNCJ: ++memory; // NV insns are memory-like. - if (HexagonMCInstrInfo::getDesc(MCII, ID).isBranch()) { - ++jumps, ++jump1; - foundBranches.push_back(ISJ); - } + ++jumps, ++jump1; + foundBranches.push_back(ISJ); break; case HexagonII::TypeV2LDST: - if(HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) { + if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) { ++loads; ++memory; if (ISJ->Core.getUnits() == slotSingleLoad || - HexagonMCInstrInfo::getType(MCII,ID) == + HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_VP_LDU) ++load0; - } - else { + } else { assert(HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()); ++memory; ++stores; + if (HexagonMCInstrInfo::isNewValue(MCII, ID)) + ++nvstores; } break; case HexagonII::TypeCR: @@ -406,30 +335,37 @@ bool HexagonShuffler::check() { ++jumps; foundBranches.push_back(ISJ); } + if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isReturn()) { + ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + foundBranches.push_back(ISJ); + } + if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isReturn()) { + ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. + foundBranches.push_back(ISJ); + } break; } } } // Check if the packet is legal. - if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) || - (duplex > 1 || (duplex && memory)) || (solo && size() > 1) || - (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) { - Error = SHUFFLE_ERROR_INVALID; + if ((load0 > 1 || store0 > 1) || + (duplex > 1 || (duplex && memory))) { + reportError(llvm::Twine("invalid instruction packet")); return false; } if (jump1 && jumps > 1) { // Error if single branch with another branch. - Error = SHUFFLE_ERROR_BRANCHES; + reportError(llvm::Twine("too many branches in packet")); return false; } - if (memops && stores > 1) { - Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT; + if ((nvstores || memops) && stores > 1) { + reportError(llvm::Twine("slot 0 instruction does not allow slot 1 store")); return false; } if (deallocs && stores) { - Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT; + reportError(llvm::Twine("slot 0 instruction does not allow slot 1 store")); return false; } @@ -441,7 +377,6 @@ bool HexagonShuffler::check() { if (!ISJ->Core.getUnits()) { // Error if insn may not be executed in any slot. - Error = SHUFFLE_ERROR_UNKNOWN; return false; } @@ -472,7 +407,8 @@ bool HexagonShuffler::check() { else if (stores > 1) { if (slotLoadStore < slotLastLoadStore) { // Error if no more slots available for stores. - Error = SHUFFLE_ERROR_STORES; + reportError( + llvm::Twine("invalid instruction packet: too many stores")); return false; } // Pin the store to the highest slot available to it. @@ -483,7 +419,7 @@ bool HexagonShuffler::check() { } if (store1 && stores > 1) { // Error if a single store with another store. - Error = SHUFFLE_ERROR_STORES; + reportError(llvm::Twine("invalid instruction packet: too many stores")); return false; } } @@ -494,7 +430,7 @@ bool HexagonShuffler::check() { if (!ISJ->Core.getUnits()) { // Error if insn may not be executed in any slot. - Error = SHUFFLE_ERROR_NOSLOTS; + reportError(llvm::Twine("invalid instruction packet: out of slots")); return false; } } @@ -503,12 +439,12 @@ bool HexagonShuffler::check() { bool validateSlots = true; if (jumps > 1) { if (foundBranches.size() > 2) { - Error = SHUFFLE_ERROR_BRANCHES; + reportError(llvm::Twine("too many branches in packet")); return false; } // try all possible choices - for (unsigned int i = 0 ; i < MAX_JUMP_SLOTS ; ++i) { + for (unsigned int i = 0; i < MAX_JUMP_SLOTS; ++i) { // validate first jump with this slot rule if (!(jumpSlots[i].first & foundBranches[0]->Core.getUnits())) continue; @@ -535,18 +471,18 @@ bool HexagonShuffler::check() { if (!bFail) { validateSlots = false; // all good, no need to re-do auction break; - } - else + } else // restore original values Packet = PacketSave; } if (validateSlots == true) { - Error = SHUFFLE_ERROR_NOSLOTS; + reportError(llvm::Twine("invalid instruction packet: out of slots")); return false; } } - if (jumps <= 1 && bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) { + if (jumps <= 1 && bOnlySlot3 == false && pSlot3Cnt == 1 && + slot3ISJ != end()) { validateSlots = true; // save off slot mask of instruction marked with A_PREFER_SLOT3 // and then pin it to slot #3 @@ -582,7 +518,7 @@ bool HexagonShuffler::check() { for (iterator I = begin(); I != end(); ++I) if (!AuctionCore.bid(I->Core.getUnits())) { - Error = SHUFFLE_ERROR_SLOTS; + reportError(llvm::Twine("invalid instruction packet: slot error")); return false; } } @@ -605,12 +541,11 @@ bool HexagonShuffler::check() { startIdx = usedUnits = 0x0; if (checkHVXPipes(hvxInsts, startIdx, usedUnits) == false) { // too many pipes used to be valid - Error = SHUFFLE_ERROR_SLOTS; + reportError(llvm::Twine("invalid instruction packet: slot error")); return false; } } - Error = SHUFFLE_SUCCESS; return true; } @@ -618,12 +553,13 @@ bool HexagonShuffler::shuffle() { if (size() > HEXAGON_PACKET_SIZE) { // Ignore a packet with with more than what a packet can hold // or with compound or duplex insns for now. - Error = SHUFFLE_ERROR_INVALID; + reportError(llvm::Twine("invalid instruction packet")); return false; } // Check and prepare packet. - if (size() > 1 && check()) + bool Ok = true; + if (size() > 1 && (Ok = check())) // Reorder the handles for each slot. for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE; ++nSlot) { @@ -659,5 +595,10 @@ bool HexagonShuffler::shuffle() { dbgs() << '\n'); DEBUG(dbgs() << '\n'); - return (!getError()); + return Ok; +} + +void HexagonShuffler::reportError(llvm::Twine const &Msg) { + if (ReportErrors) + Context.reportError(Loc, Msg); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index 36e8fa19d4671772ad0ba73ec83f8126d07b6d4e..10a959008f447b2e17b6b052ecf9a2e0eceebb66 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -45,8 +45,7 @@ public: // Check if the resources are in ascending slot order. static bool lessUnits(const HexagonResource &A, const HexagonResource &B) { - return (countPopulation(A.getUnits()) < - countPopulation(B.getUnits())); + return (countPopulation(A.getUnits()) < countPopulation(B.getUnits())); }; // Check if the resources are in ascending weight order. static bool lessWeight(const HexagonResource &A, const HexagonResource &B) { @@ -107,7 +106,7 @@ public: HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T, MCInstrInfo const &MCII, MCInst const *id, MCInst const *Extender, unsigned s) - : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {} + : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {}; MCInst const &getDesc() const { return *ID; }; @@ -136,33 +135,21 @@ class HexagonShuffler { HexagonPacket Packet; HexagonPacket PacketSave; - // Shuffling error code. - unsigned Error; - HexagonCVIResource::TypeUnitsAndLanes TUL; protected: + MCContext &Context; int64_t BundleFlags; MCInstrInfo const &MCII; MCSubtargetInfo const &STI; + SMLoc Loc; + bool ReportErrors; public: typedef HexagonPacket::iterator iterator; - enum { - SHUFFLE_SUCCESS = 0, ///< Successful operation. - SHUFFLE_ERROR_INVALID, ///< Invalid bundle. - SHUFFLE_ERROR_STORES, ///< No free slots for store insns. - SHUFFLE_ERROR_LOADS, ///< No free slots for load insns. - SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns. - SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns. - SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots. - SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60). - SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< store/load conflict - SHUFFLE_ERROR_UNKNOWN ///< Unknown error. - }; - - explicit HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI); + HexagonShuffler(MCContext &Context, bool ReportErrors, + MCInstrInfo const &MCII, MCSubtargetInfo const &STI); // Reset to initial state. void reset(); @@ -180,9 +167,8 @@ public: void append(MCInst const &ID, MCInst const *Extender, unsigned S); // Return the error code for the last check or shuffling of the bundle. - void setError(unsigned Err) { Error = Err; }; - unsigned getError() const { return (Error); }; + void reportError(llvm::Twine const &Msg); }; -} +} // namespace llvm #endif // HEXAGONSHUFFLER_H diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp index 57ce9fabc5e3afa32e9bad50b33418d9b7726f76..ea86ffba58f616316f816aa4a60ae5635e4b6bba 100644 --- a/lib/Target/Hexagon/RDFCopy.cpp +++ b/lib/Target/Hexagon/RDFCopy.cpp @@ -59,7 +59,7 @@ void CopyPropagation::recordCopy(NodeAddr SA, EqualityMap &EM) { bool CopyPropagation::scanBlock(MachineBasicBlock *B) { bool Changed = false; - auto BA = DFG.getFunc().Addr->findBlock(B, DFG); + NodeAddr BA = DFG.findBlock(B); for (NodeAddr IA : BA.Addr->members(DFG)) { if (DFG.IsCode(IA)) { diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp index 9aa8ad68e07e2260427383eb2a284a0e0d1cef8a..60a12dcf2f03d0ce0caede414c3eeb2c08a7118c 100644 --- a/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/lib/Target/Hexagon/RDFDeadCode.cpp @@ -9,9 +9,9 @@ // // RDF-based generic dead code elimination. +#include "RDFDeadCode.h" #include "RDFGraph.h" #include "RDFLiveness.h" -#include "RDFDeadCode.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 2253969290b78af6aece04dbbbabe493ff1a4cf9..8d127237089905dcdf8ea492ea5d44e5f435d3c2 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -10,8 +10,8 @@ // Target-independent, SSA-based data flow graph for register data flow (RDF). // #include "RDFGraph.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" @@ -913,10 +913,11 @@ void DataFlowGraph::build(unsigned Options) { } // Add function-entry phi nodes for the live-in registers. - for (std::pair P : LiveIns) { + //for (std::pair P : LiveIns) { + for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) { + RegisterRef RR = *I; NodeAddr PA = newPhi(EA); uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving; - RegisterRef RR(P.first, P.second); NodeAddr DA = newDef(PA, RR, PhiFlags); PA.Addr->addMember(DA, *this); } @@ -993,9 +994,9 @@ RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const { return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef(); } #ifndef NDEBUG - RegisterRef NAR = PRI.normalize(AR); - RegisterRef NBR = PRI.normalize(BR); - assert(NAR.Reg != NBR.Reg); +// RegisterRef NAR = PRI.normalize(AR); +// RegisterRef NBR = PRI.normalize(BR); +// assert(NAR.Reg != NBR.Reg); #endif // This isn't strictly correct, because the overlap may happen in the // part masked out. @@ -1288,20 +1289,7 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { return true; }; - // Collect a set of registers that this instruction implicitly uses - // or defines. Implicit operands from an instruction will be ignored - // unless they are listed here. - RegisterSet ImpUses, ImpDefs; - if (const uint16_t *ImpD = In.getDesc().getImplicitDefs()) - while (uint16_t R = *ImpD++) - ImpDefs.insert(RegisterRef(R)); - if (const uint16_t *ImpU = In.getDesc().getImplicitUses()) - while (uint16_t R = *ImpU++) - ImpUses.insert(RegisterRef(R)); - bool IsCall = isCall(In); - bool NeedsImplicit = IsCall || In.isInlineAsm() || In.isReturn(); - bool IsPredicated = TII.isPredicated(In); unsigned NumOps = In.getNumOperands(); // Avoid duplicate implicit defs. This will not detect cases of implicit @@ -1363,8 +1351,6 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { if (!R || !TargetRegisterInfo::isPhysicalRegister(R) || DoneDefs.test(R)) continue; RegisterRef RR = makeRegRef(Op); - if (!NeedsImplicit && !ImpDefs.count(RR)) - continue; uint16_t Flags = NodeAttrs::None; if (TOI.isPreserving(In, OpN)) { Flags |= NodeAttrs::Preserving; @@ -1393,14 +1379,6 @@ void DataFlowGraph::buildStmt(NodeAddr BA, MachineInstr &In) { unsigned R = Op.getReg(); if (!R || !TargetRegisterInfo::isPhysicalRegister(R)) continue; - RegisterRef RR = makeRegRef(Op); - // Add implicit uses on return and call instructions, and on predicated - // instructions regardless of whether or not they appear in the instruction - // descriptor's list. - bool Implicit = Op.isImplicit(); - bool TakeImplicit = NeedsImplicit || IsPredicated; - if (Implicit && !TakeImplicit && !ImpUses.count(RR)) - continue; uint16_t Flags = NodeAttrs::None; if (Op.isUndef()) Flags |= NodeAttrs::Undef; diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h index d5faca4cd6f4b5be0321e25055a5076e637fa210..52f390356b265c9764c24dd56302af77155cdef0 100644 --- a/lib/Target/Hexagon/RDFGraph.h +++ b/lib/Target/Hexagon/RDFGraph.h @@ -508,7 +508,8 @@ namespace rdf { static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize, "NodeBase must be at most NodeAllocator::NodeMemSize bytes"); - typedef std::vector> NodeList; +// typedef std::vector> NodeList; + typedef SmallVector,4> NodeList; typedef std::set NodeSet; struct RefNode : public NodeBase { diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp index 25a4c5f7058d58ad6c0fad59c69297bfba4195cb..83e8968086d8ca0ada3d04b1599471d2c1de1f7a 100644 --- a/lib/Target/Hexagon/RDFLiveness.cpp +++ b/lib/Target/Hexagon/RDFLiveness.cpp @@ -23,8 +23,8 @@ // and Embedded Architectures and Compilers", 8 (4), // <10.1145/2086696.2086706>. // -#include "RDFGraph.h" #include "RDFLiveness.h" +#include "RDFGraph.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" @@ -425,6 +425,7 @@ void Liveness::computePhiInfo() { // phi use -> (map: reaching phi -> set of registers defined in between) std::map> PhiUp; std::vector PhiUQ; // Work list of phis for upward propagation. + std::map PhiDRs; // Phi -> registers defined by it. // Go over all phis. for (NodeAddr PhiA : Phis) { @@ -437,12 +438,15 @@ void Liveness::computePhiInfo() { // For each def, add to the queue all reached (non-phi) defs. SetVector DefQ; NodeSet PhiDefs; + RegisterAggr DRs(PRI); for (NodeAddr R : PhiRefs) { if (!DFG.IsRef(R)) continue; + DRs.insert(R.Addr->getRegRef(DFG)); DefQ.insert(R.Id); PhiDefs.insert(R.Id); } + PhiDRs.insert(std::make_pair(PhiA.Id, DRs)); // Collect the super-set of all possible reached uses. This set will // contain all uses reached from this phi, either directly from the @@ -493,26 +497,33 @@ void Liveness::computePhiInfo() { // = R1:0 u6 Not reached by d1 (covered collectively // by d3 and d5), but following reached // defs and uses from d1 will lead here. - auto InPhiDefs = [&PhiDefs] (NodeAddr DA) -> bool { - return PhiDefs.count(DA.Id); - }; for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) { // For each reached register UI->first, there is a set UI->second, of // uses of it. For each such use, check if it is reached by this phi, // i.e. check if the set of its reaching uses intersects the set of // this phi's defs. - NodeRefSet &Uses = UI->second; - for (auto I = Uses.begin(), E = Uses.end(); I != E; ) { - auto UA = DFG.addr(I->first); + NodeRefSet Uses = UI->second; + UI->second.clear(); + for (std::pair I : Uses) { + auto UA = DFG.addr(I.first); // Undef flag is checked above. assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0); - RegisterRef R(UI->first, I->second); - NodeList RDs = getAllReachingDefs(R, UA); - // If none of the reaching defs of R are from this phi, remove this - // use of R. - I = any_of(RDs, InPhiDefs) ? std::next(I) : Uses.erase(I); + RegisterRef R(UI->first, I.second); + // Calculate the exposed part of the reached use. + RegisterAggr Covered(PRI); + for (NodeAddr DA : getAllReachingDefs(R, UA)) { + if (PhiDefs.count(DA.Id)) + break; + Covered.insert(DA.Addr->getRegRef(DFG)); + } + if (RegisterRef RC = Covered.clearIn(R)) { + // We are updating the map for register UI->first, so we need + // to map RC to be expressed in terms of that register. + RegisterRef S = PRI.mapTo(RC, UI->first); + UI->second.insert({I.first, S.Mask}); + } } - UI = Uses.empty() ? RealUses.erase(UI) : std::next(UI); + UI = UI->second.empty() ? RealUses.erase(UI) : std::next(UI); } // If this phi reaches some "real" uses, add it to the queue for upward @@ -615,14 +626,19 @@ void Liveness::computePhiInfo() { // then add (R-MidDefs,U) to RealUseMap[P] // for (const std::pair &T : RUM) { - RegisterRef R = DFG.restrictRef(RegisterRef(T.first), UR); - if (!R) + RegisterRef R(T.first); + // The current phi (PA) could be a phi for a regmask. It could + // reach a whole variety of uses that are not related to the + // specific upward phi (P.first). + const RegisterAggr &DRs = PhiDRs.at(P.first); + if (!DRs.hasAliasOf(R)) continue; + R = PRI.mapTo(DRs.intersectWith(R), T.first); for (std::pair V : T.second) { - RegisterRef S = DFG.restrictRef(RegisterRef(R.Reg, V.second), R); - if (!S) + LaneBitmask M = R.Mask & V.second; + if (M.none()) continue; - if (RegisterRef SS = MidDefs.clearIn(S)) { + if (RegisterRef SS = MidDefs.clearIn(RegisterRef(R.Reg, M))) { NodeRefSet &RS = RealUseMap[P.first][SS.Reg]; Changed |= RS.insert({V.first,SS.Mask}).second; } @@ -750,8 +766,13 @@ void Liveness::computeLiveIns() { // all related shadows as a single use cluster. RegisterRef S(RS.first, P.second); NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs); - for (NodeAddr D : Ds) - LOX[S.Reg].insert({D.Id, S.Mask}); + for (NodeAddr D : Ds) { + // Calculate the mask corresponding to the visited def. + RegisterAggr TA(PRI); + TA.insert(D.Addr->getRegRef(DFG)).intersect(S); + LaneBitmask TM = TA.makeRegRef().Mask; + LOX[S.Reg].insert({D.Id, TM}); + } } } @@ -788,19 +809,9 @@ void Liveness::computeLiveIns() { //dbgs() << "\tcomp = " << Print(LiveMap[&B], DFG) << '\n'; LV.clear(); - for (std::pair P : LiveMap[&B]) { - MCSubRegIndexIterator S(P.first, &TRI); - if (!S.isValid()) { - LV.push_back(RegisterRef(P.first)); - continue; - } - do { - LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex()); - if ((M & P.second).any()) - LV.push_back(RegisterRef(S.getSubReg())); - ++S; - } while (S.isValid()); - } + const RegisterAggr &LG = LiveMap[&B]; + for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I) + LV.push_back(*I); std::sort(LV.begin(), LV.end()); dbgs() << "\tcomp = {"; for (auto I : LV) @@ -821,9 +832,10 @@ void Liveness::resetLiveIns() { for (auto I : T) B.removeLiveIn(I); // Add the newly computed live-ins. - auto &LiveIns = LiveMap[&B]; - for (auto I : LiveIns) { - B.addLiveIn({MCPhysReg(I.first), I.second}); + const RegisterAggr &LiveIns = LiveMap[&B]; + for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) { + RegisterRef R = *I; + B.addLiveIn({MCPhysReg(R.Reg), R.Mask}); } } } @@ -1023,10 +1035,9 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) { // registers are not covering LRef. The first def from the // upward chain will be live. // Subtract all accumulated defs (RRs) from LRef. - RegisterAggr L(PRI); - L.insert(LRef).clear(RRs); - assert(!L.empty()); - NewDefs.insert({TA.Id,L.begin()->second}); + RegisterRef T = RRs.clearIn(LRef); + assert(T); + NewDefs.insert({TA.Id,T.Mask}); break; } diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp index 7a7933c187a5943d9d78071fd565333597dc40ff..2aabf4ee1a38c2dd896dd84d090d6873347dcfa7 100644 --- a/lib/Target/Hexagon/RDFRegisters.cpp +++ b/lib/Target/Hexagon/RDFRegisters.cpp @@ -33,22 +33,33 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri, } } - auto HasPartialOverlaps = [this] (uint32_t Reg) -> bool { - for (MCRegAliasIterator A(Reg, &TRI, false); A.isValid(); ++A) - if (!TRI.isSubRegister(Reg, *A) && !TRI.isSubRegister(*A, Reg)) - return true; - return false; - }; + UnitInfos.resize(TRI.getNumRegUnits()); - for (MCPhysReg R = 1, NR = TRI.getNumRegs(); R != NR; ++R) - RegInfos[R].Partial = HasPartialOverlaps(R); - - for (MCPhysReg R = 1, NR = TRI.getNumRegs(); R != NR; ++R) { - MCPhysReg SuperR = R; - for (MCSuperRegIterator S(R, &TRI, false); S.isValid(); ++S) - if (!RegInfos[*S].Partial) - SuperR = *S; - RegInfos[R].MaxSuper = SuperR; + for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) { + if (UnitInfos[U].Reg != 0) + continue; + MCRegUnitRootIterator R(U, &TRI); + assert(R.isValid()); + RegisterId F = *R; + ++R; + if (R.isValid()) { + UnitInfos[U].Mask = LaneBitmask::getAll(); + UnitInfos[U].Reg = F; + } else { + for (MCRegUnitMaskIterator I(F, &TRI); I.isValid(); ++I) { + std::pair P = *I; + UnitInfo &UI = UnitInfos[P.first]; + UI.Reg = F; + if (P.second.any()) { + UI.Mask = P.second; + } else { + if (const TargetRegisterClass *RC = RegInfos[F].RegClass) + UI.Mask = RC->LaneMask; + else + UI.Mask = LaneBitmask::getAll(); + } + } + } } for (const uint32_t *RM : TRI.getRegMasks()) @@ -58,25 +69,23 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri, for (const MachineOperand &Op : In.operands()) if (Op.isRegMask()) RegMasks.insert(Op.getRegMask()); + + MaskInfos.resize(RegMasks.size()+1); + for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) { + BitVector PU(TRI.getNumRegUnits()); + const uint32_t *MB = RegMasks.get(M); + for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) { + if (!(MB[i/32] & (1u << (i%32)))) + continue; + for (MCRegUnitIterator U(i, &TRI); U.isValid(); ++U) + PU.set(*U); + } + MaskInfos[M].Units = PU.flip(); + } } RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const { - if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) - return RR; - RegisterId SuperReg = RegInfos[RR.Reg].MaxSuper; - if (RR.Reg == SuperReg) - return RR; - - const TargetRegisterClass *RC = RegInfos[RR.Reg].RegClass; - LaneBitmask RCMask = RC != nullptr ? RC->LaneMask : LaneBitmask(0x00000001); - LaneBitmask Common = RR.Mask & RCMask; - -// Ex: IP/EIP/RIP -// assert(RC != nullptr || RR.Reg == SuperReg); - uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg); - LaneBitmask SuperMask = TRI.composeSubRegIndexLaneMask(Sub, Common); - assert(RR.Mask.none() || SuperMask.any()); - return RegisterRef(SuperReg, SuperMask); + return RR; } std::set PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const { @@ -203,154 +212,161 @@ bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const { return false; } +RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, unsigned R) const { + if (RR.Reg == R) + return RR; + if (unsigned Idx = TRI.getSubRegIndex(R, RR.Reg)) + return RegisterRef(R, TRI.composeSubRegIndexLaneMask(Idx, RR.Mask)); + if (unsigned Idx = TRI.getSubRegIndex(RR.Reg, R)) { + const RegInfo &RI = RegInfos[R]; + LaneBitmask RCM = RI.RegClass ? RI.RegClass->LaneMask + : LaneBitmask::getAll(); + LaneBitmask M = TRI.reverseComposeSubRegIndexLaneMask(Idx, RR.Mask); + return RegisterRef(R, M & RCM); + } + llvm_unreachable("Invalid arguments: unrelated registers?"); +} + bool RegisterAggr::hasAliasOf(RegisterRef RR) const { - if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) { - // XXX SLOW - const uint32_t *MB = PRI.getRegMaskBits(RR.Reg); - for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) { - if (MB[i/32] & (1u << (i%32))) - continue; - if (hasAliasOf(RegisterRef(i, LaneBitmask::getAll()))) - return true; - } - return false; - } + if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) + return Units.anyCommon(PRI.getMaskUnits(RR.Reg)); - RegisterRef NR = PRI.normalize(RR); - auto F = Masks.find(NR.Reg); - if (F != Masks.end()) { - if ((F->second & NR.Mask).any()) - return true; - } - if (CheckUnits || PRI.hasPartialOverlaps(NR.Reg)) { - for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { - std::pair P = *U; - if (P.second.none() || (P.second & RR.Mask).any()) - if (ExpUnits.test(P.first)) - return true; - } + for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { + std::pair P = *U; + if (P.second.none() || (P.second & RR.Mask).any()) + if (Units.test(P.first)) + return true; } return false; } bool RegisterAggr::hasCoverOf(RegisterRef RR) const { if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) { - // XXX SLOW - const uint32_t *MB = PRI.getRegMaskBits(RR.Reg); - for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) { - if (MB[i/32] & (1u << (i%32))) - continue; - if (!hasCoverOf(RegisterRef(i, LaneBitmask::getAll()))) - return false; - } - return true; + BitVector T(PRI.getMaskUnits(RR.Reg)); + return T.reset(Units).none(); } - // Always have a cover for empty lane mask. - RegisterRef NR = PRI.normalize(RR); - if (NR.Mask.none()) - return true; - auto F = Masks.find(NR.Reg); - if (F != Masks.end()) { - if ((NR.Mask & F->second) == NR.Mask) - return true; - } - if (CheckUnits || PRI.hasPartialOverlaps(NR.Reg)) { - for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { - std::pair P = *U; - if (P.second.none() || (P.second & RR.Mask).any()) - if (!ExpUnits.test(P.first)) - return false; - } - return true; + for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { + std::pair P = *U; + if (P.second.none() || (P.second & RR.Mask).any()) + if (!Units.test(P.first)) + return false; } - return false; + return true; } RegisterAggr &RegisterAggr::insert(RegisterRef RR) { if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) { - // XXX SLOW - const uint32_t *MB = PRI.getRegMaskBits(RR.Reg); - for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) { - if (MB[i/32] & (1u << (i%32))) - continue; - insert(RegisterRef(i, LaneBitmask::getAll())); - } + Units |= PRI.getMaskUnits(RR.Reg); return *this; } - RegisterRef NR = PRI.normalize(RR); - auto F = Masks.find(NR.Reg); - if (F == Masks.end()) - Masks.insert({NR.Reg, NR.Mask}); - else - F->second |= NR.Mask; - - // If the register has any partial overlaps, the mask will not be sufficient - // to accurately represent aliasing/covering information. Add all units to - // the bit vector. - if (PRI.hasPartialOverlaps(NR.Reg)) { - for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { - std::pair P = *U; - if (P.second.none() || (P.second & RR.Mask).none()) - continue; - ExpUnits.set(P.first); - CheckUnits = true; - } + for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) { + std::pair P = *U; + if (P.second.none() || (P.second & RR.Mask).any()) + Units.set(P.first); } return *this; } RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) { - for (std::pair P : RG.Masks) - insert(RegisterRef(P.first, P.second)); + Units |= RG.Units; return *this; } -RegisterAggr &RegisterAggr::clear(RegisterRef RR) { - if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) { - // XXX SLOW - const uint32_t *MB = PRI.getRegMaskBits(RR.Reg); - for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) { - if (MB[i/32] & (1u << (i%32))) - continue; - clear(RegisterRef(i, LaneBitmask::getAll())); - } - return *this; - } +RegisterAggr &RegisterAggr::intersect(RegisterRef RR) { + return intersect(RegisterAggr(PRI).insert(RR)); +} - RegisterRef NR = PRI.normalize(RR); - auto F = Masks.find(NR.Reg); - if (F == Masks.end()) - return *this; - LaneBitmask NewM = F->second & ~NR.Mask; - if (NewM.none()) - Masks.erase(F); - else - F->second = NewM; +RegisterAggr &RegisterAggr::intersect(const RegisterAggr &RG) { + Units &= RG.Units; return *this; } +RegisterAggr &RegisterAggr::clear(RegisterRef RR) { + return clear(RegisterAggr(PRI).insert(RR)); +} + RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) { - for (std::pair P : RG.Masks) - clear(RegisterRef(P.first, P.second)); + Units.reset(RG.Units); return *this; } -RegisterRef RegisterAggr::clearIn(RegisterRef RR) const { +RegisterRef RegisterAggr::intersectWith(RegisterRef RR) const { RegisterAggr T(PRI); - T.insert(RR).clear(*this); + T.insert(RR).intersect(*this); if (T.empty()) return RegisterRef(); - return RegisterRef(T.begin()->first, T.begin()->second); + RegisterRef NR = T.makeRegRef(); + assert(NR); + return NR; +} + +RegisterRef RegisterAggr::clearIn(RegisterRef RR) const { + return RegisterAggr(PRI).insert(RR).clear(*this).makeRegRef(); +} + +RegisterRef RegisterAggr::makeRegRef() const { + int U = Units.find_first(); + if (U < 0) + return RegisterRef(); + + auto AliasedRegs = [this] (uint32_t Unit, BitVector &Regs) { + for (MCRegUnitRootIterator R(Unit, &PRI.getTRI()); R.isValid(); ++R) + for (MCSuperRegIterator S(*R, &PRI.getTRI(), true); S.isValid(); ++S) + Regs.set(*S); + }; + + // Find the set of all registers that are aliased to all the units + // in this aggregate. + + // Get all the registers aliased to the first unit in the bit vector. + BitVector Regs(PRI.getTRI().getNumRegs()); + AliasedRegs(U, Regs); + U = Units.find_next(U); + + // For each other unit, intersect it with the set of all registers + // aliased that unit. + while (U >= 0) { + BitVector AR(PRI.getTRI().getNumRegs()); + AliasedRegs(U, AR); + Regs &= AR; + U = Units.find_next(U); + } + + // If there is at least one register remaining, pick the first one, + // and consolidate the masks of all of its units contained in this + // aggregate. + + int F = Regs.find_first(); + if (F <= 0) + return RegisterRef(); + + LaneBitmask M; + for (MCRegUnitMaskIterator I(F, &PRI.getTRI()); I.isValid(); ++I) { + std::pair P = *I; + if (Units.test(P.first)) + M |= P.second.none() ? LaneBitmask::getAll() : P.second; + } + return RegisterRef(F, M); } void RegisterAggr::print(raw_ostream &OS) const { OS << '{'; - for (auto I : Masks) - OS << ' ' << PrintReg(I.first, &PRI.getTRI()) - << PrintLaneMaskOpt(I.second); + for (int U = Units.find_first(); U >= 0; U = Units.find_next(U)) + OS << ' ' << PrintRegUnit(U, &PRI.getTRI()); OS << " }"; } +RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG, + bool End) + : Owner(&RG) { + for (int U = RG.Units.find_first(); U >= 0; U = RG.Units.find_next(U)) { + RegisterRef R = RG.PRI.getRefForUnit(U); + Masks[R.Reg] |= R.Mask; + } + Pos = End ? Masks.end() : Masks.begin(); + Index = End ? Masks.size() : 0; +} + diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h index 621a6e2ff7737a4272461d4fb7626f5b76d26268..09b733ce616b3cf133beaed652b61d8e23db6ae8 100644 --- a/lib/Target/Hexagon/RDFRegisters.h +++ b/lib/Target/Hexagon/RDFRegisters.h @@ -51,6 +51,8 @@ namespace rdf { return F - Map.begin() + 1; } + uint32_t size() const { return Map.size(); } + typedef typename std::vector::const_iterator const_iterator; const_iterator begin() const { return Map.begin(); } const_iterator end() const { return Map.end(); } @@ -103,22 +105,34 @@ namespace rdf { return !isRegMaskId(RB.Reg) ? aliasRM(RB, RA) : aliasMM(RA, RB); } std::set getAliasSet(RegisterId Reg) const; - bool hasPartialOverlaps(RegisterId Reg) const { - return RegInfos[Reg].Partial; + + RegisterRef getRefForUnit(uint32_t U) const { + return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask); + } + const BitVector &getMaskUnits(RegisterId MaskId) const { + return MaskInfos[TargetRegisterInfo::stackSlot2Index(MaskId)].Units; } + RegisterRef mapTo(RegisterRef RR, unsigned R) const; const TargetRegisterInfo &getTRI() const { return TRI; } private: struct RegInfo { - unsigned MaxSuper = 0; const TargetRegisterClass *RegClass = nullptr; - bool Partial = false; + }; + struct UnitInfo { + RegisterId Reg = 0; + LaneBitmask Mask; + }; + struct MaskInfo { + BitVector Units; }; const TargetRegisterInfo &TRI; - std::vector RegInfos; IndexedSet RegMasks; + std::vector RegInfos; + std::vector UnitInfos; + std::vector MaskInfos; bool aliasRR(RegisterRef RA, RegisterRef RB) const; bool aliasRM(RegisterRef RR, RegisterRef RM) const; @@ -128,10 +142,10 @@ namespace rdf { struct RegisterAggr { RegisterAggr(const PhysicalRegisterInfo &pri) - : ExpUnits(pri.getTRI().getNumRegUnits()), PRI(pri) {} + : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {} RegisterAggr(const RegisterAggr &RG) = default; - bool empty() const { return Masks.empty(); } + bool empty() const { return Units.none(); } bool hasAliasOf(RegisterRef RR) const; bool hasCoverOf(RegisterRef RR) const; static bool isCoverOf(RegisterRef RA, RegisterRef RB, @@ -141,25 +155,52 @@ namespace rdf { RegisterAggr &insert(RegisterRef RR); RegisterAggr &insert(const RegisterAggr &RG); + RegisterAggr &intersect(RegisterRef RR); + RegisterAggr &intersect(const RegisterAggr &RG); RegisterAggr &clear(RegisterRef RR); RegisterAggr &clear(const RegisterAggr &RG); + RegisterRef intersectWith(RegisterRef RR) const; RegisterRef clearIn(RegisterRef RR) const; + RegisterRef makeRegRef() const; void print(raw_ostream &OS) const; - private: - typedef std::unordered_map MapType; + struct rr_iterator { + typedef std::map MapType; + private: + MapType Masks; + MapType::iterator Pos; + unsigned Index; + const RegisterAggr *Owner; + public: + rr_iterator(const RegisterAggr &RG, bool End); + RegisterRef operator*() const { + return RegisterRef(Pos->first, Pos->second); + } + rr_iterator &operator++() { + ++Pos; + ++Index; + return *this; + } + bool operator==(const rr_iterator &I) const { + assert(Owner == I.Owner); + return Index == I.Index; + } + bool operator!=(const rr_iterator &I) const { + return !(*this == I); + } + }; - public: - typedef MapType::const_iterator iterator; - iterator begin() const { return Masks.begin(); } - iterator end() const { return Masks.end(); } + rr_iterator rr_begin() const { + return rr_iterator(*this, false); + } + rr_iterator rr_end() const { + return rr_iterator(*this, true); + } private: - MapType Masks; - BitVector ExpUnits; // Register units for explicit checks. - bool CheckUnits = false; + BitVector Units; const PhysicalRegisterInfo &PRI; }; diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 8be2a898e38024d1485cd5e1250faa07b4db6654..34b966df7761b4f9e8b7c4420ef230c9e9d88745 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -29,6 +29,7 @@ subdirectories = MSP430 NVPTX Mips + Nios2 PowerPC RISCV Sparc diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 1d6c07974beb492c2c5fae45d8864cc14f754f4c..72e471f5766e53c353dca2a77ab18c20afa73944 100644 --- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -28,8 +28,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/Lanai/LanaiISelLowering.cpp b/lib/Target/Lanai/LanaiISelLowering.cpp index d156294a0b0c9ae13c22ef806a7f65812bede4d5..0a9cac2565f289b95522768682e63c05d3948fc8 100644 --- a/lib/Target/Lanai/LanaiISelLowering.cpp +++ b/lib/Target/Lanai/LanaiISelLowering.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "LanaiISelLowering.h" #include "Lanai.h" #include "LanaiCondCode.h" -#include "LanaiISelLowering.h" #include "LanaiMachineFunctionInfo.h" #include "LanaiSubtarget.h" #include "LanaiTargetObjectFile.h" @@ -38,10 +38,11 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetCallingConv.h" @@ -649,10 +650,7 @@ SDValue LanaiTargetLowering::LowerCCCCallTo( ByValArgs.push_back(FIPtr); } - Chain = DAG.getCALLSEQ_START( - Chain, - DAG.getConstant(NumBytes, DL, getPointerTy(DAG.getDataLayout()), true), - DL); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); SmallVector, 4> RegsToPass; SmallVector MemOpChains; @@ -1502,3 +1500,24 @@ SDValue LanaiTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } + +void LanaiTargetLowering::computeKnownBitsForTargetNode( + const SDValue Op, KnownBits &Known, const APInt &DemandedElts, + const SelectionDAG &DAG, unsigned Depth) const { + unsigned BitWidth = Known.getBitWidth(); + switch (Op.getOpcode()) { + default: + break; + case LanaiISD::SETCC: + Known = KnownBits(BitWidth); + Known.Zero.setBits(1, BitWidth); + break; + case LanaiISD::SELECT_CC: + KnownBits Known2; + DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1); + DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1); + Known.Zero &= Known2.Zero; + Known.One &= Known2.One; + break; + } +} diff --git a/lib/Target/Lanai/LanaiISelLowering.h b/lib/Target/Lanai/LanaiISelLowering.h index c2fba4f9d167ba8d574aa1ca069060b3a4b5e8ec..49ad52a3977170ac3bdec6037fc69fd64cf112b5 100644 --- a/lib/Target/Lanai/LanaiISelLowering.h +++ b/lib/Target/Lanai/LanaiISelLowering.h @@ -106,6 +106,11 @@ public: SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + private: SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool IsVarArg, diff --git a/lib/Target/Lanai/LanaiInstrInfo.td b/lib/Target/Lanai/LanaiInstrInfo.td index 285fca11737de834cc3d7a9a858cbe0803c5b57a..776fee101dfeba9d8ebc2bda288c5fa38f561da1 100644 --- a/lib/Target/Lanai/LanaiInstrInfo.td +++ b/lib/Target/Lanai/LanaiInstrInfo.td @@ -22,7 +22,8 @@ include "LanaiInstrFormats.td" // -------------------------------------------------- // // These are target-independent nodes, but have target-specific formats. -def SDT_LanaiCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +def SDT_LanaiCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; def SDT_LanaiCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def SDT_LanaiCall : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>; @@ -750,9 +751,9 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1, // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber SP. let Defs = [SP], Uses = [SP] in { - def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - "#ADJCALLSTACKDOWN $amt", - [(CallSeqStart timm:$amt)]>; + def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "#ADJCALLSTACKDOWN $amt1 $amt2", + [(CallSeqStart timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2", [(CallSeqEnd timm:$amt1, timm:$amt2)]>; @@ -770,9 +771,6 @@ let Uses = [SR] in { [(set (i32 GPR:$Rs1), (LanaiSetCC imm:$DDDI))]>; } -// SCC's output is already 1-bit so and'ing with 1 is redundant. -def : Pat<(and (LanaiSetCC imm:$DDDI), 1), (SCC imm:$DDDI)>; - // Select with hardware support let Uses = [SR], isSelect = 1 in { def SELECT : InstRR<0b111, (outs GPR:$Rd), diff --git a/lib/Target/Lanai/LanaiRegisterInfo.cpp b/lib/Target/Lanai/LanaiRegisterInfo.cpp index 12a2571c28d9be93ab09ee0da09d88990ce31779..fe54589f8b0d9f9ea6470a90c2d1d82cecc29e43 100644 --- a/lib/Target/Lanai/LanaiRegisterInfo.cpp +++ b/lib/Target/Lanai/LanaiRegisterInfo.cpp @@ -264,12 +264,6 @@ LanaiRegisterInfo::getFrameRegister(const MachineFunction & /*MF*/) const { unsigned LanaiRegisterInfo::getBaseRegister() const { return Lanai::R14; } -bool LanaiRegisterInfo::canRealignStack(const MachineFunction &MF) const { - if (!TargetRegisterInfo::canRealignStack(MF)) - return false; - return true; -} - unsigned LanaiRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("no exception support"); return 0; diff --git a/lib/Target/Lanai/LanaiRegisterInfo.h b/lib/Target/Lanai/LanaiRegisterInfo.h index c6e459076ebc0161305da41ffb17f5bcb03637e4..d88a19193854308c6a637ace6d029aada778c35d 100644 --- a/lib/Target/Lanai/LanaiRegisterInfo.h +++ b/lib/Target/Lanai/LanaiRegisterInfo.h @@ -41,8 +41,6 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo { unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; - bool canRealignStack(const MachineFunction &MF) const override; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const override; diff --git a/lib/Target/Lanai/LanaiTargetMachine.cpp b/lib/Target/Lanai/LanaiTargetMachine.cpp index 2a9bc25d7fadbfab1ab092abe4be445835493696..a2f005ce445a8de5ac76b8d9b80ec872879d0b65 100644 --- a/lib/Target/Lanai/LanaiTargetMachine.cpp +++ b/lib/Target/Lanai/LanaiTargetMachine.cpp @@ -76,7 +76,7 @@ namespace { // Lanai Code Generator Pass Configuration Options. class LanaiPassConfig : public TargetPassConfig { public: - LanaiPassConfig(LanaiTargetMachine *TM, PassManagerBase *PassManager) + LanaiPassConfig(LanaiTargetMachine &TM, PassManagerBase *PassManager) : TargetPassConfig(TM, *PassManager) {} LanaiTargetMachine &getLanaiTargetMachine() const { @@ -91,7 +91,7 @@ public: TargetPassConfig * LanaiTargetMachine::createPassConfig(PassManagerBase &PassManager) { - return new LanaiPassConfig(this, &PassManager); + return new LanaiPassConfig(*this, &PassManager); } // Install an instruction selector pass. diff --git a/lib/Target/Lanai/LanaiTargetMachine.h b/lib/Target/Lanai/LanaiTargetMachine.h index 5278c70d909dac8dac7ec844857f81f9a84098f5..083ba6fdf8416e09290eace874257a9e074e9831 100644 --- a/lib/Target/Lanai/LanaiTargetMachine.h +++ b/lib/Target/Lanai/LanaiTargetMachine.h @@ -49,6 +49,10 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool isMachineVerifierClean() const override { + return false; + } }; } // namespace llvm diff --git a/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/lib/Target/Lanai/LanaiTargetObjectFile.cpp index 7475dbd68ae49eb6709d5207289680ecb28b8758..38e75108ba16ca6bb819acbcb00b33e96856d403 100644 --- a/lib/Target/Lanai/LanaiTargetObjectFile.cpp +++ b/lib/Target/Lanai/LanaiTargetObjectFile.cpp @@ -10,13 +10,13 @@ #include "LanaiSubtarget.h" #include "LanaiTargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp index e02bba529bd503708127c9b507f8b42640743501..64cd3342ac18b9c808eaf303c3ab3c5ddb6fec68 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp @@ -9,8 +9,8 @@ #include "MCTargetDesc/LanaiBaseInfo.h" #include "MCTargetDesc/LanaiFixupKinds.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp index 10254677a5ad103e62c4c77037d7ec64f544b7f6..c3727416ecb94dfbaea92140571b36f829ae6763 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp @@ -19,8 +19,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index a47ff9ff3d61db3fc652d362476ea957a87229fa..bcbde2b8b7947e439496695e07b6c41ae231dfcb 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "LanaiMCAsmInfo.h" #include "LanaiMCTargetDesc.h" #include "InstPrinter/LanaiInstPrinter.h" +#include "LanaiMCAsmInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index c26b3081dbc32921c4e2e4a2d0d39a2fb6192cf4..82e6731ecd7829c862956936358e5a422f53c572 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -17,7 +17,7 @@ using namespace llvm; void MSP430MCAsmInfo::anchor() { } MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) { - PointerSize = CalleeSaveStackSlotSize = 2; + CodePointerSize = CalleeSaveStackSlotSize = 2; CommentString = ";"; diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td index dfea669f3ba1bbb8a75c6acfeae5c75b7061e800..203864dd40650ee3830c2a46779250ac1aea9e67 100644 --- a/lib/Target/MSP430/MSP430.td +++ b/lib/Target/MSP430/MSP430.td @@ -22,6 +22,18 @@ def FeatureX : SubtargetFeature<"ext", "ExtendedInsts", "true", "Enable MSP430-X extensions">; +def FeatureHWMult16 + : SubtargetFeature<"hwmult16", "HWMultMode", "HWMult16", + "Enable 16-bit hardware multiplier">; + +def FeatureHWMult32 + : SubtargetFeature<"hwmult32", "HWMultMode", "HWMult32", + "Enable 32-bit hardware multiplier">; + +def FeatureHWMultF5 + : SubtargetFeature<"hwmultf5", "HWMultMode", "HWMultF5", + "Enable F5 series hardware multiplier">; + //===----------------------------------------------------------------------===// // MSP430 supported processors. //===----------------------------------------------------------------------===// @@ -29,6 +41,8 @@ class Proc Features> : Processor; def : Proc<"generic", []>; +def : Proc<"msp430", []>; +def : Proc<"msp430x", [FeatureX]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index abf062fe86ae49966ba66aed112cfe023ca4e9c0..f39c21fc8aa2b0c158530a590e57222ce1b974f8 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MSP430.h" #include "InstPrinter/MSP430InstPrinter.h" +#include "MSP430.h" #include "MSP430InstrInfo.h" #include "MSP430MCInstLower.h" #include "MSP430TargetMachine.h" diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index f1cb0b6c031b0d4af0675e91fd7e86e5e2ed6fc9..b4ff8f66c55f011ee1da4683b90d86daf857286f 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -236,7 +236,7 @@ MachineBasicBlock::iterator MSP430FrameLowering::eliminateCallFramePseudoInstr( // adjcallstackdown instruction into 'add SP, ' // TODO: consider using push / pop instead of sub + store / add MachineInstr &Old = *I; - uint64_t Amount = Old.getOperand(0).getImm(); + uint64_t Amount = TII.getFrameSize(Old); if (Amount != 0) { // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next @@ -252,8 +252,7 @@ MachineBasicBlock::iterator MSP430FrameLowering::eliminateCallFramePseudoInstr( } else { assert(Old.getOpcode() == TII.getCallFrameDestroyOpcode()); // factor out the amount the callee already popped. - uint64_t CalleeAmt = Old.getOperand(1).getImm(); - Amount -= CalleeAmt; + Amount -= TII.getFramePoppedByCallee(Old); if (Amount) New = BuildMI(MF, Old.getDebugLoc(), TII.get(MSP430::ADD16ri), MSP430::SP) @@ -272,7 +271,7 @@ MachineBasicBlock::iterator MSP430FrameLowering::eliminateCallFramePseudoInstr( } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) { // If we are performing frame pointer elimination and if the callee pops // something off the stack pointer, add it back. - if (uint64_t CalleeAmt = I->getOperand(1).getImm()) { + if (uint64_t CalleeAmt = TII.getFramePoppedByCallee(*I)) { MachineInstr &Old = *I; MachineInstr *New = BuildMI(MF, Old.getDebugLoc(), TII.get(MSP430::SUB16ri), MSP430::SP) diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index cd58eda5d924c06bdbe8827b3a3784494a33ae86..0b02f79f472a106f33715f0840d07f2c4dab2be8 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -403,12 +403,12 @@ void MSP430DAGToDAGISel::Select(SDNode *Node) { int FI = cast(Node)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16); if (Node->hasOneUse()) { - CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16, TFI, + CurDAG->SelectNodeTo(Node, MSP430::ADDframe, MVT::i16, TFI, CurDAG->getTargetConstant(0, dl, MVT::i16)); return; } ReplaceNode(Node, CurDAG->getMachineNode( - MSP430::ADD16ri, dl, MVT::i16, TFI, + MSP430::ADDframe, dl, MVT::i16, TFI, CurDAG->getTargetConstant(0, dl, MVT::i16))); return; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 40b1dd3cc2ebfc94396812a67ca270d06a07bc1e..dae14fd301ee0238a87b373165100009688a9795 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -38,24 +38,6 @@ using namespace llvm; #define DEBUG_TYPE "msp430-lower" -typedef enum { - NoHWMult, - HWMultIntr, - HWMultNoIntr -} HWMultUseMode; - -static cl::opt -HWMultMode("msp430-hwmult-mode", cl::Hidden, - cl::desc("Hardware multiplier use mode"), - cl::init(HWMultNoIntr), - cl::values( - clEnumValN(NoHWMult, "no", - "Do not use hardware multiplier"), - clEnumValN(HWMultIntr, "interrupts", - "Assume hardware multiplier can be used inside interrupts"), - clEnumValN(HWMultNoIntr, "use", - "Assume hardware multiplier cannot be used inside interrupts"))); - MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, const MSP430Subtarget &STI) : TargetLowering(TM) { @@ -131,29 +113,29 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // FIXME: Implement efficiently multiplication by a constant - setOperationAction(ISD::MUL, MVT::i8, Expand); - setOperationAction(ISD::MULHS, MVT::i8, Expand); - setOperationAction(ISD::MULHU, MVT::i8, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::MUL, MVT::i16, Expand); + setOperationAction(ISD::MUL, MVT::i8, Promote); + setOperationAction(ISD::MULHS, MVT::i8, Promote); + setOperationAction(ISD::MULHU, MVT::i8, Promote); + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Promote); + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Promote); + setOperationAction(ISD::MUL, MVT::i16, LibCall); setOperationAction(ISD::MULHS, MVT::i16, Expand); setOperationAction(ISD::MULHU, MVT::i16, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); - setOperationAction(ISD::UDIV, MVT::i8, Expand); - setOperationAction(ISD::UDIVREM, MVT::i8, Expand); - setOperationAction(ISD::UREM, MVT::i8, Expand); - setOperationAction(ISD::SDIV, MVT::i8, Expand); - setOperationAction(ISD::SDIVREM, MVT::i8, Expand); - setOperationAction(ISD::SREM, MVT::i8, Expand); - setOperationAction(ISD::UDIV, MVT::i16, Expand); + setOperationAction(ISD::UDIV, MVT::i8, Promote); + setOperationAction(ISD::UDIVREM, MVT::i8, Promote); + setOperationAction(ISD::UREM, MVT::i8, Promote); + setOperationAction(ISD::SDIV, MVT::i8, Promote); + setOperationAction(ISD::SDIVREM, MVT::i8, Promote); + setOperationAction(ISD::SREM, MVT::i8, Promote); + setOperationAction(ISD::UDIV, MVT::i16, LibCall); setOperationAction(ISD::UDIVREM, MVT::i16, Expand); - setOperationAction(ISD::UREM, MVT::i16, Expand); - setOperationAction(ISD::SDIV, MVT::i16, Expand); + setOperationAction(ISD::UREM, MVT::i16, LibCall); + setOperationAction(ISD::SDIV, MVT::i16, LibCall); setOperationAction(ISD::SDIVREM, MVT::i16, Expand); - setOperationAction(ISD::SREM, MVT::i16, Expand); + setOperationAction(ISD::SREM, MVT::i16, LibCall); // varargs support setOperationAction(ISD::VASTART, MVT::Other, Custom); @@ -162,15 +144,183 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::JumpTable, MVT::i16, Custom); - // Libcalls names. - if (HWMultMode == HWMultIntr) { - setLibcallName(RTLIB::MUL_I8, "__mulqi3hw"); - setLibcallName(RTLIB::MUL_I16, "__mulhi3hw"); - } else if (HWMultMode == HWMultNoIntr) { - setLibcallName(RTLIB::MUL_I8, "__mulqi3hw_noint"); - setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint"); + // EABI Libcalls - EABI Section 6.2 + const struct { + const RTLIB::Libcall Op; + const char * const Name; + const ISD::CondCode Cond; + } LibraryCalls[] = { + // Floating point conversions - EABI Table 6 + { RTLIB::FPROUND_F64_F32, "__mspabi_cvtdf", ISD::SETCC_INVALID }, + { RTLIB::FPEXT_F32_F64, "__mspabi_cvtfd", ISD::SETCC_INVALID }, + // The following is NOT implemented in libgcc + //{ RTLIB::FPTOSINT_F64_I16, "__mspabi_fixdi", ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F64_I32, "__mspabi_fixdli", ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F64_I64, "__mspabi_fixdlli", ISD::SETCC_INVALID }, + // The following is NOT implemented in libgcc + //{ RTLIB::FPTOUINT_F64_I16, "__mspabi_fixdu", ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I32, "__mspabi_fixdul", ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I64, "__mspabi_fixdull", ISD::SETCC_INVALID }, + // The following is NOT implemented in libgcc + //{ RTLIB::FPTOSINT_F32_I16, "__mspabi_fixfi", ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I32, "__mspabi_fixfli", ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I64, "__mspabi_fixflli", ISD::SETCC_INVALID }, + // The following is NOT implemented in libgcc + //{ RTLIB::FPTOUINT_F32_I16, "__mspabi_fixfu", ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I32, "__mspabi_fixful", ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I64, "__mspabi_fixfull", ISD::SETCC_INVALID }, + // TODO The following IS implemented in libgcc + //{ RTLIB::SINTTOFP_I16_F64, "__mspabi_fltid", ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I32_F64, "__mspabi_fltlid", ISD::SETCC_INVALID }, + // TODO The following IS implemented in libgcc but is not in the EABI + { RTLIB::SINTTOFP_I64_F64, "__mspabi_fltllid", ISD::SETCC_INVALID }, + // TODO The following IS implemented in libgcc + //{ RTLIB::UINTTOFP_I16_F64, "__mspabi_fltud", ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F64, "__mspabi_fltuld", ISD::SETCC_INVALID }, + // The following IS implemented in libgcc but is not in the EABI + { RTLIB::UINTTOFP_I64_F64, "__mspabi_fltulld", ISD::SETCC_INVALID }, + // TODO The following IS implemented in libgcc + //{ RTLIB::SINTTOFP_I16_F32, "__mspabi_fltif", ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I32_F32, "__mspabi_fltlif", ISD::SETCC_INVALID }, + // TODO The following IS implemented in libgcc but is not in the EABI + { RTLIB::SINTTOFP_I64_F32, "__mspabi_fltllif", ISD::SETCC_INVALID }, + // TODO The following IS implemented in libgcc + //{ RTLIB::UINTTOFP_I16_F32, "__mspabi_fltuf", ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F32, "__mspabi_fltulf", ISD::SETCC_INVALID }, + // The following IS implemented in libgcc but is not in the EABI + { RTLIB::UINTTOFP_I64_F32, "__mspabi_fltullf", ISD::SETCC_INVALID }, + + // Floating point comparisons - EABI Table 7 + { RTLIB::OEQ_F64, "__mspabi_cmpd", ISD::SETEQ }, + { RTLIB::UNE_F64, "__mspabi_cmpd", ISD::SETNE }, + { RTLIB::OGE_F64, "__mspabi_cmpd", ISD::SETGE }, + { RTLIB::OLT_F64, "__mspabi_cmpd", ISD::SETLT }, + { RTLIB::OLE_F64, "__mspabi_cmpd", ISD::SETLE }, + { RTLIB::OGT_F64, "__mspabi_cmpd", ISD::SETGT }, + { RTLIB::OEQ_F32, "__mspabi_cmpf", ISD::SETEQ }, + { RTLIB::UNE_F32, "__mspabi_cmpf", ISD::SETNE }, + { RTLIB::OGE_F32, "__mspabi_cmpf", ISD::SETGE }, + { RTLIB::OLT_F32, "__mspabi_cmpf", ISD::SETLT }, + { RTLIB::OLE_F32, "__mspabi_cmpf", ISD::SETLE }, + { RTLIB::OGT_F32, "__mspabi_cmpf", ISD::SETGT }, + + // Floating point arithmetic - EABI Table 8 + { RTLIB::ADD_F64, "__mspabi_addd", ISD::SETCC_INVALID }, + { RTLIB::ADD_F32, "__mspabi_addf", ISD::SETCC_INVALID }, + { RTLIB::DIV_F64, "__mspabi_divd", ISD::SETCC_INVALID }, + { RTLIB::DIV_F32, "__mspabi_divf", ISD::SETCC_INVALID }, + { RTLIB::MUL_F64, "__mspabi_mpyd", ISD::SETCC_INVALID }, + { RTLIB::MUL_F32, "__mspabi_mpyf", ISD::SETCC_INVALID }, + { RTLIB::SUB_F64, "__mspabi_subd", ISD::SETCC_INVALID }, + { RTLIB::SUB_F32, "__mspabi_subf", ISD::SETCC_INVALID }, + // The following are NOT implemented in libgcc + // { RTLIB::NEG_F64, "__mspabi_negd", ISD::SETCC_INVALID }, + // { RTLIB::NEG_F32, "__mspabi_negf", ISD::SETCC_INVALID }, + + // TODO: SLL/SRA/SRL are in libgcc, RLL isn't + + // Universal Integer Operations - EABI Table 9 + { RTLIB::SDIV_I16, "__mspabi_divi", ISD::SETCC_INVALID }, + { RTLIB::SDIV_I32, "__mspabi_divli", ISD::SETCC_INVALID }, + { RTLIB::SDIV_I64, "__mspabi_divlli", ISD::SETCC_INVALID }, + { RTLIB::UDIV_I16, "__mspabi_divu", ISD::SETCC_INVALID }, + { RTLIB::UDIV_I32, "__mspabi_divul", ISD::SETCC_INVALID }, + { RTLIB::UDIV_I64, "__mspabi_divull", ISD::SETCC_INVALID }, + { RTLIB::SREM_I16, "__mspabi_remi", ISD::SETCC_INVALID }, + { RTLIB::SREM_I32, "__mspabi_remli", ISD::SETCC_INVALID }, + { RTLIB::SREM_I64, "__mspabi_remlli", ISD::SETCC_INVALID }, + { RTLIB::UREM_I16, "__mspabi_remu", ISD::SETCC_INVALID }, + { RTLIB::UREM_I32, "__mspabi_remul", ISD::SETCC_INVALID }, + { RTLIB::UREM_I64, "__mspabi_remull", ISD::SETCC_INVALID }, + + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); } + if (STI.hasHWMult16()) { + const struct { + const RTLIB::Libcall Op; + const char * const Name; + } LibraryCalls[] = { + // Integer Multiply - EABI Table 9 + { RTLIB::MUL_I16, "__mspabi_mpyi_hw" }, + { RTLIB::MUL_I32, "__mspabi_mpyl_hw" }, + { RTLIB::MUL_I64, "__mspabi_mpyll_hw" }, + // TODO The __mspabi_mpysl*_hw functions ARE implemented in libgcc + // TODO The __mspabi_mpyul*_hw functions ARE implemented in libgcc + }; + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + } + } else if (STI.hasHWMult32()) { + const struct { + const RTLIB::Libcall Op; + const char * const Name; + } LibraryCalls[] = { + // Integer Multiply - EABI Table 9 + { RTLIB::MUL_I16, "__mspabi_mpyi_hw" }, + { RTLIB::MUL_I32, "__mspabi_mpyl_hw32" }, + { RTLIB::MUL_I64, "__mspabi_mpyll_hw32" }, + // TODO The __mspabi_mpysl*_hw32 functions ARE implemented in libgcc + // TODO The __mspabi_mpyul*_hw32 functions ARE implemented in libgcc + }; + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + } + } else if (STI.hasHWMultF5()) { + const struct { + const RTLIB::Libcall Op; + const char * const Name; + } LibraryCalls[] = { + // Integer Multiply - EABI Table 9 + { RTLIB::MUL_I16, "__mspabi_mpyi_f5hw" }, + { RTLIB::MUL_I32, "__mspabi_mpyl_f5hw" }, + { RTLIB::MUL_I64, "__mspabi_mpyll_f5hw" }, + // TODO The __mspabi_mpysl*_f5hw functions ARE implemented in libgcc + // TODO The __mspabi_mpyul*_f5hw functions ARE implemented in libgcc + }; + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + } + } else { // NoHWMult + const struct { + const RTLIB::Libcall Op; + const char * const Name; + } LibraryCalls[] = { + // Integer Multiply - EABI Table 9 + { RTLIB::MUL_I16, "__mspabi_mpyi" }, + { RTLIB::MUL_I32, "__mspabi_mpyl" }, + { RTLIB::MUL_I64, "__mspabi_mpyll" }, + // The __mspabi_mpysl* functions are NOT implemented in libgcc + // The __mspabi_mpyul* functions are NOT implemented in libgcc + }; + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + } + setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::MSP430_BUILTIN); + } + + // Several of the runtime library functions use a special calling conv + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::MSP430_BUILTIN); + setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::MSP430_BUILTIN); + // TODO: __mspabi_srall, __mspabi_srlll, __mspabi_sllll + setMinFunctionAlignment(1); setPrefFunctionAlignment(2); } @@ -281,10 +431,27 @@ template static void AnalyzeArguments(CCState &State, SmallVectorImpl &ArgLocs, const SmallVectorImpl &Args) { - static const MCPhysReg RegList[] = { + static const MCPhysReg CRegList[] = { MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15 }; - static const unsigned NbRegs = array_lengthof(RegList); + static const unsigned CNbRegs = array_lengthof(CRegList); + static const MCPhysReg BuiltinRegList[] = { + MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11, + MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15 + }; + static const unsigned BuiltinNbRegs = array_lengthof(BuiltinRegList); + + ArrayRef RegList; + unsigned NbRegs; + + bool Builtin = (State.getCallingConv() == CallingConv::MSP430_BUILTIN); + if (Builtin) { + RegList = BuiltinRegList; + NbRegs = BuiltinNbRegs; + } else { + RegList = CRegList; + NbRegs = CNbRegs; + } if (State.isVarArg()) { AnalyzeVarArgs(State, Args); @@ -294,6 +461,11 @@ static void AnalyzeArguments(CCState &State, SmallVector ArgsParts; ParseFunctionArgs(Args, ArgsParts); + if (Builtin) { + assert(ArgsParts.size() == 2 && + "Builtin calling convention requires two arguments"); + } + unsigned RegsLeft = NbRegs; bool UsedStack = false; unsigned ValNo = 0; @@ -323,6 +495,11 @@ static void AnalyzeArguments(CCState &State, unsigned Parts = ArgsParts[i]; + if (Builtin) { + assert(Parts == 4 && + "Builtin calling convention requires 64-bit arguments"); + } + if (!UsedStack && Parts == 2 && RegsLeft == 1) { // Special case for 32-bit register split, see EABI section 3.3.3 unsigned Reg = State.AllocateReg(RegList); @@ -400,6 +577,7 @@ MSP430TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, switch (CallConv) { default: llvm_unreachable("Unsupported calling convention"); + case CallingConv::MSP430_BUILTIN: case CallingConv::Fast: case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, @@ -598,7 +776,6 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, /// LowerCCCCallTo - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. -// TODO: sret. SDValue MSP430TargetLowering::LowerCCCCallTo( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, @@ -615,8 +792,7 @@ SDValue MSP430TargetLowering::LowerCCCCallTo( unsigned NumBytes = CCInfo.getNextStackOffset(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getConstant(NumBytes, dl, PtrVT, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SmallVector, 4> RegsToPass; SmallVector MemOpChains; diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index e3259bd6a7bc02f91c94274c4bc7950d1751aa3b..d81f17e753c5a11e7e42f5ba974ebc798865148e 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -85,6 +85,12 @@ public: MachineBasicBlock *FBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded = nullptr) const override; + + int64_t getFramePoppedByCallee(const MachineInstr &I) const { + assert(isFrameInstr(I) && "Not a frame instruction"); + assert(I.getOperand(1).getImm() >= 0 && "Size must not be negative"); + return I.getOperand(1).getImm(); + } }; } diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index c0c29b992238e304de91afbbe5dadf2cafd664df..cec43040f60d4c177898bf0ac94be2b939ffabcd 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -23,7 +23,8 @@ class SDTCisI16 : SDTCisVT; // Type Profiles. //===----------------------------------------------------------------------===// def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; -def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>; +def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>, + SDTCisVT<1, i16>]>; def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>; def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -113,15 +114,21 @@ def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber SR. let Defs = [SP, SR], Uses = [SP] in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt), +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), "#ADJCALLSTACKDOWN", - [(MSP430callseq_start timm:$amt)]>; + [(MSP430callseq_start timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), "#ADJCALLSTACKUP", [(MSP430callseq_end timm:$amt1, timm:$amt2)]>; } +let Defs = [SR], Uses = [SP] in { +def ADDframe : Pseudo<(outs GR16:$dst), (ins i16imm:$base, i16imm:$offset), + "# ADDframe PSEUDO", []>; +} + let usesCustomInserter = 1 in { + let Uses = [SR] in { def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc), "# Select8 PSEUDO", [(set GR8:$dst, @@ -130,6 +137,7 @@ let usesCustomInserter = 1 in { "# Select16 PSEUDO", [(set GR16:$dst, (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>; + } let Defs = [SR] in { def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), "# Shl8 PSEUDO", @@ -207,7 +215,7 @@ let isCall = 1 in // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. Uses for argument // registers are added manually. - let Defs = [R12, R13, R14, R15, SR], + let Defs = [R11, R12, R13, R14, R15, SR], Uses = [SP] in { def CALLi : II16i<0x0, (outs), (ins i16imm:$dst), diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 81cd9d1ad3f8de06abd397ccf9b60b09b8d5bdae..7a3b7a8bd5ff7279a0593c327905de2f66da5b34 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -41,12 +41,12 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const Function* F = MF->getFunction(); static const MCPhysReg CalleeSavedRegs[] = { MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7, - MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11, + MSP430::R8, MSP430::R9, MSP430::R10, 0 }; static const MCPhysReg CalleeSavedRegsFP[] = { MSP430::R5, MSP430::R6, MSP430::R7, - MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11, + MSP430::R8, MSP430::R9, MSP430::R10, 0 }; static const MCPhysReg CalleeSavedRegsIntr[] = { @@ -127,7 +127,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Fold imm into offset Offset += MI.getOperand(FIOperandNum + 1).getImm(); - if (MI.getOpcode() == MSP430::ADD16ri) { + if (MI.getOpcode() == MSP430::ADDframe) { // This is actually "load effective address" of the stack slot // instruction. We have only two-address instructions, thus we need to // expand it into mov + add diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index 6216348e4d719b63224500b168411862c41da98a..776a9dcb11d4f990f04abea28d1b72c4783bcc19 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -19,6 +19,20 @@ using namespace llvm; #define DEBUG_TYPE "msp430-subtarget" +static cl::opt +HWMultModeOption("mhwmult", cl::Hidden, + cl::desc("Hardware multiplier use mode for MSP430"), + cl::init(MSP430Subtarget::NoHWMult), + cl::values( + clEnumValN(MSP430Subtarget::NoHWMult, "none", + "Do not use hardware multiplier"), + clEnumValN(MSP430Subtarget::HWMult16, "16bit", + "Use 16-bit hardware multiplier"), + clEnumValN(MSP430Subtarget::HWMult32, "32bit", + "Use 32-bit hardware multiplier"), + clEnumValN(MSP430Subtarget::HWMultF5, "f5series", + "Use F5 series hardware multiplier"))); + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "MSP430GenSubtargetInfo.inc" @@ -27,7 +41,18 @@ void MSP430Subtarget::anchor() { } MSP430Subtarget & MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - ParseSubtargetFeatures("generic", FS); + ExtendedInsts = false; + HWMultMode = NoHWMult; + + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "msp430"; + + ParseSubtargetFeatures(CPUName, FS); + + if (HWMultModeOption != NoHWMult) + HWMultMode = HWMultModeOption; + return *this; } diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index 1a00d85e01cb171d929a039e9696eac9a27e897f..8828dfd65878fc33dd28adb5efbe537927415b12 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -30,8 +30,15 @@ namespace llvm { class StringRef; class MSP430Subtarget : public MSP430GenSubtargetInfo { +public: + enum HWMultEnum { + NoHWMult, HWMult16, HWMult32, HWMultF5 + }; + +private: virtual void anchor(); bool ExtendedInsts; + HWMultEnum HWMultMode; MSP430FrameLowering FrameLowering; MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; @@ -50,6 +57,10 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + bool hasHWMult16() const { return HWMultMode == HWMult16; } + bool hasHWMult32() const { return HWMultMode == HWMult32; } + bool hasHWMultF5() const { return HWMultMode == HWMultF5; } + const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index bebe5fa35ad42ea4342fa62bbd02495d0ac6b6b9..d8fdc8ba674e6281ed21d36cf61bf6000073a144 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -52,7 +52,7 @@ namespace { /// MSP430 Code Generator Pass Configuration Options. class MSP430PassConfig : public TargetPassConfig { public: - MSP430PassConfig(MSP430TargetMachine *TM, PassManagerBase &PM) + MSP430PassConfig(MSP430TargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} MSP430TargetMachine &getMSP430TargetMachine() const { @@ -65,7 +65,7 @@ public: } // namespace TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) { - return new MSP430PassConfig(this, PM); + return new MSP430PassConfig(*this, PM); } bool MSP430PassConfig::addInstSelector() { diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index d407774574be119344cbdbd29a1b80e6cd282550..694c201cbe8dce54471f4b84cb0d34fc9f9080aa 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -9,16 +9,18 @@ #include "MCTargetDesc/MipsABIFlagsSection.h" #include "MCTargetDesc/MipsABIInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "MipsTargetStreamer.h" -#include "MCTargetDesc/MipsBaseInfo.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -39,13 +41,12 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -216,9 +217,15 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned SrcReg, bool Is32BitSym, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); + bool emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, MCSymbol *Sym); + bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); + bool expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, bool Is64FPU, + SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool expandLoadAddress(unsigned DstReg, unsigned BaseReg, const MCOperand &Offset, bool Is32BitAddress, SMLoc IDLoc, MCStreamer &Out, @@ -1011,6 +1018,16 @@ public: Inst.addOperand(MCOperand::createReg(getAFGR64Reg())); } + void addStrictlyAFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getAFGR64Reg())); + } + + void addStrictlyFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR64Reg())); + } + void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(getFGR64Reg())); @@ -1027,6 +1044,15 @@ public: "registers"); } + void addStrictlyFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getFGR32Reg())); + // FIXME: We ought to do this for -integrated-as without -via-file-asm too. + if (!AsmParser.useOddSPReg() && RegIdx.Index & 1) + AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU " + "registers"); + } + void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(getFGRH32Reg())); @@ -1574,6 +1600,11 @@ public: return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31; } + bool isStrictlyFGRAsmReg() const { + // AFGR64 is $0-$15 but we handle this in getAFGR64() + return isRegIdx() && RegIdx.Kind == RegKind_FGR && RegIdx.Index <= 31; + } + bool isHWRegsAsmReg() const { return isRegIdx() && RegIdx.Kind & RegKind_HWRegs && RegIdx.Index <= 31; } @@ -2368,6 +2399,27 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, case Mips::PseudoTRUNC_W_D: return expandTrunc(Inst, true, true, IDLoc, Out, STI) ? MER_Fail : MER_Success; + + case Mips::LoadImmSingleGPR: + return expandLoadImmReal(Inst, true, true, false, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; + case Mips::LoadImmSingleFGR: + return expandLoadImmReal(Inst, true, false, false, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; + case Mips::LoadImmDoubleGPR: + return expandLoadImmReal(Inst, false, true, false, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; + case Mips::LoadImmDoubleFGR: + return expandLoadImmReal(Inst, false, false, true, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; + case Mips::LoadImmDoubleFGR_32: + return expandLoadImmReal(Inst, false, false, false, IDLoc, Out, STI) + ? MER_Fail + : MER_Success; case Mips::Ulh: return expandUlh(Inst, true, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::Ulhu: @@ -2952,6 +3004,302 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, return false; } +// Each double-precision register DO-D15 overlaps with two of the single +// precision registers F0-F31. As an example, all of the following hold true: +// D0 + 1 == F1, F1 + 1 == D1, F1 + 1 == F2, depending on the context. +static unsigned nextReg(unsigned Reg) { + if (MipsMCRegisterClasses[Mips::FGR32RegClassID].contains(Reg)) + return Reg == (unsigned)Mips::F31 ? (unsigned)Mips::F0 : Reg + 1; + switch (Reg) { + default: llvm_unreachable("Unknown register in assembly macro expansion!"); + case Mips::ZERO: return Mips::AT; + case Mips::AT: return Mips::V0; + case Mips::V0: return Mips::V1; + case Mips::V1: return Mips::A0; + case Mips::A0: return Mips::A1; + case Mips::A1: return Mips::A2; + case Mips::A2: return Mips::A3; + case Mips::A3: return Mips::T0; + case Mips::T0: return Mips::T1; + case Mips::T1: return Mips::T2; + case Mips::T2: return Mips::T3; + case Mips::T3: return Mips::T4; + case Mips::T4: return Mips::T5; + case Mips::T5: return Mips::T6; + case Mips::T6: return Mips::T7; + case Mips::T7: return Mips::S0; + case Mips::S0: return Mips::S1; + case Mips::S1: return Mips::S2; + case Mips::S2: return Mips::S3; + case Mips::S3: return Mips::S4; + case Mips::S4: return Mips::S5; + case Mips::S5: return Mips::S6; + case Mips::S6: return Mips::S7; + case Mips::S7: return Mips::T8; + case Mips::T8: return Mips::T9; + case Mips::T9: return Mips::K0; + case Mips::K0: return Mips::K1; + case Mips::K1: return Mips::GP; + case Mips::GP: return Mips::SP; + case Mips::SP: return Mips::FP; + case Mips::FP: return Mips::RA; + case Mips::RA: return Mips::ZERO; + case Mips::D0: return Mips::F1; + case Mips::D1: return Mips::F3; + case Mips::D2: return Mips::F5; + case Mips::D3: return Mips::F7; + case Mips::D4: return Mips::F9; + case Mips::D5: return Mips::F11; + case Mips::D6: return Mips::F13; + case Mips::D7: return Mips::F15; + case Mips::D8: return Mips::F17; + case Mips::D9: return Mips::F19; + case Mips::D10: return Mips::F21; + case Mips::D11: return Mips::F23; + case Mips::D12: return Mips::F25; + case Mips::D13: return Mips::F27; + case Mips::D14: return Mips::F29; + case Mips::D15: return Mips::F31; + } +} + +// FIXME: This method is too general. In principle we should compute the number +// of instructions required to synthesize the immediate inline compared to +// synthesizing the address inline and relying on non .text sections. +// For static O32 and N32 this may yield a small benefit, for static N64 this is +// likely to yield a much larger benefit as we have to synthesize a 64bit +// address to load a 64 bit value. +bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, + MCSymbol *Sym) { + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + + if(IsPicEnabled) { + const MCExpr *GotSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *GotExpr = + MipsMCExpr::create(MipsMCExpr::MEK_GOT, GotSym, getContext()); + + if(isABI_O32() || isABI_N32()) { + TOut.emitRRX(Mips::LW, ATReg, Mips::GP, MCOperand::createExpr(GotExpr), + IDLoc, STI); + } else { //isABI_N64() + TOut.emitRRX(Mips::LD, ATReg, Mips::GP, MCOperand::createExpr(GotExpr), + IDLoc, STI); + } + } else { //!IsPicEnabled + const MCExpr *HiSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *HiExpr = + MipsMCExpr::create(MipsMCExpr::MEK_HI, HiSym, getContext()); + + // FIXME: This is technically correct but gives a different result to gas, + // but gas is incomplete there (it has a fixme noting it doesn't work with + // 64-bit addresses). + // FIXME: With -msym32 option, the address expansion for N64 should probably + // use the O32 / N32 case. It's safe to use the 64 address expansion as the + // symbol's value is considered sign extended. + if(isABI_O32() || isABI_N32()) { + TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, STI); + } else { //isABI_N64() + const MCExpr *HighestSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *HighestExpr = + MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, HighestSym, getContext()); + const MCExpr *HigherSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *HigherExpr = + MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, HigherSym, getContext()); + + TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HighestExpr), IDLoc, + STI); + TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, + MCOperand::createExpr(HigherExpr), IDLoc, STI); + TOut.emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, STI); + TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HiExpr), + IDLoc, STI); + TOut.emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, STI); + } + } + return false; +} + +bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, + bool Is64FPU, SMLoc IDLoc, + MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + assert(Inst.getNumOperands() == 2 && "Invalid operand count"); + assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() && + "Invalid instruction operand."); + + unsigned FirstReg = Inst.getOperand(0).getReg(); + uint64_t ImmOp64 = Inst.getOperand(1).getImm(); + + uint32_t HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; + // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the + // exponent field), convert it to double (e.g. 1 to 1.0) + if ((HiImmOp64 & 0x7ff00000) == 0) { + APFloat RealVal(APFloat::IEEEdouble(), ImmOp64); + ImmOp64 = RealVal.bitcastToAPInt().getZExtValue(); + } + + uint32_t LoImmOp64 = ImmOp64 & 0xffffffff; + HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32; + + if (IsSingle) { + // Conversion of a double in an uint64_t to a float in a uint32_t, + // retaining the bit pattern of a float. + uint32_t ImmOp32; + double doubleImm = BitsToDouble(ImmOp64); + float tmp_float = static_cast(doubleImm); + ImmOp32 = FloatToBits(tmp_float); + + if (IsGPR) { + if (loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, true, IDLoc, + Out, STI)) + return true; + return false; + } else { + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + if (LoImmOp64 == 0) { + if (loadImmediate(ImmOp32, ATReg, Mips::NoRegister, true, true, IDLoc, + Out, STI)) + return true; + TOut.emitRR(Mips::MTC1, FirstReg, ATReg, IDLoc, STI); + return false; + } + + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. + MCSection *ReadOnlySection = getContext().getELFSection( + ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitIntValue(ImmOp32, 4); + getStreamer().SwitchSection(CS); + + if(emitPartialAddress(TOut, IDLoc, Sym)) + return true; + TOut.emitRRX(Mips::LWC1, FirstReg, ATReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + } + return false; + } + + // if(!IsSingle) + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + + if (IsGPR) { + if (LoImmOp64 == 0) { + if(isABI_N32() || isABI_N64()) { + if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, false, true, + IDLoc, Out, STI)) + return true; + return false; + } else { + if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, true, true, + IDLoc, Out, STI)) + return true; + + if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, true, + IDLoc, Out, STI)) + return true; + return false; + } + } + + MCSection *CS = getStreamer().getCurrentSectionOnly(); + MCSection *ReadOnlySection = getContext().getELFSection( + ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitIntValue(HiImmOp64, 4); + getStreamer().EmitIntValue(LoImmOp64, 4); + getStreamer().SwitchSection(CS); + + if(emitPartialAddress(TOut, IDLoc, Sym)) + return true; + if(isABI_N64()) + TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + else + TOut.emitRRX(Mips::ADDiu, ATReg, ATReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + + if(isABI_N32() || isABI_N64()) + TOut.emitRRI(Mips::LD, FirstReg, ATReg, 0, IDLoc, STI); + else { + TOut.emitRRI(Mips::LW, FirstReg, ATReg, 0, IDLoc, STI); + TOut.emitRRI(Mips::LW, nextReg(FirstReg), ATReg, 4, IDLoc, STI); + } + return false; + } else { // if(!IsGPR && !IsSingle) + if ((LoImmOp64 == 0) && + !((HiImmOp64 & 0xffff0000) && (HiImmOp64 & 0x0000ffff))) { + // FIXME: In the case where the constant is zero, we can load the + // register directly from the zero register. + if (loadImmediate(HiImmOp64, ATReg, Mips::NoRegister, true, true, IDLoc, + Out, STI)) + return true; + if (isABI_N32() || isABI_N64()) + TOut.emitRR(Mips::DMTC1, FirstReg, ATReg, IDLoc, STI); + else if (hasMips32r2()) { + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, ATReg, IDLoc, STI); + } else { + TOut.emitRR(Mips::MTC1, nextReg(FirstReg), ATReg, IDLoc, STI); + TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI); + } + return false; + } + + MCSection *CS = getStreamer().getCurrentSectionOnly(); + // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections + // where appropriate. + MCSection *ReadOnlySection = getContext().getELFSection( + ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); + + MCSymbol *Sym = getContext().createTempSymbol(); + const MCExpr *LoSym = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + const MipsMCExpr *LoExpr = + MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext()); + + getStreamer().SwitchSection(ReadOnlySection); + getStreamer().EmitLabel(Sym, IDLoc); + getStreamer().EmitIntValue(HiImmOp64, 4); + getStreamer().EmitIntValue(LoImmOp64, 4); + getStreamer().SwitchSection(CS); + + if(emitPartialAddress(TOut, IDLoc, Sym)) + return true; + TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, ATReg, + MCOperand::createExpr(LoExpr), IDLoc, STI); + } + return false; +} + bool MipsAsmParser::expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI) { @@ -4318,45 +4666,6 @@ bool MipsAsmParser::expandDMULMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return false; } -static unsigned nextReg(unsigned Reg) { - switch (Reg) { - case Mips::ZERO: return Mips::AT; - case Mips::AT: return Mips::V0; - case Mips::V0: return Mips::V1; - case Mips::V1: return Mips::A0; - case Mips::A0: return Mips::A1; - case Mips::A1: return Mips::A2; - case Mips::A2: return Mips::A3; - case Mips::A3: return Mips::T0; - case Mips::T0: return Mips::T1; - case Mips::T1: return Mips::T2; - case Mips::T2: return Mips::T3; - case Mips::T3: return Mips::T4; - case Mips::T4: return Mips::T5; - case Mips::T5: return Mips::T6; - case Mips::T6: return Mips::T7; - case Mips::T7: return Mips::S0; - case Mips::S0: return Mips::S1; - case Mips::S1: return Mips::S2; - case Mips::S2: return Mips::S3; - case Mips::S3: return Mips::S4; - case Mips::S4: return Mips::S5; - case Mips::S5: return Mips::S6; - case Mips::S6: return Mips::S7; - case Mips::S7: return Mips::T8; - case Mips::T8: return Mips::T9; - case Mips::T9: return Mips::K0; - case Mips::K0: return Mips::K1; - case Mips::K1: return Mips::GP; - case Mips::GP: return Mips::SP; - case Mips::SP: return Mips::FP; - case Mips::FP: return Mips::RA; - case Mips::RA: return Mips::ZERO; - default: return 0; - } - -} - // Expand 'ld $ offset($reg2)' to 'lw $, offset($reg2); // lw $>, offset+4($reg2)' // or expand 'sd $ offset($reg2)' to 'sw $, offset($reg2); diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 3650cc9fe07286a461849a189859a4e3d7f0399d..40e337eb97ca14be0d1645b16e51e8b7018f3f3e 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_target(MipsCodeGen MipsSubtarget.cpp MipsTargetMachine.cpp MipsTargetObjectFile.cpp + MicroMipsSizeReduction.cpp ) add_subdirectory(InstPrinter) diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index ecdf6b0de6e7f101d57bc4f4887dd91f907169c0..b0b99432303638d36c64ba232c39a881f2d77bf8 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -17,14 +17,14 @@ #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 3304449efb91ee2a486eb789fb46c31a022260d1..6d3d4db0360320e488d1a74257abf833efa74f19 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// // -#include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsAsmBackend.h" +#include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" @@ -366,6 +366,7 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_MICROMIPS_TLS_LDM", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_LO16", 0, 16, 0 }, + { "fixup_MICROMIPS_GOTTPREL", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 }, { "fixup_Mips_SUB", 0, 64, 0 }, @@ -437,6 +438,7 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_MICROMIPS_TLS_LDM", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_HI16", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_DTPREL_LO16", 16, 16, 0 }, + { "fixup_MICROMIPS_GOTTPREL", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 }, { "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 }, { "fixup_Mips_SUB", 0, 64, 0 }, diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 324fd3c6fe14cb5d9f304db05c21eb183b36e26d..d116ac3471bc00704257624b9630a9b763a3469a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -10,13 +10,13 @@ #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -374,6 +374,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_MICROMIPS_TLS_DTPREL_HI16; case Mips::fixup_MICROMIPS_TLS_DTPREL_LO16: return ELF::R_MICROMIPS_TLS_DTPREL_LO16; + case Mips::fixup_MICROMIPS_GOTTPREL: + return ELF::R_MICROMIPS_TLS_GOTTPREL; case Mips::fixup_MICROMIPS_TLS_TPREL_HI16: return ELF::R_MICROMIPS_TLS_TPREL_HI16; case Mips::fixup_MICROMIPS_TLS_TPREL_LO16: diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index 4eeccc3995fd42f7c92ff7cbbf006f0ed671475d..f658aadff22fec5e2c28fc6addd1dae168fd483e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -10,17 +10,17 @@ #include "MipsELFStreamer.h" #include "MipsOptionRecord.h" #include "MipsTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" using namespace llvm; void MipsELFStreamer::EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, bool) { MCELFStreamer::EmitInstruction(Inst, STI); MCContext &Context = getContext(); diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index 72cde1c908453d09ee79c628947a0c43c97f054f..f5eda112817ef8ce6b438107c2c769e594aa8362 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -45,7 +45,8 @@ public: /// \p Inst is actually emitted. For example, we can inspect the operands and /// gather sufficient information that allows us to reason about the register /// usage for the translation unit. - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool = false) override; /// Overriding this function allows us to record all labels that should be /// marked as microMIPS. Based on this data marking is done in diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index 149296212eca94052b3b48e03003f1cd6216e67a..6148a1b622c826f578fe79da12415982dc28cdf8 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -203,6 +203,9 @@ namespace Mips { // resulting in - R_MICROMIPS_TLS_DTPREL_LO16 fixup_MICROMIPS_TLS_DTPREL_LO16, + // resulting in - R_MICROMIPS_TLS_GOTTPREL. + fixup_MICROMIPS_GOTTPREL, + // resulting in - R_MICROMIPS_TLS_TPREL_HI16 fixup_MICROMIPS_TLS_TPREL_HI16, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp index ebe3c578488826fa6d7d029e326d14e1dd75b176..11411d997bb3b1a6605fee6d9295da8a7d1fd907 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp @@ -23,7 +23,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) { if ((TheTriple.getArch() == Triple::mips64el) || (TheTriple.getArch() == Triple::mips64)) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } // FIXME: This condition isn't quite right but it's the best we can do until diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 5685f0426e9b4035f9a2f40c32ef29bc4bc1dd0f..0330824fd614e0e5769b58d1baef40e1b2ea98da 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "MipsMCCodeEmitter.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" -#include "MipsMCCodeEmitter.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" @@ -669,7 +669,8 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl &Fixups, : Mips::fixup_Mips_DTPREL_LO; break; case MipsMCExpr::MEK_GOTTPREL: - FixupKind = Mips::fixup_Mips_GOTTPREL; + FixupKind = isMicroMips(STI) ? Mips::fixup_MICROMIPS_GOTTPREL + : Mips::fixup_Mips_GOTTPREL; break; case MipsMCExpr::MEK_GOT: FixupKind = isMicroMips(STI) ? Mips::fixup_MICROMIPS_GOT16 diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index be04480044d48268b3627a265641011cbe42b846..aad6bf378ea006cbaa4dd3a66f9b131da4ba52d9 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -8,14 +8,14 @@ //===----------------------------------------------------------------------===// #include "MipsMCExpr.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 8c2617a687b8f813603933cb3039b5002069ec90..9266f0e216d1148fc6acdd837512184442275558 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -139,8 +139,8 @@ private: public: /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to mask dangerous instructions. - void EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) override { + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + bool) override { // Sandbox indirect jumps. if (isIndirectJump(Inst)) { if (PendingCall) diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp index 74d5e4cc98419bddaf2e32fc09bcb9481dbfe636..2d84528e7469f2caecf12a48d9af3f5227c6dce3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// +#include "MipsOptionRecord.h" #include "MipsABIInfo.h" #include "MipsELFStreamer.h" -#include "MipsOptionRecord.h" #include "MipsTargetStreamer.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include using namespace llvm; diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 2d4083b27ed17481e0c6f61659892de5b826f292..0cd4aebe4d1641748dfa48649424fd49eea64582 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -11,19 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsABIInfo.h" #include "MipsTargetStreamer.h" #include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsABIInfo.h" #include "MipsELFStreamer.h" #include "MipsMCExpr.h" #include "MipsMCTargetDesc.h" #include "MipsTargetObjectFile.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..35948e36ad91385f4aee2256c02063e9b26cba09 --- /dev/null +++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp @@ -0,0 +1,392 @@ +//=== MicroMipsSizeReduction.cpp - MicroMips size reduction pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +///\file +/// This pass is used to reduce the size of instructions where applicable. +/// +/// TODO: Implement microMIPS64 support. +/// TODO: Implement support for reducing into lwp/swp instruction. +//===----------------------------------------------------------------------===// +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "micromips-reduce-size" + +STATISTIC(NumReduced, "Number of 32-bit instructions reduced to 16-bit ones"); + +namespace { + +/// Order of operands to transfer +// TODO: Will be extended when additional optimizations are added +enum OperandTransfer { + OT_NA, ///< Not applicable + OT_OperandsAll, ///< Transfer all operands +}; + +/// Reduction type +// TODO: Will be extended when additional optimizations are added +enum ReduceType { + RT_OneInstr ///< Reduce one instruction into a smaller instruction +}; + +// Information about immediate field restrictions +struct ImmField { + ImmField() : ImmFieldOperand(-1), Shift(0), LBound(0), HBound(0) {} + ImmField(uint8_t Shift, int16_t LBound, int16_t HBound, + int8_t ImmFieldOperand) + : ImmFieldOperand(ImmFieldOperand), Shift(Shift), LBound(LBound), + HBound(HBound) {} + int8_t ImmFieldOperand; // Immediate operand, -1 if it does not exist + uint8_t Shift; // Shift value + int16_t LBound; // Low bound of the immediate operand + int16_t HBound; // High bound of the immediate operand +}; + +/// Information about operands +// TODO: Will be extended when additional optimizations are added +struct OpInfo { + OpInfo(enum OperandTransfer TransferOperands) + : TransferOperands(TransferOperands) {} + OpInfo() : TransferOperands(OT_NA) {} + + enum OperandTransfer + TransferOperands; ///< Operands to transfer to the new instruction +}; + +// Information about opcodes +struct OpCodes { + OpCodes(unsigned WideOpc, unsigned NarrowOpc) + : WideOpc(WideOpc), NarrowOpc(NarrowOpc) {} + + unsigned WideOpc; ///< Wide opcode + unsigned NarrowOpc; ///< Narrow opcode +}; + +/// ReduceTable - A static table with information on mapping from wide +/// opcodes to narrow +struct ReduceEntry { + + enum ReduceType eRType; ///< Reduction type + bool (*ReduceFunction)( + MachineInstr *MI, + const ReduceEntry &Entry); ///< Pointer to reduce function + struct OpCodes Ops; ///< All relevant OpCodes + struct OpInfo OpInf; ///< Characteristics of operands + struct ImmField Imm; ///< Characteristics of immediate field + + ReduceEntry(enum ReduceType RType, struct OpCodes Op, + bool (*F)(MachineInstr *MI, const ReduceEntry &Entry), + struct OpInfo OpInf, struct ImmField Imm) + : eRType(RType), ReduceFunction(F), Ops(Op), OpInf(OpInf), Imm(Imm) {} + + unsigned NarrowOpc() const { return Ops.NarrowOpc; } + unsigned WideOpc() const { return Ops.WideOpc; } + int16_t LBound() const { return Imm.LBound; } + int16_t HBound() const { return Imm.HBound; } + uint8_t Shift() const { return Imm.Shift; } + int8_t ImmField() const { return Imm.ImmFieldOperand; } + enum OperandTransfer TransferOperands() const { + return OpInf.TransferOperands; + } + enum ReduceType RType() const { return eRType; } + + // operator used by std::equal_range + bool operator<(const unsigned int r) const { return (WideOpc() < r); } + + // operator used by std::equal_range + friend bool operator<(const unsigned int r, const struct ReduceEntry &re) { + return (r < re.WideOpc()); + } +}; + +class MicroMipsSizeReduce : public MachineFunctionPass { +public: + static char ID; + MicroMipsSizeReduce(); + + static const MipsInstrInfo *MipsII; + const MipsSubtarget *Subtarget; + + bool runOnMachineFunction(MachineFunction &MF) override; + + llvm::StringRef getPassName() const override { + return "microMIPS instruction size reduction pass"; + } + +private: + /// Reduces width of instructions in the specified basic block. + bool ReduceMBB(MachineBasicBlock &MBB); + + /// Attempts to reduce MI, returns true on success. + bool ReduceMI(const MachineBasicBlock::instr_iterator &MII); + + // Attempts to reduce LW/SW instruction into LWSP/SWSP, + // returns true on success. + static bool ReduceXWtoXWSP(MachineInstr *MI, const ReduceEntry &Entry); + + // Attempts to reduce LBU/LHU instruction into LBU16/LHU16, + // returns true on success. + static bool ReduceLXUtoLXU16(MachineInstr *MI, const ReduceEntry &Entry); + + // Attempts to reduce SB/SH instruction into SB16/SH16, + // returns true on success. + static bool ReduceSXtoSX16(MachineInstr *MI, const ReduceEntry &Entry); + + // Attempts to reduce arithmetic instructions, returns true on success + static bool ReduceArithmeticInstructions(MachineInstr *MI, + const ReduceEntry &Entry); + + // Changes opcode of an instruction + static bool ReplaceInstruction(MachineInstr *MI, const ReduceEntry &Entry); + + // Table with transformation rules for each instruction + static llvm::SmallVector ReduceTable; +}; + +char MicroMipsSizeReduce::ID = 0; +const MipsInstrInfo *MicroMipsSizeReduce::MipsII; + +// This table must be sorted by WideOpc as a main criterion and +// ReduceType as a sub-criterion (when wide opcodes are the same) +llvm::SmallVector MicroMipsSizeReduce::ReduceTable = { + + // ReduceType, OpCodes, ReduceFunction, + // OpInfo(TransferOperands), + // ImmField(Shift, LBound, HBound, ImmFieldPosition) + {RT_OneInstr, OpCodes(Mips::ADDu, Mips::ADDU16_MM), + ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), + ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::ADDu_MM, Mips::ADDU16_MM), + ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), + ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::LBu, Mips::LBU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)}, + {RT_OneInstr, OpCodes(Mips::LBu_MM, Mips::LBU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)}, + {RT_OneInstr, OpCodes(Mips::LHu, Mips::LHU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::LHu_MM, Mips::LHU16_MM), ReduceLXUtoLXU16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::LW, Mips::LWSP_MM), ReduceXWtoXWSP, + OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, + {RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP, + OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, + {RT_OneInstr, OpCodes(Mips::SB, Mips::SB16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SB_MM, Mips::SB16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SH, Mips::SH16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SH_MM, Mips::SH16_MM), ReduceSXtoSX16, + OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)}, + {RT_OneInstr, OpCodes(Mips::SUBu, Mips::SUBU16_MM), + ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), + ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::SUBu_MM, Mips::SUBU16_MM), + ReduceArithmeticInstructions, OpInfo(OT_OperandsAll), + ImmField(0, 0, 0, -1)}, + {RT_OneInstr, OpCodes(Mips::SW, Mips::SWSP_MM), ReduceXWtoXWSP, + OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, + {RT_OneInstr, OpCodes(Mips::SW_MM, Mips::SWSP_MM), ReduceXWtoXWSP, + OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)}, +}; +} + +// Returns true if the machine operand MO is register SP +static bool IsSP(const MachineOperand &MO) { + if (MO.isReg() && ((MO.getReg() == Mips::SP))) + return true; + return false; +} + +// Returns true if the machine operand MO is register $16, $17, or $2-$7. +static bool isMMThreeBitGPRegister(const MachineOperand &MO) { + if (MO.isReg() && Mips::GPRMM16RegClass.contains(MO.getReg())) + return true; + return false; +} + +// Returns true if the machine operand MO is register $0, $17, or $2-$7. +static bool isMMSourceRegister(const MachineOperand &MO) { + if (MO.isReg() && Mips::GPRMM16ZeroRegClass.contains(MO.getReg())) + return true; + return false; +} + +// Returns true if the operand Op is an immediate value +// and writes the immediate value into variable Imm +static bool GetImm(MachineInstr *MI, unsigned Op, int64_t &Imm) { + + if (!MI->getOperand(Op).isImm()) + return false; + Imm = MI->getOperand(Op).getImm(); + return true; +} + +// Returns true if the variable Value has the number of least-significant zero +// bits equal to Shift and if the shifted value is between the bounds +static bool InRange(int64_t Value, unsigned short Shift, int LBound, + int HBound) { + int64_t Value2 = Value >> Shift; + if ((Value2 << Shift) == Value && (Value2 >= LBound) && (Value2 < HBound)) + return true; + return false; +} + +// Returns true if immediate operand is in range +static bool ImmInRange(MachineInstr *MI, const ReduceEntry &Entry) { + + int64_t offset; + + if (!GetImm(MI, Entry.ImmField(), offset)) + return false; + + if (!InRange(offset, Entry.Shift(), Entry.LBound(), Entry.HBound())) + return false; + + return true; +} + +MicroMipsSizeReduce::MicroMipsSizeReduce() : MachineFunctionPass(ID) {} + +bool MicroMipsSizeReduce::ReduceMI( + const MachineBasicBlock::instr_iterator &MII) { + + MachineInstr *MI = &*MII; + unsigned Opcode = MI->getOpcode(); + + // Search the table. + llvm::SmallVector::const_iterator Start = + std::begin(ReduceTable); + llvm::SmallVector::const_iterator End = + std::end(ReduceTable); + + std::pair::const_iterator, + llvm::SmallVector::const_iterator> + Range = std::equal_range(Start, End, Opcode); + + if (Range.first == Range.second) + return false; + + for (llvm::SmallVector::const_iterator Entry = Range.first; + Entry != Range.second; ++Entry) + if (((*Entry).ReduceFunction)(&(*MII), *Entry)) + return true; + + return false; +} + +bool MicroMipsSizeReduce::ReduceXWtoXWSP(MachineInstr *MI, + const ReduceEntry &Entry) { + + if (!ImmInRange(MI, Entry)) + return false; + + if (!IsSP(MI->getOperand(1))) + return false; + + return ReplaceInstruction(MI, Entry); +} + +bool MicroMipsSizeReduce::ReduceArithmeticInstructions( + MachineInstr *MI, const ReduceEntry &Entry) { + + if (!isMMThreeBitGPRegister(MI->getOperand(0)) || + !isMMThreeBitGPRegister(MI->getOperand(1)) || + !isMMThreeBitGPRegister(MI->getOperand(2))) + return false; + + return ReplaceInstruction(MI, Entry); +} + +bool MicroMipsSizeReduce::ReduceLXUtoLXU16(MachineInstr *MI, + const ReduceEntry &Entry) { + + if (!ImmInRange(MI, Entry)) + return false; + + if (!isMMThreeBitGPRegister(MI->getOperand(0)) || + !isMMThreeBitGPRegister(MI->getOperand(1))) + return false; + + return ReplaceInstruction(MI, Entry); +} + +bool MicroMipsSizeReduce::ReduceSXtoSX16(MachineInstr *MI, + const ReduceEntry &Entry) { + + if (!ImmInRange(MI, Entry)) + return false; + + if (!isMMSourceRegister(MI->getOperand(0)) || + !isMMThreeBitGPRegister(MI->getOperand(1))) + return false; + + return ReplaceInstruction(MI, Entry); +} + +bool MicroMipsSizeReduce::ReduceMBB(MachineBasicBlock &MBB) { + bool Modified = false; + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), + E = MBB.instr_end(); + MachineBasicBlock::instr_iterator NextMII; + + // Iterate through the instructions in the basic block + for (; MII != E; MII = NextMII) { + NextMII = std::next(MII); + MachineInstr *MI = &*MII; + + // Don't reduce bundled instructions or pseudo operations + if (MI->isBundle() || MI->isTransient()) + continue; + + // Try to reduce 32-bit instruction into 16-bit instruction + Modified |= ReduceMI(MII); + } + + return Modified; +} + +bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI, + const ReduceEntry &Entry) { + + MI->setDesc(MipsII->get(Entry.NarrowOpc())); + DEBUG(dbgs() << "Converted into 16-bit: " << *MI); + ++NumReduced; + return true; +} + +bool MicroMipsSizeReduce::runOnMachineFunction(MachineFunction &MF) { + + Subtarget = &static_cast(MF.getSubtarget()); + + // TODO: Add support for other subtargets: + // microMIPS32r6 and microMIPS64r6 + if (!Subtarget->inMicroMipsMode() || !Subtarget->hasMips32r2()) + return false; + + MipsII = static_cast(Subtarget->getInstrInfo()); + + bool Modified = false; + MachineFunction::iterator I = MF.begin(), E = MF.end(); + + for (; I != E; ++I) + Modified |= ReduceMBB(*I); + return Modified; +} + +/// Returns an instance of the MicroMips size reduction pass. +FunctionPass *llvm::createMicroMipsSizeReductionPass() { + return new MicroMipsSizeReduce(); +} diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index d9faf3325cacd13d6dbc6c202f8d97d532312017..008b9505ee26b99c429d34b77d28adc2ee07028b 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -23,15 +23,16 @@ namespace llvm { class ModulePass; class FunctionPass; - ModulePass *createMipsOs16Pass(MipsTargetMachine &TM); - ModulePass *createMips16HardFloatPass(MipsTargetMachine &TM); + ModulePass *createMipsOs16Pass(); + ModulePass *createMips16HardFloatPass(); - FunctionPass *createMipsModuleISelDagPass(MipsTargetMachine &TM); - FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM); - FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); + FunctionPass *createMipsModuleISelDagPass(); + FunctionPass *createMipsOptimizePICCallPass(); + FunctionPass *createMipsDelaySlotFillerPass(); FunctionPass *createMipsHazardSchedule(); - FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); + FunctionPass *createMipsLongBranchPass(); FunctionPass *createMipsConstantIslandPass(); + FunctionPass *createMicroMipsSizeReductionPass(); } // end namespace llvm; #endif diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 9615bc38bfcef02c2000be28f832e2b4f4b4bc34..f24761d7d10135ab0f3f69ddce194b069a33c444 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -185,6 +185,9 @@ def FeatureUseTCCInDIV : SubtargetFeature< "UseTCCInDIV", "false", "Force the assembler to use trapping">; +def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true", + "Disable 4-operand madd.fmt and related instructions">; + //===----------------------------------------------------------------------===// // Mips processors supported. //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index e7ceca9612a92e730a392b4011bd3247b4e48b36..09e41e1423aee154cd928a03f3ca1299b3914e89 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -1,4 +1,4 @@ -//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===// +//===- Mips16FrameLowering.cpp - Mips16 Frame Information -----------------===// // // The LLVM Compiler Infrastructure // @@ -17,14 +17,23 @@ #include "MipsInstrInfo.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetFrameLowering.h" +#include +#include +#include using namespace llvm; @@ -63,7 +72,7 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF, const std::vector &CSI = MFI.getCalleeSavedInfo(); - if (CSI.size()) { + if (!CSI.empty()) { const std::vector &CSI = MFI.getCalleeSavedInfo(); for (std::vector::const_iterator I = CSI.begin(), @@ -80,7 +89,6 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF, if (hasFP(MF)) BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0) .addReg(Mips::SP).setMIFlag(MachineInstr::FrameSetup); - } void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index a71b161b24ccff001f3372b7913e76c148f059d4..3c2426129e49a5d935a13e420a16dd5d11ef1408 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MipsTargetMachine.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" @@ -28,14 +29,16 @@ namespace { public: static char ID; - Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {} + Mips16HardFloat() : ModulePass(ID) {} StringRef getPassName() const override { return "MIPS16 Hard Float Pass"; } - bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + ModulePass::getAnalysisUsage(AU); + } - protected: - const MipsTargetMachine &TM; + bool runOnModule(Module &M) override; }; static void EmitInlineAsm(LLVMContext &C, BasicBlock *BB, StringRef AsmText) { @@ -490,15 +493,14 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, // remove the use-soft-float attribute // static void removeUseSoftFloat(Function &F) { - AttributeList A; + AttrBuilder B; DEBUG(errs() << "removing -use-soft-float\n"); - A = A.addAttribute(F.getContext(), AttributeList::FunctionIndex, - "use-soft-float", "false"); - F.removeAttributes(AttributeList::FunctionIndex, A); + B.addAttribute("use-soft-float", "false"); + F.removeAttributes(AttributeList::FunctionIndex, B); if (F.hasFnAttribute("use-soft-float")) { DEBUG(errs() << "still has -use-soft-float\n"); } - F.addAttributes(AttributeList::FunctionIndex, A); + F.addAttributes(AttributeList::FunctionIndex, B); } @@ -521,6 +523,8 @@ static void removeUseSoftFloat(Function &F) { // during call lowering but it should be moved here in the future. // bool Mips16HardFloat::runOnModule(Module &M) { + auto &TM = static_cast( + getAnalysis().getTM()); DEBUG(errs() << "Run on Module Mips16HardFloat\n"); bool Modified = false; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { @@ -542,6 +546,6 @@ bool Mips16HardFloat::runOnModule(Module &M) { } -ModulePass *llvm::createMips16HardFloatPass(MipsTargetMachine &TM) { - return new Mips16HardFloat(TM); +ModulePass *llvm::createMips16HardFloatPass() { + return new Mips16HardFloat(); } diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 2a9d96205eb96e8d26111b7be22fc7b43494dc38..f7ff7c3dc7bbfaee67d950d485b9cb964461c194 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// +#include "MipsAsmPrinter.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCNaCl.h" #include "Mips.h" -#include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" #include "MipsTargetMachine.h" #include "MipsTargetStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -43,7 +44,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -81,7 +81,7 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AsmPrinter::runOnMachineFunction(MF); - EmitXRayTable(); + emitXRayTable(); return true; } @@ -273,9 +273,9 @@ void MipsAsmPrinter::printSavedRegsBitmask() { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); const std::vector &CSI = MFI.getCalleeSavedInfo(); // size of stack area to which FP callee-saved regs are saved. - unsigned CPURegSize = Mips::GPR32RegClass.getSize(); - unsigned FGR32RegSize = Mips::FGR32RegClass.getSize(); - unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize(); + unsigned CPURegSize = TRI->getRegSizeInBits(Mips::GPR32RegClass) / 8; + unsigned FGR32RegSize = TRI->getRegSizeInBits(Mips::FGR32RegClass) / 8; + unsigned AFGR64RegSize = TRI->getRegSizeInBits(Mips::AFGR64RegClass) / 8; bool HasAFGR64Reg = false; unsigned CSFPRegsSize = 0; @@ -1148,39 +1148,6 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { recordSled(CurSled, MI, Kind); } -void MipsAsmPrinter::EmitXRayTable() { - if (Sleds.empty()) - return; - if (Subtarget->isTargetELF()) { - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); - MCSection *Section; - - if (Fn->hasComdat()) - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - else - Section = - OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC, 0, CurrentFnSym->getName()); - - OutStreamer->SwitchSection(Section); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, Subtarget->isGP64bit() ? 8 : 4); - OutStreamer->EmitSymbolValue(CurrentFnSym, Subtarget->isGP64bit() ? 8 : 4); - auto Kind = static_cast(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(Subtarget->isGP64bit() ? 14 : 6); - } - OutStreamer->SwitchSection(PrevSection); - } - Sleds.clear(); -} - void MipsAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) { EmitSled(MI, SledKind::FUNCTION_ENTER); } diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp index 7af988c1f64d914dd1dc3d0c9799a7cb2491606f..6a03ee9927d74775789736d21134f30944bf3b5f 100644 --- a/lib/Target/Mips/MipsCCState.cpp +++ b/lib/Target/Mips/MipsCCState.cpp @@ -38,7 +38,7 @@ static bool isF128SoftLibCall(const char *CallSym) { /// This function returns true if Ty is fp128, {f128} or i128 which was /// originally a fp128. -static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) { +static bool originalTypeIsF128(const Type *Ty, const char *Func) { if (Ty->isFP128Ty()) return true; @@ -46,12 +46,25 @@ static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) { Ty->getStructElementType(0)->isFP128Ty()) return true; - const ExternalSymbolSDNode *ES = - dyn_cast_or_null(CallNode); - // If the Ty is i128 and the function being called is a long double emulation // routine, then the original type is f128. - return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol())); + return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func)); +} + +/// Return true if the original type was vXfXX. +static bool originalEVTTypeIsVectorFloat(EVT Ty) { + if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint()) + return true; + + return false; +} + +/// Return true if the original type was vXfXX / vXfXX. +static bool originalTypeIsVectorFloat(const Type * Ty) { + if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy()) + return true; + + return false; } MipsCCState::SpecialCallingConvType @@ -73,16 +86,16 @@ MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee, void MipsCCState::PreAnalyzeCallResultForF128( const SmallVectorImpl &Ins, - const TargetLowering::CallLoweringInfo &CLI) { + const Type *RetTy, const char *Call) { for (unsigned i = 0; i < Ins.size(); ++i) { OriginalArgWasF128.push_back( - originalTypeIsF128(CLI.RetTy, CLI.Callee.getNode())); - OriginalArgWasFloat.push_back(CLI.RetTy->isFloatingPointTy()); + originalTypeIsF128(RetTy, Call)); + OriginalArgWasFloat.push_back(RetTy->isFloatingPointTy()); } } -/// Identify lowered values that originated from f128 arguments and record -/// this for use by RetCC_MipsN. +/// Identify lowered values that originated from f128 or float arguments and +/// record this for use by RetCC_MipsN. void MipsCCState::PreAnalyzeReturnForF128( const SmallVectorImpl &Outs) { const MachineFunction &MF = getMachineFunction(); @@ -94,23 +107,44 @@ void MipsCCState::PreAnalyzeReturnForF128( } } -/// Identify lowered values that originated from f128 arguments and record +/// Identify lower values that originated from vXfXX and record +/// this. +void MipsCCState::PreAnalyzeCallResultForVectorFloat( + const SmallVectorImpl &Ins, const Type *RetTy) { + for (unsigned i = 0; i < Ins.size(); ++i) { + OriginalRetWasFloatVector.push_back(originalTypeIsVectorFloat(RetTy)); + } +} + +/// Identify lowered values that originated from vXfXX arguments and record /// this. +void MipsCCState::PreAnalyzeReturnForVectorFloat( + const SmallVectorImpl &Outs) { + for (unsigned i = 0; i < Outs.size(); ++i) { + ISD::OutputArg Out = Outs[i]; + OriginalRetWasFloatVector.push_back( + originalEVTTypeIsVectorFloat(Out.ArgVT)); + } +} + +/// Identify lowered values that originated from f128, float and sret to vXfXX +/// arguments and record this. void MipsCCState::PreAnalyzeCallOperands( const SmallVectorImpl &Outs, std::vector &FuncArgs, - const SDNode *CallNode) { + const char *Func) { for (unsigned i = 0; i < Outs.size(); ++i) { - OriginalArgWasF128.push_back( - originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode)); - OriginalArgWasFloat.push_back( - FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy()); + TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex]; + + OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, Func)); + OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy()); + OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy()); CallOperandIsFixed.push_back(Outs[i].IsFixed); } } -/// Identify lowered values that originated from f128 arguments and record -/// this. +/// Identify lowered values that originated from f128, float and vXfXX arguments +/// and record this. void MipsCCState::PreAnalyzeFormalArgumentsForF128( const SmallVectorImpl &Ins) { const MachineFunction &MF = getMachineFunction(); @@ -123,6 +157,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( if (Ins[i].Flags.isSRet()) { OriginalArgWasF128.push_back(false); OriginalArgWasFloat.push_back(false); + OriginalArgWasFloatVector.push_back(false); continue; } @@ -132,5 +167,10 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128( OriginalArgWasF128.push_back( originalTypeIsF128(FuncArg->getType(), nullptr)); OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy()); + + // The MIPS vector ABI exhibits a corner case of sorts or quirk; if the + // first argument is actually an SRet pointer to a vector, then the next + // argument slot is $a2. + OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy()); } } diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h index 081c393a09be04d042792d6bbeb64fb0785a065d..27901699480bfe194b930b03f4b9978f2b09354c 100644 --- a/lib/Target/Mips/MipsCCState.h +++ b/lib/Target/Mips/MipsCCState.h @@ -31,7 +31,7 @@ private: /// Identify lowered values that originated from f128 arguments and record /// this for use by RetCC_MipsN. void PreAnalyzeCallResultForF128(const SmallVectorImpl &Ins, - const TargetLowering::CallLoweringInfo &CLI); + const Type *RetTy, const char * Func); /// Identify lowered values that originated from f128 arguments and record /// this for use by RetCC_MipsN. @@ -42,19 +42,36 @@ private: void PreAnalyzeCallOperands(const SmallVectorImpl &Outs, std::vector &FuncArgs, - const SDNode *CallNode); + const char *Func); /// Identify lowered values that originated from f128 arguments and record - /// this. + /// this for use by RetCC_MipsN. void PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl &Ins); + void + PreAnalyzeCallResultForVectorFloat(const SmallVectorImpl &Ins, + const Type *RetTy); + + void PreAnalyzeFormalArgumentsForVectorFloat( + const SmallVectorImpl &Ins); + + void + PreAnalyzeReturnForVectorFloat(const SmallVectorImpl &Outs); + /// Records whether the value has been lowered from an f128. SmallVector OriginalArgWasF128; /// Records whether the value has been lowered from float. SmallVector OriginalArgWasFloat; + /// Records whether the value has been lowered from a floating point vector. + SmallVector OriginalArgWasFloatVector; + + /// Records whether the return value has been lowered from a floating point + /// vector. + SmallVector OriginalRetWasFloatVector; + /// Records whether the value was a fixed argument. /// See ISD::OutputArg::IsFixed, SmallVector CallOperandIsFixed; @@ -73,11 +90,12 @@ public: AnalyzeCallOperands(const SmallVectorImpl &Outs, CCAssignFn Fn, std::vector &FuncArgs, - const SDNode *CallNode) { - PreAnalyzeCallOperands(Outs, FuncArgs, CallNode); + const char *Func) { + PreAnalyzeCallOperands(Outs, FuncArgs, Func); CCState::AnalyzeCallOperands(Outs, Fn); OriginalArgWasF128.clear(); OriginalArgWasFloat.clear(); + OriginalArgWasFloatVector.clear(); CallOperandIsFixed.clear(); } @@ -96,31 +114,38 @@ public: CCState::AnalyzeFormalArguments(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeCallResult(const SmallVectorImpl &Ins, - CCAssignFn Fn, - const TargetLowering::CallLoweringInfo &CLI) { - PreAnalyzeCallResultForF128(Ins, CLI); + CCAssignFn Fn, const Type *RetTy, + const char *Func) { + PreAnalyzeCallResultForF128(Ins, RetTy, Func); + PreAnalyzeCallResultForVectorFloat(Ins, RetTy); CCState::AnalyzeCallResult(Ins, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } void AnalyzeReturn(const SmallVectorImpl &Outs, CCAssignFn Fn) { PreAnalyzeReturnForF128(Outs); + PreAnalyzeReturnForVectorFloat(Outs); CCState::AnalyzeReturn(Outs, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); } bool CheckReturn(const SmallVectorImpl &ArgsFlags, CCAssignFn Fn) { PreAnalyzeReturnForF128(ArgsFlags); + PreAnalyzeReturnForVectorFloat(ArgsFlags); bool Return = CCState::CheckReturn(ArgsFlags, Fn); OriginalArgWasFloat.clear(); OriginalArgWasF128.clear(); + OriginalArgWasFloatVector.clear(); return Return; } @@ -128,6 +153,12 @@ public: bool WasOriginalArgFloat(unsigned ValNo) { return OriginalArgWasFloat[ValNo]; } + bool WasOriginalArgVectorFloat(unsigned ValNo) const { + return OriginalArgWasFloatVector[ValNo]; + } + bool WasOriginalRetVectorFloat(unsigned ValNo) const { + return OriginalRetWasFloatVector[ValNo]; + } bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; } SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; } }; diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index a57cb7badc175583446e6c29c3200215ad6fd0b9..b5df78f89a6b90ba52e1556ee65fcc98156b11e6 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -37,6 +37,10 @@ class CCIfOrigArgWasF128 class CCIfArgIsVarArg : CCIf<"!static_cast(&State)->IsCallOperandFixed(ValNo)", A>; +/// Match if the return was a floating point vector. +class CCIfOrigArgWasNotVectorFloat + : CCIf<"!static_cast(&State)" + "->WasOriginalRetVectorFloat(ValNo)", A>; /// Match if the special calling conv is the specified value. class CCIfSpecialCallingConv @@ -93,8 +97,10 @@ def RetCC_MipsO32 : CallingConv<[ // Promote i1/i8/i16 return values to i32. CCIfType<[i1, i8, i16], CCPromoteToType>, - // i32 are returned in registers V0, V1, A0, A1 - CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>, + // i32 are returned in registers V0, V1, A0, A1, unless the original return + // type was a vector of floats. + CCIfOrigArgWasNotVectorFloat>>, // f32 are returned in registers F0, F2 CCIfType<[f32], CCAssignToReg<[F0, F2]>>, diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index 026f66a1c0e15f51777abfafa6f691b4ec2ed33f..ff43a3950610387001f36a507be9448c07c10620 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -24,10 +24,10 @@ #include "Mips16InstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index ac9a81b1bb2f74c101c40d87ebd873ad55486457..c238a65378e22bfdee9ac29be956ecf50c1f90a8 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -19,6 +19,7 @@ def immZExt4 : ImmLeaf(Imm);}]>; def immZExt8 : ImmLeaf(Imm);}]>; def immZExt10 : ImmLeaf(Imm);}]>; def immSExt6 : ImmLeaf(Imm);}]>; +def immSExt10 : ImmLeaf(Imm);}]>; // Mips-specific dsp nodes def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, @@ -851,8 +852,8 @@ class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph, class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, uimm8, immZExt8, NoItinerary, DSPROpnd>; -class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, uimm10, - immZExt10, NoItinerary, DSPROpnd>; +class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, simm10, + immSExt10, NoItinerary, DSPROpnd>; class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb, NoItinerary, DSPROpnd, GPR32Opnd>; diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index ae58c26e145aba8bcf8982180cb21c909361cbb5..5d82571ff94f05549249fe8331e1d31fdacbb0a2 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -211,12 +211,12 @@ namespace { class Filler : public MachineFunctionPass { public: - Filler(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm) { } + Filler() : MachineFunctionPass(ID), TM(nullptr) {} StringRef getPassName() const override { return "Mips Delay Slot Filler"; } bool runOnMachineFunction(MachineFunction &F) override { + TM = &F.getTarget(); bool Changed = false; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) @@ -290,7 +290,7 @@ namespace { bool terminateSearch(const MachineInstr &Candidate) const; - TargetMachine &TM; + const TargetMachine *TM; static char ID; }; @@ -386,7 +386,7 @@ void RegDefsUses::setCallerSaved(const MachineInstr &MI) { void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) { BitVector AllocSet = TRI.getAllocatableSet(MF); - for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R)) + for (unsigned R : AllocSet.set_bits()) for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) AllocSet.set(*AI); @@ -610,7 +610,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { Changed = true; // Delay slot filling is disabled at -O0. - if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) { + if (!DisableDelaySlotFiller && (TM->getOptLevel() != CodeGenOpt::None)) { bool Filled = false; if (MipsCompactBranchPolicy.getValue() != CB_Always || @@ -910,6 +910,4 @@ bool Filler::terminateSearch(const MachineInstr &Candidate) const { /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay /// slots in Mips MachineFunctions -FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { - return new Filler(tm); -} +FunctionPass *llvm::createMipsDelaySlotFillerPass() { return new Filler(); } diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index c060cf06099d36a22425166eaf198b9c4ae4434b..f79cb0e67200a460b2be011fa59186c5a20f042f 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -17,8 +17,8 @@ #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsCCState.h" -#include "MipsInstrInfo.h" #include "MipsISelLowering.h" +#include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" @@ -1133,7 +1133,7 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, if (NumBytes < 16) NumBytes = 16; - emitInst(Mips::ADJCALLSTACKDOWN).addImm(16); + emitInst(Mips::ADJCALLSTACKDOWN).addImm(16).addImm(0); // Process the args. MVT firstMVT; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -1260,8 +1260,11 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, emitInst(Mips::ADJCALLSTACKUP).addImm(16).addImm(0); if (RetVT != MVT::isVoid) { SmallVector RVLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); - CCInfo.AnalyzeCallResult(RetVT, RetCC_Mips); + MipsCCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); + + CCInfo.AnalyzeCallResult(CLI.Ins, RetCC_Mips, CLI.RetTy, + CLI.Symbol ? CLI.Symbol->getName().data() + : nullptr); // Only handle a single return value. if (RVLocs.size() != 1) @@ -1324,11 +1327,10 @@ bool MipsFastISel::fastLowerArguments() { // Only handle simple cases. i.e. All arguments are directly mapped to // registers of the appropriate type. SmallVector Allocation; - unsigned Idx = 1; for (const auto &FormalArg : F->args()) { - if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) { + if (FormalArg.hasAttribute(Attribute::InReg) || + FormalArg.hasAttribute(Attribute::StructRet) || + FormalArg.hasAttribute(Attribute::ByVal)) { DEBUG(dbgs() << ".. gave up (inreg, structret, byval)\n"); return false; } @@ -1340,7 +1342,8 @@ bool MipsFastISel::fastLowerArguments() { } EVT ArgVT = TLI.getValueType(DL, ArgTy); - DEBUG(dbgs() << ".. " << (Idx - 1) << ": " << ArgVT.getEVTString() << "\n"); + DEBUG(dbgs() << ".. " << FormalArg.getArgNo() << ": " + << ArgVT.getEVTString() << "\n"); if (!ArgVT.isSimple()) { DEBUG(dbgs() << ".. .. gave up (not a simple type)\n"); return false; @@ -1350,8 +1353,8 @@ bool MipsFastISel::fastLowerArguments() { case MVT::i1: case MVT::i8: case MVT::i16: - if (!F->getAttributes().hasAttribute(Idx, Attribute::SExt) && - !F->getAttributes().hasAttribute(Idx, Attribute::ZExt)) { + if (!FormalArg.hasAttribute(Attribute::SExt) && + !FormalArg.hasAttribute(Attribute::ZExt)) { // It must be any extend, this shouldn't happen for clang-generated IR // so just fall back on SelectionDAG. DEBUG(dbgs() << ".. .. gave up (i8/i16 arg is not extended)\n"); @@ -1372,7 +1375,7 @@ bool MipsFastISel::fastLowerArguments() { break; case MVT::i32: - if (F->getAttributes().hasAttribute(Idx, Attribute::ZExt)) { + if (FormalArg.hasAttribute(Attribute::ZExt)) { // The O32 ABI does not permit a zero-extended i32. DEBUG(dbgs() << ".. .. gave up (i32 arg is zero extended)\n"); return false; @@ -1435,23 +1438,20 @@ bool MipsFastISel::fastLowerArguments() { DEBUG(dbgs() << ".. .. gave up (unknown type)\n"); return false; } - - ++Idx; } - Idx = 0; for (const auto &FormalArg : F->args()) { - unsigned SrcReg = Allocation[Idx].Reg; - unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[Idx].RC); + unsigned ArgNo = FormalArg.getArgNo(); + unsigned SrcReg = Allocation[ArgNo].Reg; + unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[ArgNo].RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. // Without this, EmitLiveInCopies may eliminate the livein if its only // use is a bitcast (which isn't turned into an instruction). - unsigned ResultReg = createResultReg(Allocation[Idx].RC); + unsigned ResultReg = createResultReg(Allocation[ArgNo].RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) .addReg(DstReg, getKillRegState(true)); updateValueMap(&FormalArg, ResultReg); - ++Idx; } // Calculate the size of the incoming arguments area. diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index b2cf03976f81de5a9653ab1f1a8d25ac4e40a47f..ef05166503b2414a70b86cb7a323ecb358c6b44b 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -119,7 +119,7 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { // Conservatively assume all callee-saved registers will be saved. for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { - unsigned Size = TRI.getMinimalPhysRegClass(*R)->getSize(); + unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R)); Offset = alignTo(Offset + Size, Size); } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 93c5f496ce9716234ee7ca9ca40e9b70d536a130..68708dc4f50fe22bb93afc401537bc6c630f14e9 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -22,12 +22,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" @@ -71,6 +71,48 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } +// The MIPS MSA ABI passes vector arguments in the integer register set. +// The number of integer registers used is dependant on the ABI used. +MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const { + if (VT.isVector() && Subtarget.hasMSA()) + return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64; + return MipsTargetLowering::getRegisterType(VT); +} + +MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) { + if (Subtarget.isABI_O32()) { + return MVT::i32; + } else { + return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64; + } + } + return MipsTargetLowering::getRegisterType(Context, VT); +} + +unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT.isVector()) + return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)), + 1U); + return MipsTargetLowering::getNumRegisters(Context, VT); +} + +unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + + // Break down vector types to either 2 i64s or 4 i32s. + RegisterVT = getRegisterTypeForCallingConv(Context, VT) ; + IntermediateVT = RegisterVT; + NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits() + ? VT.getVectorNumElements() + : VT.getSizeInBits() / RegisterVT.getSizeInBits(); + + return NumIntermediates; +} + SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); @@ -322,6 +364,18 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::UDIV, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); + if (!(Subtarget.hasDSP() && Subtarget.hasMips32r2())) { + setOperationAction(ISD::ADDC, MVT::i32, Expand); + setOperationAction(ISD::ADDE, MVT::i32, Expand); + } + + setOperationAction(ISD::ADDC, MVT::i64, Expand); + setOperationAction(ISD::ADDE, MVT::i64, Expand); + setOperationAction(ISD::SUBC, MVT::i32, Expand); + setOperationAction(ISD::SUBE, MVT::i32, Expand); + setOperationAction(ISD::SUBC, MVT::i64, Expand); + setOperationAction(ISD::SUBE, MVT::i64, Expand); + // Operations not directly supported by Mips. setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); @@ -362,7 +416,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand); @@ -428,6 +481,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::AssertZext); setTargetDAGCombine(ISD::SHL); @@ -471,8 +525,9 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, !Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() && !Subtarget.inMicroMipsMode(); - // Disable if we don't generate PIC or the ABI isn't O32. - if (!TM.isPositionIndependent() || !TM.getABI().IsO32()) + // Disable if either of the following is true: + // We do not generate PIC, the ABI is not O32, LargeGOT is being used. + if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || LargeGOT) UseFastISel = false; return UseFastISel ? Mips::createFastISel(funcInfo, libInfo) : nullptr; @@ -795,7 +850,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); uint64_t SMPos0, SMSize0, SMPos1, SMSize1; - ConstantSDNode *CN; + ConstantSDNode *CN, *CN1; // See if Op's first operand matches (and $src1 , mask0). if (And0.getOpcode() != ISD::AND) @@ -806,47 +861,200 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // See if Op's second operand matches (and (shl $src, pos), mask1). - if (And1.getOpcode() != ISD::AND) + if (And1.getOpcode() == ISD::AND && + And1.getOperand(0).getOpcode() == ISD::SHL) { + + if (!(CN = dyn_cast(And1.getOperand(1))) || + !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) + return SDValue(); + + // The shift masks must have the same position and size. + if (SMPos0 != SMPos1 || SMSize0 != SMSize1) + return SDValue(); + + SDValue Shl = And1.getOperand(0); + + if (!(CN = dyn_cast(Shl.getOperand(1)))) + return SDValue(); + + unsigned Shamt = CN->getZExtValue(); + + // Return if the shift amount and the first bit position of mask are not the + // same. + EVT ValTy = N->getValueType(0); + if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) + return SDValue(); + + SDLoc DL(N); + return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0), + DAG.getConstant(SMPos0, DL, MVT::i32), + DAG.getConstant(SMSize0, DL, MVT::i32), + And0.getOperand(0)); + } else { + // Pattern match DINS. + // $dst = or (and $src, mask0), mask1 + // where mask0 = ((1 << SMSize0) -1) << SMPos0 + // => dins $dst, $src, pos, size + if (~CN->getSExtValue() == ((((int64_t)1 << SMSize0) - 1) << SMPos0) && + ((SMSize0 + SMPos0 <= 64 && Subtarget.hasMips64r2()) || + (SMSize0 + SMPos0 <= 32))) { + // Check if AND instruction has constant as argument + bool isConstCase = And1.getOpcode() != ISD::AND; + if (And1.getOpcode() == ISD::AND) { + if (!(CN1 = dyn_cast(And1->getOperand(1)))) + return SDValue(); + } else { + if (!(CN1 = dyn_cast(N->getOperand(1)))) + return SDValue(); + } + SDLoc DL(N); + EVT ValTy = N->getOperand(0)->getValueType(0); + SDValue Const1; + SDValue SrlX; + if (!isConstCase) { + Const1 = DAG.getConstant(SMPos0, DL, MVT::i32); + SrlX = DAG.getNode(ISD::SRL, DL, And1->getValueType(0), And1, Const1); + } + return DAG.getNode( + MipsISD::Ins, DL, N->getValueType(0), + isConstCase + ? DAG.getConstant(CN1->getSExtValue() >> SMPos0, DL, ValTy) + : SrlX, + DAG.getConstant(SMPos0, DL, MVT::i32), + DAG.getConstant(ValTy.getSizeInBits() / 8 < 8 ? SMSize0 & 31 + : SMSize0, + DL, MVT::i32), + And0->getOperand(0)); + + } return SDValue(); + } +} - if (!(CN = dyn_cast(And1.getOperand(1))) || - !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) +static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG, + const MipsSubtarget &Subtarget) { + // ROOTNode must have a multiplication as an operand for the match to be + // successful. + if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL && + ROOTNode->getOperand(1).getOpcode() != ISD::MUL) return SDValue(); - // The shift masks must have the same position and size. - if (SMPos0 != SMPos1 || SMSize0 != SMSize1) + // We don't handle vector types here. + if (ROOTNode->getValueType(0).isVector()) return SDValue(); - SDValue Shl = And1.getOperand(0); - if (Shl.getOpcode() != ISD::SHL) + // For MIPS64, madd / msub instructions are inefficent to use with 64 bit + // arithmetic. E.g. + // (add (mul a b) c) => + // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in + // MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32) + // or + // MIPS64R2: (dins (mflo res) (mfhi res) 32 32) + // + // The overhead of setting up the Hi/Lo registers and reassembling the + // result makes this a dubious optimzation for MIPS64. The core of the + // problem is that Hi/Lo contain the upper and lower 32 bits of the + // operand and result. + // + // It requires a chain of 4 add/mul for MIPS64R2 to get better code + // density than doing it naively, 5 for MIPS64. Additionally, using + // madd/msub on MIPS64 requires the operands actually be 32 bit sign + // extended operands, not true 64 bit values. + // + // FIXME: For the moment, disable this completely for MIPS64. + if (Subtarget.hasMips64()) return SDValue(); - if (!(CN = dyn_cast(Shl.getOperand(1)))) + SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL + ? ROOTNode->getOperand(0) + : ROOTNode->getOperand(1); + + SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL + ? ROOTNode->getOperand(1) + : ROOTNode->getOperand(0); + + // Transform this to a MADD only if the user of this node is the add. + // If there are other users of the mul, this function returns here. + if (!Mult.hasOneUse()) return SDValue(); - unsigned Shamt = CN->getZExtValue(); + // maddu and madd are unusual instructions in that on MIPS64 bits 63..31 + // must be in canonical form, i.e. sign extended. For MIPS32, the operands + // of the multiply must have 32 or more sign bits, otherwise we cannot + // perform this optimization. We have to check this here as we're performing + // this optimization pre-legalization. + SDValue MultLHS = Mult->getOperand(0); + SDValue MultRHS = Mult->getOperand(1); + unsigned LHSSB = CurDAG.ComputeNumSignBits(MultLHS); + unsigned RHSSB = CurDAG.ComputeNumSignBits(MultRHS); + + if (LHSSB < 32 || RHSSB < 32) + return SDValue(); + + APInt HighMask = + APInt::getHighBitsSet(Mult->getValueType(0).getScalarSizeInBits(), 32); + bool IsUnsigned = CurDAG.MaskedValueIsZero(Mult->getOperand(0), HighMask) && + CurDAG.MaskedValueIsZero(Mult->getOperand(1), HighMask) && + CurDAG.MaskedValueIsZero(AddOperand, HighMask); + + // Initialize accumulator. + SDLoc DL(ROOTNode); + SDValue TopHalf; + SDValue BottomHalf; + BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, + CurDAG.getIntPtrConstant(0, DL)); + + TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, + CurDAG.getIntPtrConstant(1, DL)); + SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, + BottomHalf, + TopHalf); + + // Create MipsMAdd(u) / MipsMSub(u) node. + bool IsAdd = ROOTNode->getOpcode() == ISD::ADD; + unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd) + : (IsUnsigned ? MipsISD::MSubu : MipsISD::MSub); + SDValue MAddOps[3] = { + CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)), + CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn}; + EVT VTs[2] = {MVT::i32, MVT::i32}; + SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps); + + SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); + SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); + SDValue Combined = + CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi); + return Combined; +} + +static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + // (sub v0 (mul v1, v2)) => (msub v1, v2, v0) + if (DCI.isBeforeLegalizeOps()) { + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) + return performMADD_MSUBCombine(N, DAG, Subtarget); - // Return if the shift amount and the first bit position of mask are not the - // same. - EVT ValTy = N->getValueType(0); - if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) return SDValue(); + } - SDLoc DL(N); - return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0), - DAG.getConstant(SMPos0, DL, MVT::i32), - DAG.getConstant(SMSize0, DL, MVT::i32), - And0.getOperand(0)); + return SDValue(); } static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { - // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) + // (add v0 (mul v1, v2)) => (madd v1, v2, v0) + if (DCI.isBeforeLegalizeOps()) { + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) + return performMADD_MSUBCombine(N, DAG, Subtarget); - if (DCI.isBeforeLegalizeOps()) return SDValue(); + } + // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) SDValue Add = N->getOperand(1); if (Add.getOpcode() != ISD::ADD) @@ -974,6 +1182,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return performAssertZextCombine(N, DAG, DCI, Subtarget); case ISD::SHL: return performSHLCombine(N, DAG, DCI, Subtarget); + case ISD::SUB: + return performSUBCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -2515,6 +2725,11 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op, // yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is // not used, it must be shadowed. If only A3 is available, shadow it and // go to stack. +// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. +// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} +// with the remainder spilled to the stack. +// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases +// spilling the remainder to the stack. // // For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. //===----------------------------------------------------------------------===// @@ -2526,8 +2741,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, State.getMachineFunction().getSubtarget()); static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; + + const MipsCCState * MipsState = static_cast(&State); + static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 }; + static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 }; + // Do not process byval args here. if (ArgFlags.isByVal()) return true; @@ -2565,8 +2785,26 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, State.getFirstUnallocated(F32Regs) != ValNo; unsigned OrigAlign = ArgFlags.getOrigAlign(); bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8); - - if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { + bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo); + + // The MIPS vector ABI for floats passes them in a pair of registers + if (ValVT == MVT::i32 && isVectorFloat) { + // This is the start of an vector that was scalarized into an unknown number + // of components. It doesn't matter how many there are. Allocate one of the + // notional 8 byte aligned registers which map onto the argument stack, and + // shadow the register lost to alignment requirements. + if (ArgFlags.isSplit()) { + Reg = State.AllocateReg(FloatVectorIntRegs); + if (Reg == Mips::A2) + State.AllocateReg(Mips::A1); + else if (Reg == 0) + State.AllocateReg(Mips::A3); + } else { + // If we're an intermediate component of the split, we can just attempt to + // allocate a register directly. + Reg = State.AllocateReg(IntRegs); + } + } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { Reg = State.AllocateReg(IntRegs); // If this is the first part of an i64 arg, // the allocated register must be either A0 or A2. @@ -2750,7 +2988,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // caller side but removing it breaks the frame size calculation. CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); - CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), Callee.getNode()); + const ExternalSymbolSDNode *ES = + dyn_cast_or_null(Callee.getNode()); + CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), + ES ? ES->getSymbol() : nullptr); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); @@ -2784,7 +3025,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true); if (!IsTailCall) - Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL); + Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL); SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP, @@ -2985,7 +3226,11 @@ SDValue MipsTargetLowering::LowerCallResult( SmallVector RVLocs; MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI); + + const ExternalSymbolSDNode *ES = + dyn_cast_or_null(CLI.Callee.getNode()); + CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI.RetTy, + ES ? ES->getSymbol() : nullptr); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 2dcafd51061a281f33032703d7c23ae0af57d571..0e47ed38f420710ab7c9b177558f65a0a1bb97b7 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -248,6 +248,33 @@ namespace llvm { bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(MVT VT) const override; + + /// Return the register type for a given MVT, ensuring vectors are treated + /// as a series of gpr sized integers. + virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Return the number of registers for a given MVT, ensuring vectors are + /// treated as a series of gpr sized integers. + virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const override; + + /// Break down vectors to the correct number of gpr sized integers. + virtual unsigned getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const override; + + /// Return the correct alignment for the current calling convention. + virtual unsigned + getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override { + if (ArgTy->isVectorTy()) + return std::min(DL.getABITypeAlignment(ArgTy), 8U); + return DL.getABITypeAlignment(ArgTy); + } + ISD::NodeType getExtendForAtomicOps() const override { return ISD::SIGN_EXTEND; } diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index df42d56d041bc621d75da7d909ae769ba98f77ef..94f3a74be98bc742952b1b31381fde3be3a0986f 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -557,11 +557,11 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>, defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>; def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, - MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; + MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4; def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>, - MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; + MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6, MADD4; -let AdditionalPredicates = [NoNaNsFPMath] in { +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>, MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6; def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>, @@ -569,11 +569,11 @@ let AdditionalPredicates = [NoNaNsFPMath] in { } def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4; def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32, MADD4; -let AdditionalPredicates = [NoNaNsFPMath] in { +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, @@ -582,12 +582,12 @@ let AdditionalPredicates = [NoNaNsFPMath] in { let DecoderNamespace = "Mips64" in { def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4; def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; + MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4; } -let AdditionalPredicates = [NoNaNsFPMath], +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4], DecoderNamespace = "Mips64" in { def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; @@ -681,6 +681,29 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd), "trunc.w.d\t$fd, $fs, $rs">, FGR_64, HARDFLOAT; +def LoadImmSingleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), + (ins imm64:$fpimm), + "li.s\t$rd, $fpimm">; + +def LoadImmSingleFGR : MipsAsmPseudoInst<(outs StrictlyFGR32Opnd:$rd), + (ins imm64:$fpimm), + "li.s\t$rd, $fpimm">, + HARDFLOAT; + +def LoadImmDoubleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), + (ins imm64:$fpimm), + "li.d\t$rd, $fpimm">; + +def LoadImmDoubleFGR_32 : MipsAsmPseudoInst<(outs StrictlyAFGR64Opnd:$rd), + (ins imm64:$fpimm), + "li.d\t$rd, $fpimm">, + FGR_32, HARDFLOAT; + +def LoadImmDoubleFGR : MipsAsmPseudoInst<(outs StrictlyFGR64Opnd:$rd), + (ins imm64:$fpimm), + "li.d\t$rd, $fpimm">, + FGR_64, HARDFLOAT; + //===----------------------------------------------------------------------===// // InstAliases. //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index df62c66b75a323519968deb51be766b49717b7f5..4adf77f8d9a952664d19ad8af95afcd3f2ee64ef 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -103,12 +103,9 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID); for (unsigned i = 1; i < Cond.size(); ++i) { - if (Cond[i].isReg()) - MIB.addReg(Cond[i].getReg()); - else if (Cond[i].isImm()) - MIB.addImm(Cond[i].getImm()); - else - assert(false && "Cannot copy operand"); + assert((Cond[i].isImm() || Cond[i].isReg()) && + "Cannot copy operand for conditional branch!"); + MIB.add(Cond[i]); } MIB.addMBB(TBB); } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index b90077d7807d8a1c618ee5af3e8a9a8f194b95f1..40078fb7714420bebf3f0311563bdc0d19d4088e 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -21,7 +21,7 @@ def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, SDTCisInt<4>]>; -def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; def SDT_MFLOHI : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVT<1, untyped>]>; def SDT_MTLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, @@ -238,6 +238,8 @@ def HasEVA : Predicate<"Subtarget->hasEVA()">, AssemblerPredicate<"FeatureEVA,FeatureMips32r2">; def HasMSA : Predicate<"Subtarget->hasMSA()">, AssemblerPredicate<"FeatureMSA">; +def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, + AssemblerPredicate<"!FeatureMadd4">; //===----------------------------------------------------------------------===// @@ -390,6 +392,10 @@ class ASE_NOT_DSP { list InsnPredicates = [NotDSP]; } +class MADD4 { + list AdditionalPredicates = [HasMadd4]; +} + //===----------------------------------------------------------------------===// class MipsPat : Pat, PredicateControl { @@ -1719,8 +1725,8 @@ let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, isCTI=1 in { } let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { -def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt), - [(callseq_start timm:$amt)]>; +def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(callseq_start timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), [(callseq_end timm:$amt1, timm:$amt2)]>; } diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 100503700a720fa01c958150907e431f07ce81a3..272595af5f6f161f2af923de9ee75430c9481455 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -75,9 +75,8 @@ namespace { public: static char ID; - MipsLongBranch(TargetMachine &tm) - : MachineFunctionPass(ID), TM(tm), IsPIC(TM.isPositionIndependent()), - ABI(static_cast(TM).getABI()) {} + MipsLongBranch() + : MachineFunctionPass(ID), ABI(MipsABIInfo::Unknown()) {} StringRef getPassName() const override { return "Mips Long Branch"; } @@ -96,7 +95,6 @@ namespace { MachineBasicBlock *MBBOpnd); void expandToLongBranch(MBBInfo &Info); - const TargetMachine &TM; MachineFunction *MF; SmallVector MBBInfos; bool IsPIC; @@ -276,8 +274,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { if (IsPIC) { MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB); MF->insert(FallThroughMBB, BalTgtMBB); - LongBrMBB->addSuccessor(BalTgtMBB); - BalTgtMBB->addSuccessor(TgtMBB); + LongBrMBB->addSuccessor(BalTgtMBB, BranchProbability::getOne()); + BalTgtMBB->addSuccessor(&*FallThroughMBB, BranchProbability::getOne()); // We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal // instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is an @@ -344,8 +342,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP).addImm(8); if (Subtarget.hasMips32r6()) - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR)) - .addReg(Mips::ZERO).addReg(Mips::AT); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR), Mips::ZERO) + .addReg(Mips::AT); else BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT); @@ -417,8 +415,8 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP_64).addImm(0); if (Subtarget.hasMips64r6()) - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64)) - .addReg(Mips::ZERO_64).addReg(Mips::AT_64); + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JALR64), Mips::ZERO_64) + .addReg(Mips::AT_64); else BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64); @@ -469,6 +467,12 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { static_cast(F.getSubtarget()); const MipsInstrInfo *TII = static_cast(STI.getInstrInfo()); + + + const TargetMachine& TM = F.getTarget(); + IsPIC = TM.isPositionIndependent(); + ABI = static_cast(TM).getABI(); + LongBranchSeqSize = !IsPIC ? 2 : (ABI.IsN64() ? 10 : (!STI.isTargetNaCl() ? 9 : 10)); @@ -541,6 +545,4 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { /// createMipsLongBranchPass - Returns a pass that converts branches to long /// branches. -FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) { - return new MipsLongBranch(tm); -} +FunctionPass *llvm::createMipsLongBranchPass() { return new MipsLongBranch(); } diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 8b04fcb76920df559346205c17dceb9660d43ffc..bf79f0f2ff82522eca1d29f50519843221b5c728 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -3781,6 +3781,80 @@ let Predicates = [HasMSA] in { ISA_MIPS1_NOT_32R6_64R6; } +def vsplati64_imm_eq_63 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ + APInt Imm; + SDNode *BV = N->getOperand(0).getNode(); + EVT EltTy = N->getValueType(0).getVectorElementType(); + + return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63; +}]>; + +def immi32Cst7 : ImmLeaf(Imm) && Imm == 7;}]>; +def immi32Cst15 : ImmLeaf(Imm) && Imm == 15;}]>; +def immi32Cst31 : ImmLeaf(Imm) && Imm == 31;}]>; + +def vsplati8imm7 : PatFrag<(ops node:$wt), + (and node:$wt, (vsplati8 immi32Cst7))>; +def vsplati16imm15 : PatFrag<(ops node:$wt), + (and node:$wt, (vsplati16 immi32Cst15))>; +def vsplati32imm31 : PatFrag<(ops node:$wt), + (and node:$wt, (vsplati32 immi32Cst31))>; +def vsplati64imm63 : PatFrag<(ops node:$wt), + (and node:$wt, vsplati64_imm_eq_63)>; + +class MSAShiftPat : + MSAPat<(VT (Node VT:$ws, (VT (and VT:$wt, Vec)))), + (VT (Insn VT:$ws, VT:$wt))>; + +class MSABitPat : + MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))), + (VT (Insn VT:$ws, VT:$wt))>; + +multiclass MSAShiftPats { + def : MSAShiftPat(Insn#_B), + (vsplati8 immi32Cst7)>; + def : MSAShiftPat(Insn#_H), + (vsplati16 immi32Cst15)>; + def : MSAShiftPat(Insn#_W), + (vsplati32 immi32Cst31)>; + def : MSAPat<(v2i64 (Node v2i64:$ws, (v2i64 (and v2i64:$wt, + vsplati64_imm_eq_63)))), + (v2i64 (!cast(Insn#_D) v2i64:$ws, v2i64:$wt))>; +} + +multiclass MSABitPats { + def : MSABitPat(Insn#_B), vsplati8imm7>; + def : MSABitPat(Insn#_H), vsplati16imm15>; + def : MSABitPat(Insn#_W), vsplati32imm31>; + def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1), + (vsplati64imm63 v2i64:$wt))), + (v2i64 (!cast(Insn#_D) v2i64:$ws, v2i64:$wt))>; +} + +defm : MSAShiftPats; +defm : MSAShiftPats; +defm : MSAShiftPats; +defm : MSABitPats; +defm : MSABitPats; + +def : MSAPat<(and v16i8:$ws, (xor (shl vsplat_imm_eq_1, + (vsplati8imm7 v16i8:$wt)), + immAllOnesV)), + (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>; +def : MSAPat<(and v8i16:$ws, (xor (shl vsplat_imm_eq_1, + (vsplati16imm15 v8i16:$wt)), + immAllOnesV)), + (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>; +def : MSAPat<(and v4i32:$ws, (xor (shl vsplat_imm_eq_1, + (vsplati32imm31 v4i32:$wt)), + immAllOnesV)), + (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>; +def : MSAPat<(and v2i64:$ws, (xor (shl (v2i64 vsplati64_imm_eq_1), + (vsplati64imm63 v2i64:$wt)), + (bitconvert (v4i32 immAllOnesV)))), + (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>; + // Vector extraction with fixed index. // // Extracting 32-bit values on MSA32 should always use COPY_S_W rather than diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 5bf4c958c7b97def5fd35c64d90ae0fc998e93f0..e01c03db222759a164b3feed2109614afd9a5578 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/MipsABIInfo.h" #include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsABIInfo.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -53,14 +53,15 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() { } void MipsFunctionInfo::createEhDataRegsFI() { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); for (int I = 0; I < 4; ++I) { - const TargetRegisterClass *RC = + const TargetRegisterClass &RC = static_cast(MF.getTarget()).getABI().IsN64() - ? &Mips::GPR64RegClass - : &Mips::GPR32RegClass; + ? Mips::GPR64RegClass + : Mips::GPR32RegClass; - EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(RC->getSize(), - RC->getAlignment(), false); + EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlignment(RC), false); } } @@ -69,11 +70,12 @@ void MipsFunctionInfo::createISRRegFI() { // The current implementation only supports Mips32r2+ not Mips64rX. Status // is always 32 bits, ErrorPC is 32 or 64 bits dependent on architecture, // however Mips32r2+ is the supported architecture. - const TargetRegisterClass *RC = &Mips::GPR32RegClass; + const TargetRegisterClass &RC = Mips::GPR32RegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); for (int I = 0; I < 2; ++I) ISRDataRegFI[I] = MF.getFrameInfo().CreateStackObject( - RC->getSize(), RC->getAlignment(), false); + TRI.getSpillSize(RC), TRI.getSpillAlignment(RC), false); } bool MipsFunctionInfo::isEhDataRegFI(int FI) const { @@ -93,9 +95,10 @@ MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) { } int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); if (MoveF64ViaSpillFI == -1) { MoveF64ViaSpillFI = MF.getFrameInfo().CreateStackObject( - RC->getSize(), RC->getAlignment(), false); + TRI.getSpillSize(*RC), TRI.getSpillAlignment(*RC), false); } return MoveF64ViaSpillFI; } diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp index cf85eb3f2416044b12024c99dffb9790bfe5b7b1..ceacaa498389457313391aa6a29cb7d42d4a47f8 100644 --- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp @@ -10,6 +10,7 @@ #include "Mips.h" #include "MipsTargetMachine.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -22,18 +23,19 @@ namespace { public: static char ID; - explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_) - : MachineFunctionPass(ID), TM(TM_) {} + MipsModuleDAGToDAGISel() : MachineFunctionPass(ID) {} // Pass Name StringRef getPassName() const override { return "MIPS DAG->DAG Pattern Instruction Selection"; } - bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } - protected: - MipsTargetMachine &TM; + bool runOnMachineFunction(MachineFunction &MF) override; }; char MipsModuleDAGToDAGISel::ID = 0; @@ -41,10 +43,12 @@ namespace { bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n"); + auto &TPC = getAnalysis(); + auto &TM = TPC.getTM(); TM.resetSubtarget(&MF); return false; } -llvm::FunctionPass *llvm::createMipsModuleISelDagPass(MipsTargetMachine &TM) { - return new MipsModuleDAGToDAGISel(TM); +llvm::FunctionPass *llvm::createMipsModuleISelDagPass() { + return new MipsModuleDAGToDAGISel(); } diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp index f33857fe628fcbbb00f6e8911d3c0e588666b33b..79c8395d9dcc41e7a0a52d1aea301c55378c5637 100644 --- a/lib/Target/Mips/MipsOptimizePICCall.cpp +++ b/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" +#include "Mips.h" #include "MipsMachineFunction.h" #include "MipsTargetMachine.h" #include "llvm/ADT/ScopedHashTable.h" @@ -59,7 +59,7 @@ private: class OptimizePICCall : public MachineFunctionPass { public: - OptimizePICCall(TargetMachine &tm) : MachineFunctionPass(ID) {} + OptimizePICCall() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Mips OptimizePICCall"; } @@ -116,9 +116,10 @@ static MachineOperand *getCallTargetRegOpnd(MachineInstr &MI) { /// Return type of register Reg. static MVT::SimpleValueType getRegTy(unsigned Reg, MachineFunction &MF) { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); - assert(RC->vt_end() - RC->vt_begin() == 1); - return *RC->vt_begin(); + assert(TRI.legalclasstypes_end(*RC) - TRI.legalclasstypes_begin(*RC) == 1); + return *TRI.legalclasstypes_begin(*RC); } /// Do the following transformation: @@ -256,7 +257,7 @@ bool OptimizePICCall::isCallViaRegister(MachineInstr &MI, unsigned &Reg, // Get the instruction that loads the function address from the GOT. Reg = MO->getReg(); - Val = (Value*)nullptr; + Val = nullptr; MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); MachineInstr *DefMI = MRI.getVRegDef(Reg); @@ -296,6 +297,6 @@ void OptimizePICCall::incCntAndSetReg(ValueType Entry, unsigned Reg) { } /// Return an OptimizeCall object. -FunctionPass *llvm::createMipsOptimizePICCallPass(MipsTargetMachine &TM) { - return new OptimizePICCall(TM); +FunctionPass *llvm::createMipsOptimizePICCallPass() { + return new OptimizePICCall(); } diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp index 670b6c96e78ef02af652c70fe6fe880a6e2ea242..7ee45c28a7d09db8b4b69fa2b2564e1ada65fcd5 100644 --- a/lib/Target/Mips/MipsOs16.cpp +++ b/lib/Target/Mips/MipsOs16.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/Instructions.h" #include "Mips.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -155,6 +155,4 @@ bool MipsOs16::runOnModule(Module &M) { return modified; } -ModulePass *llvm::createMipsOs16Pass(MipsTargetMachine &TM) { - return new MipsOs16; -} +ModulePass *llvm::createMipsOs16Pass() { return new MipsOs16(); } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 65be350f259df740c0f693735b2cb58d90c2aaca..de3389b5a6bf5bb219956d0e8951436aab65d583 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -286,7 +286,9 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" << "spOffset : " << spOffset << "\n" - << "stackSize : " << stackSize << "\n"); + << "stackSize : " << stackSize << "\n" + << "alignment : " + << MF.getFrameInfo().getObjectAlignment(FrameIndex) << "\n"); eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset); } diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index ccfdcc89b078a7161ae23f5af83d83c380424922..08fb3d7d435257844f7c3a13e73ea17c6af2f4b0 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -552,16 +552,31 @@ def AFGR64AsmOperand : MipsAsmRegOperand { let PredicateMethod = "isFGRAsmReg"; } +def StrictlyAFGR64AsmOperand : MipsAsmRegOperand { + let Name = "StrictlyAFGR64AsmReg"; + let PredicateMethod = "isStrictlyFGRAsmReg"; +} + def FGR64AsmOperand : MipsAsmRegOperand { let Name = "FGR64AsmReg"; let PredicateMethod = "isFGRAsmReg"; } +def StrictlyFGR64AsmOperand : MipsAsmRegOperand { + let Name = "StrictlyFGR64AsmReg"; + let PredicateMethod = "isStrictlyFGRAsmReg"; +} + def FGR32AsmOperand : MipsAsmRegOperand { let Name = "FGR32AsmReg"; let PredicateMethod = "isFGRAsmReg"; } +def StrictlyFGR32AsmOperand : MipsAsmRegOperand { + let Name = "StrictlyFGR32AsmReg"; + let PredicateMethod = "isStrictlyFGRAsmReg"; +} + def FGRH32AsmOperand : MipsAsmRegOperand { let Name = "FGRH32AsmReg"; let PredicateMethod = "isFGRAsmReg"; @@ -639,14 +654,26 @@ def AFGR64Opnd : RegisterOperand { let ParserMatchClass = AFGR64AsmOperand; } +def StrictlyAFGR64Opnd : RegisterOperand { + let ParserMatchClass = StrictlyAFGR64AsmOperand; +} + def FGR64Opnd : RegisterOperand { let ParserMatchClass = FGR64AsmOperand; } +def StrictlyFGR64Opnd : RegisterOperand { + let ParserMatchClass = StrictlyFGR64AsmOperand; +} + def FGR32Opnd : RegisterOperand { let ParserMatchClass = FGR32AsmOperand; } +def StrictlyFGR32Opnd : RegisterOperand { + let ParserMatchClass = StrictlyFGR32AsmOperand; +} + def FGRCCOpnd : RegisterOperand { // The assembler doesn't use register classes so we can re-use // FGR32AsmOperand. diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index ef8d18c6deb14c0546b8a243e3073340bf61f76a..102ebb21609aa4120b9201f6307ed381dfdf93bb 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "MipsSEFrameLowering.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSEFrameLowering.h" #include "MipsSEInstrInfo.h" #include "MipsSubtarget.h" #include "llvm/ADT/BitVector.h" @@ -260,7 +260,8 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, // copy dst_hi, $vr1 unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); - unsigned VRegSize = RegInfo.getMinimalPhysRegClass(Dst)->getSize() / 2; + const TargetRegisterClass *DstRC = RegInfo.getMinimalPhysRegClass(Dst); + unsigned VRegSize = RegInfo.getRegSizeInBits(*DstRC) / 16; const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize); unsigned VR0 = MRI.createVirtualRegister(RC); unsigned VR1 = MRI.createVirtualRegister(RC); @@ -858,6 +859,7 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsABIInfo ABI = STI.getABI(); unsigned FP = ABI.GetFramePtr(); @@ -883,10 +885,11 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, if (ExpandPseudo(MF).expand()) { // The spill slot should be half the size of the accumulator. If target is // mips64, it should be 64-bit, otherwise it should be 32-bt. - const TargetRegisterClass *RC = STI.hasMips64() ? - &Mips::GPR64RegClass : &Mips::GPR32RegClass; - int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(), - RC->getAlignment(), false); + const TargetRegisterClass &RC = STI.hasMips64() ? + Mips::GPR64RegClass : Mips::GPR32RegClass; + int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), + TRI->getSpillAlignment(RC), + false); RS->addScavengingFrameIndex(FI); } @@ -897,10 +900,11 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, if (isInt<16>(MaxSPOffset)) return; - const TargetRegisterClass *RC = - ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; - int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(), - RC->getAlignment(), false); + const TargetRegisterClass &RC = + ABI.ArePtrs64bit() ? Mips::GPR64RegClass : Mips::GPR32RegClass; + int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC), + TRI->getSpillAlignment(RC), + false); RS->addScavengingFrameIndex(FI); } diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index c9cf9363b8c96100ca4e744f1f9f6599b36712ea..4be26dd25dc045c4af4dde7322b6c0d06401c3db 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -24,11 +24,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" -#include "llvm/IR/Dominators.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -245,46 +245,64 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { } } -void MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, - SDValue CmpLHS, const SDLoc &DL, - SDNode *Node) const { - unsigned Opc = InFlag.getOpcode(); (void)Opc; - - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned SLTuOp = Mips::SLTu, ADDuOp = Mips::ADDu; - if (Subtarget->isGP64bit()) { - SLTuOp = Mips::SLTu64; - ADDuOp = Mips::DADDu; - } - - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; +void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const { + SDValue InFlag = Node->getOperand(2); + unsigned Opc = InFlag.getOpcode(); SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); - SDNode *Carry = CurDAG->getMachineNode(SLTuOp, DL, VT, Ops); - - if (Subtarget->isGP64bit()) { - // On 64-bit targets, sltu produces an i64 but our backend currently says - // that SLTu64 produces an i32. We need to fix this in the long run but for - // now, just make the DAG type-correct by asserting the upper bits are zero. - Carry = CurDAG->getMachineNode(Mips::SUBREG_TO_REG, DL, VT, - CurDAG->getTargetConstant(0, DL, VT), - SDValue(Carry, 0), - CurDAG->getTargetConstant(Mips::sub_32, DL, - VT)); + // In the base case, we can rely on the carry bit from the addsc + // instruction. + if (Opc == ISD::ADDC) { + SDValue Ops[3] = {LHS, RHS, InFlag}; + CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops); + return; } - // Generate a second addition only if we know that RHS is not a - // constant-zero node. - SDNode *AddCarry = Carry; - ConstantSDNode *C = dyn_cast(RHS); - if (!C || C->getZExtValue()) - AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS); + assert(Opc == ISD::ADDE && "ISD::ADDE not in a chain of ADDE nodes!"); + + // The more complex case is when there is a chain of ISD::ADDE nodes like: + // (adde (adde (adde (addc a b) c) d) e). + // + // The addwc instruction does not write to the carry bit, instead it writes + // to bit 20 of the dsp control register. To match this series of nodes, each + // intermediate adde node must be expanded to write the carry bit before the + // addition. + + // Start by reading the overflow field for addsc and moving the value to the + // carry field. The usage of 1 here with MipsISD::RDDSP / Mips::WRDSP + // corresponds to reading/writing the entire control register to/from a GPR. + + SDValue CstOne = CurDAG->getTargetConstant(1, DL, MVT::i32); + + SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32); - CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, SDValue(AddCarry, 0)); + SDNode *DSPCtrlField = + CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32, MVT::Glue, CstOne, InFlag); + + SDNode *Carry = CurDAG->getMachineNode( + Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne); + + SDValue Ops[4] = {SDValue(DSPCtrlField, 0), + CurDAG->getTargetConstant(6, DL, MVT::i32), CstOne, + SDValue(Carry, 0)}; + SDNode *DSPCFWithCarry = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, Ops); + + // My reading of the the MIPS DSP 3.01 specification isn't as clear as I + // would like about whether bit 20 always gets overwritten by addwc. + // Hence take an extremely conservative view and presume it's sticky. We + // therefore need to clear it. + + SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); + + SDValue InsOps[4] = {Zero, OuFlag, CstOne, SDValue(DSPCFWithCarry, 0)}; + SDNode *DSPCtrlFinal = CurDAG->getMachineNode(Mips::INS, DL, MVT::i32, InsOps); + + SDNode *WrDSP = CurDAG->getMachineNode(Mips::WRDSP, DL, MVT::Glue, + SDValue(DSPCtrlFinal, 0), CstOne); + + SDValue Operands[3] = {LHS, RHS, SDValue(WrDSP, 0)}; + CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Operands); } /// Match frameindex @@ -765,19 +783,8 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { switch(Opcode) { default: break; - case ISD::SUBE: { - SDValue InFlag = Node->getOperand(2); - unsigned Opc = Subtarget->isGP64bit() ? Mips::DSUBu : Mips::SUBu; - selectAddESubE(Opc, InFlag, InFlag.getOperand(0), DL, Node); - return true; - } - case ISD::ADDE: { - if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC. - break; - SDValue InFlag = Node->getOperand(2); - unsigned Opc = Subtarget->isGP64bit() ? Mips::DADDu : Mips::ADDu; - selectAddESubE(Opc, InFlag, InFlag.getValue(0), DL, Node); + selectAddE(Node, DL); return true; } diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index f89a350cab044f4e3e97ab1b2e3457fda4506b66..6f38289c5a45747c967b80e3a60896e3394d6f11 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -41,8 +41,7 @@ private: const SDLoc &dl, EVT Ty, bool HasLo, bool HasHi); - void selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, - const SDLoc &DL, SDNode *Node) const; + void selectAddE(SDNode *Node, const SDLoc &DL) const; bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const; bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index e2da8477295b7d17e3936276cfc8484368d3bc6b..2382ea271661204197ce111c6e3ce4ca55844ba7 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -179,8 +179,6 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); - setTargetDAGCombine(ISD::ADDE); - setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::MUL); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -421,163 +419,6 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op, return MipsTargetLowering::LowerOperation(Op, DAG); } -// selectMADD - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc multLo, Lo0), (adde multHi, Hi0), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { - // ADDENode's second operand must be a flag output of an ADDC node in order - // for the matching to be successful. - SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); - - if (ADDCNode->getOpcode() != ISD::ADDC) - return false; - - SDValue MultHi = ADDENode->getOperand(0); - SDValue MultLo = ADDCNode->getOperand(0); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MADD only if ADDENode and ADDCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than ADDENode or ADDCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MADD instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDLoc DL(ADDENode); - - // Initialize accumulator. - SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - ADDCNode->getOperand(1), - ADDENode->getOperand(1)); - - // create MipsMAdd(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; - - SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ACCIn); - - // replace uses of adde and addc here - if (!SDValue(ADDCNode, 0).use_empty()) { - SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); - } - if (!SDValue(ADDENode, 0).use_empty()) { - SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); - } - - return true; -} - -// selectMSUB - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc Lo0, multLo), (sube Hi0, multHi), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { - // SUBENode's second operand must be a flag output of an SUBC node in order - // for the matching to be successful. - SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); - - if (SUBCNode->getOpcode() != ISD::SUBC) - return false; - - SDValue MultHi = SUBENode->getOperand(1); - SDValue MultLo = SUBCNode->getOperand(1); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MSUB only if SUBENode and SUBCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than SUBENode or SUBCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MSUB instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDLoc DL(SUBENode); - - // Initialize accumulator. - SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped, - SUBCNode->getOperand(0), - SUBENode->getOperand(0)); - - // create MipsSub(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; - - SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ACCIn); - - // replace uses of sube and subc here - if (!SDValue(SUBCNode, 0).use_empty()) { - SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); - } - if (!SDValue(SUBENode, 0).use_empty()) { - SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); - } - - return true; -} - -static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && - N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT // // Performs the following transformations: @@ -820,19 +661,6 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget &Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && - selectMSUB(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - static SDValue genConstMult(SDValue X, uint64_t C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG) { // Clear the upper (64 - VT.sizeInBits) bits. @@ -1110,16 +938,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SDValue Val; switch (N->getOpcode()) { - case ISD::ADDE: - return performADDECombine(N, DAG, DCI, Subtarget); case ISD::AND: Val = performANDCombine(N, DAG, DCI, Subtarget); break; case ISD::OR: Val = performORCombine(N, DAG, DCI, Subtarget); break; - case ISD::SUBE: - return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: return performMULCombine(N, DAG, DCI, this); case ISD::SHL: @@ -1547,11 +1371,24 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); } +static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + SDValue Vec = Op->getOperand(2); + bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); + MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; + SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, + DL, ResEltTy); + SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG); + + return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); +} + static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { EVT ResTy = Op->getValueType(0); SDLoc DL(Op); SDValue One = DAG.getConstant(1, DL, ResTy); - SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2)); + SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), DAG.getNOT(DL, Bit, ResTy)); @@ -1687,7 +1524,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), DAG.getNode(ISD::SHL, DL, VecTy, One, - Op->getOperand(2))); + truncateVecElts(Op, DAG))); } case Intrinsic::mips_bnegi_b: case Intrinsic::mips_bnegi_h: @@ -1723,7 +1560,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), DAG.getNode(ISD::SHL, DL, VecTy, One, - Op->getOperand(2))); + truncateVecElts(Op, DAG))); } case Intrinsic::mips_bseti_b: case Intrinsic::mips_bseti_h: @@ -2210,7 +2047,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_sll_w: case Intrinsic::mips_sll_d: return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + truncateVecElts(Op, DAG)); case Intrinsic::mips_slli_b: case Intrinsic::mips_slli_h: case Intrinsic::mips_slli_w: @@ -2240,7 +2077,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_sra_w: case Intrinsic::mips_sra_d: return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + truncateVecElts(Op, DAG)); case Intrinsic::mips_srai_b: case Intrinsic::mips_srai_h: case Intrinsic::mips_srai_w: @@ -2270,7 +2107,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_srl_w: case Intrinsic::mips_srl_d: return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), - Op->getOperand(2)); + truncateVecElts(Op, DAG)); case Intrinsic::mips_srli_b: case Intrinsic::mips_srli_h: case Intrinsic::mips_srli_w: diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 91e712a7a54e8d15bd55bee542eb2a6fecab0c54..ee074798563d83317e75810474345f1cdf2db03a 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -207,13 +207,16 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::SDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::SDC164; - else if (RC->hasType(MVT::v16i8)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::ST_B; - else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || + TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::ST_H; - else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || + TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::ST_W; - else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || + TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::ST_D; else if (Mips::LO32RegClass.hasSubClassEq(RC)) Opc = Mips::SW; @@ -280,13 +283,16 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::LDC1; else if (Mips::FGR64RegClass.hasSubClassEq(RC)) Opc = Mips::LDC164; - else if (RC->hasType(MVT::v16i8)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8)) Opc = Mips::LD_B; - else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) || + TRI->isTypeLegalForClass(*RC, MVT::v8f16)) Opc = Mips::LD_H; - else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) || + TRI->isTypeLegalForClass(*RC, MVT::v4f32)) Opc = Mips::LD_W; - else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) + else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) || + TRI->isTypeLegalForClass(*RC, MVT::v2f64)) Opc = Mips::LD_D; else if (Mips::HI32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; @@ -567,8 +573,8 @@ MipsSEInstrInfo::compareOpndSize(unsigned Opc, const MCInstrDesc &Desc = get(Opc); assert(Desc.NumOperands == 2 && "Unary instruction expected."); const MipsRegisterInfo *RI = &getRegisterInfo(); - unsigned DstRegSize = getRegClass(Desc, 0, RI, MF)->getSize(); - unsigned SrcRegSize = getRegClass(Desc, 1, RI, MF)->getSize(); + unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF)); + unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF)); return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize); } diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 8f5ecadecdea251857077782e5c565573dbf5d44..154d5825427b36628293f63ddac2b29667e80063 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "MipsMachineFunction.h" +#include "MipsSubtarget.h" #include "Mips.h" +#include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -59,9 +59,8 @@ static cl::opt void MipsSubtarget::anchor() { } -MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, bool little, - const MipsTargetMachine &TM) +MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + bool little, const MipsTargetMachine &TM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault), IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false), NoABICalls(false), IsFP64bit(false), UseOddSPReg(true), @@ -71,14 +70,12 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), - HasEVA(false), TM(TM), TargetTriple(TT), TSInfo(), + HasEVA(false), DisableMadd4(false), TM(TM), TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), TLInfo(MipsTargetLowering::create(TM, *this)) { - PreviousInMips16Mode = InMips16Mode; - if (MipsArchVersion == MipsDefault) MipsArchVersion = Mips32; diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index cca2cb8a46608f6ca9edb1c9bedf716c7ecb5385..ccd47f00c0d3bc1f786480e05f5e1e6b5e1a697b 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -78,7 +78,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // IsNan2008 - IEEE 754-2008 NaN encoding. bool IsNaN2008bit; - // IsFP64bit - General-purpose registers are 64 bits wide + // IsGP64bit - General-purpose registers are 64 bits wide bool IsGP64bit; // IsPTR64bit - Pointers are 64 bit wide @@ -119,9 +119,6 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // Mips16 hard float bool InMips16HardFloat; - // PreviousInMips16 -- the function we just processed was in Mips 16 Mode - bool PreviousInMips16Mode; - // InMicroMips -- can process MicroMips instructions bool InMicroMipsMode; @@ -147,6 +144,10 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasEVA -- supports EVA ASE. bool HasEVA; + + // nomadd4 - disables generation of 4-operand madd.s, madd.d and + // related instructions. + bool DisableMadd4; InstrItineraryData InstrItins; @@ -178,8 +179,8 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. - MipsSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, - bool little, const MipsTargetMachine &TM); + MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, bool little, + const MipsTargetMachine &TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -256,6 +257,7 @@ public: bool hasDSPR2() const { return HasDSPR2; } bool hasDSPR3() const { return HasDSPR3; } bool hasMSA() const { return HasMSA; } + bool disableMadd4() const { return DisableMadd4; } bool hasEVA() const { return HasEVA; } bool useSmallSection() const { return UseSmallSection; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index a45a9c4b41c37b62911fa64f1146884460f0820d..330ae19ecd0f13bb56e0f0d4355b31b22b95dc23 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MipsTargetMachine.h" #include "MCTargetDesc/MipsABIInfo.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "Mips.h" @@ -18,7 +19,6 @@ #include "MipsSEISelDAGToDAG.h" #include "MipsSubtarget.h" #include "MipsTargetObjectFile.h" -#include "MipsTargetMachine.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -154,6 +154,11 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const { bool hasNoMips16Attr = !F.getFnAttribute("nomips16").hasAttribute(Attribute::None); + bool HasMicroMipsAttr = + !F.getFnAttribute("micromips").hasAttribute(Attribute::None); + bool HasNoMicroMipsAttr = + !F.getFnAttribute("nomicromips").hasAttribute(Attribute::None); + // FIXME: This is related to the code below to reset the target options, // we need to know whether or not the soft float flag is set on the // function, so we can enable it as a subtarget feature. @@ -165,6 +170,10 @@ MipsTargetMachine::getSubtargetImpl(const Function &F) const { FS += FS.empty() ? "+mips16" : ",+mips16"; else if (hasNoMips16Attr) FS += FS.empty() ? "-mips16" : ",-mips16"; + if (HasMicroMipsAttr) + FS += FS.empty() ? "+micromips" : ",+micromips"; + else if (HasNoMicroMipsAttr) + FS += FS.empty() ? "-micromips" : ",-micromips"; if (softFloat) FS += FS.empty() ? "+soft-float" : ",+soft-float"; @@ -192,7 +201,7 @@ namespace { /// Mips Code Generator Pass Configuration Options. class MipsPassConfig : public TargetPassConfig { public: - MipsPassConfig(MipsTargetMachine *TM, PassManagerBase &PM) + MipsPassConfig(MipsTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) { // The current implementation of long branch pass requires a scratch // register ($at) to be available before branch instructions. Tail merging @@ -218,28 +227,28 @@ public: } // end anonymous namespace TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) { - return new MipsPassConfig(this, PM); + return new MipsPassConfig(*this, PM); } void MipsPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); - addPass(createAtomicExpandPass(&getMipsTargetMachine())); + addPass(createAtomicExpandPass()); if (getMipsSubtarget().os16()) - addPass(createMipsOs16Pass(getMipsTargetMachine())); + addPass(createMipsOs16Pass()); if (getMipsSubtarget().inMips16HardFloat()) - addPass(createMips16HardFloatPass(getMipsTargetMachine())); + addPass(createMips16HardFloatPass()); } // Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsPassConfig::addInstSelector() { - addPass(createMipsModuleISelDagPass(getMipsTargetMachine())); + addPass(createMipsModuleISelDagPass()); addPass(createMips16ISelDag(getMipsTargetMachine(), getOptLevel())); addPass(createMipsSEISelDag(getMipsTargetMachine(), getOptLevel())); return false; } void MipsPassConfig::addPreRegAlloc() { - addPass(createMipsOptimizePICCallPass(getMipsTargetMachine())); + addPass(createMipsOptimizePICCallPass()); } TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { @@ -259,14 +268,14 @@ TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. void MipsPassConfig::addPreEmitPass() { - MipsTargetMachine &TM = getMipsTargetMachine(); + addPass(createMicroMipsSizeReductionPass()); // The delay slot filler pass can potientially create forbidden slot (FS) // hazards for MIPSR6 which the hazard schedule pass (HSP) will fix. Any // (new) pass that creates compact branches after the HSP must handle FS // hazards itself or be pipelined before the HSP. - addPass(createMipsDelaySlotFillerPass(TM)); + addPass(createMipsDelaySlotFillerPass()); addPass(createMipsHazardSchedule()); - addPass(createMipsLongBranchPass(TM)); + addPass(createMipsLongBranchPass()); addPass(createMipsConstantIslandPass()); } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 140d7133f879bf232056c195cacc3666ed17289d..a3462868cb1116969581534e9cbf3ccaaf70e216 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -66,6 +66,10 @@ public: bool isLittleEndian() const { return isLittle; } const MipsABIInfo &getABI() const { return ABI; } + + bool isMachineVerifierClean() const override { + return false; + } }; /// Mips32/64 big endian target machine. diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index c5d6a05d66119cfcb5055146654d91a991e6f7db..4d73c3991035ebecb80d192227a9e01961fd6df7 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -10,13 +10,13 @@ #include "MipsTargetObjectFile.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/Mips/Relocation.txt b/lib/Target/Mips/Relocation.txt new file mode 100644 index 0000000000000000000000000000000000000000..f1a6fd8645f6505420e7d2853053d107926a69bb --- /dev/null +++ b/lib/Target/Mips/Relocation.txt @@ -0,0 +1,125 @@ +MIPS Relocation Principles + +In LLVM, there are several elements of the llvm::ISD::NodeType enum +that deal with addresses and/or relocations. These are defined in +include/llvm/Target/TargetSelectionDAG.td, namely: + GlobalAddress, GlobalTLSAddress, JumpTable, ConstantPool, + ExternalSymbol, BlockAddress +The MIPS backend uses several principles to handle these. + +1. Code for lowering addresses references to machine dependent code is +factored into common code for generating different address forms and +is called by the relocation model specific lowering function, using +templated functions. For example: + + // lib/Target/Mips/MipsISelLowering.cpp + SDValue MipsTargetLowering:: + lowerJumpTable(SDValue Op, SelectionDAG &DAG) const + +calls + + template // lib/Target/Mips/MipsISelLowering.h + SDValue getAddrLocal(NodeTy *N, const SDLoc &DL, EVT Ty, + SelectionDAG &DAG, bool IsN32OrN64) const + +which calls the overloaded function: + + // lib/Target/Mips/MipsISelLowering.h + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + +2. Generic address nodes are lowered to some combination of target +independent and machine specific SDNodes (for example: +MipsISD::{Highest, Higher, Hi, Lo}) depending upon relocation model, +ABI, and compilation options. + +The choice of specific instructions that are to be used is delegated +to ISel which in turn relies on TableGen patterns to choose subtarget +specific instructions. For example, in getAddrLocal, the pseudo-code +generated is: + + (add (load (wrapper $gp, %got(sym)), %lo(sym)) + +where "%lo" represents an instance of an SDNode with opcode +"MipsISD::Lo", "wrapper" indicates one with opcode "MipsISD::Wrapper", +and "%got" the global table pointer "getGlobalReg(...)". The "add" is +"ISD::ADD", not a target dependent one. + +3. A TableGen multiclass pattern "MipsHiLoRelocs" is used to define a +template pattern parameterized over the load upper immediate +instruction, add operation, the zero register, and register class. +Here the instantiation of MipsHiLoRelocs in MipsInstrInfo.td is used +to MIPS32 to compute addresses for the static relocation model. + + // lib/Target/Mips/MipsInstrInfo.td + multiclass MipsHiLoRelocs { + def : MipsPat<(MipsHi tglobaladdr:$in), (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)), + (Addiu GPROpnd:$hi, tglobaladdr:$lo)>; + ... + } + defm : MipsHiLoRelocs; + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHiLoRelocs, SYM_32; + +The instantiation in Mips64InstrInfo.td is used for MIPS64 in ILP32 +mode, as guarded by the predicate "SYM_32" and also for a submode of +LP64 where symbols are assumed to be 32 bits wide. A similar +multiclass for MIPS64 in LP64 mode is also defined: + + // lib/Target/Mips/Mips64InstrInfo.td + multiclass MipsHighestHigherHiLoRelocs { + ... + def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), + (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)), + (Daddiu ZERO_64, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + } + +and it is instantiated twice: + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHighestHigherHiLoRelocs, SYM_64; + // lib/Target/Mips/MicroMips64r6InstrInfo.td + defm : MipsHighestHigherHiLoRelocs, SYM_64, + ISA_MICROMIPS64R6; + +These patterns are used during instruction selection to match +MipsISD::{Highest, Higher, Hi, Lo} to a specific machine instruction +and operands. + +More details on how multiclasses in TableGen work can be found in the +section "Multiclass definitions and instances" in the document +"TableGen Language Introduction" + +4. Instruction definitions are multiply defined to cover the different +register classes. In some cases, such as LW/LW64, this also accounts +for the difference in the results of instruction execution. On MIPS32, +"lw" loads a 32 bit value from memory. On MIPS64, "lw" loads a 32 bit +value from memory and sign extends the value to 64 bits. + + // lib/Target/Mips/MipsInstrInfo.td + def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16_relaxed>, LUI_FM; + // lib/Target/Mips/Mips64InstrInfo.td + def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64_relaxed>, LUI_FM; + +defines two names "LUi" and "LUi64" with two different register +classes, but with the same encoding---"LUI_FM". These instructions load a +16-bit immediate into bits 31-16 and clear the lower 15 bits. On MIPS64, +the result is sign-extended to 64 bits. diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 78bdf4e698d8b87e88e1612e6bbc6bebf2aaba96..bdd0f156c8afe6fde554ecb435afdfc19f16e239 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -27,7 +27,7 @@ void NVPTXMCAsmInfo::anchor() {} NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) { if (TheTriple.getArch() == Triple::nvptx64) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } CommentString = "//"; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 307ca6b99ffcb3778504f8adf469bee618a88ca0..0139646fc3f7558cdd9bb5ac08a4fddac701ac98 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXAsmPrinter.h" #include "InstPrinter/NVPTXInstPrinter.h" #include "MCTargetDesc/NVPTXBaseInfo.h" #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTX.h" -#include "NVPTXAsmPrinter.h" #include "NVPTXMCExpr.h" #include "NVPTXMachineFunctionInfo.h" #include "NVPTXRegisterInfo.h" @@ -73,8 +73,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -1550,12 +1550,12 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } } - if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) { + if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) { if (Ty->isAggregateType() || Ty->isVectorTy()) { // Just print .param .align .b8 .param[size]; // = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex + 1); + unsigned align = PAL.getParamAlignment(paramIndex); if (align == 0) align = DL.getABITypeAlignment(Ty); @@ -1641,7 +1641,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Just print .param .align .b8 .param[size]; // = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex + 1); + unsigned align = PAL.getParamAlignment(paramIndex); if (align == 0) align = DL.getABITypeAlignment(ETy); // Work around a bug in ptxas. When PTX code takes address of @@ -2004,7 +2004,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) { uint8_t Byte = Val.getLoBits(8).getZExtValue(); aggBuffer->addBytes(&Byte, 1, 1); - Val = Val.lshr(8); + Val.lshrInPlace(8); } return; } diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 390776212ce7ee7248a5384b58b7bc1663105030..916b0e1156640553214f044c95184904c3d8d32d 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "NVPTX.h" #include "MCTargetDesc/NVPTXBaseInfo.h" +#include "NVPTX.h" #include "NVPTXUtilities.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constants.h" diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 36e4382777cf23c773128920633acdcbaef9c191..f26b9a7cb8dd336fcf9fa2febe870072236493f7 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1,4661 +1,4668 @@ -//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that NVPTX uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/NVPTXBaseInfo.h" -#include "NVPTX.h" -#include "NVPTXISelLowering.h" -#include "NVPTXSection.h" -#include "NVPTXSubtarget.h" -#include "NVPTXTargetMachine.h" -#include "NVPTXTargetObjectFile.h" -#include "NVPTXUtilities.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Type.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CodeGen.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetCallingConv.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#undef DEBUG_TYPE -#define DEBUG_TYPE "nvptx-lower" - -using namespace llvm; - -static unsigned int uniqueCallSite = 0; - -static cl::opt sched4reg( - "nvptx-sched4reg", - cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); - -static cl::opt -FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, - cl::desc("NVPTX Specific: FMA contraction (0: don't do it" - " 1: do it 2: do it aggressively"), - cl::init(2)); - -static cl::opt UsePrecDivF32( - "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, - cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" - " IEEE Compliant F32 div.rnd if available."), - cl::init(2)); - -static cl::opt UsePrecSqrtF32( - "nvptx-prec-sqrtf32", cl::Hidden, - cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), - cl::init(true)); - -static cl::opt FtzEnabled( - "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, - cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), - cl::init(false)); - -int NVPTXTargetLowering::getDivF32Level() const { - if (UsePrecDivF32.getNumOccurrences() > 0) { - // If nvptx-prec-div32=N is used on the command-line, always honor it - return UsePrecDivF32; - } else { - // Otherwise, use div.approx if fast math is enabled - if (getTargetMachine().Options.UnsafeFPMath) - return 0; - else - return 2; - } -} - -bool NVPTXTargetLowering::usePrecSqrtF32() const { - if (UsePrecSqrtF32.getNumOccurrences() > 0) { - // If nvptx-prec-sqrtf32 is used on the command-line, always honor it - return UsePrecSqrtF32; - } else { - // Otherwise, use sqrt.approx if fast math is enabled - return !getTargetMachine().Options.UnsafeFPMath; - } -} - -bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { - // TODO: Get rid of this flag; there can be only one way to do this. - if (FtzEnabled.getNumOccurrences() > 0) { - // If nvptx-f32ftz is used on the command-line, always honor it - return FtzEnabled; - } else { - const Function *F = MF.getFunction(); - // Otherwise, check for an nvptx-f32ftz attribute on the function - if (F->hasFnAttribute("nvptx-f32ftz")) - return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; - else - return false; - } -} - -static bool IsPTXVectorType(MVT VT) { - switch (VT.SimpleTy) { - default: - return false; - case MVT::v2i1: - case MVT::v4i1: - case MVT::v2i8: - case MVT::v4i8: - case MVT::v2i16: - case MVT::v4i16: - case MVT::v2i32: - case MVT::v4i32: - case MVT::v2i64: - case MVT::v2f16: - case MVT::v4f16: - case MVT::v8f16: // <4 x f16x2> - case MVT::v2f32: - case MVT::v4f32: - case MVT::v2f64: - return true; - } -} - -/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive -/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors -/// into their primitive components. -/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the -/// same number of types as the Ins/Outs arrays in LowerFormalArguments, -/// LowerCall, and LowerReturn. -static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, - Type *Ty, SmallVectorImpl &ValueVTs, - SmallVectorImpl *Offsets = nullptr, - uint64_t StartingOffset = 0) { - SmallVector TempVTs; - SmallVector TempOffsets; - - ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); - for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { - EVT VT = TempVTs[i]; - uint64_t Off = TempOffsets[i]; - // Split vectors into individual elements, except for v2f16, which - // we will pass as a single scalar. - if (VT.isVector()) { - unsigned NumElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - // Vectors with an even number of f16 elements will be passed to - // us as an array of v2f16 elements. We must match this so we - // stay in sync with Ins/Outs. - if (EltVT == MVT::f16 && NumElts % 2 == 0) { - EltVT = MVT::v2f16; - NumElts /= 2; - } - for (unsigned j = 0; j != NumElts; ++j) { - ValueVTs.push_back(EltVT); - if (Offsets) - Offsets->push_back(Off + j * EltVT.getStoreSize()); - } - } else { - ValueVTs.push_back(VT); - if (Offsets) - Offsets->push_back(Off); - } - } -} - -// Check whether we can merge loads/stores of some of the pieces of a -// flattened function parameter or return value into a single vector -// load/store. -// -// The flattened parameter is represented as a list of EVTs and -// offsets, and the whole structure is aligned to ParamAlignment. This -// function determines whether we can load/store pieces of the -// parameter starting at index Idx using a single vectorized op of -// size AccessSize. If so, it returns the number of param pieces -// covered by the vector op. Otherwise, it returns 1. -static unsigned CanMergeParamLoadStoresStartingAt( - unsigned Idx, uint32_t AccessSize, const SmallVectorImpl &ValueVTs, - const SmallVectorImpl &Offsets, unsigned ParamAlignment) { - assert(isPowerOf2_32(AccessSize) && "must be a power of 2!"); - - // Can't vectorize if param alignment is not sufficient. - if (AccessSize > ParamAlignment) - return 1; - // Can't vectorize if offset is not aligned. - if (Offsets[Idx] & (AccessSize - 1)) - return 1; - - EVT EltVT = ValueVTs[Idx]; - unsigned EltSize = EltVT.getStoreSize(); - - // Element is too large to vectorize. - if (EltSize >= AccessSize) - return 1; - - unsigned NumElts = AccessSize / EltSize; - // Can't vectorize if AccessBytes if not a multiple of EltSize. - if (AccessSize != EltSize * NumElts) - return 1; - - // We don't have enough elements to vectorize. - if (Idx + NumElts > ValueVTs.size()) - return 1; - - // PTX ISA can only deal with 2- and 4-element vector ops. - if (NumElts != 4 && NumElts != 2) - return 1; - - for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) { - // Types do not match. - if (ValueVTs[j] != EltVT) - return 1; - - // Elements are not contiguous. - if (Offsets[j] - Offsets[j - 1] != EltSize) - return 1; - } - // OK. We can vectorize ValueVTs[i..i+NumElts) - return NumElts; -} - -// Flags for tracking per-element vectorization state of loads/stores -// of a flattened function parameter or return value. -enum ParamVectorizationFlags { - PVF_INNER = 0x0, // Middle elements of a vector. - PVF_FIRST = 0x1, // First element of the vector. - PVF_LAST = 0x2, // Last element of the vector. - // Scalar is effectively a 1-element vector. - PVF_SCALAR = PVF_FIRST | PVF_LAST -}; - -// Computes whether and how we can vectorize the loads/stores of a -// flattened function parameter or return value. -// -// The flattened parameter is represented as the list of ValueVTs and -// Offsets, and is aligned to ParamAlignment bytes. We return a vector -// of the same size as ValueVTs indicating how each piece should be -// loaded/stored (i.e. as a scalar, or as part of a vector -// load/store). -static SmallVector -VectorizePTXValueVTs(const SmallVectorImpl &ValueVTs, - const SmallVectorImpl &Offsets, - unsigned ParamAlignment) { - // Set vector size to match ValueVTs and mark all elements as - // scalars by default. - SmallVector VectorInfo; - VectorInfo.assign(ValueVTs.size(), PVF_SCALAR); - - // Check what we can vectorize using 128/64/32-bit accesses. - for (int I = 0, E = ValueVTs.size(); I != E; ++I) { - // Skip elements we've already processed. - assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state."); - for (unsigned AccessSize : {16, 8, 4, 2}) { - unsigned NumElts = CanMergeParamLoadStoresStartingAt( - I, AccessSize, ValueVTs, Offsets, ParamAlignment); - // Mark vectorized elements. - switch (NumElts) { - default: - llvm_unreachable("Unexpected return value"); - case 1: - // Can't vectorize using this size, try next smaller size. - continue; - case 2: - assert(I + 1 < E && "Not enough elements."); - VectorInfo[I] = PVF_FIRST; - VectorInfo[I + 1] = PVF_LAST; - I += 1; - break; - case 4: - assert(I + 3 < E && "Not enough elements."); - VectorInfo[I] = PVF_FIRST; - VectorInfo[I + 1] = PVF_INNER; - VectorInfo[I + 2] = PVF_INNER; - VectorInfo[I + 3] = PVF_LAST; - I += 3; - break; - } - // Break out of the inner loop because we've already succeeded - // using largest possible AccessSize. - break; - } - } - return VectorInfo; -} - -// NVPTXTargetLowering Constructor. -NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, - const NVPTXSubtarget &STI) - : TargetLowering(TM), nvTM(&TM), STI(STI) { - // always lower memset, memcpy, and memmove intrinsics to load/store - // instructions, rather - // then generating calls to memset, mempcy or memmove. - MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; - MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; - MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; - - setBooleanContents(ZeroOrNegativeOneBooleanContent); - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - - // Jump is Expensive. Don't create extra control flow for 'and', 'or' - // condition branches. - setJumpIsExpensive(true); - - // Wide divides are _very_ slow. Try to reduce the width of the divide if - // possible. - addBypassSlowDiv(64, 32); - - // By default, use the Source scheduling - if (sched4reg) - setSchedulingPreference(Sched::RegPressure); - else - setSchedulingPreference(Sched::Source); - - auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action, - LegalizeAction NoF16Action) { - setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action); - }; - - addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); - addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); - addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); - addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); - addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); - addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); - addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass); - addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass); - - // Conversion to/from FP16/FP16x2 is always legal. - setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom); - - setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote); - setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand); - - // Operations not directly supported by NVPTX. - setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v2f16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i8, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); - setOperationAction(ISD::BR_CC, MVT::f16, Expand); - setOperationAction(ISD::BR_CC, MVT::v2f16, Expand); - setOperationAction(ISD::BR_CC, MVT::f32, Expand); - setOperationAction(ISD::BR_CC, MVT::f64, Expand); - setOperationAction(ISD::BR_CC, MVT::i1, Expand); - setOperationAction(ISD::BR_CC, MVT::i8, Expand); - setOperationAction(ISD::BR_CC, MVT::i16, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Expand); - setOperationAction(ISD::BR_CC, MVT::i64, Expand); - // Some SIGN_EXTEND_INREG can be done using cvt instruction. - // For others we will expand to a SHL/SRA pair. - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); - setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); - setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); - setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); - - setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); - setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); - - if (STI.hasROT64()) { - setOperationAction(ISD::ROTL, MVT::i64, Legal); - setOperationAction(ISD::ROTR, MVT::i64, Legal); - } else { - setOperationAction(ISD::ROTL, MVT::i64, Expand); - setOperationAction(ISD::ROTR, MVT::i64, Expand); - } - if (STI.hasROT32()) { - setOperationAction(ISD::ROTL, MVT::i32, Legal); - setOperationAction(ISD::ROTR, MVT::i32, Legal); - } else { - setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::ROTR, MVT::i32, Expand); - } - - setOperationAction(ISD::ROTL, MVT::i16, Expand); - setOperationAction(ISD::ROTR, MVT::i16, Expand); - setOperationAction(ISD::ROTL, MVT::i8, Expand); - setOperationAction(ISD::ROTR, MVT::i8, Expand); - setOperationAction(ISD::BSWAP, MVT::i16, Expand); - setOperationAction(ISD::BSWAP, MVT::i32, Expand); - setOperationAction(ISD::BSWAP, MVT::i64, Expand); - - // Indirect branch is not supported. - // This also disables Jump Table creation. - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); - - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - - // We want to legalize constant related memmove and memcopy - // intrinsics. - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); - - // Turn FP extload into load/fpextend - setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); - // Turn FP truncstore into trunc + store. - // FIXME: vector types should also be expanded - setTruncStoreAction(MVT::f32, MVT::f16, Expand); - setTruncStoreAction(MVT::f64, MVT::f16, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - // PTX does not support load / store predicate registers - setOperationAction(ISD::LOAD, MVT::i1, Custom); - setOperationAction(ISD::STORE, MVT::i1, Custom); - - for (MVT VT : MVT::integer_valuetypes()) { - setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); - setTruncStoreAction(VT, MVT::i1, Expand); - } - - // This is legal in NVPTX - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f16, Legal); - - // TRAP can be lowered to PTX trap - setOperationAction(ISD::TRAP, MVT::Other, Legal); - - setOperationAction(ISD::ADDC, MVT::i64, Expand); - setOperationAction(ISD::ADDE, MVT::i64, Expand); - - // Register custom handling for vector loads/stores - for (MVT VT : MVT::vector_valuetypes()) { - if (IsPTXVectorType(VT)) { - setOperationAction(ISD::LOAD, VT, Custom); - setOperationAction(ISD::STORE, VT, Custom); - setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); - } - } - - // Custom handling for i8 intrinsics - setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); - - for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) { - setOperationAction(ISD::SMIN, Ty, Legal); - setOperationAction(ISD::SMAX, Ty, Legal); - setOperationAction(ISD::UMIN, Ty, Legal); - setOperationAction(ISD::UMAX, Ty, Legal); - - setOperationAction(ISD::CTPOP, Ty, Legal); - setOperationAction(ISD::CTLZ, Ty, Legal); - } - - setOperationAction(ISD::CTTZ, MVT::i16, Expand); - setOperationAction(ISD::CTTZ, MVT::i32, Expand); - setOperationAction(ISD::CTTZ, MVT::i64, Expand); - - // PTX does not directly support SELP of i1, so promote to i32 first - setOperationAction(ISD::SELECT, MVT::i1, Custom); - - // PTX cannot multiply two i64s in a single instruction. - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - - // We have some custom DAG combine patterns for these nodes - setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::FADD); - setTargetDAGCombine(ISD::MUL); - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SREM); - setTargetDAGCombine(ISD::UREM); - - // setcc for f16x2 needs special handling to prevent legalizer's - // attempt to scalarize it due to v2i1 not being legal. - if (STI.allowFP16Math()) - setTargetDAGCombine(ISD::SETCC); - - // Promote fp16 arithmetic if fp16 hardware isn't available or the - // user passed --nvptx-no-fp16-math. The flag is useful because, - // although sm_53+ GPUs have some sort of FP16 support in - // hardware, only sm_53 and sm_60 have full implementation. Others - // only have token amount of hardware and are likely to run faster - // by using fp32 units instead. - for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) { - setFP16OperationAction(Op, MVT::f16, Legal, Promote); - setFP16OperationAction(Op, MVT::v2f16, Legal, Expand); - } - - // There's no neg.f16 instruction. Expand to (0-x). - setOperationAction(ISD::FNEG, MVT::f16, Expand); - setOperationAction(ISD::FNEG, MVT::v2f16, Expand); - - // (would be) Library functions. - - // These map to conversion instructions for scalar FP types. - for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT, - ISD::FROUND, ISD::FTRUNC}) { - setOperationAction(Op, MVT::f16, Legal); - setOperationAction(Op, MVT::f32, Legal); - setOperationAction(Op, MVT::f64, Legal); - setOperationAction(Op, MVT::v2f16, Expand); - } - - // 'Expand' implements FCOPYSIGN without calling an external library. - setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - - // These map to corresponding instructions for f32/f64. f16 must be - // promoted to f32. v2f16 is expanded to f16, which is then promoted - // to f32. - for (const auto &Op : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, - ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM}) { - setOperationAction(Op, MVT::f16, Promote); - setOperationAction(Op, MVT::f32, Legal); - setOperationAction(Op, MVT::f64, Legal); - setOperationAction(Op, MVT::v2f16, Expand); - } - setOperationAction(ISD::FMINNUM, MVT::f16, Promote); - setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); - setOperationAction(ISD::FMINNAN, MVT::f16, Promote); - setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); - - // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate. - // No FPOW or FREM in PTX. - - // Now deduce the information based on the above mentioned - // actions - computeRegisterProperties(STI.getRegisterInfo()); -} - -const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((NVPTXISD::NodeType)Opcode) { - case NVPTXISD::FIRST_NUMBER: - break; - case NVPTXISD::CALL: - return "NVPTXISD::CALL"; - case NVPTXISD::RET_FLAG: - return "NVPTXISD::RET_FLAG"; - case NVPTXISD::LOAD_PARAM: - return "NVPTXISD::LOAD_PARAM"; - case NVPTXISD::Wrapper: - return "NVPTXISD::Wrapper"; - case NVPTXISD::DeclareParam: - return "NVPTXISD::DeclareParam"; - case NVPTXISD::DeclareScalarParam: - return "NVPTXISD::DeclareScalarParam"; - case NVPTXISD::DeclareRet: - return "NVPTXISD::DeclareRet"; - case NVPTXISD::DeclareScalarRet: - return "NVPTXISD::DeclareScalarRet"; - case NVPTXISD::DeclareRetParam: - return "NVPTXISD::DeclareRetParam"; - case NVPTXISD::PrintCall: - return "NVPTXISD::PrintCall"; - case NVPTXISD::PrintConvergentCall: - return "NVPTXISD::PrintConvergentCall"; - case NVPTXISD::PrintCallUni: - return "NVPTXISD::PrintCallUni"; - case NVPTXISD::PrintConvergentCallUni: - return "NVPTXISD::PrintConvergentCallUni"; - case NVPTXISD::LoadParam: - return "NVPTXISD::LoadParam"; - case NVPTXISD::LoadParamV2: - return "NVPTXISD::LoadParamV2"; - case NVPTXISD::LoadParamV4: - return "NVPTXISD::LoadParamV4"; - case NVPTXISD::StoreParam: - return "NVPTXISD::StoreParam"; - case NVPTXISD::StoreParamV2: - return "NVPTXISD::StoreParamV2"; - case NVPTXISD::StoreParamV4: - return "NVPTXISD::StoreParamV4"; - case NVPTXISD::StoreParamS32: - return "NVPTXISD::StoreParamS32"; - case NVPTXISD::StoreParamU32: - return "NVPTXISD::StoreParamU32"; - case NVPTXISD::CallArgBegin: - return "NVPTXISD::CallArgBegin"; - case NVPTXISD::CallArg: - return "NVPTXISD::CallArg"; - case NVPTXISD::LastCallArg: - return "NVPTXISD::LastCallArg"; - case NVPTXISD::CallArgEnd: - return "NVPTXISD::CallArgEnd"; - case NVPTXISD::CallVoid: - return "NVPTXISD::CallVoid"; - case NVPTXISD::CallVal: - return "NVPTXISD::CallVal"; - case NVPTXISD::CallSymbol: - return "NVPTXISD::CallSymbol"; - case NVPTXISD::Prototype: - return "NVPTXISD::Prototype"; - case NVPTXISD::MoveParam: - return "NVPTXISD::MoveParam"; - case NVPTXISD::StoreRetval: - return "NVPTXISD::StoreRetval"; - case NVPTXISD::StoreRetvalV2: - return "NVPTXISD::StoreRetvalV2"; - case NVPTXISD::StoreRetvalV4: - return "NVPTXISD::StoreRetvalV4"; - case NVPTXISD::PseudoUseParam: - return "NVPTXISD::PseudoUseParam"; - case NVPTXISD::RETURN: - return "NVPTXISD::RETURN"; - case NVPTXISD::CallSeqBegin: - return "NVPTXISD::CallSeqBegin"; - case NVPTXISD::CallSeqEnd: - return "NVPTXISD::CallSeqEnd"; - case NVPTXISD::CallPrototype: - return "NVPTXISD::CallPrototype"; - case NVPTXISD::LoadV2: - return "NVPTXISD::LoadV2"; - case NVPTXISD::LoadV4: - return "NVPTXISD::LoadV4"; - case NVPTXISD::LDGV2: - return "NVPTXISD::LDGV2"; - case NVPTXISD::LDGV4: - return "NVPTXISD::LDGV4"; - case NVPTXISD::LDUV2: - return "NVPTXISD::LDUV2"; - case NVPTXISD::LDUV4: - return "NVPTXISD::LDUV4"; - case NVPTXISD::StoreV2: - return "NVPTXISD::StoreV2"; - case NVPTXISD::StoreV4: - return "NVPTXISD::StoreV4"; - case NVPTXISD::FUN_SHFL_CLAMP: - return "NVPTXISD::FUN_SHFL_CLAMP"; - case NVPTXISD::FUN_SHFR_CLAMP: - return "NVPTXISD::FUN_SHFR_CLAMP"; - case NVPTXISD::IMAD: - return "NVPTXISD::IMAD"; - case NVPTXISD::SETP_F16X2: - return "NVPTXISD::SETP_F16X2"; - case NVPTXISD::Dummy: - return "NVPTXISD::Dummy"; - case NVPTXISD::MUL_WIDE_SIGNED: - return "NVPTXISD::MUL_WIDE_SIGNED"; - case NVPTXISD::MUL_WIDE_UNSIGNED: - return "NVPTXISD::MUL_WIDE_UNSIGNED"; - case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; - case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; - case NVPTXISD::Tex1DFloatFloatLevel: - return "NVPTXISD::Tex1DFloatFloatLevel"; - case NVPTXISD::Tex1DFloatFloatGrad: - return "NVPTXISD::Tex1DFloatFloatGrad"; - case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; - case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; - case NVPTXISD::Tex1DS32FloatLevel: - return "NVPTXISD::Tex1DS32FloatLevel"; - case NVPTXISD::Tex1DS32FloatGrad: - return "NVPTXISD::Tex1DS32FloatGrad"; - case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; - case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; - case NVPTXISD::Tex1DU32FloatLevel: - return "NVPTXISD::Tex1DU32FloatLevel"; - case NVPTXISD::Tex1DU32FloatGrad: - return "NVPTXISD::Tex1DU32FloatGrad"; - case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; - case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; - case NVPTXISD::Tex1DArrayFloatFloatLevel: - return "NVPTXISD::Tex1DArrayFloatFloatLevel"; - case NVPTXISD::Tex1DArrayFloatFloatGrad: - return "NVPTXISD::Tex1DArrayFloatFloatGrad"; - case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; - case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; - case NVPTXISD::Tex1DArrayS32FloatLevel: - return "NVPTXISD::Tex1DArrayS32FloatLevel"; - case NVPTXISD::Tex1DArrayS32FloatGrad: - return "NVPTXISD::Tex1DArrayS32FloatGrad"; - case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; - case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; - case NVPTXISD::Tex1DArrayU32FloatLevel: - return "NVPTXISD::Tex1DArrayU32FloatLevel"; - case NVPTXISD::Tex1DArrayU32FloatGrad: - return "NVPTXISD::Tex1DArrayU32FloatGrad"; - case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; - case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; - case NVPTXISD::Tex2DFloatFloatLevel: - return "NVPTXISD::Tex2DFloatFloatLevel"; - case NVPTXISD::Tex2DFloatFloatGrad: - return "NVPTXISD::Tex2DFloatFloatGrad"; - case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; - case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; - case NVPTXISD::Tex2DS32FloatLevel: - return "NVPTXISD::Tex2DS32FloatLevel"; - case NVPTXISD::Tex2DS32FloatGrad: - return "NVPTXISD::Tex2DS32FloatGrad"; - case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; - case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; - case NVPTXISD::Tex2DU32FloatLevel: - return "NVPTXISD::Tex2DU32FloatLevel"; - case NVPTXISD::Tex2DU32FloatGrad: - return "NVPTXISD::Tex2DU32FloatGrad"; - case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; - case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; - case NVPTXISD::Tex2DArrayFloatFloatLevel: - return "NVPTXISD::Tex2DArrayFloatFloatLevel"; - case NVPTXISD::Tex2DArrayFloatFloatGrad: - return "NVPTXISD::Tex2DArrayFloatFloatGrad"; - case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; - case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; - case NVPTXISD::Tex2DArrayS32FloatLevel: - return "NVPTXISD::Tex2DArrayS32FloatLevel"; - case NVPTXISD::Tex2DArrayS32FloatGrad: - return "NVPTXISD::Tex2DArrayS32FloatGrad"; - case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; - case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; - case NVPTXISD::Tex2DArrayU32FloatLevel: - return "NVPTXISD::Tex2DArrayU32FloatLevel"; - case NVPTXISD::Tex2DArrayU32FloatGrad: - return "NVPTXISD::Tex2DArrayU32FloatGrad"; - case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; - case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; - case NVPTXISD::Tex3DFloatFloatLevel: - return "NVPTXISD::Tex3DFloatFloatLevel"; - case NVPTXISD::Tex3DFloatFloatGrad: - return "NVPTXISD::Tex3DFloatFloatGrad"; - case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; - case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; - case NVPTXISD::Tex3DS32FloatLevel: - return "NVPTXISD::Tex3DS32FloatLevel"; - case NVPTXISD::Tex3DS32FloatGrad: - return "NVPTXISD::Tex3DS32FloatGrad"; - case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; - case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; - case NVPTXISD::Tex3DU32FloatLevel: - return "NVPTXISD::Tex3DU32FloatLevel"; - case NVPTXISD::Tex3DU32FloatGrad: - return "NVPTXISD::Tex3DU32FloatGrad"; - case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; - case NVPTXISD::TexCubeFloatFloatLevel: - return "NVPTXISD::TexCubeFloatFloatLevel"; - case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; - case NVPTXISD::TexCubeS32FloatLevel: - return "NVPTXISD::TexCubeS32FloatLevel"; - case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; - case NVPTXISD::TexCubeU32FloatLevel: - return "NVPTXISD::TexCubeU32FloatLevel"; - case NVPTXISD::TexCubeArrayFloatFloat: - return "NVPTXISD::TexCubeArrayFloatFloat"; - case NVPTXISD::TexCubeArrayFloatFloatLevel: - return "NVPTXISD::TexCubeArrayFloatFloatLevel"; - case NVPTXISD::TexCubeArrayS32Float: - return "NVPTXISD::TexCubeArrayS32Float"; - case NVPTXISD::TexCubeArrayS32FloatLevel: - return "NVPTXISD::TexCubeArrayS32FloatLevel"; - case NVPTXISD::TexCubeArrayU32Float: - return "NVPTXISD::TexCubeArrayU32Float"; - case NVPTXISD::TexCubeArrayU32FloatLevel: - return "NVPTXISD::TexCubeArrayU32FloatLevel"; - case NVPTXISD::Tld4R2DFloatFloat: - return "NVPTXISD::Tld4R2DFloatFloat"; - case NVPTXISD::Tld4G2DFloatFloat: - return "NVPTXISD::Tld4G2DFloatFloat"; - case NVPTXISD::Tld4B2DFloatFloat: - return "NVPTXISD::Tld4B2DFloatFloat"; - case NVPTXISD::Tld4A2DFloatFloat: - return "NVPTXISD::Tld4A2DFloatFloat"; - case NVPTXISD::Tld4R2DS64Float: - return "NVPTXISD::Tld4R2DS64Float"; - case NVPTXISD::Tld4G2DS64Float: - return "NVPTXISD::Tld4G2DS64Float"; - case NVPTXISD::Tld4B2DS64Float: - return "NVPTXISD::Tld4B2DS64Float"; - case NVPTXISD::Tld4A2DS64Float: - return "NVPTXISD::Tld4A2DS64Float"; - case NVPTXISD::Tld4R2DU64Float: - return "NVPTXISD::Tld4R2DU64Float"; - case NVPTXISD::Tld4G2DU64Float: - return "NVPTXISD::Tld4G2DU64Float"; - case NVPTXISD::Tld4B2DU64Float: - return "NVPTXISD::Tld4B2DU64Float"; - case NVPTXISD::Tld4A2DU64Float: - return "NVPTXISD::Tld4A2DU64Float"; - - case NVPTXISD::TexUnified1DFloatS32: - return "NVPTXISD::TexUnified1DFloatS32"; - case NVPTXISD::TexUnified1DFloatFloat: - return "NVPTXISD::TexUnified1DFloatFloat"; - case NVPTXISD::TexUnified1DFloatFloatLevel: - return "NVPTXISD::TexUnified1DFloatFloatLevel"; - case NVPTXISD::TexUnified1DFloatFloatGrad: - return "NVPTXISD::TexUnified1DFloatFloatGrad"; - case NVPTXISD::TexUnified1DS32S32: - return "NVPTXISD::TexUnified1DS32S32"; - case NVPTXISD::TexUnified1DS32Float: - return "NVPTXISD::TexUnified1DS32Float"; - case NVPTXISD::TexUnified1DS32FloatLevel: - return "NVPTXISD::TexUnified1DS32FloatLevel"; - case NVPTXISD::TexUnified1DS32FloatGrad: - return "NVPTXISD::TexUnified1DS32FloatGrad"; - case NVPTXISD::TexUnified1DU32S32: - return "NVPTXISD::TexUnified1DU32S32"; - case NVPTXISD::TexUnified1DU32Float: - return "NVPTXISD::TexUnified1DU32Float"; - case NVPTXISD::TexUnified1DU32FloatLevel: - return "NVPTXISD::TexUnified1DU32FloatLevel"; - case NVPTXISD::TexUnified1DU32FloatGrad: - return "NVPTXISD::TexUnified1DU32FloatGrad"; - case NVPTXISD::TexUnified1DArrayFloatS32: - return "NVPTXISD::TexUnified1DArrayFloatS32"; - case NVPTXISD::TexUnified1DArrayFloatFloat: - return "NVPTXISD::TexUnified1DArrayFloatFloat"; - case NVPTXISD::TexUnified1DArrayFloatFloatLevel: - return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; - case NVPTXISD::TexUnified1DArrayFloatFloatGrad: - return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; - case NVPTXISD::TexUnified1DArrayS32S32: - return "NVPTXISD::TexUnified1DArrayS32S32"; - case NVPTXISD::TexUnified1DArrayS32Float: - return "NVPTXISD::TexUnified1DArrayS32Float"; - case NVPTXISD::TexUnified1DArrayS32FloatLevel: - return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; - case NVPTXISD::TexUnified1DArrayS32FloatGrad: - return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; - case NVPTXISD::TexUnified1DArrayU32S32: - return "NVPTXISD::TexUnified1DArrayU32S32"; - case NVPTXISD::TexUnified1DArrayU32Float: - return "NVPTXISD::TexUnified1DArrayU32Float"; - case NVPTXISD::TexUnified1DArrayU32FloatLevel: - return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; - case NVPTXISD::TexUnified1DArrayU32FloatGrad: - return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; - case NVPTXISD::TexUnified2DFloatS32: - return "NVPTXISD::TexUnified2DFloatS32"; - case NVPTXISD::TexUnified2DFloatFloat: - return "NVPTXISD::TexUnified2DFloatFloat"; - case NVPTXISD::TexUnified2DFloatFloatLevel: - return "NVPTXISD::TexUnified2DFloatFloatLevel"; - case NVPTXISD::TexUnified2DFloatFloatGrad: - return "NVPTXISD::TexUnified2DFloatFloatGrad"; - case NVPTXISD::TexUnified2DS32S32: - return "NVPTXISD::TexUnified2DS32S32"; - case NVPTXISD::TexUnified2DS32Float: - return "NVPTXISD::TexUnified2DS32Float"; - case NVPTXISD::TexUnified2DS32FloatLevel: - return "NVPTXISD::TexUnified2DS32FloatLevel"; - case NVPTXISD::TexUnified2DS32FloatGrad: - return "NVPTXISD::TexUnified2DS32FloatGrad"; - case NVPTXISD::TexUnified2DU32S32: - return "NVPTXISD::TexUnified2DU32S32"; - case NVPTXISD::TexUnified2DU32Float: - return "NVPTXISD::TexUnified2DU32Float"; - case NVPTXISD::TexUnified2DU32FloatLevel: - return "NVPTXISD::TexUnified2DU32FloatLevel"; - case NVPTXISD::TexUnified2DU32FloatGrad: - return "NVPTXISD::TexUnified2DU32FloatGrad"; - case NVPTXISD::TexUnified2DArrayFloatS32: - return "NVPTXISD::TexUnified2DArrayFloatS32"; - case NVPTXISD::TexUnified2DArrayFloatFloat: - return "NVPTXISD::TexUnified2DArrayFloatFloat"; - case NVPTXISD::TexUnified2DArrayFloatFloatLevel: - return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; - case NVPTXISD::TexUnified2DArrayFloatFloatGrad: - return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; - case NVPTXISD::TexUnified2DArrayS32S32: - return "NVPTXISD::TexUnified2DArrayS32S32"; - case NVPTXISD::TexUnified2DArrayS32Float: - return "NVPTXISD::TexUnified2DArrayS32Float"; - case NVPTXISD::TexUnified2DArrayS32FloatLevel: - return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; - case NVPTXISD::TexUnified2DArrayS32FloatGrad: - return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; - case NVPTXISD::TexUnified2DArrayU32S32: - return "NVPTXISD::TexUnified2DArrayU32S32"; - case NVPTXISD::TexUnified2DArrayU32Float: - return "NVPTXISD::TexUnified2DArrayU32Float"; - case NVPTXISD::TexUnified2DArrayU32FloatLevel: - return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; - case NVPTXISD::TexUnified2DArrayU32FloatGrad: - return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; - case NVPTXISD::TexUnified3DFloatS32: - return "NVPTXISD::TexUnified3DFloatS32"; - case NVPTXISD::TexUnified3DFloatFloat: - return "NVPTXISD::TexUnified3DFloatFloat"; - case NVPTXISD::TexUnified3DFloatFloatLevel: - return "NVPTXISD::TexUnified3DFloatFloatLevel"; - case NVPTXISD::TexUnified3DFloatFloatGrad: - return "NVPTXISD::TexUnified3DFloatFloatGrad"; - case NVPTXISD::TexUnified3DS32S32: - return "NVPTXISD::TexUnified3DS32S32"; - case NVPTXISD::TexUnified3DS32Float: - return "NVPTXISD::TexUnified3DS32Float"; - case NVPTXISD::TexUnified3DS32FloatLevel: - return "NVPTXISD::TexUnified3DS32FloatLevel"; - case NVPTXISD::TexUnified3DS32FloatGrad: - return "NVPTXISD::TexUnified3DS32FloatGrad"; - case NVPTXISD::TexUnified3DU32S32: - return "NVPTXISD::TexUnified3DU32S32"; - case NVPTXISD::TexUnified3DU32Float: - return "NVPTXISD::TexUnified3DU32Float"; - case NVPTXISD::TexUnified3DU32FloatLevel: - return "NVPTXISD::TexUnified3DU32FloatLevel"; - case NVPTXISD::TexUnified3DU32FloatGrad: - return "NVPTXISD::TexUnified3DU32FloatGrad"; - case NVPTXISD::TexUnifiedCubeFloatFloat: - return "NVPTXISD::TexUnifiedCubeFloatFloat"; - case NVPTXISD::TexUnifiedCubeFloatFloatLevel: - return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; - case NVPTXISD::TexUnifiedCubeS32Float: - return "NVPTXISD::TexUnifiedCubeS32Float"; - case NVPTXISD::TexUnifiedCubeS32FloatLevel: - return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; - case NVPTXISD::TexUnifiedCubeU32Float: - return "NVPTXISD::TexUnifiedCubeU32Float"; - case NVPTXISD::TexUnifiedCubeU32FloatLevel: - return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; - case NVPTXISD::TexUnifiedCubeArrayFloatFloat: - return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; - case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: - return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; - case NVPTXISD::TexUnifiedCubeArrayS32Float: - return "NVPTXISD::TexUnifiedCubeArrayS32Float"; - case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: - return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; - case NVPTXISD::TexUnifiedCubeArrayU32Float: - return "NVPTXISD::TexUnifiedCubeArrayU32Float"; - case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: - return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; - case NVPTXISD::Tld4UnifiedR2DFloatFloat: - return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; - case NVPTXISD::Tld4UnifiedG2DFloatFloat: - return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; - case NVPTXISD::Tld4UnifiedB2DFloatFloat: - return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; - case NVPTXISD::Tld4UnifiedA2DFloatFloat: - return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; - case NVPTXISD::Tld4UnifiedR2DS64Float: - return "NVPTXISD::Tld4UnifiedR2DS64Float"; - case NVPTXISD::Tld4UnifiedG2DS64Float: - return "NVPTXISD::Tld4UnifiedG2DS64Float"; - case NVPTXISD::Tld4UnifiedB2DS64Float: - return "NVPTXISD::Tld4UnifiedB2DS64Float"; - case NVPTXISD::Tld4UnifiedA2DS64Float: - return "NVPTXISD::Tld4UnifiedA2DS64Float"; - case NVPTXISD::Tld4UnifiedR2DU64Float: - return "NVPTXISD::Tld4UnifiedR2DU64Float"; - case NVPTXISD::Tld4UnifiedG2DU64Float: - return "NVPTXISD::Tld4UnifiedG2DU64Float"; - case NVPTXISD::Tld4UnifiedB2DU64Float: - return "NVPTXISD::Tld4UnifiedB2DU64Float"; - case NVPTXISD::Tld4UnifiedA2DU64Float: - return "NVPTXISD::Tld4UnifiedA2DU64Float"; - - case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; - case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; - case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; - case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; - case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; - case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; - case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; - case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; - case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; - case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; - case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; - - case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; - case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; - case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; - case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; - case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; - case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; - case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; - case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; - case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; - case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; - case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; - - case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; - case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; - case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; - case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; - case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; - case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; - case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; - case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; - case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; - case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; - case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; - - case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; - case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; - case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; - case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; - case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; - case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; - case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; - case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; - case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; - case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; - case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; - - case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; - case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; - case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; - case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; - case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; - case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; - case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; - case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; - case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; - case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; - case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; - - case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; - case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; - case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; - case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; - case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; - case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; - case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; - case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; - case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; - case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; - case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; - - case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; - case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; - case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; - case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; - case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; - case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; - case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; - case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; - case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; - case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; - case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; - - case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; - case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; - case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; - case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; - case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; - case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; - case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; - case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; - case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; - case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; - case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; - - case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; - case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; - case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; - case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; - case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; - case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; - case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; - case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; - case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; - case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; - case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; - - case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; - case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; - case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; - case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; - case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; - case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; - case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; - case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; - case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; - case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; - case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; - - case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; - case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; - case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; - case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; - case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; - case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; - case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; - case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; - case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; - case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; - case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; - - case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; - case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; - case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; - case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; - case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; - case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; - case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; - case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; - case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; - case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; - case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; - - case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; - case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; - case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; - case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; - case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; - case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; - case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; - case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; - case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; - case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; - case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; - - case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; - case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; - case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; - case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; - case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; - case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; - case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; - case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; - case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; - case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; - case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; - - case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; - case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; - case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; - case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; - case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; - case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; - case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; - case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; - case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; - case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; - case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; - } - return nullptr; -} - -TargetLoweringBase::LegalizeTypeAction -NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { - if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) - return TypeSplitVector; - if (VT == MVT::v2f16) - return TypeLegal; - return TargetLoweringBase::getPreferredVectorAction(VT); -} - -SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, - int Enabled, int &ExtraSteps, - bool &UseOneConst, - bool Reciprocal) const { - if (!(Enabled == ReciprocalEstimate::Enabled || - (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32()))) - return SDValue(); - - if (ExtraSteps == ReciprocalEstimate::Unspecified) - ExtraSteps = 0; - - SDLoc DL(Operand); - EVT VT = Operand.getValueType(); - bool Ftz = useF32FTZ(DAG.getMachineFunction()); - - auto MakeIntrinsicCall = [&](Intrinsic::ID IID) { - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(IID, DL, MVT::i32), Operand); - }; - - // The sqrt and rsqrt refinement processes assume we always start out with an - // approximation of the rsqrt. Therefore, if we're going to do any refinement - // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing - // any refinement, we must return a regular sqrt. - if (Reciprocal || ExtraSteps > 0) { - if (VT == MVT::f32) - return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f - : Intrinsic::nvvm_rsqrt_approx_f); - else if (VT == MVT::f64) - return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d); - else - return SDValue(); - } else { - if (VT == MVT::f32) - return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f - : Intrinsic::nvvm_sqrt_approx_f); - else { - // There's no sqrt.approx.f64 instruction, so we emit - // reciprocal(rsqrt(x)). This is faster than - // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain - // x * rsqrt(x).) - return DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32), - MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d)); - } - } -} - -SDValue -NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - const GlobalValue *GV = cast(Op)->getGlobal(); - auto PtrVT = getPointerTy(DAG.getDataLayout()); - Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT); - return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); -} - -std::string NVPTXTargetLowering::getPrototype( - const DataLayout &DL, Type *retTy, const ArgListTy &Args, - const SmallVectorImpl &Outs, unsigned retAlignment, - const ImmutableCallSite *CS) const { - auto PtrVT = getPointerTy(DL); - - bool isABI = (STI.getSmVersion() >= 20); - assert(isABI && "Non-ABI compilation is not supported"); - if (!isABI) - return ""; - - std::stringstream O; - O << "prototype_" << uniqueCallSite << " : .callprototype "; - - if (retTy->getTypeID() == Type::VoidTyID) { - O << "()"; - } else { - O << "("; - if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { - unsigned size = 0; - if (auto *ITy = dyn_cast(retTy)) { - size = ITy->getBitWidth(); - } else { - assert(retTy->isFloatingPointTy() && - "Floating point type expected here"); - size = retTy->getPrimitiveSizeInBits(); - } - // PTX ABI requires all scalar return values to be at least 32 - // bits in size. fp16 normally uses .b16 as its storage type in - // PTX, so its size must be adjusted here, too. - if (size < 32) - size = 32; - - O << ".param .b" << size << " _"; - } else if (isa(retTy)) { - O << ".param .b" << PtrVT.getSizeInBits() << " _"; - } else if (retTy->isAggregateType() || retTy->isVectorTy()) { - auto &DL = CS->getCalledFunction()->getParent()->getDataLayout(); - O << ".param .align " << retAlignment << " .b8 _[" - << DL.getTypeAllocSize(retTy) << "]"; - } else { - llvm_unreachable("Unknown return type"); - } - O << ") "; - } - O << "_ ("; - - bool first = true; - - unsigned OIdx = 0; - for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { - Type *Ty = Args[i].Ty; - if (!first) { - O << ", "; - } - first = false; - - if (!Outs[OIdx].Flags.isByVal()) { - if (Ty->isAggregateType() || Ty->isVectorTy()) { - unsigned align = 0; - const CallInst *CallI = cast(CS->getInstruction()); - // +1 because index 0 is reserved for return type alignment - if (!getAlign(*CallI, i + 1, align)) - align = DL.getABITypeAlignment(Ty); - unsigned sz = DL.getTypeAllocSize(Ty); - O << ".param .align " << align << " .b8 "; - O << "_"; - O << "[" << sz << "]"; - // update the index for Outs - SmallVector vtparts; - ComputeValueVTs(*this, DL, Ty, vtparts); - if (unsigned len = vtparts.size()) - OIdx += len - 1; - continue; - } - // i8 types in IR will be i16 types in SDAG - assert((getValueType(DL, Ty) == Outs[OIdx].VT || - (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && - "type mismatch between callee prototype and arguments"); - // scalar type - unsigned sz = 0; - if (isa(Ty)) { - sz = cast(Ty)->getBitWidth(); - if (sz < 32) - sz = 32; - } else if (isa(Ty)) { - sz = PtrVT.getSizeInBits(); - } else if (Ty->isHalfTy()) - // PTX ABI requires all scalar parameters to be at least 32 - // bits in size. fp16 normally uses .b16 as its storage type - // in PTX, so its size must be adjusted here, too. - sz = 32; - else - sz = Ty->getPrimitiveSizeInBits(); - O << ".param .b" << sz << " "; - O << "_"; - continue; - } - auto *PTy = dyn_cast(Ty); - assert(PTy && "Param with byval attribute should be a pointer type"); - Type *ETy = PTy->getElementType(); - - unsigned align = Outs[OIdx].Flags.getByValAlign(); - unsigned sz = DL.getTypeAllocSize(ETy); - O << ".param .align " << align << " .b8 "; - O << "_"; - O << "[" << sz << "]"; - } - O << ");"; - return O.str(); -} - -unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, - const ImmutableCallSite *CS, - Type *Ty, unsigned Idx, - const DataLayout &DL) const { - if (!CS) { - // CallSite is zero, fallback to ABI type alignment - return DL.getABITypeAlignment(Ty); - } - - unsigned Align = 0; - const Value *DirectCallee = CS->getCalledFunction(); - - if (!DirectCallee) { - // We don't have a direct function symbol, but that may be because of - // constant cast instructions in the call. - const Instruction *CalleeI = CS->getInstruction(); - assert(CalleeI && "Call target is not a function or derived value?"); - - // With bitcast'd call targets, the instruction will be the call - if (isa(CalleeI)) { - // Check if we have call alignment metadata - if (getAlign(*cast(CalleeI), Idx, Align)) - return Align; - - const Value *CalleeV = cast(CalleeI)->getCalledValue(); - // Ignore any bitcast instructions - while (isa(CalleeV)) { - const ConstantExpr *CE = cast(CalleeV); - if (!CE->isCast()) - break; - // Look through the bitcast - CalleeV = cast(CalleeV)->getOperand(0); - } - - // We have now looked past all of the bitcasts. Do we finally have a - // Function? - if (isa(CalleeV)) - DirectCallee = CalleeV; - } - } - - // Check for function alignment information if we found that the - // ultimate target is a Function - if (DirectCallee) - if (getAlign(*cast(DirectCallee), Idx, Align)) - return Align; - - // Call is indirect or alignment information is not available, fall back to - // the ABI type alignment - return DL.getABITypeAlignment(Ty); -} - -SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const { - SelectionDAG &DAG = CLI.DAG; - SDLoc dl = CLI.DL; - SmallVectorImpl &Outs = CLI.Outs; - SmallVectorImpl &OutVals = CLI.OutVals; - SmallVectorImpl &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; - ArgListTy &Args = CLI.getArgs(); - Type *RetTy = CLI.RetTy; - ImmutableCallSite *CS = CLI.CS; - const DataLayout &DL = DAG.getDataLayout(); - - bool isABI = (STI.getSmVersion() >= 20); - assert(isABI && "Non-ABI compilation is not supported"); - if (!isABI) - return Chain; - - SDValue tempChain = Chain; - Chain = DAG.getCALLSEQ_START( - Chain, DAG.getIntPtrConstant(uniqueCallSite, dl, true), dl); - SDValue InFlag = Chain.getValue(1); - - unsigned paramCount = 0; - // Args.size() and Outs.size() need not match. - // Outs.size() will be larger - // * if there is an aggregate argument with multiple fields (each field - // showing up separately in Outs) - // * if there is a vector argument with more than typical vector-length - // elements (generally if more than 4) where each vector element is - // individually present in Outs. - // So a different index should be used for indexing into Outs/OutVals. - // See similar issue in LowerFormalArguments. - unsigned OIdx = 0; - // Declare the .params or .reg need to pass values - // to the function - for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { - EVT VT = Outs[OIdx].VT; - Type *Ty = Args[i].Ty; - - if (!Outs[OIdx].Flags.isByVal()) { - SmallVector VTs; - SmallVector Offsets; - ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets); - unsigned ArgAlign = - getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL); - unsigned AllocSize = DL.getTypeAllocSize(Ty); - SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - bool NeedAlign; // Does argument declaration specify alignment? - if (Ty->isAggregateType() || Ty->isVectorTy()) { - // declare .param .align .b8 .param[]; - SDValue DeclareParamOps[] = { - Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), - DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(AllocSize, dl, MVT::i32), InFlag}; - Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, - DeclareParamOps); - NeedAlign = true; - } else { - // declare .param .b .param; - if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) { - // PTX ABI requires integral types to be at least 32 bits in - // size. FP16 is loaded/stored using i16, so it's handled - // here as well. - AllocSize = 4; - } - SDValue DeclareScalarParamOps[] = { - Chain, DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(AllocSize * 8, dl, MVT::i32), - DAG.getConstant(0, dl, MVT::i32), InFlag}; - Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, - DeclareScalarParamOps); - NeedAlign = false; - } - InFlag = Chain.getValue(1); - - // PTX Interoperability Guide 3.3(A): [Integer] Values shorter - // than 32-bits are sign extended or zero extended, depending on - // whether they are signed or unsigned types. This case applies - // only to scalar parameters and not to aggregate values. - bool ExtendIntegerParam = - Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; - - auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); - SmallVector StoreOperands; - for (unsigned j = 0, je = VTs.size(); j != je; ++j) { - // New store. - if (VectorInfo[j] & PVF_FIRST) { - assert(StoreOperands.empty() && "Unfinished preceeding store."); - StoreOperands.push_back(Chain); - StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32)); - StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32)); - } - - EVT EltVT = VTs[j]; - SDValue StVal = OutVals[OIdx]; - if (ExtendIntegerParam) { - assert(VTs.size() == 1 && "Scalar can't have multiple parts."); - // zext/sext to i32 - StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND - : ISD::ZERO_EXTEND, - dl, MVT::i32, StVal); - } else if (EltVT.getSizeInBits() < 16) { - // Use 16-bit registers for small stores as it's the - // smallest general purpose register size supported by NVPTX. - StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); - } - - // Record the value to store. - StoreOperands.push_back(StVal); - - if (VectorInfo[j] & PVF_LAST) { - unsigned NumElts = StoreOperands.size() - 3; - NVPTXISD::NodeType Op; - switch (NumElts) { - case 1: - Op = NVPTXISD::StoreParam; - break; - case 2: - Op = NVPTXISD::StoreParamV2; - break; - case 4: - Op = NVPTXISD::StoreParamV4; - break; - default: - llvm_unreachable("Invalid vector info."); - } - - StoreOperands.push_back(InFlag); - - // Adjust type of the store op if we've extended the scalar - // return value. - EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j]; - unsigned EltAlign = - NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0; - - Chain = DAG.getMemIntrinsicNode( - Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, - TheStoreType, MachinePointerInfo(), EltAlign); - InFlag = Chain.getValue(1); - - // Cleanup. - StoreOperands.clear(); - } - ++OIdx; - } - assert(StoreOperands.empty() && "Unfinished parameter store."); - if (VTs.size() > 0) - --OIdx; - ++paramCount; - continue; - } - - // ByVal arguments - SmallVector VTs; - SmallVector Offsets; - auto *PTy = dyn_cast(Args[i].Ty); - assert(PTy && "Type of a byval parameter should be pointer"); - ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0); - - // declare .param .align .b8 .param[]; - unsigned sz = Outs[OIdx].Flags.getByValSize(); - SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign(); - // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, - // so we don't need to worry about natural alignment or not. - // See TargetLowering::LowerCallTo(). - - // Enforce minumum alignment of 4 to work around ptxas miscompile - // for sm_50+. See corresponding alignment adjustment in - // emitFunctionParamList() for details. - if (ArgAlign < 4) - ArgAlign = 4; - SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), - DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(sz, dl, MVT::i32), InFlag}; - Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, - DeclareParamOps); - InFlag = Chain.getValue(1); - for (unsigned j = 0, je = VTs.size(); j != je; ++j) { - EVT elemtype = VTs[j]; - int curOffset = Offsets[j]; - unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); - auto PtrVT = getPointerTy(DL); - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], - DAG.getConstant(curOffset, dl, PtrVT)); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), PartAlign); - if (elemtype.getSizeInBits() < 16) { - theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); - } - SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, - DAG.getConstant(paramCount, dl, MVT::i32), - DAG.getConstant(curOffset, dl, MVT::i32), - theVal, InFlag }; - Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, - CopyParamOps, elemtype, - MachinePointerInfo()); - - InFlag = Chain.getValue(1); - } - ++paramCount; - } - - GlobalAddressSDNode *Func = dyn_cast(Callee.getNode()); - unsigned retAlignment = 0; - - // Handle Result - if (Ins.size() > 0) { - SmallVector resvtparts; - ComputeValueVTs(*this, DL, RetTy, resvtparts); - - // Declare - // .param .align 16 .b8 retval0[], or - // .param .b retval0 - unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy); - // Emit ".param .b retval0" instead of byte arrays only for - // these three types to match the logic in - // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. - // Plus, this behavior is consistent with nvcc's. - if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy() || - RetTy->isPointerTy()) { - // Scalar needs to be at least 32bit wide - if (resultsz < 32) - resultsz = 32; - SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), - DAG.getConstant(resultsz, dl, MVT::i32), - DAG.getConstant(0, dl, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, - DeclareRetOps); - InFlag = Chain.getValue(1); - } else { - retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL); - SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, - DAG.getConstant(retAlignment, dl, MVT::i32), - DAG.getConstant(resultsz / 8, dl, MVT::i32), - DAG.getConstant(0, dl, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, - DeclareRetOps); - InFlag = Chain.getValue(1); - } - } - - if (!Func) { - // This is indirect function call case : PTX requires a prototype of the - // form - // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); - // to be emitted, and the label has to used as the last arg of call - // instruction. - // The prototype is embedded in a string and put as the operand for a - // CallPrototype SDNode which will print out to the value of the string. - SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); - std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS); - const char *ProtoStr = - nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); - SDValue ProtoOps[] = { - Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, - }; - Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); - InFlag = Chain.getValue(1); - } - // Op to just print "call" - SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrintCallOps[] = { - Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag - }; - // We model convergent calls as separate opcodes. - unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall; - if (CLI.IsConvergent) - Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni - : NVPTXISD::PrintConvergentCall; - Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps); - InFlag = Chain.getValue(1); - - // Ops to print out the function name - SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallVoidOps[] = { Chain, Callee, InFlag }; - Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); - InFlag = Chain.getValue(1); - - // Ops to print out the param list - SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallArgBeginOps[] = { Chain, InFlag }; - Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, - CallArgBeginOps); - InFlag = Chain.getValue(1); - - for (unsigned i = 0, e = paramCount; i != e; ++i) { - unsigned opcode; - if (i == (e - 1)) - opcode = NVPTXISD::LastCallArg; - else - opcode = NVPTXISD::CallArg; - SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), - DAG.getConstant(i, dl, MVT::i32), InFlag }; - Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); - InFlag = Chain.getValue(1); - } - SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallArgEndOps[] = { Chain, - DAG.getConstant(Func ? 1 : 0, dl, MVT::i32), - InFlag }; - Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); - InFlag = Chain.getValue(1); - - if (!Func) { - SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrototypeOps[] = { Chain, - DAG.getConstant(uniqueCallSite, dl, MVT::i32), - InFlag }; - Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); - InFlag = Chain.getValue(1); - } - - // Generate loads from param memory/moves from registers for result - if (Ins.size() > 0) { - SmallVector VTs; - SmallVector Offsets; - ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0); - assert(VTs.size() == Ins.size() && "Bad value decomposition"); - - unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL); - auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); - - SmallVector LoadVTs; - int VecIdx = -1; // Index of the first element of the vector. - - // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than - // 32-bits are sign extended or zero extended, depending on whether - // they are signed or unsigned types. - bool ExtendIntegerRetVal = - RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; - - for (unsigned i = 0, e = VTs.size(); i != e; ++i) { - bool needTruncate = false; - EVT TheLoadType = VTs[i]; - EVT EltType = Ins[i].VT; - unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]); - if (ExtendIntegerRetVal) { - TheLoadType = MVT::i32; - EltType = MVT::i32; - needTruncate = true; - } else if (TheLoadType.getSizeInBits() < 16) { - if (VTs[i].isInteger()) - needTruncate = true; - EltType = MVT::i16; - } - - // Record index of the very first element of the vector. - if (VectorInfo[i] & PVF_FIRST) { - assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); - VecIdx = i; - } - - LoadVTs.push_back(EltType); - - if (VectorInfo[i] & PVF_LAST) { - unsigned NumElts = LoadVTs.size(); - LoadVTs.push_back(MVT::Other); - LoadVTs.push_back(MVT::Glue); - NVPTXISD::NodeType Op; - switch (NumElts) { - case 1: - Op = NVPTXISD::LoadParam; - break; - case 2: - Op = NVPTXISD::LoadParamV2; - break; - case 4: - Op = NVPTXISD::LoadParamV4; - break; - default: - llvm_unreachable("Invalid vector info."); - } - - SDValue LoadOperands[] = { - Chain, DAG.getConstant(1, dl, MVT::i32), - DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag}; - SDValue RetVal = DAG.getMemIntrinsicNode( - Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType, - MachinePointerInfo(), EltAlign); - - for (unsigned j = 0; j < NumElts; ++j) { - SDValue Ret = RetVal.getValue(j); - if (needTruncate) - Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret); - InVals.push_back(Ret); - } - Chain = RetVal.getValue(NumElts); - InFlag = RetVal.getValue(NumElts + 1); - - // Cleanup - VecIdx = -1; - LoadVTs.clear(); - } - } - } - - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(uniqueCallSite, dl, true), - DAG.getIntPtrConstant(uniqueCallSite + 1, dl, - true), - InFlag, dl); - uniqueCallSite++; - - // set isTailCall to false for now, until we figure out how to express - // tail call optimization in PTX - isTailCall = false; - return Chain; -} - -// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() -// (see LegalizeDAG.cpp). This is slow and uses local memory. -// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 -SDValue -NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { - SDNode *Node = Op.getNode(); - SDLoc dl(Node); - SmallVector Ops; - unsigned NumOperands = Node->getNumOperands(); - for (unsigned i = 0; i < NumOperands; ++i) { - SDValue SubOp = Node->getOperand(i); - EVT VVT = SubOp.getNode()->getValueType(0); - EVT EltVT = VVT.getVectorElementType(); - unsigned NumSubElem = VVT.getVectorNumElements(); - for (unsigned j = 0; j < NumSubElem; ++j) { - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, - DAG.getIntPtrConstant(j, dl))); - } - } - return DAG.getBuildVector(Node->getValueType(0), dl, Ops); -} - -// We can init constant f16x2 with a single .b32 move. Normally it -// would get lowered as two constant loads and vector-packing move. -// mov.b16 %h1, 0x4000; -// mov.b16 %h2, 0x3C00; -// mov.b32 %hh2, {%h2, %h1}; -// Instead we want just a constant move: -// mov.b32 %hh2, 0x40003C00 -// -// This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0 -// generates good SASS in both cases. -SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, - SelectionDAG &DAG) const { - //return Op; - if (!(Op->getValueType(0) == MVT::v2f16 && - isa(Op->getOperand(0)) && - isa(Op->getOperand(1)))) - return Op; - - APInt E0 = - cast(Op->getOperand(0))->getValueAPF().bitcastToAPInt(); - APInt E1 = - cast(Op->getOperand(1))->getValueAPF().bitcastToAPInt(); - SDValue Const = - DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32); - return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const); -} - -SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { - SDValue Index = Op->getOperand(1); - // Constant index will be matched by tablegen. - if (isa(Index.getNode())) - return Op; - - // Extract individual elements and select one of them. - SDValue Vector = Op->getOperand(0); - EVT VectorVT = Vector.getValueType(); - assert(VectorVT == MVT::v2f16 && "Unexpected vector type."); - EVT EltVT = VectorVT.getVectorElementType(); - - SDLoc dl(Op.getNode()); - SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, - DAG.getIntPtrConstant(0, dl)); - SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, - DAG.getIntPtrConstant(1, dl)); - return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1, - ISD::CondCode::SETEQ); -} - -/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which -/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift -/// amount, or -/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift -/// amount. -SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); - - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; - - if (VTBits == 32 && STI.getSmVersion() >= 35) { - // For 32bit and sm35, we can use the funnel shift 'shf' instruction. - // {dHi, dLo} = {aHi, aLo} >> Amt - // dHi = aHi >> Amt - // dLo = shf.r.clamp aLo, aHi, Amt - - SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, - ShAmt); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); - } - else { - // {dHi, dLo} = {aHi, aLo} >> Amt - // - if (Amt>=size) then - // dLo = aHi >> (Amt-size) - // dHi = aHi >> Amt (this is either all 0 or all 1) - // else - // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) - // dHi = aHi >> Amt - - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant(VTBits, dl, MVT::i32), - ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, - DAG.getConstant(VTBits, dl, MVT::i32)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - - SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, - DAG.getConstant(VTBits, dl, MVT::i32), - ISD::SETGE); - SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); - } -} - -/// LowerShiftLeftParts - Lower SHL_PARTS, which -/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift -/// amount, or -/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift -/// amount. -SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - assert(Op.getOpcode() == ISD::SHL_PARTS); - - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - - if (VTBits == 32 && STI.getSmVersion() >= 35) { - // For 32bit and sm35, we can use the funnel shift 'shf' instruction. - // {dHi, dLo} = {aHi, aLo} << Amt - // dHi = shf.l.clamp aLo, aHi, Amt - // dLo = aLo << Amt - - SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, - ShAmt); - SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); - } - else { - // {dHi, dLo} = {aHi, aLo} << Amt - // - if (Amt>=size) then - // dLo = aLo << Amt (all 0) - // dLo = aLo << (Amt-size) - // else - // dLo = aLo << Amt - // dHi = (aHi << Amt) | (aLo >> (size-Amt)) - - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, - DAG.getConstant(VTBits, dl, MVT::i32), - ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, - DAG.getConstant(VTBits, dl, MVT::i32)); - SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - - SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, - DAG.getConstant(VTBits, dl, MVT::i32), - ISD::SETGE); - SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); - - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, dl); - } -} - -SDValue -NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - case ISD::RETURNADDR: - return SDValue(); - case ISD::FRAMEADDR: - return SDValue(); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: - return Op; - case ISD::BUILD_VECTOR: - return LowerBUILD_VECTOR(Op, DAG); - case ISD::EXTRACT_SUBVECTOR: - return Op; - case ISD::EXTRACT_VECTOR_ELT: - return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::CONCAT_VECTORS: - return LowerCONCAT_VECTORS(Op, DAG); - case ISD::STORE: - return LowerSTORE(Op, DAG); - case ISD::LOAD: - return LowerLOAD(Op, DAG); - case ISD::SHL_PARTS: - return LowerShiftLeftParts(Op, DAG); - case ISD::SRA_PARTS: - case ISD::SRL_PARTS: - return LowerShiftRightParts(Op, DAG); - case ISD::SELECT: - return LowerSelect(Op, DAG); - default: - llvm_unreachable("Custom lowering not defined for operation"); - } -} - -SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { - SDValue Op0 = Op->getOperand(0); - SDValue Op1 = Op->getOperand(1); - SDValue Op2 = Op->getOperand(2); - SDLoc DL(Op.getNode()); - - assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1"); - - Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); - Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); - SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); - - return Trunc; -} - -SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { - if (Op.getValueType() == MVT::i1) - return LowerLOADi1(Op, DAG); - - // v2f16 is legal, so we can't rely on legalizer to handle unaligned - // loads and have to handle it here. - if (Op.getValueType() == MVT::v2f16) { - LoadSDNode *Load = cast(Op); - EVT MemVT = Load->getMemoryVT(); - if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - Load->getAddressSpace(), Load->getAlignment())) { - SDValue Ops[2]; - std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); - return DAG.getMergeValues(Ops, SDLoc(Op)); - } - } - - return SDValue(); -} - -// v = ld i1* addr -// => -// v1 = ld i8* addr (-> i16) -// v = trunc i16 to i1 -SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { - SDNode *Node = Op.getNode(); - LoadSDNode *LD = cast(Node); - SDLoc dl(Node); - assert(LD->getExtensionType() == ISD::NON_EXTLOAD); - assert(Node->getValueType(0) == MVT::i1 && - "Custom lowering for i1 load only"); - SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), LD->getAlignment(), - LD->getMemOperand()->getFlags()); - SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); - // The legalizer (the caller) is expecting two values from the legalized - // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() - // in LegalizeDAG.cpp which also uses MergeValues. - SDValue Ops[] = { result, LD->getChain() }; - return DAG.getMergeValues(Ops, dl); -} - -SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { - StoreSDNode *Store = cast(Op); - EVT VT = Store->getMemoryVT(); - - if (VT == MVT::i1) - return LowerSTOREi1(Op, DAG); - - // v2f16 is legal, so we can't rely on legalizer to handle unaligned - // stores and have to handle it here. - if (VT == MVT::v2f16 && - !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - Store->getAddressSpace(), Store->getAlignment())) - return expandUnalignedStore(Store, DAG); - - if (VT.isVector()) - return LowerSTOREVector(Op, DAG); - - return SDValue(); -} - -SDValue -NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { - SDNode *N = Op.getNode(); - SDValue Val = N->getOperand(1); - SDLoc DL(N); - EVT ValVT = Val.getValueType(); - - if (ValVT.isVector()) { - // We only handle "native" vector sizes for now, e.g. <4 x double> is not - // legal. We can (and should) split that into 2 stores of <2 x double> here - // but I'm leaving that as a TODO for now. - if (!ValVT.isSimple()) - return SDValue(); - switch (ValVT.getSimpleVT().SimpleTy) { - default: - return SDValue(); - case MVT::v2i8: - case MVT::v2i16: - case MVT::v2i32: - case MVT::v2i64: - case MVT::v2f16: - case MVT::v2f32: - case MVT::v2f64: - case MVT::v4i8: - case MVT::v4i16: - case MVT::v4i32: - case MVT::v4f16: - case MVT::v4f32: - case MVT::v8f16: // <4 x f16x2> - // This is a "native" vector type - break; - } - - MemSDNode *MemSD = cast(N); - const DataLayout &TD = DAG.getDataLayout(); - - unsigned Align = MemSD->getAlignment(); - unsigned PrefAlign = - TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); - if (Align < PrefAlign) { - // This store is not sufficiently aligned, so bail out and let this vector - // store be scalarized. Note that we may still be able to emit smaller - // vector stores. For example, if we are storing a <4 x float> with an - // alignment of 8, this check will fail but the legalizer will try again - // with 2 x <2 x float>, which will succeed with an alignment of 8. - return SDValue(); - } - - unsigned Opcode = 0; - EVT EltVT = ValVT.getVectorElementType(); - unsigned NumElts = ValVT.getVectorNumElements(); - - // Since StoreV2 is a target node, we cannot rely on DAG type legalization. - // Therefore, we must ensure the type is legal. For i1 and i8, we set the - // stored type to i16 and propagate the "real" type as the memory type. - bool NeedExt = false; - if (EltVT.getSizeInBits() < 16) - NeedExt = true; - - bool StoreF16x2 = false; - switch (NumElts) { - default: - return SDValue(); - case 2: - Opcode = NVPTXISD::StoreV2; - break; - case 4: - Opcode = NVPTXISD::StoreV4; - break; - case 8: - // v8f16 is a special case. PTX doesn't have st.v8.f16 - // instruction. Instead, we split the vector into v2f16 chunks and - // store them with st.v4.b32. - assert(EltVT == MVT::f16 && "Wrong type for the vector."); - Opcode = NVPTXISD::StoreV4; - StoreF16x2 = true; - break; - } - - SmallVector Ops; - - // First is the chain - Ops.push_back(N->getOperand(0)); - - if (StoreF16x2) { - // Combine f16,f16 -> v2f16 - NumElts /= 2; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val, - DAG.getIntPtrConstant(i * 2, DL)); - SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val, - DAG.getIntPtrConstant(i * 2 + 1, DL)); - SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1); - Ops.push_back(V2); - } - } else { - // Then the split values - for (unsigned i = 0; i < NumElts; ++i) { - SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, - DAG.getIntPtrConstant(i, DL)); - if (NeedExt) - ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); - Ops.push_back(ExtVal); - } - } - - // Then any remaining arguments - Ops.append(N->op_begin() + 2, N->op_end()); - - SDValue NewSt = - DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops, - MemSD->getMemoryVT(), MemSD->getMemOperand()); - - // return DCI.CombineTo(N, NewSt, true); - return NewSt; - } - - return SDValue(); -} - -// st i1 v, addr -// => -// v1 = zxt v to i16 -// st.u8 i16, addr -SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { - SDNode *Node = Op.getNode(); - SDLoc dl(Node); - StoreSDNode *ST = cast(Node); - SDValue Tmp1 = ST->getChain(); - SDValue Tmp2 = ST->getBasePtr(); - SDValue Tmp3 = ST->getValue(); - assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); - Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); - SDValue Result = - DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, - ST->getAlignment(), ST->getMemOperand()->getFlags()); - return Result; -} - -SDValue -NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { - std::string ParamSym; - raw_string_ostream ParamStr(ParamSym); - - ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; - ParamStr.flush(); - - std::string *SavedStr = - nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); - return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); -} - -// Check to see if the kernel argument is image*_t or sampler_t - -static bool isImageOrSamplerVal(const Value *arg, const Module *context) { - static const char *const specialTypes[] = { "struct._image2d_t", - "struct._image3d_t", - "struct._sampler_t" }; - - Type *Ty = arg->getType(); - auto *PTy = dyn_cast(Ty); - - if (!PTy) - return false; - - if (!context) - return false; - - auto *STy = dyn_cast(PTy->getElementType()); - if (!STy || STy->isLiteral()) - return false; - - return std::find(std::begin(specialTypes), std::end(specialTypes), - STy->getName()) != std::end(specialTypes); -} - -SDValue NVPTXTargetLowering::LowerFormalArguments( - SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, const SDLoc &dl, - SelectionDAG &DAG, SmallVectorImpl &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); - const DataLayout &DL = DAG.getDataLayout(); - auto PtrVT = getPointerTy(DAG.getDataLayout()); - - const Function *F = MF.getFunction(); - const AttributeList &PAL = F->getAttributes(); - const TargetLowering *TLI = STI.getTargetLowering(); - - SDValue Root = DAG.getRoot(); - std::vector OutChains; - - bool isABI = (STI.getSmVersion() >= 20); - assert(isABI && "Non-ABI compilation is not supported"); - if (!isABI) - return Chain; - - std::vector argTypes; - std::vector theArgs; - for (const Argument &I : F->args()) { - theArgs.push_back(&I); - argTypes.push_back(I.getType()); - } - // argTypes.size() (or theArgs.size()) and Ins.size() need not match. - // Ins.size() will be larger - // * if there is an aggregate argument with multiple fields (each field - // showing up separately in Ins) - // * if there is a vector argument with more than typical vector-length - // elements (generally if more than 4) where each vector element is - // individually present in Ins. - // So a different index should be used for indexing into Ins. - // See similar issue in LowerCall. - unsigned InsIdx = 0; - - int idx = 0; - for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { - Type *Ty = argTypes[i]; - - // If the kernel argument is image*_t or sampler_t, convert it to - // a i32 constant holding the parameter position. This can later - // matched in the AsmPrinter to output the correct mangled name. - if (isImageOrSamplerVal( - theArgs[i], - (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() - : nullptr))) { - assert(isKernelFunction(*F) && - "Only kernels can have image/sampler params"); - InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); - continue; - } - - if (theArgs[i]->use_empty()) { - // argument is dead - if (Ty->isAggregateType()) { - SmallVector vtparts; - - ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); - assert(vtparts.size() > 0 && "empty aggregate type not expected"); - for (unsigned parti = 0, parte = vtparts.size(); parti != parte; - ++parti) { - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); - ++InsIdx; - } - if (vtparts.size() > 0) - --InsIdx; - continue; - } - if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(DL, Ty); - unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); - for (unsigned parti = 0; parti < NumRegs; ++parti) { - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); - ++InsIdx; - } - if (NumRegs > 0) - --InsIdx; - continue; - } - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); - continue; - } - - // In the following cases, assign a node order of "idx+1" - // to newly created nodes. The SDNodes for params have to - // appear in the same order as their order of appearance - // in the original function. "idx+1" holds that order. - if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) { - bool aggregateIsPacked = false; - if (StructType *STy = dyn_cast(Ty)) - aggregateIsPacked = STy->isPacked(); - - SmallVector VTs; - SmallVector Offsets; - ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0); - assert(VTs.size() > 0 && "Unexpected empty type."); - auto VectorInfo = - VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty)); - - SDValue Arg = getParamSymbol(DAG, idx, PtrVT); - int VecIdx = -1; // Index of the first element of the current vector. - for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) { - if (VectorInfo[parti] & PVF_FIRST) { - assert(VecIdx == -1 && "Orphaned vector."); - VecIdx = parti; - } - - // That's the last element of this store op. - if (VectorInfo[parti] & PVF_LAST) { - unsigned NumElts = parti - VecIdx + 1; - EVT EltVT = VTs[parti]; - // i1 is loaded/stored as i8. - EVT LoadVT = EltVT; - if (EltVT == MVT::i1) - LoadVT = MVT::i8; - else if (EltVT == MVT::v2f16) - // getLoad needs a vector type, but it can't handle - // vectors which contain v2f16 elements. So we must load - // using i32 here and then bitcast back. - LoadVT = MVT::i32; - - EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts); - SDValue VecAddr = - DAG.getNode(ISD::ADD, dl, PtrVT, Arg, - DAG.getConstant(Offsets[VecIdx], dl, PtrVT)); - Value *srcValue = Constant::getNullValue(PointerType::get( - EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM)); - SDValue P = - DAG.getLoad(VecVT, dl, Root, VecAddr, - MachinePointerInfo(srcValue), aggregateIsPacked, - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOInvariant); - if (P.getNode()) - P.getNode()->setIROrder(idx + 1); - for (unsigned j = 0; j < NumElts; ++j) { - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P, - DAG.getIntPtrConstant(j, dl)); - // We've loaded i1 as an i8 and now must truncate it back to i1 - if (EltVT == MVT::i1) - Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt); - // v2f16 was loaded as an i32. Now we must bitcast it back. - else if (EltVT == MVT::v2f16) - Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt); - // Extend the element if necesary (e.g. an i8 is loaded - // into an i16 register) - if (Ins[InsIdx].VT.isInteger() && - Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) { - unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND - : ISD::ZERO_EXTEND; - Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt); - } - InVals.push_back(Elt); - } - - // Reset vector tracking state. - VecIdx = -1; - } - ++InsIdx; - } - if (VTs.size() > 0) - --InsIdx; - continue; - } - - // Param has ByVal attribute - // Return MoveParam(param symbol). - // Ideally, the param symbol can be returned directly, - // but when SDNode builder decides to use it in a CopyToReg(), - // machine instruction fails because TargetExternalSymbol - // (not lowered) is target dependent, and CopyToReg assumes - // the source is lowered. - EVT ObjectVT = getValueType(DL, Ty); - assert(ObjectVT == Ins[InsIdx].VT && - "Ins type did not match function type"); - SDValue Arg = getParamSymbol(DAG, idx, PtrVT); - SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); - if (p.getNode()) - p.getNode()->setIROrder(idx + 1); - InVals.push_back(p); - } - - // Clang will check explicit VarArg and issue error if any. However, Clang - // will let code with - // implicit var arg like f() pass. See bug 617733. - // We treat this case as if the arg list is empty. - // if (F.isVarArg()) { - // assert(0 && "VarArg not supported yet!"); - //} - - if (!OutChains.empty()) - DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); - - return Chain; -} - -SDValue -NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SDLoc &dl, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - Type *RetTy = MF.getFunction()->getReturnType(); - - bool isABI = (STI.getSmVersion() >= 20); - assert(isABI && "Non-ABI compilation is not supported"); - if (!isABI) - return Chain; - - const DataLayout DL = DAG.getDataLayout(); - SmallVector VTs; - SmallVector Offsets; - ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); - assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); - - auto VectorInfo = VectorizePTXValueVTs( - VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1); - - // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than - // 32-bits are sign extended or zero extended, depending on whether - // they are signed or unsigned types. - bool ExtendIntegerRetVal = - RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; - - SmallVector StoreOperands; - for (unsigned i = 0, e = VTs.size(); i != e; ++i) { - // New load/store. Record chain and offset operands. - if (VectorInfo[i] & PVF_FIRST) { - assert(StoreOperands.empty() && "Orphaned operand list."); - StoreOperands.push_back(Chain); - StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32)); - } - - SDValue RetVal = OutVals[i]; - if (ExtendIntegerRetVal) { - RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND - : ISD::ZERO_EXTEND, - dl, MVT::i32, RetVal); - } else if (RetVal.getValueSizeInBits() < 16) { - // Use 16-bit registers for small load-stores as it's the - // smallest general purpose register size supported by NVPTX. - RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal); - } - - // Record the value to return. - StoreOperands.push_back(RetVal); - - // That's the last element of this store op. - if (VectorInfo[i] & PVF_LAST) { - NVPTXISD::NodeType Op; - unsigned NumElts = StoreOperands.size() - 2; - switch (NumElts) { - case 1: - Op = NVPTXISD::StoreRetval; - break; - case 2: - Op = NVPTXISD::StoreRetvalV2; - break; - case 4: - Op = NVPTXISD::StoreRetvalV4; - break; - default: - llvm_unreachable("Invalid vector info."); - } - - // Adjust type of load/store op if we've extended the scalar - // return value. - EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i]; - Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other), - StoreOperands, TheStoreType, - MachinePointerInfo(), 1); - // Cleanup vector state. - StoreOperands.clear(); - } - } - - return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); -} - -void NVPTXTargetLowering::LowerAsmOperandForConstraint( - SDValue Op, std::string &Constraint, std::vector &Ops, - SelectionDAG &DAG) const { - if (Constraint.length() > 1) - return; - else - TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); -} - -static unsigned getOpcForTextureInstr(unsigned Intrinsic) { - switch (Intrinsic) { - default: - return 0; - - case Intrinsic::nvvm_tex_1d_v4f32_s32: - return NVPTXISD::Tex1DFloatS32; - case Intrinsic::nvvm_tex_1d_v4f32_f32: - return NVPTXISD::Tex1DFloatFloat; - case Intrinsic::nvvm_tex_1d_level_v4f32_f32: - return NVPTXISD::Tex1DFloatFloatLevel; - case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: - return NVPTXISD::Tex1DFloatFloatGrad; - case Intrinsic::nvvm_tex_1d_v4s32_s32: - return NVPTXISD::Tex1DS32S32; - case Intrinsic::nvvm_tex_1d_v4s32_f32: - return NVPTXISD::Tex1DS32Float; - case Intrinsic::nvvm_tex_1d_level_v4s32_f32: - return NVPTXISD::Tex1DS32FloatLevel; - case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: - return NVPTXISD::Tex1DS32FloatGrad; - case Intrinsic::nvvm_tex_1d_v4u32_s32: - return NVPTXISD::Tex1DU32S32; - case Intrinsic::nvvm_tex_1d_v4u32_f32: - return NVPTXISD::Tex1DU32Float; - case Intrinsic::nvvm_tex_1d_level_v4u32_f32: - return NVPTXISD::Tex1DU32FloatLevel; - case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: - return NVPTXISD::Tex1DU32FloatGrad; - - case Intrinsic::nvvm_tex_1d_array_v4f32_s32: - return NVPTXISD::Tex1DArrayFloatS32; - case Intrinsic::nvvm_tex_1d_array_v4f32_f32: - return NVPTXISD::Tex1DArrayFloatFloat; - case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: - return NVPTXISD::Tex1DArrayFloatFloatLevel; - case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: - return NVPTXISD::Tex1DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_1d_array_v4s32_s32: - return NVPTXISD::Tex1DArrayS32S32; - case Intrinsic::nvvm_tex_1d_array_v4s32_f32: - return NVPTXISD::Tex1DArrayS32Float; - case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: - return NVPTXISD::Tex1DArrayS32FloatLevel; - case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: - return NVPTXISD::Tex1DArrayS32FloatGrad; - case Intrinsic::nvvm_tex_1d_array_v4u32_s32: - return NVPTXISD::Tex1DArrayU32S32; - case Intrinsic::nvvm_tex_1d_array_v4u32_f32: - return NVPTXISD::Tex1DArrayU32Float; - case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: - return NVPTXISD::Tex1DArrayU32FloatLevel; - case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: - return NVPTXISD::Tex1DArrayU32FloatGrad; - - case Intrinsic::nvvm_tex_2d_v4f32_s32: - return NVPTXISD::Tex2DFloatS32; - case Intrinsic::nvvm_tex_2d_v4f32_f32: - return NVPTXISD::Tex2DFloatFloat; - case Intrinsic::nvvm_tex_2d_level_v4f32_f32: - return NVPTXISD::Tex2DFloatFloatLevel; - case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: - return NVPTXISD::Tex2DFloatFloatGrad; - case Intrinsic::nvvm_tex_2d_v4s32_s32: - return NVPTXISD::Tex2DS32S32; - case Intrinsic::nvvm_tex_2d_v4s32_f32: - return NVPTXISD::Tex2DS32Float; - case Intrinsic::nvvm_tex_2d_level_v4s32_f32: - return NVPTXISD::Tex2DS32FloatLevel; - case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: - return NVPTXISD::Tex2DS32FloatGrad; - case Intrinsic::nvvm_tex_2d_v4u32_s32: - return NVPTXISD::Tex2DU32S32; - case Intrinsic::nvvm_tex_2d_v4u32_f32: - return NVPTXISD::Tex2DU32Float; - case Intrinsic::nvvm_tex_2d_level_v4u32_f32: - return NVPTXISD::Tex2DU32FloatLevel; - case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: - return NVPTXISD::Tex2DU32FloatGrad; - - case Intrinsic::nvvm_tex_2d_array_v4f32_s32: - return NVPTXISD::Tex2DArrayFloatS32; - case Intrinsic::nvvm_tex_2d_array_v4f32_f32: - return NVPTXISD::Tex2DArrayFloatFloat; - case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: - return NVPTXISD::Tex2DArrayFloatFloatLevel; - case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: - return NVPTXISD::Tex2DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_2d_array_v4s32_s32: - return NVPTXISD::Tex2DArrayS32S32; - case Intrinsic::nvvm_tex_2d_array_v4s32_f32: - return NVPTXISD::Tex2DArrayS32Float; - case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: - return NVPTXISD::Tex2DArrayS32FloatLevel; - case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: - return NVPTXISD::Tex2DArrayS32FloatGrad; - case Intrinsic::nvvm_tex_2d_array_v4u32_s32: - return NVPTXISD::Tex2DArrayU32S32; - case Intrinsic::nvvm_tex_2d_array_v4u32_f32: - return NVPTXISD::Tex2DArrayU32Float; - case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: - return NVPTXISD::Tex2DArrayU32FloatLevel; - case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: - return NVPTXISD::Tex2DArrayU32FloatGrad; - - case Intrinsic::nvvm_tex_3d_v4f32_s32: - return NVPTXISD::Tex3DFloatS32; - case Intrinsic::nvvm_tex_3d_v4f32_f32: - return NVPTXISD::Tex3DFloatFloat; - case Intrinsic::nvvm_tex_3d_level_v4f32_f32: - return NVPTXISD::Tex3DFloatFloatLevel; - case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: - return NVPTXISD::Tex3DFloatFloatGrad; - case Intrinsic::nvvm_tex_3d_v4s32_s32: - return NVPTXISD::Tex3DS32S32; - case Intrinsic::nvvm_tex_3d_v4s32_f32: - return NVPTXISD::Tex3DS32Float; - case Intrinsic::nvvm_tex_3d_level_v4s32_f32: - return NVPTXISD::Tex3DS32FloatLevel; - case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: - return NVPTXISD::Tex3DS32FloatGrad; - case Intrinsic::nvvm_tex_3d_v4u32_s32: - return NVPTXISD::Tex3DU32S32; - case Intrinsic::nvvm_tex_3d_v4u32_f32: - return NVPTXISD::Tex3DU32Float; - case Intrinsic::nvvm_tex_3d_level_v4u32_f32: - return NVPTXISD::Tex3DU32FloatLevel; - case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: - return NVPTXISD::Tex3DU32FloatGrad; - - case Intrinsic::nvvm_tex_cube_v4f32_f32: - return NVPTXISD::TexCubeFloatFloat; - case Intrinsic::nvvm_tex_cube_level_v4f32_f32: - return NVPTXISD::TexCubeFloatFloatLevel; - case Intrinsic::nvvm_tex_cube_v4s32_f32: - return NVPTXISD::TexCubeS32Float; - case Intrinsic::nvvm_tex_cube_level_v4s32_f32: - return NVPTXISD::TexCubeS32FloatLevel; - case Intrinsic::nvvm_tex_cube_v4u32_f32: - return NVPTXISD::TexCubeU32Float; - case Intrinsic::nvvm_tex_cube_level_v4u32_f32: - return NVPTXISD::TexCubeU32FloatLevel; - - case Intrinsic::nvvm_tex_cube_array_v4f32_f32: - return NVPTXISD::TexCubeArrayFloatFloat; - case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: - return NVPTXISD::TexCubeArrayFloatFloatLevel; - case Intrinsic::nvvm_tex_cube_array_v4s32_f32: - return NVPTXISD::TexCubeArrayS32Float; - case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: - return NVPTXISD::TexCubeArrayS32FloatLevel; - case Intrinsic::nvvm_tex_cube_array_v4u32_f32: - return NVPTXISD::TexCubeArrayU32Float; - case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: - return NVPTXISD::TexCubeArrayU32FloatLevel; - - case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: - return NVPTXISD::Tld4R2DFloatFloat; - case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: - return NVPTXISD::Tld4G2DFloatFloat; - case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: - return NVPTXISD::Tld4B2DFloatFloat; - case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: - return NVPTXISD::Tld4A2DFloatFloat; - case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: - return NVPTXISD::Tld4R2DS64Float; - case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: - return NVPTXISD::Tld4G2DS64Float; - case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: - return NVPTXISD::Tld4B2DS64Float; - case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: - return NVPTXISD::Tld4A2DS64Float; - case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: - return NVPTXISD::Tld4R2DU64Float; - case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: - return NVPTXISD::Tld4G2DU64Float; - case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: - return NVPTXISD::Tld4B2DU64Float; - case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: - return NVPTXISD::Tld4A2DU64Float; - - case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: - return NVPTXISD::TexUnified1DFloatS32; - case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: - return NVPTXISD::TexUnified1DFloatFloat; - case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: - return NVPTXISD::TexUnified1DFloatFloatLevel; - case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: - return NVPTXISD::TexUnified1DFloatFloatGrad; - case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: - return NVPTXISD::TexUnified1DS32S32; - case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: - return NVPTXISD::TexUnified1DS32Float; - case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: - return NVPTXISD::TexUnified1DS32FloatLevel; - case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: - return NVPTXISD::TexUnified1DS32FloatGrad; - case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: - return NVPTXISD::TexUnified1DU32S32; - case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: - return NVPTXISD::TexUnified1DU32Float; - case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: - return NVPTXISD::TexUnified1DU32FloatLevel; - case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: - return NVPTXISD::TexUnified1DU32FloatGrad; - - case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: - return NVPTXISD::TexUnified1DArrayFloatS32; - case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: - return NVPTXISD::TexUnified1DArrayFloatFloat; - case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: - return NVPTXISD::TexUnified1DArrayFloatFloatLevel; - case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: - return NVPTXISD::TexUnified1DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: - return NVPTXISD::TexUnified1DArrayS32S32; - case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: - return NVPTXISD::TexUnified1DArrayS32Float; - case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: - return NVPTXISD::TexUnified1DArrayS32FloatLevel; - case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: - return NVPTXISD::TexUnified1DArrayS32FloatGrad; - case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: - return NVPTXISD::TexUnified1DArrayU32S32; - case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: - return NVPTXISD::TexUnified1DArrayU32Float; - case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: - return NVPTXISD::TexUnified1DArrayU32FloatLevel; - case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: - return NVPTXISD::TexUnified1DArrayU32FloatGrad; - - case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: - return NVPTXISD::TexUnified2DFloatS32; - case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: - return NVPTXISD::TexUnified2DFloatFloat; - case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: - return NVPTXISD::TexUnified2DFloatFloatLevel; - case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: - return NVPTXISD::TexUnified2DFloatFloatGrad; - case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: - return NVPTXISD::TexUnified2DS32S32; - case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: - return NVPTXISD::TexUnified2DS32Float; - case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: - return NVPTXISD::TexUnified2DS32FloatLevel; - case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: - return NVPTXISD::TexUnified2DS32FloatGrad; - case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: - return NVPTXISD::TexUnified2DU32S32; - case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: - return NVPTXISD::TexUnified2DU32Float; - case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: - return NVPTXISD::TexUnified2DU32FloatLevel; - case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: - return NVPTXISD::TexUnified2DU32FloatGrad; - - case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: - return NVPTXISD::TexUnified2DArrayFloatS32; - case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: - return NVPTXISD::TexUnified2DArrayFloatFloat; - case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: - return NVPTXISD::TexUnified2DArrayFloatFloatLevel; - case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: - return NVPTXISD::TexUnified2DArrayFloatFloatGrad; - case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: - return NVPTXISD::TexUnified2DArrayS32S32; - case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: - return NVPTXISD::TexUnified2DArrayS32Float; - case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: - return NVPTXISD::TexUnified2DArrayS32FloatLevel; - case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: - return NVPTXISD::TexUnified2DArrayS32FloatGrad; - case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: - return NVPTXISD::TexUnified2DArrayU32S32; - case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: - return NVPTXISD::TexUnified2DArrayU32Float; - case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: - return NVPTXISD::TexUnified2DArrayU32FloatLevel; - case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: - return NVPTXISD::TexUnified2DArrayU32FloatGrad; - - case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: - return NVPTXISD::TexUnified3DFloatS32; - case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: - return NVPTXISD::TexUnified3DFloatFloat; - case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: - return NVPTXISD::TexUnified3DFloatFloatLevel; - case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: - return NVPTXISD::TexUnified3DFloatFloatGrad; - case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: - return NVPTXISD::TexUnified3DS32S32; - case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: - return NVPTXISD::TexUnified3DS32Float; - case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: - return NVPTXISD::TexUnified3DS32FloatLevel; - case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: - return NVPTXISD::TexUnified3DS32FloatGrad; - case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: - return NVPTXISD::TexUnified3DU32S32; - case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: - return NVPTXISD::TexUnified3DU32Float; - case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: - return NVPTXISD::TexUnified3DU32FloatLevel; - case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: - return NVPTXISD::TexUnified3DU32FloatGrad; - - case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: - return NVPTXISD::TexUnifiedCubeFloatFloat; - case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: - return NVPTXISD::TexUnifiedCubeFloatFloatLevel; - case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: - return NVPTXISD::TexUnifiedCubeS32Float; - case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: - return NVPTXISD::TexUnifiedCubeS32FloatLevel; - case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: - return NVPTXISD::TexUnifiedCubeU32Float; - case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: - return NVPTXISD::TexUnifiedCubeU32FloatLevel; - - case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: - return NVPTXISD::TexUnifiedCubeArrayFloatFloat; - case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: - return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; - case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: - return NVPTXISD::TexUnifiedCubeArrayS32Float; - case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: - return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; - case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: - return NVPTXISD::TexUnifiedCubeArrayU32Float; - case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: - return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; - - case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: - return NVPTXISD::Tld4UnifiedR2DFloatFloat; - case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: - return NVPTXISD::Tld4UnifiedG2DFloatFloat; - case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: - return NVPTXISD::Tld4UnifiedB2DFloatFloat; - case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: - return NVPTXISD::Tld4UnifiedA2DFloatFloat; - case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: - return NVPTXISD::Tld4UnifiedR2DS64Float; - case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: - return NVPTXISD::Tld4UnifiedG2DS64Float; - case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: - return NVPTXISD::Tld4UnifiedB2DS64Float; - case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: - return NVPTXISD::Tld4UnifiedA2DS64Float; - case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: - return NVPTXISD::Tld4UnifiedR2DU64Float; - case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: - return NVPTXISD::Tld4UnifiedG2DU64Float; - case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: - return NVPTXISD::Tld4UnifiedB2DU64Float; - case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: - return NVPTXISD::Tld4UnifiedA2DU64Float; - } -} - -static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { - switch (Intrinsic) { - default: - return 0; - case Intrinsic::nvvm_suld_1d_i8_clamp: - return NVPTXISD::Suld1DI8Clamp; - case Intrinsic::nvvm_suld_1d_i16_clamp: - return NVPTXISD::Suld1DI16Clamp; - case Intrinsic::nvvm_suld_1d_i32_clamp: - return NVPTXISD::Suld1DI32Clamp; - case Intrinsic::nvvm_suld_1d_i64_clamp: - return NVPTXISD::Suld1DI64Clamp; - case Intrinsic::nvvm_suld_1d_v2i8_clamp: - return NVPTXISD::Suld1DV2I8Clamp; - case Intrinsic::nvvm_suld_1d_v2i16_clamp: - return NVPTXISD::Suld1DV2I16Clamp; - case Intrinsic::nvvm_suld_1d_v2i32_clamp: - return NVPTXISD::Suld1DV2I32Clamp; - case Intrinsic::nvvm_suld_1d_v2i64_clamp: - return NVPTXISD::Suld1DV2I64Clamp; - case Intrinsic::nvvm_suld_1d_v4i8_clamp: - return NVPTXISD::Suld1DV4I8Clamp; - case Intrinsic::nvvm_suld_1d_v4i16_clamp: - return NVPTXISD::Suld1DV4I16Clamp; - case Intrinsic::nvvm_suld_1d_v4i32_clamp: - return NVPTXISD::Suld1DV4I32Clamp; - case Intrinsic::nvvm_suld_1d_array_i8_clamp: - return NVPTXISD::Suld1DArrayI8Clamp; - case Intrinsic::nvvm_suld_1d_array_i16_clamp: - return NVPTXISD::Suld1DArrayI16Clamp; - case Intrinsic::nvvm_suld_1d_array_i32_clamp: - return NVPTXISD::Suld1DArrayI32Clamp; - case Intrinsic::nvvm_suld_1d_array_i64_clamp: - return NVPTXISD::Suld1DArrayI64Clamp; - case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: - return NVPTXISD::Suld1DArrayV2I8Clamp; - case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: - return NVPTXISD::Suld1DArrayV2I16Clamp; - case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: - return NVPTXISD::Suld1DArrayV2I32Clamp; - case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: - return NVPTXISD::Suld1DArrayV2I64Clamp; - case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: - return NVPTXISD::Suld1DArrayV4I8Clamp; - case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: - return NVPTXISD::Suld1DArrayV4I16Clamp; - case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: - return NVPTXISD::Suld1DArrayV4I32Clamp; - case Intrinsic::nvvm_suld_2d_i8_clamp: - return NVPTXISD::Suld2DI8Clamp; - case Intrinsic::nvvm_suld_2d_i16_clamp: - return NVPTXISD::Suld2DI16Clamp; - case Intrinsic::nvvm_suld_2d_i32_clamp: - return NVPTXISD::Suld2DI32Clamp; - case Intrinsic::nvvm_suld_2d_i64_clamp: - return NVPTXISD::Suld2DI64Clamp; - case Intrinsic::nvvm_suld_2d_v2i8_clamp: - return NVPTXISD::Suld2DV2I8Clamp; - case Intrinsic::nvvm_suld_2d_v2i16_clamp: - return NVPTXISD::Suld2DV2I16Clamp; - case Intrinsic::nvvm_suld_2d_v2i32_clamp: - return NVPTXISD::Suld2DV2I32Clamp; - case Intrinsic::nvvm_suld_2d_v2i64_clamp: - return NVPTXISD::Suld2DV2I64Clamp; - case Intrinsic::nvvm_suld_2d_v4i8_clamp: - return NVPTXISD::Suld2DV4I8Clamp; - case Intrinsic::nvvm_suld_2d_v4i16_clamp: - return NVPTXISD::Suld2DV4I16Clamp; - case Intrinsic::nvvm_suld_2d_v4i32_clamp: - return NVPTXISD::Suld2DV4I32Clamp; - case Intrinsic::nvvm_suld_2d_array_i8_clamp: - return NVPTXISD::Suld2DArrayI8Clamp; - case Intrinsic::nvvm_suld_2d_array_i16_clamp: - return NVPTXISD::Suld2DArrayI16Clamp; - case Intrinsic::nvvm_suld_2d_array_i32_clamp: - return NVPTXISD::Suld2DArrayI32Clamp; - case Intrinsic::nvvm_suld_2d_array_i64_clamp: - return NVPTXISD::Suld2DArrayI64Clamp; - case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: - return NVPTXISD::Suld2DArrayV2I8Clamp; - case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: - return NVPTXISD::Suld2DArrayV2I16Clamp; - case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: - return NVPTXISD::Suld2DArrayV2I32Clamp; - case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: - return NVPTXISD::Suld2DArrayV2I64Clamp; - case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: - return NVPTXISD::Suld2DArrayV4I8Clamp; - case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: - return NVPTXISD::Suld2DArrayV4I16Clamp; - case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: - return NVPTXISD::Suld2DArrayV4I32Clamp; - case Intrinsic::nvvm_suld_3d_i8_clamp: - return NVPTXISD::Suld3DI8Clamp; - case Intrinsic::nvvm_suld_3d_i16_clamp: - return NVPTXISD::Suld3DI16Clamp; - case Intrinsic::nvvm_suld_3d_i32_clamp: - return NVPTXISD::Suld3DI32Clamp; - case Intrinsic::nvvm_suld_3d_i64_clamp: - return NVPTXISD::Suld3DI64Clamp; - case Intrinsic::nvvm_suld_3d_v2i8_clamp: - return NVPTXISD::Suld3DV2I8Clamp; - case Intrinsic::nvvm_suld_3d_v2i16_clamp: - return NVPTXISD::Suld3DV2I16Clamp; - case Intrinsic::nvvm_suld_3d_v2i32_clamp: - return NVPTXISD::Suld3DV2I32Clamp; - case Intrinsic::nvvm_suld_3d_v2i64_clamp: - return NVPTXISD::Suld3DV2I64Clamp; - case Intrinsic::nvvm_suld_3d_v4i8_clamp: - return NVPTXISD::Suld3DV4I8Clamp; - case Intrinsic::nvvm_suld_3d_v4i16_clamp: - return NVPTXISD::Suld3DV4I16Clamp; - case Intrinsic::nvvm_suld_3d_v4i32_clamp: - return NVPTXISD::Suld3DV4I32Clamp; - case Intrinsic::nvvm_suld_1d_i8_trap: - return NVPTXISD::Suld1DI8Trap; - case Intrinsic::nvvm_suld_1d_i16_trap: - return NVPTXISD::Suld1DI16Trap; - case Intrinsic::nvvm_suld_1d_i32_trap: - return NVPTXISD::Suld1DI32Trap; - case Intrinsic::nvvm_suld_1d_i64_trap: - return NVPTXISD::Suld1DI64Trap; - case Intrinsic::nvvm_suld_1d_v2i8_trap: - return NVPTXISD::Suld1DV2I8Trap; - case Intrinsic::nvvm_suld_1d_v2i16_trap: - return NVPTXISD::Suld1DV2I16Trap; - case Intrinsic::nvvm_suld_1d_v2i32_trap: - return NVPTXISD::Suld1DV2I32Trap; - case Intrinsic::nvvm_suld_1d_v2i64_trap: - return NVPTXISD::Suld1DV2I64Trap; - case Intrinsic::nvvm_suld_1d_v4i8_trap: - return NVPTXISD::Suld1DV4I8Trap; - case Intrinsic::nvvm_suld_1d_v4i16_trap: - return NVPTXISD::Suld1DV4I16Trap; - case Intrinsic::nvvm_suld_1d_v4i32_trap: - return NVPTXISD::Suld1DV4I32Trap; - case Intrinsic::nvvm_suld_1d_array_i8_trap: - return NVPTXISD::Suld1DArrayI8Trap; - case Intrinsic::nvvm_suld_1d_array_i16_trap: - return NVPTXISD::Suld1DArrayI16Trap; - case Intrinsic::nvvm_suld_1d_array_i32_trap: - return NVPTXISD::Suld1DArrayI32Trap; - case Intrinsic::nvvm_suld_1d_array_i64_trap: - return NVPTXISD::Suld1DArrayI64Trap; - case Intrinsic::nvvm_suld_1d_array_v2i8_trap: - return NVPTXISD::Suld1DArrayV2I8Trap; - case Intrinsic::nvvm_suld_1d_array_v2i16_trap: - return NVPTXISD::Suld1DArrayV2I16Trap; - case Intrinsic::nvvm_suld_1d_array_v2i32_trap: - return NVPTXISD::Suld1DArrayV2I32Trap; - case Intrinsic::nvvm_suld_1d_array_v2i64_trap: - return NVPTXISD::Suld1DArrayV2I64Trap; - case Intrinsic::nvvm_suld_1d_array_v4i8_trap: - return NVPTXISD::Suld1DArrayV4I8Trap; - case Intrinsic::nvvm_suld_1d_array_v4i16_trap: - return NVPTXISD::Suld1DArrayV4I16Trap; - case Intrinsic::nvvm_suld_1d_array_v4i32_trap: - return NVPTXISD::Suld1DArrayV4I32Trap; - case Intrinsic::nvvm_suld_2d_i8_trap: - return NVPTXISD::Suld2DI8Trap; - case Intrinsic::nvvm_suld_2d_i16_trap: - return NVPTXISD::Suld2DI16Trap; - case Intrinsic::nvvm_suld_2d_i32_trap: - return NVPTXISD::Suld2DI32Trap; - case Intrinsic::nvvm_suld_2d_i64_trap: - return NVPTXISD::Suld2DI64Trap; - case Intrinsic::nvvm_suld_2d_v2i8_trap: - return NVPTXISD::Suld2DV2I8Trap; - case Intrinsic::nvvm_suld_2d_v2i16_trap: - return NVPTXISD::Suld2DV2I16Trap; - case Intrinsic::nvvm_suld_2d_v2i32_trap: - return NVPTXISD::Suld2DV2I32Trap; - case Intrinsic::nvvm_suld_2d_v2i64_trap: - return NVPTXISD::Suld2DV2I64Trap; - case Intrinsic::nvvm_suld_2d_v4i8_trap: - return NVPTXISD::Suld2DV4I8Trap; - case Intrinsic::nvvm_suld_2d_v4i16_trap: - return NVPTXISD::Suld2DV4I16Trap; - case Intrinsic::nvvm_suld_2d_v4i32_trap: - return NVPTXISD::Suld2DV4I32Trap; - case Intrinsic::nvvm_suld_2d_array_i8_trap: - return NVPTXISD::Suld2DArrayI8Trap; - case Intrinsic::nvvm_suld_2d_array_i16_trap: - return NVPTXISD::Suld2DArrayI16Trap; - case Intrinsic::nvvm_suld_2d_array_i32_trap: - return NVPTXISD::Suld2DArrayI32Trap; - case Intrinsic::nvvm_suld_2d_array_i64_trap: - return NVPTXISD::Suld2DArrayI64Trap; - case Intrinsic::nvvm_suld_2d_array_v2i8_trap: - return NVPTXISD::Suld2DArrayV2I8Trap; - case Intrinsic::nvvm_suld_2d_array_v2i16_trap: - return NVPTXISD::Suld2DArrayV2I16Trap; - case Intrinsic::nvvm_suld_2d_array_v2i32_trap: - return NVPTXISD::Suld2DArrayV2I32Trap; - case Intrinsic::nvvm_suld_2d_array_v2i64_trap: - return NVPTXISD::Suld2DArrayV2I64Trap; - case Intrinsic::nvvm_suld_2d_array_v4i8_trap: - return NVPTXISD::Suld2DArrayV4I8Trap; - case Intrinsic::nvvm_suld_2d_array_v4i16_trap: - return NVPTXISD::Suld2DArrayV4I16Trap; - case Intrinsic::nvvm_suld_2d_array_v4i32_trap: - return NVPTXISD::Suld2DArrayV4I32Trap; - case Intrinsic::nvvm_suld_3d_i8_trap: - return NVPTXISD::Suld3DI8Trap; - case Intrinsic::nvvm_suld_3d_i16_trap: - return NVPTXISD::Suld3DI16Trap; - case Intrinsic::nvvm_suld_3d_i32_trap: - return NVPTXISD::Suld3DI32Trap; - case Intrinsic::nvvm_suld_3d_i64_trap: - return NVPTXISD::Suld3DI64Trap; - case Intrinsic::nvvm_suld_3d_v2i8_trap: - return NVPTXISD::Suld3DV2I8Trap; - case Intrinsic::nvvm_suld_3d_v2i16_trap: - return NVPTXISD::Suld3DV2I16Trap; - case Intrinsic::nvvm_suld_3d_v2i32_trap: - return NVPTXISD::Suld3DV2I32Trap; - case Intrinsic::nvvm_suld_3d_v2i64_trap: - return NVPTXISD::Suld3DV2I64Trap; - case Intrinsic::nvvm_suld_3d_v4i8_trap: - return NVPTXISD::Suld3DV4I8Trap; - case Intrinsic::nvvm_suld_3d_v4i16_trap: - return NVPTXISD::Suld3DV4I16Trap; - case Intrinsic::nvvm_suld_3d_v4i32_trap: - return NVPTXISD::Suld3DV4I32Trap; - case Intrinsic::nvvm_suld_1d_i8_zero: - return NVPTXISD::Suld1DI8Zero; - case Intrinsic::nvvm_suld_1d_i16_zero: - return NVPTXISD::Suld1DI16Zero; - case Intrinsic::nvvm_suld_1d_i32_zero: - return NVPTXISD::Suld1DI32Zero; - case Intrinsic::nvvm_suld_1d_i64_zero: - return NVPTXISD::Suld1DI64Zero; - case Intrinsic::nvvm_suld_1d_v2i8_zero: - return NVPTXISD::Suld1DV2I8Zero; - case Intrinsic::nvvm_suld_1d_v2i16_zero: - return NVPTXISD::Suld1DV2I16Zero; - case Intrinsic::nvvm_suld_1d_v2i32_zero: - return NVPTXISD::Suld1DV2I32Zero; - case Intrinsic::nvvm_suld_1d_v2i64_zero: - return NVPTXISD::Suld1DV2I64Zero; - case Intrinsic::nvvm_suld_1d_v4i8_zero: - return NVPTXISD::Suld1DV4I8Zero; - case Intrinsic::nvvm_suld_1d_v4i16_zero: - return NVPTXISD::Suld1DV4I16Zero; - case Intrinsic::nvvm_suld_1d_v4i32_zero: - return NVPTXISD::Suld1DV4I32Zero; - case Intrinsic::nvvm_suld_1d_array_i8_zero: - return NVPTXISD::Suld1DArrayI8Zero; - case Intrinsic::nvvm_suld_1d_array_i16_zero: - return NVPTXISD::Suld1DArrayI16Zero; - case Intrinsic::nvvm_suld_1d_array_i32_zero: - return NVPTXISD::Suld1DArrayI32Zero; - case Intrinsic::nvvm_suld_1d_array_i64_zero: - return NVPTXISD::Suld1DArrayI64Zero; - case Intrinsic::nvvm_suld_1d_array_v2i8_zero: - return NVPTXISD::Suld1DArrayV2I8Zero; - case Intrinsic::nvvm_suld_1d_array_v2i16_zero: - return NVPTXISD::Suld1DArrayV2I16Zero; - case Intrinsic::nvvm_suld_1d_array_v2i32_zero: - return NVPTXISD::Suld1DArrayV2I32Zero; - case Intrinsic::nvvm_suld_1d_array_v2i64_zero: - return NVPTXISD::Suld1DArrayV2I64Zero; - case Intrinsic::nvvm_suld_1d_array_v4i8_zero: - return NVPTXISD::Suld1DArrayV4I8Zero; - case Intrinsic::nvvm_suld_1d_array_v4i16_zero: - return NVPTXISD::Suld1DArrayV4I16Zero; - case Intrinsic::nvvm_suld_1d_array_v4i32_zero: - return NVPTXISD::Suld1DArrayV4I32Zero; - case Intrinsic::nvvm_suld_2d_i8_zero: - return NVPTXISD::Suld2DI8Zero; - case Intrinsic::nvvm_suld_2d_i16_zero: - return NVPTXISD::Suld2DI16Zero; - case Intrinsic::nvvm_suld_2d_i32_zero: - return NVPTXISD::Suld2DI32Zero; - case Intrinsic::nvvm_suld_2d_i64_zero: - return NVPTXISD::Suld2DI64Zero; - case Intrinsic::nvvm_suld_2d_v2i8_zero: - return NVPTXISD::Suld2DV2I8Zero; - case Intrinsic::nvvm_suld_2d_v2i16_zero: - return NVPTXISD::Suld2DV2I16Zero; - case Intrinsic::nvvm_suld_2d_v2i32_zero: - return NVPTXISD::Suld2DV2I32Zero; - case Intrinsic::nvvm_suld_2d_v2i64_zero: - return NVPTXISD::Suld2DV2I64Zero; - case Intrinsic::nvvm_suld_2d_v4i8_zero: - return NVPTXISD::Suld2DV4I8Zero; - case Intrinsic::nvvm_suld_2d_v4i16_zero: - return NVPTXISD::Suld2DV4I16Zero; - case Intrinsic::nvvm_suld_2d_v4i32_zero: - return NVPTXISD::Suld2DV4I32Zero; - case Intrinsic::nvvm_suld_2d_array_i8_zero: - return NVPTXISD::Suld2DArrayI8Zero; - case Intrinsic::nvvm_suld_2d_array_i16_zero: - return NVPTXISD::Suld2DArrayI16Zero; - case Intrinsic::nvvm_suld_2d_array_i32_zero: - return NVPTXISD::Suld2DArrayI32Zero; - case Intrinsic::nvvm_suld_2d_array_i64_zero: - return NVPTXISD::Suld2DArrayI64Zero; - case Intrinsic::nvvm_suld_2d_array_v2i8_zero: - return NVPTXISD::Suld2DArrayV2I8Zero; - case Intrinsic::nvvm_suld_2d_array_v2i16_zero: - return NVPTXISD::Suld2DArrayV2I16Zero; - case Intrinsic::nvvm_suld_2d_array_v2i32_zero: - return NVPTXISD::Suld2DArrayV2I32Zero; - case Intrinsic::nvvm_suld_2d_array_v2i64_zero: - return NVPTXISD::Suld2DArrayV2I64Zero; - case Intrinsic::nvvm_suld_2d_array_v4i8_zero: - return NVPTXISD::Suld2DArrayV4I8Zero; - case Intrinsic::nvvm_suld_2d_array_v4i16_zero: - return NVPTXISD::Suld2DArrayV4I16Zero; - case Intrinsic::nvvm_suld_2d_array_v4i32_zero: - return NVPTXISD::Suld2DArrayV4I32Zero; - case Intrinsic::nvvm_suld_3d_i8_zero: - return NVPTXISD::Suld3DI8Zero; - case Intrinsic::nvvm_suld_3d_i16_zero: - return NVPTXISD::Suld3DI16Zero; - case Intrinsic::nvvm_suld_3d_i32_zero: - return NVPTXISD::Suld3DI32Zero; - case Intrinsic::nvvm_suld_3d_i64_zero: - return NVPTXISD::Suld3DI64Zero; - case Intrinsic::nvvm_suld_3d_v2i8_zero: - return NVPTXISD::Suld3DV2I8Zero; - case Intrinsic::nvvm_suld_3d_v2i16_zero: - return NVPTXISD::Suld3DV2I16Zero; - case Intrinsic::nvvm_suld_3d_v2i32_zero: - return NVPTXISD::Suld3DV2I32Zero; - case Intrinsic::nvvm_suld_3d_v2i64_zero: - return NVPTXISD::Suld3DV2I64Zero; - case Intrinsic::nvvm_suld_3d_v4i8_zero: - return NVPTXISD::Suld3DV4I8Zero; - case Intrinsic::nvvm_suld_3d_v4i16_zero: - return NVPTXISD::Suld3DV4I16Zero; - case Intrinsic::nvvm_suld_3d_v4i32_zero: - return NVPTXISD::Suld3DV4I32Zero; - } -} - -// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as -// TgtMemIntrinsic -// because we need the information that is only available in the "Value" type -// of destination -// pointer. In particular, the address space information. -bool NVPTXTargetLowering::getTgtMemIntrinsic( - IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { - switch (Intrinsic) { - default: - return false; - - case Intrinsic::nvvm_atomic_load_add_f32: - case Intrinsic::nvvm_atomic_load_inc_32: - case Intrinsic::nvvm_atomic_load_dec_32: - - case Intrinsic::nvvm_atomic_add_gen_f_cta: - case Intrinsic::nvvm_atomic_add_gen_f_sys: - case Intrinsic::nvvm_atomic_add_gen_i_cta: - case Intrinsic::nvvm_atomic_add_gen_i_sys: - case Intrinsic::nvvm_atomic_and_gen_i_cta: - case Intrinsic::nvvm_atomic_and_gen_i_sys: - case Intrinsic::nvvm_atomic_cas_gen_i_cta: - case Intrinsic::nvvm_atomic_cas_gen_i_sys: - case Intrinsic::nvvm_atomic_dec_gen_i_cta: - case Intrinsic::nvvm_atomic_dec_gen_i_sys: - case Intrinsic::nvvm_atomic_inc_gen_i_cta: - case Intrinsic::nvvm_atomic_inc_gen_i_sys: - case Intrinsic::nvvm_atomic_max_gen_i_cta: - case Intrinsic::nvvm_atomic_max_gen_i_sys: - case Intrinsic::nvvm_atomic_min_gen_i_cta: - case Intrinsic::nvvm_atomic_min_gen_i_sys: - case Intrinsic::nvvm_atomic_or_gen_i_cta: - case Intrinsic::nvvm_atomic_or_gen_i_sys: - case Intrinsic::nvvm_atomic_exch_gen_i_cta: - case Intrinsic::nvvm_atomic_exch_gen_i_sys: - case Intrinsic::nvvm_atomic_xor_gen_i_cta: - case Intrinsic::nvvm_atomic_xor_gen_i_sys: { - auto &DL = I.getModule()->getDataLayout(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = getValueType(DL, I.getType()); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = true; - Info.align = 0; - return true; - } - - case Intrinsic::nvvm_ldu_global_i: - case Intrinsic::nvvm_ldu_global_f: - case Intrinsic::nvvm_ldu_global_p: { - auto &DL = I.getModule()->getDataLayout(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - if (Intrinsic == Intrinsic::nvvm_ldu_global_i) - Info.memVT = getValueType(DL, I.getType()); - else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) - Info.memVT = getPointerTy(DL); - else - Info.memVT = getValueType(DL, I.getType()); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; - Info.align = cast(I.getArgOperand(1))->getZExtValue(); - - return true; - } - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_p: { - auto &DL = I.getModule()->getDataLayout(); - - Info.opc = ISD::INTRINSIC_W_CHAIN; - if (Intrinsic == Intrinsic::nvvm_ldg_global_i) - Info.memVT = getValueType(DL, I.getType()); - else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) - Info.memVT = getPointerTy(DL); - else - Info.memVT = getValueType(DL, I.getType()); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; - Info.align = cast(I.getArgOperand(1))->getZExtValue(); - - return true; - } - - case Intrinsic::nvvm_tex_1d_v4f32_s32: - case Intrinsic::nvvm_tex_1d_v4f32_f32: - case Intrinsic::nvvm_tex_1d_level_v4f32_f32: - case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_1d_array_v4f32_s32: - case Intrinsic::nvvm_tex_1d_array_v4f32_f32: - case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: - case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_2d_v4f32_s32: - case Intrinsic::nvvm_tex_2d_v4f32_f32: - case Intrinsic::nvvm_tex_2d_level_v4f32_f32: - case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_2d_array_v4f32_s32: - case Intrinsic::nvvm_tex_2d_array_v4f32_f32: - case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: - case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_3d_v4f32_s32: - case Intrinsic::nvvm_tex_3d_v4f32_f32: - case Intrinsic::nvvm_tex_3d_level_v4f32_f32: - case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_cube_v4f32_f32: - case Intrinsic::nvvm_tex_cube_level_v4f32_f32: - case Intrinsic::nvvm_tex_cube_array_v4f32_f32: - case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: - case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: - case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: - case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: - case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: - case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: - case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: - case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: - case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: - case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: - case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: - case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: - case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: - case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: - case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: - case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: - case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: - case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: - case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: - case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: - case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: - case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: - case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: - case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: - case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: - case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: - case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: - case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: - case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: - case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: - Info.opc = getOpcForTextureInstr(Intrinsic); - Info.memVT = MVT::v4f32; - Info.ptrVal = nullptr; - Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; - Info.align = 16; - return true; - - case Intrinsic::nvvm_tex_1d_v4s32_s32: - case Intrinsic::nvvm_tex_1d_v4s32_f32: - case Intrinsic::nvvm_tex_1d_level_v4s32_f32: - case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: - case Intrinsic::nvvm_tex_1d_array_v4s32_s32: - case Intrinsic::nvvm_tex_1d_array_v4s32_f32: - case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: - case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: - case Intrinsic::nvvm_tex_2d_v4s32_s32: - case Intrinsic::nvvm_tex_2d_v4s32_f32: - case Intrinsic::nvvm_tex_2d_level_v4s32_f32: - case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: - case Intrinsic::nvvm_tex_2d_array_v4s32_s32: - case Intrinsic::nvvm_tex_2d_array_v4s32_f32: - case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: - case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: - case Intrinsic::nvvm_tex_3d_v4s32_s32: - case Intrinsic::nvvm_tex_3d_v4s32_f32: - case Intrinsic::nvvm_tex_3d_level_v4s32_f32: - case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: - case Intrinsic::nvvm_tex_cube_v4s32_f32: - case Intrinsic::nvvm_tex_cube_level_v4s32_f32: - case Intrinsic::nvvm_tex_cube_array_v4s32_f32: - case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: - case Intrinsic::nvvm_tex_cube_v4u32_f32: - case Intrinsic::nvvm_tex_cube_level_v4u32_f32: - case Intrinsic::nvvm_tex_cube_array_v4u32_f32: - case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: - case Intrinsic::nvvm_tex_1d_v4u32_s32: - case Intrinsic::nvvm_tex_1d_v4u32_f32: - case Intrinsic::nvvm_tex_1d_level_v4u32_f32: - case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: - case Intrinsic::nvvm_tex_1d_array_v4u32_s32: - case Intrinsic::nvvm_tex_1d_array_v4u32_f32: - case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: - case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: - case Intrinsic::nvvm_tex_2d_v4u32_s32: - case Intrinsic::nvvm_tex_2d_v4u32_f32: - case Intrinsic::nvvm_tex_2d_level_v4u32_f32: - case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: - case Intrinsic::nvvm_tex_2d_array_v4u32_s32: - case Intrinsic::nvvm_tex_2d_array_v4u32_f32: - case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: - case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: - case Intrinsic::nvvm_tex_3d_v4u32_s32: - case Intrinsic::nvvm_tex_3d_v4u32_f32: - case Intrinsic::nvvm_tex_3d_level_v4u32_f32: - case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: - case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: - case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: - case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: - case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: - case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: - case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: - case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: - case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: - case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: - case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: - case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: - case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: - case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: - case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: - case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: - case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: - case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: - case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: - case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: - case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: - case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: - case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: - case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: - case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: - case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: - case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: - case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: - case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: - case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: - case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: - case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: - case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: - case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: - case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: - case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: - case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: - case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: - case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: - case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: - case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: - case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: - case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: - case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: - case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: - case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: - case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: - case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: - case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: - case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: - case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: - case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: - case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: - case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: - case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: - case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: - case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: - case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: - case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: - case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: - case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: - case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: - case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: - case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: - case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: - Info.opc = getOpcForTextureInstr(Intrinsic); - Info.memVT = MVT::v4i32; - Info.ptrVal = nullptr; - Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; - Info.align = 16; - return true; - - case Intrinsic::nvvm_suld_1d_i8_clamp: - case Intrinsic::nvvm_suld_1d_v2i8_clamp: - case Intrinsic::nvvm_suld_1d_v4i8_clamp: - case Intrinsic::nvvm_suld_1d_array_i8_clamp: - case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: - case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: - case Intrinsic::nvvm_suld_2d_i8_clamp: - case Intrinsic::nvvm_suld_2d_v2i8_clamp: - case Intrinsic::nvvm_suld_2d_v4i8_clamp: - case Intrinsic::nvvm_suld_2d_array_i8_clamp: - case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: - case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: - case Intrinsic::nvvm_suld_3d_i8_clamp: - case Intrinsic::nvvm_suld_3d_v2i8_clamp: - case Intrinsic::nvvm_suld_3d_v4i8_clamp: - case Intrinsic::nvvm_suld_1d_i8_trap: - case Intrinsic::nvvm_suld_1d_v2i8_trap: - case Intrinsic::nvvm_suld_1d_v4i8_trap: - case Intrinsic::nvvm_suld_1d_array_i8_trap: - case Intrinsic::nvvm_suld_1d_array_v2i8_trap: - case Intrinsic::nvvm_suld_1d_array_v4i8_trap: - case Intrinsic::nvvm_suld_2d_i8_trap: - case Intrinsic::nvvm_suld_2d_v2i8_trap: - case Intrinsic::nvvm_suld_2d_v4i8_trap: - case Intrinsic::nvvm_suld_2d_array_i8_trap: - case Intrinsic::nvvm_suld_2d_array_v2i8_trap: - case Intrinsic::nvvm_suld_2d_array_v4i8_trap: - case Intrinsic::nvvm_suld_3d_i8_trap: - case Intrinsic::nvvm_suld_3d_v2i8_trap: - case Intrinsic::nvvm_suld_3d_v4i8_trap: - case Intrinsic::nvvm_suld_1d_i8_zero: - case Intrinsic::nvvm_suld_1d_v2i8_zero: - case Intrinsic::nvvm_suld_1d_v4i8_zero: - case Intrinsic::nvvm_suld_1d_array_i8_zero: - case Intrinsic::nvvm_suld_1d_array_v2i8_zero: - case Intrinsic::nvvm_suld_1d_array_v4i8_zero: - case Intrinsic::nvvm_suld_2d_i8_zero: - case Intrinsic::nvvm_suld_2d_v2i8_zero: - case Intrinsic::nvvm_suld_2d_v4i8_zero: - case Intrinsic::nvvm_suld_2d_array_i8_zero: - case Intrinsic::nvvm_suld_2d_array_v2i8_zero: - case Intrinsic::nvvm_suld_2d_array_v4i8_zero: - case Intrinsic::nvvm_suld_3d_i8_zero: - case Intrinsic::nvvm_suld_3d_v2i8_zero: - case Intrinsic::nvvm_suld_3d_v4i8_zero: - Info.opc = getOpcForSurfaceInstr(Intrinsic); - Info.memVT = MVT::i8; - Info.ptrVal = nullptr; - Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; - Info.align = 16; - return true; - - case Intrinsic::nvvm_suld_1d_i16_clamp: - case Intrinsic::nvvm_suld_1d_v2i16_clamp: - case Intrinsic::nvvm_suld_1d_v4i16_clamp: - case Intrinsic::nvvm_suld_1d_array_i16_clamp: - case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: - case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: - case Intrinsic::nvvm_suld_2d_i16_clamp: - case Intrinsic::nvvm_suld_2d_v2i16_clamp: - case Intrinsic::nvvm_suld_2d_v4i16_clamp: - case Intrinsic::nvvm_suld_2d_array_i16_clamp: - case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: - case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: - case Intrinsic::nvvm_suld_3d_i16_clamp: - case Intrinsic::nvvm_suld_3d_v2i16_clamp: - case Intrinsic::nvvm_suld_3d_v4i16_clamp: - case Intrinsic::nvvm_suld_1d_i16_trap: - case Intrinsic::nvvm_suld_1d_v2i16_trap: - case Intrinsic::nvvm_suld_1d_v4i16_trap: - case Intrinsic::nvvm_suld_1d_array_i16_trap: - case Intrinsic::nvvm_suld_1d_array_v2i16_trap: - case Intrinsic::nvvm_suld_1d_array_v4i16_trap: - case Intrinsic::nvvm_suld_2d_i16_trap: - case Intrinsic::nvvm_suld_2d_v2i16_trap: - case Intrinsic::nvvm_suld_2d_v4i16_trap: - case Intrinsic::nvvm_suld_2d_array_i16_trap: - case Intrinsic::nvvm_suld_2d_array_v2i16_trap: - case Intrinsic::nvvm_suld_2d_array_v4i16_trap: - case Intrinsic::nvvm_suld_3d_i16_trap: - case Intrinsic::nvvm_suld_3d_v2i16_trap: - case Intrinsic::nvvm_suld_3d_v4i16_trap: - case Intrinsic::nvvm_suld_1d_i16_zero: - case Intrinsic::nvvm_suld_1d_v2i16_zero: - case Intrinsic::nvvm_suld_1d_v4i16_zero: - case Intrinsic::nvvm_suld_1d_array_i16_zero: - case Intrinsic::nvvm_suld_1d_array_v2i16_zero: - case Intrinsic::nvvm_suld_1d_array_v4i16_zero: - case Intrinsic::nvvm_suld_2d_i16_zero: - case Intrinsic::nvvm_suld_2d_v2i16_zero: - case Intrinsic::nvvm_suld_2d_v4i16_zero: - case Intrinsic::nvvm_suld_2d_array_i16_zero: - case Intrinsic::nvvm_suld_2d_array_v2i16_zero: - case Intrinsic::nvvm_suld_2d_array_v4i16_zero: - case Intrinsic::nvvm_suld_3d_i16_zero: - case Intrinsic::nvvm_suld_3d_v2i16_zero: - case Intrinsic::nvvm_suld_3d_v4i16_zero: - Info.opc = getOpcForSurfaceInstr(Intrinsic); - Info.memVT = MVT::i16; - Info.ptrVal = nullptr; - Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; - Info.align = 16; - return true; - - case Intrinsic::nvvm_suld_1d_i32_clamp: - case Intrinsic::nvvm_suld_1d_v2i32_clamp: - case Intrinsic::nvvm_suld_1d_v4i32_clamp: - case Intrinsic::nvvm_suld_1d_array_i32_clamp: - case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: - case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: - case Intrinsic::nvvm_suld_2d_i32_clamp: - case Intrinsic::nvvm_suld_2d_v2i32_clamp: - case Intrinsic::nvvm_suld_2d_v4i32_clamp: - case Intrinsic::nvvm_suld_2d_array_i32_clamp: - case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: - case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: - case Intrinsic::nvvm_suld_3d_i32_clamp: - case Intrinsic::nvvm_suld_3d_v2i32_clamp: - case Intrinsic::nvvm_suld_3d_v4i32_clamp: - case Intrinsic::nvvm_suld_1d_i32_trap: - case Intrinsic::nvvm_suld_1d_v2i32_trap: - case Intrinsic::nvvm_suld_1d_v4i32_trap: - case Intrinsic::nvvm_suld_1d_array_i32_trap: - case Intrinsic::nvvm_suld_1d_array_v2i32_trap: - case Intrinsic::nvvm_suld_1d_array_v4i32_trap: - case Intrinsic::nvvm_suld_2d_i32_trap: - case Intrinsic::nvvm_suld_2d_v2i32_trap: - case Intrinsic::nvvm_suld_2d_v4i32_trap: - case Intrinsic::nvvm_suld_2d_array_i32_trap: - case Intrinsic::nvvm_suld_2d_array_v2i32_trap: - case Intrinsic::nvvm_suld_2d_array_v4i32_trap: - case Intrinsic::nvvm_suld_3d_i32_trap: - case Intrinsic::nvvm_suld_3d_v2i32_trap: - case Intrinsic::nvvm_suld_3d_v4i32_trap: - case Intrinsic::nvvm_suld_1d_i32_zero: - case Intrinsic::nvvm_suld_1d_v2i32_zero: - case Intrinsic::nvvm_suld_1d_v4i32_zero: - case Intrinsic::nvvm_suld_1d_array_i32_zero: - case Intrinsic::nvvm_suld_1d_array_v2i32_zero: - case Intrinsic::nvvm_suld_1d_array_v4i32_zero: - case Intrinsic::nvvm_suld_2d_i32_zero: - case Intrinsic::nvvm_suld_2d_v2i32_zero: - case Intrinsic::nvvm_suld_2d_v4i32_zero: - case Intrinsic::nvvm_suld_2d_array_i32_zero: - case Intrinsic::nvvm_suld_2d_array_v2i32_zero: - case Intrinsic::nvvm_suld_2d_array_v4i32_zero: - case Intrinsic::nvvm_suld_3d_i32_zero: - case Intrinsic::nvvm_suld_3d_v2i32_zero: - case Intrinsic::nvvm_suld_3d_v4i32_zero: - Info.opc = getOpcForSurfaceInstr(Intrinsic); - Info.memVT = MVT::i32; - Info.ptrVal = nullptr; - Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; - Info.align = 16; - return true; - - case Intrinsic::nvvm_suld_1d_i64_clamp: - case Intrinsic::nvvm_suld_1d_v2i64_clamp: - case Intrinsic::nvvm_suld_1d_array_i64_clamp: - case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: - case Intrinsic::nvvm_suld_2d_i64_clamp: - case Intrinsic::nvvm_suld_2d_v2i64_clamp: - case Intrinsic::nvvm_suld_2d_array_i64_clamp: - case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: - case Intrinsic::nvvm_suld_3d_i64_clamp: - case Intrinsic::nvvm_suld_3d_v2i64_clamp: - case Intrinsic::nvvm_suld_1d_i64_trap: - case Intrinsic::nvvm_suld_1d_v2i64_trap: - case Intrinsic::nvvm_suld_1d_array_i64_trap: - case Intrinsic::nvvm_suld_1d_array_v2i64_trap: - case Intrinsic::nvvm_suld_2d_i64_trap: - case Intrinsic::nvvm_suld_2d_v2i64_trap: - case Intrinsic::nvvm_suld_2d_array_i64_trap: - case Intrinsic::nvvm_suld_2d_array_v2i64_trap: - case Intrinsic::nvvm_suld_3d_i64_trap: - case Intrinsic::nvvm_suld_3d_v2i64_trap: - case Intrinsic::nvvm_suld_1d_i64_zero: - case Intrinsic::nvvm_suld_1d_v2i64_zero: - case Intrinsic::nvvm_suld_1d_array_i64_zero: - case Intrinsic::nvvm_suld_1d_array_v2i64_zero: - case Intrinsic::nvvm_suld_2d_i64_zero: - case Intrinsic::nvvm_suld_2d_v2i64_zero: - case Intrinsic::nvvm_suld_2d_array_i64_zero: - case Intrinsic::nvvm_suld_2d_array_v2i64_zero: - case Intrinsic::nvvm_suld_3d_i64_zero: - case Intrinsic::nvvm_suld_3d_v2i64_zero: - Info.opc = getOpcForSurfaceInstr(Intrinsic); - Info.memVT = MVT::i64; - Info.ptrVal = nullptr; - Info.offset = 0; - Info.vol = false; - Info.readMem = true; - Info.writeMem = false; - Info.align = 16; - return true; - } - return false; -} - -/// isLegalAddressingMode - Return true if the addressing mode represented -/// by AM is legal for this target, for a load/store of the specified type. -/// Used to guide target specific optimizations, like loop strength reduction -/// (LoopStrengthReduce.cpp) and memory optimization for address mode -/// (CodeGenPrepare.cpp) -bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, - const AddrMode &AM, Type *Ty, - unsigned AS) const { - // AddrMode - This represents an addressing mode of: - // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg - // - // The legal address modes are - // - [avar] - // - [areg] - // - [areg+immoff] - // - [immAddr] - - if (AM.BaseGV) { - return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; - } - - switch (AM.Scale) { - case 0: // "r", "r+i" or "i" is allowed - break; - case 1: - if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. - return false; - // Otherwise we have r+i. - break; - default: - // No scale > 1 is allowed - return false; - } - return true; -} - -//===----------------------------------------------------------------------===// -// NVPTX Inline Assembly Support -//===----------------------------------------------------------------------===// - -/// getConstraintType - Given a constraint letter, return the type of -/// constraint it is for this target. -NVPTXTargetLowering::ConstraintType -NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - default: - break; - case 'b': - case 'r': - case 'h': - case 'c': - case 'l': - case 'f': - case 'd': - case '0': - case 'N': - return C_RegisterClass; - } - } - return TargetLowering::getConstraintType(Constraint); -} - -std::pair -NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - StringRef Constraint, - MVT VT) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - case 'b': - return std::make_pair(0U, &NVPTX::Int1RegsRegClass); - case 'c': - return std::make_pair(0U, &NVPTX::Int16RegsRegClass); - case 'h': - return std::make_pair(0U, &NVPTX::Int16RegsRegClass); - case 'r': - return std::make_pair(0U, &NVPTX::Int32RegsRegClass); - case 'l': - case 'N': - return std::make_pair(0U, &NVPTX::Int64RegsRegClass); - case 'f': - return std::make_pair(0U, &NVPTX::Float32RegsRegClass); - case 'd': - return std::make_pair(0U, &NVPTX::Float64RegsRegClass); - } - } - return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); -} - -//===----------------------------------------------------------------------===// -// NVPTX DAG Combining -//===----------------------------------------------------------------------===// - -bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, - CodeGenOpt::Level OptLevel) const { - // Always honor command-line argument - if (FMAContractLevelOpt.getNumOccurrences() > 0) - return FMAContractLevelOpt > 0; - - // Do not contract if we're not optimizing the code. - if (OptLevel == 0) - return false; - - // Honor TargetOptions flags that explicitly say fusion is okay. - if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast) - return true; - - return allowUnsafeFPMath(MF); -} - -bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const { - // Honor TargetOptions flags that explicitly say unsafe math is okay. - if (MF.getTarget().Options.UnsafeFPMath) - return true; - - // Allow unsafe math if unsafe-fp-math attribute explicitly says so. - const Function *F = MF.getFunction(); - if (F->hasFnAttribute("unsafe-fp-math")) { - Attribute Attr = F->getFnAttribute("unsafe-fp-math"); - StringRef Val = Attr.getValueAsString(); - if (Val == "true") - return true; - } - - return false; -} - -/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with -/// operands N0 and N1. This is a helper for PerformADDCombine that is -/// called with the default operands, and if that fails, with commuted -/// operands. -static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, - TargetLowering::DAGCombinerInfo &DCI, - const NVPTXSubtarget &Subtarget, - CodeGenOpt::Level OptLevel) { - SelectionDAG &DAG = DCI.DAG; - // Skip non-integer, non-scalar case - EVT VT=N0.getValueType(); - if (VT.isVector()) - return SDValue(); - - // fold (add (mul a, b), c) -> (mad a, b, c) - // - if (N0.getOpcode() == ISD::MUL) { - assert (VT.isInteger()); - // For integer: - // Since integer multiply-add costs the same as integer multiply - // but is more costly than integer add, do the fusion only when - // the mul is only used in the add. - if (OptLevel==CodeGenOpt::None || VT != MVT::i32 || - !N0.getNode()->hasOneUse()) - return SDValue(); - - // Do the folding - return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), N1); - } - else if (N0.getOpcode() == ISD::FMUL) { - if (VT == MVT::f32 || VT == MVT::f64) { - const auto *TLI = static_cast( - &DAG.getTargetLoweringInfo()); - if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) - return SDValue(); - - // For floating point: - // Do the fusion only when the mul has less than 5 uses and all - // are add. - // The heuristic is that if a use is not an add, then that use - // cannot be fused into fma, therefore mul is still needed anyway. - // If there are more than 4 uses, even if they are all add, fusing - // them will increase register pressue. - // - int numUses = 0; - int nonAddCount = 0; - for (SDNode::use_iterator UI = N0.getNode()->use_begin(), - UE = N0.getNode()->use_end(); - UI != UE; ++UI) { - numUses++; - SDNode *User = *UI; - if (User->getOpcode() != ISD::FADD) - ++nonAddCount; - } - if (numUses >= 5) - return SDValue(); - if (nonAddCount) { - int orderNo = N->getIROrder(); - int orderNo2 = N0.getNode()->getIROrder(); - // simple heuristics here for considering potential register - // pressure, the logics here is that the differnce are used - // to measure the distance between def and use, the longer distance - // more likely cause register pressure. - if (orderNo - orderNo2 < 500) - return SDValue(); - - // Now, check if at least one of the FMUL's operands is live beyond the node N, - // which guarantees that the FMA will not increase register pressure at node N. - bool opIsLive = false; - const SDNode *left = N0.getOperand(0).getNode(); - const SDNode *right = N0.getOperand(1).getNode(); - - if (isa(left) || isa(right)) - opIsLive = true; - - if (!opIsLive) - for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - int orderNo3 = User->getIROrder(); - if (orderNo3 > orderNo) { - opIsLive = true; - break; - } - } - - if (!opIsLive) - for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - int orderNo3 = User->getIROrder(); - if (orderNo3 > orderNo) { - opIsLive = true; - break; - } - } - - if (!opIsLive) - return SDValue(); - } - - return DAG.getNode(ISD::FMA, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), N1); - } - } - - return SDValue(); -} - -/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. -/// -static SDValue PerformADDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const NVPTXSubtarget &Subtarget, - CodeGenOpt::Level OptLevel) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - // First try with the default operand order. - if (SDValue Result = - PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel)) - return Result; - - // If that didn't work, try again with the operands commuted. - return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); -} - -static SDValue PerformANDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // The type legalizer turns a vector load of i8 values into a zextload to i16 - // registers, optionally ANY_EXTENDs it (if target type is integer), - // and ANDs off the high 8 bits. Since we turn this load into a - // target-specific DAG node, the DAG combiner fails to eliminate these AND - // nodes. Do that here. - SDValue Val = N->getOperand(0); - SDValue Mask = N->getOperand(1); - - if (isa(Val)) { - std::swap(Val, Mask); - } - - SDValue AExt; - // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and - if (Val.getOpcode() == ISD::ANY_EXTEND) { - AExt = Val; - Val = Val->getOperand(0); - } - - if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { - Val = Val->getOperand(0); - } - - if (Val->getOpcode() == NVPTXISD::LoadV2 || - Val->getOpcode() == NVPTXISD::LoadV4) { - ConstantSDNode *MaskCnst = dyn_cast(Mask); - if (!MaskCnst) { - // Not an AND with a constant - return SDValue(); - } - - uint64_t MaskVal = MaskCnst->getZExtValue(); - if (MaskVal != 0xff) { - // Not an AND that chops off top 8 bits - return SDValue(); - } - - MemSDNode *Mem = dyn_cast(Val); - if (!Mem) { - // Not a MemSDNode?!? - return SDValue(); - } - - EVT MemVT = Mem->getMemoryVT(); - if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { - // We only handle the i8 case - return SDValue(); - } - - unsigned ExtType = - cast(Val->getOperand(Val->getNumOperands()-1))-> - getZExtValue(); - if (ExtType == ISD::SEXTLOAD) { - // If for some reason the load is a sextload, the and is needed to zero - // out the high 8 bits - return SDValue(); - } - - bool AddTo = false; - if (AExt.getNode() != nullptr) { - // Re-insert the ext as a zext. - Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), - AExt.getValueType(), Val); - AddTo = true; - } - - // If we get here, the AND is unnecessary. Just replace it with the load - DCI.CombineTo(N, Val, AddTo); - } - - return SDValue(); -} - -static SDValue PerformREMCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - CodeGenOpt::Level OptLevel) { - assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM); - - // Don't do anything at less than -O2. - if (OptLevel < CodeGenOpt::Default) - return SDValue(); - - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - bool IsSigned = N->getOpcode() == ISD::SREM; - unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV; - - const SDValue &Num = N->getOperand(0); - const SDValue &Den = N->getOperand(1); - - for (const SDNode *U : Num->uses()) { - if (U->getOpcode() == DivOpc && U->getOperand(0) == Num && - U->getOperand(1) == Den) { - // Num % Den -> Num - (Num / Den) * Den - return DAG.getNode(ISD::SUB, DL, VT, Num, - DAG.getNode(ISD::MUL, DL, VT, - DAG.getNode(DivOpc, DL, VT, Num, Den), - Den)); - } - } - return SDValue(); -} - -enum OperandSignedness { - Signed = 0, - Unsigned, - Unknown -}; - -/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand -/// that can be demoted to \p OptSize bits without loss of information. The -/// signedness of the operand, if determinable, is placed in \p S. -static bool IsMulWideOperandDemotable(SDValue Op, - unsigned OptSize, - OperandSignedness &S) { - S = Unknown; - - if (Op.getOpcode() == ISD::SIGN_EXTEND || - Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { - EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getSizeInBits() <= OptSize) { - S = Signed; - return true; - } - } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { - EVT OrigVT = Op.getOperand(0).getValueType(); - if (OrigVT.getSizeInBits() <= OptSize) { - S = Unsigned; - return true; - } - } - - return false; -} - -/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can -/// be demoted to \p OptSize bits without loss of information. If the operands -/// contain a constant, it should appear as the RHS operand. The signedness of -/// the operands is placed in \p IsSigned. -static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, - unsigned OptSize, - bool &IsSigned) { - OperandSignedness LHSSign; - - // The LHS operand must be a demotable op - if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) - return false; - - // We should have been able to determine the signedness from the LHS - if (LHSSign == Unknown) - return false; - - IsSigned = (LHSSign == Signed); - - // The RHS can be a demotable op or a constant - if (ConstantSDNode *CI = dyn_cast(RHS)) { - const APInt &Val = CI->getAPIntValue(); - if (LHSSign == Unsigned) { - return Val.isIntN(OptSize); - } else { - return Val.isSignedIntN(OptSize); - } - } else { - OperandSignedness RHSSign; - if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) - return false; - - return LHSSign == RHSSign; - } -} - -/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply -/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform -/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift -/// amount. -static SDValue TryMULWIDECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - EVT MulType = N->getValueType(0); - if (MulType != MVT::i32 && MulType != MVT::i64) { - return SDValue(); - } - - SDLoc DL(N); - unsigned OptSize = MulType.getSizeInBits() >> 1; - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - - // Canonicalize the multiply so the constant (if any) is on the right - if (N->getOpcode() == ISD::MUL) { - if (isa(LHS)) { - std::swap(LHS, RHS); - } - } - - // If we have a SHL, determine the actual multiply amount - if (N->getOpcode() == ISD::SHL) { - ConstantSDNode *ShlRHS = dyn_cast(RHS); - if (!ShlRHS) { - return SDValue(); - } - - APInt ShiftAmt = ShlRHS->getAPIntValue(); - unsigned BitWidth = MulType.getSizeInBits(); - if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { - APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; - RHS = DCI.DAG.getConstant(MulVal, DL, MulType); - } else { - return SDValue(); - } - } - - bool Signed; - // Verify that our operands are demotable - if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { - return SDValue(); - } - - EVT DemotedVT; - if (MulType == MVT::i32) { - DemotedVT = MVT::i16; - } else { - DemotedVT = MVT::i32; - } - - // Truncate the operands to the correct size. Note that these are just for - // type consistency and will (likely) be eliminated in later phases. - SDValue TruncLHS = - DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS); - SDValue TruncRHS = - DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS); - - unsigned Opc; - if (Signed) { - Opc = NVPTXISD::MUL_WIDE_SIGNED; - } else { - Opc = NVPTXISD::MUL_WIDE_UNSIGNED; - } - - return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS); -} - -/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. -static SDValue PerformMULCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - CodeGenOpt::Level OptLevel) { - if (OptLevel > 0) { - // Try mul.wide combining at OptLevel > 0 - if (SDValue Ret = TryMULWIDECombine(N, DCI)) - return Ret; - } - - return SDValue(); -} - -/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. -static SDValue PerformSHLCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - CodeGenOpt::Level OptLevel) { - if (OptLevel > 0) { - // Try mul.wide combining at OptLevel > 0 - if (SDValue Ret = TryMULWIDECombine(N, DCI)) - return Ret; - } - - return SDValue(); -} - -static SDValue PerformSETCCCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - EVT CCType = N->getValueType(0); - SDValue A = N->getOperand(0); - SDValue B = N->getOperand(1); - - if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16) - return SDValue(); - - SDLoc DL(N); - // setp.f16x2 returns two scalar predicates, which we need to - // convert back to v2i1. The returned result will be scalarized by - // the legalizer, but the comparison will remain a single vector - // instruction. - SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL, - DCI.DAG.getVTList(MVT::i1, MVT::i1), - {A, B, N->getOperand(2)}); - return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0), - CCNode.getValue(1)); -} - -SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); - switch (N->getOpcode()) { - default: break; - case ISD::ADD: - case ISD::FADD: - return PerformADDCombine(N, DCI, STI, OptLevel); - case ISD::MUL: - return PerformMULCombine(N, DCI, OptLevel); - case ISD::SHL: - return PerformSHLCombine(N, DCI, OptLevel); - case ISD::AND: - return PerformANDCombine(N, DCI); - case ISD::UREM: - case ISD::SREM: - return PerformREMCombine(N, DCI, OptLevel); - case ISD::SETCC: - return PerformSETCCCombine(N, DCI); - } - return SDValue(); -} - -/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. -static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, - SmallVectorImpl &Results) { - EVT ResVT = N->getValueType(0); - SDLoc DL(N); - - assert(ResVT.isVector() && "Vector load must have vector type"); - - // We only handle "native" vector sizes for now, e.g. <4 x double> is not - // legal. We can (and should) split that into 2 loads of <2 x double> here - // but I'm leaving that as a TODO for now. - assert(ResVT.isSimple() && "Can only handle simple types"); - switch (ResVT.getSimpleVT().SimpleTy) { - default: - return; - case MVT::v2i8: - case MVT::v2i16: - case MVT::v2i32: - case MVT::v2i64: - case MVT::v2f16: - case MVT::v2f32: - case MVT::v2f64: - case MVT::v4i8: - case MVT::v4i16: - case MVT::v4i32: - case MVT::v4f16: - case MVT::v4f32: - case MVT::v8f16: // <4 x f16x2> - // This is a "native" vector type - break; - } - - LoadSDNode *LD = cast(N); - - unsigned Align = LD->getAlignment(); - auto &TD = DAG.getDataLayout(); - unsigned PrefAlign = - TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); - if (Align < PrefAlign) { - // This load is not sufficiently aligned, so bail out and let this vector - // load be scalarized. Note that we may still be able to emit smaller - // vector loads. For example, if we are loading a <4 x float> with an - // alignment of 8, this check will fail but the legalizer will try again - // with 2 x <2 x float>, which will succeed with an alignment of 8. - return; - } - - EVT EltVT = ResVT.getVectorElementType(); - unsigned NumElts = ResVT.getVectorNumElements(); - - // Since LoadV2 is a target node, we cannot rely on DAG type legalization. - // Therefore, we must ensure the type is legal. For i1 and i8, we set the - // loaded type to i16 and propagate the "real" type as the memory type. - bool NeedTrunc = false; - if (EltVT.getSizeInBits() < 16) { - EltVT = MVT::i16; - NeedTrunc = true; - } - - unsigned Opcode = 0; - SDVTList LdResVTs; - bool LoadF16x2 = false; - - switch (NumElts) { - default: - return; - case 2: - Opcode = NVPTXISD::LoadV2; - LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); - break; - case 4: { - Opcode = NVPTXISD::LoadV4; - EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; - LdResVTs = DAG.getVTList(ListVTs); - break; - } - case 8: { - // v8f16 is a special case. PTX doesn't have ld.v8.f16 - // instruction. Instead, we split the vector into v2f16 chunks and - // load them with ld.v4.b32. - assert(EltVT == MVT::f16 && "Unsupported v8 vector type."); - LoadF16x2 = true; - Opcode = NVPTXISD::LoadV4; - EVT ListVTs[] = {MVT::v2f16, MVT::v2f16, MVT::v2f16, MVT::v2f16, - MVT::Other}; - LdResVTs = DAG.getVTList(ListVTs); - break; - } - } - - // Copy regular operands - SmallVector OtherOps(N->op_begin(), N->op_end()); - - // The select routine does not have access to the LoadSDNode instance, so - // pass along the extension information - OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); - - SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, - LD->getMemoryVT(), - LD->getMemOperand()); - - SmallVector ScalarRes; - if (LoadF16x2) { - // Split v2f16 subvectors back into individual elements. - NumElts /= 2; - for (unsigned i = 0; i < NumElts; ++i) { - SDValue SubVector = NewLD.getValue(i); - SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, - DAG.getIntPtrConstant(0, DL)); - SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, - DAG.getIntPtrConstant(1, DL)); - ScalarRes.push_back(E0); - ScalarRes.push_back(E1); - } - } else { - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Res = NewLD.getValue(i); - if (NeedTrunc) - Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); - ScalarRes.push_back(Res); - } - } - - SDValue LoadChain = NewLD.getValue(NumElts); - - SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes); - - Results.push_back(BuildVec); - Results.push_back(LoadChain); -} - -static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, - SmallVectorImpl &Results) { - SDValue Chain = N->getOperand(0); - SDValue Intrin = N->getOperand(1); - SDLoc DL(N); - - // Get the intrinsic ID - unsigned IntrinNo = cast(Intrin.getNode())->getZExtValue(); - switch (IntrinNo) { - default: - return; - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_p: - case Intrinsic::nvvm_ldu_global_i: - case Intrinsic::nvvm_ldu_global_f: - case Intrinsic::nvvm_ldu_global_p: { - EVT ResVT = N->getValueType(0); - - if (ResVT.isVector()) { - // Vector LDG/LDU - - unsigned NumElts = ResVT.getVectorNumElements(); - EVT EltVT = ResVT.getVectorElementType(); - - // Since LDU/LDG are target nodes, we cannot rely on DAG type - // legalization. - // Therefore, we must ensure the type is legal. For i1 and i8, we set the - // loaded type to i16 and propagate the "real" type as the memory type. - bool NeedTrunc = false; - if (EltVT.getSizeInBits() < 16) { - EltVT = MVT::i16; - NeedTrunc = true; - } - - unsigned Opcode = 0; - SDVTList LdResVTs; - - switch (NumElts) { - default: - return; - case 2: - switch (IntrinNo) { - default: - return; - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_p: - Opcode = NVPTXISD::LDGV2; - break; - case Intrinsic::nvvm_ldu_global_i: - case Intrinsic::nvvm_ldu_global_f: - case Intrinsic::nvvm_ldu_global_p: - Opcode = NVPTXISD::LDUV2; - break; - } - LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); - break; - case 4: { - switch (IntrinNo) { - default: - return; - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_p: - Opcode = NVPTXISD::LDGV4; - break; - case Intrinsic::nvvm_ldu_global_i: - case Intrinsic::nvvm_ldu_global_f: - case Intrinsic::nvvm_ldu_global_p: - Opcode = NVPTXISD::LDUV4; - break; - } - EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; - LdResVTs = DAG.getVTList(ListVTs); - break; - } - } - - SmallVector OtherOps; - - // Copy regular operands - - OtherOps.push_back(Chain); // Chain - // Skip operand 1 (intrinsic ID) - // Others - OtherOps.append(N->op_begin() + 2, N->op_end()); - - MemIntrinsicSDNode *MemSD = cast(N); - - SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, - MemSD->getMemoryVT(), - MemSD->getMemOperand()); - - SmallVector ScalarRes; - - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Res = NewLD.getValue(i); - if (NeedTrunc) - Res = - DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); - ScalarRes.push_back(Res); - } - - SDValue LoadChain = NewLD.getValue(NumElts); - - SDValue BuildVec = - DAG.getBuildVector(ResVT, DL, ScalarRes); - - Results.push_back(BuildVec); - Results.push_back(LoadChain); - } else { - // i8 LDG/LDU - assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && - "Custom handling of non-i8 ldu/ldg?"); - - // Just copy all operands as-is - SmallVector Ops(N->op_begin(), N->op_end()); - - // Force output to i16 - SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); - - MemIntrinsicSDNode *MemSD = cast(N); - - // We make sure the memory type is i8, which will be used during isel - // to select the proper instruction. - SDValue NewLD = - DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, - MVT::i8, MemSD->getMemOperand()); - - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, - NewLD.getValue(0))); - Results.push_back(NewLD.getValue(1)); - } - } - } -} - -void NVPTXTargetLowering::ReplaceNodeResults( - SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - switch (N->getOpcode()) { - default: - report_fatal_error("Unhandled custom legalization"); - case ISD::LOAD: - ReplaceLoadVector(N, DAG, Results); - return; - case ISD::INTRINSIC_W_CHAIN: - ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); - return; - } -} - -// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file. -void NVPTXSection::anchor() {} - -NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { - delete static_cast(TextSection); - delete static_cast(DataSection); - delete static_cast(BSSSection); - delete static_cast(ReadOnlySection); - - delete static_cast(StaticCtorSection); - delete static_cast(StaticDtorSection); - delete static_cast(LSDASection); - delete static_cast(EHFrameSection); - delete static_cast(DwarfAbbrevSection); - delete static_cast(DwarfInfoSection); - delete static_cast(DwarfLineSection); - delete static_cast(DwarfFrameSection); - delete static_cast(DwarfPubTypesSection); - delete static_cast(DwarfDebugInlineSection); - delete static_cast(DwarfStrSection); - delete static_cast(DwarfLocSection); - delete static_cast(DwarfARangesSection); - delete static_cast(DwarfRangesSection); - delete static_cast(DwarfMacinfoSection); -} - -MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal( - const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - return getDataSection(); -} +//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that NVPTX uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "NVPTXISelLowering.h" +#include "MCTargetDesc/NVPTXBaseInfo.h" +#include "NVPTX.h" +#include "NVPTXSection.h" +#include "NVPTXSubtarget.h" +#include "NVPTXTargetMachine.h" +#include "NVPTXTargetObjectFile.h" +#include "NVPTXUtilities.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetCallingConv.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG_TYPE +#define DEBUG_TYPE "nvptx-lower" + +using namespace llvm; + +static unsigned int uniqueCallSite = 0; + +static cl::opt sched4reg( + "nvptx-sched4reg", + cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); + +static cl::opt +FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, + cl::desc("NVPTX Specific: FMA contraction (0: don't do it" + " 1: do it 2: do it aggressively"), + cl::init(2)); + +static cl::opt UsePrecDivF32( + "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, + cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" + " IEEE Compliant F32 div.rnd if available."), + cl::init(2)); + +static cl::opt UsePrecSqrtF32( + "nvptx-prec-sqrtf32", cl::Hidden, + cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."), + cl::init(true)); + +static cl::opt FtzEnabled( + "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, + cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), + cl::init(false)); + +int NVPTXTargetLowering::getDivF32Level() const { + if (UsePrecDivF32.getNumOccurrences() > 0) { + // If nvptx-prec-div32=N is used on the command-line, always honor it + return UsePrecDivF32; + } else { + // Otherwise, use div.approx if fast math is enabled + if (getTargetMachine().Options.UnsafeFPMath) + return 0; + else + return 2; + } +} + +bool NVPTXTargetLowering::usePrecSqrtF32() const { + if (UsePrecSqrtF32.getNumOccurrences() > 0) { + // If nvptx-prec-sqrtf32 is used on the command-line, always honor it + return UsePrecSqrtF32; + } else { + // Otherwise, use sqrt.approx if fast math is enabled + return !getTargetMachine().Options.UnsafeFPMath; + } +} + +bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const { + // TODO: Get rid of this flag; there can be only one way to do this. + if (FtzEnabled.getNumOccurrences() > 0) { + // If nvptx-f32ftz is used on the command-line, always honor it + return FtzEnabled; + } else { + const Function *F = MF.getFunction(); + // Otherwise, check for an nvptx-f32ftz attribute on the function + if (F->hasFnAttribute("nvptx-f32ftz")) + return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true"; + else + return false; + } +} + +static bool IsPTXVectorType(MVT VT) { + switch (VT.SimpleTy) { + default: + return false; + case MVT::v2i1: + case MVT::v4i1: + case MVT::v2i8: + case MVT::v4i8: + case MVT::v2i16: + case MVT::v4i16: + case MVT::v2i32: + case MVT::v4i32: + case MVT::v2i64: + case MVT::v2f16: + case MVT::v4f16: + case MVT::v8f16: // <4 x f16x2> + case MVT::v2f32: + case MVT::v4f32: + case MVT::v2f64: + return true; + } +} + +/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive +/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors +/// into their primitive components. +/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the +/// same number of types as the Ins/Outs arrays in LowerFormalArguments, +/// LowerCall, and LowerReturn. +static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl &ValueVTs, + SmallVectorImpl *Offsets = nullptr, + uint64_t StartingOffset = 0) { + SmallVector TempVTs; + SmallVector TempOffsets; + + ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); + for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { + EVT VT = TempVTs[i]; + uint64_t Off = TempOffsets[i]; + // Split vectors into individual elements, except for v2f16, which + // we will pass as a single scalar. + if (VT.isVector()) { + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + // Vectors with an even number of f16 elements will be passed to + // us as an array of v2f16 elements. We must match this so we + // stay in sync with Ins/Outs. + if (EltVT == MVT::f16 && NumElts % 2 == 0) { + EltVT = MVT::v2f16; + NumElts /= 2; + } + for (unsigned j = 0; j != NumElts; ++j) { + ValueVTs.push_back(EltVT); + if (Offsets) + Offsets->push_back(Off + j * EltVT.getStoreSize()); + } + } else { + ValueVTs.push_back(VT); + if (Offsets) + Offsets->push_back(Off); + } + } +} + +// Check whether we can merge loads/stores of some of the pieces of a +// flattened function parameter or return value into a single vector +// load/store. +// +// The flattened parameter is represented as a list of EVTs and +// offsets, and the whole structure is aligned to ParamAlignment. This +// function determines whether we can load/store pieces of the +// parameter starting at index Idx using a single vectorized op of +// size AccessSize. If so, it returns the number of param pieces +// covered by the vector op. Otherwise, it returns 1. +static unsigned CanMergeParamLoadStoresStartingAt( + unsigned Idx, uint32_t AccessSize, const SmallVectorImpl &ValueVTs, + const SmallVectorImpl &Offsets, unsigned ParamAlignment) { + assert(isPowerOf2_32(AccessSize) && "must be a power of 2!"); + + // Can't vectorize if param alignment is not sufficient. + if (AccessSize > ParamAlignment) + return 1; + // Can't vectorize if offset is not aligned. + if (Offsets[Idx] & (AccessSize - 1)) + return 1; + + EVT EltVT = ValueVTs[Idx]; + unsigned EltSize = EltVT.getStoreSize(); + + // Element is too large to vectorize. + if (EltSize >= AccessSize) + return 1; + + unsigned NumElts = AccessSize / EltSize; + // Can't vectorize if AccessBytes if not a multiple of EltSize. + if (AccessSize != EltSize * NumElts) + return 1; + + // We don't have enough elements to vectorize. + if (Idx + NumElts > ValueVTs.size()) + return 1; + + // PTX ISA can only deal with 2- and 4-element vector ops. + if (NumElts != 4 && NumElts != 2) + return 1; + + for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) { + // Types do not match. + if (ValueVTs[j] != EltVT) + return 1; + + // Elements are not contiguous. + if (Offsets[j] - Offsets[j - 1] != EltSize) + return 1; + } + // OK. We can vectorize ValueVTs[i..i+NumElts) + return NumElts; +} + +// Flags for tracking per-element vectorization state of loads/stores +// of a flattened function parameter or return value. +enum ParamVectorizationFlags { + PVF_INNER = 0x0, // Middle elements of a vector. + PVF_FIRST = 0x1, // First element of the vector. + PVF_LAST = 0x2, // Last element of the vector. + // Scalar is effectively a 1-element vector. + PVF_SCALAR = PVF_FIRST | PVF_LAST +}; + +// Computes whether and how we can vectorize the loads/stores of a +// flattened function parameter or return value. +// +// The flattened parameter is represented as the list of ValueVTs and +// Offsets, and is aligned to ParamAlignment bytes. We return a vector +// of the same size as ValueVTs indicating how each piece should be +// loaded/stored (i.e. as a scalar, or as part of a vector +// load/store). +static SmallVector +VectorizePTXValueVTs(const SmallVectorImpl &ValueVTs, + const SmallVectorImpl &Offsets, + unsigned ParamAlignment) { + // Set vector size to match ValueVTs and mark all elements as + // scalars by default. + SmallVector VectorInfo; + VectorInfo.assign(ValueVTs.size(), PVF_SCALAR); + + // Check what we can vectorize using 128/64/32-bit accesses. + for (int I = 0, E = ValueVTs.size(); I != E; ++I) { + // Skip elements we've already processed. + assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state."); + for (unsigned AccessSize : {16, 8, 4, 2}) { + unsigned NumElts = CanMergeParamLoadStoresStartingAt( + I, AccessSize, ValueVTs, Offsets, ParamAlignment); + // Mark vectorized elements. + switch (NumElts) { + default: + llvm_unreachable("Unexpected return value"); + case 1: + // Can't vectorize using this size, try next smaller size. + continue; + case 2: + assert(I + 1 < E && "Not enough elements."); + VectorInfo[I] = PVF_FIRST; + VectorInfo[I + 1] = PVF_LAST; + I += 1; + break; + case 4: + assert(I + 3 < E && "Not enough elements."); + VectorInfo[I] = PVF_FIRST; + VectorInfo[I + 1] = PVF_INNER; + VectorInfo[I + 2] = PVF_INNER; + VectorInfo[I + 3] = PVF_LAST; + I += 3; + break; + } + // Break out of the inner loop because we've already succeeded + // using largest possible AccessSize. + break; + } + } + return VectorInfo; +} + +// NVPTXTargetLowering Constructor. +NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, + const NVPTXSubtarget &STI) + : TargetLowering(TM), nvTM(&TM), STI(STI) { + // always lower memset, memcpy, and memmove intrinsics to load/store + // instructions, rather + // then generating calls to memset, mempcy or memmove. + MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; + + setBooleanContents(ZeroOrNegativeOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + // Jump is Expensive. Don't create extra control flow for 'and', 'or' + // condition branches. + setJumpIsExpensive(true); + + // Wide divides are _very_ slow. Try to reduce the width of the divide if + // possible. + addBypassSlowDiv(64, 32); + + // By default, use the Source scheduling + if (sched4reg) + setSchedulingPreference(Sched::RegPressure); + else + setSchedulingPreference(Sched::Source); + + auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action, + LegalizeAction NoF16Action) { + setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action); + }; + + addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass); + addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass); + addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass); + addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); + addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); + addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); + addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass); + addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass); + + // Conversion to/from FP16/FP16x2 is always legal. + setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom); + + setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote); + setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand); + + // Operations not directly supported by NVPTX. + setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::v2f16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i8, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::BR_CC, MVT::f16, Expand); + setOperationAction(ISD::BR_CC, MVT::v2f16, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::BR_CC, MVT::i8, Expand); + setOperationAction(ISD::BR_CC, MVT::i16, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); + // Some SIGN_EXTEND_INREG can be done using cvt instruction. + // For others we will expand to a SHL/SRA pair. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); + setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); + + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + + if (STI.hasROT64()) { + setOperationAction(ISD::ROTL, MVT::i64, Legal); + setOperationAction(ISD::ROTR, MVT::i64, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); + } + if (STI.hasROT32()) { + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTR, MVT::i32, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); + } + + setOperationAction(ISD::ROTL, MVT::i16, Expand); + setOperationAction(ISD::ROTR, MVT::i16, Expand); + setOperationAction(ISD::ROTL, MVT::i8, Expand); + setOperationAction(ISD::ROTR, MVT::i8, Expand); + setOperationAction(ISD::BSWAP, MVT::i16, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + + // Indirect branch is not supported. + // This also disables Jump Table creation. + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + + // We want to legalize constant related memmove and memcopy + // intrinsics. + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + + // Turn FP extload into load/fpextend + setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); + // Turn FP truncstore into trunc + store. + // FIXME: vector types should also be expanded + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // PTX does not support load / store predicate registers + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setTruncStoreAction(VT, MVT::i1, Expand); + } + + // This is legal in NVPTX + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f16, Legal); + + // TRAP can be lowered to PTX trap + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + setOperationAction(ISD::ADDC, MVT::i64, Expand); + setOperationAction(ISD::ADDE, MVT::i64, Expand); + + // Register custom handling for vector loads/stores + for (MVT VT : MVT::vector_valuetypes()) { + if (IsPTXVectorType(VT)) { + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom); + } + } + + // Custom handling for i8 intrinsics + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom); + + for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) { + setOperationAction(ISD::ABS, Ty, Legal); + setOperationAction(ISD::SMIN, Ty, Legal); + setOperationAction(ISD::SMAX, Ty, Legal); + setOperationAction(ISD::UMIN, Ty, Legal); + setOperationAction(ISD::UMAX, Ty, Legal); + + setOperationAction(ISD::CTPOP, Ty, Legal); + setOperationAction(ISD::CTLZ, Ty, Legal); + } + + setOperationAction(ISD::CTTZ, MVT::i16, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i64, Expand); + + // PTX does not directly support SELP of i1, so promote to i32 first + setOperationAction(ISD::SELECT, MVT::i1, Custom); + + // PTX cannot multiply two i64s in a single instruction. + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + + // We have some custom DAG combine patterns for these nodes + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::FADD); + setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SREM); + setTargetDAGCombine(ISD::UREM); + + // setcc for f16x2 needs special handling to prevent legalizer's + // attempt to scalarize it due to v2i1 not being legal. + if (STI.allowFP16Math()) + setTargetDAGCombine(ISD::SETCC); + + // Promote fp16 arithmetic if fp16 hardware isn't available or the + // user passed --nvptx-no-fp16-math. The flag is useful because, + // although sm_53+ GPUs have some sort of FP16 support in + // hardware, only sm_53 and sm_60 have full implementation. Others + // only have token amount of hardware and are likely to run faster + // by using fp32 units instead. + for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) { + setFP16OperationAction(Op, MVT::f16, Legal, Promote); + setFP16OperationAction(Op, MVT::v2f16, Legal, Expand); + } + + // There's no neg.f16 instruction. Expand to (0-x). + setOperationAction(ISD::FNEG, MVT::f16, Expand); + setOperationAction(ISD::FNEG, MVT::v2f16, Expand); + + // (would be) Library functions. + + // These map to conversion instructions for scalar FP types. + for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT, + ISD::FROUND, ISD::FTRUNC}) { + setOperationAction(Op, MVT::f16, Legal); + setOperationAction(Op, MVT::f32, Legal); + setOperationAction(Op, MVT::f64, Legal); + setOperationAction(Op, MVT::v2f16, Expand); + } + + // 'Expand' implements FCOPYSIGN without calling an external library. + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + + // These map to corresponding instructions for f32/f64. f16 must be + // promoted to f32. v2f16 is expanded to f16, which is then promoted + // to f32. + for (const auto &Op : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, + ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM}) { + setOperationAction(Op, MVT::f16, Promote); + setOperationAction(Op, MVT::f32, Legal); + setOperationAction(Op, MVT::f64, Legal); + setOperationAction(Op, MVT::v2f16, Expand); + } + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINNAN, MVT::f16, Promote); + setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); + + // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate. + // No FPOW or FREM in PTX. + + // Now deduce the information based on the above mentioned + // actions + computeRegisterProperties(STI.getRegisterInfo()); +} + +const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((NVPTXISD::NodeType)Opcode) { + case NVPTXISD::FIRST_NUMBER: + break; + case NVPTXISD::CALL: + return "NVPTXISD::CALL"; + case NVPTXISD::RET_FLAG: + return "NVPTXISD::RET_FLAG"; + case NVPTXISD::LOAD_PARAM: + return "NVPTXISD::LOAD_PARAM"; + case NVPTXISD::Wrapper: + return "NVPTXISD::Wrapper"; + case NVPTXISD::DeclareParam: + return "NVPTXISD::DeclareParam"; + case NVPTXISD::DeclareScalarParam: + return "NVPTXISD::DeclareScalarParam"; + case NVPTXISD::DeclareRet: + return "NVPTXISD::DeclareRet"; + case NVPTXISD::DeclareScalarRet: + return "NVPTXISD::DeclareScalarRet"; + case NVPTXISD::DeclareRetParam: + return "NVPTXISD::DeclareRetParam"; + case NVPTXISD::PrintCall: + return "NVPTXISD::PrintCall"; + case NVPTXISD::PrintConvergentCall: + return "NVPTXISD::PrintConvergentCall"; + case NVPTXISD::PrintCallUni: + return "NVPTXISD::PrintCallUni"; + case NVPTXISD::PrintConvergentCallUni: + return "NVPTXISD::PrintConvergentCallUni"; + case NVPTXISD::LoadParam: + return "NVPTXISD::LoadParam"; + case NVPTXISD::LoadParamV2: + return "NVPTXISD::LoadParamV2"; + case NVPTXISD::LoadParamV4: + return "NVPTXISD::LoadParamV4"; + case NVPTXISD::StoreParam: + return "NVPTXISD::StoreParam"; + case NVPTXISD::StoreParamV2: + return "NVPTXISD::StoreParamV2"; + case NVPTXISD::StoreParamV4: + return "NVPTXISD::StoreParamV4"; + case NVPTXISD::StoreParamS32: + return "NVPTXISD::StoreParamS32"; + case NVPTXISD::StoreParamU32: + return "NVPTXISD::StoreParamU32"; + case NVPTXISD::CallArgBegin: + return "NVPTXISD::CallArgBegin"; + case NVPTXISD::CallArg: + return "NVPTXISD::CallArg"; + case NVPTXISD::LastCallArg: + return "NVPTXISD::LastCallArg"; + case NVPTXISD::CallArgEnd: + return "NVPTXISD::CallArgEnd"; + case NVPTXISD::CallVoid: + return "NVPTXISD::CallVoid"; + case NVPTXISD::CallVal: + return "NVPTXISD::CallVal"; + case NVPTXISD::CallSymbol: + return "NVPTXISD::CallSymbol"; + case NVPTXISD::Prototype: + return "NVPTXISD::Prototype"; + case NVPTXISD::MoveParam: + return "NVPTXISD::MoveParam"; + case NVPTXISD::StoreRetval: + return "NVPTXISD::StoreRetval"; + case NVPTXISD::StoreRetvalV2: + return "NVPTXISD::StoreRetvalV2"; + case NVPTXISD::StoreRetvalV4: + return "NVPTXISD::StoreRetvalV4"; + case NVPTXISD::PseudoUseParam: + return "NVPTXISD::PseudoUseParam"; + case NVPTXISD::RETURN: + return "NVPTXISD::RETURN"; + case NVPTXISD::CallSeqBegin: + return "NVPTXISD::CallSeqBegin"; + case NVPTXISD::CallSeqEnd: + return "NVPTXISD::CallSeqEnd"; + case NVPTXISD::CallPrototype: + return "NVPTXISD::CallPrototype"; + case NVPTXISD::LoadV2: + return "NVPTXISD::LoadV2"; + case NVPTXISD::LoadV4: + return "NVPTXISD::LoadV4"; + case NVPTXISD::LDGV2: + return "NVPTXISD::LDGV2"; + case NVPTXISD::LDGV4: + return "NVPTXISD::LDGV4"; + case NVPTXISD::LDUV2: + return "NVPTXISD::LDUV2"; + case NVPTXISD::LDUV4: + return "NVPTXISD::LDUV4"; + case NVPTXISD::StoreV2: + return "NVPTXISD::StoreV2"; + case NVPTXISD::StoreV4: + return "NVPTXISD::StoreV4"; + case NVPTXISD::FUN_SHFL_CLAMP: + return "NVPTXISD::FUN_SHFL_CLAMP"; + case NVPTXISD::FUN_SHFR_CLAMP: + return "NVPTXISD::FUN_SHFR_CLAMP"; + case NVPTXISD::IMAD: + return "NVPTXISD::IMAD"; + case NVPTXISD::SETP_F16X2: + return "NVPTXISD::SETP_F16X2"; + case NVPTXISD::Dummy: + return "NVPTXISD::Dummy"; + case NVPTXISD::MUL_WIDE_SIGNED: + return "NVPTXISD::MUL_WIDE_SIGNED"; + case NVPTXISD::MUL_WIDE_UNSIGNED: + return "NVPTXISD::MUL_WIDE_UNSIGNED"; + case NVPTXISD::Tex1DFloatS32: return "NVPTXISD::Tex1DFloatS32"; + case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; + case NVPTXISD::Tex1DFloatFloatLevel: + return "NVPTXISD::Tex1DFloatFloatLevel"; + case NVPTXISD::Tex1DFloatFloatGrad: + return "NVPTXISD::Tex1DFloatFloatGrad"; + case NVPTXISD::Tex1DS32S32: return "NVPTXISD::Tex1DS32S32"; + case NVPTXISD::Tex1DS32Float: return "NVPTXISD::Tex1DS32Float"; + case NVPTXISD::Tex1DS32FloatLevel: + return "NVPTXISD::Tex1DS32FloatLevel"; + case NVPTXISD::Tex1DS32FloatGrad: + return "NVPTXISD::Tex1DS32FloatGrad"; + case NVPTXISD::Tex1DU32S32: return "NVPTXISD::Tex1DU32S32"; + case NVPTXISD::Tex1DU32Float: return "NVPTXISD::Tex1DU32Float"; + case NVPTXISD::Tex1DU32FloatLevel: + return "NVPTXISD::Tex1DU32FloatLevel"; + case NVPTXISD::Tex1DU32FloatGrad: + return "NVPTXISD::Tex1DU32FloatGrad"; + case NVPTXISD::Tex1DArrayFloatS32: return "NVPTXISD::Tex1DArrayFloatS32"; + case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; + case NVPTXISD::Tex1DArrayFloatFloatLevel: + return "NVPTXISD::Tex1DArrayFloatFloatLevel"; + case NVPTXISD::Tex1DArrayFloatFloatGrad: + return "NVPTXISD::Tex1DArrayFloatFloatGrad"; + case NVPTXISD::Tex1DArrayS32S32: return "NVPTXISD::Tex1DArrayS32S32"; + case NVPTXISD::Tex1DArrayS32Float: return "NVPTXISD::Tex1DArrayS32Float"; + case NVPTXISD::Tex1DArrayS32FloatLevel: + return "NVPTXISD::Tex1DArrayS32FloatLevel"; + case NVPTXISD::Tex1DArrayS32FloatGrad: + return "NVPTXISD::Tex1DArrayS32FloatGrad"; + case NVPTXISD::Tex1DArrayU32S32: return "NVPTXISD::Tex1DArrayU32S32"; + case NVPTXISD::Tex1DArrayU32Float: return "NVPTXISD::Tex1DArrayU32Float"; + case NVPTXISD::Tex1DArrayU32FloatLevel: + return "NVPTXISD::Tex1DArrayU32FloatLevel"; + case NVPTXISD::Tex1DArrayU32FloatGrad: + return "NVPTXISD::Tex1DArrayU32FloatGrad"; + case NVPTXISD::Tex2DFloatS32: return "NVPTXISD::Tex2DFloatS32"; + case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; + case NVPTXISD::Tex2DFloatFloatLevel: + return "NVPTXISD::Tex2DFloatFloatLevel"; + case NVPTXISD::Tex2DFloatFloatGrad: + return "NVPTXISD::Tex2DFloatFloatGrad"; + case NVPTXISD::Tex2DS32S32: return "NVPTXISD::Tex2DS32S32"; + case NVPTXISD::Tex2DS32Float: return "NVPTXISD::Tex2DS32Float"; + case NVPTXISD::Tex2DS32FloatLevel: + return "NVPTXISD::Tex2DS32FloatLevel"; + case NVPTXISD::Tex2DS32FloatGrad: + return "NVPTXISD::Tex2DS32FloatGrad"; + case NVPTXISD::Tex2DU32S32: return "NVPTXISD::Tex2DU32S32"; + case NVPTXISD::Tex2DU32Float: return "NVPTXISD::Tex2DU32Float"; + case NVPTXISD::Tex2DU32FloatLevel: + return "NVPTXISD::Tex2DU32FloatLevel"; + case NVPTXISD::Tex2DU32FloatGrad: + return "NVPTXISD::Tex2DU32FloatGrad"; + case NVPTXISD::Tex2DArrayFloatS32: return "NVPTXISD::Tex2DArrayFloatS32"; + case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; + case NVPTXISD::Tex2DArrayFloatFloatLevel: + return "NVPTXISD::Tex2DArrayFloatFloatLevel"; + case NVPTXISD::Tex2DArrayFloatFloatGrad: + return "NVPTXISD::Tex2DArrayFloatFloatGrad"; + case NVPTXISD::Tex2DArrayS32S32: return "NVPTXISD::Tex2DArrayS32S32"; + case NVPTXISD::Tex2DArrayS32Float: return "NVPTXISD::Tex2DArrayS32Float"; + case NVPTXISD::Tex2DArrayS32FloatLevel: + return "NVPTXISD::Tex2DArrayS32FloatLevel"; + case NVPTXISD::Tex2DArrayS32FloatGrad: + return "NVPTXISD::Tex2DArrayS32FloatGrad"; + case NVPTXISD::Tex2DArrayU32S32: return "NVPTXISD::Tex2DArrayU32S32"; + case NVPTXISD::Tex2DArrayU32Float: return "NVPTXISD::Tex2DArrayU32Float"; + case NVPTXISD::Tex2DArrayU32FloatLevel: + return "NVPTXISD::Tex2DArrayU32FloatLevel"; + case NVPTXISD::Tex2DArrayU32FloatGrad: + return "NVPTXISD::Tex2DArrayU32FloatGrad"; + case NVPTXISD::Tex3DFloatS32: return "NVPTXISD::Tex3DFloatS32"; + case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; + case NVPTXISD::Tex3DFloatFloatLevel: + return "NVPTXISD::Tex3DFloatFloatLevel"; + case NVPTXISD::Tex3DFloatFloatGrad: + return "NVPTXISD::Tex3DFloatFloatGrad"; + case NVPTXISD::Tex3DS32S32: return "NVPTXISD::Tex3DS32S32"; + case NVPTXISD::Tex3DS32Float: return "NVPTXISD::Tex3DS32Float"; + case NVPTXISD::Tex3DS32FloatLevel: + return "NVPTXISD::Tex3DS32FloatLevel"; + case NVPTXISD::Tex3DS32FloatGrad: + return "NVPTXISD::Tex3DS32FloatGrad"; + case NVPTXISD::Tex3DU32S32: return "NVPTXISD::Tex3DU32S32"; + case NVPTXISD::Tex3DU32Float: return "NVPTXISD::Tex3DU32Float"; + case NVPTXISD::Tex3DU32FloatLevel: + return "NVPTXISD::Tex3DU32FloatLevel"; + case NVPTXISD::Tex3DU32FloatGrad: + return "NVPTXISD::Tex3DU32FloatGrad"; + case NVPTXISD::TexCubeFloatFloat: return "NVPTXISD::TexCubeFloatFloat"; + case NVPTXISD::TexCubeFloatFloatLevel: + return "NVPTXISD::TexCubeFloatFloatLevel"; + case NVPTXISD::TexCubeS32Float: return "NVPTXISD::TexCubeS32Float"; + case NVPTXISD::TexCubeS32FloatLevel: + return "NVPTXISD::TexCubeS32FloatLevel"; + case NVPTXISD::TexCubeU32Float: return "NVPTXISD::TexCubeU32Float"; + case NVPTXISD::TexCubeU32FloatLevel: + return "NVPTXISD::TexCubeU32FloatLevel"; + case NVPTXISD::TexCubeArrayFloatFloat: + return "NVPTXISD::TexCubeArrayFloatFloat"; + case NVPTXISD::TexCubeArrayFloatFloatLevel: + return "NVPTXISD::TexCubeArrayFloatFloatLevel"; + case NVPTXISD::TexCubeArrayS32Float: + return "NVPTXISD::TexCubeArrayS32Float"; + case NVPTXISD::TexCubeArrayS32FloatLevel: + return "NVPTXISD::TexCubeArrayS32FloatLevel"; + case NVPTXISD::TexCubeArrayU32Float: + return "NVPTXISD::TexCubeArrayU32Float"; + case NVPTXISD::TexCubeArrayU32FloatLevel: + return "NVPTXISD::TexCubeArrayU32FloatLevel"; + case NVPTXISD::Tld4R2DFloatFloat: + return "NVPTXISD::Tld4R2DFloatFloat"; + case NVPTXISD::Tld4G2DFloatFloat: + return "NVPTXISD::Tld4G2DFloatFloat"; + case NVPTXISD::Tld4B2DFloatFloat: + return "NVPTXISD::Tld4B2DFloatFloat"; + case NVPTXISD::Tld4A2DFloatFloat: + return "NVPTXISD::Tld4A2DFloatFloat"; + case NVPTXISD::Tld4R2DS64Float: + return "NVPTXISD::Tld4R2DS64Float"; + case NVPTXISD::Tld4G2DS64Float: + return "NVPTXISD::Tld4G2DS64Float"; + case NVPTXISD::Tld4B2DS64Float: + return "NVPTXISD::Tld4B2DS64Float"; + case NVPTXISD::Tld4A2DS64Float: + return "NVPTXISD::Tld4A2DS64Float"; + case NVPTXISD::Tld4R2DU64Float: + return "NVPTXISD::Tld4R2DU64Float"; + case NVPTXISD::Tld4G2DU64Float: + return "NVPTXISD::Tld4G2DU64Float"; + case NVPTXISD::Tld4B2DU64Float: + return "NVPTXISD::Tld4B2DU64Float"; + case NVPTXISD::Tld4A2DU64Float: + return "NVPTXISD::Tld4A2DU64Float"; + + case NVPTXISD::TexUnified1DFloatS32: + return "NVPTXISD::TexUnified1DFloatS32"; + case NVPTXISD::TexUnified1DFloatFloat: + return "NVPTXISD::TexUnified1DFloatFloat"; + case NVPTXISD::TexUnified1DFloatFloatLevel: + return "NVPTXISD::TexUnified1DFloatFloatLevel"; + case NVPTXISD::TexUnified1DFloatFloatGrad: + return "NVPTXISD::TexUnified1DFloatFloatGrad"; + case NVPTXISD::TexUnified1DS32S32: + return "NVPTXISD::TexUnified1DS32S32"; + case NVPTXISD::TexUnified1DS32Float: + return "NVPTXISD::TexUnified1DS32Float"; + case NVPTXISD::TexUnified1DS32FloatLevel: + return "NVPTXISD::TexUnified1DS32FloatLevel"; + case NVPTXISD::TexUnified1DS32FloatGrad: + return "NVPTXISD::TexUnified1DS32FloatGrad"; + case NVPTXISD::TexUnified1DU32S32: + return "NVPTXISD::TexUnified1DU32S32"; + case NVPTXISD::TexUnified1DU32Float: + return "NVPTXISD::TexUnified1DU32Float"; + case NVPTXISD::TexUnified1DU32FloatLevel: + return "NVPTXISD::TexUnified1DU32FloatLevel"; + case NVPTXISD::TexUnified1DU32FloatGrad: + return "NVPTXISD::TexUnified1DU32FloatGrad"; + case NVPTXISD::TexUnified1DArrayFloatS32: + return "NVPTXISD::TexUnified1DArrayFloatS32"; + case NVPTXISD::TexUnified1DArrayFloatFloat: + return "NVPTXISD::TexUnified1DArrayFloatFloat"; + case NVPTXISD::TexUnified1DArrayFloatFloatLevel: + return "NVPTXISD::TexUnified1DArrayFloatFloatLevel"; + case NVPTXISD::TexUnified1DArrayFloatFloatGrad: + return "NVPTXISD::TexUnified1DArrayFloatFloatGrad"; + case NVPTXISD::TexUnified1DArrayS32S32: + return "NVPTXISD::TexUnified1DArrayS32S32"; + case NVPTXISD::TexUnified1DArrayS32Float: + return "NVPTXISD::TexUnified1DArrayS32Float"; + case NVPTXISD::TexUnified1DArrayS32FloatLevel: + return "NVPTXISD::TexUnified1DArrayS32FloatLevel"; + case NVPTXISD::TexUnified1DArrayS32FloatGrad: + return "NVPTXISD::TexUnified1DArrayS32FloatGrad"; + case NVPTXISD::TexUnified1DArrayU32S32: + return "NVPTXISD::TexUnified1DArrayU32S32"; + case NVPTXISD::TexUnified1DArrayU32Float: + return "NVPTXISD::TexUnified1DArrayU32Float"; + case NVPTXISD::TexUnified1DArrayU32FloatLevel: + return "NVPTXISD::TexUnified1DArrayU32FloatLevel"; + case NVPTXISD::TexUnified1DArrayU32FloatGrad: + return "NVPTXISD::TexUnified1DArrayU32FloatGrad"; + case NVPTXISD::TexUnified2DFloatS32: + return "NVPTXISD::TexUnified2DFloatS32"; + case NVPTXISD::TexUnified2DFloatFloat: + return "NVPTXISD::TexUnified2DFloatFloat"; + case NVPTXISD::TexUnified2DFloatFloatLevel: + return "NVPTXISD::TexUnified2DFloatFloatLevel"; + case NVPTXISD::TexUnified2DFloatFloatGrad: + return "NVPTXISD::TexUnified2DFloatFloatGrad"; + case NVPTXISD::TexUnified2DS32S32: + return "NVPTXISD::TexUnified2DS32S32"; + case NVPTXISD::TexUnified2DS32Float: + return "NVPTXISD::TexUnified2DS32Float"; + case NVPTXISD::TexUnified2DS32FloatLevel: + return "NVPTXISD::TexUnified2DS32FloatLevel"; + case NVPTXISD::TexUnified2DS32FloatGrad: + return "NVPTXISD::TexUnified2DS32FloatGrad"; + case NVPTXISD::TexUnified2DU32S32: + return "NVPTXISD::TexUnified2DU32S32"; + case NVPTXISD::TexUnified2DU32Float: + return "NVPTXISD::TexUnified2DU32Float"; + case NVPTXISD::TexUnified2DU32FloatLevel: + return "NVPTXISD::TexUnified2DU32FloatLevel"; + case NVPTXISD::TexUnified2DU32FloatGrad: + return "NVPTXISD::TexUnified2DU32FloatGrad"; + case NVPTXISD::TexUnified2DArrayFloatS32: + return "NVPTXISD::TexUnified2DArrayFloatS32"; + case NVPTXISD::TexUnified2DArrayFloatFloat: + return "NVPTXISD::TexUnified2DArrayFloatFloat"; + case NVPTXISD::TexUnified2DArrayFloatFloatLevel: + return "NVPTXISD::TexUnified2DArrayFloatFloatLevel"; + case NVPTXISD::TexUnified2DArrayFloatFloatGrad: + return "NVPTXISD::TexUnified2DArrayFloatFloatGrad"; + case NVPTXISD::TexUnified2DArrayS32S32: + return "NVPTXISD::TexUnified2DArrayS32S32"; + case NVPTXISD::TexUnified2DArrayS32Float: + return "NVPTXISD::TexUnified2DArrayS32Float"; + case NVPTXISD::TexUnified2DArrayS32FloatLevel: + return "NVPTXISD::TexUnified2DArrayS32FloatLevel"; + case NVPTXISD::TexUnified2DArrayS32FloatGrad: + return "NVPTXISD::TexUnified2DArrayS32FloatGrad"; + case NVPTXISD::TexUnified2DArrayU32S32: + return "NVPTXISD::TexUnified2DArrayU32S32"; + case NVPTXISD::TexUnified2DArrayU32Float: + return "NVPTXISD::TexUnified2DArrayU32Float"; + case NVPTXISD::TexUnified2DArrayU32FloatLevel: + return "NVPTXISD::TexUnified2DArrayU32FloatLevel"; + case NVPTXISD::TexUnified2DArrayU32FloatGrad: + return "NVPTXISD::TexUnified2DArrayU32FloatGrad"; + case NVPTXISD::TexUnified3DFloatS32: + return "NVPTXISD::TexUnified3DFloatS32"; + case NVPTXISD::TexUnified3DFloatFloat: + return "NVPTXISD::TexUnified3DFloatFloat"; + case NVPTXISD::TexUnified3DFloatFloatLevel: + return "NVPTXISD::TexUnified3DFloatFloatLevel"; + case NVPTXISD::TexUnified3DFloatFloatGrad: + return "NVPTXISD::TexUnified3DFloatFloatGrad"; + case NVPTXISD::TexUnified3DS32S32: + return "NVPTXISD::TexUnified3DS32S32"; + case NVPTXISD::TexUnified3DS32Float: + return "NVPTXISD::TexUnified3DS32Float"; + case NVPTXISD::TexUnified3DS32FloatLevel: + return "NVPTXISD::TexUnified3DS32FloatLevel"; + case NVPTXISD::TexUnified3DS32FloatGrad: + return "NVPTXISD::TexUnified3DS32FloatGrad"; + case NVPTXISD::TexUnified3DU32S32: + return "NVPTXISD::TexUnified3DU32S32"; + case NVPTXISD::TexUnified3DU32Float: + return "NVPTXISD::TexUnified3DU32Float"; + case NVPTXISD::TexUnified3DU32FloatLevel: + return "NVPTXISD::TexUnified3DU32FloatLevel"; + case NVPTXISD::TexUnified3DU32FloatGrad: + return "NVPTXISD::TexUnified3DU32FloatGrad"; + case NVPTXISD::TexUnifiedCubeFloatFloat: + return "NVPTXISD::TexUnifiedCubeFloatFloat"; + case NVPTXISD::TexUnifiedCubeFloatFloatLevel: + return "NVPTXISD::TexUnifiedCubeFloatFloatLevel"; + case NVPTXISD::TexUnifiedCubeS32Float: + return "NVPTXISD::TexUnifiedCubeS32Float"; + case NVPTXISD::TexUnifiedCubeS32FloatLevel: + return "NVPTXISD::TexUnifiedCubeS32FloatLevel"; + case NVPTXISD::TexUnifiedCubeU32Float: + return "NVPTXISD::TexUnifiedCubeU32Float"; + case NVPTXISD::TexUnifiedCubeU32FloatLevel: + return "NVPTXISD::TexUnifiedCubeU32FloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayFloatFloat: + return "NVPTXISD::TexUnifiedCubeArrayFloatFloat"; + case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayS32Float: + return "NVPTXISD::TexUnifiedCubeArrayS32Float"; + case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayS32FloatLevel"; + case NVPTXISD::TexUnifiedCubeArrayU32Float: + return "NVPTXISD::TexUnifiedCubeArrayU32Float"; + case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel: + return "NVPTXISD::TexUnifiedCubeArrayU32FloatLevel"; + case NVPTXISD::Tld4UnifiedR2DFloatFloat: + return "NVPTXISD::Tld4UnifiedR2DFloatFloat"; + case NVPTXISD::Tld4UnifiedG2DFloatFloat: + return "NVPTXISD::Tld4UnifiedG2DFloatFloat"; + case NVPTXISD::Tld4UnifiedB2DFloatFloat: + return "NVPTXISD::Tld4UnifiedB2DFloatFloat"; + case NVPTXISD::Tld4UnifiedA2DFloatFloat: + return "NVPTXISD::Tld4UnifiedA2DFloatFloat"; + case NVPTXISD::Tld4UnifiedR2DS64Float: + return "NVPTXISD::Tld4UnifiedR2DS64Float"; + case NVPTXISD::Tld4UnifiedG2DS64Float: + return "NVPTXISD::Tld4UnifiedG2DS64Float"; + case NVPTXISD::Tld4UnifiedB2DS64Float: + return "NVPTXISD::Tld4UnifiedB2DS64Float"; + case NVPTXISD::Tld4UnifiedA2DS64Float: + return "NVPTXISD::Tld4UnifiedA2DS64Float"; + case NVPTXISD::Tld4UnifiedR2DU64Float: + return "NVPTXISD::Tld4UnifiedR2DU64Float"; + case NVPTXISD::Tld4UnifiedG2DU64Float: + return "NVPTXISD::Tld4UnifiedG2DU64Float"; + case NVPTXISD::Tld4UnifiedB2DU64Float: + return "NVPTXISD::Tld4UnifiedB2DU64Float"; + case NVPTXISD::Tld4UnifiedA2DU64Float: + return "NVPTXISD::Tld4UnifiedA2DU64Float"; + + case NVPTXISD::Suld1DI8Clamp: return "NVPTXISD::Suld1DI8Clamp"; + case NVPTXISD::Suld1DI16Clamp: return "NVPTXISD::Suld1DI16Clamp"; + case NVPTXISD::Suld1DI32Clamp: return "NVPTXISD::Suld1DI32Clamp"; + case NVPTXISD::Suld1DI64Clamp: return "NVPTXISD::Suld1DI64Clamp"; + case NVPTXISD::Suld1DV2I8Clamp: return "NVPTXISD::Suld1DV2I8Clamp"; + case NVPTXISD::Suld1DV2I16Clamp: return "NVPTXISD::Suld1DV2I16Clamp"; + case NVPTXISD::Suld1DV2I32Clamp: return "NVPTXISD::Suld1DV2I32Clamp"; + case NVPTXISD::Suld1DV2I64Clamp: return "NVPTXISD::Suld1DV2I64Clamp"; + case NVPTXISD::Suld1DV4I8Clamp: return "NVPTXISD::Suld1DV4I8Clamp"; + case NVPTXISD::Suld1DV4I16Clamp: return "NVPTXISD::Suld1DV4I16Clamp"; + case NVPTXISD::Suld1DV4I32Clamp: return "NVPTXISD::Suld1DV4I32Clamp"; + + case NVPTXISD::Suld1DArrayI8Clamp: return "NVPTXISD::Suld1DArrayI8Clamp"; + case NVPTXISD::Suld1DArrayI16Clamp: return "NVPTXISD::Suld1DArrayI16Clamp"; + case NVPTXISD::Suld1DArrayI32Clamp: return "NVPTXISD::Suld1DArrayI32Clamp"; + case NVPTXISD::Suld1DArrayI64Clamp: return "NVPTXISD::Suld1DArrayI64Clamp"; + case NVPTXISD::Suld1DArrayV2I8Clamp: return "NVPTXISD::Suld1DArrayV2I8Clamp"; + case NVPTXISD::Suld1DArrayV2I16Clamp:return "NVPTXISD::Suld1DArrayV2I16Clamp"; + case NVPTXISD::Suld1DArrayV2I32Clamp:return "NVPTXISD::Suld1DArrayV2I32Clamp"; + case NVPTXISD::Suld1DArrayV2I64Clamp:return "NVPTXISD::Suld1DArrayV2I64Clamp"; + case NVPTXISD::Suld1DArrayV4I8Clamp: return "NVPTXISD::Suld1DArrayV4I8Clamp"; + case NVPTXISD::Suld1DArrayV4I16Clamp:return "NVPTXISD::Suld1DArrayV4I16Clamp"; + case NVPTXISD::Suld1DArrayV4I32Clamp:return "NVPTXISD::Suld1DArrayV4I32Clamp"; + + case NVPTXISD::Suld2DI8Clamp: return "NVPTXISD::Suld2DI8Clamp"; + case NVPTXISD::Suld2DI16Clamp: return "NVPTXISD::Suld2DI16Clamp"; + case NVPTXISD::Suld2DI32Clamp: return "NVPTXISD::Suld2DI32Clamp"; + case NVPTXISD::Suld2DI64Clamp: return "NVPTXISD::Suld2DI64Clamp"; + case NVPTXISD::Suld2DV2I8Clamp: return "NVPTXISD::Suld2DV2I8Clamp"; + case NVPTXISD::Suld2DV2I16Clamp: return "NVPTXISD::Suld2DV2I16Clamp"; + case NVPTXISD::Suld2DV2I32Clamp: return "NVPTXISD::Suld2DV2I32Clamp"; + case NVPTXISD::Suld2DV2I64Clamp: return "NVPTXISD::Suld2DV2I64Clamp"; + case NVPTXISD::Suld2DV4I8Clamp: return "NVPTXISD::Suld2DV4I8Clamp"; + case NVPTXISD::Suld2DV4I16Clamp: return "NVPTXISD::Suld2DV4I16Clamp"; + case NVPTXISD::Suld2DV4I32Clamp: return "NVPTXISD::Suld2DV4I32Clamp"; + + case NVPTXISD::Suld2DArrayI8Clamp: return "NVPTXISD::Suld2DArrayI8Clamp"; + case NVPTXISD::Suld2DArrayI16Clamp: return "NVPTXISD::Suld2DArrayI16Clamp"; + case NVPTXISD::Suld2DArrayI32Clamp: return "NVPTXISD::Suld2DArrayI32Clamp"; + case NVPTXISD::Suld2DArrayI64Clamp: return "NVPTXISD::Suld2DArrayI64Clamp"; + case NVPTXISD::Suld2DArrayV2I8Clamp: return "NVPTXISD::Suld2DArrayV2I8Clamp"; + case NVPTXISD::Suld2DArrayV2I16Clamp:return "NVPTXISD::Suld2DArrayV2I16Clamp"; + case NVPTXISD::Suld2DArrayV2I32Clamp:return "NVPTXISD::Suld2DArrayV2I32Clamp"; + case NVPTXISD::Suld2DArrayV2I64Clamp:return "NVPTXISD::Suld2DArrayV2I64Clamp"; + case NVPTXISD::Suld2DArrayV4I8Clamp: return "NVPTXISD::Suld2DArrayV4I8Clamp"; + case NVPTXISD::Suld2DArrayV4I16Clamp:return "NVPTXISD::Suld2DArrayV4I16Clamp"; + case NVPTXISD::Suld2DArrayV4I32Clamp:return "NVPTXISD::Suld2DArrayV4I32Clamp"; + + case NVPTXISD::Suld3DI8Clamp: return "NVPTXISD::Suld3DI8Clamp"; + case NVPTXISD::Suld3DI16Clamp: return "NVPTXISD::Suld3DI16Clamp"; + case NVPTXISD::Suld3DI32Clamp: return "NVPTXISD::Suld3DI32Clamp"; + case NVPTXISD::Suld3DI64Clamp: return "NVPTXISD::Suld3DI64Clamp"; + case NVPTXISD::Suld3DV2I8Clamp: return "NVPTXISD::Suld3DV2I8Clamp"; + case NVPTXISD::Suld3DV2I16Clamp: return "NVPTXISD::Suld3DV2I16Clamp"; + case NVPTXISD::Suld3DV2I32Clamp: return "NVPTXISD::Suld3DV2I32Clamp"; + case NVPTXISD::Suld3DV2I64Clamp: return "NVPTXISD::Suld3DV2I64Clamp"; + case NVPTXISD::Suld3DV4I8Clamp: return "NVPTXISD::Suld3DV4I8Clamp"; + case NVPTXISD::Suld3DV4I16Clamp: return "NVPTXISD::Suld3DV4I16Clamp"; + case NVPTXISD::Suld3DV4I32Clamp: return "NVPTXISD::Suld3DV4I32Clamp"; + + case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; + case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; + case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; + case NVPTXISD::Suld1DI64Trap: return "NVPTXISD::Suld1DI64Trap"; + case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; + case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; + case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; + case NVPTXISD::Suld1DV2I64Trap: return "NVPTXISD::Suld1DV2I64Trap"; + case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; + case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; + case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; + + case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; + case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; + case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; + case NVPTXISD::Suld1DArrayI64Trap: return "NVPTXISD::Suld1DArrayI64Trap"; + case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; + case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; + case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; + case NVPTXISD::Suld1DArrayV2I64Trap: return "NVPTXISD::Suld1DArrayV2I64Trap"; + case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; + case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; + case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; + + case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; + case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; + case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; + case NVPTXISD::Suld2DI64Trap: return "NVPTXISD::Suld2DI64Trap"; + case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; + case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; + case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; + case NVPTXISD::Suld2DV2I64Trap: return "NVPTXISD::Suld2DV2I64Trap"; + case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; + case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; + case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; + + case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; + case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; + case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; + case NVPTXISD::Suld2DArrayI64Trap: return "NVPTXISD::Suld2DArrayI64Trap"; + case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; + case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; + case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; + case NVPTXISD::Suld2DArrayV2I64Trap: return "NVPTXISD::Suld2DArrayV2I64Trap"; + case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; + case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; + case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; + + case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; + case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; + case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; + case NVPTXISD::Suld3DI64Trap: return "NVPTXISD::Suld3DI64Trap"; + case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; + case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; + case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; + case NVPTXISD::Suld3DV2I64Trap: return "NVPTXISD::Suld3DV2I64Trap"; + case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; + case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; + case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; + + case NVPTXISD::Suld1DI8Zero: return "NVPTXISD::Suld1DI8Zero"; + case NVPTXISD::Suld1DI16Zero: return "NVPTXISD::Suld1DI16Zero"; + case NVPTXISD::Suld1DI32Zero: return "NVPTXISD::Suld1DI32Zero"; + case NVPTXISD::Suld1DI64Zero: return "NVPTXISD::Suld1DI64Zero"; + case NVPTXISD::Suld1DV2I8Zero: return "NVPTXISD::Suld1DV2I8Zero"; + case NVPTXISD::Suld1DV2I16Zero: return "NVPTXISD::Suld1DV2I16Zero"; + case NVPTXISD::Suld1DV2I32Zero: return "NVPTXISD::Suld1DV2I32Zero"; + case NVPTXISD::Suld1DV2I64Zero: return "NVPTXISD::Suld1DV2I64Zero"; + case NVPTXISD::Suld1DV4I8Zero: return "NVPTXISD::Suld1DV4I8Zero"; + case NVPTXISD::Suld1DV4I16Zero: return "NVPTXISD::Suld1DV4I16Zero"; + case NVPTXISD::Suld1DV4I32Zero: return "NVPTXISD::Suld1DV4I32Zero"; + + case NVPTXISD::Suld1DArrayI8Zero: return "NVPTXISD::Suld1DArrayI8Zero"; + case NVPTXISD::Suld1DArrayI16Zero: return "NVPTXISD::Suld1DArrayI16Zero"; + case NVPTXISD::Suld1DArrayI32Zero: return "NVPTXISD::Suld1DArrayI32Zero"; + case NVPTXISD::Suld1DArrayI64Zero: return "NVPTXISD::Suld1DArrayI64Zero"; + case NVPTXISD::Suld1DArrayV2I8Zero: return "NVPTXISD::Suld1DArrayV2I8Zero"; + case NVPTXISD::Suld1DArrayV2I16Zero: return "NVPTXISD::Suld1DArrayV2I16Zero"; + case NVPTXISD::Suld1DArrayV2I32Zero: return "NVPTXISD::Suld1DArrayV2I32Zero"; + case NVPTXISD::Suld1DArrayV2I64Zero: return "NVPTXISD::Suld1DArrayV2I64Zero"; + case NVPTXISD::Suld1DArrayV4I8Zero: return "NVPTXISD::Suld1DArrayV4I8Zero"; + case NVPTXISD::Suld1DArrayV4I16Zero: return "NVPTXISD::Suld1DArrayV4I16Zero"; + case NVPTXISD::Suld1DArrayV4I32Zero: return "NVPTXISD::Suld1DArrayV4I32Zero"; + + case NVPTXISD::Suld2DI8Zero: return "NVPTXISD::Suld2DI8Zero"; + case NVPTXISD::Suld2DI16Zero: return "NVPTXISD::Suld2DI16Zero"; + case NVPTXISD::Suld2DI32Zero: return "NVPTXISD::Suld2DI32Zero"; + case NVPTXISD::Suld2DI64Zero: return "NVPTXISD::Suld2DI64Zero"; + case NVPTXISD::Suld2DV2I8Zero: return "NVPTXISD::Suld2DV2I8Zero"; + case NVPTXISD::Suld2DV2I16Zero: return "NVPTXISD::Suld2DV2I16Zero"; + case NVPTXISD::Suld2DV2I32Zero: return "NVPTXISD::Suld2DV2I32Zero"; + case NVPTXISD::Suld2DV2I64Zero: return "NVPTXISD::Suld2DV2I64Zero"; + case NVPTXISD::Suld2DV4I8Zero: return "NVPTXISD::Suld2DV4I8Zero"; + case NVPTXISD::Suld2DV4I16Zero: return "NVPTXISD::Suld2DV4I16Zero"; + case NVPTXISD::Suld2DV4I32Zero: return "NVPTXISD::Suld2DV4I32Zero"; + + case NVPTXISD::Suld2DArrayI8Zero: return "NVPTXISD::Suld2DArrayI8Zero"; + case NVPTXISD::Suld2DArrayI16Zero: return "NVPTXISD::Suld2DArrayI16Zero"; + case NVPTXISD::Suld2DArrayI32Zero: return "NVPTXISD::Suld2DArrayI32Zero"; + case NVPTXISD::Suld2DArrayI64Zero: return "NVPTXISD::Suld2DArrayI64Zero"; + case NVPTXISD::Suld2DArrayV2I8Zero: return "NVPTXISD::Suld2DArrayV2I8Zero"; + case NVPTXISD::Suld2DArrayV2I16Zero: return "NVPTXISD::Suld2DArrayV2I16Zero"; + case NVPTXISD::Suld2DArrayV2I32Zero: return "NVPTXISD::Suld2DArrayV2I32Zero"; + case NVPTXISD::Suld2DArrayV2I64Zero: return "NVPTXISD::Suld2DArrayV2I64Zero"; + case NVPTXISD::Suld2DArrayV4I8Zero: return "NVPTXISD::Suld2DArrayV4I8Zero"; + case NVPTXISD::Suld2DArrayV4I16Zero: return "NVPTXISD::Suld2DArrayV4I16Zero"; + case NVPTXISD::Suld2DArrayV4I32Zero: return "NVPTXISD::Suld2DArrayV4I32Zero"; + + case NVPTXISD::Suld3DI8Zero: return "NVPTXISD::Suld3DI8Zero"; + case NVPTXISD::Suld3DI16Zero: return "NVPTXISD::Suld3DI16Zero"; + case NVPTXISD::Suld3DI32Zero: return "NVPTXISD::Suld3DI32Zero"; + case NVPTXISD::Suld3DI64Zero: return "NVPTXISD::Suld3DI64Zero"; + case NVPTXISD::Suld3DV2I8Zero: return "NVPTXISD::Suld3DV2I8Zero"; + case NVPTXISD::Suld3DV2I16Zero: return "NVPTXISD::Suld3DV2I16Zero"; + case NVPTXISD::Suld3DV2I32Zero: return "NVPTXISD::Suld3DV2I32Zero"; + case NVPTXISD::Suld3DV2I64Zero: return "NVPTXISD::Suld3DV2I64Zero"; + case NVPTXISD::Suld3DV4I8Zero: return "NVPTXISD::Suld3DV4I8Zero"; + case NVPTXISD::Suld3DV4I16Zero: return "NVPTXISD::Suld3DV4I16Zero"; + case NVPTXISD::Suld3DV4I32Zero: return "NVPTXISD::Suld3DV4I32Zero"; + } + return nullptr; +} + +TargetLoweringBase::LegalizeTypeAction +NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { + if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) + return TypeSplitVector; + if (VT == MVT::v2f16) + return TypeLegal; + return TargetLoweringBase::getPreferredVectorAction(VT); +} + +SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, + int Enabled, int &ExtraSteps, + bool &UseOneConst, + bool Reciprocal) const { + if (!(Enabled == ReciprocalEstimate::Enabled || + (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32()))) + return SDValue(); + + if (ExtraSteps == ReciprocalEstimate::Unspecified) + ExtraSteps = 0; + + SDLoc DL(Operand); + EVT VT = Operand.getValueType(); + bool Ftz = useF32FTZ(DAG.getMachineFunction()); + + auto MakeIntrinsicCall = [&](Intrinsic::ID IID) { + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(IID, DL, MVT::i32), Operand); + }; + + // The sqrt and rsqrt refinement processes assume we always start out with an + // approximation of the rsqrt. Therefore, if we're going to do any refinement + // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing + // any refinement, we must return a regular sqrt. + if (Reciprocal || ExtraSteps > 0) { + if (VT == MVT::f32) + return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f + : Intrinsic::nvvm_rsqrt_approx_f); + else if (VT == MVT::f64) + return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d); + else + return SDValue(); + } else { + if (VT == MVT::f32) + return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f + : Intrinsic::nvvm_sqrt_approx_f); + else { + // There's no sqrt.approx.f64 instruction, so we emit + // reciprocal(rsqrt(x)). This is faster than + // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain + // x * rsqrt(x).) + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32), + MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d)); + } + } +} + +SDValue +NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + const GlobalValue *GV = cast(Op)->getGlobal(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT); + return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); +} + +std::string NVPTXTargetLowering::getPrototype( + const DataLayout &DL, Type *retTy, const ArgListTy &Args, + const SmallVectorImpl &Outs, unsigned retAlignment, + const ImmutableCallSite *CS) const { + auto PtrVT = getPointerTy(DL); + + bool isABI = (STI.getSmVersion() >= 20); + assert(isABI && "Non-ABI compilation is not supported"); + if (!isABI) + return ""; + + std::stringstream O; + O << "prototype_" << uniqueCallSite << " : .callprototype "; + + if (retTy->getTypeID() == Type::VoidTyID) { + O << "()"; + } else { + O << "("; + if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { + unsigned size = 0; + if (auto *ITy = dyn_cast(retTy)) { + size = ITy->getBitWidth(); + } else { + assert(retTy->isFloatingPointTy() && + "Floating point type expected here"); + size = retTy->getPrimitiveSizeInBits(); + } + // PTX ABI requires all scalar return values to be at least 32 + // bits in size. fp16 normally uses .b16 as its storage type in + // PTX, so its size must be adjusted here, too. + if (size < 32) + size = 32; + + O << ".param .b" << size << " _"; + } else if (isa(retTy)) { + O << ".param .b" << PtrVT.getSizeInBits() << " _"; + } else if (retTy->isAggregateType() || retTy->isVectorTy()) { + auto &DL = CS->getCalledFunction()->getParent()->getDataLayout(); + O << ".param .align " << retAlignment << " .b8 _[" + << DL.getTypeAllocSize(retTy) << "]"; + } else { + llvm_unreachable("Unknown return type"); + } + O << ") "; + } + O << "_ ("; + + bool first = true; + + unsigned OIdx = 0; + for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { + Type *Ty = Args[i].Ty; + if (!first) { + O << ", "; + } + first = false; + + if (!Outs[OIdx].Flags.isByVal()) { + if (Ty->isAggregateType() || Ty->isVectorTy()) { + unsigned align = 0; + const CallInst *CallI = cast(CS->getInstruction()); + // +1 because index 0 is reserved for return type alignment + if (!getAlign(*CallI, i + 1, align)) + align = DL.getABITypeAlignment(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); + O << ".param .align " << align << " .b8 "; + O << "_"; + O << "[" << sz << "]"; + // update the index for Outs + SmallVector vtparts; + ComputeValueVTs(*this, DL, Ty, vtparts); + if (unsigned len = vtparts.size()) + OIdx += len - 1; + continue; + } + // i8 types in IR will be i16 types in SDAG + assert((getValueType(DL, Ty) == Outs[OIdx].VT || + (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && + "type mismatch between callee prototype and arguments"); + // scalar type + unsigned sz = 0; + if (isa(Ty)) { + sz = cast(Ty)->getBitWidth(); + if (sz < 32) + sz = 32; + } else if (isa(Ty)) { + sz = PtrVT.getSizeInBits(); + } else if (Ty->isHalfTy()) + // PTX ABI requires all scalar parameters to be at least 32 + // bits in size. fp16 normally uses .b16 as its storage type + // in PTX, so its size must be adjusted here, too. + sz = 32; + else + sz = Ty->getPrimitiveSizeInBits(); + O << ".param .b" << sz << " "; + O << "_"; + continue; + } + auto *PTy = dyn_cast(Ty); + assert(PTy && "Param with byval attribute should be a pointer type"); + Type *ETy = PTy->getElementType(); + + unsigned align = Outs[OIdx].Flags.getByValAlign(); + unsigned sz = DL.getTypeAllocSize(ETy); + O << ".param .align " << align << " .b8 "; + O << "_"; + O << "[" << sz << "]"; + } + O << ");"; + return O.str(); +} + +unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, + const ImmutableCallSite *CS, + Type *Ty, unsigned Idx, + const DataLayout &DL) const { + if (!CS) { + // CallSite is zero, fallback to ABI type alignment + return DL.getABITypeAlignment(Ty); + } + + unsigned Align = 0; + const Value *DirectCallee = CS->getCalledFunction(); + + if (!DirectCallee) { + // We don't have a direct function symbol, but that may be because of + // constant cast instructions in the call. + const Instruction *CalleeI = CS->getInstruction(); + assert(CalleeI && "Call target is not a function or derived value?"); + + // With bitcast'd call targets, the instruction will be the call + if (isa(CalleeI)) { + // Check if we have call alignment metadata + if (getAlign(*cast(CalleeI), Idx, Align)) + return Align; + + const Value *CalleeV = cast(CalleeI)->getCalledValue(); + // Ignore any bitcast instructions + while (isa(CalleeV)) { + const ConstantExpr *CE = cast(CalleeV); + if (!CE->isCast()) + break; + // Look through the bitcast + CalleeV = cast(CalleeV)->getOperand(0); + } + + // We have now looked past all of the bitcasts. Do we finally have a + // Function? + if (isa(CalleeV)) + DirectCallee = CalleeV; + } + } + + // Check for function alignment information if we found that the + // ultimate target is a Function + if (DirectCallee) + if (getAlign(*cast(DirectCallee), Idx, Align)) + return Align; + + // Call is indirect or alignment information is not available, fall back to + // the ABI type alignment + return DL.getABITypeAlignment(Ty); +} + +SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + ArgListTy &Args = CLI.getArgs(); + Type *RetTy = CLI.RetTy; + ImmutableCallSite *CS = CLI.CS; + const DataLayout &DL = DAG.getDataLayout(); + + bool isABI = (STI.getSmVersion() >= 20); + assert(isABI && "Non-ABI compilation is not supported"); + if (!isABI) + return Chain; + + SDValue tempChain = Chain; + Chain = DAG.getCALLSEQ_START(Chain, uniqueCallSite, 0, dl); + SDValue InFlag = Chain.getValue(1); + + unsigned paramCount = 0; + // Args.size() and Outs.size() need not match. + // Outs.size() will be larger + // * if there is an aggregate argument with multiple fields (each field + // showing up separately in Outs) + // * if there is a vector argument with more than typical vector-length + // elements (generally if more than 4) where each vector element is + // individually present in Outs. + // So a different index should be used for indexing into Outs/OutVals. + // See similar issue in LowerFormalArguments. + unsigned OIdx = 0; + // Declare the .params or .reg need to pass values + // to the function + for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { + EVT VT = Outs[OIdx].VT; + Type *Ty = Args[i].Ty; + + if (!Outs[OIdx].Flags.isByVal()) { + SmallVector VTs; + SmallVector Offsets; + ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets); + unsigned ArgAlign = + getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL); + unsigned AllocSize = DL.getTypeAllocSize(Ty); + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + bool NeedAlign; // Does argument declaration specify alignment? + if (Ty->isAggregateType() || Ty->isVectorTy()) { + // declare .param .align .b8 .param[]; + SDValue DeclareParamOps[] = { + Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(AllocSize, dl, MVT::i32), InFlag}; + Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, + DeclareParamOps); + NeedAlign = true; + } else { + // declare .param .b .param; + if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) { + // PTX ABI requires integral types to be at least 32 bits in + // size. FP16 is loaded/stored using i16, so it's handled + // here as well. + AllocSize = 4; + } + SDValue DeclareScalarParamOps[] = { + Chain, DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(AllocSize * 8, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag}; + Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, + DeclareScalarParamOps); + NeedAlign = false; + } + InFlag = Chain.getValue(1); + + // PTX Interoperability Guide 3.3(A): [Integer] Values shorter + // than 32-bits are sign extended or zero extended, depending on + // whether they are signed or unsigned types. This case applies + // only to scalar parameters and not to aggregate values. + bool ExtendIntegerParam = + Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32; + + auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); + SmallVector StoreOperands; + for (unsigned j = 0, je = VTs.size(); j != je; ++j) { + // New store. + if (VectorInfo[j] & PVF_FIRST) { + assert(StoreOperands.empty() && "Unfinished preceeding store."); + StoreOperands.push_back(Chain); + StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32)); + StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32)); + } + + EVT EltVT = VTs[j]; + SDValue StVal = OutVals[OIdx]; + if (ExtendIntegerParam) { + assert(VTs.size() == 1 && "Scalar can't have multiple parts."); + // zext/sext to i32 + StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND, + dl, MVT::i32, StVal); + } else if (EltVT.getSizeInBits() < 16) { + // Use 16-bit registers for small stores as it's the + // smallest general purpose register size supported by NVPTX. + StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); + } + + // Record the value to store. + StoreOperands.push_back(StVal); + + if (VectorInfo[j] & PVF_LAST) { + unsigned NumElts = StoreOperands.size() - 3; + NVPTXISD::NodeType Op; + switch (NumElts) { + case 1: + Op = NVPTXISD::StoreParam; + break; + case 2: + Op = NVPTXISD::StoreParamV2; + break; + case 4: + Op = NVPTXISD::StoreParamV4; + break; + default: + llvm_unreachable("Invalid vector info."); + } + + StoreOperands.push_back(InFlag); + + // Adjust type of the store op if we've extended the scalar + // return value. + EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j]; + unsigned EltAlign = + NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0; + + Chain = DAG.getMemIntrinsicNode( + Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands, + TheStoreType, MachinePointerInfo(), EltAlign, + /* Volatile */ false, /* ReadMem */ false, + /* WriteMem */ true, /* Size */ 0); + InFlag = Chain.getValue(1); + + // Cleanup. + StoreOperands.clear(); + } + ++OIdx; + } + assert(StoreOperands.empty() && "Unfinished parameter store."); + if (VTs.size() > 0) + --OIdx; + ++paramCount; + continue; + } + + // ByVal arguments + SmallVector VTs; + SmallVector Offsets; + auto *PTy = dyn_cast(Args[i].Ty); + assert(PTy && "Type of a byval parameter should be pointer"); + ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0); + + // declare .param .align .b8 .param[]; + unsigned sz = Outs[OIdx].Flags.getByValSize(); + SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign(); + // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, + // so we don't need to worry about natural alignment or not. + // See TargetLowering::LowerCallTo(). + + // Enforce minumum alignment of 4 to work around ptxas miscompile + // for sm_50+. See corresponding alignment adjustment in + // emitFunctionParamList() for details. + if (ArgAlign < 4) + ArgAlign = 4; + SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32), + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(sz, dl, MVT::i32), InFlag}; + Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, + DeclareParamOps); + InFlag = Chain.getValue(1); + for (unsigned j = 0, je = VTs.size(); j != je; ++j) { + EVT elemtype = VTs[j]; + int curOffset = Offsets[j]; + unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); + auto PtrVT = getPointerTy(DL); + SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], + DAG.getConstant(curOffset, dl, PtrVT)); + SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), PartAlign); + if (elemtype.getSizeInBits() < 16) { + theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); + } + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, dl, MVT::i32), + DAG.getConstant(curOffset, dl, MVT::i32), + theVal, InFlag }; + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, + CopyParamOps, elemtype, + MachinePointerInfo(), /* Align */ 0, + /* Volatile */ false, /* ReadMem */ false, + /* WriteMem */ true, /* Size */ 0); + + InFlag = Chain.getValue(1); + } + ++paramCount; + } + + GlobalAddressSDNode *Func = dyn_cast(Callee.getNode()); + unsigned retAlignment = 0; + + // Handle Result + if (Ins.size() > 0) { + SmallVector resvtparts; + ComputeValueVTs(*this, DL, RetTy, resvtparts); + + // Declare + // .param .align 16 .b8 retval0[], or + // .param .b retval0 + unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy); + // Emit ".param .b retval0" instead of byte arrays only for + // these three types to match the logic in + // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. + // Plus, this behavior is consistent with nvcc's. + if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy() || + RetTy->isPointerTy()) { + // Scalar needs to be at least 32bit wide + if (resultsz < 32) + resultsz = 32; + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), + DAG.getConstant(resultsz, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, + DeclareRetOps); + InFlag = Chain.getValue(1); + } else { + retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL); + SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue DeclareRetOps[] = { Chain, + DAG.getConstant(retAlignment, dl, MVT::i32), + DAG.getConstant(resultsz / 8, dl, MVT::i32), + DAG.getConstant(0, dl, MVT::i32), InFlag }; + Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, + DeclareRetOps); + InFlag = Chain.getValue(1); + } + } + + if (!Func) { + // This is indirect function call case : PTX requires a prototype of the + // form + // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _); + // to be emitted, and the label has to used as the last arg of call + // instruction. + // The prototype is embedded in a string and put as the operand for a + // CallPrototype SDNode which will print out to the value of the string. + SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); + std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS); + const char *ProtoStr = + nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); + SDValue ProtoOps[] = { + Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, + }; + Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); + InFlag = Chain.getValue(1); + } + // Op to just print "call" + SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue PrintCallOps[] = { + Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag + }; + // We model convergent calls as separate opcodes. + unsigned Opcode = Func ? NVPTXISD::PrintCallUni : NVPTXISD::PrintCall; + if (CLI.IsConvergent) + Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni + : NVPTXISD::PrintConvergentCall; + Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps); + InFlag = Chain.getValue(1); + + // Ops to print out the function name + SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallVoidOps[] = { Chain, Callee, InFlag }; + Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); + InFlag = Chain.getValue(1); + + // Ops to print out the param list + SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallArgBeginOps[] = { Chain, InFlag }; + Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, + CallArgBeginOps); + InFlag = Chain.getValue(1); + + for (unsigned i = 0, e = paramCount; i != e; ++i) { + unsigned opcode; + if (i == (e - 1)) + opcode = NVPTXISD::LastCallArg; + else + opcode = NVPTXISD::CallArg; + SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32), + DAG.getConstant(i, dl, MVT::i32), InFlag }; + Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); + InFlag = Chain.getValue(1); + } + SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CallArgEndOps[] = { Chain, + DAG.getConstant(Func ? 1 : 0, dl, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); + InFlag = Chain.getValue(1); + + if (!Func) { + SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue PrototypeOps[] = { Chain, + DAG.getConstant(uniqueCallSite, dl, MVT::i32), + InFlag }; + Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); + InFlag = Chain.getValue(1); + } + + // Generate loads from param memory/moves from registers for result + if (Ins.size() > 0) { + SmallVector VTs; + SmallVector Offsets; + ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0); + assert(VTs.size() == Ins.size() && "Bad value decomposition"); + + unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL); + auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); + + SmallVector LoadVTs; + int VecIdx = -1; // Index of the first element of the vector. + + // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than + // 32-bits are sign extended or zero extended, depending on whether + // they are signed or unsigned types. + bool ExtendIntegerRetVal = + RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; + + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + bool needTruncate = false; + EVT TheLoadType = VTs[i]; + EVT EltType = Ins[i].VT; + unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]); + if (ExtendIntegerRetVal) { + TheLoadType = MVT::i32; + EltType = MVT::i32; + needTruncate = true; + } else if (TheLoadType.getSizeInBits() < 16) { + if (VTs[i].isInteger()) + needTruncate = true; + EltType = MVT::i16; + } + + // Record index of the very first element of the vector. + if (VectorInfo[i] & PVF_FIRST) { + assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list."); + VecIdx = i; + } + + LoadVTs.push_back(EltType); + + if (VectorInfo[i] & PVF_LAST) { + unsigned NumElts = LoadVTs.size(); + LoadVTs.push_back(MVT::Other); + LoadVTs.push_back(MVT::Glue); + NVPTXISD::NodeType Op; + switch (NumElts) { + case 1: + Op = NVPTXISD::LoadParam; + break; + case 2: + Op = NVPTXISD::LoadParamV2; + break; + case 4: + Op = NVPTXISD::LoadParamV4; + break; + default: + llvm_unreachable("Invalid vector info."); + } + + SDValue LoadOperands[] = { + Chain, DAG.getConstant(1, dl, MVT::i32), + DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag}; + SDValue RetVal = DAG.getMemIntrinsicNode( + Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType, + MachinePointerInfo(), EltAlign, /* Volatile */ false, + /* ReadMem */ true, /* WriteMem */ false, /* Size */ 0); + + for (unsigned j = 0; j < NumElts; ++j) { + SDValue Ret = RetVal.getValue(j); + if (needTruncate) + Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret); + InVals.push_back(Ret); + } + Chain = RetVal.getValue(NumElts); + InFlag = RetVal.getValue(NumElts + 1); + + // Cleanup + VecIdx = -1; + LoadVTs.clear(); + } + } + } + + Chain = DAG.getCALLSEQ_END(Chain, + DAG.getIntPtrConstant(uniqueCallSite, dl, true), + DAG.getIntPtrConstant(uniqueCallSite + 1, dl, + true), + InFlag, dl); + uniqueCallSite++; + + // set isTailCall to false for now, until we figure out how to express + // tail call optimization in PTX + isTailCall = false; + return Chain; +} + +// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() +// (see LegalizeDAG.cpp). This is slow and uses local memory. +// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 +SDValue +NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + SDLoc dl(Node); + SmallVector Ops; + unsigned NumOperands = Node->getNumOperands(); + for (unsigned i = 0; i < NumOperands; ++i) { + SDValue SubOp = Node->getOperand(i); + EVT VVT = SubOp.getNode()->getValueType(0); + EVT EltVT = VVT.getVectorElementType(); + unsigned NumSubElem = VVT.getVectorNumElements(); + for (unsigned j = 0; j < NumSubElem; ++j) { + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, + DAG.getIntPtrConstant(j, dl))); + } + } + return DAG.getBuildVector(Node->getValueType(0), dl, Ops); +} + +// We can init constant f16x2 with a single .b32 move. Normally it +// would get lowered as two constant loads and vector-packing move. +// mov.b16 %h1, 0x4000; +// mov.b16 %h2, 0x3C00; +// mov.b32 %hh2, {%h2, %h1}; +// Instead we want just a constant move: +// mov.b32 %hh2, 0x40003C00 +// +// This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0 +// generates good SASS in both cases. +SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + //return Op; + if (!(Op->getValueType(0) == MVT::v2f16 && + isa(Op->getOperand(0)) && + isa(Op->getOperand(1)))) + return Op; + + APInt E0 = + cast(Op->getOperand(0))->getValueAPF().bitcastToAPInt(); + APInt E1 = + cast(Op->getOperand(1))->getValueAPF().bitcastToAPInt(); + SDValue Const = + DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32); + return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const); +} + +SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDValue Index = Op->getOperand(1); + // Constant index will be matched by tablegen. + if (isa(Index.getNode())) + return Op; + + // Extract individual elements and select one of them. + SDValue Vector = Op->getOperand(0); + EVT VectorVT = Vector.getValueType(); + assert(VectorVT == MVT::v2f16 && "Unexpected vector type."); + EVT EltVT = VectorVT.getVectorElementType(); + + SDLoc dl(Op.getNode()); + SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, + DAG.getIntPtrConstant(0, dl)); + SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector, + DAG.getIntPtrConstant(1, dl)); + return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1, + ISD::CondCode::SETEQ); +} + +/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which +/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift +/// amount, or +/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift +/// amount. +SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); + + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; + + if (VTBits == 32 && STI.getSmVersion() >= 35) { + // For 32bit and sm35, we can use the funnel shift 'shf' instruction. + // {dHi, dLo} = {aHi, aLo} >> Amt + // dHi = aHi >> Amt + // dLo = shf.r.clamp aLo, aHi, Amt + + SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, + ShAmt); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); + } + else { + // {dHi, dLo} = {aHi, aLo} >> Amt + // - if (Amt>=size) then + // dLo = aHi >> (Amt-size) + // dHi = aHi >> Amt (this is either all 0 or all 1) + // else + // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) + // dHi = aHi >> Amt + + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(VTBits, dl, MVT::i32), + ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, + DAG.getConstant(VTBits, dl, MVT::i32)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + + SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, + DAG.getConstant(VTBits, dl, MVT::i32), + ISD::SETGE); + SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); + } +} + +/// LowerShiftLeftParts - Lower SHL_PARTS, which +/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift +/// amount, or +/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift +/// amount. +SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + assert(Op.getOpcode() == ISD::SHL_PARTS); + + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + + if (VTBits == 32 && STI.getSmVersion() >= 35) { + // For 32bit and sm35, we can use the funnel shift 'shf' instruction. + // {dHi, dLo} = {aHi, aLo} << Amt + // dHi = shf.l.clamp aLo, aHi, Amt + // dLo = aLo << Amt + + SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, + ShAmt); + SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); + } + else { + // {dHi, dLo} = {aHi, aLo} << Amt + // - if (Amt>=size) then + // dLo = aLo << Amt (all 0) + // dLo = aLo << (Amt-size) + // else + // dLo = aLo << Amt + // dHi = (aHi << Amt) | (aLo >> (size-Amt)) + + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(VTBits, dl, MVT::i32), + ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, + DAG.getConstant(VTBits, dl, MVT::i32)); + SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + + SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, + DAG.getConstant(VTBits, dl, MVT::i32), + ISD::SETGE); + SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); + } +} + +SDValue +NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + case ISD::RETURNADDR: + return SDValue(); + case ISD::FRAMEADDR: + return SDValue(); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return Op; + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: + return Op; + case ISD::EXTRACT_VECTOR_ELT: + return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: + return LowerSTORE(Op, DAG); + case ISD::LOAD: + return LowerLOAD(Op, DAG); + case ISD::SHL_PARTS: + return LowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: + return LowerShiftRightParts(Op, DAG); + case ISD::SELECT: + return LowerSelect(Op, DAG); + default: + llvm_unreachable("Custom lowering not defined for operation"); + } +} + +SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const { + SDValue Op0 = Op->getOperand(0); + SDValue Op1 = Op->getOperand(1); + SDValue Op2 = Op->getOperand(2); + SDLoc DL(Op.getNode()); + + assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1"); + + Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); + Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); + SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select); + + return Trunc; +} + +SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType() == MVT::i1) + return LowerLOADi1(Op, DAG); + + // v2f16 is legal, so we can't rely on legalizer to handle unaligned + // loads and have to handle it here. + if (Op.getValueType() == MVT::v2f16) { + LoadSDNode *Load = cast(Op); + EVT MemVT = Load->getMemoryVT(); + if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + Load->getAddressSpace(), Load->getAlignment())) { + SDValue Ops[2]; + std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); + return DAG.getMergeValues(Ops, SDLoc(Op)); + } + } + + return SDValue(); +} + +// v = ld i1* addr +// => +// v1 = ld i8* addr (-> i16) +// v = trunc i16 to i1 +SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + LoadSDNode *LD = cast(Node); + SDLoc dl(Node); + assert(LD->getExtensionType() == ISD::NON_EXTLOAD); + assert(Node->getValueType(0) == MVT::i1 && + "Custom lowering for i1 load only"); + SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LD->getAlignment(), + LD->getMemOperand()->getFlags()); + SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); + // The legalizer (the caller) is expecting two values from the legalized + // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() + // in LegalizeDAG.cpp which also uses MergeValues. + SDValue Ops[] = { result, LD->getChain() }; + return DAG.getMergeValues(Ops, dl); +} + +SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + StoreSDNode *Store = cast(Op); + EVT VT = Store->getMemoryVT(); + + if (VT == MVT::i1) + return LowerSTOREi1(Op, DAG); + + // v2f16 is legal, so we can't rely on legalizer to handle unaligned + // stores and have to handle it here. + if (VT == MVT::v2f16 && + !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + Store->getAddressSpace(), Store->getAlignment())) + return expandUnalignedStore(Store, DAG); + + if (VT.isVector()) + return LowerSTOREVector(Op, DAG); + + return SDValue(); +} + +SDValue +NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { + SDNode *N = Op.getNode(); + SDValue Val = N->getOperand(1); + SDLoc DL(N); + EVT ValVT = Val.getValueType(); + + if (ValVT.isVector()) { + // We only handle "native" vector sizes for now, e.g. <4 x double> is not + // legal. We can (and should) split that into 2 stores of <2 x double> here + // but I'm leaving that as a TODO for now. + if (!ValVT.isSimple()) + return SDValue(); + switch (ValVT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::v2i8: + case MVT::v2i16: + case MVT::v2i32: + case MVT::v2i64: + case MVT::v2f16: + case MVT::v2f32: + case MVT::v2f64: + case MVT::v4i8: + case MVT::v4i16: + case MVT::v4i32: + case MVT::v4f16: + case MVT::v4f32: + case MVT::v8f16: // <4 x f16x2> + // This is a "native" vector type + break; + } + + MemSDNode *MemSD = cast(N); + const DataLayout &TD = DAG.getDataLayout(); + + unsigned Align = MemSD->getAlignment(); + unsigned PrefAlign = + TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); + if (Align < PrefAlign) { + // This store is not sufficiently aligned, so bail out and let this vector + // store be scalarized. Note that we may still be able to emit smaller + // vector stores. For example, if we are storing a <4 x float> with an + // alignment of 8, this check will fail but the legalizer will try again + // with 2 x <2 x float>, which will succeed with an alignment of 8. + return SDValue(); + } + + unsigned Opcode = 0; + EVT EltVT = ValVT.getVectorElementType(); + unsigned NumElts = ValVT.getVectorNumElements(); + + // Since StoreV2 is a target node, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // stored type to i16 and propagate the "real" type as the memory type. + bool NeedExt = false; + if (EltVT.getSizeInBits() < 16) + NeedExt = true; + + bool StoreF16x2 = false; + switch (NumElts) { + default: + return SDValue(); + case 2: + Opcode = NVPTXISD::StoreV2; + break; + case 4: + Opcode = NVPTXISD::StoreV4; + break; + case 8: + // v8f16 is a special case. PTX doesn't have st.v8.f16 + // instruction. Instead, we split the vector into v2f16 chunks and + // store them with st.v4.b32. + assert(EltVT == MVT::f16 && "Wrong type for the vector."); + Opcode = NVPTXISD::StoreV4; + StoreF16x2 = true; + break; + } + + SmallVector Ops; + + // First is the chain + Ops.push_back(N->getOperand(0)); + + if (StoreF16x2) { + // Combine f16,f16 -> v2f16 + NumElts /= 2; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val, + DAG.getIntPtrConstant(i * 2, DL)); + SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val, + DAG.getIntPtrConstant(i * 2 + 1, DL)); + SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1); + Ops.push_back(V2); + } + } else { + // Then the split values + for (unsigned i = 0; i < NumElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val, + DAG.getIntPtrConstant(i, DL)); + if (NeedExt) + ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal); + Ops.push_back(ExtVal); + } + } + + // Then any remaining arguments + Ops.append(N->op_begin() + 2, N->op_end()); + + SDValue NewSt = + DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops, + MemSD->getMemoryVT(), MemSD->getMemOperand()); + + // return DCI.CombineTo(N, NewSt, true); + return NewSt; + } + + return SDValue(); +} + +// st i1 v, addr +// => +// v1 = zxt v to i16 +// st.u8 i16, addr +SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + SDLoc dl(Node); + StoreSDNode *ST = cast(Node); + SDValue Tmp1 = ST->getChain(); + SDValue Tmp2 = ST->getBasePtr(); + SDValue Tmp3 = ST->getValue(); + assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Tmp3); + SDValue Result = + DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), MVT::i8, + ST->getAlignment(), ST->getMemOperand()->getFlags()); + return Result; +} + +SDValue +NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { + std::string ParamSym; + raw_string_ostream ParamStr(ParamSym); + + ParamStr << DAG.getMachineFunction().getName() << "_param_" << idx; + ParamStr.flush(); + + std::string *SavedStr = + nvTM->getManagedStrPool()->getManagedString(ParamSym.c_str()); + return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); +} + +// Check to see if the kernel argument is image*_t or sampler_t + +static bool isImageOrSamplerVal(const Value *arg, const Module *context) { + static const char *const specialTypes[] = { "struct._image2d_t", + "struct._image3d_t", + "struct._sampler_t" }; + + Type *Ty = arg->getType(); + auto *PTy = dyn_cast(Ty); + + if (!PTy) + return false; + + if (!context) + return false; + + auto *STy = dyn_cast(PTy->getElementType()); + if (!STy || STy->isLiteral()) + return false; + + return std::find(std::begin(specialTypes), std::end(specialTypes), + STy->getName()) != std::end(specialTypes); +} + +SDValue NVPTXTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + const DataLayout &DL = DAG.getDataLayout(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + const Function *F = MF.getFunction(); + const AttributeList &PAL = F->getAttributes(); + const TargetLowering *TLI = STI.getTargetLowering(); + + SDValue Root = DAG.getRoot(); + std::vector OutChains; + + bool isABI = (STI.getSmVersion() >= 20); + assert(isABI && "Non-ABI compilation is not supported"); + if (!isABI) + return Chain; + + std::vector argTypes; + std::vector theArgs; + for (const Argument &I : F->args()) { + theArgs.push_back(&I); + argTypes.push_back(I.getType()); + } + // argTypes.size() (or theArgs.size()) and Ins.size() need not match. + // Ins.size() will be larger + // * if there is an aggregate argument with multiple fields (each field + // showing up separately in Ins) + // * if there is a vector argument with more than typical vector-length + // elements (generally if more than 4) where each vector element is + // individually present in Ins. + // So a different index should be used for indexing into Ins. + // See similar issue in LowerCall. + unsigned InsIdx = 0; + + int idx = 0; + for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) { + Type *Ty = argTypes[i]; + + // If the kernel argument is image*_t or sampler_t, convert it to + // a i32 constant holding the parameter position. This can later + // matched in the AsmPrinter to output the correct mangled name. + if (isImageOrSamplerVal( + theArgs[i], + (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() + : nullptr))) { + assert(isKernelFunction(*F) && + "Only kernels can have image/sampler params"); + InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32)); + continue; + } + + if (theArgs[i]->use_empty()) { + // argument is dead + if (Ty->isAggregateType()) { + SmallVector vtparts; + + ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); + assert(vtparts.size() > 0 && "empty aggregate type not expected"); + for (unsigned parti = 0, parte = vtparts.size(); parti != parte; + ++parti) { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); + ++InsIdx; + } + if (vtparts.size() > 0) + --InsIdx; + continue; + } + if (Ty->isVectorTy()) { + EVT ObjectVT = getValueType(DL, Ty); + unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); + for (unsigned parti = 0; parti < NumRegs; ++parti) { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); + ++InsIdx; + } + if (NumRegs > 0) + --InsIdx; + continue; + } + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); + continue; + } + + // In the following cases, assign a node order of "idx+1" + // to newly created nodes. The SDNodes for params have to + // appear in the same order as their order of appearance + // in the original function. "idx+1" holds that order. + if (!PAL.hasParamAttribute(i, Attribute::ByVal)) { + bool aggregateIsPacked = false; + if (StructType *STy = dyn_cast(Ty)) + aggregateIsPacked = STy->isPacked(); + + SmallVector VTs; + SmallVector Offsets; + ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0); + assert(VTs.size() > 0 && "Unexpected empty type."); + auto VectorInfo = + VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty)); + + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); + int VecIdx = -1; // Index of the first element of the current vector. + for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) { + if (VectorInfo[parti] & PVF_FIRST) { + assert(VecIdx == -1 && "Orphaned vector."); + VecIdx = parti; + } + + // That's the last element of this store op. + if (VectorInfo[parti] & PVF_LAST) { + unsigned NumElts = parti - VecIdx + 1; + EVT EltVT = VTs[parti]; + // i1 is loaded/stored as i8. + EVT LoadVT = EltVT; + if (EltVT == MVT::i1) + LoadVT = MVT::i8; + else if (EltVT == MVT::v2f16) + // getLoad needs a vector type, but it can't handle + // vectors which contain v2f16 elements. So we must load + // using i32 here and then bitcast back. + LoadVT = MVT::i32; + + EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts); + SDValue VecAddr = + DAG.getNode(ISD::ADD, dl, PtrVT, Arg, + DAG.getConstant(Offsets[VecIdx], dl, PtrVT)); + Value *srcValue = Constant::getNullValue(PointerType::get( + EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM)); + SDValue P = + DAG.getLoad(VecVT, dl, Root, VecAddr, + MachinePointerInfo(srcValue), aggregateIsPacked, + MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant); + if (P.getNode()) + P.getNode()->setIROrder(idx + 1); + for (unsigned j = 0; j < NumElts; ++j) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P, + DAG.getIntPtrConstant(j, dl)); + // We've loaded i1 as an i8 and now must truncate it back to i1 + if (EltVT == MVT::i1) + Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt); + // v2f16 was loaded as an i32. Now we must bitcast it back. + else if (EltVT == MVT::v2f16) + Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt); + // Extend the element if necesary (e.g. an i8 is loaded + // into an i16 register) + if (Ins[InsIdx].VT.isInteger() && + Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) { + unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt); + } + InVals.push_back(Elt); + } + + // Reset vector tracking state. + VecIdx = -1; + } + ++InsIdx; + } + if (VTs.size() > 0) + --InsIdx; + continue; + } + + // Param has ByVal attribute + // Return MoveParam(param symbol). + // Ideally, the param symbol can be returned directly, + // but when SDNode builder decides to use it in a CopyToReg(), + // machine instruction fails because TargetExternalSymbol + // (not lowered) is target dependent, and CopyToReg assumes + // the source is lowered. + EVT ObjectVT = getValueType(DL, Ty); + assert(ObjectVT == Ins[InsIdx].VT && + "Ins type did not match function type"); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); + SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); + if (p.getNode()) + p.getNode()->setIROrder(idx + 1); + InVals.push_back(p); + } + + // Clang will check explicit VarArg and issue error if any. However, Clang + // will let code with + // implicit var arg like f() pass. See bug 617733. + // We treat this case as if the arg list is empty. + // if (F.isVarArg()) { + // assert(0 && "VarArg not supported yet!"); + //} + + if (!OutChains.empty()) + DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); + + return Chain; +} + +SDValue +NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &dl, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + Type *RetTy = MF.getFunction()->getReturnType(); + + bool isABI = (STI.getSmVersion() >= 20); + assert(isABI && "Non-ABI compilation is not supported"); + if (!isABI) + return Chain; + + const DataLayout DL = DAG.getDataLayout(); + SmallVector VTs; + SmallVector Offsets; + ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); + assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); + + auto VectorInfo = VectorizePTXValueVTs( + VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1); + + // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than + // 32-bits are sign extended or zero extended, depending on whether + // they are signed or unsigned types. + bool ExtendIntegerRetVal = + RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; + + SmallVector StoreOperands; + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + // New load/store. Record chain and offset operands. + if (VectorInfo[i] & PVF_FIRST) { + assert(StoreOperands.empty() && "Orphaned operand list."); + StoreOperands.push_back(Chain); + StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32)); + } + + SDValue RetVal = OutVals[i]; + if (ExtendIntegerRetVal) { + RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND, + dl, MVT::i32, RetVal); + } else if (RetVal.getValueSizeInBits() < 16) { + // Use 16-bit registers for small load-stores as it's the + // smallest general purpose register size supported by NVPTX. + RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal); + } + + // Record the value to return. + StoreOperands.push_back(RetVal); + + // That's the last element of this store op. + if (VectorInfo[i] & PVF_LAST) { + NVPTXISD::NodeType Op; + unsigned NumElts = StoreOperands.size() - 2; + switch (NumElts) { + case 1: + Op = NVPTXISD::StoreRetval; + break; + case 2: + Op = NVPTXISD::StoreRetvalV2; + break; + case 4: + Op = NVPTXISD::StoreRetvalV4; + break; + default: + llvm_unreachable("Invalid vector info."); + } + + // Adjust type of load/store op if we've extended the scalar + // return value. + EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i]; + Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other), + StoreOperands, TheStoreType, + MachinePointerInfo(), /* Align */ 1, + /* Volatile */ false, /* ReadMem */ false, + /* WriteMem */ true, /* Size */ 0); + // Cleanup vector state. + StoreOperands.clear(); + } + } + + return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); +} + +void NVPTXTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, std::string &Constraint, std::vector &Ops, + SelectionDAG &DAG) const { + if (Constraint.length() > 1) + return; + else + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +static unsigned getOpcForTextureInstr(unsigned Intrinsic) { + switch (Intrinsic) { + default: + return 0; + + case Intrinsic::nvvm_tex_1d_v4f32_s32: + return NVPTXISD::Tex1DFloatS32; + case Intrinsic::nvvm_tex_1d_v4f32_f32: + return NVPTXISD::Tex1DFloatFloat; + case Intrinsic::nvvm_tex_1d_level_v4f32_f32: + return NVPTXISD::Tex1DFloatFloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: + return NVPTXISD::Tex1DFloatFloatGrad; + case Intrinsic::nvvm_tex_1d_v4s32_s32: + return NVPTXISD::Tex1DS32S32; + case Intrinsic::nvvm_tex_1d_v4s32_f32: + return NVPTXISD::Tex1DS32Float; + case Intrinsic::nvvm_tex_1d_level_v4s32_f32: + return NVPTXISD::Tex1DS32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: + return NVPTXISD::Tex1DS32FloatGrad; + case Intrinsic::nvvm_tex_1d_v4u32_s32: + return NVPTXISD::Tex1DU32S32; + case Intrinsic::nvvm_tex_1d_v4u32_f32: + return NVPTXISD::Tex1DU32Float; + case Intrinsic::nvvm_tex_1d_level_v4u32_f32: + return NVPTXISD::Tex1DU32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: + return NVPTXISD::Tex1DU32FloatGrad; + + case Intrinsic::nvvm_tex_1d_array_v4f32_s32: + return NVPTXISD::Tex1DArrayFloatS32; + case Intrinsic::nvvm_tex_1d_array_v4f32_f32: + return NVPTXISD::Tex1DArrayFloatFloat; + case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: + return NVPTXISD::Tex1DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: + return NVPTXISD::Tex1DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_1d_array_v4s32_s32: + return NVPTXISD::Tex1DArrayS32S32; + case Intrinsic::nvvm_tex_1d_array_v4s32_f32: + return NVPTXISD::Tex1DArrayS32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: + return NVPTXISD::Tex1DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: + return NVPTXISD::Tex1DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_1d_array_v4u32_s32: + return NVPTXISD::Tex1DArrayU32S32; + case Intrinsic::nvvm_tex_1d_array_v4u32_f32: + return NVPTXISD::Tex1DArrayU32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: + return NVPTXISD::Tex1DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: + return NVPTXISD::Tex1DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_2d_v4f32_s32: + return NVPTXISD::Tex2DFloatS32; + case Intrinsic::nvvm_tex_2d_v4f32_f32: + return NVPTXISD::Tex2DFloatFloat; + case Intrinsic::nvvm_tex_2d_level_v4f32_f32: + return NVPTXISD::Tex2DFloatFloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: + return NVPTXISD::Tex2DFloatFloatGrad; + case Intrinsic::nvvm_tex_2d_v4s32_s32: + return NVPTXISD::Tex2DS32S32; + case Intrinsic::nvvm_tex_2d_v4s32_f32: + return NVPTXISD::Tex2DS32Float; + case Intrinsic::nvvm_tex_2d_level_v4s32_f32: + return NVPTXISD::Tex2DS32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: + return NVPTXISD::Tex2DS32FloatGrad; + case Intrinsic::nvvm_tex_2d_v4u32_s32: + return NVPTXISD::Tex2DU32S32; + case Intrinsic::nvvm_tex_2d_v4u32_f32: + return NVPTXISD::Tex2DU32Float; + case Intrinsic::nvvm_tex_2d_level_v4u32_f32: + return NVPTXISD::Tex2DU32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: + return NVPTXISD::Tex2DU32FloatGrad; + + case Intrinsic::nvvm_tex_2d_array_v4f32_s32: + return NVPTXISD::Tex2DArrayFloatS32; + case Intrinsic::nvvm_tex_2d_array_v4f32_f32: + return NVPTXISD::Tex2DArrayFloatFloat; + case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: + return NVPTXISD::Tex2DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: + return NVPTXISD::Tex2DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_2d_array_v4s32_s32: + return NVPTXISD::Tex2DArrayS32S32; + case Intrinsic::nvvm_tex_2d_array_v4s32_f32: + return NVPTXISD::Tex2DArrayS32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: + return NVPTXISD::Tex2DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: + return NVPTXISD::Tex2DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_2d_array_v4u32_s32: + return NVPTXISD::Tex2DArrayU32S32; + case Intrinsic::nvvm_tex_2d_array_v4u32_f32: + return NVPTXISD::Tex2DArrayU32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: + return NVPTXISD::Tex2DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: + return NVPTXISD::Tex2DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_3d_v4f32_s32: + return NVPTXISD::Tex3DFloatS32; + case Intrinsic::nvvm_tex_3d_v4f32_f32: + return NVPTXISD::Tex3DFloatFloat; + case Intrinsic::nvvm_tex_3d_level_v4f32_f32: + return NVPTXISD::Tex3DFloatFloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: + return NVPTXISD::Tex3DFloatFloatGrad; + case Intrinsic::nvvm_tex_3d_v4s32_s32: + return NVPTXISD::Tex3DS32S32; + case Intrinsic::nvvm_tex_3d_v4s32_f32: + return NVPTXISD::Tex3DS32Float; + case Intrinsic::nvvm_tex_3d_level_v4s32_f32: + return NVPTXISD::Tex3DS32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: + return NVPTXISD::Tex3DS32FloatGrad; + case Intrinsic::nvvm_tex_3d_v4u32_s32: + return NVPTXISD::Tex3DU32S32; + case Intrinsic::nvvm_tex_3d_v4u32_f32: + return NVPTXISD::Tex3DU32Float; + case Intrinsic::nvvm_tex_3d_level_v4u32_f32: + return NVPTXISD::Tex3DU32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: + return NVPTXISD::Tex3DU32FloatGrad; + + case Intrinsic::nvvm_tex_cube_v4f32_f32: + return NVPTXISD::TexCubeFloatFloat; + case Intrinsic::nvvm_tex_cube_level_v4f32_f32: + return NVPTXISD::TexCubeFloatFloatLevel; + case Intrinsic::nvvm_tex_cube_v4s32_f32: + return NVPTXISD::TexCubeS32Float; + case Intrinsic::nvvm_tex_cube_level_v4s32_f32: + return NVPTXISD::TexCubeS32FloatLevel; + case Intrinsic::nvvm_tex_cube_v4u32_f32: + return NVPTXISD::TexCubeU32Float; + case Intrinsic::nvvm_tex_cube_level_v4u32_f32: + return NVPTXISD::TexCubeU32FloatLevel; + + case Intrinsic::nvvm_tex_cube_array_v4f32_f32: + return NVPTXISD::TexCubeArrayFloatFloat; + case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: + return NVPTXISD::TexCubeArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_cube_array_v4s32_f32: + return NVPTXISD::TexCubeArrayS32Float; + case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: + return NVPTXISD::TexCubeArrayS32FloatLevel; + case Intrinsic::nvvm_tex_cube_array_v4u32_f32: + return NVPTXISD::TexCubeArrayU32Float; + case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: + return NVPTXISD::TexCubeArrayU32FloatLevel; + + case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: + return NVPTXISD::Tld4R2DFloatFloat; + case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: + return NVPTXISD::Tld4G2DFloatFloat; + case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: + return NVPTXISD::Tld4B2DFloatFloat; + case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: + return NVPTXISD::Tld4A2DFloatFloat; + case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: + return NVPTXISD::Tld4R2DS64Float; + case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: + return NVPTXISD::Tld4G2DS64Float; + case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: + return NVPTXISD::Tld4B2DS64Float; + case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: + return NVPTXISD::Tld4A2DS64Float; + case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: + return NVPTXISD::Tld4R2DU64Float; + case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: + return NVPTXISD::Tld4G2DU64Float; + case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: + return NVPTXISD::Tld4B2DU64Float; + case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: + return NVPTXISD::Tld4A2DU64Float; + + case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: + return NVPTXISD::TexUnified1DFloatS32; + case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloat; + case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: + return NVPTXISD::TexUnified1DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: + return NVPTXISD::TexUnified1DS32S32; + case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: + return NVPTXISD::TexUnified1DS32Float; + case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: + return NVPTXISD::TexUnified1DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: + return NVPTXISD::TexUnified1DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: + return NVPTXISD::TexUnified1DU32S32; + case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: + return NVPTXISD::TexUnified1DU32Float; + case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: + return NVPTXISD::TexUnified1DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: + return NVPTXISD::TexUnified1DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: + return NVPTXISD::TexUnified1DArrayFloatS32; + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: + return NVPTXISD::TexUnified1DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: + return NVPTXISD::TexUnified1DArrayS32S32; + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32Float; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: + return NVPTXISD::TexUnified1DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: + return NVPTXISD::TexUnified1DArrayU32S32; + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32Float; + case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: + return NVPTXISD::TexUnified1DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: + return NVPTXISD::TexUnified2DFloatS32; + case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloat; + case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: + return NVPTXISD::TexUnified2DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: + return NVPTXISD::TexUnified2DS32S32; + case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: + return NVPTXISD::TexUnified2DS32Float; + case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: + return NVPTXISD::TexUnified2DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: + return NVPTXISD::TexUnified2DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: + return NVPTXISD::TexUnified2DU32S32; + case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: + return NVPTXISD::TexUnified2DU32Float; + case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: + return NVPTXISD::TexUnified2DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: + return NVPTXISD::TexUnified2DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: + return NVPTXISD::TexUnified2DArrayFloatS32; + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: + return NVPTXISD::TexUnified2DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: + return NVPTXISD::TexUnified2DArrayS32S32; + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32Float; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: + return NVPTXISD::TexUnified2DArrayS32FloatGrad; + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: + return NVPTXISD::TexUnified2DArrayU32S32; + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32Float; + case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32FloatLevel; + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: + return NVPTXISD::TexUnified2DArrayU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: + return NVPTXISD::TexUnified3DFloatS32; + case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloat; + case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: + return NVPTXISD::TexUnified3DFloatFloatGrad; + case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: + return NVPTXISD::TexUnified3DS32S32; + case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: + return NVPTXISD::TexUnified3DS32Float; + case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: + return NVPTXISD::TexUnified3DS32FloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: + return NVPTXISD::TexUnified3DS32FloatGrad; + case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: + return NVPTXISD::TexUnified3DU32S32; + case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: + return NVPTXISD::TexUnified3DU32Float; + case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: + return NVPTXISD::TexUnified3DU32FloatLevel; + case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: + return NVPTXISD::TexUnified3DU32FloatGrad; + + case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: + return NVPTXISD::TexUnifiedCubeFloatFloat; + case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: + return NVPTXISD::TexUnifiedCubeFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: + return NVPTXISD::TexUnifiedCubeS32Float; + case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: + return NVPTXISD::TexUnifiedCubeS32FloatLevel; + case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: + return NVPTXISD::TexUnifiedCubeU32Float; + case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: + return NVPTXISD::TexUnifiedCubeU32FloatLevel; + + case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: + return NVPTXISD::TexUnifiedCubeArrayFloatFloat; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: + return NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: + return NVPTXISD::TexUnifiedCubeArrayS32Float; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: + return NVPTXISD::TexUnifiedCubeArrayS32FloatLevel; + case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: + return NVPTXISD::TexUnifiedCubeArrayU32Float; + case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: + return NVPTXISD::TexUnifiedCubeArrayU32FloatLevel; + + case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedR2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedG2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedB2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: + return NVPTXISD::Tld4UnifiedA2DFloatFloat; + case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedR2DS64Float; + case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedG2DS64Float; + case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedB2DS64Float; + case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: + return NVPTXISD::Tld4UnifiedA2DS64Float; + case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedR2DU64Float; + case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedG2DU64Float; + case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedB2DU64Float; + case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: + return NVPTXISD::Tld4UnifiedA2DU64Float; + } +} + +static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { + switch (Intrinsic) { + default: + return 0; + case Intrinsic::nvvm_suld_1d_i8_clamp: + return NVPTXISD::Suld1DI8Clamp; + case Intrinsic::nvvm_suld_1d_i16_clamp: + return NVPTXISD::Suld1DI16Clamp; + case Intrinsic::nvvm_suld_1d_i32_clamp: + return NVPTXISD::Suld1DI32Clamp; + case Intrinsic::nvvm_suld_1d_i64_clamp: + return NVPTXISD::Suld1DI64Clamp; + case Intrinsic::nvvm_suld_1d_v2i8_clamp: + return NVPTXISD::Suld1DV2I8Clamp; + case Intrinsic::nvvm_suld_1d_v2i16_clamp: + return NVPTXISD::Suld1DV2I16Clamp; + case Intrinsic::nvvm_suld_1d_v2i32_clamp: + return NVPTXISD::Suld1DV2I32Clamp; + case Intrinsic::nvvm_suld_1d_v2i64_clamp: + return NVPTXISD::Suld1DV2I64Clamp; + case Intrinsic::nvvm_suld_1d_v4i8_clamp: + return NVPTXISD::Suld1DV4I8Clamp; + case Intrinsic::nvvm_suld_1d_v4i16_clamp: + return NVPTXISD::Suld1DV4I16Clamp; + case Intrinsic::nvvm_suld_1d_v4i32_clamp: + return NVPTXISD::Suld1DV4I32Clamp; + case Intrinsic::nvvm_suld_1d_array_i8_clamp: + return NVPTXISD::Suld1DArrayI8Clamp; + case Intrinsic::nvvm_suld_1d_array_i16_clamp: + return NVPTXISD::Suld1DArrayI16Clamp; + case Intrinsic::nvvm_suld_1d_array_i32_clamp: + return NVPTXISD::Suld1DArrayI32Clamp; + case Intrinsic::nvvm_suld_1d_array_i64_clamp: + return NVPTXISD::Suld1DArrayI64Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: + return NVPTXISD::Suld1DArrayV2I8Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: + return NVPTXISD::Suld1DArrayV2I16Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: + return NVPTXISD::Suld1DArrayV2I32Clamp; + case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: + return NVPTXISD::Suld1DArrayV2I64Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: + return NVPTXISD::Suld1DArrayV4I8Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: + return NVPTXISD::Suld1DArrayV4I16Clamp; + case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: + return NVPTXISD::Suld1DArrayV4I32Clamp; + case Intrinsic::nvvm_suld_2d_i8_clamp: + return NVPTXISD::Suld2DI8Clamp; + case Intrinsic::nvvm_suld_2d_i16_clamp: + return NVPTXISD::Suld2DI16Clamp; + case Intrinsic::nvvm_suld_2d_i32_clamp: + return NVPTXISD::Suld2DI32Clamp; + case Intrinsic::nvvm_suld_2d_i64_clamp: + return NVPTXISD::Suld2DI64Clamp; + case Intrinsic::nvvm_suld_2d_v2i8_clamp: + return NVPTXISD::Suld2DV2I8Clamp; + case Intrinsic::nvvm_suld_2d_v2i16_clamp: + return NVPTXISD::Suld2DV2I16Clamp; + case Intrinsic::nvvm_suld_2d_v2i32_clamp: + return NVPTXISD::Suld2DV2I32Clamp; + case Intrinsic::nvvm_suld_2d_v2i64_clamp: + return NVPTXISD::Suld2DV2I64Clamp; + case Intrinsic::nvvm_suld_2d_v4i8_clamp: + return NVPTXISD::Suld2DV4I8Clamp; + case Intrinsic::nvvm_suld_2d_v4i16_clamp: + return NVPTXISD::Suld2DV4I16Clamp; + case Intrinsic::nvvm_suld_2d_v4i32_clamp: + return NVPTXISD::Suld2DV4I32Clamp; + case Intrinsic::nvvm_suld_2d_array_i8_clamp: + return NVPTXISD::Suld2DArrayI8Clamp; + case Intrinsic::nvvm_suld_2d_array_i16_clamp: + return NVPTXISD::Suld2DArrayI16Clamp; + case Intrinsic::nvvm_suld_2d_array_i32_clamp: + return NVPTXISD::Suld2DArrayI32Clamp; + case Intrinsic::nvvm_suld_2d_array_i64_clamp: + return NVPTXISD::Suld2DArrayI64Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: + return NVPTXISD::Suld2DArrayV2I8Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: + return NVPTXISD::Suld2DArrayV2I16Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: + return NVPTXISD::Suld2DArrayV2I32Clamp; + case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: + return NVPTXISD::Suld2DArrayV2I64Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: + return NVPTXISD::Suld2DArrayV4I8Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: + return NVPTXISD::Suld2DArrayV4I16Clamp; + case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: + return NVPTXISD::Suld2DArrayV4I32Clamp; + case Intrinsic::nvvm_suld_3d_i8_clamp: + return NVPTXISD::Suld3DI8Clamp; + case Intrinsic::nvvm_suld_3d_i16_clamp: + return NVPTXISD::Suld3DI16Clamp; + case Intrinsic::nvvm_suld_3d_i32_clamp: + return NVPTXISD::Suld3DI32Clamp; + case Intrinsic::nvvm_suld_3d_i64_clamp: + return NVPTXISD::Suld3DI64Clamp; + case Intrinsic::nvvm_suld_3d_v2i8_clamp: + return NVPTXISD::Suld3DV2I8Clamp; + case Intrinsic::nvvm_suld_3d_v2i16_clamp: + return NVPTXISD::Suld3DV2I16Clamp; + case Intrinsic::nvvm_suld_3d_v2i32_clamp: + return NVPTXISD::Suld3DV2I32Clamp; + case Intrinsic::nvvm_suld_3d_v2i64_clamp: + return NVPTXISD::Suld3DV2I64Clamp; + case Intrinsic::nvvm_suld_3d_v4i8_clamp: + return NVPTXISD::Suld3DV4I8Clamp; + case Intrinsic::nvvm_suld_3d_v4i16_clamp: + return NVPTXISD::Suld3DV4I16Clamp; + case Intrinsic::nvvm_suld_3d_v4i32_clamp: + return NVPTXISD::Suld3DV4I32Clamp; + case Intrinsic::nvvm_suld_1d_i8_trap: + return NVPTXISD::Suld1DI8Trap; + case Intrinsic::nvvm_suld_1d_i16_trap: + return NVPTXISD::Suld1DI16Trap; + case Intrinsic::nvvm_suld_1d_i32_trap: + return NVPTXISD::Suld1DI32Trap; + case Intrinsic::nvvm_suld_1d_i64_trap: + return NVPTXISD::Suld1DI64Trap; + case Intrinsic::nvvm_suld_1d_v2i8_trap: + return NVPTXISD::Suld1DV2I8Trap; + case Intrinsic::nvvm_suld_1d_v2i16_trap: + return NVPTXISD::Suld1DV2I16Trap; + case Intrinsic::nvvm_suld_1d_v2i32_trap: + return NVPTXISD::Suld1DV2I32Trap; + case Intrinsic::nvvm_suld_1d_v2i64_trap: + return NVPTXISD::Suld1DV2I64Trap; + case Intrinsic::nvvm_suld_1d_v4i8_trap: + return NVPTXISD::Suld1DV4I8Trap; + case Intrinsic::nvvm_suld_1d_v4i16_trap: + return NVPTXISD::Suld1DV4I16Trap; + case Intrinsic::nvvm_suld_1d_v4i32_trap: + return NVPTXISD::Suld1DV4I32Trap; + case Intrinsic::nvvm_suld_1d_array_i8_trap: + return NVPTXISD::Suld1DArrayI8Trap; + case Intrinsic::nvvm_suld_1d_array_i16_trap: + return NVPTXISD::Suld1DArrayI16Trap; + case Intrinsic::nvvm_suld_1d_array_i32_trap: + return NVPTXISD::Suld1DArrayI32Trap; + case Intrinsic::nvvm_suld_1d_array_i64_trap: + return NVPTXISD::Suld1DArrayI64Trap; + case Intrinsic::nvvm_suld_1d_array_v2i8_trap: + return NVPTXISD::Suld1DArrayV2I8Trap; + case Intrinsic::nvvm_suld_1d_array_v2i16_trap: + return NVPTXISD::Suld1DArrayV2I16Trap; + case Intrinsic::nvvm_suld_1d_array_v2i32_trap: + return NVPTXISD::Suld1DArrayV2I32Trap; + case Intrinsic::nvvm_suld_1d_array_v2i64_trap: + return NVPTXISD::Suld1DArrayV2I64Trap; + case Intrinsic::nvvm_suld_1d_array_v4i8_trap: + return NVPTXISD::Suld1DArrayV4I8Trap; + case Intrinsic::nvvm_suld_1d_array_v4i16_trap: + return NVPTXISD::Suld1DArrayV4I16Trap; + case Intrinsic::nvvm_suld_1d_array_v4i32_trap: + return NVPTXISD::Suld1DArrayV4I32Trap; + case Intrinsic::nvvm_suld_2d_i8_trap: + return NVPTXISD::Suld2DI8Trap; + case Intrinsic::nvvm_suld_2d_i16_trap: + return NVPTXISD::Suld2DI16Trap; + case Intrinsic::nvvm_suld_2d_i32_trap: + return NVPTXISD::Suld2DI32Trap; + case Intrinsic::nvvm_suld_2d_i64_trap: + return NVPTXISD::Suld2DI64Trap; + case Intrinsic::nvvm_suld_2d_v2i8_trap: + return NVPTXISD::Suld2DV2I8Trap; + case Intrinsic::nvvm_suld_2d_v2i16_trap: + return NVPTXISD::Suld2DV2I16Trap; + case Intrinsic::nvvm_suld_2d_v2i32_trap: + return NVPTXISD::Suld2DV2I32Trap; + case Intrinsic::nvvm_suld_2d_v2i64_trap: + return NVPTXISD::Suld2DV2I64Trap; + case Intrinsic::nvvm_suld_2d_v4i8_trap: + return NVPTXISD::Suld2DV4I8Trap; + case Intrinsic::nvvm_suld_2d_v4i16_trap: + return NVPTXISD::Suld2DV4I16Trap; + case Intrinsic::nvvm_suld_2d_v4i32_trap: + return NVPTXISD::Suld2DV4I32Trap; + case Intrinsic::nvvm_suld_2d_array_i8_trap: + return NVPTXISD::Suld2DArrayI8Trap; + case Intrinsic::nvvm_suld_2d_array_i16_trap: + return NVPTXISD::Suld2DArrayI16Trap; + case Intrinsic::nvvm_suld_2d_array_i32_trap: + return NVPTXISD::Suld2DArrayI32Trap; + case Intrinsic::nvvm_suld_2d_array_i64_trap: + return NVPTXISD::Suld2DArrayI64Trap; + case Intrinsic::nvvm_suld_2d_array_v2i8_trap: + return NVPTXISD::Suld2DArrayV2I8Trap; + case Intrinsic::nvvm_suld_2d_array_v2i16_trap: + return NVPTXISD::Suld2DArrayV2I16Trap; + case Intrinsic::nvvm_suld_2d_array_v2i32_trap: + return NVPTXISD::Suld2DArrayV2I32Trap; + case Intrinsic::nvvm_suld_2d_array_v2i64_trap: + return NVPTXISD::Suld2DArrayV2I64Trap; + case Intrinsic::nvvm_suld_2d_array_v4i8_trap: + return NVPTXISD::Suld2DArrayV4I8Trap; + case Intrinsic::nvvm_suld_2d_array_v4i16_trap: + return NVPTXISD::Suld2DArrayV4I16Trap; + case Intrinsic::nvvm_suld_2d_array_v4i32_trap: + return NVPTXISD::Suld2DArrayV4I32Trap; + case Intrinsic::nvvm_suld_3d_i8_trap: + return NVPTXISD::Suld3DI8Trap; + case Intrinsic::nvvm_suld_3d_i16_trap: + return NVPTXISD::Suld3DI16Trap; + case Intrinsic::nvvm_suld_3d_i32_trap: + return NVPTXISD::Suld3DI32Trap; + case Intrinsic::nvvm_suld_3d_i64_trap: + return NVPTXISD::Suld3DI64Trap; + case Intrinsic::nvvm_suld_3d_v2i8_trap: + return NVPTXISD::Suld3DV2I8Trap; + case Intrinsic::nvvm_suld_3d_v2i16_trap: + return NVPTXISD::Suld3DV2I16Trap; + case Intrinsic::nvvm_suld_3d_v2i32_trap: + return NVPTXISD::Suld3DV2I32Trap; + case Intrinsic::nvvm_suld_3d_v2i64_trap: + return NVPTXISD::Suld3DV2I64Trap; + case Intrinsic::nvvm_suld_3d_v4i8_trap: + return NVPTXISD::Suld3DV4I8Trap; + case Intrinsic::nvvm_suld_3d_v4i16_trap: + return NVPTXISD::Suld3DV4I16Trap; + case Intrinsic::nvvm_suld_3d_v4i32_trap: + return NVPTXISD::Suld3DV4I32Trap; + case Intrinsic::nvvm_suld_1d_i8_zero: + return NVPTXISD::Suld1DI8Zero; + case Intrinsic::nvvm_suld_1d_i16_zero: + return NVPTXISD::Suld1DI16Zero; + case Intrinsic::nvvm_suld_1d_i32_zero: + return NVPTXISD::Suld1DI32Zero; + case Intrinsic::nvvm_suld_1d_i64_zero: + return NVPTXISD::Suld1DI64Zero; + case Intrinsic::nvvm_suld_1d_v2i8_zero: + return NVPTXISD::Suld1DV2I8Zero; + case Intrinsic::nvvm_suld_1d_v2i16_zero: + return NVPTXISD::Suld1DV2I16Zero; + case Intrinsic::nvvm_suld_1d_v2i32_zero: + return NVPTXISD::Suld1DV2I32Zero; + case Intrinsic::nvvm_suld_1d_v2i64_zero: + return NVPTXISD::Suld1DV2I64Zero; + case Intrinsic::nvvm_suld_1d_v4i8_zero: + return NVPTXISD::Suld1DV4I8Zero; + case Intrinsic::nvvm_suld_1d_v4i16_zero: + return NVPTXISD::Suld1DV4I16Zero; + case Intrinsic::nvvm_suld_1d_v4i32_zero: + return NVPTXISD::Suld1DV4I32Zero; + case Intrinsic::nvvm_suld_1d_array_i8_zero: + return NVPTXISD::Suld1DArrayI8Zero; + case Intrinsic::nvvm_suld_1d_array_i16_zero: + return NVPTXISD::Suld1DArrayI16Zero; + case Intrinsic::nvvm_suld_1d_array_i32_zero: + return NVPTXISD::Suld1DArrayI32Zero; + case Intrinsic::nvvm_suld_1d_array_i64_zero: + return NVPTXISD::Suld1DArrayI64Zero; + case Intrinsic::nvvm_suld_1d_array_v2i8_zero: + return NVPTXISD::Suld1DArrayV2I8Zero; + case Intrinsic::nvvm_suld_1d_array_v2i16_zero: + return NVPTXISD::Suld1DArrayV2I16Zero; + case Intrinsic::nvvm_suld_1d_array_v2i32_zero: + return NVPTXISD::Suld1DArrayV2I32Zero; + case Intrinsic::nvvm_suld_1d_array_v2i64_zero: + return NVPTXISD::Suld1DArrayV2I64Zero; + case Intrinsic::nvvm_suld_1d_array_v4i8_zero: + return NVPTXISD::Suld1DArrayV4I8Zero; + case Intrinsic::nvvm_suld_1d_array_v4i16_zero: + return NVPTXISD::Suld1DArrayV4I16Zero; + case Intrinsic::nvvm_suld_1d_array_v4i32_zero: + return NVPTXISD::Suld1DArrayV4I32Zero; + case Intrinsic::nvvm_suld_2d_i8_zero: + return NVPTXISD::Suld2DI8Zero; + case Intrinsic::nvvm_suld_2d_i16_zero: + return NVPTXISD::Suld2DI16Zero; + case Intrinsic::nvvm_suld_2d_i32_zero: + return NVPTXISD::Suld2DI32Zero; + case Intrinsic::nvvm_suld_2d_i64_zero: + return NVPTXISD::Suld2DI64Zero; + case Intrinsic::nvvm_suld_2d_v2i8_zero: + return NVPTXISD::Suld2DV2I8Zero; + case Intrinsic::nvvm_suld_2d_v2i16_zero: + return NVPTXISD::Suld2DV2I16Zero; + case Intrinsic::nvvm_suld_2d_v2i32_zero: + return NVPTXISD::Suld2DV2I32Zero; + case Intrinsic::nvvm_suld_2d_v2i64_zero: + return NVPTXISD::Suld2DV2I64Zero; + case Intrinsic::nvvm_suld_2d_v4i8_zero: + return NVPTXISD::Suld2DV4I8Zero; + case Intrinsic::nvvm_suld_2d_v4i16_zero: + return NVPTXISD::Suld2DV4I16Zero; + case Intrinsic::nvvm_suld_2d_v4i32_zero: + return NVPTXISD::Suld2DV4I32Zero; + case Intrinsic::nvvm_suld_2d_array_i8_zero: + return NVPTXISD::Suld2DArrayI8Zero; + case Intrinsic::nvvm_suld_2d_array_i16_zero: + return NVPTXISD::Suld2DArrayI16Zero; + case Intrinsic::nvvm_suld_2d_array_i32_zero: + return NVPTXISD::Suld2DArrayI32Zero; + case Intrinsic::nvvm_suld_2d_array_i64_zero: + return NVPTXISD::Suld2DArrayI64Zero; + case Intrinsic::nvvm_suld_2d_array_v2i8_zero: + return NVPTXISD::Suld2DArrayV2I8Zero; + case Intrinsic::nvvm_suld_2d_array_v2i16_zero: + return NVPTXISD::Suld2DArrayV2I16Zero; + case Intrinsic::nvvm_suld_2d_array_v2i32_zero: + return NVPTXISD::Suld2DArrayV2I32Zero; + case Intrinsic::nvvm_suld_2d_array_v2i64_zero: + return NVPTXISD::Suld2DArrayV2I64Zero; + case Intrinsic::nvvm_suld_2d_array_v4i8_zero: + return NVPTXISD::Suld2DArrayV4I8Zero; + case Intrinsic::nvvm_suld_2d_array_v4i16_zero: + return NVPTXISD::Suld2DArrayV4I16Zero; + case Intrinsic::nvvm_suld_2d_array_v4i32_zero: + return NVPTXISD::Suld2DArrayV4I32Zero; + case Intrinsic::nvvm_suld_3d_i8_zero: + return NVPTXISD::Suld3DI8Zero; + case Intrinsic::nvvm_suld_3d_i16_zero: + return NVPTXISD::Suld3DI16Zero; + case Intrinsic::nvvm_suld_3d_i32_zero: + return NVPTXISD::Suld3DI32Zero; + case Intrinsic::nvvm_suld_3d_i64_zero: + return NVPTXISD::Suld3DI64Zero; + case Intrinsic::nvvm_suld_3d_v2i8_zero: + return NVPTXISD::Suld3DV2I8Zero; + case Intrinsic::nvvm_suld_3d_v2i16_zero: + return NVPTXISD::Suld3DV2I16Zero; + case Intrinsic::nvvm_suld_3d_v2i32_zero: + return NVPTXISD::Suld3DV2I32Zero; + case Intrinsic::nvvm_suld_3d_v2i64_zero: + return NVPTXISD::Suld3DV2I64Zero; + case Intrinsic::nvvm_suld_3d_v4i8_zero: + return NVPTXISD::Suld3DV4I8Zero; + case Intrinsic::nvvm_suld_3d_v4i16_zero: + return NVPTXISD::Suld3DV4I16Zero; + case Intrinsic::nvvm_suld_3d_v4i32_zero: + return NVPTXISD::Suld3DV4I32Zero; + } +} + +// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as +// TgtMemIntrinsic +// because we need the information that is only available in the "Value" type +// of destination +// pointer. In particular, the address space information. +bool NVPTXTargetLowering::getTgtMemIntrinsic( + IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { + switch (Intrinsic) { + default: + return false; + + case Intrinsic::nvvm_atomic_load_add_f32: + case Intrinsic::nvvm_atomic_load_inc_32: + case Intrinsic::nvvm_atomic_load_dec_32: + + case Intrinsic::nvvm_atomic_add_gen_f_cta: + case Intrinsic::nvvm_atomic_add_gen_f_sys: + case Intrinsic::nvvm_atomic_add_gen_i_cta: + case Intrinsic::nvvm_atomic_add_gen_i_sys: + case Intrinsic::nvvm_atomic_and_gen_i_cta: + case Intrinsic::nvvm_atomic_and_gen_i_sys: + case Intrinsic::nvvm_atomic_cas_gen_i_cta: + case Intrinsic::nvvm_atomic_cas_gen_i_sys: + case Intrinsic::nvvm_atomic_dec_gen_i_cta: + case Intrinsic::nvvm_atomic_dec_gen_i_sys: + case Intrinsic::nvvm_atomic_inc_gen_i_cta: + case Intrinsic::nvvm_atomic_inc_gen_i_sys: + case Intrinsic::nvvm_atomic_max_gen_i_cta: + case Intrinsic::nvvm_atomic_max_gen_i_sys: + case Intrinsic::nvvm_atomic_min_gen_i_cta: + case Intrinsic::nvvm_atomic_min_gen_i_sys: + case Intrinsic::nvvm_atomic_or_gen_i_cta: + case Intrinsic::nvvm_atomic_or_gen_i_sys: + case Intrinsic::nvvm_atomic_exch_gen_i_cta: + case Intrinsic::nvvm_atomic_exch_gen_i_sys: + case Intrinsic::nvvm_atomic_xor_gen_i_cta: + case Intrinsic::nvvm_atomic_xor_gen_i_sys: { + auto &DL = I.getModule()->getDataLayout(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = getValueType(DL, I.getType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = true; + Info.align = 0; + return true; + } + + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: { + auto &DL = I.getModule()->getDataLayout(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + if (Intrinsic == Intrinsic::nvvm_ldu_global_i) + Info.memVT = getValueType(DL, I.getType()); + else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) + Info.memVT = getPointerTy(DL); + else + Info.memVT = getValueType(DL, I.getType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = cast(I.getArgOperand(1))->getZExtValue(); + + return true; + } + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: { + auto &DL = I.getModule()->getDataLayout(); + + Info.opc = ISD::INTRINSIC_W_CHAIN; + if (Intrinsic == Intrinsic::nvvm_ldg_global_i) + Info.memVT = getValueType(DL, I.getType()); + else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) + Info.memVT = getPointerTy(DL); + else + Info.memVT = getValueType(DL, I.getType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = cast(I.getArgOperand(1))->getZExtValue(); + + return true; + } + + case Intrinsic::nvvm_tex_1d_v4f32_s32: + case Intrinsic::nvvm_tex_1d_v4f32_f32: + case Intrinsic::nvvm_tex_1d_level_v4f32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_1d_array_v4f32_s32: + case Intrinsic::nvvm_tex_1d_array_v4f32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_2d_v4f32_s32: + case Intrinsic::nvvm_tex_2d_v4f32_f32: + case Intrinsic::nvvm_tex_2d_level_v4f32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_2d_array_v4f32_s32: + case Intrinsic::nvvm_tex_2d_array_v4f32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_3d_v4f32_s32: + case Intrinsic::nvvm_tex_3d_v4f32_f32: + case Intrinsic::nvvm_tex_3d_level_v4f32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_cube_v4f32_f32: + case Intrinsic::nvvm_tex_cube_level_v4f32_f32: + case Intrinsic::nvvm_tex_cube_array_v4f32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4f32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4f32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4f32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4f32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4f32_f32: + Info.opc = getOpcForTextureInstr(Intrinsic); + Info.memVT = MVT::v4f32; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + + case Intrinsic::nvvm_tex_1d_v4s32_s32: + case Intrinsic::nvvm_tex_1d_v4s32_f32: + case Intrinsic::nvvm_tex_1d_level_v4s32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_v4s32_s32: + case Intrinsic::nvvm_tex_1d_array_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_2d_v4s32_s32: + case Intrinsic::nvvm_tex_2d_v4s32_f32: + case Intrinsic::nvvm_tex_2d_level_v4s32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_v4s32_s32: + case Intrinsic::nvvm_tex_2d_array_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_3d_v4s32_s32: + case Intrinsic::nvvm_tex_3d_v4s32_f32: + case Intrinsic::nvvm_tex_3d_level_v4s32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_cube_v4s32_f32: + case Intrinsic::nvvm_tex_cube_level_v4s32_f32: + case Intrinsic::nvvm_tex_cube_array_v4s32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_cube_v4u32_f32: + case Intrinsic::nvvm_tex_cube_level_v4u32_f32: + case Intrinsic::nvvm_tex_cube_array_v4u32_f32: + case Intrinsic::nvvm_tex_cube_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_v4u32_s32: + case Intrinsic::nvvm_tex_1d_v4u32_f32: + case Intrinsic::nvvm_tex_1d_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_v4u32_s32: + case Intrinsic::nvvm_tex_1d_array_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_2d_v4u32_s32: + case Intrinsic::nvvm_tex_2d_v4u32_f32: + case Intrinsic::nvvm_tex_2d_level_v4u32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_v4u32_s32: + case Intrinsic::nvvm_tex_2d_array_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_3d_v4u32_s32: + case Intrinsic::nvvm_tex_3d_v4u32_f32: + case Intrinsic::nvvm_tex_3d_level_v4u32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4u32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_r_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_g_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_b_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_a_2d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4s32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4s32_f32: + case Intrinsic::nvvm_tex_unified_1d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_1d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_s32: + case Intrinsic::nvvm_tex_unified_1d_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_1d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_2d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_s32: + case Intrinsic::nvvm_tex_unified_2d_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_2d_array_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_v4u32_s32: + case Intrinsic::nvvm_tex_unified_3d_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_3d_grad_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4s32_f32: + case Intrinsic::nvvm_tex_unified_cube_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_level_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_v4u32_f32: + case Intrinsic::nvvm_tex_unified_cube_array_level_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4s32_f32: + case Intrinsic::nvvm_tld4_unified_r_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_g_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_b_2d_v4u32_f32: + case Intrinsic::nvvm_tld4_unified_a_2d_v4u32_f32: + Info.opc = getOpcForTextureInstr(Intrinsic); + Info.memVT = MVT::v4i32; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + + case Intrinsic::nvvm_suld_1d_i8_clamp: + case Intrinsic::nvvm_suld_1d_v2i8_clamp: + case Intrinsic::nvvm_suld_1d_v4i8_clamp: + case Intrinsic::nvvm_suld_1d_array_i8_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i8_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i8_clamp: + case Intrinsic::nvvm_suld_2d_i8_clamp: + case Intrinsic::nvvm_suld_2d_v2i8_clamp: + case Intrinsic::nvvm_suld_2d_v4i8_clamp: + case Intrinsic::nvvm_suld_2d_array_i8_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i8_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i8_clamp: + case Intrinsic::nvvm_suld_3d_i8_clamp: + case Intrinsic::nvvm_suld_3d_v2i8_clamp: + case Intrinsic::nvvm_suld_3d_v4i8_clamp: + case Intrinsic::nvvm_suld_1d_i8_trap: + case Intrinsic::nvvm_suld_1d_v2i8_trap: + case Intrinsic::nvvm_suld_1d_v4i8_trap: + case Intrinsic::nvvm_suld_1d_array_i8_trap: + case Intrinsic::nvvm_suld_1d_array_v2i8_trap: + case Intrinsic::nvvm_suld_1d_array_v4i8_trap: + case Intrinsic::nvvm_suld_2d_i8_trap: + case Intrinsic::nvvm_suld_2d_v2i8_trap: + case Intrinsic::nvvm_suld_2d_v4i8_trap: + case Intrinsic::nvvm_suld_2d_array_i8_trap: + case Intrinsic::nvvm_suld_2d_array_v2i8_trap: + case Intrinsic::nvvm_suld_2d_array_v4i8_trap: + case Intrinsic::nvvm_suld_3d_i8_trap: + case Intrinsic::nvvm_suld_3d_v2i8_trap: + case Intrinsic::nvvm_suld_3d_v4i8_trap: + case Intrinsic::nvvm_suld_1d_i8_zero: + case Intrinsic::nvvm_suld_1d_v2i8_zero: + case Intrinsic::nvvm_suld_1d_v4i8_zero: + case Intrinsic::nvvm_suld_1d_array_i8_zero: + case Intrinsic::nvvm_suld_1d_array_v2i8_zero: + case Intrinsic::nvvm_suld_1d_array_v4i8_zero: + case Intrinsic::nvvm_suld_2d_i8_zero: + case Intrinsic::nvvm_suld_2d_v2i8_zero: + case Intrinsic::nvvm_suld_2d_v4i8_zero: + case Intrinsic::nvvm_suld_2d_array_i8_zero: + case Intrinsic::nvvm_suld_2d_array_v2i8_zero: + case Intrinsic::nvvm_suld_2d_array_v4i8_zero: + case Intrinsic::nvvm_suld_3d_i8_zero: + case Intrinsic::nvvm_suld_3d_v2i8_zero: + case Intrinsic::nvvm_suld_3d_v4i8_zero: + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i8; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + + case Intrinsic::nvvm_suld_1d_i16_clamp: + case Intrinsic::nvvm_suld_1d_v2i16_clamp: + case Intrinsic::nvvm_suld_1d_v4i16_clamp: + case Intrinsic::nvvm_suld_1d_array_i16_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i16_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i16_clamp: + case Intrinsic::nvvm_suld_2d_i16_clamp: + case Intrinsic::nvvm_suld_2d_v2i16_clamp: + case Intrinsic::nvvm_suld_2d_v4i16_clamp: + case Intrinsic::nvvm_suld_2d_array_i16_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i16_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i16_clamp: + case Intrinsic::nvvm_suld_3d_i16_clamp: + case Intrinsic::nvvm_suld_3d_v2i16_clamp: + case Intrinsic::nvvm_suld_3d_v4i16_clamp: + case Intrinsic::nvvm_suld_1d_i16_trap: + case Intrinsic::nvvm_suld_1d_v2i16_trap: + case Intrinsic::nvvm_suld_1d_v4i16_trap: + case Intrinsic::nvvm_suld_1d_array_i16_trap: + case Intrinsic::nvvm_suld_1d_array_v2i16_trap: + case Intrinsic::nvvm_suld_1d_array_v4i16_trap: + case Intrinsic::nvvm_suld_2d_i16_trap: + case Intrinsic::nvvm_suld_2d_v2i16_trap: + case Intrinsic::nvvm_suld_2d_v4i16_trap: + case Intrinsic::nvvm_suld_2d_array_i16_trap: + case Intrinsic::nvvm_suld_2d_array_v2i16_trap: + case Intrinsic::nvvm_suld_2d_array_v4i16_trap: + case Intrinsic::nvvm_suld_3d_i16_trap: + case Intrinsic::nvvm_suld_3d_v2i16_trap: + case Intrinsic::nvvm_suld_3d_v4i16_trap: + case Intrinsic::nvvm_suld_1d_i16_zero: + case Intrinsic::nvvm_suld_1d_v2i16_zero: + case Intrinsic::nvvm_suld_1d_v4i16_zero: + case Intrinsic::nvvm_suld_1d_array_i16_zero: + case Intrinsic::nvvm_suld_1d_array_v2i16_zero: + case Intrinsic::nvvm_suld_1d_array_v4i16_zero: + case Intrinsic::nvvm_suld_2d_i16_zero: + case Intrinsic::nvvm_suld_2d_v2i16_zero: + case Intrinsic::nvvm_suld_2d_v4i16_zero: + case Intrinsic::nvvm_suld_2d_array_i16_zero: + case Intrinsic::nvvm_suld_2d_array_v2i16_zero: + case Intrinsic::nvvm_suld_2d_array_v4i16_zero: + case Intrinsic::nvvm_suld_3d_i16_zero: + case Intrinsic::nvvm_suld_3d_v2i16_zero: + case Intrinsic::nvvm_suld_3d_v4i16_zero: + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i16; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + + case Intrinsic::nvvm_suld_1d_i32_clamp: + case Intrinsic::nvvm_suld_1d_v2i32_clamp: + case Intrinsic::nvvm_suld_1d_v4i32_clamp: + case Intrinsic::nvvm_suld_1d_array_i32_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i32_clamp: + case Intrinsic::nvvm_suld_1d_array_v4i32_clamp: + case Intrinsic::nvvm_suld_2d_i32_clamp: + case Intrinsic::nvvm_suld_2d_v2i32_clamp: + case Intrinsic::nvvm_suld_2d_v4i32_clamp: + case Intrinsic::nvvm_suld_2d_array_i32_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i32_clamp: + case Intrinsic::nvvm_suld_2d_array_v4i32_clamp: + case Intrinsic::nvvm_suld_3d_i32_clamp: + case Intrinsic::nvvm_suld_3d_v2i32_clamp: + case Intrinsic::nvvm_suld_3d_v4i32_clamp: + case Intrinsic::nvvm_suld_1d_i32_trap: + case Intrinsic::nvvm_suld_1d_v2i32_trap: + case Intrinsic::nvvm_suld_1d_v4i32_trap: + case Intrinsic::nvvm_suld_1d_array_i32_trap: + case Intrinsic::nvvm_suld_1d_array_v2i32_trap: + case Intrinsic::nvvm_suld_1d_array_v4i32_trap: + case Intrinsic::nvvm_suld_2d_i32_trap: + case Intrinsic::nvvm_suld_2d_v2i32_trap: + case Intrinsic::nvvm_suld_2d_v4i32_trap: + case Intrinsic::nvvm_suld_2d_array_i32_trap: + case Intrinsic::nvvm_suld_2d_array_v2i32_trap: + case Intrinsic::nvvm_suld_2d_array_v4i32_trap: + case Intrinsic::nvvm_suld_3d_i32_trap: + case Intrinsic::nvvm_suld_3d_v2i32_trap: + case Intrinsic::nvvm_suld_3d_v4i32_trap: + case Intrinsic::nvvm_suld_1d_i32_zero: + case Intrinsic::nvvm_suld_1d_v2i32_zero: + case Intrinsic::nvvm_suld_1d_v4i32_zero: + case Intrinsic::nvvm_suld_1d_array_i32_zero: + case Intrinsic::nvvm_suld_1d_array_v2i32_zero: + case Intrinsic::nvvm_suld_1d_array_v4i32_zero: + case Intrinsic::nvvm_suld_2d_i32_zero: + case Intrinsic::nvvm_suld_2d_v2i32_zero: + case Intrinsic::nvvm_suld_2d_v4i32_zero: + case Intrinsic::nvvm_suld_2d_array_i32_zero: + case Intrinsic::nvvm_suld_2d_array_v2i32_zero: + case Intrinsic::nvvm_suld_2d_array_v4i32_zero: + case Intrinsic::nvvm_suld_3d_i32_zero: + case Intrinsic::nvvm_suld_3d_v2i32_zero: + case Intrinsic::nvvm_suld_3d_v4i32_zero: + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i32; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + + case Intrinsic::nvvm_suld_1d_i64_clamp: + case Intrinsic::nvvm_suld_1d_v2i64_clamp: + case Intrinsic::nvvm_suld_1d_array_i64_clamp: + case Intrinsic::nvvm_suld_1d_array_v2i64_clamp: + case Intrinsic::nvvm_suld_2d_i64_clamp: + case Intrinsic::nvvm_suld_2d_v2i64_clamp: + case Intrinsic::nvvm_suld_2d_array_i64_clamp: + case Intrinsic::nvvm_suld_2d_array_v2i64_clamp: + case Intrinsic::nvvm_suld_3d_i64_clamp: + case Intrinsic::nvvm_suld_3d_v2i64_clamp: + case Intrinsic::nvvm_suld_1d_i64_trap: + case Intrinsic::nvvm_suld_1d_v2i64_trap: + case Intrinsic::nvvm_suld_1d_array_i64_trap: + case Intrinsic::nvvm_suld_1d_array_v2i64_trap: + case Intrinsic::nvvm_suld_2d_i64_trap: + case Intrinsic::nvvm_suld_2d_v2i64_trap: + case Intrinsic::nvvm_suld_2d_array_i64_trap: + case Intrinsic::nvvm_suld_2d_array_v2i64_trap: + case Intrinsic::nvvm_suld_3d_i64_trap: + case Intrinsic::nvvm_suld_3d_v2i64_trap: + case Intrinsic::nvvm_suld_1d_i64_zero: + case Intrinsic::nvvm_suld_1d_v2i64_zero: + case Intrinsic::nvvm_suld_1d_array_i64_zero: + case Intrinsic::nvvm_suld_1d_array_v2i64_zero: + case Intrinsic::nvvm_suld_2d_i64_zero: + case Intrinsic::nvvm_suld_2d_v2i64_zero: + case Intrinsic::nvvm_suld_2d_array_i64_zero: + case Intrinsic::nvvm_suld_2d_array_v2i64_zero: + case Intrinsic::nvvm_suld_3d_i64_zero: + case Intrinsic::nvvm_suld_3d_v2i64_zero: + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i64; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + return false; +} + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. +/// Used to guide target specific optimizations, like loop strength reduction +/// (LoopStrengthReduce.cpp) and memory optimization for address mode +/// (CodeGenPrepare.cpp) +bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { + // AddrMode - This represents an addressing mode of: + // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + // + // The legal address modes are + // - [avar] + // - [areg] + // - [areg+immoff] + // - [immAddr] + + if (AM.BaseGV) { + return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; + } + + switch (AM.Scale) { + case 0: // "r", "r+i" or "i" is allowed + break; + case 1: + if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. + return false; + // Otherwise we have r+i. + break; + default: + // No scale > 1 is allowed + return false; + } + return true; +} + +//===----------------------------------------------------------------------===// +// NVPTX Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +NVPTXTargetLowering::ConstraintType +NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'b': + case 'r': + case 'h': + case 'c': + case 'l': + case 'f': + case 'd': + case '0': + case 'N': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +std::pair +NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'b': + return std::make_pair(0U, &NVPTX::Int1RegsRegClass); + case 'c': + return std::make_pair(0U, &NVPTX::Int16RegsRegClass); + case 'h': + return std::make_pair(0U, &NVPTX::Int16RegsRegClass); + case 'r': + return std::make_pair(0U, &NVPTX::Int32RegsRegClass); + case 'l': + case 'N': + return std::make_pair(0U, &NVPTX::Int64RegsRegClass); + case 'f': + return std::make_pair(0U, &NVPTX::Float32RegsRegClass); + case 'd': + return std::make_pair(0U, &NVPTX::Float64RegsRegClass); + } + } + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +//===----------------------------------------------------------------------===// +// NVPTX DAG Combining +//===----------------------------------------------------------------------===// + +bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, + CodeGenOpt::Level OptLevel) const { + // Always honor command-line argument + if (FMAContractLevelOpt.getNumOccurrences() > 0) + return FMAContractLevelOpt > 0; + + // Do not contract if we're not optimizing the code. + if (OptLevel == 0) + return false; + + // Honor TargetOptions flags that explicitly say fusion is okay. + if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast) + return true; + + return allowUnsafeFPMath(MF); +} + +bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const { + // Honor TargetOptions flags that explicitly say unsafe math is okay. + if (MF.getTarget().Options.UnsafeFPMath) + return true; + + // Allow unsafe math if unsafe-fp-math attribute explicitly says so. + const Function *F = MF.getFunction(); + if (F->hasFnAttribute("unsafe-fp-math")) { + Attribute Attr = F->getFnAttribute("unsafe-fp-math"); + StringRef Val = Attr.getValueAsString(); + if (Val == "true") + return true; + } + + return false; +} + +/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with +/// operands N0 and N1. This is a helper for PerformADDCombine that is +/// called with the default operands, and if that fails, with commuted +/// operands. +static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, + TargetLowering::DAGCombinerInfo &DCI, + const NVPTXSubtarget &Subtarget, + CodeGenOpt::Level OptLevel) { + SelectionDAG &DAG = DCI.DAG; + // Skip non-integer, non-scalar case + EVT VT=N0.getValueType(); + if (VT.isVector()) + return SDValue(); + + // fold (add (mul a, b), c) -> (mad a, b, c) + // + if (N0.getOpcode() == ISD::MUL) { + assert (VT.isInteger()); + // For integer: + // Since integer multiply-add costs the same as integer multiply + // but is more costly than integer add, do the fusion only when + // the mul is only used in the add. + if (OptLevel==CodeGenOpt::None || VT != MVT::i32 || + !N0.getNode()->hasOneUse()) + return SDValue(); + + // Do the folding + return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + else if (N0.getOpcode() == ISD::FMUL) { + if (VT == MVT::f32 || VT == MVT::f64) { + const auto *TLI = static_cast( + &DAG.getTargetLoweringInfo()); + if (!TLI->allowFMA(DAG.getMachineFunction(), OptLevel)) + return SDValue(); + + // For floating point: + // Do the fusion only when the mul has less than 5 uses and all + // are add. + // The heuristic is that if a use is not an add, then that use + // cannot be fused into fma, therefore mul is still needed anyway. + // If there are more than 4 uses, even if they are all add, fusing + // them will increase register pressue. + // + int numUses = 0; + int nonAddCount = 0; + for (SDNode::use_iterator UI = N0.getNode()->use_begin(), + UE = N0.getNode()->use_end(); + UI != UE; ++UI) { + numUses++; + SDNode *User = *UI; + if (User->getOpcode() != ISD::FADD) + ++nonAddCount; + } + if (numUses >= 5) + return SDValue(); + if (nonAddCount) { + int orderNo = N->getIROrder(); + int orderNo2 = N0.getNode()->getIROrder(); + // simple heuristics here for considering potential register + // pressure, the logics here is that the differnce are used + // to measure the distance between def and use, the longer distance + // more likely cause register pressure. + if (orderNo - orderNo2 < 500) + return SDValue(); + + // Now, check if at least one of the FMUL's operands is live beyond the node N, + // which guarantees that the FMA will not increase register pressure at node N. + bool opIsLive = false; + const SDNode *left = N0.getOperand(0).getNode(); + const SDNode *right = N0.getOperand(1).getNode(); + + if (isa(left) || isa(right)) + opIsLive = true; + + if (!opIsLive) + for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + int orderNo3 = User->getIROrder(); + if (orderNo3 > orderNo) { + opIsLive = true; + break; + } + } + + if (!opIsLive) + for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + int orderNo3 = User->getIROrder(); + if (orderNo3 > orderNo) { + opIsLive = true; + break; + } + } + + if (!opIsLive) + return SDValue(); + } + + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + } + + return SDValue(); +} + +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +/// +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const NVPTXSubtarget &Subtarget, + CodeGenOpt::Level OptLevel) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // First try with the default operand order. + if (SDValue Result = + PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, OptLevel)) + return Result; + + // If that didn't work, try again with the operands commuted. + return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); +} + +static SDValue PerformANDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // The type legalizer turns a vector load of i8 values into a zextload to i16 + // registers, optionally ANY_EXTENDs it (if target type is integer), + // and ANDs off the high 8 bits. Since we turn this load into a + // target-specific DAG node, the DAG combiner fails to eliminate these AND + // nodes. Do that here. + SDValue Val = N->getOperand(0); + SDValue Mask = N->getOperand(1); + + if (isa(Val)) { + std::swap(Val, Mask); + } + + SDValue AExt; + // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and + if (Val.getOpcode() == ISD::ANY_EXTEND) { + AExt = Val; + Val = Val->getOperand(0); + } + + if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { + Val = Val->getOperand(0); + } + + if (Val->getOpcode() == NVPTXISD::LoadV2 || + Val->getOpcode() == NVPTXISD::LoadV4) { + ConstantSDNode *MaskCnst = dyn_cast(Mask); + if (!MaskCnst) { + // Not an AND with a constant + return SDValue(); + } + + uint64_t MaskVal = MaskCnst->getZExtValue(); + if (MaskVal != 0xff) { + // Not an AND that chops off top 8 bits + return SDValue(); + } + + MemSDNode *Mem = dyn_cast(Val); + if (!Mem) { + // Not a MemSDNode?!? + return SDValue(); + } + + EVT MemVT = Mem->getMemoryVT(); + if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { + // We only handle the i8 case + return SDValue(); + } + + unsigned ExtType = + cast(Val->getOperand(Val->getNumOperands()-1))-> + getZExtValue(); + if (ExtType == ISD::SEXTLOAD) { + // If for some reason the load is a sextload, the and is needed to zero + // out the high 8 bits + return SDValue(); + } + + bool AddTo = false; + if (AExt.getNode() != nullptr) { + // Re-insert the ext as a zext. + Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), + AExt.getValueType(), Val); + AddTo = true; + } + + // If we get here, the AND is unnecessary. Just replace it with the load + DCI.CombineTo(N, Val, AddTo); + } + + return SDValue(); +} + +static SDValue PerformREMCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + CodeGenOpt::Level OptLevel) { + assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM); + + // Don't do anything at less than -O2. + if (OptLevel < CodeGenOpt::Default) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + EVT VT = N->getValueType(0); + bool IsSigned = N->getOpcode() == ISD::SREM; + unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV; + + const SDValue &Num = N->getOperand(0); + const SDValue &Den = N->getOperand(1); + + for (const SDNode *U : Num->uses()) { + if (U->getOpcode() == DivOpc && U->getOperand(0) == Num && + U->getOperand(1) == Den) { + // Num % Den -> Num - (Num / Den) * Den + return DAG.getNode(ISD::SUB, DL, VT, Num, + DAG.getNode(ISD::MUL, DL, VT, + DAG.getNode(DivOpc, DL, VT, Num, Den), + Den)); + } + } + return SDValue(); +} + +enum OperandSignedness { + Signed = 0, + Unsigned, + Unknown +}; + +/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand +/// that can be demoted to \p OptSize bits without loss of information. The +/// signedness of the operand, if determinable, is placed in \p S. +static bool IsMulWideOperandDemotable(SDValue Op, + unsigned OptSize, + OperandSignedness &S) { + S = Unknown; + + if (Op.getOpcode() == ISD::SIGN_EXTEND || + Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { + EVT OrigVT = Op.getOperand(0).getValueType(); + if (OrigVT.getSizeInBits() <= OptSize) { + S = Signed; + return true; + } + } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { + EVT OrigVT = Op.getOperand(0).getValueType(); + if (OrigVT.getSizeInBits() <= OptSize) { + S = Unsigned; + return true; + } + } + + return false; +} + +/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can +/// be demoted to \p OptSize bits without loss of information. If the operands +/// contain a constant, it should appear as the RHS operand. The signedness of +/// the operands is placed in \p IsSigned. +static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, + unsigned OptSize, + bool &IsSigned) { + OperandSignedness LHSSign; + + // The LHS operand must be a demotable op + if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) + return false; + + // We should have been able to determine the signedness from the LHS + if (LHSSign == Unknown) + return false; + + IsSigned = (LHSSign == Signed); + + // The RHS can be a demotable op or a constant + if (ConstantSDNode *CI = dyn_cast(RHS)) { + const APInt &Val = CI->getAPIntValue(); + if (LHSSign == Unsigned) { + return Val.isIntN(OptSize); + } else { + return Val.isSignedIntN(OptSize); + } + } else { + OperandSignedness RHSSign; + if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) + return false; + + return LHSSign == RHSSign; + } +} + +/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply +/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform +/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift +/// amount. +static SDValue TryMULWIDECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + EVT MulType = N->getValueType(0); + if (MulType != MVT::i32 && MulType != MVT::i64) { + return SDValue(); + } + + SDLoc DL(N); + unsigned OptSize = MulType.getSizeInBits() >> 1; + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // Canonicalize the multiply so the constant (if any) is on the right + if (N->getOpcode() == ISD::MUL) { + if (isa(LHS)) { + std::swap(LHS, RHS); + } + } + + // If we have a SHL, determine the actual multiply amount + if (N->getOpcode() == ISD::SHL) { + ConstantSDNode *ShlRHS = dyn_cast(RHS); + if (!ShlRHS) { + return SDValue(); + } + + APInt ShiftAmt = ShlRHS->getAPIntValue(); + unsigned BitWidth = MulType.getSizeInBits(); + if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { + APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; + RHS = DCI.DAG.getConstant(MulVal, DL, MulType); + } else { + return SDValue(); + } + } + + bool Signed; + // Verify that our operands are demotable + if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { + return SDValue(); + } + + EVT DemotedVT; + if (MulType == MVT::i32) { + DemotedVT = MVT::i16; + } else { + DemotedVT = MVT::i32; + } + + // Truncate the operands to the correct size. Note that these are just for + // type consistency and will (likely) be eliminated in later phases. + SDValue TruncLHS = + DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, LHS); + SDValue TruncRHS = + DCI.DAG.getNode(ISD::TRUNCATE, DL, DemotedVT, RHS); + + unsigned Opc; + if (Signed) { + Opc = NVPTXISD::MUL_WIDE_SIGNED; + } else { + Opc = NVPTXISD::MUL_WIDE_UNSIGNED; + } + + return DCI.DAG.getNode(Opc, DL, MulType, TruncLHS, TruncRHS); +} + +/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. +static SDValue PerformMULCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + CodeGenOpt::Level OptLevel) { + if (OptLevel > 0) { + // Try mul.wide combining at OptLevel > 0 + if (SDValue Ret = TryMULWIDECombine(N, DCI)) + return Ret; + } + + return SDValue(); +} + +/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. +static SDValue PerformSHLCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + CodeGenOpt::Level OptLevel) { + if (OptLevel > 0) { + // Try mul.wide combining at OptLevel > 0 + if (SDValue Ret = TryMULWIDECombine(N, DCI)) + return Ret; + } + + return SDValue(); +} + +static SDValue PerformSETCCCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + EVT CCType = N->getValueType(0); + SDValue A = N->getOperand(0); + SDValue B = N->getOperand(1); + + if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16) + return SDValue(); + + SDLoc DL(N); + // setp.f16x2 returns two scalar predicates, which we need to + // convert back to v2i1. The returned result will be scalarized by + // the legalizer, but the comparison will remain a single vector + // instruction. + SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL, + DCI.DAG.getVTList(MVT::i1, MVT::i1), + {A, B, N->getOperand(2)}); + return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0), + CCNode.getValue(1)); +} + +SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel(); + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::FADD: + return PerformADDCombine(N, DCI, STI, OptLevel); + case ISD::MUL: + return PerformMULCombine(N, DCI, OptLevel); + case ISD::SHL: + return PerformSHLCombine(N, DCI, OptLevel); + case ISD::AND: + return PerformANDCombine(N, DCI); + case ISD::UREM: + case ISD::SREM: + return PerformREMCombine(N, DCI, OptLevel); + case ISD::SETCC: + return PerformSETCCCombine(N, DCI); + } + return SDValue(); +} + +/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. +static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, + SmallVectorImpl &Results) { + EVT ResVT = N->getValueType(0); + SDLoc DL(N); + + assert(ResVT.isVector() && "Vector load must have vector type"); + + // We only handle "native" vector sizes for now, e.g. <4 x double> is not + // legal. We can (and should) split that into 2 loads of <2 x double> here + // but I'm leaving that as a TODO for now. + assert(ResVT.isSimple() && "Can only handle simple types"); + switch (ResVT.getSimpleVT().SimpleTy) { + default: + return; + case MVT::v2i8: + case MVT::v2i16: + case MVT::v2i32: + case MVT::v2i64: + case MVT::v2f16: + case MVT::v2f32: + case MVT::v2f64: + case MVT::v4i8: + case MVT::v4i16: + case MVT::v4i32: + case MVT::v4f16: + case MVT::v4f32: + case MVT::v8f16: // <4 x f16x2> + // This is a "native" vector type + break; + } + + LoadSDNode *LD = cast(N); + + unsigned Align = LD->getAlignment(); + auto &TD = DAG.getDataLayout(); + unsigned PrefAlign = + TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); + if (Align < PrefAlign) { + // This load is not sufficiently aligned, so bail out and let this vector + // load be scalarized. Note that we may still be able to emit smaller + // vector loads. For example, if we are loading a <4 x float> with an + // alignment of 8, this check will fail but the legalizer will try again + // with 2 x <2 x float>, which will succeed with an alignment of 8. + return; + } + + EVT EltVT = ResVT.getVectorElementType(); + unsigned NumElts = ResVT.getVectorNumElements(); + + // Since LoadV2 is a target node, we cannot rely on DAG type legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // loaded type to i16 and propagate the "real" type as the memory type. + bool NeedTrunc = false; + if (EltVT.getSizeInBits() < 16) { + EltVT = MVT::i16; + NeedTrunc = true; + } + + unsigned Opcode = 0; + SDVTList LdResVTs; + bool LoadF16x2 = false; + + switch (NumElts) { + default: + return; + case 2: + Opcode = NVPTXISD::LoadV2; + LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); + break; + case 4: { + Opcode = NVPTXISD::LoadV4; + EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; + LdResVTs = DAG.getVTList(ListVTs); + break; + } + case 8: { + // v8f16 is a special case. PTX doesn't have ld.v8.f16 + // instruction. Instead, we split the vector into v2f16 chunks and + // load them with ld.v4.b32. + assert(EltVT == MVT::f16 && "Unsupported v8 vector type."); + LoadF16x2 = true; + Opcode = NVPTXISD::LoadV4; + EVT ListVTs[] = {MVT::v2f16, MVT::v2f16, MVT::v2f16, MVT::v2f16, + MVT::Other}; + LdResVTs = DAG.getVTList(ListVTs); + break; + } + } + + // Copy regular operands + SmallVector OtherOps(N->op_begin(), N->op_end()); + + // The select routine does not have access to the LoadSDNode instance, so + // pass along the extension information + OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType(), DL)); + + SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, + LD->getMemoryVT(), + LD->getMemOperand()); + + SmallVector ScalarRes; + if (LoadF16x2) { + // Split v2f16 subvectors back into individual elements. + NumElts /= 2; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue SubVector = NewLD.getValue(i); + SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, + DAG.getIntPtrConstant(0, DL)); + SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector, + DAG.getIntPtrConstant(1, DL)); + ScalarRes.push_back(E0); + ScalarRes.push_back(E1); + } + } else { + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Res = NewLD.getValue(i); + if (NeedTrunc) + Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + ScalarRes.push_back(Res); + } + } + + SDValue LoadChain = NewLD.getValue(NumElts); + + SDValue BuildVec = DAG.getBuildVector(ResVT, DL, ScalarRes); + + Results.push_back(BuildVec); + Results.push_back(LoadChain); +} + +static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, + SmallVectorImpl &Results) { + SDValue Chain = N->getOperand(0); + SDValue Intrin = N->getOperand(1); + SDLoc DL(N); + + // Get the intrinsic ID + unsigned IntrinNo = cast(Intrin.getNode())->getZExtValue(); + switch (IntrinNo) { + default: + return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: { + EVT ResVT = N->getValueType(0); + + if (ResVT.isVector()) { + // Vector LDG/LDU + + unsigned NumElts = ResVT.getVectorNumElements(); + EVT EltVT = ResVT.getVectorElementType(); + + // Since LDU/LDG are target nodes, we cannot rely on DAG type + // legalization. + // Therefore, we must ensure the type is legal. For i1 and i8, we set the + // loaded type to i16 and propagate the "real" type as the memory type. + bool NeedTrunc = false; + if (EltVT.getSizeInBits() < 16) { + EltVT = MVT::i16; + NeedTrunc = true; + } + + unsigned Opcode = 0; + SDVTList LdResVTs; + + switch (NumElts) { + default: + return; + case 2: + switch (IntrinNo) { + default: + return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + Opcode = NVPTXISD::LDGV2; + break; + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: + Opcode = NVPTXISD::LDUV2; + break; + } + LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); + break; + case 4: { + switch (IntrinNo) { + default: + return; + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: + Opcode = NVPTXISD::LDGV4; + break; + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_p: + Opcode = NVPTXISD::LDUV4; + break; + } + EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; + LdResVTs = DAG.getVTList(ListVTs); + break; + } + } + + SmallVector OtherOps; + + // Copy regular operands + + OtherOps.push_back(Chain); // Chain + // Skip operand 1 (intrinsic ID) + // Others + OtherOps.append(N->op_begin() + 2, N->op_end()); + + MemIntrinsicSDNode *MemSD = cast(N); + + SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, + MemSD->getMemoryVT(), + MemSD->getMemOperand()); + + SmallVector ScalarRes; + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Res = NewLD.getValue(i); + if (NeedTrunc) + Res = + DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + ScalarRes.push_back(Res); + } + + SDValue LoadChain = NewLD.getValue(NumElts); + + SDValue BuildVec = + DAG.getBuildVector(ResVT, DL, ScalarRes); + + Results.push_back(BuildVec); + Results.push_back(LoadChain); + } else { + // i8 LDG/LDU + assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 && + "Custom handling of non-i8 ldu/ldg?"); + + // Just copy all operands as-is + SmallVector Ops(N->op_begin(), N->op_end()); + + // Force output to i16 + SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other); + + MemIntrinsicSDNode *MemSD = cast(N); + + // We make sure the memory type is i8, which will be used during isel + // to select the proper instruction. + SDValue NewLD = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, + MVT::i8, MemSD->getMemOperand()); + + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, + NewLD.getValue(0))); + Results.push_back(NewLD.getValue(1)); + } + } + } +} + +void NVPTXTargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { + switch (N->getOpcode()) { + default: + report_fatal_error("Unhandled custom legalization"); + case ISD::LOAD: + ReplaceLoadVector(N, DAG, Results); + return; + case ISD::INTRINSIC_W_CHAIN: + ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); + return; + } +} + +// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file. +void NVPTXSection::anchor() {} + +NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { + delete static_cast(TextSection); + delete static_cast(DataSection); + delete static_cast(BSSSection); + delete static_cast(ReadOnlySection); + + delete static_cast(StaticCtorSection); + delete static_cast(StaticDtorSection); + delete static_cast(LSDASection); + delete static_cast(EHFrameSection); + delete static_cast(DwarfAbbrevSection); + delete static_cast(DwarfInfoSection); + delete static_cast(DwarfLineSection); + delete static_cast(DwarfFrameSection); + delete static_cast(DwarfPubTypesSection); + delete static_cast(DwarfDebugInlineSection); + delete static_cast(DwarfStrSection); + delete static_cast(DwarfLocSection); + delete static_cast(DwarfARangesSection); + delete static_cast(DwarfRangesSection); + delete static_cast(DwarfMacinfoSection); +} + +MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal( + const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { + return getDataSection(); +} diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 3026f0be242dd07fa8aa7c684fc458cd0cd7f026..da563f0531d43255fd2a187a7870f21d632e7360 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "NVPTX.h" #include "NVPTXInstrInfo.h" +#include "NVPTX.h" #include "NVPTXTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" @@ -38,7 +38,7 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); - if (DestRC->getSize() != SrcRC->getSize()) + if (RegInfo.getRegSizeInBits(*DestRC) != RegInfo.getRegSizeInBits(*SrcRC)) report_fatal_error("Copy one register into another with a different width"); unsigned Op; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 2b847414b8a8aef2c79cb1eedbcf95d188a1456b..b5b5ea1ed639cbf4656298025bfda13831d6cc8f 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1,3165 +1,3165 @@ -//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the PTX instructions in TableGen format. -// -//===----------------------------------------------------------------------===// - -include "NVPTXInstrFormats.td" - -// A NOP instruction -let hasSideEffects = 0 in { - def NOP : NVPTXInst<(outs), (ins), "", []>; -} - -let OperandType = "OPERAND_IMMEDIATE" in { - def f16imm : Operand; -} - -// List of vector specific properties -def isVecLD : VecInstTypeEnum<1>; -def isVecST : VecInstTypeEnum<2>; -def isVecBuild : VecInstTypeEnum<3>; -def isVecShuffle : VecInstTypeEnum<4>; -def isVecExtract : VecInstTypeEnum<5>; -def isVecInsert : VecInstTypeEnum<6>; -def isVecDest : VecInstTypeEnum<7>; -def isVecOther : VecInstTypeEnum<15>; - -//===----------------------------------------------------------------------===// -// NVPTX Operand Definitions. -//===----------------------------------------------------------------------===// - -def brtarget : Operand; - -// CVT conversion modes -// These must match the enum in NVPTX.h -def CvtNONE : PatLeaf<(i32 0x0)>; -def CvtRNI : PatLeaf<(i32 0x1)>; -def CvtRZI : PatLeaf<(i32 0x2)>; -def CvtRMI : PatLeaf<(i32 0x3)>; -def CvtRPI : PatLeaf<(i32 0x4)>; -def CvtRN : PatLeaf<(i32 0x5)>; -def CvtRZ : PatLeaf<(i32 0x6)>; -def CvtRM : PatLeaf<(i32 0x7)>; -def CvtRP : PatLeaf<(i32 0x8)>; - -def CvtNONE_FTZ : PatLeaf<(i32 0x10)>; -def CvtRNI_FTZ : PatLeaf<(i32 0x11)>; -def CvtRZI_FTZ : PatLeaf<(i32 0x12)>; -def CvtRMI_FTZ : PatLeaf<(i32 0x13)>; -def CvtRPI_FTZ : PatLeaf<(i32 0x14)>; -def CvtRN_FTZ : PatLeaf<(i32 0x15)>; -def CvtRZ_FTZ : PatLeaf<(i32 0x16)>; -def CvtRM_FTZ : PatLeaf<(i32 0x17)>; -def CvtRP_FTZ : PatLeaf<(i32 0x18)>; - -def CvtSAT : PatLeaf<(i32 0x20)>; -def CvtSAT_FTZ : PatLeaf<(i32 0x30)>; - -def CvtMode : Operand { - let PrintMethod = "printCvtMode"; -} - -// Compare modes -// These must match the enum in NVPTX.h -def CmpEQ : PatLeaf<(i32 0)>; -def CmpNE : PatLeaf<(i32 1)>; -def CmpLT : PatLeaf<(i32 2)>; -def CmpLE : PatLeaf<(i32 3)>; -def CmpGT : PatLeaf<(i32 4)>; -def CmpGE : PatLeaf<(i32 5)>; -def CmpEQU : PatLeaf<(i32 10)>; -def CmpNEU : PatLeaf<(i32 11)>; -def CmpLTU : PatLeaf<(i32 12)>; -def CmpLEU : PatLeaf<(i32 13)>; -def CmpGTU : PatLeaf<(i32 14)>; -def CmpGEU : PatLeaf<(i32 15)>; -def CmpNUM : PatLeaf<(i32 16)>; -def CmpNAN : PatLeaf<(i32 17)>; - -def CmpEQ_FTZ : PatLeaf<(i32 0x100)>; -def CmpNE_FTZ : PatLeaf<(i32 0x101)>; -def CmpLT_FTZ : PatLeaf<(i32 0x102)>; -def CmpLE_FTZ : PatLeaf<(i32 0x103)>; -def CmpGT_FTZ : PatLeaf<(i32 0x104)>; -def CmpGE_FTZ : PatLeaf<(i32 0x105)>; -def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>; -def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>; -def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>; -def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>; -def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>; -def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>; -def CmpNUM_FTZ : PatLeaf<(i32 0x110)>; -def CmpNAN_FTZ : PatLeaf<(i32 0x111)>; - -def CmpMode : Operand { - let PrintMethod = "printCmpMode"; -} -def VecElement : Operand { - let PrintMethod = "printVecElement"; -} - -//===----------------------------------------------------------------------===// -// NVPTX Instruction Predicate Definitions -//===----------------------------------------------------------------------===// - - -def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">; -def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">; -def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">; -def useAtomRedG32forGen32 : - Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">; -def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">; -def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">; -def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">; -def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">; -def useAtomRedG64forGen64 : - Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">; -def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">; -def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">; -def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">; -def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">; -def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">; -def hasVote : Predicate<"Subtarget->hasVote()">; -def hasDouble : Predicate<"Subtarget->hasDouble()">; -def reqPTX20 : Predicate<"Subtarget->reqPTX20()">; -def hasLDG : Predicate<"Subtarget->hasLDG()">; -def hasLDU : Predicate<"Subtarget->hasLDU()">; -def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">; - -def doF32FTZ : Predicate<"useF32FTZ()">; -def doNoF32FTZ : Predicate<"!useF32FTZ()">; - -def doMulWide : Predicate<"doMulWide">; - -def allowFMA : Predicate<"allowFMA()">; -def noFMA : Predicate<"!allowFMA()">; -def allowUnsafeFPMath : Predicate<"allowUnsafeFPMath()">; - -def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">; -def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; - -def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">; -def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; - -def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">; -def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; - -def true : Predicate<"true">; - -def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; - -def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; - -//===----------------------------------------------------------------------===// -// Some Common Instruction Class Templates -//===----------------------------------------------------------------------===// - -// Template for instructions which take three int64, int32, or int16 args. -// The instructions are named "" (e.g. "add.s64"). -multiclass I3 { - def i64rr : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; - def i64ri : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; - def i32rr : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def i16rr : - NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; - def i16ri : - NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; -} - -// Template for instructions which take 3 int32 args. The instructions are -// named ".s32" (e.g. "addc.cc.s32"). -multiclass ADD_SUB_INT_32 { - def i32rr : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; -} - -// Template for instructions which take three fp64 or fp32 args. The -// instructions are named ".f" (e.g. "min.f64"). -// -// Also defines ftz (flush subnormal inputs and results to sign-preserving -// zero) variants for fp32 functions. -// -// This multiclass should be used for nodes that cannot be folded into FMAs. -// For nodes that can be folded into FMAs (i.e. adds and muls), use -// F3_fma_component. -multiclass F3 { - def f64rr : - NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; - def f64ri : - NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; - def f32rr_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[doF32FTZ]>; - def f32ri_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[doF32FTZ]>; - def f32rr : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; - def f32ri : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; -} - -// Template for instructions which take three FP args. The -// instructions are named ".f" (e.g. "add.f64"). -// -// Also defines ftz (flush subnormal inputs and results to sign-preserving -// zero) variants for fp32/fp16 functions. -// -// This multiclass should be used for nodes that can be folded to make fma ops. -// In this case, we use the ".rn" variant when FMA is disabled, as this behaves -// just like the non ".rn" op, but prevents ptxas from creating FMAs. -multiclass F3_fma_component { - def f64rr : - NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, - Requires<[allowFMA]>; - def f64ri : - NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, - Requires<[allowFMA]>; - def f32rr_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[allowFMA, doF32FTZ]>; - def f32ri_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[allowFMA, doF32FTZ]>; - def f32rr : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[allowFMA]>; - def f32ri : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[allowFMA]>; - - def f16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), - !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>, - Requires<[useFP16Math, allowFMA, doF32FTZ]>; - def f16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), - !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>, - Requires<[useFP16Math, allowFMA]>; - - def f16x2rr_ftz : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), - !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>, - Requires<[useFP16Math, allowFMA, doF32FTZ]>; - def f16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), - !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>, - Requires<[useFP16Math, allowFMA]>; - - // These have strange names so we don't perturb existing mir tests. - def _rnf64rr : - NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, - Requires<[noFMA]>; - def _rnf64ri : - NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, - Requires<[noFMA]>; - def _rnf32rr_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[noFMA, doF32FTZ]>; - def _rnf32ri_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[noFMA, doF32FTZ]>; - def _rnf32rr : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, - Requires<[noFMA]>; - def _rnf32ri : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, - Requires<[noFMA]>; - def _rnf16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), - !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>, - Requires<[useFP16Math, noFMA, doF32FTZ]>; - def _rnf16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), - !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>, - Requires<[useFP16Math, noFMA]>; - def _rnf16x2rr_ftz : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), - !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>, - Requires<[useFP16Math, noFMA, doF32FTZ]>; - def _rnf16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), - !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>, - Requires<[useFP16Math, noFMA]>; -} - -// Template for operations which take two f32 or f64 operands. Provides three -// instructions: .f64, .f32, and .ftz.f32 (flush -// subnormal inputs and results to zero). -multiclass F2 { - def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), - !strconcat(OpcStr, ".f64 \t$dst, $a;"), - [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>; - def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), - !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>, - Requires<[doF32FTZ]>; - def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), - !strconcat(OpcStr, ".f32 \t$dst, $a;"), - [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>; -} - -//===----------------------------------------------------------------------===// -// NVPTX Instructions. -//===----------------------------------------------------------------------===// - -//----------------------------------- -// Type Conversion -//----------------------------------- - -let hasSideEffects = 0 in { - // Generate a cvt to the given type from all possible types. Each instance - // takes a CvtMode immediate that defines the conversion mode to use. It can - // be CvtNONE to omit a conversion mode. - multiclass CVT_FROM_ALL { - def _s8 : - NVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".s8 \t$dst, $src;"), []>; - def _u8 : - NVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".u8 \t$dst, $src;"), []>; - def _s16 : - NVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".s16 \t$dst, $src;"), []>; - def _u16 : - NVPTXInst<(outs RC:$dst), - (ins Int16Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".u16 \t$dst, $src;"), []>; - def _s32 : - NVPTXInst<(outs RC:$dst), - (ins Int32Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".s32 \t$dst, $src;"), []>; - def _u32 : - NVPTXInst<(outs RC:$dst), - (ins Int32Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".u32 \t$dst, $src;"), []>; - def _s64 : - NVPTXInst<(outs RC:$dst), - (ins Int64Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".s64 \t$dst, $src;"), []>; - def _u64 : - NVPTXInst<(outs RC:$dst), - (ins Int64Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".u64 \t$dst, $src;"), []>; - def _f16 : - NVPTXInst<(outs RC:$dst), - (ins Float16Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".f16 \t$dst, $src;"), []>; - def _f32 : - NVPTXInst<(outs RC:$dst), - (ins Float32Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".f32 \t$dst, $src;"), []>; - def _f64 : - NVPTXInst<(outs RC:$dst), - (ins Float64Regs:$src, CvtMode:$mode), - !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", - FromName, ".f64 \t$dst, $src;"), []>; - } - - // Generate cvts from all types to all types. - defm CVT_s8 : CVT_FROM_ALL<"s8", Int16Regs>; - defm CVT_u8 : CVT_FROM_ALL<"u8", Int16Regs>; - defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>; - defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>; - defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>; - defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; - defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; - defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; - defm CVT_f16 : CVT_FROM_ALL<"f16", Float16Regs>; - defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; - defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; - - // These cvts are different from those above: The source and dest registers - // are of the same type. - def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "cvt.s16.s8 \t$dst, $src;", []>; - def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), - "cvt.s32.s8 \t$dst, $src;", []>; - def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), - "cvt.s32.s16 \t$dst, $src;", []>; - def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "cvt.s64.s8 \t$dst, $src;", []>; - def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "cvt.s64.s16 \t$dst, $src;", []>; - def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "cvt.s64.s32 \t$dst, $src;", []>; -} - -//----------------------------------- -// Integer Arithmetic -//----------------------------------- - -// Template for xor masquerading as int1 arithmetic. -multiclass ADD_SUB_i1 { - def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), - "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; - def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), - "xor.pred \t$dst, $a, $b;", - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>; -} - -// int1 addition and subtraction are both just xor. -defm ADD_i1 : ADD_SUB_i1; -defm SUB_i1 : ADD_SUB_i1; - -// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we -// also use these for unsigned arithmetic. -defm ADD : I3<"add.s", add>; -defm SUB : I3<"sub.s", sub>; - -// int32 addition and subtraction with carry-out. -// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?). -defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; -defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; - -// int32 addition and subtraction with carry-in and carry-out. -defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; -defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; - -defm MULT : I3<"mul.lo.s", mul>; - -defm MULTHS : I3<"mul.hi.s", mulhs>; -defm MULTHU : I3<"mul.hi.u", mulhu>; - -defm SDIV : I3<"div.s", sdiv>; -defm UDIV : I3<"div.u", udiv>; - -// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM -// will lower it. -defm SREM : I3<"rem.s", srem>; -defm UREM : I3<"rem.u", urem>; - -// Integer absolute value. NumBits should be one minus the bit width of RC. -// This idiom implements the algorithm at -// http://graphics.stanford.edu/~seander/bithacks.html#IntegerAbs. -multiclass ABS { - def : NVPTXInst<(outs RC:$dst), (ins RC:$a), - !strconcat("abs", SizeName, " \t$dst, $a;"), - [(set RC:$dst, (xor (add (sra RC:$a, (i32 NumBits)), RC:$a), - (sra RC:$a, (i32 NumBits))))]>; -} -defm ABS_16 : ABS; -defm ABS_32 : ABS; -defm ABS_64 : ABS; - -// Integer min/max. -defm SMAX : I3<"max.s", smax>; -defm UMAX : I3<"max.u", umax>; -defm SMIN : I3<"min.s", smin>; -defm UMIN : I3<"min.u", umin>; - -// -// Wide multiplication -// -def MULWIDES64 : - NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - "mul.wide.s32 \t$dst, $a, $b;", []>; -def MULWIDES64Imm : - NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - "mul.wide.s32 \t$dst, $a, $b;", []>; -def MULWIDES64Imm64 : - NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), - "mul.wide.s32 \t$dst, $a, $b;", []>; - -def MULWIDEU64 : - NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - "mul.wide.u32 \t$dst, $a, $b;", []>; -def MULWIDEU64Imm : - NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - "mul.wide.u32 \t$dst, $a, $b;", []>; -def MULWIDEU64Imm64 : - NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), - "mul.wide.u32 \t$dst, $a, $b;", []>; - -def MULWIDES32 : - NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - "mul.wide.s16 \t$dst, $a, $b;", []>; -def MULWIDES32Imm : - NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - "mul.wide.s16 \t$dst, $a, $b;", []>; -def MULWIDES32Imm32 : - NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), - "mul.wide.s16 \t$dst, $a, $b;", []>; - -def MULWIDEU32 : - NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - "mul.wide.u16 \t$dst, $a, $b;", []>; -def MULWIDEU32Imm : - NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - "mul.wide.u16 \t$dst, $a, $b;", []>; -def MULWIDEU32Imm32 : - NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), - "mul.wide.u16 \t$dst, $a, $b;", []>; - -def SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>; -def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>; -def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; - -// Matchers for signed, unsigned mul.wide ISD nodes. -def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)), - (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), - (MULWIDES32Imm Int16Regs:$a, imm:$b)>, - Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)), - (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), - (MULWIDEU32Imm Int16Regs:$a, imm:$b)>, - Requires<[doMulWide]>; - -def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)), - (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)), - (MULWIDES64Imm Int32Regs:$a, imm:$b)>, - Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)), - (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)), - (MULWIDEU64Imm Int32Regs:$a, imm:$b)>, - Requires<[doMulWide]>; - -// Predicates used for converting some patterns to mul.wide. -def SInt32Const : PatLeaf<(imm), [{ - const APInt &v = N->getAPIntValue(); - return v.isSignedIntN(32); -}]>; - -def UInt32Const : PatLeaf<(imm), [{ - const APInt &v = N->getAPIntValue(); - return v.isIntN(32); -}]>; - -def SInt16Const : PatLeaf<(imm), [{ - const APInt &v = N->getAPIntValue(); - return v.isSignedIntN(16); -}]>; - -def UInt16Const : PatLeaf<(imm), [{ - const APInt &v = N->getAPIntValue(); - return v.isIntN(16); -}]>; - -def Int5Const : PatLeaf<(imm), [{ - // Check if 0 <= v < 32; only then will the result of (x << v) be an int32. - const APInt &v = N->getAPIntValue(); - return v.sge(0) && v.slt(32); -}]>; - -def Int4Const : PatLeaf<(imm), [{ - // Check if 0 <= v < 16; only then will the result of (x << v) be an int16. - const APInt &v = N->getAPIntValue(); - return v.sge(0) && v.slt(16); -}]>; - -def SHL2MUL32 : SDNodeXFormgetAPIntValue(); - APInt temp(32, 1); - return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i32); -}]>; - -def SHL2MUL16 : SDNodeXFormgetAPIntValue(); - APInt temp(16, 1); - return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16); -}]>; - -// Convert "sign/zero-extend, then shift left by an immediate" to mul.wide. -def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), - (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, - Requires<[doMulWide]>; -def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), - (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, - Requires<[doMulWide]>; - -def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)), - (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, - Requires<[doMulWide]>; -def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)), - (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, - Requires<[doMulWide]>; - -// Convert "sign/zero-extend then multiply" to mul.wide. -def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), - (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), - (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>, - Requires<[doMulWide]>; - -def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), - (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), - (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>, - Requires<[doMulWide]>; - -def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), - (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), - (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>, - Requires<[doMulWide]>; - -def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), - (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, - Requires<[doMulWide]>; -def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), - (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>, - Requires<[doMulWide]>; - -// -// Integer multiply-add -// -def SDTIMAD : - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>, - SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; -def imad : SDNode<"NVPTXISD::IMAD", SDTIMAD>; - -def MAD16rrr : - NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>; -def MAD16rri : - NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>; -def MAD16rir : - NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>; -def MAD16rii : - NVPTXInst<(outs Int16Regs:$dst), - (ins Int16Regs:$a, i16imm:$b, i16imm:$c), - "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, imm:$c))]>; - -def MAD32rrr : - NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), - "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>; -def MAD32rri : - NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), - "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>; -def MAD32rir : - NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), - "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>; -def MAD32rii : - NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$a, i32imm:$b, i32imm:$c), - "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, imm:$c))]>; - -def MAD64rrr : - NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), - "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>; -def MAD64rri : - NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), - "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>; -def MAD64rir : - NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), - "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>; -def MAD64rii : - NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$a, i64imm:$b, i64imm:$c), - "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, imm:$c))]>; - -def INEG16 : - NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "neg.s16 \t$dst, $src;", - [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; -def INEG32 : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), - "neg.s32 \t$dst, $src;", - [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>; -def INEG64 : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "neg.s64 \t$dst, $src;", - [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>; - -//----------------------------------- -// Floating Point Arithmetic -//----------------------------------- - -// Constant 1.0f -def FloatConst1 : PatLeaf<(fpimm), [{ - return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEsingle() && - N->getValueAPF().convertToFloat() == 1.0f; -}]>; -// Constant 1.0 (double) -def DoubleConst1 : PatLeaf<(fpimm), [{ - return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() && - N->getValueAPF().convertToDouble() == 1.0; -}]>; - -// Loads FP16 constant into a register. -// -// ptxas does not have hex representation for fp16, so we can't use -// fp16 immediate values in .f16 instructions. Instead we have to load -// the constant into a register using mov.b16. -def LOAD_CONST_F16 : - NVPTXInst<(outs Float16Regs:$dst), (ins f16imm:$a), - "mov.b16 \t$dst, $a;", []>; - -defm FADD : F3_fma_component<"add", fadd>; -defm FSUB : F3_fma_component<"sub", fsub>; -defm FMUL : F3_fma_component<"mul", fmul>; - -defm FMIN : F3<"min", fminnum>; -defm FMAX : F3<"max", fmaxnum>; - -defm FABS : F2<"abs", fabs>; -defm FNEG : F2<"neg", fneg>; -defm FSQRT : F2<"sqrt.rn", fsqrt>; - -// -// F64 division -// -def FDIV641r : - NVPTXInst<(outs Float64Regs:$dst), - (ins f64imm:$a, Float64Regs:$b), - "rcp.rn.f64 \t$dst, $b;", - [(set Float64Regs:$dst, (fdiv DoubleConst1:$a, Float64Regs:$b))]>; -def FDIV64rr : - NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, Float64Regs:$b), - "div.rn.f64 \t$dst, $a, $b;", - [(set Float64Regs:$dst, (fdiv Float64Regs:$a, Float64Regs:$b))]>; -def FDIV64ri : - NVPTXInst<(outs Float64Regs:$dst), - (ins Float64Regs:$a, f64imm:$b), - "div.rn.f64 \t$dst, $a, $b;", - [(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>; - -// -// F32 Approximate reciprocal -// -def FDIV321r_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.approx.ftz.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_APPROX, doF32FTZ]>; -def FDIV321r : - NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.approx.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_APPROX]>; -// -// F32 Approximate division -// -def FDIV32approxrr_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.approx.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_APPROX, doF32FTZ]>; -def FDIV32approxri_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.approx.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[do_DIVF32_APPROX, doF32FTZ]>; -def FDIV32approxrr : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.approx.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_APPROX]>; -def FDIV32approxri : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.approx.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[do_DIVF32_APPROX]>; -// -// F32 Semi-accurate reciprocal -// -// rcp.approx gives the same result as div.full(1.0f, a) and is faster. -// -def FDIV321r_approx_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.approx.ftz.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_FULL, doF32FTZ]>; -def FDIV321r_approx : - NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.approx.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_FULL]>; -// -// F32 Semi-accurate division -// -def FDIV32rr_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.full.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_FULL, doF32FTZ]>; -def FDIV32ri_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.full.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[do_DIVF32_FULL, doF32FTZ]>; -def FDIV32rr : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.full.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[do_DIVF32_FULL]>; -def FDIV32ri : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.full.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[do_DIVF32_FULL]>; -// -// F32 Accurate reciprocal -// -def FDIV321r_prec_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.rn.ftz.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[reqPTX20, doF32FTZ]>; -def FDIV321r_prec : - NVPTXInst<(outs Float32Regs:$dst), - (ins f32imm:$a, Float32Regs:$b), - "rcp.rn.f32 \t$dst, $b;", - [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, - Requires<[reqPTX20]>; -// -// F32 Accurate division -// -def FDIV32rr_prec_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.rn.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[doF32FTZ, reqPTX20]>; -def FDIV32ri_prec_ftz : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.rn.ftz.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[doF32FTZ, reqPTX20]>; -def FDIV32rr_prec : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b), - "div.rn.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, - Requires<[reqPTX20]>; -def FDIV32ri_prec : - NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, f32imm:$b), - "div.rn.f32 \t$dst, $a, $b;", - [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, - Requires<[reqPTX20]>; - -// -// FMA -// - -multiclass FMA { - def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>, - Requires<[Pred]>; - def rri : NVPTXInst<(outs RC:$dst), - (ins RC:$a, RC:$b, ImmCls:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>, - Requires<[Pred]>; - def rir : NVPTXInst<(outs RC:$dst), - (ins RC:$a, ImmCls:$b, RC:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>, - Requires<[Pred]>; - def rii : NVPTXInst<(outs RC:$dst), - (ins RC:$a, ImmCls:$b, ImmCls:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>, - Requires<[Pred]>; -} - -multiclass FMA_F16 { - def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>, - Requires<[useFP16Math, Pred]>; -} - -defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", Float16Regs, doF32FTZ>; -defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, true>; -defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", Float16x2Regs, doF32FTZ>; -defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, true>; -defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>; -defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, true>; -defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>; - -// sin/cos -def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), - "sin.approx.f32 \t$dst, $src;", - [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>, - Requires<[allowUnsafeFPMath]>; -def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), - "cos.approx.f32 \t$dst, $src;", - [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>, - Requires<[allowUnsafeFPMath]>; - -// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)), -// i.e. "poor man's fmod()" - -// frem - f32 FTZ -def : Pat<(frem Float32Regs:$x, Float32Regs:$y), - (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32 - (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRMI_FTZ), - Float32Regs:$y))>, - Requires<[doF32FTZ]>; -def : Pat<(frem Float32Regs:$x, fpimm:$y), - (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32 - (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRMI_FTZ), - fpimm:$y))>, - Requires<[doF32FTZ]>; - -// frem - f32 -def : Pat<(frem Float32Regs:$x, Float32Regs:$y), - (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32 - (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRMI), - Float32Regs:$y))>; -def : Pat<(frem Float32Regs:$x, fpimm:$y), - (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32 - (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRMI), - fpimm:$y))>; - -// frem - f64 -def : Pat<(frem Float64Regs:$x, Float64Regs:$y), - (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64 - (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRMI), - Float64Regs:$y))>; -def : Pat<(frem Float64Regs:$x, fpimm:$y), - (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64 - (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRMI), - fpimm:$y))>; - -//----------------------------------- -// Bitwise operations -//----------------------------------- - -// Template for three-arg bitwise operations. Takes three args, Creates .b16, -// .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of OpcStr. -multiclass BITWISE { - def b1rr : - NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), - !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; - def b1ri : - NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), - !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), - [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; - def b16rr : - NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), - !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; - def b16ri : - NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), - !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; - def b32rr : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def b32ri : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; - def b64rr : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), - !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; - def b64ri : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), - !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; -} - -defm OR : BITWISE<"or", or>; -defm AND : BITWISE<"and", and>; -defm XOR : BITWISE<"xor", xor>; - -def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), - "not.pred \t$dst, $src;", - [(set Int1Regs:$dst, (not Int1Regs:$src))]>; -def NOT16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "not.b16 \t$dst, $src;", - [(set Int16Regs:$dst, (not Int16Regs:$src))]>; -def NOT32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), - "not.b32 \t$dst, $src;", - [(set Int32Regs:$dst, (not Int32Regs:$src))]>; -def NOT64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), - "not.b64 \t$dst, $src;", - [(set Int64Regs:$dst, (not Int64Regs:$src))]>; - -// Template for left/right shifts. Takes three operands, -// [dest (reg), src (reg), shift (reg or imm)]. -// dest and src may be int64, int32, or int16, but shift is always int32. -// -// This template also defines a 32-bit shift (imm, imm) instruction. -multiclass SHIFT { - def i64rr : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>; - def i64ri : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), - !strconcat(OpcStr, "64 \t$dst, $a, $b;"), - [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>; - def i32rr : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; - def i32ri : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>; - def i32ii : - NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), - !strconcat(OpcStr, "32 \t$dst, $a, $b;"), - [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>; - def i16rr : - NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>; - def i16ri : - NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), - !strconcat(OpcStr, "16 \t$dst, $a, $b;"), - [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>; -} - -defm SHL : SHIFT<"shl.b", shl>; -defm SRA : SHIFT<"shr.s", sra>; -defm SRL : SHIFT<"shr.u", srl>; - -// Bit-reverse -def BREV32 : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), - "brev.b32 \t$dst, $a;", - [(set Int32Regs:$dst, (bitreverse Int32Regs:$a))]>; -def BREV64 : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a), - "brev.b64 \t$dst, $a;", - [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>; - -// -// Rotate: Use ptx shf instruction if available. -// - -// 32 bit r2 = rotl r1, n -// => -// r2 = shf.l r1, r1, n -def ROTL32imm_hw : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), - "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>, - Requires<[hasHWROT32]>; - -def ROTL32reg_hw : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), - "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, - Requires<[hasHWROT32]>; - -// 32 bit r2 = rotr r1, n -// => -// r2 = shf.r r1, r1, n -def ROTR32imm_hw : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), - "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>, - Requires<[hasHWROT32]>; - -def ROTR32reg_hw : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), - "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", - [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, - Requires<[hasHWROT32]>; - -// 32-bit software rotate by immediate. $amt2 should equal 32 - $amt1. -def ROT32imm_sw : - NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), - "{{\n\t" - ".reg .b32 %lhs;\n\t" - ".reg .b32 %rhs;\n\t" - "shl.b32 \t%lhs, $src, $amt1;\n\t" - "shr.b32 \t%rhs, $src, $amt2;\n\t" - "add.u32 \t$dst, %lhs, %rhs;\n\t" - "}}", - []>; - -def SUB_FRM_32 : SDNodeXFormgetTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32); -}]>; - -def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), - (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, - Requires<[noHWROT32]>; -def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), - (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>, - Requires<[noHWROT32]>; - -// 32-bit software rotate left by register. -def ROTL32reg_sw : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), - "{{\n\t" - ".reg .b32 %lhs;\n\t" - ".reg .b32 %rhs;\n\t" - ".reg .b32 %amt2;\n\t" - "shl.b32 \t%lhs, $src, $amt;\n\t" - "sub.s32 \t%amt2, 32, $amt;\n\t" - "shr.b32 \t%rhs, $src, %amt2;\n\t" - "add.u32 \t$dst, %lhs, %rhs;\n\t" - "}}", - [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, - Requires<[noHWROT32]>; - -// 32-bit software rotate right by register. -def ROTR32reg_sw : - NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), - "{{\n\t" - ".reg .b32 %lhs;\n\t" - ".reg .b32 %rhs;\n\t" - ".reg .b32 %amt2;\n\t" - "shr.b32 \t%lhs, $src, $amt;\n\t" - "sub.s32 \t%amt2, 32, $amt;\n\t" - "shl.b32 \t%rhs, $src, %amt2;\n\t" - "add.u32 \t$dst, %lhs, %rhs;\n\t" - "}}", - [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, - Requires<[noHWROT32]>; - -// 64-bit software rotate by immediate. $amt2 should equal 64 - $amt1. -def ROT64imm_sw : - NVPTXInst<(outs Int64Regs:$dst), - (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2), - "{{\n\t" - ".reg .b64 %lhs;\n\t" - ".reg .b64 %rhs;\n\t" - "shl.b64 \t%lhs, $src, $amt1;\n\t" - "shr.b64 \t%rhs, $src, $amt2;\n\t" - "add.u64 \t$dst, %lhs, %rhs;\n\t" - "}}", - []>; - -def SUB_FRM_64 : SDNodeXFormgetTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32); -}]>; - -def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)), - (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>; -def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), - (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; - -// 64-bit software rotate left by register. -def ROTL64reg_sw : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), - "{{\n\t" - ".reg .b64 %lhs;\n\t" - ".reg .b64 %rhs;\n\t" - ".reg .u32 %amt2;\n\t" - "shl.b64 \t%lhs, $src, $amt;\n\t" - "sub.u32 \t%amt2, 64, $amt;\n\t" - "shr.b64 \t%rhs, $src, %amt2;\n\t" - "add.u64 \t$dst, %lhs, %rhs;\n\t" - "}}", - [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>; - -def ROTR64reg_sw : - NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), - "{{\n\t" - ".reg .b64 %lhs;\n\t" - ".reg .b64 %rhs;\n\t" - ".reg .u32 %amt2;\n\t" - "shr.b64 \t%lhs, $src, $amt;\n\t" - "sub.u32 \t%amt2, 64, $amt;\n\t" - "shl.b64 \t%rhs, $src, %amt2;\n\t" - "add.u64 \t$dst, %lhs, %rhs;\n\t" - "}}", - [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; - -// -// Funnnel shift in clamp mode -// - -// Create SDNodes so they can be used in the DAG code, e.g. -// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) -def SDTIntShiftDOp : - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisInt<3>]>; -def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; -def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; - -def FUNSHFLCLAMP : - NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (FUN_SHFL_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>; - -def FUNSHFRCLAMP : - NVPTXInst<(outs Int32Regs:$dst), - (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), - "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;", - [(set Int32Regs:$dst, - (FUN_SHFR_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>; - -// -// BFE - bit-field extract -// - -// Template for BFE instructions. Takes four args, -// [dest (reg), src (reg), start (reg or imm), end (reg or imm)]. -// Start may be an imm only if end is also an imm. FIXME: Is this a -// restriction in PTX? -// -// dest and src may be int32 or int64, but start and end are always int32. -multiclass BFE { - def rrr - : NVPTXInst<(outs RC:$d), - (ins RC:$a, Int32Regs:$b, Int32Regs:$c), - !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; - def rri - : NVPTXInst<(outs RC:$d), - (ins RC:$a, Int32Regs:$b, i32imm:$c), - !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; - def rii - : NVPTXInst<(outs RC:$d), - (ins RC:$a, i32imm:$b, i32imm:$c), - !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; -} - -let hasSideEffects = 0 in { - defm BFE_S32 : BFE<"s32", Int32Regs>; - defm BFE_U32 : BFE<"u32", Int32Regs>; - defm BFE_S64 : BFE<"s64", Int64Regs>; - defm BFE_U64 : BFE<"u64", Int64Regs>; -} - -//----------------------------------- -// Comparison instructions (setp, set) -//----------------------------------- - -// FIXME: This doesn't cover versions of set and setp that combine with a -// boolean predicate, e.g. setp.eq.and.b16. - -let hasSideEffects = 0 in { - multiclass SETP { - def rr : - NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp), - !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, - " \t$dst, $a, $b;"), []>; - def ri : - NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp), - !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, - " \t$dst, $a, $b;"), []>; - def ir : - NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp), - !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, - " \t$dst, $a, $b;"), []>; - } -} - -defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>; -defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>; -defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>; -defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>; -defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>; -defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>; -defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>; -defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>; -defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>; -defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>; -defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; -def SETP_f16rr : - NVPTXInst<(outs Int1Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b, CmpMode:$cmp), - "setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;", - []>, Requires<[useFP16Math]>; - -def SETP_f16x2rr : - NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q), - (ins Float16x2Regs:$a, Float16x2Regs:$b, CmpMode:$cmp), - "setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;", - []>, - Requires<[useFP16Math]>; - - -// FIXME: This doesn't appear to be correct. The "set" mnemonic has the form -// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination -// reg, either u32, s32, or f32. Anyway these aren't used at the moment. - -let hasSideEffects = 0 in { - multiclass SET { - def rr : NVPTXInst<(outs Int32Regs:$dst), - (ins RC:$a, RC:$b, CmpMode:$cmp), - !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>; - def ri : NVPTXInst<(outs Int32Regs:$dst), - (ins RC:$a, ImmCls:$b, CmpMode:$cmp), - !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>; - def ir : NVPTXInst<(outs Int32Regs:$dst), - (ins ImmCls:$a, RC:$b, CmpMode:$cmp), - !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>; - } -} - -defm SET_b16 : SET<"b16", Int16Regs, i16imm>; -defm SET_s16 : SET<"s16", Int16Regs, i16imm>; -defm SET_u16 : SET<"u16", Int16Regs, i16imm>; -defm SET_b32 : SET<"b32", Int32Regs, i32imm>; -defm SET_s32 : SET<"s32", Int32Regs, i32imm>; -defm SET_u32 : SET<"u32", Int32Regs, i32imm>; -defm SET_b64 : SET<"b64", Int64Regs, i64imm>; -defm SET_s64 : SET<"s64", Int64Regs, i64imm>; -defm SET_u64 : SET<"u64", Int64Regs, i64imm>; -defm SET_f16 : SET<"f16", Float16Regs, f16imm>; -defm SET_f32 : SET<"f32", Float32Regs, f32imm>; -defm SET_f64 : SET<"f64", Float64Regs, f64imm>; - -//----------------------------------- -// Selection instructions (selp) -//----------------------------------- - -// FIXME: Missing slct - -// selp instructions that don't have any pattern matches; we explicitly use -// them within this file. -let hasSideEffects = 0 in { - multiclass SELP { - def rr : NVPTXInst<(outs RC:$dst), - (ins RC:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>; - def ri : NVPTXInst<(outs RC:$dst), - (ins RC:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>; - def ir : NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>; - def ii : NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>; - } - - multiclass SELP_PATTERN { - def rr : - NVPTXInst<(outs RC:$dst), - (ins RC:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>; - def ri : - NVPTXInst<(outs RC:$dst), - (ins RC:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>; - def ir : - NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, RC:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>; - def ii : - NVPTXInst<(outs RC:$dst), - (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), - !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), - [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; - } -} - -// Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as -// good. -defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>; -defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>; -defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>; -defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>; -defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>; -defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>; -defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>; -defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>; -defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>; -defm SELP_f16 : SELP_PATTERN<"b16", Float16Regs, f16imm, fpimm>; -defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>; -defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>; - -def SELP_f16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b, Int1Regs:$p), - "selp.b32 \t$dst, $a, $b, $p;", - [(set Float16x2Regs:$dst, - (select Int1Regs:$p, Float16x2Regs:$a, Float16x2Regs:$b))]>; - -//----------------------------------- -// Data Movement (Load / Store, Move) -//----------------------------------- - -def ADDRri : ComplexPattern; -def ADDRri64 : ComplexPattern; - -def MEMri : Operand { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops Int32Regs, i32imm); -} -def MEMri64 : Operand { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops Int64Regs, i64imm); -} - -def imem : Operand { - let PrintMethod = "printOperand"; -} - -def imemAny : Operand { - let PrintMethod = "printOperand"; -} - -def LdStCode : Operand { - let PrintMethod = "printLdStCode"; -} - -def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; -def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; - -// Load a memory address into a u32 or u64 register. -def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), - "mov.u32 \t$dst, $a;", - [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>; -def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), - "mov.u64 \t$dst, $a;", - [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; - -// Get pointer to local stack. -let hasSideEffects = 0 in { - def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), - "mov.u32 \t$d, __local_depot$num;", []>; - def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), - "mov.u64 \t$d, __local_depot$num;", []>; -} - - -// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp -let IsSimpleMove=1, hasSideEffects=0 in { - def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), - "mov.pred \t$dst, $sss;", []>; - def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), - "mov.u16 \t$dst, $sss;", []>; - def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), - "mov.u32 \t$dst, $sss;", []>; - def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), - "mov.u64 \t$dst, $sss;", []>; - - def FMOV16rr : NVPTXInst<(outs Float16Regs:$dst), (ins Float16Regs:$src), - // We have to use .b16 here as there's no mov.f16. - "mov.b16 \t$dst, $src;", []>; - def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), - "mov.f32 \t$dst, $src;", []>; - def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), - "mov.f64 \t$dst, $src;", []>; -} - -def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), - "mov.pred \t$dst, $src;", - [(set Int1Regs:$dst, imm:$src)]>; -def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), - "mov.u16 \t$dst, $src;", - [(set Int16Regs:$dst, imm:$src)]>; -def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), - "mov.u32 \t$dst, $src;", - [(set Int32Regs:$dst, imm:$src)]>; -def IMOV64i : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), - "mov.u64 \t$dst, $src;", - [(set Int64Regs:$dst, imm:$src)]>; - -def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), - "mov.f32 \t$dst, $src;", - [(set Float32Regs:$dst, fpimm:$src)]>; -def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), - "mov.f64 \t$dst, $src;", - [(set Float64Regs:$dst, fpimm:$src)]>; - -def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; - -//---- Copy Frame Index ---- -def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), - "add.u32 \t$dst, ${addr:add};", - [(set Int32Regs:$dst, ADDRri:$addr)]>; -def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), - "add.u64 \t$dst, ${addr:add};", - [(set Int64Regs:$dst, ADDRri64:$addr)]>; - -//----------------------------------- -// Comparison and Selection -//----------------------------------- - -multiclass ISET_FORMAT { - // i16 -> pred - def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)), - (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; - def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)), - (setp_16ri Int16Regs:$a, imm:$b, Mode)>; - def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)), - (setp_16ir imm:$a, Int16Regs:$b, Mode)>; - // i32 -> pred - def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)), - (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; - def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)), - (setp_32ri Int32Regs:$a, imm:$b, Mode)>; - def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)), - (setp_32ir imm:$a, Int32Regs:$b, Mode)>; - // i64 -> pred - def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)), - (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; - def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)), - (setp_64ri Int64Regs:$a, imm:$b, Mode)>; - def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)), - (setp_64ir imm:$a, Int64Regs:$b, Mode)>; - - // i16 -> i32 - def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)), - (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; - def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)), - (set_16ri Int16Regs:$a, imm:$b, Mode)>; - def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)), - (set_16ir imm:$a, Int16Regs:$b, Mode)>; - // i32 -> i32 - def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)), - (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; - def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)), - (set_32ri Int32Regs:$a, imm:$b, Mode)>; - def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)), - (set_32ir imm:$a, Int32Regs:$b, Mode)>; - // i64 -> i32 - def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)), - (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; - def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)), - (set_64ri Int64Regs:$a, imm:$b, Mode)>; - def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)), - (set_64ir imm:$a, Int64Regs:$b, Mode)>; -} - -multiclass ISET_FORMAT_SIGNED - : ISET_FORMAT { - // TableGen doesn't like empty multiclasses. - def : PatLeaf<(i32 0)>; -} - -multiclass ISET_FORMAT_UNSIGNED - : ISET_FORMAT { - // TableGen doesn't like empty multiclasses. - def : PatLeaf<(i32 0)>; -} - -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_SIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; -defm : ISET_FORMAT_UNSIGNED; - -// i1 compares -def : Pat<(setne Int1Regs:$a, Int1Regs:$b), - (XORb1rr Int1Regs:$a, Int1Regs:$b)>; -def : Pat<(setune Int1Regs:$a, Int1Regs:$b), - (XORb1rr Int1Regs:$a, Int1Regs:$b)>; - -def : Pat<(seteq Int1Regs:$a, Int1Regs:$b), - (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; -def : Pat<(setueq Int1Regs:$a, Int1Regs:$b), - (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; - -// i1 compare -> i32 -def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), - (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; -def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), - (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; - - - -multiclass FSET_FORMAT { - // f16 -> pred - def : Pat<(i1 (OpNode Float16Regs:$a, Float16Regs:$b)), - (SETP_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, - Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode Float16Regs:$a, Float16Regs:$b)), - (SETP_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, - Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode Float16Regs:$a, fpimm:$b)), - (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, - Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode Float16Regs:$a, fpimm:$b)), - (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, - Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode fpimm:$a, Float16Regs:$b)), - (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, - Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode fpimm:$a, Float16Regs:$b)), - (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, - Requires<[useFP16Math]>; - - // f32 -> pred - def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), - (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, - Requires<[doF32FTZ]>; - def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), - (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; - def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), - (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, - Requires<[doF32FTZ]>; - def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), - (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>; - def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), - (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, - Requires<[doF32FTZ]>; - def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), - (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>; - - // f64 -> pred - def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)), - (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; - def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)), - (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>; - def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)), - (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>; - - // f16 -> i32 - def : Pat<(i32 (OpNode Float16Regs:$a, Float16Regs:$b)), - (SET_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, - Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode Float16Regs:$a, Float16Regs:$b)), - (SET_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, - Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode Float16Regs:$a, fpimm:$b)), - (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, - Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode Float16Regs:$a, fpimm:$b)), - (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, - Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode fpimm:$a, Float16Regs:$b)), - (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, - Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode fpimm:$a, Float16Regs:$b)), - (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, - Requires<[useFP16Math]>; - - // f32 -> i32 - def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), - (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, - Requires<[doF32FTZ]>; - def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), - (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; - def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), - (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, - Requires<[doF32FTZ]>; - def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), - (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>; - def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), - (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, - Requires<[doF32FTZ]>; - def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), - (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>; - - // f64 -> i32 - def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)), - (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; - def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)), - (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>; - def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)), - (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>; -} - -defm FSetOGT : FSET_FORMAT; -defm FSetOLT : FSET_FORMAT; -defm FSetOGE : FSET_FORMAT; -defm FSetOLE : FSET_FORMAT; -defm FSetOEQ : FSET_FORMAT; -defm FSetONE : FSET_FORMAT; - -defm FSetUGT : FSET_FORMAT; -defm FSetULT : FSET_FORMAT; -defm FSetUGE : FSET_FORMAT; -defm FSetULE : FSET_FORMAT; -defm FSetUEQ : FSET_FORMAT; -defm FSetUNE : FSET_FORMAT; - -defm FSetGT : FSET_FORMAT; -defm FSetLT : FSET_FORMAT; -defm FSetGE : FSET_FORMAT; -defm FSetLE : FSET_FORMAT; -defm FSetEQ : FSET_FORMAT; -defm FSetNE : FSET_FORMAT; - -defm FSetNUM : FSET_FORMAT; -defm FSetNAN : FSET_FORMAT; - -// FIXME: What is this doing here? Can it be deleted? -// def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, -// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; - -def SDTDeclareParamProfile : - SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; -def SDTDeclareScalarParamProfile : - SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; -def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; -def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>; -def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>; -def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; -def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>; -def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>; -def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; -def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; -def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; -def SDTCallVoidProfile : SDTypeProfile<0, 1, []>; -def SDTCallValProfile : SDTypeProfile<1, 0, []>; -def SDTMoveParamProfile : SDTypeProfile<1, 1, []>; -def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; -def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>; -def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; -def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; - -def DeclareParam : - SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def DeclareScalarParam : - SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def DeclareRetParam : - SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def DeclareRet : - SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def LoadParam : - SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, - [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; -def LoadParamV2 : - SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile, - [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; -def LoadParamV4 : - SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile, - [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; -def PrintCall : - SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def PrintConvergentCall : - SDNode<"NVPTXISD::PrintConvergentCall", SDTPrintCallProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def PrintCallUni : - SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def PrintConvergentCallUni : - SDNode<"NVPTXISD::PrintConvergentCallUni", SDTPrintCallUniProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParam : - SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamV2 : - SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamV4 : - SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamU32 : - SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def StoreParamS32 : - SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallArgBegin : - SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallArg : - SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def LastCallArg : - SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallArgEnd : - SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallVoid : - SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def Prototype : - SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def CallVal : - SDNode<"NVPTXISD::CallVal", SDTCallValProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def MoveParam : - SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>; -def StoreRetval : - SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, - [SDNPHasChain, SDNPSideEffect]>; -def StoreRetvalV2 : - SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile, - [SDNPHasChain, SDNPSideEffect]>; -def StoreRetvalV4 : - SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, - [SDNPHasChain, SDNPSideEffect]>; -def PseudoUseParam : - SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def RETURNNode : - SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, - [SDNPHasChain, SDNPSideEffect]>; - -let mayLoad = 1 in { - class LoadParamMemInst : - NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), - !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"), - []>; - - class LoadParamV2MemInst : - NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b), - !strconcat("ld.param.v2", opstr, - " \t{{$dst, $dst2}}, [retval0+$b];"), []>; - - class LoadParamV4MemInst : - NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3, - regclass:$dst4), - (ins i32imm:$b), - !strconcat("ld.param.v4", opstr, - " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), - []>; -} - -class LoadParamRegInst : - NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), - !strconcat("mov", opstr, " \t$dst, retval$b;"), - [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; - -let mayStore = 1 in { - class StoreParamInst : - NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), - !strconcat("st.param", opstr, " \t[param$a+$b], $val;"), - []>; - - class StoreParamV2Inst : - NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, - i32imm:$a, i32imm:$b), - !strconcat("st.param.v2", opstr, - " \t[param$a+$b], {{$val, $val2}};"), - []>; - - class StoreParamV4Inst : - NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3, - regclass:$val4, i32imm:$a, - i32imm:$b), - !strconcat("st.param.v4", opstr, - " \t[param$a+$b], {{$val, $val2, $val3, $val4}};"), - []>; - - class StoreRetvalInst : - NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), - !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"), - []>; - - class StoreRetvalV2Inst : - NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a), - !strconcat("st.param.v2", opstr, - " \t[func_retval0+$a], {{$val, $val2}};"), - []>; - - class StoreRetvalV4Inst : - NVPTXInst<(outs), - (ins regclass:$val, regclass:$val2, regclass:$val3, - regclass:$val4, i32imm:$a), - !strconcat("st.param.v4", opstr, - " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), - []>; -} - -let isCall=1 in { - multiclass CALL { - def PrintCallNoRetInst : NVPTXInst<(outs), (ins), - !strconcat(OpcStr, " "), [(OpNode (i32 0))]>; - def PrintCallRetInst1 : NVPTXInst<(outs), (ins), - !strconcat(OpcStr, " (retval0), "), [(OpNode (i32 1))]>; - def PrintCallRetInst2 : NVPTXInst<(outs), (ins), - !strconcat(OpcStr, " (retval0, retval1), "), [(OpNode (i32 2))]>; - def PrintCallRetInst3 : NVPTXInst<(outs), (ins), - !strconcat(OpcStr, " (retval0, retval1, retval2), "), [(OpNode (i32 3))]>; - def PrintCallRetInst4 : NVPTXInst<(outs), (ins), - !strconcat(OpcStr, " (retval0, retval1, retval2, retval3), "), - [(OpNode (i32 4))]>; - def PrintCallRetInst5 : NVPTXInst<(outs), (ins), - !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4), "), - [(OpNode (i32 5))]>; - def PrintCallRetInst6 : NVPTXInst<(outs), (ins), - !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4, " - "retval5), "), - [(OpNode (i32 6))]>; - def PrintCallRetInst7 : NVPTXInst<(outs), (ins), - !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4, " - "retval5, retval6), "), - [(OpNode (i32 7))]>; - def PrintCallRetInst8 : NVPTXInst<(outs), (ins), - !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4, " - "retval5, retval6, retval7), "), - [(OpNode (i32 8))]>; - } -} - -defm Call : CALL<"call", PrintCall>; -defm CallUni : CALL<"call.uni", PrintCallUni>; - -// Convergent call instructions. These are identical to regular calls, except -// they have the isConvergent bit set. -let isConvergent=1 in { - defm ConvergentCall : CALL<"call", PrintConvergentCall>; - defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>; -} - -def LoadParamMemI64 : LoadParamMemInst; -def LoadParamMemI32 : LoadParamMemInst; -def LoadParamMemI16 : LoadParamMemInst; -def LoadParamMemI8 : LoadParamMemInst; -def LoadParamMemV2I64 : LoadParamV2MemInst; -def LoadParamMemV2I32 : LoadParamV2MemInst; -def LoadParamMemV2I16 : LoadParamV2MemInst; -def LoadParamMemV2I8 : LoadParamV2MemInst; -def LoadParamMemV4I32 : LoadParamV4MemInst; -def LoadParamMemV4I16 : LoadParamV4MemInst; -def LoadParamMemV4I8 : LoadParamV4MemInst; -def LoadParamMemF16 : LoadParamMemInst; -def LoadParamMemF16x2 : LoadParamMemInst; -def LoadParamMemF32 : LoadParamMemInst; -def LoadParamMemF64 : LoadParamMemInst; -def LoadParamMemV2F16 : LoadParamV2MemInst; -def LoadParamMemV2F16x2: LoadParamV2MemInst; -def LoadParamMemV2F32 : LoadParamV2MemInst; -def LoadParamMemV2F64 : LoadParamV2MemInst; -def LoadParamMemV4F16 : LoadParamV4MemInst; -def LoadParamMemV4F16x2: LoadParamV4MemInst; -def LoadParamMemV4F32 : LoadParamV4MemInst; - -def StoreParamI64 : StoreParamInst; -def StoreParamI32 : StoreParamInst; - -def StoreParamI16 : StoreParamInst; -def StoreParamI8 : StoreParamInst; -def StoreParamV2I64 : StoreParamV2Inst; -def StoreParamV2I32 : StoreParamV2Inst; -def StoreParamV2I16 : StoreParamV2Inst; -def StoreParamV2I8 : StoreParamV2Inst; - -def StoreParamV4I32 : StoreParamV4Inst; -def StoreParamV4I16 : StoreParamV4Inst; -def StoreParamV4I8 : StoreParamV4Inst; - -def StoreParamF16 : StoreParamInst; -def StoreParamF16x2 : StoreParamInst; -def StoreParamF32 : StoreParamInst; -def StoreParamF64 : StoreParamInst; -def StoreParamV2F16 : StoreParamV2Inst; -def StoreParamV2F16x2 : StoreParamV2Inst; -def StoreParamV2F32 : StoreParamV2Inst; -def StoreParamV2F64 : StoreParamV2Inst; -def StoreParamV4F16 : StoreParamV4Inst; -def StoreParamV4F16x2 : StoreParamV4Inst; -def StoreParamV4F32 : StoreParamV4Inst; - -def StoreRetvalI64 : StoreRetvalInst; -def StoreRetvalI32 : StoreRetvalInst; -def StoreRetvalI16 : StoreRetvalInst; -def StoreRetvalI8 : StoreRetvalInst; -def StoreRetvalV2I64 : StoreRetvalV2Inst; -def StoreRetvalV2I32 : StoreRetvalV2Inst; -def StoreRetvalV2I16 : StoreRetvalV2Inst; -def StoreRetvalV2I8 : StoreRetvalV2Inst; -def StoreRetvalV4I32 : StoreRetvalV4Inst; -def StoreRetvalV4I16 : StoreRetvalV4Inst; -def StoreRetvalV4I8 : StoreRetvalV4Inst; - -def StoreRetvalF64 : StoreRetvalInst; -def StoreRetvalF32 : StoreRetvalInst; -def StoreRetvalF16 : StoreRetvalInst; -def StoreRetvalF16x2 : StoreRetvalInst; -def StoreRetvalV2F64 : StoreRetvalV2Inst; -def StoreRetvalV2F32 : StoreRetvalV2Inst; -def StoreRetvalV2F16 : StoreRetvalV2Inst; -def StoreRetvalV2F16x2: StoreRetvalV2Inst; -def StoreRetvalV4F32 : StoreRetvalV4Inst; -def StoreRetvalV4F16 : StoreRetvalV4Inst; -def StoreRetvalV4F16x2: StoreRetvalV4Inst; - -def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; -def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; -def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>; -def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>; - -class CallArgInst : - NVPTXInst<(outs), (ins regclass:$a), "$a, ", - [(CallArg (i32 0), regclass:$a)]>; - -class LastCallArgInst : - NVPTXInst<(outs), (ins regclass:$a), "$a", - [(LastCallArg (i32 0), regclass:$a)]>; - -def CallArgI64 : CallArgInst; -def CallArgI32 : CallArgInst; -def CallArgI16 : CallArgInst; -def CallArgF64 : CallArgInst; -def CallArgF32 : CallArgInst; - -def LastCallArgI64 : LastCallArgInst; -def LastCallArgI32 : LastCallArgInst; -def LastCallArgI16 : LastCallArgInst; -def LastCallArgF64 : LastCallArgInst; -def LastCallArgF32 : LastCallArgInst; - -def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ", - [(CallArg (i32 0), (i32 imm:$a))]>; -def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a", - [(LastCallArg (i32 0), (i32 imm:$a))]>; - -def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", - [(CallArg (i32 1), (i32 imm:$a))]>; -def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", - [(LastCallArg (i32 1), (i32 imm:$a))]>; - -def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ", - [(CallVoid (Wrapper tglobaladdr:$addr))]>; -def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ", - [(CallVoid Int32Regs:$addr)]>; -def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ", - [(CallVoid Int64Regs:$addr)]>; -def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;", - [(Prototype (i32 imm:$val))]>; - -def DeclareRetMemInst : - NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num), - ".param .align $align .b8 retval$num[$size];", - [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>; -def DeclareRetScalarInst : - NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), - ".param .b$size retval$num;", - [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>; -def DeclareRetRegInst : - NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), - ".reg .b$size retval$num;", - [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>; - -def DeclareParamInst : - NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size), - ".param .align $align .b8 param$a[$size];", - [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>; -def DeclareScalarParamInst : - NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), - ".param .b$size param$a;", - [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>; -def DeclareScalarRegInst : - NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), - ".reg .b$size param$a;", - [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>; - -class MoveParamInst : - NVPTXInst<(outs regclass:$dst), (ins regclass:$src), - !strconcat("mov", asmstr, " \t$dst, $src;"), - [(set regclass:$dst, (MoveParam regclass:$src))]>; - -def MoveParamI64 : MoveParamInst; -def MoveParamI32 : MoveParamInst; -def MoveParamI16 : - NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "cvt.u16.u32 \t$dst, $src;", - [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; -def MoveParamF64 : MoveParamInst; -def MoveParamF32 : MoveParamInst; -def MoveParamF16 : MoveParamInst; - -class PseudoUseParamInst : - NVPTXInst<(outs), (ins regclass:$src), - "// Pseudo use of $src", - [(PseudoUseParam regclass:$src)]>; - -def PseudoUseParamI64 : PseudoUseParamInst; -def PseudoUseParamI32 : PseudoUseParamInst; -def PseudoUseParamI16 : PseudoUseParamInst; -def PseudoUseParamF64 : PseudoUseParamInst; -def PseudoUseParamF32 : PseudoUseParamInst; - - -// -// Load / Store Handling -// -multiclass LD { - def _avar : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr];", []>; - def _areg : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr];", []>; - def _areg_64 : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr];", []>; - def _ari : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr+$offset];", []>; - def _ari_64 : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr+$offset];", []>; - def _asi : NVPTXInst< - (outs regclass:$dst), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t$dst, [$addr+$offset];", []>; -} - -let mayLoad=1, hasSideEffects=0 in { - defm LD_i8 : LD; - defm LD_i16 : LD; - defm LD_i32 : LD; - defm LD_i64 : LD; - defm LD_f16 : LD; - defm LD_f16x2 : LD; - defm LD_f32 : LD; - defm LD_f64 : LD; -} - -multiclass ST { - def _avar : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, imem:$addr), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr], $src;", []>; - def _areg : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr], $src;", []>; - def _areg_64 : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr], $src;", []>; - def _ari : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr+$offset], $src;", []>; - def _ari_64 : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr+$offset], $src;", []>; - def _asi : NVPTXInst< - (outs), - (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, - LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" - " \t[$addr+$offset], $src;", []>; -} - -let mayStore=1, hasSideEffects=0 in { - defm ST_i8 : ST; - defm ST_i16 : ST; - defm ST_i32 : ST; - defm ST_i64 : ST; - defm ST_f16 : ST; - defm ST_f16x2 : ST; - defm ST_f32 : ST; - defm ST_f64 : ST; -} - -// The following is used only in and after vector elementizations. Vector -// elementization happens at the machine instruction level, so the following -// instructions never appear in the DAG. -multiclass LD_VEC { - def _v2_avar : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr];", []>; - def _v2_areg : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr];", []>; - def _v2_areg_64 : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr];", []>; - def _v2_ari : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; - def _v2_ari_64 : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; - def _v2_asi : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr, i32imm:$offset), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; - def _v4_avar : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; - def _v4_areg : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; - def _v4_areg_64 : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; - def _v4_ari : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; - def _v4_ari_64 : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; - def _v4_asi : NVPTXInst< - (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), - (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr, i32imm:$offset), - "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; -} -let mayLoad=1, hasSideEffects=0 in { - defm LDV_i8 : LD_VEC; - defm LDV_i16 : LD_VEC; - defm LDV_i32 : LD_VEC; - defm LDV_i64 : LD_VEC; - defm LDV_f16 : LD_VEC; - defm LDV_f16x2 : LD_VEC; - defm LDV_f32 : LD_VEC; - defm LDV_f64 : LD_VEC; -} - -multiclass ST_VEC { - def _v2_avar : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr], {{$src1, $src2}};", []>; - def _v2_areg : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr], {{$src1, $src2}};", []>; - def _v2_areg_64 : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr], {{$src1, $src2}};", []>; - def _v2_ari : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, - i32imm:$offset), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2}};", []>; - def _v2_ari_64 : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, - i32imm:$offset), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2}};", []>; - def _v2_asi : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, - LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, - i32imm:$offset), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2}};", []>; - def _v4_avar : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; - def _v4_areg : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; - def _v4_areg_64 : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; - def _v4_ari : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; - def _v4_ari_64 : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " - "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; - def _v4_asi : NVPTXInst< - (outs), - (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, - LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, - i32imm:$fromWidth, imem:$addr, i32imm:$offset), - "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}" - "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; -} - -let mayStore=1, hasSideEffects=0 in { - defm STV_i8 : ST_VEC; - defm STV_i16 : ST_VEC; - defm STV_i32 : ST_VEC; - defm STV_i64 : ST_VEC; - defm STV_f16 : ST_VEC; - defm STV_f16x2 : ST_VEC; - defm STV_f32 : ST_VEC; - defm STV_f64 : ST_VEC; -} - -//---- Conversion ---- - -class F_BITCONVERT : - NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a), - !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")), - [(set regclassOut:$d, (bitconvert regclassIn:$a))]>; - -def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>; -def BITCONVERT_16_F2I : F_BITCONVERT<"16", Float16Regs, Int16Regs>; -def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>; -def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>; -def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>; -def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>; -def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", Int32Regs, Float16x2Regs>; -def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", Float16x2Regs, Int32Regs>; - -// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where -// we cannot specify floating-point literals in isel patterns. Therefore, we -// use an integer selp to select either 1 or 0 and then cvt to floating-point. - -// sint -> f16 -def : Pat<(f16 (sint_to_fp Int1Regs:$a)), - (CVT_f16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f16 (sint_to_fp Int16Regs:$a)), - (CVT_f16_s16 Int16Regs:$a, CvtRN)>; -def : Pat<(f16 (sint_to_fp Int32Regs:$a)), - (CVT_f16_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(f16 (sint_to_fp Int64Regs:$a)), - (CVT_f16_s64 Int64Regs:$a, CvtRN)>; - -// uint -> f16 -def : Pat<(f16 (uint_to_fp Int1Regs:$a)), - (CVT_f16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f16 (uint_to_fp Int16Regs:$a)), - (CVT_f16_u16 Int16Regs:$a, CvtRN)>; -def : Pat<(f16 (uint_to_fp Int32Regs:$a)), - (CVT_f16_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(f16 (uint_to_fp Int64Regs:$a)), - (CVT_f16_u64 Int64Regs:$a, CvtRN)>; - -// sint -> f32 -def : Pat<(f32 (sint_to_fp Int1Regs:$a)), - (CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f32 (sint_to_fp Int16Regs:$a)), - (CVT_f32_s16 Int16Regs:$a, CvtRN)>; -def : Pat<(f32 (sint_to_fp Int32Regs:$a)), - (CVT_f32_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(f32 (sint_to_fp Int64Regs:$a)), - (CVT_f32_s64 Int64Regs:$a, CvtRN)>; - -// uint -> f32 -def : Pat<(f32 (uint_to_fp Int1Regs:$a)), - (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f32 (uint_to_fp Int16Regs:$a)), - (CVT_f32_u16 Int16Regs:$a, CvtRN)>; -def : Pat<(f32 (uint_to_fp Int32Regs:$a)), - (CVT_f32_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(f32 (uint_to_fp Int64Regs:$a)), - (CVT_f32_u64 Int64Regs:$a, CvtRN)>; - -// sint -> f64 -def : Pat<(f64 (sint_to_fp Int1Regs:$a)), - (CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f64 (sint_to_fp Int16Regs:$a)), - (CVT_f64_s16 Int16Regs:$a, CvtRN)>; -def : Pat<(f64 (sint_to_fp Int32Regs:$a)), - (CVT_f64_s32 Int32Regs:$a, CvtRN)>; -def : Pat<(f64 (sint_to_fp Int64Regs:$a)), - (CVT_f64_s64 Int64Regs:$a, CvtRN)>; - -// uint -> f64 -def : Pat<(f64 (uint_to_fp Int1Regs:$a)), - (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; -def : Pat<(f64 (uint_to_fp Int16Regs:$a)), - (CVT_f64_u16 Int16Regs:$a, CvtRN)>; -def : Pat<(f64 (uint_to_fp Int32Regs:$a)), - (CVT_f64_u32 Int32Regs:$a, CvtRN)>; -def : Pat<(f64 (uint_to_fp Int64Regs:$a)), - (CVT_f64_u64 Int64Regs:$a, CvtRN)>; - - -// f16 -> sint -def : Pat<(i1 (fp_to_sint Float16Regs:$a)), - (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint Float16Regs:$a)), - (CVT_s16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i16 (fp_to_sint Float16Regs:$a)), - (CVT_s16_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_sint Float16Regs:$a)), - (CVT_s32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i32 (fp_to_sint Float16Regs:$a)), - (CVT_s32_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_sint Float16Regs:$a)), - (CVT_s64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i64 (fp_to_sint Float16Regs:$a)), - (CVT_s64_f16 Float16Regs:$a, CvtRZI)>; - -// f16 -> uint -def : Pat<(i1 (fp_to_uint Float16Regs:$a)), - (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint Float16Regs:$a)), - (CVT_u16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i16 (fp_to_uint Float16Regs:$a)), - (CVT_u16_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint Float16Regs:$a)), - (CVT_u32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i32 (fp_to_uint Float16Regs:$a)), - (CVT_u32_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint Float16Regs:$a)), - (CVT_u64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i64 (fp_to_uint Float16Regs:$a)), - (CVT_u64_f16 Float16Regs:$a, CvtRZI)>; - -// f32 -> sint -def : Pat<(i1 (fp_to_sint Float32Regs:$a)), - (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint Float32Regs:$a)), - (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i16 (fp_to_sint Float32Regs:$a)), - (CVT_s16_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_sint Float32Regs:$a)), - (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i32 (fp_to_sint Float32Regs:$a)), - (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_sint Float32Regs:$a)), - (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i64 (fp_to_sint Float32Regs:$a)), - (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; - -// f32 -> uint -def : Pat<(i1 (fp_to_uint Float32Regs:$a)), - (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint Float32Regs:$a)), - (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i16 (fp_to_uint Float32Regs:$a)), - (CVT_u16_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint Float32Regs:$a)), - (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i32 (fp_to_uint Float32Regs:$a)), - (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint Float32Regs:$a)), - (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(i64 (fp_to_uint Float32Regs:$a)), - (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; - -// f64 -> sint -def : Pat<(i1 (fp_to_sint Float64Regs:$a)), - (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint Float64Regs:$a)), - (CVT_s16_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_sint Float64Regs:$a)), - (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_sint Float64Regs:$a)), - (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; - -// f64 -> uint -def : Pat<(i1 (fp_to_uint Float64Regs:$a)), - (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint Float64Regs:$a)), - (CVT_u16_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint Float64Regs:$a)), - (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint Float64Regs:$a)), - (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; - -// sext i1 -def : Pat<(i16 (sext Int1Regs:$a)), - (SELP_s16ii -1, 0, Int1Regs:$a)>; -def : Pat<(i32 (sext Int1Regs:$a)), - (SELP_s32ii -1, 0, Int1Regs:$a)>; -def : Pat<(i64 (sext Int1Regs:$a)), - (SELP_s64ii -1, 0, Int1Regs:$a)>; - -// zext i1 -def : Pat<(i16 (zext Int1Regs:$a)), - (SELP_u16ii 1, 0, Int1Regs:$a)>; -def : Pat<(i32 (zext Int1Regs:$a)), - (SELP_u32ii 1, 0, Int1Regs:$a)>; -def : Pat<(i64 (zext Int1Regs:$a)), - (SELP_u64ii 1, 0, Int1Regs:$a)>; - -// anyext i1 -def : Pat<(i16 (anyext Int1Regs:$a)), - (SELP_u16ii -1, 0, Int1Regs:$a)>; -def : Pat<(i32 (anyext Int1Regs:$a)), - (SELP_u32ii -1, 0, Int1Regs:$a)>; -def : Pat<(i64 (anyext Int1Regs:$a)), - (SELP_u64ii -1, 0, Int1Regs:$a)>; - -// sext i16 -def : Pat<(i32 (sext Int16Regs:$a)), - (CVT_s32_s16 Int16Regs:$a, CvtNONE)>; -def : Pat<(i64 (sext Int16Regs:$a)), - (CVT_s64_s16 Int16Regs:$a, CvtNONE)>; - -// zext i16 -def : Pat<(i32 (zext Int16Regs:$a)), - (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; -def : Pat<(i64 (zext Int16Regs:$a)), - (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; - -// anyext i16 -def : Pat<(i32 (anyext Int16Regs:$a)), - (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; -def : Pat<(i64 (anyext Int16Regs:$a)), - (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; - -// sext i32 -def : Pat<(i64 (sext Int32Regs:$a)), - (CVT_s64_s32 Int32Regs:$a, CvtNONE)>; - -// zext i32 -def : Pat<(i64 (zext Int32Regs:$a)), - (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; - -// anyext i32 -def : Pat<(i64 (anyext Int32Regs:$a)), - (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; - - -// truncate i64 -def : Pat<(i32 (trunc Int64Regs:$a)), - (CVT_u32_u64 Int64Regs:$a, CvtNONE)>; -def : Pat<(i16 (trunc Int64Regs:$a)), - (CVT_u16_u64 Int64Regs:$a, CvtNONE)>; -def : Pat<(i1 (trunc Int64Regs:$a)), - (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>; - -// truncate i32 -def : Pat<(i16 (trunc Int32Regs:$a)), - (CVT_u16_u32 Int32Regs:$a, CvtNONE)>; -def : Pat<(i1 (trunc Int32Regs:$a)), - (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>; - -// truncate i16 -def : Pat<(i1 (trunc Int16Regs:$a)), - (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>; - -// sext_inreg -def : Pat<(sext_inreg Int16Regs:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>; -def : Pat<(sext_inreg Int32Regs:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>; -def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>; -def : Pat<(sext_inreg Int64Regs:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>; -def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>; -def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>; - - -// Select instructions with 32-bit predicates -def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), - (SELP_b16rr Int16Regs:$a, Int16Regs:$b, - (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), - (SELP_b32rr Int32Regs:$a, Int32Regs:$b, - (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), - (SELP_b64rr Int64Regs:$a, Int64Regs:$b, - (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, Float16Regs:$a, Float16Regs:$b), - (SELP_f16rr Float16Regs:$a, Float16Regs:$b, - (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), - (SELP_f32rr Float32Regs:$a, Float32Regs:$b, - (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), - (SELP_f64rr Float64Regs:$a, Float64Regs:$b, - (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; - - -let hasSideEffects = 0 in { - // pack a set of smaller int registers to a larger int register - def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2, - Int16Regs:$s3, Int16Regs:$s4), - "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>; - def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), - (ins Int16Regs:$s1, Int16Regs:$s2), - "mov.b32 \t$d, {{$s1, $s2}};", []>; - def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), - (ins Int32Regs:$s1, Int32Regs:$s2), - "mov.b64 \t$d, {{$s1, $s2}};", []>; - def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), - (ins Float32Regs:$s1, Float32Regs:$s2), - "mov.b64 \t$d, {{$s1, $s2}};", []>; - - // unpack a larger int register to a set of smaller int registers - def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, - Int16Regs:$d3, Int16Regs:$d4), - (ins Int64Regs:$s), - "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; - def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), - (ins Int32Regs:$s), - "mov.b32 \t{{$d1, $d2}}, $s;", []>; - def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), - (ins Int64Regs:$s), - "mov.b64 \t{{$d1, $d2}}, $s;", []>; - def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), - (ins Float64Regs:$s), - "mov.b64 \t{{$d1, $d2}}, $s;", []>; - -} - -let hasSideEffects = 0 in { - // Extract element of f16x2 register. PTX does not provide any way - // to access elements of f16x2 vector directly, so we need to - // extract it using a temporary register. - def F16x2toF16_0 : NVPTXInst<(outs Float16Regs:$dst), - (ins Float16x2Regs:$src), - "{{ .reg .b16 \t%tmp_hi;\n\t" - " mov.b32 \t{$dst, %tmp_hi}, $src; }}", - [(set Float16Regs:$dst, - (extractelt (v2f16 Float16x2Regs:$src), 0))]>; - def F16x2toF16_1 : NVPTXInst<(outs Float16Regs:$dst), - (ins Float16x2Regs:$src), - "{{ .reg .b16 \t%tmp_lo;\n\t" - " mov.b32 \t{%tmp_lo, $dst}, $src; }}", - [(set Float16Regs:$dst, - (extractelt (v2f16 Float16x2Regs:$src), 1))]>; - - // Coalesce two f16 registers into f16x2 - def BuildF16x2 : NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), - "mov.b32 \t$dst, {{$a, $b}};", - [(set Float16x2Regs:$dst, - (build_vector (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>; - - // Directly initializing underlying the b32 register is one less SASS - // instruction than than vector-packing move. - def BuildF16x2i : NVPTXInst<(outs Float16x2Regs:$dst), (ins i32imm:$src), - "mov.b32 \t$dst, $src;", - []>; - - // Split f16x2 into two f16 registers. - def SplitF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), - (ins Float16x2Regs:$src), - "mov.b32 \t{{$lo, $hi}}, $src;", - []>; - // Split an i32 into two f16 - def SplitI32toF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), - (ins Int32Regs:$src), - "mov.b32 \t{{$lo, $hi}}, $src;", - []>; -} - -// Count leading zeros -let hasSideEffects = 0 in { - def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), - "clz.b32 \t$d, $a;", []>; - def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "clz.b64 \t$d, $a;", []>; -} - -// 32-bit has a direct PTX instruction -def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>; - -// The return type of the ctlz ISD node is the same as its input, but the PTX -// ctz instruction always returns a 32-bit value. For ctlz.i64, convert the -// ptx value to 64 bits to match the ISD node's semantics, unless we know we're -// truncating back down to 32 bits. -def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; -def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>; - -// For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the -// result back to 16-bits if necessary. We also need to subtract 16 because -// the high-order 16 zeros were counted. -// -// TODO: NVPTX has a mov.b32 b32reg, {imm, b16reg} instruction, which we could -// use to save one SASS instruction (on sm_35 anyway): -// -// mov.b32 $tmp, {0xffff, $a} -// ctlz.b32 $result, $tmp -// -// That is, instead of zero-extending the input to 32 bits, we'd "one-extend" -// and then ctlz that value. This way we don't have to subtract 16 from the -// result. Unfortunately today we don't have a way to generate -// "mov b32reg, {b16imm, b16reg}", so we don't do this optimization. -def : Pat<(ctlz Int16Regs:$a), - (SUBi16ri (CVT_u16_u32 - (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>; -def : Pat<(i32 (zext (ctlz Int16Regs:$a))), - (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>; - -// Population count -let hasSideEffects = 0 in { - def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), - "popc.b32 \t$d, $a;", []>; - def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), - "popc.b64 \t$d, $a;", []>; -} - -// 32-bit has a direct PTX instruction -def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>; - -// For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit -// to match the LLVM semantics. Just as with ctlz.i64, we provide a second -// pattern that avoids the type conversion if we're truncating the result to -// i32 anyway. -def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; -def : Pat<(i32 (trunc (ctpop Int64Regs:$a))), (POPCr64 Int64Regs:$a)>; - -// For 16-bit, we zero-extend to 32-bit, then trunc the result back to 16-bits. -// If we know that we're storing into an i32, we can avoid the final trunc. -def : Pat<(ctpop Int16Regs:$a), - (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>; -def : Pat<(i32 (zext (ctpop Int16Regs:$a))), - (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>; - -// fpround f32 -> f16 -def : Pat<(f16 (fpround Float32Regs:$a)), - (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f16 (fpround Float32Regs:$a)), - (CVT_f16_f32 Float32Regs:$a, CvtRN)>; - -// fpround f64 -> f16 -def : Pat<(f16 (fpround Float64Regs:$a)), - (CVT_f16_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f16 (fpround Float64Regs:$a)), - (CVT_f16_f64 Float64Regs:$a, CvtRN)>; - -// fpround f64 -> f32 -def : Pat<(f32 (fpround Float64Regs:$a)), - (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f32 (fpround Float64Regs:$a)), - (CVT_f32_f64 Float64Regs:$a, CvtRN)>; - -// fpextend f16 -> f32 -def : Pat<(f32 (fpextend Float16Regs:$a)), - (CVT_f32_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f32 (fpextend Float16Regs:$a)), - (CVT_f32_f16 Float16Regs:$a, CvtNONE)>; - -// fpextend f16 -> f64 -def : Pat<(f64 (fpextend Float16Regs:$a)), - (CVT_f64_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f64 (fpextend Float16Regs:$a)), - (CVT_f64_f16 Float16Regs:$a, CvtNONE)>; - -// fpextend f32 -> f64 -def : Pat<(f64 (fpextend Float32Regs:$a)), - (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f64 (fpextend Float32Regs:$a)), - (CVT_f64_f32 Float32Regs:$a, CvtNONE)>; - -def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; - -// fceil, ffloor, fround, ftrunc. - -def : Pat<(fceil Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(fceil Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>; -def : Pat<(fceil Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(fceil Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>; -def : Pat<(fceil Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; - -def : Pat<(ffloor Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(ffloor Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>; -def : Pat<(ffloor Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(ffloor Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>; -def : Pat<(ffloor Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; - -def : Pat<(fround Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f16 (fround Float16Regs:$a)), - (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; -def : Pat<(fround Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f32 (fround Float32Regs:$a)), - (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; -def : Pat<(f64 (fround Float64Regs:$a)), - (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; - -def : Pat<(ftrunc Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(ftrunc Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>; -def : Pat<(ftrunc Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(ftrunc Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>; -def : Pat<(ftrunc Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; - -// nearbyint and rint are implemented as rounding to nearest even. This isn't -// strictly correct, because it causes us to ignore the rounding mode. But it -// matches what CUDA's "libm" does. - -def : Pat<(fnearbyint Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(fnearbyint Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; -def : Pat<(fnearbyint Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(fnearbyint Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; -def : Pat<(fnearbyint Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; - -def : Pat<(frint Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(frint Float16Regs:$a), - (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; -def : Pat<(frint Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(frint Float32Regs:$a), - (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; -def : Pat<(frint Float64Regs:$a), - (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; - - -//----------------------------------- -// Control-flow -//----------------------------------- - -let isTerminator=1 in { - let isReturn=1, isBarrier=1 in - def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>; - - let isBranch=1 in - def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), - "@$a bra \t$target;", - [(brcond Int1Regs:$a, bb:$target)]>; - let isBranch=1 in - def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), - "@!$a bra \t$target;", []>; - - let isBranch=1, isBarrier=1 in - def GOTO : NVPTXInst<(outs), (ins brtarget:$target), - "bra.uni \t$target;", [(br bb:$target)]>; -} - -def : Pat<(brcond Int32Regs:$a, bb:$target), - (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>; - -// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a -// conditional branch if the target block is the next block so that the code -// can fall through to the target block. The invertion is done by 'xor -// condition, 1', which will be translated to (setne condition, -1). Since ptx -// supports '@!pred bra target', we should use it. -def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target), - (CBranchOther Int1Regs:$a, bb:$target)>; - -// Call -def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; -def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart, - [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPSideEffect]>; - -def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def calltarget : Operand; -let isCall=1 in { - def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>; -} - -def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>; -def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>; - -// Pseudo instructions. -class Pseudo pattern> - : NVPTXInst; - -def Callseq_Start : - NVPTXInst<(outs), (ins i32imm:$amt), - "\\{ // callseq $amt\n" - "\t.reg .b32 temp_param_reg;", - [(callseq_start timm:$amt)]>; -def Callseq_End : - NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), - "\\} // callseq $amt1", - [(callseq_end timm:$amt1, timm:$amt2)]>; - -// trap instruction -def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>; - -// Call prototype wrapper -def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def CallPrototype : - SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype, - [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; -def ProtoIdent : Operand { - let PrintMethod = "printProtoIdent"; -} -def CALL_PROTOTYPE : - NVPTXInst<(outs), (ins ProtoIdent:$ident), - "$ident", [(CallPrototype (i32 texternalsym:$ident))]>; - - -include "NVPTXIntrinsics.td" - - -//----------------------------------- -// Notes -//----------------------------------- -// BSWAP is currently expanded. The following is a more efficient -// - for < sm_20, use vector scalar mov, as tesla support native 16-bit register -// - for sm_20, use pmpt (use vector scalar mov to get the pack and -// unpack). sm_20 supports native 32-bit register, but not native 16-bit -// register. +//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the PTX instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +include "NVPTXInstrFormats.td" + +// A NOP instruction +let hasSideEffects = 0 in { + def NOP : NVPTXInst<(outs), (ins), "", []>; +} + +let OperandType = "OPERAND_IMMEDIATE" in { + def f16imm : Operand; +} + +// List of vector specific properties +def isVecLD : VecInstTypeEnum<1>; +def isVecST : VecInstTypeEnum<2>; +def isVecBuild : VecInstTypeEnum<3>; +def isVecShuffle : VecInstTypeEnum<4>; +def isVecExtract : VecInstTypeEnum<5>; +def isVecInsert : VecInstTypeEnum<6>; +def isVecDest : VecInstTypeEnum<7>; +def isVecOther : VecInstTypeEnum<15>; + +//===----------------------------------------------------------------------===// +// NVPTX Operand Definitions. +//===----------------------------------------------------------------------===// + +def brtarget : Operand; + +// CVT conversion modes +// These must match the enum in NVPTX.h +def CvtNONE : PatLeaf<(i32 0x0)>; +def CvtRNI : PatLeaf<(i32 0x1)>; +def CvtRZI : PatLeaf<(i32 0x2)>; +def CvtRMI : PatLeaf<(i32 0x3)>; +def CvtRPI : PatLeaf<(i32 0x4)>; +def CvtRN : PatLeaf<(i32 0x5)>; +def CvtRZ : PatLeaf<(i32 0x6)>; +def CvtRM : PatLeaf<(i32 0x7)>; +def CvtRP : PatLeaf<(i32 0x8)>; + +def CvtNONE_FTZ : PatLeaf<(i32 0x10)>; +def CvtRNI_FTZ : PatLeaf<(i32 0x11)>; +def CvtRZI_FTZ : PatLeaf<(i32 0x12)>; +def CvtRMI_FTZ : PatLeaf<(i32 0x13)>; +def CvtRPI_FTZ : PatLeaf<(i32 0x14)>; +def CvtRN_FTZ : PatLeaf<(i32 0x15)>; +def CvtRZ_FTZ : PatLeaf<(i32 0x16)>; +def CvtRM_FTZ : PatLeaf<(i32 0x17)>; +def CvtRP_FTZ : PatLeaf<(i32 0x18)>; + +def CvtSAT : PatLeaf<(i32 0x20)>; +def CvtSAT_FTZ : PatLeaf<(i32 0x30)>; + +def CvtMode : Operand { + let PrintMethod = "printCvtMode"; +} + +// Compare modes +// These must match the enum in NVPTX.h +def CmpEQ : PatLeaf<(i32 0)>; +def CmpNE : PatLeaf<(i32 1)>; +def CmpLT : PatLeaf<(i32 2)>; +def CmpLE : PatLeaf<(i32 3)>; +def CmpGT : PatLeaf<(i32 4)>; +def CmpGE : PatLeaf<(i32 5)>; +def CmpEQU : PatLeaf<(i32 10)>; +def CmpNEU : PatLeaf<(i32 11)>; +def CmpLTU : PatLeaf<(i32 12)>; +def CmpLEU : PatLeaf<(i32 13)>; +def CmpGTU : PatLeaf<(i32 14)>; +def CmpGEU : PatLeaf<(i32 15)>; +def CmpNUM : PatLeaf<(i32 16)>; +def CmpNAN : PatLeaf<(i32 17)>; + +def CmpEQ_FTZ : PatLeaf<(i32 0x100)>; +def CmpNE_FTZ : PatLeaf<(i32 0x101)>; +def CmpLT_FTZ : PatLeaf<(i32 0x102)>; +def CmpLE_FTZ : PatLeaf<(i32 0x103)>; +def CmpGT_FTZ : PatLeaf<(i32 0x104)>; +def CmpGE_FTZ : PatLeaf<(i32 0x105)>; +def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>; +def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>; +def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>; +def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>; +def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>; +def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>; +def CmpNUM_FTZ : PatLeaf<(i32 0x110)>; +def CmpNAN_FTZ : PatLeaf<(i32 0x111)>; + +def CmpMode : Operand { + let PrintMethod = "printCmpMode"; +} +def VecElement : Operand { + let PrintMethod = "printVecElement"; +} + +//===----------------------------------------------------------------------===// +// NVPTX Instruction Predicate Definitions +//===----------------------------------------------------------------------===// + + +def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">; +def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">; +def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">; +def useAtomRedG32forGen32 : + Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">; +def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">; +def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">; +def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">; +def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">; +def useAtomRedG64forGen64 : + Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">; +def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">; +def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">; +def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">; +def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">; +def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">; +def hasVote : Predicate<"Subtarget->hasVote()">; +def hasDouble : Predicate<"Subtarget->hasDouble()">; +def reqPTX20 : Predicate<"Subtarget->reqPTX20()">; +def hasLDG : Predicate<"Subtarget->hasLDG()">; +def hasLDU : Predicate<"Subtarget->hasLDU()">; +def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">; + +def doF32FTZ : Predicate<"useF32FTZ()">; +def doNoF32FTZ : Predicate<"!useF32FTZ()">; + +def doMulWide : Predicate<"doMulWide">; + +def allowFMA : Predicate<"allowFMA()">; +def noFMA : Predicate<"!allowFMA()">; +def allowUnsafeFPMath : Predicate<"allowUnsafeFPMath()">; + +def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">; +def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; + +def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">; +def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; + +def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">; +def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; + +def true : Predicate<"true">; + +def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">; + +def useFP16Math: Predicate<"Subtarget->allowFP16Math()">; + +//===----------------------------------------------------------------------===// +// Some Common Instruction Class Templates +//===----------------------------------------------------------------------===// + +// Template for instructions which take three int64, int32, or int16 args. +// The instructions are named "" (e.g. "add.s64"). +multiclass I3 { + def i64rr : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + def i64ri : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; + def i32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def i16rr : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + def i16ri : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>; +} + +// Template for instructions which take 3 int32 args. The instructions are +// named ".s32" (e.g. "addc.cc.s32"). +multiclass ADD_SUB_INT_32 { + def i32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; +} + +// Template for instructions which take three fp64 or fp32 args. The +// instructions are named ".f" (e.g. "min.f64"). +// +// Also defines ftz (flush subnormal inputs and results to sign-preserving +// zero) variants for fp32 functions. +// +// This multiclass should be used for nodes that cannot be folded into FMAs. +// For nodes that can be folded into FMAs (i.e. adds and muls), use +// F3_fma_component. +multiclass F3 { + def f64rr : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>; + def f64ri : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>; + def f32rr_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ]>; + def f32ri_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[doF32FTZ]>; + def f32rr : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>; + def f32ri : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; +} + +// Template for instructions which take three FP args. The +// instructions are named ".f" (e.g. "add.f64"). +// +// Also defines ftz (flush subnormal inputs and results to sign-preserving +// zero) variants for fp32/fp16 functions. +// +// This multiclass should be used for nodes that can be folded to make fma ops. +// In this case, we use the ".rn" variant when FMA is disabled, as this behaves +// just like the non ".rn" op, but prevents ptxas from creating FMAs. +multiclass F3_fma_component { + def f64rr : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, + Requires<[allowFMA]>; + def f64ri : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, + Requires<[allowFMA]>; + def f32rr_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[allowFMA, doF32FTZ]>; + def f32ri_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[allowFMA, doF32FTZ]>; + def f32rr : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[allowFMA]>; + def f32ri : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[allowFMA]>; + + def f16rr_ftz : + NVPTXInst<(outs Float16Regs:$dst), + (ins Float16Regs:$a, Float16Regs:$b), + !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), + [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>, + Requires<[useFP16Math, allowFMA, doF32FTZ]>; + def f16rr : + NVPTXInst<(outs Float16Regs:$dst), + (ins Float16Regs:$a, Float16Regs:$b), + !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), + [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>, + Requires<[useFP16Math, allowFMA]>; + + def f16x2rr_ftz : + NVPTXInst<(outs Float16x2Regs:$dst), + (ins Float16x2Regs:$a, Float16x2Regs:$b), + !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), + [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>, + Requires<[useFP16Math, allowFMA, doF32FTZ]>; + def f16x2rr : + NVPTXInst<(outs Float16x2Regs:$dst), + (ins Float16x2Regs:$a, Float16x2Regs:$b), + !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), + [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>, + Requires<[useFP16Math, allowFMA]>; + + // These have strange names so we don't perturb existing mir tests. + def _rnf64rr : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>, + Requires<[noFMA]>; + def _rnf64ri : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>, + Requires<[noFMA]>; + def _rnf32rr_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[noFMA, doF32FTZ]>; + def _rnf32ri_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[noFMA, doF32FTZ]>; + def _rnf32rr : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>, + Requires<[noFMA]>; + def _rnf32ri : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, + Requires<[noFMA]>; + def _rnf16rr_ftz : + NVPTXInst<(outs Float16Regs:$dst), + (ins Float16Regs:$a, Float16Regs:$b), + !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"), + [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>, + Requires<[useFP16Math, noFMA, doF32FTZ]>; + def _rnf16rr : + NVPTXInst<(outs Float16Regs:$dst), + (ins Float16Regs:$a, Float16Regs:$b), + !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"), + [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>, + Requires<[useFP16Math, noFMA]>; + def _rnf16x2rr_ftz : + NVPTXInst<(outs Float16x2Regs:$dst), + (ins Float16x2Regs:$a, Float16x2Regs:$b), + !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"), + [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>, + Requires<[useFP16Math, noFMA, doF32FTZ]>; + def _rnf16x2rr : + NVPTXInst<(outs Float16x2Regs:$dst), + (ins Float16x2Regs:$a, Float16x2Regs:$b), + !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"), + [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>, + Requires<[useFP16Math, noFMA]>; +} + +// Template for operations which take two f32 or f64 operands. Provides three +// instructions: .f64, .f32, and .ftz.f32 (flush +// subnormal inputs and results to zero). +multiclass F2 { + def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a), + !strconcat(OpcStr, ".f64 \t$dst, $a;"), + [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>; + def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), + !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>, + Requires<[doF32FTZ]>; + def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a), + !strconcat(OpcStr, ".f32 \t$dst, $a;"), + [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>; +} + +//===----------------------------------------------------------------------===// +// NVPTX Instructions. +//===----------------------------------------------------------------------===// + +//----------------------------------- +// Type Conversion +//----------------------------------- + +let hasSideEffects = 0 in { + // Generate a cvt to the given type from all possible types. Each instance + // takes a CvtMode immediate that defines the conversion mode to use. It can + // be CvtNONE to omit a conversion mode. + multiclass CVT_FROM_ALL { + def _s8 : + NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s8 \t$dst, $src;"), []>; + def _u8 : + NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u8 \t$dst, $src;"), []>; + def _s16 : + NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s16 \t$dst, $src;"), []>; + def _u16 : + NVPTXInst<(outs RC:$dst), + (ins Int16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u16 \t$dst, $src;"), []>; + def _s32 : + NVPTXInst<(outs RC:$dst), + (ins Int32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s32 \t$dst, $src;"), []>; + def _u32 : + NVPTXInst<(outs RC:$dst), + (ins Int32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u32 \t$dst, $src;"), []>; + def _s64 : + NVPTXInst<(outs RC:$dst), + (ins Int64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".s64 \t$dst, $src;"), []>; + def _u64 : + NVPTXInst<(outs RC:$dst), + (ins Int64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".u64 \t$dst, $src;"), []>; + def _f16 : + NVPTXInst<(outs RC:$dst), + (ins Float16Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f16 \t$dst, $src;"), []>; + def _f32 : + NVPTXInst<(outs RC:$dst), + (ins Float32Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f32 \t$dst, $src;"), []>; + def _f64 : + NVPTXInst<(outs RC:$dst), + (ins Float64Regs:$src, CvtMode:$mode), + !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", + FromName, ".f64 \t$dst, $src;"), []>; + } + + // Generate cvts from all types to all types. + defm CVT_s8 : CVT_FROM_ALL<"s8", Int16Regs>; + defm CVT_u8 : CVT_FROM_ALL<"u8", Int16Regs>; + defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>; + defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>; + defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>; + defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; + defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; + defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; + defm CVT_f16 : CVT_FROM_ALL<"f16", Float16Regs>; + defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; + defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; + + // These cvts are different from those above: The source and dest registers + // are of the same type. + def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "cvt.s16.s8 \t$dst, $src;", []>; + def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "cvt.s32.s8 \t$dst, $src;", []>; + def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "cvt.s32.s16 \t$dst, $src;", []>; + def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "cvt.s64.s8 \t$dst, $src;", []>; + def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "cvt.s64.s16 \t$dst, $src;", []>; + def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "cvt.s64.s32 \t$dst, $src;", []>; +} + +//----------------------------------- +// Integer Arithmetic +//----------------------------------- + +// Template for xor masquerading as int1 arithmetic. +multiclass ADD_SUB_i1 { + def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; + def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), + "xor.pred \t$dst, $a, $b;", + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>; +} + +// int1 addition and subtraction are both just xor. +defm ADD_i1 : ADD_SUB_i1; +defm SUB_i1 : ADD_SUB_i1; + +// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we +// also use these for unsigned arithmetic. +defm ADD : I3<"add.s", add>; +defm SUB : I3<"sub.s", sub>; + +// int32 addition and subtraction with carry-out. +// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?). +defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>; +defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>; + +// int32 addition and subtraction with carry-in and carry-out. +defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>; +defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>; + +defm MULT : I3<"mul.lo.s", mul>; + +defm MULTHS : I3<"mul.hi.s", mulhs>; +defm MULTHU : I3<"mul.hi.u", mulhu>; + +defm SDIV : I3<"div.s", sdiv>; +defm UDIV : I3<"div.u", udiv>; + +// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM +// will lower it. +defm SREM : I3<"rem.s", srem>; +defm UREM : I3<"rem.u", urem>; + +// Integer absolute value. NumBits should be one minus the bit width of RC. +// This idiom implements the algorithm at +// http://graphics.stanford.edu/~seander/bithacks.html#IntegerAbs. +multiclass ABS { + def : NVPTXInst<(outs RC:$dst), (ins RC:$a), + !strconcat("abs", SizeName, " \t$dst, $a;"), + [(set RC:$dst, (abs RC:$a))]>; +} +defm ABS_16 : ABS; +defm ABS_32 : ABS; +defm ABS_64 : ABS; + +// Integer min/max. +defm SMAX : I3<"max.s", smax>; +defm UMAX : I3<"max.u", umax>; +defm SMIN : I3<"min.s", smin>; +defm UMIN : I3<"min.u", umin>; + +// +// Wide multiplication +// +def MULWIDES64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; +def MULWIDES64Imm : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; +def MULWIDES64Imm64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; + +def MULWIDEU64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; +def MULWIDEU64Imm : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; +def MULWIDEU64Imm64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; + +def MULWIDES32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; +def MULWIDES32Imm : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; +def MULWIDES32Imm32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; + +def MULWIDEU32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; +def MULWIDEU32Imm : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; +def MULWIDEU32Imm32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; + +def SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>; +def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>; +def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; + +// Matchers for signed, unsigned mul.wide ISD nodes. +def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)), + (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), + (MULWIDES32Imm Int16Regs:$a, imm:$b)>, + Requires<[doMulWide]>; +def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)), + (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), + (MULWIDEU32Imm Int16Regs:$a, imm:$b)>, + Requires<[doMulWide]>; + +def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)), + (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)), + (MULWIDES64Imm Int32Regs:$a, imm:$b)>, + Requires<[doMulWide]>; +def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)), + (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)), + (MULWIDEU64Imm Int32Regs:$a, imm:$b)>, + Requires<[doMulWide]>; + +// Predicates used for converting some patterns to mul.wide. +def SInt32Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + return v.isSignedIntN(32); +}]>; + +def UInt32Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + return v.isIntN(32); +}]>; + +def SInt16Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + return v.isSignedIntN(16); +}]>; + +def UInt16Const : PatLeaf<(imm), [{ + const APInt &v = N->getAPIntValue(); + return v.isIntN(16); +}]>; + +def Int5Const : PatLeaf<(imm), [{ + // Check if 0 <= v < 32; only then will the result of (x << v) be an int32. + const APInt &v = N->getAPIntValue(); + return v.sge(0) && v.slt(32); +}]>; + +def Int4Const : PatLeaf<(imm), [{ + // Check if 0 <= v < 16; only then will the result of (x << v) be an int16. + const APInt &v = N->getAPIntValue(); + return v.sge(0) && v.slt(16); +}]>; + +def SHL2MUL32 : SDNodeXFormgetAPIntValue(); + APInt temp(32, 1); + return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i32); +}]>; + +def SHL2MUL16 : SDNodeXFormgetAPIntValue(); + APInt temp(16, 1); + return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16); +}]>; + +// Convert "sign/zero-extend, then shift left by an immediate" to mul.wide. +def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), + (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, + Requires<[doMulWide]>; +def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)), + (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, + Requires<[doMulWide]>; + +def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)), + (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, + Requires<[doMulWide]>; +def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)), + (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>, + Requires<[doMulWide]>; + +// Convert "sign/zero-extend then multiply" to mul.wide. +def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), + (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), + (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), + (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), + (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), + (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), + (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>, + Requires<[doMulWide]>; + +def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), + (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), + (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>, + Requires<[doMulWide]>; + +// +// Integer multiply-add +// +def SDTIMAD : + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>, + SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; +def imad : SDNode<"NVPTXISD::IMAD", SDTIMAD>; + +def MAD16rrr : + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>; +def MAD16rri : + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>; +def MAD16rir : + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>; +def MAD16rii : + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, i16imm:$b, i16imm:$c), + "mad.lo.s16 \t$dst, $a, $b, $c;", + [(set Int16Regs:$dst, (imad Int16Regs:$a, imm:$b, imm:$c))]>; + +def MAD32rrr : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>; +def MAD32rri : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>; +def MAD32rir : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>; +def MAD32rii : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, i32imm:$b, i32imm:$c), + "mad.lo.s32 \t$dst, $a, $b, $c;", + [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, imm:$c))]>; + +def MAD64rrr : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>; +def MAD64rri : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>; +def MAD64rir : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>; +def MAD64rii : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$a, i64imm:$b, i64imm:$c), + "mad.lo.s64 \t$dst, $a, $b, $c;", + [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, imm:$c))]>; + +def INEG16 : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "neg.s16 \t$dst, $src;", + [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>; +def INEG32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "neg.s32 \t$dst, $src;", + [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>; +def INEG64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "neg.s64 \t$dst, $src;", + [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>; + +//----------------------------------- +// Floating Point Arithmetic +//----------------------------------- + +// Constant 1.0f +def FloatConst1 : PatLeaf<(fpimm), [{ + return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEsingle() && + N->getValueAPF().convertToFloat() == 1.0f; +}]>; +// Constant 1.0 (double) +def DoubleConst1 : PatLeaf<(fpimm), [{ + return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() && + N->getValueAPF().convertToDouble() == 1.0; +}]>; + +// Loads FP16 constant into a register. +// +// ptxas does not have hex representation for fp16, so we can't use +// fp16 immediate values in .f16 instructions. Instead we have to load +// the constant into a register using mov.b16. +def LOAD_CONST_F16 : + NVPTXInst<(outs Float16Regs:$dst), (ins f16imm:$a), + "mov.b16 \t$dst, $a;", []>; + +defm FADD : F3_fma_component<"add", fadd>; +defm FSUB : F3_fma_component<"sub", fsub>; +defm FMUL : F3_fma_component<"mul", fmul>; + +defm FMIN : F3<"min", fminnum>; +defm FMAX : F3<"max", fmaxnum>; + +defm FABS : F2<"abs", fabs>; +defm FNEG : F2<"neg", fneg>; +defm FSQRT : F2<"sqrt.rn", fsqrt>; + +// +// F64 division +// +def FDIV641r : + NVPTXInst<(outs Float64Regs:$dst), + (ins f64imm:$a, Float64Regs:$b), + "rcp.rn.f64 \t$dst, $b;", + [(set Float64Regs:$dst, (fdiv DoubleConst1:$a, Float64Regs:$b))]>; +def FDIV64rr : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, Float64Regs:$b), + "div.rn.f64 \t$dst, $a, $b;", + [(set Float64Regs:$dst, (fdiv Float64Regs:$a, Float64Regs:$b))]>; +def FDIV64ri : + NVPTXInst<(outs Float64Regs:$dst), + (ins Float64Regs:$a, f64imm:$b), + "div.rn.f64 \t$dst, $a, $b;", + [(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>; + +// +// F32 Approximate reciprocal +// +def FDIV321r_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.ftz.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX, doF32FTZ]>; +def FDIV321r : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX]>; +// +// F32 Approximate division +// +def FDIV32approxrr_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.approx.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX, doF32FTZ]>; +def FDIV32approxri_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.approx.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_APPROX, doF32FTZ]>; +def FDIV32approxrr : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.approx.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_APPROX]>; +def FDIV32approxri : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.approx.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_APPROX]>; +// +// F32 Semi-accurate reciprocal +// +// rcp.approx gives the same result as div.full(1.0f, a) and is faster. +// +def FDIV321r_approx_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.ftz.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL, doF32FTZ]>; +def FDIV321r_approx : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.approx.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL]>; +// +// F32 Semi-accurate division +// +def FDIV32rr_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.full.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL, doF32FTZ]>; +def FDIV32ri_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.full.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_FULL, doF32FTZ]>; +def FDIV32rr : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.full.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[do_DIVF32_FULL]>; +def FDIV32ri : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.full.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[do_DIVF32_FULL]>; +// +// F32 Accurate reciprocal +// +def FDIV321r_prec_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.rn.ftz.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[reqPTX20, doF32FTZ]>; +def FDIV321r_prec : + NVPTXInst<(outs Float32Regs:$dst), + (ins f32imm:$a, Float32Regs:$b), + "rcp.rn.f32 \t$dst, $b;", + [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>, + Requires<[reqPTX20]>; +// +// F32 Accurate division +// +def FDIV32rr_prec_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.rn.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[doF32FTZ, reqPTX20]>; +def FDIV32ri_prec_ftz : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.rn.ftz.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[doF32FTZ, reqPTX20]>; +def FDIV32rr_prec : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, Float32Regs:$b), + "div.rn.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>, + Requires<[reqPTX20]>; +def FDIV32ri_prec : + NVPTXInst<(outs Float32Regs:$dst), + (ins Float32Regs:$a, f32imm:$b), + "div.rn.f32 \t$dst, $a, $b;", + [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>, + Requires<[reqPTX20]>; + +// +// FMA +// + +multiclass FMA { + def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>, + Requires<[Pred]>; + def rri : NVPTXInst<(outs RC:$dst), + (ins RC:$a, RC:$b, ImmCls:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>, + Requires<[Pred]>; + def rir : NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, RC:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>, + Requires<[Pred]>; + def rii : NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, ImmCls:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>, + Requires<[Pred]>; +} + +multiclass FMA_F16 { + def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), + !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), + [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>, + Requires<[useFP16Math, Pred]>; +} + +defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", Float16Regs, doF32FTZ>; +defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, true>; +defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", Float16x2Regs, doF32FTZ>; +defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, true>; +defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>; +defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, true>; +defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>; + +// sin/cos +def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), + "sin.approx.f32 \t$dst, $src;", + [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>, + Requires<[allowUnsafeFPMath]>; +def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), + "cos.approx.f32 \t$dst, $src;", + [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>, + Requires<[allowUnsafeFPMath]>; + +// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)), +// i.e. "poor man's fmod()" + +// frem - f32 FTZ +def : Pat<(frem Float32Regs:$x, Float32Regs:$y), + (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32 + (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRMI_FTZ), + Float32Regs:$y))>, + Requires<[doF32FTZ]>; +def : Pat<(frem Float32Regs:$x, fpimm:$y), + (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32 + (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRMI_FTZ), + fpimm:$y))>, + Requires<[doF32FTZ]>; + +// frem - f32 +def : Pat<(frem Float32Regs:$x, Float32Regs:$y), + (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32 + (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRMI), + Float32Regs:$y))>; +def : Pat<(frem Float32Regs:$x, fpimm:$y), + (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32 + (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRMI), + fpimm:$y))>; + +// frem - f64 +def : Pat<(frem Float64Regs:$x, Float64Regs:$y), + (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64 + (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRMI), + Float64Regs:$y))>; +def : Pat<(frem Float64Regs:$x, fpimm:$y), + (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64 + (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRMI), + fpimm:$y))>; + +//----------------------------------- +// Bitwise operations +//----------------------------------- + +// Template for three-arg bitwise operations. Takes three args, Creates .b16, +// .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of OpcStr. +multiclass BITWISE { + def b1rr : + NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b), + !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>; + def b1ri : + NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b), + !strconcat(OpcStr, ".pred \t$dst, $a, $b;"), + [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>; + def b16rr : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), + !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>; + def b16ri : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>; + def b32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def b32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>; + def b64rr : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b), + !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>; + def b64ri : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b), + !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>; +} + +defm OR : BITWISE<"or", or>; +defm AND : BITWISE<"and", and>; +defm XOR : BITWISE<"xor", xor>; + +def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src), + "not.pred \t$dst, $src;", + [(set Int1Regs:$dst, (not Int1Regs:$src))]>; +def NOT16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "not.b16 \t$dst, $src;", + [(set Int16Regs:$dst, (not Int16Regs:$src))]>; +def NOT32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src), + "not.b32 \t$dst, $src;", + [(set Int32Regs:$dst, (not Int32Regs:$src))]>; +def NOT64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src), + "not.b64 \t$dst, $src;", + [(set Int64Regs:$dst, (not Int64Regs:$src))]>; + +// Template for left/right shifts. Takes three operands, +// [dest (reg), src (reg), shift (reg or imm)]. +// dest and src may be int64, int32, or int16, but shift is always int32. +// +// This template also defines a 32-bit shift (imm, imm) instruction. +multiclass SHIFT { + def i64rr : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>; + def i64ri : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b), + !strconcat(OpcStr, "64 \t$dst, $a, $b;"), + [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>; + def i32rr : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>; + def i32ri : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>; + def i32ii : + NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b), + !strconcat(OpcStr, "32 \t$dst, $a, $b;"), + [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>; + def i16rr : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>; + def i16ri : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + !strconcat(OpcStr, "16 \t$dst, $a, $b;"), + [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>; +} + +defm SHL : SHIFT<"shl.b", shl>; +defm SRA : SHIFT<"shr.s", sra>; +defm SRL : SHIFT<"shr.u", srl>; + +// Bit-reverse +def BREV32 : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a), + "brev.b32 \t$dst, $a;", + [(set Int32Regs:$dst, (bitreverse Int32Regs:$a))]>; +def BREV64 : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a), + "brev.b64 \t$dst, $a;", + [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>; + +// +// Rotate: Use ptx shf instruction if available. +// + +// 32 bit r2 = rotl r1, n +// => +// r2 = shf.l r1, r1, n +def ROTL32imm_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]>; + +def ROTL32reg_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]>; + +// 32 bit r2 = rotr r1, n +// => +// r2 = shf.r r1, r1, n +def ROTR32imm_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]>; + +def ROTR32reg_hw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]>; + +// 32-bit software rotate by immediate. $amt2 should equal 32 - $amt1. +def ROT32imm_sw : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + "shl.b32 \t%lhs, $src, $amt1;\n\t" + "shr.b32 \t%rhs, $src, $amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + []>; + +def SUB_FRM_32 : SDNodeXFormgetTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + Requires<[noHWROT32]>; +def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>, + Requires<[noHWROT32]>; + +// 32-bit software rotate left by register. +def ROTL32reg_sw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + ".reg .b32 %amt2;\n\t" + "shl.b32 \t%lhs, $src, $amt;\n\t" + "sub.s32 \t%amt2, 32, $amt;\n\t" + "shr.b32 \t%rhs, $src, %amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[noHWROT32]>; + +// 32-bit software rotate right by register. +def ROTR32reg_sw : + NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b32 %lhs;\n\t" + ".reg .b32 %rhs;\n\t" + ".reg .b32 %amt2;\n\t" + "shr.b32 \t%lhs, $src, $amt;\n\t" + "sub.s32 \t%amt2, 32, $amt;\n\t" + "shl.b32 \t%rhs, $src, %amt2;\n\t" + "add.u32 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[noHWROT32]>; + +// 64-bit software rotate by immediate. $amt2 should equal 64 - $amt1. +def ROT64imm_sw : + NVPTXInst<(outs Int64Regs:$dst), + (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + "shl.b64 \t%lhs, $src, $amt1;\n\t" + "shr.b64 \t%rhs, $src, $amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + []>; + +def SUB_FRM_64 : SDNodeXFormgetTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>; +def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>; + +// 64-bit software rotate left by register. +def ROTL64reg_sw : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + ".reg .u32 %amt2;\n\t" + "shl.b64 \t%lhs, $src, $amt;\n\t" + "sub.u32 \t%amt2, 64, $amt;\n\t" + "shr.b64 \t%rhs, $src, %amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>; + +def ROTR64reg_sw : + NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt), + "{{\n\t" + ".reg .b64 %lhs;\n\t" + ".reg .b64 %rhs;\n\t" + ".reg .u32 %amt2;\n\t" + "shr.b64 \t%lhs, $src, $amt;\n\t" + "sub.u32 \t%amt2, 64, $amt;\n\t" + "shl.b64 \t%rhs, $src, %amt2;\n\t" + "add.u64 \t$dst, %lhs, %rhs;\n\t" + "}}", + [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; + +// +// Funnnel shift in clamp mode +// + +// Create SDNodes so they can be used in the DAG code, e.g. +// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) +def SDTIntShiftDOp : + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisInt<3>]>; +def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; +def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; + +def FUNSHFLCLAMP : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFL_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>; + +def FUNSHFRCLAMP : + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFR_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>; + +// +// BFE - bit-field extract +// + +// Template for BFE instructions. Takes four args, +// [dest (reg), src (reg), start (reg or imm), end (reg or imm)]. +// Start may be an imm only if end is also an imm. FIXME: Is this a +// restriction in PTX? +// +// dest and src may be int32 or int64, but start and end are always int32. +multiclass BFE { + def rrr + : NVPTXInst<(outs RC:$d), + (ins RC:$a, Int32Regs:$b, Int32Regs:$c), + !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; + def rri + : NVPTXInst<(outs RC:$d), + (ins RC:$a, Int32Regs:$b, i32imm:$c), + !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; + def rii + : NVPTXInst<(outs RC:$d), + (ins RC:$a, i32imm:$b, i32imm:$c), + !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; +} + +let hasSideEffects = 0 in { + defm BFE_S32 : BFE<"s32", Int32Regs>; + defm BFE_U32 : BFE<"u32", Int32Regs>; + defm BFE_S64 : BFE<"s64", Int64Regs>; + defm BFE_U64 : BFE<"u64", Int64Regs>; +} + +//----------------------------------- +// Comparison instructions (setp, set) +//----------------------------------- + +// FIXME: This doesn't cover versions of set and setp that combine with a +// boolean predicate, e.g. setp.eq.and.b16. + +let hasSideEffects = 0 in { + multiclass SETP { + def rr : + NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, + " \t$dst, $a, $b;"), []>; + def ri : + NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, + " \t$dst, $a, $b;"), []>; + def ir : + NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp), + !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr, + " \t$dst, $a, $b;"), []>; + } +} + +defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>; +defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>; +defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>; +defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>; +defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>; +defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>; +defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>; +defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>; +defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>; +defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>; +defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; +def SETP_f16rr : + NVPTXInst<(outs Int1Regs:$dst), + (ins Float16Regs:$a, Float16Regs:$b, CmpMode:$cmp), + "setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;", + []>, Requires<[useFP16Math]>; + +def SETP_f16x2rr : + NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q), + (ins Float16x2Regs:$a, Float16x2Regs:$b, CmpMode:$cmp), + "setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;", + []>, + Requires<[useFP16Math]>; + + +// FIXME: This doesn't appear to be correct. The "set" mnemonic has the form +// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination +// reg, either u32, s32, or f32. Anyway these aren't used at the moment. + +let hasSideEffects = 0 in { + multiclass SET { + def rr : NVPTXInst<(outs Int32Regs:$dst), + (ins RC:$a, RC:$b, CmpMode:$cmp), + !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>; + def ri : NVPTXInst<(outs Int32Regs:$dst), + (ins RC:$a, ImmCls:$b, CmpMode:$cmp), + !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>; + def ir : NVPTXInst<(outs Int32Regs:$dst), + (ins ImmCls:$a, RC:$b, CmpMode:$cmp), + !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>; + } +} + +defm SET_b16 : SET<"b16", Int16Regs, i16imm>; +defm SET_s16 : SET<"s16", Int16Regs, i16imm>; +defm SET_u16 : SET<"u16", Int16Regs, i16imm>; +defm SET_b32 : SET<"b32", Int32Regs, i32imm>; +defm SET_s32 : SET<"s32", Int32Regs, i32imm>; +defm SET_u32 : SET<"u32", Int32Regs, i32imm>; +defm SET_b64 : SET<"b64", Int64Regs, i64imm>; +defm SET_s64 : SET<"s64", Int64Regs, i64imm>; +defm SET_u64 : SET<"u64", Int64Regs, i64imm>; +defm SET_f16 : SET<"f16", Float16Regs, f16imm>; +defm SET_f32 : SET<"f32", Float32Regs, f32imm>; +defm SET_f64 : SET<"f64", Float64Regs, f64imm>; + +//----------------------------------- +// Selection instructions (selp) +//----------------------------------- + +// FIXME: Missing slct + +// selp instructions that don't have any pattern matches; we explicitly use +// them within this file. +let hasSideEffects = 0 in { + multiclass SELP { + def rr : NVPTXInst<(outs RC:$dst), + (ins RC:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>; + def ri : NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>; + def ir : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>; + def ii : NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>; + } + + multiclass SELP_PATTERN { + def rr : + NVPTXInst<(outs RC:$dst), + (ins RC:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>; + def ri : + NVPTXInst<(outs RC:$dst), + (ins RC:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>; + def ir : + NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, RC:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>; + def ii : + NVPTXInst<(outs RC:$dst), + (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p), + !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), + [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>; + } +} + +// Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as +// good. +defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>; +defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>; +defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>; +defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>; +defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>; +defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>; +defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>; +defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>; +defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>; +defm SELP_f16 : SELP_PATTERN<"b16", Float16Regs, f16imm, fpimm>; +defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>; +defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>; + +def SELP_f16x2rr : + NVPTXInst<(outs Float16x2Regs:$dst), + (ins Float16x2Regs:$a, Float16x2Regs:$b, Int1Regs:$p), + "selp.b32 \t$dst, $a, $b, $p;", + [(set Float16x2Regs:$dst, + (select Int1Regs:$p, Float16x2Regs:$a, Float16x2Regs:$b))]>; + +//----------------------------------- +// Data Movement (Load / Store, Move) +//----------------------------------- + +def ADDRri : ComplexPattern; +def ADDRri64 : ComplexPattern; + +def MEMri : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops Int32Regs, i32imm); +} +def MEMri64 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops Int64Regs, i64imm); +} + +def imem : Operand { + let PrintMethod = "printOperand"; +} + +def imemAny : Operand { + let PrintMethod = "printOperand"; +} + +def LdStCode : Operand { + let PrintMethod = "printLdStCode"; +} + +def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; +def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; + +// Load a memory address into a u32 or u64 register. +def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a), + "mov.u32 \t$dst, $a;", + [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>; +def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a), + "mov.u64 \t$dst, $a;", + [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>; + +// Get pointer to local stack. +let hasSideEffects = 0 in { + def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), + "mov.u32 \t$d, __local_depot$num;", []>; + def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), + "mov.u64 \t$d, __local_depot$num;", []>; +} + + +// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp +let IsSimpleMove=1, hasSideEffects=0 in { + def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), + "mov.pred \t$dst, $sss;", []>; + def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), + "mov.u16 \t$dst, $sss;", []>; + def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), + "mov.u32 \t$dst, $sss;", []>; + def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), + "mov.u64 \t$dst, $sss;", []>; + + def FMOV16rr : NVPTXInst<(outs Float16Regs:$dst), (ins Float16Regs:$src), + // We have to use .b16 here as there's no mov.f16. + "mov.b16 \t$dst, $src;", []>; + def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), + "mov.f32 \t$dst, $src;", []>; + def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), + "mov.f64 \t$dst, $src;", []>; +} + +def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), + "mov.pred \t$dst, $src;", + [(set Int1Regs:$dst, imm:$src)]>; +def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), + "mov.u16 \t$dst, $src;", + [(set Int16Regs:$dst, imm:$src)]>; +def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), + "mov.u32 \t$dst, $src;", + [(set Int32Regs:$dst, imm:$src)]>; +def IMOV64i : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), + "mov.u64 \t$dst, $src;", + [(set Int64Regs:$dst, imm:$src)]>; + +def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), + "mov.f32 \t$dst, $src;", + [(set Float32Regs:$dst, fpimm:$src)]>; +def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), + "mov.f64 \t$dst, $src;", + [(set Float64Regs:$dst, fpimm:$src)]>; + +def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; + +//---- Copy Frame Index ---- +def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr), + "add.u32 \t$dst, ${addr:add};", + [(set Int32Regs:$dst, ADDRri:$addr)]>; +def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr), + "add.u64 \t$dst, ${addr:add};", + [(set Int64Regs:$dst, ADDRri64:$addr)]>; + +//----------------------------------- +// Comparison and Selection +//----------------------------------- + +multiclass ISET_FORMAT { + // i16 -> pred + def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)), + (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)), + (setp_16ri Int16Regs:$a, imm:$b, Mode)>; + def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)), + (setp_16ir imm:$a, Int16Regs:$b, Mode)>; + // i32 -> pred + def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)), + (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)), + (setp_32ri Int32Regs:$a, imm:$b, Mode)>; + def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)), + (setp_32ir imm:$a, Int32Regs:$b, Mode)>; + // i64 -> pred + def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)), + (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)), + (setp_64ri Int64Regs:$a, imm:$b, Mode)>; + def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)), + (setp_64ir imm:$a, Int64Regs:$b, Mode)>; + + // i16 -> i32 + def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)), + (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)), + (set_16ri Int16Regs:$a, imm:$b, Mode)>; + def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)), + (set_16ir imm:$a, Int16Regs:$b, Mode)>; + // i32 -> i32 + def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)), + (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)), + (set_32ri Int32Regs:$a, imm:$b, Mode)>; + def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)), + (set_32ir imm:$a, Int32Regs:$b, Mode)>; + // i64 -> i32 + def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)), + (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)), + (set_64ri Int64Regs:$a, imm:$b, Mode)>; + def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)), + (set_64ir imm:$a, Int64Regs:$b, Mode)>; +} + +multiclass ISET_FORMAT_SIGNED + : ISET_FORMAT { + // TableGen doesn't like empty multiclasses. + def : PatLeaf<(i32 0)>; +} + +multiclass ISET_FORMAT_UNSIGNED + : ISET_FORMAT { + // TableGen doesn't like empty multiclasses. + def : PatLeaf<(i32 0)>; +} + +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_SIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_UNSIGNED; +defm : ISET_FORMAT_UNSIGNED; + +// i1 compares +def : Pat<(setne Int1Regs:$a, Int1Regs:$b), + (XORb1rr Int1Regs:$a, Int1Regs:$b)>; +def : Pat<(setune Int1Regs:$a, Int1Regs:$b), + (XORb1rr Int1Regs:$a, Int1Regs:$b)>; + +def : Pat<(seteq Int1Regs:$a, Int1Regs:$b), + (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; +def : Pat<(setueq Int1Regs:$a, Int1Regs:$b), + (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>; + +// i1 compare -> i32 +def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), + (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; +def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)), + (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>; + + + +multiclass FSET_FORMAT { + // f16 -> pred + def : Pat<(i1 (OpNode Float16Regs:$a, Float16Regs:$b)), + (SETP_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, + Requires<[useFP16Math,doF32FTZ]>; + def : Pat<(i1 (OpNode Float16Regs:$a, Float16Regs:$b)), + (SETP_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, + Requires<[useFP16Math]>; + def : Pat<(i1 (OpNode Float16Regs:$a, fpimm:$b)), + (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, + Requires<[useFP16Math,doF32FTZ]>; + def : Pat<(i1 (OpNode Float16Regs:$a, fpimm:$b)), + (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, + Requires<[useFP16Math]>; + def : Pat<(i1 (OpNode fpimm:$a, Float16Regs:$b)), + (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, + Requires<[useFP16Math,doF32FTZ]>; + def : Pat<(i1 (OpNode fpimm:$a, Float16Regs:$b)), + (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, + Requires<[useFP16Math]>; + + // f32 -> pred + def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), + (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)), + (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), + (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)), + (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>; + + // f64 -> pred + def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)), + (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; + def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)), + (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)), + (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>; + + // f16 -> i32 + def : Pat<(i32 (OpNode Float16Regs:$a, Float16Regs:$b)), + (SET_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, + Requires<[useFP16Math, doF32FTZ]>; + def : Pat<(i32 (OpNode Float16Regs:$a, Float16Regs:$b)), + (SET_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, + Requires<[useFP16Math]>; + def : Pat<(i32 (OpNode Float16Regs:$a, fpimm:$b)), + (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, + Requires<[useFP16Math, doF32FTZ]>; + def : Pat<(i32 (OpNode Float16Regs:$a, fpimm:$b)), + (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, + Requires<[useFP16Math]>; + def : Pat<(i32 (OpNode fpimm:$a, Float16Regs:$b)), + (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, + Requires<[useFP16Math, doF32FTZ]>; + def : Pat<(i32 (OpNode fpimm:$a, Float16Regs:$b)), + (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, + Requires<[useFP16Math]>; + + // f32 -> i32 + def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)), + (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), + (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)), + (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), + (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>, + Requires<[doF32FTZ]>; + def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)), + (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>; + + // f64 -> i32 + def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)), + (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>; + def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)), + (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>; + def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)), + (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>; +} + +defm FSetOGT : FSET_FORMAT; +defm FSetOLT : FSET_FORMAT; +defm FSetOGE : FSET_FORMAT; +defm FSetOLE : FSET_FORMAT; +defm FSetOEQ : FSET_FORMAT; +defm FSetONE : FSET_FORMAT; + +defm FSetUGT : FSET_FORMAT; +defm FSetULT : FSET_FORMAT; +defm FSetUGE : FSET_FORMAT; +defm FSetULE : FSET_FORMAT; +defm FSetUEQ : FSET_FORMAT; +defm FSetUNE : FSET_FORMAT; + +defm FSetGT : FSET_FORMAT; +defm FSetLT : FSET_FORMAT; +defm FSetGE : FSET_FORMAT; +defm FSetLE : FSET_FORMAT; +defm FSetEQ : FSET_FORMAT; +defm FSetNE : FSET_FORMAT; + +defm FSetNUM : FSET_FORMAT; +defm FSetNAN : FSET_FORMAT; + +// FIXME: What is this doing here? Can it be deleted? +// def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad, +// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +def SDTDeclareParamProfile : + SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; +def SDTDeclareScalarParamProfile : + SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; +def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; +def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>; +def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>; +def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; +def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; +def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>; +def SDTCallVoidProfile : SDTypeProfile<0, 1, []>; +def SDTCallValProfile : SDTypeProfile<1, 0, []>; +def SDTMoveParamProfile : SDTypeProfile<1, 1, []>; +def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; +def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>; +def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; +def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>; + +def DeclareParam : + SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareScalarParam : + SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareRetParam : + SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def DeclareRet : + SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def LoadParam : + SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def LoadParamV2 : + SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def LoadParamV4 : + SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>; +def PrintCall : + SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def PrintConvergentCall : + SDNode<"NVPTXISD::PrintConvergentCall", SDTPrintCallProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def PrintCallUni : + SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def PrintConvergentCallUni : + SDNode<"NVPTXISD::PrintConvergentCallUni", SDTPrintCallUniProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParam : + SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamV2 : + SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamV4 : + SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamU32 : + SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def StoreParamS32 : + SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArgBegin : + SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArg : + SDNode<"NVPTXISD::CallArg", SDTCallArgProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def LastCallArg : + SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallArgEnd : + SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallVoid : + SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def Prototype : + SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def CallVal : + SDNode<"NVPTXISD::CallVal", SDTCallValProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def MoveParam : + SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>; +def StoreRetval : + SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, + [SDNPHasChain, SDNPSideEffect]>; +def StoreRetvalV2 : + SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile, + [SDNPHasChain, SDNPSideEffect]>; +def StoreRetvalV4 : + SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, + [SDNPHasChain, SDNPSideEffect]>; +def PseudoUseParam : + SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def RETURNNode : + SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile, + [SDNPHasChain, SDNPSideEffect]>; + +let mayLoad = 1 in { + class LoadParamMemInst : + NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), + !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"), + []>; + + class LoadParamV2MemInst : + NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b), + !strconcat("ld.param.v2", opstr, + " \t{{$dst, $dst2}}, [retval0+$b];"), []>; + + class LoadParamV4MemInst : + NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3, + regclass:$dst4), + (ins i32imm:$b), + !strconcat("ld.param.v4", opstr, + " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), + []>; +} + +class LoadParamRegInst : + NVPTXInst<(outs regclass:$dst), (ins i32imm:$b), + !strconcat("mov", opstr, " \t$dst, retval$b;"), + [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>; + +let mayStore = 1 in { + class StoreParamInst : + NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), + !strconcat("st.param", opstr, " \t[param$a+$b], $val;"), + []>; + + class StoreParamV2Inst : + NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, + i32imm:$a, i32imm:$b), + !strconcat("st.param.v2", opstr, + " \t[param$a+$b], {{$val, $val2}};"), + []>; + + class StoreParamV4Inst : + NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3, + regclass:$val4, i32imm:$a, + i32imm:$b), + !strconcat("st.param.v4", opstr, + " \t[param$a+$b], {{$val, $val2, $val3, $val4}};"), + []>; + + class StoreRetvalInst : + NVPTXInst<(outs), (ins regclass:$val, i32imm:$a), + !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"), + []>; + + class StoreRetvalV2Inst : + NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a), + !strconcat("st.param.v2", opstr, + " \t[func_retval0+$a], {{$val, $val2}};"), + []>; + + class StoreRetvalV4Inst : + NVPTXInst<(outs), + (ins regclass:$val, regclass:$val2, regclass:$val3, + regclass:$val4, i32imm:$a), + !strconcat("st.param.v4", opstr, + " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), + []>; +} + +let isCall=1 in { + multiclass CALL { + def PrintCallNoRetInst : NVPTXInst<(outs), (ins), + !strconcat(OpcStr, " "), [(OpNode (i32 0))]>; + def PrintCallRetInst1 : NVPTXInst<(outs), (ins), + !strconcat(OpcStr, " (retval0), "), [(OpNode (i32 1))]>; + def PrintCallRetInst2 : NVPTXInst<(outs), (ins), + !strconcat(OpcStr, " (retval0, retval1), "), [(OpNode (i32 2))]>; + def PrintCallRetInst3 : NVPTXInst<(outs), (ins), + !strconcat(OpcStr, " (retval0, retval1, retval2), "), [(OpNode (i32 3))]>; + def PrintCallRetInst4 : NVPTXInst<(outs), (ins), + !strconcat(OpcStr, " (retval0, retval1, retval2, retval3), "), + [(OpNode (i32 4))]>; + def PrintCallRetInst5 : NVPTXInst<(outs), (ins), + !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4), "), + [(OpNode (i32 5))]>; + def PrintCallRetInst6 : NVPTXInst<(outs), (ins), + !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4, " + "retval5), "), + [(OpNode (i32 6))]>; + def PrintCallRetInst7 : NVPTXInst<(outs), (ins), + !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4, " + "retval5, retval6), "), + [(OpNode (i32 7))]>; + def PrintCallRetInst8 : NVPTXInst<(outs), (ins), + !strconcat(OpcStr, " (retval0, retval1, retval2, retval3, retval4, " + "retval5, retval6, retval7), "), + [(OpNode (i32 8))]>; + } +} + +defm Call : CALL<"call", PrintCall>; +defm CallUni : CALL<"call.uni", PrintCallUni>; + +// Convergent call instructions. These are identical to regular calls, except +// they have the isConvergent bit set. +let isConvergent=1 in { + defm ConvergentCall : CALL<"call", PrintConvergentCall>; + defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>; +} + +def LoadParamMemI64 : LoadParamMemInst; +def LoadParamMemI32 : LoadParamMemInst; +def LoadParamMemI16 : LoadParamMemInst; +def LoadParamMemI8 : LoadParamMemInst; +def LoadParamMemV2I64 : LoadParamV2MemInst; +def LoadParamMemV2I32 : LoadParamV2MemInst; +def LoadParamMemV2I16 : LoadParamV2MemInst; +def LoadParamMemV2I8 : LoadParamV2MemInst; +def LoadParamMemV4I32 : LoadParamV4MemInst; +def LoadParamMemV4I16 : LoadParamV4MemInst; +def LoadParamMemV4I8 : LoadParamV4MemInst; +def LoadParamMemF16 : LoadParamMemInst; +def LoadParamMemF16x2 : LoadParamMemInst; +def LoadParamMemF32 : LoadParamMemInst; +def LoadParamMemF64 : LoadParamMemInst; +def LoadParamMemV2F16 : LoadParamV2MemInst; +def LoadParamMemV2F16x2: LoadParamV2MemInst; +def LoadParamMemV2F32 : LoadParamV2MemInst; +def LoadParamMemV2F64 : LoadParamV2MemInst; +def LoadParamMemV4F16 : LoadParamV4MemInst; +def LoadParamMemV4F16x2: LoadParamV4MemInst; +def LoadParamMemV4F32 : LoadParamV4MemInst; + +def StoreParamI64 : StoreParamInst; +def StoreParamI32 : StoreParamInst; + +def StoreParamI16 : StoreParamInst; +def StoreParamI8 : StoreParamInst; +def StoreParamV2I64 : StoreParamV2Inst; +def StoreParamV2I32 : StoreParamV2Inst; +def StoreParamV2I16 : StoreParamV2Inst; +def StoreParamV2I8 : StoreParamV2Inst; + +def StoreParamV4I32 : StoreParamV4Inst; +def StoreParamV4I16 : StoreParamV4Inst; +def StoreParamV4I8 : StoreParamV4Inst; + +def StoreParamF16 : StoreParamInst; +def StoreParamF16x2 : StoreParamInst; +def StoreParamF32 : StoreParamInst; +def StoreParamF64 : StoreParamInst; +def StoreParamV2F16 : StoreParamV2Inst; +def StoreParamV2F16x2 : StoreParamV2Inst; +def StoreParamV2F32 : StoreParamV2Inst; +def StoreParamV2F64 : StoreParamV2Inst; +def StoreParamV4F16 : StoreParamV4Inst; +def StoreParamV4F16x2 : StoreParamV4Inst; +def StoreParamV4F32 : StoreParamV4Inst; + +def StoreRetvalI64 : StoreRetvalInst; +def StoreRetvalI32 : StoreRetvalInst; +def StoreRetvalI16 : StoreRetvalInst; +def StoreRetvalI8 : StoreRetvalInst; +def StoreRetvalV2I64 : StoreRetvalV2Inst; +def StoreRetvalV2I32 : StoreRetvalV2Inst; +def StoreRetvalV2I16 : StoreRetvalV2Inst; +def StoreRetvalV2I8 : StoreRetvalV2Inst; +def StoreRetvalV4I32 : StoreRetvalV4Inst; +def StoreRetvalV4I16 : StoreRetvalV4Inst; +def StoreRetvalV4I8 : StoreRetvalV4Inst; + +def StoreRetvalF64 : StoreRetvalInst; +def StoreRetvalF32 : StoreRetvalInst; +def StoreRetvalF16 : StoreRetvalInst; +def StoreRetvalF16x2 : StoreRetvalInst; +def StoreRetvalV2F64 : StoreRetvalV2Inst; +def StoreRetvalV2F32 : StoreRetvalV2Inst; +def StoreRetvalV2F16 : StoreRetvalV2Inst; +def StoreRetvalV2F16x2: StoreRetvalV2Inst; +def StoreRetvalV4F32 : StoreRetvalV4Inst; +def StoreRetvalV4F16 : StoreRetvalV4Inst; +def StoreRetvalV4F16x2: StoreRetvalV4Inst; + +def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; +def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; +def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>; +def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>; + +class CallArgInst : + NVPTXInst<(outs), (ins regclass:$a), "$a, ", + [(CallArg (i32 0), regclass:$a)]>; + +class LastCallArgInst : + NVPTXInst<(outs), (ins regclass:$a), "$a", + [(LastCallArg (i32 0), regclass:$a)]>; + +def CallArgI64 : CallArgInst; +def CallArgI32 : CallArgInst; +def CallArgI16 : CallArgInst; +def CallArgF64 : CallArgInst; +def CallArgF32 : CallArgInst; + +def LastCallArgI64 : LastCallArgInst; +def LastCallArgI32 : LastCallArgInst; +def LastCallArgI16 : LastCallArgInst; +def LastCallArgF64 : LastCallArgInst; +def LastCallArgF32 : LastCallArgInst; + +def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ", + [(CallArg (i32 0), (i32 imm:$a))]>; +def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a", + [(LastCallArg (i32 0), (i32 imm:$a))]>; + +def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ", + [(CallArg (i32 1), (i32 imm:$a))]>; +def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a", + [(LastCallArg (i32 1), (i32 imm:$a))]>; + +def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ", + [(CallVoid (Wrapper tglobaladdr:$addr))]>; +def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ", + [(CallVoid Int32Regs:$addr)]>; +def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ", + [(CallVoid Int64Regs:$addr)]>; +def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;", + [(Prototype (i32 imm:$val))]>; + +def DeclareRetMemInst : + NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num), + ".param .align $align .b8 retval$num[$size];", + [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>; +def DeclareRetScalarInst : + NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), + ".param .b$size retval$num;", + [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>; +def DeclareRetRegInst : + NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num), + ".reg .b$size retval$num;", + [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>; + +def DeclareParamInst : + NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size), + ".param .align $align .b8 param$a[$size];", + [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>; +def DeclareScalarParamInst : + NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), + ".param .b$size param$a;", + [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>; +def DeclareScalarRegInst : + NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size), + ".reg .b$size param$a;", + [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>; + +class MoveParamInst : + NVPTXInst<(outs regclass:$dst), (ins regclass:$src), + !strconcat("mov", asmstr, " \t$dst, $src;"), + [(set regclass:$dst, (MoveParam regclass:$src))]>; + +def MoveParamI64 : MoveParamInst; +def MoveParamI32 : MoveParamInst; +def MoveParamI16 : + NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), + "cvt.u16.u32 \t$dst, $src;", + [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; +def MoveParamF64 : MoveParamInst; +def MoveParamF32 : MoveParamInst; +def MoveParamF16 : MoveParamInst; + +class PseudoUseParamInst : + NVPTXInst<(outs), (ins regclass:$src), + "// Pseudo use of $src", + [(PseudoUseParam regclass:$src)]>; + +def PseudoUseParamI64 : PseudoUseParamInst; +def PseudoUseParamI32 : PseudoUseParamInst; +def PseudoUseParamI16 : PseudoUseParamInst; +def PseudoUseParamF64 : PseudoUseParamInst; +def PseudoUseParamF32 : PseudoUseParamInst; + + +// +// Load / Store Handling +// +multiclass LD { + def _avar : NVPTXInst< + (outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr];", []>; + def _areg : NVPTXInst< + (outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr];", []>; + def _areg_64 : NVPTXInst< + (outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr];", []>; + def _ari : NVPTXInst< + (outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr+$offset];", []>; + def _ari_64 : NVPTXInst< + (outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr+$offset];", []>; + def _asi : NVPTXInst< + (outs regclass:$dst), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t$dst, [$addr+$offset];", []>; +} + +let mayLoad=1, hasSideEffects=0 in { + defm LD_i8 : LD; + defm LD_i16 : LD; + defm LD_i32 : LD; + defm LD_i64 : LD; + defm LD_f16 : LD; + defm LD_f16x2 : LD; + defm LD_f32 : LD; + defm LD_f64 : LD; +} + +multiclass ST { + def _avar : NVPTXInst< + (outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, imem:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr], $src;", []>; + def _areg : NVPTXInst< + (outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr], $src;", []>; + def _areg_64 : NVPTXInst< + (outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr], $src;", []>; + def _ari : NVPTXInst< + (outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr+$offset], $src;", []>; + def _ari_64 : NVPTXInst< + (outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr+$offset], $src;", []>; + def _asi : NVPTXInst< + (outs), + (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, + LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth" + " \t[$addr+$offset], $src;", []>; +} + +let mayStore=1, hasSideEffects=0 in { + defm ST_i8 : ST; + defm ST_i16 : ST; + defm ST_i32 : ST; + defm ST_i64 : ST; + defm ST_f16 : ST; + defm ST_f16x2 : ST; + defm ST_f32 : ST; + defm ST_f64 : ST; +} + +// The following is used only in and after vector elementizations. Vector +// elementization happens at the machine instruction level, so the following +// instructions never appear in the DAG. +multiclass LD_VEC { + def _v2_avar : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr];", []>; + def _v2_areg : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr];", []>; + def _v2_areg_64 : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr];", []>; + def _v2_ari : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; + def _v2_ari_64 : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; + def _v2_asi : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2}}, [$addr+$offset];", []>; + def _v4_avar : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; + def _v4_areg : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; + def _v4_areg_64 : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];", []>; + def _v4_ari : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; + def _v4_ari_64 : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; + def _v4_asi : NVPTXInst< + (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4), + (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];", []>; +} +let mayLoad=1, hasSideEffects=0 in { + defm LDV_i8 : LD_VEC; + defm LDV_i16 : LD_VEC; + defm LDV_i32 : LD_VEC; + defm LDV_i64 : LD_VEC; + defm LDV_f16 : LD_VEC; + defm LDV_f16x2 : LD_VEC; + defm LDV_f32 : LD_VEC; + defm LDV_f64 : LD_VEC; +} + +multiclass ST_VEC { + def _v2_avar : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2}};", []>; + def _v2_areg : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2}};", []>; + def _v2_areg_64 : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2}};", []>; + def _v2_ari : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr, + i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2}};", []>; + def _v2_ari_64 : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, + i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2}};", []>; + def _v2_asi : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp, + LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, + i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2}};", []>; + def _v4_avar : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_areg : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_areg_64 : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_ari : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_ari_64 : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth " + "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; + def _v4_asi : NVPTXInst< + (outs), + (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4, + LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign, + i32imm:$fromWidth, imem:$addr, i32imm:$offset), + "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}" + "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};", []>; +} + +let mayStore=1, hasSideEffects=0 in { + defm STV_i8 : ST_VEC; + defm STV_i16 : ST_VEC; + defm STV_i32 : ST_VEC; + defm STV_i64 : ST_VEC; + defm STV_f16 : ST_VEC; + defm STV_f16x2 : ST_VEC; + defm STV_f32 : ST_VEC; + defm STV_f64 : ST_VEC; +} + +//---- Conversion ---- + +class F_BITCONVERT : + NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a), + !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")), + [(set regclassOut:$d, (bitconvert regclassIn:$a))]>; + +def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>; +def BITCONVERT_16_F2I : F_BITCONVERT<"16", Float16Regs, Int16Regs>; +def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>; +def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>; +def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>; +def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>; +def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", Int32Regs, Float16x2Regs>; +def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", Float16x2Regs, Int32Regs>; + +// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where +// we cannot specify floating-point literals in isel patterns. Therefore, we +// use an integer selp to select either 1 or 0 and then cvt to floating-point. + +// sint -> f16 +def : Pat<(f16 (sint_to_fp Int1Regs:$a)), + (CVT_f16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f16 (sint_to_fp Int16Regs:$a)), + (CVT_f16_s16 Int16Regs:$a, CvtRN)>; +def : Pat<(f16 (sint_to_fp Int32Regs:$a)), + (CVT_f16_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(f16 (sint_to_fp Int64Regs:$a)), + (CVT_f16_s64 Int64Regs:$a, CvtRN)>; + +// uint -> f16 +def : Pat<(f16 (uint_to_fp Int1Regs:$a)), + (CVT_f16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f16 (uint_to_fp Int16Regs:$a)), + (CVT_f16_u16 Int16Regs:$a, CvtRN)>; +def : Pat<(f16 (uint_to_fp Int32Regs:$a)), + (CVT_f16_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(f16 (uint_to_fp Int64Regs:$a)), + (CVT_f16_u64 Int64Regs:$a, CvtRN)>; + +// sint -> f32 +def : Pat<(f32 (sint_to_fp Int1Regs:$a)), + (CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f32 (sint_to_fp Int16Regs:$a)), + (CVT_f32_s16 Int16Regs:$a, CvtRN)>; +def : Pat<(f32 (sint_to_fp Int32Regs:$a)), + (CVT_f32_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(f32 (sint_to_fp Int64Regs:$a)), + (CVT_f32_s64 Int64Regs:$a, CvtRN)>; + +// uint -> f32 +def : Pat<(f32 (uint_to_fp Int1Regs:$a)), + (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f32 (uint_to_fp Int16Regs:$a)), + (CVT_f32_u16 Int16Regs:$a, CvtRN)>; +def : Pat<(f32 (uint_to_fp Int32Regs:$a)), + (CVT_f32_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(f32 (uint_to_fp Int64Regs:$a)), + (CVT_f32_u64 Int64Regs:$a, CvtRN)>; + +// sint -> f64 +def : Pat<(f64 (sint_to_fp Int1Regs:$a)), + (CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f64 (sint_to_fp Int16Regs:$a)), + (CVT_f64_s16 Int16Regs:$a, CvtRN)>; +def : Pat<(f64 (sint_to_fp Int32Regs:$a)), + (CVT_f64_s32 Int32Regs:$a, CvtRN)>; +def : Pat<(f64 (sint_to_fp Int64Regs:$a)), + (CVT_f64_s64 Int64Regs:$a, CvtRN)>; + +// uint -> f64 +def : Pat<(f64 (uint_to_fp Int1Regs:$a)), + (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>; +def : Pat<(f64 (uint_to_fp Int16Regs:$a)), + (CVT_f64_u16 Int16Regs:$a, CvtRN)>; +def : Pat<(f64 (uint_to_fp Int32Regs:$a)), + (CVT_f64_u32 Int32Regs:$a, CvtRN)>; +def : Pat<(f64 (uint_to_fp Int64Regs:$a)), + (CVT_f64_u64 Int64Regs:$a, CvtRN)>; + + +// f16 -> sint +def : Pat<(i1 (fp_to_sint Float16Regs:$a)), + (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_sint Float16Regs:$a)), + (CVT_s16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (fp_to_sint Float16Regs:$a)), + (CVT_s16_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_sint Float16Regs:$a)), + (CVT_s32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i32 (fp_to_sint Float16Regs:$a)), + (CVT_s32_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_sint Float16Regs:$a)), + (CVT_s64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i64 (fp_to_sint Float16Regs:$a)), + (CVT_s64_f16 Float16Regs:$a, CvtRZI)>; + +// f16 -> uint +def : Pat<(i1 (fp_to_uint Float16Regs:$a)), + (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_uint Float16Regs:$a)), + (CVT_u16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (fp_to_uint Float16Regs:$a)), + (CVT_u16_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint Float16Regs:$a)), + (CVT_u32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i32 (fp_to_uint Float16Regs:$a)), + (CVT_u32_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint Float16Regs:$a)), + (CVT_u64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i64 (fp_to_uint Float16Regs:$a)), + (CVT_u64_f16 Float16Regs:$a, CvtRZI)>; + +// f32 -> sint +def : Pat<(i1 (fp_to_sint Float32Regs:$a)), + (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_sint Float32Regs:$a)), + (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (fp_to_sint Float32Regs:$a)), + (CVT_s16_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_sint Float32Regs:$a)), + (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i32 (fp_to_sint Float32Regs:$a)), + (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_sint Float32Regs:$a)), + (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i64 (fp_to_sint Float32Regs:$a)), + (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; + +// f32 -> uint +def : Pat<(i1 (fp_to_uint Float32Regs:$a)), + (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_uint Float32Regs:$a)), + (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i16 (fp_to_uint Float32Regs:$a)), + (CVT_u16_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint Float32Regs:$a)), + (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i32 (fp_to_uint Float32Regs:$a)), + (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint Float32Regs:$a)), + (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(i64 (fp_to_uint Float32Regs:$a)), + (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; + +// f64 -> sint +def : Pat<(i1 (fp_to_sint Float64Regs:$a)), + (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_sint Float64Regs:$a)), + (CVT_s16_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_sint Float64Regs:$a)), + (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_sint Float64Regs:$a)), + (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; + +// f64 -> uint +def : Pat<(i1 (fp_to_uint Float64Regs:$a)), + (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_uint Float64Regs:$a)), + (CVT_u16_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint Float64Regs:$a)), + (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint Float64Regs:$a)), + (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; + +// sext i1 +def : Pat<(i16 (sext Int1Regs:$a)), + (SELP_s16ii -1, 0, Int1Regs:$a)>; +def : Pat<(i32 (sext Int1Regs:$a)), + (SELP_s32ii -1, 0, Int1Regs:$a)>; +def : Pat<(i64 (sext Int1Regs:$a)), + (SELP_s64ii -1, 0, Int1Regs:$a)>; + +// zext i1 +def : Pat<(i16 (zext Int1Regs:$a)), + (SELP_u16ii 1, 0, Int1Regs:$a)>; +def : Pat<(i32 (zext Int1Regs:$a)), + (SELP_u32ii 1, 0, Int1Regs:$a)>; +def : Pat<(i64 (zext Int1Regs:$a)), + (SELP_u64ii 1, 0, Int1Regs:$a)>; + +// anyext i1 +def : Pat<(i16 (anyext Int1Regs:$a)), + (SELP_u16ii -1, 0, Int1Regs:$a)>; +def : Pat<(i32 (anyext Int1Regs:$a)), + (SELP_u32ii -1, 0, Int1Regs:$a)>; +def : Pat<(i64 (anyext Int1Regs:$a)), + (SELP_u64ii -1, 0, Int1Regs:$a)>; + +// sext i16 +def : Pat<(i32 (sext Int16Regs:$a)), + (CVT_s32_s16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i64 (sext Int16Regs:$a)), + (CVT_s64_s16 Int16Regs:$a, CvtNONE)>; + +// zext i16 +def : Pat<(i32 (zext Int16Regs:$a)), + (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i64 (zext Int16Regs:$a)), + (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; + +// anyext i16 +def : Pat<(i32 (anyext Int16Regs:$a)), + (CVT_u32_u16 Int16Regs:$a, CvtNONE)>; +def : Pat<(i64 (anyext Int16Regs:$a)), + (CVT_u64_u16 Int16Regs:$a, CvtNONE)>; + +// sext i32 +def : Pat<(i64 (sext Int32Regs:$a)), + (CVT_s64_s32 Int32Regs:$a, CvtNONE)>; + +// zext i32 +def : Pat<(i64 (zext Int32Regs:$a)), + (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; + +// anyext i32 +def : Pat<(i64 (anyext Int32Regs:$a)), + (CVT_u64_u32 Int32Regs:$a, CvtNONE)>; + + +// truncate i64 +def : Pat<(i32 (trunc Int64Regs:$a)), + (CVT_u32_u64 Int64Regs:$a, CvtNONE)>; +def : Pat<(i16 (trunc Int64Regs:$a)), + (CVT_u16_u64 Int64Regs:$a, CvtNONE)>; +def : Pat<(i1 (trunc Int64Regs:$a)), + (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>; + +// truncate i32 +def : Pat<(i16 (trunc Int32Regs:$a)), + (CVT_u16_u32 Int32Regs:$a, CvtNONE)>; +def : Pat<(i1 (trunc Int32Regs:$a)), + (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>; + +// truncate i16 +def : Pat<(i1 (trunc Int16Regs:$a)), + (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>; + +// sext_inreg +def : Pat<(sext_inreg Int16Regs:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>; +def : Pat<(sext_inreg Int32Regs:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>; +def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>; +def : Pat<(sext_inreg Int64Regs:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>; +def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>; +def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>; + + +// Select instructions with 32-bit predicates +def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), + (SELP_b16rr Int16Regs:$a, Int16Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; +def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), + (SELP_b32rr Int32Regs:$a, Int32Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; +def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), + (SELP_b64rr Int64Regs:$a, Int64Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; +def : Pat<(select Int32Regs:$pred, Float16Regs:$a, Float16Regs:$b), + (SELP_f16rr Float16Regs:$a, Float16Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; +def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), + (SELP_f32rr Float32Regs:$a, Float32Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; +def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), + (SELP_f64rr Float64Regs:$a, Float64Regs:$b, + (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; + + +let hasSideEffects = 0 in { + // pack a set of smaller int registers to a larger int register + def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2, + Int16Regs:$s3, Int16Regs:$s4), + "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>; + def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), + (ins Int16Regs:$s1, Int16Regs:$s2), + "mov.b32 \t$d, {{$s1, $s2}};", []>; + def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), + (ins Int32Regs:$s1, Int32Regs:$s2), + "mov.b64 \t$d, {{$s1, $s2}};", []>; + def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), + (ins Float32Regs:$s1, Float32Regs:$s2), + "mov.b64 \t$d, {{$s1, $s2}};", []>; + + // unpack a larger int register to a set of smaller int registers + def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, + Int16Regs:$d3, Int16Regs:$d4), + (ins Int64Regs:$s), + "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>; + def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), + (ins Int32Regs:$s), + "mov.b32 \t{{$d1, $d2}}, $s;", []>; + def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), + (ins Int64Regs:$s), + "mov.b64 \t{{$d1, $d2}}, $s;", []>; + def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), + (ins Float64Regs:$s), + "mov.b64 \t{{$d1, $d2}}, $s;", []>; + +} + +let hasSideEffects = 0 in { + // Extract element of f16x2 register. PTX does not provide any way + // to access elements of f16x2 vector directly, so we need to + // extract it using a temporary register. + def F16x2toF16_0 : NVPTXInst<(outs Float16Regs:$dst), + (ins Float16x2Regs:$src), + "{{ .reg .b16 \t%tmp_hi;\n\t" + " mov.b32 \t{$dst, %tmp_hi}, $src; }}", + [(set Float16Regs:$dst, + (extractelt (v2f16 Float16x2Regs:$src), 0))]>; + def F16x2toF16_1 : NVPTXInst<(outs Float16Regs:$dst), + (ins Float16x2Regs:$src), + "{{ .reg .b16 \t%tmp_lo;\n\t" + " mov.b32 \t{%tmp_lo, $dst}, $src; }}", + [(set Float16Regs:$dst, + (extractelt (v2f16 Float16x2Regs:$src), 1))]>; + + // Coalesce two f16 registers into f16x2 + def BuildF16x2 : NVPTXInst<(outs Float16x2Regs:$dst), + (ins Float16Regs:$a, Float16Regs:$b), + "mov.b32 \t$dst, {{$a, $b}};", + [(set Float16x2Regs:$dst, + (build_vector (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>; + + // Directly initializing underlying the b32 register is one less SASS + // instruction than than vector-packing move. + def BuildF16x2i : NVPTXInst<(outs Float16x2Regs:$dst), (ins i32imm:$src), + "mov.b32 \t$dst, $src;", + []>; + + // Split f16x2 into two f16 registers. + def SplitF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), + (ins Float16x2Regs:$src), + "mov.b32 \t{{$lo, $hi}}, $src;", + []>; + // Split an i32 into two f16 + def SplitI32toF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), + (ins Int32Regs:$src), + "mov.b32 \t{{$lo, $hi}}, $src;", + []>; +} + +// Count leading zeros +let hasSideEffects = 0 in { + def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), + "clz.b32 \t$d, $a;", []>; + def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "clz.b64 \t$d, $a;", []>; +} + +// 32-bit has a direct PTX instruction +def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>; + +// The return type of the ctlz ISD node is the same as its input, but the PTX +// ctz instruction always returns a 32-bit value. For ctlz.i64, convert the +// ptx value to 64 bits to match the ISD node's semantics, unless we know we're +// truncating back down to 32 bits. +def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>; + +// For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the +// result back to 16-bits if necessary. We also need to subtract 16 because +// the high-order 16 zeros were counted. +// +// TODO: NVPTX has a mov.b32 b32reg, {imm, b16reg} instruction, which we could +// use to save one SASS instruction (on sm_35 anyway): +// +// mov.b32 $tmp, {0xffff, $a} +// ctlz.b32 $result, $tmp +// +// That is, instead of zero-extending the input to 32 bits, we'd "one-extend" +// and then ctlz that value. This way we don't have to subtract 16 from the +// result. Unfortunately today we don't have a way to generate +// "mov b32reg, {b16imm, b16reg}", so we don't do this optimization. +def : Pat<(ctlz Int16Regs:$a), + (SUBi16ri (CVT_u16_u32 + (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>; +def : Pat<(i32 (zext (ctlz Int16Regs:$a))), + (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>; + +// Population count +let hasSideEffects = 0 in { + def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), + "popc.b32 \t$d, $a;", []>; + def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "popc.b64 \t$d, $a;", []>; +} + +// 32-bit has a direct PTX instruction +def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>; + +// For 64-bit, the result in PTX is actually 32-bit so we zero-extend to 64-bit +// to match the LLVM semantics. Just as with ctlz.i64, we provide a second +// pattern that avoids the type conversion if we're truncating the result to +// i32 anyway. +def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(i32 (trunc (ctpop Int64Regs:$a))), (POPCr64 Int64Regs:$a)>; + +// For 16-bit, we zero-extend to 32-bit, then trunc the result back to 16-bits. +// If we know that we're storing into an i32, we can avoid the final trunc. +def : Pat<(ctpop Int16Regs:$a), + (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>; +def : Pat<(i32 (zext (ctpop Int16Regs:$a))), + (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>; + +// fpround f32 -> f16 +def : Pat<(f16 (fpround Float32Regs:$a)), + (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f16 (fpround Float32Regs:$a)), + (CVT_f16_f32 Float32Regs:$a, CvtRN)>; + +// fpround f64 -> f16 +def : Pat<(f16 (fpround Float64Regs:$a)), + (CVT_f16_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f16 (fpround Float64Regs:$a)), + (CVT_f16_f64 Float64Regs:$a, CvtRN)>; + +// fpround f64 -> f32 +def : Pat<(f32 (fpround Float64Regs:$a)), + (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f32 (fpround Float64Regs:$a)), + (CVT_f32_f64 Float64Regs:$a, CvtRN)>; + +// fpextend f16 -> f32 +def : Pat<(f32 (fpextend Float16Regs:$a)), + (CVT_f32_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f32 (fpextend Float16Regs:$a)), + (CVT_f32_f16 Float16Regs:$a, CvtNONE)>; + +// fpextend f16 -> f64 +def : Pat<(f64 (fpextend Float16Regs:$a)), + (CVT_f64_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f64 (fpextend Float16Regs:$a)), + (CVT_f64_f16 Float16Regs:$a, CvtNONE)>; + +// fpextend f32 -> f64 +def : Pat<(f64 (fpextend Float32Regs:$a)), + (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f64 (fpextend Float32Regs:$a)), + (CVT_f64_f32 Float32Regs:$a, CvtNONE)>; + +def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +// fceil, ffloor, fround, ftrunc. + +def : Pat<(fceil Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(fceil Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>; +def : Pat<(fceil Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(fceil Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>; +def : Pat<(fceil Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; + +def : Pat<(ffloor Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(ffloor Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>; +def : Pat<(ffloor Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(ffloor Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>; +def : Pat<(ffloor Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; + +def : Pat<(fround Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f16 (fround Float16Regs:$a)), + (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; +def : Pat<(fround Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f32 (fround Float32Regs:$a)), + (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; +def : Pat<(f64 (fround Float64Regs:$a)), + (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; + +def : Pat<(ftrunc Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(ftrunc Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>; +def : Pat<(ftrunc Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(ftrunc Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>; +def : Pat<(ftrunc Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; + +// nearbyint and rint are implemented as rounding to nearest even. This isn't +// strictly correct, because it causes us to ignore the rounding mode. But it +// matches what CUDA's "libm" does. + +def : Pat<(fnearbyint Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(fnearbyint Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; +def : Pat<(fnearbyint Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(fnearbyint Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; +def : Pat<(fnearbyint Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; + +def : Pat<(frint Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(frint Float16Regs:$a), + (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; +def : Pat<(frint Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(frint Float32Regs:$a), + (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>; +def : Pat<(frint Float64Regs:$a), + (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; + + +//----------------------------------- +// Control-flow +//----------------------------------- + +let isTerminator=1 in { + let isReturn=1, isBarrier=1 in + def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>; + + let isBranch=1 in + def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), + "@$a bra \t$target;", + [(brcond Int1Regs:$a, bb:$target)]>; + let isBranch=1 in + def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), + "@!$a bra \t$target;", []>; + + let isBranch=1, isBarrier=1 in + def GOTO : NVPTXInst<(outs), (ins brtarget:$target), + "bra.uni \t$target;", [(br bb:$target)]>; +} + +def : Pat<(brcond Int32Regs:$a, bb:$target), + (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>; + +// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a +// conditional branch if the target block is the next block so that the code +// can fall through to the target block. The invertion is done by 'xor +// condition, 1', which will be translated to (setne condition, -1). Since ptx +// supports '@!pred bra target', we should use it. +def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target), + (CBranchOther Int1Regs:$a, bb:$target)>; + +// Call +def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; +def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPSideEffect]>; + +def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def calltarget : Operand; +let isCall=1 in { + def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>; +} + +def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>; +def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>; + +// Pseudo instructions. +class Pseudo pattern> + : NVPTXInst; + +def Callseq_Start : + NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "\\{ // callseq $amt1, $amt2\n" + "\t.reg .b32 temp_param_reg;", + [(callseq_start timm:$amt1, timm:$amt2)]>; +def Callseq_End : + NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "\\} // callseq $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>; + +// trap instruction +def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>; + +// Call prototype wrapper +def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def CallPrototype : + SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype, + [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; +def ProtoIdent : Operand { + let PrintMethod = "printProtoIdent"; +} +def CALL_PROTOTYPE : + NVPTXInst<(outs), (ins ProtoIdent:$ident), + "$ident", [(CallPrototype (i32 texternalsym:$ident))]>; + + +include "NVPTXIntrinsics.td" + + +//----------------------------------- +// Notes +//----------------------------------- +// BSWAP is currently expanded. The following is a more efficient +// - for < sm_20, use vector scalar mov, as tesla support native 16-bit register +// - for sm_20, use pmpt (use vector scalar mov to get the pack and +// unpack). sm_20 supports native 32-bit register, but not native 16-bit +// register. diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp index 5b626cbcd5ba12ebd52bb17038d4810bad0cde98..139dc7fbeeda15f8aa30f7858d39b4f289771ecf 100644 --- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -90,8 +90,8 @@ //===----------------------------------------------------------------------===// #include "NVPTX.h" -#include "NVPTXUtilities.h" #include "NVPTXTargetMachine.h" +#include "NVPTXUtilities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -164,7 +164,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) { // Set the alignment to alignment of the byval parameter. This is because, // later load/stores assume that alignment, and we are going to replace // the use of the byval parameter with this alloca instruction. - AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1)); + AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo())); Arg->replaceAllUsesWith(AllocA); Value *ArgInParam = new AddrSpaceCastInst( diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp index e10b046f7c97ae44839fc3d42a5262a8a2242093..4e902c0fb5073ac9a4ab00e5e2af13fc9b0a24f8 100644 --- a/lib/Target/NVPTX/NVPTXPeephole.cpp +++ b/lib/Target/NVPTX/NVPTXPeephole.cpp @@ -36,8 +36,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index ab5298d0dcfd69615f9d9b6d206cb3341a248940..2b6ba8c85d4d19065f091f0d2cde8b3228d83b91 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "NVPTXTargetMachine.h" #include "NVPTX.h" #include "NVPTXAllocaHoisting.h" #include "NVPTXLowerAggrCopies.h" -#include "NVPTXTargetMachine.h" #include "NVPTXTargetObjectFile.h" #include "NVPTXTargetTransformInfo.h" #include "llvm/ADT/STLExtras.h" @@ -132,7 +132,7 @@ namespace { class NVPTXPassConfig : public TargetPassConfig { public: - NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) + NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} NVPTXTargetMachine &getNVPTXTargetMachine() const { @@ -163,7 +163,7 @@ private: } // end anonymous namespace TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { - return new NVPTXPassConfig(this, PM); + return new NVPTXPassConfig(*this, PM); } void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 1ed8e3b1e93573bab0a165a3632e430cbaa7e60a..2f3981be22f831ff34f19d293c23e61022c896bb 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -65,6 +65,9 @@ public: TargetIRAnalysis getTargetIRAnalysis() override; + bool isMachineVerifierClean() const override { + return false; + } }; // NVPTXTargetMachine. class NVPTXTargetMachine32 : public NVPTXTargetMachine { diff --git a/lib/Target/NVPTX/NVVMIntrRange.cpp b/lib/Target/NVPTX/NVVMIntrRange.cpp index 9c71a2ee165b3c5c5689cbb57c42fdb49f475c4a..11277f5ba5966114e114e22eecdc6cdaf5922ec8 100644 --- a/lib/Target/NVPTX/NVVMIntrRange.cpp +++ b/lib/Target/NVPTX/NVVMIntrRange.cpp @@ -15,8 +15,8 @@ #include "NVPTX.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" using namespace llvm; diff --git a/lib/Target/Nios2/CMakeLists.txt b/lib/Target/Nios2/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..78db452094bd06e02ec082929c8cd07d2c103b07 --- /dev/null +++ b/lib/Target/Nios2/CMakeLists.txt @@ -0,0 +1,18 @@ +set(LLVM_TARGET_DEFINITIONS Nios2.td) + +#Generate Nios2GenRegisterInfo.inc and Nios2GenInstrInfo.inc which included by +#your hand code C++ files. +#Nios2GenRegisterInfo.inc came from Nios2RegisterInfo.td, Nios2GenInstrInfo.inc +#came from Nios2InstrInfo.td. +tablegen(LLVM Nios2GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM Nios2GenInstrInfo.inc -gen-instr-info) + +#Nios2CommonTableGen must be defined +add_public_tablegen_target(Nios2CommonTableGen) + +#Nios2CodeGen should match with LLVMBuild.txt Nios2CodeGen +add_llvm_target(Nios2CodeGen Nios2TargetMachine.cpp) + +#Should match with "subdirectories = MCTargetDesc TargetInfo" in LLVMBuild.txt +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Nios2/LLVMBuild.txt b/lib/Target/Nios2/LLVMBuild.txt new file mode 100644 index 0000000000000000000000000000000000000000..b40a763797065f17d5cb8312ec445d6c81359ea1 --- /dev/null +++ b/lib/Target/Nios2/LLVMBuild.txt @@ -0,0 +1,61 @@ +;===- ./lib/Target/Nios2/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +#Following comments extracted from http: // llvm.org/docs/LLVMBuild.html + +[common] +subdirectories = + MCTargetDesc + TargetInfo + +[component_0] +#TargetGroup components are an extension of LibraryGroups, specifically for +#defining LLVM targets(which are handled specially in a few places). +type = TargetGroup +#The name of the component should always be the name of the target.(should +#match "def Nios2 : Target" in Nios2.td) +name = Nios2 +#Nios2 component is located in directory Target / +parent = Target +#Whether this target defines an assembly parser, assembly printer, disassembler +#, and supports JIT compilation.They are optional. + +[component_1] +#component_1 is a Library type and name is Nios2CodeGen.After build it will +#in lib / libLLVMNios2CodeGen.a of your build command directory. +type = Library +name = Nios2CodeGen +#Nios2CodeGen component(Library) is located in directory Nios2 / +parent = Nios2 +#If given, a list of the names of Library or LibraryGroup components which +#must also be linked in whenever this library is used.That is, the link time +#dependencies for this component.When tools are built, the build system will +#include the transitive closure of all required_libraries for the components +#the tool needs. +required_libraries = CodeGen + Core + GlobalISel + MC + Nios2Desc + Nios2Info + Support + Target +#end of required_libraries + +#All LLVMBuild.txt in Target / Nios2 and subdirectory use 'add_to_library_groups +#= Nios2' +add_to_library_groups = Nios2 diff --git a/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt b/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..21def509a2324a5cf82c330e8a43595fea8a162d --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,2 @@ +#MCTargetDesc / CMakeLists.txt +add_llvm_library(LLVMNios2Desc Nios2MCTargetDesc.cpp) diff --git a/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt b/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000000000000000000000000000000000000..4dc6995e7f5c6634bed53a3a4e8eb3764bc8d2a6 --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,25 @@ +;===- ./lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Nios2Desc +parent = Nios2 +required_libraries = MC + Nios2Info + Support +add_to_library_groups = Nios2 diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d913166399c677f149947489f78c8d3f752868de --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp @@ -0,0 +1,25 @@ +//===-- Nios2MCTargetDesc.cpp - Nios2 Target Descriptions -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Nios2 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "Nios2MCTargetDesc.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "Nios2GenInstrInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "Nios2GenRegisterInfo.inc" + +extern "C" void LLVMInitializeNios2TargetMC() {} diff --git a/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h new file mode 100644 index 0000000000000000000000000000000000000000..d426062db16837116060fd741ab073b06ff90d57 --- /dev/null +++ b/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h @@ -0,0 +1,34 @@ +//===-- Nios2MCTargetDesc.h - Nios2 Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Nios2 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H +#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H + +namespace llvm { +class Target; +class Triple; + +Target &getTheNios2Target(); + +} // namespace llvm + +// Defines symbolic names for Nios2 registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "Nios2GenRegisterInfo.inc" + +// Defines symbolic names for the Nios2 instructions. +#define GET_INSTRINFO_ENUM +#include "Nios2GenInstrInfo.inc" + +#endif diff --git a/lib/Target/Nios2/Nios2.h b/lib/Target/Nios2/Nios2.h new file mode 100644 index 0000000000000000000000000000000000000000..87202f48cfbe11c16f254858f7890473d2ec1554 --- /dev/null +++ b/lib/Target/Nios2/Nios2.h @@ -0,0 +1,25 @@ +//===-- Nios2.h - Top-level interface for Nios2 representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM Nios2 back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2_H + +#include "MCTargetDesc/Nios2MCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class Nios2TargetMachine; +} // namespace llvm + +#endif diff --git a/lib/Target/Nios2/Nios2.td b/lib/Target/Nios2/Nios2.td new file mode 100644 index 0000000000000000000000000000000000000000..e8abba8633708cd34a67aa110f72d7202526d080 --- /dev/null +++ b/lib/Target/Nios2/Nios2.td @@ -0,0 +1,29 @@ +//===-- Nios2.td - Describe the Nios2 Target Machine -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Target-dependent interfaces +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "Nios2RegisterInfo.td" +include "Nios2InstrInfo.td" + +def Nios2InstrInfo : InstrInfo; + +def Nios2 : Target { let InstructionSet = Nios2InstrInfo; } diff --git a/lib/Target/Nios2/Nios2InstrFormats.td b/lib/Target/Nios2/Nios2InstrFormats.td new file mode 100644 index 0000000000000000000000000000000000000000..79868be48a488b9f97e8abd5f43283ae3f71d14f --- /dev/null +++ b/lib/Target/Nios2/Nios2InstrFormats.td @@ -0,0 +1,117 @@ +//===-- Nios2InstrFormats.td - Nios2 Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe NIOS2 instructions format +// +// +//===----------------------------------------------------------------------===// + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format val> { + bits<3> Value = val; +} + +def Pseudo : Format<0>; +def FrmI : Format<1>; +def FrmR : Format<2>; +def FrmJ : Format<3>; +def FrmOther : Format<4>; // Instruction w/ a custom format + +// Generic Nios2 Format +class Nios2Inst pattern, Format f> + : Instruction { + field bits<32> Inst; + Format Form = f; + + let Namespace = "Nios2"; + + let Size = 4; + + bits<6> Opcode = 0; + + // Bottom 6 bits are the 'opcode' field + let Inst{5 - 0} = Opcode; + + let OutOperandList = outs; + let InOperandList = ins; + + let AsmString = asmstr; + let Pattern = pattern; + + // + // Attributes specific to Nios2 instructions: + // + bits<3> FormBits = Form.Value; + + // TSFlags layout should be kept in sync with Nios2InstrInfo.h. + let TSFlags{2 - 0} = FormBits; + + let DecoderNamespace = "Nios2"; +} + +// Nios2 Instruction Format +class InstSE pattern, Format f> + : Nios2Inst { +} + +//===----------------------------------------------------------------------===// +// Format I instruction class in Nios2 : <|A|B|immediate|opcode|> +//===----------------------------------------------------------------------===// + +class FI op, dag outs, dag ins, string asmstr, list pattern> + : InstSE { + bits<5> rA; + bits<5> rB; + bits<16> imm; + + let Opcode = op; + + let Inst{31 - 27} = rA; + let Inst{26 - 22} = rB; + let Inst{21 - 6} = imm; +} + +//===----------------------------------------------------------------------===// +// Format R instruction : <|A|B|C|opx|imm|opcode|> +//===----------------------------------------------------------------------===// + +class FR opx, dag outs, dag ins, string asmstr, list pattern> + : InstSE { + bits<5> rA; + bits<5> rB; + bits<5> rC; + bits<5> imm = 0; + + // opcode is always 0x3a for R instr. + let Opcode = 0x3a; + + let Inst{31 - 27} = rA; + let Inst{26 - 22} = rB; + let Inst{21 - 17} = rC; + // opx stands for opcode extension + let Inst{16 - 11} = opx; + // optional 5-bit immediate value + let Inst{10 - 6} = imm; +} + +//===----------------------------------------------------------------------===// +// Format J instruction class in Nios2 : <|address|opcode|> +//===----------------------------------------------------------------------===// + +class FJ op, dag outs, dag ins, string asmstr, list pattern> + : InstSE { + bits<26> addr; + + let Opcode = op; + + let Inst{31 - 6} = addr; +} diff --git a/lib/Target/Nios2/Nios2InstrInfo.td b/lib/Target/Nios2/Nios2InstrInfo.td new file mode 100644 index 0000000000000000000000000000000000000000..5e4815ab3e16f7a7bfeb5b2fb4e20ef823c8e78c --- /dev/null +++ b/lib/Target/Nios2/Nios2InstrInfo.td @@ -0,0 +1,50 @@ +//===- Nios2InstrInfo.td - Target Description for Nios2 ------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Nios2 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +include "Nios2InstrFormats.td" + +//===----------------------------------------------------------------------===// +// Nios2 Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +def simm16 : Operand { + let DecoderMethod= "DecodeSimm16"; +} + +// Node immediate fits as 16-bit sign extended on target immediate. +// e.g. addi, andi +def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; + +//===----------------------------------------------------------------------===// +// Instructions specific format +//===----------------------------------------------------------------------===// + +// Arithmetic and logical instructions with 2 register operands. +class ArithLogicI op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type, RegisterClass RC> : + FI { + let isReMaterializable = 1; +} + +//===----------------------------------------------------------------------===// +// Nios2 R1 Instructions +//===----------------------------------------------------------------------===// + +/// Arithmetic Instructions (ALU Immediate) +def ADDi : ArithLogicI<0x04, "addi", add, simm16, immSExt16, CPURegs>; diff --git a/lib/Target/Nios2/Nios2RegisterInfo.td b/lib/Target/Nios2/Nios2RegisterInfo.td new file mode 100644 index 0000000000000000000000000000000000000000..1808815816f3b3292f0f734dc980a7c1e74320e7 --- /dev/null +++ b/lib/Target/Nios2/Nios2RegisterInfo.td @@ -0,0 +1,60 @@ +//===-- Nios2RegisterInfo.td - Nios2 Register defs ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// We have bank of 32 registers. +class Nios2Reg : Register { + field bits<5> Num; + let Namespace = "Nios2"; +} + +// Nios2 CPU Registers +class Nios2GPRReg num, string n> : Nios2Reg { + let Num = num; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +let Namespace = "Nios2" in { + // General Purpose Registers + def ZERO : Nios2GPRReg<0, "zero">, DwarfRegNum<[ 0 ]>; + def AT : Nios2GPRReg<1, "at">, DwarfRegNum<[ 1 ]>; + foreach RegNum = 2 - 23 in { + def R #RegNum : Nios2GPRReg, DwarfRegNum<[ RegNum ]>; + } + def ET : Nios2GPRReg<24, "et">, DwarfRegNum<[ 24 ]>; + def BT : Nios2GPRReg<25, "bt">, DwarfRegNum<[ 25 ]>; + def GP : Nios2GPRReg<26, "gp">, DwarfRegNum<[ 26 ]>; + def SP : Nios2GPRReg<27, "sp">, DwarfRegNum<[ 27 ]>; + def FP : Nios2GPRReg<28, "fp">, DwarfRegNum<[ 28 ]>; + def EA : Nios2GPRReg<29, "ea">, DwarfRegNum<[ 29 ]>; + def BA : Nios2GPRReg<30, "ba">, DwarfRegNum<[ 30 ]>; + def RA : Nios2GPRReg<31, "ra">, DwarfRegNum<[ 31 ]>; + def PC : Nios2Reg<"pc">, DwarfRegNum<[ 32 ]>; +} + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +def CPURegs : RegisterClass<"Nios2", [ i32 ], 32, + (add + // Reserved + ZERO, + AT, + // Return Values and Arguments + (sequence "R%u", 2, 7), + // Not preserved across procedure calls + // Caller saved + (sequence "R%u", 8, 15), + // Callee saved + (sequence "R%u", 16, 23), + // Reserved + ET, BT, GP, SP, FP, EA, BA, RA, PC)>; diff --git a/lib/Target/Nios2/Nios2TargetMachine.cpp b/lib/Target/Nios2/Nios2TargetMachine.cpp new file mode 100644 index 0000000000000000000000000000000000000000..16d4eabcfaf71d0409675c8d68ca1a4092f87b1e --- /dev/null +++ b/lib/Target/Nios2/Nios2TargetMachine.cpp @@ -0,0 +1,46 @@ +//===-- Nios2TargetMachine.cpp - Define TargetMachine for Nios2 -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the info about Nios2 target spec. +// +//===----------------------------------------------------------------------===// + +#include "Nios2TargetMachine.h" +#include "Nios2.h" + +using namespace llvm; + +#define DEBUG_TYPE "nios2" + +extern "C" void LLVMInitializeNios2Target() { + // Register the target. +} + +static std::string computeDataLayout(const Triple &TT, StringRef CPU, + const TargetOptions &Options) { + return "e-p:32:32:32-i8:8:32-i16:16:32-n32"; +} + +static Reloc::Model getEffectiveRelocModel(CodeModel::Model CM, + Optional RM) { + if (!RM.hasValue() || CM == CodeModel::JITDefault) + return Reloc::Static; + return *RM; +} + +Nios2TargetMachine::Nios2TargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Optional RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS, + Options, getEffectiveRelocModel(CM, RM), CM, OL) {} + +Nios2TargetMachine::~Nios2TargetMachine() {} diff --git a/lib/Target/Nios2/Nios2TargetMachine.h b/lib/Target/Nios2/Nios2TargetMachine.h new file mode 100644 index 0000000000000000000000000000000000000000..7f145c82f32cea1faeb942f3adedfe05a6bd6e57 --- /dev/null +++ b/lib/Target/Nios2/Nios2TargetMachine.h @@ -0,0 +1,30 @@ +//===-- Nios2TargetMachine.h - Define TargetMachine for Nios2 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Nios2 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class Nios2TargetMachine : public LLVMTargetMachine { +public: + Nios2TargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Optional RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + ~Nios2TargetMachine() override; +}; +} // namespace llvm + +#endif diff --git a/lib/Target/Nios2/TargetInfo/CMakeLists.txt b/lib/Target/Nios2/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..394d2c2680b76a0403ac6741774fe1f49fc04fe6 --- /dev/null +++ b/lib/Target/Nios2/TargetInfo/CMakeLists.txt @@ -0,0 +1 @@ +add_llvm_library(LLVMNios2Info Nios2TargetInfo.cpp) diff --git a/lib/Target/Nios2/TargetInfo/LLVMBuild.txt b/lib/Target/Nios2/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000000000000000000000000000000000000..558f7501ea6b6e7195152b63d958259c8e8957a0 --- /dev/null +++ b/lib/Target/Nios2/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Nios2/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Nios2Info +parent = Nios2 +required_libraries = Support +add_to_library_groups = Nios2 diff --git a/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp b/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e317686140f79b5ece8a608b1bb1fe1fafbeffe2 --- /dev/null +++ b/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp @@ -0,0 +1,24 @@ +//===-- Nios2TargetInfo.cpp - Nios2 Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Nios2.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +Target &llvm::getTheNios2Target() { + static Target TheNios2Target; + return TheNios2Target; +} + +extern "C" void LLVMInitializeNios2TargetInfo() { + RegisterTarget + X(getTheNios2Target(), "nios2", "Nios2"); +} diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 12ffbfdeacc12cd8a2319ca629933daafb141be7..11d22377611bf4a556aa33d928b27b8129d33a4c 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -204,6 +204,17 @@ static const unsigned G8Regs[] = { PPC::X28, PPC::X29, PPC::X30, PPC::X31 }; +static const unsigned G80Regs[] = { + PPC::ZERO8, PPC::X1, PPC::X2, PPC::X3, + PPC::X4, PPC::X5, PPC::X6, PPC::X7, + PPC::X8, PPC::X9, PPC::X10, PPC::X11, + PPC::X12, PPC::X13, PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31 +}; + static const unsigned QFRegs[] = { PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, @@ -301,6 +312,12 @@ static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, G8Regs); } +static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, G80Regs); +} + #define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass #define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 609d959c6d08ffafadf84c91d6360662da12ffb1..baf5902ddf584463a022550c725af3cdd62719f8 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "PPCInstPrinter.h" -#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPCInstrInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -95,7 +95,8 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - if (MI->getOpcode() == PPC::RLDICR) { + if (MI->getOpcode() == PPC::RLDICR || + MI->getOpcode() == PPC::RLDICR_32) { unsigned char SH = MI->getOperand(2).getImm(); unsigned char ME = MI->getOperand(3).getImm(); // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 4863ac54273666555e2ccc13a90dfc192a1efffa..028c2cb562f8e6361c83647e24a207a1e989b3f5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -7,8 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -18,9 +20,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index fd279c60f3f59ab7cc56a2a2efe63f7287d9d03b..1488bd5b0be618a01afb66cf8aabf23cbf3a546e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index d8fab5b7c01a2f7d78c8ae7b32a9188f6847b32d..d30bf1a56e8aa34a29e6c0003fed0b60f533b84e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -20,7 +20,7 @@ void PPCMCAsmInfoDarwin::anchor() { } PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { if (is64Bit) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } IsLittleEndian = false; @@ -50,7 +50,7 @@ PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) { NeedsLocalForSize = true; if (is64Bit) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } IsLittleEndian = T.getArch() == Triple::ppc64le; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 6b97d4c1456b63d2ce02f1606c2d5f2ad6ca14ce..54f664314578e826fc003cd91e0453cbf71c5938 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "PPCFixupKinds.h" #include "PPCMCExpr.h" +#include "PPCFixupKinds.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 2d686f2279194bd833404ec2fa47906d0f846d4d..e8f220ea545766caf33ae6aad9162878f84a4c36 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCAsmInfo.h" -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPCTargetStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" @@ -30,11 +31,10 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 1f38a8c947e73266fded438965a991903ba94f4a..6d591ca964a6a2454c9f2b092251fe9ddb172c2c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -18,7 +19,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; @@ -151,7 +151,7 @@ static void makeRelocationInfo(MachO::any_relocation_info &MRE, // The bitfield offsets that work (as determined by trial-and-error) // are different than what is documented in the mach-o manuals. // This appears to be an endianness issue; reversing the order of the - // documented bitfields in fixes this (but + // documented bitfields in fixes this (but // breaks x86/ARM assembly). MRE.r_word1 = ((Index << 8) | // was << 0 (IsPCRel << 7) | // was << 24 @@ -222,7 +222,7 @@ bool PPCMachObjectWriter::recordScatteredRelocation( report_fatal_error("symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); - // FIXME: is Type correct? see include/llvm/Support/MachO.h + // FIXME: is Type correct? see include/llvm/BinaryFormat/MachO.h Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout); FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent()); } diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 38ae62b26757a5ddfe83e15c33e6a1676f74f483..07c9c1f9f84c0824d4e1022c6c5d930b5239a921 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -24,7 +24,6 @@ namespace llvm { class PPCTargetMachine; class PassRegistry; class FunctionPass; - class ImmutablePass; class MachineInstr; class AsmPrinter; class MCInst; diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1f181d007f637aadb18c9e6c73e0aec5d799e2ff..841b8c51446417fd9d54f5392075c9f81bd0da64 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -16,11 +16,11 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" -#include "PPCInstrInfo.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCExpr.h" #include "MCTargetDesc/PPCMCTargetDesc.h" +#include "PPC.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" @@ -29,6 +29,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -55,11 +57,9 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include #include diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp index 93c201d03869096e7605b4f98610d8c4c3abe3b4..55e105dad0e5b4736a1c7d894708d285c78ca66d 100644 --- a/lib/Target/PowerPC/PPCBoolRetToInt.cpp +++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp @@ -7,15 +7,15 @@ // //===----------------------------------------------------------------------===// // -// This file implements converting i1 values to i32 if they could be more +// This file implements converting i1 values to i32/i64 if they could be more // profitably allocated as GPRs rather than CRs. This pass will become totally // unnecessary if Register Bank Allocation and Global Instruction Selection ever // go upstream. // -// Presently, the pass converts i1 Constants, and Arguments to i32 if the +// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the // transitive closure of their uses includes only PHINodes, CallInsts, and // ReturnInsts. The rational is that arguments are generally passed and returned -// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will +// in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR level will // actually save casts at the Machine Instruction level. // // It might be useful to expand this pass to add bit-wise operations to the list @@ -33,11 +33,12 @@ //===----------------------------------------------------------------------===// #include "PPC.h" +#include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -50,8 +51,9 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" #include "llvm/Pass.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/Casting.h" #include using namespace llvm; @@ -87,17 +89,19 @@ class PPCBoolRetToInt : public FunctionPass { return Defs; } - // Translate a i1 value to an equivalent i32 value: - static Value *translate(Value *V) { - Type *Int32Ty = Type::getInt32Ty(V->getContext()); + // Translate a i1 value to an equivalent i32/i64 value: + Value *translate(Value *V) { + Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext()) + : Type::getInt32Ty(V->getContext()); + if (auto *C = dyn_cast(V)) - return ConstantExpr::getZExt(C, Int32Ty); + return ConstantExpr::getZExt(C, IntTy); if (auto *P = dyn_cast(V)) { // Temporarily set the operands to 0. We'll fix this later in // runOnUse. - Value *Zero = Constant::getNullValue(Int32Ty); + Value *Zero = Constant::getNullValue(IntTy); PHINode *Q = - PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P); + PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P); for (unsigned i = 0; i < P->getNumOperands(); ++i) Q->addIncoming(Zero, P->getIncomingBlock(i)); return Q; @@ -109,7 +113,7 @@ class PPCBoolRetToInt : public FunctionPass { auto InstPt = A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode(); - return new ZExtInst(V, Int32Ty, "", InstPt); + return new ZExtInst(V, IntTy, "", InstPt); } typedef SmallPtrSet PHINodeSet; @@ -185,6 +189,13 @@ class PPCBoolRetToInt : public FunctionPass { if (skipFunction(F)) return false; + auto *TPC = getAnalysisIfAvailable(); + if (!TPC) + return false; + + auto &TM = TPC->getTM(); + ST = TM.getSubtargetImpl(F); + PHINodeSet PromotablePHINodes = getPromotablePHINodes(F); B2IMap Bool2IntMap; bool Changed = false; @@ -205,7 +216,7 @@ class PPCBoolRetToInt : public FunctionPass { return Changed; } - static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, + bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, B2IMap &BoolToIntMap) { auto Defs = findAllDefs(U); @@ -262,13 +273,16 @@ class PPCBoolRetToInt : public FunctionPass { AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); } + +private: + const PPCSubtarget *ST; }; } // end anonymous namespace char PPCBoolRetToInt::ID = 0; INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int", - "Convert i1 constants to i32 if they are returned", + "Convert i1 constants to i32/i64 if they are returned", false, false) FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index b7d3154d000008e90ce02be0076ca46f70812236..d0b66f9bca09ab83cb88ef617f261bc6ae864d3b 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCSubtarget.h" diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 70c4170653aef0e28bc554dbbfb4504eb9a741f3..24bc027f8106331941f8c4cdf9887f0d81c9cdd1 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -23,7 +23,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "PPC.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" @@ -43,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp index 6bd229625fc3a95797f1d81b66d049bf3c14492e..811e4dd9dfe168019776205995e20e130e172e45 100644 --- a/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" diff --git a/lib/Target/PowerPC/PPCExpandISEL.cpp b/lib/Target/PowerPC/PPCExpandISEL.cpp index ebd414baf1d2160de00485aeea0480f1c5c5f384..41e3190c3eec7e618663e37557a5c9cfa54b6d7b 100644 --- a/lib/Target/PowerPC/PPCExpandISEL.cpp +++ b/lib/Target/PowerPC/PPCExpandISEL.cpp @@ -339,7 +339,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL, // Note: Cannot use stepBackward instead since we are using the Reg // liveness state at the end of MBB (liveOut of MBB) as the liveIn for // NewSuccessor. Otherwise, will cause cyclic dependence. - LivePhysRegs LPR(MF->getSubtarget().getRegisterInfo()); + LivePhysRegs LPR(*MF->getSubtarget().getRegisterInfo()); SmallVector, 2> Clobbers; for (MachineInstr &MI : *MBB) LPR.stepForward(MI, Clobbers); diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 9b91b9ab8f8275753af4accc7b0a0b4211f301b1..bc9957194f6dd00077db0c9b2e5721da1ca95acc 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" -#include "PPCCallingConv.h" +#include "PPC.h" #include "PPCCCState.h" +#include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCSubtarget.h" @@ -1330,7 +1330,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl &Args, // Issue CALLSEQ_START. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameSetupOpcode())) - .addImm(NumBytes); + .addImm(NumBytes).addImm(0); // Prepare to assign register arguments. Every argument uses up a // GPR protocol register even if it's passed in a floating-point @@ -2246,6 +2246,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, } case PPC::EXTSW: + case PPC::EXTSW_32: case PPC::EXTSW_32_64: { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8) return false; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 4c9430a2eca07cf571ebd539e9561108e9e054c5..57a1d373c88cf27d192ba076aa4821d9928ef9b6 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1459,8 +1459,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } if (FI->usesPICBase()) - BuildMI(MBB, MBBI, dl, LoadInst) - .addReg(PPC::R30) + BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) .addImm(PBPOffset) .addReg(RBReg); @@ -1766,31 +1765,36 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, // Check whether the frame pointer register is allocated. If so, make sure it // is spilled to the correct offset. if (needsFP(MF)) { - HasGPSaveArea = true; - int FI = PFI->getFramePointerSaveIndex(); assert(FI && "No Frame Pointer Save Slot!"); - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + // FP is R31/X31, so no need to update MinGPR/MinG8R. + HasGPSaveArea = true; } if (PFI->usesPICBase()) { - HasGPSaveArea = true; - int FI = PFI->getPICBasePointerSaveIndex(); assert(FI && "No PIC Base Pointer Save Slot!"); - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + + MinGPR = std::min(MinGPR, PPC::R30); + HasGPSaveArea = true; } const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); if (RegInfo->hasBasePointer(MF)) { - HasGPSaveArea = true; - int FI = PFI->getBasePointerSaveIndex(); assert(FI && "No Base Pointer Save Slot!"); - MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); + + unsigned BP = RegInfo->getBaseRegister(MF); + if (PPC::G8RCRegClass.contains(BP)) { + MinG8R = std::min(MinG8R, BP); + HasG8SaveArea = true; + } else if (PPC::GPRCRegClass.contains(BP)) { + MinGPR = std::min(MinGPR, BP); + HasGPSaveArea = true; + } } // General register save area starts right below the Floating-point @@ -1898,12 +1902,13 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + const TargetRegisterClass &GPRC = PPC::GPRCRegClass; + const TargetRegisterClass &G8RC = PPC::G8RCRegClass; + const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; + const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); + unsigned Size = TRI.getSpillSize(RC); + unsigned Align = TRI.getSpillAlignment(RC); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); // Might we have over-aligned allocas? bool HasAlVars = MFI.hasVarSizedObjects() && @@ -1911,9 +1916,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, // These kinds of spills might need two registers. if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 9c72638023bb30115de6f9d30c205e1de2f6140f..afd2e87078a9635542b56e1a95355984e7330d89 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -21,9 +21,10 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -49,6 +50,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -67,6 +69,19 @@ using namespace llvm; #define DEBUG_TYPE "ppc-codegen" +STATISTIC(NumSextSetcc, + "Number of (sext(setcc)) nodes expanded into GPR sequence."); +STATISTIC(NumZextSetcc, + "Number of (zext(setcc)) nodes expanded into GPR sequence."); +STATISTIC(SignExtensionsAdded, + "Number of sign extensions for compare inputs added."); +STATISTIC(ZeroExtensionsAdded, + "Number of zero extensions for compare inputs added."); +STATISTIC(NumLogicOpsOnComparison, + "Number of logical ops on i1 values calculated in GPR."); +STATISTIC(OmittedForNonExtendUses, + "Number of compares not eliminated as they have non-extending uses."); + // FIXME: Remove this once the bug has been fixed! cl::opt ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); @@ -251,7 +266,34 @@ namespace { #include "PPCGenDAGISel.inc" private: + // Conversion type for interpreting results of a 32-bit instruction as + // a 64-bit value or vice versa. + enum ExtOrTruncConversion { Ext, Trunc }; + + // Modifiers to guide how an ISD::SETCC node's result is to be computed + // in a GPR. + // ZExtOrig - use the original condition code, zero-extend value + // ZExtInvert - invert the condition code, zero-extend value + // SExtOrig - use the original condition code, sign-extend value + // SExtInvert - invert the condition code, sign-extend value + enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; + bool trySETCC(SDNode *N); + bool tryEXTEND(SDNode *N); + bool tryLogicOpOfCompares(SDNode *N); + SDValue computeLogicOpInGPR(SDValue LogicOp); + SDValue signExtendInputIfNeeded(SDValue Input); + SDValue zeroExtendInputIfNeeded(SDValue Input); + SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); + SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, + int64_t RHSValue, SDLoc dl); + SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -542,12 +584,12 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { SDValue Op1 = N->getOperand(1); SDLoc dl(N); - APInt LKZ, LKO, RKZ, RKO; - CurDAG->computeKnownBits(Op0, LKZ, LKO); - CurDAG->computeKnownBits(Op1, RKZ, RKO); + KnownBits LKnown, RKnown; + CurDAG->computeKnownBits(Op0, LKnown); + CurDAG->computeKnownBits(Op1, RKnown); - unsigned TargetMask = LKZ.getZExtValue(); - unsigned InsertMask = RKZ.getZExtValue(); + unsigned TargetMask = LKnown.Zero.getZExtValue(); + unsigned InsertMask = RKnown.Zero.getZExtValue(); if ((TargetMask | InsertMask) == 0xFFFFFFFF) { unsigned Op0Opc = Op0.getOpcode(); @@ -590,9 +632,9 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { // The AND mask might not be a constant, and we need to make sure that // if we're going to fold the masking with the insert, all bits not // know to be zero in the mask are known to be one. - APInt MKZ, MKO; - CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO); - bool CanFoldMask = InsertMask == MKO.getZExtValue(); + KnownBits MKnown; + CurDAG->computeKnownBits(Op1.getOperand(1), MKnown); + bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); unsigned SHOpc = Op1.getOperand(0).getOpcode(); if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && @@ -2470,6 +2512,506 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) { return true; } +// Is this opcode a bitwise logical operation? +static bool isLogicOp(unsigned Opc) { + return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; +} + +/// If this node is a sign/zero extension of an integer comparison, +/// it can usually be computed in GPR's rather than using comparison +/// instructions and ISEL. We only do this on 64-bit targets for now +/// as the code is specialized for 64-bit (it uses 64-bit instructions +/// and assumes 64-bit registers). +bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) { + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + assert((N->getOpcode() == ISD::ZERO_EXTEND || + N->getOpcode() == ISD::SIGN_EXTEND) && + "Expecting a zero/sign extend node!"); + + SDValue WideRes; + // If we are zero-extending the result of a logical operation on i1 + // values, we can keep the values in GPRs. + if (isLogicOp(N->getOperand(0).getOpcode()) && + N->getOperand(0).getValueType() == MVT::i1 && + N->getOpcode() == ISD::ZERO_EXTEND) + WideRes = computeLogicOpInGPR(N->getOperand(0)); + else if (N->getOperand(0).getOpcode() != ISD::SETCC) + return false; + else + WideRes = + getSETCCInGPR(N->getOperand(0), + N->getOpcode() == ISD::SIGN_EXTEND ? + SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); + + if (!WideRes) + return false; + + SDLoc dl(N); + bool Inputs32Bit = N->getOperand(0).getOperand(0).getValueType() == MVT::i32; + bool Output32Bit = N->getValueType(0) == MVT::i32; + + NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; + NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; + + SDValue ConvOp = WideRes; + if (Inputs32Bit != Output32Bit) + ConvOp = addExtOrTrunc(WideRes, Inputs32Bit ? ExtOrTruncConversion::Ext : + ExtOrTruncConversion::Trunc); + ReplaceNode(N, ConvOp.getNode()); + + return true; +} + +// Lower a logical operation on i1 values into a GPR sequence if possible. +// The result can be kept in a GPR if requested. +// Three types of inputs can be handled: +// - SETCC +// - TRUNCATE +// - Logical operation (AND/OR/XOR) +// There is also a special case that is handled (namely a complement operation +// achieved with xor %a, -1). +SDValue PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp) { + assert(isLogicOp(LogicOp.getOpcode()) && + "Can only handle logic operations here."); + assert(LogicOp.getValueType() == MVT::i1 && + "Can only handle logic operations on i1 values here."); + SDLoc dl(LogicOp); + SDValue LHS, RHS; + + // Special case: xor %a, -1 + bool IsBitwiseNegation = isBitwiseNot(LogicOp); + + // Produces a GPR sequence for each operand of the binary logic operation. + // For SETCC, it produces the respective comparison, for TRUNCATE it truncates + // the value in a GPR and for logic operations, it will recursively produce + // a GPR sequence for the operation. + auto getLogicOperand = [&] (SDValue Operand) -> SDValue { + unsigned OperandOpcode = Operand.getOpcode(); + if (OperandOpcode == ISD::SETCC) + return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); + else if (OperandOpcode == ISD::TRUNCATE) { + SDValue InputOp = Operand.getOperand(0); + EVT InVT = InputOp.getValueType(); + return + SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : + PPC::RLDICL, dl, InVT, InputOp, + getI64Imm(0, dl), getI64Imm(63, dl)), 0); + } else if (isLogicOp(OperandOpcode)) + return computeLogicOpInGPR(Operand); + return SDValue(); + }; + LHS = getLogicOperand(LogicOp.getOperand(0)); + RHS = getLogicOperand(LogicOp.getOperand(1)); + + // If a GPR sequence can't be produced for the LHS we can't proceed. + // Not producing a GPR sequence for the RHS is only a problem if this isn't + // a bitwise negation operation. + if (!LHS || (!RHS && !IsBitwiseNegation)) + return SDValue(); + + NumLogicOpsOnComparison++; + + // We will use the inputs as 64-bit values. + if (LHS.getValueType() == MVT::i32) + LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); + if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) + RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); + + unsigned NewOpc; + switch (LogicOp.getOpcode()) { + default: llvm_unreachable("Unknown logic operation."); + case ISD::AND: NewOpc = PPC::AND8; break; + case ISD::OR: NewOpc = PPC::OR8; break; + case ISD::XOR: NewOpc = PPC::XOR8; break; + } + + if (IsBitwiseNegation) { + RHS = getI64Imm(1, dl); + NewOpc = PPC::XORI8; + } + + return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); + +} + +/// Try performing logical operations on results of comparisons in GPRs. +/// It is typically preferred from a performance perspective over performing +/// the operations on individual bits in the CR. We only do this on 64-bit +/// targets for now as the code is specialized for 64-bit (it uses 64-bit +/// instructions and assumes 64-bit registers). +bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { + if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) + return false; + if (N->getValueType(0) != MVT::i1) + return false; + assert(isLogicOp(N->getOpcode()) && + "Expected a logic operation on setcc results."); + SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); + if (!LoweredLogical) + return false; + + SDLoc dl(N); + bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; + unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; + SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); + SDValue LHS = LoweredLogical.getOperand(0); + SDValue RHS = LoweredLogical.getOperand(1); + SDValue WideOp; + SDValue OpToConvToRecForm; + + // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode + // that is input to the XORI. + if (IsBitwiseNegate && + LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) + OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); + else if (IsBitwiseNegate) + // If the input to the XORI isn't an extension, that's what we're after. + OpToConvToRecForm = LoweredLogical.getOperand(0); + else + // If this is not an XORI, it is a reg-reg logical op and we can convert it + // to record-form. + OpToConvToRecForm = LoweredLogical; + + // Get the record-form version of the node we're looking to use to get the + // CR result from. + uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); + int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); + + // Convert the right node to record-form. This is either the logical we're + // looking at or it is the input node to the negation (if we're looking at + // a bitwise negation). + if (NewOpc != -1 && IsBitwiseNegate) { + // The input to the XORI has a record-form. Use it. + assert(LoweredLogical.getConstantOperandVal(1) == 1 && + "Expected a PPC::XORI8 only for bitwise negation."); + // Emit the record-form instruction. + std::vector Ops; + for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) + Ops.push_back(OpToConvToRecForm.getOperand(i)); + + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc, dl, + OpToConvToRecForm.getValueType(), + MVT::Glue, Ops), 0); + } else { + assert((NewOpc != -1 || !IsBitwiseNegate) && + "No record form available for AND8/OR8/XOR8?"); + WideOp = + SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, + MVT::i64, MVT::Glue, LHS, RHS), 0); + } + + // Select this node to a single bit from CR0 set by the record-form node + // just created. For bitwise negation, use the EQ bit which is the equivalent + // of negating the result (i.e. it is a bit set when the result of the + // operation is zero). + SDValue SRIdxVal = + CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); + SDValue CRBit = + SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, + MVT::i1, CR0Reg, SRIdxVal, + WideOp.getValue(1)), 0); + ReplaceNode(N, CRBit.getNode()); + return true; +} + +/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. +/// Useful when emitting comparison code for 32-bit values without using +/// the compare instruction (which only considers the lower 32-bits). +SDValue PPCDAGToDAGISel::signExtendInputIfNeeded(SDValue Input) { + assert(Input.getValueType() == MVT::i32 && + "Can only sign-extend 32-bit values here."); + unsigned Opc = Input.getOpcode(); + + // The value was sign extended and then truncated to 32-bits. No need to + // sign extend it again. + if (Opc == ISD::TRUNCATE && + (Input.getOperand(0).getOpcode() == ISD::AssertSext || + Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) + return Input; + + LoadSDNode *InputLoad = dyn_cast(Input); + // The input is a sign-extending load. No reason to sign-extend. + if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) + return Input; + + ConstantSDNode *InputConst = dyn_cast(Input); + // We don't sign-extend constants and already sign-extended values. + if (InputConst || Opc == ISD::AssertSext || Opc == ISD::SIGN_EXTEND_INREG || + Opc == ISD::SIGN_EXTEND) + return Input; + + SDLoc dl(Input); + SignExtensionsAdded++; + return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32, dl, MVT::i32, Input), 0); +} + +/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. +/// Useful when emitting comparison code for 32-bit values without using +/// the compare instruction (which only considers the lower 32-bits). +SDValue PPCDAGToDAGISel::zeroExtendInputIfNeeded(SDValue Input) { + assert(Input.getValueType() == MVT::i32 && + "Can only zero-extend 32-bit values here."); + LoadSDNode *InputLoad = dyn_cast(Input); + unsigned Opc = Input.getOpcode(); + + // No need to zero-extend loaded values (unless they're loaded with + // a sign-extending load). + if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) + return Input; + + ConstantSDNode *InputConst = dyn_cast(Input); + bool InputZExtConst = InputConst && InputConst->getSExtValue() >= 0; + // An ISD::TRUNCATE will be lowered to an EXTRACT_SUBREG so we have + // to conservatively actually clear the high bits. We also don't need to + // zero-extend constants or values that are already zero-extended. + if (InputZExtConst || Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND) + return Input; + + SDLoc dl(Input); + ZeroExtensionsAdded++; + return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, Input, + getI64Imm(0, dl), getI64Imm(32, dl)), + 0); +} + +// Handle a 32-bit value in a 64-bit register and vice-versa. These are of +// course not actual zero/sign extensions that will generate machine code, +// they're just a way to reinterpret a 32 bit value in a register as a +// 64 bit value and vice-versa. +SDValue PPCDAGToDAGISel::addExtOrTrunc(SDValue NatWidthRes, + ExtOrTruncConversion Conv) { + SDLoc dl(NatWidthRes); + + // For reinterpreting 32-bit values as 64 bit values, we generate + // INSERT_SUBREG IMPLICIT_DEF:i64, , TargetConstant:i32<1> + if (Conv == ExtOrTruncConversion::Ext) { + SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); + SDValue SubRegIdx = + CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, + ImDef, NatWidthRes, SubRegIdx), 0); + } + + assert(Conv == ExtOrTruncConversion::Trunc && + "Unknown convertion between 32 and 64 bit values."); + // For reinterpreting 64-bit values as 32-bit values, we just need to + // EXTRACT_SUBREG (i.e. extract the low word). + SDValue SubRegIdx = + CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); + return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, + NatWidthRes, SubRegIdx), 0); +} + +/// Produces a zero-extended result of comparing two 32-bit values according to +/// the passed condition code. +SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + bool IsRHSZero = RHSValue == 0; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) + // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), + getI32Imm(31, dl) }; + return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, + ShiftOps), 0); + } + case ISD::SETNE: { + // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) + // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), + getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + getI32Imm(1, dl)), 0); + } + } +} + +/// Produces a sign-extended result of comparing two 32-bit values according to +/// the passed condition code. +SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + bool IsRHSZero = RHSValue == 0; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (sext (setcc %a, %b, seteq)) -> + // (ashr (shl (ctlz (xor %a, %b)), 58), 63) + // (sext (setcc %a, 0, seteq)) -> + // (ashr (shl (ctlz %a), 58), 63) + SDValue CountInput = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Cntlzw = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); + SDValue SHLOps[] = { Cntlzw, getI32Imm(58, dl), getI32Imm(0, dl) }; + SDValue Sldi = + SDValue(CurDAG->getMachineNode(PPC::RLDICR_32, dl, MVT::i32, SHLOps), 0); + return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi, + getI32Imm(63, dl)), 0); + } + case ISD::SETNE: { + // Bitwise xor the operands, count leading zeros, shift right by 5 bits and + // flip the bit, finally take 2's complement. + // (sext (setcc %a, %b, setne)) -> + // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) + // Same as above, but the first xor is not needed. + // (sext (setcc %a, 0, setne)) -> + // (neg (xor (lshr (ctlz %a), 5), 1)) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); + SDValue ShiftOps[] = + { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; + SDValue Shift = + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); + SDValue Xori = + SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, + getI32Imm(1, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); + } + } +} + +/// Produces a zero-extended result of comparing two 64-bit values according to +/// the passed condition code. +SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + bool IsRHSZero = RHSValue == 0; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) + // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) + SDValue Xor = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue Clz = + SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); + return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, + getI64Imm(58, dl), getI64Imm(63, dl)), + 0); + } + } +} + +/// Produces a sign-extended result of comparing two 64-bit values according to +/// the passed condition code. +SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS, + ISD::CondCode CC, + int64_t RHSValue, SDLoc dl) { + bool IsRHSZero = RHSValue == 0; + switch (CC) { + default: return SDValue(); + case ISD::SETEQ: { + // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) + // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) + // {addcz.reg, addcz.CA} = (addcarry %a, -1) + // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) + SDValue AddInput = IsRHSZero ? LHS : + SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); + SDValue Addic = + SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, + AddInput, getI32Imm(~0U, dl)), 0); + return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, + Addic, Addic.getValue(1)), 0); + } + } +} + +/// Does this SDValue have any uses for which keeping the value in a GPR is +/// appropriate. This is meant to be used on values that have type i1 since +/// it is somewhat meaningless to ask if values of other types can be kept in +/// GPR's. +static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { + assert(Compare.getOpcode() == ISD::SETCC && + "An ISD::SETCC node required here."); + + // For values that have a single use, the caller should obviously already have + // checked if that use is an extending use. We check the other uses here. + if (Compare.hasOneUse()) + return true; + // We want the value in a GPR if it is being extended, used for a select, or + // used in logical operations. + for (auto CompareUse : Compare.getNode()->uses()) + if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && + CompareUse->getOpcode() != ISD::ZERO_EXTEND && + CompareUse->getOpcode() != ISD::SELECT && + !isLogicOp(CompareUse->getOpcode())) { + OmittedForNonExtendUses++; + return false; + } + return true; +} + +/// Returns an equivalent of a SETCC node but with the result the same width as +/// the inputs. This can nalso be used for SELECT_CC if either the true or false +/// values is a power of two while the other is zero. +SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, + SetccInGPROpts ConvOpts) { + assert((Compare.getOpcode() == ISD::SETCC || + Compare.getOpcode() == ISD::SELECT_CC) && + "An ISD::SETCC node required here."); + + // Don't convert this comparison to a GPR sequence because there are uses + // of the i1 result (i.e. uses that require the result in the CR). + if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) + return SDValue(); + + SDValue LHS = Compare.getOperand(0); + SDValue RHS = Compare.getOperand(1); + + // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. + int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; + ISD::CondCode CC = + cast(Compare.getOperand(CCOpNum))->get(); + EVT InputVT = LHS.getValueType(); + if (InputVT != MVT::i32 && InputVT != MVT::i64) + return SDValue(); + + if (ConvOpts == SetccInGPROpts::ZExtInvert || + ConvOpts == SetccInGPROpts::SExtInvert) + CC = ISD::getSetCCInverse(CC, true); + + bool Inputs32Bit = InputVT == MVT::i32; + if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) { + LHS = signExtendInputIfNeeded(LHS); + RHS = signExtendInputIfNeeded(RHS); + } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) { + LHS = zeroExtendInputIfNeeded(LHS); + RHS = zeroExtendInputIfNeeded(RHS); + } + + SDLoc dl(Compare); + ConstantSDNode *RHSConst = dyn_cast(RHS); + int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; + bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || + ConvOpts == SetccInGPROpts::SExtInvert; + + if (IsSext && Inputs32Bit) + return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (Inputs32Bit) + return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); + else if (IsSext) + return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); + return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); +} + void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); @@ -2507,6 +3049,12 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } break; + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + if (tryEXTEND(N)) + return; + break; + case ISD::SETCC: if (trySETCC(N)) return; @@ -2650,6 +3198,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::AND: { + if (tryLogicOpOfCompares(N)) + return; + unsigned Imm, Imm2, SH, MB, ME; uint64_t Imm64; @@ -2769,15 +3320,18 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (tryBitfieldInsert(N)) return; + if (tryLogicOpOfCompares(N)) + return; + short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { - APInt LHSKnownZero, LHSKnownOne; - CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne); + KnownBits LHSKnown; + CurDAG->computeKnownBits(N->getOperand(0), LHSKnown); // If this is equivalent to an add, then we can fold it with the // FrameIndex calculation. - if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { + if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); return; } @@ -2786,6 +3340,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } + case ISD::XOR: { + if (tryLogicOpOfCompares(N)) + return; + break; + } case ISD::ADD: { short Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && @@ -2977,10 +3536,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) { SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; - SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, - N->getValueType(0), Ops); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); + SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, + N->getValueType(0), Ops); cast(NewN)->setMemRefs(MemOp, MemOp + 1); return; } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f7663d8e5185c3eeeb838c4eb68bd46b5404f55e..72f14e9691382dadcddebecd271ca4fdd35099a0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" -#include "PPCCallingConv.h" #include "PPCCCState.h" +#include "PPCCallingConv.h" #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" -#include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCRegisterInfo.h" @@ -28,11 +28,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -52,8 +52,8 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -61,9 +61,9 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" @@ -79,6 +79,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -135,7 +136,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } - // PowerPC has an i16 but no i8 (or i1) SEXTLOAD + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); @@ -174,7 +175,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } - // PowerPC does not support direct load / store of condition registers + // PowerPC does not support direct load/store of condition registers. setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); @@ -203,11 +204,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); - // PowerPC has no SREM/UREM instructions - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); + // PowerPC has no SREM/UREM instructions unless we are on P9 + // On P9 we may use a hardware instruction to compute the remainder. + // The instructions are not legalized directly because in the cases where the + // result of both the remainder and the division is required it is more + // efficient to compute the remainder from the result of the division rather + // than use the remainder instruction. + if (Subtarget.isISA3_0()) { + setOperationAction(ISD::SREM, MVT::i32, Custom); + setOperationAction(ISD::UREM, MVT::i32, Custom); + setOperationAction(ISD::SREM, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + } else { + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + } // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); @@ -409,6 +422,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + // Comparisons that require checking two conditions. setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f64, Expand); @@ -533,7 +551,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); - setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); @@ -683,6 +700,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SRA, MVT::v2i64, Legal); setOperationAction(ISD::SRL, MVT::v2i64, Legal); + // 128 bit shifts can be accomplished via 3 instructions for SHL and + // SRL, but not for SRA because of the instructions available: + // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth + // doing + setOperationAction(ISD::SHL, MVT::v1i128, Expand); + setOperationAction(ISD::SRL, MVT::v1i128, Expand); + setOperationAction(ISD::SRA, MVT::v1i128, Expand); + setOperationAction(ISD::SETCC, MVT::v2i64, Legal); } else { @@ -736,6 +761,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasP9Vector()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + + // 128 bit shifts can be accomplished via 3 instructions for SHL and + // SRL, but not for SRA because of the instructions available: + // VS{RL} and VS{RL}O. + setOperationAction(ISD::SHL, MVT::v1i128, Legal); + setOperationAction(ISD::SRL, MVT::v1i128, Legal); + setOperationAction(ISD::SRA, MVT::v1i128, Expand); } } @@ -777,7 +809,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FABS , MVT::v4f64, Legal); setOperationAction(ISD::FSIN , MVT::v4f64, Expand); setOperationAction(ISD::FCOS , MVT::v4f64, Expand); - setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); setOperationAction(ISD::FPOW , MVT::v4f64, Expand); setOperationAction(ISD::FLOG , MVT::v4f64, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); @@ -823,7 +854,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FABS , MVT::v4f32, Legal); setOperationAction(ISD::FSIN , MVT::v4f32, Expand); setOperationAction(ISD::FCOS , MVT::v4f32, Expand); - setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); setOperationAction(ISD::FPOW , MVT::v4f32, Expand); setOperationAction(ISD::FLOG , MVT::v4f32, Expand); setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); @@ -922,6 +952,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::BUILD_VECTOR); if (Subtarget.hasFPCVT()) @@ -1020,6 +1053,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, MaxStoresPerMemset = 128; MaxStoresPerMemcpy = 128; MaxStoresPerMemmove = 128; + MaxLoadsPerMemcmp = 128; + } else { + MaxLoadsPerMemcmp = 8; + MaxLoadsPerMemcmpOptSize = 4; } } @@ -1091,6 +1128,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; + case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE"; + case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; @@ -1572,20 +1611,47 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, - unsigned &InsertAtByte, bool &Swap, bool IsLE) { - // Check that the mask is shuffling words - for (unsigned i = 0; i < 4; ++i) { - unsigned B0 = N->getMaskElt(i*4); - unsigned B1 = N->getMaskElt(i*4+1); - unsigned B2 = N->getMaskElt(i*4+2); - unsigned B3 = N->getMaskElt(i*4+3); - if (B0 % 4) +/// Check that the mask is shuffling N byte elements. Within each N byte +/// element of the mask, the indices could be either in increasing or +/// decreasing order as long as they are consecutive. +/// \param[in] N the shuffle vector SD Node to analyze +/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/ +/// Word/DoubleWord/QuadWord). +/// \param[in] StepLen the delta indices number among the N byte element, if +/// the mask is in increasing/decreasing order then it is 1/-1. +/// \return true iff the mask is shuffling N byte elements. +static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, + int StepLen) { + assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && + "Unexpected element width."); + assert((StepLen == 1 || StepLen == -1) && "Unexpected element width."); + + unsigned NumOfElem = 16 / Width; + unsigned MaskVal[16]; // Width is never greater than 16 + for (unsigned i = 0; i < NumOfElem; ++i) { + MaskVal[0] = N->getMaskElt(i * Width); + if ((StepLen == 1) && (MaskVal[0] % Width)) { return false; - if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) + } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { return false; + } + + for (unsigned int j = 1; j < Width; ++j) { + MaskVal[j] = N->getMaskElt(i * Width + j); + if (MaskVal[j] != MaskVal[j-1] + StepLen) { + return false; + } + } } + return true; +} + +bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE) { + if (!isNByteElemShuffleMask(N, 4, 1)) + return false; + // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; @@ -1656,6 +1722,158 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, return false; } +bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + // Ensure each byte index of the word is consecutive. + if (!isNByteElemShuffleMask(N, 4, 1)) + return false; + + // Now we look at mask elements 0,4,8,12, which are the beginning of words. + unsigned M0 = N->getMaskElt(0) / 4; + unsigned M1 = N->getMaskElt(4) / 4; + unsigned M2 = N->getMaskElt(8) / 4; + unsigned M3 = N->getMaskElt(12) / 4; + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + assert(M0 < 4 && "Indexing into an undef vector?"); + if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) + return false; + + ShiftElts = IsLE ? (4 - M0) % 4 : M0; + Swap = false; + return true; + } + + // Ensure each word index of the ShuffleVector Mask is consecutive. + if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) + return false; + + if (IsLE) { + if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { + // Input vectors don't need to be swapped if the leading element + // of the result is one of the 3 left elements of the second vector + // (or if there is no shift to be done at all). + Swap = false; + ShiftElts = (8 - M0) % 8; + } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { + // Input vectors need to be swapped if the leading element + // of the result is one of the 3 left elements of the first vector + // (or if we're shifting by 4 - thereby simply swapping the vectors). + Swap = true; + ShiftElts = (4 - M0) % 4; + } + + return true; + } else { // BE + if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { + // Input vectors don't need to be swapped if the leading element + // of the result is one of the 4 elements of the first vector. + Swap = false; + ShiftElts = M0; + } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { + // Input vectors need to be swapped if the leading element + // of the result is one of the 4 elements of the right vector. + Swap = true; + ShiftElts = M0 - 4; + } + + return true; + } +} + +bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + + if (!isNByteElemShuffleMask(N, Width, -1)) + return false; + + for (int i = 0; i < 16; i += Width) + if (N->getMaskElt(i) != i + Width - 1) + return false; + + return true; +} + +bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 2); +} + +bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 4); +} + +bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 8); +} + +bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 16); +} + +/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap +/// if the inputs to the instruction should be swapped and set \p DM to the +/// value for the immediate. +/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI +/// AND element 0 of the result comes from the first input (LE) or second input +/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. +/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle +/// mask. +bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, + bool &Swap, bool IsLE) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + + // Ensure each byte index of the double word is consecutive. + if (!isNByteElemShuffleMask(N, 8, 1)) + return false; + + unsigned M0 = N->getMaskElt(0) / 8; + unsigned M1 = N->getMaskElt(8) / 8; + assert(((M0 | M1) < 4) && "A mask element out of bounds?"); + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + if ((M0 | M1) < 2) { + DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); + Swap = false; + return true; + } else + return false; + } + + if (IsLE) { + if (M0 > 1 && M1 < 2) { + Swap = false; + } else if (M0 < 2 && M1 > 1) { + M0 = (M0 + 2) % 4; + M1 = (M1 + 2) % 4; + Swap = true; + } else + return false; + + // Note: if control flow comes here that means Swap is already set above + DM = (((~M1) & 1) << 1) + ((~M0) & 1); + return true; + } else { // BE + if (M0 < 2 && M1 > 1) { + Swap = false; + } else if (M0 > 1 && M1 < 2) { + M0 = (M0 + 2) % 4; + M1 = (M1 + 2) % 4; + Swap = true; + } else + return false; + + // Note: if control flow comes here that means Swap is already set above + DM = (M0 << 1) + (M1 & 1); + return true; + } +} + + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -1847,17 +2065,14 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are provably // disjoint. - APInt LHSKnownZero, LHSKnownOne; - APInt RHSKnownZero, RHSKnownOne; - DAG.computeKnownBits(N.getOperand(0), - LHSKnownZero, LHSKnownOne); - - if (LHSKnownZero.getBoolValue()) { - DAG.computeKnownBits(N.getOperand(1), - RHSKnownZero, RHSKnownOne); + KnownBits LHSKnown, RHSKnown; + DAG.computeKnownBits(N.getOperand(0), LHSKnown); + + if (LHSKnown.Zero.getBoolValue()) { + DAG.computeKnownBits(N.getOperand(1), RHSKnown); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. - if (~(LHSKnownZero | RHSKnownZero) == 0) { + if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) { Base = N.getOperand(0); Index = N.getOperand(1); return true; @@ -1953,10 +2168,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. - APInt LHSKnownZero, LHSKnownOne; - DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); + KnownBits LHSKnown; + DAG.computeKnownBits(N.getOperand(0), LHSKnown); - if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { + if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (FrameIndexSDNode *FI = @@ -2345,8 +2560,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_LO); - SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); + SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64) + : DAG.getRegister(PPC::R2, MVT::i32); + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } @@ -4951,8 +5167,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4( // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), - dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be moved somewhere else @@ -5002,9 +5217,8 @@ SDValue PPCTargetLowering::LowerCall_32SVR4( Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. - SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1), - SDLoc(MemcpyCall)); + SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0, + SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); Chain = CallSeqStart = NewCallSeqStart; @@ -5085,9 +5299,9 @@ SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); // The MEMCPY must go outside the CALLSEQ_START..END. - SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, - CallSeqStart.getNode()->getOperand(1), - SDLoc(MemcpyCall)); + int64_t FrameSize = CallSeqStart.getConstantOperandVal(1); + SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0, + SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); return NewCallSeqStart; @@ -5270,8 +5484,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getIntPtrConstant(NumBytes, dl, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else @@ -5830,8 +6043,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin( // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), - dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else @@ -6466,7 +6678,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETNE: std::swap(TV, FV); case ISD::SETEQ: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -6476,25 +6688,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -6662,6 +6874,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, // Given the head of the old chain, ResChain, insert a token factor containing // it and NewResChain, and make users of ResChain now be users of that token // factor. +// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead. void PPCTargetLowering::spliceIntoChain(SDValue ResChain, SDValue NewResChain, SelectionDAG &DAG) const { @@ -7662,6 +7875,53 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } + + if (Subtarget.hasVSX() && + PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Conv2 = + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2); + + SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); + } + + if (Subtarget.hasVSX() && + PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); + SDValue Conv2 = + DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2); + + SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); + } + + if (Subtarget.hasP9Vector()) { + if (PPC::isXXBRHShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); + SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); + } else if (PPC::isXXBRWShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); + } else if (PPC::isXXBRDShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); + SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); + } else if (PPC::isXXBRQShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); + SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); + } + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); @@ -8118,9 +8378,9 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (IntrinsicID == Intrinsic::thread_pointer) { // Reads the thread pointer register, used for __builtin_thread_pointer. - bool is64bit = Subtarget.isPPC64(); - return DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, - is64bit ? MVT::i64 : MVT::i32); + if (Subtarget.isPPC64()) + return DAG.getRegister(PPC::X13, MVT::i64); + return DAG.getRegister(PPC::R2, MVT::i32); } // If this is a lowered altivec predicate compare, CompareOpc is set to the @@ -8187,6 +8447,40 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return Flags; } +SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to + // the beginning of the argument list. + int ArgStart = isa(Op.getOperand(0)) ? 0 : 1; + SDLoc DL(Op); + switch (cast(Op.getOperand(ArgStart))->getZExtValue()) { + case Intrinsic::ppc_cfence: { + assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument."); + assert(Subtarget.isPPC64() && "Only 64-bit is supported for now."); + return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other, + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, + Op.getOperand(ArgStart + 1)), + Op.getOperand(0)), + 0); + } + default: + break; + } + return SDValue(); +} + +SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const { + // Check for a DIV with the same operands as this REM. + for (auto UI : Op.getOperand(1)->uses()) { + if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) || + (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV)) + if (UI->getOperand(0) == Op.getOperand(0) && + UI->getOperand(1) == Op.getOperand(1)) + return SDValue(); + } + return Op; +} + SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -8652,6 +8946,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { // Frame & Return address. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + + case ISD::INTRINSIC_VOID: + return LowerINTRINSIC_VOID(Op, DAG); + case ISD::SREM: + case ISD::UREM: + return LowerREM(Op, DAG); } } @@ -8743,9 +9043,9 @@ static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { // The mappings for emitLeading/TrailingFence is taken from // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html -Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, - AtomicOrdering Ord, bool IsStore, - bool IsLoad) const { +Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { if (Ord == AtomicOrdering::SequentiallyConsistent) return callIntrinsic(Builder, Intrinsic::ppc_sync); if (isReleaseOrStronger(Ord)) @@ -8753,15 +9053,22 @@ Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, return nullptr; } -Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, - AtomicOrdering Ord, bool IsStore, - bool IsLoad) const { - if (IsLoad && isAcquireOrStronger(Ord)) +Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) { + // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and + // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html + // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. + if (isa(Inst) && Subtarget.isPPC64()) + return Builder.CreateCall( + Intrinsic::getDeclaration( + Builder.GetInsertBlock()->getParent()->getParent(), + Intrinsic::ppc_cfence, {Inst->getType()}), + {Inst}); + // FIXME: Can use isync for rmw operation. return callIntrinsic(Builder, Intrinsic::ppc_lwsync); - // FIXME: this is too conservative, a dependent branch + isync is enough. - // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and - // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html - // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. + } return nullptr; } @@ -9057,6 +9364,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -9070,7 +9378,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - assert(RC->hasType(MVT::i32) && "Invalid destination!"); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); @@ -9153,7 +9461,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); - const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); @@ -10318,17 +10625,16 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, } else { // This is neither a signed nor an unsigned comparison, just make sure // that the high bits are equal. - APInt Op1Zero, Op1One; - APInt Op2Zero, Op2One; - DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); - DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); + KnownBits Op1Known, Op2Known; + DAG.computeKnownBits(N->getOperand(0), Op1Known); + DAG.computeKnownBits(N->getOperand(1), Op2Known); // We don't really care about what is known about the first bit (if // anything), so clear it in all masks prior to comparing them. - Op1Zero.clearBit(0); Op1One.clearBit(0); - Op2Zero.clearBit(0); Op2One.clearBit(0); + Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0); + Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0); - if (Op1Zero != Op2Zero || Op1One != Op2One) + if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One) return SDValue(); } } @@ -11216,6 +11522,14 @@ SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, } MVT VecTy = N->getValueType(0).getSimpleVT(); + + // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is + // aligned and the type is a vector with elements up to 4 bytes + if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) + && VecTy.getScalarSizeInBits() <= 32 ) { + return SDValue(); + } + SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, DAG.getVTList(MVT::v2f64, MVT::Other), @@ -11280,6 +11594,13 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); + // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is + // aligned and the type is a vector with elements up to 4 bytes + if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16) + && VecTy.getScalarSizeInBits() <= 32 ) { + return SDValue(); + } + // All stores are done as v2f64 and possible bit cast. if (VecTy != MVT::v2f64) { Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); @@ -11304,6 +11625,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDLoc dl(N); switch (N->getOpcode()) { default: break; + case ISD::SHL: + return combineSHL(N, DCI); + case ISD::SRA: + return combineSRA(N, DCI); + case ISD::SRL: + return combineSRL(N, DCI); case PPCISD::SHL: if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); @@ -12015,18 +12342,17 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, //===----------------------------------------------------------------------===// void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); + Known.resetAll(); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { // lhbrx is known to have the top bits cleared out. if (cast(Op.getOperand(2))->getVT() == MVT::i16) - KnownZero = 0xFFFF0000; + Known.Zero = 0xFFFF0000; break; } case ISD::INTRINSIC_WO_CHAIN: { @@ -12048,7 +12374,7 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: case Intrinsic::ppc_altivec_vcmpgtud_p: - KnownZero = ~1U; // All bits but the low one are known to be zero. + Known.Zero = ~1U; // All bits but the low one are known to be zero. break; } } @@ -12937,3 +13263,58 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return Imm.isPosZero(); } } + +// For vector shift operation op, fold +// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y) +static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, + SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + unsigned Opcode = N->getOpcode(); + unsigned TargetOpcode; + + switch (Opcode) { + default: + llvm_unreachable("Unexpected shift operation"); + case ISD::SHL: + TargetOpcode = PPCISD::SHL; + break; + case ISD::SRL: + TargetOpcode = PPCISD::SRL; + break; + case ISD::SRA: + TargetOpcode = PPCISD::SRA; + break; + } + + if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) && + N1->getOpcode() == ISD::AND) + if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) + if (Mask->getZExtValue() == OpSizeInBits - 1) + return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0)); + + return SDValue(); +} + +SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const { + if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) + return Value; + + return SDValue(); +} + +SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const { + if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) + return Value; + + return SDValue(); +} + +SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const { + if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) + return Value; + + return SDValue(); +} diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 6113eb58f421150d23e60f39dde3c0999a90b970..a5108727bb4b1ea731715c9b526132fc23a6e4ee 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -86,10 +86,18 @@ namespace llvm { /// XXINSERT, + /// XXREVERSE - The PPC VSX reverse instruction + /// + XXREVERSE, + /// VECSHL - The PPC VSX shift left instruction /// VECSHL, + /// XXPERMDI - The PPC XXPERMDI instruction + /// + XXPERMDI, + /// The CMPB instruction (takes two operands of i32 or i64). CMPB, @@ -117,9 +125,13 @@ namespace llvm { /// at function entry, used for PIC code. GlobalBaseReg, - /// These nodes represent the 32-bit PPC shifts that operate on 6-bit - /// shift amounts. These nodes are generated by the multi-precision shift - /// code. + /// These nodes represent PPC shifts. + /// + /// For scalar types, only the last `n + 1` bits of the shift amounts + /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc. + /// for exact behaviors. + /// + /// For vector types, only the last n bits are used. See vsld. SRL, SRA, SHL, /// The combination of sra[wd]i and addze used to implemented signed @@ -446,7 +458,32 @@ namespace llvm { /// a VMRGEW or VMRGOW instruction bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG); - + /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXSLDWI instruction. + bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); + + /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRH instruction. + bool isXXBRHShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRW instruction. + bool isXXBRWShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRD instruction. + bool isXXBRDShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRQ instruction. + bool isXXBRQShuffleMask(ShuffleVectorSDNode *N); + + /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXPERMDI instruction. + bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, @@ -606,8 +643,7 @@ namespace llvm { SelectionDAG &DAG) const override; void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; @@ -618,10 +654,10 @@ namespace llvm { return true; } - Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, - bool IsStore, bool IsLoad) const override; - Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, - bool IsStore, bool IsLoad) const override; + Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, @@ -902,6 +938,8 @@ namespace llvm { SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; @@ -1000,6 +1038,9 @@ namespace llvm { SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 997b96ca6ec8b15adfee74486e61106b065646ce..235640a901828937f36e2b0a4f8bf45425e5f2fc 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -634,10 +634,19 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS), "extsw", "$rA, $rS", IIC_IntSimple, [(set i64:$rA, (sext i32:$rS))]>, isPPC64; +let isCodeGenOnly = 1 in +def EXTSW_32 : XForm_11<31, 986, (outs gprc:$rA), (ins gprc:$rS), + "extsw $rA, $rS", IIC_IntSimple, + []>, isPPC64; defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; +// For fast-isel: +let isCodeGenOnly = 1 in +def SRADI_32 : XSForm_1<31, 413, (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH), + "sradi $rA, $rS, $SH", IIC_IntRotateDI, []>, isPPC64; + defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS), "cntlzd", "$rA, $rS", IIC_IntGeneral, [(set i64:$rA, (ctlz i64:$rS))]>; @@ -674,6 +683,16 @@ def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "divde $rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, isPPC64, Requires<[HasExtDiv]>; + +let Predicates = [IsISA3_0] in { +def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modsd $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (srem i64:$rA, i64:$rB))]>; +def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modud $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (urem i64:$rA, i64:$rB))]>; +} + let Defs = [CR0] in def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "divde. $rT, $rA, $rB", IIC_IntDivD, @@ -721,15 +740,26 @@ defm RLDICL : MDForm_1r<30, 0, // For fast-isel: let isCodeGenOnly = 1 in def RLDICL_32_64 : MDForm_1<30, 0, - (outs g8rc:$rA), + (outs g8rc:$rA), + (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; +// End fast-isel. +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm RLDICL_32 : MDForm_1r<30, 0, + (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), - "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI, + "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; -// End fast-isel. defm RLDICR : MDForm_1r<30, 1, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, []>, isPPC64; +let isCodeGenOnly = 1 in +def RLDICR_32 : MDForm_1<30, 1, + (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH, u6imm:$MBE), + "rldicr $rA, $rS, $SH, $MBE", IIC_IntRotateDI, + []>, isPPC64; defm RLDIC : MDForm_1r<30, 2, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE), "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI, @@ -963,6 +993,10 @@ def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg), [(set i64:$rD, (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, isPPC64; + +let isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in +def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>; + def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), (ADD8TLS $in, tglobaltlsaddr:$g)>; def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), @@ -977,7 +1011,9 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), isPPC64; // LR8 is a true define, while the rest of the Defs are clobbers. X3 is // explicitly defined when this op is created, so not mentioned here. -let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, +// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be +// correct because the branch select pass is relying on it. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8, Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), "#GETtlsADDR", diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index c380766e9f5c44f9f86c0ad097e335f0d5412a03..5465b5f2d66cd36d98b74490402ac9a6553fbc05 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -987,6 +987,16 @@ def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)), (v8i16 (VSLH $vA, $vB))>; def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)), (v4i32 (VSLW $vA, $vB))>; +def : Pat<(v1i128 (shl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>; +def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSLB $vA, $vB))>; +def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSLH $vA, $vB))>; +def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSLW $vA, $vB))>; +def : Pat<(v1i128 (PPCshl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSL (VSLO $vA, $vB), (VSPLTB 15, $vB)))>; def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)), (v16i8 (VSRB $vA, $vB))>; @@ -994,6 +1004,16 @@ def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)), (v8i16 (VSRH $vA, $vB))>; def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)), (v4i32 (VSRW $vA, $vB))>; +def : Pat<(v1i128 (srl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>; +def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRB $vA, $vB))>; +def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRH $vA, $vB))>; +def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRW $vA, $vB))>; +def : Pat<(v1i128 (PPCsrl v1i128:$vA, v1i128:$vB)), + (v1i128 (VSR (VSRO $vA, $vB), (VSPLTB 15, $vB)))>; def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)), (v16i8 (VSRAB $vA, $vB))>; @@ -1001,6 +1021,12 @@ def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)), (v8i16 (VSRAH $vA, $vB))>; def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)), (v4i32 (VSRAW $vA, $vB))>; +def : Pat<(v16i8 (PPCsra v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRAB $vA, $vB))>; +def : Pat<(v8i16 (PPCsra v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRAH $vA, $vB))>; +def : Pat<(v4i32 (PPCsra v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRAW $vA, $vB))>; // Float to integer and integer to float conversions def : Pat<(v4i32 (fp_to_sint v4f32:$vA)), @@ -1072,14 +1098,24 @@ def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB), // Vector shifts def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>; def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsld $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>; + "vsld $vD, $vA, $vB", IIC_VecGeneral, []>; def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsrd $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>; + "vsrd $vD, $vA, $vB", IIC_VecGeneral, []>; def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vsrad $vD, $vA, $vB", IIC_VecGeneral, - [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>; + "vsrad $vD, $vA, $vB", IIC_VecGeneral, []>; + +def : Pat<(v2i64 (shl v2i64:$vA, v2i64:$vB)), + (v2i64 (VSLD $vA, $vB))>; +def : Pat<(v2i64 (PPCshl v2i64:$vA, v2i64:$vB)), + (v2i64 (VSLD $vA, $vB))>; +def : Pat<(v2i64 (srl v2i64:$vA, v2i64:$vB)), + (v2i64 (VSRD $vA, $vB))>; +def : Pat<(v2i64 (PPCsrl v2i64:$vA, v2i64:$vB)), + (v2i64 (VSRD $vA, $vB))>; +def : Pat<(v2i64 (sra v2i64:$vA, v2i64:$vB)), + (v2i64 (VSRAD $vA, $vB))>; +def : Pat<(v2i64 (PPCsra v2i64:$vA, v2i64:$vB)), + (v2i64 (VSRAD $vA, $vB))>; // Vector Integer Arithmetic Instructions let isCommutable = 1 in { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 8e159f47ea2eedc4c6e39ac04feb60880060097a..236e513bec231546b57ce3de9543965f9e8826fc 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -440,8 +440,8 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(Opcode)); } -/// getNoopForMachoTarget - Return the noop instruction to use for a noop. -void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +/// Return the noop instruction to use for a noop. +void PPCInstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(PPC::NOP); } @@ -1533,6 +1533,8 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, case PPC::FCMPUD: SrcReg = MI.getOperand(1).getReg(); SrcReg2 = MI.getOperand(2).getReg(); + Value = 0; + Mask = 0; return true; } } @@ -1591,9 +1593,12 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // We can perform this optimization, equality only, if MI is // zero-extending. + // FIXME: Other possible target instructions include ANDISo and + // RLWINM aliases, such as ROTRWI, EXTLWI, SLWI and SRWI. if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo || MIOpC == PPC::SLW || MIOpC == PPC::SLWo || MIOpC == PPC::SRW || MIOpC == PPC::SRWo || + MIOpC == PPC::ANDIo || isZeroExtendingRotate) { noSub = true; equalityOnly = true; @@ -1641,6 +1646,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, break; } + SmallVector, 4> PredsToUpdate; + SmallVector, 4> SubRegsToUpdate; + // There are two possible candidates which can be changed to set CR[01]. // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). @@ -1652,9 +1660,37 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // same BB as the comparison. This is to allow the check below to avoid calls // (and other explicit clobbers); instead we should really check for these // more explicitly (in at least a few predecessors). - else if (MI->getParent() != CmpInstr.getParent() || Value != 0) { - // PPC does not have a record-form SUBri. + else if (MI->getParent() != CmpInstr.getParent()) return false; + else if (Value != 0) { + // The record-form instructions set CR bit based on signed comparison against 0. + // We try to convert a compare against 1 or -1 into a compare against 0. + bool Success = false; + if (!equalityOnly && MRI->hasOneUse(CRReg)) { + MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg); + if (UseMI->getOpcode() == PPC::BCC) { + PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm(); + int16_t Immed = (int16_t)Value; + + if (Immed == -1 && Pred == PPC::PRED_GT) { + // We convert "greater than -1" into "greater than or equal to 0", + // since we are assuming signed comparison by !equalityOnly + PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), + PPC::PRED_GE)); + Success = true; + } + else if (Immed == 1 && Pred == PPC::PRED_LT) { + // We convert "less than 1" into "less than or equal to 0". + PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)), + PPC::PRED_LE)); + Success = true; + } + } + } + + // PPC does not have a record-form SUBri. + if (!Success) + return false; } // Search for Sub. @@ -1720,15 +1756,14 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, if (NewOpC == -1) return false; - SmallVector, 4> PredsToUpdate; - SmallVector, 4> SubRegsToUpdate; - // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP // needs to be updated to be based on SUB. Push the condition code // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the // condition code of these operands will be modified. + // Here, Value == 0 means we haven't converted comparison against 1 or -1 to + // comparison against 0, which may modify predicate. bool ShouldSwap = false; - if (Sub) { + if (Sub && Value == 0) { ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && Sub->getOperand(2).getReg() == SrcReg; @@ -1765,6 +1800,9 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, } else // We need to abort on a user we don't understand. return false; } + assert(!(Value != 0 && ShouldSwap) && + "Non-zero immediate support and ShouldSwap" + "may conflict in updating predicate"); // Create a new virtual register to hold the value of the CR set by the // record-form instruction. If the instruction was not previously in @@ -1873,6 +1911,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { } bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + auto &MBB = *MI.getParent(); + auto DL = MI.getDebugLoc(); switch (MI.getOpcode()) { case TargetOpcode::LOAD_STACK_GUARD: { assert(Subtarget.isTargetLinux() && @@ -1891,6 +1931,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case PPC::DFSTOREf64: { assert(Subtarget.hasP9Vector() && "Invalid D-Form Pseudo-ops on non-P9 target."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && + "D-form op must have register and immediate operands"); unsigned UpperOpcode, LowerOpcode; switch (MI.getOpcode()) { case PPC::DFLOADf32: @@ -1920,6 +1962,17 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.setDesc(get(Opcode)); return true; } + case PPC::CFENCE8: { + auto Val = MI.getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val); + BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP)) + .addImm(PPC::PRED_NE_MINUS) + .addReg(PPC::CR7) + .addImm(1); + MI.setDesc(get(PPC::ISYNC)); + MI.RemoveOperand(0); + return true; + } } return false; } @@ -1930,3 +1983,7 @@ PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const { return &PPC::VSRCRegClass; return RC; } + +int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) { + return PPC::getRecordFormOpcode(Opcode); +} diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index f11aed8fa268f25edbcd09cd74822edc12f0e952..8dd4dbb608794a4c8d003a0c74723df1f37ce4b4 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -269,7 +269,7 @@ public: /// unsigned getInstSizeInBytes(const MachineInstr &MI) const override; - void getNoopForMachoTarget(MCInst &NopInst) const override; + void getNoop(MCInst &NopInst) const override; std::pair decomposeMachineOperandsTargetFlags(unsigned TF) const override; @@ -290,6 +290,7 @@ public: return Reg >= PPC::V0 && Reg <= PPC::V31; } const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const; + static int getRecordFormOpcode(unsigned Opcode); }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index f004ce49cac0dc6be403543d00fed6be3a394338..47d59c25392a2e9f71ee3866780674e5afa67617 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -33,7 +33,8 @@ def SDT_PPCVexts : SDTypeProfile<1, 2, [ SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2> ]>; -def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; def SDT_PPCvperm : SDTypeProfile<1, 3, [ @@ -45,13 +46,21 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>, ]>; def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, - SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> + SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> ]>; def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; +def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>, + SDTCisVec<1> +]>; + +def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + def SDT_PPCvcmp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> ]>; @@ -169,6 +178,8 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>; +def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>; +def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; @@ -1099,9 +1110,11 @@ multiclass AForm_3r opcode, bits<5> xo, dag OOL, dag IOL, let hasCtrlDep = 1 in { let Defs = [R1], Uses = [R1] in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt", - [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2", +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), + "#ADJCALLSTACKDOWN $amt1 $amt2", + [(callseq_start timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), + "#ADJCALLSTACKUP $amt1 $amt2", [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -1220,9 +1233,15 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { // FIXME: should be able to write a pattern for PPCcondbranch, but can't use // a two-value operand where a dag node expects two operands. :( let isCodeGenOnly = 1 in { - def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), - "b${cond:cc}${cond:pm} ${cond:reg}, $dst" - /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; + class BCC_class : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst), + "b${cond:cc}${cond:pm} ${cond:reg}, $dst" + /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>; + def BCC : BCC_class; + + // The same as BCC, except that it's not a terminator. Used for introducing + // control flow dependency without creating new blocks. + let isTerminator = 0 in def CTRL_DEP : BCC_class; + def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst), "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">; @@ -2530,6 +2549,14 @@ let Uses = [RM] in { "mffs. $rT", IIC_IntMFFS, []>, isDOT; } +let Predicates = [IsISA3_0] in { +def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "modsw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (srem i32:$rA, i32:$rB))]>; +def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "moduw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (urem i32:$rA, i32:$rB))]>; +} let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. // XO-Form instructions. Arithmetic instructions that can set overflow bit @@ -4163,6 +4190,8 @@ def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0 def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>; def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; +def : InstAlias<"clrldi $rA, $rS, $n", + (RLDICL_32 gprc:$rA, gprc:$rS, 0, u6imm:$n)>; def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>; def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b", diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 13603732397ad3d29616215dedd476edbc85d984..9cfc897cdb3f206837ab972436ec7ddaead5dcce 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -138,7 +138,7 @@ let Uses = [RM] in { def LXVW4X : XX1Form<31, 780, (outs vsrc:$XT), (ins memrr:$src), "lxvw4x $XT, $src", IIC_LdStLFD, - [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>; + []>; } // mayLoad // Store indexed instructions @@ -160,7 +160,7 @@ let Uses = [RM] in { def STXVW4X : XX1Form<31, 908, (outs), (ins vsrc:$XT, memrr:$dst), "stxvw4x $XT, $dst", IIC_LdStSTFD, - [(store v4i32:$XT, xoaddr:$dst)]>; + []>; } } // mayStore @@ -843,7 +843,9 @@ let Uses = [RM] in { def XXPERMDI : XX3Form_2<60, 10, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM), - "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>; + "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, + [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB, + imm32SExt16:$DM))]>; let isCodeGenOnly = 1 in def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM), "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>; @@ -1041,8 +1043,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in { // Stores. def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVW4X $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst), @@ -1053,8 +1053,12 @@ let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; + def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVW4X $rS, xoaddr:$dst)>; } // Permutes. @@ -1064,6 +1068,10 @@ def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; +// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and +// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable. +def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>; + // Selects. def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; @@ -1434,7 +1442,7 @@ let Predicates = [IsISA3_0, HasDirectMove] in { def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA), "mtvsrws $XT, $rA", IIC_VecGeneral, []>; - def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB), + def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB), "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral, []>, Requires<[In64BitMode]>; @@ -1890,8 +1898,8 @@ let Predicates = [IsLittleEndian, HasVSX] in def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; +def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; +def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) @@ -2332,6 +2340,16 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>; def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; + // Vector Reverse + def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)), + (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)), + (v4i32 (XXBRW $A))>; + def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)), + (v2i64 (XXBRD $A))>; + def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)), + (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + // Vector Permute def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, IIC_VecPerm, []>; @@ -2377,8 +2395,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Load Vector Indexed def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, - [(set v2f64:$XT, (load xoaddr:$src))]>; - + [(set v2f64:$XT, (load xaddr:$src))]>; // Load Vector (Left-justified) with Length def LXVL : XX1Form<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), "lxvl $XT, $src, $rB", IIC_LdStLoad, @@ -2428,7 +2445,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Store Vector Indexed def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, - [(store v2f64:$XT, xoaddr:$dst)]>; + [(store v2f64:$XT, xaddr:$dst)]>; // Store Vector (Left-justified) with Length def STXVL : XX1Form<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB), @@ -2496,21 +2513,38 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; } // IsLittleEndian, HasP9Vector - def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; - + // D-Form Load/Store + def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>; + + def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst), + (STXV $rS, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst), + (STXV $rS, memrix16:$dst)>; + + + def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; + def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; + def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; + def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst), + (STXVX $rS, xaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst), + (STXVX $rS, xaddr:$dst)>; def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), (v4i32 (LXVWSX xoaddr:$src))>; def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), @@ -2693,6 +2727,40 @@ def DblToFlt { dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0)))); dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); } + +def ByteToWord { + dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); + dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); + dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); + dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); +} + +def ByteToDWord { + dag A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); + dag A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8)); +} + +def HWordToWord { + dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); + dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); + dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); + dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); +} + +def HWordToDWord { + dag A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); + dag A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); +} + +def WordToDWord { + dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); + dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); +} + def FltToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A))))); } @@ -2702,9 +2770,15 @@ def FltToUIntLoad { def FltToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); } +def FltToLongLoadP9 { + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A))))); +} def FltToULongLoad { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); } +def FltToULongLoadP9 { + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A))))); +} def FltToLong { dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A)))); } @@ -2726,9 +2800,15 @@ def DblToULong { def DblToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); } +def DblToIntLoadP9 { + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A))))); +} def DblToUIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); } +def DblToUIntLoadP9 { + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A))))); +} def DblToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); } @@ -2896,17 +2976,17 @@ let AddedComplexity = 400 in { (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; - def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), + def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; - def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), + def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; - def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), + def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddr:$A), VSFRC)), 0))>; - def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), + def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddr:$A), VSFRC)), 0))>; @@ -2933,4 +3013,21 @@ let AddedComplexity = 400 in { (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC), (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>; } + // P9 Altivec instructions that can be used to build vectors. + // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete + // with complexities of existing build vector patterns in this file. + let Predicates = [HasP9Altivec] in { + def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)), + (v2i64 (VEXTSW2D $A))>; + def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)), + (v2i64 (VEXTSH2D $A))>; + def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1, + HWordToWord.A2, HWordToWord.A3)), + (v4i32 (VEXTSH2W $A))>; + def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1, + ByteToWord.A2, ByteToWord.A3)), + (v4i32 (VEXTSB2W $A))>; + def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)), + (v2i64 (VEXTSB2D $A))>; + } } diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 541b98e01b99414bcb74c688d64f26e99cefb07a..b310493587ae7fbb62ef8207dee2a122ac01ff63 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCMCExpr.h" +#include "PPC.h" #include "PPCSubtarget.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index c6d2c3ebcc0fc61eaa1e7eae6e138dc9a12abec3..ff5f17c7628f2f5833bd8a4c5217ae3b7307947c 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -19,9 +19,9 @@ // //===---------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index aad9139246923358a84786e35f2da1f9505fe8d1..637e52bbdbeecb19f682e4caad6d0290b251c793 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -273,6 +273,20 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +bool PPCRegisterInfo::isCallerPreservedPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + if (TM.isELFv2ABI() && PhysReg == PPC::X2) { + // X2 is guaranteed to be preserved within a function if it is reserved. + // The reason it's reserved is that it's the TOC pointer (and the function + // uses the TOC). In functions where it isn't reserved (i.e. leaf functions + // with no TOC access), we can't claim that it is preserved. + return (getReservedRegs(MF).test(PPC::X2)); + } else { + return false; + } +} + unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { const PPCFrameLowering *TFI = getFrameLowering(MF); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 4a96327fe5527c5b0b88790c0828494db3675276..0bbb71fdf9fbec64ea6020dfec5be15294c23a62 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -83,6 +83,7 @@ public: void adjustStackMapLiveOutMask(uint32_t *Mask) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; + bool isCallerPreservedPhysReg(unsigned PhysReg, const MachineFunction &MF) const override; /// We require the register scavenger. bool requiresRegisterScavenging(const MachineFunction &MF) const override { diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index 0c1260a2965b756b679a2b5945b91b2ba8173a5d..31c50785c2ee59c8e3c22e315f959dab4a6116b9 100644 --- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -21,9 +21,9 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -99,7 +99,8 @@ protected: // Don't really need to save data to the stack - the clobbered // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr) // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR). - BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0); + BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0) + .addImm(0); // Expand into two ops built prior to the existing instruction. MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3) diff --git a/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 7c53a5601790e7ac54948153ca2b6da1fec30c59..17345b6ca8d30ce58c1116b9dfc92aaf73abfc86 100644 --- a/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -61,8 +61,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 7806d45b54575711cdbbf4e074b6f97eccdfd7b8..fd8df8c23353a42debef57d56d17603d38e5d384 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "PPCTargetMachine.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetObjectFile.h" -#include "PPCTargetMachine.h" #include "PPCTargetTransformInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" @@ -86,9 +86,9 @@ EnableMachineCombinerPass("ppc-machine-combiner", extern "C" void LLVMInitializePowerPCTarget() { // Register the targets - RegisterTargetMachine A(getThePPC32Target()); - RegisterTargetMachine B(getThePPC64Target()); - RegisterTargetMachine C(getThePPC64LETarget()); + RegisterTargetMachine A(getThePPC32Target()); + RegisterTargetMachine B(getThePPC64Target()); + RegisterTargetMachine C(getThePPC64LETarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); initializePPCBoolRetToIntPass(PR); @@ -177,32 +177,34 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, assert(Options.MCOptions.getABIName().empty() && "Unknown target-abi option!"); - if (!TT.isMacOSX()) { - switch (TT.getArch()) { - case Triple::ppc64le: - return PPCTargetMachine::PPC_ABI_ELFv2; - case Triple::ppc64: - return PPCTargetMachine::PPC_ABI_ELFv1; - default: - // Fallthrough. - ; - } + if (TT.isMacOSX()) + return PPCTargetMachine::PPC_ABI_UNKNOWN; + + switch (TT.getArch()) { + case Triple::ppc64le: + return PPCTargetMachine::PPC_ABI_ELFv2; + case Triple::ppc64: + return PPCTargetMachine::PPC_ABI_ELFv1; + default: + return PPCTargetMachine::PPC_ABI_UNKNOWN; } - return PPCTargetMachine::PPC_ABI_UNKNOWN; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional RM) { - if (!RM.hasValue()) { - if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { - if (!TT.isOSBinFormatMachO() && !TT.isMacOSX()) - return Reloc::PIC_; - } - if (TT.isOSDarwin()) - return Reloc::DynamicNoPIC; - return Reloc::Static; - } - return *RM; + if (RM.hasValue()) + return *RM; + + // Darwin defaults to dynamic-no-pic. + if (TT.isOSDarwin()) + return Reloc::DynamicNoPIC; + + // Non-darwin 64-bit platforms are PIC by default. + if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) + return Reloc::PIC_; + + // 32-bit is static by default. + return Reloc::Static; } // The FeatureString here is a little subtle. We are modifying the feature @@ -224,26 +226,6 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, PPCTargetMachine::~PPCTargetMachine() = default; -void PPC32TargetMachine::anchor() {} - -PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} - -void PPC64TargetMachine::anchor() {} - -PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} - const PPCSubtarget * PPCTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); @@ -296,7 +278,7 @@ namespace { /// PPC Code Generator Pass Configuration Options. class PPCPassConfig : public TargetPassConfig { public: - PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM) + PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} PPCTargetMachine &getPPCTargetMachine() const { @@ -316,13 +298,13 @@ public: } // end anonymous namespace TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { - return new PPCPassConfig(this, PM); + return new PPCPassConfig(*this, PM); } void PPCPassConfig::addIRPasses() { if (TM->getOptLevel() != CodeGenOpt::None) addPass(createPPCBoolRetToIntPass()); - addPass(createAtomicExpandPass(&getPPCTargetMachine())); + addPass(createAtomicExpandPass()); // For the BG/Q (or if explicitly requested), add explicit data prefetch // intrinsics. diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index f2838351cee56b001c698a7f50e9f7f755385de7..5eb6ba785d1b84439b3768c94c09d3fe70a2cabf 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -23,7 +23,7 @@ namespace llvm { /// Common code between 32-bit and 64-bit PowerPC targets. /// -class PPCTargetMachine : public LLVMTargetMachine { +class PPCTargetMachine final : public LLVMTargetMachine { public: enum PPCABI { PPC_ABI_UNKNOWN, PPC_ABI_ELFv1, PPC_ABI_ELFv2 }; private: @@ -55,30 +55,11 @@ public: const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); }; -}; - -/// PowerPC 32-bit target machine. -/// -class PPC32TargetMachine : public PPCTargetMachine { - virtual void anchor(); -public: - PPC32TargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; -/// PowerPC 64-bit target machine. -/// -class PPC64TargetMachine : public PPCTargetMachine { - virtual void anchor(); -public: - PPC64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL); + bool isMachineVerifierClean() const override { + return false; + } }; - } // end namespace llvm #endif diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 7ee1317bf72f2d3b6a4214f05fdd0e0e3e34bb1a..3dbd5f5b9a928343a1af9593540a71dc2e5e6e80 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -215,6 +215,11 @@ bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) { return LoopHasReductions; } +bool PPCTTIImpl::expandMemCmp(Instruction *I, unsigned &MaxLoadSize) { + MaxLoadSize = 8; + return true; +} + bool PPCTTIImpl::enableInterleavedAccessVectorization() { return true; } @@ -225,7 +230,7 @@ unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { return ST->hasVSX() ? 64 : 32; } -unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const { if (Vector) { if (ST->hasQPX()) return 256; if (ST->hasAltivec()) return 128; @@ -239,9 +244,18 @@ unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) { } unsigned PPCTTIImpl::getCacheLineSize() { - // This is currently only used for the data prefetch pass which is only - // enabled for BG/Q by default. - return CacheLineSize; + // Check first if the user specified a custom line size. + if (CacheLineSize.getNumOccurrences() > 0) + return CacheLineSize; + + // On P7, P8 or P9 we have a cache line size of 128. + unsigned Directive = ST->getDarwinDirective(); + if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || + Directive == PPC::DIR_PWR9) + return 128; + + // On other processors return a default of 64 bytes. + return 64; } unsigned PPCTTIImpl::getPrefetchDistance() { diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 6ce70fbd8778e29c420f5b8d3c5b6238798440ea..758c335def087ce856a30b4656543d697cae8afb 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -60,9 +60,10 @@ public: /// @{ bool enableAggressiveInterleaving(bool LoopHasReductions); + bool expandMemCmp(Instruction *I, unsigned &MaxLoadSize); bool enableInterleavedAccessVectorization(); unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; unsigned getCacheLineSize(); unsigned getPrefetchDistance(); unsigned getMaxInterleaveFactor(unsigned VF); diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp index f3a0290da05406af9b71aa73eab93f35d73f4d1c..93fe3230ab815a81720892767e69b86ed8a1dd0d 100644 --- a/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" +#include "PPC.h" #include "PPCHazardRecognizers.h" #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index f6d20ced15a00c753f5d0b07386a51c7d2f1754f..a57484e5abdf71ebd911efb42f07c61a96b66d10 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index d3434b77be8af6d7e0bdeb631cb7d1a58fdd01e8..491eaf326a50874805da7f220ff0447c07679c2f 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -42,9 +42,9 @@ // //===---------------------------------------------------------------------===// -#include "PPCInstrInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" +#include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index d6f2672271e9b6e33b1c8a65a5db92fb139ac508..d9a71893afee7fc6657373e9d475e65f071aa384 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -12,10 +12,10 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp index b164df8b595a3511d7e3039f71dc8a489baee0b8..d622911e92c4f3b7b79d6f890452101009e04d8b 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; void RISCVMCAsmInfo::anchor() {} RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) { - PointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4; + CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4; CommentString = "#"; AlignmentIsInBytes = false; SupportsDebugInformation = true; diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index b2ed13758d417d28805d169bb9dafedd86390a40..9309d493cef489f790262e89957038946ccf9eb8 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -13,13 +13,13 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index ddc3bf35045263ef8ea56c1ea8378aea09f86788..7c98b1c8f321335c345167e3ac8e2a0f949eb16a 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -14,9 +14,9 @@ #ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H #define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H +#include "llvm/Config/config.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Config/config.h" namespace llvm { class MCAsmBackend; diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp index a20331cd0a3edadfb4553bdee8b186ae2950dc57..744d7b8aaa3a4228e19d765941bc62a3705998a2 100644 --- a/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -13,10 +13,10 @@ #include "RISCVTargetMachine.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" @@ -56,5 +56,5 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, } TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) { - return new TargetPassConfig(this, PM); + return new TargetPassConfig(*this, PM); } diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 7e6dff6b7894850c07ca49634171e3a2c37dc365..087c037614a9d23b4fef2c8e9dcf4e43b3b440b0 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -9,8 +9,8 @@ #include "MCTargetDesc/SparcMCExpr.h" #include "MCTargetDesc/SparcMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCContext.h" @@ -28,8 +28,8 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include #include #include diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 6f9cc314e37648367f4b13e9c1757057d109f68a..df819ccd15dbde357200cf7b49dde05c71a77405 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -96,7 +96,7 @@ namespace { /// createSparcDelaySlotFillerPass - Returns a pass that fills in delay /// slots in Sparc MachineFunctions /// -FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { +FunctionPass *llvm::createSparcDelaySlotFillerPass() { return new Filler; } diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index da7e0b737e785818ef6fbf4b935953fc6b4a656b..8e298e8316dae76b22d3d269a2eeb54823210a66 100644 --- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -14,11 +14,11 @@ #include "Sparc.h" #include "SparcRegisterInfo.h" #include "SparcSubtarget.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/Sparc/LeonFeatures.td b/lib/Target/Sparc/LeonFeatures.td old mode 100755 new mode 100644 diff --git a/lib/Target/Sparc/LeonPasses.cpp b/lib/Target/Sparc/LeonPasses.cpp index 0acc2875daa848319049fec3b43d28d370015208..ca6a0dc3c2a3ab0ec3e21e4d3739b02fc2287b69 100644 --- a/lib/Target/Sparc/LeonPasses.cpp +++ b/lib/Target/Sparc/LeonPasses.cpp @@ -21,9 +21,6 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -LEONMachineFunctionPass::LEONMachineFunctionPass(TargetMachine &tm, char &ID) - : MachineFunctionPass(ID) {} - LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID) : MachineFunctionPass(ID) {} @@ -72,8 +69,7 @@ int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo &MRI) { // char InsertNOPLoad::ID = 0; -InsertNOPLoad::InsertNOPLoad(TargetMachine &tm) - : LEONMachineFunctionPass(tm, ID) {} +InsertNOPLoad::InsertNOPLoad() : LEONMachineFunctionPass(ID) {} bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); @@ -114,7 +110,7 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) { // char FixFSMULD::ID = 0; -FixFSMULD::FixFSMULD(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {} +FixFSMULD::FixFSMULD() : LEONMachineFunctionPass(ID) {} bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); @@ -203,8 +199,7 @@ bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) { // char ReplaceFMULS::ID = 0; -ReplaceFMULS::ReplaceFMULS(TargetMachine &tm) - : LEONMachineFunctionPass(tm, ID) {} +ReplaceFMULS::ReplaceFMULS() : LEONMachineFunctionPass(ID) {} bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); @@ -287,8 +282,7 @@ bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) { char DetectRoundChange::ID = 0; -DetectRoundChange::DetectRoundChange(TargetMachine &tm) - : LEONMachineFunctionPass(tm, ID) {} +DetectRoundChange::DetectRoundChange() : LEONMachineFunctionPass(ID) {} bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); @@ -338,8 +332,7 @@ bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) { // char FixAllFDIVSQRT::ID = 0; -FixAllFDIVSQRT::FixAllFDIVSQRT(TargetMachine &tm) - : LEONMachineFunctionPass(tm, ID) {} +FixAllFDIVSQRT::FixAllFDIVSQRT() : LEONMachineFunctionPass(ID) {} bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); diff --git a/lib/Target/Sparc/LeonPasses.h b/lib/Target/Sparc/LeonPasses.h old mode 100755 new mode 100644 index 2158cb636bfcb5d4e3feebf74bda4bcef50fdf77..99cdfc4589ef463cb849593145a138e6a2470ba5 --- a/lib/Target/Sparc/LeonPasses.h +++ b/lib/Target/Sparc/LeonPasses.h @@ -32,7 +32,6 @@ protected: std::vector UsedRegisters; protected: - LEONMachineFunctionPass(TargetMachine &tm, char &ID); LEONMachineFunctionPass(char &ID); int GetRegIndexForOperand(MachineInstr &MI, int OperandIndex); @@ -48,7 +47,7 @@ class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass { public: static char ID; - InsertNOPLoad(TargetMachine &tm); + InsertNOPLoad(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -62,7 +61,7 @@ class LLVM_LIBRARY_VISIBILITY FixFSMULD : public LEONMachineFunctionPass { public: static char ID; - FixFSMULD(TargetMachine &tm); + FixFSMULD(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -74,7 +73,7 @@ class LLVM_LIBRARY_VISIBILITY ReplaceFMULS : public LEONMachineFunctionPass { public: static char ID; - ReplaceFMULS(TargetMachine &tm); + ReplaceFMULS(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -89,7 +88,7 @@ class LLVM_LIBRARY_VISIBILITY DetectRoundChange public: static char ID; - DetectRoundChange(TargetMachine &tm); + DetectRoundChange(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -102,7 +101,7 @@ class LLVM_LIBRARY_VISIBILITY FixAllFDIVSQRT : public LEONMachineFunctionPass { public: static char ID; - FixAllFDIVSQRT(TargetMachine &tm); + FixAllFDIVSQRT(); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index cc07547ede2c2a603f4610db0a127fb6d7b82f9d..d1d1334163a26b3ac0e73260856dbc890fa7ad06 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmBackend.h" #include "MCTargetDesc/SparcFixupKinds.h" #include "MCTargetDesc/SparcMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index 3ed09898fb78de6b8b4c84cc3ec92d03b083fd2b..50e8825b15e8cadf9701854e3da8a0806570064c 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -14,10 +14,10 @@ #include "SparcMCAsmInfo.h" #include "SparcMCExpr.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCTargetOptions.h" -#include "llvm/Support/Dwarf.h" using namespace llvm; @@ -28,7 +28,7 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Triple &TheTriple) { IsLittleEndian = (TheTriple.getArch() == Triple::sparcel); if (isV9) { - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; } Data16bitsDirective = "\t.half\t"; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index e85a8cd5e3399e6be5608741b0f1b296d8eb34a2..a77f760d9eff06a9c0e00016c858b1faaa521a1c 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -19,7 +19,6 @@ #include "llvm/MC/MCSymbolELF.h" #include "llvm/Object/ELF.h" - using namespace llvm; #define DEBUG_TYPE "sparcmcexpr" diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index 0a8272d892976a1855baf20b9e6d5f2f2efed54b..4135e4e1b61d3a311f113b1f11686cb79b121698 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -28,7 +28,7 @@ namespace llvm { class MachineInstr; FunctionPass *createSparcISelDag(SparcTargetMachine &TM); - FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); + FunctionPass *createSparcDelaySlotFillerPass(); void LowerSparcMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 31a128a5f2714b1d7a62481b4bca6cc6d9597a6b..19fb94534b256b0a987f209e3ad9d05d2f5f282c 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "InstPrinter/SparcInstPrinter.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "Sparc.h" #include "SparcInstrInfo.h" #include "SparcTargetMachine.h" #include "SparcTargetStreamer.h" diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 455d1ee1564a891fa367114ddd87d01d295f6301..9e7e3c6b705a9baeba87dc9725e384e86f3a2712 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; @@ -772,8 +773,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, } } - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, dl, true), - dl); + Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl); SmallVector, 8> RegsToPass; SmallVector MemOpChains; @@ -1164,8 +1164,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Adjust the stack pointer to make room for the arguments. // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls // with more than 6 arguments. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true), - DL); + Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL); // Collect the set of registers to pass to the function and their values. // This will be emitted as a sequence of CopyToReg nodes glued to the call @@ -1875,25 +1874,24 @@ EVT SparcTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, /// combiner. void SparcTargetLowering::computeKnownBitsForTargetNode (const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - APInt KnownZero2, KnownOne2; - KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); + KnownBits Known2; + Known.resetAll(); switch (Op.getOpcode()) { default: break; case SPISD::SELECT_ICC: case SPISD::SELECT_XCC: case SPISD::SELECT_FCC: - DAG.computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - DAG.computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1); // Only known if known in both the LHS and RHS. - KnownOne &= KnownOne2; - KnownZero &= KnownZero2; + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; break; } } @@ -2058,7 +2056,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, SDValue Chain = DAG.getEntryNode(); SDValue InFlag; - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(1, DL, true), DL); + Chain = DAG.getCALLSEQ_START(Chain, 1, 0, DL); Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InFlag); InFlag = Chain.getValue(1); SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT); @@ -3234,6 +3232,7 @@ SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -3245,7 +3244,8 @@ SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, unsigned DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - assert(RC->hasType(MVT::i32) && "Invalid destination!"); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); + (void)TRI; unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); @@ -3384,7 +3384,10 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; - case 'r': return C_RegisterClass; + case 'r': + case 'f': + case 'e': + return C_RegisterClass; case 'I': // SIMM13 return C_Other; } @@ -3463,6 +3466,24 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &SP::IntPairRegClass); else return std::make_pair(0U, &SP::IntRegsRegClass); + case 'f': + if (VT == MVT::f32) + return std::make_pair(0U, &SP::FPRegsRegClass); + else if (VT == MVT::f64) + return std::make_pair(0U, &SP::LowDFPRegsRegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &SP::LowQFPRegsRegClass); + llvm_unreachable("Unknown ValueType for f-register-type!"); + break; + case 'e': + if (VT == MVT::f32) + return std::make_pair(0U, &SP::FPRegsRegClass); + else if (VT == MVT::f64) + return std::make_pair(0U, &SP::DFPRegsRegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &SP::QFPRegsRegClass); + llvm_unreachable("Unknown ValueType for e-register-type!"); + break; } } else if (!Constraint.empty() && Constraint.size() <= 5 && Constraint[0] == '{' && *(Constraint.end()-1) == '}') { diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 90d03984060cd029af2d5e045470a1d28043fca2..cc6386bccbb1e9d74d1a26f5d84a94ff222575bf 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -66,8 +66,7 @@ namespace llvm { /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 5a19c624abb5341c851ec373cd87c2b552c40e2b..ae45c8be675249348ba4d5e0501dd00d17a2bb8c 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -195,7 +195,8 @@ def SPsjlj_longjmp: SDNode<"SPISD::EH_SJLJ_LONGJMP", [SDNPHasChain, SDNPSideEffect]>; // These are target-independent nodes, but have target-specific formats. -def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -404,9 +405,9 @@ let Defs = [O7] in { } let Defs = [O6], Uses = [O6] in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - "!ADJCALLSTACKDOWN $amt", - [(callseq_start timm:$amt)]>; +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "!ADJCALLSTACKDOWN $amt1, $amt2", + [(callseq_start timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), "!ADJCALLSTACKUP $amt1", [(callseq_end timm:$amt1, timm:$amt2)]>; diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp index a3cedcbf9dd145796c825e5f77142ce1a87cb89f..a784124ff68839e87e460b1a992bd9e776e645c9 100644 --- a/lib/Target/Sparc/SparcMCInstLower.cpp +++ b/lib/Target/Sparc/SparcMCInstLower.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "Sparc.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index 6ecfddfc7d66ea25a0fc7fd63f177492b5a3dfd3..6625eaafd992634435692b0063e38d998a871e9e 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -346,11 +346,13 @@ def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>; // Floating point register classes. def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>; - def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 31)>; - def QFPRegs : RegisterClass<"SP", [f128], 128, (sequence "Q%u", 0, 15)>; +// The Low?FPRegs classes are used only for inline-asm constraints. +def LowDFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>; +def LowQFPRegs : RegisterClass<"SP", [f128], 128, (sequence "Q%u", 0, 7)>; + // Floating point control register classes. def FCCRegs : RegisterClass<"SP", [i1], 1, (sequence "FCC%u", 0, 3)>; diff --git a/lib/Target/Sparc/SparcSchedule.td b/lib/Target/Sparc/SparcSchedule.td old mode 100755 new mode 100644 diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 4ae64062d9e2cf299f1240eda8654ad46d625f02..c7a1ca262d2c192cd43cf86a73fd3ef69cfdbd6b 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -11,9 +11,9 @@ //===----------------------------------------------------------------------===// #include "SparcTargetMachine.h" -#include "SparcTargetObjectFile.h" -#include "Sparc.h" #include "LeonPasses.h" +#include "Sparc.h" +#include "SparcTargetObjectFile.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" @@ -114,7 +114,7 @@ namespace { /// Sparc Code Generator Pass Configuration Options. class SparcPassConfig : public TargetPassConfig { public: - SparcPassConfig(SparcTargetMachine *TM, PassManagerBase &PM) + SparcPassConfig(SparcTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} SparcTargetMachine &getSparcTargetMachine() const { @@ -128,11 +128,11 @@ public: } // namespace TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) { - return new SparcPassConfig(this, PM); + return new SparcPassConfig(*this, PM); } void SparcPassConfig::addIRPasses() { - addPass(createAtomicExpandPass(&getSparcTargetMachine())); + addPass(createAtomicExpandPass()); TargetPassConfig::addIRPasses(); } @@ -143,26 +143,26 @@ bool SparcPassConfig::addInstSelector() { } void SparcPassConfig::addPreEmitPass(){ - addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine())); + addPass(createSparcDelaySlotFillerPass()); if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad()) { - addPass(new InsertNOPLoad(getSparcTargetMachine())); + addPass(new InsertNOPLoad()); } if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD()) { - addPass(new FixFSMULD(getSparcTargetMachine())); + addPass(new FixFSMULD()); } if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS()) { - addPass(new ReplaceFMULS(getSparcTargetMachine())); + addPass(new ReplaceFMULS()); } if (this->getSparcTargetMachine().getSubtargetImpl()->detectRoundChange()) { - addPass(new DetectRoundChange(getSparcTargetMachine())); + addPass(new DetectRoundChange()); } if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT()) { - addPass(new FixAllFDIVSQRT(getSparcTargetMachine())); + addPass(new FixAllFDIVSQRT()); } } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 48193fe095bedbc5cc4c67891445247bac2b10e5..faf714cbe2c982d21d035ef8637dccaa6a51d08a 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -40,6 +40,10 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool isMachineVerifierClean() const override { + return false; + } }; /// Sparc 32-bit target machine diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp index 8fdde15d8d27f79202e7886921af25ad71218a89..627e49a95f3cc84d68e4d89ed845d3301f9a10a8 100644 --- a/lib/Target/Sparc/SparcTargetObjectFile.cpp +++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp @@ -9,8 +9,8 @@ #include "SparcTargetObjectFile.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 3f91ca9035a61da5c7b955f8928fcd971c33eb3b..ad05779a9f64823c954ba4a46d7b3350935a8004 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/SystemZMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -262,6 +262,9 @@ public: bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const { return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287); } + bool isMemDisp12Len4(RegisterKind RegKind) const { + return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x10); + } bool isMemDisp12Len8(RegisterKind RegKind) const { return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x100); } @@ -347,6 +350,7 @@ public: bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); } bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); } bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); } + bool isBDLAddr64Disp12Len4() const { return isMemDisp12Len4(ADDR64Reg); } bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); } bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, ADDR64Reg); } bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); } diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index a281a0aa6bccd5450c9272c993ca7a4653d32e07..27fd70bc609254932752ff8754dcc996e4cefbf7 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -327,6 +327,18 @@ static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field, return MCDisassembler::Success; } +static DecodeStatus decodeBDLAddr12Len4Operand(MCInst &Inst, uint64_t Field, + const unsigned *Regs) { + uint64_t Length = Field >> 16; + uint64_t Base = (Field >> 12) & 0xf; + uint64_t Disp = Field & 0xfff; + assert(Length < 16 && "Invalid BDLAddr12Len4"); + Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base])); + Inst.addOperand(MCOperand::createImm(Disp)); + Inst.addOperand(MCOperand::createImm(Length + 1)); + return MCDisassembler::Success; +} + static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field, const unsigned *Regs) { uint64_t Length = Field >> 16; @@ -399,6 +411,13 @@ static DecodeStatus decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs); } +static DecodeStatus decodeBDLAddr64Disp12Len4Operand(MCInst &Inst, + uint64_t Field, + uint64_t Address, + const void *Decoder) { + return decodeBDLAddr12Len4Operand(Inst, Field, SystemZMC::GR64Regs); +} + static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst, uint64_t Field, uint64_t Address, diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 23b7d5b5d50132ab219ef8321ee284aacfb474e0..fd1fd7bc40dccc7a45a331b540a0cb1b3f0a6f4a 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "MCTargetDesc/SystemZMCFixups.h" +#include "MCTargetDesc/SystemZMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixupKindInfo.h" diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp index b17977d41be1f6e075caa1b9128343003a5b47a4..6e00981939b638b788ddc38e459b4673c9743eb1 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp @@ -14,7 +14,7 @@ using namespace llvm; SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) { - PointerSize = 8; + CodePointerSize = 8; CalleeSaveStackSlotSize = 8; IsLittleEndian = false; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index 092eb4011adc5a269b4448c3f31a7bdcc57beeda..d188f56512ab7aff24c050a7ad81dadab2d67ef7 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -77,6 +77,9 @@ private: uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + uint64_t getBDLAddr12Len4Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; @@ -219,6 +222,17 @@ getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum, | ((Disp & 0xff000) >> 12); } +uint64_t SystemZMCCodeEmitter:: +getBDLAddr12Len4Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); + uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); + uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1; + assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len)); + return (Len << 16) | (Base << 12) | Disp; +} + uint64_t SystemZMCCodeEmitter:: getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, SmallVectorImpl &Fixups, diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 3de570bf30cc47711c04a56d9a350ce241f6d311..df0a8161e6e7c81075e8eab726e61fa38af4b0dd 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -9,11 +9,11 @@ #include "MCTargetDesc/SystemZMCFixups.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 86a1322c9e2305567b1b4e80e3b303f5b205e5b2..74cf653b9d95c53b82268eb2cc371c880c7687db 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -63,7 +63,7 @@ via a register.) -- -We don't use ICM or STCM. +We don't use ICM, STCM, or CLM. -- diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td index 6bdfd4d07edce98a1c0ec9a307fed1c3678059ef..c5f324418da51255c200770837d699165e7ebe10 100644 --- a/lib/Target/SystemZ/SystemZ.td +++ b/lib/Target/SystemZ/SystemZ.td @@ -54,6 +54,8 @@ include "SystemZInstrFormats.td" include "SystemZInstrInfo.td" include "SystemZInstrVector.td" include "SystemZInstrFP.td" +include "SystemZInstrHFP.td" +include "SystemZInstrDFP.td" def SystemZInstrInfo : InstrInfo {} diff --git a/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/lib/Target/SystemZ/SystemZExpandPseudo.cpp index 92ce8089c24f185a620297ccae358db780ddaf0e..d02db9a617a33c753eee8844f45737209bbba572 100644 --- a/lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ b/lib/Target/SystemZ/SystemZExpandPseudo.cpp @@ -74,7 +74,7 @@ bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LivePhysRegs LiveRegs(TII->getRegisterInfo()); LiveRegs.addLiveOuts(MBB); for (auto I = std::prev(MBB.end()); I != MBBI; --I) LiveRegs.stepBackward(*I); diff --git a/lib/Target/SystemZ/SystemZFeatures.td b/lib/Target/SystemZ/SystemZFeatures.td index 716e5add8051b7a7bdfb162dce17448cf30a89be..ffb0b8d1c861c7a851607e373900072ca9ccde16 100644 --- a/lib/Target/SystemZ/SystemZFeatures.td +++ b/lib/Target/SystemZ/SystemZFeatures.td @@ -68,6 +68,11 @@ def FeaturePopulationCount : SystemZFeature< "Assume that the population-count facility is installed" >; +def FeatureMessageSecurityAssist4 : SystemZFeature< + "message-security-assist-extension4", "MessageSecurityAssist4", + "Assume that the message-security-assist extension facility 4 is installed" +>; + def Arch9NewFeatures : SystemZFeatureList<[ FeatureDistinctOps, FeatureFastSerialization, @@ -75,7 +80,8 @@ def Arch9NewFeatures : SystemZFeatureList<[ FeatureHighWord, FeatureInterlockedAccess1, FeatureLoadStoreOnCond, - FeaturePopulationCount + FeaturePopulationCount, + FeatureMessageSecurityAssist4 ]>; //===----------------------------------------------------------------------===// @@ -109,12 +115,18 @@ def FeatureTransactionalExecution : SystemZFeature< "Assume that the transactional-execution facility is installed" >; +def FeatureDFPZonedConversion : SystemZFeature< + "dfp-zoned-conversion", "DFPZonedConversion", + "Assume that the DFP zoned-conversion facility is installed" +>; + def Arch10NewFeatures : SystemZFeatureList<[ FeatureExecutionHint, FeatureLoadAndTrap, FeatureMiscellaneousExtensions, FeatureProcessorAssist, - FeatureTransactionalExecution + FeatureTransactionalExecution, + FeatureDFPZonedConversion ]>; //===----------------------------------------------------------------------===// @@ -133,6 +145,16 @@ def FeatureLoadStoreOnCond2 : SystemZFeature< "Assume that the load/store-on-condition facility 2 is installed" >; +def FeatureMessageSecurityAssist5 : SystemZFeature< + "message-security-assist-extension5", "MessageSecurityAssist5", + "Assume that the message-security-assist extension facility 5 is installed" +>; + +def FeatureDFPPackedConversion : SystemZFeature< + "dfp-packed-conversion", "DFPPackedConversion", + "Assume that the DFP packed-conversion facility is installed" +>; + def FeatureVector : SystemZFeature< "vector", "Vector", "Assume that the vectory facility is installed" @@ -142,6 +164,8 @@ def FeatureNoVector : SystemZMissingFeature<"Vector">; def Arch11NewFeatures : SystemZFeatureList<[ FeatureLoadAndZeroRightmostByte, FeatureLoadStoreOnCond2, + FeatureMessageSecurityAssist5, + FeatureDFPPackedConversion, FeatureVector ]>; diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.h b/lib/Target/SystemZ/SystemZHazardRecognizer.h index 8fa54ee434cf28829676b26db6917e023848476d..0c755c9ad1b9ace9fe77aa889e37f8edcc06ce8a 100644 --- a/lib/Target/SystemZ/SystemZHazardRecognizer.h +++ b/lib/Target/SystemZ/SystemZHazardRecognizer.h @@ -25,10 +25,10 @@ #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H #include "SystemZSubtarget.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 920b6e430e8fa10b1c9900f2406a8d0ddffbe60a..cd2f708458bfc1ab8c9b25a7487c867193de86e6 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -711,9 +712,9 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, // The inner check covers all cases but is more expensive. uint64_t Used = allOnes(Op.getValueSizeInBits()); if (Used != (AndMask | InsertMask)) { - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(Op.getOperand(0), KnownZero, KnownOne); - if (Used != (AndMask | InsertMask | KnownZero.getZExtValue())) + KnownBits Known; + CurDAG->computeKnownBits(Op.getOperand(0), Known); + if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue())) return false; } @@ -770,9 +771,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { // If some bits of Input are already known zeros, those bits will have // been removed from the mask. See if adding them back in makes the // mask suitable. - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(Input, KnownZero, KnownOne); - Mask |= KnownZero.getZExtValue(); + KnownBits Known; + CurDAG->computeKnownBits(Input, Known); + Mask |= Known.Zero.getZExtValue(); if (!refineRxSBGMask(RxSBG, Mask)) return false; } @@ -794,9 +795,9 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { // If some bits of Input are already known ones, those bits will have // been removed from the mask. See if adding them back in makes the // mask suitable. - APInt KnownZero, KnownOne; - CurDAG->computeKnownBits(Input, KnownZero, KnownOne); - Mask &= ~KnownOne.getZExtValue(); + KnownBits Known; + CurDAG->computeKnownBits(Input, Known); + Mask &= ~Known.One.getZExtValue(); if (!refineRxSBGMask(RxSBG, Mask)) return false; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 84d3c7bed50a2f1d58ea322cb7f713a62b00cab8..ac4c3f6db684d628872cc0f5794955fe5c4f7dcb 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -20,8 +20,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Support/CommandLine.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/KnownBits.h" #include using namespace llvm; @@ -829,7 +830,7 @@ bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, return isTruncateFree(FromType, ToType); } -bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return CI->isTailCall(); } @@ -1109,9 +1110,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Mark the start of the call. if (!IsTailCall) - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getConstant(NumBytes, DL, PtrVT, true), - DL); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); // Copy argument values to their designated locations. SmallVector, 9> RegsToPass; @@ -3066,14 +3065,14 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { // Get the known-zero masks for each operand. SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; - APInt KnownZero[2], KnownOne[2]; - DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]); - DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]); + KnownBits Known[2]; + DAG.computeKnownBits(Ops[0], Known[0]); + DAG.computeKnownBits(Ops[1], Known[1]); // See if the upper 32 bits of one operand and the lower 32 bits of the // other are known zero. They are the low and high operands respectively. - uint64_t Masks[] = { KnownZero[0].getZExtValue(), - KnownZero[1].getZExtValue() }; + uint64_t Masks[] = { Known[0].Zero.getZExtValue(), + Known[1].Zero.getZExtValue() }; unsigned High, Low; if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) High = 1, Low = 0; @@ -3158,9 +3157,9 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, } // Get the known-zero mask for the operand. - APInt KnownZero, KnownOne; - DAG.computeKnownBits(Op, KnownZero, KnownOne); - unsigned NumSignificantBits = (~KnownZero).getActiveBits(); + KnownBits Known; + DAG.computeKnownBits(Op, Known); + unsigned NumSignificantBits = (~Known.Zero).getActiveBits(); if (NumSignificantBits == 0) return DAG.getConstant(0, DL, VT); @@ -4190,12 +4189,20 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); + // If all elements are loads, use VLREP/VLEs (below). + bool AllLoads = true; + for (auto Elem : Elems) + if (Elem.getOpcode() != ISD::LOAD || cast(Elem)->isIndexed()) { + AllLoads = false; + break; + } + // The best way of building a v2i64 from two i64s is to use VLVGP. - if (VT == MVT::v2i64) + if (VT == MVT::v2i64 && !AllLoads) return joinDwords(DAG, DL, Elems[0], Elems[1]); // Use a 64-bit merge high to combine two doubles. - if (VT == MVT::v2f64) + if (VT == MVT::v2f64 && !AllLoads) return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); // Build v4f32 values directly from the FPRs: @@ -4205,7 +4212,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // // V VMRHG // - if (VT == MVT::v4f32) { + if (VT == MVT::v4f32 && !AllLoads) { SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); // Avoid unnecessary undefs by reusing the other operand. @@ -4247,23 +4254,37 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); Result = DAG.getBuildVector(VT, DL, Constants); } else { - // Otherwise try to use VLVGP to start the sequence in order to + // Otherwise try to use VLREP or VLVGP to start the sequence in order to // avoid a false dependency on any previous contents of the vector - // register. This only makes sense if one of the associated elements - // is defined. - unsigned I1 = NumElements / 2 - 1; - unsigned I2 = NumElements - 1; - bool Def1 = !Elems[I1].isUndef(); - bool Def2 = !Elems[I2].isUndef(); - if (Def1 || Def2) { - SDValue Elem1 = Elems[Def1 ? I1 : I2]; - SDValue Elem2 = Elems[Def2 ? I2 : I1]; - Result = DAG.getNode(ISD::BITCAST, DL, VT, - joinDwords(DAG, DL, Elem1, Elem2)); - Done[I1] = true; - Done[I2] = true; - } else - Result = DAG.getUNDEF(VT); + // register. + + // Use a VLREP if at least one element is a load. + unsigned LoadElIdx = UINT_MAX; + for (unsigned I = 0; I < NumElements; ++I) + if (Elems[I].getOpcode() == ISD::LOAD && + cast(Elems[I])->isUnindexed()) { + LoadElIdx = I; + break; + } + if (LoadElIdx != UINT_MAX) { + Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]); + Done[LoadElIdx] = true; + } else { + // Try to use VLVGP. + unsigned I1 = NumElements / 2 - 1; + unsigned I2 = NumElements - 1; + bool Def1 = !Elems[I1].isUndef(); + bool Def2 = !Elems[I2].isUndef(); + if (Def1 || Def2) { + SDValue Elem1 = Elems[Def1 ? I1 : I2]; + SDValue Elem2 = Elems[Def2 ? I2 : I1]; + Result = DAG.getNode(ISD::BITCAST, DL, VT, + joinDwords(DAG, DL, Elem1, Elem2)); + Done[I1] = true; + Done[I2] = true; + } else + Result = DAG.getUNDEF(VT); + } } // Use VLVGx to insert the other elements. @@ -5346,12 +5367,24 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { if (Invert) CCMask ^= CCValid; + + // ISel pattern matching also adds a load memory operand of the same + // address, so take special care to find the storing memory operand. + MachineMemOperand *MMO = nullptr; + for (auto *I : MI.memoperands()) + if (I->isStore()) { + MMO = I; + break; + } + BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) - .addReg(SrcReg) - .add(Base) - .addImm(Disp) - .addImm(CCValid) - .addImm(CCMask); + .addReg(SrcReg) + .add(Base) + .addImm(Disp) + .addImm(CCValid) + .addImm(CCMask) + .addMemOperand(MMO); + MI.eraseFromParent(); return MBB; } @@ -5929,7 +5962,8 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( .addImm(DestDisp) .addImm(ThisLength) .add(SrcBase) - .addImm(SrcDisp); + .addImm(SrcDisp) + ->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); DestDisp += ThisLength; SrcDisp += ThisLength; Length -= ThisLength; @@ -6353,3 +6387,12 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( llvm_unreachable("Unexpected instr type to insert"); } } + +// This is only used by the isel schedulers, and is needed only to prevent +// compiler from crashing when list-ilp is used. +const TargetRegisterClass * +SystemZTargetLowering::getRepRegClassFor(MVT VT) const { + if (VT == MVT::Untyped) + return &SystemZ::ADDR128BitRegClass; + return TargetLowering::getRepRegClassFor(VT); +} diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 7d92a7355877879c0cc1df14d0573bf6570f0e18..79c8c4d92669f9d1ca1aed150867895ecb143cc3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -454,7 +454,7 @@ public: MachineBasicBlock *BB) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; bool allowTruncateForTailCall(Type *, Type *) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, @@ -590,6 +590,8 @@ private: MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const; + + const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrDFP.td b/lib/Target/SystemZ/SystemZInstrDFP.td new file mode 100644 index 0000000000000000000000000000000000000000..08ab2d7bbc523dc32d08d6f83c59298e2923ce82 --- /dev/null +++ b/lib/Target/SystemZ/SystemZInstrDFP.td @@ -0,0 +1,231 @@ +//==- SystemZInstrDFP.td - Floating-point SystemZ instructions -*- tblgen-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The instructions in this file implement SystemZ decimal floating-point +// arithmetic. These instructions are inot currently used for code generation, +// are provided for use with the assembler and disassembler only. If LLVM +// ever supports decimal floating-point types (_Decimal64 etc.), they can +// also be used for code generation for those types. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load and test. +let Defs = [CC] in { + def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64, FP64>; + def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>; +} + + +//===----------------------------------------------------------------------===// +// Conversion instructions +//===----------------------------------------------------------------------===// + +// Convert floating-point values to narrower representations. The destination +// of LDXTR is a 128-bit value, but only the first register of the pair is used. +def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32, FP64>; +def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>; + +// Extend floating-point values to wider representations. +def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64, FP32>; +def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>; + +// Convert a signed integer value to a floating-point one. +def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64, GR64>; +def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>; +let Predicates = [FeatureFPExtension] in { + def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64, GR64>; + def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>; + def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64, GR32>; + def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>; +} + +// Convert an unsigned integer value to a floating-point one. +let Predicates = [FeatureFPExtension] in { + def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64, GR64>; + def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>; + def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64, GR32>; + def CXLFTR : TernaryRRFe<"cxlftr", 0xB95B, FP128, GR32>; +} + +// Convert a floating-point value to a signed integer value. +let Defs = [CC] in { + def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>; + def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>; + let Predicates = [FeatureFPExtension] in { + def CGDTRA : TernaryRRFe<"cgdtra", 0xB3E1, GR64, FP64>; + def CGXTRA : TernaryRRFe<"cgxtra", 0xB3E9, GR64, FP128>; + def CFDTR : TernaryRRFe<"cfdtr", 0xB941, GR32, FP64>; + def CFXTR : TernaryRRFe<"cfxtr", 0xB949, GR32, FP128>; + } +} + +// Convert a floating-point value to an unsigned integer value. +let Defs = [CC] in { + let Predicates = [FeatureFPExtension] in { + def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>; + def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>; + def CLFDTR : TernaryRRFe<"clfdtr", 0xB943, GR32, FP64>; + def CLFXTR : TernaryRRFe<"clfxtr", 0xB94B, GR32, FP128>; + } +} + +// Convert a packed value to a floating-point one. +def CDSTR : UnaryRRE<"cdstr", 0xB3F3, null_frag, FP64, GR64>; +def CXSTR : UnaryRRE<"cxstr", 0xB3FB, null_frag, FP128, GR128>; +def CDUTR : UnaryRRE<"cdutr", 0xB3F2, null_frag, FP64, GR64>; +def CXUTR : UnaryRRE<"cxutr", 0xB3FA, null_frag, FP128, GR128>; + +// Convert a floating-point value to a packed value. +def CSDTR : BinaryRRFd<"csdtr", 0xB3E3, GR64, FP64>; +def CSXTR : BinaryRRFd<"csxtr", 0xB3EB, GR128, FP128>; +def CUDTR : UnaryRRE<"cudtr", 0xB3E2, null_frag, GR64, FP64>; +def CUXTR : UnaryRRE<"cuxtr", 0xB3EA, null_frag, GR128, FP128>; + +// Convert from/to memory values in the zoned format. +let Predicates = [FeatureDFPZonedConversion] in { + def CDZT : BinaryRSL<"cdzt", 0xEDAA, FP64>; + def CXZT : BinaryRSL<"cxzt", 0xEDAB, FP128>; + def CZDT : StoreBinaryRSL<"czdt", 0xEDA8, FP64>; + def CZXT : StoreBinaryRSL<"czxt", 0xEDA9, FP128>; +} + +// Convert from/to memory values in the packed format. +let Predicates = [FeatureDFPPackedConversion] in { + def CDPT : BinaryRSL<"cdpt", 0xEDAE, FP64>; + def CXPT : BinaryRSL<"cxpt", 0xEDAF, FP128>; + def CPDT : StoreBinaryRSL<"cpdt", 0xEDAC, FP64>; + def CPXT : StoreBinaryRSL<"cpxt", 0xEDAD, FP128>; +} + +// Perform floating-point operation. +let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in + def PFPO : SideEffectInherentE<"pfpo", 0x010A>; + + +//===----------------------------------------------------------------------===// +// Unary arithmetic +//===----------------------------------------------------------------------===// + +// Round to an integer, with the second operand (M3) specifying the rounding +// mode. M4 can be set to 4 to suppress detection of inexact conditions. +def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64, FP64>; +def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>; + +// Extract biased exponent. +def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64, FP64>; +def EEXTR : UnaryRRE<"eextr", 0xB3ED, null_frag, FP128, FP128>; + +// Extract significance. +def ESDTR : UnaryRRE<"esdtr", 0xB3E7, null_frag, FP64, FP64>; +def ESXTR : UnaryRRE<"esxtr", 0xB3EF, null_frag, FP128, FP128>; + + +//===----------------------------------------------------------------------===// +// Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition. +let Defs = [CC] in { + let isCommutable = 1 in { + def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64, FP64, FP64>; + def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>; + } + let Predicates = [FeatureFPExtension] in { + def ADTRA : TernaryRRFa<"adtra", 0xB3D2, FP64, FP64, FP64>; + def AXTRA : TernaryRRFa<"axtra", 0xB3DA, FP128, FP128, FP128>; + } +} + +// Subtraction. +let Defs = [CC] in { + def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64, FP64, FP64>; + def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>; + let Predicates = [FeatureFPExtension] in { + def SDTRA : TernaryRRFa<"sdtra", 0xB3D3, FP64, FP64, FP64>; + def SXTRA : TernaryRRFa<"sxtra", 0xB3DB, FP128, FP128, FP128>; + } +} + +// Multiplication. +let isCommutable = 1 in { + def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64, FP64, FP64>; + def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>; +} +let Predicates = [FeatureFPExtension] in { + def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64, FP64, FP64>; + def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>; +} + +// Division. +def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64, FP64, FP64>; +def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>; +let Predicates = [FeatureFPExtension] in { + def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64, FP64, FP64>; + def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>; +} + +// Quantize. +def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64, FP64, FP64>; +def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>; + +// Reround. +def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64, FP64, FP64>; +def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>; + +// Shift significand left/right. +def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64, FP64, null_frag, 0>; +def SLXT : BinaryRXF<"slxt", 0xED48, null_frag, FP128, FP128, null_frag, 0>; +def SRDT : BinaryRXF<"srdt", 0xED41, null_frag, FP64, FP64, null_frag, 0>; +def SRXT : BinaryRXF<"srxt", 0xED49, null_frag, FP128, FP128, null_frag, 0>; + +// Insert biased exponent. +def IEDTR : BinaryRRFb<"iedtr", 0xB3F6, null_frag, FP64, FP64, FP64>; +def IEXTR : BinaryRRFb<"iextr", 0xB3FE, null_frag, FP128, FP128, FP128>; + + +//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +// Compare. +let Defs = [CC] in { + def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64, FP64>; + def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>; +} + +// Compare and signal. +let Defs = [CC] in { + def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64, FP64>; + def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>; +} + +// Compare biased exponent. +let Defs = [CC] in { + def CEDTR : CompareRRE<"cedtr", 0xB3F4, null_frag, FP64, FP64>; + def CEXTR : CompareRRE<"cextr", 0xB3FC, null_frag, FP128, FP128>; +} + +// Test Data Class. +let Defs = [CC] in { + def TDCET : TestRXE<"tdcet", 0xED50, null_frag, FP32>; + def TDCDT : TestRXE<"tdcdt", 0xED54, null_frag, FP64>; + def TDCXT : TestRXE<"tdcxt", 0xED58, null_frag, FP128>; +} + +// Test Data Group. +let Defs = [CC] in { + def TDGET : TestRXE<"tdget", 0xED51, null_frag, FP32>; + def TDGDT : TestRXE<"tdgdt", 0xED55, null_frag, FP64>; + def TDGXT : TestRXE<"tdgxt", 0xED59, null_frag, FP128>; +} + diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index bb6d27e248287c0e4d196cc381f6d27e7c54d00d..10172bd4520346095d5bcc8978374fde1af47ba9 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -121,7 +121,8 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; // For z13 we prefer LDE over LE to avoid partial register dependencies. - def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>; + let isCodeGenOnly = 1 in + def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>; // These instructions are split after register allocation, so we don't // want a custom inserter. @@ -437,18 +438,18 @@ def : Pat<(fmul (f128 (fpextend FP64:$src1)), bdxaddr12only:$addr)>; // Fused multiply-add. -def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>; -def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>; +def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>; +def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>; -def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>; -def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>; +def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>; +def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>; // Fused multiply-subtract. -def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>; -def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>; +def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>; +def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>; -def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>; -def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>; +def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>; +def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>; // Division. def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>; @@ -458,6 +459,12 @@ def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>; def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>; def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>; +// Divide to integer. +let Defs = [CC] in { + def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>; + def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>; +} + //===----------------------------------------------------------------------===// // Comparisons //===----------------------------------------------------------------------===// @@ -469,6 +476,13 @@ let Defs = [CC], CCValues = 0xF in { def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>; def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>; + + def KEBR : CompareRRE<"kebr", 0xB308, null_frag, FP32, FP32>; + def KDBR : CompareRRE<"kdbr", 0xB318, null_frag, FP64, FP64>; + def KXBR : CompareRRE<"kxbr", 0xB348, null_frag, FP128, FP128>; + + def KEB : CompareRXE<"keb", 0xED08, null_frag, FP32, load, 4>; + def KDB : CompareRXE<"kdb", 0xED18, null_frag, FP64, load, 8>; } // Test Data Class. diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index c727f486087e9a9590905fa2aee85190ca20dbc9..5f6115ed86a47e3233bf868bbbdb96fa86855279 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -527,6 +527,22 @@ class InstRRFc op, dag outs, dag ins, string asmstr, list pattern> let Inst{3-0} = R2; } +class InstRRFd op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<4, outs, ins, asmstr, pattern> { + field bits<32> Inst; + field bits<32> SoftFail = 0; + + bits<4> R1; + bits<4> R2; + bits<4> M4; + + let Inst{31-16} = op; + let Inst{15-12} = 0; + let Inst{11-8} = M4; + let Inst{7-4} = R1; + let Inst{3-0} = R2; +} + class InstRRFe op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<4, outs, ins, asmstr, pattern> { field bits<32> Inst; @@ -710,6 +726,37 @@ class InstRSI op, dag outs, dag ins, string asmstr, list pattern> let Inst{15-0} = RI2; } +class InstRSLa op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<20> BDL1; + + let Inst{47-40} = op{15-8}; + let Inst{39-36} = BDL1{19-16}; + let Inst{35-32} = 0; + let Inst{31-16} = BDL1{15-0}; + let Inst{15-8} = 0; + let Inst{7-0} = op{7-0}; +} + +class InstRSLb op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<4> R1; + bits<24> BDL2; + bits<4> M3; + + let Inst{47-40} = op{15-8}; + let Inst{39-16} = BDL2; + let Inst{15-12} = R1; + let Inst{11-8} = M3; + let Inst{7-0} = op{7-0}; +} + class InstRSYa op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -817,6 +864,37 @@ class InstSSa op, dag outs, dag ins, string asmstr, list pattern> let Inst{15-0} = BD2; } +class InstSSb op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<20> BDL1; + bits<20> BDL2; + + let Inst{47-40} = op; + let Inst{39-36} = BDL1{19-16}; + let Inst{35-32} = BDL2{19-16}; + let Inst{31-16} = BDL1{15-0}; + let Inst{15-0} = BDL2{15-0}; +} + +class InstSSc op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<20> BDL1; + bits<16> BD2; + bits<4> I3; + + let Inst{47-40} = op; + let Inst{39-36} = BDL1{19-16}; + let Inst{35-32} = I3; + let Inst{31-16} = BDL1{15-0}; + let Inst{15-0} = BD2; +} + class InstSSd op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -850,6 +928,20 @@ class InstSSe op, dag outs, dag ins, string asmstr, list pattern> let Inst{15-0} = BD4; } +class InstSSf op, dag outs, dag ins, string asmstr, list pattern> + : InstSystemZ<6, outs, ins, asmstr, pattern> { + field bits<48> Inst; + field bits<48> SoftFail = 0; + + bits<16> BD1; + bits<24> BDL2; + + let Inst{47-40} = op; + let Inst{39-32} = BDL2{23-16}; + let Inst{31-16} = BD1; + let Inst{15-0} = BDL2{15-0}; +} + class InstSSE op, dag outs, dag ins, string asmstr, list pattern> : InstSystemZ<6, outs, ins, asmstr, pattern> { field bits<48> Inst; @@ -1567,6 +1659,9 @@ class ICV // Inherent: // One register output operand and no input operands. // +// InherentDual: +// Two register output operands and no input operands. +// // StoreInherent: // One address operand. The instruction stores to the address. // @@ -1642,8 +1737,9 @@ class ICV // Two input operands and an implicit CC output operand. // // Test: -// Two input operands and an implicit CC output operand. The second -// input operand is an "address" operand used as a test class mask. +// One or two input operands and an implicit CC output operand. If +// present, the second input operand is an "address" operand used as +// a test class mask. // // Ternary: // One register output operand and three input operands. @@ -1691,6 +1787,10 @@ class InherentRRE opcode, RegisterOperand cls, let R2 = 0; } +class InherentDualRRE opcode, RegisterOperand cls> + : InstRRE; + class InherentVRIa opcode, bits<16> value> : InstVRIa { let I2 = value; @@ -1714,6 +1814,12 @@ class SideEffectInherentS opcode, let BD2 = 0; } +class SideEffectInherentRRE opcode> + : InstRRE { + let R1 = 0; + let R2 = 0; +} + // Allow an optional TLS marker symbol to generate TLS call relocations. class CallRI opcode> : InstRIb rsOpcode, } } +class LoadMultipleSSe opcode, RegisterOperand cls> + : InstSSe { + let mayLoad = 1; +} + class LoadMultipleVRSa opcode> : InstVRSa { @@ -2355,6 +2468,15 @@ class UnaryRRE opcode, SDPatternOperator operator, let OpType = "reg"; } +class UnaryMemRRFc opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; + let M3 = 0; +} + class UnaryRI opcode, SDPatternOperator operator, RegisterOperand cls, Immediate imm> : InstRIa opcode, : InstIE; +class SideEffectBinarySI opcode, Operand imm> + : InstSI; + class SideEffectBinarySIL opcode, SDPatternOperator operator, Immediate imm> : InstSIL; +class SideEffectBinarySSa opcode> + : InstSSa; + +class SideEffectBinarySSb opcode> + : InstSSb; + +class SideEffectBinarySSf opcode> + : InstSSf; + +class SideEffectBinaryMemMemRR opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; +} + +class SideEffectBinaryMemRRE opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE { + let Constraints = "$R2 = $R2src"; + let DisableEncoding = "$R2src"; +} + +class SideEffectBinaryMemMemRRE opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRE { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; +} + +class SideEffectBinaryMemMemRRFc opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRFc { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; + let M3 = 0; +} + class BinaryRR opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR opcode, SDPatternOperator operator, let DisableEncoding = "$R1src"; } +class BinaryRRD opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRD { + let OpKey = mnemonic#cls; + let OpType = "reg"; +} + class BinaryRRFa opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2, RegisterOperand cls3> @@ -2654,6 +2835,25 @@ class BinaryRRFb opcode, SDPatternOperator operator, let M4 = 0; } +class BinaryMemRRFc opcode, + RegisterOperand cls1, RegisterOperand cls2, Immediate imm> + : InstRRFc { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +multiclass BinaryMemRRFcOpt opcode, + RegisterOperand cls1, RegisterOperand cls2> { + def "" : BinaryMemRRFc; + def Opt : UnaryMemRRFc; +} + +class BinaryRRFd opcode, RegisterOperand cls1, + RegisterOperand cls2> + : InstRRFd; + class BinaryRRFe opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRFe opcode1, bits<16> opcode2, } } +class BinaryRSL opcode, RegisterOperand cls> + : InstRSLb { + let mayLoad = 1; +} + class BinaryRX opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> @@ -2833,6 +3040,18 @@ class BinaryRXE opcode, SDPatternOperator operator, let M3 = 0; } +class BinaryRXF opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2, + SDPatternOperator load, bits<5> bytes> + : InstRXF { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; +} + class BinaryRXY opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr20only> @@ -3112,6 +3331,41 @@ class BinaryVRX opcode, SDPatternOperator operator, let AccessBytes = bytes; } +class StoreBinaryRS opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr12only> + : InstRSb { + let mayStore = 1; + let AccessBytes = bytes; +} + +class StoreBinaryRSY opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSYb { + let mayStore = 1; + let AccessBytes = bytes; +} + +multiclass StoreBinaryRSPair rsOpcode, + bits<16> rsyOpcode, RegisterOperand cls, + bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : StoreBinaryRS; + let DispSize = "20" in + def Y : StoreBinaryRSY; + } +} + +class StoreBinaryRSL opcode, RegisterOperand cls> + : InstRSLb { + let mayStore = 1; +} + class StoreBinaryVRV opcode, bits<5> bytes, Immediate index> : InstVRV rxOpcode, bits<16> rxyOpcode, } } +class CompareRS opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr12only> + : InstRSb { + let mayLoad = 1; + let AccessBytes = bytes; +} + +class CompareRSY opcode, RegisterOperand cls, + bits<5> bytes, AddressingMode mode = bdaddr20only> + : InstRSYb { + let mayLoad = 1; + let AccessBytes = bytes; +} + +multiclass CompareRSPair rsOpcode, bits<16> rsyOpcode, + RegisterOperand cls, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + let DispSize = "12" in + def "" : CompareRS; + let DispSize = "20" in + def Y : CompareRSY; + } +} + +class CompareSSb opcode> + : InstSSb { + let isCompare = 1; + let mayLoad = 1; +} + class CompareSI opcode, SDPatternOperator operator, SDPatternOperator load, Immediate imm, AddressingMode mode = bdaddr12only> @@ -3313,29 +3601,85 @@ class TestRXE opcode, SDPatternOperator operator, let M3 = 0; } +class TestRSL opcode> + : InstRSLa { + let mayLoad = 1; +} + +class SideEffectTernarySSc opcode> + : InstSSc; + +class SideEffectTernaryMemMemMemRRFb opcode, + RegisterOperand cls1, + RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb { + let Constraints = "$R1 = $R1src, $R2 = $R2src, $R3 = $R3src"; + let DisableEncoding = "$R1src, $R2src, $R3src"; + let M4 = 0; +} + class SideEffectTernaryRRFc opcode, RegisterOperand cls1, RegisterOperand cls2, Immediate imm> : InstRRFc; +class SideEffectTernaryMemMemRRFc opcode, + RegisterOperand cls1, RegisterOperand cls2, + Immediate imm> + : InstRRFc { + let Constraints = "$R1 = $R1src, $R2 = $R2src"; + let DisableEncoding = "$R1src, $R2src"; +} + +multiclass SideEffectTernaryMemMemRRFcOpt opcode, + RegisterOperand cls1, + RegisterOperand cls2> { + def "" : SideEffectTernaryMemMemRRFc; + def Opt : SideEffectBinaryMemMemRRFc; +} + class SideEffectTernarySSF opcode, RegisterOperand cls> : InstSSF; +class TernaryRRFa opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFa; + +class TernaryRRFb opcode, + RegisterOperand cls1, RegisterOperand cls2, + RegisterOperand cls3> + : InstRRFb { + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + class TernaryRRFe opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRRFe; -class TernaryRRD opcode, - SDPatternOperator operator, RegisterOperand cls> - : InstRRD opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRRD { + [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3, cls2:$R2))]> { let OpKey = mnemonic#cls; let OpType = "reg"; let Constraints = "$R1 = $R1src"; @@ -3376,13 +3720,32 @@ multiclass TernaryRSPair rsOpcode, bits<16> rsyOpcode, } } +class SideEffectTernaryMemMemRS opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSa { + let Constraints = "$R1 = $R1src, $R3 = $R3src"; + let DisableEncoding = "$R1src, $R3src"; +} + +class SideEffectTernaryMemMemRSY opcode, + RegisterOperand cls1, RegisterOperand cls2> + : InstRSYa { + let Constraints = "$R1 = $R1src, $R3 = $R3src"; + let DisableEncoding = "$R1src, $R3src"; +} + class TernaryRXF opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, bits<5> bytes> - : InstRXF bytes> + : InstRXF { + [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3, + (load bdxaddr12only:$XBD2)))]> { let OpKey = mnemonic#"r"#cls; let OpType = "mem"; let Constraints = "$R1 = $R1src"; @@ -3981,9 +4344,7 @@ class AtomicLoadWBinaryImm // another instruction to handle the excess. multiclass MemorySS opcode, SDPatternOperator sequence, SDPatternOperator loop> { - def "" : InstSSa; + def "" : SideEffectBinarySSa; let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src, imm64:$length), @@ -4003,13 +4364,8 @@ multiclass MemorySS opcode, // the full loop (the main instruction plus the branch on CC==3). multiclass StringRRE opcode, SDPatternOperator operator> { - def "" : InstRRE { - let Uses = [R0L]; - let Constraints = "$R1 = $R1src, $R2 = $R2src"; - let DisableEncoding = "$R1src, $R2src"; - } + let Uses = [R0L] in + def "" : SideEffectBinaryMemMemRRE; let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in def Loop : Pseudo<(outs GR64:$end), (ins GR64:$start1, GR64:$start2, GR32:$char), diff --git a/lib/Target/SystemZ/SystemZInstrHFP.td b/lib/Target/SystemZ/SystemZInstrHFP.td new file mode 100644 index 0000000000000000000000000000000000000000..6d5b4b92f6508630c09e45b9adb4f9c5b6c006ef --- /dev/null +++ b/lib/Target/SystemZ/SystemZInstrHFP.td @@ -0,0 +1,240 @@ +//==- SystemZInstrHFP.td - Floating-point SystemZ instructions -*- tblgen-*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The instructions in this file implement SystemZ hexadecimal floating-point +// arithmetic. Since this format is not mapped to any source-language data +// type, these instructions are not used for code generation, but are provided +// for use with the assembler and disassembler only. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Move instructions +//===----------------------------------------------------------------------===// + +// Load and test. +let Defs = [CC] in { + def LTER : UnaryRR <"lter", 0x32, null_frag, FP32, FP32>; + def LTDR : UnaryRR <"ltdr", 0x22, null_frag, FP64, FP64>; + def LTXR : UnaryRRE<"ltxr", 0xB362, null_frag, FP128, FP128>; +} + +//===----------------------------------------------------------------------===// +// Conversion instructions +//===----------------------------------------------------------------------===// + +// Convert floating-point values to narrower representations. +def LEDR : UnaryRR <"ledr", 0x35, null_frag, FP32, FP64>; +def LEXR : UnaryRRE<"lexr", 0xB366, null_frag, FP32, FP128>; +def LDXR : UnaryRR <"ldxr", 0x25, null_frag, FP64, FP128>; +let isAsmParserOnly = 1 in { + def LRER : UnaryRR <"lrer", 0x35, null_frag, FP32, FP64>; + def LRDR : UnaryRR <"lrdr", 0x25, null_frag, FP64, FP128>; +} + +// Extend floating-point values to wider representations. +def LDER : UnaryRRE<"lder", 0xB324, null_frag, FP64, FP32>; +def LXER : UnaryRRE<"lxer", 0xB326, null_frag, FP128, FP32>; +def LXDR : UnaryRRE<"lxdr", 0xB325, null_frag, FP128, FP64>; + +def LDE : UnaryRXE<"lde", 0xED24, null_frag, FP64, 4>; +def LXE : UnaryRXE<"lxe", 0xED26, null_frag, FP128, 4>; +def LXD : UnaryRXE<"lxd", 0xED25, null_frag, FP128, 8>; + +// Convert a signed integer register value to a floating-point one. +def CEFR : UnaryRRE<"cefr", 0xB3B4, null_frag, FP32, GR32>; +def CDFR : UnaryRRE<"cdfr", 0xB3B5, null_frag, FP64, GR32>; +def CXFR : UnaryRRE<"cxfr", 0xB3B6, null_frag, FP128, GR32>; + +def CEGR : UnaryRRE<"cegr", 0xB3C4, null_frag, FP32, GR64>; +def CDGR : UnaryRRE<"cdgr", 0xB3C5, null_frag, FP64, GR64>; +def CXGR : UnaryRRE<"cxgr", 0xB3C6, null_frag, FP128, GR64>; + +// Convert a floating-point register value to a signed integer value, +// with the second operand (modifier M3) specifying the rounding mode. +let Defs = [CC] in { + def CFER : BinaryRRFe<"cfer", 0xB3B8, GR32, FP32>; + def CFDR : BinaryRRFe<"cfdr", 0xB3B9, GR32, FP64>; + def CFXR : BinaryRRFe<"cfxr", 0xB3BA, GR32, FP128>; + + def CGER : BinaryRRFe<"cger", 0xB3C8, GR64, FP32>; + def CGDR : BinaryRRFe<"cgdr", 0xB3C9, GR64, FP64>; + def CGXR : BinaryRRFe<"cgxr", 0xB3CA, GR64, FP128>; +} + +// Convert BFP to HFP. +let Defs = [CC] in { + def THDER : UnaryRRE<"thder", 0xB358, null_frag, FP64, FP32>; + def THDR : UnaryRRE<"thdr", 0xB359, null_frag, FP64, FP64>; +} + +// Convert HFP to BFP. +let Defs = [CC] in { + def TBEDR : BinaryRRFe<"tbedr", 0xB350, FP32, FP64>; + def TBDR : BinaryRRFe<"tbdr", 0xB351, FP64, FP64>; +} + + +//===----------------------------------------------------------------------===// +// Unary arithmetic +//===----------------------------------------------------------------------===// + +// Negation (Load Complement). +let Defs = [CC] in { + def LCER : UnaryRR <"lcer", 0x33, null_frag, FP32, FP32>; + def LCDR : UnaryRR <"lcdr", 0x23, null_frag, FP64, FP64>; + def LCXR : UnaryRRE<"lcxr", 0xB363, null_frag, FP128, FP128>; +} + +// Absolute value (Load Positive). +let Defs = [CC] in { + def LPER : UnaryRR <"lper", 0x30, null_frag, FP32, FP32>; + def LPDR : UnaryRR <"lpdr", 0x20, null_frag, FP64, FP64>; + def LPXR : UnaryRRE<"lpxr", 0xB360, null_frag, FP128, FP128>; +} + +// Negative absolute value (Load Negative). +let Defs = [CC] in { + def LNER : UnaryRR <"lner", 0x31, null_frag, FP32, FP32>; + def LNDR : UnaryRR <"lndr", 0x21, null_frag, FP64, FP64>; + def LNXR : UnaryRRE<"lnxr", 0xB361, null_frag, FP128, FP128>; +} + +// Halve. +def HER : UnaryRR <"her", 0x34, null_frag, FP32, FP32>; +def HDR : UnaryRR <"hdr", 0x24, null_frag, FP64, FP64>; + +// Square root. +def SQER : UnaryRRE<"sqer", 0xB245, null_frag, FP32, FP32>; +def SQDR : UnaryRRE<"sqdr", 0xB244, null_frag, FP64, FP64>; +def SQXR : UnaryRRE<"sqxr", 0xB336, null_frag, FP128, FP128>; + +def SQE : UnaryRXE<"sqe", 0xED34, null_frag, FP32, 4>; +def SQD : UnaryRXE<"sqd", 0xED35, null_frag, FP64, 8>; + +// Round to an integer (rounding towards zero). +def FIER : UnaryRRE<"fier", 0xB377, null_frag, FP32, FP32>; +def FIDR : UnaryRRE<"fidr", 0xB37F, null_frag, FP64, FP64>; +def FIXR : UnaryRRE<"fixr", 0xB367, null_frag, FP128, FP128>; + + +//===----------------------------------------------------------------------===// +// Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition. +let Defs = [CC] in { + let isCommutable = 1 in { + def AER : BinaryRR<"aer", 0x3A, null_frag, FP32, FP32>; + def ADR : BinaryRR<"adr", 0x2A, null_frag, FP64, FP64>; + def AXR : BinaryRR<"axr", 0x36, null_frag, FP128, FP128>; + } + def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, load, 4>; + def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, load, 8>; +} + +// Addition (unnormalized). +let Defs = [CC] in { + let isCommutable = 1 in { + def AUR : BinaryRR<"aur", 0x3E, null_frag, FP32, FP32>; + def AWR : BinaryRR<"awr", 0x2E, null_frag, FP64, FP64>; + } + def AU : BinaryRX<"au", 0x7E, null_frag, FP32, load, 4>; + def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, load, 8>; +} + +// Subtraction. +let Defs = [CC] in { + def SER : BinaryRR<"ser", 0x3B, null_frag, FP32, FP32>; + def SDR : BinaryRR<"sdr", 0x2B, null_frag, FP64, FP64>; + def SXR : BinaryRR<"sxr", 0x37, null_frag, FP128, FP128>; + + def SE : BinaryRX<"se", 0x7B, null_frag, FP32, load, 4>; + def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, load, 8>; +} + +// Subtraction (unnormalized). +let Defs = [CC] in { + def SUR : BinaryRR<"sur", 0x3F, null_frag, FP32, FP32>; + def SWR : BinaryRR<"swr", 0x2F, null_frag, FP64, FP64>; + + def SU : BinaryRX<"su", 0x7F, null_frag, FP32, load, 4>; + def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, load, 8>; +} + +// Multiplication. +let isCommutable = 1 in { + def MEER : BinaryRRE<"meer", 0xB337, null_frag, FP32, FP32>; + def MDR : BinaryRR <"mdr", 0x2C, null_frag, FP64, FP64>; + def MXR : BinaryRR <"mxr", 0x26, null_frag, FP128, FP128>; +} +def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, load, 4>; +def MD : BinaryRX <"md", 0x6C, null_frag, FP64, load, 8>; + +// Extending multiplication (f32 x f32 -> f64). +def MDER : BinaryRR<"mder", 0x3C, null_frag, FP64, FP32>; +def MDE : BinaryRX<"mde", 0x7C, null_frag, FP64, load, 4>; +let isAsmParserOnly = 1 in { + def MER : BinaryRR<"mer", 0x3C, null_frag, FP64, FP32>; + def ME : BinaryRX<"me", 0x7C, null_frag, FP64, load, 4>; +} + +// Extending multiplication (f64 x f64 -> f128). +def MXDR : BinaryRR<"mxdr", 0x27, null_frag, FP128, FP64>; +def MXD : BinaryRX<"mxd", 0x67, null_frag, FP128, load, 8>; + +// Fused multiply-add. +def MAER : TernaryRRD<"maer", 0xB32E, null_frag, FP32, FP32>; +def MADR : TernaryRRD<"madr", 0xB33E, null_frag, FP64, FP64>; +def MAE : TernaryRXF<"mae", 0xED2E, null_frag, FP32, FP32, load, 4>; +def MAD : TernaryRXF<"mad", 0xED3E, null_frag, FP64, FP64, load, 8>; + +// Fused multiply-subtract. +def MSER : TernaryRRD<"mser", 0xB32F, null_frag, FP32, FP32>; +def MSDR : TernaryRRD<"msdr", 0xB33F, null_frag, FP64, FP64>; +def MSE : TernaryRXF<"mse", 0xED2F, null_frag, FP32, FP32, load, 4>; +def MSD : TernaryRXF<"msd", 0xED3F, null_frag, FP64, FP64, load, 8>; + +// Multiplication (unnormalized). +def MYR : BinaryRRD<"myr", 0xB33B, null_frag, FP128, FP64>; +def MYHR : BinaryRRD<"myhr", 0xB33D, null_frag, FP64, FP64>; +def MYLR : BinaryRRD<"mylr", 0xB339, null_frag, FP64, FP64>; +def MY : BinaryRXF<"my", 0xED3B, null_frag, FP128, FP64, load, 8>; +def MYH : BinaryRXF<"myh", 0xED3D, null_frag, FP64, FP64, load, 8>; +def MYL : BinaryRXF<"myl", 0xED39, null_frag, FP64, FP64, load, 8>; + +// Fused multiply-add (unnormalized). +def MAYR : TernaryRRD<"mayr", 0xB33A, null_frag, FP128, FP64>; +def MAYHR : TernaryRRD<"mayhr", 0xB33C, null_frag, FP64, FP64>; +def MAYLR : TernaryRRD<"maylr", 0xB338, null_frag, FP64, FP64>; +def MAY : TernaryRXF<"may", 0xED3A, null_frag, FP128, FP64, load, 8>; +def MAYH : TernaryRXF<"mayh", 0xED3C, null_frag, FP64, FP64, load, 8>; +def MAYL : TernaryRXF<"mayl", 0xED38, null_frag, FP64, FP64, load, 8>; + +// Division. +def DER : BinaryRR <"der", 0x3D, null_frag, FP32, FP32>; +def DDR : BinaryRR <"ddr", 0x2D, null_frag, FP64, FP64>; +def DXR : BinaryRRE<"dxr", 0xB22D, null_frag, FP128, FP128>; +def DE : BinaryRX <"de", 0x7D, null_frag, FP32, load, 4>; +def DD : BinaryRX <"dd", 0x6D, null_frag, FP64, load, 8>; + + +//===----------------------------------------------------------------------===// +// Comparisons +//===----------------------------------------------------------------------===// + +let Defs = [CC] in { + def CER : CompareRR <"cer", 0x39, null_frag, FP32, FP32>; + def CDR : CompareRR <"cdr", 0x29, null_frag, FP64, FP64>; + def CXR : CompareRRE<"cxr", 0xB369, null_frag, FP128, FP128>; + + def CE : CompareRX<"ce", 0x79, null_frag, FP32, load, 4>; + def CD : CompareRX<"cd", 0x69, null_frag, FP64, load, 8>; +} + diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index c8ff9558cc8826b1f5b5962b33e04e20c03cbb4d..66a5ff12be464fd29227f9a0294af00572278248 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "SystemZInstrInfo.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" #include "SystemZInstrBuilder.h" -#include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -104,8 +104,9 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, MachineOperand &LowOffsetOp = MI->getOperand(2); LowOffsetOp.setImm(LowOffsetOp.getImm() + 8); - // Clear the kill flags for the base and index registers in the first - // instruction. + // Clear the kill flags on the registers in the first instruction. + if (EarlierMI->getOperand(0).isReg() && EarlierMI->getOperand(0).isUse()) + EarlierMI->getOperand(0).setIsKill(false); EarlierMI->getOperand(1).setIsKill(false); EarlierMI->getOperand(3).setIsKill(false); @@ -235,32 +236,30 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction &MF = *MBB->getParent(); - const unsigned Reg = MI->getOperand(0).getReg(); + const unsigned Reg64 = MI->getOperand(0).getReg(); + const unsigned Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32); - // Conveniently, all 4 instructions are cloned from LOAD_STACK_GUARD, - // so they already have operand 0 set to reg. + // EAR can only load the low subregister so us a shift for %a0 to produce + // the GR containing %a0 and %a1. // ear , %a0 - MachineInstr *Ear1MI = MF.CloneMachineInstr(MI); - MBB->insert(MI, Ear1MI); - Ear1MI->setDesc(get(SystemZ::EAR)); - MachineInstrBuilder(MF, Ear1MI).addReg(SystemZ::A0); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32) + .addReg(SystemZ::A0) + .addReg(Reg64, RegState::ImplicitDefine); // sllg , , 32 - MachineInstr *SllgMI = MF.CloneMachineInstr(MI); - MBB->insert(MI, SllgMI); - SllgMI->setDesc(get(SystemZ::SLLG)); - MachineInstrBuilder(MF, SllgMI).addReg(Reg).addReg(0).addImm(32); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::SLLG), Reg64) + .addReg(Reg64) + .addReg(0) + .addImm(32); // ear , %a1 - MachineInstr *Ear2MI = MF.CloneMachineInstr(MI); - MBB->insert(MI, Ear2MI); - Ear2MI->setDesc(get(SystemZ::EAR)); - MachineInstrBuilder(MF, Ear2MI).addReg(SystemZ::A1); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32) + .addReg(SystemZ::A1); // lg , 40() MI->setDesc(get(SystemZ::LG)); - MachineInstrBuilder(MF, MI).addReg(Reg).addImm(40).addReg(0); + MachineInstrBuilder(MF, MI).addReg(Reg64).addImm(40).addReg(0); } // Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR @@ -849,12 +848,18 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - // Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too. + // Split 128-bit GPR moves into two 64-bit moves. Add implicit uses of the + // super register in case one of the subregs is undefined. + // This handles ADDR128 too. if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) { copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64), RI.getSubReg(SrcReg, SystemZ::subreg_h64), KillSrc); + MachineInstrBuilder(*MBB.getParent(), std::prev(MBBI)) + .addReg(SrcReg, RegState::Implicit); copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_l64), RI.getSubReg(SrcReg, SystemZ::subreg_l64), KillSrc); + MachineInstrBuilder(*MBB.getParent(), std::prev(MBBI)) + .addReg(SrcReg, (getKillRegState(KillSrc) | RegState::Implicit)); return; } @@ -1114,10 +1119,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( return nullptr; unsigned OpNum = Ops[0]; - assert(Size == - MF.getRegInfo() - .getRegClass(MI.getOperand(OpNum).getReg()) - ->getSize() && + assert(Size * 8 == + TRI->getRegSizeInBits(*MF.getRegInfo() + .getRegClass(MI.getOperand(OpNum).getReg())) && "Invalid size combination"); if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 && diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index d63525f29412322597cf44bee2ab7c494b0fa927..fa5ecdd8524333e87ab42253a0d0661e872316b7 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// let hasNoSchedulingInfo = 1 in { - def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt), - [(callseq_start timm:$amt)]>; + def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), + [(callseq_start timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -464,6 +464,11 @@ def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>; // Memory-to-memory moves. let mayLoad = 1, mayStore = 1 in defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>; +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + def MVCL : SideEffectBinaryMemMemRR<"mvcl", 0x0E, GR128, GR128>; + def MVCLE : SideEffectTernaryMemMemRS<"mvcle", 0xA8, GR128, GR128>; + def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>; +} // String moves. let mayLoad = 1, mayStore = 1, Defs = [CC] in @@ -707,6 +712,10 @@ def : StoreGR64PC; defm : StoreGR64Pair; def : StoreGR64PC; +// Store characters under mask -- not (yet) used for codegen. +defm STCM : StoreBinaryRSPair<"stcm", 0xBE, 0xEB2D, GR32, 0>; +def STCMH : StoreBinaryRSY<"stcmh", 0xEB2C, GRH32, 0>; + //===----------------------------------------------------------------------===// // Multi-register moves //===----------------------------------------------------------------------===// @@ -715,6 +724,7 @@ def : StoreGR64PC; defm LM : LoadMultipleRSPair<"lm", 0x98, 0xEB98, GR32>; def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>; def LMH : LoadMultipleRSY<"lmh", 0xEB96, GRH32>; +def LMD : LoadMultipleSSe<"lmd", 0xEF, GR64>; // Multi-register stores. defm STM : StoreMultipleRSPair<"stm", 0x90, 0xEB90, GR32>; @@ -742,6 +752,10 @@ def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>; def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>; def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>; +// Byte-swapping memory-to-memory moves. +let mayLoad = 1, mayStore = 1 in + def MVCIN : SideEffectBinarySSa<"mvcin", 0xE8>; + //===----------------------------------------------------------------------===// // Load address instructions //===----------------------------------------------------------------------===// @@ -816,6 +830,7 @@ defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>; defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>; defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>; +// Insert characters under mask -- not (yet) used for codegen. let Defs = [CC] in { defm ICM : TernaryRSPair<"icm", 0xBF, 0xEB81, GR32, 0>; def ICMH : TernaryRSY<"icmh", 0xEB80, GRH32, 0>; @@ -919,6 +934,10 @@ let Defs = [CC] in { defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>; def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>; def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>; + + // Addition to memory. + def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>; + def ALGSI : BinarySIY<"algsi", 0xEB7E, null_frag, imm64sx8>; } defm : ZXB; @@ -1166,9 +1185,14 @@ def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>; def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; // Multiplication of a register, producing two results. +def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>; +def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>; def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>; // Multiplication of memory, producing two results. +def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>; +def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>; +def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>; def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; //===----------------------------------------------------------------------===// @@ -1177,12 +1201,14 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; let hasSideEffects = 1 in { // Do not speculatively execute. // Division and remainder, from registers. + def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>; def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; // Division and remainder, from memory. + def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>; def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>; def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>; def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>; @@ -1193,23 +1219,32 @@ let hasSideEffects = 1 in { // Do not speculatively execute. // Shifts //===----------------------------------------------------------------------===// -// Shift left. +// Logical shift left. let hasSideEffects = 0 in { defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; - defm SLA : BinaryRSAndK<"sla", 0x8B, 0xEBDD, null_frag, GR32>; def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; + def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>; +} + +// Arithmetic shift left. +let Defs = [CC] in { + defm SLA : BinaryRSAndK<"sla", 0x8B, 0xEBDD, null_frag, GR32>; + def SLAG : BinaryRSY<"slag", 0xEB0B, null_frag, GR64>; + def SLDA : BinaryRS<"slda", 0x8F, null_frag, GR128>; } // Logical shift right. let hasSideEffects = 0 in { defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; + def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>; } // Arithmetic shift right. let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>; + def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>; } // Rotate left. @@ -1351,8 +1386,12 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { defm : ZXB; // Memory-to-memory comparison. -let mayLoad = 1, Defs = [CC] in +let mayLoad = 1, Defs = [CC] in { defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>; + def CLCL : SideEffectBinaryMemMemRR<"clcl", 0x0F, GR128, GR128>; + def CLCLE : SideEffectTernaryMemMemRS<"clcle", 0xA9, GR128, GR128>; + def CLCLU : SideEffectTernaryMemMemRSY<"clclu", 0xEB8F, GR128, GR128>; +} // String comparison. let mayLoad = 1, Defs = [CC] in @@ -1381,6 +1420,12 @@ let Defs = [CC] in { def TML : InstAlias<"tml\t$R, $I", (TMLL GR32:$R, imm32ll16:$I), 0>; def TMH : InstAlias<"tmh\t$R, $I", (TMLH GR32:$R, imm32lh16:$I), 0>; +// Compare logical characters under mask -- not (yet) used for codegen. +let Defs = [CC] in { + defm CLM : CompareRSPair<"clm", 0xBD, 0xEB21, GR32, 0>; + def CLMH : CompareRSY<"clmh", 0xEB20, GRH32, 0>; +} + //===----------------------------------------------------------------------===// // Prefetch and execution hint //===----------------------------------------------------------------------===// @@ -1580,6 +1625,115 @@ let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in { def LPDG : BinarySSF<"lpdg", 0xC85, GR128>; } +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +let mayLoad = 1, mayStore = 1 in + def TR : SideEffectBinarySSa<"tr", 0xDC>; + +let mayLoad = 1, Defs = [CC, R0L, R1D] in { + def TRT : SideEffectBinarySSa<"trt", 0xDD>; + def TRTR : SideEffectBinarySSa<"trtr", 0xD0>; +} + +let mayLoad = 1, mayStore = 1, Uses = [R0L] in + def TRE : SideEffectBinaryMemMemRRE<"tre", 0xB2A5, GR128, GR64>; + +let mayLoad = 1, Uses = [R1D], Defs = [CC] in { + defm TRTE : BinaryMemRRFcOpt<"trte", 0xB9BF, GR128, GR64>; + defm TRTRE : BinaryMemRRFcOpt<"trtre", 0xB9BD, GR128, GR64>; +} + +let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { + defm TROO : SideEffectTernaryMemMemRRFcOpt<"troo", 0xB993, GR128, GR64>; + defm TROT : SideEffectTernaryMemMemRRFcOpt<"trot", 0xB992, GR128, GR64>; + defm TRTO : SideEffectTernaryMemMemRRFcOpt<"trto", 0xB991, GR128, GR64>; + defm TRTT : SideEffectTernaryMemMemRRFcOpt<"trtt", 0xB990, GR128, GR64>; +} + +let mayLoad = 1, mayStore = 1, Defs = [CC] in { + defm CU12 : SideEffectTernaryMemMemRRFcOpt<"cu12", 0xB2A7, GR128, GR128>; + defm CU14 : SideEffectTernaryMemMemRRFcOpt<"cu14", 0xB9B0, GR128, GR128>; + defm CU21 : SideEffectTernaryMemMemRRFcOpt<"cu21", 0xB2A6, GR128, GR128>; + defm CU24 : SideEffectTernaryMemMemRRFcOpt<"cu24", 0xB9B1, GR128, GR128>; + def CU41 : SideEffectBinaryMemMemRRE<"cu41", 0xB9B2, GR128, GR128>; + def CU42 : SideEffectBinaryMemMemRRE<"cu42", 0xB9B3, GR128, GR128>; + + let isAsmParserOnly = 1 in { + defm CUUTF : SideEffectTernaryMemMemRRFcOpt<"cuutf", 0xB2A6, GR128, GR128>; + defm CUTFU : SideEffectTernaryMemMemRRFcOpt<"cutfu", 0xB2A7, GR128, GR128>; + } +} + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in { + def KM : SideEffectBinaryMemMemRRE<"km", 0xB92E, GR128, GR128>; + def KMC : SideEffectBinaryMemMemRRE<"kmc", 0xB92F, GR128, GR128>; + + def KIMD : SideEffectBinaryMemRRE<"kimd", 0xB93E, GR64, GR128>; + def KLMD : SideEffectBinaryMemRRE<"klmd", 0xB93F, GR64, GR128>; + def KMAC : SideEffectBinaryMemRRE<"kmac", 0xB91E, GR64, GR128>; + + let Predicates = [FeatureMessageSecurityAssist4] in { + def KMF : SideEffectBinaryMemMemRRE<"kmf", 0xB92A, GR128, GR128>; + def KMO : SideEffectBinaryMemMemRRE<"kmo", 0xB92B, GR128, GR128>; + def KMCTR : SideEffectTernaryMemMemMemRRFb<"kmctr", 0xB92D, + GR128, GR128, GR128>; + def PCC : SideEffectInherentRRE<"pcc", 0xB92C>; + } + let Predicates = [FeatureMessageSecurityAssist5] in + def PPNO : SideEffectBinaryMemMemRRE<"ppno", 0xB93C, GR128, GR128>; +} + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +defm CVB : BinaryRXPair<"cvb",0x4F, 0xE306, null_frag, GR32, load, 4>; +def CVBG : BinaryRXY<"cvbg", 0xE30E, null_frag, GR64, load, 8>; + +defm CVD : StoreRXPair<"cvd", 0x4E, 0xE326, null_frag, GR32, 4>; +def CVDG : StoreRXY<"cvdg", 0xE32E, null_frag, GR64, 8>; + +let mayLoad = 1, mayStore = 1 in { + def MVN : SideEffectBinarySSa<"mvn", 0xD1>; + def MVZ : SideEffectBinarySSa<"mvz", 0xD3>; + def MVO : SideEffectBinarySSb<"mvo", 0xF1>; + + def PACK : SideEffectBinarySSb<"pack", 0xF2>; + def PKA : SideEffectBinarySSf<"pka", 0xE9>; + def PKU : SideEffectBinarySSf<"pku", 0xE1>; + def UNPK : SideEffectBinarySSb<"unpk", 0xF3>; + let Defs = [CC] in { + def UNPKA : SideEffectBinarySSa<"unpka", 0xEA>; + def UNPKU : SideEffectBinarySSa<"unpku", 0xE2>; + } +} + +let mayLoad = 1, mayStore = 1 in { + let Defs = [CC] in { + def AP : SideEffectBinarySSb<"ap", 0xFA>; + def SP : SideEffectBinarySSb<"sp", 0xFB>; + def ZAP : SideEffectBinarySSb<"zap", 0xF8>; + def SRP : SideEffectTernarySSc<"srp", 0xF0>; + } + def MP : SideEffectBinarySSb<"mp", 0xFC>; + def DP : SideEffectBinarySSb<"dp", 0xFD>; + let Defs = [CC] in { + def ED : SideEffectBinarySSa<"ed", 0xDE>; + def EDMK : SideEffectBinarySSa<"edmk", 0xDF>; + } +} + +let Defs = [CC] in { + def CP : CompareSSb<"cp", 0xF9>; + def TP : TestRSL<"tp", 0xEBC0>; +} + //===----------------------------------------------------------------------===// // Access registers //===----------------------------------------------------------------------===// @@ -1712,12 +1866,39 @@ let usesCustomInserter = 1 in { // Search a block of memory for a character. let mayLoad = 1, Defs = [CC] in - defm SRST : StringRRE<"srst", 0xb25e, z_search_string>; + defm SRST : StringRRE<"srst", 0xB25E, z_search_string>; +let mayLoad = 1, Defs = [CC], Uses = [R0L] in + def SRSTU : SideEffectBinaryMemMemRRE<"srstu", 0xB9BE, GR64, GR64>; + +// Compare until substring equal. +let mayLoad = 1, Defs = [CC], Uses = [R0L, R1L] in + def CUSE : SideEffectBinaryMemMemRRE<"cuse", 0xB257, GR128, GR128>; + +// Compare and form codeword. +let mayLoad = 1, Defs = [CC, R1D, R2D, R3D], Uses = [R1D, R2D, R3D] in + def CFC : SideEffectAddressS<"cfc", 0xB21A, null_frag>; + +// Update tree. +let mayLoad = 1, mayStore = 1, Defs = [CC, R0D, R1D, R2D, R3D, R5D], + Uses = [R0D, R1D, R2D, R3D, R4D, R5D] in + def UPT : SideEffectInherentE<"upt", 0x0102>; + +// Checksum. +let mayLoad = 1, Defs = [CC] in + def CKSM : SideEffectBinaryMemMemRRE<"cksm", 0xB241, GR64, GR128>; + +// Compression call. +let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in + def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>; // Supervisor call. let hasSideEffects = 1, isCall = 1, Defs = [CC] in def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>; +// Monitor call. +let hasSideEffects = 1, isCall = 1 in + def MC : SideEffectBinarySI<"mc", 0xAF, imm32zx8>; + // Store clock. let hasSideEffects = 1, Defs = [CC] in { def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>; @@ -1729,10 +1910,18 @@ let hasSideEffects = 1, Defs = [CC] in { let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>; +// Extract CPU attribute. +let hasSideEffects = 1 in + def ECAG : BinaryRSY<"ecag", 0xEB4C, null_frag, GR64>; + // Extract CPU time. let Defs = [R0D, R1D], hasSideEffects = 1, mayLoad = 1 in def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>; +// Extract PSW. +let hasSideEffects = 1, Uses = [CC] in + def EPSW : InherentDualRRE<"epsw", 0xB98D, GR32>; + // Execute. let hasSideEffects = 1 in { def EX : SideEffectBinaryRX<"ex", 0x44, GR64>; diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp index ec8ce6e911faeb8ef597a603a8e30a47ca95245a..3a0e01da42f034124fb16abf0fb08eeaae10bce9 100644 --- a/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "SystemZTargetMachine.h" #include "SystemZMachineFunctionInfo.h" +#include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 7bb4fe5afb3f45d2afe505dacbedd6a0c4ef3738..713612129d90c31c44e712e66d0911efe6b4df01 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -531,6 +531,7 @@ def BDAddr64Disp12 : AddressAsmOperand<"BDAddr", "64", "12">; def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">; def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; +def BDLAddr64Disp12Len4 : AddressAsmOperand<"BDLAddr", "64", "12", "Len4">; def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; def BDRAddr64Disp12 : AddressAsmOperand<"BDRAddr", "64", "12">; def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">; @@ -578,6 +579,7 @@ def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">; def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">; def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; +def bdladdr12onlylen4 : BDLMode<"BDLAddr", "64", "12", "Only", "4">; def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">; def bdraddr12only : BDRMode<"BDRAddr", "64", "12", "Only">; def bdvaddr12only : BDVMode< "64", "12">; diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index fde26ed4e1c5510323bf47676a049e3c35351e8e..adfc69c5d4cf4f8291cf8f337890f9da78363248 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -10,7 +10,8 @@ //===----------------------------------------------------------------------===// // Type profiles //===----------------------------------------------------------------------===// -def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>]>; +def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i64>, + SDTCisVT<1, i64>]>; def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>, SDTCisVT<1, i64>]>; def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 6ef8000d6f4311ceb525ec28dca0cca20f631a6d..d14a0fb0b0b2b7d6ab956e07dcf3436e8c68bb76 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" +#include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/SystemZ/SystemZSchedule.td b/lib/Target/SystemZ/SystemZSchedule.td index dbba8ab42b5a54679e59f8b5e5259df67bfdc91a..1ce0168f95e95725d7828747d9e4cba85dac6306 100644 --- a/lib/Target/SystemZ/SystemZSchedule.td +++ b/lib/Target/SystemZ/SystemZSchedule.td @@ -56,12 +56,16 @@ def LSU_lat1 : SchedWrite; // Floating point unit (zEC12 and earlier) def FPU : SchedWrite; def FPU2 : SchedWrite; +def DFU : SchedWrite; +def DFU2 : SchedWrite; // Vector sub units (z13) def VecBF : SchedWrite; def VecBF2 : SchedWrite; def VecDF : SchedWrite; def VecDF2 : SchedWrite; +def VecDFX : SchedWrite; +def VecDFX2 : SchedWrite; def VecFPd : SchedWrite; // Blocking BFP div/sqrt unit. def VecMul : SchedWrite; def VecStr : SchedWrite; diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td index 7aee6f52e9a7d5eb52759d7ebd15184b19aff06f..5f5f2f690e58a6497bbe816f74ed17cd49ab5cd6 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -76,6 +76,8 @@ def : WriteRes { let Latency = 8; } def : WriteRes { let Latency = 9; } def : WriteRes { let Latency = 8; } def : WriteRes { let Latency = 9; } +def : WriteRes { let Latency = 1; } +def : WriteRes { let Latency = 2; } def : WriteRes; // Move character def : InstRW<[FXb, LSU, LSU, LSU, Lat8, GroupAlone], (instregex "MVC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCL(E|U)?$")>; // Pseudo -> reg move def : InstRW<[FXa], (instregex "COPY(_TO_REGCLASS)?$")>; @@ -268,6 +271,7 @@ def : InstRW<[FXb, LSU, Lat5], (instregex "LLG(F|T)?AT$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "STCM(H|Y)?$")>; //===----------------------------------------------------------------------===// // Multi-register moves @@ -277,6 +281,9 @@ def : InstRW<[FXb, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>; def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], (instregex "LM(H|Y|G)?$")>; +// Load multiple disjoint +def : InstRW<[FXb, Lat30, GroupAlone], (instregex "LMD$")>; + // Store multiple (estimated average of ceil(5/2) FXb ops) def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10, GroupAlone], (instregex "STM(G|H|Y)?$")>; @@ -288,6 +295,7 @@ def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10, def : InstRW<[FXa], (instregex "LRV(G)?R$")>; def : InstRW<[FXa, LSU, Lat5], (instregex "LRV(G|H)?$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "STRV(G|H)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCIN$")>; //===----------------------------------------------------------------------===// // Load address instructions @@ -345,7 +353,7 @@ def : InstRW<[FXa], (instregex "ALGF(I|R)$")>; def : InstRW<[FXa], (instregex "ALGR(K)?$")>; def : InstRW<[FXa], (instregex "ALR(K)?$")>; def : InstRW<[FXa], (instregex "AR(K)?$")>; -def : InstRW<[FXb, LSU, Lat5], (instregex "A(G)?SI$")>; +def : InstRW<[FXb, LSU, Lat5], (instregex "A(L)?(G)?SI$")>; // Logical addition with carry def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "ALC(G)?$")>; @@ -438,11 +446,15 @@ def : InstRW<[FXa, Lat9, GroupAlone], (instregex "MLGR$")>; def : InstRW<[FXa, Lat5], (instregex "MGHI$")>; def : InstRW<[FXa, Lat5], (instregex "MHI$")>; def : InstRW<[FXa, LSU, Lat9], (instregex "MH(Y)?$")>; +def : InstRW<[FXa, Lat7, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; //===----------------------------------------------------------------------===// // Division and remainder //===----------------------------------------------------------------------===// +def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DR$")>; +def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "D$")>; def : InstRW<[FXa, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; def : InstRW<[LSU, FXa, Lat30, GroupAlone], (instregex "DSG(F)?$")>; def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>; @@ -456,7 +468,8 @@ def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>; def : InstRW<[FXa], (instregex "SLL(G|K)?$")>; def : InstRW<[FXa], (instregex "SRL(G|K)?$")>; def : InstRW<[FXa], (instregex "SRA(G|K)?$")>; -def : InstRW<[FXa], (instregex "SLA(K)?$")>; +def : InstRW<[FXa], (instregex "SLA(G|K)?$")>; +def : InstRW<[FXa, FXa, FXa, FXa, Lat8], (instregex "S(L|R)D(A|L)$")>; // Rotate def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>; @@ -505,7 +518,7 @@ def : InstRW<[FXb, Lat2], (instregex "CGFR$")>; // Compare logical character def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "CLC$")>; - +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLCL(E|U)?$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>; // Test under mask @@ -516,6 +529,9 @@ def : InstRW<[FXb], (instregex "TMHL(64)?$")>; def : InstRW<[FXb], (instregex "TMLH(64)?$")>; def : InstRW<[FXb], (instregex "TMLL(64)?$")>; +// Compare logical characters under mask +def : InstRW<[FXb, LSU, Lat5], (instregex "CLM(H|Y)?$")>; + //===----------------------------------------------------------------------===// // Prefetch and execution hint //===----------------------------------------------------------------------===// @@ -562,6 +578,42 @@ def : InstRW<[FXb, FXb, LSU, Lat6, GroupAlone], (instregex "STPQ$")>; // Load pair disjoint def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>; +def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[FXa, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>; +def : InstRW<[FXb, VecDF, FXb, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>; + +def : InstRW<[FXb, VecDFX, LSU, LSU, Lat9, GroupAlone], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[FXb, VecDFX2, LSU, LSU, Lat30, GroupAlone], + (instregex "(M|D)P$")>; +def : InstRW<[FXb, FXb, VecDFX2, LSU, LSU, LSU, Lat15, GroupAlone], + (instregex "SRP$")>; +def : InstRW<[VecDFX, LSU, LSU, Lat5, GroupAlone], (instregex "CP$")>; +def : InstRW<[VecDFX, LSU, Lat4, GroupAlone], (instregex "TP$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; + //===----------------------------------------------------------------------===// // Access registers //===----------------------------------------------------------------------===// @@ -640,13 +692,30 @@ def : InstRW<[FXa], (instregex "ZEXT128_(32|64)$")>; // String instructions def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>; +def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; // Move with key def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVCK$")>; +// Monitor call +def : InstRW<[FXb], (instregex "MC$")>; + +// Extract CPU attribute +def : InstRW<[FXb, Lat30], (instregex "ECAG$")>; + // Extract CPU Time def : InstRW<[FXa, Lat5, LSU], (instregex "ECTG$")>; +// Extract PSW +def : InstRW<[FXb, Lat30], (instregex "EPSW$")>; + // Execute def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>; @@ -811,14 +880,17 @@ def : InstRW<[VecFPd, LSU], (instregex "D(E|D)B$")>; def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>; def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>; +// Divide to integer +def : InstRW<[VecFPd, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>; + //===----------------------------------------------------------------------===// // FP: Comparisons //===----------------------------------------------------------------------===// // Compare -def : InstRW<[VecXsPm, LSU, Lat8], (instregex "C(E|D)B$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "C(E|D)BR?$")>; -def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXBR$")>; +def : InstRW<[VecXsPm, LSU, Lat8], (instregex "(K|C)(E|D)B$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(K|C)(E|D)BR?$")>; +def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "(K|C)XBR$")>; // Test Data Class def : InstRW<[LSU, VecXsPm, Lat9], (instregex "TC(E|D)B$")>; @@ -836,6 +908,238 @@ def : InstRW<[FXa, Lat30, GroupAlone], (instregex "SFASR$")>; def : InstRW<[FXa, LSU, Lat30, GroupAlone], (instregex "LFAS$")>; def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>; + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>; +def : InstRW<[VecBF], (instregex "LEXR$")>; +def : InstRW<[VecDF2, VecDF2], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSU], (instregex "LDE$")>; +def : InstRW<[FXb], (instregex "LDER$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)R$")>; + +// Convert from fixed +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)R$")>; +def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)R$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)R$")>; +def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)R$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[VecBF], (instregex "THD(E)?R$")>; +def : InstRW<[VecBF], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DR$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)ER$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[VecBF], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)$")>; +def : InstRW<[VecFPd], (instregex "SQ(E|D)R$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[VecBF], (instregex "FIER$")>; +def : InstRW<[VecBF], (instregex "FIDR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>; +def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>; +def : InstRW<[VecBF], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)?$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MY(H|L)?R$")>; + +// Multiply and add / subtract +def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>; +def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[VecBF], (instregex "M(A|S)DR$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAY(H|L)?R$")>; + +// Division +def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>; +def : InstRW<[VecFPd], (instregex "D(E|D)R$")>; +def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecXsPm, LSU, Lat8], (instregex "C(E|D)$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "C(E|D)R$")>; +def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[VecDF], (instregex "LTDTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[VecDF, Lat15], (instregex "LEDTR$")>; +def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[VecDF], (instregex "LDETR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CD(F|G)TR(A)?$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CDL(F|G)TR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CXL(F|G)TR$")>; + +// Convert to fixed / logical +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)DTR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>; +def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>; +def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, BeginGroup], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>; +def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXZT$")>; +def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CZDT$")>; +def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CZXT$")>; + +// Convert from / to packed +def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDPT$")>; +def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXPT$")>; +def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>; +def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>; + +// Perform floating-point operation +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[VecDF], (instregex "FIDTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEDTR$")>; +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "ESDTR$")>; +def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[VecDF], (instregex "ADTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[VecDF], (instregex "SDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[VecDF, Lat30], (instregex "MDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[VecDF, Lat30], (instregex "DDTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[VecDF], (instregex "QADTR$")>; +def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[FXb, VecDF, Lat11], (instregex "RRDTR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[LSU, VecDF, Lat11], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[FXb, VecDF, Lat11], (instregex "IEDTR$")>; +def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[VecDF], (instregex "(K|C)DTR$")>; +def : InstRW<[VecDF, VecDF, Lat11, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[VecDF], (instregex "CEDTR$")>; +def : InstRW<[VecDF], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; + + // --------------------------------- Vector --------------------------------- // //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td index a950e54e76016f4d0e3c7d3fd56f81b4acaf9ae9..126eac2e2072dae65574c248ad888f5aa06d29c3 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -59,6 +59,7 @@ def : WriteRes { let Latency = 30; let NumMicroOps = 0;} def Z196_FXUnit : ProcResource<2>; def Z196_LSUnit : ProcResource<2>; def Z196_FPUnit : ProcResource<1>; +def Z196_DFUnit : ProcResource<1>; // Subtarget specific definitions of scheduling resources. def : WriteRes { let Latency = 1; } @@ -66,6 +67,8 @@ def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 8; } def : WriteRes { let Latency = 9; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } // -------------------------- INSTRUCTIONS ---------------------------------- // @@ -152,6 +155,7 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "MVI(Y)?$")>; // Move character def : InstRW<[LSU, LSU, LSU, FXU, Lat8, GroupAlone], (instregex "MVC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCL(E|U)?$")>; // Pseudo -> reg move def : InstRW<[FXU], (instregex "COPY(_TO_REGCLASS)?$")>; @@ -226,6 +230,7 @@ def : InstRW<[LSU], (instregex "LLG(C|F|H|T|FRL|HRL)$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[FXU, LSU, Lat5], (instregex "STCM(H|Y)?$")>; //===----------------------------------------------------------------------===// // Multi-register moves @@ -235,6 +240,9 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>; def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], (instregex "LM(H|Y|G)?$")>; +// Load multiple disjoint +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>; + // Store multiple (estimated average of 3 ops) def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone], (instregex "STM(H|Y|G)?$")>; @@ -246,6 +254,7 @@ def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone], def : InstRW<[FXU], (instregex "LRV(G)?R$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "LRV(G|H)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "STRV(G|H)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCIN$")>; //===----------------------------------------------------------------------===// // Load address instructions @@ -285,7 +294,7 @@ def : InstRW<[FXU], (instregex "IILL(64)?$")>; // Addition //===----------------------------------------------------------------------===// -def : InstRW<[FXU, LSU, Lat5], (instregex "A(Y|SI)?$")>; +def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?(Y|SI)?$")>; def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "AH(Y)?$")>; def : InstRW<[FXU], (instregex "AIH$")>; def : InstRW<[FXU], (instregex "AFI(Mux)?$")>; @@ -294,15 +303,14 @@ def : InstRW<[FXU], (instregex "AGHI(K)?$")>; def : InstRW<[FXU], (instregex "AGR(K)?$")>; def : InstRW<[FXU], (instregex "AHI(K)?$")>; def : InstRW<[FXU], (instregex "AHIMux(K)?$")>; -def : InstRW<[FXU, LSU, Lat5], (instregex "AL(Y)?$")>; def : InstRW<[FXU], (instregex "AL(FI|HSIK)$")>; -def : InstRW<[FXU, LSU, Lat5], (instregex "ALG(F)?$")>; +def : InstRW<[FXU, LSU, Lat5], (instregex "ALGF$")>; def : InstRW<[FXU], (instregex "ALGHSIK$")>; def : InstRW<[FXU], (instregex "ALGF(I|R)$")>; def : InstRW<[FXU], (instregex "ALGR(K)?$")>; def : InstRW<[FXU], (instregex "ALR(K)?$")>; def : InstRW<[FXU], (instregex "AR(K)?$")>; -def : InstRW<[FXU, LSU, Lat5], (instregex "AG(SI)?$")>; +def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>; // Logical addition with carry def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "ALC(G)?$")>; @@ -395,11 +403,17 @@ def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>; def : InstRW<[FXU, Lat5], (instregex "MGHI$")>; def : InstRW<[FXU, Lat5], (instregex "MHI$")>; def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>; +def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; //===----------------------------------------------------------------------===// // Division and remainder //===----------------------------------------------------------------------===// +def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, FXU, Lat30, GroupAlone], + (instregex "DR$")>; +def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone], + (instregex "D$")>; def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, Lat30, GroupAlone], @@ -416,7 +430,8 @@ def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone], def : InstRW<[FXU], (instregex "SLL(G|K)?$")>; def : InstRW<[FXU], (instregex "SRL(G|K)?$")>; def : InstRW<[FXU], (instregex "SRA(G|K)?$")>; -def : InstRW<[FXU, Lat2], (instregex "SLA(K)?$")>; +def : InstRW<[FXU, Lat2], (instregex "SLA(G|K)?$")>; +def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>; // Rotate def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>; @@ -465,7 +480,7 @@ def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "CGFR$")>; // Compare logical character def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "CLC$")>; - +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLCL(E|U)?$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>; // Test under mask @@ -476,6 +491,9 @@ def : InstRW<[FXU], (instregex "TMHL(64)?$")>; def : InstRW<[FXU], (instregex "TMLH(64)?$")>; def : InstRW<[FXU], (instregex "TMLL(64)?$")>; +// Compare logical characters under mask +def : InstRW<[FXU, LSU, Lat5], (instregex "CLM(H|Y)?$")>; + //===----------------------------------------------------------------------===// // Prefetch //===----------------------------------------------------------------------===// @@ -519,6 +537,42 @@ def : InstRW<[FXU, FXU, LSU, LSU, Lat6, GroupAlone], (instregex "STPQ$")>; // Load pair disjoint def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>; +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>; +def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>; + +def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone], + (instregex "(M|D)P$")>; +def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone], + (instregex "SRP$")>; +def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>; +def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; + //===----------------------------------------------------------------------===// // Access registers //===----------------------------------------------------------------------===// @@ -571,13 +625,30 @@ def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>; // String instructions def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>; +def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; // Move with key def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>; +// Monitor call +def : InstRW<[FXU], (instregex "MC$")>; + +// Extract CPU attribute +def : InstRW<[FXU, Lat30], (instregex "ECAG$")>; + // Extract CPU Time def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>; +// Extract PSW +def : InstRW<[FXU, Lat30], (instregex "EPSW$")>; + // Execute def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>; @@ -740,14 +811,17 @@ def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)B$")>; def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>; def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>; +// Divide to integer +def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>; + //===----------------------------------------------------------------------===// // FP: Comparisons //===----------------------------------------------------------------------===// // Compare -def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)B$")>; -def : InstRW<[FPU], (instregex "C(E|D)BR$")>; -def : InstRW<[FPU, FPU, Lat30], (instregex "CXBR$")>; +def : InstRW<[FPU, LSU, Lat12], (instregex "(K|C)(E|D)B$")>; +def : InstRW<[FPU], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[FPU, FPU, Lat30], (instregex "(K|C)XBR$")>; // Test Data Class def : InstRW<[FPU, LSU, Lat15], (instregex "TC(E|D)B$")>; @@ -765,5 +839,224 @@ def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>; def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>; def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>; + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[FPU], (instregex "LT(D|E)R$")>; +def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>; +def : InstRW<[FPU], (instregex "LEXR$")>; +def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSU], (instregex "LDE$")>; +def : InstRW<[FXU], (instregex "LDER$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>; + +// Convert from fixed +def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>; +def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>; +def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>; +def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>; +def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[FPU], (instregex "THD(E)?R$")>; +def : InstRW<[FPU], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>; +def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>; +def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[FPU], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>; +def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>; +def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[FPU], (instregex "FIER$")>; +def : InstRW<[FPU], (instregex "FIDR$")>; +def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>; +def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>; +def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>; + +// Multiply and add / subtract +def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>; +def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>; +def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>; + +// Division +def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>; +def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>; +def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>; +def : InstRW<[FPU], (instregex "C(E|D)R$")>; +def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>; +def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>; +def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[DFU, Lat20], (instregex "LDETR$")>; +def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>; + +// Convert to fixed / logical +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>; +def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>; +def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>; + +// Perform floating-point operation +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>; +def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>; +def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[DFU, Lat30], (instregex "QADTR$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>; +def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>; +def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; + } diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td index 8ab6c826f1ed0cbc63a1cbff9d853cb0ec648149..d38ca64d2e9b99d101c3b3866b03a416c61cd282 100644 --- a/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -59,6 +59,7 @@ def : WriteRes { let Latency = 30; let NumMicroOps = 0;} def ZEC12_FXUnit : ProcResource<2>; def ZEC12_LSUnit : ProcResource<2>; def ZEC12_FPUnit : ProcResource<1>; +def ZEC12_DFUnit : ProcResource<1>; def ZEC12_VBUnit : ProcResource<1>; // Subtarget specific definitions of scheduling resources. @@ -67,6 +68,8 @@ def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 8; } def : WriteRes { let Latency = 9; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } def : WriteRes; // Virtual Branching Unit // -------------------------- INSTRUCTIONS ---------------------------------- // @@ -155,6 +158,7 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "MVI(Y)?$")>; // Move character def : InstRW<[LSU, LSU, LSU, FXU, Lat8, GroupAlone], (instregex "MVC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCL(E|U)?$")>; // Pseudo -> reg move def : InstRW<[FXU], (instregex "COPY(_TO_REGCLASS)?$")>; @@ -236,6 +240,7 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "LLG(F|T)?AT$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>; +def : InstRW<[FXU, LSU, Lat5], (instregex "STCM(H|Y)?$")>; //===----------------------------------------------------------------------===// // Multi-register moves @@ -245,6 +250,9 @@ def : InstRW<[FXU, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>; def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], (instregex "LM(H|Y|G)?$")>; +// Load multiple disjoint +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "LMD$")>; + // Store multiple (estimated average of 3 ops) def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone], (instregex "STM(H|Y|G)?$")>; @@ -256,6 +264,7 @@ def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone], def : InstRW<[FXU], (instregex "LRV(G)?R$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "LRV(G|H)?$")>; def : InstRW<[FXU, LSU, Lat5], (instregex "STRV(G|H)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVCIN$")>; //===----------------------------------------------------------------------===// // Load address instructions @@ -295,7 +304,7 @@ def : InstRW<[FXU], (instregex "IILL(64)?$")>; // Addition //===----------------------------------------------------------------------===// -def : InstRW<[FXU, LSU, Lat5], (instregex "A(Y|SI)?$")>; +def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?(Y|SI)?$")>; def : InstRW<[FXU, LSU, Lat6], (instregex "AH(Y)?$")>; def : InstRW<[FXU], (instregex "AIH$")>; def : InstRW<[FXU], (instregex "AFI(Mux)?$")>; @@ -304,15 +313,14 @@ def : InstRW<[FXU], (instregex "AGHI(K)?$")>; def : InstRW<[FXU], (instregex "AGR(K)?$")>; def : InstRW<[FXU], (instregex "AHI(K)?$")>; def : InstRW<[FXU], (instregex "AHIMux(K)?$")>; -def : InstRW<[FXU, LSU, Lat5], (instregex "AL(Y)?$")>; def : InstRW<[FXU], (instregex "AL(FI|HSIK)$")>; -def : InstRW<[FXU, LSU, Lat5], (instregex "ALG(F)?$")>; +def : InstRW<[FXU, LSU, Lat5], (instregex "ALGF$")>; def : InstRW<[FXU], (instregex "ALGHSIK$")>; def : InstRW<[FXU], (instregex "ALGF(I|R)$")>; def : InstRW<[FXU], (instregex "ALGR(K)?$")>; def : InstRW<[FXU], (instregex "ALR(K)?$")>; def : InstRW<[FXU], (instregex "AR(K)?$")>; -def : InstRW<[FXU, LSU, Lat5], (instregex "AG(SI)?$")>; +def : InstRW<[FXU, LSU, Lat5], (instregex "A(L)?G(SI)?$")>; // Logical addition with carry def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "ALC(G)?$")>; @@ -405,11 +413,17 @@ def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>; def : InstRW<[FXU, Lat5], (instregex "MGHI$")>; def : InstRW<[FXU, Lat5], (instregex "MHI$")>; def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>; +def : InstRW<[FXU, Lat7, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; //===----------------------------------------------------------------------===// // Division and remainder //===----------------------------------------------------------------------===// +def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, FXU, Lat30, GroupAlone], + (instregex "DR$")>; +def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone], + (instregex "D$")>; def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, Lat30, GroupAlone], @@ -426,7 +440,8 @@ def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone], def : InstRW<[FXU], (instregex "SLL(G|K)?$")>; def : InstRW<[FXU], (instregex "SRL(G|K)?$")>; def : InstRW<[FXU], (instregex "SRA(G|K)?$")>; -def : InstRW<[FXU], (instregex "SLA(K)?$")>; +def : InstRW<[FXU], (instregex "SLA(G|K)?$")>; +def : InstRW<[FXU, FXU, FXU, FXU, Lat8], (instregex "S(L|R)D(A|L)$")>; // Rotate def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>; @@ -475,7 +490,7 @@ def : InstRW<[FXU, Lat2], (instregex "CGFR$")>; // Compare logical character def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "CLC$")>; - +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLCL(E|U)?$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>; // Test under mask @@ -486,6 +501,9 @@ def : InstRW<[FXU], (instregex "TMHL(64)?$")>; def : InstRW<[FXU], (instregex "TMLH(64)?$")>; def : InstRW<[FXU], (instregex "TMLL(64)?$")>; +// Compare logical characters under mask +def : InstRW<[FXU, LSU, Lat5], (instregex "CLM(H|Y)?$")>; + //===----------------------------------------------------------------------===// // Prefetch and execution hint //===----------------------------------------------------------------------===// @@ -531,6 +549,42 @@ def : InstRW<[FXU, FXU, LSU, LSU, Lat6, GroupAlone], (instregex "STPQ$")>; // Load pair disjoint def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; +//===----------------------------------------------------------------------===// +// Translate and convert +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>; +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>; + +//===----------------------------------------------------------------------===// +// Message-security assist +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[FXU, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC)$")>; + +//===----------------------------------------------------------------------===// +// Decimal arithmetic +//===----------------------------------------------------------------------===// + +def : InstRW<[FXU, DFU, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>; +def : InstRW<[FXU, DFU, FXU, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>; + +def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat15, GroupAlone], + (instregex "(A|S|ZA)P$")>; +def : InstRW<[FXU, FXU, DFU2, LSU, LSU, LSU, LSU, Lat30, GroupAlone], + (instregex "(M|D)P$")>; +def : InstRW<[FXU, FXU, DFU2, LSU, LSU, Lat15, GroupAlone], + (instregex "SRP$")>; +def : InstRW<[DFU2, LSU, LSU, LSU, LSU, Lat11, GroupAlone], (instregex "CP$")>; +def : InstRW<[DFU2, LSU, LSU, Lat3, GroupAlone], (instregex "TP$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; + //===----------------------------------------------------------------------===// // Access registers //===----------------------------------------------------------------------===// @@ -609,13 +663,30 @@ def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>; // String instructions def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>; +def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; + +// Various complex instructions +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; // Move with key def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>; +// Monitor call +def : InstRW<[FXU], (instregex "MC$")>; + +// Extract CPU attribute +def : InstRW<[FXU, Lat30], (instregex "ECAG$")>; + // Extract CPU Time def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>; +// Extract PSW +def : InstRW<[FXU, Lat30], (instregex "EPSW$")>; + // Execute def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>; @@ -778,14 +849,17 @@ def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)B$")>; def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>; def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>; +// Divide to integer +def : InstRW<[FPU, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>; + //===----------------------------------------------------------------------===// // FP: Comparisons //===----------------------------------------------------------------------===// // Compare -def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)B$")>; -def : InstRW<[FPU], (instregex "C(E|D)BR$")>; -def : InstRW<[FPU, FPU, Lat30], (instregex "CXBR$")>; +def : InstRW<[FPU, LSU, Lat12], (instregex "(K|C)(E|D)B$")>; +def : InstRW<[FPU], (instregex "(K|C)(E|D)BR$")>; +def : InstRW<[FPU, FPU, Lat30], (instregex "(K|C)XBR$")>; // Test Data Class def : InstRW<[FPU, LSU, Lat15], (instregex "TC(E|D)B$")>; @@ -803,5 +877,230 @@ def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>; def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>; def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>; + +// --------------------- Hexadecimal floating point ------------------------- // + +//===----------------------------------------------------------------------===// +// HFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[FPU], (instregex "LT(D|E)R$")>; +def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>; +def : InstRW<[FPU], (instregex "LEXR$")>; +def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>; + +// Load lengthened +def : InstRW<[LSU], (instregex "LDE$")>; +def : InstRW<[FXU], (instregex "LDER$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>; + +// Convert from fixed +def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>; +def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>; +def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>; + +// Convert to fixed +def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>; +def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>; +def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>; + +// Convert BFP to HFP / HFP to BFP. +def : InstRW<[FPU], (instregex "THD(E)?R$")>; +def : InstRW<[FPU], (instregex "TB(E)?DR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load Complement / Negative / Positive +def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>; +def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>; +def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>; + +// Halve +def : InstRW<[FPU], (instregex "H(E|D)R$")>; + +// Square root +def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>; +def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>; +def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>; + +// Load FP integer +def : InstRW<[FPU], (instregex "FIER$")>; +def : InstRW<[FPU], (instregex "FIDR$")>; +def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>; +def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>; +def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>; + +// Subtraction +def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>; +def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>; +def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>; + +// Multiply +def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>; +def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>; +def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>; +def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>; + +// Multiply and add / subtract +def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>; +def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>; +def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>; +def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>; + +// Division +def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>; +def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>; +def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>; + +//===----------------------------------------------------------------------===// +// HFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>; +def : InstRW<[FPU], (instregex "C(E|D)R$")>; +def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>; + + +// ------------------------ Decimal floating point -------------------------- // + +//===----------------------------------------------------------------------===// +// DFP: Move instructions +//===----------------------------------------------------------------------===// + +// Load and Test +def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>; +def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Conversion instructions +//===----------------------------------------------------------------------===// + +// Load rounded +def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>; +def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>; + +// Load lengthened +def : InstRW<[DFU, Lat20], (instregex "LDETR$")>; +def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>; + +// Convert from fixed / logical +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>; +def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>; + +// Convert to fixed / logical +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>; +def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>; +def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>; + +// Convert from / to signed / unsigned packed +def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>; +def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>; +def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>; +def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>; + +// Convert from / to zoned +def : InstRW<[LSU, DFU2, Lat7, GroupAlone], (instregex "CDZT$")>; +def : InstRW<[LSU, LSU, DFU2, DFU2, Lat10, GroupAlone], (instregex "CXZT$")>; +def : InstRW<[FXU, LSU, DFU, Lat11, GroupAlone], (instregex "CZDT$")>; +def : InstRW<[FXU, LSU, DFU, DFU, Lat15, GroupAlone], (instregex "CZXT$")>; + +// Perform floating-point operation +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>; + +//===----------------------------------------------------------------------===// +// DFP: Unary arithmetic +//===----------------------------------------------------------------------===// + +// Load FP integer +def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>; +def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>; + +// Extract biased exponent +def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>; +def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>; + +// Extract significance +def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>; +def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Binary arithmetic +//===----------------------------------------------------------------------===// + +// Addition +def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>; + +// Subtraction +def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>; + +// Multiply +def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>; + +// Division +def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>; + +// Quantize +def : InstRW<[DFU, Lat30], (instregex "QADTR$")>; +def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>; + +// Reround +def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>; + +// Shift significand left/right +def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; + +// Insert biased exponent +def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>; +def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>; + +//===----------------------------------------------------------------------===// +// DFP: Comparisons +//===----------------------------------------------------------------------===// + +// Compare +def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>; +def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>; + +// Compare biased exponent +def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>; +def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>; + +// Test Data Class/Group +def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>; +def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; + } diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index 263aff8b7bfb988f2c81472ed81ef6699f9af962..7391df8342efd11157dc72742256d418829e0bd3 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index ce07ea3318a56565937ca1fced8dc700ebb4c758..0ab0c2f259152be6c4154f26031520efe8952ff7 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -37,12 +37,15 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, const TargetMachine &TM) : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), - HasPopulationCount(false), HasFastSerialization(false), - HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), + HasPopulationCount(false), HasMessageSecurityAssist4(false), + HasFastSerialization(false), HasInterlockedAccess1(false), + HasMiscellaneousExtensions(false), HasExecutionHint(false), HasLoadAndTrap(false), HasTransactionalExecution(false), HasProcessorAssist(false), + HasDFPZonedConversion(false), HasVector(false), HasLoadStoreOnCond2(false), - HasLoadAndZeroRightmostByte(false), + HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false), + HasDFPPackedConversion(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), TSInfo(), FrameLowering() {} diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index cdb61327a16af6d0c30cbbd8a8e7a0889017bb8b..be480f03c5724ae9ab29497818c2827b95f5c9c8 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -19,8 +19,8 @@ #include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetSubtargetInfo.h" #include @@ -39,6 +39,7 @@ protected: bool HasHighWord; bool HasFPExtension; bool HasPopulationCount; + bool HasMessageSecurityAssist4; bool HasFastSerialization; bool HasInterlockedAccess1; bool HasMiscellaneousExtensions; @@ -46,9 +47,12 @@ protected: bool HasLoadAndTrap; bool HasTransactionalExecution; bool HasProcessorAssist; + bool HasDFPZonedConversion; bool HasVector; bool HasLoadStoreOnCond2; bool HasLoadAndZeroRightmostByte; + bool HasMessageSecurityAssist5; + bool HasDFPPackedConversion; private: Triple TargetTriple; @@ -104,6 +108,10 @@ public: // Return true if the target has the population-count facility. bool hasPopulationCount() const { return HasPopulationCount; } + // Return true if the target has the message-security-assist + // extension facility 4. + bool hasMessageSecurityAssist4() const { return HasMessageSecurityAssist4; } + // Return true if the target has the fast-serialization facility. bool hasFastSerialization() const { return HasFastSerialization; } @@ -127,11 +135,21 @@ public: // Return true if the target has the processor-assist facility. bool hasProcessorAssist() const { return HasProcessorAssist; } + // Return true if the target has the DFP zoned-conversion facility. + bool hasDFPZonedConversion() const { return HasDFPZonedConversion; } + // Return true if the target has the load-and-zero-rightmost-byte facility. bool hasLoadAndZeroRightmostByte() const { return HasLoadAndZeroRightmostByte; } + // Return true if the target has the message-security-assist + // extension facility 5. + bool hasMessageSecurityAssist5() const { return HasMessageSecurityAssist5; } + + // Return true if the target has the DFP packed-conversion facility. + bool hasDFPPackedConversion() const { return HasDFPPackedConversion; } + // Return true if the target has the vector facility. bool hasVector() const { return HasVector; } diff --git a/lib/Target/SystemZ/SystemZTDC.cpp b/lib/Target/SystemZ/SystemZTDC.cpp index 96a9ef82c125f77d5a81124cd3a8d7d5db72c2e2..5dbd23d420a309c39f5903f38e15467f608e8882 100644 --- a/lib/Target/SystemZ/SystemZTDC.cpp +++ b/lib/Target/SystemZ/SystemZTDC.cpp @@ -47,10 +47,10 @@ #include "SystemZ.h" #include "llvm/ADT/MapVector.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index ede5005fa49160a41c9b78ff0552648912a8d616..cb81c0e5276e7e246799f60818df64e89380853f 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -7,14 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "SystemZTargetMachine.h" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "SystemZ.h" #include "SystemZMachineScheduler.h" -#include "SystemZTargetMachine.h" #include "SystemZTargetTransformInfo.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" @@ -119,7 +119,7 @@ namespace { /// SystemZ Code Generator Pass Configuration Options. class SystemZPassConfig : public TargetPassConfig { public: - SystemZPassConfig(SystemZTargetMachine *TM, PassManagerBase &PM) + SystemZPassConfig(SystemZTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} SystemZTargetMachine &getSystemZTargetMachine() const { @@ -212,7 +212,7 @@ void SystemZPassConfig::addPreEmitPass() { } TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { - return new SystemZPassConfig(this, PM); + return new SystemZPassConfig(*this, PM); } TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index a10ca64fa6329d3045bc04e3d606980e17ffd21c..eb2f17a2091c3418bf6eb6139537a8944356e332 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -51,6 +51,8 @@ public: } bool targetSchedulesPostRAScheduling() const override { return true; }; + + bool isMachineVerifierClean() const override { return false; } }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index e74c9a80515d84d8238900e166f6e4061c49f595..422c16b8eb625c4a1019d900a361aa88c6dbae40 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -302,7 +302,7 @@ unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) { return 0; } -unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) const { if (!Vector) return 64; if (ST->hasVector()) @@ -325,6 +325,30 @@ int SystemZTTIImpl::getArithmeticInstrCost( unsigned ScalarBits = Ty->getScalarSizeInBits(); + // Div with a constant which is a power of 2 will be converted by + // DAGCombiner to use shifts. With vector shift-element instructions, a + // vector sdiv costs about as much as a scalar one. + const unsigned SDivCostEstimate = 4; + bool SDivPow2 = false; + bool UDivPow2 = false; + if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) && + Args.size() == 2) { + const ConstantInt *CI = nullptr; + if (const Constant *C = dyn_cast(Args[1])) { + if (C->getType()->isVectorTy()) + CI = dyn_cast_or_null(C->getSplatValue()); + else + CI = dyn_cast(C); + } + if (CI != nullptr && + (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) { + if (Opcode == Instruction::SDiv) + SDivPow2 = true; + else + UDivPow2 = true; + } + } + if (Ty->isVectorTy()) { assert (ST->hasVector() && "getArithmeticInstrCost() called with vector type."); unsigned VF = Ty->getVectorNumElements(); @@ -333,10 +357,13 @@ int SystemZTTIImpl::getArithmeticInstrCost( // These vector operations are custom handled, but are still supported // with one instruction per vector, regardless of element size. if (Opcode == Instruction::Shl || Opcode == Instruction::LShr || - Opcode == Instruction::AShr) { + Opcode == Instruction::AShr || UDivPow2) { return NumVectors; } + if (SDivPow2) + return (NumVectors * SDivCostEstimate); + // These FP operations are supported with a single vector instruction for // double (base implementation assumes float generally costs 2). For // FP128, the scalar cost is 1, and there is no overhead since the values @@ -395,6 +422,11 @@ int SystemZTTIImpl::getArithmeticInstrCost( // 2 * ipm sequences ; xor ; shift ; compare return 7; + if (UDivPow2) + return 1; + if (SDivPow2) + return SDivCostEstimate; + // An extra extension for narrow types is needed. if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem)) // sext of op(s) for narrow types @@ -530,9 +562,10 @@ static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) { if (CmpInst *CI = dyn_cast(I->getOperand(0))) OpTy = CI->getOperand(0)->getType(); else if (Instruction *LogicI = dyn_cast(I->getOperand(0))) - if (CmpInst *CI0 = dyn_cast(LogicI->getOperand(0))) - if (isa(LogicI->getOperand(1))) - OpTy = CI0->getOperand(0)->getType(); + if (LogicI->getNumOperands() == 2) + if (CmpInst *CI0 = dyn_cast(LogicI->getOperand(0))) + if (isa(LogicI->getOperand(1))) + OpTy = CI0->getOperand(0)->getType(); if (OpTy != nullptr) { if (VF == 1) { @@ -676,7 +709,6 @@ int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondT const Instruction *I) { if (ValTy->isVectorTy()) { assert (ST->hasVector() && "getCmpSelInstrCost() called with vector type."); - assert (CondTy == nullptr || CondTy->isVectorTy()); unsigned VF = ValTy->getVectorNumElements(); // Called with a compare instruction. diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 3766ed45b8c4e47c1e5281e2602bf188cc963696..bdba7601eb78b8a1280b894aa0183ed588d91821 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -53,8 +53,9 @@ public: /// @{ unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; + bool prefersVectorizedAddressing() { return false; } bool supportsEfficientVectorElementLoadStore() { return true; } bool enableInterleavedAccessVectorization() { return true; } diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 5d1616d037798bca1b09343acc56114c9627789a..42d92622d6c811e57cb96011530c27138bca64fb 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -14,12 +14,12 @@ #include "llvm-c/Target.h" #include "llvm-c/Initialization.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include using namespace llvm; diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 50272fda56ded9d5c21eef0c13be5b97a320ab56..f941891f318331cf0a0ad1862868fc211f463c22 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -24,7 +25,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" @@ -44,7 +44,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx, const TargetMachine &TM) { Ctx = &ctx; // `Initialize` can be called more than once. - if (Mang != nullptr) delete Mang; + delete Mang; Mang = new Mangler(); InitMCObjectFileInfo(TM.getTargetTriple(), TM.isPositionIndependent(), TM.getCodeModel(), *Ctx); @@ -240,6 +240,20 @@ MCSection *TargetLoweringObjectFile::SectionForGlobal( if (GO->hasSection()) return getExplicitSectionGlobal(GO, Kind, TM); + if (auto *GVar = dyn_cast(GO)) { + auto Attrs = GVar->getAttributes(); + if ((Attrs.hasAttribute("bss-section") && Kind.isBSS()) || + (Attrs.hasAttribute("data-section") && Kind.isData()) || + (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly())) { + return getExplicitSectionGlobal(GO, Kind, TM); + } + } + + if (auto *F = dyn_cast(GO)) { + if (F->hasFnAttribute("implicit-section-name")) + return getExplicitSectionGlobal(GO, Kind, TM); + } + // Use default section depending on the 'type' of global return SelectSectionForGlobal(GO, Kind, TM); } diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index df12e0e88e3bb073da3191b9a1764c6b76650119..01f14939864f0598596342979085b322b57f4e30 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/TargetMachine.h" #include "llvm-c/Core.h" #include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Module.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CodeGenCWrappers.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index b5f53114d3e16fc2605b699a12b249f09dc71c75..9be11da9afac81a08b4e9781a38346e80e61d5d0 100644 --- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -15,8 +15,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index c6158720d62f1a4e62d8ef927ad2c2d31b7084d1..b1de84d7e8e61c75193714ccf385a9222bc41d80 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -16,9 +16,9 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #include "llvm/ADT/SmallVector.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/Wasm.h" namespace llvm { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 7c78285fbda4557632a506390b34616ed1596639..4f20096c1583010cbb08137c78e963a486c7e034 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index 2dcec5263fa1e0715924f4060bfa477845f324e4..5f8c78ed16834a5eb8979dd00a6532c42b09a068 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -22,7 +22,7 @@ using namespace llvm; WebAssemblyMCAsmInfoELF::~WebAssemblyMCAsmInfoELF() {} WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) { - PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; + CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; // TODO: What should MaxInstLength be? @@ -55,7 +55,7 @@ WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) { WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {} WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { - PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; + CodePointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; // TODO: What should MaxInstLength be? diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index a0b008947491a1cbfb276d124ecd5ef24ebd6d2c..3e3b52fca5691387c50f3e471cde27b15677970a 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" @@ -36,7 +36,6 @@ STATISTIC(MCNumFixups, "Number of MC fixups created."); namespace { class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { const MCInstrInfo &MCII; - MCContext &Ctx; // Implementation generated by tablegen. uint64_t getBinaryCodeForInstr(const MCInst &MI, @@ -48,14 +47,12 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { const MCSubtargetInfo &STI) const override; public: - WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) - : MCII(mcii), Ctx(ctx) {} + WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {} }; } // end anonymous namespace -MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - MCContext &Ctx) { - return new WebAssemblyMCCodeEmitter(MCII, Ctx); +MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) { + return new WebAssemblyMCCodeEmitter(MCII); } void WebAssemblyMCCodeEmitter::encodeInstruction( @@ -89,11 +86,9 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) { encodeSLEB128(int64_t(MO.getImm()), OS); } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) { - Fixups.push_back(MCFixup::create( - OS.tell() - Start, MCConstantExpr::create(MO.getImm(), Ctx), - MCFixupKind(WebAssembly::fixup_code_global_index), MI.getLoc())); - ++MCNumFixups; - encodeULEB128(uint64_t(MO.getImm()), OS); + llvm_unreachable("wasm globals should only be accessed symbolicly"); + } else if (Info.OperandType == WebAssembly::OPERAND_SIGNATURE) { + encodeSLEB128(int64_t(MO.getImm()), OS); } else { encodeULEB128(uint64_t(MO.getImm()), OS); } @@ -133,6 +128,9 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) { FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32); PaddedSize = 5; + } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) { + FixupKind = MCFixupKind(WebAssembly::fixup_code_global_index); + PaddedSize = 5; } else { llvm_unreachable("unexpected symbolic operand kind"); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 9fd3ec81c258f4209e972611c462eb8f191671f3..9580eeaa33d7347ec46d838cc092f67908b36af0 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -74,7 +74,7 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/, static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo & /*MRI*/, MCContext &Ctx) { - return createWebAssemblyMCCodeEmitter(MCII, Ctx); + return createWebAssemblyMCCodeEmitter(MCII); } static MCAsmBackend *createAsmBackend(const Target & /*T*/, diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 795658ca96b4c0dd2ac53e866ea117a985157fdf..4d676c32a09c511dce6b6449cbad36d6798e79b0 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -15,9 +15,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Wasm.h" namespace llvm { @@ -35,8 +35,7 @@ class raw_pwrite_stream; Target &getTheWebAssemblyTarget32(); Target &getTheWebAssemblyTarget64(); -MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - MCContext &Ctx); +MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 68d6747298dfc537e8e4bfd6f6a759bc33b7f587..5ad147e5e59603e62395c80307e7dcc8d0dceb04 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -16,9 +16,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Wasm.h" namespace llvm { @@ -46,9 +46,7 @@ public: /// .functype virtual void emitIndirectFunctionType(StringRef name, SmallVectorImpl &Params, - SmallVectorImpl &Results) { - llvm_unreachable("emitIndirectFunctionType not implemented"); - } + SmallVectorImpl &Results) = 0; /// .indidx virtual void emitIndIdx(const MCExpr *Value) = 0; /// .import_global diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 2846ec5e933773e0349d415e74bf91d2010d9998..9cf77829f3bc26f94961b3f29a5c4d373a416a8b 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -13,14 +13,18 @@ /// //===----------------------------------------------------------------------===// -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "MCTargetDesc/WebAssemblyFixupKinds.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCWasmObjectWriter.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Wasm.h" + using namespace llvm; namespace { @@ -29,8 +33,8 @@ public: explicit WebAssemblyWasmObjectWriter(bool Is64Bit); private: - unsigned getRelocType(MCContext &Ctx, const MCValue &Target, - const MCFixup &Fixup, bool IsPCRel) const override; + unsigned getRelocType(const MCValue &Target, + const MCFixup &Fixup) const override; }; } // end anonymous namespace @@ -39,31 +43,33 @@ WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit) // Test whether the given expression computes a function address. static bool IsFunctionExpr(const MCExpr *Expr) { - if (const MCSymbolRefExpr *SyExp = - dyn_cast(Expr)) + if (auto SyExp = dyn_cast(Expr)) return cast(SyExp->getSymbol()).isFunction(); - if (const MCBinaryExpr *BinOp = - dyn_cast(Expr)) + if (auto BinOp = dyn_cast(Expr)) return IsFunctionExpr(BinOp->getLHS()) != IsFunctionExpr(BinOp->getRHS()); - if (const MCUnaryExpr *UnOp = - dyn_cast(Expr)) + if (auto UnOp = dyn_cast(Expr)) return IsFunctionExpr(UnOp->getSubExpr()); return false; } -unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx, - const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { +static bool IsFunctionType(const MCValue &Target) { + const MCSymbolRefExpr *RefA = Target.getSymA(); + return RefA && RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX; +} + +unsigned +WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup) const { // WebAssembly functions are not allocated in the data address space. To // resolve a pointer to a function, we must use a special relocation type. bool IsFunction = IsFunctionExpr(Fixup.getValue()); - assert(!IsPCRel); switch (unsigned(Fixup.getKind())) { + case WebAssembly::fixup_code_global_index: + return wasm::R_WEBASSEMBLY_GLOBAL_INDEX_LEB; case WebAssembly::fixup_code_sleb128_i32: if (IsFunction) return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB; @@ -71,6 +77,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx, case WebAssembly::fixup_code_sleb128_i64: llvm_unreachable("fixup_sleb128_i64 not implemented yet"); case WebAssembly::fixup_code_uleb128_i32: + if (IsFunctionType(Target)) + return wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB; if (IsFunction) return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB; return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB; diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index d9c2dba5bace33733cd095457d6a414a4be62fc3..f51585a10ca122358967d9f8e960eda28c952c27 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -33,6 +33,8 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -45,10 +47,11 @@ using namespace llvm; //===----------------------------------------------------------------------===// MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); const TargetRegisterClass *TRC = MRI->getRegClass(RegNo); for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32}) - if (TRC->hasType(T)) + if (TRI->isTypeLegalForClass(*TRC, T)) return T; DEBUG(errs() << "Unknown type for register number: " << RegNo); llvm_unreachable("Unknown register type"); @@ -93,13 +96,6 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { MCConstantExpr::create(Size, OutContext)); } } - - if (!TM.getTargetTriple().isOSBinFormatELF()) { - MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo(); - getTargetStreamer()->emitGlobal(MMIW.getGlobals()); - if (MMIW.hasStackPointerGlobal()) - getTargetStreamer()->emitStackPointer(MMIW.getStackPointerGlobal()); - } } void WebAssemblyAsmPrinter::EmitConstantPool() { @@ -217,9 +213,13 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) { if (const GlobalValue *GV = dyn_cast(CV)) - if (GV->getValueType()->isFunctionTy()) + if (GV->getValueType()->isFunctionTy()) { + MCSymbol* Sym = getSymbol(GV); + if (!isa(Sym)) + cast(Sym)->setIsFunction(true); return MCSymbolRefExpr::create( - getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); + Sym, MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); + } return AsmPrinter::lowerConstant(CV); } diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index 40e1928197bcf77e52fd312aa137934a7f1de794..1691808d05a0f82e1fdc617b7e64e0c6ce4770e2 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -17,8 +17,8 @@ /// ////===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" #include "llvm/ADT/PriorityQueue.h" diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index bd11d1b469063f69836b036433f673629d433492..21e0f6b23777a99f25a9498f1aec5d89f3a2e5cf 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -18,8 +18,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp index bc6360aafd61c9d4bf0786afc2ed0b8a8037f30d..b2330a2320933156b1324d0ef7a41da40eaa9954 100644 --- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp @@ -22,8 +22,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index e7fd4ef33e1c5385d58eb5624c922fcb5398a52b..09338a4898e03eb8799c627425516b9bee755c3b 100644 --- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -16,8 +16,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" @@ -597,11 +597,11 @@ bool WebAssemblyFastISel::fastLowerArguments() { unsigned i = 0; for (auto const &Arg : F->args()) { const AttributeList &Attrs = F->getAttributes(); - if (Attrs.hasAttribute(i+1, Attribute::ByVal) || - Attrs.hasAttribute(i+1, Attribute::SwiftSelf) || - Attrs.hasAttribute(i+1, Attribute::SwiftError) || - Attrs.hasAttribute(i+1, Attribute::InAlloca) || - Attrs.hasAttribute(i+1, Attribute::Nest)) + if (Attrs.hasParamAttribute(i, Attribute::ByVal) || + Attrs.hasParamAttribute(i, Attribute::SwiftSelf) || + Attrs.hasParamAttribute(i, Attribute::SwiftError) || + Attrs.hasParamAttribute(i, Attribute::InAlloca) || + Attrs.hasParamAttribute(i, Attribute::Nest)) return false; Type *ArgTy = Arg.getType(); @@ -747,18 +747,18 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { return false; const AttributeList &Attrs = Call->getAttributes(); - if (Attrs.hasAttribute(i+1, Attribute::ByVal) || - Attrs.hasAttribute(i+1, Attribute::SwiftSelf) || - Attrs.hasAttribute(i+1, Attribute::SwiftError) || - Attrs.hasAttribute(i+1, Attribute::InAlloca) || - Attrs.hasAttribute(i+1, Attribute::Nest)) + if (Attrs.hasParamAttribute(i, Attribute::ByVal) || + Attrs.hasParamAttribute(i, Attribute::SwiftSelf) || + Attrs.hasParamAttribute(i, Attribute::SwiftError) || + Attrs.hasParamAttribute(i, Attribute::InAlloca) || + Attrs.hasParamAttribute(i, Attribute::Nest)) return false; unsigned Reg; - if (Attrs.hasAttribute(i+1, Attribute::SExt)) + if (Attrs.hasParamAttribute(i, Attribute::SExt)) Reg = getRegForSignedValue(V); - else if (Attrs.hasAttribute(i+1, Attribute::ZExt)) + else if (Attrs.hasParamAttribute(i, Attribute::ZExt)) Reg = getRegForUnsignedValue(V); else Reg = getRegForValue(V); diff --git a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp index 2bbf7a2b42f9a46b8405b2838f1bf0c76e30b542..41f315c2825b6ec0fe212f0ea764985724283de7 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -26,8 +26,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/ADT/PriorityQueue.h" diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 4209bc333f230640d83efbc83a48743312defce1..a37d6136e44ed87c5efdc228c790a8c7124d6ad2 100644 --- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -104,10 +104,10 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, const DebugLoc &DL) { const auto *TII = MF.getSubtarget().getInstrInfo(); + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); if (MF.getSubtarget() .getTargetTriple().isOSBinFormatELF()) { - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterClass *PtrRC = MRI.getTargetRegisterInfo()->getPointerRegClass(MF); @@ -125,10 +125,8 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, .addReg(SrcReg) .addMemOperand(MMO); } else { - MachineModuleInfoWasm &MMIW = - MF.getMMI().getObjFileInfo(); BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::SET_GLOBAL_I32)) - .addImm(MMIW.getStackPointerGlobal()) + .addExternalSymbol(SPSymbol) .addReg(SrcReg); } } @@ -171,10 +169,11 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, unsigned SPReg = WebAssembly::SP32; if (StackSize) SPReg = MRI.createVirtualRegister(PtrRC); + + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); if (MF.getSubtarget() .getTargetTriple().isOSBinFormatELF()) { - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); unsigned Zero = MRI.createVirtualRegister(PtrRC); BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero) @@ -189,22 +188,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, .addReg(Zero) // addr .addMemOperand(LoadMMO); } else { - auto &MMIW = MF.getMMI().getObjFileInfo(); - if (!MMIW.hasStackPointerGlobal()) { - MMIW.setStackPointerGlobal(MMIW.getGlobals().size()); - - // Create the stack-pointer global. For now, just use the - // Emscripten/Binaryen ABI names. - wasm::Global G; - G.Type = wasm::ValType::I32; - G.Mutable = true; - G.InitialValue = 0; - G.InitialModule = "env"; - G.InitialName = "STACKTOP"; - MMIW.addGlobal(G); - } BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg) - .addImm(MMIW.getStackPointerGlobal()); + .addExternalSymbol(SPSymbol); } bool HasBP = hasBP(MF); diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index a67137f867e7e3431a59afd2cf37982246c9cc98..4f3ae57733e5b72574a4b528b3837ca6da083915 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -12,12 +12,13 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Function.h" // To access function attributes. #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 31a5ca1f4cc2729806cd1e44f751acc7f5fa3472..814377003cbccefc9c37a0672c7e3b50764b3f4f 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -84,8 +84,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) setCondCodeAction(CC, T, Expand); // Expand floating-point library function operators. - for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW, - ISD::FREM, ISD::FMA}) + for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, + ISD::FMA}) setOperationAction(Op, T, Expand); // Note supported floating-point library function operators that otherwise // default to expand. diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 73d1d4be293ba251d9ee1f2826cdef396b33b599..6b45839c14b0e43c6df170d3ee430c20863c711b 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -19,8 +19,8 @@ let Defs = [ARGUMENTS] in { // Call sequence markers. These have an immediate which represents the amount of // stack space to allocate or free, which is used for varargs lowering. let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in { -def ADJCALLSTACKDOWN : I<(outs), (ins i32imm:$amt), - [(WebAssemblycallseq_start timm:$amt)]>; +def ADJCALLSTACKDOWN : I<(outs), (ins i32imm:$amt, i32imm:$amt2), + [(WebAssemblycallseq_start timm:$amt, timm:$amt2)]>; def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt, i32imm:$amt2), [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>; } // isCodeGenOnly = 1 diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index a601b575f5791474927c6fecdcf45c25b506448c..fa2146f7db84d1e827635c0fe00e38c0988d509b 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -25,7 +25,8 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, // WebAssembly-specific DAG Node Types. //===----------------------------------------------------------------------===// -def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>]>; +def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>, + SDTCisVT<1, iPTR>]>; def SDT_WebAssemblyCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 25d77bb1f234315291e70b449a2b4e5ff08d8caa..365b327190ec1f6be0e0370e7e56efebf34d331d 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -26,18 +26,18 @@ // offset for an add that needs wrapping. def regPlusImm : PatFrag<(ops node:$addr, node:$off), (add node:$addr, node:$off), - [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; + [{ return N->getFlags().hasNoUnsignedWrap(); }]>; // Treat an 'or' node as an 'add' if the or'ed bits are known to be zero. def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ if (ConstantSDNode *CN = dyn_cast(N->getOperand(1))) return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - APInt KnownZero0, KnownOne0; - CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0); - APInt KnownZero1, KnownOne1; - CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0); - return (~KnownZero0 & ~KnownZero1) == 0; + KnownBits Known0; + CurDAG->computeKnownBits(N->getOperand(0), Known0, 0); + KnownBits Known1; + CurDAG->computeKnownBits(N->getOperand(1), Known1, 0); + return (~Known0.Zero & ~Known1.Zero) == 0; }]>; // GlobalAddresses are conceptually unsigned values, so we can also fold them @@ -47,7 +47,7 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ def regPlusGA : PatFrag<(ops node:$addr, node:$off), (add node:$addr, node:$off), [{ - return N->getFlags()->hasNoUnsignedWrap(); + return N->getFlags().hasNoUnsignedWrap(); }]>; // We don't need a regPlusES because external symbols never have constant diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index 744a3ed427af74af24039af33bfc518bf78baead..576b71dd7966035de30bef34795ec7ad0a5a5640 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -15,8 +15,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index e1b2f79c81ccaf907be27dacb94ee191e31c79a0..947c0329bb6e8779339e0f384ca29dede95ffc05 100644 --- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -435,22 +435,19 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) { // Because we added the pointer to the callee as first argument, all // argument attribute indices have to be incremented by one. - SmallVector AttributesVec; + SmallVector ArgAttributes; const AttributeList &InvokeAL = CI->getAttributes(); - // Add any return attributes. - AttributesVec.push_back(InvokeAL.getRetAttributes()); // No attributes for the callee pointer. - AttributesVec.push_back(AttributeSet()); + ArgAttributes.push_back(AttributeSet()); // Copy the argument attributes from the original - for (unsigned i = 1, e = CI->getNumArgOperands(); i <= e; ++i) { - AttributesVec.push_back(InvokeAL.getParamAttributes(i)); - } + for (unsigned i = 0, e = CI->getNumArgOperands(); i < e; ++i) + ArgAttributes.push_back(InvokeAL.getParamAttributes(i)); - // Add any function attributes. - AttributesVec.push_back(InvokeAL.getFnAttributes()); // Reconstruct the AttributesList based on the vector we constructed. - AttributeList NewCallAL = AttributeList::get(C, AttributesVec); + AttributeList NewCallAL = + AttributeList::get(C, InvokeAL.getFnAttributes(), + InvokeAL.getRetAttributes(), ArgAttributes); NewCall->setAttributes(NewCallAL); CI->replaceAllUsesWith(NewCall); diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp index 96520aa5d28c50c49b9df182c108b77e678de211..559165e4c86b24e7c48cd032e781d5150bc973e8 100644 --- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp +++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -54,7 +54,7 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() { void OptimizeReturned::visitCallSite(CallSite CS) { for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i) - if (CS.paramHasAttr(1 + i, Attribute::Returned)) { + if (CS.paramHasAttr(i, Attribute::Returned)) { Instruction *Inst = CS.getInstruction(); Value *Arg = CS.getArgOperand(i); // Ignore constants, globals, undef, etc. diff --git a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 473dcb7a33fdca5dbe6a62d0cf504daa6abcba0b..1462c49aa9fd96dea81807b369735f446fcc7fc9 100644 --- a/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index 5fd4a8d1949ef6bed6b7853e7e443051f9d1c3a7..ba39b6cdb56822108643f04a87ea97c5c32c20cc 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -140,8 +140,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { // Check if it's possible to reuse any of the used colors. if (!MRI->isLiveIn(Old)) - for (int C(UsedColors.find_first()); C != -1; - C = UsedColors.find_next(C)) { + for (unsigned C : UsedColors.set_bits()) { if (MRI->getRegClass(SortedIntervals[C]->reg) != RC) continue; for (LiveInterval *OtherLI : Assignments[C]) diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index e3470825940c3288ceb5af5ee4858f4f9e3b79ba..766ab456a8e6cf5730ed05475f4922e6e7e9d029 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -13,8 +13,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 57d454746b06849282141fbd0d85426efbf0c28f..ea9e3fa862ce2b84b07d76c21291644361358864 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -20,8 +20,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyUtilities.h" @@ -170,28 +170,16 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, if (MI.mayStore()) { Write = true; - const MachineFunction &MF = *MI.getParent()->getParent(); - if (MF.getSubtarget() - .getTargetTriple().isOSBinFormatELF()) { - // Check for stores to __stack_pointer. - for (auto MMO : MI.memoperands()) { - const MachinePointerInfo &MPI = MMO->getPointerInfo(); - if (MPI.V.is()) { - auto PSV = MPI.V.get(); - if (const ExternalSymbolPseudoSourceValue *EPSV = - dyn_cast(PSV)) - if (StringRef(EPSV->getSymbol()) == "__stack_pointer") - StackPointer = true; - } - } - } else { - // Check for sets of the stack pointer. - const MachineModuleInfoWasm &MMIW = - MF.getMMI().getObjFileInfo(); - if ((MI.getOpcode() == WebAssembly::SET_LOCAL_I32 || - MI.getOpcode() == WebAssembly::SET_LOCAL_I64) && - MI.getOperand(0).getImm() == MMIW.getStackPointerGlobal()) { - StackPointer = true; + // Check for stores to __stack_pointer. + for (auto MMO : MI.memoperands()) { + const MachinePointerInfo &MPI = MMO->getPointerInfo(); + if (MPI.V.is()) { + auto PSV = MPI.V.get(); + if (const ExternalSymbolPseudoSourceValue *EPSV = + dyn_cast(PSV)) + if (StringRef(EPSV->getSymbol()) == "__stack_pointer") { + StackPointer = true; + } } } } else if (MI.hasOrderedMemoryRef()) { diff --git a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp index 9e944df637d9439dff614c9a33ed36b376693f04..878ffd08d228a5b29751006b5dedc1e7cec3b4e9 100644 --- a/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp +++ b/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -19,8 +19,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index 2441ead7cb27cf022f90988776544a8dd1bab1a0..b1385f409fd33381aff268de13691f761cd35dea 100644 --- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -12,8 +12,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index a9aa781610ce137bd02b809126dd2f24257b0819..8173364fa8809da83ed4df9c7a721b90dda3089c 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -24,8 +24,8 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/Analysis/TargetLibraryInfo.h" diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 44c794ef5da1976c6ac0b319489bf9ab80ee38eb..7b05f671bdcbfeb1adbde6a55eb7513d796bc50f 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -12,9 +12,9 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyTargetMachine.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyTargetObjectFile.h" #include "WebAssemblyTargetTransformInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -129,7 +129,7 @@ namespace { /// WebAssembly Code Generator Pass Configuration Options. class WebAssemblyPassConfig final : public TargetPassConfig { public: - WebAssemblyPassConfig(WebAssemblyTargetMachine *TM, PassManagerBase &PM) + WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const { @@ -154,7 +154,7 @@ TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() { TargetPassConfig * WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) { - return new WebAssemblyPassConfig(this, PM); + return new WebAssemblyPassConfig(*this, PM); } FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { @@ -173,7 +173,7 @@ void WebAssemblyPassConfig::addIRPasses() { else // Expand some atomic operations. WebAssemblyTargetLowering has hooks which // control specifically what gets lowered. - addPass(createAtomicExpandPass(TM)); + addPass(createAtomicExpandPass()); // Fix function bitcasts, as WebAssembly requires caller and callee signatures // to match. diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 47aadf99e86018cd096f1b7c80060b848ddbac88..b3ce4bd27460678899272eef47d6352bb7e2fdbb 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -36,7 +36,7 @@ unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) { return Result; } -unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) { +unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) const { if (Vector && getST()->hasSIMD128()) return 128; diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index f658609f89300f8e83f6bf843a4a5305510f7a2e..7b35fc916133949ba8751c54ecc16747d3689da1 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -55,7 +55,7 @@ public: /// @{ unsigned getNumberOfRegisters(bool Vector); - unsigned getRegisterBitWidth(bool Vector); + unsigned getRegisterBitWidth(bool Vector) const; unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt index 8dd5e8a03e2ee2bbf05a9ba6022d9bba55eee986..35a67134775a646df3159b2267b047a1b41bcc3e 100644 --- a/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -1,5 +1,15 @@ # Tests which are known to fail from the GCC torture test suite. +# Syntax: Each line has a single test to be marked as a 'known failure' (or +# 'exclusion'. Known failures are expected to fail, and will cause an error if +# they pass. (Known failures that do not run at all will not cause an +# error). The format is +# # comment +# +# The attributes in this case represent the different arguments used to +# compiler: 'wasm-s' is for compiling to .s files, and 'wasm-o' for compiling +# to wasm object files (.o). + # Computed gotos are not supported (Cannot select BlockAddress/BRIND) 20040302-1.c 20071210-1.c @@ -23,9 +33,6 @@ built-in-setjmp.c pr60003.c # Error in the program / unsupported by Clang. -scal-to-vec1.c -scal-to-vec2.c -scal-to-vec3.c 20000822-1.c 20010209-1.c 20010605-1.c @@ -66,3 +73,18 @@ pr41935.c 920728-1.c pr28865.c widechar-2.c + +# crash: Running pass 'WebAssembly Explicit Locals' on function +20020107-1.c wasm-o +20030222-1.c wasm-o +20071220-1.c wasm-o +20071220-2.c wasm-o +990130-1.c wasm-o +pr38533.c wasm-o +pr41239.c wasm-o +pr43385.c wasm-o +pr43560.c wasm-o +pr45695.c wasm-o +pr49279.c wasm-o +pr49390.c wasm-o +pr52286.c wasm-o diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index 788fac62626b71339472df5fe3989cb44c3d8bf4..f7e31de65f6d11ffde7d731268e3037e2e684425 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -7,11 +7,11 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86AsmInstrumentation.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86Operand.h" -#include "llvm/ADT/Twine.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 324da650e74e71fd515c1402080d9508fd573b06..e5d3209ec6a979a7e8878ccf94ab28449f5fbff7 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -776,11 +776,6 @@ private: bool ParseZ(std::unique_ptr &Z, const SMLoc &StartLoc); - /// MS-compatibility: - /// Obtain an appropriate size qualifier, when facing its absence, - /// upon AVX512 vector/broadcast memory operand - unsigned AdjustAVX512Mem(unsigned Size, X86Operand* UnsizedMemOpNext); - bool is64BitMode() const { // FIXME: Can tablegen auto-generate this? return getSTI().getFeatureBits()[X86::Mode64Bit]; @@ -1206,27 +1201,16 @@ std::unique_ptr X86AsmParser::CreateMemForInlineAsm( Identifier, Info.OpDecl); } + // We either have a direct symbol reference, or an offset from a symbol. The // parser always puts the symbol on the LHS, so look there for size // calculation purposes. + unsigned FrontendSize = 0; const MCBinaryExpr *BinOp = dyn_cast(Disp); bool IsSymRef = isa(BinOp ? BinOp->getLHS() : Disp); - if (IsSymRef) { - if (!Size) { - Size = Info.Type * 8; // Size is in terms of bits in this context. - if (Size) - InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, - /*Len=*/0, Size); - if (AllowBetterSizeMatch) - // Handle cases where size qualifier is absent, upon an indirect symbol - // reference - e.g. "vaddps zmm1, zmm2, [var]" - // set Size to zero to allow matching mechansim to try and find a better - // size qualifier than our initial guess, based on available variants of - // the given instruction - Size = 0; - } - } + if (IsSymRef && !Size && Info.Type) + FrontendSize = Info.Type * 8; // Size is in terms of bits in this context. // When parsing inline assembly we set the base register to a non-zero value // if we don't know the actual value at this time. This is necessary to @@ -1234,7 +1218,7 @@ std::unique_ptr X86AsmParser::CreateMemForInlineAsm( BaseReg = BaseReg ? BaseReg : 1; return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End, Size, Identifier, - Info.OpDecl); + Info.OpDecl, FrontendSize); } static void @@ -1332,16 +1316,17 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { while (!Done) { bool UpdateLocLex = true; + AsmToken::TokenKind TK = getLexer().getKind(); // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an // identifier. Don't try an parse it as a register. - if (PrevTK != AsmToken::Error && Tok.getString().startswith(".")) + if (PrevTK != AsmToken::Error && Tok.getString().startswith(".") && + TK != AsmToken::Identifier) break; // If we're parsing an immediate expression, we don't expect a '['. if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) break; - AsmToken::TokenKind TK = getLexer().getKind(); switch (TK) { default: { if (SM.isValidEndState()) { @@ -2884,23 +2869,6 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, return true; } -unsigned X86AsmParser::AdjustAVX512Mem(unsigned Size, - X86Operand* UnsizedMemOpNext) { - // Check for the existence of an AVX512 platform - if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) - return 0; - // Allow adjusting upon a (x|y|z)mm - if (Size == 512 || Size == 256 || Size == 128) - return Size; - // This is an allegadly broadcasting mem op adjustment, - // allow some more inquiring to validate it - if (Size == 64 || Size == 32) - return UnsizedMemOpNext && UnsizedMemOpNext->isToken() && - UnsizedMemOpNext->getToken().substr(0, 4).equals("{1to") ? Size : 0; - // Do not allow any other type of adjustments - return 0; -} - bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -2920,19 +2888,14 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, // Find one unsized memory operand, if present. X86Operand *UnsizedMemOp = nullptr; - // If unsized memory operand was found - obtain following operand. - // For use in AdjustAVX512Mem - X86Operand *UnsizedMemOpNext = nullptr; for (const auto &Op : Operands) { X86Operand *X86Op = static_cast(Op.get()); - if (UnsizedMemOp) { - UnsizedMemOpNext = X86Op; + if (X86Op->isMemUnsized()) { + UnsizedMemOp = X86Op; // Have we found an unqualified memory operand, // break. IA allows only one memory operand. break; } - if (X86Op->isMemUnsized()) - UnsizedMemOp = X86Op; } // Allow some instructions to have implicitly pointer-sized operands. This is @@ -2978,7 +2941,6 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, // If an unsized memory operand is present, try to match with each memory // operand size. In Intel assembly, the size is not part of the instruction // mnemonic. - unsigned MatchedSize = 0; if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) { static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512}; for (unsigned Size : MopSizes) { @@ -2993,17 +2955,10 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, // If this returned as a missing feature failure, remember that. if (Match.back() == Match_MissingFeature) ErrorInfoMissingFeature = ErrorInfoIgnore; - if (M == Match_Success) - // MS-compatability: - // Adjust AVX512 vector/broadcast memory operand, - // when facing the absence of a size qualifier. - // Match GCC behavior on respective cases. - MatchedSize = AdjustAVX512Mem(Size, UnsizedMemOpNext); } // Restore the size of the unsized memory operand if we modified it. - if (UnsizedMemOp) - UnsizedMemOp->Mem.Size = 0; + UnsizedMemOp->Mem.Size = 0; } // If we haven't matched anything yet, this is not a basic integer or FPU @@ -3027,20 +2982,30 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, Op.getLocRange(), MatchingInlineAsm); } + unsigned NumSuccessfulMatches = + std::count(std::begin(Match), std::end(Match), Match_Success); + + // If matching was ambiguous and we had size information from the frontend, + // try again with that. This handles cases like "movxz eax, m8/m16". + if (UnsizedMemOp && NumSuccessfulMatches > 1 && + UnsizedMemOp->getMemFrontendSize()) { + UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize(); + unsigned M = MatchInstruction( + Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()); + if (M == Match_Success) + NumSuccessfulMatches = 1; + + // Add a rewrite that encodes the size information we used from the + // frontend. + InstInfo->AsmRewrites->emplace_back( + AOK_SizeDirective, UnsizedMemOp->getStartLoc(), + /*Len=*/0, UnsizedMemOp->getMemFrontendSize()); + } + // If exactly one matched, then we treat that as a successful match (and the // instruction will already have been filled in correctly, since the failing // matches won't have modified it). - unsigned NumSuccessfulMatches = - std::count(std::begin(Match), std::end(Match), Match_Success); if (NumSuccessfulMatches == 1) { - if (MatchedSize && isParsingInlineAsm() && isParsingIntelSyntax()) - // MS compatibility - - // Fix the rewrite according to the matched memory size - // MS inline assembly only - for (AsmRewrite &AR : *InstInfo->AsmRewrites) - if ((AR.Loc.getPointer() == UnsizedMemOp->StartLoc.getPointer()) && - (AR.Kind == AOK_SizeDirective)) - AR.Val = MatchedSize; // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the individual // transformations can chain off each other. @@ -3057,7 +3022,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, "multiple matches only possible with unsized memory operands"); return Error(UnsizedMemOp->getStartLoc(), "ambiguous operand size for instruction '" + Mnemonic + "\'", - UnsizedMemOp->getLocRange(), MatchingInlineAsm); + UnsizedMemOp->getLocRange()); } // If one instruction matched with a missing feature, report this as a @@ -3094,6 +3059,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { else if (IDVal.startswith(".code")) return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); else if (IDVal.startswith(".att_syntax")) { + getParser().setParsingInlineAsm(false); if (getLexer().isNot(AsmToken::EndOfStatement)) { if (Parser.getTok().getString() == "prefix") Parser.Lex(); @@ -3106,6 +3072,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { return false; } else if (IDVal.startswith(".intel_syntax")) { getParser().setAssemblerDialect(1); + getParser().setParsingInlineAsm(true); if (getLexer().isNot(AsmToken::EndOfStatement)) { if (Parser.getTok().getString() == "noprefix") Parser.Lex(); diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index 9f1fa6c65907044bc918f99315e92400b76c2485..0fba15cc692ca17ccac8912a8e3dec2bd400e0cd 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -15,8 +15,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" @@ -62,6 +62,10 @@ struct X86Operand : public MCParsedAsmOperand { unsigned Scale; unsigned Size; unsigned ModeSize; + + /// If the memory operand is unsized and there are multiple instruction + /// matches, prefer the one with this size. + unsigned FrontendSize; }; union { @@ -136,6 +140,10 @@ struct X86Operand : public MCParsedAsmOperand { assert(Kind == Memory && "Invalid access!"); return Mem.ModeSize; } + unsigned getMemFrontendSize() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.FrontendSize; + } bool isToken() const override {return Kind == Token; } @@ -512,7 +520,7 @@ struct X86Operand : public MCParsedAsmOperand { static std::unique_ptr CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0, StringRef SymName = StringRef(), - void *OpDecl = nullptr) { + void *OpDecl = nullptr, unsigned FrontendSize = 0) { auto Res = llvm::make_unique(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -521,6 +529,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.Scale = 1; Res->Mem.Size = Size; Res->Mem.ModeSize = ModeSize; + Res->Mem.FrontendSize = FrontendSize; Res->SymName = SymName; Res->OpDecl = OpDecl; Res->AddressOf = false; @@ -532,7 +541,7 @@ struct X86Operand : public MCParsedAsmOperand { CreateMem(unsigned ModeSize, unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0, StringRef SymName = StringRef(), - void *OpDecl = nullptr) { + void *OpDecl = nullptr, unsigned FrontendSize = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -548,6 +557,7 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.Scale = Scale; Res->Mem.Size = Size; Res->Mem.ModeSize = ModeSize; + Res->Mem.FrontendSize = FrontendSize; Res->SymName = SymName; Res->OpDecl = OpDecl; Res->AddressOf = false; diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 36ad23bb41c05d41743093aa330cb201fbb9e903..4ce908b1da64e0fefdb41c491150ebe10488712a 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -74,8 +74,8 @@ // //===----------------------------------------------------------------------===// -#include "X86DisassemblerDecoder.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "X86DisassemblerDecoder.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index b7f637e9a8cd7ac4fac8d25d979ee7fbc31858d2..577b7a776c6dfd5757adcb95289453ad95aec262 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include /* for va_*() */ -#include /* for vsnprintf() */ -#include /* for exit() */ -#include /* for memset() */ +#include /* for va_*() */ +#include /* for vsnprintf() */ +#include /* for exit() */ +#include /* for memset() */ #include "X86DisassemblerDecoder.h" diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 6aa7003067440c997273d9698686b73a029b1a73..4d91300c7edecf6edbe4641c6bb9ed9a4e9b5462 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86BaseInfo.h" #include "X86ATTInstPrinter.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86InstComments.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 8594addb5dd419d456b25b5ec21d4cd8c6084130..5e809c34325ee0e1d16cc274fa84116c685e7607 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -15,8 +15,8 @@ #include "X86InstComments.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "Utils/X86ShuffleDecode.h" -#include "llvm/MC/MCInst.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -587,6 +587,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPSLLDQZ256rr: case X86::VPSLLDQZ512rr: Src1Name = getRegName(MI->getOperand(1).getReg()); + LLVM_FALLTHROUGH; case X86::VPSLLDQZ128rm: case X86::VPSLLDQZ256rm: case X86::VPSLLDQZ512rm: @@ -604,6 +605,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::VPSRLDQZ256rr: case X86::VPSRLDQZ512rr: Src1Name = getRegName(MI->getOperand(1).getReg()); + LLVM_FALLTHROUGH; case X86::VPSRLDQZ128rm: case X86::VPSRLDQZ256rm: case X86::VPSRLDQZ512rm: @@ -1091,6 +1093,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m) DecodeSubVectorBroadcast(MVT::v8f32, MVT::v2f32, ShuffleMask); @@ -1099,6 +1102,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m) DecodeSubVectorBroadcast(MVT::v16f32, MVT::v2f32, ShuffleMask); @@ -1189,8 +1193,6 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, OS << ']'; --i; // For loop increments element #. } - //MI->print(OS, 0); - OS << "\n"; // We successfully added a comment to this instruction. return true; diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index a8c631ae282f9bf6a973d088253bc37bba5a806f..d6af6712d5a1ccd4b0a3275344ed9ba874983f86 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "X86IntelInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "X86InstComments.h" -#include "X86IntelInstPrinter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index a713af6aadb5afb40909f2a203a263d3a3102278..7a9e4f4468ec7309ddcbdafaf0566b2d188b72ec 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -10,6 +10,8 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -22,9 +24,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 0b73df3a2ff8c251dab8a16fbc298942cb9116e7..4da4eebec0386af815a0911de5bad0c524886792 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -9,13 +9,13 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include #include diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 48a1d8f1330cd876ecf7dd660a05c7b808cba646..1538a515f41904c7eb39cde0bf5bb893d4a98e6c 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -13,12 +13,12 @@ #include "X86MCAsmInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/ELF.h" using namespace llvm; enum AsmWriterFlavorTy { @@ -43,7 +43,7 @@ void X86MCAsmInfoDarwin::anchor() { } X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { bool is64Bit = T.getArch() == Triple::x86_64; if (is64Bit) - PointerSize = CalleeSaveStackSlotSize = 8; + CodePointerSize = CalleeSaveStackSlotSize = 8; AssemblerDialect = AsmWriterFlavor; @@ -92,7 +92,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { // For ELF, x86-64 pointer size depends on the ABI. // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64 // with the x32 ABI, pointer size remains the default 4. - PointerSize = (is64Bit && !isX32) ? 8 : 4; + CodePointerSize = (is64Bit && !isX32) ? 8 : 4; // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI. CalleeSaveStackSlotSize = is64Bit ? 8 : 4; @@ -129,7 +129,7 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; PrivateLabelPrefix = ".L"; - PointerSize = 8; + CodePointerSize = 8; WinEHEncodingType = WinEH::EncodingType::Itanium; } else { // 32-bit X86 doesn't use CFI, so this isn't a real encoding type. It's just @@ -156,7 +156,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; PrivateLabelPrefix = ".L"; - PointerSize = 8; + CodePointerSize = 8; WinEHEncodingType = WinEH::EncodingType::Itanium; ExceptionsType = ExceptionHandling::WinEH; } else { diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 297926ddcfdaf18e2903941b9160e0c0d2a982f7..4097ef224d503a06f837075a21a8db5639db486c 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -7,9 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86MCTargetDesc.h" #include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -19,7 +20,6 @@ #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" -#include "llvm/Support/MachO.h" using namespace llvm; diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index d6777fc8aa6aeb9e553618f3c4d221e515cec61b..105580c913a16c7d04868aa307bc5f62a80c475f 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -9,11 +9,11 @@ #include "MCTargetDesc/X86FixupKinds.h" #include "MCTargetDesc/X86MCTargetDesc.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index fdcc7e1ab7b0545096cea2785abf8e0fd505e91d..19c93cfff0fe90cded2dc8a6dc0dfc2655f18f68 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -95,7 +95,8 @@ void initializeFixupBWInstPassPass(PassRegistry &); /// encoding when possible in order to reduce code size. FunctionPass *createX86EvexToVexInsts(); -InstructionSelector *createX86InstructionSelector(X86Subtarget &, +InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &, X86RegisterBankInfo &); void initializeEvexToVexInstPassPass(PassRegistry &); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 8fcc8e31d5d44152b611f04a6e1d11b1c93acb58..fe105298f5c19ac3743b3bb6b9e824dbd2936b3d 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -127,6 +127,9 @@ def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", "Enable AVX-512 Conflict Detection Instructions", [FeatureAVX512]>; +def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", + "true", "Enable AVX-512 Population Count Instructions", + [FeatureAVX512]>; def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; @@ -170,6 +173,8 @@ def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", [FeatureSSE2]>; def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true", "Enable TBM instructions">; +def FeatureLWP : SubtargetFeature<"lwp", "HasLWP", "true", + "Enable LWP instructions">; def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true", "Support MOVBE instruction">; def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true", @@ -233,6 +238,8 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; +def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true", + "LEA instruction with 3 ops or certain registers is slow">; def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", "INC and DEC instructions are slower than ADD and SUB">; def FeatureSoftFloat @@ -273,6 +280,16 @@ def FeatureFastSHLDRotate "fast-shld-rotate", "HasFastSHLDRotate", "true", "SHLD can be used as a faster rotate">; +// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka +// "string operations"). See "REP String Enhancement" in the Intel Software +// Development Manual. This feature essentially means that REP MOVSB will copy +// using the largest available size instead of copying bytes one by one, making +// it at least as fast as REPMOVS{W,D,Q}. +def FeatureERMSB + : SubtargetFeature< + "ermsb", "HasERMSB", "true", + "REP MOVS/STOS are fast">; + //===----------------------------------------------------------------------===// // X86 processors supported. //===----------------------------------------------------------------------===// @@ -468,6 +485,7 @@ def SNBFeatures : ProcessorFeatures<[], [ FeatureXSAVE, FeatureXSAVEOPT, FeatureLAHFSAHF, + FeatureSlow3OpsLEA, FeatureFastScalarFSQRT, FeatureFastSHLDRotate ]>; @@ -498,6 +516,7 @@ def HSWFeatures : ProcessorFeatures; @@ -702,6 +722,7 @@ def : Proc<"bdver2", [ FeatureXSAVE, FeatureBMI, FeatureTBM, + FeatureLWP, FeatureFMA, FeatureSlowSHLD, FeatureLAHFSAHF @@ -726,6 +747,7 @@ def : Proc<"bdver3", [ FeatureXSAVE, FeatureBMI, FeatureTBM, + FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, @@ -752,6 +774,7 @@ def : Proc<"bdver4", [ FeatureBMI, FeatureBMI2, FeatureTBM, + FeatureLWP, FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index e1825ca1eda138b6516f3b256cc8bfeb5f4ba036..dc15aeadaa6196d8854b88f8a187ff85156abdea 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" +#include "llvm/BinaryFormat/COFF.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineValueType.h" @@ -34,7 +35,6 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 44bc373b0394c5afd724cdc622913f346cebc9d4..d7c3b74d3efb2a37e839ad57ce7c14d158463b5d 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -91,6 +91,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { X86MCInstLower &MCIL); void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); + void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index b8f088dfbe589fe76de9d6b5ac1c3602d0bf7cb7..765af67de160abf23e6f59563ba6aa0f753bece4 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -114,7 +114,7 @@ private: StringRef getPassName() const override { return "X86 Optimize Call Frame"; } - const TargetInstrInfo *TII; + const X86InstrInfo *TII; const X86FrameLowering *TFL; const X86Subtarget *STI; MachineRegisterInfo *MRI; @@ -331,7 +331,6 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, // transformation. const X86RegisterInfo &RegInfo = *static_cast(STI->getRegisterInfo()); - unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); // We expect to enter this at the beginning of a call sequence assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); @@ -340,8 +339,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, // How much do we adjust the stack? This puts an upper bound on // the number of parameters actually passed on it. - unsigned int MaxAdjust = - FrameSetup->getOperand(0).getImm() >> Log2SlotSize; + unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize; // A zero adjustment means no stack parameters if (!MaxAdjust) { @@ -434,7 +432,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, return; Context.Call = &*I; - if ((++I)->getOpcode() != FrameDestroyOpcode) + if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode()) return; // Now, go through the vector, and see that we don't have any gaps, @@ -464,7 +462,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, // PEI will end up finalizing the handling of this. MachineBasicBlock::iterator FrameSetup = Context.FrameSetup; MachineBasicBlock &MBB = *(FrameSetup->getParent()); - FrameSetup->getOperand(1).setImm(Context.ExpectedDist); + TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist); DebugLoc DL = FrameSetup->getDebugLoc(); bool Is64Bit = STI->is64Bit(); diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp index 137ef166aaeb00897734bafbc5dd212691f3bef4..161bfa7b54748893d5a7fff3cad60b06131a10a8 100644 --- a/lib/Target/X86/X86CallLowering.cpp +++ b/lib/Target/X86/X86CallLowering.cpp @@ -53,7 +53,6 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, return; } - SmallVector BitOffsets; SmallVector SplitRegs; EVT PartVT = TLI.getRegisterType(Context, VT); @@ -64,8 +63,10 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*PartTy, DL)), PartTy, OrigArg.Flags}; SplitArgs.push_back(Info); - PerformArgSplit(Info.Reg, PartVT.getSizeInBits() * i); + SplitRegs.push_back(Info.Reg); } + + PerformArgSplit(SplitRegs); } namespace { @@ -112,10 +113,9 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); SmallVector SplitArgs; - splitToValueTypes(OrigArg, SplitArgs, DL, MRI, - [&](unsigned Reg, uint64_t Offset) { - MIRBuilder.buildExtract(Reg, VReg, Offset); - }); + splitToValueTypes( + OrigArg, SplitArgs, DL, MRI, + [&](ArrayRef Regs) { MIRBuilder.buildUnmerge(Regs, VReg); }); FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86); if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) @@ -183,22 +183,10 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, for (auto &Arg : F.args()) { ArgInfo OrigArg(VRegs[Idx], Arg.getType()); setArgFlags(OrigArg, Idx + 1, DL, F); - LLT Ty = MRI.getType(VRegs[Idx]); - unsigned Dst = VRegs[Idx]; - bool Split = false; splitToValueTypes(OrigArg, SplitArgs, DL, MRI, - [&](unsigned Reg, uint64_t Offset) { - if (!Split) { - Split = true; - Dst = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildUndef(Dst); - } - unsigned Tmp = MRI.createGenericVirtualRegister(Ty); - MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset); - Dst = Tmp; + [&](ArrayRef Regs) { + MIRBuilder.buildMerge(VRegs[Idx], Regs); }); - if (Dst != VRegs[Idx]) - MIRBuilder.buildCopy(VRegs[Idx], Dst); Idx++; } diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h index 204e6974c702e3601cdddf8a8c33ffa8d1d71728..8a8afb5682982448431e7622b916ea3dede4303b 100644 --- a/lib/Target/X86/X86CallLowering.h +++ b/lib/Target/X86/X86CallLowering.h @@ -34,14 +34,15 @@ public: bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef VRegs) const override; + private: /// A function of this type is used to perform value split action. - typedef std::function SplitArgTy; + typedef std::function)> SplitArgTy; void splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, SplitArgTy SplitArg) const; }; -} // End of namespace llvm; +} // namespace llvm #endif diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 6781d761a1c4fca81d0073959095900996aa5846..7d146d050a5c2c5fba4d7f01abc82aab5d3a54ab 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -73,8 +73,8 @@ def CC_#NAME : CallingConv<[ CCIfSubtarget<"is64Bit()", CCIfByVal>>, CCIfByVal>, - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // Promote v8i1/v16i1/v32i1 arguments to i32. CCIfType<[v8i1, v16i1, v32i1], CCPromoteToType>, @@ -146,8 +146,8 @@ def CC_#NAME : CallingConv<[ ]>; def RetCC_#NAME : CallingConv<[ - // Promote i1, v8i1 arguments to i8. - CCIfType<[i1, v8i1], CCPromoteToType>, + // Promote i1, v1i1, v8i1 arguments to i8. + CCIfType<[i1, v1i1, v8i1], CCPromoteToType>, // Promote v16i1 arguments to i16. CCIfType<[v16i1], CCPromoteToType>, @@ -207,6 +207,7 @@ def RetCC_X86Common : CallingConv<[ // // For code that doesn't care about the ABI, we allow returning more than two // integer values in registers. + CCIfType<[v1i1], CCPromoteToType>, CCIfType<[i1], CCPromoteToType>, CCIfType<[i8] , CCAssignToReg<[AL, DL, CL]>>, CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>, @@ -375,6 +376,7 @@ def RetCC_X86_64_Swift : CallingConv<[ CCIfSwiftError>>, // For integers, ECX, R8D can be used as extra return registers. + CCIfType<[v1i1], CCPromoteToType>, CCIfType<[i1], CCPromoteToType>, CCIfType<[i8] , CCAssignToReg<[AL, DL, CL, R8B]>>, CCIfType<[i16], CCAssignToReg<[AX, DX, CX, R8W]>>, @@ -485,8 +487,8 @@ def CC_X86_64_C : CallingConv<[ // Handles byval parameters. CCIfByVal>, - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in R10. CCIfNest>>, @@ -584,8 +586,8 @@ def CC_X86_Win64_C : CallingConv<[ // FIXME: Handle byval stuff. // FIXME: Handle varargs. - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in R10. CCIfNest>, @@ -796,8 +798,8 @@ def CC_X86_32_Common : CallingConv<[ ]>; def CC_X86_32_C : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in ECX. CCIfNest>, @@ -816,8 +818,8 @@ def CC_X86_32_MCU : CallingConv<[ // puts arguments in registers. CCIfByVal>, - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // If the call is not a vararg call, some arguments may be passed // in integer registers. @@ -828,8 +830,8 @@ def CC_X86_32_MCU : CallingConv<[ ]>; def CC_X86_32_FastCall : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in EAX. CCIfNest>, @@ -858,15 +860,15 @@ def CC_X86_32_ThisCall_Common : CallingConv<[ ]>; def CC_X86_32_ThisCall_Mingw : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, CCDelegateTo ]>; def CC_X86_32_ThisCall_Win : CallingConv<[ - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // Pass sret arguments indirectly through stack. CCIfSRet>, @@ -885,8 +887,8 @@ def CC_X86_32_FastCC : CallingConv<[ // puts arguments in registers. CCIfByVal>, - // Promote i1/i8/i16 arguments to i32. - CCIfType<[i1, i8, i16], CCPromoteToType>, + // Promote i1/i8/i16/v1i1 arguments to i32. + CCIfType<[i1, i8, i16, v1i1], CCPromoteToType>, // The 'nest' parameter, if any, is passed in EAX. CCIfNest>, diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index e82f43c1e0e123c62501219621fa975fa47a0c6c..621505aaded9e3baf37aaa8ae7564813e3ecd5fd 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -180,44 +180,6 @@ private: } // end anonymous namespace. -static std::pair -getX86ConditionCode(CmpInst::Predicate Predicate) { - X86::CondCode CC = X86::COND_INVALID; - bool NeedSwap = false; - switch (Predicate) { - default: break; - // Floating-point Predicates - case CmpInst::FCMP_UEQ: CC = X86::COND_E; break; - case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH; - case CmpInst::FCMP_OGT: CC = X86::COND_A; break; - case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH; - case CmpInst::FCMP_OGE: CC = X86::COND_AE; break; - case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH; - case CmpInst::FCMP_ULT: CC = X86::COND_B; break; - case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH; - case CmpInst::FCMP_ULE: CC = X86::COND_BE; break; - case CmpInst::FCMP_ONE: CC = X86::COND_NE; break; - case CmpInst::FCMP_UNO: CC = X86::COND_P; break; - case CmpInst::FCMP_ORD: CC = X86::COND_NP; break; - case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH; - case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break; - - // Integer Predicates - case CmpInst::ICMP_EQ: CC = X86::COND_E; break; - case CmpInst::ICMP_NE: CC = X86::COND_NE; break; - case CmpInst::ICMP_UGT: CC = X86::COND_A; break; - case CmpInst::ICMP_UGE: CC = X86::COND_AE; break; - case CmpInst::ICMP_ULT: CC = X86::COND_B; break; - case CmpInst::ICMP_ULE: CC = X86::COND_BE; break; - case CmpInst::ICMP_SGT: CC = X86::COND_G; break; - case CmpInst::ICMP_SGE: CC = X86::COND_GE; break; - case CmpInst::ICMP_SLT: CC = X86::COND_L; break; - case CmpInst::ICMP_SLE: CC = X86::COND_LE; break; - } - - return std::make_pair(CC, NeedSwap); -} - static std::pair getX86SSEConditionCode(CmpInst::Predicate Predicate) { unsigned CC; @@ -452,6 +414,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm; else @@ -462,6 +426,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm; else @@ -475,6 +441,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, assert(HasAVX); if (IsNonTemporal && Alignment >= 32 && HasAVX2) Opc = X86::VMOVNTDQAYrm; + else if (IsNonTemporal && Alignment >= 16) + return false; // Force split for X86::VMOVNTDQArm else if (Alignment >= 32) Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm; else @@ -1559,7 +1527,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { X86::CondCode CC; bool SwapArgs; - std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); + std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); unsigned Opc = X86::getSETFromCond(CC); @@ -1697,7 +1665,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { bool SwapArgs; unsigned BranchOpc; - std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); + std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); BranchOpc = X86::GetCondBranchFromCond(CC); @@ -2070,7 +2038,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { } bool NeedSwap; - std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate); + std::tie(CC, NeedSwap) = X86::getX86ConditionCode(Predicate); assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); const Value *CmpLHS = CI->getOperand(0); @@ -2149,7 +2117,8 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { if (!LHSReg || !RHSReg) return false; - unsigned Opc = X86::getCMovFromCond(CC, RC->getSize()); + const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo(); + unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8); unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill); updateValueMap(I, ResultReg); @@ -2318,7 +2287,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { const auto *CI = dyn_cast(Cond); if (CI && (CI->getParent() == I->getParent())) { bool NeedSwap; - std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate()); + std::tie(CC, NeedSwap) = X86::getX86ConditionCode(CI->getPredicate()); if (CC > X86::LAST_VALID_COND) return false; @@ -3073,16 +3042,13 @@ bool X86FastISel::fastLowerArguments() { // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. unsigned GPRCnt = 0; unsigned FPRCnt = 0; - unsigned Idx = 0; for (auto const &Arg : F->args()) { - // The first argument is at index 1. - ++Idx; - if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || - F->getAttributes().hasAttribute(Idx, Attribute::InReg) || - F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) || - F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) || - F->getAttributes().hasAttribute(Idx, Attribute::Nest)) + if (Arg.hasAttribute(Attribute::ByVal) || + Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::Nest)) return false; Type *ArgTy = Arg.getType(); @@ -3161,8 +3127,8 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget, return 0; if (CS) - if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) || - CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU()) + if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) || + CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU()) return 0; return 4; @@ -3183,6 +3149,15 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CC); + const CallInst *CI = + CLI.CS ? dyn_cast(CLI.CS->getInstruction()) : nullptr; + const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr; + + // Functions with no_caller_saved_registers that need special handling. + if ((CI && CI->hasFnAttr("no_caller_saved_registers")) || + (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers"))) + return false; + // Handle only C, fastcc, and webkit_js calling conventions for now. switch (CC) { default: return false; @@ -3286,7 +3261,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(NumBytes).addImm(0); + .addImm(NumBytes).addImm(0).addImm(0); // Walk the register/memloc assignments, inserting copies/loads. const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); @@ -3678,13 +3653,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) { switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type"); case MVT::i1: - if (Subtarget->hasAVX512()) { - // Need to copy to a VK1 register. - unsigned ResultReg = createResultReg(&X86::VK1RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg); - return ResultReg; - } case MVT::i8: return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true, X86::sub_8bit); diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 2cd4c1a3e7b3617c5690b25145039559a2ad7112..9f649dad8bc074fc222b190d334425c655bcb643 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -27,20 +27,26 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; -#define DEBUG_TYPE "x86-fixup-LEAs" +namespace llvm { +void initializeFixupLEAPassPass(PassRegistry &); +} + +#define FIXUPLEA_DESC "X86 LEA Fixup" +#define FIXUPLEA_NAME "x86-fixup-LEAs" + +#define DEBUG_TYPE FIXUPLEA_NAME STATISTIC(NumLEAs, "Number of LEA instructions created"); namespace { class FixupLEAPass : public MachineFunctionPass { enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; - static char ID; + /// \brief Loop over all of the instructions in the basic block /// replacing applicable instructions with LEA instructions, /// where appropriate. bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI); - StringRef getPassName() const override { return "X86 LEA Fixup"; } /// \brief Given a machine register, look for the instruction /// which writes it in the current basic block. If found, @@ -62,6 +68,22 @@ class FixupLEAPass : public MachineFunctionPass { void processInstructionForSLM(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI); + + /// \brief Given a LEA instruction which is unprofitable + /// on SNB+ try to replace it with other instructions. + /// According to Intel's Optimization Reference Manual: + /// " For LEA instructions with three source operands and some specific + /// situations, instruction latency has increased to 3 cycles, and must + /// dispatch via port 1: + /// - LEA that has all three source operands: base, index, and offset + /// - LEA that uses base and index registers where the base is EBP, RBP, + /// or R13 + /// - LEA that uses RIP relative addressing mode + /// - LEA that uses 16-bit addressing mode " + /// This function currently handles the first 2 cases only. + MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI, + MachineFunction::iterator MFI); + /// \brief Look for LEAs that add 1 to reg or subtract 1 from reg /// and convert them to INC or DEC respectively. bool fixupIncDec(MachineBasicBlock::iterator &I, @@ -85,7 +107,13 @@ class FixupLEAPass : public MachineFunctionPass { MachineBasicBlock::iterator &MBBI) const; public: - FixupLEAPass() : MachineFunctionPass(ID) {} + static char ID; + + StringRef getPassName() const override { return FIXUPLEA_DESC; } + + FixupLEAPass() : MachineFunctionPass(ID) { + initializeFixupLEAPassPass(*PassRegistry::getPassRegistry()); + } /// \brief Loop over all of the basic blocks, /// replacing instructions by equivalent LEA instructions @@ -104,9 +132,12 @@ private: bool OptIncDec; bool OptLEA; }; -char FixupLEAPass::ID = 0; } +char FixupLEAPass::ID = 0; + +INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false) + MachineInstr * FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI) const { @@ -168,7 +199,7 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const X86Subtarget &ST = Func.getSubtarget(); OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize(); - OptLEA = ST.LEAusesAG() || ST.slowLEA(); + OptLEA = ST.LEAusesAG() || ST.slowLEA() || ST.slow3OpsLEA(); if (!OptLEA && !OptIncDec) return false; @@ -242,9 +273,64 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I, return MachineBasicBlock::iterator(); } -static inline bool isLEA(const int opcode) { - return opcode == X86::LEA16r || opcode == X86::LEA32r || - opcode == X86::LEA64r || opcode == X86::LEA64_32r; +static inline bool isLEA(const int Opcode) { + return Opcode == X86::LEA16r || Opcode == X86::LEA32r || + Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; +} + +static inline bool isInefficientLEAReg(unsigned int Reg) { + return Reg == X86::EBP || Reg == X86::RBP || Reg == X86::R13; +} + +static inline bool isRegOperand(const MachineOperand &Op) { + return Op.isReg() && Op.getReg() != X86::NoRegister; +} +/// hasIneffecientLEARegs - LEA that uses base and index registers +/// where the base is EBP, RBP, or R13 +static inline bool hasInefficientLEABaseReg(const MachineOperand &Base, + const MachineOperand &Index) { + return Base.isReg() && isInefficientLEAReg(Base.getReg()) && + isRegOperand(Index); +} + +static inline bool hasLEAOffset(const MachineOperand &Offset) { + return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal(); +} + +// LEA instruction that has all three operands: offset, base and index +static inline bool isThreeOperandsLEA(const MachineOperand &Base, + const MachineOperand &Index, + const MachineOperand &Offset) { + return isRegOperand(Base) && isRegOperand(Index) && hasLEAOffset(Offset); +} + +static inline int getADDrrFromLEA(int LEAOpcode) { + switch (LEAOpcode) { + default: + llvm_unreachable("Unexpected LEA instruction"); + case X86::LEA16r: + return X86::ADD16rr; + case X86::LEA32r: + return X86::ADD32rr; + case X86::LEA64_32r: + case X86::LEA64r: + return X86::ADD64rr; + } +} + +static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) { + bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm()); + switch (LEAOpcode) { + default: + llvm_unreachable("Unexpected LEA instruction"); + case X86::LEA16r: + return IsInt8 ? X86::ADD16ri8 : X86::ADD16ri; + case X86::LEA32r: + case X86::LEA64_32r: + return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri; + case X86::LEA64r: + return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32; + } } /// isLEASimpleIncOrDec - Does this LEA have one these forms: @@ -337,8 +423,8 @@ void FixupLEAPass::seekLEAFixup(MachineOperand &p, void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { MachineInstr &MI = *I; - const int opcode = MI.getOpcode(); - if (!isLEA(opcode)) + const int Opcode = MI.getOpcode(); + if (!isLEA(Opcode)) return; if (MI.getOperand(5).getReg() != 0 || !MI.getOperand(4).isImm() || !TII->isSafeToClobberEFLAGS(*MFI, I)) @@ -350,53 +436,142 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, return; if (MI.getOperand(2).getImm() > 1) return; - int addrr_opcode, addri_opcode; - switch (opcode) { - default: - llvm_unreachable("Unexpected LEA instruction"); - case X86::LEA16r: - addrr_opcode = X86::ADD16rr; - addri_opcode = X86::ADD16ri; - break; - case X86::LEA32r: - addrr_opcode = X86::ADD32rr; - addri_opcode = X86::ADD32ri; - break; - case X86::LEA64_32r: - case X86::LEA64r: - addrr_opcode = X86::ADD64rr; - addri_opcode = X86::ADD64ri32; - break; - } DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump();); DEBUG(dbgs() << "FixLEA: Replaced by: ";); MachineInstr *NewMI = nullptr; - const MachineOperand &Dst = MI.getOperand(0); // Make ADD instruction for two registers writing to LEA's destination if (SrcR1 != 0 && SrcR2 != 0) { - const MachineOperand &Src1 = MI.getOperand(SrcR1 == DstR ? 1 : 3); - const MachineOperand &Src2 = MI.getOperand(SrcR1 == DstR ? 3 : 1); - NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addrr_opcode)) - .add(Dst) - .add(Src1) - .add(Src2); - MFI->insert(I, NewMI); + const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode)); + const MachineOperand &Src = MI.getOperand(SrcR1 == DstR ? 3 : 1); + NewMI = + BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src); DEBUG(NewMI->dump();); } // Make ADD instruction for immediate if (MI.getOperand(4).getImm() != 0) { + const MCInstrDesc &ADDri = + TII->get(getADDriFromLEA(Opcode, MI.getOperand(4))); const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 1 : 3); - NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addri_opcode)) - .add(Dst) + NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR) .add(SrcR) .addImm(MI.getOperand(4).getImm()); - MFI->insert(I, NewMI); DEBUG(NewMI->dump();); } if (NewMI) { MFI->erase(I); - I = static_cast(NewMI); + I = NewMI; + } +} + +MachineInstr * +FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI, + MachineFunction::iterator MFI) { + + const int LEAOpcode = MI.getOpcode(); + if (!isLEA(LEAOpcode)) + return nullptr; + + const MachineOperand &Dst = MI.getOperand(0); + const MachineOperand &Base = MI.getOperand(1); + const MachineOperand &Scale = MI.getOperand(2); + const MachineOperand &Index = MI.getOperand(3); + const MachineOperand &Offset = MI.getOperand(4); + const MachineOperand &Segment = MI.getOperand(5); + + if (!(isThreeOperandsLEA(Base, Index, Offset) || + hasInefficientLEABaseReg(Base, Index)) || + !TII->isSafeToClobberEFLAGS(*MFI, MI) || + Segment.getReg() != X86::NoRegister) + return nullptr; + + unsigned int DstR = Dst.getReg(); + unsigned int BaseR = Base.getReg(); + unsigned int IndexR = Index.getReg(); + unsigned SSDstR = + (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR; + bool IsScale1 = Scale.getImm() == 1; + bool IsInefficientBase = isInefficientLEAReg(BaseR); + bool IsInefficientIndex = isInefficientLEAReg(IndexR); + + // Skip these cases since it takes more than 2 instructions + // to replace the LEA instruction. + if (IsInefficientBase && SSDstR == BaseR && !IsScale1) + return nullptr; + if (LEAOpcode == X86::LEA64_32r && IsInefficientBase && + (IsInefficientIndex || !IsScale1)) + return nullptr; + + const DebugLoc DL = MI.getDebugLoc(); + const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode)); + const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset)); + + DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump();); + DEBUG(dbgs() << "FixLEA: Replaced by: ";); + + // First try to replace LEA with one or two (for the 3-op LEA case) + // add instructions: + // 1.lea (%base,%index,1), %base => add %index,%base + // 2.lea (%base,%index,1), %index => add %base,%index + if (IsScale1 && (DstR == BaseR || DstR == IndexR)) { + const MachineOperand &Src = DstR == BaseR ? Index : Base; + MachineInstr *NewMI = + BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Src); + DEBUG(NewMI->dump();); + // Create ADD instruction for the Offset in case of 3-Ops LEA. + if (hasLEAOffset(Offset)) { + NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); + DEBUG(NewMI->dump();); + } + return NewMI; + } + // If the base is inefficient try switching the index and base operands, + // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction: + // lea offset(%base,%index,scale),%dst => + // lea (%base,%index,scale); add offset,%dst + if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) { + MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode)) + .add(Dst) + .add(IsInefficientBase ? Index : Base) + .add(Scale) + .add(IsInefficientBase ? Base : Index) + .addImm(0) + .add(Segment); + DEBUG(NewMI->dump();); + // Create ADD instruction for the Offset in case of 3-Ops LEA. + if (hasLEAOffset(Offset)) { + NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset); + DEBUG(NewMI->dump();); + } + return NewMI; + } + // Handle the rest of the cases with inefficient base register: + assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!"); + assert(IsInefficientBase && "efficient base should be handled already!"); + + // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst + if (IsScale1 && !hasLEAOffset(Offset)) { + TII->copyPhysReg(*MFI, MI, DL, DstR, BaseR, Base.isKill()); + DEBUG(MI.getPrevNode()->dump();); + + MachineInstr *NewMI = + BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Index); + DEBUG(NewMI->dump();); + return NewMI; } + // lea offset(%base,%index,scale), %dst => + // lea offset( ,%index,scale), %dst; add %base,%dst + MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode)) + .add(Dst) + .addReg(0) + .add(Scale) + .add(Index) + .add(Offset) + .add(Segment); + DEBUG(NewMI->dump();); + + NewMI = BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Base); + DEBUG(NewMI->dump();); + return NewMI; } bool FixupLEAPass::processBasicBlock(MachineFunction &MF, @@ -410,8 +585,16 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF, if (OptLEA) { if (MF.getSubtarget().isSLM()) processInstructionForSLM(I, MFI); - else - processInstruction(I, MFI); + + else { + if (MF.getSubtarget().slow3OpsLEA()) { + if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) { + MFI->erase(I); + I = NewMI; + } + } else + processInstruction(I, MFI); + } } } return false; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index a5489b9aa8b7e7172b1a86f3b8c28bd2e888b4b4..5582526541bae336982ed8f9fb2bff4415bb4486 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -123,18 +123,26 @@ namespace { EdgeBundles *Bundles; // Return a bitmask of FP registers in block's live-in list. - static unsigned calcLiveInMask(MachineBasicBlock *MBB) { + static unsigned calcLiveInMask(MachineBasicBlock *MBB, bool RemoveFPs) { unsigned Mask = 0; - for (const auto &LI : MBB->liveins()) { - if (LI.PhysReg < X86::FP0 || LI.PhysReg > X86::FP6) - continue; - Mask |= 1 << (LI.PhysReg - X86::FP0); + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(); + I != MBB->livein_end(); ) { + MCPhysReg Reg = I->PhysReg; + static_assert(X86::FP6 - X86::FP0 == 6, "sequential regnums"); + if (Reg >= X86::FP0 && Reg <= X86::FP6) { + Mask |= 1 << (Reg - X86::FP0); + if (RemoveFPs) { + I = MBB->removeLiveIn(I); + continue; + } + } + ++I; } return Mask; } // Partition all the CFG edges into LiveBundles. - void bundleCFG(MachineFunction &MF); + void bundleCFGRecomputeKillFlags(MachineFunction &MF); MachineBasicBlock *MBB; // Current basic block @@ -327,7 +335,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); // Prepare cross-MBB liveness. - bundleCFG(MF); + bundleCFGRecomputeKillFlags(MF); StackTop = 0; @@ -375,13 +383,15 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { /// registers live-out from a block is identical to the live-in set of all /// successors. This is not enforced by the normal live-in lists since /// registers may be implicitly defined, or not used by all successors. -void FPS::bundleCFG(MachineFunction &MF) { +void FPS::bundleCFGRecomputeKillFlags(MachineFunction &MF) { assert(LiveBundles.empty() && "Stale data in LiveBundles"); LiveBundles.resize(Bundles->getNumBundles()); // Gather the actual live-in masks for all MBBs. for (MachineBasicBlock &MBB : MF) { - const unsigned Mask = calcLiveInMask(&MBB); + setKillFlags(MBB); + + const unsigned Mask = calcLiveInMask(&MBB, false); if (!Mask) continue; // Update MBB ingoing bundle mask. @@ -396,7 +406,6 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; MBB = &BB; - setKillFlags(BB); setupBlockStack(); for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { @@ -453,6 +462,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { unsigned Reg = DeadRegs[i]; // Check if Reg is live on the stack. An inline-asm register operand that // is in the clobber list and marked dead might not be live on the stack. + static_assert(X86::FP7 - X86::FP0 == 7, "sequential FP regnumbers"); if (Reg >= X86::FP0 && Reg <= X86::FP6 && isLive(Reg-X86::FP0)) { DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); freeStackSlotAfter(I, Reg-X86::FP0); @@ -506,7 +516,6 @@ void FPS::setupBlockStack() { // Push the fixed live-in registers. for (unsigned i = Bundle.FixCount; i > 0; --i) { - MBB->addLiveIn(X86::ST0+i-1); DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" << unsigned(Bundle.FixStack[i-1]) << '\n'); pushReg(Bundle.FixStack[i-1]); @@ -515,7 +524,8 @@ void FPS::setupBlockStack() { // Kill off unwanted live-ins. This can happen with a critical edge. // FIXME: We could keep these live registers around as zombies. They may need // to be revived at the end of a short block. It might save a few instrs. - adjustLiveRegs(calcLiveInMask(MBB), MBB->begin()); + unsigned Mask = calcLiveInMask(MBB, /*RemoveFPs=*/true); + adjustLiveRegs(Mask, MBB->begin()); DEBUG(MBB->dump()); } @@ -1655,8 +1665,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { } void FPS::setKillFlags(MachineBasicBlock &MBB) const { - const TargetRegisterInfo *TRI = - MBB.getParent()->getSubtarget().getRegisterInfo(); + const TargetRegisterInfo &TRI = + *MBB.getParent()->getSubtarget().getRegisterInfo(); LivePhysRegs LPR(TRI); LPR.addLiveOuts(MBB); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index fc5c9ac2e25f374aee21418906128520c0c9afac..2777fa89330f609dbfe0c0e23f0aeca6d014a805 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -29,8 +29,8 @@ #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOptions.h" #include using namespace llvm; @@ -1062,6 +1062,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, } if (HasFP) { + assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved"); + // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; // If required, include space for extra hidden slot for stashing base pointer. @@ -1124,13 +1126,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, nullptr, DwarfFramePtr)); } } - - // Mark the FramePtr as live-in in every block. Don't do this again for - // funclet prologues. - if (!IsFunclet) { - for (MachineBasicBlock &EveryMBB : MF) - EveryMBB.addLiveIn(MachineFramePtr); - } } else { assert(!IsFunclet && "funclets without FPs not yet implemented"); NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); @@ -1698,21 +1693,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } -// NOTE: this only has a subset of the full frame index logic. In -// particular, the FI < 0 and AfterFPPop logic is handled in -// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly -// (probably?) it should be moved into here. int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); + bool IsFixed = MFI.isFixedObjectIndex(FI); // We can't calculate offset from frame pointer if the stack is realigned, // so enforce usage of stack/base pointer. The base pointer is used when we // have dynamic allocas in addition to dynamic realignment. if (TRI->hasBasePointer(MF)) - FrameReg = TRI->getBaseRegister(); + FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister(); else if (TRI->needsStackRealignment(MF)) - FrameReg = TRI->getStackRegister(); + FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister(); else FrameReg = TRI->getFrameRegister(MF); @@ -1786,6 +1778,14 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return Offset + FPDelta; } +int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &FrameReg, + int Adjustment) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + FrameReg = TRI->getStackRegister(); + return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment; +} + int X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, @@ -1842,9 +1842,6 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, assert(MF.getInfo()->getTCReturnAddrDelta() >= 0 && "we don't handle this case!"); - // Fill in FrameReg output argument. - FrameReg = TRI->getStackRegister(); - // This is how the math works out: // // %rsp grows (i.e. gets lower) left to right. Each box below is @@ -1869,12 +1866,8 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, // (C - E) == (C - A) - (B - A) + (B - E) // { Using [1], [2] and [3] above } // == getObjectOffset - LocalAreaOffset + StackSize - // - // Get the Offset from the StackPointer - int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); - - return Offset + StackSize; + return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize); } bool X86FrameLowering::assignCalleeSavedSpillSlots( @@ -1926,14 +1919,15 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots( continue; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + unsigned Size = TRI->getSpillSize(*RC); + unsigned Align = TRI->getSpillAlignment(*RC); // ensure alignment - SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment(); + SpillSlotOffset -= std::abs(SpillSlotOffset) % Align; // spill into slot - SpillSlotOffset -= RC->getSize(); - int SlotIndex = - MFI.CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset); + SpillSlotOffset -= Size; + int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset); CSI[i - 1].setFrameIdx(SlotIndex); - MFI.ensureMaxAlignment(RC->getAlignment()); + MFI.ensureMaxAlignment(Align); } return true; @@ -2626,8 +2620,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, unsigned Opcode = I->getOpcode(); bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); DebugLoc DL = I->getDebugLoc(); - uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0; - uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0; + uint64_t Amount = !reserveCallFrame ? TII.getFrameSize(*I) : 0; + uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0; I = MBB.erase(I); auto InsertPos = skipDebugInstructionsForward(I, MBB.end()); @@ -2991,6 +2985,10 @@ unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) void X86FrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { + // Mark the function as not having WinCFI. We will set it back to true in + // emitPrologue if it gets called and emits CFI. + MF.setHasWinCFI(false); + // If this function isn't doing Win64-style C++ EH, we don't need to do // anything. const Function *Fn = MF.getFunction(); diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index e1b04d6dc30034324b061646cecea7276c9d2a2a..7d214cabad536ab486671b1cb8c63d867928a90d 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -20,6 +20,7 @@ namespace llvm { class MachineInstrBuilder; class MCCFIInstruction; +class X86InstrInfo; class X86Subtarget; class X86RegisterInfo; @@ -30,7 +31,7 @@ public: // Cached subtarget predicates. const X86Subtarget &STI; - const TargetInstrInfo &TII; + const X86InstrInfo &TII; const X86RegisterInfo *TRI; unsigned SlotSize; @@ -99,6 +100,8 @@ public: int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + int getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &SPReg, int Adjustment) const; int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const override; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index eb5c56ff2ff9113c1a31ecab4f54d2382a409036..2a1633de0a2397f30ae05fdfe16ef63a474d4626 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -417,8 +418,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { case X86ISD::XOR: case X86ISD::OR: case ISD::ADD: - case ISD::ADDC: - case ISD::ADDE: + case ISD::ADDCARRY: case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -1070,9 +1070,9 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, } APInt MaskedHighBits = APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); - APInt KnownZero, KnownOne; - DAG.computeKnownBits(X, KnownZero, KnownOne); - if (MaskedHighBits != KnownZero) return true; + KnownBits Known; + DAG.computeKnownBits(X, Known); + if (MaskedHighBits != Known.Zero) return true; // We've identified a pattern that can be transformed into a single shift // and an addressing mode. Make it so. @@ -1176,8 +1176,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; - if (ConstantSDNode - *CN = dyn_cast(N.getNode()->getOperand(1))) { + if (ConstantSDNode *CN = dyn_cast(N.getOperand(1))) { unsigned Val = CN->getZExtValue(); // Note that we handle x<<1 as (,x,2) rather than (x,x) here so // that the base operand remains free for further matching. If @@ -1185,15 +1184,14 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // in MatchAddress turns (,x,2) into (x,x), which is cheaper. if (Val == 1 || Val == 2 || Val == 3) { AM.Scale = 1 << Val; - SDValue ShVal = N.getNode()->getOperand(0); + SDValue ShVal = N.getOperand(0); // Okay, we know that we have a scale by now. However, if the scaled // value is an add of something and a constant, we can fold the // constant into the disp field here. if (CurDAG->isBaseWithConstantOffset(ShVal)) { - AM.IndexReg = ShVal.getNode()->getOperand(0); - ConstantSDNode *AddVal = - cast(ShVal.getNode()->getOperand(1)); + AM.IndexReg = ShVal.getOperand(0); + ConstantSDNode *AddVal = cast(ShVal.getOperand(1)); uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val; if (!foldOffsetIntoAddress(Disp, AM)) return false; @@ -1243,28 +1241,27 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() == nullptr && AM.IndexReg.getNode() == nullptr) { - if (ConstantSDNode - *CN = dyn_cast(N.getNode()->getOperand(1))) + if (ConstantSDNode *CN = dyn_cast(N.getOperand(1))) if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || CN->getZExtValue() == 9) { AM.Scale = unsigned(CN->getZExtValue())-1; - SDValue MulVal = N.getNode()->getOperand(0); + SDValue MulVal = N.getOperand(0); SDValue Reg; // Okay, we know that we have a scale by now. However, if the scaled // value is an add of something and a constant, we can fold the // constant into the disp field here. if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && - isa(MulVal.getNode()->getOperand(1))) { - Reg = MulVal.getNode()->getOperand(0); + isa(MulVal.getOperand(1))) { + Reg = MulVal.getOperand(0); ConstantSDNode *AddVal = - cast(MulVal.getNode()->getOperand(1)); + cast(MulVal.getOperand(1)); uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue(); if (foldOffsetIntoAddress(Disp, AM)) - Reg = N.getNode()->getOperand(0); + Reg = N.getOperand(0); } else { - Reg = N.getNode()->getOperand(0); + Reg = N.getOperand(0); } AM.IndexReg = AM.Base_Reg = Reg; @@ -1287,7 +1284,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (matchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { + if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) { AM = Backup; break; } @@ -1298,7 +1295,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, } int Cost = 0; - SDValue RHS = Handle.getValue().getNode()->getOperand(1); + SDValue RHS = Handle.getValue().getOperand(1); // If the RHS involves a register with multiple uses, this // transformation incurs an extra mov, due to the neg instruction // clobbering its operand. @@ -1307,12 +1304,13 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, RHS.getNode()->getOpcode() == ISD::TRUNCATE || RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && - RHS.getNode()->getOperand(0).getValueType() == MVT::i32)) + RHS.getOperand(0).getValueType() == MVT::i32)) ++Cost; // If the base is a register with multiple uses, this // transformation may save a mov. - if ((AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() && + // FIXME: Don't rely on DELETED_NODEs. + if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() && + AM.Base_Reg->getOpcode() != ISD::DELETED_NODE && !AM.Base_Reg.getNode()->hasOneUse()) || AM.BaseType == X86ISelAddressMode::FrameIndexBase) --Cost; @@ -2521,7 +2519,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { - ConstantSDNode *C = dyn_cast(N0.getNode()->getOperand(1)); + ConstantSDNode *C = dyn_cast(N0.getOperand(1)); if (!C) break; // For example, convert "testl %eax, $8" to "testb %al, $8" @@ -2529,7 +2527,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { (!(C->getZExtValue() & 0x80) || hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8); - SDValue Reg = N0.getNode()->getOperand(0); + SDValue Reg = N0.getOperand(0); // On x86-32, only the ABCD registers have 8-bit subregisters. if (!Subtarget->is64Bit()) { @@ -2565,7 +2563,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { // Shift the immediate right by 8 bits. SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, dl, MVT::i8); - SDValue Reg = N0.getNode()->getOperand(0); + SDValue Reg = N0.getOperand(0); // Put the value in an ABCD register. const TargetRegisterClass *TRC; @@ -2602,7 +2600,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i16); - SDValue Reg = N0.getNode()->getOperand(0); + SDValue Reg = N0.getOperand(0); // Extract the 16-bit subregister. SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, @@ -2625,7 +2623,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i32); - SDValue Reg = N0.getNode()->getOperand(0); + SDValue Reg = N0.getOperand(0); // Extract the 32-bit subregister. SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5c8a95963c3bef8cf13bd40b446b50c6d3a28cb6..172eba0002d4f58b8d6fcb860fa19778b4aaaf1a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" @@ -52,6 +53,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -78,6 +80,23 @@ static cl::opt ExperimentalPrefLoopAlignment( " of the loop header PC will be 0)."), cl::Hidden); +static cl::opt MulConstantOptimization( + "mul-constant-optimization", cl::init(true), + cl::desc("Replace 'mul x, Const' with more effective instructions like " + "SHIFT, LEA, etc."), + cl::Hidden); + +/// Call this when the user attempts to do something unsupported, like +/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike +/// report_fatal_error, so calling code should attempt to recover without +/// crashing. +static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, + const char *Msg) { + MachineFunction &MF = DAG.getMachineFunction(); + DAG.getContext()->diagnose( + DiagnosticInfoUnsupported(*MF.getFunction(), Msg, dl.getDebugLoc())); +} + X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI) : TargetLowering(TM), Subtarget(STI) { @@ -298,16 +317,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UREM, VT, Expand); } - for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { - if (VT == MVT::i64 && !Subtarget.is64Bit()) - continue; - // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences. - setOperationAction(ISD::ADDC, VT, Custom); - setOperationAction(ISD::ADDE, VT, Custom); - setOperationAction(ISD::SUBC, VT, Custom); - setOperationAction(ISD::SUBE, VT, Custom); - } - setOperationAction(ISD::BR_JT , MVT::Other, Expand); setOperationAction(ISD::BRCOND , MVT::Other, Custom); for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, @@ -409,7 +418,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, continue; setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::SETCCE, VT, Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support @@ -657,7 +665,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); - setOperationAction(ISD::FPOWI, VT, Expand); setOperationAction(ISD::FCOPYSIGN, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); @@ -784,30 +791,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SMIN, MVT::v8i16, Legal); setOperationAction(ISD::UMIN, MVT::v16i8, Legal); - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - setOperationAction(ISD::SETCC, MVT::v16i8, Custom); - setOperationAction(ISD::SETCC, MVT::v8i16, Custom); - setOperationAction(ISD::SETCC, MVT::v4i32, Custom); - - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - setOperationAction(ISD::CTPOP, MVT::v16i8, Custom); - setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); - setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); - setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); - - setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); - setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); - setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); - setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); + for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Custom); + } - // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -882,18 +877,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); - for (auto VT : { MVT::v8i16, MVT::v16i8 }) { - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - } - - // In the customized shift lowering, the legal cases in AVX2 will be - // recognized. - for (auto VT : { MVT::v4i32, MVT::v2i64 }) { - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); + // In the customized shift lowering, the legal v4i32/v2i64 cases + // in AVX2 will be recognized. + for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); } } @@ -935,13 +924,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // SSE41 brings specific instructions for doing vector sign extend even in // cases where we don't have SRA. - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Legal); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Legal); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Legal); - - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v2i64, Legal); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v4i32, Legal); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v8i16, Legal); + for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); + } for (MVT VT : MVT::integer_vector_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom); @@ -950,19 +936,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X - setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal); - - setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal); + for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { + setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal); + } // i8 vectors are custom because the source register and source // source memory operand types are not the same width. @@ -1026,36 +1007,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal); - for (auto VT : { MVT::v32i8, MVT::v16i16 }) { + // In the customized shift lowering, the legal v8i32/v4i64 cases + // in AVX2 will be recognized. + for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); } - setOperationAction(ISD::SETCC, MVT::v32i8, Custom); - setOperationAction(ISD::SETCC, MVT::v16i16, Custom); - setOperationAction(ISD::SETCC, MVT::v8i32, Custom); - setOperationAction(ISD::SETCC, MVT::v4i64, Custom); - setOperationAction(ISD::SELECT, MVT::v4f64, Custom); setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8f32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom); + for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); + } + setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { + setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Custom); setOperationAction(ISD::CTLZ, VT, Custom); @@ -1103,27 +1079,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X - setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal); - setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal); - - setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal); - setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal); - } - - // In the customized shift lowering, the legal cases in AVX2 will be - // recognized. - for (auto VT : { MVT::v8i32, MVT::v4i64 }) { - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); + for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { + setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); + setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); + setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); + } } for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, @@ -1171,7 +1134,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addRegisterClass(MVT::v8i64, &X86::VR512RegClass); addRegisterClass(MVT::v8f64, &X86::VR512RegClass); - addRegisterClass(MVT::i1, &X86::VK1RegClass); + addRegisterClass(MVT::v1i1, &X86::VK1RegClass); addRegisterClass(MVT::v8i1, &X86::VK8RegClass); addRegisterClass(MVT::v16i1, &X86::VK16RegClass); @@ -1186,16 +1149,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); } - setOperationAction(ISD::BR_CC, MVT::i1, Expand); - setOperationAction(ISD::SETCC, MVT::i1, Custom); - setOperationAction(ISD::SETCCE, MVT::i1, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); - setOperationAction(ISD::XOR, MVT::i1, Legal); - setOperationAction(ISD::OR, MVT::i1, Legal); - setOperationAction(ISD::AND, MVT::i1, Legal); - setOperationAction(ISD::SUB, MVT::i1, Custom); - setOperationAction(ISD::ADD, MVT::i1, Custom); - setOperationAction(ISD::MUL, MVT::i1, Custom); for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16, MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32, @@ -1264,27 +1217,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MSTORE, VT, Custom); } } - setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom); - setOperationAction(ISD::VSELECT, MVT::v8i1, Expand); - setOperationAction(ISD::VSELECT, MVT::v16i1, Expand); - if (Subtarget.hasDQI()) { - setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + if (Subtarget.hasDQI()) { + for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) { + setOperationAction(ISD::SINT_TO_FP, VT, Legal); + setOperationAction(ISD::UINT_TO_FP, VT, Legal); + setOperationAction(ISD::FP_TO_SINT, VT, Legal); + setOperationAction(ISD::FP_TO_UINT, VT, Legal); + } if (Subtarget.hasVLX()) { // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion. setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); @@ -1293,8 +1235,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } if (Subtarget.hasVLX()) { - setOperationAction(ISD::ABS, MVT::v4i64, Legal); - setOperationAction(ISD::ABS, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); @@ -1320,8 +1260,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal); } - setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); @@ -1334,11 +1272,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); for (auto VT : { MVT::v16f32, MVT::v8f64 }) { - setOperationAction(ISD::FFLOOR, VT, Legal); - setOperationAction(ISD::FCEIL, VT, Legal); - setOperationAction(ISD::FTRUNC, VT, Legal); - setOperationAction(ISD::FRINT, VT, Legal); - setOperationAction(ISD::FNEARBYINT, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FRINT, VT, Legal); + setOperationAction(ISD::FNEARBYINT, VT, Legal); } setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom); @@ -1354,49 +1292,47 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); - setOperationAction(ISD::SETCC, MVT::v16i1, Custom); - setOperationAction(ISD::SETCC, MVT::v8i1, Custom); + setOperationAction(ISD::MUL, MVT::v8i64, Custom); - setOperationAction(ISD::MUL, MVT::v8i64, Custom); - - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); setOperationAction(ISD::SELECT, MVT::v8f64, Custom); setOperationAction(ISD::SELECT, MVT::v8i64, Custom); setOperationAction(ISD::SELECT, MVT::v16f32, Custom); - setOperationAction(ISD::SELECT, MVT::v16i1, Custom); - setOperationAction(ISD::SELECT, MVT::v8i1, Custom); - - setOperationAction(ISD::SMAX, MVT::v16i32, Legal); - setOperationAction(ISD::SMAX, MVT::v8i64, Legal); - setOperationAction(ISD::UMAX, MVT::v16i32, Legal); - setOperationAction(ISD::UMAX, MVT::v8i64, Legal); - setOperationAction(ISD::SMIN, MVT::v16i32, Legal); - setOperationAction(ISD::SMIN, MVT::v8i64, Legal); - setOperationAction(ISD::UMIN, MVT::v16i32, Legal); - setOperationAction(ISD::UMIN, MVT::v8i64, Legal); - - setOperationAction(ISD::ADD, MVT::v8i1, Custom); - setOperationAction(ISD::ADD, MVT::v16i1, Custom); - setOperationAction(ISD::SUB, MVT::v8i1, Custom); - setOperationAction(ISD::SUB, MVT::v16i1, Custom); - setOperationAction(ISD::MUL, MVT::v8i1, Custom); - setOperationAction(ISD::MUL, MVT::v16i1, Custom); setOperationAction(ISD::MUL, MVT::v16i32, Legal); + // NonVLX sub-targets extend 128/256 vectors to use the 512 version. + setOperationAction(ISD::ABS, MVT::v4i64, Legal); + setOperationAction(ISD::ABS, MVT::v2i64, Legal); + + for (auto VT : { MVT::v8i1, MVT::v16i1 }) { + setOperationAction(ISD::ADD, VT, Custom); + setOperationAction(ISD::SUB, VT, Custom); + setOperationAction(ISD::MUL, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Expand); + } + for (auto VT : { MVT::v16i32, MVT::v8i64 }) { - setOperationAction(ISD::ABS, VT, Legal); - setOperationAction(ISD::SRL, VT, Custom); - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::CTTZ, VT, Custom); + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::CTPOP, VT, Custom); + setOperationAction(ISD::CTTZ, VT, Custom); } // Need to promote to 64-bit even though we have 32-bit masked instructions @@ -1407,33 +1343,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64); if (Subtarget.hasCDI()) { - setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); - setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); - - setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); - setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); - setOperationAction(ISD::CTLZ, MVT::v16i16, Custom); - setOperationAction(ISD::CTLZ, MVT::v32i8, Custom); - - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom); - - if (Subtarget.hasVLX()) { - setOperationAction(ISD::CTLZ, MVT::v4i64, Legal); - setOperationAction(ISD::CTLZ, MVT::v8i32, Legal); - setOperationAction(ISD::CTLZ, MVT::v2i64, Legal); - setOperationAction(ISD::CTLZ, MVT::v4i32, Legal); - } else { - setOperationAction(ISD::CTLZ, MVT::v4i64, Custom); - setOperationAction(ISD::CTLZ, MVT::v8i32, Custom); - setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); - setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); + // NonVLX sub-targets extend 128/256 vectors to use the 512 version. + for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64, + MVT::v4i64, MVT::v8i64}) { + setOperationAction(ISD::CTLZ, VT, Legal); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); } - - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); } // Subtarget.hasCDI() if (Subtarget.hasDQI()) { @@ -1443,6 +1358,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v8i64, Legal); } + if (Subtarget.hasVPOPCNTDQ()) { + // VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512 + // version of popcntd/q. + for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64, + MVT::v4i32, MVT::v2i64}) + setOperationAction(ISD::CTPOP, VT, Legal); + } + // Custom lower several nodes. for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { @@ -1463,7 +1386,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); @@ -1527,8 +1450,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); - setOperationAction(ISD::VSELECT, MVT::v32i16, Legal); - setOperationAction(ISD::VSELECT, MVT::v64i8, Legal); setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); @@ -1540,15 +1461,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::VSELECT, MVT::v64i1, Expand); setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); - setOperationAction(ISD::SMAX, MVT::v64i8, Legal); - setOperationAction(ISD::SMAX, MVT::v32i16, Legal); - setOperationAction(ISD::UMAX, MVT::v64i8, Legal); - setOperationAction(ISD::UMAX, MVT::v32i16, Legal); - setOperationAction(ISD::SMIN, MVT::v64i8, Legal); - setOperationAction(ISD::SMIN, MVT::v32i16, Legal); - setOperationAction(ISD::UMIN, MVT::v64i8, Legal); - setOperationAction(ISD::UMIN, MVT::v32i16, Legal); - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom); setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); @@ -1570,7 +1482,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); @@ -1579,6 +1491,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MSTORE, VT, Legal); setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Custom); + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); setOperationPromotedToType(ISD::AND, VT, MVT::v8i64); setOperationPromotedToType(ISD::OR, VT, MVT::v8i64); @@ -1652,6 +1568,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::USUBO, VT, Custom); setOperationAction(ISD::SMULO, VT, Custom); setOperationAction(ISD::UMULO, VT, Custom); + + // Support carry in as value rather than glue. + setOperationAction(ISD::ADDCARRY, VT, Custom); + setOperationAction(ISD::SUBCARRY, VT, Custom); + setOperationAction(ISD::SETCCCARRY, VT, Custom); } if (!Subtarget.is64Bit()) { @@ -1772,7 +1693,7 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext& Context, EVT VT) const { if (!VT.isVector()) - return Subtarget.hasAVX512() ? MVT::i1: MVT::i8; + return MVT::i8; if (VT.isSimple()) { MVT VVT = VT.getSimpleVT(); @@ -2236,6 +2157,12 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); + // In some cases we need to disable registers from the default CSR list. + // For example, when they are used for argument passing. + bool ShouldDisableCalleeSavedRegister = + CallConv == CallingConv::X86_RegCall || + MF.getFunction()->hasFnAttribute("no_caller_saved_registers"); + if (CallConv == CallingConv::X86_INTR && !Outs.empty()) report_fatal_error("X86 interrupts may not return any value"); @@ -2257,7 +2184,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(VA.isRegLoc() && "Can only return in registers!"); // Add the register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg()); SDValue ValToCopy = OutVals[OutsIndex]; @@ -2284,15 +2211,17 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // or SSE or MMX vectors. if ((ValVT == MVT::f32 || ValVT == MVT::f64 || VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) && - (Subtarget.is64Bit() && !Subtarget.hasSSE1())) { - report_fatal_error("SSE register return with SSE disabled"); + (Subtarget.is64Bit() && !Subtarget.hasSSE1())) { + errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); + VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. + } else if (ValVT == MVT::f64 && + (Subtarget.is64Bit() && !Subtarget.hasSSE2())) { + // Likewise we can't return F64 values with SSE1 only. gcc does so, but + // llvm-gcc has never done it right and no one has noticed, so this + // should be OK for now. + errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); + VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. } - // Likewise we can't return F64 values with SSE1 only. gcc does so, but - // llvm-gcc has never done it right and no one has noticed, so this - // should be OK for now. - if (ValVT == MVT::f64 && - (Subtarget.is64Bit() && !Subtarget.hasSSE2())) - report_fatal_error("SSE2 register return with SSE2 disabled"); // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. @@ -2336,7 +2265,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, "Expecting two registers after Pass64BitArgInRegs"); // Add the second register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg()); } else { RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); @@ -2396,7 +2325,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); // Add the returned register to the CalleeSaveDisableRegs list. - if (CallConv == CallingConv::X86_RegCall) + if (ShouldDisableCalleeSavedRegister) MF.getRegInfo().disableCalleeSavedRegister(RetValReg); } @@ -2545,6 +2474,9 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, SelectionDAG &DAG) { SDValue ValReturned = ValArg; + if (ValVT == MVT::v1i1) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned); + if (ValVT == MVT::v64i1) { // In 32 bit machine, this case is handled by getv64i1Argument assert(ValLoc == MVT::i64 && "Expecting only i64 locations"); @@ -2567,7 +2499,6 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned); } - return DAG.getBitcast(ValVT, ValReturned); } @@ -2596,7 +2527,7 @@ SDValue X86TargetLowering::LowerCallResult( // In some calling conventions we need to remove the used registers // from the register mask. - if (RegMask && CallConv == CallingConv::X86_RegCall) { + if (RegMask) { for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); @@ -2605,7 +2536,8 @@ SDValue X86TargetLowering::LowerCallResult( // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) { - report_fatal_error("SSE register return with SSE disabled"); + errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); + VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts. } // If we prefer to use the value in xmm registers, copy it out as f80 and @@ -2742,13 +2674,13 @@ static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { return GuaranteedTailCallOpt && canGuaranteeTCO(CC); } -bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { +bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { auto Attr = CI->getParent()->getParent()->getFnAttribute("disable-tail-calls"); if (!CI->isTailCall() || Attr.getValueAsString() == "true") return false; - CallSite CS(CI); + ImmutableCallSite CS(CI); CallingConv::ID CalleeCC = CS.getCallingConv(); if (!mayTailCallThisCC(CalleeCC)) return false; @@ -2873,8 +2805,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, SDValue Val = DAG.getLoad( ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); - return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) - : Val; + return ExtendedInMem + ? (VA.getValVT().isVector() + ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) + : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) + : Val; } // FIXME: Get this from tablegen. @@ -3024,7 +2959,7 @@ SDValue X86TargetLowering::LowerFormalArguments( RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass; else if (RegVT == MVT::x86mmx) RC = &X86::VR64RegClass; - else if (RegVT == MVT::i1) + else if (RegVT == MVT::v1i1) RC = &X86::VK1RegClass; else if (RegVT == MVT::v8i1) RC = &X86::VK8RegClass; @@ -3293,7 +3228,8 @@ SDValue X86TargetLowering::LowerFormalArguments( } } - if (CallConv == CallingConv::X86_RegCall) { + if (CallConv == CallingConv::X86_RegCall || + Fn->hasFnAttribute("no_caller_saved_registers")) { const MachineRegisterInfo &MRI = MF.getRegInfo(); for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end())) MF.getRegInfo().disableCalleeSavedRegister(Pair.first); @@ -3385,6 +3321,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo(); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + const CallInst *CI = + CLI.CS ? dyn_cast(CLI.CS->getInstruction()) : nullptr; + const Function *Fn = CI ? CI->getCalledFunction() : nullptr; + bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) || + (Fn && Fn->hasFnAttribute("no_caller_saved_registers")); if (CallConv == CallingConv::X86_INTR) report_fatal_error("X86 interrupts may not be called directly"); @@ -3486,8 +3427,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (!IsSibcall) - Chain = DAG.getCALLSEQ_START( - Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush, + NumBytes - NumBytesToPush, dl); SDValue RetAddrFrIdx; // Load return address for tail calls. @@ -3797,7 +3738,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); + // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we + // set X86_INTR calling convention because it has the same CSR mask + // (same preserved registers). + const uint32_t *Mask = RegInfo->getCallPreservedMask( + MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv); assert(Mask && "Missing call preserved mask for calling convention"); // If this is an invoke in a 32-bit function using a funclet-based @@ -3820,7 +3765,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // In some calling conventions we need to remove the used physical registers // from the reg mask. - if (CallConv == CallingConv::X86_RegCall) { + if (CallConv == CallingConv::X86_RegCall || HasNCSR) { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); // Allocate a new Reg Mask and copy Mask. @@ -4813,7 +4758,7 @@ static void scaleShuffleMask(int Scale, ArrayRef Mask, SmallVectorImpl &ScaledMask) { assert(0 < Scale && "Unexpected scaling factor"); int NumElts = Mask.size(); - ScaledMask.assign(NumElts * Scale, -1); + ScaledMask.assign(static_cast(NumElts * Scale), -1); for (int i = 0; i != NumElts; ++i) { int M = Mask[i]; @@ -4839,14 +4784,10 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) { return false; // The index should be aligned on a vecWidth-bit boundary. - uint64_t Index = - cast(N->getOperand(1).getNode())->getZExtValue(); - + uint64_t Index = N->getConstantOperandVal(1); MVT VT = N->getSimpleValueType(0); unsigned ElSize = VT.getScalarSizeInBits(); - bool Result = (Index * ElSize) % vecWidth == 0; - - return Result; + return (Index * ElSize) % vecWidth == 0; } /// Return true if the specified INSERT_SUBVECTOR @@ -4856,15 +4797,12 @@ static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width"); if (!isa(N->getOperand(2).getNode())) return false; - // The index should be aligned on a vecWidth-bit boundary. - uint64_t Index = - cast(N->getOperand(2).getNode())->getZExtValue(); + // The index should be aligned on a vecWidth-bit boundary. + uint64_t Index = N->getConstantOperandVal(2); MVT VT = N->getSimpleValueType(0); unsigned ElSize = VT.getScalarSizeInBits(); - bool Result = (Index * ElSize) % vecWidth == 0; - - return Result; + return (Index * ElSize) % vecWidth == 0; } bool X86::isVINSERT128Index(SDNode *N) { @@ -4888,13 +4826,9 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) { assert(isa(N->getOperand(1).getNode()) && "Illegal extract subvector for VEXTRACT"); - uint64_t Index = - cast(N->getOperand(1).getNode())->getZExtValue(); - + uint64_t Index = N->getConstantOperandVal(1); MVT VecVT = N->getOperand(0).getSimpleValueType(); - MVT ElVT = VecVT.getVectorElementType(); - - unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits(); + unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits(); return Index / NumElemsPerChunk; } @@ -4903,13 +4837,9 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) { assert(isa(N->getOperand(2).getNode()) && "Illegal insert subvector for VINSERT"); - uint64_t Index = - cast(N->getOperand(2).getNode())->getZExtValue(); - + uint64_t Index = N->getConstantOperandVal(2); MVT VecVT = N->getSimpleValueType(0); - MVT ElVT = VecVT.getVectorElementType(); - - unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits(); + unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits(); return Index / NumElemsPerChunk; } @@ -4942,9 +4872,9 @@ bool X86::isZeroNode(SDValue Elt) { return isNullConstant(Elt) || isNullFPConstant(Elt); } -// Build a vector of constants +// Build a vector of constants. // Use an UNDEF node if MaskElt == -1. -// Spilt 64-bit constants in the 32-bit mode. +// Split 64-bit constants in the 32-bit mode. static SDValue getConstVector(ArrayRef Values, MVT VT, SelectionDAG &DAG, const SDLoc &dl, bool IsMask = false) { @@ -5060,8 +4990,8 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, // If the input is a buildvector just emit a smaller one. if (Vec.getOpcode() == ISD::BUILD_VECTOR) - return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, - makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); + return DAG.getBuildVector( + ResultVT, dl, makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); @@ -5383,20 +5313,37 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"); unsigned NumElts = SizeInBits / EltSizeInBits; - unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); - unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; - - // Extract all the undef/constant element data and pack into single bitsets. - APInt UndefBits(SizeInBits, 0); - APInt MaskBits(SizeInBits, 0); + // Bitcast a source array of element bits to the target size. + auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef SrcEltBits) { + unsigned NumSrcElts = UndefSrcElts.getBitWidth(); + unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth(); + assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits && + "Constant bit sizes don't match"); - // Split the undef/constant single bitset data into the target elements. - auto SplitBitData = [&]() { // Don't split if we don't allow undef bits. bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs; - if (UndefBits.getBoolValue() && !AllowUndefs) + if (UndefSrcElts.getBoolValue() && !AllowUndefs) return false; + // If we're already the right size, don't bother bitcasting. + if (NumSrcElts == NumElts) { + UndefElts = UndefSrcElts; + EltBits.assign(SrcEltBits.begin(), SrcEltBits.end()); + return true; + } + + // Extract all the undef/constant element data and pack into single bitsets. + APInt UndefBits(SizeInBits, 0); + APInt MaskBits(SizeInBits, 0); + + for (unsigned i = 0; i != NumSrcElts; ++i) { + unsigned BitOffset = i * SrcEltSizeInBits; + if (UndefSrcElts[i]) + UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); + MaskBits.insertBits(SrcEltBits[i], BitOffset); + } + + // Split the undef/constant single bitset data into the target elements. UndefElts = APInt(NumElts, 0); EltBits.resize(NumElts, APInt(EltSizeInBits, 0)); @@ -5425,20 +5372,19 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, // Collect constant bits and insert into mask/undef bit masks. auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs, - unsigned BitOffset) { + unsigned UndefBitIndex) { if (!Cst) return false; - unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits(); if (isa(Cst)) { - Undefs.setBits(BitOffset, BitOffset + CstSizeInBits); + Undefs.setBit(UndefBitIndex); return true; } if (auto *CInt = dyn_cast(Cst)) { - Mask.insertBits(CInt->getValue(), BitOffset); + Mask = CInt->getValue(); return true; } if (auto *CFP = dyn_cast(Cst)) { - Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset); + Mask = CFP->getValueAPF().bitcastToAPInt(); return true; } return false; @@ -5446,18 +5392,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, // Extract constant bits from build vector. if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { + unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { const SDValue &Src = Op.getOperand(i); - unsigned BitOffset = i * SrcEltSizeInBits; if (Src.isUndef()) { - UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); + UndefSrcElts.setBit(i); continue; } auto *Cst = cast(Src); - APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); - MaskBits.insertBits(Bits, BitOffset); + SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); } - return SplitBitData(); + return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from constant pool vector. @@ -5466,27 +5415,33 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits())) return false; - unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); - for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) - if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits, - i * CstEltSizeInBits)) + unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits(); + unsigned NumSrcElts = CstTy->getVectorNumElements(); + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); + for (unsigned i = 0; i != NumSrcElts; ++i) + if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i], + UndefSrcElts, i)) return false; - return SplitBitData(); + return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from a broadcasted constant pool scalar. if (Op.getOpcode() == X86ISD::VBROADCAST && - EltSizeInBits <= SrcEltSizeInBits) { + EltSizeInBits <= VT.getScalarSizeInBits()) { if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) { - APInt Bits(SizeInBits, 0); - APInt Undefs(SizeInBits, 0); - if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) { - for (unsigned i = 0; i != NumSrcElts; ++i) { - MaskBits |= Bits.shl(i * SrcEltSizeInBits); - UndefBits |= Undefs.shl(i * SrcEltSizeInBits); - } - return SplitBitData(); + unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits(1, APInt(SrcEltSizeInBits, 0)); + if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) { + if (UndefSrcElts[0]) + UndefSrcElts.setBits(0, NumSrcElts); + SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); + return CastBitData(UndefSrcElts, SrcEltBits); } } } @@ -5495,10 +5450,15 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, if (Op.getOpcode() == X86ISD::VZEXT_MOVL && Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && isa(Op.getOperand(0).getOperand(0))) { + unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); + unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + + APInt UndefSrcElts(NumSrcElts, 0); + SmallVector SrcEltBits; auto *CN = cast(Op.getOperand(0).getOperand(0)); - MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); - MaskBits = MaskBits.zext(SizeInBits); - return SplitBitData(); + SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits)); + SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); + return CastBitData(UndefSrcElts, SrcEltBits); } return false; @@ -5885,7 +5845,8 @@ static bool setTargetShuffleZeroElements(SDValue N, // The decoded shuffle mask may contain a different number of elements to the // destination value type. static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, - SmallVectorImpl &Ops) { + SmallVectorImpl &Ops, + SelectionDAG &DAG) { Mask.clear(); Ops.clear(); @@ -5923,17 +5884,42 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, return true; } case ISD::SCALAR_TO_VECTOR: { - // Match against a scalar_to_vector of an extract from a similar vector. + // Match against a scalar_to_vector of an extract from a vector, + // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar. SDValue N0 = N.getOperand(0); - if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - N0.getOperand(0).getValueType() != VT || - !isa(N0.getOperand(1)) || - NumElts <= N0.getConstantOperandVal(1) || - !N->isOnlyUserOf(N0.getNode())) + SDValue SrcExtract; + + if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getValueType() == VT) { + SrcExtract = N0; + } else if (N0.getOpcode() == ISD::AssertZext && + N0.getOperand(0).getOpcode() == X86ISD::PEXTRW && + cast(N0.getOperand(1))->getVT() == MVT::i16) { + SrcExtract = N0.getOperand(0); + assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16); + } else if (N0.getOpcode() == ISD::AssertZext && + N0.getOperand(0).getOpcode() == X86ISD::PEXTRB && + cast(N0.getOperand(1))->getVT() == MVT::i8) { + SrcExtract = N0.getOperand(0); + assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8); + } + + if (!SrcExtract || !isa(SrcExtract.getOperand(1))) + return false; + + SDValue SrcVec = SrcExtract.getOperand(0); + EVT SrcVT = SrcVec.getValueType(); + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1; + + unsigned SrcIdx = SrcExtract.getConstantOperandVal(1); + if (NumSrcElts <= SrcIdx) return false; - Ops.push_back(N0.getOperand(0)); - Mask.push_back(N0.getConstantOperandVal(1)); - Mask.append(NumElts - 1, SM_SentinelUndef); + + Ops.push_back(SrcVec); + Mask.push_back(SrcIdx); + Mask.append(NumZeros, SM_SentinelZero); + Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef); return true; } case X86ISD::PINSRB: @@ -5968,6 +5954,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, Mask.push_back(i == InIdx ? NumElts + ExIdx : i); return true; } + case X86ISD::PACKSS: { + // If we know input saturation won't happen we can treat this + // as a truncation shuffle. + if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt || + DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt) + return false; + + Ops.push_back(N.getOperand(0)); + Ops.push_back(N.getOperand(1)); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(i * 2); + return true; + } case X86ISD::VSHLI: case X86ISD::VSRLI: { uint64_t ShiftVal = N.getConstantOperandVal(1); @@ -6042,9 +6041,10 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, /// Returns true if the target shuffle mask was decoded. static bool resolveTargetShuffleInputs(SDValue Op, SmallVectorImpl &Inputs, - SmallVectorImpl &Mask) { + SmallVectorImpl &Mask, + SelectionDAG &DAG) { if (!setTargetShuffleZeroElements(Op, Mask, Inputs)) - if (!getFauxShuffleMask(Op, Mask, Inputs)) + if (!getFauxShuffleMask(Op, Mask, Inputs, DAG)) return false; resolveTargetShuffleInputsAndMask(Inputs, Mask); @@ -6125,7 +6125,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - if (NumNonZero > 8) + if (NumNonZero > 8 && !Subtarget.hasSSE41()) return SDValue(); SDLoc dl(Op); @@ -6213,7 +6213,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - if (NumNonZero > 4) + if (NumNonZero > 4 && !Subtarget.hasSSE41()) return SDValue(); SDLoc dl(Op); @@ -6296,7 +6296,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, Elt = Op->getOperand(EltIdx); // By construction, Elt is a EXTRACT_VECTOR_ELT with constant index. - EltMaskIdx = cast(Elt.getOperand(1))->getZExtValue(); + EltMaskIdx = Elt.getConstantOperandVal(1); if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx) break; Mask[EltIdx] = EltIdx; @@ -6327,8 +6327,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, SDValue SrcVector = Current->getOperand(0); if (!V1.getNode()) V1 = SrcVector; - CanFold = SrcVector == V1 && - cast(Current.getOperand(1))->getZExtValue() == i; + CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i); } if (!CanFold) @@ -6445,6 +6444,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, /// Example: -> zextload a static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, const SDLoc &DL, SelectionDAG &DAG, + const X86Subtarget &Subtarget, bool isAfterLegalize) { unsigned NumElems = Elts.size(); @@ -6520,16 +6520,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags); - - if (LDBase->hasAnyUseOfValue(1)) { - SDValue NewChain = - DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), - SDValue(NewLd.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), - SDValue(NewLd.getNode(), 1)); - } - + DAG.makeEquivalentMemoryOrdering(LDBase, NewLd); return NewLd; }; @@ -6549,6 +6540,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT)) return SDValue(); + // Don't create 256-bit non-temporal aligned loads without AVX2 as these + // will lower to regular temporal loads and use the cache. + if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 && + VT.is256BitVector() && !Subtarget.hasInt256()) + return SDValue(); + if (IsConsecutiveLoad) return CreateLoad(VT, LDBase); @@ -6588,19 +6585,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); - - // Make sure the newly-created LOAD is in the same position as LDBase in - // terms of dependency. We create a TokenFactor for LDBase and ResNode, - // and update uses of LDBase's output chain to use the TokenFactor. - if (LDBase->hasAnyUseOfValue(1)) { - SDValue NewChain = - DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1), - SDValue(ResNode.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1), - SDValue(ResNode.getNode(), 1)); - } - + DAG.makeEquivalentMemoryOrdering(LDBase, ResNode); return DAG.getBitcast(VT, ResNode); } } @@ -6618,12 +6603,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue, APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); Constant *Const; if (VT.isFloatingPoint()) { - assert((ScalarSize == 32 || ScalarSize == 64) && - "Unsupported floating point scalar size"); - if (ScalarSize == 32) - Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat()); - else - Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble()); + if (ScalarSize == 32) { + Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val)); + } else { + assert(ScalarSize == 64 && "Unsupported floating point scalar size"); + Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val)); + } } else Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val); ConstantVec.push_back(Const); @@ -6641,18 +6626,16 @@ static bool isUseOfShuffle(SDNode *N) { return false; } -/// Attempt to use the vbroadcast instruction to generate a splat value for the -/// following cases: -/// 1. A splat BUILD_VECTOR which uses: -/// a. A single scalar load, or a constant. -/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>). -/// 2. A splat shuffle which uses a scalar_to_vector node which comes from -/// a scalar load, or a constant. +/// Attempt to use the vbroadcast instruction to generate a splat value +/// from a splat BUILD_VECTOR which uses: +/// a. A single scalar load, or a constant. +/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>). /// /// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. -static SDValue LowerVectorBroadcast(BuildVectorSDNode *BVOp, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { // VBROADCAST requires AVX. // TODO: Splats could be generated for non-AVX CPUs using SSE // instructions, but there's less potential gain for only 128-bit vectors. @@ -6711,11 +6694,13 @@ static SDValue LowerVectorBroadcast(BuildVectorSDNode *BVOp, const X86Subtarget // AVX have support for 32 and 64 bit broadcast for floats only. // No 64bit integer in 32bit subtarget. MVT CVT = MVT::getFloatingPointVT(SplatBitSize); - Constant *C = SplatBitSize == 32 - ? ConstantFP::get(Type::getFloatTy(*Ctx), - SplatValue.bitsToFloat()) - : ConstantFP::get(Type::getDoubleTy(*Ctx), - SplatValue.bitsToDouble()); + // Lower the splat via APFloat directly, to avoid any conversion. + Constant *C = + SplatBitSize == 32 + ? ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEsingle(), SplatValue)) + : ConstantFP::get(*Ctx, + APFloat(APFloat::IEEEdouble(), SplatValue)); SDValue CP = DAG.getConstantPool(C, PVT); unsigned Repeat = VT.getSizeInBits() / SplatBitSize; @@ -6943,7 +6928,7 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) { SDValue In = Op.getOperand(idx); if (!In.isUndef()) - Immediate |= cast(In)->getZExtValue() << idx; + Immediate |= (cast(In)->getZExtValue() & 0x1) << idx; } SDLoc dl(Op); MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8)); @@ -6986,7 +6971,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { if (!isa(In)) NonConstIdx.push_back(idx); else { - Immediate |= cast(In)->getZExtValue() << idx; + Immediate |= (cast(In)->getZExtValue() & 0x1) << idx; HasConstElts = true; } if (SplatIdx < 0) @@ -6997,9 +6982,9 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { // for splat use " (select i1 splat_elt, all-ones, all-zeroes)" if (IsSplat) - return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx), - DAG.getConstant(1, dl, VT), - DAG.getConstant(0, dl, VT)); + return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx), + DAG.getConstant(1, dl, VT), + DAG.getConstant(0, dl, VT)); // insert elements one by one SDValue DstVec; @@ -7605,7 +7590,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return AddSub; if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG)) return HorizontalOp; - if (SDValue Broadcast = LowerVectorBroadcast(BV, Subtarget, DAG)) + if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG)) return Broadcast; if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG)) return BitOp; @@ -7755,7 +7740,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // See if we can use a vector load to get all of the elements. if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) { SmallVector Ops(Op->op_begin(), Op->op_begin() + NumElems); - if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, false)) + if (SDValue LD = + EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) return LD; } @@ -7879,24 +7865,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } // Next, we iteratively mix elements, e.g. for v4f32: - // Step 1: unpcklps 0, 2 ==> X: - // : unpcklps 1, 3 ==> Y: - // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> - unsigned EltStride = NumElems >> 1; - while (EltStride != 0) { - for (unsigned i = 0; i < EltStride; ++i) { - // If Ops[i+EltStride] is undef and this is the first round of mixing, - // then it is safe to just drop this shuffle: V[i] is already in the - // right place, the one element (since it's the first round) being - // inserted as undef can be dropped. This isn't safe for successive - // rounds because they will permute elements within both vectors. - if (Ops[i+EltStride].isUndef() && - EltStride == NumElems/2) - continue; - - Ops[i] = getUnpackl(DAG, dl, VT, Ops[i], Ops[i + EltStride]); - } - EltStride >>= 1; + // Step 1: unpcklps 0, 1 ==> X: + // : unpcklps 2, 3 ==> Y: + // Step 2: unpcklpd X, Y ==> <3, 2, 1, 0> + for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) { + // Generate scaled UNPCKL shuffle mask. + SmallVector Mask; + for(unsigned i = 0; i != Scale; ++i) + Mask.push_back(i); + for (unsigned i = 0; i != Scale; ++i) + Mask.push_back(NumElems+i); + Mask.append(NumElems - Mask.size(), SM_SentinelUndef); + + for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i) + Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask); } return Ops[0]; } @@ -8081,7 +8063,7 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef Mask) { static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef Mask, SmallVectorImpl &RepeatedMask) { - int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); + auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); RepeatedMask.assign(LaneSize, -1); int Size = Mask.size(); for (int i = 0; i < Size; ++i) { @@ -8327,13 +8309,13 @@ static APInt computeZeroableShuffleElements(ArrayRef Mask, Zeroable.setBit(i); else if (ConstantSDNode *Cst = dyn_cast(Op)) { APInt Val = Cst->getAPIntValue(); - Val = Val.lshr((M % Scale) * ScalarSizeInBits); + Val.lshrInPlace((M % Scale) * ScalarSizeInBits); Val = Val.getLoBits(ScalarSizeInBits); if (Val == 0) Zeroable.setBit(i); } else if (ConstantFPSDNode *Cst = dyn_cast(Op)) { APInt Val = Cst->getValueAPF().bitcastToAPInt(); - Val = Val.lshr((M % Scale) * ScalarSizeInBits); + Val.lshrInPlace((M % Scale) * ScalarSizeInBits); Val = Val.getLoBits(ScalarSizeInBits); if (Val == 0) Zeroable.setBit(i); @@ -8471,9 +8453,9 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT, Subtarget, DAG, DL); SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL); SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1; - return DAG.getNode(ISD::VSELECT, DL, VT, VMask, - DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector), - ZeroVector); + return DAG.getSelect(DL, VT, VMask, + DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector), + ZeroVector); } static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, @@ -8833,8 +8815,9 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); return DAG.getBitcast( - VT, DAG.getNode(ISD::VSELECT, DL, BlendVT, - DAG.getBuildVector(BlendVT, DL, VSELECTMask), V1, V2)); + VT, + DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask), + V1, V2)); } case MVT::v16f32: case MVT::v8f64: @@ -9843,7 +9826,6 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, /// For convenience, this code also bundles all of the subtarget feature set /// filtering. While a little annoying to re-dispatch on type here, there isn't /// a convenient way to factor it out. -/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them? static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, @@ -9956,17 +9938,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); - - // Make sure the newly-created LOAD is in the same position as Ld in - // terms of dependency. We create a TokenFactor for Ld and V, - // and update uses of Ld's output chain to use the TokenFactor. - if (Ld->hasAnyUseOfValue(1)) { - SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - SDValue(Ld, 1), SDValue(V.getNode(), 1)); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain); - DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1), - SDValue(V.getNode(), 1)); - } + DAG.makeEquivalentMemoryOrdering(Ld, V); } else if (!BroadcastFromReg) { // We can't broadcast from a vector register. return SDValue(); @@ -10917,9 +10889,10 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( "We need to be changing the number of flipped inputs!"); int PSHUFHalfMask[] = {0, 1, 2, 3}; std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]); - V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, - MVT::v8i16, V, - getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); + V = DAG.getNode( + FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, + MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V, + getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); for (int &M : Mask) if (M >= 0 && M == FixIdx) @@ -12033,18 +12006,22 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // subvector. bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}); if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) { - // With AVX2 we should use VPERMQ/VPERMPD to allow memory folding. + // With AVX2, use VPERMQ/VPERMPD to allow memory folding. if (Subtarget.hasAVX2() && V2.isUndef()) return SDValue(); - MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() / 2); - SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, - DAG.getIntPtrConstant(0, DL)); - SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - OnlyUsesV1 ? V1 : V2, - DAG.getIntPtrConstant(0, DL)); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + // With AVX1, use vperm2f128 (below) to allow load folding. Otherwise, + // this will likely become vinsertf128 which can't fold a 256-bit memop. + if (!isa(peekThroughBitcasts(V1))) { + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), + VT.getVectorNumElements() / 2); + SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, + DAG.getIntPtrConstant(0, DL)); + SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, + OnlyUsesV1 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + } } } @@ -13903,6 +13880,11 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode())) return SDValue(); + // If this VSELECT has a vector if i1 as a mask, it will be directly matched + // with patterns on the mask registers on AVX-512. + if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1) + return Op; + // Try to lower this to a blend-style vector shuffle. This can handle all // constant condition cases. if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG)) @@ -13912,10 +13894,30 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget.hasSSE41()) return SDValue(); + SDLoc dl(Op); + MVT VT = Op.getSimpleValueType(); + + // If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition + // into an i1 condition so that we can use the mask-based 512-bit blend + // instructions. + if (VT.getSizeInBits() == 512) { + SDValue Cond = Op.getOperand(0); + // The vNi1 condition case should be handled above as it can be trivially + // lowered. + assert(Cond.getValueType().getScalarSizeInBits() == + VT.getScalarSizeInBits() && + "Should have a size-matched integer condition!"); + // Build a mask by testing the condition against itself (tests for zero). + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond); + // Now return a new VSELECT using the mask. + return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2)); + } + // Only some types will be legal on some subtargets. If we can emit a legal // VSELECT-matching blend, return Op, and but if we need to expand, return // a null value. - switch (Op.getSimpleValueType().SimpleTy) { + switch (VT.SimpleTy) { default: // Most of the vector types have blends past SSE4.1. return Op; @@ -13993,7 +13995,6 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const SDValue Idx = Op.getOperand(1); MVT EltVT = Op.getSimpleValueType(); - assert((EltVT == MVT::i1) && "Unexpected operands in ExtractBitFromMaskVector"); assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) && "Unexpected vector type in ExtractBitFromMaskVector"); @@ -14027,8 +14028,8 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8)); Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, DAG.getConstant(MaxSift, dl, MVT::i8)); - return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec, - DAG.getIntPtrConstant(0, dl)); + return DAG.getNode(X86ISD::VEXTRACT, dl, Op.getSimpleValueType(), Vec, + DAG.getIntPtrConstant(0, dl)); } SDValue @@ -14039,7 +14040,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT VecVT = Vec.getSimpleValueType(); SDValue Idx = Op.getOperand(1); - if (Op.getSimpleValueType() == MVT::i1) + if (VecVT.getVectorElementType() == MVT::i1) return ExtractBitFromMaskVector(Op, DAG); if (!isa(Idx)) { @@ -14210,10 +14211,13 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { return EltInVec; } - // Insertion of one bit into first or last position - // can be done with two SHIFTs + OR. + // Insertion of one bit into first position if (IdxVal == 0 ) { - // EltInVec already at correct index and other bits are 0. + // Clean top bits of vector. + EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec, + DAG.getConstant(NumElems - 1, dl, MVT::i8)); + EltInVec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, EltInVec, + DAG.getConstant(NumElems - 1, dl, MVT::i8)); // Clean the first bit in source vector. Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, DAG.getConstant(1 , dl, MVT::i8)); @@ -14222,6 +14226,7 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec); } + // Insertion of one bit into last position if (IdxVal == NumElems -1) { // Move the bit to the last position inside the vector. EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec, @@ -14424,8 +14429,8 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, // If the input is a buildvector just emit a smaller one. unsigned ElemsPerChunk = ResVT.getVectorNumElements(); if (In.getOpcode() == ISD::BUILD_VECTOR) - return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT, - makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk)); + return DAG.getBuildVector( + ResVT, dl, makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk)); // Everything else is legal. return Op; @@ -14811,7 +14816,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // location. SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, DL, true), DL); + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); SDValue Args[] = { Chain, Offset }; Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), @@ -15434,8 +15439,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Get a pointer to FF if the sign bit was set, or to 0 otherwise. SDValue Zero = DAG.getIntPtrConstant(0, dl); SDValue Four = DAG.getIntPtrConstant(4, dl); - SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, - Zero, Four); + SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset); // Load the value out, extending it from f32 to f80. @@ -15707,7 +15711,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, SDValue Zero = DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT); - SDValue SelectedVal = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero); + SDValue SelectedVal = DAG.getSelect(DL, ExtVT, In, One, Zero); if (VT == ExtVT) return SelectedVal; return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal); @@ -16069,7 +16073,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { unsigned EltBits = EltVT.getSizeInBits(); // For FABS, mask is 0x7f...; for FNEG, mask is 0x80... APInt MaskElt = - IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits); + IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits); const fltSemantics &Sem = EltVT == MVT::f64 ? APFloat::IEEEdouble() : (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle()); @@ -16132,9 +16136,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // The mask constants are automatically splatted for vector types. unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue SignMask = DAG.getConstantFP( - APFloat(Sem, APInt::getSignBit(EltSizeInBits)), dl, LogicVT); + APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT); SDValue MagMask = DAG.getConstantFP( - APFloat(Sem, ~APInt::getSignBit(EltSizeInBits)), dl, LogicVT); + APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT); // First, clear all bits but the sign bit from the second operand (sign). if (IsFakeVector) @@ -16337,11 +16341,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, case ISD::ADD: case ISD::SUB: case ISD::MUL: - case ISD::SHL: { - const auto *BinNode = cast(Op.getNode()); - if (BinNode->Flags.hasNoSignedWrap()) + case ISD::SHL: + if (Op.getNode()->getFlags().hasNoSignedWrap()) break; - } + LLVM_FALLTHROUGH; default: NeedOF = true; break; @@ -16799,9 +16802,9 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, unsigned BitWidth = Op0.getValueSizeInBits(); unsigned AndBitWidth = And.getValueSizeInBits(); if (BitWidth > AndBitWidth) { - APInt Zeros, Ones; - DAG.computeKnownBits(Op0, Zeros, Ones); - if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) + KnownBits Known; + DAG.computeKnownBits(Op0, Known); + if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth) return SDValue(); } LHS = Op1; @@ -17049,7 +17052,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); MVT VT = Op.getSimpleValueType(); - ISD::CondCode SetCCOpcode = cast(CC)->get(); + ISD::CondCode Cond = cast(CC)->get(); bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint(); SDLoc dl(Op); @@ -17076,18 +17079,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is // available. SDValue Cmp; - unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1); + unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1); if (SSECC == 8) { // LLVM predicate is SETUEQ or SETONE. unsigned CC0, CC1; unsigned CombineOpc; - if (SetCCOpcode == ISD::SETUEQ) { + if (Cond == ISD::SETUEQ) { CC0 = 3; // UNORD CC1 = 0; // EQ CombineOpc = Opc == X86ISD::CMPP ? static_cast(X86ISD::FOR) : static_cast(ISD::OR); } else { - assert(SetCCOpcode == ISD::SETONE); + assert(Cond == ISD::SETONE); CC0 = 7; // ORD CC1 = 4; // NEQ CombineOpc = Opc == X86ISD::CMPP ? static_cast(X86ISD::FAND) : @@ -17134,7 +17137,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // 2. The original operand type has been promoted to a 256-bit vector. // // Note that condition 2. only applies for AVX targets. - SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode); + SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, Cond); return DAG.getZExtOrTrunc(NewOp, dl, VT); } @@ -17174,7 +17177,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) { // Translate compare code to XOP PCOM compare mode. unsigned CmpMode = 0; - switch (SetCCOpcode) { + switch (Cond) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETULT: case ISD::SETLT: CmpMode = 0x00; break; @@ -17189,60 +17192,55 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, } // Are we comparing unsigned or signed integers? - unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) - ? X86ISD::VPCOMU : X86ISD::VPCOM; + unsigned Opc = + ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM; return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(CmpMode, dl, MVT::i8)); } - // We are handling one of the integer comparisons here. Since SSE only has + // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. - unsigned Opc; - bool Swap = false, Invert = false, FlipSigns = false, MinMax = false; - bool Subus = false; - - switch (SetCCOpcode) { - default: llvm_unreachable("Unexpected SETCC condition"); - case ISD::SETNE: Invert = true; - case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; - case ISD::SETLT: Swap = true; - case ISD::SETGT: Opc = X86ISD::PCMPGT; break; - case ISD::SETGE: Swap = true; - case ISD::SETLE: Opc = X86ISD::PCMPGT; - Invert = true; break; - case ISD::SETULT: Swap = true; - case ISD::SETUGT: Opc = X86ISD::PCMPGT; - FlipSigns = true; break; - case ISD::SETUGE: Swap = true; - case ISD::SETULE: Opc = X86ISD::PCMPGT; - FlipSigns = true; Invert = true; break; - } + unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ + : X86ISD::PCMPGT; + bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || + Cond == ISD::SETGE || Cond == ISD::SETUGE; + bool Invert = Cond == ISD::SETNE || + (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); + + // If both operands are known non-negative, then an unsigned compare is the + // same as a signed compare and there's no need to flip signbits. + // TODO: We could check for more general simplifications here since we're + // computing known bits. + bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) && + !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1)); // Special case: Use min/max operations for SETULE/SETUGE MVT VET = VT.getVectorElementType(); - bool hasMinMax = - (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) - || (Subtarget.hasSSE2() && (VET == MVT::i8)); - - if (hasMinMax) { - switch (SetCCOpcode) { + bool HasMinMax = + (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) || + (Subtarget.hasSSE2() && (VET == MVT::i8)); + bool MinMax = false; + if (HasMinMax) { + switch (Cond) { default: break; case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break; case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break; } - if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } + if (MinMax) + Swap = Invert = FlipSigns = false; } - bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16); - if (!MinMax && hasSubus) { + bool HasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16); + bool Subus = false; + if (!MinMax && HasSubus) { // As another special case, use PSUBUS[BW] when it's profitable. E.g. for // Op0 u<= Op1: // t = psubus Op0, Op1 // pcmpeq t, <0..0> - switch (SetCCOpcode) { + switch (Cond) { default: break; case ISD::SETULT: { // If the comparison is against a constant we can turn this into a @@ -17344,10 +17342,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // bits of the inputs before performing those operations. if (FlipSigns) { MVT EltVT = VT.getVectorElementType(); - SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl, + SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, VT); - Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); - Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); + Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM); + Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM); } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); @@ -17372,8 +17370,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); - assert(((!Subtarget.hasAVX512() && VT == MVT::i8) || (VT == MVT::i1)) - && "SetCC type must be 8-bit or 1-bit integer"); + assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDLoc dl(Op); @@ -17437,19 +17434,24 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } -SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const { +SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue Carry = Op.getOperand(2); SDValue Cond = Op.getOperand(3); SDLoc DL(Op); - assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); + assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only."); X86::CondCode CC = TranslateIntegerX86CC(cast(Cond)->get()); - assert(Carry.getOpcode() != ISD::CARRY_FALSE); + // Recreate the carry if needed. + EVT CarryVT = Carry.getValueType(); + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), + Carry, DAG.getConstant(NegOne, DL, CarryVT)); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); - SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry); + SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1)); SDValue SetCC = getSETCC(CC, Cmp.getValue(1), DL, DAG); if (Op.getSimpleValueType() == MVT::i1) return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); @@ -17507,7 +17509,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (SSECC != 8) { if (Subtarget.hasAVX512()) { - SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CondOp0, + SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); return DAG.getNode(VT.isVector() ? X86ISD::SELECT : X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); @@ -17543,7 +17545,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; VCmp = DAG.getBitcast(VCmpVT, VCmp); - SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); + SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VSel, DAG.getIntPtrConstant(0, DL)); @@ -17555,9 +17557,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } // AVX512 fallback is to lower selects of scalar floats to masked moves. - if (Cond.getValueType() == MVT::i1 && (VT == MVT::f64 || VT == MVT::f32) && - Subtarget.hasAVX512()) - return DAG.getNode(X86ISD::SELECTS, DL, VT, Cond, Op1, Op2); + if ((VT == MVT::f64 || VT == MVT::f32) && Subtarget.hasAVX512()) { + SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond); + return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); + } if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { SDValue Op1Scalar; @@ -17571,9 +17574,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) Op2Scalar = Op2.getOperand(0); if (Op1Scalar.getNode() && Op2Scalar.getNode()) { - SDValue newSelect = DAG.getNode(ISD::SELECT, DL, - Op1Scalar.getValueType(), - Cond, Op1Scalar, Op2Scalar); + SDValue newSelect = DAG.getSelect(DL, Op1Scalar.getValueType(), Cond, + Op1Scalar, Op2Scalar); if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, newSelect); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); @@ -17588,8 +17590,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getUNDEF(MVT::v8i1), Op1, zeroConst); Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1, DAG.getUNDEF(MVT::v8i1), Op2, zeroConst); - SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::v8i1, - Cond, Op1, Op2); + SDValue newSelect = DAG.getSelect(DL, MVT::v8i1, Cond, Op1, Op2); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst); } @@ -17858,7 +17859,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, } else { SDValue NegOne = getOnesVector(ExtVT, DAG, dl); SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl); - V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero); + V = DAG.getSelect(dl, ExtVT, In, NegOne, Zero); if (ExtVT == VT) return V; } @@ -18660,7 +18661,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); bool Is64Bit = Subtarget.is64Bit(); MVT SPTy = getPointerTy(DAG.getDataLayout()); @@ -19100,8 +19101,8 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, /// \brief Creates an SDNode for a predicated scalar operation. /// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc). -/// The mask is coming as MVT::i8 and it should be truncated -/// to MVT::i1 while lowering masking intrinsics. +/// The mask is coming as MVT::i8 and it should be transformed +/// to MVT::v1i1 while lowering masking intrinsics. /// The main difference between ScalarMaskingNode and VectorMaskingNode is using /// "X86select" instead of "vselect". We just can't create the "vselect" node /// for a scalar instruction. @@ -19109,19 +19110,19 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget &Subtarget, SelectionDAG &DAG) { - if (isAllOnesConstant(Mask)) - return Op; + + if (auto *MaskConst = dyn_cast(Mask)) + if (MaskConst->getZExtValue() & 0x1) + return Op; MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - // The mask should be of type MVT::i1 - SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask); + SDValue IMask = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Mask); if (Op.getOpcode() == X86ISD::FSETCCM || Op.getOpcode() == X86ISD::FSETCCM_RND) return DAG.getNode(ISD::AND, dl, VT, Op, IMask); - if (Op.getOpcode() == X86ISD::VFPCLASS || - Op.getOpcode() == X86ISD::VFPCLASSS) + if (Op.getOpcode() == X86ISD::VFPCLASSS) return DAG.getNode(ISD::OR, dl, VT, Op, IMask); if (PreservedSrc.isUndef()) @@ -19170,7 +19171,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, // registration, or the .set_setframe offset. MCSymbol *OffsetSym = MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(Fn->getName())); + GlobalValue::dropLLVMManglingEscape(Fn->getName())); SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT); SDValue ParentFrameOffset = DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal); @@ -19558,10 +19559,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue Src1 = Op.getOperand(1); SDValue Imm = Op.getOperand(2); SDValue Mask = Op.getOperand(3); - SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm); + SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); - return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, FPclassMask); + return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, FPclassMask, + DAG.getIntPtrConstant(0, dl)); } case CMP_MASK: case CMP_MASK_CC: { @@ -19621,18 +19623,18 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget if (IntrData->Opc1 != 0) { SDValue Rnd = Op.getOperand(5); if (!isRoundModeCurDirection(Rnd)) - Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd); + Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Rnd); } //default rounding mode if(!Cmp.getNode()) - Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC); + Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); - - return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, CmpMask); + return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i8, CmpMask, + DAG.getIntPtrConstant(0, dl)); } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; @@ -19680,13 +19682,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue FCmp; if (isRoundModeCurDirection(Sae)) - FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8)); + FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, + DAG.getConstant(CondVal, dl, MVT::i8)); else - FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::i1, LHS, RHS, - DAG.getConstant(CondVal, dl, MVT::i8), Sae); - // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg" - return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp); + FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::v1i1, LHS, RHS, + DAG.getConstant(CondVal, dl, MVT::i8), Sae); + return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i32, FCmp, + DAG.getIntPtrConstant(0, dl)); } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), @@ -19772,12 +19774,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(Op.getValueType(), Res); } - case CONVERT_MASK_TO_VEC: { - SDValue Mask = Op.getOperand(1); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); - SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); - return DAG.getNode(IntrData->Opc0, dl, VT, VMask); - } case BRCST_SUBVEC_TO_VEC: { SDValue Src = Op.getOperand(1); SDValue Passthru = Op.getOperand(2); @@ -20021,7 +20017,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue Op1 = Op.getOperand(1); auto *Fn = cast(cast(Op1)->getGlobal()); MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol( - GlobalValue::getRealLinkageName(Fn->getName())); + GlobalValue::dropLLVMManglingEscape(Fn->getName())); // Generate a simple absolute symbol reference. This intrinsic is only // supported on 32-bit Windows, which isn't PIC. @@ -20360,16 +20356,17 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); - const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo); + const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { - if (IntNo == llvm::Intrinsic::x86_seh_ehregnode) + switch (IntNo) { + case llvm::Intrinsic::x86_seh_ehregnode: return MarkEHRegistrationNode(Op, DAG); - if (IntNo == llvm::Intrinsic::x86_seh_ehguard) + case llvm::Intrinsic::x86_seh_ehguard: return MarkEHGuard(Op, DAG); - if (IntNo == llvm::Intrinsic::x86_flags_read_u32 || - IntNo == llvm::Intrinsic::x86_flags_read_u64 || - IntNo == llvm::Intrinsic::x86_flags_write_u32 || - IntNo == llvm::Intrinsic::x86_flags_write_u64) { + case llvm::Intrinsic::x86_flags_read_u32: + case llvm::Intrinsic::x86_flags_read_u64: + case llvm::Intrinsic::x86_flags_write_u32: + case llvm::Intrinsic::x86_flags_write_u64: { // We need a frame pointer because this will get lowered to a PUSH/POP // sequence. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); @@ -20378,6 +20375,20 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, // during ExpandISelPseudos in EmitInstrWithCustomInserter. return SDValue(); } + case Intrinsic::x86_lwpins32: + case Intrinsic::x86_lwpins64: { + SDLoc dl(Op); + SDValue Chain = Op->getOperand(0); + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + SDValue LwpIns = + DAG.getNode(X86ISD::LWPINS, dl, VTs, Chain, Op->getOperand(2), + Op->getOperand(3), Op->getOperand(4)); + SDValue SetCC = getSETCC(X86::COND_B, LwpIns.getValue(0), dl, DAG); + SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, SetCC); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, + LwpIns.getValue(1)); + } + } return SDValue(); } @@ -20990,54 +21001,62 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); } +// Split an unary integer op into 2 half sized ops. +static SDValue LowerVectorIntUnary(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + unsigned NumElems = VT.getVectorNumElements(); + unsigned SizeInBits = VT.getSizeInBits(); + + // Extract the Lo/Hi vectors + SDLoc dl(Op); + SDValue Src = Op.getOperand(0); + SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2); + SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2); + + MVT EltVT = VT.getVectorElementType(); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + DAG.getNode(Op.getOpcode(), dl, NewVT, Lo), + DAG.getNode(Op.getOpcode(), dl, NewVT, Hi)); +} + +// Decompose 256-bit ops into smaller 128-bit ops. +static SDValue Lower256IntUnary(SDValue Op, SelectionDAG &DAG) { + assert(Op.getSimpleValueType().is256BitVector() && + Op.getSimpleValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + return LowerVectorIntUnary(Op, DAG); +} + +// Decompose 512-bit ops into smaller 256-bit ops. +static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) { + assert(Op.getSimpleValueType().is512BitVector() && + Op.getSimpleValueType().isInteger() && + "Only handle AVX 512-bit vector integer operation"); + return LowerVectorIntUnary(Op, DAG); +} + /// \brief Lower a vector CTLZ using native supported vector CTLZ instruction. // -// 1. i32/i64 128/256-bit vector (native support require VLX) are expended -// to 512-bit vector. -// 2. i8/i16 vector implemented using dword LZCNT vector instruction -// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal, -// split the vector, perform operation on it's Lo a Hi part and -// concatenate the results. -static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) { +// i8/i16 vector implemented using dword LZCNT vector instruction +// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal, +// split the vector, perform operation on it's Lo a Hi part and +// concatenate the results. +static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG) { assert(Op.getOpcode() == ISD::CTLZ); SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); - if (EltVT == MVT::i64 || EltVT == MVT::i32) { - // Extend to 512 bit vector. - assert((VT.is256BitVector() || VT.is128BitVector()) && - "Unsupported value type for operation"); - - MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits()); - SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, - DAG.getUNDEF(NewVT), - Op.getOperand(0), - DAG.getIntPtrConstant(0, dl)); - SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512); - - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode, - DAG.getIntPtrConstant(0, dl)); - } - assert((EltVT == MVT::i8 || EltVT == MVT::i16) && "Unsupported element type"); - if (16 < NumElems) { - // Split vector, it's Lo and Hi parts will be handled in next iteration. - SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl); - MVT OutVT = MVT::getVectorVT(EltVT, NumElems/2); - - Lo = DAG.getNode(ISD::CTLZ, dl, OutVT, Lo); - Hi = DAG.getNode(ISD::CTLZ, dl, OutVT, Hi); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); - } + // Split vector, it's Lo and Hi parts will be handled in next iteration. + if (16 < NumElems) + return LowerVectorIntUnary(Op, DAG); MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); - assert((NewVT.is256BitVector() || NewVT.is512BitVector()) && "Unsupported value type for operation"); @@ -21124,23 +21143,17 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); - SDValue Op0 = Op.getOperand(0); - if (Subtarget.hasAVX512()) - return LowerVectorCTLZ_AVX512(Op, DAG); + if (Subtarget.hasCDI()) + return LowerVectorCTLZ_AVX512CDI(Op, DAG); // Decompose 256-bit ops into smaller 128-bit ops. - if (VT.is256BitVector() && !Subtarget.hasInt256()) { - unsigned NumElems = VT.getVectorNumElements(); - - // Extract each 128-bit vector, perform ctlz and concat the result. - SDValue LHS = extract128BitVector(Op0, 0, DAG, DL); - SDValue RHS = extract128BitVector(Op0, NumElems / 2, DAG, DL); + if (VT.is256BitVector() && !Subtarget.hasInt256()) + return Lower256IntUnary(Op, DAG); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, - DAG.getNode(ISD::CTLZ, DL, LHS.getValueType(), LHS), - DAG.getNode(ISD::CTLZ, DL, RHS.getValueType(), RHS)); - } + // Decompose 512-bit ops into smaller 256-bit ops. + if (VT.is512BitVector() && !Subtarget.hasBWI()) + return Lower512IntUnary(Op, DAG); assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB"); return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG); @@ -21304,19 +21317,7 @@ static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) { assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); - MVT VT = Op.getSimpleValueType(); - unsigned NumElems = VT.getVectorNumElements(); - - SDLoc dl(Op); - SDValue Src = Op.getOperand(0); - SDValue Lo = extract128BitVector(Src, 0, DAG, dl); - SDValue Hi = extract128BitVector(Src, NumElems / 2, DAG, dl); - - MVT EltVT = VT.getVectorElementType(); - MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - DAG.getNode(ISD::ABS, dl, NewVT, Lo), - DAG.getNode(ISD::ABS, dl, NewVT, Hi)); + return Lower256IntUnary(Op, DAG); } static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { @@ -21825,6 +21826,14 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); SDValue Ex = DAG.getBitcast(ExVT, R); + // ashr(R, 63) === cmp_slt(R, 0) + if (ShiftAmt == 63 && Subtarget.hasSSE42()) { + assert((VT != MVT::v4i64 || Subtarget.hasInt256()) && + "Unsupported PCMPGT op"); + return DAG.getNode(X86ISD::PCMPGT, dl, VT, + getZeroVector(VT, Subtarget, DAG, dl), R); + } + if (ShiftAmt >= 32) { // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32. SDValue Upper = @@ -21923,10 +21932,19 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, } // Special case in 32-bit mode, where i64 is expanded into high and low parts. + // TODO: Replace constant extraction with getTargetConstantBitsFromNode. if (!Subtarget.is64Bit() && !Subtarget.hasXOP() && (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) || (Subtarget.hasAVX512() && VT == MVT::v8i64))) { + // AVX1 targets maybe extracting a 128-bit vector from a 256-bit constant. + unsigned SubVectorScale = 1; + if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + SubVectorScale = + Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits(); + Amt = Amt.getOperand(0); + } + // Peek through any splat that was introduced for i64 shift vectorization. int SplatIndex = -1; if (ShuffleVectorSDNode *SVN = dyn_cast(Amt.getNode())) @@ -21943,7 +21961,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, Amt = Amt.getOperand(0); unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() / - VT.getVectorNumElements(); + (SubVectorScale * VT.getVectorNumElements()); unsigned RatioInLog2 = Log2_32_Ceil(Ratio); uint64_t ShiftAmt = 0; unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio); @@ -22111,11 +22129,11 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, } // i64 vector arithmetic shift can be emulated with the transform: - // M = lshr(SIGN_BIT, Amt) + // M = lshr(SIGN_MASK, Amt) // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M) if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) && Op.getOpcode() == ISD::SRA) { - SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT); + SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT); SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); R = DAG.getNode(ISD::XOR, dl, VT, R, M); @@ -22317,23 +22335,21 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel); - return DAG.getBitcast(SelVT, - DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1)); + return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1)); } else if (Subtarget.hasSSE41()) { // On SSE41 targets we make use of the fact that VSELECT lowers // to PBLENDVB which selects bytes based just on the sign bit. V0 = DAG.getBitcast(VT, V0); V1 = DAG.getBitcast(VT, V1); Sel = DAG.getBitcast(VT, Sel); - return DAG.getBitcast(SelVT, - DAG.getNode(ISD::VSELECT, dl, VT, Sel, V0, V1)); + return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1)); } // On pre-SSE41 targets we test for the sign bit by comparing to // zero - a negative value will set all bits of the lanes to true // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering. SDValue Z = getZeroVector(SelVT, Subtarget, DAG, dl); SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel); - return DAG.getNode(ISD::VSELECT, dl, SelVT, C, V0, V1); + return DAG.getSelect(dl, SelVT, C, V0, V1); }; // Turn 'a' into a mask suitable for VSELECT: a = a << 5; @@ -22455,15 +22471,14 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, V0 = DAG.getBitcast(ExtVT, V0); V1 = DAG.getBitcast(ExtVT, V1); Sel = DAG.getBitcast(ExtVT, Sel); - return DAG.getBitcast( - VT, DAG.getNode(ISD::VSELECT, dl, ExtVT, Sel, V0, V1)); + return DAG.getBitcast(VT, DAG.getSelect(dl, ExtVT, Sel, V0, V1)); } // On pre-SSE41 targets we splat the sign bit - a negative value will // set all bits of the lanes to true and VSELECT uses that in // its OR(AND(V0,C),AND(V1,~C)) lowering. SDValue C = DAG.getNode(ISD::SRA, dl, VT, Sel, DAG.getConstant(15, dl, VT)); - return DAG.getNode(ISD::VSELECT, dl, VT, C, V0, V1); + return DAG.getSelect(dl, VT, C, V0, V1); }; // Turn 'a' into a mask suitable for VSELECT: a = a << 12; @@ -22647,7 +22662,7 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b. TargetLowering::AtomicExpansionKind X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { - auto PTy = cast(LI->getPointerOperand()->getType()); + auto PTy = cast(LI->getPointerOperandType()); return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; } @@ -23095,29 +23110,13 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget, return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG); } - if (VT.is256BitVector() && !Subtarget.hasInt256()) { - unsigned NumElems = VT.getVectorNumElements(); - - // Extract each 128-bit vector, compute pop count and concat the result. - SDValue LHS = extract128BitVector(Op0, 0, DAG, DL); - SDValue RHS = extract128BitVector(Op0, NumElems / 2, DAG, DL); - - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, - LowerVectorCTPOPInRegLUT(LHS, DL, Subtarget, DAG), - LowerVectorCTPOPInRegLUT(RHS, DL, Subtarget, DAG)); - } - - if (VT.is512BitVector() && !Subtarget.hasBWI()) { - unsigned NumElems = VT.getVectorNumElements(); - - // Extract each 256-bit vector, compute pop count and concat the result. - SDValue LHS = extract256BitVector(Op0, 0, DAG, DL); - SDValue RHS = extract256BitVector(Op0, NumElems / 2, DAG, DL); + // Decompose 256-bit ops into smaller 128-bit ops. + if (VT.is256BitVector() && !Subtarget.hasInt256()) + return Lower256IntUnary(Op, DAG); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, - LowerVectorCTPOPInRegLUT(LHS, DL, Subtarget, DAG), - LowerVectorCTPOPInRegLUT(RHS, DL, Subtarget, DAG)); - } + // Decompose 512-bit ops into smaller 256-bit ops. + if (VT.is512BitVector() && !Subtarget.hasBWI()) + return Lower512IntUnary(Op, DAG); return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG); } @@ -23144,20 +23143,12 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) { DAG.getIntPtrConstant(0, DL)); } - MVT SVT = VT.getVectorElementType(); int NumElts = VT.getVectorNumElements(); int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; // Decompose 256-bit ops into smaller 128-bit ops. - if (VT.is256BitVector()) { - SDValue Lo = extract128BitVector(In, 0, DAG, DL); - SDValue Hi = extract128BitVector(In, NumElts / 2, DAG, DL); - - MVT HalfVT = MVT::getVectorVT(SVT, NumElts / 2); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, - DAG.getNode(ISD::BITREVERSE, DL, HalfVT, Lo), - DAG.getNode(ISD::BITREVERSE, DL, HalfVT, Hi)); - } + if (VT.is256BitVector()) + return Lower256IntUnary(Op, DAG); assert(VT.is128BitVector() && "Only 128-bit vector bitreverse lowering supported."); @@ -23198,14 +23189,8 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, "Only byte vector BITREVERSE supported"); // Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2. - if (VT.is256BitVector() && !Subtarget.hasInt256()) { - MVT HalfVT = MVT::getVectorVT(MVT::i8, NumElts / 2); - SDValue Lo = extract128BitVector(In, 0, DAG, DL); - SDValue Hi = extract128BitVector(In, NumElts / 2, DAG, DL); - Lo = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, Lo); - Hi = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, Hi); - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - } + if (VT.is256BitVector() && !Subtarget.hasInt256()) + return Lower256IntUnary(Op, DAG); // Perform BITREVERSE using PSHUFB lookups. Each byte is split into // two nibbles and a PSHUFB lookup to find the bitreverse of each @@ -23325,30 +23310,33 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) { return Op; } -static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getNode()->getSimpleValueType(0); +static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { + SDNode *N = Op.getNode(); + MVT VT = N->getSimpleValueType(0); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDLoc DL(N); - unsigned Opc; - bool ExtraOp = false; - switch (Op.getOpcode()) { - default: llvm_unreachable("Invalid code"); - case ISD::ADDC: Opc = X86ISD::ADD; break; - case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break; - case ISD::SUBC: Opc = X86ISD::SUB; break; - case ISD::SUBE: Opc = X86ISD::SBB; ExtraOp = true; break; - } + // Set the carry flag. + SDValue Carry = Op.getOperand(2); + EVT CarryVT = Carry.getValueType(); + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), + Carry, DAG.getConstant(NegOne, DL, CarryVT)); + + unsigned Opc = Op.getOpcode() == ISD::ADDCARRY ? X86ISD::ADC : X86ISD::SBB; + SDValue Sum = DAG.getNode(Opc, DL, VTs, Op.getOperand(0), + Op.getOperand(1), Carry.getValue(1)); - if (!ExtraOp) - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), - Op.getOperand(1)); - return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), - Op.getOperand(1), Op.getOperand(2)); + SDValue SetCC = getSETCC(X86::COND_B, Sum.getValue(1), DL, DAG); + if (N->getValueType(1) == MVT::i1) + SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); + + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); } static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, @@ -23381,9 +23369,8 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout())); - Type *RetTy = isF64 - ? (Type*)StructType::get(ArgTy, ArgTy, nullptr) - : (Type*)VectorType::get(ArgTy, 4); + Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy) + : (Type *)VectorType::get(ArgTy, 4); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl) @@ -23425,8 +23412,6 @@ static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && "Unexpected request for vector widening"); - EVT EltVT = NVT.getVectorElementType(); - SDLoc dl(InOp); if (InOp.getOpcode() == ISD::CONCAT_VECTORS && InOp.getNumOperands() == 2) { @@ -23444,6 +23429,8 @@ static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, for (unsigned i = 0; i < InNumElts; ++i) Ops.push_back(InOp.getOperand(i)); + EVT EltVT = InOp.getOperand(0).getValueType(); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : DAG.getUNDEF(EltVT); for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) @@ -23813,7 +23800,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::SETCCE: return LowerSETCCE(Op, DAG); + case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); @@ -23858,10 +23845,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG); case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG); - case ISD::ADDC: - case ISD::ADDE: - case ISD::SUBC: - case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ADDCARRY: + case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::ADD: case ISD::SUB: return LowerADD_SUB(Op, DAG); case ISD::SMAX: @@ -24522,6 +24507,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND"; case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND"; case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; + case X86ISD::LWPINS: return "X86ISD::LWPINS"; } return nullptr; } @@ -24746,16 +24732,22 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, // xbegin sinkMBB // // mainMBB: - // eax = -1 + // s0 = -1 + // + // fallBB: + // eax = # XABORT_DEF + // s1 = eax // // sinkMBB: - // v = eax + // v = phi(s0/mainBB, s1/fallBB) MachineBasicBlock *thisMBB = MBB; MachineFunction *MF = MBB->getParent(); MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); + MF->insert(I, fallMBB); MF->insert(I, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. @@ -24763,25 +24755,40 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + MachineRegisterInfo &MRI = MF->getRegInfo(); + unsigned DstReg = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned fallDstReg = MRI.createVirtualRegister(RC); + // thisMBB: - // xbegin sinkMBB + // xbegin fallMBB // # fallthrough to mainMBB - // # abortion to sinkMBB - BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(sinkMBB); + // # abortion to fallMBB + BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB); thisMBB->addSuccessor(mainMBB); - thisMBB->addSuccessor(sinkMBB); + thisMBB->addSuccessor(fallMBB); // mainMBB: - // EAX = -1 - BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), X86::EAX).addImm(-1); + // mainDstReg := -1 + BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1); + BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB); mainMBB->addSuccessor(sinkMBB); - // sinkMBB: - // EAX is live into the sinkMBB - sinkMBB->addLiveIn(X86::EAX); - BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(TargetOpcode::COPY), - MI.getOperand(0).getReg()) + // fallMBB: + // ; pseudo instruction to model hardware's definition from XABORT + // EAX := XABORT_DEF + // fallDstReg := EAX + BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF)); + BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg) .addReg(X86::EAX); + fallMBB->addSuccessor(sinkMBB); + + // sinkMBB: + // DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB) + BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(fallDstReg).addMBB(fallMBB); MI.eraseFromParent(); return sinkMBB; @@ -25861,7 +25868,7 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI, // Emit CALLSEQ_START right before the instruction. unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); MachineInstrBuilder CallseqStart = - BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0); + BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0); BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); // Emit CALLSEQ_END right after the instruction. @@ -25944,6 +25951,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); @@ -25960,7 +25968,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, DstReg = MI.getOperand(CurOp++).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - assert(RC->hasType(MVT::i32) && "Invalid destination!"); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); + (void)TRI; unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); @@ -26598,6 +26607,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); + case TargetOpcode::PATCHABLE_EVENT_CALL: + // Do nothing here, handle in xray instrumentation pass. + return BB; + case X86::LCMPXCHG8B: { const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); // In addition to 4 E[ABCD] registers implied by encoding, CMPXCHG8B @@ -26665,12 +26678,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, //===----------------------------------------------------------------------===// void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - unsigned BitWidth = KnownZero.getBitWidth(); + unsigned BitWidth = Known.getBitWidth(); unsigned Opc = Op.getOpcode(); EVT VT = Op.getValueType(); assert((Opc >= ISD::BUILTIN_OP_END || @@ -26680,7 +26692,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + Known.resetAll(); switch (Opc) { default: break; case X86ISD::ADD: @@ -26699,33 +26711,33 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, break; LLVM_FALLTHROUGH; case X86ISD::SETCC: - KnownZero.setBits(1, BitWidth); + Known.Zero.setBitsFrom(1); break; case X86ISD::MOVMSK: { unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements(); - KnownZero.setBits(NumLoBits, BitWidth); + Known.Zero.setBitsFrom(NumLoBits); break; } case X86ISD::VSHLI: case X86ISD::VSRLI: { if (auto *ShiftImm = dyn_cast(Op.getOperand(1))) { if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) { - KnownZero = APInt::getAllOnesValue(BitWidth); + Known.setAllZero(); break; } - DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1); + DAG.computeKnownBits(Op.getOperand(0), Known, Depth + 1); unsigned ShAmt = ShiftImm->getZExtValue(); if (Opc == X86ISD::VSHLI) { - KnownZero = KnownZero << ShAmt; - KnownOne = KnownOne << ShAmt; + Known.Zero <<= ShAmt; + Known.One <<= ShAmt; // Low bits are known zero. - KnownZero.setLowBits(ShAmt); + Known.Zero.setLowBits(ShAmt); } else { - KnownZero = KnownZero.lshr(ShAmt); - KnownOne = KnownOne.lshr(ShAmt); + Known.Zero.lshrInPlace(ShAmt); + Known.One.lshrInPlace(ShAmt); // High bits are known zero. - KnownZero.setHighBits(ShAmt); + Known.Zero.setHighBits(ShAmt); } } break; @@ -26739,12 +26751,11 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, unsigned InBitWidth = SrcVT.getScalarSizeInBits(); assert(InNumElts >= NumElts && "Illegal VZEXT input"); - KnownZero = KnownOne = APInt(InBitWidth, 0); + Known = KnownBits(InBitWidth); APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts); - DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedSrcElts, Depth + 1); - KnownOne = KnownOne.zext(BitWidth); - KnownZero = KnownZero.zext(BitWidth); - KnownZero.setBits(InBitWidth, BitWidth); + DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1); + Known = Known.zext(BitWidth); + Known.Zero.setBitsFrom(InBitWidth); break; } } @@ -26767,6 +26778,17 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( return Tmp; } + case X86ISD::VSHLI: { + SDValue Src = Op.getOperand(0); + unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); + APInt ShiftVal = cast(Op.getOperand(1))->getAPIntValue(); + if (ShiftVal.uge(VTBits)) + return VTBits; // Shifted all bits out --> zero. + if (ShiftVal.uge(Tmp)) + return 1; // Shifted all sign bits out --> unknown. + return Tmp - ShiftVal.getZExtValue(); + } + case X86ISD::VSRAI: { SDValue Src = Op.getOperand(0); unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); @@ -27915,7 +27937,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, // Extract target shuffle mask and resolve sentinels and inputs. SmallVector OpMask; SmallVector OpInputs; - if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask)) + if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG)) return false; assert(OpInputs.size() <= 2 && "Too many shuffle inputs"); @@ -27949,28 +27971,45 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, OpMask.size() % RootMask.size() == 0) || OpMask.size() == RootMask.size()) && "The smaller number of elements must divide the larger."); - int MaskWidth = std::max(OpMask.size(), RootMask.size()); - int RootRatio = std::max(1, OpMask.size() / RootMask.size()); - int OpRatio = std::max(1, RootMask.size() / OpMask.size()); - assert(((RootRatio == 1 && OpRatio == 1) || - (RootRatio == 1) != (OpRatio == 1)) && + + // This function can be performance-critical, so we rely on the power-of-2 + // knowledge that we have about the mask sizes to replace div/rem ops with + // bit-masks and shifts. + assert(isPowerOf2_32(RootMask.size()) && "Non-power-of-2 shuffle mask sizes"); + assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes"); + unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size()); + unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size()); + + unsigned MaskWidth = std::max(OpMask.size(), RootMask.size()); + unsigned RootRatio = std::max(1, OpMask.size() >> RootMaskSizeLog2); + unsigned OpRatio = std::max(1, RootMask.size() >> OpMaskSizeLog2); + assert((RootRatio == 1 || OpRatio == 1) && "Must not have a ratio for both incoming and op masks!"); - SmallVector Mask((unsigned)MaskWidth, SM_SentinelUndef); + assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes"); + assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes"); + assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes"); + unsigned RootRatioLog2 = countTrailingZeros(RootRatio); + unsigned OpRatioLog2 = countTrailingZeros(OpRatio); + + SmallVector Mask(MaskWidth, SM_SentinelUndef); // Merge this shuffle operation's mask into our accumulated mask. Note that // this shuffle's mask will be the first applied to the input, followed by the // root mask to get us all the way to the root value arrangement. The reason // for this order is that we are recursing up the operation chain. - for (int i = 0; i < MaskWidth; ++i) { - int RootIdx = i / RootRatio; + for (unsigned i = 0; i < MaskWidth; ++i) { + unsigned RootIdx = i >> RootRatioLog2; if (RootMask[RootIdx] < 0) { // This is a zero or undef lane, we're done. Mask[i] = RootMask[RootIdx]; continue; } - int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio; + unsigned RootMaskedIdx = + RootRatio == 1 + ? RootMask[RootIdx] + : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1)); // Just insert the scaled root mask value if it references an input other // than the SrcOp we're currently inserting. @@ -27980,9 +28019,8 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, continue; } - RootMaskedIdx %= MaskWidth; - - int OpIdx = RootMaskedIdx / OpRatio; + RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1); + unsigned OpIdx = RootMaskedIdx >> OpRatioLog2; if (OpMask[OpIdx] < 0) { // The incoming lanes are zero or undef, it doesn't matter which ones we // are using. @@ -27991,9 +28029,12 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, } // Ok, we have non-zero lanes, map them through to one of the Op's inputs. - int OpMaskedIdx = OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio; - OpMaskedIdx %= MaskWidth; + unsigned OpMaskedIdx = + OpRatio == 1 + ? OpMask[OpIdx] + : (OpMask[OpIdx] << OpRatioLog2) + (RootMaskedIdx & (OpRatio - 1)); + OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1); if (OpMask[OpIdx] < (int)OpMask.size()) { assert(0 <= InputIdx0 && "Unknown target shuffle input"); OpMaskedIdx += InputIdx0 * MaskWidth; @@ -28814,7 +28855,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, } if (Elts.size() == VT.getVectorNumElements()) - if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true)) + if (SDValue LD = + EltsFromConsecutiveLoads(VT, Elts, dl, DAG, Subtarget, true)) return LD; // For AVX2, we sometimes want to combine @@ -28946,12 +28988,118 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } +// Try to match patterns such as +// (i16 bitcast (v16i1 x)) +// -> +// (i16 movmsk (16i8 sext (v16i1 x))) +// before the illegal vector is scalarized on subtargets that don't have legal +// vxi1 types. +static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, + const X86Subtarget &Subtarget) { + EVT VT = BitCast.getValueType(); + SDValue N0 = BitCast.getOperand(0); + EVT VecVT = N0->getValueType(0); + + if (!VT.isScalarInteger() || !VecVT.isSimple()) + return SDValue(); + + // With AVX512 vxi1 types are legal and we prefer using k-regs. + // MOVMSK is supported in SSE2 or later. + if (Subtarget.hasAVX512() || !Subtarget.hasSSE2()) + return SDValue(); + + // There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and + // v8f64. So all legal 128-bit and 256-bit vectors are covered except for + // v8i16 and v16i16. + // For these two cases, we can shuffle the upper element bytes to a + // consecutive sequence at the start of the vector and treat the results as + // v16i8 or v32i8, and for v61i8 this is the prefferable solution. However, + // for v16i16 this is not the case, because the shuffle is expensive, so we + // avoid sign-exteding to this type entirely. + // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as: + // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef) + MVT SExtVT; + MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE; + switch (VecVT.getSimpleVT().SimpleTy) { + default: + return SDValue(); + case MVT::v2i1: + SExtVT = MVT::v2i64; + FPCastVT = MVT::v2f64; + break; + case MVT::v4i1: + SExtVT = MVT::v4i32; + FPCastVT = MVT::v4f32; + // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) + // sign-extend to a 256-bit operation to avoid truncation. + if (N0->getOpcode() == ISD::SETCC && + N0->getOperand(0)->getValueType(0).is256BitVector() && + Subtarget.hasInt256()) { + SExtVT = MVT::v4i64; + FPCastVT = MVT::v4f64; + } + break; + case MVT::v8i1: + SExtVT = MVT::v8i16; + // For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)), + // sign-extend to a 256-bit operation to match the compare. + // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over + // 256-bit because the shuffle is cheaper than sign extending the result of + // the compare. + if (N0->getOpcode() == ISD::SETCC && + N0->getOperand(0)->getValueType(0).is256BitVector() && + Subtarget.hasInt256()) { + SExtVT = MVT::v8i32; + FPCastVT = MVT::v8f32; + } + break; + case MVT::v16i1: + SExtVT = MVT::v16i8; + // For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)), + // it is not profitable to sign-extend to 256-bit because this will + // require an extra cross-lane shuffle which is more exprensive than + // truncating the result of the compare to 128-bits. + break; + case MVT::v32i1: + // TODO: Handle pre-AVX2 cases by splitting to two v16i1's. + if (!Subtarget.hasInt256()) + return SDValue(); + SExtVT = MVT::v32i8; + break; + }; + + SDLoc DL(BitCast); + SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT); + if (SExtVT == MVT::v8i16) { + V = DAG.getBitcast(MVT::v16i8, V); + V = DAG.getVectorShuffle( + MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8), + {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1}); + } else + assert(SExtVT.getScalarType() != MVT::i16 && + "Vectors of i16 must be shuffled"); + if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE) + V = DAG.getBitcast(FPCastVT, V); + V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); + return DAG.getZExtOrTrunc(V, DL, VT); +} + static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT SrcVT = N0.getValueType(); + // Try to match patterns such as + // (i16 bitcast (v16i1 x)) + // -> + // (i16 movmsk (16i8 sext (v16i1 x))) + // before the setcc result is scalarized on subtargets that don't have legal + // vxi1 types. + if (DCI.isBeforeLegalize()) + if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget)) + return V; // Since MMX types are special and don't usually play with other vector types, // it's better to handle them early to be sure we emit efficient code by // avoiding store-load conversions. @@ -29350,7 +29498,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, // Resolve the target shuffle inputs and mask. SmallVector Mask; SmallVector Ops; - if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask)) + if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask, DAG)) return SDValue(); // Attempt to narrow/widen the shuffle mask to the correct size. @@ -29607,10 +29755,10 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG, if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1) { // Invert the cond to not(cond) : xor(op,allones)=not(op) - SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, + SDValue CondNew = DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getAllOnesConstant(DL, CondVT)); // Vselect cond, op1, op2 = Vselect not(cond), op2, op1 - return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS); + return DAG.getSelect(DL, VT, CondNew, RHS, LHS); } // To use the condition operand as a bitwise mask, it must have elements that @@ -29944,6 +30092,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Converting this to a min would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ISD::SETOLT: case ISD::SETLT: case ISD::SETLE: @@ -29974,6 +30123,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETGT: case ISD::SETGE: @@ -30008,6 +30158,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Converting this to a min would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ISD::SETOGT: case ISD::SETGT: case ISD::SETGE: @@ -30036,6 +30187,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Converting this to a max would handle both negative zeros and NaNs // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); + LLVM_FALLTHROUGH; case ISD::SETOLT: case ISD::SETLT: case ISD::SETLE: @@ -30093,7 +30245,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE; Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), Cond.getOperand(0), Cond.getOperand(1), NewCC); - return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS); + return DAG.getSelect(DL, VT, Cond, LHS, RHS); } } } @@ -30152,7 +30304,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // x s< 0 ? x^C : 0 --> subus x, C if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && ISD::isBuildVectorAllZeros(CondRHS.getNode()) && - OpRHSConst->getAPIntValue().isSignBit()) + OpRHSConst->getAPIntValue().isSignMask()) // Note that we have to rebuild the RHS constant here to ensure we // don't rely on particular values of undef lanes. return DAG.getNode( @@ -30203,13 +30355,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); - APInt DemandedMask(APInt::getSignBit(BitWidth)); - APInt KnownZero, KnownOne; + APInt DemandedMask(APInt::getSignMask(BitWidth)); + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); - if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) || - TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, - TLO)) { + if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) || + TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) { // If we changed the computation somewhere in the DAG, this change will // affect all users of Cond. Make sure it is fine and update all the nodes // so that we do not use the generic VSELECT anymore. Otherwise, we may @@ -30934,6 +31085,77 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, } } +static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, + EVT VT, SDLoc DL) { + + auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) { + SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(Mult, DL, VT)); + Result = DAG.getNode(ISD::SHL, DL, VT, Result, + DAG.getConstant(Shift, DL, MVT::i8)); + Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, + N->getOperand(0)); + return Result; + }; + + auto combineMulMulAddOrSub = [&](bool isAdd) { + SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), + DAG.getConstant(9, DL, VT)); + Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT)); + Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, + N->getOperand(0)); + return Result; + }; + + switch (MulAmt) { + default: + break; + case 11: + // mul x, 11 => add ((shl (mul x, 5), 1), x) + return combineMulShlAddOrSub(5, 1, /*isAdd*/ true); + case 21: + // mul x, 21 => add ((shl (mul x, 5), 2), x) + return combineMulShlAddOrSub(5, 2, /*isAdd*/ true); + case 22: + // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulShlAddOrSub(5, 2, /*isAdd*/ true)); + case 19: + // mul x, 19 => sub ((shl (mul x, 5), 2), x) + return combineMulShlAddOrSub(5, 2, /*isAdd*/ false); + case 13: + // mul x, 13 => add ((shl (mul x, 3), 2), x) + return combineMulShlAddOrSub(3, 2, /*isAdd*/ true); + case 23: + // mul x, 13 => sub ((shl (mul x, 3), 3), x) + return combineMulShlAddOrSub(3, 3, /*isAdd*/ false); + case 14: + // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulShlAddOrSub(3, 2, /*isAdd*/ true)); + case 26: + // mul x, 26 => sub ((mul (mul x, 9), 3), x) + return combineMulMulAddOrSub(/*isAdd*/ false); + case 28: + // mul x, 28 => add ((mul (mul x, 9), 3), x) + return combineMulMulAddOrSub(/*isAdd*/ true); + case 29: + // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x) + return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + combineMulMulAddOrSub(/*isAdd*/ true)); + case 30: + // mul x, 30 => sub (sub ((shl x, 5), x), x) + return DAG.getNode( + ISD::SUB, DL, VT, + DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(5, DL, MVT::i8)), + N->getOperand(0)), + N->getOperand(0)); + } + return SDValue(); +} + /// Optimize a single multiply with constant into two operations in order to /// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA. static SDValue combineMul(SDNode *N, SelectionDAG &DAG, @@ -30943,6 +31165,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalize() && VT.isVector()) return reduceVMULWidth(N, DAG, Subtarget); + if (!MulConstantOptimization) + return SDValue(); // An imul is usually smaller than the alternative sequence. if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); @@ -30998,7 +31222,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG, else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, DL, VT)); - } + } else if (!Subtarget.slowLEA()) + NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL); if (!NewMul) { assert(MulAmt != 0 && @@ -31054,8 +31279,7 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) { N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue N00 = N0.getOperand(0); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); - const APInt &ShAmt = N1C->getAPIntValue(); - Mask = Mask.shl(ShAmt); + Mask <<= N1C->getAPIntValue(); bool MaskOK = false; // We can handle cases concerning bit-widening nodes containing setcc_c if // we carefully interrogate the mask to make sure we are semantics @@ -31265,11 +31489,11 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, unsigned ShiftImm = ShiftVal.getZExtValue(); for (APInt &Elt : EltBits) { if (X86ISD::VSHLI == Opcode) - Elt = Elt.shl(ShiftImm); + Elt <<= ShiftImm; else if (X86ISD::VSRAI == Opcode) - Elt = Elt.ashr(ShiftImm); + Elt.ashrInPlace(ShiftImm); else - Elt = Elt.lshr(ShiftImm); + Elt.lshrInPlace(ShiftImm); } return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); } @@ -31356,13 +31580,11 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; if (Subtarget.hasAVX512()) { - SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CMP00, - CMP01, - DAG.getConstant(x86cc, DL, MVT::i8)); - if (N->getValueType(0) != MVT::i1) - return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), - FSetCC); - return FSetCC; + SDValue FSetCC = + DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, + DAG.getConstant(x86cc, DL, MVT::i8)); + return DAG.getNode(X86ISD::VEXTRACT, DL, N->getSimpleValueType(0), + FSetCC, DAG.getIntPtrConstant(0, DL)); } SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01, @@ -31641,20 +31863,22 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, // (sub (xor X, M), M) static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - assert(N->getOpcode() == ISD::OR); + assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); - if (!((VT == MVT::v2i64) || (VT == MVT::v4i64 && Subtarget.hasInt256()))) + if (!((VT.is128BitVector() && Subtarget.hasSSE2()) || + (VT.is256BitVector() && Subtarget.hasInt256()))) return SDValue(); - assert(Subtarget.hasSSE2() && "Unexpected i64 vector without SSE2!"); - // Canonicalize pandn to RHS - if (N0.getOpcode() == X86ISD::ANDNP) + // Canonicalize AND to LHS. + if (N1.getOpcode() == ISD::AND) std::swap(N0, N1); + // TODO: Attempt to match against AND(XOR(-1,X),Y) as well, waiting for + // ANDNP combine allows other combines to happen that prevent matching. if (N0.getOpcode() != ISD::AND || N1.getOpcode() != X86ISD::ANDNP) return SDValue(); @@ -31676,21 +31900,10 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, Y = peekThroughBitcasts(Y); EVT MaskVT = Mask.getValueType(); - - // Validate that the Mask operand is a vector sra node. - // FIXME: what to do for bytes, since there is a psignb/pblendvb, but - // there is no psrai.b unsigned EltBits = MaskVT.getScalarSizeInBits(); - unsigned SraAmt = ~0; - if (Mask.getOpcode() == ISD::SRA) { - if (auto *AmtBV = dyn_cast(Mask.getOperand(1))) - if (auto *AmtConst = AmtBV->getConstantSplatNode()) - SraAmt = AmtConst->getZExtValue(); - } else if (Mask.getOpcode() == X86ISD::VSRAI) { - SDValue SraC = Mask.getOperand(1); - SraAmt = cast(SraC)->getZExtValue(); - } - if ((SraAmt + 1) != EltBits) + + // TODO: Attempt to handle floating point cases as well? + if (!MaskVT.isInteger() || DAG.ComputeNumSignBits(Mask) != EltBits) return SDValue(); SDLoc DL(N); @@ -31711,7 +31924,8 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, // (add (xor X, M), (and M, 1)) // And further to: // (sub (xor X, M), M) - if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { + if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT && + DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) { auto IsNegV = [](SDNode *N, SDValue V) { return N->getOpcode() == ISD::SUB && N->getOperand(1) == V && ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()); @@ -31723,7 +31937,6 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, V = Y; if (V) { - assert(EltBits == 8 || EltBits == 16 || EltBits == 32); SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask); SDValue SubOp2 = Mask; @@ -31740,8 +31953,8 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, if (V == Y) std::swap(SubOp1, SubOp2); - return DAG.getBitcast(VT, - DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2)); + SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2); + return DAG.getBitcast(VT, Res); } } @@ -31754,7 +31967,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, X = DAG.getBitcast(BlendVT, X); Y = DAG.getBitcast(BlendVT, Y); Mask = DAG.getBitcast(BlendVT, Mask); - Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X); + Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X); return DAG.getBitcast(VT, Mask); } @@ -32234,8 +32447,8 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, BuildVectorSDNode *BV = dyn_cast(V); if (!BV || !BV->isConstant()) return false; - for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) { - ConstantSDNode *C = dyn_cast(V.getOperand(i)); + for (SDValue Op : V->ops()) { + ConstantSDNode *C = dyn_cast(Op); if (!C) return false; uint64_t Val = C->getZExtValue(); @@ -32310,15 +32523,17 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // For chips with slow 32-byte unaligned loads, break the 32-byte operation - // into two 16-byte operations. + // into two 16-byte operations. Also split non-temporal aligned loads on + // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads. ISD::LoadExtType Ext = Ld->getExtensionType(); bool Fast; unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && - TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, - AddressSpace, Alignment, &Fast) && !Fast) { + ((Ld->isNonTemporal() && !Subtarget.hasInt256() && Alignment >= 16) || + (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, + AddressSpace, Alignment, &Fast) && !Fast))) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -33428,8 +33643,8 @@ static SDValue isFNEG(SDNode *N) { SDValue Op0 = peekThroughBitcasts(Op.getOperand(0)); unsigned EltBits = Op1.getScalarValueSizeInBits(); - auto isSignBitValue = [&](const ConstantFP *C) { - return C->getValueAPF().bitcastToAPInt() == APInt::getSignBit(EltBits); + auto isSignMask = [&](const ConstantFP *C) { + return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits); }; // There is more than one way to represent the same constant on @@ -33440,21 +33655,21 @@ static SDValue isFNEG(SDNode *N) { // We check all variants here. if (Op1.getOpcode() == X86ISD::VBROADCAST) { if (auto *C = getTargetConstantFromNode(Op1.getOperand(0))) - if (isSignBitValue(cast(C))) + if (isSignMask(cast(C))) return Op0; } else if (BuildVectorSDNode *BV = dyn_cast(Op1)) { if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode()) - if (isSignBitValue(CN->getConstantFPValue())) + if (isSignMask(CN->getConstantFPValue())) return Op0; } else if (auto *C = getTargetConstantFromNode(Op1)) { if (C->getType()->isVectorTy()) { if (auto *SplatV = C->getSplatValue()) - if (isSignBitValue(cast(SplatV))) + if (isSignMask(cast(SplatV))) return Op0; } else if (auto *FPConst = dyn_cast(C)) - if (isSignBitValue(FPConst)) + if (isSignMask(FPConst)) return Op0; } return SDValue(); @@ -33479,7 +33694,7 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, // use of a constant by performing (-0 - A*B) instead. // FIXME: Check rounding control flags as well once it becomes available. if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) && - Arg->getFlags()->hasNoSignedZeros() && Subtarget.hasAnyFMA()) { + Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) { SDValue Zero = DAG.getConstantFP(0.0, DL, VT); SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0), Arg.getOperand(1), Zero); @@ -33734,8 +33949,7 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands // are NaN, the NaN value of Op1 is the result. - auto SelectOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; - return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax); + return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax); } /// Do target-specific dag combines on X86ISD::ANDNP nodes. @@ -33773,12 +33987,12 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG, if (Op1.hasOneUse()) { unsigned BitWidth = Op1.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || - TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) + if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) || + TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO)) DCI.CommitTargetLoweringOpt(TLO); } return SDValue(); @@ -33840,8 +34054,8 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, return SDValue(); bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND; - bool NSW = Add->getFlags()->hasNoSignedWrap(); - bool NUW = Add->getFlags()->hasNoUnsignedWrap(); + bool NSW = Add->getFlags().hasNoSignedWrap(); + bool NUW = Add->getFlags().hasNoUnsignedWrap(); // We need an 'add nsw' feeding into the 'sext' or 'add nuw' feeding // into the 'zext' @@ -33881,7 +34095,7 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, SDNodeFlags Flags; Flags.setNoSignedWrap(NSW); Flags.setNoUnsignedWrap(NUW); - return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, &Flags); + return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags); } /// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) -> @@ -34028,7 +34242,7 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, if (InVT == MVT::i1) { SDValue Zero = DAG.getConstant(0, DL, VT); SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); - return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero); + return DAG.getSelect(DL, VT, N0, AllOnes, Zero); } return SDValue(); } @@ -34484,6 +34698,32 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Optimize RES, EFLAGS = X86ISD::ADD LHS, RHS +static SDValue combineX86ADD(SDNode *N, SelectionDAG &DAG, + X86TargetLowering::DAGCombinerInfo &DCI) { + // When legalizing carry, we create carries via add X, -1 + // If that comes from an actual carry, via setcc, we use the + // carry directly. + if (isAllOnesConstant(N->getOperand(1)) && N->hasAnyUseOfValue(1)) { + SDValue Carry = N->getOperand(0); + while (Carry.getOpcode() == ISD::TRUNCATE || + Carry.getOpcode() == ISD::ZERO_EXTEND || + Carry.getOpcode() == ISD::SIGN_EXTEND || + Carry.getOpcode() == ISD::ANY_EXTEND || + (Carry.getOpcode() == ISD::AND && + isOneConstant(Carry.getOperand(1)))) + Carry = Carry.getOperand(0); + + if (Carry.getOpcode() == X86ISD::SETCC || + Carry.getOpcode() == X86ISD::SETCC_CARRY) { + if (Carry.getConstantOperandVal(0) == X86::COND_B) + return DCI.CombineTo(N, SDValue(N, 0), Carry.getOperand(1)); + } + } + + return SDValue(); +} + // Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS static SDValue combineADC(SDNode *N, SelectionDAG &DAG, X86TargetLowering::DAGCombinerInfo &DCI) { @@ -34631,7 +34871,7 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG, return SDValue(); ShrinkMode Mode; - if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode)) + if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16) return SDValue(); EVT VT = N->getValueType(0); @@ -34738,8 +34978,8 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - const SDNodeFlags *Flags = &cast(N)->Flags; - if (Flags->hasVectorReduction()) { + const SDNodeFlags Flags = N->getFlags(); + if (Flags.hasVectorReduction()) { if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget)) return Sad; if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget)) @@ -35001,7 +35241,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), OpVT, AS, Alignment, &Fast) && Fast) { SDValue Ops[] = {SubVec2, SubVec}; - if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) + if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, + Subtarget, false)) return Ld; } } @@ -35041,10 +35282,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::VSELECT: case ISD::SELECT: case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget); - case ISD::BITCAST: return combineBitcast(N, DAG, Subtarget); + case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); case ISD::ADD: return combineAdd(N, DAG, Subtarget); case ISD::SUB: return combineSub(N, DAG, Subtarget); + case X86ISD::ADD: return combineX86ADD(N, DAG, DCI); case X86ISD::ADC: return combineADC(N, DAG, DCI); case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget); case ISD::SHL: @@ -35169,14 +35411,21 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { /// know that the code that lowers COPY of EFLAGS has to use the stack, and if /// we don't adjust the stack we clobber the first frame index. /// See X86InstrInfo::copyPhysReg. -bool X86TargetLowering::hasCopyImplyingStackAdjustment( - MachineFunction *MF) const { - const MachineRegisterInfo &MRI = MF->getRegInfo(); - +static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); return any_of(MRI.reg_instructions(X86::EFLAGS), [](const MachineInstr &RI) { return RI.isCopy(); }); } +void X86TargetLowering::finalizeLowering(MachineFunction &MF) const { + if (hasCopyImplyingStackAdjustment(MF)) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setHasCopyImplyingStackAdjustment(true); + } + + TargetLoweringBase::finalizeLowering(MF); +} + /// This method query the target whether it is beneficial for dag combiner to /// promote the specified node. If true, it should return the desired promotion /// type by reference. @@ -35417,6 +35666,7 @@ TargetLowering::ConstraintWeight switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + LLVM_FALLTHROUGH; case 'R': case 'q': case 'Q': @@ -35768,6 +36018,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &X86::GR64RegClass); break; } + LLVM_FALLTHROUGH; // 32-bit fallthrough case 'Q': // Q_REGS if (VT == MVT::i32 || VT == MVT::f32) @@ -35917,10 +36168,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return Res; } - // 'A' means EAX + EDX. + // 'A' means [ER]AX + [ER]DX. if (Constraint == "A") { - Res.first = X86::EAX; - Res.second = &X86::GR32_ADRegClass; + if (Subtarget.is64Bit()) { + Res.first = X86::RAX; + Res.second = &X86::GR64_ADRegClass; + } else { + assert((Subtarget.is32Bit() || Subtarget.is16Bit()) && + "Expecting 64, 32 or 16 bit subtarget"); + Res.first = X86::EAX; + Res.second = &X86::GR32_ADRegClass; + } return Res; } return Res; @@ -35930,7 +36188,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to // turn into {ax},{dx}. // MVT::Other is used to specify clobber names. - if (Res.second->hasType(VT) || VT == MVT::Other) + if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other) return Res; // Correct type already, nothing to do. // Get a matching integer of the correct size. i.e. "ax" with MVT::32 should @@ -35968,11 +36226,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, Res.second = &X86::FR32RegClass; else if (VT == MVT::f64 || VT == MVT::i64) Res.second = &X86::FR64RegClass; - else if (X86::VR128RegClass.hasType(VT)) + else if (TRI->isTypeLegalForClass(X86::VR128RegClass, VT)) Res.second = &X86::VR128RegClass; - else if (X86::VR256RegClass.hasType(VT)) + else if (TRI->isTypeLegalForClass(X86::VR256RegClass, VT)) Res.second = &X86::VR256RegClass; - else if (X86::VR512RegClass.hasType(VT)) + else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT)) Res.second = &X86::VR512RegClass; else { // Type mismatch and not a clobber: Return an error; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ab4910daca02b9dc5d7fe6917513e7e4b4792a3b..f51b6641db2fb77d2f77fd4723de6e5198b6dab2 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -559,6 +559,9 @@ namespace llvm { // Conversions between float and half-float. CVTPS2PH, CVTPH2PS, + // LWP insert record. + LWPINS, + // Compare and swap. LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, @@ -773,10 +776,6 @@ namespace llvm { /// and some i16 instructions are slow. bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; - /// Return true if the MachineFunction contains a COPY which would imply - /// HasOpaqueSPAdjustment. - bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const override; - MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; @@ -828,8 +827,7 @@ namespace llvm { /// Determine which of the bits specified in Mask are known to be either /// zero or one and return them in the KnownZero/KnownOne bitsets. void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; @@ -1066,6 +1064,9 @@ namespace llvm { ArrayRef Shuffles, ArrayRef Indices, unsigned Factor) const override; + + void finalizeLowering(MachineFunction &MF) const override; + protected: std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, @@ -1162,7 +1163,7 @@ namespace llvm { SDValue LowerToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; @@ -1207,7 +1208,7 @@ namespace llvm { bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; - bool mayBeEmittedAsTailCall(CallInst *CI) const override; + bool mayBeEmittedAsTailCall(const CallInst *CI) const override; EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 78c44050c6102eb56338668e22d2f12807596135..2620679df2517289cd0aff8ad44c5c092927f9f4 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -31,8 +31,7 @@ class X86VectorVTInfo("VK" # NumElts # "WM"); // The mask VT. - ValueType KVT = !cast(!if (!eq (NumElts, 1), "i1", - "v" # NumElts # "i1")); + ValueType KVT = !cast("v" # NumElts # "i1"); // Suffix used in the instruction mnemonic. string Suffix = suffix; @@ -1546,6 +1545,7 @@ multiclass avx512_cmp_scalar (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc)>, EVEX_4V; + let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc), @@ -1570,6 +1570,7 @@ multiclass avx512_cmp_scalar (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V; + let mayLoad = 1 in defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), @@ -1630,6 +1631,7 @@ multiclass avx512_icmp_packed opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2)))))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + let isCommutable = IsCommutable in def rrk : AVX512BI opc, string Suffix, SDNode OpNode, (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + let isCommutable = 1 in def rrik : AVX512AIi8; - def : Pat<(i1 (load addr:$src)), + def : Pat<(v1i1 (load addr:$src)), (COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>; def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))), (KMOVWkm addr:$src)>; @@ -2278,77 +2281,45 @@ let Predicates = [HasBWI] in { } let Predicates = [HasAVX512] in { - def : Pat<(i1 (trunc (i64 GR64:$src))), - (COPY_TO_REGCLASS (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit), - (i32 1)), VK1)>; - - def : Pat<(i1 (trunc (i32 GR32:$src))), - (COPY_TO_REGCLASS (AND32ri8 $src, (i32 1)), VK1)>; - - def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))), - (COPY_TO_REGCLASS GR32:$src, VK1)>; - - def : Pat<(i1 (trunc (i8 GR8:$src))), - (COPY_TO_REGCLASS - (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR8:$src, sub_8bit), (i32 1)), VK1)>; - - def : Pat<(i1 (trunc (i16 GR16:$src))), - (COPY_TO_REGCLASS - (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - GR16:$src, sub_16bit), (i32 1)), VK1)>; + multiclass operation_gpr_mask_copy_lowering { + def : Pat<(maskVT (scalar_to_vector GR32:$src)), + (COPY_TO_REGCLASS GR32:$src, maskRC)>; - def : Pat<(i32 (zext VK1:$src)), - (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1))>; + def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))), + (COPY_TO_REGCLASS maskRC:$src, GR32)>; - def : Pat<(i32 (anyext VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, GR32)>; + def : Pat<(maskVT (scalar_to_vector GR8:$src)), + (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; - def : Pat<(i8 (zext VK1:$src)), - (EXTRACT_SUBREG - (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_8bit)>; + def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))), + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; - def : Pat<(i8 (anyext VK1:$src)), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>; + def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))), + (COPY_TO_REGCLASS maskRC:$src, GR32)>; + } - def : Pat<(i64 (zext VK1:$src)), - (SUBREG_TO_REG (i64 0), - (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_32bit)>; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; + defm : operation_gpr_mask_copy_lowering; - def : Pat<(i64 (anyext VK1:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_32bit)>; + def : Pat<(X86kshiftr (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) , + (COPY_TO_REGCLASS + (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit), (i32 1))), VK1)>; + def : Pat<(X86kshiftr (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) , + (COPY_TO_REGCLASS + (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit), (i32 1))), VK16)>; + def : Pat<(X86kshiftr (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) , + (COPY_TO_REGCLASS + (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit), (i32 1))), VK8)>; - def : Pat<(i16 (zext VK1:$src)), - (EXTRACT_SUBREG - (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_16bit)>; - - def : Pat<(i16 (anyext VK1:$src)), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>; -} -def : Pat<(v16i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK16)>; -def : Pat<(v8i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK8)>; -def : Pat<(v4i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK4)>; -def : Pat<(v2i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK2)>; -def : Pat<(v32i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK32)>; -def : Pat<(v64i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK64)>; - -def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; -def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; -def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; - -def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))), (COPY_TO_REGCLASS VK64:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))), (COPY_TO_REGCLASS VK32:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))), (COPY_TO_REGCLASS VK16:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), (COPY_TO_REGCLASS VK8:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK4:$src, (iPTR 0))), (COPY_TO_REGCLASS VK4:$src, VK1)>; -def : Pat<(i1 (X86Vextract VK2:$src, (iPTR 0))), (COPY_TO_REGCLASS VK2:$src, VK1)>; +} // Mask unary operation // - KNOT @@ -2549,14 +2520,11 @@ let Predicates = [HasAVX512] in { def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>; def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>; def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>; + def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>; def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>; def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>; def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>; - let AddedComplexity = 10 in { // To optimize isel table. - def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>; - def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; - def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>; - } + def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>; } // Patterns for kmask insert_subvector/extract_subvector to/from index=0 @@ -2568,6 +2536,12 @@ multiclass operation_subvector_mask_lowering; } +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; +defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; defm : operation_subvector_mask_lowering; @@ -2721,22 +2695,22 @@ multiclass avx512_load_vl opc, string OpcodeStr, } multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, - PatFrag st_frag, PatFrag mstore> { + PatFrag st_frag, PatFrag mstore, string Name> { let hasSideEffects = 0 in { def rr_REV : AVX512PI, EVEX; + [], _.ExeDomain>, EVEX, FoldGenData; def rrk_REV : AVX512PI, EVEX, EVEX_K; + [], _.ExeDomain>, EVEX, EVEX_K, FoldGenData; def rrkz_REV : AVX512PI, EVEX, EVEX_KZ; + [], _.ExeDomain>, EVEX, EVEX_KZ, FoldGenData; } def mr : AVX512PI opc, string OpcodeStr, X86VectorVTInfo _, multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + string Name> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; + masked_store_unaligned, Name#Z>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_unaligned, Name#Z256>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_unaligned, Name#Z128>, EVEX_V128; } } multiclass avx512_alignedstore_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + string Name> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; + masked_store_aligned512, Name#Z>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_aligned256, Name#Z256>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_aligned128, Name#Z128>, EVEX_V128; } } defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512>, avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, - HasAVX512>, PS, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVAPS">, + PS, EVEX_CD8<32, CD8VF>; defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512>, avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, - HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVAPD">, + PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, null_frag>, - avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>, + avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, + "VMOVUPS">, PS, EVEX_CD8<32, CD8VF>; defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, null_frag>, - avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>, + avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, + "VMOVUPD">, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, - HasAVX512>, PD, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVDQA32">, + PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, - HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVDQA64">, + PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>, - avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, - HasBWI>, XD, EVEX_CD8<8, CD8VF>; + avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, + HasBWI, "VMOVDQU8">, + XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>, avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, - HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>; + HasBWI, "VMOVDQU16">, + XD, VEX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, - HasAVX512>, XS, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVDQU32">, + XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, - HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVDQU64">, + XS, VEX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. We need // to load or store from a ZMM register instead. These are converted in @@ -3247,7 +3233,7 @@ multiclass avx512_move_scalar_lowering(InstrStr#rrk) @@ -3258,7 +3244,7 @@ def : Pat<(_.VT (OpNode _.RC:$src0, def : Pat<(_.VT (OpNode _.RC:$src0, (_.VT (scalar_to_vector - (_.EltVT (X86selects (i1 (trunc GR32:$mask)), + (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))), (_.EltVT _.FRC:$src1), (_.EltVT ZeroFP))))))), (COPY_TO_REGCLASS (!cast(InstrStr#rrkz) @@ -3277,7 +3263,7 @@ def : Pat<(masked_store addr:$dst, Mask, (iPTR 0))), (iPTR 0)))), (!cast(InstrStr#mrk) addr:$dst, - (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)), + (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; } @@ -3294,7 +3280,7 @@ def : Pat<(masked_store addr:$dst, Mask, (iPTR 0))), (iPTR 0)))), (!cast(InstrStr#mrk) addr:$dst, - (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>; } @@ -3308,7 +3294,7 @@ def : Pat<(_.info128.VT (extract_subvector (v16i32 immAllZerosV))))), (iPTR 0))), (!cast(InstrStr#rmkz) - (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)), + (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), addr:$srcAddr)>; def : Pat<(_.info128.VT (extract_subvector @@ -3320,7 +3306,7 @@ def : Pat<(_.info128.VT (extract_subvector (iPTR 0))))), (iPTR 0))), (!cast(InstrStr#rmk) _.info128.RC:$src, - (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)), + (COPY_TO_REGCLASS MaskRC:$mask, VK1WM), addr:$srcAddr)>; } @@ -3336,7 +3322,7 @@ def : Pat<(_.info128.VT (extract_subvector (v16i32 immAllZerosV))))), (iPTR 0))), (!cast(InstrStr#rmkz) - (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; def : Pat<(_.info128.VT (extract_subvector @@ -3348,7 +3334,7 @@ def : Pat<(_.info128.VT (extract_subvector (iPTR 0))))), (iPTR 0))), (!cast(InstrStr#rmk) _.info128.RC:$src, - (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)), + (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM), addr:$srcAddr)>; } @@ -3379,20 +3365,55 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), - (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)), + (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; -let hasSideEffects = 0 in -defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info, - (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2), - "vmovss.s", "$src2, $src1", "$src1, $src2", []>, - XS, EVEX_4V, VEX_LIG; - -let hasSideEffects = 0 in -defm VMOVSDZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info, - (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2), - "vmovsd.s", "$src2, $src1", "$src1, $src2", []>, - XD, EVEX_4V, VEX_LIG, VEX_W; +let hasSideEffects = 0 in { + def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [], NoItinerary>, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrr">; + +let Constraints = "$src0 = $dst" in + def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, + VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst {${mask}}|"# + "$dst {${mask}}, $src1, $src2}", + [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrk">; + + def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# + "$dst {${mask}} {z}, $src1, $src2}", + [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrkz">; + + def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W, + FoldGenData<"VMOVSDZrr">; + +let Constraints = "$src0 = $dst" in + def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, + VR128X:$src1, FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# + "$dst {${mask}}, $src1, $src2}", + [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrk">; + + def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.KRCWM:$mask, VR128X:$src1, + FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# + "$dst {${mask}} {z}, $src1, $src2}", + [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrkz">; +} let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { @@ -3633,23 +3654,20 @@ let Predicates = [HasAVX512] in { let SchedRW = [WriteLoad] in { def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst), (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", - [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))], - SSEPackedInt>, EVEX, T8PD, EVEX_V512, + [], SSEPackedInt>, EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>; let Predicates = [HasVLX] in { def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst), (ins i256mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", - [(set VR256X:$dst, (int_x86_avx2_movntdqa addr:$src))], - SSEPackedInt>, EVEX, T8PD, EVEX_V256, + [], SSEPackedInt>, EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>; def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", - [(set VR128X:$dst, (int_x86_sse41_movntdqa addr:$src))], - SSEPackedInt>, EVEX, T8PD, EVEX_V128, + [], SSEPackedInt>, EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>; } } @@ -8632,6 +8650,20 @@ multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>; +// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. +let Predicates = [HasAVX512, NoVLX] in { + def : Pat<(v4i64 (abs VR256X:$src)), + (EXTRACT_SUBREG + (VPABSQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), + sub_ymm)>; + def : Pat<(v2i64 (abs VR128X:$src)), + (EXTRACT_SUBREG + (VPABSQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), + sub_xmm)>; +} + multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ defm NAME : avx512_unary_rm_vl_dq; @@ -8640,6 +8672,66 @@ multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>; defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>; +// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. +let Predicates = [HasCDI, NoVLX] in { + def : Pat<(v4i64 (ctlz VR256X:$src)), + (EXTRACT_SUBREG + (VPLZCNTQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), + sub_ymm)>; + def : Pat<(v2i64 (ctlz VR128X:$src)), + (EXTRACT_SUBREG + (VPLZCNTQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), + sub_xmm)>; + + def : Pat<(v8i32 (ctlz VR256X:$src)), + (EXTRACT_SUBREG + (VPLZCNTDZrr + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), + sub_ymm)>; + def : Pat<(v4i32 (ctlz VR128X:$src)), + (EXTRACT_SUBREG + (VPLZCNTDZrr + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), + sub_xmm)>; +} + +//===---------------------------------------------------------------------===// +// Counts number of ones - VPOPCNTD and VPOPCNTQ +//===---------------------------------------------------------------------===// + +multiclass avx512_unary_rmb_popcnt opc, string OpcodeStr, X86VectorVTInfo VTInfo> { + let Predicates = [HasVPOPCNTDQ] in + defm Z : avx512_unary_rmb, EVEX_V512; +} + +// Use 512bit version to implement 128/256 bit. +multiclass avx512_unary_lowering { + let Predicates = [prd] in { + def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), + (EXTRACT_SUBREG + (!cast(NAME # "Zrr") + (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), + _.info256.RC:$src1, + _.info256.SubRegIdx)), + _.info256.SubRegIdx)>; + + def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), + (EXTRACT_SUBREG + (!cast(NAME # "Zrr") + (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), + _.info128.RC:$src1, + _.info128.SubRegIdx)), + _.info128.SubRegIdx)>; + } +} + +defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>, + avx512_unary_lowering; +defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>, + avx512_unary_lowering, VEX_W; + //===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// @@ -8787,7 +8879,7 @@ multiclass avx512_extract_elt_w { def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - EVEX, TAPD; + EVEX, TAPD, FoldGenData; defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; } diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index bfd21c062aa26ec51ebfe28b0c88654dd387cbe9..e38bbc9b3d368f7ca672bfc72205f0011f08dd08 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -964,10 +964,10 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_Rev; - def NAME#16rr_REV : BinOpRR_Rev; - def NAME#32rr_REV : BinOpRR_Rev; - def NAME#64rr_REV : BinOpRR_Rev; + def NAME#8rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_Rev, FoldGenData; def NAME#8rm : BinOpRM_RF; def NAME#16rm : BinOpRM_RF; @@ -989,10 +989,12 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // Constraints = "$src1 = $dst" - def NAME#8mr : BinOpMR_RMW; - def NAME#16mr : BinOpMR_RMW; - def NAME#32mr : BinOpMR_RMW; - def NAME#64mr : BinOpMR_RMW; + let mayLoad = 1, mayStore = 1 in { + def NAME#8mr : BinOpMR_RMW; + def NAME#16mr : BinOpMR_RMW; + def NAME#32mr : BinOpMR_RMW; + def NAME#64mr : BinOpMR_RMW; + } // NOTE: These are order specific, we want the mi8 forms to be listed // first so that they are slightly preferred to the mi forms. @@ -1047,10 +1049,10 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_RFF_Rev; - def NAME#16rr_REV : BinOpRR_RFF_Rev; - def NAME#32rr_REV : BinOpRR_RFF_Rev; - def NAME#64rr_REV : BinOpRR_RFF_Rev; + def NAME#8rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_RFF_Rev, FoldGenData; def NAME#8rm : BinOpRM_RFF; def NAME#16rm : BinOpRM_RFF; @@ -1127,10 +1129,10 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // isCommutable - def NAME#8rr_REV : BinOpRR_F_Rev; - def NAME#16rr_REV : BinOpRR_F_Rev; - def NAME#32rr_REV : BinOpRR_F_Rev; - def NAME#64rr_REV : BinOpRR_F_Rev; + def NAME#8rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_F_Rev, FoldGenData; def NAME#8rm : BinOpRM_F; def NAME#16rm : BinOpRM_F; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index e592c2b3c0aa581c1291504d8918f33f5e746ac7..d003d027ddb9f5abc36058bc379f18e472808240 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -43,7 +43,8 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP] in // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. let Defs = [ESP, EFLAGS], Uses = [ESP] in { -def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), +def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), + (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3), "#ADJCALLSTACKDOWN", []>, Requires<[NotLP64]>; @@ -52,8 +53,8 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), [(X86callseq_end timm:$amt1, timm:$amt2)]>, Requires<[NotLP64]>; } -def : Pat<(X86callseq_start timm:$amt1), - (ADJCALLSTACKDOWN32 i32imm:$amt1, 0)>, Requires<[NotLP64]>; +def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), + (ADJCALLSTACKDOWN32 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[NotLP64]>; // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into @@ -62,7 +63,8 @@ def : Pat<(X86callseq_start timm:$amt1), // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber EFLAGS. let Defs = [RSP, EFLAGS], Uses = [RSP] in { -def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), +def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), + (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3), "#ADJCALLSTACKDOWN", []>, Requires<[IsLP64]>; @@ -71,8 +73,8 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), [(X86callseq_end timm:$amt1, timm:$amt2)]>, Requires<[IsLP64]>; } -def : Pat<(X86callseq_start timm:$amt1), - (ADJCALLSTACKDOWN64 i32imm:$amt1, 0)>, Requires<[IsLP64]>; +def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), + (ADJCALLSTACKDOWN64 i32imm:$amt1, i32imm:$amt2, 0)>, Requires<[IsLP64]>; // x86-64 va_start lowering magic. @@ -1271,11 +1273,11 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ if (ConstantSDNode *CN = dyn_cast(N->getOperand(1))) return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); - APInt KnownZero0, KnownOne0; - CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0); - APInt KnownZero1, KnownOne1; - CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0); - return (~KnownZero0 & ~KnownZero1) == 0; + KnownBits Known0; + CurDAG->computeKnownBits(N->getOperand(0), Known0, 0); + KnownBits Known1; + CurDAG->computeKnownBits(N->getOperand(1), Known1, 0); + return (~Known0.Zero & ~Known1.Zero) == 0; }]>; diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 1941ae57f0f1f5980eff828ca29ff0183fe8b7ca..3a3cdc9fa5742f8091418a5326b9348bbd6873ae 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -297,7 +297,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, - VEX_LIG; + VEX_LIG, FoldGenData; } multiclass fma4s_int opc, string OpcodeStr, Operand memop, @@ -321,6 +321,12 @@ let isCodeGenOnly = 1 in { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG; +let hasSideEffects = 0 in + def rr_Int_REV : FMA4, VEX_LIG, FoldGenData; } // isCodeGenOnly = 1 } @@ -372,12 +378,13 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def rr_REV : FMA4; + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, + FoldGenData; def Yrr_REV : FMA4, - VEX_L; + VEX_L, FoldGenData; } // isCodeGenOnly = 1 } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index c2fe786732dcdd5fd7f57e47600c5b513f9eb270..bfcbf71d252f363f09f553fc19e4218297fe7f0a 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -225,6 +225,12 @@ class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } class XOP { Encoding OpEnc = EncXOP; } class XOP_4V : XOP { bit hasVEX_4V = 1; } +// Specify the alternative register form instruction to replace the current +// instruction in case it was picked during generation of memory folding tables +class FoldGenData { + string FoldGenRegForm = _RegisterForm; +} + class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, InstrItinClass itin, @@ -304,6 +310,10 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, CD8_EltSize, !srl(VectSize, CD8_Form{1-0}))), 0); + // Used in the memory folding generation (TableGen backend) to point to an alternative + // instruction to replace the current one in case it got picked during generation. + string FoldGenRegForm = ?; + // TSFlags layout should be kept in sync with X86BaseInfo.h. let TSFlags{6-0} = FormBits; let TSFlags{8-7} = OpSizeBits; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 9867ba84bb9ba5d1cf117c25ef31f6e9942330e8..c28b35b22977a3f04b431312399e63f950d66be3 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -274,7 +274,7 @@ def X86select : SDNode<"X86ISD::SELECT", SDTCisSameNumEltsAs<0, 1>]>>; def X86selects : SDNode<"X86ISD::SELECTS", - SDTypeProfile<1, 3, [SDTCisVT<1, i1>, + SDTypeProfile<1, 3, [SDTCisVT<1, v1i1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>>; @@ -441,7 +441,7 @@ def X86Vfpclass : SDNode<"X86ISD::VFPCLASS", SDTCisSameNumEltsAs<0,1>, SDTCisVT<2, i32>]>, []>; def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS", - SDTypeProfile<1, 2, [SDTCisVT<0, i1>, + SDTypeProfile<1, 2, [SDTCisVT<0, v1i1>, SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>; def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", @@ -451,7 +451,7 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2, - [SDTCisEltOfVec<0, 1>, SDTCisVec<1>, + [SDTCisVec<1>, SDTCisPtrTy<2>]>, []>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; @@ -641,22 +641,37 @@ def sdmem : Operand { // SSE pattern fragments //===----------------------------------------------------------------------===// +// Vector load wrappers to prevent folding of non-temporal aligned loads on +// supporting targets. +def vec128load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasSSE41() || !cast(N)->isNonTemporal() || + cast(N)->getAlignment() < 16; +}]>; +def vec256load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasAVX2() || !cast(N)->isNonTemporal() || + cast(N)->getAlignment() < 32; +}]>; +def vec512load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return !Subtarget->hasAVX512() || !cast(N)->isNonTemporal() || + cast(N)->getAlignment() < 64; +}]>; + // 128-bit load pattern fragments // NOTE: all 128-bit integer vector loads are promoted to v2i64 -def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; -def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; -def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; +def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (vec128load node:$ptr))>; +def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (vec128load node:$ptr))>; +def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (vec128load node:$ptr))>; // 256-bit load pattern fragments // NOTE: all 256-bit integer vector loads are promoted to v4i64 -def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; -def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; -def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; +def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (vec256load node:$ptr))>; +def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (vec256load node:$ptr))>; +def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (vec256load node:$ptr))>; // 512-bit load pattern fragments -def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>; -def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>; -def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>; +def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (vec512load node:$ptr))>; +def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (vec512load node:$ptr))>; +def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (vec512load node:$ptr))>; // 128-/256-/512-bit extload pattern fragments def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>; @@ -722,15 +737,15 @@ def alignedloadv8f64 : PatFrag<(ops node:$ptr), def alignedloadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (alignedload512 node:$ptr))>; -// Like 'load', but uses special alignment checks suitable for use in +// Like 'vec128load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to // be naturally aligned on some targets but not on others. If the subtarget // allows unaligned accesses, match any load, though this may require // setting a feature bit in the processor (on startup, for example). // Opteron 10h and later implement such a feature. -def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return Subtarget->hasSSEUnalignedMem() - || cast(N)->getAlignment() >= 16; +def memop : PatFrag<(ops node:$ptr), (vec128load node:$ptr), [{ + return Subtarget->hasSSEUnalignedMem() || + cast(N)->getAlignment() >= 16; }]>; // 128-bit memop pattern fragments diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 722fb12fadd53a042893ff38f65b438d1786f445..f3094b781c494f5ceba5f047552b8e66896a0c7c 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -821,6 +821,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSHLQrr, X86::VPSHLQmr, 0 }, { X86::VPSHLWrr, X86::VPSHLWmr, 0 }, + // LWP foldable instructions + { X86::LWPINS32rri, X86::LWPINS32rmi, 0 }, + { X86::LWPINS64rri, X86::LWPINS64rmi, 0 }, + { X86::LWPVAL32rri, X86::LWPVAL32rmi, 0 }, + { X86::LWPVAL64rri, X86::LWPVAL64rmi, 0 }, + // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions { X86::BEXTR32rr, X86::BEXTR32rm, 0 }, { X86::BEXTR64rr, X86::BEXTR64rm, 0 }, @@ -892,10 +898,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, { X86::VPABSWZrr, X86::VPABSWZrm, 0 }, + { X86::VPCONFLICTDZrr, X86::VPCONFLICTDZrm, 0 }, + { X86::VPCONFLICTQZrr, X86::VPCONFLICTQZrm, 0 }, { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 }, { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 }, { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, { X86::VPERMQZri, X86::VPERMQZmi, 0 }, + { X86::VPLZCNTDZrr, X86::VPLZCNTDZrm, 0 }, + { X86::VPLZCNTQZrr, X86::VPLZCNTQZrm, 0 }, { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 }, { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE }, { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 }, @@ -908,6 +918,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 }, { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 }, { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 }, + { X86::VPOPCNTDZrr, X86::VPOPCNTDZrm, 0 }, + { X86::VPOPCNTQZrr, X86::VPOPCNTQZrm, 0 }, { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 }, { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 }, { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 }, @@ -940,10 +952,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 }, { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 }, { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 }, + { X86::VPCONFLICTDZ256rr, X86::VPCONFLICTDZ256rm, 0 }, + { X86::VPCONFLICTQZ256rr, X86::VPCONFLICTQZ256rm, 0 }, { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 }, { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 }, { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 }, { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 }, + { X86::VPLZCNTDZ256rr, X86::VPLZCNTDZ256rm, 0 }, + { X86::VPLZCNTQZ256rr, X86::VPLZCNTQZ256rm, 0 }, { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE }, { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE }, { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 }, @@ -987,8 +1003,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 }, { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 }, { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 }, + { X86::VPCONFLICTDZ128rr, X86::VPCONFLICTDZ128rm, 0 }, + { X86::VPCONFLICTQZ128rr, X86::VPCONFLICTQZ128rm, 0 }, { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 }, { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 }, + { X86::VPLZCNTDZ128rr, X86::VPLZCNTDZ128rm, 0 }, + { X86::VPLZCNTQZ128rr, X86::VPLZCNTQZ128rm, 0 }, { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE }, { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE }, { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE }, @@ -2304,10 +2324,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 }, { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 }, { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 }, + { X86::VPCONFLICTDZrrkz, X86::VPCONFLICTDZrmkz, 0 }, + { X86::VPCONFLICTQZrrkz, X86::VPCONFLICTQZrmkz, 0 }, { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 }, { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 }, { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 }, { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 }, + { X86::VPLZCNTDZrrkz, X86::VPLZCNTDZrmkz, 0 }, + { X86::VPLZCNTQZrrkz, X86::VPLZCNTQZrmkz, 0 }, { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 }, { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE }, { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 }, @@ -2320,6 +2344,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 }, { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 }, { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 }, + { X86::VPOPCNTDZrrkz, X86::VPOPCNTDZrmkz, 0 }, + { X86::VPOPCNTQZrrkz, X86::VPOPCNTQZrmkz, 0 }, { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 }, { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 }, { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 }, @@ -2340,10 +2366,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 }, { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 }, { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 }, + { X86::VPCONFLICTDZ256rrkz, X86::VPCONFLICTDZ256rmkz, 0 }, + { X86::VPCONFLICTQZ256rrkz, X86::VPCONFLICTQZ256rmkz, 0 }, { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 }, { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 }, { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 }, { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 }, + { X86::VPLZCNTDZ256rrkz, X86::VPLZCNTDZ256rmkz, 0 }, + { X86::VPLZCNTQZ256rrkz, X86::VPLZCNTQZ256rmkz, 0 }, { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 }, @@ -2375,8 +2405,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 }, { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 }, { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 }, + { X86::VPCONFLICTDZ128rrkz, X86::VPCONFLICTDZ128rmkz, 0 }, + { X86::VPCONFLICTQZ128rrkz, X86::VPCONFLICTQZ128rmkz, 0 }, { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 }, { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 }, + { X86::VPLZCNTDZ128rrkz, X86::VPLZCNTDZ128rmkz, 0 }, + { X86::VPLZCNTQZ128rrkz, X86::VPLZCNTQZ128rmkz, 0 }, { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE }, { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE }, @@ -2925,10 +2959,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 }, { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 }, { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 }, + { X86::VPCONFLICTDZrrk, X86::VPCONFLICTDZrmk, 0 }, + { X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 }, { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 }, { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 }, { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 }, { X86::VPERMQZrik, X86::VPERMQZmik, 0 }, + { X86::VPLZCNTDZrrk, X86::VPLZCNTDZrmk, 0 }, + { X86::VPLZCNTQZrrk, X86::VPLZCNTQZrmk, 0 }, { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 }, { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE }, { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 }, @@ -2941,6 +2979,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 }, { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 }, { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 }, + { X86::VPOPCNTDZrrk, X86::VPOPCNTDZrmk, 0 }, + { X86::VPOPCNTQZrrk, X86::VPOPCNTQZrmk, 0 }, { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 }, { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 }, { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 }, @@ -2961,10 +3001,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 }, { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 }, { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 }, + { X86::VPCONFLICTDZ256rrk, X86::VPCONFLICTDZ256rmk, 0 }, + { X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 }, { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 }, { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 }, { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 }, { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 }, + { X86::VPLZCNTDZ256rrk, X86::VPLZCNTDZ256rmk, 0 }, + { X86::VPLZCNTQZ256rrk, X86::VPLZCNTQZ256rmk, 0 }, { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE }, { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE }, { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 }, @@ -2996,8 +3040,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 }, { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 }, { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 }, + { X86::VPCONFLICTDZ128rrk, X86::VPCONFLICTDZ128rmk, 0 }, + { X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 }, { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 }, { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 }, + { X86::VPLZCNTDZ128rrk, X86::VPLZCNTDZ128rmk, 0 }, + { X86::VPLZCNTQZ128rrk, X86::VPLZCNTQZ128rmk, 0 }, { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE }, { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE }, { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE }, @@ -3022,6 +3070,64 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 }, { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 }, { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 }, + + // AVX-512 masked compare instructions + { X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 }, + { X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0 }, + { X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0 }, + { X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0 }, + { X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0 }, + { X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0 }, + { X86::VCMPSDZrr_Intk, X86::VCMPSDZrm_Intk, TB_NO_REVERSE }, + { X86::VCMPSSZrr_Intk, X86::VCMPSSZrm_Intk, TB_NO_REVERSE }, + { X86::VPCMPBZ128rrik, X86::VPCMPBZ128rmik, 0 }, + { X86::VPCMPBZ256rrik, X86::VPCMPBZ256rmik, 0 }, + { X86::VPCMPBZrrik, X86::VPCMPBZrmik, 0 }, + { X86::VPCMPDZ128rrik, X86::VPCMPDZ128rmik, 0 }, + { X86::VPCMPDZ256rrik, X86::VPCMPDZ256rmik, 0 }, + { X86::VPCMPDZrrik, X86::VPCMPDZrmik, 0 }, + { X86::VPCMPEQBZ128rrk, X86::VPCMPEQBZ128rmk, 0 }, + { X86::VPCMPEQBZ256rrk, X86::VPCMPEQBZ256rmk, 0 }, + { X86::VPCMPEQBZrrk, X86::VPCMPEQBZrmk, 0 }, + { X86::VPCMPEQDZ128rrk, X86::VPCMPEQDZ128rmk, 0 }, + { X86::VPCMPEQDZ256rrk, X86::VPCMPEQDZ256rmk, 0 }, + { X86::VPCMPEQDZrrk, X86::VPCMPEQDZrmk, 0 }, + { X86::VPCMPEQQZ128rrk, X86::VPCMPEQQZ128rmk, 0 }, + { X86::VPCMPEQQZ256rrk, X86::VPCMPEQQZ256rmk, 0 }, + { X86::VPCMPEQQZrrk, X86::VPCMPEQQZrmk, 0 }, + { X86::VPCMPEQWZ128rrk, X86::VPCMPEQWZ128rmk, 0 }, + { X86::VPCMPEQWZ256rrk, X86::VPCMPEQWZ256rmk, 0 }, + { X86::VPCMPEQWZrrk, X86::VPCMPEQWZrmk, 0 }, + { X86::VPCMPGTBZ128rrk, X86::VPCMPGTBZ128rmk, 0 }, + { X86::VPCMPGTBZ256rrk, X86::VPCMPGTBZ256rmk, 0 }, + { X86::VPCMPGTBZrrk, X86::VPCMPGTBZrmk, 0 }, + { X86::VPCMPGTDZ128rrk, X86::VPCMPGTDZ128rmk, 0 }, + { X86::VPCMPGTDZ256rrk, X86::VPCMPGTDZ256rmk, 0 }, + { X86::VPCMPGTDZrrk, X86::VPCMPGTDZrmk, 0 }, + { X86::VPCMPGTQZ128rrk, X86::VPCMPGTQZ128rmk, 0 }, + { X86::VPCMPGTQZ256rrk, X86::VPCMPGTQZ256rmk, 0 }, + { X86::VPCMPGTQZrrk, X86::VPCMPGTQZrmk, 0 }, + { X86::VPCMPGTWZ128rrk, X86::VPCMPGTWZ128rmk, 0 }, + { X86::VPCMPGTWZ256rrk, X86::VPCMPGTWZ256rmk, 0 }, + { X86::VPCMPGTWZrrk, X86::VPCMPGTWZrmk, 0 }, + { X86::VPCMPQZ128rrik, X86::VPCMPQZ128rmik, 0 }, + { X86::VPCMPQZ256rrik, X86::VPCMPQZ256rmik, 0 }, + { X86::VPCMPQZrrik, X86::VPCMPQZrmik, 0 }, + { X86::VPCMPUBZ128rrik, X86::VPCMPUBZ128rmik, 0 }, + { X86::VPCMPUBZ256rrik, X86::VPCMPUBZ256rmik, 0 }, + { X86::VPCMPUBZrrik, X86::VPCMPUBZrmik, 0 }, + { X86::VPCMPUDZ128rrik, X86::VPCMPUDZ128rmik, 0 }, + { X86::VPCMPUDZ256rrik, X86::VPCMPUDZ256rmik, 0 }, + { X86::VPCMPUDZrrik, X86::VPCMPUDZrmik, 0 }, + { X86::VPCMPUQZ128rrik, X86::VPCMPUQZ128rmik, 0 }, + { X86::VPCMPUQZ256rrik, X86::VPCMPUQZ256rmik, 0 }, + { X86::VPCMPUQZrrik, X86::VPCMPUQZrmik, 0 }, + { X86::VPCMPUWZ128rrik, X86::VPCMPUWZ128rmik, 0 }, + { X86::VPCMPUWZ256rrik, X86::VPCMPUWZ256rmik, 0 }, + { X86::VPCMPUWZrrik, X86::VPCMPUWZrmik, 0 }, + { X86::VPCMPWZ128rrik, X86::VPCMPWZ128rmik, 0 }, + { X86::VPCMPWZ256rrik, X86::VPCMPWZ256rmik, 0 }, + { X86::VPCMPWZrrik, X86::VPCMPWZrmik, 0 }, }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) { @@ -3579,6 +3685,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, // It's not always legal to reference the low 8-bit of the larger // register in 32-bit mode. return false; + LLVM_FALLTHROUGH; case X86::MOVSX32rr16: case X86::MOVZX32rr16: case X86::MOVSX64rr16: @@ -3616,18 +3723,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const { const MachineFunction *MF = MI.getParent()->getParent(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); - if (MI.getOpcode() == getCallFrameSetupOpcode() || - MI.getOpcode() == getCallFrameDestroyOpcode()) { + if (isFrameInstr(MI)) { unsigned StackAlign = TFI->getStackAlignment(); - int SPAdj = - (MI.getOperand(0).getImm() + StackAlign - 1) / StackAlign * StackAlign; - - SPAdj -= MI.getOperand(1).getImm(); - - if (MI.getOpcode() == getCallFrameSetupOpcode()) - return SPAdj; - else - return -SPAdj; + int SPAdj = alignTo(getFrameSize(MI), StackAlign); + SPAdj -= getFrameAdjustment(MI); + if (!isFrameSetup(MI)) + SPAdj = -SPAdj; + return SPAdj; } // To know whether a call adjusts the stack, we need information @@ -5128,20 +5230,32 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, return nullptr; } } - case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: - case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri: - case X86::VPCMPBZrri: case X86::VPCMPUBZrri: - case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri: - case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri: - case X86::VPCMPDZrri: case X86::VPCMPUDZrri: - case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri: - case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri: - case X86::VPCMPQZrri: case X86::VPCMPUQZrri: - case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri: - case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri: - case X86::VPCMPWZrri: case X86::VPCMPUWZrri: { + case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri: + case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri: + case X86::VPCMPBZrri: case X86::VPCMPUBZrri: + case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri: + case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri: + case X86::VPCMPDZrri: case X86::VPCMPUDZrri: + case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri: + case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri: + case X86::VPCMPQZrri: case X86::VPCMPUQZrri: + case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri: + case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri: + case X86::VPCMPWZrri: case X86::VPCMPUWZrri: + case X86::VPCMPBZ128rrik: case X86::VPCMPUBZ128rrik: + case X86::VPCMPBZ256rrik: case X86::VPCMPUBZ256rrik: + case X86::VPCMPBZrrik: case X86::VPCMPUBZrrik: + case X86::VPCMPDZ128rrik: case X86::VPCMPUDZ128rrik: + case X86::VPCMPDZ256rrik: case X86::VPCMPUDZ256rrik: + case X86::VPCMPDZrrik: case X86::VPCMPUDZrrik: + case X86::VPCMPQZ128rrik: case X86::VPCMPUQZ128rrik: + case X86::VPCMPQZ256rrik: case X86::VPCMPUQZ256rrik: + case X86::VPCMPQZrrik: case X86::VPCMPUQZrrik: + case X86::VPCMPWZ128rrik: case X86::VPCMPUWZ128rrik: + case X86::VPCMPWZ256rrik: case X86::VPCMPUWZ256rrik: + case X86::VPCMPWZrrik: case X86::VPCMPUWZrrik: { // Flip comparison mode immediate (if necessary). - unsigned Imm = MI.getOperand(3).getImm() & 0x7; + unsigned Imm = MI.getOperand(MI.getNumOperands() - 1).getImm() & 0x7; switch (Imm) { default: llvm_unreachable("Unreachable!"); case 0x01: Imm = 0x06; break; // LT -> NLE @@ -5155,7 +5269,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, break; } auto &WorkingMI = cloneIfNew(MI); - WorkingMI.getOperand(3).setImm(Imm); + WorkingMI.getOperand(MI.getNumOperands() - 1).setImm(Imm); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); } @@ -5728,6 +5842,44 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) { } } +std::pair +X86::getX86ConditionCode(CmpInst::Predicate Predicate) { + X86::CondCode CC = X86::COND_INVALID; + bool NeedSwap = false; + switch (Predicate) { + default: break; + // Floating-point Predicates + case CmpInst::FCMP_UEQ: CC = X86::COND_E; break; + case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH; + case CmpInst::FCMP_OGT: CC = X86::COND_A; break; + case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH; + case CmpInst::FCMP_OGE: CC = X86::COND_AE; break; + case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH; + case CmpInst::FCMP_ULT: CC = X86::COND_B; break; + case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH; + case CmpInst::FCMP_ULE: CC = X86::COND_BE; break; + case CmpInst::FCMP_ONE: CC = X86::COND_NE; break; + case CmpInst::FCMP_UNO: CC = X86::COND_P; break; + case CmpInst::FCMP_ORD: CC = X86::COND_NP; break; + case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH; + case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break; + + // Integer Predicates + case CmpInst::ICMP_EQ: CC = X86::COND_E; break; + case CmpInst::ICMP_NE: CC = X86::COND_NE; break; + case CmpInst::ICMP_UGT: CC = X86::COND_A; break; + case CmpInst::ICMP_UGE: CC = X86::COND_AE; break; + case CmpInst::ICMP_ULT: CC = X86::COND_B; break; + case CmpInst::ICMP_ULE: CC = X86::COND_BE; break; + case CmpInst::ICMP_SGT: CC = X86::COND_G; break; + case CmpInst::ICMP_SGE: CC = X86::COND_GE; break; + case CmpInst::ICMP_SLT: CC = X86::COND_L; break; + case CmpInst::ICMP_SLE: CC = X86::COND_LE; break; + } + + return std::make_pair(CC, NeedSwap); +} + /// Return a set opcode for the given condition and /// whether it has memory operand. unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) { @@ -5891,7 +6043,7 @@ void X86InstrInfo::replaceBranchWithTailCall( // Add implicit uses and defs of all live regs potentially clobbered by the // call. This way they still appear live across the call. - LivePhysRegs LiveRegs(&getRegisterInfo()); + LivePhysRegs LiveRegs(getRegisterInfo()); LiveRegs.addLiveOuts(MBB); SmallVector, 8> Clobbers; LiveRegs.stepForward(*MIB, Clobbers); @@ -6289,9 +6441,11 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB, ArrayRef Cond, unsigned TrueReg, unsigned FalseReg) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + const TargetRegisterClass &RC = *MRI.getRegClass(DstReg); assert(Cond.size() == 1 && "Invalid Cond array"); unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), - MRI.getRegClass(DstReg)->getSize(), + TRI.getRegSizeInBits(RC) / 8, false /*HasMemoryOperand*/); BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); } @@ -6504,9 +6658,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // first frame index. // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment. - const TargetRegisterInfo *TRI = &getRegisterInfo(); + const TargetRegisterInfo &TRI = getRegisterInfo(); MachineBasicBlock::LivenessQueryResult LQR = - MBB.computeRegisterLiveness(TRI, AX, MI); + MBB.computeRegisterLiveness(&TRI, AX, MI); // We do not want to save and restore AX if we do not have to. // Moreover, if we do so whereas AX is dead, we would need to set // an undef flag on the use of AX, otherwise the verifier will @@ -6523,7 +6677,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } // AX contains the top most register in the aliasing hierarchy. // It may not be live, but one of its aliases may be. - for (MCRegAliasIterator AI(AX, TRI, true); + for (MCRegAliasIterator AI(AX, &TRI, true); AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI) LQR = LPR.contains(*AI) ? MachineBasicBlock::LQR_Live : MachineBasicBlock::LQR_Dead; @@ -6562,7 +6716,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, bool HasAVX512 = STI.hasAVX512(); bool HasVLX = STI.hasVLX(); - switch (RC->getSize()) { + switch (STI.getRegisterInfo()->getSpillSize(*RC)) { default: llvm_unreachable("Unknown spill size"); case 1: @@ -6608,28 +6762,36 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass"); return load ? X86::LD_Fp80m : X86::ST_FpP80m; case 16: { - assert(X86::VR128XRegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass"); - // If stack is realigned we can use aligned stores. - if (isStackAligned) - return load ? - (HasVLX ? X86::VMOVAPSZ128rm : - HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX : - HasAVX ? X86::VMOVAPSrm : - X86::MOVAPSrm): - (HasVLX ? X86::VMOVAPSZ128mr : - HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX : - HasAVX ? X86::VMOVAPSmr : - X86::MOVAPSmr); - else - return load ? - (HasVLX ? X86::VMOVUPSZ128rm : - HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX : - HasAVX ? X86::VMOVUPSrm : - X86::MOVUPSrm): - (HasVLX ? X86::VMOVUPSZ128mr : - HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : - HasAVX ? X86::VMOVUPSmr : - X86::MOVUPSmr); + if (X86::VR128XRegClass.hasSubClassEq(RC)) { + // If stack is realigned we can use aligned stores. + if (isStackAligned) + return load ? + (HasVLX ? X86::VMOVAPSZ128rm : + HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX : + HasAVX ? X86::VMOVAPSrm : + X86::MOVAPSrm): + (HasVLX ? X86::VMOVAPSZ128mr : + HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX : + HasAVX ? X86::VMOVAPSmr : + X86::MOVAPSmr); + else + return load ? + (HasVLX ? X86::VMOVUPSZ128rm : + HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX : + HasAVX ? X86::VMOVUPSrm : + X86::MOVUPSrm): + (HasVLX ? X86::VMOVUPSZ128mr : + HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : + HasAVX ? X86::VMOVUPSmr : + X86::MOVUPSmr); + } + if (X86::BNDRRegClass.hasSubClassEq(RC)) { + if (STI.is64Bit()) + return load ? X86::BNDMOVRM64rm : X86::BNDMOVMR64mr; + else + return load ? X86::BNDMOVRM32rm : X86::BNDMOVMR32mr; + } + llvm_unreachable("Unknown 16-byte regclass"); } case 32: assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass"); @@ -6714,9 +6876,9 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= RC->getSize() && + assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) && "Stack slot too small for store"); - unsigned Alignment = std::max(RC->getSize(), 16); + unsigned Alignment = std::max(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); @@ -6733,7 +6895,8 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - unsigned Alignment = std::max(RC->getSize(), 16); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); @@ -6753,7 +6916,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); - unsigned Alignment = std::max(RC->getSize(), 16); + unsigned Alignment = std::max(TRI->getSpillSize(*RC), 16); bool isAligned = (Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) || RI.canRealignStack(MF); @@ -6768,7 +6931,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - unsigned Alignment = std::max(RC->getSize(), 16); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); @@ -7227,7 +7391,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, NewOpc = getSETFromCond(NewCC, HasMemoryOperand); else { unsigned DstReg = Instr.getOperand(0).getReg(); - NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(), + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + NewOpc = getCMovFromCond(NewCC, TRI->getRegSizeInBits(*DstRC)/8, HasMemoryOperand); } @@ -7575,6 +7740,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr)); case X86::AVX2_SETALLONES: return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr)); + case X86::AVX1_SETALLONES: { + unsigned Reg = MIB->getOperand(0).getReg(); + // VCMPPSYrri with an immediate 0xf should produce VCMPTRUEPS. + MIB->setDesc(get(X86::VCMPPSYrri)); + MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xf); + return true; + } case X86::AVX512_512_SETALLONES: { unsigned Reg = MIB->getOperand(0).getReg(); MIB->setDesc(get(X86::VPTERNLOGDZrri)); @@ -7755,7 +7927,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( unsigned DstIdx = (Imm >> 4) & 3; unsigned SrcIdx = (Imm >> 6) & 3; - unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if (Size <= RCSize && 4 <= Align) { int PtrOffset = SrcIdx * 4; unsigned NewImm = (DstIdx << 4) | ZMask; @@ -7777,7 +7951,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( // To fold the load, adjust the pointer to the upper and use (V)MOVLPS. // TODO: In most cases AVX doesn't have a 8-byte alignment requirement. if (OpNum == 2) { - unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF); + unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if (Size <= RCSize && 8 <= Align) { unsigned NewOpCode = (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm : @@ -7866,7 +8042,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( return nullptr; bool NarrowToMOV32rm = false; if (Size) { - unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, + &RI, MF); + unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if (Size < RCSize) { // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. @@ -8307,11 +8486,13 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, const MachineFunction &MF) { unsigned Opc = LoadMI.getOpcode(); unsigned UserOpc = UserMI.getOpcode(); - unsigned RegSize = - MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = + MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg()); + unsigned RegSize = TRI.getRegSizeInBits(*RC); if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm) && - RegSize > 4) { + RegSize > 32) { // These instructions only load 32 bits, we can't fold them if the // destination register is wider than 32 bits (4 bytes), and its user // instruction isn't scalar (SS). @@ -8362,7 +8543,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, } if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm) && - RegSize > 8) { + RegSize > 64) { // These instructions only load 64 bits, we can't fold them if the // destination register is wider than 64 bits (8 bytes), and its user // instruction isn't scalar (SD). @@ -8454,6 +8635,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( Alignment = 64; break; case X86::AVX2_SETALLONES: + case X86::AVX1_SETALLONES: case X86::AVX_SET0: case X86::AVX512_256_SET0: Alignment = 32; @@ -8499,6 +8681,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( case X86::V_SET0: case X86::V_SETALLONES: case X86::AVX2_SETALLONES: + case X86::AVX1_SETALLONES: case X86::AVX_SET0: case X86::AVX512_128_SET0: case X86::AVX512_256_SET0: @@ -8540,13 +8723,14 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES) Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16); else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 || - Opc == X86::AVX512_256_SET0) + Opc == X86::AVX512_256_SET0 || Opc == X86::AVX1_SETALLONES) Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES || - Opc == X86::AVX512_512_SETALLONES); + Opc == X86::AVX512_512_SETALLONES || + Opc == X86::AVX1_SETALLONES); const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); @@ -8707,6 +8891,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool FoldedStore = I->second.second & TB_FOLDED_STORE; const MCInstrDesc &MCID = get(Opc); MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); unsigned NumDefs = MCID.NumDefs; std::vector AddrOps; @@ -8729,7 +8914,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the load instruction. SDNode *Load = nullptr; if (FoldedLoad) { - EVT VT = *RC->vt_begin(); + EVT VT = *TRI.legalclasstypes_begin(*RC); std::pair MMOs = MF.extractLoadMemRefs(cast(N)->memoperands_begin(), @@ -8741,7 +8926,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte // memory access is slow above. - unsigned Alignment = std::max(RC->getSize(), 16); + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl, @@ -8757,7 +8942,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, const TargetRegisterClass *DstRC = nullptr; if (MCID.getNumDefs() > 0) { DstRC = getRegClass(MCID, 0, &RI, MF); - VTs.push_back(*DstRC->vt_begin()); + VTs.push_back(*TRI.legalclasstypes_begin(*DstRC)); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); @@ -8786,7 +8971,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte // memory access is slow above. - unsigned Alignment = std::max(RC->getSize(), 16); + unsigned Alignment = std::max(TRI.getSpillSize(*RC), 16); bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; SDNode *Store = @@ -9519,7 +9704,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { } /// Return the noop instruction to use for a noop. -void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +void X86InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } @@ -10401,7 +10586,7 @@ X86InstrInfo::getOutliningType(MachineInstr &MI) const { // catch it. if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) || MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) || - MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) + MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) return MachineOutlinerInstrType::Illegal; // Outlined calls change the instruction pointer, so don't read from it. @@ -10439,9 +10624,7 @@ void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB, void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF, - bool IsTailCall) const { - return; -} + bool IsTailCall) const {} MachineBasicBlock::iterator X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 582515dc1154baa9bd71f24029c156b492117da8..e64876073ccf1dedbdc9c808065ca860e7b0c295 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -64,6 +64,10 @@ enum CondCode { // Turn condition code into conditional branch opcode. unsigned GetCondBranchFromCond(CondCode CC); +/// \brief Return a pair of condition code for the given predicate and whether +/// the instruction operands should be swaped to match the condition code. +std::pair getX86ConditionCode(CmpInst::Predicate Predicate); + /// \brief Return a set opcode for the given condition and whether it has /// a memory operand. unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); @@ -182,6 +186,25 @@ public: /// const X86RegisterInfo &getRegisterInfo() const { return RI; } + /// Returns the stack pointer adjustment that happens inside the frame + /// setup..destroy sequence (e.g. by pushes, or inside the callee). + int64_t getFrameAdjustment(const MachineInstr &I) const { + assert(isFrameInstr(I)); + if (isFrameSetup(I)) + return I.getOperand(2).getImm(); + return I.getOperand(1).getImm(); + } + + /// Sets the stack pointer adjustment made inside the frame made up by this + /// instruction. + void setFrameAdjustment(MachineInstr &I, int64_t V) const { + assert(isFrameInstr(I)); + if (isFrameSetup(I)) + I.getOperand(2).setImm(V); + else + I.getOperand(1).setImm(V); + } + /// getSPAdjust - This returns the stack pointer adjustment made by /// this instruction. For x86, we need to handle more complex call /// sequences involving PUSHes. @@ -443,7 +466,7 @@ public: int64_t Offset1, int64_t Offset2, unsigned NumLoads) const override; - void getNoopForMachoTarget(MCInst &NopInst) const override; + void getNoop(MCInst &NopInst) const override; bool reverseBranchCondition(SmallVectorImpl &Cond) const override; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 163f4eef72ed7077f3df76bf9483647a4b8cb888..fab70e918b8adbb93208bc9379b70adb84b05959 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -84,7 +84,8 @@ def SDTLockBinaryArithWithFlags : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>; -def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; def SDT_X86CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; @@ -283,6 +284,11 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def X86lwpins : SDNode<"X86ISD::LWPINS", + SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -807,6 +813,8 @@ def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; def HasCDI : Predicate<"Subtarget->hasCDI()">, AssemblerPredicate<"FeatureCDI", "AVX-512 CD ISA">; +def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">, + AssemblerPredicate<"FeatureVPOPCNTDQ", "AVX-512 VPOPCNTDQ ISA">; def HasPFI : Predicate<"Subtarget->hasPFI()">, AssemblerPredicate<"FeaturePFI", "AVX-512 PF ISA">; def HasERI : Predicate<"Subtarget->hasERI()">, @@ -836,6 +844,7 @@ def HasFMA : Predicate<"Subtarget->hasFMA()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def HasXOP : Predicate<"Subtarget->hasXOP()">; def HasTBM : Predicate<"Subtarget->hasTBM()">; +def HasLWP : Predicate<"Subtarget->hasLWP()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; @@ -877,7 +886,9 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">, def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||" - "Subtarget->getFrameLowering()->hasFP(*MF)">; + "Subtarget->getFrameLowering()->hasFP(*MF)"> { + let RecomputePerFunction = 1; +} def IsPS4 : Predicate<"Subtarget->isTargetPS4()">; def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; @@ -887,9 +898,16 @@ def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||" "TM.getCodeModel() == CodeModel::Kernel">; def IsNotPIC : Predicate<"!TM.isPositionIndependent()">; -def OptForSize : Predicate<"OptForSize">; -def OptForMinSize : Predicate<"OptForMinSize">; -def OptForSpeed : Predicate<"!OptForSize">; + +// We could compute these on a per-module basis but doing so requires accessing +// the Function object through the Subtarget and objections were raised +// to that (see post-commit review comments for r301750). +let RecomputePerFunction = 1 in { + def OptForSize : Predicate<"MF->getFunction()->optForSize()">; + def OptForMinSize : Predicate<"MF->getFunction()->optForMinSize()">; + def OptForSpeed : Predicate<"!MF->getFunction()->optForSize()">; +} + def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; @@ -897,6 +915,7 @@ def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; +def HasERMSB : Predicate<"Subtarget->hasERMSB()">; def HasMFence : Predicate<"Subtarget->hasMFence()">; //===----------------------------------------------------------------------===// @@ -1124,13 +1143,15 @@ def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], IIC_POP_REG>, OpSize32, Requires<[Not64BitMode]>; def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], IIC_POP_REG>, OpSize16; -def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", [], - IIC_POP_MEM>, OpSize16; def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], IIC_POP_REG>, OpSize32, Requires<[Not64BitMode]>; +} // mayLoad, SchedRW +let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in { +def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", [], + IIC_POP_MEM>, OpSize16; def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", [], IIC_POP_MEM>, OpSize32, Requires<[Not64BitMode]>; -} // mayLoad, SchedRW +} // mayStore, mayLoad, WriteRMW let mayStore = 1, SchedRW = [WriteStore] in { def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[], @@ -1212,9 +1233,10 @@ def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>, OpSize32, Requires<[In64BitMode]>; def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>, OpSize32, Requires<[In64BitMode]>; +} // mayLoad, SchedRW +let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", [], IIC_POP_MEM>, OpSize32, Requires<[In64BitMode]>; -} // mayLoad, SchedRW let mayStore = 1, SchedRW = [WriteStore] in { def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; @@ -1416,11 +1438,14 @@ def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), // Longer forms that use a ModR/M byte. Needed for disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MOV8ri_alt : Ii8 <0xC6, MRM0r, (outs GR8 :$dst), (ins i8imm :$src), - "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV8ri">; def MOV16ri_alt : Ii16<0xC7, MRM0r, (outs GR16:$dst), (ins i16imm:$src), - "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, + FoldGenData<"MOV16ri">; def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src), - "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, + FoldGenData<"MOV32ri">; } } // SchedRW @@ -1543,13 +1568,17 @@ def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs), (ins offset64_64:$dst), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), - "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV8rr">; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, + FoldGenData<"MOV16rr">; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, + FoldGenData<"MOV32rr">; def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV64rr">; } let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { @@ -1983,11 +2012,11 @@ def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>, Requires<[In64BitMode]>; // Data16 instruction prefix -def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>, +def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>, Requires<[Not16BitMode]>; // Data instruction prefix -def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", []>, +def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", []>, Requires<[In16BitMode]>; // Repeat string operation instruction prefixes @@ -2102,6 +2131,7 @@ def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>, Requires<[Not64BitMode]>; +let mayStore = 1 in def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, Requires<[Not64BitMode]>; @@ -2338,6 +2368,38 @@ let Predicates = [HasBMI2] in { def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)), (BZHI64rm addr:$src, (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>; + + // x & (-1 >> (32 - y)) + def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))), + (BZHI32rr GR32:$src, GR32:$lz)>; + def : Pat<(and (loadi32 addr:$src), (srl -1, (i8 (trunc (sub 32, GR32:$lz))))), + (BZHI32rm addr:$src, GR32:$lz)>; + + // x & (-1 >> (64 - y)) + def : Pat<(and GR64:$src, (srl -1, (i8 (trunc (sub 64, GR32:$lz))))), + (BZHI64rr GR64:$src, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>; + def : Pat<(and (loadi64 addr:$src), (srl -1, (i8 (trunc (sub 64, GR32:$lz))))), + (BZHI64rm addr:$src, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>; + + // x << (32 - y) >> (32 - y) + def : Pat<(srl (shl GR32:$src, (i8 (trunc (sub 32, GR32:$lz)))), + (i8 (trunc (sub 32, GR32:$lz)))), + (BZHI32rr GR32:$src, GR32:$lz)>; + def : Pat<(srl (shl (loadi32 addr:$src), (i8 (trunc (sub 32, GR32:$lz)))), + (i8 (trunc (sub 32, GR32:$lz)))), + (BZHI32rm addr:$src, GR32:$lz)>; + + // x << (64 - y) >> (64 - y) + def : Pat<(srl (shl GR64:$src, (i8 (trunc (sub 64, GR32:$lz)))), + (i8 (trunc (sub 64, GR32:$lz)))), + (BZHI64rr GR64:$src, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>; + def : Pat<(srl (shl (loadi64 addr:$src), (i8 (trunc (sub 64, GR32:$lz)))), + (i8 (trunc (sub 64, GR32:$lz)))), + (BZHI64rm addr:$src, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>; } // HasBMI2 let Predicates = [HasBMI] in { @@ -2436,6 +2498,59 @@ defm T1MSKC : tbm_binary_intr<0x01, "t1mskc", MRM7r, MRM7m>; defm TZMSK : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>; } // HasTBM, EFLAGS +//===----------------------------------------------------------------------===// +// Lightweight Profiling Instructions + +let Predicates = [HasLWP] in { + +def LLWPCB : I<0x12, MRM0r, (outs), (ins GR32:$src), "llwpcb\t$src", + [(int_x86_llwpcb GR32:$src)], IIC_LWP>, + XOP, XOP9, Requires<[Not64BitMode]>; +def SLWPCB : I<0x12, MRM1r, (outs GR32:$dst), (ins), "slwpcb\t$dst", + [(set GR32:$dst, (int_x86_slwpcb))], IIC_LWP>, + XOP, XOP9, Requires<[Not64BitMode]>; + +def LLWPCB64 : I<0x12, MRM0r, (outs), (ins GR64:$src), "llwpcb\t$src", + [(int_x86_llwpcb GR64:$src)], IIC_LWP>, + XOP, XOP9, VEX_W, Requires<[In64BitMode]>; +def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst", + [(set GR64:$dst, (int_x86_slwpcb))], IIC_LWP>, + XOP, XOP9, VEX_W, Requires<[In64BitMode]>; + +multiclass lwpins_intr { + def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), + "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, imm:$cntl))]>, + XOP_4V, XOPA; + let mayLoad = 1 in + def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), + "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), imm:$cntl))]>, + XOP_4V, XOPA; +} + +let Defs = [EFLAGS] in { + defm LWPINS32 : lwpins_intr; + defm LWPINS64 : lwpins_intr, VEX_W; +} // EFLAGS + +multiclass lwpval_intr { + def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl), + "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(Int RC:$src0, GR32:$src1, imm:$cntl)], IIC_LWP>, + XOP_4V, XOPA; + let mayLoad = 1 in + def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl), + "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}", + [(Int RC:$src0, (loadi32 addr:$src1), imm:$cntl)], IIC_LWP>, + XOP_4V, XOPA; +} + +defm LWPVAL32 : lwpval_intr; +defm LWPVAL64 : lwpval_intr, VEX_W; + +} // HasLWP + //===----------------------------------------------------------------------===// // MONITORX/MWAITX Instructions // @@ -2452,7 +2567,7 @@ let SchedRW = [ WriteSystem ] in { } let Uses = [ ECX, EAX, EBX ] in { - def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", + def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", [(int_x86_mwaitx ECX, EAX, EBX)], IIC_SSE_MWAITX>, TB, Requires<[ HasMWAITX ]>; } diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index dc3800ce381b0a5e6de4ec9176078d80e3fadaad..2c047722db249a4333b1e7c1d5ea284abe5208c4 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -248,7 +248,8 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (MMX_X86movd2w (x86mmx VR64:$src)))], - IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; + IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>, + FoldGenData<"MMX_MOVD64rr">; let isBitcast = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), @@ -277,7 +278,7 @@ def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; + IIC_MMX_MOVQ_RR>, FoldGenData<"MMX_MOVQ64rr">; } } // SchedRW diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td index 309f601d1fcee237a16842229d13eec1de48a957..104ba2a174db17c1d7da8591dc4e3792dda54eea 100644 --- a/lib/Target/X86/X86InstrMPX.td +++ b/lib/Target/X86/X86InstrMPX.td @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// multiclass mpx_bound_make opc, string OpcodeStr> { +let mayLoad = 1 in { def 32rm: I, Requires<[HasMPX, Not64BitMode]>; @@ -21,16 +22,19 @@ multiclass mpx_bound_make opc, string OpcodeStr> { OpcodeStr#"\t{$src, $dst|$dst, $src}", []>, Requires<[HasMPX, In64BitMode]>; } +} defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS; multiclass mpx_bound_check opc, string OpcodeStr> { +let mayLoad = 1 in { def 32rm: I, Requires<[HasMPX, Not64BitMode]>; def 64rm: RI, Requires<[HasMPX, In64BitMode]>; +} def 32rr: I, Requires<[HasMPX, Not64BitMode]>; @@ -45,16 +49,18 @@ defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD; def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX]>; +let mayLoad = 1 in { def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, Not64BitMode]>; def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, In64BitMode]>; - +} def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX]>; +let mayStore = 1 in { def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), "bndmov\t{$src, $dst|$dst, $src}", []>, PD, Requires<[HasMPX, Not64BitMode]>; @@ -65,6 +71,8 @@ def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src), def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src), "bndstx\t{$src, $dst|$dst, $src}", []>, PS, Requires<[HasMPX]>; +} +let mayLoad = 1 in def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src), "bndldx\t{$src, $dst|$dst, $src}", []>, PS, Requires<[HasMPX]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f4fc87f80e3d14bdabfce5cec93a0904a8344a46..8490b972eb5c15a8a75ba0ff909361d4dccc547c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -486,6 +486,10 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, isPseudo = 1, SchedRW = [WriteZero] in { def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; + let Predicates = [HasAVX1Only, OptForMinSize] in { + def AVX1_SETALLONES: I<0, Pseudo, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8i32 immAllOnesV))]>; + } let Predicates = [HasAVX2] in def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8i32 immAllOnesV))]>; @@ -503,7 +507,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, multiclass sse12_move_rr { + string asm_opr, Domain d = GenericDomain, + string Name> { let isCommutable = 1 in def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), @@ -517,15 +522,17 @@ multiclass sse12_move_rr, Sched<[WriteFShuffle]>; + [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>, + FoldGenData; } multiclass sse12_move { + Domain d = GenericDomain, string Name> { // AVX defm V#NAME : sse12_move_rr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d, + "V"#Name>, VEX_4V, VEX_LIG, VEX_WIG; def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), @@ -535,7 +542,7 @@ multiclass sse12_move; + "\t{$src2, $dst|$dst, $src2}", d, Name>; } def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), @@ -559,9 +566,9 @@ multiclass sse12_move_rm, XS; + SSEPackedSingle, "MOVSS">, XS; defm MOVSD : sse12_move, XD; + SSEPackedDouble, "MOVSD">, XD; let canFoldAsLoad = 1, isReMaterializable = 1 in { defm MOVSS : sse12_move_rm, VEX, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVAPSrr">; def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVAPDrr">; def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVUPSrr">; def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVUPDrr">; def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVAPSYrr">; def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVAPDYrr">; def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVUPSYrr">; def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVUPDYrr">; } // Aliases to help the assembler pick two byte VEX encodings by swapping the @@ -934,16 +949,16 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteFShuffle] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPSrr">; def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPDrr">; def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>; + IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPSrr">; def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>; + IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPDrr">; } let Predicates = [HasAVX, NoVLX] in { @@ -2342,6 +2357,7 @@ multiclass sse12_cmp_scalar_int, Sched<[itins.Sched]>; +let mayLoad = 1 in def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, @@ -2385,6 +2401,7 @@ multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], IIC_SSE_COMIS_RR>, Sched<[WriteFAdd]>; +let mayLoad = 1 in def rm: SI opc, RegisterClass RC, SDNode OpNode, [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], IIC_SSE_COMIS_RR>, Sched<[WriteFAdd]>; +let mayLoad = 1 in def rm: SI, - VEX, VEX_WIG; + VEX, VEX_WIG, FoldGenData<"VMOVDQArr">; def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVDQAYrr">; def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, - VEX, VEX_WIG; + VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">; def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqu\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVDQUYrr">; } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, @@ -3813,11 +3833,12 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVDQArr">; def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>, + FoldGenData<"MOVDQUrr">; } } // SchedRW @@ -4599,17 +4620,17 @@ def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], IIC_SSE_MOVDQ>, Sched<[WriteMove]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; let isCodeGenOnly = 1 in def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], IIC_SSE_MOVDQ>, Sched<[WriteMove]>; } // ExeDomain = SSEPackedInt @@ -4678,7 +4699,7 @@ def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), VEX; def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; @@ -4691,7 +4712,7 @@ def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs), [], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, Sched<[WriteStore]>; } // ExeDomain = SSEPackedInt @@ -4718,7 +4739,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), @@ -4808,12 +4829,12 @@ let Predicates = [UseSSE2] in { } } -// These are the correct encodings of the instructions so that we know how to -// read correct assembly, even though we continue to emit the wrong ones for -// compatibility with Darwin's buggy assembler. -def : InstAlias<"movq\t{$src, $dst|$dst, $src}", +// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of +// "movq" due to MacOS parsing limitation. In order to parse old assembly, we add +// these aliases. +def : InstAlias<"movd\t{$src, $dst|$dst, $src}", (MOV64toPQIrr VR128:$dst, GR64:$src), 0>; -def : InstAlias<"movq\t{$src, $dst|$dst, $src}", +def : InstAlias<"movd\t{$src, $dst|$dst, $src}", (MOVPQIto64rr GR64:$dst, VR128:$src), 0>; // Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX. def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", @@ -5162,14 +5183,14 @@ multiclass S3D_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFAdd]>; + Sched<[WriteFHAdd]>; def rm : S3DI, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; } multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, @@ -5179,14 +5200,14 @@ multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, - Sched<[WriteFAdd]>; + Sched<[WriteFHAdd]>; def rm : S3I, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFHAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -5289,7 +5310,7 @@ defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>; // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// -let Sched = WriteVecALU in { +let Sched = WritePHAdd in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; @@ -5908,7 +5929,7 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { (ins VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, Sched<[WriteShuffle]>; + []>, Sched<[WriteShuffle]>, FoldGenData; let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteShuffleLd, WriteRMW] in @@ -6715,22 +6736,23 @@ let Constraints = "$src1 = $dst" in { SSE_INTMUL_ITINS_P, 1>; } -let Predicates = [HasAVX, NoVLX] in { +let Predicates = [HasAVX, NoVLX] in defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, loadv2i64, i128mem, 0, SSE_PMULLD_ITINS>, VEX_4V, VEX_WIG; +let Predicates = [HasAVX] in defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_WIG; -} -let Predicates = [HasAVX2] in { + +let Predicates = [HasAVX2, NoVLX] in defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, loadv4i64, i256mem, 0, SSE_PMULLD_ITINS>, VEX_4V, VEX_L, VEX_WIG; +let Predicates = [HasAVX2] in defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L, VEX_WIG; -} let Constraints = "$src1 = $dst" in { defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, @@ -7095,17 +7117,14 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions let SchedRW = [WriteLoad] in { let Predicates = [HasAVX, NoVLX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vmovntdqa\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, + "vmovntdqa\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG; let Predicates = [HasAVX2, NoVLX] in def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), - "vmovntdqa\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>, + "vmovntdqa\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L, VEX_WIG; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "movntdqa\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; + "movntdqa\t{$src, $dst|$dst, $src}", []>; } // SchedRW let Predicates = [HasAVX2, NoVLX] in { @@ -7144,33 +7163,37 @@ let Predicates = [UseSSE41] in { /// SS42I_binop_rm - Simple SSE 4.2 binary operator multiclass SS42I_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, bit Is2Addr = 1> { + X86MemOperand x86memop, OpndItins itins, + bit Is2Addr = 1> { def rr : SS428I; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>; def rm : SS428I; + (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, - loadv2i64, i128mem, 0>, VEX_4V, VEX_WIG; + loadv2i64, i128mem, SSE_INTALU_ITINS_P, 0>, + VEX_4V, VEX_WIG; let Predicates = [HasAVX2] in defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, - loadv4i64, i256mem, 0>, VEX_4V, VEX_L, VEX_WIG; + loadv4i64, i256mem, SSE_INTALU_ITINS_P, 0>, + VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, - memopv2i64, i128mem>; + memopv2i64, i128mem, SSE_INTALU_ITINS_P>; //===----------------------------------------------------------------------===// // SSE4.2 - String/text Processing Instructions @@ -7750,14 +7773,12 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L; } - -// Without AVX2 we need to concat two v4i32 V_SETALLONES to create a 256-bit -// all ones value. -let Predicates = [HasAVX1Only] in -def : Pat<(v8i32 immAllOnesV), - (VINSERTF128rr - (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), (V_SETALLONES), sub_xmm), - (V_SETALLONES), 1)>; +// To create a 256-bit all ones value, we should produce VCMPTRUEPS +// with YMM register containing zero. +// FIXME: Avoid producing vxorps to clear the fake inputs. +let Predicates = [HasAVX1Only] in { +def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>; +} multiclass vinsert_lowering { diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index b21f0b923da8d476d29c1c5424ffb2e02052dd09..0efb383e1c8d4cd6fe5a4908871fbb635e526e69 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -403,7 +403,7 @@ def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), } // Constraints = "$src = $dst" -let SchedRW = [WriteShiftLd, WriteRMW] in { +let SchedRW = [WriteShiftLd, WriteRMW], mayStore = 1 in { let Uses = [EFLAGS] in { def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), "rcl{b}\t$dst", [], IIC_SR>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 9265d64b3230fe7e326f4595bd512fd88548b0d7..2e5350ce979e3dd4c264908cf4f30c33c1005074 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -173,27 +173,28 @@ def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize32; def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>; - +let mayStore = 1 in { def MOV16ms : I<0x8C, MRMDestMem, (outs), (ins i16mem:$dst, SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSize16; def MOV32ms : I<0x8C, MRMDestMem, (outs), (ins i32mem:$dst, SEGMENT_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSize32; def MOV64ms : RI<0x8C, MRMDestMem, (outs), (ins i64mem:$dst, SEGMENT_REG:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>; - +} def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize16; def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize32; def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>; - +let mayLoad = 1 in { def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSize16; def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSize32; def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; +} } // SchedRW //===----------------------------------------------------------------------===// @@ -202,6 +203,7 @@ def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), let SchedRW = [WriteSystem] in { def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB; +let mayLoad = 1 in def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB, OpSize16; @@ -210,6 +212,7 @@ def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), OpSize16; // i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. +let mayLoad = 1 in def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB, OpSize32; @@ -217,23 +220,27 @@ def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB, OpSize32; // i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo. +let mayLoad = 1 in def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB; def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB; +let mayLoad = 1 in def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB, OpSize16; def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB, OpSize16; +let mayLoad = 1 in def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB, OpSize32; def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB, OpSize32; +let mayLoad = 1 in def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB; def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), @@ -248,11 +255,13 @@ def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins), "str{l}\t$dst", [], IIC_STR>, TB, OpSize32; def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins), "str{q}\t$dst", [], IIC_STR>, TB; +let mayStore = 1 in def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst), "str{w}\t$dst", [], IIC_STR>, TB; def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", [], IIC_LTR>, TB; +let mayLoad = 1 in def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", [], IIC_LTR>, TB; @@ -377,12 +386,14 @@ def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src), def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", [], IIC_VERR>, TB; -def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), - "verr\t$seg", [], IIC_VERR>, TB; def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", [], IIC_VERW_MEM>, TB; +let mayLoad = 1 in { +def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), + "verr\t$seg", [], IIC_VERR>, TB; def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", [], IIC_VERW_REG>, TB; +} } // SchedRW //===----------------------------------------------------------------------===// @@ -403,6 +414,7 @@ def SIDT64m : I<0x01, MRM1m, (outs), (ins opaque80mem:$dst), "sidt{q}\t$dst", []>, TB, Requires <[In64BitMode]>; def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins), "sldt{w}\t$dst", [], IIC_SLDT>, TB, OpSize16; +let mayStore = 1 in def SLDT16m : I<0x00, MRM0m, (outs), (ins i16mem:$dst), "sldt{w}\t$dst", [], IIC_SLDT>, TB; def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins), @@ -412,6 +424,7 @@ def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins), // extension. def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins), "sldt{q}\t$dst", [], IIC_SLDT>, TB; +let mayStore = 1 in def SLDT64m : RI<0x00, MRM0m, (outs), (ins i16mem:$dst), "sldt{q}\t$dst", [], IIC_SLDT>, TB; @@ -429,6 +442,7 @@ def LIDT64m : I<0x01, MRM3m, (outs), (ins opaque80mem:$src), "lidt{q}\t$src", [], IIC_LIDT>, TB, Requires<[In64BitMode]>; def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), "lldt{w}\t$src", [], IIC_LLDT_REG>, TB; +let mayLoad = 1 in def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB; } // SchedRW @@ -459,6 +473,7 @@ def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst), def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src), "lmsw{w}\t$src", [], IIC_LMSW_MEM>, TB; +let mayLoad = 1 in def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB; diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td index 38ac8be9448323ea047d5703fea4e119431e8279..61aac58a491f2f858e4ce99eda0d3b85520c228a 100644 --- a/lib/Target/X86/X86InstrTSX.td +++ b/lib/Target/X86/X86InstrTSX.td @@ -30,6 +30,11 @@ def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst), "xbegin\t$dst", []>, OpSize32; } +// Psuedo instruction to fake the definition of EAX on the fallback code path. +let isPseudo = 1, Defs = [EAX] in { +def XABORT_DEF : I<0, Pseudo, (outs), (ins), "# XABORT DEF", []>; +} + def XEND : I<0x01, MRM_D5, (outs), (ins), "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>; diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td index 2ea27a934b478f56308378b16a1e399abf108422..315a69e6a2a245d70f063dde21f141161eb9dd02 100644 --- a/lib/Target/X86/X86InstrVMX.td +++ b/lib/Target/X86/X86InstrVMX.td @@ -43,22 +43,26 @@ def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmptrld\t$vmcs", []>, PS; def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs), "vmptrst\t$vmcs", []>, TB; -def VMREAD64rm : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; -def VMREAD32rm : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; -def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), - "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; +let mayStore = 1 in { +def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; +def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; +} def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; -def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), - "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; +let mayLoad = 1 in { +def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), + "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; +def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), + "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>; +} // 0F 01 C4 def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 53224431c0e90c481b91ba8cf178cc9efb99546c..5dde2d07babeb678e1cc21567f1319e9f1456c1f 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -111,7 +111,7 @@ multiclass xop3op opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - XOP_4V, VEX_W, Sched<[WriteVarVecShift]>; + XOP_4V, VEX_W, Sched<[WriteVarVecShift]>, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -282,7 +282,7 @@ multiclass xop4op opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; + []>, XOP_4V, VEX_W, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -318,7 +318,7 @@ multiclass xop4op_int opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; + []>, XOP_4V, VEX_W, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -357,7 +357,7 @@ multiclass xop_vpermil2 Opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W; + []>, VEX_W, FoldGenData; } let ExeDomain = SSEPackedDouble in { diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index 6cc5e8b63597502644483cb0e4eec7826e0b6e98..77dead8d24137a7bdd0b05b3ea5c0ebc609278b3 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -19,6 +19,7 @@ #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -39,9 +40,13 @@ using namespace llvm; namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "X86GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class X86InstructionSelector : public InstructionSelector { public: - X86InstructionSelector(const X86Subtarget &STI, + X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI); bool select(MachineInstr &I) const override; @@ -51,28 +56,36 @@ private: /// the patterns that don't require complex C++. bool selectImpl(MachineInstr &I) const; - // TODO: remove after selectImpl support pattern with a predicate. - unsigned getFAddOp(LLT &Ty, const RegisterBank &RB) const; - unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const; - unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const; - unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const; + // TODO: remove after suported by Tablegen-erated instruction selection. unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc, uint64_t Alignment) const; - bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI, - MachineFunction &MF) const; bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; - bool selectFrameIndex(MachineInstr &I, MachineRegisterInfo &MRI, - MachineFunction &MF) const; + bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + + const X86TargetMachine &TM; const X86Subtarget &STI; const X86InstrInfo &TII; const X86RegisterInfo &TRI; const X86RegisterBankInfo &RBI; +#define GET_GLOBALISEL_PREDICATES_DECL +#include "X86GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_DECL + #define GET_GLOBALISEL_TEMPORARIES_DECL #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_DECL @@ -84,10 +97,14 @@ private: #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_IMPL -X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI, +X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, + const X86Subtarget &STI, const X86RegisterBankInfo &RBI) - : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), +#define GET_GLOBALISEL_PREDICATES_INIT +#include "X86GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATES_INIT #define GET_GLOBALISEL_TEMPORARIES_INIT #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -99,6 +116,10 @@ X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI, static const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) { if (RB.getID() == X86::GPRRegBankID) { + if (Ty.getSizeInBits() <= 8) + return &X86::GR8RegClass; + if (Ty.getSizeInBits() == 16) + return &X86::GR16RegClass; if (Ty.getSizeInBits() == 32) return &X86::GR32RegClass; if (Ty.getSizeInBits() == 64) @@ -133,10 +154,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); - (void)DstSize; unsigned SrcReg = I.getOperand(1).getReg(); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); - (void)SrcSize; + assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) && "No phys reg on generic operators"); assert((DstSize == SrcSize || @@ -152,6 +172,18 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, case X86::GPRRegBankID: assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values."); RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); + + // Change the physical register + if (SrcSize > DstSize && TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + if (RC == &X86::GR32RegClass) + I.getOperand(1).setSubReg(X86::sub_32bit); + else if (RC == &X86::GR16RegClass) + I.getOperand(1).setSubReg(X86::sub_16bit); + else if (RC == &X86::GR8RegClass) + I.getOperand(1).setSubReg(X86::sub_8bit); + + I.getOperand(1).substPhysReg(SrcReg, TRI); + } break; case X86::VECRRegBankID: RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank); @@ -197,161 +229,28 @@ bool X86InstructionSelector::select(MachineInstr &I) const { assert(I.getNumOperands() == I.getNumExplicitOperands() && "Generic instruction has unexpected implicit operands\n"); - // TODO: This should be implemented by tblgen, pattern with predicate not - // supported yet. - if (selectBinaryOp(I, MRI, MF)) + if (selectImpl(I)) return true; + + DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs())); + + // TODO: This should be implemented by tblgen. if (selectLoadStoreOp(I, MRI, MF)) return true; - if (selectFrameIndex(I, MRI, MF)) + if (selectFrameIndexOrGep(I, MRI, MF)) return true; if (selectConstant(I, MRI, MF)) return true; + if (selectTrunc(I, MRI, MF)) + return true; + if (selectZext(I, MRI, MF)) + return true; + if (selectCmp(I, MRI, MF)) + return true; + if (selectUadde(I, MRI, MF)) + return true; - return selectImpl(I); -} - -unsigned X86InstructionSelector::getFAddOp(LLT &Ty, - const RegisterBank &RB) const { - - if (X86::VECRRegBankID != RB.getID()) - return TargetOpcode::G_FADD; - - if (Ty == LLT::scalar(32)) { - if (STI.hasAVX512()) { - return X86::VADDSSZrr; - } else if (STI.hasAVX()) { - return X86::VADDSSrr; - } else if (STI.hasSSE1()) { - return X86::ADDSSrr; - } - } else if (Ty == LLT::scalar(64)) { - if (STI.hasAVX512()) { - return X86::VADDSDZrr; - } else if (STI.hasAVX()) { - return X86::VADDSDrr; - } else if (STI.hasSSE2()) { - return X86::ADDSDrr; - } - } else if (Ty == LLT::vector(4, 32)) { - if ((STI.hasAVX512()) && (STI.hasVLX())) { - return X86::VADDPSZ128rr; - } else if (STI.hasAVX()) { - return X86::VADDPSrr; - } else if (STI.hasSSE1()) { - return X86::ADDPSrr; - } - } - - return TargetOpcode::G_FADD; -} - -unsigned X86InstructionSelector::getFSubOp(LLT &Ty, - const RegisterBank &RB) const { - - if (X86::VECRRegBankID != RB.getID()) - return TargetOpcode::G_FSUB; - - if (Ty == LLT::scalar(32)) { - if (STI.hasAVX512()) { - return X86::VSUBSSZrr; - } else if (STI.hasAVX()) { - return X86::VSUBSSrr; - } else if (STI.hasSSE1()) { - return X86::SUBSSrr; - } - } else if (Ty == LLT::scalar(64)) { - if (STI.hasAVX512()) { - return X86::VSUBSDZrr; - } else if (STI.hasAVX()) { - return X86::VSUBSDrr; - } else if (STI.hasSSE2()) { - return X86::SUBSDrr; - } - } else if (Ty == LLT::vector(4, 32)) { - if ((STI.hasAVX512()) && (STI.hasVLX())) { - return X86::VSUBPSZ128rr; - } else if (STI.hasAVX()) { - return X86::VSUBPSrr; - } else if (STI.hasSSE1()) { - return X86::SUBPSrr; - } - } - - return TargetOpcode::G_FSUB; -} - -unsigned X86InstructionSelector::getAddOp(LLT &Ty, - const RegisterBank &RB) const { - - if (X86::VECRRegBankID != RB.getID()) - return TargetOpcode::G_ADD; - - if (Ty == LLT::vector(4, 32)) { - if (STI.hasAVX512() && STI.hasVLX()) { - return X86::VPADDDZ128rr; - } else if (STI.hasAVX()) { - return X86::VPADDDrr; - } else if (STI.hasSSE2()) { - return X86::PADDDrr; - } - } - - return TargetOpcode::G_ADD; -} - -unsigned X86InstructionSelector::getSubOp(LLT &Ty, - const RegisterBank &RB) const { - - if (X86::VECRRegBankID != RB.getID()) - return TargetOpcode::G_SUB; - - if (Ty == LLT::vector(4, 32)) { - if (STI.hasAVX512() && STI.hasVLX()) { - return X86::VPSUBDZ128rr; - } else if (STI.hasAVX()) { - return X86::VPSUBDrr; - } else if (STI.hasSSE2()) { - return X86::PSUBDrr; - } - } - - return TargetOpcode::G_SUB; -} - -bool X86InstructionSelector::selectBinaryOp(MachineInstr &I, - MachineRegisterInfo &MRI, - MachineFunction &MF) const { - - const unsigned DefReg = I.getOperand(0).getReg(); - LLT Ty = MRI.getType(DefReg); - const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); - - unsigned NewOpc = I.getOpcode(); - - switch (NewOpc) { - case TargetOpcode::G_FADD: - NewOpc = getFAddOp(Ty, RB); - break; - case TargetOpcode::G_FSUB: - NewOpc = getFSubOp(Ty, RB); - break; - case TargetOpcode::G_ADD: - NewOpc = getAddOp(Ty, RB); - break; - case TargetOpcode::G_SUB: - NewOpc = getSubOp(Ty, RB); - break; - default: - break; - } - - if (NewOpc == I.getOpcode()) - return false; - - I.setDesc(TII.get(NewOpc)); - - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); + return false; } unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, @@ -368,7 +267,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, } else if (Ty == LLT::scalar(16)) { if (X86::GPRRegBankID == RB.getID()) return Isload ? X86::MOV16rm : X86::MOV16mr; - } else if (Ty == LLT::scalar(32)) { + } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) { if (X86::GPRRegBankID == RB.getID()) return Isload ? X86::MOV32rm : X86::MOV32mr; if (X86::VECRRegBankID == RB.getID()) @@ -376,7 +275,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) : (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr); - } else if (Ty == LLT::scalar(64)) { + } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { if (X86::GPRRegBankID == RB.getID()) return Isload ? X86::MOV64rm : X86::MOV64mr; if (X86::VECRRegBankID == RB.getID()) @@ -403,6 +302,26 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); + } else if (Ty.isVector() && Ty.getSizeInBits() == 256) { + if (Alignment >= 32) + return Isload ? (HasVLX ? X86::VMOVAPSZ256rm + : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX + : X86::VMOVAPSYrm) + : (HasVLX ? X86::VMOVAPSZ256mr + : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX + : X86::VMOVAPSYmr); + else + return Isload ? (HasVLX ? X86::VMOVUPSZ256rm + : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX + : X86::VMOVUPSYrm) + : (HasVLX ? X86::VMOVUPSZ256mr + : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX + : X86::VMOVUPSYmr); + } else if (Ty.isVector() && Ty.getSizeInBits() == 512) { + if (Alignment >= 64) + return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; + else + return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; } return Opc; } @@ -437,27 +356,37 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } -bool X86InstructionSelector::selectFrameIndex(MachineInstr &I, - MachineRegisterInfo &MRI, - MachineFunction &MF) const { - if (I.getOpcode() != TargetOpcode::G_FRAME_INDEX) +bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + unsigned Opc = I.getOpcode(); + + if (Opc != TargetOpcode::G_FRAME_INDEX && Opc != TargetOpcode::G_GEP) return false; const unsigned DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); - // Use LEA to calculate frame index. + // Use LEA to calculate frame index and GEP unsigned NewOpc; if (Ty == LLT::pointer(0, 64)) NewOpc = X86::LEA64r; else if (Ty == LLT::pointer(0, 32)) NewOpc = STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; else - llvm_unreachable("Can't select G_FRAME_INDEX, unsupported type."); + llvm_unreachable("Can't select G_FRAME_INDEX/G_GEP, unsupported type."); I.setDesc(TII.get(NewOpc)); MachineInstrBuilder MIB(MF, I); - addOffset(MIB, 0); + + if (Opc == TargetOpcode::G_FRAME_INDEX) { + addOffset(MIB, 0); + } else { + MachineOperand &InxOp = I.getOperand(2); + I.addOperand(InxOp); // set IndexReg + InxOp.ChangeToImmediate(1); // set Scale + MIB.addImm(0).addReg(0); + } return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } @@ -509,8 +438,227 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +bool X86InstructionSelector::selectTrunc(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_TRUNC) + return false; + + const unsigned DstReg = I.getOperand(0).getReg(); + const unsigned SrcReg = I.getOperand(1).getReg(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg); + + const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); + const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); + + if (DstRB.getID() != SrcRB.getID()) { + DEBUG(dbgs() << "G_TRUNC input/output on different banks\n"); + return false; + } + + if (DstRB.getID() != X86::GPRRegBankID) + return false; + + const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB); + if (!DstRC) + return false; + + const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB); + if (!SrcRC) + return false; + + unsigned SubIdx; + if (DstRC == SrcRC) { + // Nothing to be done + SubIdx = X86::NoSubRegister; + } else if (DstRC == &X86::GR32RegClass) { + SubIdx = X86::sub_32bit; + } else if (DstRC == &X86::GR16RegClass) { + SubIdx = X86::sub_16bit; + } else if (DstRC == &X86::GR8RegClass) { + SubIdx = X86::sub_8bit; + } else { + return false; + } + + SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx); + + if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || + !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { + DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); + return false; + } + + I.getOperand(1).setSubReg(SubIdx); + + I.setDesc(TII.get(X86::COPY)); + return true; +} + +bool X86InstructionSelector::selectZext(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_ZEXT) + return false; + + const unsigned DstReg = I.getOperand(0).getReg(); + const unsigned SrcReg = I.getOperand(1).getReg(); + + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(SrcReg); + + if (SrcTy == LLT::scalar(1)) { + + unsigned AndOpc; + if (DstTy == LLT::scalar(32)) + AndOpc = X86::AND32ri8; + else if (DstTy == LLT::scalar(64)) + AndOpc = X86::AND64ri8; + else + return false; + + const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); + unsigned DefReg = + MRI.createVirtualRegister(getRegClassForTypeOnBank(DstTy, RegBank)); + + BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::SUBREG_TO_REG), DefReg) + .addImm(0) + .addReg(SrcReg) + .addImm(X86::sub_8bit); + + MachineInstr &AndInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) + .addReg(DefReg) + .addImm(1); + + constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); + + I.eraseFromParent(); + return true; + } + + return false; +} + +bool X86InstructionSelector::selectCmp(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_ICMP) + return false; + + X86::CondCode CC; + bool SwapArgs; + std::tie(CC, SwapArgs) = X86::getX86ConditionCode( + (CmpInst::Predicate)I.getOperand(1).getPredicate()); + unsigned OpSet = X86::getSETFromCond(CC); + + unsigned LHS = I.getOperand(2).getReg(); + unsigned RHS = I.getOperand(3).getReg(); + + if (SwapArgs) + std::swap(LHS, RHS); + + unsigned OpCmp; + LLT Ty = MRI.getType(LHS); + + switch (Ty.getSizeInBits()) { + default: + return false; + case 8: + OpCmp = X86::CMP8rr; + break; + case 16: + OpCmp = X86::CMP16rr; + break; + case 32: + OpCmp = X86::CMP32rr; + break; + case 64: + OpCmp = X86::CMP64rr; + break; + } + + MachineInstr &CmpInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) + .addReg(LHS) + .addReg(RHS); + + MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(OpSet), I.getOperand(0).getReg()); + + constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); + constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI); + + I.eraseFromParent(); + return true; +} + +bool X86InstructionSelector::selectUadde(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_UADDE) + return false; + + const unsigned DstReg = I.getOperand(0).getReg(); + const unsigned CarryOutReg = I.getOperand(1).getReg(); + const unsigned Op0Reg = I.getOperand(2).getReg(); + const unsigned Op1Reg = I.getOperand(3).getReg(); + unsigned CarryInReg = I.getOperand(4).getReg(); + + const LLT DstTy = MRI.getType(DstReg); + + if (DstTy != LLT::scalar(32)) + return false; + + // find CarryIn def instruction. + MachineInstr *Def = MRI.getVRegDef(CarryInReg); + while (Def->getOpcode() == TargetOpcode::G_TRUNC) { + CarryInReg = Def->getOperand(1).getReg(); + Def = MRI.getVRegDef(CarryInReg); + } + + unsigned Opcode; + if (Def->getOpcode() == TargetOpcode::G_UADDE) { + // carry set by prev ADD. + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS) + .addReg(CarryInReg); + + if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI)) + return false; + + Opcode = X86::ADC32rr; + } else if (auto val = getConstantVRegVal(CarryInReg, MRI)) { + // carry is constant, support only 0. + if (*val != 0) + return false; + + Opcode = X86::ADD32rr; + } else + return false; + + MachineInstr &AddInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg) + .addReg(Op0Reg) + .addReg(Op1Reg); + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg) + .addReg(X86::EFLAGS); + + if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) || + !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI)) + return false; + + I.eraseFromParent(); + return true; +} + InstructionSelector * -llvm::createX86InstructionSelector(X86Subtarget &Subtarget, +llvm::createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &Subtarget, X86RegisterBankInfo &RBI) { - return new X86InstructionSelector(Subtarget, RBI); + return new X86InstructionSelector(TM, Subtarget, RBI); } diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 2a40399ba5712d515c2034c87aafe1fed400e128..bc73bb1ae8c51d745bc47345458c46254255fa55 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -36,7 +36,7 @@ enum IntrinsicType : uint16_t { TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, - FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP, + FIXUPIMMS_MASKZ, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP, }; struct IntrinsicData { diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index c2dc762fec5eb30ab077329881ec8303c6841429..979aaee110aa409bce3521d5d930481c341f3878 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ b/lib/Target/X86/X86LegalizerInfo.cpp @@ -34,6 +34,12 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, setLegalizerInfo64bit(); setLegalizerInfoSSE1(); setLegalizerInfoSSE2(); + setLegalizerInfoSSE41(); + setLegalizerInfoAVX(); + setLegalizerInfoAVX2(); + setLegalizerInfoAVX512(); + setLegalizerInfoAVX512DQ(); + setLegalizerInfoAVX512BW(); computeTables(); } @@ -50,10 +56,15 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - for (unsigned BinOp : {G_ADD, G_SUB}) + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL}) for (auto Ty : {s8, s16, s32}) setAction({BinOp, Ty}, Legal); + for (unsigned Op : {G_UADDE}) { + setAction({Op, s32}, Legal); + setAction({Op, 1, s1}, Legal); + } + for (unsigned MemOp : {G_LOAD, G_STORE}) { for (auto Ty : {s8, s16, s32, p0}) setAction({MemOp, Ty}, Legal); @@ -65,12 +76,33 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { // Pointer-handling setAction({G_FRAME_INDEX, p0}, Legal); + setAction({G_GEP, p0}, Legal); + setAction({G_GEP, 1, s32}, Legal); + + for (auto Ty : {s1, s8, s16}) + setAction({G_GEP, 1, Ty}, WidenScalar); + // Constants for (auto Ty : {s8, s16, s32, p0}) setAction({TargetOpcode::G_CONSTANT, Ty}, Legal); setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar); setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar); + + // Extensions + setAction({G_ZEXT, s32}, Legal); + setAction({G_SEXT, s32}, Legal); + + for (auto Ty : {s1, s8, s16}) { + setAction({G_ZEXT, 1, Ty}, Legal); + setAction({G_SEXT, 1, Ty}, Legal); + } + + // Comparison + setAction({G_ICMP, s1}, Legal); + + for (auto Ty : {s8, s16, s32, p0}) + setAction({G_ICMP, 1, Ty}, Legal); } void X86LegalizerInfo::setLegalizerInfo64bit() { @@ -85,7 +117,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - for (unsigned BinOp : {G_ADD, G_SUB}) + for (unsigned BinOp : {G_ADD, G_SUB, G_MUL}) for (auto Ty : {s8, s16, s32, s64}) setAction({BinOp, Ty}, Legal); @@ -100,11 +132,35 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { // Pointer-handling setAction({G_FRAME_INDEX, p0}, Legal); + setAction({G_GEP, p0}, Legal); + setAction({G_GEP, 1, s32}, Legal); + setAction({G_GEP, 1, s64}, Legal); + + for (auto Ty : {s1, s8, s16}) + setAction({G_GEP, 1, Ty}, WidenScalar); + // Constants for (auto Ty : {s8, s16, s32, s64, p0}) setAction({TargetOpcode::G_CONSTANT, Ty}, Legal); setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar); + + // Extensions + for (auto Ty : {s32, s64}) { + setAction({G_ZEXT, Ty}, Legal); + setAction({G_SEXT, Ty}, Legal); + } + + for (auto Ty : {s1, s8, s16, s32}) { + setAction({G_ZEXT, 1, Ty}, Legal); + setAction({G_SEXT, 1, Ty}, Legal); + } + + // Comparison + setAction({G_ICMP, s1}, Legal); + + for (auto Ty : {s8, s16, s32, s64, p0}) + setAction({G_ICMP, 1, Ty}, Legal); } void X86LegalizerInfo::setLegalizerInfoSSE1() { @@ -129,6 +185,8 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() { return; const LLT s64 = LLT::scalar(64); + const LLT v16s8 = LLT::vector(16, 8); + const LLT v8s16 = LLT::vector(8, 16); const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); @@ -137,6 +195,117 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() { setAction({BinOp, Ty}, Legal); for (unsigned BinOp : {G_ADD, G_SUB}) - for (auto Ty : {v4s32}) + for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) setAction({BinOp, Ty}, Legal); + + setAction({G_MUL, v8s16}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoSSE41() { + if (!Subtarget.hasSSE41()) + return; + + const LLT v4s32 = LLT::vector(4, 32); + + setAction({G_MUL, v4s32}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoAVX() { + if (!Subtarget.hasAVX()) + return; + + const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + + for (unsigned MemOp : {G_LOAD, G_STORE}) + for (auto Ty : {v8s32, v4s64}) + setAction({MemOp, Ty}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoAVX2() { + if (!Subtarget.hasAVX2()) + return; + + const LLT v32s8 = LLT::vector(32, 8); + const LLT v16s16 = LLT::vector(16, 16); + const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) + setAction({BinOp, Ty}, Legal); + + for (auto Ty : {v16s16, v8s32}) + setAction({G_MUL, Ty}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoAVX512() { + if (!Subtarget.hasAVX512()) + return; + + const LLT v16s32 = LLT::vector(16, 32); + const LLT v8s64 = LLT::vector(8, 64); + + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v16s32, v8s64}) + setAction({BinOp, Ty}, Legal); + + setAction({G_MUL, v16s32}, Legal); + + for (unsigned MemOp : {G_LOAD, G_STORE}) + for (auto Ty : {v16s32, v8s64}) + setAction({MemOp, Ty}, Legal); + + /************ VLX *******************/ + if (!Subtarget.hasVLX()) + return; + + const LLT v4s32 = LLT::vector(4, 32); + const LLT v8s32 = LLT::vector(8, 32); + + for (auto Ty : {v4s32, v8s32}) + setAction({G_MUL, Ty}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoAVX512DQ() { + if (!(Subtarget.hasAVX512() && Subtarget.hasDQI())) + return; + + const LLT v8s64 = LLT::vector(8, 64); + + setAction({G_MUL, v8s64}, Legal); + + /************ VLX *******************/ + if (!Subtarget.hasVLX()) + return; + + const LLT v2s64 = LLT::vector(2, 64); + const LLT v4s64 = LLT::vector(4, 64); + + for (auto Ty : {v2s64, v4s64}) + setAction({G_MUL, Ty}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoAVX512BW() { + if (!(Subtarget.hasAVX512() && Subtarget.hasBWI())) + return; + + const LLT v64s8 = LLT::vector(64, 8); + const LLT v32s16 = LLT::vector(32, 16); + + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v64s8, v32s16}) + setAction({BinOp, Ty}, Legal); + + setAction({G_MUL, v32s16}, Legal); + + /************ VLX *******************/ + if (!Subtarget.hasVLX()) + return; + + const LLT v8s16 = LLT::vector(8, 16); + const LLT v16s16 = LLT::vector(16, 16); + + for (auto Ty : {v8s16, v16s16}) + setAction({G_MUL, Ty}, Legal); } diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h index 3f00898b42322b27dc65db16ad3995f84a3be628..135950a95f84454dc06ba5fc930c22b0f318dbe5 100644 --- a/lib/Target/X86/X86LegalizerInfo.h +++ b/lib/Target/X86/X86LegalizerInfo.h @@ -38,6 +38,12 @@ private: void setLegalizerInfo64bit(); void setLegalizerInfoSSE1(); void setLegalizerInfoSSE2(); + void setLegalizerInfoSSE41(); + void setLegalizerInfoAVX(); + void setLegalizerInfoAVX2(); + void setLegalizerInfoAVX512(); + void setLegalizerInfoAVX512DQ(); + void setLegalizerInfoAVX512BW(); }; } // namespace llvm #endif diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 55b090b67640f4b65b56203b32686d115199ca32..33bc8e11a572910c32a980c75c481949bb0ab9c7 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -12,20 +12,21 @@ // //===----------------------------------------------------------------------===// -#include "X86AsmPrinter.h" -#include "X86RegisterInfo.h" -#include "X86ShuffleDecodeConstantPool.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "InstPrinter/X86InstComments.h" #include "MCTargetDesc/X86BaseInfo.h" #include "Utils/X86ShuffleDecode.h" +#include "X86AsmPrinter.h" +#include "X86RegisterInfo.h" +#include "X86ShuffleDecodeConstantPool.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalValue.h" @@ -38,13 +39,12 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -102,7 +102,7 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( } void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { - OutStreamer->EmitInstruction(Inst, getSubtargetInfo()); + OutStreamer->EmitInstruction(Inst, getSubtargetInfo(), EnablePrintSchedInfo); SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get()); } @@ -1040,6 +1040,83 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, getSubtargetInfo()); } +void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, + X86MCInstLower &MCIL) { + assert(Subtarget->is64Bit() && "XRay custom events only suports X86-64"); + + // We want to emit the following pattern, which follows the x86 calling + // convention to prepare for the trampoline call to be patched in. + // + // + // .p2align 1, ... + // .Lxray_event_sled_N: + // jmp +N // jump across the call instruction + // callq __xray_CustomEvent // force relocation to symbol + // + // + // The relative jump needs to jump forward 24 bytes: + // 10 (args) + 5 (nops) + 9 (cleanup) + // + // After patching, it would look something like: + // + // nopw (2-byte nop) + // callq __xrayCustomEvent // already lowered + // + // --- + // First we emit the label and the jump. + auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true); + OutStreamer->AddComment("# XRay Custom Event Log"); + OutStreamer->EmitCodeAlignment(2); + OutStreamer->EmitLabel(CurSled); + + // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as + // an operand (computed as an offset from the jmp instruction). + // FIXME: Find another less hacky way do force the relative jump. + OutStreamer->EmitBytes("\xeb\x14"); + + // The default C calling convention will place two arguments into %rcx and + // %rdx -- so we only work with those. + unsigned UsedRegs[] = {X86::RDI, X86::RSI, X86::RAX}; + + // Because we will use %rax, we preserve that across the call. + EmitAndCountInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RAX)); + + // Then we put the operands in the %rdi and %rsi registers. + for (unsigned I = 0; I < MI.getNumOperands(); ++I) + if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) { + if (Op->isImm()) + EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri) + .addReg(UsedRegs[I]) + .addImm(Op->getImm())); + else if (Op->isReg()) { + if (Op->getReg() != UsedRegs[I]) + EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr) + .addReg(UsedRegs[I]) + .addReg(Op->getReg())); + else + EmitNops(*OutStreamer, 3, Subtarget->is64Bit(), getSubtargetInfo()); + } + } + + // We emit a hard dependency on the __xray_CustomEvent symbol, which is the + // name of the trampoline to be implemented by the XRay runtime. We put this + // explicitly in the %rax register. + auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent"); + MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym); + EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri) + .addReg(X86::RAX) + .addOperand(MCIL.LowerSymbolOperand(TOp, TSym))); + + // Emit the call instruction. + EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(X86::RAX)); + + // Restore caller-saved and used registers. + OutStreamer->AddComment("xray custom event end."); + EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RAX)); + + recordSled(CurSled, MI, SledKind::CUSTOM_EVENT); +} + void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, X86MCInstLower &MCIL) { // We want to emit the following pattern: @@ -1415,6 +1492,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case TargetOpcode::PATCHABLE_TAIL_CALL: return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); + + case TargetOpcode::PATCHABLE_EVENT_CALL: + return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering); case X86::MORESTACK_RET: EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); @@ -1529,7 +1609,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallVector Mask; DecodePSHUFBMask(C, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); + OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask), + !EnablePrintSchedInfo); } break; } @@ -1600,7 +1681,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallVector Mask; DecodeVPERMILPMask(C, ElSize, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask)); + OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask), + !EnablePrintSchedInfo); } break; } @@ -1630,7 +1712,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallVector Mask; DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); + OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask), + !EnablePrintSchedInfo); } break; } @@ -1646,7 +1729,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallVector Mask; DecodeVPPERMMask(C, Mask); if (!Mask.empty()) - OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask)); + OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask), + !EnablePrintSchedInfo); } break; } @@ -1706,7 +1790,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { CS << "?"; } CS << "]"; - OutStreamer->AddComment(CS.str()); + OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); } else if (auto *CV = dyn_cast(C)) { CS << "<"; for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) { @@ -1738,7 +1822,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } } CS << ">"; - OutStreamer->AddComment(CS.str()); + OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); } } break; diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp index debb192732e5c3c540b663a9f4c9140e5da847b1..e6756b975c10cff2cb141d461ec3fb7c68aad7cb 100644 --- a/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -27,6 +27,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -262,6 +264,12 @@ private: /// \brief Removes redundant address calculations. bool removeRedundantAddrCalc(MemOpMap &LEAs); + /// Replace debug value MI with a new debug value instruction using register + /// VReg with an appropriate offset and DIExpression to incorporate the + /// address displacement AddrDispShift. Return new debug value instruction. + MachineInstr *replaceDebugValue(MachineInstr &MI, unsigned VReg, + int64_t AddrDispShift); + /// \brief Removes LEAs which calculate similar addresses. bool removeRedundantLEAs(MemOpMap &LEAs); @@ -532,6 +540,25 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { return Changed; } +MachineInstr *OptimizeLEAPass::replaceDebugValue(MachineInstr &MI, + unsigned VReg, + int64_t AddrDispShift) { + DIExpression *Expr = const_cast(MI.getDebugExpression()); + + if (AddrDispShift != 0) + Expr = DIExpression::prepend(Expr, DIExpression::NoDeref, AddrDispShift, + DIExpression::WithStackValue); + + // Replace DBG_VALUE instruction with modified version. + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + bool IsIndirect = MI.isIndirectDebugValue(); + int64_t Offset = IsIndirect ? MI.getOperand(1).getImm() : 0; + const MDNode *Var = MI.getDebugVariable(); + return BuildMI(*MBB, MBB->erase(&MI), DL, TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, VReg, Offset, Var, Expr); +} + // Try to find similar LEAs in the list and replace one with another. bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { bool Changed = false; @@ -563,13 +590,21 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { // Loop over all uses of the Last LEA and update their operands. Note // that the correctness of this has already been checked in the // isReplaceable function. + unsigned FirstVReg = First.getOperand(0).getReg(); unsigned LastVReg = Last.getOperand(0).getReg(); - for (auto UI = MRI->use_nodbg_begin(LastVReg), - UE = MRI->use_nodbg_end(); + for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end(); UI != UE;) { MachineOperand &MO = *UI++; MachineInstr &MI = *MO.getParent(); + if (MI.isDebugValue()) { + // Replace DBG_VALUE instruction with modified version using the + // register from the replacing LEA and the address displacement + // between the LEA instructions. + replaceDebugValue(MI, FirstVReg, AddrDispShift); + continue; + } + // Get the number of the first memory operand. const MCInstrDesc &Desc = MI.getDesc(); int MemOpNo = @@ -577,7 +612,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { X86II::getOperandBias(Desc); // Update address base. - MO.setReg(First.getOperand(0).getReg()); + MO.setReg(FirstVReg); // Update address disp. MachineOperand &Op = MI.getOperand(MemOpNo + X86::AddrDisp); @@ -587,11 +622,8 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { Op.setOffset(Op.getOffset() + AddrDispShift); } - // Mark debug values referring to Last LEA as undefined. - MRI->markUsesInDebugValueAsUndef(LastVReg); - // Since we can possibly extend register lifetime, clear kill flags. - MRI->clearKillFlags(First.getOperand(0).getReg()); + MRI->clearKillFlags(FirstVReg); ++NumRedundantLEAs; DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump();); diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp index d395c826e6bf7a050bdd8b5ffdd9b65eb32bf3c6..efd3df26dd424f95a225a010ad613ac6bbefe605 100644 --- a/lib/Target/X86/X86RegisterBankInfo.cpp +++ b/lib/Target/X86/X86RegisterBankInfo.cpp @@ -68,6 +68,7 @@ X86GenRegisterBankInfo::PartialMappingIdx X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) { if ((Ty.isScalar() && !isFP) || Ty.isPointer()) { switch (Ty.getSizeInBits()) { + case 1: case 8: return PMI_GPR8; case 16: @@ -138,8 +139,9 @@ bool X86RegisterBankInfo::getInstrValueMapping( return true; } -RegisterBankInfo::InstructionMapping -X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, bool isFP) { +const RegisterBankInfo::InstructionMapping & +X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, + bool isFP) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -151,10 +153,10 @@ X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, bool isFP) { llvm_unreachable("Unsupported operand mapping yet."); auto Mapping = getValueMapping(getPartialMappingIdx(Ty, isFP), 3); - return InstructionMapping{DefaultMappingID, 1, Mapping, NumOperands}; + return getInstructionMapping(DefaultMappingID, 1, Mapping, NumOperands); } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -163,7 +165,7 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Try the default logic for non-generic instructions that are either copies // or already have some operands assigned to banks. if (!isPreISelGenericOpcode(Opc)) { - InstructionMapping Mapping = getInstrMappingImpl(MI); + const InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; } @@ -192,10 +194,10 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Finally construct the computed mapping. SmallVector OpdsMapping(NumOperands); if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping)) - return InstructionMapping(); + return getInvalidInstructionMapping(); - return InstructionMapping{DefaultMappingID, /* Cost */ 1, - getOperandsMapping(OpdsMapping), NumOperands}; + return getInstructionMapping(DefaultMappingID, /* Cost */ 1, + getOperandsMapping(OpdsMapping), NumOperands); } void X86RegisterBankInfo::applyMappingImpl( @@ -230,10 +232,10 @@ X86RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const { if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping)) break; - RegisterBankInfo::InstructionMapping Mapping = InstructionMapping{ - /*ID*/ 1, /*Cost*/ 1, getOperandsMapping(OpdsMapping), NumOperands}; + const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping( + /*ID*/ 1, /*Cost*/ 1, getOperandsMapping(OpdsMapping), NumOperands); InstructionMappings AltMappings; - AltMappings.emplace_back(std::move(Mapping)); + AltMappings.push_back(&Mapping); return AltMappings; } default: diff --git a/lib/Target/X86/X86RegisterBankInfo.h b/lib/Target/X86/X86RegisterBankInfo.h index a1e01a9ab94978391d7fcc74d7c891739e72943f..e227880427f3c4a33a89a1a5437ba740a0833288 100644 --- a/lib/Target/X86/X86RegisterBankInfo.h +++ b/lib/Target/X86/X86RegisterBankInfo.h @@ -46,8 +46,8 @@ private: /// Get an instruction mapping. /// \return An InstructionMappings with a statically allocated /// OperandsMapping. - static InstructionMapping getSameOperandsMapping(const MachineInstr &MI, - bool isFP); + const InstructionMapping &getSameOperandsMapping(const MachineInstr &MI, + bool isFP) const; /// Track the bank of each instruction operand(register) static void @@ -74,7 +74,8 @@ public: /// See RegisterBankInfo::applyMapping. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; - InstructionMapping getInstrMapping(const MachineInstr &MI) const override; + const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const override; }; } // namespace llvm diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 9bab9a4cf3ba4a649a0ea9643da938a918af2071..7e4cba1c8345f89e720e2c05a067b8dc9d1c25ea 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -137,25 +137,29 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, case X86::FR32RegClassID: case X86::FR64RegClassID: // If AVX-512 isn't supported we should only inflate to these classes. - if (!Subtarget.hasAVX512() && Super->getSize() == RC->getSize()) + if (!Subtarget.hasAVX512() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::VR128RegClassID: case X86::VR256RegClassID: // If VLX isn't supported we should only inflate to these classes. - if (!Subtarget.hasVLX() && Super->getSize() == RC->getSize()) + if (!Subtarget.hasVLX() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::VR128XRegClassID: case X86::VR256XRegClassID: // If VLX isn't support we shouldn't inflate to these classes. - if (Subtarget.hasVLX() && Super->getSize() == RC->getSize()) + if (Subtarget.hasVLX() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::FR32XRegClassID: case X86::FR64XRegClassID: // If AVX-512 isn't support we shouldn't inflate to these classes. - if (Subtarget.hasAVX512() && Super->getSize() == RC->getSize()) + if (Subtarget.hasAVX512() && + getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; break; case X86::GR8RegClassID: @@ -168,7 +172,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, case X86::VR512RegClassID: // Don't return a super-class that would shrink the spill size. // That can happen with the vector and float classes. - if (Super->getSize() == RC->getSize()) + if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC)) return Super; } Super = *I++; @@ -272,7 +276,14 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool HasAVX512 = Subtarget.hasAVX512(); bool CallsEHReturn = MF->callsEHReturn(); - switch (MF->getFunction()->getCallingConv()) { + CallingConv::ID CC = MF->getFunction()->getCallingConv(); + + // If attribute NoCallerSavedRegisters exists then we set X86_INTR calling + // convention because it has the CSR list. + if (MF->getFunction()->hasFnAttribute("no_caller_saved_registers")) + CC = CallingConv::X86_INTR; + + switch (CC) { case CallingConv::GHC: case CallingConv::HiPE: return CSR_NoRegs_SaveList; @@ -309,14 +320,14 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { case CallingConv::X86_RegCall: if (Is64Bit) { if (IsWin64) { - return (HasSSE ? CSR_Win64_RegCall_SaveList : + return (HasSSE ? CSR_Win64_RegCall_SaveList : CSR_Win64_RegCall_NoSSE_SaveList); } else { - return (HasSSE ? CSR_SysV64_RegCall_SaveList : + return (HasSSE ? CSR_SysV64_RegCall_SaveList : CSR_SysV64_RegCall_NoSSE_SaveList); } } else { - return (HasSSE ? CSR_32_RegCall_SaveList : + return (HasSSE ? CSR_32_RegCall_SaveList : CSR_32_RegCall_NoSSE_SaveList); } case CallingConv::Cold: @@ -424,15 +435,15 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_64_HHVM_RegMask; case CallingConv::X86_RegCall: if (Is64Bit) { - if (IsWin64) { - return (HasSSE ? CSR_Win64_RegCall_RegMask : + if (IsWin64) { + return (HasSSE ? CSR_Win64_RegCall_RegMask : CSR_Win64_RegCall_NoSSE_RegMask); } else { - return (HasSSE ? CSR_SysV64_RegCall_RegMask : + return (HasSSE ? CSR_SysV64_RegCall_RegMask : CSR_SysV64_RegCall_NoSSE_RegMask); } } else { - return (HasSSE ? CSR_32_RegCall_RegMask : + return (HasSSE ? CSR_32_RegCall_RegMask : CSR_32_RegCall_NoSSE_RegMask); } case CallingConv::Cold: @@ -669,32 +680,28 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MI.getParent()->getParent(); const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - unsigned BasePtr; - unsigned Opc = MI.getOpcode(); - bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm || - Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64; - - if (hasBasePointer(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister()); - else if (needsStackRealignment(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); - else if (AfterFPPop) - BasePtr = StackPtr; - else - BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); + // Determine base register and offset. + int FIOffset; + unsigned BasePtr; + if (MI.isReturn()) { + assert((!needsStackRealignment(MF) || + MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) && + "Return instruction can only reference SP relative frame objects"); + FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0); + } else { + FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr); + } // LOCAL_ESCAPE uses a single offset, with no register. It only works in the // simple FP case, and doesn't work with stack realignment. On 32-bit, the // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. - unsigned IgnoredFrameReg; + unsigned Opc = MI.getOpcode(); if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - int Offset; - Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - FI.ChangeToImmediate(Offset); + FI.ChangeToImmediate(FIOffset); return; } @@ -710,15 +717,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // FrameIndex with base register. Add an offset to the offset. MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false); - // Now add the frame object offset to the offset from EBP. - int FIOffset; - if (AfterFPPop) { - // Tail call jmp happens after FP is popped. - const MachineFrameInfo &MFI = MF.getFrameInfo(); - FIOffset = MFI.getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea(); - } else - FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - if (BasePtr == StackPtr) FIOffset += SPAdj; diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 58fa31e94fba483370074305a361dbd8dec3a258..25958f0c310647e3146be43edae02debf0cf2c3a 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -133,6 +133,11 @@ public: unsigned getPtrSizedFrameRegister(const MachineFunction &MF) const; unsigned getStackRegister() const { return StackPtr; } unsigned getBaseRegister() const { return BasePtr; } + /// Returns physical register used as frame pointer. + /// This will always returns the frame pointer register, contrary to + /// getFrameRegister() which returns the "base pointer" in situations + /// involving a stack, frame and base pointer. + unsigned getFramePtr() const { return FramePtr; } // FIXME: Move to FrameInfok unsigned getSlotSize() const { return SlotSize; } }; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index b8cae2f0bd26a5678447338ac66d50b02cbb931d..3a61a7247c72c8d6558fbc81117ed37671730cc2 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -437,8 +437,9 @@ def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>; def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32, (add LOW32_ADDR_ACCESS, RBP)>; -// A class to support the 'A' assembler constraint: EAX then EDX. +// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX. def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>; +def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>; // Scalar SSE2 floating point registers. def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>; @@ -510,7 +511,7 @@ def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], 256, (sequence "YMM%u", 0, 31)>; // Mask registers -def VK1 : RegisterClass<"X86", [i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;} +def VK1 : RegisterClass<"X86", [v1i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;} def VK2 : RegisterClass<"X86", [v2i1], 16, (add VK1)> {let Size = 16;} def VK4 : RegisterClass<"X86", [v4i1], 16, (add VK2)> {let Size = 16;} def VK8 : RegisterClass<"X86", [v8i1], 16, (add VK4)> {let Size = 16;} @@ -518,7 +519,7 @@ def VK16 : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;} def VK32 : RegisterClass<"X86", [v32i1], 32, (add VK16)> {let Size = 32;} def VK64 : RegisterClass<"X86", [v64i1], 64, (add VK32)> {let Size = 64;} -def VK1WM : RegisterClass<"X86", [i1], 16, (sub VK1, K0)> {let Size = 16;} +def VK1WM : RegisterClass<"X86", [v1i1], 16, (sub VK1, K0)> {let Size = 16;} def VK2WM : RegisterClass<"X86", [v2i1], 16, (sub VK2, K0)> {let Size = 16;} def VK4WM : RegisterClass<"X86", [v4i1], 16, (sub VK4, K0)> {let Size = 16;} def VK8WM : RegisterClass<"X86", [v8i1], 16, (sub VK8, K0)> {let Size = 16;} diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 677e82459766d510a265a966c8fd0bb83a0bab2d..03c8ccb53afeb01813028771ac22a666d5b0c020 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -1488,6 +1488,39 @@ def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>; //-- Arithmetic instructions --// +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1, 2, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} +// v <- v,m. +def : WriteRes { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [1, 2, 1]; +} + // PHADD|PHSUB (S) W/D. // v <- v,v. def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> { diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index eca65c2892b7e0cd61aa93806b781c3baf703df7..b8ec5883152c3955f149ed20753bd7c2b901985e 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -157,6 +157,31 @@ def : WriteRes { let ResourceCycles = [1, 1, 1, 1]; } +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { + let Latency = 3; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes; + +// v <- v,m. +def : WriteRes { + let Latency = 5; + let ResourceCycles = [1, 1]; +} + // String instructions. // Packed Compare Implicit Length Strings, Return Mask def : WriteRes { diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 7f7efd7cad3f615eb859b86d476e3d9b464cd303..a12fa68faf4f167ea036a705605598a6eadcf443 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -77,6 +77,10 @@ defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } +// Horizontal Add/Sub (float and integer) +defm WriteFHAdd : X86SchedWritePair; +defm WritePHAdd : X86SchedWritePair; + // Vector integer operations. defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. @@ -497,6 +501,7 @@ def IIC_IN_RI : InstrItinClass; def IIC_OUT_RR : InstrItinClass; def IIC_OUT_IR : InstrItinClass; def IIC_INS : InstrItinClass; +def IIC_LWP : InstrItinClass; def IIC_MOV_REG_DR : InstrItinClass; def IIC_MOV_DR_REG : InstrItinClass; def IIC_MOV_REG_CR : InstrItinClass; diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index ce1ece34e431a872d65d22a4938a91dda0ee0312..6cb2a3694d92ec3a31f5aa174ef44bdc7bc0d4ae 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -319,6 +319,38 @@ def : WriteRes { let ResourceCycles = [1, 1]; } +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { + let Latency = 3; +} + +def : WriteRes { + let Latency = 8; +} + +def : WriteRes { + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [1, 1]; +} + +def WriteFHAddY: SchedWriteRes<[JFPU0]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteFHAddY], (instregex "VH(ADD|SUB)P(S|D)Yrr")>; + +def WriteFHAddYLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteFHAddYLd], (instregex "VH(ADD|SUB)P(S|D)Yrm")>; + //////////////////////////////////////////////////////////////////////////////// // Carry-less multiplication instructions. //////////////////////////////////////////////////////////////////////////////// diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index f95d4fa041774410fca9f143c13b5956adf96cfe..03ed2db2350ddc5c99ef0376f89abd0b862fc22c 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -137,6 +137,33 @@ defm : SMWriteResPair; defm : SMWriteResPair; defm : SMWriteResPair; +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +// HADD, HSUB PS/PD + +def : WriteRes { + let Latency = 3; + let ResourceCycles = [2]; +} + +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +// PHADD|PHSUB (S) W/D. +def : WriteRes { + let Latency = 1; + let ResourceCycles = [1]; +} + +def : WriteRes { + let Latency = 4; + let ResourceCycles = [1, 1]; +} + // String instructions. // Packed Compare Implicit Length Strings, Return Mask def : WriteRes { diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 9da8a18965ea60de27dfc6ac8b6fea4abfc0cb23..c67aa04aebeab8aedac14c797ada0458f54b65e9 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "X86InstrInfo.h" +#include "X86SelectionDAGInfo.h" #include "X86ISelLowering.h" +#include "X86InstrInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" -#include "X86SelectionDAGInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/Target/TargetLowering.h" @@ -44,8 +44,26 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( return false; } +namespace { + +// Represents a cover of a buffer of Size bytes with Count() blocks of type AVT +// (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is +// always smaller than the block size). +struct RepMovsRepeats { + RepMovsRepeats(uint64_t Size) : Size(Size) {} + + uint64_t Count() const { return Size / UBytes(); } + uint64_t BytesLeft() const { return Size % UBytes(); } + uint64_t UBytes() const { return AVT.getSizeInBits() / 8; } + + const uint64_t Size; + MVT AVT = MVT::i8; +}; + +} // namespace + SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( - SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast(Size); @@ -69,10 +87,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( if ((Align & 3) != 0 || !ConstantSize || ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) { // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast(Src); + ConstantSDNode *ValC = dyn_cast(Val); - if (const char *bzeroEntry = V && - V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { + if (const char *bzeroEntry = ValC && + ValC->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); @@ -104,9 +122,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SDValue InFlag; EVT AVT; SDValue Count; - ConstantSDNode *ValC = dyn_cast(Src); + ConstantSDNode *ValC = dyn_cast(Val); unsigned BytesLeft = 0; - bool TwoRepStos = false; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; @@ -148,7 +165,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( } else { AVT = MVT::i8; Count = DAG.getIntPtrConstant(SizeVal, dl); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag); InFlag = Chain.getValue(1); } @@ -163,20 +180,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, - CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - } else if (BytesLeft) { + if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); @@ -185,7 +189,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( Chain = DAG.getMemset(Chain, dl, DAG.getNode(ISD::ADD, dl, AddrVT, Dst, DAG.getConstant(Offset, dl, AddrVT)), - Src, + Val, DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile, false, DstPtrInfo.getWithOffset(Offset)); @@ -206,8 +210,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getMachineFunction().getSubtarget(); if (!ConstantSize) return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) + RepMovsRepeats Repeats(ConstantSize->getZExtValue()); + if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However @@ -228,26 +232,31 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( if (isBaseRegConflictPossible(DAG, ClobberSet)) return SDValue(); - MVT AVT; - if (Align & 1) - AVT = MVT::i8; - else if (Align & 2) - AVT = MVT::i16; - else if (Align & 4) - // DWORD aligned - AVT = MVT::i32; - else - // QWORD aligned - AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; - - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal, dl); - unsigned BytesLeft = SizeVal % UBytes; + // If the target has enhanced REPMOVSB, then it's at least as fast to use + // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle + // BytesLeft. + if (!Subtarget.hasERMSB() && !(Align & 1)) { + if (Align & 2) + // WORD aligned + Repeats.AVT = MVT::i16; + else if (Align & 4) + // DWORD aligned + Repeats.AVT = MVT::i32; + else + // QWORD aligned + Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; + + if (Repeats.BytesLeft() > 0 && + DAG.getMachineFunction().getFunction()->optForMinSize()) { + // When agressively optimizing for size, avoid generating the code to + // handle BytesLeft. + Repeats.AVT = MVT::i8; + } + } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, - Count, InFlag); + DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); @@ -257,14 +266,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag }; SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); SmallVector Results; Results.push_back(RepMovs); - if (BytesLeft) { + if (Repeats.BytesLeft()) { // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; + unsigned Offset = Repeats.Size - Repeats.BytesLeft(); EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); @@ -275,7 +284,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), - DAG.getConstant(BytesLeft, dl, SizeVT), + DAG.getConstant(Repeats.BytesLeft(), dl, + SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 92a68759195c8850afcd0da7a013aae13232378f..e36a47506ba066845fe5efc110827972f20a6c23 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MCTargetDesc/X86BaseInfo.h" #include "X86Subtarget.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Attributes.h" @@ -139,12 +139,18 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV, return X86II::MO_NO_FLAG; assert(!isTargetCOFF()); + const Function *F = dyn_cast_or_null(GV); - if (isTargetELF()) + if (isTargetELF()) { + if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv())) + // According to psABI, PLT stub clobbers XMM8-XMM15. + // In Regcall calling convention those registers are used for passing + // parameters. Thus we need to prevent lazy binding in Regcall. + return X86II::MO_GOTPCREL; return X86II::MO_PLT; + } if (is64Bit()) { - auto *F = dyn_cast_or_null(GV); if (F && F->hasFnAttribute(Attribute::NonLazyBind)) // If the function is marked as non-lazy, generate an indirect call // which loads from the GOT directly. This avoids runtime overhead @@ -265,6 +271,7 @@ void X86Subtarget::initializeEnvironment() { HasFMA4 = false; HasXOP = false; HasTBM = false; + HasLWP = false; HasMOVBE = false; HasRDRAND = false; HasF16C = false; @@ -279,6 +286,7 @@ void X86Subtarget::initializeEnvironment() { HasCDI = false; HasPFI = false; HasDQI = false; + HasVPOPCNTDQ = false; HasBWI = false; HasVLX = false; HasADX = false; @@ -290,6 +298,9 @@ void X86Subtarget::initializeEnvironment() { HasMWAITX = false; HasCLZERO = false; HasMPX = false; + HasSGX = false; + HasCLFLUSHOPT = false; + HasCLWB = false; IsBTMemSlow = false; IsPMULLDSlow = false; IsSHLDSlow = false; @@ -303,12 +314,14 @@ void X86Subtarget::initializeEnvironment() { HasFastVectorFSQRT = false; HasFastLZCNT = false; HasFastSHLDRotate = false; + HasERMSB = false; HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; CallRegIndirect = false; LEAUsesAG = false; SlowLEA = false; + Slow3OpsLEA = false; SlowIncDec = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? @@ -334,8 +347,8 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS, TargetTriple.getEnvironment() != Triple::CODE16), In16BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() == Triple::CODE16), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) { + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + FrameLowering(*this, getStackAlignment()) { // Determine the PICStyle based on the target selected. if (!isPositionIndependent()) setPICStyle(PICStyles::None); diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index c2c95658482d96c30d7693f607219b5525e5d01d..550e95c39ab5cc2d4f56cccb587f46d0e64185af 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -124,6 +124,9 @@ protected: /// Target has TBM instructions. bool HasTBM; + /// Target has LWP instructions + bool HasLWP; + /// True if the processor has the MOVBE instruction. bool HasMOVBE; @@ -232,6 +235,9 @@ protected: /// True if SHLD based rotate is fast. bool HasFastSHLDRotate; + /// True if the processor has enhanced REP MOVSB/STOSB. + bool HasERMSB; + /// True if the short functions should be padded to prevent /// a stall when returning too early. bool PadShortFunctions; @@ -247,6 +253,11 @@ protected: /// True if the LEA instruction with certain arguments is slow bool SlowLEA; + /// True if the LEA instruction has all three source operands: base, index, + /// and offset or if the LEA instruction uses base and index registers where + /// the base is EBP, RBP,or R13 + bool Slow3OpsLEA; + /// True if INC and DEC instructions are slow when writing to flags bool SlowIncDec; @@ -259,6 +270,9 @@ protected: /// Processor has AVX-512 Conflict Detection Instructions bool HasCDI; + /// Processor has AVX-512 population count Instructions + bool HasVPOPCNTDQ; + /// Processor has AVX-512 Doubleword and Quadword instructions bool HasDQI; @@ -440,6 +454,7 @@ public: bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } bool hasXOP() const { return HasXOP; } bool hasTBM() const { return HasTBM; } + bool hasLWP() const { return HasLWP; } bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } bool hasF16C() const { return HasF16C; } @@ -472,14 +487,17 @@ public: bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } bool callRegIndirect() const { return CallRegIndirect; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } + bool slow3OpsLEA() const { return Slow3OpsLEA; } bool slowIncDec() const { return SlowIncDec; } bool hasCDI() const { return HasCDI; } + bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool hasDQI() const { return HasDQI; } @@ -624,6 +642,9 @@ public: /// Enable the MachineScheduler pass for all X86 subtargets. bool enableMachineScheduler() const override { return true; } + // TODO: Update the regression tests and return true. + bool supportPrintSchedInfo() const override { return false; } + bool enableEarlyIfConversion() const override; /// Return the instruction itineraries based on the subtarget selection. diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 03a1958121ab8e32c7cb9ed46e148e32de01a6af..278b57eb00b74d9245fbe9aaf5ca74a364c40e46 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -24,8 +24,8 @@ #include "X86TargetObjectFile.h" #include "X86TargetTransformInfo.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -61,6 +61,7 @@ static cl::opt EnableMachineCombinerPass("x86-machine-combiner", namespace llvm { void initializeWinEHStatePassPass(PassRegistry &); +void initializeFixupLEAPassPass(PassRegistry &); void initializeX86ExecutionDepsFixPass(PassRegistry &); } // end namespace llvm @@ -75,6 +76,7 @@ extern "C" void LLVMInitializeX86Target() { initializeWinEHStatePassPass(PR); initializeFixupBWInstPassPass(PR); initializeEvexToVexInstPassPass(PR); + initializeFixupLEAPassPass(PR); initializeX86ExecutionDepsFixPass(PR); } @@ -87,7 +89,7 @@ static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSFreeBSD()) return llvm::make_unique(); - if (TT.isOSLinux() || TT.isOSNaCl()) + if (TT.isOSLinux() || TT.isOSNaCl() || TT.isOSIAMCU()) return llvm::make_unique(); if (TT.isOSFuchsia()) return llvm::make_unique(); @@ -286,7 +288,8 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const { auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo()); GISel->RegBankInfo.reset(RBI); - GISel->InstSelector.reset(createX86InstructionSelector(*I, *RBI)); + GISel->InstSelector.reset(createX86InstructionSelector( + *this, *I, *RBI)); #endif I->setGISelAccessor(*GISel); } @@ -320,7 +323,7 @@ namespace { /// X86 Code Generator Pass Configuration Options. class X86PassConfig : public TargetPassConfig { public: - X86PassConfig(X86TargetMachine *TM, PassManagerBase &PM) + X86PassConfig(X86TargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} X86TargetMachine &getX86TargetMachine() const { @@ -366,16 +369,16 @@ INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix", "X86 Execution Dependency Fix", false, false) TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { - return new X86PassConfig(this, PM); + return new X86PassConfig(*this, PM); } void X86PassConfig::addIRPasses() { - addPass(createAtomicExpandPass(&getX86TargetMachine())); + addPass(createAtomicExpandPass()); TargetPassConfig::addIRPasses(); if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createInterleavedAccessPass(TM)); + addPass(createInterleavedAccessPass()); } bool X86PassConfig::addInstSelector() { @@ -430,6 +433,7 @@ bool X86PassConfig::addPreISel() { void X86PassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { + addPass(&LiveRangeShrinkID); addPass(createX86FixupSetCC()); addPass(createX86OptimizeLEAs()); addPass(createX86CallFrameOptimization()); diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index cf933f52604ef8f35f4841e7a1f6d1706b757a59..1bf267d34ec2c39cd300a17a387a2fea614f5570 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -49,6 +49,10 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool isMachineVerifierClean() const override { + return false; + } }; } // end namespace llvm diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 7f70829cb6c60ac9e7148f03673b40898ee83ff5..4fd95717478e9987ed9258db4ffe8feeb3495648 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -9,6 +9,8 @@ #include "X86TargetObjectFile.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Operator.h" #include "llvm/MC/MCContext.h" @@ -16,8 +18,6 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" -#include "llvm/Support/COFF.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index b742fb472372cd9f84c6159c6a98d7610caf94a0..11ba7025e1b73a29b3fed4c9e83762b66e8a55aa 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -247,35 +247,38 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry SSE2UniformConstCostTable[] = { - { ISD::SHL, MVT::v16i8, 2 }, // psllw + pand. - { ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand. - { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. - - { ISD::SHL, MVT::v32i8, 4 }, // 2*(psllw + pand). - { ISD::SRL, MVT::v32i8, 4 }, // 2*(psrlw + pand). - { ISD::SRA, MVT::v32i8, 8 }, // 2*(psrlw, pand, pxor, psubb). - - { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence - { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence - { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence - { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence - { ISD::SDIV, MVT::v8i32, 38 }, // pmuludq sequence - { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence - { ISD::UDIV, MVT::v8i32, 30 }, // pmuludq sequence - { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence + { ISD::SHL, MVT::v16i8, 2 }, // psllw + pand. + { ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand. + { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. + + { ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split. + { ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split. + { ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split. + + { ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split. + { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence + { ISD::UDIV, MVT::v16i16, 12+2 }, // 2*pmulhuw sequence + split. + { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence + { ISD::SDIV, MVT::v8i32, 38+2 }, // 2*pmuludq sequence + split. + { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence + { ISD::UDIV, MVT::v8i32, 30+2 }, // 2*pmuludq sequence + split. + { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence }; if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && ST->hasSSE2()) { // pmuldq sequence. if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX()) - return LT.first * 30; + return LT.first * 32; if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) return LT.first * 15; - if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD, - LT.second)) - return LT.first * Entry->Cost; + // XOP has faster vXi8 shifts. + if ((ISD != ISD::SHL && ISD != ISD::SRL && ISD != ISD::SRA) || + !ST->hasXOP()) + if (const auto *Entry = + CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; } static const CostTblEntry AVX2UniformCostTable[] = { @@ -430,18 +433,18 @@ int X86TTIImpl::getArithmeticInstrCost( { ISD::SRL, MVT::v2i64, 2 }, { ISD::SRA, MVT::v2i64, 2 }, // 256bit shifts require splitting if AVX2 didn't catch them above. - { ISD::SHL, MVT::v32i8, 2 }, - { ISD::SRL, MVT::v32i8, 4 }, - { ISD::SRA, MVT::v32i8, 4 }, - { ISD::SHL, MVT::v16i16, 2 }, - { ISD::SRL, MVT::v16i16, 4 }, - { ISD::SRA, MVT::v16i16, 4 }, - { ISD::SHL, MVT::v8i32, 2 }, - { ISD::SRL, MVT::v8i32, 4 }, - { ISD::SRA, MVT::v8i32, 4 }, - { ISD::SHL, MVT::v4i64, 2 }, - { ISD::SRL, MVT::v4i64, 4 }, - { ISD::SRA, MVT::v4i64, 4 }, + { ISD::SHL, MVT::v32i8, 2+2 }, + { ISD::SRL, MVT::v32i8, 4+2 }, + { ISD::SRA, MVT::v32i8, 4+2 }, + { ISD::SHL, MVT::v16i16, 2+2 }, + { ISD::SRL, MVT::v16i16, 4+2 }, + { ISD::SRA, MVT::v16i16, 4+2 }, + { ISD::SHL, MVT::v8i32, 2+2 }, + { ISD::SRL, MVT::v8i32, 4+2 }, + { ISD::SRA, MVT::v8i32, 4+2 }, + { ISD::SHL, MVT::v4i64, 2+2 }, + { ISD::SRL, MVT::v4i64, 4+2 }, + { ISD::SRA, MVT::v4i64, 4+2 }, }; // Look for XOP lowering tricks. @@ -451,23 +454,28 @@ int X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry SSE2UniformShiftCostTable[] = { // Uniform splats are cheaper for the following instructions. - { ISD::SHL, MVT::v16i16, 2 }, // psllw. - { ISD::SHL, MVT::v8i32, 2 }, // pslld - { ISD::SHL, MVT::v4i64, 2 }, // psllq. - - { ISD::SRL, MVT::v16i16, 2 }, // psrlw. - { ISD::SRL, MVT::v8i32, 2 }, // psrld. - { ISD::SRL, MVT::v4i64, 2 }, // psrlq. - - { ISD::SRA, MVT::v16i16, 2 }, // psraw. - { ISD::SRA, MVT::v8i32, 2 }, // psrad. - { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. - { ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle. + { ISD::SHL, MVT::v16i16, 2+2 }, // 2*psllw + split. + { ISD::SHL, MVT::v8i32, 2+2 }, // 2*pslld + split. + { ISD::SHL, MVT::v4i64, 2+2 }, // 2*psllq + split. + + { ISD::SRL, MVT::v16i16, 2+2 }, // 2*psrlw + split. + { ISD::SRL, MVT::v8i32, 2+2 }, // 2*psrld + split. + { ISD::SRL, MVT::v4i64, 2+2 }, // 2*psrlq + split. + + { ISD::SRA, MVT::v16i16, 2+2 }, // 2*psraw + split. + { ISD::SRA, MVT::v8i32, 2+2 }, // 2*psrad + split. + { ISD::SRA, MVT::v2i64, 4 }, // 2*psrad + shuffle. + { ISD::SRA, MVT::v4i64, 8+2 }, // 2*(2*psrad + shuffle) + split. }; if (ST->hasSSE2() && ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || (Op2Info == TargetTransformInfo::OK_UniformValue))) { + + // Handle AVX2 uniform v4i64 ISD::SRA, it's not worth a table. + if (ISD == ISD::SRA && LT.second == MVT::v4i64 && ST->hasAVX2()) + return LT.first * 4; // 2*psrad + shuffle. + if (const auto *Entry = CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second)) return LT.first * Entry->Cost; @@ -581,28 +589,28 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; static const CostTblEntry SSE41CostTable[] = { - { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence. - { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence. - { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence. - { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld - { ISD::SHL, MVT::v8i32, 2*4 }, // pslld/paddd/cvttps2dq/pmulld - - { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence. - { ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence. - { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SRL, MVT::v16i16, 2*14 }, // pblendvb sequence. - { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend. - { ISD::SRL, MVT::v8i32, 2*11 }, // Shift each lane + blend. - - { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence. - { ISD::SRA, MVT::v32i8, 2*24 }, // pblendvb sequence. - { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence. - { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend. - { ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend. - - { ISD::MUL, MVT::v4i32, 1 } // pmulld + { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence. + { ISD::SHL, MVT::v32i8, 2*11+2 }, // pblendvb sequence + split. + { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SHL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. + { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld + { ISD::SHL, MVT::v8i32, 2*4+2 }, // pslld/paddd/cvttps2dq/pmulld + split + + { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence. + { ISD::SRL, MVT::v32i8, 2*12+2 }, // pblendvb sequence + split. + { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SRL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. + { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend. + { ISD::SRL, MVT::v8i32, 2*11+2 }, // Shift each lane + blend + split. + + { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence. + { ISD::SRA, MVT::v32i8, 2*24+2 }, // pblendvb sequence + split. + { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence. + { ISD::SRA, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. + { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend. + { ISD::SRA, MVT::v8i32, 2*12+2 }, // Shift each lane + blend + split. + + { ISD::MUL, MVT::v4i32, 1 } // pmulld }; if (ST->hasSSE41()) @@ -612,33 +620,33 @@ int X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry SSE2CostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. - { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. - { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. - { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. - { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. - - { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. - { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. - { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. - - { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. - { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. - { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. - - { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. - { ISD::MUL, MVT::v8i16, 1 }, // pmullw - { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle - { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add - - { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/ - { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/ - { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/ - { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/ + { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. + { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SHL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split. + + { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. + { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SRL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split. + + { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. + { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. + { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v4i64, 2*12+2 }, // srl/xor/sub sequence+split. + + { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. + { ISD::MUL, MVT::v8i16, 1 }, // pmullw + { ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle + { ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add + + { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/ + { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/ // It is not a good idea to vectorize division. We have to scalarize it and // in the process we will often end up having to spilling regular @@ -1375,6 +1383,8 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } +unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } + int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed) { @@ -1384,6 +1394,48 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, // CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll // CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll // CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll + static const CostTblEntry AVX512CDCostTbl[] = { + { ISD::CTLZ, MVT::v8i64, 1 }, + { ISD::CTLZ, MVT::v16i32, 1 }, + { ISD::CTLZ, MVT::v32i16, 8 }, + { ISD::CTLZ, MVT::v64i8, 20 }, + { ISD::CTLZ, MVT::v4i64, 1 }, + { ISD::CTLZ, MVT::v8i32, 1 }, + { ISD::CTLZ, MVT::v16i16, 4 }, + { ISD::CTLZ, MVT::v32i8, 10 }, + { ISD::CTLZ, MVT::v2i64, 1 }, + { ISD::CTLZ, MVT::v4i32, 1 }, + { ISD::CTLZ, MVT::v8i16, 4 }, + { ISD::CTLZ, MVT::v16i8, 4 }, + }; + static const CostTblEntry AVX512BWCostTbl[] = { + { ISD::BITREVERSE, MVT::v8i64, 5 }, + { ISD::BITREVERSE, MVT::v16i32, 5 }, + { ISD::BITREVERSE, MVT::v32i16, 5 }, + { ISD::BITREVERSE, MVT::v64i8, 5 }, + { ISD::CTLZ, MVT::v8i64, 23 }, + { ISD::CTLZ, MVT::v16i32, 22 }, + { ISD::CTLZ, MVT::v32i16, 18 }, + { ISD::CTLZ, MVT::v64i8, 17 }, + { ISD::CTPOP, MVT::v8i64, 7 }, + { ISD::CTPOP, MVT::v16i32, 11 }, + { ISD::CTPOP, MVT::v32i16, 9 }, + { ISD::CTPOP, MVT::v64i8, 6 }, + { ISD::CTTZ, MVT::v8i64, 10 }, + { ISD::CTTZ, MVT::v16i32, 14 }, + { ISD::CTTZ, MVT::v32i16, 12 }, + { ISD::CTTZ, MVT::v64i8, 9 }, + }; + static const CostTblEntry AVX512CostTbl[] = { + { ISD::BITREVERSE, MVT::v8i64, 36 }, + { ISD::BITREVERSE, MVT::v16i32, 24 }, + { ISD::CTLZ, MVT::v8i64, 29 }, + { ISD::CTLZ, MVT::v16i32, 35 }, + { ISD::CTPOP, MVT::v8i64, 16 }, + { ISD::CTPOP, MVT::v16i32, 24 }, + { ISD::CTTZ, MVT::v8i64, 20 }, + { ISD::CTTZ, MVT::v16i32, 28 }, + }; static const CostTblEntry XOPCostTbl[] = { { ISD::BITREVERSE, MVT::v4i64, 4 }, { ISD::BITREVERSE, MVT::v8i32, 4 }, @@ -1426,25 +1478,25 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/ }; static const CostTblEntry AVX1CostTbl[] = { - { ISD::BITREVERSE, MVT::v4i64, 10 }, - { ISD::BITREVERSE, MVT::v8i32, 10 }, - { ISD::BITREVERSE, MVT::v16i16, 10 }, - { ISD::BITREVERSE, MVT::v32i8, 10 }, + { ISD::BITREVERSE, MVT::v4i64, 12 }, // 2 x 128-bit Op + extract/insert + { ISD::BITREVERSE, MVT::v8i32, 12 }, // 2 x 128-bit Op + extract/insert + { ISD::BITREVERSE, MVT::v16i16, 12 }, // 2 x 128-bit Op + extract/insert + { ISD::BITREVERSE, MVT::v32i8, 12 }, // 2 x 128-bit Op + extract/insert { ISD::BSWAP, MVT::v4i64, 4 }, { ISD::BSWAP, MVT::v8i32, 4 }, { ISD::BSWAP, MVT::v16i16, 4 }, - { ISD::CTLZ, MVT::v4i64, 46 }, - { ISD::CTLZ, MVT::v8i32, 36 }, - { ISD::CTLZ, MVT::v16i16, 28 }, - { ISD::CTLZ, MVT::v32i8, 18 }, - { ISD::CTPOP, MVT::v4i64, 14 }, - { ISD::CTPOP, MVT::v8i32, 22 }, - { ISD::CTPOP, MVT::v16i16, 18 }, - { ISD::CTPOP, MVT::v32i8, 12 }, - { ISD::CTTZ, MVT::v4i64, 20 }, - { ISD::CTTZ, MVT::v8i32, 28 }, - { ISD::CTTZ, MVT::v16i16, 24 }, - { ISD::CTTZ, MVT::v32i8, 18 }, + { ISD::CTLZ, MVT::v4i64, 48 }, // 2 x 128-bit Op + extract/insert + { ISD::CTLZ, MVT::v8i32, 38 }, // 2 x 128-bit Op + extract/insert + { ISD::CTLZ, MVT::v16i16, 30 }, // 2 x 128-bit Op + extract/insert + { ISD::CTLZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert + { ISD::CTPOP, MVT::v4i64, 16 }, // 2 x 128-bit Op + extract/insert + { ISD::CTPOP, MVT::v8i32, 24 }, // 2 x 128-bit Op + extract/insert + { ISD::CTPOP, MVT::v16i16, 20 }, // 2 x 128-bit Op + extract/insert + { ISD::CTPOP, MVT::v32i8, 14 }, // 2 x 128-bit Op + extract/insert + { ISD::CTTZ, MVT::v4i64, 22 }, // 2 x 128-bit Op + extract/insert + { ISD::CTTZ, MVT::v8i32, 30 }, // 2 x 128-bit Op + extract/insert + { ISD::CTTZ, MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert + { ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert { ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/ { ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ { ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ @@ -1542,6 +1594,18 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, MVT MTy = LT.second; // Attempt to lookup cost. + if (ST->hasCDI()) + if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasBWI()) + if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasAVX512()) + if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; + if (ST->hasXOP()) if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) return LT.first * Entry->Cost; @@ -2114,6 +2178,17 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); } +bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2) { + // X86 specific here are "instruction number 1st priority". + return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, + C1.NumIVMuls, C1.NumBaseAdds, + C1.ScaleCost, C1.ImmCost, C1.SetupCost) < + std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, + C2.NumIVMuls, C2.NumBaseAdds, + C2.ScaleCost, C2.ImmCost, C2.SetupCost); +} + bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { Type *ScalarTy = DataTy->getScalarType(); int DataWidth = isa(ScalarTy) ? diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index 9bef9e80c395ca20cb1d5435ab554c10b9f895e4..09ce2c90498d9fcfc16d9cb2fb1b66e957423bfc 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -76,6 +76,8 @@ public: int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr); + unsigned getAtomicMemIntrinsicMaxElementSize() const; + int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX); @@ -99,6 +101,8 @@ public: int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty); + bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, + TargetTransformInfo::LSRCost &C2); bool isLegalMaskedLoad(Type *DataType); bool isLegalMaskedStore(Type *DataType); bool isLegalMaskedGather(Type *DataType); diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index bc14630584e5b729c339a93a9204a40724b59a36..0c3b343414760b1963520f8ef62c4e7ab83bb74a 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -22,9 +22,9 @@ #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -398,7 +398,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { /*isVarArg=*/false); Function *Trampoline = Function::Create(TrampolineTy, GlobalValue::InternalLinkage, - Twine("__ehhandler$") + GlobalValue::getRealLinkageName( + Twine("__ehhandler$") + GlobalValue::dropLLVMManglingEscape( ParentFunc->getName()), TheModule); BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline); @@ -412,7 +412,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { // Can't use musttail due to prototype mismatch, but we can use tail. Call->setTailCall(true); // Set inreg so we pass it in EAX. - Call->addAttribute(1, Attribute::InReg); + Call->addParamAttr(0, Attribute::InReg); Builder.CreateRet(Call); return Trampoline; } diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 5fc58d83131945da85213dba63af7ef67dfb7ffe..dd27e7ca30aa1e724a015725f49d9522a90bc9a7 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/XCoreMCTargetDesc.h" #include "InstPrinter/XCoreInstPrinter.h" #include "MCTargetDesc/XCoreMCAsmInfo.h" -#include "MCTargetDesc/XCoreMCTargetDesc.h" #include "XCoreTargetStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDwarf.h" @@ -23,8 +23,8 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index b35aa0b95821a921143237fffdab5fbb4ac6cd72..8f7c8a82380a049750335260272e5b2524cf41d9 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "XCore.h" #include "InstPrinter/XCoreInstPrinter.h" +#include "XCore.h" #include "XCoreInstrInfo.h" #include "XCoreMCInstLower.h" #include "XCoreSubtarget.h" diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index a752357400b3eea3718eb4698a77eac2bfbfb887..784612038c09c75d7cc2382f6bab6ee8d615ebec 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -575,18 +575,17 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS) const { assert(RS && "requiresRegisterScavenging failed"); MachineFrameInfo &MFI = MF.getFrameInfo(); - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); XCoreFunctionInfo *XFI = MF.getInfo(); // Reserve slots close to SP or frame pointer for Scavenging spills. // When using SP for small frames, we don't need any scratch registers. // When using SP for large frames, we may need 2 scratch registers. // When using FP, for large or small frames, we may need 1 scratch register. + unsigned Size = TRI.getSpillSize(RC); + unsigned Align = TRI.getSpillAlignment(RC); if (XFI->isLargeFrame(MF) || hasFP(MF)) - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); if (XFI->isLargeFrame(MF) && !hasFP(MF)) - RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 45437815fa371a167eba16291d8db0f3314b28b0..1da189c5cd3114bb23ac5286ea8c31cee6d1b2b3 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include @@ -406,9 +407,9 @@ SDValue XCoreTargetLowering::lowerLoadWordFromAlignedBasePlusOffset( static bool isWordAligned(SDValue Value, SelectionDAG &DAG) { - APInt KnownZero, KnownOne; - DAG.computeKnownBits(Value, KnownZero, KnownOne); - return KnownZero.countTrailingOnes() >= 2; + KnownBits Known; + DAG.computeKnownBits(Value, Known); + return Known.countMinTrailingZeros() >= 2; } SDValue XCoreTargetLowering:: @@ -1130,8 +1131,7 @@ SDValue XCoreTargetLowering::LowerCCCCallTo( unsigned NumBytes = RetCCInfo.getNextStackOffset(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getConstant(NumBytes, dl, PtrVT, true), dl); + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SmallVector, 4> RegsToPass; SmallVector MemOpChains; @@ -1601,13 +1601,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if (OutVal.hasOneUse()) { unsigned BitWidth = OutVal.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(OutVal, DemandedMask) || - TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne, - TLO)) + if (TLI.ShrinkDemandedConstant(OutVal, DemandedMask, TLO) || + TLI.SimplifyDemandedBits(OutVal, DemandedMask, Known, TLO)) DCI.CommitTargetLoweringOpt(TLO); } break; @@ -1618,13 +1617,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if (Time.hasOneUse()) { unsigned BitWidth = Time.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16); - APInt KnownZero, KnownOne; + KnownBits Known; TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Time, DemandedMask) || - TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne, - TLO)) + if (TLI.ShrinkDemandedConstant(Time, DemandedMask, TLO) || + TLI.SimplifyDemandedBits(Time, DemandedMask, Known, TLO)) DCI.CommitTargetLoweringOpt(TLO); } break; @@ -1655,11 +1653,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { - APInt KnownZero, KnownOne; + KnownBits Known; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.computeKnownBits(N2, KnownZero, KnownOne); - if ((KnownZero & Mask) == Mask) { + DAG.computeKnownBits(N2, Known); + if ((Known.Zero & Mask) == Mask) { SDValue Carry = DAG.getConstant(0, dl, VT); SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2); SDValue Ops[] = { Result, Carry }; @@ -1678,11 +1676,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { - APInt KnownZero, KnownOne; + KnownBits Known; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.computeKnownBits(N2, KnownZero, KnownOne); - if ((KnownZero & Mask) == Mask) { + DAG.computeKnownBits(N2, Known); + if ((Known.Zero & Mask) == Mask) { SDValue Borrow = N2; SDValue Result = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), N2); @@ -1694,11 +1692,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) { - APInt KnownZero, KnownOne; + KnownBits Known; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.computeKnownBits(N2, KnownZero, KnownOne); - if ((KnownZero & Mask) == Mask) { + DAG.computeKnownBits(N2, Known); + if ((Known.Zero & Mask) == Mask) { SDValue Borrow = DAG.getConstant(0, dl, VT); SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2); SDValue Ops[] = { Result, Borrow }; @@ -1822,20 +1820,19 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, } void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); + Known.resetAll(); switch (Op.getOpcode()) { default: break; case XCoreISD::LADD: case XCoreISD::LSUB: if (Op.getResNo() == 1) { // Top bits of carry / borrow are clear. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 1); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 1); } break; case ISD::INTRINSIC_W_CHAIN: @@ -1844,24 +1841,24 @@ void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, switch (IntNo) { case Intrinsic::xcore_getts: // High bits are known to be zero. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 16); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 16); break; case Intrinsic::xcore_int: case Intrinsic::xcore_inct: // High bits are known to be zero. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 8); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 8); break; case Intrinsic::xcore_testct: // Result is either 0 or 1. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 1); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 1); break; case Intrinsic::xcore_testwct: // Result is in the range 0 - 4. - KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(), - KnownZero.getBitWidth() - 3); + Known.Zero = APInt::getHighBitsSet(Known.getBitWidth(), + Known.getBitWidth() - 3); break; } } diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 188f4f1fa06b0a3250d7a59e1d29d4a102a849d6..452d5b046055d4003f4e5171598245e295e809d5 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -200,8 +200,7 @@ namespace llvm { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; void computeKnownBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, + KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth = 0) const override; diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index f1d52d5a191fa77af644dd6d8ee5be714787e56f..b87ba6548962207c4cf6f72a856504a3635e866a 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -73,9 +73,10 @@ def XCoreLdwsp : SDNode<"XCoreISD::LDWSP", SDT_XCoreLdwsp, [SDNPHasChain, SDNPMayLoad]>; // These are target-independent nodes, but have target-specific formats. -def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_XCoreCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>; def SDT_XCoreCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; + SDTCisVT<1, i32> ]>; def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; @@ -323,9 +324,9 @@ class F2R_np opc, string OpcStr> : //===----------------------------------------------------------------------===// let Defs = [SP], Uses = [SP] in { -def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt), - "# ADJCALLSTACKDOWN $amt", - [(callseq_start timm:$amt)]>; +def ADJCALLSTACKDOWN : PseudoInstXCore<(outs), (ins i32imm:$amt, i32imm:$amt2), + "# ADJCALLSTACKDOWN $amt, $amt2", + [(callseq_start timm:$amt, timm:$amt2)]>; def ADJCALLSTACKUP : PseudoInstXCore<(outs), (ins i32imm:$amt1, i32imm:$amt2), "# ADJCALLSTACKUP $amt1", [(callseq_end timm:$amt1, timm:$amt2)]>; diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp index 5cc51cd7a99267ad64a28b1fcdd77693280972c6..87532d11ede83f5e99fa1e336a26bbadc585f3e4 100644 --- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp +++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -128,11 +128,11 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) { static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) { do { - SmallVector WUsers(CE->user_begin(), CE->user_end()); + SmallVector WUsers(CE->user_begin(), CE->user_end()); std::sort(WUsers.begin(), WUsers.end()); WUsers.erase(std::unique(WUsers.begin(), WUsers.end()), WUsers.end()); while (!WUsers.empty()) - if (WeakVH WU = WUsers.pop_back_val()) { + if (WeakTrackingVH WU = WUsers.pop_back_val()) { if (PHINode *PN = dyn_cast(WU)) { for (int I = 0, E = PN->getNumIncomingValues(); I < E; ++I) if (PN->getIncomingValue(I) == CE) { @@ -159,12 +159,12 @@ static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) { } static bool rewriteNonInstructionUses(GlobalVariable *GV, Pass *P) { - SmallVector WUsers; + SmallVector WUsers; for (User *U : GV->users()) if (!isa(U)) - WUsers.push_back(WeakVH(U)); + WUsers.push_back(WeakTrackingVH(U)); while (!WUsers.empty()) - if (WeakVH WU = WUsers.pop_back_val()) { + if (WeakTrackingVH WU = WUsers.pop_back_val()) { ConstantExpr *CE = dyn_cast(WU); if (!CE || !replaceConstantExprOp(CE, P)) return false; diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index e91536ca1e8349c0696fe29e1c56c1099bd96e78..75af0e97dfb54725320c8753325c95ec7eb61088 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -10,6 +10,7 @@ #include "XCoreMachineFunctionInfo.h" #include "XCoreInstrInfo.h" #include "llvm/IR/Function.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -35,13 +36,15 @@ int XCoreFunctionInfo::createLRSpillSlot(MachineFunction &MF) { if (LRSpillSlotSet) { return LRSpillSlot; } - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); if (! MF.getFunction()->isVarArg()) { // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. - LRSpillSlot = MFI.CreateFixedObject(RC->getSize(), 0, true); + LRSpillSlot = MFI.CreateFixedObject(TRI.getSpillSize(RC), 0, true); } else { - LRSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); + LRSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlignment(RC), true); } LRSpillSlotSet = true; return LRSpillSlot; @@ -51,9 +54,11 @@ int XCoreFunctionInfo::createFPSpillSlot(MachineFunction &MF) { if (FPSpillSlotSet) { return FPSpillSlot; } - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); - FPSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); + FPSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC), + TRI.getSpillAlignment(RC), true); FPSpillSlotSet = true; return FPSpillSlot; } @@ -62,10 +67,13 @@ const int* XCoreFunctionInfo::createEHSpillSlot(MachineFunction &MF) { if (EHSpillSlotSet) { return EHSpillSlot; } - const TargetRegisterClass *RC = &XCore::GRRegsRegClass; + const TargetRegisterClass &RC = XCore::GRRegsRegClass; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); - EHSpillSlot[0] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); - EHSpillSlot[1] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true); + unsigned Size = TRI.getSpillSize(RC); + unsigned Align = TRI.getSpillAlignment(RC); + EHSpillSlot[0] = MFI.CreateStackObject(Size, Align, true); + EHSpillSlot[1] = MFI.CreateStackObject(Size, Align, true); EHSpillSlotSet = true; return EHSpillSlot; } diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index e28e05c7f6a82ebaceeb4c3c85c8e05fd76ac4e3..cb23399995dadf8a37f579bca330b6ab30566f65 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// +#include "XCoreTargetMachine.h" #include "MCTargetDesc/XCoreMCTargetDesc.h" #include "XCore.h" -#include "XCoreTargetMachine.h" #include "XCoreTargetObjectFile.h" #include "XCoreTargetTransformInfo.h" #include "llvm/ADT/Optional.h" @@ -54,7 +54,7 @@ namespace { /// XCore Code Generator Pass Configuration Options. class XCorePassConfig : public TargetPassConfig { public: - XCorePassConfig(XCoreTargetMachine *TM, PassManagerBase &PM) + XCorePassConfig(XCoreTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} XCoreTargetMachine &getXCoreTargetMachine() const { @@ -70,11 +70,11 @@ public: } // end anonymous namespace TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) { - return new XCorePassConfig(this, PM); + return new XCorePassConfig(*this, PM); } void XCorePassConfig::addIRPasses() { - addPass(createAtomicExpandPass(&getXCoreTargetMachine())); + addPass(createAtomicExpandPass()); TargetPassConfig::addIRPasses(); } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 2b53f01a996deb9968533f96d4188238d6d309ec..a047b3c9d9fc3cbaf80d9fc98483725649753923 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -15,9 +15,9 @@ #define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H #include "XCoreSubtarget.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp index ad8693fd325e586df66130d15faa2ddf0acee60a..c60a262e719cb8bcf5bd9584f2b5114efba7329e 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -9,10 +9,10 @@ #include "XCoreTargetObjectFile.h" #include "XCoreSubtarget.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Testing/CMakeLists.txt b/lib/Testing/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..fc23e64eeb7a439011c9dd162b83cae7cbc4ca59 --- /dev/null +++ b/lib/Testing/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Support) diff --git a/lib/Testing/LLVMBuild.txt b/lib/Testing/LLVMBuild.txt new file mode 100644 index 0000000000000000000000000000000000000000..cdf83736298e4698bd1d209588712656e0aab416 --- /dev/null +++ b/lib/Testing/LLVMBuild.txt @@ -0,0 +1,19 @@ +;===- ./lib/Testing/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = Support diff --git a/lib/Testing/Support/CMakeLists.txt b/lib/Testing/Support/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa8dfe59c8bdbbc52199b651b28dc14e477f451f --- /dev/null +++ b/lib/Testing/Support/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_library(LLVMTestingSupport + Error.cpp + + BUILDTREE_ONLY + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Testing/Support + ) + +include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) +include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googlemock/include) +target_link_libraries(LLVMTestingSupport PRIVATE gtest) \ No newline at end of file diff --git a/lib/Testing/Support/Error.cpp b/lib/Testing/Support/Error.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ce0da44da408b48ffdc2d2104ffe70c871e6274f --- /dev/null +++ b/lib/Testing/Support/Error.cpp @@ -0,0 +1,22 @@ +//===- llvm/Testing/Support/Error.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Testing/Support/Error.h" + +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +llvm::detail::ErrorHolder llvm::detail::TakeError(llvm::Error Err) { + bool Succeeded = !static_cast(Err); + std::string Message; + if (!Succeeded) + Message = toString(std::move(Err)); + return {Succeeded, Message}; +} diff --git a/lib/Testing/Support/LLVMBuild.txt b/lib/Testing/Support/LLVMBuild.txt new file mode 100644 index 0000000000000000000000000000000000000000..40853e8172d55b7b1d25466e184d51c03dce4b00 --- /dev/null +++ b/lib/Testing/Support/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./Testing/Support/LLVMBuild.txt --------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = TestingSupport +parent = Libraries +required_libraries = Support diff --git a/lib/ToolDrivers/CMakeLists.txt b/lib/ToolDrivers/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ad458450fda3e8f59e3b3bf5f3f7bd76a35632c4 --- /dev/null +++ b/lib/ToolDrivers/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(llvm-lib) diff --git a/lib/ToolDrivers/LLVMBuild.txt b/lib/ToolDrivers/LLVMBuild.txt new file mode 100644 index 0000000000000000000000000000000000000000..7da9a5c01005011101994aa988a804b3bb43f0ca --- /dev/null +++ b/lib/ToolDrivers/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/ToolDrivers/LLVMBuild.txt --------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = llvm-lib + +[component_0] +type = Group +name = ToolDrivers +parent = Libraries diff --git a/lib/LibDriver/CMakeLists.txt b/lib/ToolDrivers/llvm-lib/CMakeLists.txt similarity index 100% rename from lib/LibDriver/CMakeLists.txt rename to lib/ToolDrivers/llvm-lib/CMakeLists.txt diff --git a/lib/LibDriver/LLVMBuild.txt b/lib/ToolDrivers/llvm-lib/LLVMBuild.txt similarity index 92% rename from lib/LibDriver/LLVMBuild.txt rename to lib/ToolDrivers/llvm-lib/LLVMBuild.txt index 799dc997c0bb8b891180959ec597dd8269c21dfd..e4b32ec4af90d86d5377472100873c5c29eaa286 100644 --- a/lib/LibDriver/LLVMBuild.txt +++ b/lib/ToolDrivers/llvm-lib/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = LibDriver parent = Libraries -required_libraries = Object Option Support +required_libraries = BinaryFormat Object Option Support diff --git a/lib/LibDriver/LibDriver.cpp b/lib/ToolDrivers/llvm-lib/LibDriver.cpp similarity index 93% rename from lib/LibDriver/LibDriver.cpp rename to lib/ToolDrivers/llvm-lib/LibDriver.cpp index bcdec4f7a93399229016b670d7778ae63b963c82..797e4ffc2d45638d681e2d3552f6ffa2b9dddbd7 100644 --- a/lib/LibDriver/LibDriver.cpp +++ b/lib/ToolDrivers/llvm-lib/LibDriver.cpp @@ -12,16 +12,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/LibDriver/LibDriver.h" +#include "llvm/ToolDrivers/llvm-lib/LibDriver.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/BinaryFormat/Magic.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/StringSaver.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" +#include "llvm/Support/StringSaver.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -121,7 +122,7 @@ int llvm::libDriverMain(llvm::ArrayRef ArgsArr) { for (auto *Arg : Args.filtered(OPT_UNKNOWN)) llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; - if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) { + if (!Args.hasArgNoClaim(OPT_INPUT)) { // No input files. To match lib.exe, silently do nothing. return 0; } @@ -143,11 +144,10 @@ int llvm::libDriverMain(llvm::ArrayRef ArgsArr) { }); return 1; } - sys::fs::file_magic Magic = - sys::fs::identify_magic(MOrErr->Buf->getBuffer()); - if (Magic != sys::fs::file_magic::coff_object && - Magic != sys::fs::file_magic::bitcode && - Magic != sys::fs::file_magic::windows_resource) { + llvm::file_magic Magic = llvm::identify_magic(MOrErr->Buf->getBuffer()); + if (Magic != llvm::file_magic::coff_object && + Magic != llvm::file_magic::bitcode && + Magic != llvm::file_magic::windows_resource) { llvm::errs() << Arg->getValue() << ": not a COFF object, bitcode or resource file\n"; return 1; diff --git a/lib/LibDriver/Options.td b/lib/ToolDrivers/llvm-lib/Options.td similarity index 100% rename from lib/LibDriver/Options.td rename to lib/ToolDrivers/llvm-lib/Options.td diff --git a/lib/Transforms/Coroutines/CoroCleanup.cpp b/lib/Transforms/Coroutines/CoroCleanup.cpp index a97db6fde454eb7b98e646b0dca3d7268c95bfc2..359876627fce13735d98ceb6f2002c2319c8337e 100644 --- a/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -101,7 +101,9 @@ namespace { struct CoroCleanup : FunctionPass { static char ID; // Pass identification, replacement for typeid - CoroCleanup() : FunctionPass(ID) {} + CoroCleanup() : FunctionPass(ID) { + initializeCoroCleanupPass(*PassRegistry::getPassRegistry()); + } std::unique_ptr L; @@ -124,6 +126,7 @@ struct CoroCleanup : FunctionPass { if (!L) AU.setPreservesAll(); } + StringRef getPassName() const override { return "Coroutine Cleanup"; } }; } diff --git a/lib/Transforms/Coroutines/CoroEarly.cpp b/lib/Transforms/Coroutines/CoroEarly.cpp index e8bb0ca99d8a5e59df5029156dfd1b67a98ec167..ba05896af150cf69500af8c891be892bd0354365 100644 --- a/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/lib/Transforms/Coroutines/CoroEarly.cpp @@ -183,7 +183,9 @@ namespace { struct CoroEarly : public FunctionPass { static char ID; // Pass identification, replacement for typeid. - CoroEarly() : FunctionPass(ID) {} + CoroEarly() : FunctionPass(ID) { + initializeCoroEarlyPass(*PassRegistry::getPassRegistry()); + } std::unique_ptr L; @@ -208,6 +210,9 @@ struct CoroEarly : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); } + StringRef getPassName() const override { + return "Lower early coroutine intrinsics"; + } }; } diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp index c6ac3f614ff7eec47e489542ab566a8b5d710413..42fd6d7461459cce3fcd5bbb940b2359266bb029 100644 --- a/lib/Transforms/Coroutines/CoroElide.cpp +++ b/lib/Transforms/Coroutines/CoroElide.cpp @@ -258,7 +258,9 @@ static bool replaceDevirtTrigger(Function &F) { namespace { struct CoroElide : FunctionPass { static char ID; - CoroElide() : FunctionPass(ID) {} + CoroElide() : FunctionPass(ID) { + initializeCoroElidePass(*PassRegistry::getPassRegistry()); + } std::unique_ptr L; @@ -301,6 +303,7 @@ struct CoroElide : FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); } + StringRef getPassName() const override { return "Coroutine Elision"; } }; } diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp index 19e6789dfa74a09b7c01c8f0e37070c07c49c5c2..85e9003ec3c56ac73210a6cb722319e5eb49390b 100644 --- a/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/lib/Transforms/Coroutines/CoroFrame.cpp @@ -177,7 +177,7 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) // consume. Note, that crossing coro.save also requires a spill, as any code // between coro.save and coro.suspend may resume the coroutine and all of the // state needs to be saved by that time. - auto markSuspendBlock = [&](IntrinsicInst* BarrierInst) { + auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) { BasicBlock *SuspendBlock = BarrierInst->getParent(); auto &B = getBlockData(SuspendBlock); B.Suspend = true; @@ -347,6 +347,27 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, return FrameTy; } +// We need to make room to insert a spill after initial PHIs, but before +// catchswitch instruction. Placing it before violates the requirement that +// catchswitch, like all other EHPads must be the first nonPHI in a block. +// +// Split away catchswitch into a separate block and insert in its place: +// +// cleanuppad cleanupret. +// +// cleanupret instruction will act as an insert point for the spill. +static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { + BasicBlock *CurrentBlock = CatchSwitch->getParent(); + BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch); + CurrentBlock->getTerminator()->eraseFromParent(); + + auto *CleanupPad = + CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock); + auto *CleanupRet = + CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock); + return CleanupRet; +} + // Replace all alloca and SSA values that are accessed across suspend points // with GetElementPointer from coroutine frame + loads and stores. Create an // AllocaSpillBB that will become the new entry block for the resume parts of @@ -437,8 +458,11 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { InsertPt = NewBB->getTerminator(); } else if (dyn_cast(CurrentValue)) { // Skip the PHINodes and EH pads instructions. - InsertPt = - &*cast(E.def())->getParent()->getFirstInsertionPt(); + BasicBlock *DefBlock = cast(E.def())->getParent(); + if (auto *CSI = dyn_cast(DefBlock->getTerminator())) + InsertPt = splitBeforeCatchSwitch(CSI); + else + InsertPt = &*DefBlock->getFirstInsertionPt(); } else { // For all other values, the spill is placed immediately after // the definition. @@ -495,6 +519,78 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { return FramePtr; } +// Sets the unwind edge of an instruction to a particular successor. +static void setUnwindEdgeTo(TerminatorInst *TI, BasicBlock *Succ) { + if (auto *II = dyn_cast(TI)) + II->setUnwindDest(Succ); + else if (auto *CS = dyn_cast(TI)) + CS->setUnwindDest(Succ); + else if (auto *CR = dyn_cast(TI)) + CR->setUnwindDest(Succ); + else + llvm_unreachable("unexpected terminator instruction"); +} + +// Replaces all uses of OldPred with the NewPred block in all PHINodes in a +// block. +static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred, + BasicBlock *NewPred, + PHINode *LandingPadReplacement) { + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + + // We manually update the LandingPadReplacement PHINode and it is the last + // PHI Node. So, if we find it, we are done. + if (LandingPadReplacement == PN) + break; + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != OldPred) + BBIdx = PN->getBasicBlockIndex(OldPred); + + assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!"); + PN->setIncomingBlock(BBIdx, NewPred); + } +} + +// Uses SplitEdge unless the successor block is an EHPad, in which case do EH +// specific handling. +static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ, + LandingPadInst *OriginalPad, + PHINode *LandingPadReplacement) { + auto *PadInst = Succ->getFirstNonPHI(); + if (!LandingPadReplacement && !PadInst->isEHPad()) + return SplitEdge(BB, Succ); + + auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ); + setUnwindEdgeTo(BB->getTerminator(), NewBB); + updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement); + + if (LandingPadReplacement) { + auto *NewLP = OriginalPad->clone(); + auto *Terminator = BranchInst::Create(Succ, NewBB); + NewLP->insertBefore(Terminator); + LandingPadReplacement->addIncoming(NewLP, NewBB); + return NewBB; + } + Value *ParentPad = nullptr; + if (auto *FuncletPad = dyn_cast(PadInst)) + ParentPad = FuncletPad->getParentPad(); + else if (auto *CatchSwitch = dyn_cast(PadInst)) + ParentPad = CatchSwitch->getParentPad(); + else + llvm_unreachable("handling for other EHPads not implemented yet"); + + auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB); + CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB); + return NewBB; +} + static void rewritePHIs(BasicBlock &BB) { // For every incoming edge we will create a block holding all // incoming values in a single PHI nodes. @@ -502,7 +598,7 @@ static void rewritePHIs(BasicBlock &BB) { // loop: // %n.val = phi i32[%n, %entry], [%inc, %loop] // - // It will create: + // It will create: // // loop.from.entry: // %n.loop.pre = phi i32 [%n, %entry] @@ -517,9 +613,22 @@ static void rewritePHIs(BasicBlock &BB) { // TODO: Simplify PHINodes in the basic block to remove duplicate // predecessors. + LandingPadInst *LandingPad = nullptr; + PHINode *ReplPHI = nullptr; + if ((LandingPad = dyn_cast_or_null(BB.getFirstNonPHI()))) { + // ehAwareSplitEdge will clone the LandingPad in all the edge blocks. + // We replace the original landing pad with a PHINode that will collect the + // results from all of them. + ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad); + ReplPHI->takeName(LandingPad); + LandingPad->replaceAllUsesWith(ReplPHI); + // We will erase the original landing pad at the end of this function after + // ehAwareSplitEdge cloned it in the transition blocks. + } + SmallVector Preds(pred_begin(&BB), pred_end(&BB)); for (BasicBlock *Pred : Preds) { - auto *IncomingBB = SplitEdge(Pred, &BB); + auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI); IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName()); auto *PN = cast(&BB.front()); do { @@ -531,7 +640,14 @@ static void rewritePHIs(BasicBlock &BB) { InputV->addIncoming(V, Pred); PN->setIncomingValue(Index, InputV); PN = dyn_cast(PN->getNextNode()); - } while (PN); + } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced + // the landing pad. + } + + if (LandingPad) { + // Calls to ehAwareSplitEdge function cloned the original lading pad. + // No longer need it. + LandingPad->eraseFromParent(); } } @@ -683,9 +799,9 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { splitAround(CSI, "CoroSuspend"); } - // Put fallthrough CoroEnd into its own block. Note: Shape::buildFrom places - // the fallthrough coro.end as the first element of CoroEnds array. - splitAround(Shape.CoroEnds.front(), "CoroEnd"); + // Put CoroEnds into their own blocks. + for (CoroEndInst *CE : Shape.CoroEnds) + splitAround(CE, "CoroEnd"); // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will // never has its definition separated from the PHI by the suspend point. @@ -697,19 +813,24 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { IRBuilder<> Builder(F.getContext()); SpillInfo Spills; - // See if there are materializable instructions across suspend points. - for (Instruction &I : instructions(F)) - if (materializable(I)) - for (User *U : I.users()) - if (Checker.isDefinitionAcrossSuspend(I, U)) - Spills.emplace_back(&I, U); - - // Rewrite materializable instructions to be materialized at the use point. - DEBUG(dump("Materializations", Spills)); - rewriteMaterializableInstructions(Builder, Spills); + for (int Repeat = 0; Repeat < 4; ++Repeat) { + // See if there are materializable instructions across suspend points. + for (Instruction &I : instructions(F)) + if (materializable(I)) + for (User *U : I.users()) + if (Checker.isDefinitionAcrossSuspend(I, U)) + Spills.emplace_back(&I, U); + + if (Spills.empty()) + break; + + // Rewrite materializable instructions to be materialized at the use point. + DEBUG(dump("Materializations", Spills)); + rewriteMaterializableInstructions(Builder, Spills); + Spills.clear(); + } // Collect the spills for arguments and other not-materializable values. - Spills.clear(); for (Argument &A : F.args()) for (User *U : A.users()) if (Checker.isDefinitionAcrossSuspend(A, U)) @@ -731,8 +852,6 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { if (I.getType()->isTokenTy()) report_fatal_error( "token definition is separated from the use by a suspend point"); - assert(!materializable(I) && - "rewriteMaterializable did not do its job"); Spills.emplace_back(&I, U); } } diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp index ab648f884c5b18bb7af7ef71ca222d7ed2f32008..173dc05f058475b19ba79a9ed06d2fcbf85dabd4 100644 --- a/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/lib/Transforms/Coroutines/CoroSplit.cpp @@ -22,8 +22,8 @@ #include "CoroInternal.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/Transforms/Scalar.h" @@ -216,8 +216,8 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, Function *NewF = Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage, F.getName() + Suffix, M); - NewF->addAttribute(1, Attribute::NonNull); - NewF->addAttribute(1, Attribute::NoAlias); + NewF->addParamAttr(0, Attribute::NonNull); + NewF->addParamAttr(0, Attribute::NoAlias); ValueToValueMapTy VMap; // Replace all args with undefs. The buildCoroutineFrame algorithm already @@ -228,14 +228,6 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, SmallVector Returns; - if (DISubprogram *SP = F.getSubprogram()) { - // If we have debug info, add mapping for the metadata nodes that should not - // be cloned by CloneFunctionInfo. - auto &MD = VMap.MD(); - MD[SP->getUnit()].reset(SP->getUnit()); - MD[SP->getType()].reset(SP->getType()); - MD[SP->getFile()].reset(SP->getFile()); - } CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns); // Remove old returns. @@ -245,9 +237,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, // Remove old return attributes. NewF->removeAttributes( AttributeList::ReturnIndex, - AttributeList::get( - NewF->getContext(), AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewF->getReturnType()))); + AttributeFuncs::typeIncompatible(NewF->getReturnType())); // Make AllocaSpillBlock the new entry block. auto *SwitchBB = cast(VMap[ResumeEntry]); @@ -511,12 +501,87 @@ static void simplifySuspendPoints(coro::Shape &Shape) { S.resize(N); } +static SmallPtrSet getCoroBeginPredBlocks(CoroBeginInst *CB) { + // Collect all blocks that we need to look for instructions to relocate. + SmallPtrSet RelocBlocks; + SmallVector Work; + Work.push_back(CB->getParent()); + + do { + BasicBlock *Current = Work.pop_back_val(); + for (BasicBlock *BB : predecessors(Current)) + if (RelocBlocks.count(BB) == 0) { + RelocBlocks.insert(BB); + Work.push_back(BB); + } + } while (!Work.empty()); + return RelocBlocks; +} + +static SmallPtrSet +getNotRelocatableInstructions(CoroBeginInst *CoroBegin, + SmallPtrSetImpl &RelocBlocks) { + SmallPtrSet DoNotRelocate; + // Collect all instructions that we should not relocate + SmallVector Work; + + // Start with CoroBegin and terminators of all preceding blocks. + Work.push_back(CoroBegin); + BasicBlock *CoroBeginBB = CoroBegin->getParent(); + for (BasicBlock *BB : RelocBlocks) + if (BB != CoroBeginBB) + Work.push_back(BB->getTerminator()); + + // For every instruction in the Work list, place its operands in DoNotRelocate + // set. + do { + Instruction *Current = Work.pop_back_val(); + DoNotRelocate.insert(Current); + for (Value *U : Current->operands()) { + auto *I = dyn_cast(U); + if (!I) + continue; + if (isa(U)) + continue; + if (DoNotRelocate.count(I) == 0) { + Work.push_back(I); + DoNotRelocate.insert(I); + } + } + } while (!Work.empty()); + return DoNotRelocate; +} + +static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) { + // Analyze which non-alloca instructions are needed for allocation and + // relocate the rest to after coro.begin. We need to do it, since some of the + // targets of those instructions may be placed into coroutine frame memory + // for which becomes available after coro.begin intrinsic. + + auto BlockSet = getCoroBeginPredBlocks(CoroBegin); + auto DoNotRelocateSet = getNotRelocatableInstructions(CoroBegin, BlockSet); + + Instruction *InsertPt = CoroBegin->getNextNode(); + BasicBlock &BB = F.getEntryBlock(); // TODO: Look at other blocks as well. + for (auto B = BB.begin(), E = BB.end(); B != E;) { + Instruction &I = *B++; + if (isa(&I)) + continue; + if (&I == CoroBegin) + break; + if (DoNotRelocateSet.count(&I)) + continue; + I.moveBefore(InsertPt); + } +} + static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { coro::Shape Shape(F); if (!Shape.CoroBegin) return; simplifySuspendPoints(Shape); + relocateInstructionBefore(Shape.CoroBegin, F); buildCoroutineFrame(F, Shape); replaceFrameSize(Shape); @@ -616,7 +681,9 @@ namespace { struct CoroSplit : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - CoroSplit() : CallGraphSCCPass(ID) {} + CoroSplit() : CallGraphSCCPass(ID) { + initializeCoroSplitPass(*PassRegistry::getPassRegistry()); + } bool Run = false; @@ -662,6 +729,7 @@ struct CoroSplit : public CallGraphSCCPass { void getAnalysisUsage(AnalysisUsage &AU) const override { CallGraphSCCPass::getAnalysisUsage(AU); } + StringRef getPassName() const override { return "Coroutine Splitting"; } }; } diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp index ea48043f9381ffbd35c254816a63667ffde127c3..44e1f9b404eda31fc465f26a326fbe8e4f11a386 100644 --- a/lib/Transforms/Coroutines/Coroutines.cpp +++ b/lib/Transforms/Coroutines/Coroutines.cpp @@ -218,6 +218,8 @@ void coro::Shape::buildFrom(Function &F) { size_t FinalSuspendIndex = 0; clear(*this); SmallVector CoroFrames; + SmallVector UnusedCoroSaves; + for (Instruction &I : instructions(F)) { if (auto II = dyn_cast(&I)) { switch (II->getIntrinsicID()) { @@ -229,6 +231,12 @@ void coro::Shape::buildFrom(Function &F) { case Intrinsic::coro_frame: CoroFrames.push_back(cast(II)); break; + case Intrinsic::coro_save: + // After optimizations, coro_suspends using this coro_save might have + // been removed, remember orphaned coro_saves to remove them later. + if (II->use_empty()) + UnusedCoroSaves.push_back(cast(II)); + break; case Intrinsic::coro_suspend: CoroSuspends.push_back(cast(II)); if (CoroSuspends.back()->isFinal()) { @@ -311,4 +319,8 @@ void coro::Shape::buildFrom(Function &F) { if (HasFinalSuspend && FinalSuspendIndex != CoroSuspends.size() - 1) std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back()); + + // Remove orphaned coro.saves. + for (CoroSaveInst *CoroSave : UnusedCoroSaves) + CoroSave->eraseFromParent(); } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index c43557b4e1a9235577d708cca8eb94585c971a07..d8cf8d3f5da2148b4ed4247264dd7ec980d28573 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -102,28 +102,25 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost - SmallVector AttributesVec; - const AttributeList &PAL = F->getAttributes(); - - // Add any return attributes. - AttributesVec.push_back(PAL.getRetAttributes()); + SmallVector ArgAttrVec; + AttributeList PAL = F->getAttributes(); // First, determine the new argument list - unsigned ArgIndex = 1; + unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++ArgIndex) { + ++I, ++ArgNo) { if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast(I->getType())->getElementType(); StructType *STy = cast(AgTy); Params.insert(Params.end(), STy->element_begin(), STy->element_end()); - AttributesVec.insert(AttributesVec.end(), STy->getNumElements(), - AttributeSet()); + ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), + AttributeSet()); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); - AttributesVec.push_back(PAL.getParamAttributes(ArgIndex)); + ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; @@ -168,7 +165,7 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, Params.push_back(GetElementPtrInst::getIndexedType( cast(I->getType()->getScalarType())->getElementType(), ArgIndex.second)); - AttributesVec.push_back(AttributeSet()); + ArgAttrVec.push_back(AttributeSet()); assert(Params.back()); } @@ -179,9 +176,6 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, } } - // Add any function attributes. - AttributesVec.push_back(PAL.getFnAttributes()); - Type *RetTy = FTy->getReturnType(); // Construct the new function type using the new arguments. @@ -200,8 +194,9 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // Recompute the parameter attributes list based on the new arguments for // the function. - NF->setAttributes(AttributeList::get(F->getContext(), AttributesVec)); - AttributesVec.clear(); + NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(), + PAL.getRetAttributes(), ArgAttrVec)); + ArgAttrVec.clear(); F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); @@ -216,18 +211,15 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, Instruction *Call = CS.getInstruction(); const AttributeList &CallPAL = CS.getAttributes(); - // Add any return attributes. - AttributesVec.push_back(CallPAL.getRetAttributes()); - // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); - ArgIndex = 1; + ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++AI, ++ArgIndex) + ++I, ++AI, ++ArgNo) if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument - AttributesVec.push_back(CallPAL.getAttributes(ArgIndex)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast(I->getType())->getElementType(); @@ -240,7 +232,7 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call)); - AttributesVec.push_back(AttributeSet()); + ArgAttrVec.push_back(AttributeSet()); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. @@ -255,7 +247,7 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, if (!ArgIndex.second.empty()) { Ops.reserve(ArgIndex.second.size()); Type *ElTy = V->getType(); - for (unsigned long II : ArgIndex.second) { + for (auto II : ArgIndex.second) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = @@ -283,48 +275,46 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, newLoad->setAAMetadata(AAInfo); Args.push_back(newLoad); - AttributesVec.push_back(AttributeSet()); + ArgAttrVec.push_back(AttributeSet()); } } // Push any varargs arguments on the list. - for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { + for (; AI != CS.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); - AttributesVec.push_back(CallPAL.getAttributes(ArgIndex)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } - // Add any function attributes. - AttributesVec.push_back(CallPAL.getFnAttributes()); - SmallVector OpBundles; CS.getOperandBundlesAsDefs(OpBundles); - Instruction *New; + CallSite NewCS; if (InvokeInst *II = dyn_cast(Call)) { - New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, OpBundles, "", Call); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes( - AttributeList::get(II->getContext(), AttributesVec)); + NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, OpBundles, "", Call); } else { - New = CallInst::Create(NF, Args, OpBundles, "", Call); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes( - AttributeList::get(New->getContext(), AttributesVec)); - cast(New)->setTailCallKind( - cast(Call)->getTailCallKind()); + auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call); + NewCall->setTailCallKind(cast(Call)->getTailCallKind()); + NewCS = NewCall; } - New->setDebugLoc(Call->getDebugLoc()); + NewCS.setCallingConv(CS.getCallingConv()); + NewCS.setAttributes( + AttributeList::get(F->getContext(), CallPAL.getFnAttributes(), + CallPAL.getRetAttributes(), ArgAttrVec)); + NewCS->setDebugLoc(Call->getDebugLoc()); + uint64_t W; + if (Call->extractProfTotalWeight(W)) + NewCS->setProfWeight(W); Args.clear(); - AttributesVec.clear(); + ArgAttrVec.clear(); // Update the callgraph to know that the callsite has been transformed. if (ReplaceCallSite) - (*ReplaceCallSite)(CS, CallSite(New)); + (*ReplaceCallSite)(CS, NewCS); if (!Call->use_empty()) { - Call->replaceAllUsesWith(New); - New->takeName(Call); + Call->replaceAllUsesWith(NewCS.getInstruction()); + NewCS->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of @@ -849,17 +839,12 @@ promoteArguments(Function *F, function_ref AARGetter, // avoiding a register copy. if (PtrArg->hasStructRetAttr()) { unsigned ArgNo = PtrArg->getArgNo(); - F->setAttributes( - F->getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + F->removeParamAttr(ArgNo, Attribute::StructRet); + F->addParamAttr(ArgNo, Attribute::NoAlias); for (Use &U : F->uses()) { CallSite CS(U.getUser()); - CS.setAttributes( - CS.getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, - Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + CS.removeParamAttr(ArgNo, Attribute::StructRet); + CS.addParamAttr(ArgNo, Attribute::NoAlias); } } diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp index 1b111de061576e4afefcd2a4c1fee9f7237bfde1..d94aa5da85601bed4249963f908c8c8d55a74e6e 100644 --- a/lib/Transforms/IPO/CrossDSOCFI.cpp +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -95,6 +95,17 @@ void CrossDSOCFI::buildCFICheck(Module &M) { } } + NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); + if (CfiFunctionsMD) { + for (auto Func : CfiFunctionsMD->operands()) { + assert(Func->getNumOperands() >= 2); + for (unsigned I = 2; I < Func->getNumOperands(); ++I) + if (ConstantInt *TypeId = + extractNumericTypeId(cast(Func->getOperand(I).get()))) + TypeIds.insert(TypeId->getZExtValue()); + } + } + LLVMContext &Ctx = M.getContext(); Constant *C = M.getOrInsertFunction( "__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx), diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 66eb33f246ac018dc603668f77c08f0cdc723573..8e26849ea9e370e3c027a74e928201d5901b2dac 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -167,44 +167,39 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // Drop any attributes that were on the vararg arguments. AttributeList PAL = CS.getAttributes(); - if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) { - SmallVector AttributesVec; - for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i) - AttributesVec.push_back(PAL.getSlotAttributes(i)); - if (PAL.hasAttributes(AttributeList::FunctionIndex)) - AttributesVec.push_back(AttributeList::get(Fn.getContext(), - AttributeList::FunctionIndex, - PAL.getFnAttributes())); - PAL = AttributeList::get(Fn.getContext(), AttributesVec); + if (!PAL.isEmpty()) { + SmallVector ArgAttrs; + for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) + ArgAttrs.push_back(PAL.getParamAttributes(ArgNo)); + PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttributes(), + PAL.getRetAttributes(), ArgAttrs); } SmallVector OpBundles; CS.getOperandBundlesAsDefs(OpBundles); - Instruction *New; + CallSite NewCS; if (InvokeInst *II = dyn_cast(Call)) { - New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, OpBundles, "", Call); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(PAL); + NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, OpBundles, "", Call); } else { - New = CallInst::Create(NF, Args, OpBundles, "", Call); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(PAL); - cast(New)->setTailCallKind( - cast(Call)->getTailCallKind()); + NewCS = CallInst::Create(NF, Args, OpBundles, "", Call); + cast(NewCS.getInstruction()) + ->setTailCallKind(cast(Call)->getTailCallKind()); } - New->setDebugLoc(Call->getDebugLoc()); + NewCS.setCallingConv(CS.getCallingConv()); + NewCS.setAttributes(PAL); + NewCS->setDebugLoc(Call->getDebugLoc()); uint64_t W; if (Call->extractProfTotalWeight(W)) - New->setProfWeight(W); + NewCS->setProfWeight(W); Args.clear(); if (!Call->use_empty()) - Call->replaceAllUsesWith(New); + Call->replaceAllUsesWith(NewCS.getInstruction()); - New->takeName(Call); + NewCS->takeName(Call); // Finally, remove the old call from the program, reducing the use-count of // F. @@ -685,13 +680,9 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { bool HasLiveReturnedArg = false; // Set up to build a new list of parameter attributes. - SmallVector AttributesVec; + SmallVector ArgAttrVec; const AttributeList &PAL = F->getAttributes(); - // Reserve an empty slot for the return value attributes, which we will - // compute last. - AttributesVec.push_back(AttributeSet()); - // Remember which arguments are still alive. SmallVector ArgAlive(FTy->getNumParams(), false); // Construct the new parameter list from non-dead arguments. Also construct @@ -704,8 +695,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { if (LiveValues.erase(Arg)) { Params.push_back(I->getType()); ArgAlive[i] = true; - AttributesVec.push_back(PAL.getParamAttributes(i + 1)); - HasLiveReturnedArg |= PAL.hasAttribute(i + 1, Attribute::Returned); + ArgAttrVec.push_back(PAL.getParamAttributes(i)); + HasLiveReturnedArg |= PAL.hasParamAttribute(i, Attribute::Returned); } else { ++NumArgumentsEliminated; DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument " << i @@ -791,14 +782,12 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { assert(!RAttrs.overlaps(AttributeFuncs::typeIncompatible(NRetTy)) && "Return attributes no longer compatible?"); - AttributesVec[0] = AttributeSet::get(F->getContext(), RAttrs); - - // Transfer the function attributes, if any. - AttributesVec.push_back(PAL.getFnAttributes()); + AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs); // Reconstruct the AttributesList based on the vector we constructed. - assert(AttributesVec.size() == Params.size() + 2); - AttributeList NewPAL = AttributeList::get(F->getContext(), AttributesVec); + assert(ArgAttrVec.size() == Params.size()); + AttributeList NewPAL = AttributeList::get( + F->getContext(), PAL.getFnAttributes(), RetAttrs, ArgAttrVec); // Create the new function type based on the recomputed parameters. FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); @@ -825,14 +814,14 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { CallSite CS(F->user_back()); Instruction *Call = CS.getInstruction(); - AttributesVec.clear(); + ArgAttrVec.clear(); const AttributeList &CallPAL = CS.getAttributes(); // Adjust the call return attributes in case the function was changed to // return void. AttrBuilder RAttrs(CallPAL.getRetAttributes()); RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy)); - AttributesVec.push_back(AttributeSet::get(F->getContext(), RAttrs)); + AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs); // Declare these outside of the loops, so we can reuse them for the second // loop, which loops the varargs. @@ -844,57 +833,55 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { if (ArgAlive[i]) { Args.push_back(*I); // Get original parameter attributes, but skip return attributes. - AttributeSet Attrs = CallPAL.getParamAttributes(i + 1); + AttributeSet Attrs = CallPAL.getParamAttributes(i); if (NRetTy != RetTy && Attrs.hasAttribute(Attribute::Returned)) { // If the return type has changed, then get rid of 'returned' on the // call site. The alternative is to make all 'returned' attributes on // call sites keep the return value alive just like 'returned' // attributes on function declaration but it's less clearly a win and // this is not an expected case anyway - AttributesVec.push_back(AttributeSet::get( + ArgAttrVec.push_back(AttributeSet::get( F->getContext(), AttrBuilder(Attrs).removeAttribute(Attribute::Returned))); } else { // Otherwise, use the original attributes. - AttributesVec.push_back(Attrs); + ArgAttrVec.push_back(Attrs); } } // Push any varargs arguments on the list. Don't forget their attributes. for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { Args.push_back(*I); - AttributesVec.push_back(CallPAL.getParamAttributes(i + 1)); + ArgAttrVec.push_back(CallPAL.getParamAttributes(i)); } - AttributesVec.push_back(CallPAL.getFnAttributes()); - // Reconstruct the AttributesList based on the vector we constructed. - AttributeList NewCallPAL = - AttributeList::get(F->getContext(), AttributesVec); + assert(ArgAttrVec.size() == Args.size()); + AttributeList NewCallPAL = AttributeList::get( + F->getContext(), CallPAL.getFnAttributes(), RetAttrs, ArgAttrVec); SmallVector OpBundles; CS.getOperandBundlesAsDefs(OpBundles); - Instruction *New; + CallSite NewCS; if (InvokeInst *II = dyn_cast(Call)) { - New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, OpBundles, "", Call->getParent()); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(NewCallPAL); + NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, OpBundles, "", Call->getParent()); } else { - New = CallInst::Create(NF, Args, OpBundles, "", Call); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(NewCallPAL); - cast(New)->setTailCallKind( - cast(Call)->getTailCallKind()); + NewCS = CallInst::Create(NF, Args, OpBundles, "", Call); + cast(NewCS.getInstruction()) + ->setTailCallKind(cast(Call)->getTailCallKind()); } - New->setDebugLoc(Call->getDebugLoc()); + NewCS.setCallingConv(CS.getCallingConv()); + NewCS.setAttributes(NewCallPAL); + NewCS->setDebugLoc(Call->getDebugLoc()); uint64_t W; if (Call->extractProfTotalWeight(W)) - New->setProfWeight(W); - + NewCS->setProfWeight(W); Args.clear(); + ArgAttrVec.clear(); + Instruction *New = NewCS.getInstruction(); if (!Call->use_empty()) { if (New->getType() == Call->getType()) { // Return type not changed? Just replace users then. diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp index 98c4b1740306dd665f0a0d994e1656431643c020..ecff88c88dcba16cf060c1d7c66a26c90727386d 100644 --- a/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -17,9 +17,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/GlobalStatus.h" -#include "llvm/Pass.h" using namespace llvm; #define DEBUG_TYPE "elim-avail-extern" diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 479fd182598a7ff5e6aeebf4200ef628bbee47c8..d1147f7d844b5c3cf466e312e5a6894b73577cf6 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include using namespace llvm; @@ -53,18 +53,18 @@ static void makeVisible(GlobalValue &GV, bool Delete) { } namespace { - /// @brief A pass to extract specific functions and their dependencies. + /// @brief A pass to extract specific global values and their dependencies. class GVExtractorPass : public ModulePass { SetVector Named; bool deleteStuff; public: static char ID; // Pass identification, replacement for typeid - /// FunctionExtractorPass - If deleteFn is true, this pass deletes as the - /// specified function. Otherwise, it deletes as much of the module as - /// possible, except for the function specified. - /// - explicit GVExtractorPass(std::vector& GVs, bool deleteS = true) + /// If deleteS is true, this pass deletes the specified global values. + /// Otherwise, it deletes as much of the module as possible, except for the + /// global values specified. + explicit GVExtractorPass(std::vector &GVs, + bool deleteS = true) : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {} bool runOnModule(Module &M) override { diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 80fea977f4003727441b69746a19ad68551761c9..813a4b6e283198409c067985957d9c50b8157220 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -14,7 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/FunctionAttrs.h" -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" @@ -34,7 +33,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; #define DEBUG_TYPE "functionattrs" @@ -222,15 +221,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) { MadeChange = true; // Clear out any existing attributes. - AttrBuilder B; - B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); - F->removeAttributes( - AttributeList::FunctionIndex, - AttributeList::get(F->getContext(), AttributeList::FunctionIndex, B)); + F->removeFnAttr(Attribute::ReadOnly); + F->removeFnAttr(Attribute::ReadNone); // Add in the new attribute. - F->addAttribute(AttributeList::FunctionIndex, - ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); + F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone); if (ReadsMemory) ++NumReadOnly; @@ -495,9 +490,6 @@ determinePointerReadAttrs(Argument *A, static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; - AttrBuilder B; - B.addAttribute(Attribute::Returned); - // Check each function in turn, determining if an argument is always returned. for (Function *F : SCCNodes) { // We can infer and propagate function attributes only when we know that the @@ -535,7 +527,7 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { if (Value *RetArg = FindRetArg()) { auto *A = cast(RetArg); - A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::Returned); ++NumReturned; Changed = true; } @@ -593,9 +585,6 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { ArgumentGraph AG; - AttrBuilder B; - B.addAttribute(Attribute::NoCapture); - // Check each function in turn, determining which pointer arguments are not // captured. for (Function *F : SCCNodes) { @@ -614,7 +603,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { - A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } @@ -633,8 +622,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. - A->addAttr( - AttributeList::get(F->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } else { @@ -660,9 +648,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { Self.insert(&*A); Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); if (R != Attribute::None) { - AttrBuilder B; - B.addAttribute(R); - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(R); Changed = true; R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; } @@ -687,7 +673,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { if (ArgumentSCC[0]->Uses.size() == 1 && ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { Argument *A = ArgumentSCC[0]->Definition; - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } @@ -729,7 +715,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; } @@ -760,15 +746,12 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { } if (ReadAttr != Attribute::None) { - AttrBuilder B, R; - B.addAttribute(ReadAttr); - R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; // Clear out existing readonly/readnone attributes - A->removeAttr( - AttributeList::get(A->getContext(), A->getArgNo() + 1, R)); - A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B)); + A->removeAttr(Attribute::ReadOnly); + A->removeAttr(Attribute::ReadNone); + A->addAttr(ReadAttr); ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg; Changed = true; } @@ -828,7 +811,7 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { case Instruction::Call: case Instruction::Invoke: { CallSite CS(RVI); - if (CS.paramHasAttr(0, Attribute::NoAlias)) + if (CS.hasRetAttr(Attribute::NoAlias)) break; if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) break; @@ -851,7 +834,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { // pointers. for (Function *F : SCCNodes) { // Already noalias. - if (F->doesNotAlias(0)) + if (F->returnDoesNotAlias()) continue; // We can infer and propagate function attributes only when we know that the @@ -871,10 +854,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { bool MadeChange = false; for (Function *F : SCCNodes) { - if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) + if (F->returnDoesNotAlias() || + !F->getReturnType()->isPointerTy()) continue; - F->setDoesNotAlias(0); + F->setReturnDoesNotAlias(); ++NumNoAlias; MadeChange = true; } @@ -1203,6 +1187,10 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { SCCNodes.insert(F); } + // Skip it if the SCC only contains optnone functions. + if (SCCNodes.empty()) + return Changed; + Changed |= addArgumentReturnedAttrs(SCCNodes); Changed |= addReadAttrs(SCCNodes, AARGetter); Changed |= addArgumentAttrs(SCCNodes); diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index d1bf6e3553ace07d2869a7e3aee07ab282a0ecc8..6d34ab8b0d9608153ae43c54c48ea344096824fc 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/IntrinsicInst.h" @@ -25,7 +26,6 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Object/IRObjectFile.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/SourceMgr.h" @@ -117,7 +117,7 @@ namespace { /// - [insert you fancy metric here] static const GlobalValueSummary * selectCallee(const ModuleSummaryIndex &Index, - const GlobalValueSummaryList &CalleeSummaryList, + ArrayRef> CalleeSummaryList, unsigned Threshold, StringRef CallerModulePath) { auto It = llvm::find_if( CalleeSummaryList, @@ -168,19 +168,6 @@ selectCallee(const ModuleSummaryIndex &Index, return cast(It->get()); } -/// Return the summary for the function \p GUID that fits the \p Threshold, or -/// null if there's no match. -static const GlobalValueSummary *selectCallee(GlobalValue::GUID GUID, - unsigned Threshold, - const ModuleSummaryIndex &Index, - StringRef CallerModulePath) { - auto CalleeSummaryList = Index.findGlobalValueSummaryList(GUID); - if (CalleeSummaryList == Index.end()) - return nullptr; // This function does not have a summary - return selectCallee(Index, CalleeSummaryList->second, Threshold, - CallerModulePath); -} - using EdgeInfo = std::tuple; @@ -194,19 +181,23 @@ static void computeImportForFunction( FunctionImporter::ImportMapTy &ImportList, StringMap *ExportLists = nullptr) { for (auto &Edge : Summary.calls()) { - auto GUID = Edge.first.getGUID(); - DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n"); + ValueInfo VI = Edge.first; + DEBUG(dbgs() << " edge -> " << VI.getGUID() << " Threshold:" << Threshold + << "\n"); - if (Index.findGlobalValueSummaryList(GUID) == Index.end()) { + if (VI.getSummaryList().empty()) { // For SamplePGO, the indirect call targets for local functions will // have its original name annotated in profile. We try to find the // corresponding PGOFuncName as the GUID. - GUID = Index.getGUIDFromOriginalID(GUID); + auto GUID = Index.getGUIDFromOriginalID(VI.getGUID()); if (GUID == 0) continue; + VI = Index.getValueInfo(GUID); + if (!VI) + continue; } - if (DefinedGVSummaries.count(GUID)) { + if (DefinedGVSummaries.count(VI.getGUID())) { DEBUG(dbgs() << "ignored! Target already in destination module.\n"); continue; } @@ -222,8 +213,8 @@ static void computeImportForFunction( const auto NewThreshold = Threshold * GetBonusMultiplier(Edge.second.Hotness); - auto *CalleeSummary = - selectCallee(GUID, NewThreshold, Index, Summary.modulePath()); + auto *CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold, + Summary.modulePath()); if (!CalleeSummary) { DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n"); continue; @@ -255,7 +246,7 @@ static void computeImportForFunction( const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite); auto ExportModulePath = ResolvedCalleeSummary->modulePath(); - auto &ProcessedThreshold = ImportList[ExportModulePath][GUID]; + auto &ProcessedThreshold = ImportList[ExportModulePath][VI.getGUID()]; /// Since the traversal of the call graph is DFS, we can revisit a function /// a second time with a higher threshold. In this case, it is added back to /// the worklist with the new threshold. @@ -271,7 +262,7 @@ static void computeImportForFunction( // Make exports in the source module. if (ExportLists) { auto &ExportList = (*ExportLists)[ExportModulePath]; - ExportList.insert(GUID); + ExportList.insert(VI.getGUID()); if (!PreviouslyImported) { // This is the first time this function was exported from its source // module, so mark all functions and globals it references as exported @@ -291,7 +282,7 @@ static void computeImportForFunction( } // Insert the newly imported function to the worklist. - Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, GUID); + Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, VI.getGUID()); } } @@ -301,8 +292,7 @@ static void computeImportForFunction( static void ComputeImportForModule( const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList, - StringMap *ExportLists = nullptr, - const DenseSet *DeadSymbols = nullptr) { + StringMap *ExportLists = nullptr) { // Worklist contains the list of function imported in this module, for which // we will analyse the callees and may import further down the callgraph. SmallVector Worklist; @@ -310,7 +300,7 @@ static void ComputeImportForModule( // Populate the worklist with the import for the functions in the current // module for (auto &GVSummary : DefinedGVSummaries) { - if (DeadSymbols && DeadSymbols->count(GVSummary.first)) { + if (!Index.isGlobalValueLive(GVSummary.second)) { DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n"); continue; } @@ -353,15 +343,14 @@ void llvm::ComputeCrossModuleImport( const ModuleSummaryIndex &Index, const StringMap &ModuleToDefinedGVSummaries, StringMap &ImportLists, - StringMap &ExportLists, - const DenseSet *DeadSymbols) { + StringMap &ExportLists) { // For each module that has function defined, compute the import/export lists. for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) { auto &ImportList = ImportLists[DefinedGVSummaries.first()]; DEBUG(dbgs() << "Computing import for Module '" << DefinedGVSummaries.first() << "'\n"); ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList, - &ExportLists, DeadSymbols); + &ExportLists); } // When computing imports we added all GUIDs referenced by anything @@ -423,84 +412,71 @@ void llvm::ComputeCrossModuleImportForModule( #endif } -DenseSet llvm::computeDeadSymbols( - const ModuleSummaryIndex &Index, +void llvm::computeDeadSymbols( + ModuleSummaryIndex &Index, const DenseSet &GUIDPreservedSymbols) { + assert(!Index.withGlobalValueDeadStripping()); if (!ComputeDead) - return DenseSet(); + return; if (GUIDPreservedSymbols.empty()) // Don't do anything when nothing is live, this is friendly with tests. - return DenseSet(); - DenseSet LiveSymbols = GUIDPreservedSymbols; - SmallVector Worklist; - Worklist.reserve(LiveSymbols.size() * 2); - for (auto GUID : LiveSymbols) { - DEBUG(dbgs() << "Live root: " << GUID << "\n"); - Worklist.push_back(GUID); - } - // Add values flagged in the index as live roots to the worklist. - for (const auto &Entry : Index) { - bool IsLiveRoot = llvm::any_of( - Entry.second, - [&](const std::unique_ptr &Summary) { - return Summary->liveRoot(); - }); - if (!IsLiveRoot) + return; + unsigned LiveSymbols = 0; + SmallVector Worklist; + Worklist.reserve(GUIDPreservedSymbols.size() * 2); + for (auto GUID : GUIDPreservedSymbols) { + ValueInfo VI = Index.getValueInfo(GUID); + if (!VI) continue; - DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n"); - Worklist.push_back(Entry.first); + for (auto &S : VI.getSummaryList()) + S->setLive(true); } - while (!Worklist.empty()) { - auto GUID = Worklist.pop_back_val(); - auto It = Index.findGlobalValueSummaryList(GUID); - if (It == Index.end()) { - DEBUG(dbgs() << "Not in index: " << GUID << "\n"); - continue; - } - - // FIXME: we should only make the prevailing copy live here - for (auto &Summary : It->second) { - for (auto Ref : Summary->refs()) { - auto RefGUID = Ref.getGUID(); - if (LiveSymbols.insert(RefGUID).second) { - DEBUG(dbgs() << "Marking live (ref): " << RefGUID << "\n"); - Worklist.push_back(RefGUID); - } - } - if (auto *FS = dyn_cast(Summary.get())) { - for (auto Call : FS->calls()) { - auto CallGUID = Call.first.getGUID(); - if (LiveSymbols.insert(CallGUID).second) { - DEBUG(dbgs() << "Marking live (call): " << CallGUID << "\n"); - Worklist.push_back(CallGUID); - } - } + // Add values flagged in the index as live roots to the worklist. + for (const auto &Entry : Index) + for (auto &S : Entry.second.SummaryList) + if (S->isLive()) { + DEBUG(dbgs() << "Live root: " << Entry.first << "\n"); + Worklist.push_back(ValueInfo(&Entry)); + ++LiveSymbols; + break; } + + // Make value live and add it to the worklist if it was not live before. + // FIXME: we should only make the prevailing copy live here + auto visit = [&](ValueInfo VI) { + for (auto &S : VI.getSummaryList()) + if (S->isLive()) + return; + for (auto &S : VI.getSummaryList()) + S->setLive(true); + ++LiveSymbols; + Worklist.push_back(VI); + }; + + while (!Worklist.empty()) { + auto VI = Worklist.pop_back_val(); + for (auto &Summary : VI.getSummaryList()) { + for (auto Ref : Summary->refs()) + visit(Ref); + if (auto *FS = dyn_cast(Summary.get())) + for (auto Call : FS->calls()) + visit(Call.first); if (auto *AS = dyn_cast(Summary.get())) { auto AliaseeGUID = AS->getAliasee().getOriginalName(); - if (LiveSymbols.insert(AliaseeGUID).second) { - DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n"); - Worklist.push_back(AliaseeGUID); - } + ValueInfo AliaseeVI = Index.getValueInfo(AliaseeGUID); + if (AliaseeVI) + visit(AliaseeVI); } } } - DenseSet DeadSymbols; - DeadSymbols.reserve( - std::min(Index.size(), Index.size() - LiveSymbols.size())); - for (auto &Entry : Index) { - auto GUID = Entry.first; - if (!LiveSymbols.count(GUID)) { - DEBUG(dbgs() << "Marking dead: " << GUID << "\n"); - DeadSymbols.insert(GUID); - } - } - DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and " - << DeadSymbols.size() << " symbols Dead \n"); - NumDeadSymbols += DeadSymbols.size(); - NumLiveSymbols += LiveSymbols.size(); - return DeadSymbols; + Index.setWithGlobalValueDeadStripping(); + + unsigned DeadSymbols = Index.size() - LiveSymbols; + DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols + << " symbols Dead \n"); + NumDeadSymbols += DeadSymbols; + NumLiveSymbols += LiveSymbols; } /// Compute the set of summaries needed for a ThinLTO backend compilation of @@ -557,7 +533,7 @@ void llvm::thinLTOResolveWeakForLinkerModule( // need to add support here for creating either a function or // variable declaration, and return the new GlobalValue* for // the caller to use. - assert(false && "Expected function or variable"); + llvm_unreachable("Expected function or variable"); }; auto updateLinkage = [&](GlobalValue &GV) { @@ -621,8 +597,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule, return true; // Lookup the linkage recorded in the summaries during global analysis. - const auto &GS = DefinedGlobals.find(GV.getGUID()); - GlobalValue::LinkageTypes Linkage; + auto GS = DefinedGlobals.find(GV.getGUID()); if (GS == DefinedGlobals.end()) { // Must have been promoted (possibly conservatively). Find original // name so that we can access the correct summary and see if it can @@ -634,7 +609,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule, std::string OrigId = GlobalValue::getGlobalIdentifier( OrigName, GlobalValue::InternalLinkage, TheModule.getSourceFileName()); - const auto &GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId)); + GS = DefinedGlobals.find(GlobalValue::getGUID(OrigId)); if (GS == DefinedGlobals.end()) { // Also check the original non-promoted non-globalized name. In some // cases a preempted weak value is linked in as a local copy because @@ -642,15 +617,11 @@ void llvm::thinLTOInternalizeModule(Module &TheModule, // In that case, since it was originally not a local value, it was // recorded in the index using the original name. // FIXME: This may not be needed once PR27866 is fixed. - const auto &GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName)); + GS = DefinedGlobals.find(GlobalValue::getGUID(OrigName)); assert(GS != DefinedGlobals.end()); - Linkage = GS->second->linkage(); - } else { - Linkage = GS->second->linkage(); } - } else - Linkage = GS->second->linkage(); - return !GlobalValue::isLocalLinkage(Linkage); + } + return !GlobalValue::isLocalLinkage(GS->second->linkage()); }; // FIXME: See if we can just internalize directly here via linkage changes @@ -825,7 +796,7 @@ static bool doImportingForModule(Module &M) { // is only enabled when testing importing via the 'opt' tool, which does // not do the ThinLink that would normally determine what values to promote. for (auto &I : *Index) { - for (auto &S : I.second) { + for (auto &S : I.second.SummaryList) { if (GlobalValue::isLocalLinkage(S->linkage())) S->setLinkage(GlobalValue::ExternalLinkage); } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ade4f21ceb5240171c1b22cec766bfce5f73b34c..f277a51ae659aec3d84710d31531e41f02e21908 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -239,7 +239,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // we delete a constant array, we may also be holding pointer to one of its // elements (or an element of one of its elements if we're dealing with an // array of arrays) in the worklist. - SmallVector WorkList(V->user_begin(), V->user_end()); + SmallVector WorkList(V->user_begin(), V->user_end()); while (!WorkList.empty()) { Value *UV = WorkList.pop_back_val(); if (!UV) @@ -1792,7 +1792,9 @@ static void makeAllConstantUsesInstructions(Constant *C) { NewU->insertBefore(UI); UI->replaceUsesOfWith(U, NewU); } - U->dropAllReferences(); + // We've replaced all the uses, so destroy the constant. (destroyConstant + // will update value handles and metadata.) + U->destroyConstant(); } } @@ -1979,16 +1981,11 @@ static void ChangeCalleesToFastCall(Function *F) { } } -static AttributeList StripNest(LLVMContext &C, const AttributeList &Attrs) { - for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { - unsigned Index = Attrs.getSlotIndex(i); - if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest)) - continue; - - // There can be only one. - return Attrs.removeAttribute(C, Index, Attribute::Nest); - } - +static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) { + // There can be at most one attribute set with a nest attribute. + unsigned NestIndex; + if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex)) + return Attrs.removeAttribute(C, NestIndex, Attribute::Nest); return Attrs; } diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp index 4705ebe265ae120289e0097a29a55d69234a9a28..e47d881d1127a16aa13a1d51722b62342525def0 100644 --- a/lib/Transforms/IPO/GlobalSplit.cpp +++ b/lib/Transforms/IPO/GlobalSplit.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/GlobalSplit.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Constants.h" @@ -23,6 +22,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index 349807496dc2c0dc0f8cd188a76aa61275d64dcc..f79b61037f1dba2c0168cb1dc7becd308e99f8d0 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" @@ -24,6 +23,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; #define DEBUG_TYPE "ipconstprop" diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 89518f3c5faec25bf841babd897766c517ee3a9e..5bb305ca84d037d07e9e101032349faf5c9e949a 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/Initialization.h" #include "llvm-c/Transforms/IPO.h" -#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/InitializePasses.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp index 2ef299d9a2f0e9db996c9c572f910083d6f64587..15d7515cc84251e2e95f5ec2e130b52897a19552 100644 --- a/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/InferFunctionAttrs.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 6c83c99ae3be5ebb83a0c3fa10a7eaee5fbade61..ad89e40661c671afe6010caf32f478716c886b98 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -502,7 +502,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache); + InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -519,44 +519,51 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, Function *Caller = CS.getCaller(); Function *Callee = CS.getCalledFunction(); - // If this call site is dead and it is to a readonly function, we should - // just delete the call instead of trying to inline it, regardless of - // size. This happens because IPSCCP propagates the result out of the - // call and then we're left with the dead call. - if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) { - DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() - << "\n"); - // Update the call graph by deleting the edge from Callee to Caller. - CG[Caller]->removeCallEdgeFor(CS); - CS.getInstruction()->eraseFromParent(); - ++NumCallsDeleted; - } else { - // We can only inline direct calls to non-declarations. - if (!Callee || Callee->isDeclaration()) - continue; + // We can only inline direct calls to non-declarations. + if (!Callee || Callee->isDeclaration()) + continue; + Instruction *Instr = CS.getInstruction(); + + bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI); + + int InlineHistoryID; + if (!IsTriviallyDead) { // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. - int InlineHistoryID = CallSites[CSi].second; + InlineHistoryID = CallSites[CSi].second; if (InlineHistoryID != -1 && InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; + } + + // FIXME for new PM: because of the old PM we currently generate ORE and + // in turn BFI on demand. With the new PM, the ORE dependency should + // just become a regular analysis dependency. + OptimizationRemarkEmitter ORE(Caller); + // If the policy determines that we should inline this function, + // delete the call instead. + if (!shouldInline(CS, GetInlineCost, ORE)) + continue; + + // If this call site is dead and it is to a readonly function, we should + // just delete the call instead of trying to inline it, regardless of + // size. This happens because IPSCCP propagates the result out of the + // call and then we're left with the dead call. + if (IsTriviallyDead) { + DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n"); + // Update the call graph by deleting the edge from Callee to Caller. + CG[Caller]->removeCallEdgeFor(CS); + Instr->eraseFromParent(); + ++NumCallsDeleted; + } else { // Get DebugLoc to report. CS will be invalid after Inliner. - DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + DebugLoc DLoc = Instr->getDebugLoc(); BasicBlock *Block = CS.getParent(); - // FIXME for new PM: because of the old PM we currently generate ORE and - // in turn BFI on demand. With the new PM, the ORE dependency should - // just become a regular analysis dependency. - OptimizationRemarkEmitter ORE(Caller); - - // If the policy determines that we should inline this function, - // try to do so. - if (!shouldInline(CS, GetInlineCost, ORE)) - continue; // Attempt to inline the function. using namespace ore; @@ -872,7 +879,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( - /*cg=*/nullptr, &GetAssumptionCache, + /*cg=*/nullptr, &GetAssumptionCache, PSI, &FAM.getResult(*(CS.getCaller())), &FAM.getResult(Callee)); diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index 9c83f88b2210c7442bf5220a0df5edd56cab43a3..a8b0f32fd785ec79b6eb4d7806d4564756e9796d 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = IPO parent = Transforms library_name = ipo -required_libraries = Analysis BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation +required_libraries = Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index f898c3b5a935855e201d8055eaa58c250c2a0822..c74b0a35e296df696315e952455263fbe05da4a9 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/Dominators.h" @@ -22,6 +21,7 @@ #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CodeExtractor.h" diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index 785207efbe5c8e2df3494ddfa75ce170dafa97ba..b406c22c69d7ae534586ebac22c11f1f0fe9e448 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -206,17 +207,26 @@ struct ByteArrayInfo { class GlobalTypeMember final : TrailingObjects { GlobalObject *GO; size_t NTypes; + // For functions: true if this is a definition (either in the merged module or + // in one of the thinlto modules). + bool IsDefinition; + // For functions: true if this function is either defined or used in a thinlto + // module and its jumptable entry needs to be exported to thinlto backends. + bool IsExported; friend TrailingObjects; size_t numTrailingObjects(OverloadToken) const { return NTypes; } public: static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO, + bool IsDefinition, bool IsExported, ArrayRef Types) { auto *GTM = static_cast(Alloc.Allocate( totalSizeToAlloc(Types.size()), alignof(GlobalTypeMember))); GTM->GO = GO; GTM->NTypes = Types.size(); + GTM->IsDefinition = IsDefinition; + GTM->IsExported = IsExported; std::uninitialized_copy(Types.begin(), Types.end(), GTM->getTrailingObjects()); return GTM; @@ -224,6 +234,12 @@ public: GlobalObject *getGlobal() const { return GO; } + bool isDefinition() const { + return IsDefinition; + } + bool isExported() const { + return IsExported; + } ArrayRef types() const { return makeArrayRef(getTrailingObjects(), NTypes); } @@ -235,7 +251,6 @@ class LowerTypeTestsModule { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; - bool LinkerSubsectionsViaSymbols; Triple::ArchType Arch; Triple::OSType OS; Triple::ObjectFormatType ObjectFormat; @@ -295,6 +310,7 @@ class LowerTypeTestsModule { void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL); TypeIdLowering importTypeId(StringRef TypeId); void importTypeTest(CallInst *CI); + void importFunction(Function *F, bool isDefinition); BitSetInfo buildBitSet(Metadata *TypeId, @@ -475,13 +491,9 @@ void LowerTypeTestsModule::allocateByteArrays() { // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures // that the pc-relative displacement is folded into the lea instead of the // test instruction getting another displacement. - if (LinkerSubsectionsViaSymbols) { - BAI->ByteArray->replaceAllUsesWith(GEP); - } else { - GlobalAlias *Alias = GlobalAlias::create( - Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M); - BAI->ByteArray->replaceAllUsesWith(Alias); - } + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M); + BAI->ByteArray->replaceAllUsesWith(Alias); BAI->ByteArray->eraseFromParent(); } @@ -502,7 +514,7 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, return createMaskedBitTest(B, TIL.InlineBits, BitOffset); } else { Constant *ByteArray = TIL.TheByteArray; - if (!LinkerSubsectionsViaSymbols && AvoidReuse && !ImportSummary) { + if (AvoidReuse && !ImportSummary) { // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. @@ -608,8 +620,25 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, if (TIL.TheKind == TypeTestResolution::AllOnes) return OffsetInRange; - TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false); - IRBuilder<> ThenB(Term); + // See if the intrinsic is used in the following common pattern: + // br(llvm.type.test(...), thenbb, elsebb) + // where nothing happens between the type test and the br. + // If so, create slightly simpler IR. + if (CI->hasOneUse()) + if (auto *Br = dyn_cast(*CI->user_begin())) + if (CI->getNextNode() == Br) { + BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator()); + BasicBlock *Else = Br->getSuccessor(1); + BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange); + NewBr->setMetadata(LLVMContext::MD_prof, + Br->getMetadata(LLVMContext::MD_prof)); + ReplaceInstWithInst(InitialBB->getTerminator(), NewBr); + + IRBuilder<> ThenB(CI); + return createBitSetTest(ThenB, TIL, BitOffset); + } + + IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false)); // Now that we know that the offset is in range and aligned, load the // appropriate bit from the bitset. @@ -680,17 +709,13 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( ConstantInt::get(Int32Ty, I * 2)}; Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); - if (LinkerSubsectionsViaSymbols) { - GV->replaceAllUsesWith(CombinedGlobalElemPtr); - } else { - assert(GV->getType()->getAddressSpace() == 0); - GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0, - GV->getLinkage(), "", - CombinedGlobalElemPtr, &M); - GAlias->setVisibility(GV->getVisibility()); - GAlias->takeName(GV); - GV->replaceAllUsesWith(GAlias); - } + assert(GV->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = + GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(), + "", CombinedGlobalElemPtr, &M); + GAlias->setVisibility(GV->getVisibility()); + GAlias->takeName(GV); + GV->replaceAllUsesWith(GAlias); GV->eraseFromParent(); } } @@ -812,6 +837,41 @@ void LowerTypeTestsModule::importTypeTest(CallInst *CI) { CI->eraseFromParent(); } +// ThinLTO backend: the function F has a jump table entry; update this module +// accordingly. isDefinition describes the type of the jump table entry. +void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) { + assert(F->getType()->getAddressSpace() == 0); + + // Declaration of a local function - nothing to do. + if (F->isDeclarationForLinker() && isDefinition) + return; + + GlobalValue::VisibilityTypes Visibility = F->getVisibility(); + std::string Name = F->getName(); + Function *FDecl; + + if (F->isDeclarationForLinker() && !isDefinition) { + // Declaration of an external function. + FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, + Name + ".cfi_jt", &M); + FDecl->setVisibility(GlobalValue::HiddenVisibility); + } else { + // Definition. + assert(isDefinition); + F->setName(Name + ".cfi"); + F->setLinkage(GlobalValue::ExternalLinkage); + F->setVisibility(GlobalValue::HiddenVisibility); + FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, + Name, &M); + FDecl->setVisibility(Visibility); + } + + if (F->isWeakForLinker()) + replaceWeakDeclarationWithJumpTablePtr(F, FDecl); + else + F->replaceAllUsesWith(FDecl); +} + void LowerTypeTestsModule::lowerTypeTestCalls( ArrayRef TypeIds, Constant *CombinedGlobalAddr, const DenseMap &GlobalLayout) { @@ -1135,7 +1195,6 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( // arithmetic that we normally use for globals. // FIXME: find a better way to represent the jumptable in the IR. - assert(!Functions.empty()); // Build a simple layout based on the regular layout of jump tables. @@ -1159,6 +1218,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( // references to the original functions with references to the aliases. for (unsigned I = 0; I != Functions.size(); ++I) { Function *F = cast(Functions[I]->getGlobal()); + bool IsDefinition = Functions[I]->isDefinition(); Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( ConstantExpr::getInBoundsGetElementPtr( @@ -1166,8 +1226,18 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( ArrayRef{ConstantInt::get(IntPtrTy, 0), ConstantInt::get(IntPtrTy, I)}), F->getType()); - if (LinkerSubsectionsViaSymbols || F->isDeclarationForLinker()) { - + if (Functions[I]->isExported()) { + if (IsDefinition) { + ExportSummary->cfiFunctionDefs().insert(F->getName()); + } else { + GlobalAlias *JtAlias = GlobalAlias::create( + F->getValueType(), 0, GlobalValue::ExternalLinkage, + F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M); + JtAlias->setVisibility(GlobalValue::HiddenVisibility); + ExportSummary->cfiFunctionDecls().insert(F->getName()); + } + } + if (!IsDefinition) { if (F->isWeakForLinker()) replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr); else @@ -1175,9 +1245,8 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( } else { assert(F->getType()->getAddressSpace() == 0); - GlobalAlias *FAlias = GlobalAlias::create(F->getValueType(), 0, - F->getLinkage(), "", - CombinedGlobalElemPtr, &M); + GlobalAlias *FAlias = GlobalAlias::create( + F->getValueType(), 0, F->getLinkage(), "", CombinedGlobalElemPtr, &M); FAlias->setVisibility(F->getVisibility()); FAlias->takeName(F); if (FAlias->hasName()) @@ -1302,7 +1371,6 @@ LowerTypeTestsModule::LowerTypeTestsModule( : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) { assert(!(ExportSummary && ImportSummary)); Triple TargetTriple(M.getTargetTriple()); - LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); Arch = TargetTriple.getArch(); OS = TargetTriple.getOS(); ObjectFormat = TargetTriple.getObjectFormat(); @@ -1347,15 +1415,37 @@ bool LowerTypeTestsModule::runForTesting(Module &M) { bool LowerTypeTestsModule::lower() { Function *TypeTestFunc = M.getFunction(Intrinsic::getName(Intrinsic::type_test)); - if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary) + if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary && + !ImportSummary) return false; if (ImportSummary) { - for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); - UI != UE;) { - auto *CI = cast((*UI++).getUser()); - importTypeTest(CI); + if (TypeTestFunc) { + for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); + UI != UE;) { + auto *CI = cast((*UI++).getUser()); + importTypeTest(CI); + } + } + + SmallVector Defs; + SmallVector Decls; + for (auto &F : M) { + // CFI functions are either external, or promoted. A local function may + // have the same name, but it's not the one we are looking for. + if (F.hasLocalLinkage()) + continue; + if (ImportSummary->cfiFunctionDefs().count(F.getName())) + Defs.push_back(&F); + else if (ImportSummary->cfiFunctionDecls().count(F.getName())) + Decls.push_back(&F); } + + for (auto F : Defs) + importFunction(F, /*isDefinition*/ true); + for (auto F : Decls) + importFunction(F, /*isDefinition*/ false); + return true; } @@ -1381,6 +1471,58 @@ bool LowerTypeTestsModule::lower() { llvm::DenseMap TypeIdInfo; unsigned I = 0; SmallVector Types; + + struct ExportedFunctionInfo { + CfiFunctionLinkage Linkage; + MDNode *FuncMD; // {name, linkage, type[, type...]} + }; + DenseMap ExportedFunctions; + if (ExportSummary) { + NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); + if (CfiFunctionsMD) { + for (auto FuncMD : CfiFunctionsMD->operands()) { + assert(FuncMD->getNumOperands() >= 2); + StringRef FunctionName = + cast(FuncMD->getOperand(0))->getString(); + if (!ExportSummary->isGUIDLive(GlobalValue::getGUID( + GlobalValue::dropLLVMManglingEscape(FunctionName)))) + continue; + CfiFunctionLinkage Linkage = static_cast( + cast(FuncMD->getOperand(1)) + ->getValue() + ->getUniqueInteger() + .getZExtValue()); + auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}}); + if (!P.second && P.first->second.Linkage != CFL_Definition) + P.first->second = {Linkage, FuncMD}; + } + + for (const auto &P : ExportedFunctions) { + StringRef FunctionName = P.first; + CfiFunctionLinkage Linkage = P.second.Linkage; + MDNode *FuncMD = P.second.FuncMD; + Function *F = M.getFunction(FunctionName); + if (!F) + F = Function::Create( + FunctionType::get(Type::getVoidTy(M.getContext()), false), + GlobalVariable::ExternalLinkage, FunctionName, &M); + + if (Linkage == CFL_Definition) + F->eraseMetadata(LLVMContext::MD_type); + + if (F->isDeclaration()) { + if (Linkage == CFL_WeakDeclaration) + F->setLinkage(GlobalValue::ExternalWeakLinkage); + + SmallVector Types; + for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I) + F->addMetadata(LLVMContext::MD_type, + *cast(FuncMD->getOperand(I).get())); + } + } + } + } + for (GlobalObject &GO : M.global_objects()) { if (isa(GO) && GO.isDeclarationForLinker()) continue; @@ -1390,7 +1532,15 @@ bool LowerTypeTestsModule::lower() { if (Types.empty()) continue; - auto *GTM = GlobalTypeMember::create(Alloc, &GO, Types); + bool IsDefinition = !GO.isDeclarationForLinker(); + bool IsExported = false; + if (isa(GO) && ExportedFunctions.count(GO.getName())) { + IsDefinition |= ExportedFunctions[GO.getName()].Linkage == CFL_Definition; + IsExported = true; + } + + auto *GTM = + GlobalTypeMember::create(Alloc, &GO, IsDefinition, IsExported, Types); for (MDNode *Type : Types) { verifyTypeMDNode(&GO, Type); auto &Info = TypeIdInfo[cast(Type)->getOperand(1)]; @@ -1440,11 +1590,10 @@ bool LowerTypeTestsModule::lower() { } for (auto &P : *ExportSummary) { - for (auto &S : P.second) { + for (auto &S : P.second.SummaryList) { auto *FS = dyn_cast(S.get()); - if (!FS) + if (!FS || !ExportSummary->isGlobalValueLive(FS)) continue; - // FIXME: Only add live functions. for (GlobalValue::GUID G : FS->type_tests()) for (Metadata *MD : MetadataByGUID[G]) AddTypeIdUse(MD).IsExported = true; diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 4ce4de13c93847924f837fb2064e40ccf84c2d61..0e478ba607be2d576f2925a299f89444aea6c89a 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -207,11 +207,13 @@ private: /// A work queue of functions that may have been modified and should be /// analyzed again. - std::vector Deferred; + std::vector Deferred; /// Checks the rules of order relation introduced among functions set. /// Returns true, if sanity check has been passed, and false if failed. - bool doSanityCheck(std::vector &Worklist); +#ifndef NDEBUG + bool doSanityCheck(std::vector &Worklist); +#endif /// Insert a ComparableFunction into the FnTree, or merge it away if it's /// equal to one that's already present. @@ -283,7 +285,8 @@ ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } -bool MergeFunctions::doSanityCheck(std::vector &Worklist) { +#ifndef NDEBUG +bool MergeFunctions::doSanityCheck(std::vector &Worklist) { if (const unsigned Max = NumFunctionsForSanityCheck) { unsigned TripleNumber = 0; bool Valid = true; @@ -291,10 +294,12 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; unsigned i = 0; - for (std::vector::iterator I = Worklist.begin(), E = Worklist.end(); + for (std::vector::iterator I = Worklist.begin(), + E = Worklist.end(); I != E && i < Max; ++I, ++i) { unsigned j = i; - for (std::vector::iterator J = I; J != E && j < Max; ++J, ++j) { + for (std::vector::iterator J = I; J != E && j < Max; + ++J, ++j) { Function *F1 = cast(*I); Function *F2 = cast(*J); int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); @@ -312,7 +317,7 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { continue; unsigned k = j; - for (std::vector::iterator K = J; K != E && k < Max; + for (std::vector::iterator K = J; K != E && k < Max; ++k, ++K, ++TripleNumber) { if (K == J) continue; @@ -351,6 +356,7 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { } return true; } +#endif bool MergeFunctions::runOnModule(Module &M) { if (skipModule(M)) @@ -381,12 +387,12 @@ bool MergeFunctions::runOnModule(Module &M) { // consider merging it. Otherwise it is dropped and never considered again. if ((I != S && std::prev(I)->first == I->first) || (std::next(I) != IE && std::next(I)->first == I->first) ) { - Deferred.push_back(WeakVH(I->second)); + Deferred.push_back(WeakTrackingVH(I->second)); } } do { - std::vector Worklist; + std::vector Worklist; Deferred.swap(Worklist); DEBUG(doSanityCheck(Worklist)); @@ -395,7 +401,7 @@ bool MergeFunctions::runOnModule(Module &M) { DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); // Insert functions and merge them. - for (WeakVH &I : Worklist) { + for (WeakTrackingVH &I : Worklist) { if (!I) continue; Function *F = cast(I); @@ -432,19 +438,15 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { // Transferring other attributes may help other optimizations, but that // should be done uniformly and not in this ad-hoc way. auto &Context = New->getContext(); - auto NewFuncAttrs = New->getAttributes(); - auto CallSiteAttrs = CS.getAttributes(); - - CallSiteAttrs = CallSiteAttrs.addAttributes( - Context, AttributeList::ReturnIndex, NewFuncAttrs.getRetAttributes()); - - for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) { - AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx); - if (Attrs.hasAttributes()) - CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs); - } - - CS.setAttributes(CallSiteAttrs); + auto NewPAL = New->getAttributes(); + SmallVector NewArgAttrs; + for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) + NewArgAttrs.push_back(NewPAL.getParamAttributes(argIdx)); + // Don't transfer attributes from the function to the callee. Function + // attributes typically aren't relevant to the calling convention or ABI. + CS.setAttributes(AttributeList::get(Context, /*FnAttrs=*/AttributeSet(), + NewPAL.getRetAttributes(), + NewArgAttrs)); remove(CS.getInstruction()->getParent()->getParent()); U->set(BitcastNew); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index a2f6e5639d9d48a6859bf994e35e8acbd01c0827..8840435af6421c62201efc15163aa3f48ed3cf5b 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -16,8 +16,15 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -27,19 +34,177 @@ #include "llvm/Transforms/Utils/CodeExtractor.h" using namespace llvm; -#define DEBUG_TYPE "partialinlining" - -STATISTIC(NumPartialInlined, "Number of functions partially inlined"); +#define DEBUG_TYPE "partial-inlining" + +STATISTIC(NumPartialInlined, + "Number of callsites functions partially inlined into."); + +// Command line option to disable partial-inlining. The default is false: +static cl::opt + DisablePartialInlining("disable-partial-inlining", cl::init(false), + cl::Hidden, cl::desc("Disable partial ininling")); +// This is an option used by testing: +static cl::opt SkipCostAnalysis("skip-partial-inlining-cost-analysis", + cl::init(false), cl::ZeroOrMore, + cl::ReallyHidden, + cl::desc("Skip Cost Analysis")); + +static cl::opt MaxNumInlineBlocks( + "max-num-inline-blocks", cl::init(5), cl::Hidden, + cl::desc("Max Number of Blocks To be Partially Inlined")); + +// Command line option to set the maximum number of partial inlining allowed +// for the module. The default value of -1 means no limit. +static cl::opt MaxNumPartialInlining( + "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore, + cl::desc("Max number of partial inlining. The default is unlimited")); + +// Used only when PGO or user annotated branch data is absent. It is +// the least value that is used to weigh the outline region. If BFI +// produces larger value, the BFI value will be used. +static cl::opt + OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Relative frequency of outline region to " + "the entry block")); + +static cl::opt ExtraOutliningPenalty( + "partial-inlining-extra-penalty", cl::init(0), cl::Hidden, + cl::desc("A debug option to add additional penalty to the computed one.")); namespace { + +struct FunctionOutliningInfo { + FunctionOutliningInfo() + : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr), + ReturnBlockPreds() {} + // Returns the number of blocks to be inlined including all blocks + // in Entries and one return block. + unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; } + + // A set of blocks including the function entry that guard + // the region to be outlined. + SmallVector Entries; + // The return block that is not included in the outlined region. + BasicBlock *ReturnBlock; + // The dominating block of the region to be outlined. + BasicBlock *NonReturnBlock; + // The set of blocks in Entries that that are predecessors to ReturnBlock + SmallVector ReturnBlockPreds; +}; + struct PartialInlinerImpl { - PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {} + PartialInlinerImpl( + std::function *GetAC, + std::function *GTTI, + Optional> GBFI, + ProfileSummaryInfo *ProfSI) + : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} bool run(Module &M); Function *unswitchFunction(Function *F); + // This class speculatively clones the the function to be partial inlined. + // At the end of partial inlining, the remaining callsites to the cloned + // function that are not partially inlined will be fixed up to reference + // the original function, and the cloned function will be erased. + struct FunctionCloner { + FunctionCloner(Function *F, FunctionOutliningInfo *OI); + ~FunctionCloner(); + + // Prepare for function outlining: making sure there is only + // one incoming edge from the extracted/outlined region to + // the return block. + void NormalizeReturnBlock(); + + // Do function outlining: + Function *doFunctionOutlining(); + + Function *OrigFunc = nullptr; + Function *ClonedFunc = nullptr; + Function *OutlinedFunc = nullptr; + BasicBlock *OutliningCallBB = nullptr; + // ClonedFunc is inlined in one of its callers after function + // outlining. + bool IsFunctionInlined = false; + // The cost of the region to be outlined. + int OutlinedRegionCost = 0; + std::unique_ptr ClonedOI = nullptr; + std::unique_ptr ClonedFuncBFI = nullptr; + }; + private: - InlineFunctionInfo IFI; + int NumPartialInlining = 0; + std::function *GetAssumptionCache; + std::function *GetTTI; + Optional> GetBFI; + ProfileSummaryInfo *PSI; + + // Return the frequency of the OutlininingBB relative to F's entry point. + // The result is no larger than 1 and is represented using BP. + // (Note that the outlined region's 'head' block can only have incoming + // edges from the guarding entry blocks). + BranchProbability getOutliningCallBBRelativeFreq(FunctionCloner &Cloner); + + // Return true if the callee of CS should be partially inlined with + // profit. + bool shouldPartialInline(CallSite CS, FunctionCloner &Cloner, + BlockFrequency WeightedOutliningRcost, + OptimizationRemarkEmitter &ORE); + + // Try to inline DuplicateFunction (cloned from F with call to + // the OutlinedFunction into its callers. Return true + // if there is any successful inlining. + bool tryPartialInline(FunctionCloner &Cloner); + + // Compute the mapping from use site of DuplicationFunction to the enclosing + // BB's profile count. + void computeCallsiteToProfCountMap(Function *DuplicateFunction, + DenseMap &SiteCountMap); + + bool IsLimitReached() { + return (MaxNumPartialInlining != -1 && + NumPartialInlining >= MaxNumPartialInlining); + } + + static CallSite getCallSite(User *U) { + CallSite CS; + if (CallInst *CI = dyn_cast(U)) + CS = CallSite(CI); + else if (InvokeInst *II = dyn_cast(U)) + CS = CallSite(II); + else + llvm_unreachable("All uses must be calls"); + return CS; + } + + static CallSite getOneCallSiteTo(Function *F) { + User *User = *F->user_begin(); + return getCallSite(User); + } + + std::tuple getOneDebugLoc(Function *F) { + CallSite CS = getOneCallSiteTo(F); + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + BasicBlock *Block = CS.getParent(); + return std::make_tuple(DLoc, Block); + } + + // Returns the costs associated with function outlining: + // - The first value is the non-weighted runtime cost for making the call + // to the outlined function, including the addtional setup cost in the + // outlined function itself; + // - The second value is the estimated size of the new call sequence in + // basic block Cloner.OutliningCallBB; + std::tuple computeOutliningCosts(FunctionCloner &Cloner); + // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to + // approximate both the size and runtime cost (Note that in the current + // inline cost analysis, there is no clear distinction there either). + static int computeBBInlineCost(BasicBlock *BB); + + std::unique_ptr computeOutliningInfo(Function *F); + }; + struct PartialInlinerLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid PartialInlinerLegacyPass() : ModulePass(ID) { @@ -48,124 +213,713 @@ struct PartialInlinerLegacyPass : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); + AU.addRequired(); } bool runOnModule(Module &M) override { if (skipModule(M)) return false; AssumptionCacheTracker *ACT = &getAnalysis(); + TargetTransformInfoWrapperPass *TTIWP = + &getAnalysis(); + ProfileSummaryInfo *PSI = + getAnalysis().getPSI(); + std::function GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - return PartialInlinerImpl(IFI).run(M); + + std::function GetTTI = + [&TTIWP](Function &F) -> TargetTransformInfo & { + return TTIWP->getTTI(F); + }; + + return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M); } }; } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { - // First, verify that this function is an unswitching candidate... +std::unique_ptr +PartialInlinerImpl::computeOutliningInfo(Function *F) { BasicBlock *EntryBlock = &F->front(); BranchInst *BR = dyn_cast(EntryBlock->getTerminator()); if (!BR || BR->isUnconditional()) - return nullptr; + return std::unique_ptr(); + + // Returns true if Succ is BB's successor + auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) { + return is_contained(successors(BB), Succ); + }; + + auto SuccSize = [](BasicBlock *BB) { + return std::distance(succ_begin(BB), succ_end(BB)); + }; + + auto IsReturnBlock = [](BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + return isa(TI); + }; + + auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) { + if (IsReturnBlock(Succ1)) + return std::make_tuple(Succ1, Succ2); + if (IsReturnBlock(Succ2)) + return std::make_tuple(Succ2, Succ1); + + return std::make_tuple(nullptr, nullptr); + }; + + // Detect a triangular shape: + auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) { + if (IsSuccessor(Succ1, Succ2)) + return std::make_tuple(Succ1, Succ2); + if (IsSuccessor(Succ2, Succ1)) + return std::make_tuple(Succ2, Succ1); + + return std::make_tuple(nullptr, nullptr); + }; + + std::unique_ptr OutliningInfo = + llvm::make_unique(); + + BasicBlock *CurrEntry = EntryBlock; + bool CandidateFound = false; + do { + // The number of blocks to be inlined has already reached + // the limit. When MaxNumInlineBlocks is set to 0 or 1, this + // disables partial inlining for the function. + if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks) + break; + + if (SuccSize(CurrEntry) != 2) + break; + + BasicBlock *Succ1 = *succ_begin(CurrEntry); + BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1); + + BasicBlock *ReturnBlock, *NonReturnBlock; + std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); + + if (ReturnBlock) { + OutliningInfo->Entries.push_back(CurrEntry); + OutliningInfo->ReturnBlock = ReturnBlock; + OutliningInfo->NonReturnBlock = NonReturnBlock; + CandidateFound = true; + break; + } + + BasicBlock *CommSucc; + BasicBlock *OtherSucc; + std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2); + + if (!CommSucc) + break; - BasicBlock *ReturnBlock = nullptr; - BasicBlock *NonReturnBlock = nullptr; - unsigned ReturnCount = 0; - for (BasicBlock *BB : successors(EntryBlock)) { - if (isa(BB->getTerminator())) { - ReturnBlock = BB; - ReturnCount++; - } else - NonReturnBlock = BB; + OutliningInfo->Entries.push_back(CurrEntry); + CurrEntry = OtherSucc; + + } while (true); + + if (!CandidateFound) + return std::unique_ptr(); + + // Do sanity check of the entries: threre should not + // be any successors (not in the entry set) other than + // {ReturnBlock, NonReturnBlock} + assert(OutliningInfo->Entries[0] == &F->front() && + "Function Entry must be the first in Entries vector"); + DenseSet Entries; + for (BasicBlock *E : OutliningInfo->Entries) + Entries.insert(E); + + // Returns true of BB has Predecessor which is not + // in Entries set. + auto HasNonEntryPred = [Entries](BasicBlock *BB) { + for (auto Pred : predecessors(BB)) { + if (!Entries.count(Pred)) + return true; + } + return false; + }; + auto CheckAndNormalizeCandidate = + [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) { + for (BasicBlock *E : OutliningInfo->Entries) { + for (auto Succ : successors(E)) { + if (Entries.count(Succ)) + continue; + if (Succ == OutliningInfo->ReturnBlock) + OutliningInfo->ReturnBlockPreds.push_back(E); + else if (Succ != OutliningInfo->NonReturnBlock) + return false; + } + // There should not be any outside incoming edges either: + if (HasNonEntryPred(E)) + return false; + } + return true; + }; + + if (!CheckAndNormalizeCandidate(OutliningInfo.get())) + return std::unique_ptr(); + + // Now further growing the candidate's inlining region by + // peeling off dominating blocks from the outlining region: + while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) { + BasicBlock *Cand = OutliningInfo->NonReturnBlock; + if (SuccSize(Cand) != 2) + break; + + if (HasNonEntryPred(Cand)) + break; + + BasicBlock *Succ1 = *succ_begin(Cand); + BasicBlock *Succ2 = *(succ_begin(Cand) + 1); + + BasicBlock *ReturnBlock, *NonReturnBlock; + std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); + if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock) + break; + + if (NonReturnBlock->getSinglePredecessor() != Cand) + break; + + // Now grow and update OutlininigInfo: + OutliningInfo->Entries.push_back(Cand); + OutliningInfo->NonReturnBlock = NonReturnBlock; + OutliningInfo->ReturnBlockPreds.push_back(Cand); + Entries.insert(Cand); } - if (ReturnCount != 1) - return nullptr; + return OutliningInfo; +} + +// Check if there is PGO data or user annoated branch data: +static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) { + if (F->getEntryCount()) + return true; + // Now check if any of the entry block has MD_prof data: + for (auto *E : OI->Entries) { + BranchInst *BR = dyn_cast(E->getTerminator()); + if (!BR || BR->isUnconditional()) + continue; + uint64_t T, F; + if (BR->extractProfMetadata(T, F)) + return true; + } + return false; +} + +BranchProbability +PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { + + auto EntryFreq = + Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock()); + auto OutliningCallFreq = + Cloner.ClonedFuncBFI->getBlockFreq(Cloner.OutliningCallBB); + + auto OutlineRegionRelFreq = + BranchProbability::getBranchProbability(OutliningCallFreq.getFrequency(), + EntryFreq.getFrequency()); + + if (hasProfileData(Cloner.OrigFunc, Cloner.ClonedOI.get())) + return OutlineRegionRelFreq; + + // When profile data is not available, we need to be conservative in + // estimating the overall savings. Static branch prediction can usually + // guess the branch direction right (taken/non-taken), but the guessed + // branch probability is usually not biased enough. In case when the + // outlined region is predicted to be likely, its probability needs + // to be made higher (more biased) to not under-estimate the cost of + // function outlining. On the other hand, if the outlined region + // is predicted to be less likely, the predicted probablity is usually + // higher than the actual. For instance, the actual probability of the + // less likely target is only 5%, but the guessed probablity can be + // 40%. In the latter case, there is no need for further adjustement. + // FIXME: add an option for this. + if (OutlineRegionRelFreq < BranchProbability(45, 100)) + return OutlineRegionRelFreq; + + OutlineRegionRelFreq = std::max( + OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100)); + + return OutlineRegionRelFreq; +} + +bool PartialInlinerImpl::shouldPartialInline( + CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, + OptimizationRemarkEmitter &ORE) { + + using namespace ore; + if (SkipCostAnalysis) + return true; + + Instruction *Call = CS.getInstruction(); + Function *Callee = CS.getCalledFunction(); + assert(Callee == Cloner.ClonedFunc); + + Function *Caller = CS.getCaller(); + auto &CalleeTTI = (*GetTTI)(*Callee); + InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, + *GetAssumptionCache, GetBFI, PSI); + + if (IC.isAlways()) { + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + << NV("Callee", Cloner.OrigFunc) + << " should always be fully inlined, not partially"); + return false; + } + + if (IC.isNever()) { + ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " + << NV("Caller", Caller) + << " because it should never be inlined (cost=never)"); + return false; + } + + if (!IC) { + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " + << NV("Caller", Caller) << " because too costly to inline (cost=" + << NV("Cost", IC.getCost()) << ", threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return false; + } + const DataLayout &DL = Caller->getParent()->getDataLayout(); + + // The savings of eliminating the call: + int NonWeightedSavings = getCallsiteCost(CS, DL); + BlockFrequency NormWeightedSavings(NonWeightedSavings); + + // Weighted saving is smaller than weighted cost, return false + if (NormWeightedSavings < WeightedOutliningRcost) { + ORE.emit( + OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call) + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " + << NV("Caller", Caller) << " runtime overhead (overhead=" + << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) + << ", savings=" + << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")" + << " of making the outlined call is too high"); + + return false; + } + + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) + << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into " + << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) + << " (threshold=" + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + return true; +} + +// TODO: Ideally we should share Inliner's InlineCost Analysis code. +// For now use a simplified version. The returned 'InlineCost' will be used +// to esimate the size cost as well as runtime cost of the BB. +int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { + int InlineCost = 0; + const DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (isa(I)) + continue; + + switch (I->getOpcode()) { + case Instruction::BitCast: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::Alloca: + continue; + case Instruction::GetElementPtr: + if (cast(I)->hasAllZeroIndices()) + continue; + default: + break; + } + + IntrinsicInst *IntrInst = dyn_cast(I); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || + IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + } + + if (CallInst *CI = dyn_cast(I)) { + InlineCost += getCallsiteCost(CallSite(CI), DL); + continue; + } + + if (InvokeInst *II = dyn_cast(I)) { + InlineCost += getCallsiteCost(CallSite(II), DL); + continue; + } + + if (SwitchInst *SI = dyn_cast(I)) { + InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost; + continue; + } + InlineCost += InlineConstants::InstrCost; + } + return InlineCost; +} + +std::tuple +PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) { + + // Now compute the cost of the call sequence to the outlined function + // 'OutlinedFunction' in BB 'OutliningCallBB': + int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB); + + // Now compute the cost of the extracted/outlined function itself: + int OutlinedFunctionCost = 0; + for (BasicBlock &BB : *Cloner.OutlinedFunc) { + OutlinedFunctionCost += computeBBInlineCost(&BB); + } + + assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost && + "Outlined function cost should be no less than the outlined region"); + // The code extractor introduces a new root and exit stub blocks with + // additional unconditional branches. Those branches will be eliminated + // later with bb layout. The cost should be adjusted accordingly: + OutlinedFunctionCost -= 2 * InlineConstants::InstrCost; + + int OutliningRuntimeOverhead = + OutliningFuncCallCost + + (OutlinedFunctionCost - Cloner.OutlinedRegionCost) + + ExtraOutliningPenalty; + + return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead); +} + +// Create the callsite to profile count map which is +// used to update the original function's entry count, +// after the function is partially inlined into the callsite. +void PartialInlinerImpl::computeCallsiteToProfCountMap( + Function *DuplicateFunction, + DenseMap &CallSiteToProfCountMap) { + std::vector Users(DuplicateFunction->user_begin(), + DuplicateFunction->user_end()); + Function *CurrentCaller = nullptr; + std::unique_ptr TempBFI; + BlockFrequencyInfo *CurrentCallerBFI = nullptr; + + auto ComputeCurrBFI = [&,this](Function *Caller) { + // For the old pass manager: + if (!GetBFI) { + DominatorTree DT(*Caller); + LoopInfo LI(DT); + BranchProbabilityInfo BPI(*Caller, LI); + TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI)); + CurrentCallerBFI = TempBFI.get(); + } else { + // New pass manager: + CurrentCallerBFI = &(*GetBFI)(*Caller); + } + }; + + for (User *User : Users) { + CallSite CS = getCallSite(User); + Function *Caller = CS.getCaller(); + if (CurrentCaller != Caller) { + CurrentCaller = Caller; + ComputeCurrBFI(Caller); + } else { + assert(CurrentCallerBFI && "CallerBFI is not set"); + } + BasicBlock *CallBB = CS.getInstruction()->getParent(); + auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB); + if (Count) + CallSiteToProfCountMap[User] = *Count; + else + CallSiteToProfCountMap[User] = 0; + } +} + +PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F, + FunctionOutliningInfo *OI) + : OrigFunc(F) { + ClonedOI = llvm::make_unique(); // Clone the function, so that we can hack away on it. ValueToValueMapTy VMap; - Function *DuplicateFunction = CloneFunction(F, VMap); - DuplicateFunction->setLinkage(GlobalValue::InternalLinkage); - BasicBlock *NewEntryBlock = cast(VMap[EntryBlock]); - BasicBlock *NewReturnBlock = cast(VMap[ReturnBlock]); - BasicBlock *NewNonReturnBlock = cast(VMap[NonReturnBlock]); + ClonedFunc = CloneFunction(F, VMap); + ClonedOI->ReturnBlock = cast(VMap[OI->ReturnBlock]); + ClonedOI->NonReturnBlock = cast(VMap[OI->NonReturnBlock]); + for (BasicBlock *BB : OI->Entries) { + ClonedOI->Entries.push_back(cast(VMap[BB])); + } + for (BasicBlock *E : OI->ReturnBlockPreds) { + BasicBlock *NewE = cast(VMap[E]); + ClonedOI->ReturnBlockPreds.push_back(NewE); + } // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. - F->replaceAllUsesWith(DuplicateFunction); + F->replaceAllUsesWith(ClonedFunc); +} + +void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { + + auto getFirstPHI = [](BasicBlock *BB) { + BasicBlock::iterator I = BB->begin(); + PHINode *FirstPhi = nullptr; + while (I != BB->end()) { + PHINode *Phi = dyn_cast(I); + if (!Phi) + break; + if (!FirstPhi) { + FirstPhi = Phi; + break; + } + } + return FirstPhi; + }; // Special hackery is needed with PHI nodes that have inputs from more than // one extracted block. For simplicity, just split the PHIs into a two-level // sequence of PHIs, some of which will go in the extracted region, and some // of which will go outside. - BasicBlock *PreReturn = NewReturnBlock; - NewReturnBlock = NewReturnBlock->splitBasicBlock( - NewReturnBlock->getFirstNonPHI()->getIterator()); + BasicBlock *PreReturn = ClonedOI->ReturnBlock; + // only split block when necessary: + PHINode *FirstPhi = getFirstPHI(PreReturn); + unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size(); + + if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1) + return; + + auto IsTrivialPhi = [](PHINode *PN) -> Value * { + Value *CommonValue = PN->getIncomingValue(0); + if (all_of(PN->incoming_values(), + [&](Value *V) { return V == CommonValue; })) + return CommonValue; + return nullptr; + }; + + ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock( + ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator()); BasicBlock::iterator I = PreReturn->begin(); - Instruction *Ins = &NewReturnBlock->front(); + Instruction *Ins = &ClonedOI->ReturnBlock->front(); + SmallVector DeadPhis; while (I != PreReturn->end()) { PHINode *OldPhi = dyn_cast(I); if (!OldPhi) break; - PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); + PHINode *RetPhi = + PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); OldPhi->replaceAllUsesWith(RetPhi); - Ins = NewReturnBlock->getFirstNonPHI(); + Ins = ClonedOI->ReturnBlock->getFirstNonPHI(); RetPhi->addIncoming(&*I, PreReturn); - RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock), - NewEntryBlock); - OldPhi->removeIncomingValue(NewEntryBlock); + for (BasicBlock *E : ClonedOI->ReturnBlockPreds) { + RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E); + OldPhi->removeIncomingValue(E); + } + // After incoming values splitting, the old phi may become trivial. + // Keeping the trivial phi can introduce definition inside the outline + // region which is live-out, causing necessary overhead (load, store + // arg passing etc). + if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) { + OldPhi->replaceAllUsesWith(OldPhiVal); + DeadPhis.push_back(OldPhi); + } ++I; - } - NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); + } + for (auto *DP : DeadPhis) + DP->eraseFromParent(); + + for (auto E : ClonedOI->ReturnBlockPreds) { + E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock); + } +} + +Function *PartialInlinerImpl::FunctionCloner::doFunctionOutlining() { + // Returns true if the block is to be partial inlined into the caller + // (i.e. not to be extracted to the out of line function) + auto ToBeInlined = [&, this](BasicBlock *BB) { + return BB == ClonedOI->ReturnBlock || + (std::find(ClonedOI->Entries.begin(), ClonedOI->Entries.end(), BB) != + ClonedOI->Entries.end()); + }; // Gather up the blocks that we're going to extract. std::vector ToExtract; - ToExtract.push_back(NewNonReturnBlock); - for (BasicBlock &BB : *DuplicateFunction) - if (&BB != NewEntryBlock && &BB != NewReturnBlock && - &BB != NewNonReturnBlock) + ToExtract.push_back(ClonedOI->NonReturnBlock); + OutlinedRegionCost += + PartialInlinerImpl::computeBBInlineCost(ClonedOI->NonReturnBlock); + for (BasicBlock &BB : *ClonedFunc) + if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) { ToExtract.push_back(&BB); + // FIXME: the code extractor may hoist/sink more code + // into the outlined function which may make the outlining + // overhead (the difference of the outlined function cost + // and OutliningRegionCost) look larger. + OutlinedRegionCost += computeBBInlineCost(&BB); + } // The CodeExtractor needs a dominator tree. DominatorTree DT; - DT.recalculate(*DuplicateFunction); + DT.recalculate(*ClonedFunc); // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo. LoopInfo LI(DT); - BranchProbabilityInfo BPI(*DuplicateFunction, LI); - BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI); + BranchProbabilityInfo BPI(*ClonedFunc, LI); + ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); // Extract the body of the if. - Function *ExtractedFunction = - CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI) - .extractCodeRegion(); + OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, + ClonedFuncBFI.get(), &BPI) + .extractCodeRegion(); + + if (OutlinedFunc) { + OutliningCallBB = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc) + .getInstruction() + ->getParent(); + assert(OutliningCallBB->getParent() == ClonedFunc); + } - // Inline the top-level if test into all callers. - std::vector Users(DuplicateFunction->user_begin(), - DuplicateFunction->user_end()); - for (User *User : Users) - if (CallInst *CI = dyn_cast(User)) - InlineFunction(CI, IFI); - else if (InvokeInst *II = dyn_cast(User)) - InlineFunction(II, IFI); + return OutlinedFunc; +} +PartialInlinerImpl::FunctionCloner::~FunctionCloner() { // Ditch the duplicate, since we're done with it, and rewrite all remaining // users (function pointers, etc.) back to the original function. - DuplicateFunction->replaceAllUsesWith(F); - DuplicateFunction->eraseFromParent(); + ClonedFunc->replaceAllUsesWith(OrigFunc); + ClonedFunc->eraseFromParent(); + if (!IsFunctionInlined) { + // Remove the function that is speculatively created if there is no + // reference. + if (OutlinedFunc) + OutlinedFunc->eraseFromParent(); + } +} - ++NumPartialInlined; +Function *PartialInlinerImpl::unswitchFunction(Function *F) { + + if (F->hasAddressTaken()) + return nullptr; + + // Let inliner handle it + if (F->hasFnAttribute(Attribute::AlwaysInline)) + return nullptr; + + if (F->hasFnAttribute(Attribute::NoInline)) + return nullptr; + + if (PSI->isFunctionEntryCold(F)) + return nullptr; + + if (F->user_begin() == F->user_end()) + return nullptr; + + std::unique_ptr OI = computeOutliningInfo(F); - return ExtractedFunction; + if (!OI) + return nullptr; + + FunctionCloner Cloner(F, OI.get()); + Cloner.NormalizeReturnBlock(); + Function *OutlinedFunction = Cloner.doFunctionOutlining(); + + bool AnyInline = tryPartialInline(Cloner); + + if (AnyInline) + return OutlinedFunction; + + return nullptr; +} + +bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { + int NonWeightedRcost; + int SizeCost; + + if (Cloner.OutlinedFunc == nullptr) + return false; + + std::tie(SizeCost, NonWeightedRcost) = computeOutliningCosts(Cloner); + + auto RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner); + auto WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq; + + // The call sequence to the outlined function is larger than the original + // outlined region size, it does not increase the chances of inlining + // the function with outlining (The inliner usies the size increase to + // model the cost of inlining a callee). + if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) { + OptimizationRemarkEmitter ORE(Cloner.OrigFunc); + DebugLoc DLoc; + BasicBlock *Block; + std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc); + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", + DLoc, Block) + << ore::NV("Function", Cloner.OrigFunc) + << " not partially inlined into callers (Original Size = " + << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost) + << ", Size of call sequence to outlined function = " + << ore::NV("NewSize", SizeCost) << ")"); + return false; + } + + assert(Cloner.OrigFunc->user_begin() == Cloner.OrigFunc->user_end() && + "F's users should all be replaced!"); + + std::vector Users(Cloner.ClonedFunc->user_begin(), + Cloner.ClonedFunc->user_end()); + + DenseMap CallSiteToProfCountMap; + if (Cloner.OrigFunc->getEntryCount()) + computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap); + + auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount(); + uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0); + + bool AnyInline = false; + for (User *User : Users) { + CallSite CS = getCallSite(User); + + if (IsLimitReached()) + continue; + + OptimizationRemarkEmitter ORE(CS.getCaller()); + + if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE)) + continue; + + ORE.emit( + OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", CS.getInstruction()) + << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " + << ore::NV("Caller", CS.getCaller())); + + InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); + InlineFunction(CS, IFI); + + // Now update the entry count: + if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) { + uint64_t CallSiteCount = CallSiteToProfCountMap[User]; + CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount); + } + + AnyInline = true; + NumPartialInlining++; + // Update the stats + NumPartialInlined++; + } + + if (AnyInline) { + Cloner.IsFunctionInlined = true; + if (CalleeEntryCount) + Cloner.OrigFunc->setEntryCount(CalleeEntryCountV); + } + + return AnyInline; } bool PartialInlinerImpl::run(Module &M) { + if (DisablePartialInlining) + return false; + std::vector Worklist; Worklist.reserve(M.size()); for (Function &F : M) @@ -203,6 +957,8 @@ char PartialInlinerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) @@ -213,12 +969,25 @@ ModulePass *llvm::createPartialInliningPass() { PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult(M).getManager(); + std::function GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; - InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); - if (PartialInlinerImpl(IFI).run(M)) + + std::function GetBFI = + [&FAM](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult(F); + }; + + std::function GetTTI = + [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult(F); + }; + + ProfileSummaryInfo *PSI = &AM.getResult(M); + + if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 6cc6d3b63f49c633ea99811c9b5c2cf60df164db..4bc64ab698ff9b4e44dfb9a4b034d215e9d30508 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -38,13 +38,18 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Vectorize.h" using namespace llvm; static cl::opt -RunLoopVectorization("vectorize-loops", cl::Hidden, - cl::desc("Run the Loop vectorization passes")); + RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Run Partial inlinining pass")); + +static cl::opt + RunLoopVectorization("vectorize-loops", cl::Hidden, + cl::desc("Run the Loop vectorization passes")); static cl::opt RunSLPVectorization("vectorize-slp", cl::Hidden, @@ -136,15 +141,28 @@ static cl::opt PreInlineThreshold( cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); +static cl::opt EnableEarlyCSEMemSSA( + "enable-earlycse-memssa", cl::init(false), cl::Hidden, + cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = off)")); + static cl::opt EnableGVNHoist( - "enable-gvn-hoist", cl::init(true), cl::Hidden, - cl::desc("Enable the GVN hoisting pass (default = on)")); + "enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), cl::Hidden, cl::desc("Disable shrink-wrap library calls")); +static cl::opt + EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false), + cl::Hidden, + cl::desc("Enable the simple loop unswitch pass.")); + +static cl::opt EnableGVNSink( + "enable-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN sinking pass (default = off)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -245,8 +263,6 @@ void PassManagerBuilder::populateFunctionPassManager( FPM.add(createCFGSimplificationPass()); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); - if (EnableGVNHoist) - FPM.add(createGVNHoistPass()); FPM.add(createLowerExpectIntrinsicPass()); } @@ -284,13 +300,26 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { } if (!PGOInstrUse.empty()) MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); + // Indirect call promotion that promotes intra-module targets only. + // For ThinLTO this is done earlier due to interactions with globalopt + // for imported functions. We don't run this at -O0. + if (OptLevel > 0) + MPM.add( + createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); } void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. MPM.add(createSROAPass()); - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies + if (EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); + } + // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); MPM.add(createJumpThreadingPass()); // Thread jumps. @@ -312,7 +341,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses( // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); + if (EnableSimpleLoopUnswitch) + MPM.add(createSimpleLoopUnswitchLegacyPass()); + else + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars @@ -414,11 +446,14 @@ void PassManagerBuilder::populateModulePassManager( else if (!GlobalExtensions->empty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); + + // Rename anon globals to be able to export them in the summary. + // This has to be done after we add the extensions to the pass manager + // as there could be passes (e.g. Adddress sanitizer) which introduce + // new unnamed globals. if (PrepareForThinLTO) - // Rename anon globals to be able to export them in the summary. MPM.add(createNameAnonGlobalPass()); - - addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); return; } @@ -468,16 +503,10 @@ void PassManagerBuilder::populateModulePassManager( // For SamplePGO in ThinLTO compile phase, we do not want to do indirect // call promotion as it will change the CFG too much to make the 2nd // profile annotation in backend more difficult. - if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) { - /// PGO instrumentation is added during the compile phase for ThinLTO, do - /// not run it a second time + // PGO instrumentation is added during the compile phase for ThinLTO, do + // not run it a second time + if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); - // Indirect call promotion that promotes intra-module targets only. - // For ThinLTO this is done earlier due to interactions with globalopt - // for imported functions. - MPM.add( - createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); - } if (EnableNonLTOGlobalsModRef) // We add a module alias analysis pass here. In part due to bugs in the @@ -504,6 +533,8 @@ void PassManagerBuilder::populateModulePassManager( // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (RunPartialInlining) + MPM.add(createPartialInliningPass()); if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) @@ -677,6 +708,11 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopSinkPass()); // Get rid of LCSSA nodes. MPM.add(createInstructionSimplifierPass()); + + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. + MPM.add(createCFGSimplificationPass()); + addExtensionsToPM(EP_OptimizerLast, MPM); } @@ -881,6 +917,12 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (OptLevel != 0) addLTOOptimizationPasses(PM); + else { + // The whole-program-devirt pass needs to run at -O0 because only it knows + // about the llvm.type.checked.load intrinsic: it needs to both lower the + // intrinsic itself and handle it in the summary. + PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); + } // Create a function that performs CFI checks for cross-DSO calls with targets // in the current module. diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index d9acb9b1a743b11c0165f0fe3f05bfe1d52f54f6..3fd59847a005f32ae44791d2ce20fc27bd5ab641 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -14,10 +14,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/EHPersonalities.h" @@ -28,6 +26,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index bae08d9b9ee76d2c56682789313911486f8e8b12..67bc8f5f6b7adb7a348ec4a5b16bec7a4b1c4494 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/SampleProfReader.h" @@ -162,6 +163,8 @@ protected: ErrorOr getInstWeight(const Instruction &I); ErrorOr getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; + std::vector + findIndirectCallFunctionSamples(const Instruction &I) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; bool inlineHotFunctions(Function &F, DenseSet &ImportGUIDs); @@ -206,6 +209,12 @@ protected: /// the same number of times. EquivalenceClassMap EquivalenceClass; + /// Map from function name to Function *. Used to find the function from + /// the function name. If the function name contains suffix, additional + /// entry is added to map from the stripped name to the function if there + /// is one-to-one mapping. + StringMap SymbolMap; + /// \brief Dominance, post-dominance and loop information. std::unique_ptr DT; std::unique_ptr> PDT; @@ -330,11 +339,12 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const { // If there are inlined callsites in this function, count the samples found // in the respective bodies. However, do not bother counting callees with 0 // total samples, these are callees that were never invoked at runtime. - for (const auto &I : FS->getCallsiteSamples()) { - const FunctionSamples *CalleeSamples = &I.second; - if (callsiteIsHot(FS, CalleeSamples)) - Count += countUsedRecords(CalleeSamples); - } + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(FS, CalleeSamples)) + Count += countUsedRecords(CalleeSamples); + } return Count; } @@ -347,11 +357,12 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const { unsigned Count = FS->getBodySamples().size(); // Only count records in hot callsites. - for (const auto &I : FS->getCallsiteSamples()) { - const FunctionSamples *CalleeSamples = &I.second; - if (callsiteIsHot(FS, CalleeSamples)) - Count += countBodyRecords(CalleeSamples); - } + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(FS, CalleeSamples)) + Count += countBodyRecords(CalleeSamples); + } return Count; } @@ -366,11 +377,12 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const { Total += I.second.getSamples(); // Only count samples in hot callsites. - for (const auto &I : FS->getCallsiteSamples()) { - const FunctionSamples *CalleeSamples = &I.second; - if (callsiteIsHot(FS, CalleeSamples)) - Total += countBodySamples(CalleeSamples); - } + for (const auto &I : FS->getCallsiteSamples()) + for (const auto &J : I.second) { + const FunctionSamples *CalleeSamples = &J.second; + if (callsiteIsHot(FS, CalleeSamples)) + Total += countBodySamples(CalleeSamples); + } return Total; } @@ -559,12 +571,49 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { if (!DIL) { return nullptr; } + + StringRef CalleeName; + if (const CallInst *CI = dyn_cast(&Inst)) + if (Function *Callee = CI->getCalledFunction()) + CalleeName = Callee->getName(); + const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) return nullptr; return FS->findFunctionSamplesAt( - LineLocation(getOffset(DIL), DIL->getBaseDiscriminator())); + LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName); +} + +/// Returns a vector of FunctionSamples that are the indirect call targets +/// of \p Inst. The vector is sorted by the total number of samples. +std::vector +SampleProfileLoader::findIndirectCallFunctionSamples( + const Instruction &Inst) const { + const DILocation *DIL = Inst.getDebugLoc(); + std::vector R; + + if (!DIL) { + return R; + } + + const FunctionSamples *FS = findFunctionSamples(Inst); + if (FS == nullptr) + return R; + + if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt( + LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) { + if (M->size() == 0) + return R; + for (const auto &NameFS : *M) { + R.push_back(&NameFS.second); + } + std::sort(R.begin(), R.end(), + [](const FunctionSamples *L, const FunctionSamples *R) { + return L->getTotalSamples() > R->getTotalSamples(); + }); + } + return R; } /// \brief Get the FunctionSamples for an instruction. @@ -578,18 +627,23 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { /// \returns the FunctionSamples pointer to the inlined instance. const FunctionSamples * SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { - SmallVector S; + SmallVector, 10> S; const DILocation *DIL = Inst.getDebugLoc(); - if (!DIL) { + if (!DIL) return Samples; + + const DILocation *PrevDIL = DIL; + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { + S.push_back(std::make_pair( + LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); + PrevDIL = DIL; } - for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) - S.push_back(LineLocation(getOffset(DIL), DIL->getBaseDiscriminator())); if (S.size() == 0) return Samples; const FunctionSamples *FS = Samples; for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) { - FS = FS->findFunctionSamplesAt(S[i]); + FS = FS->findFunctionSamplesAt(S[i].first, S[i].second); } return FS; } @@ -623,7 +677,7 @@ bool SampleProfileLoader::inlineHotFunctions( for (auto &I : BB.getInstList()) { const FunctionSamples *FS = nullptr; if ((isa(I) || isa(I)) && - (FS = findCalleeFunctionSamples(I))) { + !isa(I) && (FS = findCalleeFunctionSamples(I))) { Candidates.push_back(&I); if (callsiteIsHot(Samples, FS)) Hot = true; @@ -638,25 +692,37 @@ bool SampleProfileLoader::inlineHotFunctions( Function *CalledFunction = CallSite(I).getCalledFunction(); Instruction *DI = I; if (!CalledFunction && !PromotedInsns.count(I) && - CallSite(I).isIndirectCall()) { - auto CalleeFunctionName = findCalleeFunctionSamples(*I)->getName(); - const char *Reason = "Callee function not available"; - CalledFunction = F.getParent()->getFunction(CalleeFunctionName); - if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { - // The indirect target was promoted and inlined in the profile, as a - // result, we do not have profile info for the branch probability. - // We set the probability to 80% taken to indicate that the static - // call is likely taken. - DI = dyn_cast( - promoteIndirectCall(I, CalledFunction, 80, 100, false) - ->stripPointerCasts()); - PromotedInsns.insert(I); - } else { - DEBUG(dbgs() << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason << "\n"); - continue; + CallSite(I).isIndirectCall()) + for (const auto *FS : findIndirectCallFunctionSamples(*I)) { + auto CalleeFunctionName = FS->getName(); + // If it is a recursive call, we do not inline it as it could bloat + // the code exponentially. There is way to better handle this, e.g. + // clone the caller first, and inline the cloned caller if it is + // recursive. As llvm does not inline recursive calls, we will simply + // ignore it instead of handling it explicitly. + if (CalleeFunctionName == F.getName()) + continue; + const char *Reason = "Callee function not available"; + auto R = SymbolMap.find(CalleeFunctionName); + if (R == SymbolMap.end()) + continue; + CalledFunction = R->getValue(); + if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { + // The indirect target was promoted and inlined in the profile, as a + // result, we do not have profile info for the branch probability. + // We set the probability to 80% taken to indicate that the static + // call is likely taken. + DI = dyn_cast( + promoteIndirectCall(I, CalledFunction, 80, 100, false) + ->stripPointerCasts()); + PromotedInsns.insert(I); + } else { + DEBUG(dbgs() << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason + << "\n"); + continue; + } } - } if (!CalledFunction || !CalledFunction->getSubprogram()) { findCalleeFunctionSamples(*I)->findImportedFunctions( ImportGUIDs, F.getParent(), @@ -1132,8 +1198,11 @@ void SampleProfileLoader::propagateWeights(Function &F) { if (!isa(TI) && !isa(TI)) continue; + DebugLoc BranchLoc = TI->getDebugLoc(); DEBUG(dbgs() << "\nGetting weights for branch at line " - << TI->getDebugLoc().getLine() << ".\n"); + << ((BranchLoc) ? Twine(BranchLoc.getLine()) + : Twine("")) + << ".\n"); SmallVector Weights; uint32_t MaxWeight = 0; DebugLoc MaxDestLoc; @@ -1170,7 +1239,6 @@ void SampleProfileLoader::propagateWeights(Function &F) { DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); - DebugLoc BranchLoc = TI->getDebugLoc(); emitOptimizationRemark( Ctx, DEBUG_TYPE, F, MaxDestLoc, Twine("most popular destination for conditional branches at ") + @@ -1365,6 +1433,26 @@ bool SampleProfileLoader::runOnModule(Module &M) { for (const auto &I : Reader->getProfiles()) TotalCollectedSamples += I.second.getTotalSamples(); + // Populate the symbol map. + for (const auto &N_F : M.getValueSymbolTable()) { + std::string OrigName = N_F.getKey(); + Function *F = dyn_cast(N_F.getValue()); + if (F == nullptr) + continue; + SymbolMap[OrigName] = F; + auto pos = OrigName.find('.'); + if (pos != std::string::npos) { + std::string NewName = OrigName.substr(0, pos); + auto r = SymbolMap.insert(std::make_pair(NewName, F)); + // Failiing to insert means there is already an entry in SymbolMap, + // thus there are multiple functions that are mapped to the same + // stripped name. In this case of name conflicting, set the value + // to nullptr to avoid confusion. + if (!r.second) + r.first->second = nullptr; + } + } + bool retval = false; for (auto &F : M) if (!F.isDeclaration()) { diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index fb64367eef917c77ac9e2a7dfe0ff9a0d83e676d..de1b51e206ff3eb1d1eefb3cb4a61b7c1818cb96 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -20,7 +20,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -30,6 +29,7 @@ #include "llvm/IR/TypeFinder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 65deb82cd2a5fbd11a13df6f5d9321c08ced3561..802f470ffe1fbac393a3ab8b079ba09b8dde1912 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -6,16 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This pass prepares a module containing type metadata for ThinLTO by splitting -// it into regular and thin LTO parts if possible, and writing both parts to -// a multi-module bitcode file. Modules that do not contain type metadata are -// written unmodified as a single module. -// -//===----------------------------------------------------------------------===// +#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Constants.h" @@ -30,45 +25,15 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; namespace { -// Produce a unique identifier for this module by taking the MD5 sum of the -// names of the module's strong external symbols. This identifier is -// normally guaranteed to be unique, or the program would fail to link due to -// multiply defined symbols. -// -// If the module has no strong external symbols (such a module may still have a -// semantic effect if it performs global initialization), we cannot produce a -// unique identifier for this module, so we return the empty string, which -// causes the entire module to be written as a regular LTO module. -std::string getModuleId(Module *M) { - MD5 Md5; - bool ExportsSymbols = false; - for (auto &GV : M->global_values()) { - if (GV.isDeclaration() || GV.getName().startswith("llvm.") || - !GV.hasExternalLinkage()) - continue; - ExportsSymbols = true; - Md5.update(GV.getName()); - Md5.update(ArrayRef{0}); - } - - if (!ExportsSymbols) - return ""; - - MD5::MD5Result R; - Md5.final(R); - - SmallString<32> Str; - MD5::stringifyResult(R, Str); - return ("$" + Str).str(); -} - // Promote each local-linkage entity defined by ExportM and used by ImportM by // changing visibility and appending the given ModuleId. -void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { +void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId, + SetVector &PromoteExtra) { DenseMap RenamedComdats; for (auto &ExportGV : ExportM.global_values()) { if (!ExportGV.hasLocalLinkage()) @@ -76,7 +41,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { auto Name = ExportGV.getName(); GlobalValue *ImportGV = ImportM.getNamedValue(Name); - if (!ImportGV || ImportGV->use_empty()) + if ((!ImportGV || ImportGV->use_empty()) && !PromoteExtra.count(&ExportGV)) continue; std::string NewName = (Name + ModuleId).str(); @@ -89,8 +54,10 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { ExportGV.setLinkage(GlobalValue::ExternalLinkage); ExportGV.setVisibility(GlobalValue::HiddenVisibility); - ImportGV->setName(NewName); - ImportGV->setVisibility(GlobalValue::HiddenVisibility); + if (ImportGV) { + ImportGV->setName(NewName); + ImportGV->setVisibility(GlobalValue::HiddenVisibility); + } } if (!RenamedComdats.empty()) @@ -209,7 +176,7 @@ void filterModule( else GO = new GlobalVariable( *M, GA->getValueType(), false, GlobalValue::ExternalLinkage, - (Constant *)nullptr, "", (GlobalVariable *)nullptr, + nullptr, "", nullptr, GA->getThreadLocalMode(), GA->getType()->getAddressSpace()); GO->takeName(GA); GA->replaceAllUsesWith(GO); @@ -251,7 +218,7 @@ void forEachVirtualFunction(Constant *C, function_ref Fn) { void splitAndWriteThinLTOBitcode( raw_ostream &OS, raw_ostream *ThinLinkOS, function_ref AARGetter, Module &M) { - std::string ModuleId = getModuleId(&M); + std::string ModuleId = getUniqueModuleId(&M); if (ModuleId.empty()) { // We couldn't generate a module ID for this module, just write it out as a // regular LTO module. @@ -332,6 +299,11 @@ void splitAndWriteThinLTOBitcode( F.setComdat(nullptr); } + SetVector CfiFunctions; + for (auto &F : M) + if ((!F.hasLocalLinkage() || F.hasAddressTaken()) && HasTypeMetadata(&F)) + CfiFunctions.insert(&F); + // Remove all globals with type metadata, globals with comdats that live in // MergedM, and aliases pointing to such globals from the thin LTO module. filterModule(&M, [&](const GlobalValue *GV) { @@ -344,14 +316,49 @@ void splitAndWriteThinLTOBitcode( return true; }); - promoteInternals(*MergedM, M, ModuleId); - promoteInternals(M, *MergedM, ModuleId); + promoteInternals(*MergedM, M, ModuleId, CfiFunctions); + promoteInternals(M, *MergedM, ModuleId, CfiFunctions); + + SmallVector CfiFunctionMDs; + for (auto V : CfiFunctions) { + Function &F = *cast(V); + SmallVector Types; + F.getMetadata(LLVMContext::MD_type, Types); + + auto &Ctx = MergedM->getContext(); + SmallVector Elts; + Elts.push_back(MDString::get(Ctx, F.getName())); + CfiFunctionLinkage Linkage; + if (!F.isDeclarationForLinker()) + Linkage = CFL_Definition; + else if (F.isWeakForLinker()) + Linkage = CFL_WeakDeclaration; + else + Linkage = CFL_Declaration; + Elts.push_back(ConstantAsMetadata::get( + llvm::ConstantInt::get(Type::getInt8Ty(Ctx), Linkage))); + for (auto Type : Types) + Elts.push_back(Type); + CfiFunctionMDs.push_back(MDTuple::get(Ctx, Elts)); + } - simplifyExternals(*MergedM); + if(!CfiFunctionMDs.empty()) { + NamedMDNode *NMD = MergedM->getOrInsertNamedMetadata("cfi.functions"); + for (auto MD : CfiFunctionMDs) + NMD->addOperand(MD); + } + simplifyExternals(*MergedM); // FIXME: Try to re-use BSI and PFI from the original module here. - ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr); + ProfileSummaryInfo PSI(M); + ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); + + // Mark the merged module as requiring full LTO. We still want an index for + // it though, so that it can participate in summary-based dead stripping. + MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + ModuleSummaryIndex MergedMIndex = + buildModuleSummaryIndex(*MergedM, nullptr, &PSI); SmallVector Buffer; @@ -362,7 +369,9 @@ void splitAndWriteThinLTOBitcode( ModuleHash ModHash = {{0}}; W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/true, &ModHash); - W.writeModule(MergedM.get()); + W.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, + &MergedMIndex); + W.writeStrtab(); OS << Buffer; // If a minimized bitcode module was requested for the thin link, @@ -374,7 +383,9 @@ void splitAndWriteThinLTOBitcode( StripDebugInfo(M); W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/false, &ModHash); - W2.writeModule(MergedM.get()); + W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, + &MergedMIndex); + W2.writeStrtab(); *ThinLinkOS << Buffer; } } @@ -463,3 +474,15 @@ ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str, raw_ostream *ThinLinkOS) { return new WriteThinLTOBitcode(Str, ThinLinkOS); } + +PreservedAnalyses +llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + writeThinLTOBitcode(OS, ThinLinkOS, + [&FAM](Function &F) -> AAResults & { + return FAM.getResult(F); + }, + M, &AM.getResult(M)); + return PreservedAnalyses::all(); +} diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp index cb7d487b68b0ba55c86d33f80077993dce87eb05..00769cd6322929f13ba0462f1c5aaaaaf28be2f6 100644 --- a/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -46,9 +46,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/TypeMetadataUtils.h" @@ -1322,7 +1322,7 @@ bool DevirtModule::run() { } for (auto &P : *ExportSummary) { - for (auto &S : P.second) { + for (auto &S : P.second.SummaryList) { auto *FS = dyn_cast(S.get()); if (!FS) continue; diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 1077121f8cb2b30304bc070f919757b65fd0b984..287a5167fe2ae9e23cbd6bcee127f9ed449493cc 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -794,6 +795,11 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { if (Opnd->isConstant()) continue; + // The constant check above is really for a few special constant + // coefficients. + if (isa(Opnd->getSymVal())) + continue; + const FAddendCoef &CE = Opnd->getCoef(); if (CE.isMinusOne() || CE.isMinusTwo()) NegOpndNum++; @@ -841,108 +847,28 @@ Value *FAddCombine::createAddendVal(const FAddend &Opnd, bool &NeedNeg) { return createFMul(OpndVal, Coeff.getValue(Instr->getType())); } -// If one of the operands only has one non-zero bit, and if the other -// operand has a known-zero bit in a more significant place than it (not -// including the sign bit) the ripple may go up to and fill the zero, but -// won't change the sign. For example, (X & ~4) + 1. -static bool checkRippleForAdd(const APInt &Op0KnownZero, - const APInt &Op1KnownZero) { - APInt Op1MaybeOne = ~Op1KnownZero; - // Make sure that one of the operand has at most one bit set to 1. - if (Op1MaybeOne.countPopulation() != 1) - return false; - - // Find the most significant known 0 other than the sign bit. - int BitWidth = Op0KnownZero.getBitWidth(); - APInt Op0KnownZeroTemp(Op0KnownZero); - Op0KnownZeroTemp.clearBit(BitWidth - 1); - int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1; - - int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1; - assert(Op1OnePosition >= 0); - - // This also covers the case of no known zero, since in that case - // Op0ZeroPosition is -1. - return Op0ZeroPosition >= Op1OnePosition; -} - -/// Return true if we can prove that: -/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) -/// This basically requires proving that the add in the original type would not -/// overflow to change the sign bit or have a carry out. -bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS, - Instruction &CxtI) { - // There are different heuristics we can use for this. Here are some simple - // ones. - - // If LHS and RHS each have at least two sign bits, the addition will look - // like - // - // XX..... + - // YY..... - // - // If the carry into the most significant position is 0, X and Y can't both - // be 1 and therefore the carry out of the addition is also 0. - // - // If the carry into the most significant position is 1, X and Y can't both - // be 0 and therefore the carry out of the addition is also 1. - // - // Since the carry into the most significant position is always equal to - // the carry out of the addition, there is no signed overflow. - if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 && - ComputeNumSignBits(RHS, 0, &CxtI) > 1) - return true; - - unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI); - - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI); - - // Addition of two 2's complement numbers having opposite signs will never - // overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) - return true; - - // Check if carry bit of addition will not cause overflow. - if (checkRippleForAdd(LHSKnownZero, RHSKnownZero)) - return true; - if (checkRippleForAdd(RHSKnownZero, LHSKnownZero)) - return true; - - return false; -} - /// \brief Return true if we can prove that: /// (sub LHS, RHS) === (sub nsw LHS, RHS) /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. /// TODO: Handle this for Vectors. -bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS, - Instruction &CxtI) { +bool InstCombiner::willNotOverflowSignedSub(const Value *LHS, + const Value *RHS, + const Instruction &CxtI) const { // If LHS and RHS each have at least two sign bits, the subtraction // cannot overflow. if (ComputeNumSignBits(LHS, 0, &CxtI) > 1 && ComputeNumSignBits(RHS, 0, &CxtI) > 1) return true; - unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - APInt LHSKnownZero(BitWidth, 0); - APInt LHSKnownOne(BitWidth, 0); - computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI); + KnownBits LHSKnown = computeKnownBits(LHS, 0, &CxtI); - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI); + KnownBits RHSKnown = computeKnownBits(RHS, 0, &CxtI); // Subtraction of two 2's complement numbers having identical signs will // never overflow. - if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) || - (LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1])) + if ((LHSKnown.isNegative() && RHSKnown.isNegative()) || + (LHSKnown.isNonNegative() && RHSKnown.isNonNegative())) return true; // TODO: implement logic similar to checkRippleForAdd @@ -951,16 +877,13 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS, /// \brief Return true if we can prove that: /// (sub LHS, RHS) === (sub nuw LHS, RHS) -bool InstCombiner::WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, - Instruction &CxtI) { +bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS, + const Value *RHS, + const Instruction &CxtI) const { // If the LHS is negative and the RHS is non-negative, no unsigned wrap. - bool LHSKnownNonNegative, LHSKnownNegative; - bool RHSKnownNonNegative, RHSKnownNegative; - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, /*Depth=*/0, - &CxtI); - ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, /*Depth=*/0, - &CxtI); - if (LHSKnownNegative && RHSKnownNonNegative) + KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, &CxtI); + KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, &CxtI); + if (LHSKnown.isNegative() && RHSKnown.isNonNegative()) return true; return false; @@ -1027,6 +950,58 @@ static Value *checkForNegativeOperand(BinaryOperator &I, return nullptr; } +static Instruction *foldAddWithConstant(BinaryOperator &Add, + InstCombiner::BuilderTy &Builder) { + Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1); + const APInt *C; + if (!match(Op1, m_APInt(C))) + return nullptr; + + if (C->isSignMask()) { + // If wrapping is not allowed, then the addition must set the sign bit: + // X + (signmask) --> X | signmask + if (Add.hasNoSignedWrap() || Add.hasNoUnsignedWrap()) + return BinaryOperator::CreateOr(Op0, Op1); + + // If wrapping is allowed, then the addition flips the sign bit of LHS: + // X + (signmask) --> X ^ signmask + return BinaryOperator::CreateXor(Op0, Op1); + } + + Value *X; + const APInt *C2; + Type *Ty = Add.getType(); + + // Is this add the last step in a convoluted sext? + // add(zext(xor i16 X, -32768), -32768) --> sext X + if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) && + C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C) + return CastInst::Create(Instruction::SExt, X, Ty); + + // (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C)) + // FIXME: This should check hasOneUse to not increase the instruction count? + if (C->isNegative() && + match(Op0, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2)))) && + C->sge(-C2->sext(C->getBitWidth()))) { + Constant *NewC = + ConstantInt::get(X->getType(), *C2 + C->trunc(C2->getBitWidth())); + return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty); + } + + // Shifts and add used to flip and mask off the low bit: + // add (ashr (shl i32 X, 31), 31), 1 --> and (not X), 1 + const APInt *C3; + if (C->isOneValue() && + match(Op0, + m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C2)), m_APInt(C3)))) && + C2 == C3 && *C2 == Ty->getScalarSizeInBits() - 1) { + Value *NotX = Builder.CreateNot(X); + return BinaryOperator::CreateAnd(NotX, ConstantInt::get(Ty, 1)); + } + + return nullptr; +} + Instruction *InstCombiner::visitAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); @@ -1034,49 +1009,20 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + if (Value *V = + SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // (A*B)+(A*C) -> A*(B+C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - const APInt *RHSC; - if (match(RHS, m_APInt(RHSC))) { - if (RHSC->isSignBit()) { - // If wrapping is not allowed, then the addition must set the sign bit: - // X + (signbit) --> X | signbit - if (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) - return BinaryOperator::CreateOr(LHS, RHS); - - // If wrapping is allowed, then the addition flips the sign bit of LHS: - // X + (signbit) --> X ^ signbit - return BinaryOperator::CreateXor(LHS, RHS); - } - - // Is this add the last step in a convoluted sext? - Value *X; - const APInt *C; - if (match(LHS, m_ZExt(m_Xor(m_Value(X), m_APInt(C)))) && - C->isMinSignedValue() && - C->sext(LHS->getType()->getScalarSizeInBits()) == *RHSC) { - // add(zext(xor i16 X, -32768), -32768) --> sext X - return CastInst::Create(Instruction::SExt, X, LHS->getType()); - } - - if (RHSC->isNegative() && - match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) && - RHSC->sge(-C->sext(RHSC->getBitWidth()))) { - // (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val)) - Constant *NewC = - ConstantInt::get(X->getType(), *C + RHSC->trunc(C->getBitWidth())); - return new ZExtInst(Builder->CreateNUWAdd(X, NewC), I.getType()); - } - } + if (Instruction *X = foldAddWithConstant(I, *Builder)) + return X; - // FIXME: Use the match above instead of dyn_cast to allow these transforms - // for splat vectors. + // FIXME: This should be moved into the above helper function to allow these + // transforms for splat vectors. if (ConstantInt *CI = dyn_cast(RHS)) { // zext(bool) + C -> bool ? C + 1 : C if (ZExtInst *ZI = dyn_cast(LHS)) @@ -1112,17 +1058,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // If this is a xor that was canonicalized from a sub, turn it back into // a sub and fuse this add with it. if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { - IntegerType *IT = cast(I.getType()); - APInt LHSKnownOne(IT->getBitWidth(), 0); - APInt LHSKnownZero(IT->getBitWidth(), 0); - computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne, 0, &I); - if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) + KnownBits LHSKnown = computeKnownBits(XorLHS, 0, &I); + if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } - // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, - // transform them into (X + (signbit ^ C)) - if (XorRHS->getValue().isSignBit()) + // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C, + // transform them into (X + (signmask ^ C)) + if (XorRHS->getValue().isSignMask()) return BinaryOperator::CreateAdd(XorLHS, ConstantExpr::getXor(XorRHS, CI)); } @@ -1236,7 +1179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Constant *CI = ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (ConstantExpr::getSExt(CI, I.getType()) == RHSC && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { + willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); @@ -1253,7 +1196,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (LHSConv->getOperand(0)->getType() == RHSConv->getOperand(0)->getType() && (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSConv->getOperand(0), + willNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), I)) { // Insert the new integer add. Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), @@ -1272,8 +1215,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Constant *CI = ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (ConstantExpr::getZExt(CI, I.getType()) == RHSC && - computeOverflowForUnsignedAdd(LHSConv->getOperand(0), CI, &I) == - OverflowResult::NeverOverflows) { + willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. Value *NewAdd = Builder->CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); @@ -1290,9 +1232,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (LHSConv->getOperand(0)->getType() == RHSConv->getOperand(0)->getType() && (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - computeOverflowForUnsignedAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), - &I) == OverflowResult::NeverOverflows) { + willNotOverflowUnsignedAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), I)) { // Insert the new integer add. Value *NewAdd = Builder->CreateNUWAdd( LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); @@ -1333,16 +1274,14 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } - // TODO(jingyue): Consider WillNotOverflowSignedAdd and - // WillNotOverflowUnsignedAdd to reduce the number of invocations of + // TODO(jingyue): Consider willNotOverflowSignedAdd and + // willNotOverflowUnsignedAdd to reduce the number of invocations of // computeKnownBits. - if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) { + if (!I.hasNoSignedWrap() && willNotOverflowSignedAdd(LHS, RHS, I)) { Changed = true; I.setHasNoSignedWrap(true); } - if (!I.hasNoUnsignedWrap() && - computeOverflowForUnsignedAdd(LHS, RHS, &I) == - OverflowResult::NeverOverflows) { + if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedAdd(LHS, RHS, I)) { Changed = true; I.setHasNoUnsignedWrap(true); } @@ -1357,8 +1296,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = - SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (isa(RHS)) @@ -1385,39 +1324,58 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast(LHS)) { Value *LHSIntVal = LHSConv->getOperand(0); + Type *FPType = LHSConv->getType(); + + // TODO: This check is overly conservative. In many cases known bits + // analysis can tell us that the result of the addition has less significant + // bits than the integer type can hold. + auto IsValidPromotion = [](Type *FTy, Type *ITy) { + Type *FScalarTy = FTy->getScalarType(); + Type *IScalarTy = ITy->getScalarType(); + + // Do we have enough bits in the significand to represent the result of + // the integer addition? + unsigned MaxRepresentableBits = + APFloat::semanticsPrecision(FScalarTy->getFltSemantics()); + return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits; + }; // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) // ... if the constant fits in the integer value. This is useful for things // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer // requires a constant pool load, and generally allows the add to be better // instcombined. - if (ConstantFP *CFP = dyn_cast(RHS)) { - Constant *CI = - ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); - if (LHSConv->hasOneUse() && - ConstantExpr::getSIToFP(CI, I.getType()) == CFP && - WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { - // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - CI, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + if (ConstantFP *CFP = dyn_cast(RHS)) + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + Constant *CI = + ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + willNotOverflowSignedAdd(LHSIntVal, CI, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + CI, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } - } // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast(RHS)) { Value *RHSIntVal = RHSConv->getOperand(0); - - // Only do this if x/y have the same type, if at least one of them has a - // single use (so we don't increase the number of int->fp conversions), - // and if the integer add will not overflow. - if (LHSIntVal->getType() == RHSIntVal->getType() && - (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { - // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - RHSIntVal, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + // It's enough to check LHS types only because we require int types to + // be the same for this transform. + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + // Only do this if x/y have the same type, if at least one of them has a + // single use (so we don't increase the number of int->fp conversions), + // and if the integer add will not overflow. + if (LHSIntVal->getType() == RHSIntVal->getType() && + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + RHSIntVal, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } } } @@ -1529,8 +1487,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + if (Value *V = + SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // (A*B)-(A*C) -> A*(B-C) etc @@ -1572,6 +1531,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; + // Try to fold constant sub into PHI values. + if (PHINode *PN = dyn_cast(Op1)) + if (Instruction *R = foldOpIntoPhi(I, PN)) + return R; + // C-(X+C2) --> (C-C2)-X Constant *C2; if (match(Op1, m_Add(m_Value(X), m_Constant(C2)))) @@ -1594,7 +1558,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) - if (*Op0C == 0) { + if (Op0C->isNullValue()) { Value *X; const APInt *ShAmt; if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) && @@ -1612,10 +1576,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known // zero. if (Op0C->isMask()) { - APInt RHSKnownZero(BitWidth, 0); - APInt RHSKnownOne(BitWidth, 0); - computeKnownBits(Op1, RHSKnownZero, RHSKnownOne, 0, &I); - if ((*Op0C | RHSKnownZero).isAllOnesValue()) + KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); + if ((*Op0C | RHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateXor(Op1, Op0); } } @@ -1714,11 +1676,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return replaceInstUsesWith(I, Res); bool Changed = false; - if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) { + if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) { Changed = true; I.setHasNoSignedWrap(true); } - if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) { + if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedSub(Op0, Op1, I)) { Changed = true; I.setHasNoUnsignedWrap(true); } @@ -1732,8 +1694,8 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = - SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // fsub nsz 0, X ==> fsub nsz -0.0, X diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 99a983ab47479b3a8eee251cf1865822f2f1dd67..a881bda5ba98dcb93b1898688575301b30190ac0 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -23,21 +23,6 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -static inline Value *dyn_castNotVal(Value *V) { - // If this is not(not(x)) don't return that this is a not: we want the two - // not's to be folded first. - if (BinaryOperator::isNot(V)) { - Value *Operand = BinaryOperator::getNotArgument(V); - if (!IsFreeToInvert(Operand, Operand->hasOneUse())) - return Operand; - } - - // Constants can be considered to be not'ed values... - if (ConstantInt *C = dyn_cast(V)) - return ConstantInt::get(C->getType(), ~C->getValue()); - return nullptr; -} - /// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into /// a four bit mask. static unsigned getFCmpCode(FCmpInst::Predicate CC) { @@ -187,12 +172,12 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, const APInt& AddRHS = OpRHS->getValue(); // Check to see if any bits below the one bit set in AndRHSV are set. - if ((AddRHS & (AndRHSV-1)) == 0) { + if ((AddRHS & (AndRHSV - 1)).isNullValue()) { // If not, the only thing that can effect the output of the AND is // the bit specified by AndRHSV. If that bit is set, the effect of // the XOR is to toggle the bit. If it is clear, then the ADD has // no effect. - if ((AddRHS & AndRHSV) == 0) { // Bit is not set, noop + if ((AddRHS & AndRHSV).isNullValue()) { // Bit is not set, noop TheAnd.setOperand(0, X); return &TheAnd; } else { @@ -656,7 +641,7 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // If there is a conflict, we should actually return a false for the // whole construct. if (((BCst->getValue() & DCst->getValue()) & - (CCst->getValue() ^ ECst->getValue())) != 0) + (CCst->getValue() ^ ECst->getValue())).getBoolValue()) return ConstantInt::get(LHS->getType(), !IsAnd); Value *NewOr1 = Builder->CreateOr(B, D); @@ -713,9 +698,8 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, } // This simplification is only valid if the upper range is not negative. - bool IsNegative, IsNotNegative; - ComputeSignBit(RangeEnd, IsNotNegative, IsNegative, /*Depth=*/0, Cmp1); - if (!IsNotNegative) + KnownBits Known = computeKnownBits(RangeEnd, /*Depth=*/0, Cmp1); + if (!Known.isNonNegative()) return nullptr; if (Inverted) @@ -724,8 +708,109 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, return Builder->CreateICmp(NewPred, Input, RangeEnd); } +static Value * +foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, + bool JoinedByAnd, + InstCombiner::BuilderTy *Builder) { + Value *X = LHS->getOperand(0); + if (X != RHS->getOperand(0)) + return nullptr; + + const APInt *C1, *C2; + if (!match(LHS->getOperand(1), m_APInt(C1)) || + !match(RHS->getOperand(1), m_APInt(C2))) + return nullptr; + + // We only handle (X != C1 && X != C2) and (X == C1 || X == C2). + ICmpInst::Predicate Pred = LHS->getPredicate(); + if (Pred != RHS->getPredicate()) + return nullptr; + if (JoinedByAnd && Pred != ICmpInst::ICMP_NE) + return nullptr; + if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ) + return nullptr; + + // The larger unsigned constant goes on the right. + if (C1->ugt(*C2)) + std::swap(C1, C2); + + APInt Xor = *C1 ^ *C2; + if (Xor.isPowerOf2()) { + // If LHSC and RHSC differ by only one bit, then set that bit in X and + // compare against the larger constant: + // (X == C1 || X == C2) --> (X | (C1 ^ C2)) == C2 + // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2 + // We choose an 'or' with a Pow2 constant rather than the inverse mask with + // 'and' because that may lead to smaller codegen from a smaller constant. + Value *Or = Builder->CreateOr(X, ConstantInt::get(X->getType(), Xor)); + return Builder->CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2)); + } + + // Special case: get the ordering right when the values wrap around zero. + // Ie, we assumed the constants were unsigned when swapping earlier. + if (C1->isNullValue() && C2->isAllOnesValue()) + std::swap(C1, C2); + + if (*C1 == *C2 - 1) { + // (X == 13 || X == 14) --> X - 13 <=u 1 + // (X != 13 && X != 14) --> X - 13 >u 1 + // An 'add' is the canonical IR form, so favor that over a 'sub'. + Value *Add = Builder->CreateAdd(X, ConstantInt::get(X->getType(), -(*C1))); + auto NewPred = JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE; + return Builder->CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1)); + } + + return nullptr; +} + +// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) +// Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) +Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, + bool JoinedByAnd, + Instruction &CxtI) { + ICmpInst::Predicate Pred = LHS->getPredicate(); + if (Pred != RHS->getPredicate()) + return nullptr; + if (JoinedByAnd && Pred != ICmpInst::ICMP_NE) + return nullptr; + if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ) + return nullptr; + + // TODO support vector splats + ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); + ConstantInt *RHSC = dyn_cast(RHS->getOperand(1)); + if (!LHSC || !RHSC || !LHSC->isZero() || !RHSC->isZero()) + return nullptr; + + Value *A, *B, *C, *D; + if (match(LHS->getOperand(0), m_And(m_Value(A), m_Value(B))) && + match(RHS->getOperand(0), m_And(m_Value(C), m_Value(D)))) { + if (A == D || B == D) + std::swap(C, D); + if (B == C) + std::swap(A, B); + + if (A == C && + isKnownToBeAPowerOfTwo(B, false, 0, &CxtI) && + isKnownToBeAPowerOfTwo(D, false, 0, &CxtI)) { + Value *Mask = Builder->CreateOr(B, D); + Value *Masked = Builder->CreateAnd(A, Mask); + auto NewPred = JoinedByAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; + return Builder->CreateICmp(NewPred, Masked, Mask); + } + } + + return nullptr; +} + /// Fold (icmp)&(icmp) if possible. -Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, + Instruction &CxtI) { + // Fold (!iszero(A & K1) & !iszero(A & K2)) -> (A & (K1 | K2)) == (K1 | K2) + // if K1 and K2 are a one-bit mask. + if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, true, CxtI)) + return V; + ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) @@ -754,6 +839,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/false)) return V; + if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder)) + return V; + // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0); ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); @@ -798,7 +886,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // Check that the low bits are zero. APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); - if ((Low & AndC->getValue()) == 0 && (Low & BigC->getValue()) == 0) { + if ((Low & AndC->getValue()).isNullValue() && + (Low & BigC->getValue()).isNullValue()) { Value *NewAnd = Builder->CreateAnd(V, Low | AndC->getValue()); APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue(); Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N); @@ -848,15 +937,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (PredL) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 - case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 - case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 - return LHS; - } case ICmpInst::ICMP_NE: switch (PredR) { default: @@ -872,52 +952,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13 return Builder->CreateICmpSLT(LHS0, LHSC); break; // (X != 13 & X s< 15) -> no change - case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 - case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 - return RHS; case ICmpInst::ICMP_NE: - // Special case to get the ordering right when the values wrap around - // zero. - if (LHSC->getValue() == 0 && RHSC->getValue().isAllOnesValue()) - std::swap(LHSC, RHSC); - if (LHSC == SubOne(RHSC)) { // (X != 13 & X != 14) -> X-13 >u 1 - Constant *AddC = ConstantExpr::getNeg(LHSC); - Value *Add = Builder->CreateAdd(LHS0, AddC, LHS0->getName() + ".off"); - return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1), - LHS0->getName() + ".cmp"); - } - break; // (X != 13 & X != 15) -> no change - } - break; - case ICmpInst::ICMP_ULT: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); - case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 - case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 - return LHS; - } - break; - case ICmpInst::ICMP_SLT: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 - case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 - return LHS; + // Potential folds for this case should already be handled. + break; } break; case ICmpInst::ICMP_UGT: switch (PredR) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 - return RHS; case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14 return Builder->CreateICmp(PredL, LHS0, RHSC); @@ -931,9 +974,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { switch (PredR) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 - case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 - return RHS; case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14 return Builder->CreateICmp(PredL, LHS0, RHSC); @@ -950,7 +990,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { /// Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of instcombine, this returns /// a Value which should already be inserted into the function. -Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { +Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); @@ -1004,26 +1044,22 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { /// (~A & ~B) == (~(A | B)) /// (~A | ~B) == (~(A & B)) static Instruction *matchDeMorgansLaws(BinaryOperator &I, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { auto Opcode = I.getOpcode(); assert((Opcode == Instruction::And || Opcode == Instruction::Or) && "Trying to match De Morgan's Laws with something other than and/or"); + // Flip the logic operation. - if (Opcode == Instruction::And) - Opcode = Instruction::Or; - else - Opcode = Instruction::And; + Opcode = (Opcode == Instruction::And) ? Instruction::Or : Instruction::And; - Value *Op0 = I.getOperand(0); - Value *Op1 = I.getOperand(1); - // TODO: Use pattern matchers instead of dyn_cast. - if (Value *Op0NotVal = dyn_castNotVal(Op0)) - if (Value *Op1NotVal = dyn_castNotVal(Op1)) - if (Op0->hasOneUse() && Op1->hasOneUse()) { - Value *LogicOp = Builder->CreateBinOp(Opcode, Op0NotVal, Op1NotVal, - I.getName() + ".demorgan"); - return BinaryOperator::CreateNot(LogicOp); - } + Value *A, *B; + if (match(I.getOperand(0), m_OneUse(m_Not(m_Value(A)))) && + match(I.getOperand(1), m_OneUse(m_Not(m_Value(B)))) && + !IsFreeToInvert(A, A->hasOneUse()) && + !IsFreeToInvert(B, B->hasOneUse())) { + Value *AndOr = Builder.CreateBinOp(Opcode, A, B, I.getName() + ".demorgan"); + return BinaryOperator::CreateNot(AndOr); + } return nullptr; } @@ -1137,8 +1173,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { ICmpInst *ICmp0 = dyn_cast(Cast0Src); ICmpInst *ICmp1 = dyn_cast(Cast1Src); if (ICmp0 && ICmp1) { - Value *Res = LogicOpc == Instruction::And ? FoldAndOfICmps(ICmp0, ICmp1) - : FoldOrOfICmps(ICmp0, ICmp1, &I); + Value *Res = LogicOpc == Instruction::And ? foldAndOfICmps(ICmp0, ICmp1, I) + : foldOrOfICmps(ICmp0, ICmp1, I); if (Res) return CastInst::Create(CastOpcode, Res, DestTy); return nullptr; @@ -1149,8 +1185,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { FCmpInst *FCmp0 = dyn_cast(Cast0Src); FCmpInst *FCmp1 = dyn_cast(Cast1Src); if (FCmp0 && FCmp1) { - Value *Res = LogicOpc == Instruction::And ? FoldAndOfFCmps(FCmp0, FCmp1) - : FoldOrOfFCmps(FCmp0, FCmp1); + Value *Res = LogicOpc == Instruction::And ? foldAndOfFCmps(FCmp0, FCmp1) + : foldOrOfFCmps(FCmp0, FCmp1); if (Res) return CastInst::Create(CastOpcode, Res, DestTy); return nullptr; @@ -1185,6 +1221,56 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) { return nullptr; } +static Instruction *foldAndToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.getOpcode() == Instruction::And); + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *A, *B; + + // Operand complexity canonicalization guarantees that the 'or' is Op0. + // (A | B) & ~(A & B) --> A ^ B + // (A | B) & ~(B & A) --> A ^ B + if (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateXor(A, B); + + // (A | ~B) & (~A | B) --> ~(A ^ B) + // (A | ~B) & (B | ~A) --> ~(A ^ B) + // (~B | A) & (~A | B) --> ~(A ^ B) + // (~B | A) & (B | ~A) --> ~(A ^ B) + if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B)))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + + return nullptr; +} + +static Instruction *foldOrToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.getOpcode() == Instruction::Or); + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *A, *B; + + // Operand complexity canonicalization guarantees that the 'and' is Op0. + // (A & B) | ~(A | B) --> ~(A ^ B) + // (A & B) | ~(B | A) --> ~(A ^ B) + if (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + + // (A & ~B) | (~A & B) --> A ^ B + // (A & ~B) | (B & ~A) --> A ^ B + // (~B & A) | (~A & B) --> A ^ B + // (~B & A) | (B & ~A) --> A ^ B + if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))) + return BinaryOperator::CreateXor(A, B); + + return nullptr; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -1195,11 +1281,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC)) - return replaceInstUsesWith(I, V); - - // (A|B)&(A|C) -> A|(B&C) etc - if (Value *V = SimplifyUsingDistributiveLaws(I)) + if (Value *V = SimplifyAndInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -1207,6 +1289,14 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; + // Do this before using distributive laws to catch simple and/or/not patterns. + if (Instruction *Xor = foldAndToXor(I, *Builder)) + return Xor; + + // (A|B)&(A|C) -> A|(B&C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); @@ -1243,7 +1333,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } case Instruction::Sub: // -x & 1 -> x & 1 - if (AndRHSMask == 1 && match(Op0LHS, m_Zero())) + if (AndRHSMask.isOneValue() && match(Op0LHS, m_Zero())) return BinaryOperator::CreateAnd(Op0RHS, AndRHS); break; @@ -1252,7 +1342,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { case Instruction::LShr: // (1 << x) & 1 --> zext(x == 0) // (1 >> x) & 1 --> zext(x == 0) - if (AndRHSMask == 1 && Op0LHS == AndRHS) { + if (AndRHSMask.isOneValue() && Op0LHS == AndRHS) { Value *NewICmp = Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); return new ZExtInst(NewICmp, I.getType()); @@ -1313,23 +1403,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I)) return FoldedLogic; - if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder)) return DeMorgan; { - Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; - // (A|B) & ~(A&B) -> A^B - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - - // ~(A&B) & (A|B) -> A^B - if (match(Op1, m_Or(m_Value(A), m_Value(B))) && - match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) && - ((A == C && B == D) || (A == D && B == C))) - return BinaryOperator::CreateXor(A, B); - + Value *A = nullptr, *B = nullptr, *C = nullptr; // A&(A^B) => A & ~B { Value *tmpOp0 = Op0; @@ -1356,11 +1434,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } // (A&((~A)|B)) -> A&B - if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) || - match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1))))) + if (match(Op0, m_c_Or(m_Not(m_Specific(Op1)), m_Value(A)))) return BinaryOperator::CreateAnd(A, Op1); - if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) || - match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0))))) + if (match(Op1, m_c_Or(m_Not(m_Specific(Op0)), m_Value(A)))) return BinaryOperator::CreateAnd(A, Op0); // (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C @@ -1376,13 +1452,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B)))) + // (A | B) & (B ^ (~A)) -> (A & B) + // (B | A) & ((~A) ^ B) -> (A & B) + // (B | A) & (B ^ (~A)) -> (A & B) + if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && + match(Op0, m_c_Or(m_Specific(A), m_Specific(B)))) return BinaryOperator::CreateAnd(A, B); // ((~A) ^ B) & (A | B) -> (A & B) // ((~A) ^ B) & (B | A) -> (A & B) - if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) && + // (B ^ (~A)) & (A | B) -> (A & B) + // (B ^ (~A)) & (B | A) -> (A & B) + if (match(Op0, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) return BinaryOperator::CreateAnd(A, B); } @@ -1391,7 +1472,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { ICmpInst *LHS = dyn_cast(Op0); ICmpInst *RHS = dyn_cast(Op1); if (LHS && RHS) - if (Value *Res = FoldAndOfICmps(LHS, RHS)) + if (Value *Res = foldAndOfICmps(LHS, RHS, I)) return replaceInstUsesWith(I, Res); // TODO: Make this recursive; it's a little tricky because an arbitrary @@ -1399,18 +1480,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { Value *X, *Y; if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) - if (Value *Res = FoldAndOfICmps(LHS, Cmp)) + if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = FoldAndOfICmps(LHS, Cmp)) + if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); } if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) - if (Value *Res = FoldAndOfICmps(Cmp, RHS)) + if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = FoldAndOfICmps(Cmp, RHS)) + if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); } } @@ -1418,7 +1499,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // If and'ing two fcmp, try combine them into one. if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Value *Res = FoldAndOfFCmps(LHS, RHS)) + if (Value *Res = foldAndOfFCmps(LHS, RHS)) return replaceInstUsesWith(I, Res); if (Instruction *CastedAnd = foldCastedBitwiseLogic(I)) @@ -1555,46 +1636,17 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D, } /// Fold (icmp)|(icmp) if possible. -Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, - Instruction *CxtI) { - ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - +Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, + Instruction &CxtI) { // Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2) // if K1 and K2 are a one-bit mask. - ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); - ConstantInt *RHSC = dyn_cast(RHS->getOperand(1)); + if (Value *V = foldAndOrOfICmpsOfAndWithPow2(LHS, RHS, false, CxtI)) + return V; - if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSC && LHSC->isZero() && - RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSC && RHSC->isZero()) { - - BinaryOperator *LAnd = dyn_cast(LHS->getOperand(0)); - BinaryOperator *RAnd = dyn_cast(RHS->getOperand(0)); - if (LAnd && RAnd && LAnd->hasOneUse() && RHS->hasOneUse() && - LAnd->getOpcode() == Instruction::And && - RAnd->getOpcode() == Instruction::And) { - - Value *Mask = nullptr; - Value *Masked = nullptr; - if (LAnd->getOperand(0) == RAnd->getOperand(0) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT)) { - Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); - Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); - } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT)) { - Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); - Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); - } + ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); - if (Masked) - return Builder->CreateICmp(ICmpInst::ICMP_NE, Masked, Mask); - } - } + ConstantInt *LHSC = dyn_cast(LHS->getOperand(1)); + ConstantInt *RHSC = dyn_cast(RHS->getOperand(1)); // Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3) // --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3) @@ -1705,6 +1757,9 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true)) return V; + if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder)) + return V; + // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). if (!LHSC || !RHSC) return nullptr; @@ -1772,50 +1827,13 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: - if (LHS->getOperand(0) == RHS->getOperand(0)) { - // if LHSC and RHSC differ only by one bit: - // (A == C1 || A == C2) -> (A | (C1 ^ C2)) == C2 - assert(LHSC->getValue().ule(LHSC->getValue())); - - APInt Xor = LHSC->getValue() ^ RHSC->getValue(); - if (Xor.isPowerOf2()) { - Value *C = Builder->getInt(Xor); - Value *Or = Builder->CreateOr(LHS->getOperand(0), C); - return Builder->CreateICmp(ICmpInst::ICMP_EQ, Or, RHSC); - } - } - - if (LHSC == SubOne(RHSC)) { - // (X == 13 | X == 14) -> X-13 CreateAdd(LHS0, AddC, LHS0->getName() + ".off"); - AddC = ConstantExpr::getSub(AddOne(RHSC), LHSC); - return Builder->CreateICmpULT(Add, AddC); - } - - break; // (X == 13 | X == 15) -> no change + // Potential folds for this case should already be handled. + break; case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change break; - case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 - case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 - case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 - return RHS; } break; - case ICmpInst::ICMP_NE: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 - case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 - case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 - return LHS; - case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true - case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true - case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true - return Builder->getTrue(); - } case ICmpInst::ICMP_ULT: switch (PredR) { default: @@ -1823,15 +1841,9 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change break; case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 - // If RHSC is [us]MAXINT, it is always false. Not handling - // this can cause overflow. - if (RHSC->isMaxValue(false)) - return LHS; + assert(!RHSC->isMaxValue(false) && "Missed icmp simplification"); return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1, false, false); - case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 - case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 - return RHS; } break; case ICmpInst::ICMP_SLT: @@ -1841,39 +1853,9 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change break; case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 - // If RHSC is [us]MAXINT, it is always false. Not handling - // this can cause overflow. - if (RHSC->isMaxValue(true)) - return LHS; + assert(!RHSC->isMaxValue(true) && "Missed icmp simplification"); return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1, true, false); - case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 - case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 - return RHS; - } - break; - case ICmpInst::ICMP_UGT: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 - case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 - return LHS; - case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true - case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true - return Builder->getTrue(); - } - break; - case ICmpInst::ICMP_SGT: - switch (PredR) { - default: - llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 - case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 - return LHS; - case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true - case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true - return Builder->getTrue(); } break; } @@ -1882,7 +1864,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, /// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns /// a Value which should already be inserted into the function. -Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { +Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); @@ -2002,11 +1984,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC)) - return replaceInstUsesWith(I, V); - - // (A&B)|(A&C) -> A&(B|C) etc - if (Value *V = SimplifyUsingDistributiveLaws(I)) + if (Value *V = SimplifyOrInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // See if we can simplify any instructions used by the instruction whose sole @@ -2014,20 +1992,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; - if (Value *V = SimplifyBSwap(I)) + // Do this before using distributive laws to catch simple and/or/not patterns. + if (Instruction *Xor = foldOrToXor(I, *Builder)) + return Xor; + + // (A&B)|(A&C) -> A&(B|C) etc + if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (ConstantInt *RHS = dyn_cast(Op1)) { - ConstantInt *C1 = nullptr; Value *X = nullptr; - // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) - if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && - Op0->hasOneUse()) { - Value *Or = Builder->CreateOr(X, RHS); - Or->takeName(Op0); - return BinaryOperator::CreateXor(Or, - Builder->getInt(C1->getValue() & ~RHS->getValue())); - } - } + if (Value *V = SimplifyBSwap(I)) + return replaceInstUsesWith(I, V); if (isa(Op1)) if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I)) @@ -2046,7 +2020,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *NOr = Builder->CreateOr(A, Op1); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, - cast(Op0)->getOperand(1)); + ConstantInt::get(NOr->getType(), *C)); } // Y|(X^C) -> (X|Y)^C iff Y&C == 0 @@ -2055,7 +2029,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *NOr = Builder->CreateOr(A, Op0); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, - cast(Op1)->getOperand(1)); + ConstantInt::get(NOr->getType(), *C)); } } @@ -2073,19 +2047,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { match(Op0, m_c_And(m_Specific(A), m_Value(B)))) return BinaryOperator::CreateOr(Op1, B); - // (A & ~B) | (A ^ B) -> (A ^ B) - // (~B & A) | (A ^ B) -> (A ^ B) - if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_Xor(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - - // Commute the 'or' operands. - // (A ^ B) | (A & ~B) -> (A ^ B) - // (A ^ B) | (~B & A) -> (A ^ B) - if (match(Op1, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op0, m_Xor(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - // (A & C)|(B & D) Value *C = nullptr, *D = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(C))) && @@ -2094,7 +2055,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ConstantInt *C1 = dyn_cast(C); ConstantInt *C2 = dyn_cast(D); if (C1 && C2) { // (A & C1)|(B & C2) - if ((C1->getValue() & C2->getValue()) == 0) { + if ((C1->getValue() & C2->getValue()).isNullValue()) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) // iff (C1&C2) == 0 and (N&~C1) == 0 if (match(A, m_Or(m_Value(V1), m_Value(V2))) && @@ -2117,9 +2078,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. ConstantInt *C3 = nullptr, *C4 = nullptr; if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && - (C3->getValue() & ~C1->getValue()) == 0 && + (C3->getValue() & ~C1->getValue()).isNullValue() && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && - (C4->getValue() & ~C2->getValue()) == 0) { + (C4->getValue() & ~C2->getValue()).isNullValue()) { V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); return BinaryOperator::CreateAnd(V2, Builder->getInt(C1->getValue()|C2->getValue())); @@ -2150,23 +2111,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return replaceInstUsesWith(I, V); } - // ((A&~B)|(~A&B)) -> A^B - if ((match(C, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, D); - // ((~B&A)|(~A&B)) -> A^B - if ((match(A, m_Not(m_Specific(D))) && - match(B, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, D); - // ((A&~B)|(B&~A)) -> A^B - if ((match(C, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(A))))) - return BinaryOperator::CreateXor(A, B); - // ((~B&A)|(B&~A)) -> A^B - if ((match(A, m_Not(m_Specific(B))) && - match(D, m_Not(m_Specific(C))))) - return BinaryOperator::CreateXor(C, B); - // ((A|B)&1)|(B&-2) -> (A&1) | B if (match(A, m_Or(m_Value(V1), m_Specific(B))) || match(A, m_Or(m_Specific(B), m_Value(V1)))) { @@ -2194,12 +2138,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C + // FIXME: The two hasOneUse calls here are the same call, maybe we were + // supposed to check Op1->operand(0)? if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) if (Op1->hasOneUse() || cast(Op1)->hasOneUse()) return BinaryOperator::CreateOr(Op0, C); // ((A ^ C) ^ B) | (B ^ A) -> (B ^ A) | C + // FIXME: The two hasOneUse calls here are the same call, maybe we were + // supposed to check Op0->operand(0)? if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) if (Op0->hasOneUse() || cast(Op0)->hasOneUse()) @@ -2209,7 +2157,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C)); - if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder)) return DeMorgan; // Canonicalize xor to the RHS. @@ -2271,7 +2219,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ICmpInst *LHS = dyn_cast(Op0); ICmpInst *RHS = dyn_cast(Op1); if (LHS && RHS) - if (Value *Res = FoldOrOfICmps(LHS, RHS, &I)) + if (Value *Res = foldOrOfICmps(LHS, RHS, I)) return replaceInstUsesWith(I, Res); // TODO: Make this recursive; it's a little tricky because an arbitrary @@ -2279,18 +2227,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *X, *Y; if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) - if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I)) + if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = FoldOrOfICmps(LHS, Cmp, &I)) + if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); } if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) - if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I)) + if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); if (auto *Cmp = dyn_cast(Y)) - if (Value *Res = FoldOrOfICmps(Cmp, RHS, &I)) + if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); } } @@ -2298,7 +2246,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + if (Value *Res = foldOrOfFCmps(LHS, RHS)) return replaceInstUsesWith(I, Res); if (Instruction *CastedOr = foldCastedBitwiseLogic(I)) @@ -2342,6 +2290,76 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return Changed ? &I : nullptr; } +/// A ^ B can be specified using other logic ops in a variety of patterns. We +/// can fold these early and efficiently by morphing an existing instruction. +static Instruction *foldXorToXor(BinaryOperator &I) { + assert(I.getOpcode() == Instruction::Xor); + Value *Op0 = I.getOperand(0); + Value *Op1 = I.getOperand(1); + Value *A, *B; + + // There are 4 commuted variants for each of the basic patterns. + + // (A & B) ^ (A | B) -> A ^ B + // (A & B) ^ (B | A) -> A ^ B + // (A | B) ^ (A & B) -> A ^ B + // (A | B) ^ (B & A) -> A ^ B + if ((match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) || + (match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_c_And(m_Specific(A), m_Specific(B))))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } + + // (A | ~B) ^ (~A | B) -> A ^ B + // (~B | A) ^ (~A | B) -> A ^ B + // (~A | B) ^ (A | ~B) -> A ^ B + // (B | ~A) ^ (A | ~B) -> A ^ B + if ((match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) || + (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) && + match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B)))))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } + + // (A & ~B) ^ (~A & B) -> A ^ B + // (~B & A) ^ (~A & B) -> A ^ B + // (~A & B) ^ (A & ~B) -> A ^ B + // (B & ~A) ^ (A & ~B) -> A ^ B + if ((match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && + match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) || + (match(Op0, m_c_And(m_Not(m_Value(A)), m_Value(B))) && + match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } + + return nullptr; +} + +Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { + if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { + if (LHS->getOperand(0) == RHS->getOperand(1) && + LHS->getOperand(1) == RHS->getOperand(0)) + LHS->swapOperands(); + if (LHS->getOperand(0) == RHS->getOperand(0) && + LHS->getOperand(1) == RHS->getOperand(1)) { + // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) + Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); + unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); + bool isSigned = LHS->isSigned() || RHS->isSigned(); + return getNewICmpValue(isSigned, Code, Op0, Op1, Builder); + } + } + + return nullptr; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -2352,9 +2370,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); + if (Instruction *NewXor = foldXorToXor(I)) + return NewXor; + // (A&B)^(A&C) -> A&(B^C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); @@ -2367,44 +2388,63 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyBSwap(I)) return replaceInstUsesWith(I, V); - // Is this a ~ operation? - if (Value *NotOp = dyn_castNotVal(&I)) { - if (BinaryOperator *Op0I = dyn_cast(NotOp)) { - if (Op0I->getOpcode() == Instruction::And || - Op0I->getOpcode() == Instruction::Or) { - // ~(~X & Y) --> (X | ~Y) - De Morgan's Law - // ~(~X | Y) === (X & ~Y) - De Morgan's Law - if (dyn_castNotVal(Op0I->getOperand(1))) - Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(Op0NotVal, NotY); - return BinaryOperator::CreateAnd(Op0NotVal, NotY); - } + // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. + Value *X, *Y; + + // We must eliminate the and/or (one-use) for these transforms to not increase + // the instruction count. + // ~(~X & Y) --> (X | ~Y) + // ~(Y & ~X) --> (X | ~Y) + if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) { + Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + return BinaryOperator::CreateOr(X, NotY); + } + // ~(~X | Y) --> (X & ~Y) + // ~(Y | ~X) --> (X & ~Y) + if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) { + Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + return BinaryOperator::CreateAnd(X, NotY); + } + + // Is this a 'not' (~) fed by a binary operator? + BinaryOperator *NotVal; + if (match(&I, m_Not(m_BinOp(NotVal)))) { + if (NotVal->getOpcode() == Instruction::And || + NotVal->getOpcode() == Instruction::Or) { + // Apply DeMorgan's Law when inverts are free: + // ~(X & Y) --> (~X | ~Y) + // ~(X | Y) --> (~X & ~Y) + if (IsFreeToInvert(NotVal->getOperand(0), + NotVal->getOperand(0)->hasOneUse()) && + IsFreeToInvert(NotVal->getOperand(1), + NotVal->getOperand(1)->hasOneUse())) { + Value *NotX = Builder->CreateNot(NotVal->getOperand(0), "notlhs"); + Value *NotY = Builder->CreateNot(NotVal->getOperand(1), "notrhs"); + if (NotVal->getOpcode() == Instruction::And) + return BinaryOperator::CreateOr(NotX, NotY); + return BinaryOperator::CreateAnd(NotX, NotY); + } + } - // ~(X & Y) --> (~X | ~Y) - De Morgan's Law - // ~(X | Y) === (~X & ~Y) - De Morgan's Law - if (IsFreeToInvert(Op0I->getOperand(0), - Op0I->getOperand(0)->hasOneUse()) && - IsFreeToInvert(Op0I->getOperand(1), - Op0I->getOperand(1)->hasOneUse())) { - Value *NotX = - Builder->CreateNot(Op0I->getOperand(0), "notlhs"); - Value *NotY = - Builder->CreateNot(Op0I->getOperand(1), "notrhs"); - if (Op0I->getOpcode() == Instruction::And) - return BinaryOperator::CreateOr(NotX, NotY); - return BinaryOperator::CreateAnd(NotX, NotY); - } + // ~(~X >>s Y) --> (X >>s Y) + if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y)))) + return BinaryOperator::CreateAShr(X, Y); - } else if (Op0I->getOpcode() == Instruction::AShr) { - // ~(~X >>s Y) --> (X >>s Y) - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) - return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1)); - } + // If we are inverting a right-shifted constant, we may be able to eliminate + // the 'not' by inverting the constant and using the opposite shift type. + // Canonicalization rules ensure that only a negative constant uses 'ashr', + // but we must check that in case that transform has not fired yet. + const APInt *C; + if (match(NotVal, m_AShr(m_APInt(C), m_Value(Y))) && C->isNegative()) { + // ~(C >>s Y) --> ~C >>u Y (when inverting the replicated sign bits) + Constant *NotC = ConstantInt::get(I.getType(), ~(*C)); + return BinaryOperator::CreateLShr(NotC, Y); + } + + if (match(NotVal, m_LShr(m_APInt(C), m_Value(Y))) && C->isNonNegative()) { + // ~(C >>u Y) --> ~C >>s Y (when inverting the replicated sign bits) + Constant *NotC = ConstantInt::get(I.getType(), ~(*C)); + return BinaryOperator::CreateAShr(NotC, Y); } } @@ -2448,8 +2488,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub(SubOne(NegOp0CI), Op0I->getOperand(0)); - } else if (RHSC->getValue().isSignBit()) { - // (X + C) ^ signbit -> (X + C + signbit) + } else if (RHSC->getValue().isSignMask()) { + // (X + C) ^ signmask -> (X + C + signmask) Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); @@ -2542,40 +2582,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { { Value *A, *B, *C, *D; - // (A & B)^(A | B) -> A ^ B - if (match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_Or(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - // (A | B)^(A & B) -> A ^ B - if (match(Op0, m_Or(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { - if ((A == C && B == D) || (A == D && B == C)) - return BinaryOperator::CreateXor(A, B); - } - // (A | ~B) ^ (~A | B) -> A ^ B - // (~B | A) ^ (~A | B) -> A ^ B - if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - - // (~A | B) ^ (A | ~B) -> A ^ B - if (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) && - match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B))))) { - return BinaryOperator::CreateXor(A, B); - } - // (A & ~B) ^ (~A & B) -> A ^ B - // (~B & A) ^ (~A & B) -> A ^ B - if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && - match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) - return BinaryOperator::CreateXor(A, B); - - // (~A & B) ^ (A & ~B) -> A ^ B - if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) && - match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B))))) { - return BinaryOperator::CreateXor(A, B); - } // (A ^ C)^(A | B) -> ((~A) & B) ^ C if (match(Op0, m_Xor(m_Value(D), m_Value(C))) && match(Op1, m_Or(m_Value(A), m_Value(B)))) { @@ -2613,23 +2619,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { match(Op1, m_Not(m_Specific(A)))) return BinaryOperator::CreateNot(Builder->CreateAnd(A, B)); - // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) - if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) - if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) - if (PredicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) { - if (LHS->getOperand(0) == RHS->getOperand(1) && - LHS->getOperand(1) == RHS->getOperand(0)) - LHS->swapOperands(); - if (LHS->getOperand(0) == RHS->getOperand(0) && - LHS->getOperand(1) == RHS->getOperand(1)) { - Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); - unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); - bool isSigned = LHS->isSigned() || RHS->isSigned(); - return replaceInstUsesWith(I, - getNewICmpValue(isSigned, Code, Op0, Op1, - Builder)); - } - } + if (auto *LHS = dyn_cast(I.getOperand(0))) + if (auto *RHS = dyn_cast(I.getOperand(1))) + if (Value *V = foldXorOfICmps(LHS, RHS)) + return replaceInstUsesWith(I, V); if (Instruction *CastedXor = foldCastedBitwiseLogic(I)) return CastedXor; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index cdae9571851ed84da1f026ebb5e47c17031db35c..c0830a5d211248114049501cd1b809aa738f7395 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -16,9 +16,9 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -44,6 +44,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" @@ -93,75 +94,80 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) { return ConstantVector::get(BoolVec); } -Instruction * -InstCombiner::SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI) { +Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy( + ElementUnorderedAtomicMemCpyInst *AMI) { // Try to unfold this intrinsic into sequence of explicit atomic loads and // stores. // First check that number of elements is compile time constant. - auto *NumElementsCI = dyn_cast(AMI->getNumElements()); - if (!NumElementsCI) + auto *LengthCI = dyn_cast(AMI->getLength()); + if (!LengthCI) return nullptr; // Check that there are not too many elements. - uint64_t NumElements = NumElementsCI->getZExtValue(); + uint64_t LengthInBytes = LengthCI->getZExtValue(); + uint32_t ElementSizeInBytes = AMI->getElementSizeInBytes(); + uint64_t NumElements = LengthInBytes / ElementSizeInBytes; if (NumElements >= UnfoldElementAtomicMemcpyMaxElements) return nullptr; - // Don't unfold into illegal integers - uint64_t ElementSizeInBytes = AMI->getElementSizeInBytes() * 8; - if (!getDataLayout().isLegalInteger(ElementSizeInBytes)) - return nullptr; + // Only expand if there are elements to copy. + if (NumElements > 0) { + // Don't unfold into illegal integers + uint64_t ElementSizeInBits = ElementSizeInBytes * 8; + if (!getDataLayout().isLegalInteger(ElementSizeInBits)) + return nullptr; - // Cast source and destination to the correct type. Intrinsic input arguments - // are usually represented as i8*. - // Often operands will be explicitly casted to i8* and we can just strip - // those casts instead of inserting new ones. However it's easier to rely on - // other InstCombine rules which will cover trivial cases anyway. - Value *Src = AMI->getRawSource(); - Value *Dst = AMI->getRawDest(); - Type *ElementPointerType = Type::getIntNPtrTy( - AMI->getContext(), ElementSizeInBytes, Src->getType()->getPointerAddressSpace()); - - Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType, - "memcpy_unfold.src_casted"); - Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType, - "memcpy_unfold.dst_casted"); - - for (uint64_t i = 0; i < NumElements; ++i) { - // Get current element addresses - ConstantInt *ElementIdxCI = - ConstantInt::get(AMI->getContext(), APInt(64, i)); - Value *SrcElementAddr = - Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); - Value *DstElementAddr = - Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr"); - - // Load from the source. Transfer alignment information and mark load as - // unordered atomic. - LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val"); - Load->setOrdering(AtomicOrdering::Unordered); - // We know alignment of the first element. It is also guaranteed by the - // verifier that element size is less or equal than first element alignment - // and both of this values are powers of two. - // This means that all subsequent accesses are at least element size - // aligned. - // TODO: We can infer better alignment but there is no evidence that this - // will matter. - Load->setAlignment(i == 0 ? AMI->getSrcAlignment() - : AMI->getElementSizeInBytes()); - Load->setDebugLoc(AMI->getDebugLoc()); - - // Store loaded value via unordered atomic store. - StoreInst *Store = Builder->CreateStore(Load, DstElementAddr); - Store->setOrdering(AtomicOrdering::Unordered); - Store->setAlignment(i == 0 ? AMI->getDstAlignment() - : AMI->getElementSizeInBytes()); - Store->setDebugLoc(AMI->getDebugLoc()); + // Cast source and destination to the correct type. Intrinsic input + // arguments are usually represented as i8*. Often operands will be + // explicitly casted to i8* and we can just strip those casts instead of + // inserting new ones. However it's easier to rely on other InstCombine + // rules which will cover trivial cases anyway. + Value *Src = AMI->getRawSource(); + Value *Dst = AMI->getRawDest(); + Type *ElementPointerType = + Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits, + Src->getType()->getPointerAddressSpace()); + + Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType, + "memcpy_unfold.src_casted"); + Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType, + "memcpy_unfold.dst_casted"); + + for (uint64_t i = 0; i < NumElements; ++i) { + // Get current element addresses + ConstantInt *ElementIdxCI = + ConstantInt::get(AMI->getContext(), APInt(64, i)); + Value *SrcElementAddr = + Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); + Value *DstElementAddr = + Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr"); + + // Load from the source. Transfer alignment information and mark load as + // unordered atomic. + LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val"); + Load->setOrdering(AtomicOrdering::Unordered); + // We know alignment of the first element. It is also guaranteed by the + // verifier that element size is less or equal than first element + // alignment and both of this values are powers of two. This means that + // all subsequent accesses are at least element size aligned. + // TODO: We can infer better alignment but there is no evidence that this + // will matter. + Load->setAlignment(i == 0 ? AMI->getParamAlignment(1) + : ElementSizeInBytes); + Load->setDebugLoc(AMI->getDebugLoc()); + + // Store loaded value via unordered atomic store. + StoreInst *Store = Builder->CreateStore(Load, DstElementAddr); + Store->setOrdering(AtomicOrdering::Unordered); + Store->setAlignment(i == 0 ? AMI->getParamAlignment(0) + : ElementSizeInBytes); + Store->setDebugLoc(AMI->getDebugLoc()); + } } // Set the number of elements of the copy to 0, it will be deleted on the // next iteration. - AMI->setNumElements(Constant::getNullValue(NumElementsCI->getType())); + AMI->setLength(Constant::getNullValue(LengthCI->getType())); return AMI; } @@ -378,7 +384,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II, for (unsigned i = 0; i != NumSubElts; ++i) { unsigned SubEltIdx = (NumSubElts - 1) - i; auto SubElt = cast(CDV->getElementAsConstant(SubEltIdx)); - Count = Count.shl(BitWidth); + Count <<= BitWidth; Count |= SubElt->getValue().zextOrTrunc(64); } } @@ -392,7 +398,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II, unsigned BitWidth = SVT->getPrimitiveSizeInBits(); // If shift-by-zero then just return the original value. - if (Count == 0) + if (Count.isNullValue()) return Vec; // Handle cases when Shift >= BitWidth. @@ -839,7 +845,8 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0, // Length bits. if (CI0) { APInt Elt = CI0->getValue(); - Elt = Elt.lshr(Index).zextOrTrunc(Length); + Elt.lshrInPlace(Index); + Elt = Elt.zextOrTrunc(Length); return LowConstantHighUndef(Elt.getZExtValue()); } @@ -1036,7 +1043,7 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II, // The PD variants uses bit 1 to select per-lane element index, so // shift down to convert to generic shuffle mask index. if (IsPD) - Index = Index.lshr(1); + Index.lshrInPlace(1); // The _256 variants are a bit trickier since the mask bits always index // into the corresponding 128 half. In order to convert to a generic @@ -1371,36 +1378,31 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { II.getIntrinsicID() == Intrinsic::ctlz) && "Expected cttz or ctlz intrinsic"); Value *Op0 = II.getArgOperand(0); - // FIXME: Try to simplify vectors of integers. - auto *IT = dyn_cast(Op0->getType()); - if (!IT) - return nullptr; - unsigned BitWidth = IT->getBitWidth(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - IC.computeKnownBits(Op0, KnownZero, KnownOne, 0, &II); + KnownBits Known = IC.computeKnownBits(Op0, 0, &II); // Create a mask for bits above (ctlz) or below (cttz) the first known one. bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; - unsigned NumMaskBits = IsTZ ? KnownOne.countTrailingZeros() - : KnownOne.countLeadingZeros(); - APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits) - : APInt::getHighBitsSet(BitWidth, NumMaskBits); + unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros() + : Known.countMaxLeadingZeros(); + unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros() + : Known.countMinLeadingZeros(); // If all bits above (ctlz) or below (cttz) the first known one are known // zero, this value is constant. // FIXME: This should be in InstSimplify because we're replacing an // instruction with a constant. - if ((Mask & KnownZero) == Mask) { - auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits)); + if (PossibleZeros == DefiniteZeros) { + auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros); return IC.replaceInstUsesWith(II, C); } // If the input to cttz/ctlz is known to be non-zero, // then change the 'ZeroIsUndef' parameter to 'true' // because we know the zero behavior can't affect the result. - if (KnownOne != 0 || isKnownNonZero(Op0, IC.getDataLayout())) { + if (!Known.One.isNullValue() || + isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II, + &IC.getDominatorTree())) { if (!match(II.getArgOperand(1), m_One())) { II.setOperand(1, IC.Builder->getTrue()); return &II; @@ -1817,8 +1819,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) { /// lifting. Instruction *InstCombiner::visitCallInst(CallInst &CI) { auto Args = CI.arg_operands(); - if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL, - &TLI, &DT, &AC)) + if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(), + Args.end(), SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); if (isFreeCall(&CI, &TLI)) @@ -1891,12 +1893,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Changed) return II; } - if (auto *AMI = dyn_cast(II)) { - if (Constant *C = dyn_cast(AMI->getNumElements())) + if (auto *AMI = dyn_cast(II)) { + if (Constant *C = dyn_cast(AMI->getLength())) if (C->isNullValue()) return eraseInstFromFunction(*AMI); - if (Instruction *I = SimplifyElementAtomicMemCpy(AMI)) + if (Instruction *I = SimplifyElementUnorderedAtomicMemCpy(AMI)) return I; } @@ -2337,6 +2339,37 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return II; break; } + case Intrinsic::x86_avx512_mask_cmp_pd_128: + case Intrinsic::x86_avx512_mask_cmp_pd_256: + case Intrinsic::x86_avx512_mask_cmp_pd_512: + case Intrinsic::x86_avx512_mask_cmp_ps_128: + case Intrinsic::x86_avx512_mask_cmp_ps_256: + case Intrinsic::x86_avx512_mask_cmp_ps_512: { + // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a) + Value *Arg0 = II->getArgOperand(0); + Value *Arg1 = II->getArgOperand(1); + bool Arg0IsZero = match(Arg0, m_Zero()); + if (Arg0IsZero) + std::swap(Arg0, Arg1); + Value *A, *B; + // This fold requires only the NINF(not +/- inf) since inf minus + // inf is nan. + // NSZ(No Signed Zeros) is not needed because zeros of any sign are + // equal for both compares. + // NNAN is not needed because nans compare the same for both compares. + // The compare intrinsic uses the above assumptions and therefore + // doesn't require additional flags. + if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) && + match(Arg1, m_Zero()) && + cast(Arg0)->getFastMathFlags().noInfs())) { + if (Arg0IsZero) + std::swap(A, B); + II->setArgOperand(0, A); + II->setArgOperand(1, B); + return II; + } + break; + } case Intrinsic::x86_avx512_mask_add_ps_512: case Intrinsic::x86_avx512_mask_div_ps_512: @@ -3400,8 +3433,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (auto *CSrc0 = dyn_cast(Src0)) { if (auto *CSrc1 = dyn_cast(Src1)) { Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1); - return replaceInstUsesWith(*II, - ConstantExpr::getSExt(CCmp, II->getType())); + if (CCmp->isNullValue()) { + return replaceInstUsesWith( + *II, ConstantExpr::getSExt(CCmp, II->getType())); + } + + // The result of V_ICMP/V_FCMP assembly instructions (which this + // intrinsic exposes) is one bit per thread, masked with the EXEC + // register (which contains the bitmask of live threads). So a + // comparison that always returns true is the same as a read of the + // EXEC register. + Value *NewF = Intrinsic::getDeclaration( + II->getModule(), Intrinsic::read_register, II->getType()); + Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; + MDNode *MD = MDNode::get(II->getContext(), MDArgs); + Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; + CallInst *NewCall = Builder->CreateCall(NewF, Args); + NewCall->addAttribute(AttributeList::FunctionIndex, + Attribute::Convergent); + NewCall->takeName(II); + return replaceInstUsesWith(*II, NewCall); } // Canonicalize constants to RHS. @@ -3567,9 +3618,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // If there is a dominating assume with the same condition as this one, // then this one is redundant, and should be removed. - APInt KnownZero(1, 0), KnownOne(1, 0); - computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II); - if (KnownOne.isAllOnesValue()) + KnownBits Known(1); + computeKnownBits(IIOperand, Known, 0, II); + if (Known.isAllOnes()) return eraseInstFromFunction(*II); // Update the cache of affected values for this assumption (we might be @@ -3788,24 +3839,24 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // Mark any parameters that are known to be non-null with the nonnull // attribute. This is helpful for inlining calls to functions with null // checks on their arguments. - SmallVector Indices; + SmallVector ArgNos; unsigned ArgNo = 0; for (Value *V : CS.args()) { if (V->getType()->isPointerTy() && - !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) && + !CS.paramHasAttr(ArgNo, Attribute::NonNull) && isKnownNonNullAt(V, CS.getInstruction(), &DT)) - Indices.push_back(ArgNo + 1); + ArgNos.push_back(ArgNo); ArgNo++; } assert(ArgNo == CS.arg_size() && "sanity check"); - if (!Indices.empty()) { + if (!ArgNos.empty()) { AttributeList AS = CS.getAttributes(); LLVMContext &Ctx = CS.getInstruction()->getContext(); - AS = AS.addAttribute(Ctx, Indices, - Attribute::get(Ctx, Attribute::NonNull)); + AS = AS.addParamAttribute(Ctx, ArgNos, + Attribute::get(Ctx, Attribute::NonNull)); CS.setAttributes(AS); Changed = true; } @@ -3992,8 +4043,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL)) return false; // Cannot transform this parameter value. - if (AttrBuilder(CallerPAL.getParamAttributes(i + 1)). - overlaps(AttributeFuncs::typeIncompatible(ParamTy))) + if (AttrBuilder(CallerPAL.getParamAttributes(i)) + .overlaps(AttributeFuncs::typeIncompatible(ParamTy))) return false; // Attribute not compatible with transformed value. if (CS.isInAllocaArgument(i)) @@ -4001,7 +4052,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the parameter is passed as a byval argument, then we have to have a // sized type and the sized type has to have the same size as the old type. - if (ParamTy != ActTy && CallerPAL.hasAttribute(i + 1, Attribute::ByVal)) { + if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast(ParamTy); if (!ParamPTy || !ParamPTy->getElementType()->isSized()) return false; @@ -4036,28 +4087,22 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && - !CallerPAL.isEmpty()) + !CallerPAL.isEmpty()) { // In this case we have more arguments than the new function type, but we // won't be dropping them. Check that these extra arguments have attributes // that are compatible with being a vararg call argument. - for (unsigned i = CallerPAL.getNumSlots(); i; --i) { - unsigned Index = CallerPAL.getSlotIndex(i - 1); - if (Index <= FT->getNumParams()) - break; - - // Check if it has an attribute that's incompatible with varargs. - AttributeList PAttrs = CallerPAL.getSlotAttributes(i - 1); - if (PAttrs.hasAttribute(Index, Attribute::StructRet)) - return false; - } - + unsigned SRetIdx; + if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) && + SRetIdx > FT->getNumParams()) + return false; + } // Okay, we decided that this is a safe thing to do: go ahead and start // inserting cast instructions as necessary. - std::vector Args; + SmallVector Args; + SmallVector ArgAttrs; Args.reserve(NumActualArgs); - SmallVector attrVec; - attrVec.reserve(NumCommonArgs); + ArgAttrs.reserve(NumActualArgs); // Get any return attributes. AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex); @@ -4066,32 +4111,25 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // with the existing attributes. Wipe out any problematic attributes. RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy)); - // Add the new return attributes. - if (RAttrs.hasAttributes()) - attrVec.push_back(AttributeList::get(Caller->getContext(), - AttributeList::ReturnIndex, RAttrs)); - AI = CS.arg_begin(); for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) { Type *ParamTy = FT->getParamType(i); - if ((*AI)->getType() == ParamTy) { - Args.push_back(*AI); - } else { - Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy)); - } + Value *NewArg = *AI; + if ((*AI)->getType() != ParamTy) + NewArg = Builder->CreateBitOrPointerCast(*AI, ParamTy); + Args.push_back(NewArg); // Add any parameter attributes. - AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1)); - if (PAttrs.hasAttributes()) - attrVec.push_back( - AttributeList::get(Caller->getContext(), i + 1, PAttrs)); + ArgAttrs.push_back(CallerPAL.getParamAttributes(i)); } // If the function takes more arguments than the call was taking, add them // now. - for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) + for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) { Args.push_back(Constant::getNullValue(FT->getParamType(i))); + ArgAttrs.push_back(AttributeSet()); + } // If we are removing arguments to the function, emit an obnoxious warning. if (FT->getNumParams() < NumActualArgs) { @@ -4100,62 +4138,56 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Add all of the arguments in their promoted form to the arg list. for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { Type *PTy = getPromotedType((*AI)->getType()); + Value *NewArg = *AI; if (PTy != (*AI)->getType()) { // Must promote to pass through va_arg area! Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, PTy, false); - Args.push_back(Builder->CreateCast(opcode, *AI, PTy)); - } else { - Args.push_back(*AI); + NewArg = Builder->CreateCast(opcode, *AI, PTy); } + Args.push_back(NewArg); // Add any parameter attributes. - AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1)); - if (PAttrs.hasAttributes()) - attrVec.push_back( - AttributeList::get(FT->getContext(), i + 1, PAttrs)); + ArgAttrs.push_back(CallerPAL.getParamAttributes(i)); } } } AttributeSet FnAttrs = CallerPAL.getFnAttributes(); - if (CallerPAL.hasAttributes(AttributeList::FunctionIndex)) - attrVec.push_back(AttributeList::get(Callee->getContext(), - AttributeList::FunctionIndex, - AttrBuilder(FnAttrs))); if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. - const AttributeList &NewCallerPAL = - AttributeList::get(Callee->getContext(), attrVec); + assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) && + "missing argument attributes"); + LLVMContext &Ctx = Callee->getContext(); + AttributeList NewCallerPAL = AttributeList::get( + Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs); SmallVector OpBundles; CS.getOperandBundlesAsDefs(OpBundles); - Instruction *NC; + CallSite NewCS; if (InvokeInst *II = dyn_cast(Caller)) { - NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(), - Args, OpBundles); - NC->takeName(II); - cast(NC)->setCallingConv(II->getCallingConv()); - cast(NC)->setAttributes(NewCallerPAL); + NewCS = Builder->CreateInvoke(Callee, II->getNormalDest(), + II->getUnwindDest(), Args, OpBundles); } else { - CallInst *CI = cast(Caller); - NC = Builder->CreateCall(Callee, Args, OpBundles); - NC->takeName(CI); - // Preserve the weight metadata for the new call instruction. The metadata - // is used by SamplePGO to check callsite's hotness. - uint64_t W; - if (CI->extractProfTotalWeight(W)) - NC->setProfWeight(W); - - cast(NC)->setTailCallKind(CI->getTailCallKind()); - cast(NC)->setCallingConv(CI->getCallingConv()); - cast(NC)->setAttributes(NewCallerPAL); + NewCS = Builder->CreateCall(Callee, Args, OpBundles); + cast(NewCS.getInstruction()) + ->setTailCallKind(cast(Caller)->getTailCallKind()); } + NewCS->takeName(Caller); + NewCS.setCallingConv(CS.getCallingConv()); + NewCS.setAttributes(NewCallerPAL); + + // Preserve the weight metadata for the new call instruction. The metadata + // is used by SamplePGO to check callsite's hotness. + uint64_t W; + if (Caller->extractProfTotalWeight(W)) + NewCS->setProfWeight(W); // Insert a cast of the return type as necessary. + Instruction *NC = NewCS.getInstruction(); Value *NV = NC; if (OldRetTy != NV->getType() && !Caller->use_empty()) { if (!NV->getType()->isVoidTy()) { @@ -4215,44 +4247,44 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, AttributeList NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { - unsigned NestIdx = 1; + unsigned NestArgNo = 0; Type *NestTy = nullptr; AttributeSet NestAttr; // Look for a parameter marked with the 'nest' attribute. for (FunctionType::param_iterator I = NestFTy->param_begin(), - E = NestFTy->param_end(); I != E; ++NestIdx, ++I) - if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) { + E = NestFTy->param_end(); + I != E; ++NestArgNo, ++I) { + AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo); + if (AS.hasAttribute(Attribute::Nest)) { // Record the parameter type and any other attributes. NestTy = *I; - NestAttr = NestAttrs.getParamAttributes(NestIdx); + NestAttr = AS; break; } + } if (NestTy) { Instruction *Caller = CS.getInstruction(); std::vector NewArgs; - std::vector NewAttrs; + std::vector NewArgAttrs; NewArgs.reserve(CS.arg_size() + 1); - NewAttrs.reserve(CS.arg_size() + 2); + NewArgAttrs.reserve(CS.arg_size()); // Insert the nest argument into the call argument list, which may // mean appending it. Likewise for attributes. - // Add any result attributes. - NewAttrs.push_back(Attrs.getRetAttributes()); - { - unsigned Idx = 1; + unsigned ArgNo = 0; CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); do { - if (Idx == NestIdx) { + if (ArgNo == NestArgNo) { // Add the chain argument and attributes. Value *NestVal = Tramp->getArgOperand(2); if (NestVal->getType() != NestTy) NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); NewArgs.push_back(NestVal); - NewAttrs.push_back(NestAttr); + NewArgAttrs.push_back(NestAttr); } if (I == E) @@ -4260,16 +4292,13 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the original argument and attributes. NewArgs.push_back(*I); - NewAttrs.push_back(Attrs.getParamAttributes(Idx)); + NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo)); - ++Idx; + ++ArgNo; ++I; } while (true); } - // Add any function attributes. - NewAttrs.push_back(Attrs.getFnAttributes()); - // The trampoline may have been bitcast to a bogus type (FTy). // Handle this by synthesizing a new function type, equal to FTy // with the chain parameter inserted. @@ -4280,12 +4309,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Insert the chain's type into the list of parameter types, which may // mean appending it. { - unsigned Idx = 1; + unsigned ArgNo = 0; FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); do { - if (Idx == NestIdx) + if (ArgNo == NestArgNo) // Add the chain's type. NewTypes.push_back(NestTy); @@ -4295,7 +4324,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the original type. NewTypes.push_back(*I); - ++Idx; + ++ArgNo; ++I; } while (true); } @@ -4308,7 +4337,9 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, NestF->getType() == PointerType::getUnqual(NewFTy) ? NestF : ConstantExpr::getBitCast(NestF, PointerType::getUnqual(NewFTy)); - AttributeList NewPAL = AttributeList::get(FTy->getContext(), NewAttrs); + AttributeList NewPAL = + AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(), + Attrs.getRetAttributes(), NewArgAttrs); SmallVector OpBundles; CS.getOperandBundlesAsDefs(OpBundles); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index e08c301ccdd0c4dfbe91724fb1cd68e5dba22a04..38e95fb11639644e4b6efc2cf8c4468ee898d5f0 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -14,9 +14,10 @@ #include "InstCombineInternal.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -274,12 +275,12 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { return NV; // If we are casting a PHI, then fold the cast into the PHI. - if (isa(Src)) { + if (auto *PN = dyn_cast(Src)) { // Don't do this if it would create a PHI node with an illegal type from a // legal type. if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() || shouldChangeType(CI.getType(), Src->getType())) - if (Instruction *NV = FoldOpIntoPhi(CI)) + if (Instruction *NV = foldOpIntoPhi(CI, PN)) return NV; } @@ -558,6 +559,9 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } + // FIXME: Maybe combine the next two transforms to handle the no cast case + // more efficiently. Support vector types. Cleanup code by using m_OneUse. + // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. Value *A = nullptr; ConstantInt *Cst = nullptr; if (Src->hasOneUse() && @@ -581,23 +585,35 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { return CastInst::CreateIntegerCast(Shift, DestTy, false); } + // FIXME: We should canonicalize to zext/trunc and remove this transform. // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type // conversion. // It works because bits coming from sign extension have the same value as // the sign bit of the original value; performing ashr instead of lshr // generates bits of the same value as the sign bit. if (Src->hasOneUse() && - match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) && - cast(Src)->getOperand(0)->hasOneUse()) { + match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst)))) { + Value *SExt = cast(Src)->getOperand(0); + const unsigned SExtSize = SExt->getType()->getPrimitiveSizeInBits(); const unsigned ASize = A->getType()->getPrimitiveSizeInBits(); + const unsigned CISize = CI.getType()->getPrimitiveSizeInBits(); + const unsigned MaxAmt = SExtSize - std::max(CISize, ASize); + unsigned ShiftAmt = Cst->getZExtValue(); + // This optimization can be only performed when zero bits generated by // the original lshr aren't pulled into the value after truncation, so we - // can only shift by values smaller than the size of destination type (in - // bits). - if (Cst->getValue().ult(ASize)) { - Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue()); - Shift->takeName(Src); - return CastInst::CreateIntegerCast(Shift, CI.getType(), true); + // can only shift by values no larger than the number of extension bits. + // FIXME: Instead of bailing when the shift is too large, use and to clear + // the extra bits. + if (ShiftAmt <= MaxAmt) { + if (CISize == ASize) + return BinaryOperator::CreateAShr(A, ConstantInt::get(CI.getType(), + std::min(ShiftAmt, ASize - 1))); + if (SExt->hasOneUse()) { + Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1)); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), true); + } } } @@ -645,7 +661,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // zext (x x>>u31 true if signbit set. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. - if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) || + if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV.isNullValue()) || (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { if (!DoTransform) return ICI; @@ -672,20 +688,18 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. - if ((Op1CV == 0 || Op1CV.isPowerOf2()) && + if ((Op1CV.isNullValue() || Op1CV.isPowerOf2()) && // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: - uint32_t BitWidth = Op1C->getType()->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI); + KnownBits Known = computeKnownBits(ICI->getOperand(0), 0, &CI); - APInt KnownZeroMask(~KnownZero); + APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? if (!DoTransform) return ICI; bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; - if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { + if (!Op1CV.isNullValue() && (Op1CV != KnownZeroMask)) { // (X&4) == 2 --> false // (X&4) != 2 --> true Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), @@ -703,7 +717,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, In->getName() + ".lobit"); } - if ((Op1CV != 0) == isNE) { // Toggle the low bit. + if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); In = Builder->CreateXor(In, One); } @@ -722,17 +736,14 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // may lead to additional simplifications. if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { if (IntegerType *ITy = dyn_cast(CI.getType())) { - uint32_t BitWidth = ITy->getBitWidth(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); - APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); - APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI); - computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI); + KnownBits KnownLHS = computeKnownBits(LHS, 0, &CI); + KnownBits KnownRHS = computeKnownBits(RHS, 0, &CI); - if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { - APInt KnownBits = KnownZeroLHS | KnownOneLHS; + if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) { + APInt KnownBits = KnownLHS.Zero | KnownLHS.One; APInt UnknownBit = ~KnownBits; if (UnknownBit.countPopulation() == 1) { if (!DoTransform) return ICI; @@ -740,7 +751,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, Value *Result = Builder->CreateXor(LHS, RHS); // Mask off any bits that are set and won't be shifted away. - if (KnownOneLHS.uge(UnknownBit)) + if (KnownLHS.One.uge(UnknownBit)) Result = Builder->CreateAnd(Result, ConstantInt::get(ITy, UnknownBit)); @@ -1048,11 +1059,9 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { // the icmp and sext into bitwise/integer operations. if (ICI->hasOneUse() && ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ - unsigned BitWidth = Op1C->getType()->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI); + KnownBits Known = computeKnownBits(Op0, 0, &CI); - APInt KnownZeroMask(~KnownZero); + APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { Value *In = ICI->getOperand(0); @@ -1089,7 +1098,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { // Distribute the bit over the whole bit width. In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), - BitWidth - 1), "sext"); + KnownZeroMask.getBitWidth() - 1), "sext"); } if (CI.getType() == In->getType()) @@ -1180,9 +1189,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // If we know that the value being extended is positive, we can use a zext // instead. - bool KnownZero, KnownOne; - ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI); - if (KnownZero) { + KnownBits Known = computeKnownBits(Src, 0, &CI); + if (Known.isNonNegative()) { Value *ZExt = Builder->CreateZExt(Src, DestTy); return replaceInstUsesWith(CI, ZExt); } @@ -1591,7 +1599,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // GEP into CI would undo canonicalizing addrspacecast with different // pointer types, causing infinite loops. (!isa(CI) || - GEP->getType() == GEP->getPointerOperand()->getType())) { + GEP->getType() == GEP->getPointerOperandType())) { // Changing the cast operand is usually not a good idea but it is safe // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 2419d4f32885d6b59526e77c551a2694dfd78501..1ef4acfb058c4961c874d6bd1150bbf64a6e8864 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -126,7 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, switch (Pred) { case ICmpInst::ICMP_SLT: // True if LHS s< 0 TrueIfSigned = true; - return RHS == 0; + return RHS.isNullValue(); case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1 TrueIfSigned = true; return RHS.isAllOnesValue(); @@ -140,7 +141,7 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; - return RHS.isSignBit(); + return RHS.isSignMask(); default: return false; } @@ -154,10 +155,10 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) { if (!ICmpInst::isSigned(Pred)) return false; - if (C == 0) + if (C.isNullValue()) return ICmpInst::isRelational(Pred); - if (C == 1) { + if (C.isOneValue()) { if (Pred == ICmpInst::ICMP_SLT) { Pred = ICmpInst::ICMP_SLE; return true; @@ -175,42 +176,40 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) { /// Given a signed integer type and a set of known zero and one bits, compute /// the maximum and minimum values that could have the specified known zero and /// known one bits, returning them in Min/Max. -static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero, - const APInt &KnownOne, +/// TODO: Move to method on KnownBits struct? +static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known, APInt &Min, APInt &Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && + assert(Known.getBitWidth() == Min.getBitWidth() && + Known.getBitWidth() == Max.getBitWidth() && "KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); + APInt UnknownBits = ~(Known.Zero|Known.One); // The minimum value is when all unknown bits are zeros, EXCEPT for the sign // bit if it is unknown. - Min = KnownOne; - Max = KnownOne|UnknownBits; + Min = Known.One; + Max = Known.One|UnknownBits; if (UnknownBits.isNegative()) { // Sign bit is unknown - Min.setBit(Min.getBitWidth()-1); - Max.clearBit(Max.getBitWidth()-1); + Min.setSignBit(); + Max.clearSignBit(); } } /// Given an unsigned integer type and a set of known zero and one bits, compute /// the maximum and minimum values that could have the specified known zero and /// known one bits, returning them in Min/Max. -static void computeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero, - const APInt &KnownOne, +/// TODO: Move to method on KnownBits struct? +static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known, APInt &Min, APInt &Max) { - assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() && - KnownZero.getBitWidth() == Min.getBitWidth() && - KnownZero.getBitWidth() == Max.getBitWidth() && + assert(Known.getBitWidth() == Min.getBitWidth() && + Known.getBitWidth() == Max.getBitWidth() && "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth."); - APInt UnknownBits = ~(KnownZero|KnownOne); + APInt UnknownBits = ~(Known.Zero|Known.One); // The minimum value is when the unknown bits are all zeros. - Min = KnownOne; + Min = Known.One; // The maximum value is when the unknown bits are all ones. - Max = KnownOne|UnknownBits; + Max = Known.One|UnknownBits; } /// This is called when we see this pattern: @@ -1194,7 +1193,7 @@ Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A, }; // Don't bother doing any work for cases which InstSimplify handles. - if (AP2 == 0) + if (AP2.isNullValue()) return nullptr; bool IsAShr = isa(I.getOperand(0)); @@ -1253,7 +1252,7 @@ Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A, }; // Don't bother doing any work for cases which InstSimplify handles. - if (AP2 == 0) + if (AP2.isNullValue()) return nullptr; unsigned AP2TrailingZeros = AP2.countTrailingZeros(); @@ -1400,12 +1399,12 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { } // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) - if (*C == 0 && Pred == ICmpInst::ICMP_SGT) { + if (C->isNullValue() && Pred == ICmpInst::ICMP_SGT) { SelectPatternResult SPR = matchSelectPattern(X, A, B); if (SPR.Flavor == SPF_SMIN) { - if (isKnownPositive(A, DL)) + if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) return new ICmpInst(Pred, B, Cmp.getOperand(1)); - if (isKnownPositive(B, DL)) + if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT)) return new ICmpInst(Pred, A, Cmp.getOperand(1)); } } @@ -1466,7 +1465,7 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, const APInt *C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); Value *X = Trunc->getOperand(0); - if (*C == 1 && C->getBitWidth() > 1) { + if (C->isOneValue() && C->getBitWidth() > 1) { // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1 Value *V = nullptr; if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V)))) @@ -1479,14 +1478,13 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, // of the high bits truncated out of x are known. unsigned DstBits = Trunc->getType()->getScalarSizeInBits(), SrcBits = X->getType()->getScalarSizeInBits(); - APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - computeKnownBits(X, KnownZero, KnownOne, 0, &Cmp); + KnownBits Known = computeKnownBits(X, 0, &Cmp); // If all the high bits are known, we can do this xform. - if ((KnownZero | KnownOne).countLeadingOnes() >= SrcBits - DstBits) { + if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) { // Pull in the high bits from known-ones set. APInt NewRHS = C->zext(SrcBits); - NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits); + NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS)); } } @@ -1507,7 +1505,7 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, // If this is a comparison that tests the signbit (X < 0) or (x > -1), // fold the xor. ICmpInst::Predicate Pred = Cmp.getPredicate(); - if ((Pred == ICmpInst::ICMP_SLT && *C == 0) || + if ((Pred == ICmpInst::ICMP_SLT && C->isNullValue()) || (Pred == ICmpInst::ICMP_SGT && C->isAllOnesValue())) { // If the sign bit of the XorCst is not set, there is no change to @@ -1532,14 +1530,14 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, } if (Xor->hasOneUse()) { - // (icmp u/s (xor X SignBit), C) -> (icmp s/u X, (xor C SignBit)) - if (!Cmp.isEquality() && XorC->isSignBit()) { + // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask)) + if (!Cmp.isEquality() && XorC->isSignMask()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC)); } - // (icmp u/s (xor X ~SignBit), C) -> (icmp s/u X, (xor C ~SignBit)) + // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask)) if (!Cmp.isEquality() && XorC->isMaxSignedValue()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); @@ -1625,7 +1623,7 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, // Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is // preferable because it allows the C2 << Y expression to be hoisted out of a // loop if Y is invariant and X is not. - if (Shift->hasOneUse() && *C1 == 0 && Cmp.isEquality() && + if (Shift->hasOneUse() && C1->isNullValue() && Cmp.isEquality() && !Shift->isArithmeticShift() && !isa(Shift->getOperand(0))) { // Compute C2 << Y. Value *NewShift = @@ -1683,7 +1681,8 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, // (icmp pred (and A, (or (shl 1, B), 1), 0)) // // iff pred isn't signed - if (!Cmp.isSigned() && *C1 == 0 && match(And->getOperand(1), m_One())) { + if (!Cmp.isSigned() && C1->isNullValue() && + match(And->getOperand(1), m_One())) { Constant *One = cast(And->getOperand(1)); Value *Or = And->getOperand(0); Value *A, *B, *LShr; @@ -1766,7 +1765,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, // (X & C2) != 0 -> (trunc X) < 0 // iff C2 is a power of 2 and it masks the sign bit of a legal integer type. const APInt *C2; - if (And->hasOneUse() && *C == 0 && match(Y, m_APInt(C2))) { + if (And->hasOneUse() && C->isNullValue() && match(Y, m_APInt(C2))) { int32_t ExactLogBase2 = C2->exactLogBase2(); if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); @@ -1786,7 +1785,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, const APInt *C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); - if (*C == 1) { + if (C->isOneValue()) { // icmp slt signum(V) 1 --> icmp slt V, 1 Value *V = nullptr; if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V)))) @@ -1803,7 +1802,7 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1)); } - if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse()) + if (!Cmp.isEquality() || !C->isNullValue() || !Or->hasOneUse()) return nullptr; Value *P, *Q; @@ -2038,7 +2037,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0 Value *X = Shr->getOperand(0); CmpInst::Predicate Pred = Cmp.getPredicate(); - if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && *C == 0) + if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && + C->isNullValue()) return new ICmpInst(Pred, X, Cmp.getOperand(1)); const APInt *ShiftVal; @@ -2129,7 +2129,7 @@ Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, if (!match(UDiv->getOperand(0), m_APInt(C2))) return nullptr; - assert(C2 != 0 && "udiv 0, X should have been simplified already."); + assert(*C2 != 0 && "udiv 0, X should have been simplified already."); // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1)) Value *Y = UDiv->getOperand(1); @@ -2142,7 +2142,7 @@ Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C) if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) { - assert(C != 0 && "icmp ult X, 0 should have been simplified already."); + assert(*C != 0 && "icmp ult X, 0 should have been simplified already."); return new ICmpInst(ICmpInst::ICMP_UGT, Y, ConstantInt::get(Y->getType(), C2->udiv(*C))); } @@ -2180,7 +2180,8 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, // INT_MIN will also fail if the divisor is 1. Although folds of all these // division-by-constant cases should be present, we can not assert that they // have happened before we reach this icmp instruction. - if (*C2 == 0 || *C2 == 1 || (DivIsSigned && C2->isAllOnesValue())) + if (C2->isNullValue() || C2->isOneValue() || + (DivIsSigned && C2->isAllOnesValue())) return nullptr; // TODO: We could do all of the computations below using APInt. @@ -2226,7 +2227,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false); } } else if (C2->isStrictlyPositive()) { // Divisor is > 0. - if (*C == 0) { // (X / pos) op 0 + if (C->isNullValue()) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); HiBound = RangeSize; @@ -2247,7 +2248,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, } else if (C2->isNegative()) { // Divisor is < 0. if (Div->isExact()) RangeSize = ConstantExpr::getNeg(RangeSize); - if (*C == 0) { // (X / neg) op 0 + if (C->isNullValue()) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) LoBound = AddOne(RangeSize); HiBound = ConstantExpr::getNeg(RangeSize); @@ -2339,15 +2340,15 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, return new ICmpInst(ICmpInst::ICMP_SGE, X, Y); // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y) - if (Pred == ICmpInst::ICMP_SGT && *C == 0) + if (Pred == ICmpInst::ICMP_SGT && C->isNullValue()) return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y) - if (Pred == ICmpInst::ICMP_SLT && *C == 0) + if (Pred == ICmpInst::ICMP_SLT && C->isNullValue()) return new ICmpInst(ICmpInst::ICMP_SLT, X, Y); // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y) - if (Pred == ICmpInst::ICMP_SLT && *C == 1) + if (Pred == ICmpInst::ICMP_SLT && C->isOneValue()) return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); } @@ -2402,9 +2403,9 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, const APInt &Upper = CR.getUpper(); const APInt &Lower = CR.getLower(); if (Cmp.isSigned()) { - if (Lower.isSignBit()) + if (Lower.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper)); - if (Upper.isSignBit()) + if (Upper.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower)); } else { if (Lower.isMinValue()) @@ -2522,7 +2523,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, switch (BO->getOpcode()) { case Instruction::SRem: // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. - if (*C == 0 && BO->hasOneUse()) { + if (C->isNullValue() && BO->hasOneUse()) { const APInt *BOC; if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) { Value *NewRem = Builder->CreateURem(BOp0, BOp1, BO->getName()); @@ -2539,7 +2540,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, Constant *SubC = ConstantExpr::getSub(RHS, cast(BOp1)); return new ICmpInst(Pred, BOp0, SubC); } - } else if (*C == 0) { + } else if (C->isNullValue()) { // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. if (Value *NegVal = dyn_castNegVal(BOp1)) @@ -2560,7 +2561,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // For the xor case, we can xor two constants together, eliminating // the explicit xor. return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC)); - } else if (*C == 0) { + } else if (C->isNullValue()) { // Replace ((xor A, B) != 0) with (A != B) return new ICmpInst(Pred, BOp0, BOp1); } @@ -2573,7 +2574,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // Replace ((sub BOC, B) != C) with (B != BOC-C). Constant *SubC = ConstantExpr::getSub(cast(BOp0), RHS); return new ICmpInst(Pred, BOp1, SubC); - } else if (*C == 0) { + } else if (C->isNullValue()) { // Replace ((sub A, B) != 0) with (A != B). return new ICmpInst(Pred, BOp0, BOp1); } @@ -2604,14 +2605,14 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, break; // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 - if (BOC->isSignBit()) { + if (BOC->isSignMask()) { Constant *Zero = Constant::getNullValue(BOp0->getType()); auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(NewPred, BOp0, Zero); } // ((X & ~7) == 0) --> X < 8 - if (*C == 0 && (~(*BOC) + 1).isPowerOf2()) { + if (C->isNullValue() && (~(*BOC) + 1).isPowerOf2()) { Constant *NegBOC = ConstantExpr::getNeg(cast(BOp1)); auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(NewPred, BOp0, NegBOC); @@ -2620,9 +2621,9 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, break; } case Instruction::Mul: - if (*C == 0 && BO->hasNoSignedWrap()) { + if (C->isNullValue() && BO->hasNoSignedWrap()) { const APInt *BOC; - if (match(BOp1, m_APInt(BOC)) && *BOC != 0) { + if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) { // The trivial case (mul X, 0) is handled by InstSimplify. // General case : (mul X, C) != 0 iff X != 0 // (mul X, C) == 0 iff X == 0 @@ -2631,7 +2632,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, } break; case Instruction::UDiv: - if (*C == 0) { + if (C->isNullValue()) { // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A) auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; return new ICmpInst(NewPred, BOp1, BOp0); @@ -2670,7 +2671,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, case Intrinsic::ctpop: { // popcount(A) == 0 -> A == 0 and likewise for != // popcount(A) == bitwidth(A) -> A == -1 and likewise for != - bool IsZero = *C == 0; + bool IsZero = C->isNullValue(); if (IsZero || *C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); @@ -2709,7 +2710,7 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) { // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I)) + if (Instruction *NV = foldOpIntoPhi(I, cast(LHSI))) return NV; break; case Instruction::Select: { @@ -2786,6 +2787,9 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) { } /// Try to fold icmp (binop), X or icmp X, (binop). +/// TODO: A large part of this logic is duplicated in InstSimplify's +/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code +/// duplication. Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -2795,7 +2799,7 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { if (!BO0 && !BO1) return nullptr; - CmpInst::Predicate Pred = I.getPredicate(); + const CmpInst::Predicate Pred = I.getPredicate(); bool NoOp0WrapProblem = false, NoOp1WrapProblem = false; if (BO0 && isa(BO0)) NoOp0WrapProblem = @@ -3028,56 +3032,69 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { break; case Instruction::Add: case Instruction::Sub: - case Instruction::Xor: + case Instruction::Xor: { if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b - return new ICmpInst(I.getPredicate(), BO0->getOperand(0), - BO1->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { - if (CI->getValue().isSignBit()) { - ICmpInst::Predicate Pred = + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); + + const APInt *C; + if (match(BO0->getOperand(1), m_APInt(C))) { + // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b + if (C->isSignMask()) { + ICmpInst::Predicate NewPred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); - return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); + return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); } - if (BO0->getOpcode() == Instruction::Xor && CI->isMaxValue(true)) { - ICmpInst::Predicate Pred = + // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b + if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) { + ICmpInst::Predicate NewPred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); - Pred = I.getSwappedPredicate(Pred); - return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); + NewPred = I.getSwappedPredicate(NewPred); + return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); } } break; - case Instruction::Mul: + } + case Instruction::Mul: { if (!I.isEquality()) break; - if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get( - I.getContext(), - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - AP.countTrailingZeros())); + const APInt *C; + if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() && + !C->isOneValue()) { + // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask) + // Mask = -1 >> count-trailing-zeros(C). + if (unsigned TZs = C->countTrailingZeros()) { + Constant *Mask = ConstantInt::get( + BO0->getType(), + APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs)); Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); - return new ICmpInst(I.getPredicate(), And1, And2); + return new ICmpInst(Pred, And1, And2); } + // If there are no trailing zeros in the multiplier, just eliminate + // the multiplies (no masking is needed): + // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); } break; + } case Instruction::UDiv: case Instruction::LShr: - if (I.isSigned()) + if (I.isSigned() || !BO0->isExact() || !BO1->isExact()) break; - LLVM_FALLTHROUGH; + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); + case Instruction::SDiv: + if (!I.isEquality() || !BO0->isExact() || !BO1->isExact()) + break; + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); + case Instruction::AShr: if (!BO0->isExact() || !BO1->isExact()) break; - return new ICmpInst(I.getPredicate(), BO0->getOperand(0), - BO1->getOperand(0)); + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); + case Instruction::Shl: { bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); @@ -3085,8 +3102,7 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { break; if (!NSW && I.isSigned()) break; - return new ICmpInst(I.getPredicate(), BO0->getOperand(0), - BO1->getOperand(0)); + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); } } } @@ -3097,7 +3113,7 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { auto BitwiseAnd = m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value())); - if (match(BO0, BitwiseAnd) && I.getPredicate() == ICmpInst::ICMP_ULT) { + if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) { auto *Zero = Constant::getNullValue(BO0->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero); } @@ -3500,7 +3516,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, // We can strength reduce this signed add into a regular add if we can prove // that it will never overflow. if (OCF == OCF_SIGNED_ADD) - if (WillNotOverflowSignedAdd(LHS, RHS, OrigI)) + if (willNotOverflowSignedAdd(LHS, RHS, OrigI)) return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), true); break; @@ -3513,11 +3529,11 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, return SetResult(LHS, Builder->getFalse(), false); if (OCF == OCF_SIGNED_SUB) { - if (WillNotOverflowSignedSub(LHS, RHS, OrigI)) + if (willNotOverflowSignedSub(LHS, RHS, OrigI)) return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(), true); } else { - if (WillNotOverflowUnsignedSub(LHS, RHS, OrigI)) + if (willNotOverflowUnsignedSub(LHS, RHS, OrigI)) return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(), true); } @@ -3547,7 +3563,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, return SetResult(LHS, Builder->getFalse(), false); if (OCF == OCF_SIGNED_MUL) - if (WillNotOverflowSignedMul(LHS, RHS, OrigI)) + if (willNotOverflowSignedMul(LHS, RHS, OrigI)) return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), true); break; @@ -3797,7 +3813,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth, bool isSignCheck) { if (isSignCheck) - return APInt::getSignBit(BitWidth); + return APInt::getSignMask(BitWidth); ConstantInt *CI = dyn_cast(I.getOperand(1)); if (!CI) return APInt::getAllOnesValue(BitWidth); @@ -3810,16 +3826,14 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth, // greater than the RHS must differ in a bit higher than these due to carry. case ICmpInst::ICMP_UGT: { unsigned trailingOnes = RHS.countTrailingOnes(); - APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes); - return ~lowBitsSet; + return APInt::getBitsSetFrom(BitWidth, trailingOnes); } // Similarly, for a ULT comparison, we don't care about the trailing zeros. // Any value less than the RHS must differ in a higher bit because of carries. case ICmpInst::ICMP_ULT: { unsigned trailingZeros = RHS.countTrailingZeros(); - APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros); - return ~lowBitsSet; + return APInt::getBitsSetFrom(BitWidth, trailingZeros); } default: @@ -4003,16 +4017,16 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { IsSignBit = isSignBitCheck(Pred, *CmpC, UnusedBit); } - APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0); - APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0); + KnownBits Op0Known(BitWidth); + KnownBits Op1Known(BitWidth); if (SimplifyDemandedBits(&I, 0, getDemandedBitsLHSMask(I, BitWidth, IsSignBit), - Op0KnownZero, Op0KnownOne, 0)) + Op0Known, 0)) return &I; if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth), - Op1KnownZero, Op1KnownOne, 0)) + Op1Known, 0)) return &I; // Given the known and unknown bits, compute a range that the LHS could be @@ -4021,15 +4035,11 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0); APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0); if (I.isSigned()) { - computeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min, - Op0Max); - computeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min, - Op1Max); + computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max); + computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max); } else { - computeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min, - Op0Max); - computeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min, - Op1Max); + computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max); + computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max); } // If Min and Max are known to be the same, then SimplifyDemandedBits @@ -4056,8 +4066,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { // If all bits are known zero except for one, then we know at most one bit // is set. If the comparison is against zero, then this is a check to see if // *that* bit is set. - APInt Op0KnownZeroInverted = ~Op0KnownZero; - if (~Op1KnownZero == 0) { + APInt Op0KnownZeroInverted = ~Op0Known.Zero; + if (Op1Known.isZero()) { // If the LHS is an AND with the same constant, look through it. Value *LHS = nullptr; const APInt *LHSC; @@ -4087,7 +4097,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { // Check if the LHS is 8 >>u x and the result is a power of 2 like 1. const APInt *CI; - if (Op0KnownZeroInverted == 1 && + if (Op0KnownZeroInverted.isOneValue() && match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) { // ((8 >>u X) & 1) == 0 -> X != 3 // ((8 >>u X) & 1) != 0 -> X == 3 @@ -4195,8 +4205,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { // Turn a signed comparison into an unsigned one if both operands are known to // have the same sign. if (I.isSigned() && - ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) || - (Op0KnownOne.isNegative() && Op1KnownOne.isNegative()))) + ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) || + (Op0Known.One.isNegative() && Op1Known.One.isNegative()))) return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1); return nullptr; @@ -4260,6 +4270,80 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne)); } +/// Integer compare with boolean values can always be turned into bitwise ops. +static Instruction *canonicalizeICmpBool(ICmpInst &I, + InstCombiner::BuilderTy &Builder) { + Value *A = I.getOperand(0), *B = I.getOperand(1); + assert(A->getType()->getScalarType()->isIntegerTy(1) && "Bools only"); + + // A boolean compared to true/false can be simplified to Op0/true/false in + // 14 out of the 20 (10 predicates * 2 constants) possible combinations. + // Cases not handled by InstSimplify are always 'not' of Op0. + if (match(B, m_Zero())) { + switch (I.getPredicate()) { + case CmpInst::ICMP_EQ: // A == 0 -> !A + case CmpInst::ICMP_ULE: // A <=u 0 -> !A + case CmpInst::ICMP_SGE: // A >=s 0 -> !A + return BinaryOperator::CreateNot(A); + default: + llvm_unreachable("ICmp i1 X, C not simplified as expected."); + } + } else if (match(B, m_One())) { + switch (I.getPredicate()) { + case CmpInst::ICMP_NE: // A != 1 -> !A + case CmpInst::ICMP_ULT: // A !A + case CmpInst::ICMP_SGT: // A >s -1 -> !A + return BinaryOperator::CreateNot(A); + default: + llvm_unreachable("ICmp i1 X, C not simplified as expected."); + } + } + + switch (I.getPredicate()) { + default: + llvm_unreachable("Invalid icmp instruction!"); + case ICmpInst::ICMP_EQ: + // icmp eq i1 A, B -> ~(A ^ B) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + + case ICmpInst::ICMP_NE: + // icmp ne i1 A, B -> A ^ B + return BinaryOperator::CreateXor(A, B); + + case ICmpInst::ICMP_UGT: + // icmp ugt -> icmp ult + std::swap(A, B); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULT: + // icmp ult i1 A, B -> ~A & B + return BinaryOperator::CreateAnd(Builder.CreateNot(A), B); + + case ICmpInst::ICMP_SGT: + // icmp sgt -> icmp slt + std::swap(A, B); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLT: + // icmp slt i1 A, B -> A & ~B + return BinaryOperator::CreateAnd(Builder.CreateNot(B), A); + + case ICmpInst::ICMP_UGE: + // icmp uge -> icmp ule + std::swap(A, B); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULE: + // icmp ule i1 A, B -> ~A | B + return BinaryOperator::CreateOr(Builder.CreateNot(A), B); + + case ICmpInst::ICMP_SGE: + // icmp sge -> icmp sle + std::swap(A, B); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLE: + // icmp sle i1 A, B -> A | ~B + return BinaryOperator::CreateOr(Builder.CreateNot(B), A); + } +} + Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { bool Changed = false; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -4276,8 +4360,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Changed = true; } - if (Value *V = - SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &AC, &I)) + if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // comparing -val or val with non-zero is the same as just comparing val @@ -4297,49 +4381,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } - Type *Ty = Op0->getType(); - - // icmp's with boolean values can always be turned into bitwise operations - if (Ty->getScalarType()->isIntegerTy(1)) { - switch (I.getPredicate()) { - default: llvm_unreachable("Invalid icmp instruction!"); - case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) - Value *Xor = Builder->CreateXor(Op0, Op1, I.getName() + "tmp"); - return BinaryOperator::CreateNot(Xor); - } - case ICmpInst::ICMP_NE: // icmp ne i1 A, B -> A^B - return BinaryOperator::CreateXor(Op0, Op1); - - case ICmpInst::ICMP_UGT: - std::swap(Op0, Op1); // Change icmp ugt -> icmp ult - LLVM_FALLTHROUGH; - case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B - Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp"); - return BinaryOperator::CreateAnd(Not, Op1); - } - case ICmpInst::ICMP_SGT: - std::swap(Op0, Op1); // Change icmp sgt -> icmp slt - LLVM_FALLTHROUGH; - case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B - Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp"); - return BinaryOperator::CreateAnd(Not, Op0); - } - case ICmpInst::ICMP_UGE: - std::swap(Op0, Op1); // Change icmp uge -> icmp ule - LLVM_FALLTHROUGH; - case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B - Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp"); - return BinaryOperator::CreateOr(Not, Op1); - } - case ICmpInst::ICMP_SGE: - std::swap(Op0, Op1); // Change icmp sge -> icmp sle - LLVM_FALLTHROUGH; - case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B - Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp"); - return BinaryOperator::CreateOr(Not, Op0); - } - } - } + if (Op0->getType()->getScalarType()->isIntegerTy(1)) + if (Instruction *Res = canonicalizeICmpBool(I, *Builder)) + return Res; if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I)) return NewICmp; @@ -4463,18 +4507,21 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // if A is a power of 2. if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && - isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT) && I.isEquality()) + isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality()) return new ICmpInst(I.getInversePredicate(), Builder->CreateAnd(A, B), Op1); - // ~x < ~y --> y < x - // ~x < cst --> ~cst < x + // ~X < ~Y --> Y < X + // ~X < C --> X > ~C if (match(Op0, m_Not(m_Value(A)))) { if (match(Op1, m_Not(m_Value(B)))) return new ICmpInst(I.getPredicate(), B, A); - if (ConstantInt *RHSC = dyn_cast(Op1)) - return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); + + const APInt *C; + if (match(Op1, m_APInt(C))) + return new ICmpInst(I.getSwappedPredicate(), A, + ConstantInt::get(Op1->getType(), ~(*C))); } Instruction *AddI = nullptr; @@ -4785,8 +4832,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, - I.getFastMathFlags(), DL, &TLI, &DT, &AC, &I)) + if (Value *V = + SimplifyFCmpInst(I.getPredicate(), Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Simplify 'fcmp pred X, X' @@ -4875,7 +4923,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I)) + if (Instruction *NV = foldOpIntoPhi(I, cast(LHSI))) return NV; break; case Instruction::SIToFP: diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index ab15fd84b24a0688feef64b7f8b487d02f36d360..1a7db146df426e2e5a9327c3b05ad23e6fa6808e 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -17,9 +17,12 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -27,10 +30,9 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/IR/DIBuilder.h" #define DEBUG_TYPE "instcombine" @@ -72,6 +74,27 @@ static inline unsigned getComplexity(Value *V) { return isa(V) ? (isa(V) ? 0 : 1) : 2; } +/// Predicate canonicalization reduces the number of patterns that need to be +/// matched by other transforms. For example, we may swap the operands of a +/// conditional branch or select to create a compare with a canonical (inverted) +/// predicate which is then more likely to be matched with other values. +static inline bool isCanonicalPredicate(CmpInst::Predicate Pred) { + switch (Pred) { + case CmpInst::ICMP_NE: + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SGE: + // TODO: There are 16 FCMP predicates. Should others be (not) canonical? + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_OGE: + return false; + default: + return true; + } +} + /// \brief Add one to a Constant static inline Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); @@ -193,7 +216,7 @@ private: TargetLibraryInfo &TLI; DominatorTree &DT; const DataLayout &DL; - + const SimplifyQuery SQ; // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. LoopInfo *LI; @@ -203,11 +226,11 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, - AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) + AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, + const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), LI(LI), MadeIRChange(false) {} + DL(DL), SQ(DL, &TLI, &DT, &AC), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. /// @@ -252,11 +275,7 @@ public: Instruction *visitSDiv(BinaryOperator &I); Instruction *visitFDiv(BinaryOperator &I); Value *simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, bool Inverted); - Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS); - Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS); Instruction *visitAnd(BinaryOperator &I); - Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI); - Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C); Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A, @@ -387,10 +406,27 @@ private: bool DoTransform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); - bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI); - bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI); - bool WillNotOverflowUnsignedSub(Value *LHS, Value *RHS, Instruction &CxtI); - bool WillNotOverflowSignedMul(Value *LHS, Value *RHS, Instruction &CxtI); + bool willNotOverflowSignedAdd(const Value *LHS, const Value *RHS, + const Instruction &CxtI) const { + return computeOverflowForSignedAdd(LHS, RHS, &CxtI) == + OverflowResult::NeverOverflows; + }; + bool willNotOverflowUnsignedAdd(const Value *LHS, const Value *RHS, + const Instruction &CxtI) const { + return computeOverflowForUnsignedAdd(LHS, RHS, &CxtI) == + OverflowResult::NeverOverflows; + }; + bool willNotOverflowSignedSub(const Value *LHS, const Value *RHS, + const Instruction &CxtI) const; + bool willNotOverflowUnsignedSub(const Value *LHS, const Value *RHS, + const Instruction &CxtI) const; + bool willNotOverflowSignedMul(const Value *LHS, const Value *RHS, + const Instruction &CxtI) const; + bool willNotOverflowUnsignedMul(const Value *LHS, const Value *RHS, + const Instruction &CxtI) const { + return computeOverflowForUnsignedMul(LHS, RHS, &CxtI) == + OverflowResult::NeverOverflows; + }; Value *EmitGEPOffset(User *GEP); Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask); @@ -411,6 +447,14 @@ private: Instruction::CastOps isEliminableCastPair(const CastInst *CI1, const CastInst *CI2); + Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI); + Value *foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS); + Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI); + Value *foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); + Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS); + + Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, + bool JoinedByAnd, Instruction &CxtI); public: /// \brief Inserts an instruction \p New before instruction \p Old /// @@ -489,33 +533,44 @@ public: return nullptr; // Don't do anything with FI } - void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, - unsigned Depth, Instruction *CxtI) const { - return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI, - &DT); + void computeKnownBits(const Value *V, KnownBits &Known, + unsigned Depth, const Instruction *CxtI) const { + llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT); + } + KnownBits computeKnownBits(const Value *V, unsigned Depth, + const Instruction *CxtI) const { + return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT); + } + + bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false, + unsigned Depth = 0, + const Instruction *CxtI = nullptr) { + return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT); } - bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0, - Instruction *CxtI = nullptr) const { + bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0, + const Instruction *CxtI = nullptr) const { return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT); } - unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0, - Instruction *CxtI = nullptr) const { + unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0, + const Instruction *CxtI = nullptr) const { return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); } - void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, - unsigned Depth = 0, Instruction *CxtI = nullptr) const { - return llvm::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI, - &DT); - } - OverflowResult computeOverflowForUnsignedMul(Value *LHS, Value *RHS, - const Instruction *CxtI) { + OverflowResult computeOverflowForUnsignedMul(const Value *LHS, + const Value *RHS, + const Instruction *CxtI) const { return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT); } - OverflowResult computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, - const Instruction *CxtI) { + OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, + const Value *RHS, + const Instruction *CxtI) const { return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); } + OverflowResult computeOverflowForSignedAdd(const Value *LHS, + const Value *RHS, + const Instruction *CxtI) const { + return llvm::computeOverflowForSignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); + } /// Maximum size of array considered when transforming. uint64_t MaxArraySizeForCombine; @@ -534,19 +589,31 @@ private: /// value, or null if it didn't simplify. Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); + /// This tries to simplify binary operations by factorizing out common terms + /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). + Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &, + Instruction::BinaryOps, Value *, Value *, Value *, + Value *); + /// \brief Attempts to replace V with a simpler value based on the demanded /// bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth, - Instruction *CxtI); + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known, + unsigned Depth, Instruction *CxtI); bool SimplifyDemandedBits(Instruction *I, unsigned Op, - const APInt &DemandedMask, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0); + const APInt &DemandedMask, KnownBits &Known, + unsigned Depth = 0); + /// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne + /// bits. It also tries to handle simplifications that can be done based on + /// DemandedMask, but without modifying the Instruction. + Value *SimplifyMultipleUseDemandedBits(Instruction *I, + const APInt &DemandedMask, + KnownBits &Known, + unsigned Depth, Instruction *CxtI); /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. - Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, - const APInt &DemandedMask, APInt &KnownZero, - APInt &KnownOne); + Value *simplifyShrShlDemandedBits( + Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, + const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known); /// \brief Tries to simplify operands to an integer instruction based on its /// demanded bits. @@ -562,7 +629,7 @@ private: /// Given a binary operator, cast instruction, or select which has a PHI node /// as operand #0, see if we can fold the instruction into the PHI (which is /// only possible if all operands to the PHI are constants). - Instruction *FoldOpIntoPhi(Instruction &I); + Instruction *foldOpIntoPhi(Instruction &I, PHINode *PN); /// Given an instruction with a select as one operand and a constant as the /// other operand, try to fold the binary operator into the select arguments. @@ -659,7 +726,8 @@ private: Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); - Instruction *SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI); + Instruction * + SimplifyElementUnorderedAtomicMemCpy(ElementUnorderedAtomicMemCpyInst *AMI); Instruction *SimplifyMemTransfer(MemIntrinsic *MI); Instruction *SimplifyMemSet(MemSetInst *MI); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 6288e054f1bc57c9f285f04ac89a416d9bb9a761..a4d84ae81aa02489d3ffaf96808252d2cc96f722 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -885,10 +885,8 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI, // first non-zero index. auto IsAllNonNegative = [&]() { for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) { - bool KnownNonNegative, KnownNegative; - IC.ComputeSignBit(GEPI->getOperand(i), KnownNonNegative, - KnownNegative, 0, MemI); - if (KnownNonNegative) + KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI); + if (Known.isNonNegative()) continue; return false; } @@ -931,6 +929,18 @@ static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr, return nullptr; } +static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) { + if (GetElementPtrInst *GEPI = dyn_cast(Op)) { + const Value *GEPI0 = GEPI->getOperand(0); + if (isa(GEPI0) && GEPI->getPointerAddressSpace() == 0) + return true; + } + if (isa(Op) || + (isa(Op) && LI.getPointerAddressSpace() == 0)) + return true; + return false; +} + Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); @@ -979,27 +989,13 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (!LI.isUnordered()) return nullptr; // load(gep null, ...) -> unreachable - if (GetElementPtrInst *GEPI = dyn_cast(Op)) { - const Value *GEPI0 = GEPI->getOperand(0); - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa(GEPI0) && GEPI->getPointerAddressSpace() == 0){ - // Insert a new store to null instruction before the load to indicate - // that this code is not reachable. We do this instead of inserting - // an unreachable instruction directly because we cannot modify the - // CFG. - new StoreInst(UndefValue::get(LI.getType()), - Constant::getNullValue(Op->getType()), &LI); - return replaceInstUsesWith(LI, UndefValue::get(LI.getType())); - } - } - // load null/undef -> unreachable - // TODO: Consider a target hook for valid address spaces for this xform. - if (isa(Op) || - (isa(Op) && LI.getPointerAddressSpace() == 0)) { - // Insert a new store to null instruction before the load to indicate that - // this code is not reachable. We do this instead of inserting an - // unreachable instruction directly because we cannot modify the CFG. + // TODO: Consider a target hook for valid address spaces for this xforms. + if (canSimplifyNullLoadOrGEP(LI, Op)) { + // Insert a new store to null instruction before the load to indicate + // that this code is not reachable. We do this instead of inserting + // an unreachable instruction directly because we cannot modify the + // CFG. new StoreInst(UndefValue::get(LI.getType()), Constant::getNullValue(Op->getType()), &LI); return replaceInstUsesWith(LI, UndefValue::get(LI.getType())); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index a238f3f05056689bf393a84c6284bcae3dd10f38..365c4ba75154a74fc2226a6161d04d6f8f69c3be 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -47,9 +47,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, // inexact. Similarly for <<. BinaryOperator *I = dyn_cast(V); if (I && I->isLogicalShift() && - isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0, - &IC.getAssumptionCache(), &CxtI, - &IC.getDominatorTree())) { + IC.isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0, &CxtI)) { // We know that this is an exact/nuw shift and that the input is a // non-zero context as well. if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) { @@ -132,8 +130,9 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) { /// \brief Return true if we can prove that: /// (mul LHS, RHS) === (mul nsw LHS, RHS) -bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS, - Instruction &CxtI) { +bool InstCombiner::willNotOverflowSignedMul(const Value *LHS, + const Value *RHS, + const Instruction &CxtI) const { // Multiplying n * m significant bits yields a result of n + m significant // bits. If the total number of significant bits does not exceed the // result bit width (minus 1), there is no overflow. @@ -162,11 +161,9 @@ bool InstCombiner::WillNotOverflowSignedMul(Value *LHS, Value *RHS, // product is exactly the minimum negative number. // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000 // For simplicity we just check if at least one side is not negative. - bool LHSNonNegative, LHSNegative; - bool RHSNonNegative, RHSNegative; - ComputeSignBit(LHS, LHSNonNegative, LHSNegative, /*Depth=*/0, &CxtI); - ComputeSignBit(RHS, RHSNonNegative, RHSNegative, /*Depth=*/0, &CxtI); - if (LHSNonNegative || RHSNonNegative) + KnownBits LHSKnown = computeKnownBits(LHS, /*Depth=*/0, &CxtI); + KnownBits RHSKnown = computeKnownBits(RHS, /*Depth=*/0, &CxtI); + if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) return true; } return false; @@ -179,7 +176,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyMulInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Value *V = SimplifyUsingDistributiveLaws(I)) @@ -386,7 +383,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Constant *CI = ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType()); if (ConstantExpr::getSExt(CI, I.getType()) == Op1C && - WillNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) { + willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) { // Insert the new, smaller mul. Value *NewMul = Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv"); @@ -403,7 +400,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Op0Conv->getOperand(0)->getType() == Op1Conv->getOperand(0)->getType() && (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) && - WillNotOverflowSignedMul(Op0Conv->getOperand(0), + willNotOverflowSignedMul(Op0Conv->getOperand(0), Op1Conv->getOperand(0), I)) { // Insert the new integer mul. Value *NewMul = Builder->CreateNSWMul( @@ -422,8 +419,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Constant *CI = ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType()); if (ConstantExpr::getZExt(CI, I.getType()) == Op1C && - computeOverflowForUnsignedMul(Op0Conv->getOperand(0), CI, &I) == - OverflowResult::NeverOverflows) { + willNotOverflowUnsignedMul(Op0Conv->getOperand(0), CI, I)) { // Insert the new, smaller mul. Value *NewMul = Builder->CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv"); @@ -440,9 +436,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (Op0Conv->getOperand(0)->getType() == Op1Conv->getOperand(0)->getType() && (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) && - computeOverflowForUnsignedMul(Op0Conv->getOperand(0), - Op1Conv->getOperand(0), - &I) == OverflowResult::NeverOverflows) { + willNotOverflowUnsignedMul(Op0Conv->getOperand(0), + Op1Conv->getOperand(0), I)) { // Insert the new integer mul. Value *NewMul = Builder->CreateNUWMul( Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv"); @@ -451,14 +446,12 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } - if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) { + if (!I.hasNoSignedWrap() && willNotOverflowSignedMul(Op0, Op1, I)) { Changed = true; I.setHasNoSignedWrap(true); } - if (!I.hasNoUnsignedWrap() && - computeOverflowForUnsignedMul(Op0, Op1, &I) == - OverflowResult::NeverOverflows) { + if (!I.hasNoUnsignedWrap() && willNotOverflowUnsignedMul(Op0, Op1, I)) { Changed = true; I.setHasNoUnsignedWrap(true); } @@ -606,8 +599,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (isa(Op0)) std::swap(Op0, Op1); - if (Value *V = - SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); bool AllowReassociate = I.hasUnsafeAlgebra(); @@ -938,28 +931,27 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } - if (*C2 != 0) // avoid X udiv 0 + if (!C2->isNullValue()) // avoid X udiv 0 if (Instruction *FoldedDiv = foldOpWithConstantIntoOperand(I)) return FoldedDiv; } } - if (ConstantInt *One = dyn_cast(Op0)) { - if (One->isOne() && !I.getType()->isIntegerTy(1)) { - bool isSigned = I.getOpcode() == Instruction::SDiv; - if (isSigned) { - // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the - // result is one, if Op1 is -1 then the result is minus one, otherwise - // it's zero. - Value *Inc = Builder->CreateAdd(Op1, One); - Value *Cmp = Builder->CreateICmpULT( - Inc, ConstantInt::get(I.getType(), 3)); - return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); - } else { - // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the - // result is one, otherwise it's zero. - return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType()); - } + if (match(Op0, m_One())) { + assert(!I.getType()->getScalarType()->isIntegerTy(1) && + "i1 divide not removed?"); + if (I.getOpcode() == Instruction::SDiv) { + // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the + // result is one, if Op1 is -1 then the result is minus one, otherwise + // it's zero. + Value *Inc = Builder->CreateAdd(Op1, Op0); + Value *Cmp = Builder->CreateICmpULT( + Inc, ConstantInt::get(I.getType(), 3)); + return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); + } else { + // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the + // result is one, otherwise it's zero. + return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType()); } } @@ -1112,7 +1104,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyUDivInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1185,7 +1177,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySDivInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Handle the integer div common cases @@ -1238,25 +1230,23 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. - if (I.getType()->isIntegerTy()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op0, Mask, 0, &I)) { - if (MaskedValueIsZero(Op1, Mask, 0, &I)) { - // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - BO->setIsExact(I.isExact()); - return BO; - } + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); + if (MaskedValueIsZero(Op0, Mask, 0, &I)) { + if (MaskedValueIsZero(Op1, Mask, 0, &I)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set + auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + BO->setIsExact(I.isExact()); + return BO; + } - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { - // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) - // Safe because the only negative value (1 << Y) can take on is - // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have - // the sign bit set. - auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); - BO->setIsExact(I.isExact()); - return BO; - } + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { + // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) + // Safe because the only negative value (1 << Y) can take on is + // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have + // the sign bit set. + auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + BO->setIsExact(I.isExact()); + return BO; } } @@ -1300,7 +1290,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT, &AC)) + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (isa(Op0)) @@ -1455,16 +1445,16 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { if (SelectInst *SI = dyn_cast(Op0I)) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; - } else if (isa(Op0I)) { + } else if (auto *PN = dyn_cast(Op0I)) { using namespace llvm::PatternMatch; const APInt *Op1Int; if (match(Op1, m_APInt(Op1Int)) && !Op1Int->isMinValue() && (I.getOpcode() == Instruction::URem || !Op1Int->isMinSignedValue())) { - // FoldOpIntoPhi will speculate instructions to the end of the PHI's + // foldOpIntoPhi will speculate instructions to the end of the PHI's // predecessor blocks, so do this only if we know the srem or urem // will not fault. - if (Instruction *NV = FoldOpIntoPhi(I)) + if (Instruction *NV = foldOpIntoPhi(I, PN)) return NV; } } @@ -1484,7 +1474,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyURemInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *common = commonIRemTransforms(I)) @@ -1497,7 +1487,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { I.getType()); // X urem Y -> X and Y-1, where Y is a power of 2, - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { Constant *N1 = Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); @@ -1527,7 +1517,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySRemInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Handle the integer rem common cases @@ -1546,13 +1536,11 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. - if (I.getType()->isIntegerTy()) { - APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op1, Mask, 0, &I) && - MaskedValueIsZero(Op0, Mask, 0, &I)) { - // X srem Y -> X urem Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateURem(Op0, Op1, I.getName()); - } + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); + if (MaskedValueIsZero(Op1, Mask, 0, &I) && + MaskedValueIsZero(Op0, Mask, 0, &I)) { + // X srem Y -> X urem Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateURem(Op0, Op1, I.getName()); } // If it's a constant vector, flip any negative values positive. @@ -1603,7 +1591,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { return replaceInstUsesWith(I, V); if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), - DL, &TLI, &DT, &AC)) + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); // Handle cases involving: rem X, (select Cond, Y, Z) diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index d8574175307b3cf0614af12cc28160dcaad0492a..5dbf1e85b05b99a818d6767e831085997907980a 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -16,9 +16,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/IR/DebugInfo.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -457,8 +457,8 @@ Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) { } // The more common cases of a phi with no constant operands or just one - // variable operand are handled by FoldPHIArgOpIntoPHI() and FoldOpIntoPhi() - // respectively. FoldOpIntoPhi() wants to do the opposite transform that is + // variable operand are handled by FoldPHIArgOpIntoPHI() and foldOpIntoPhi() + // respectively. foldOpIntoPhi() wants to do the opposite transform that is // performed here. It tries to replicate a cast in the phi operand's basic // block to expose other folding opportunities. Thus, InstCombine will // infinite loop without this check. @@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyInstruction(&PN, SQ.getWithInstruction(&PN))) return replaceInstUsesWith(PN, V); if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN)) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 84dace5db760073dfbc439bc8f38cd1db458cf20..b9674d85634dc86bf7af2fc059e42a1bb758f6ca 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace PatternMatch; @@ -618,7 +619,7 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, { unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType()->getScalarType()); - APInt MinSignedValue = APInt::getSignBit(BitWidth); + APInt MinSignedValue = APInt::getSignedMinValue(BitWidth); Value *X; const APInt *Y, *C; bool TrueWhenUnset; @@ -1053,8 +1054,10 @@ static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) { // If the select condition element is false, choose from the 2nd vector. Mask.push_back(ConstantInt::get(Int32Ty, i + NumElts)); } else if (isa(Elt)) { - // If the select condition element is undef, the shuffle mask is undef. - Mask.push_back(UndefValue::get(Int32Ty)); + // Undef in a select condition (choose one of the operands) does not mean + // the same thing as undef in a shuffle mask (any value is acceptable), so + // give up. + return nullptr; } else { // Bail out on a constant expression. return nullptr; @@ -1118,8 +1121,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *FalseVal = SI.getFalseValue(); Type *SelType = SI.getType(); - if (Value *V = - SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, + SQ.getWithInstruction(&SI))) return replaceInstUsesWith(SI, V); if (Instruction *I = canonicalizeSelectToShuffle(SI)) @@ -1382,11 +1385,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // See if we can fold the select into a phi node if the condition is a select. - if (isa(SI.getCondition())) + if (auto *PN = dyn_cast(SI.getCondition())) // The true/false values have to be live in the PHI predecessor's blocks. if (canSelectOperandBeMappingIntoPredBlock(TrueVal, SI) && canSelectOperandBeMappingIntoPredBlock(FalseVal, SI)) - if (Instruction *NV = FoldOpIntoPhi(SI)) + if (Instruction *NV = foldOpIntoPhi(SI, PN)) return NV; if (SelectInst *TrueSI = dyn_cast(TrueVal)) { @@ -1474,11 +1477,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // The motivation for this call into value tracking is to take advantage of // the assumption cache, so make sure that is populated. if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) { - APInt KnownOne(1, 0), KnownZero(1, 0); - computeKnownBits(CondVal, KnownZero, KnownOne, 0, &SI); - if (KnownOne == 1) + KnownBits Known(1); + computeKnownBits(CondVal, Known, 0, &SI); + if (Known.One.isOneValue()) return replaceInstUsesWith(SI, TrueVal); - if (KnownZero == 1) + if (Known.Zero.isOneValue()) return replaceInstUsesWith(SI, FalseVal); } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 9aa679c60e47b358d6a3f28ffecc636707f7e9a4..8cec865c6422a0d7398a3183d26aa0eb34ded7ad 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -44,7 +44,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { Value *A; Constant *C; if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C)))) - if (isKnownNonNegative(A, DL) && isKnownNonNegative(C, DL)) + if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) && + isKnownNonNegative(C, DL, 0, &AC, &I, &DT)) return BinaryOperator::Create( I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A); @@ -370,7 +371,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, MaskV <<= Op1C->getZExtValue(); else { assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); - MaskV = MaskV.lshr(Op1C->getZExtValue()); + MaskV.lshrInPlace(Op1C->getZExtValue()); } // shift1 & 0x00FF @@ -519,8 +520,9 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) + if (Value *V = + SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), + SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *V = commonShiftTransforms(I)) @@ -618,7 +620,8 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) + if (Value *V = + SimplifyLShrInst(Op0, Op1, I.isExact(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) @@ -679,6 +682,31 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask)); } + if (match(Op0, m_SExt(m_Value(X))) && + (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) { + // Are we moving the sign bit to the low bit and widening with high zeros? + unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits(); + if (ShAmt == BitWidth - 1) { + // lshr (sext i1 X to iN), N-1 --> zext X to iN + if (SrcTyBitWidth == 1) + return new ZExtInst(X, Ty); + + // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN + if (Op0->hasOneUse()) { + Value *NewLShr = Builder->CreateLShr(X, SrcTyBitWidth - 1); + return new ZExtInst(NewLShr, Ty); + } + } + + // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN + if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) { + // The new shift amount can't be more than the narrow source type. + unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1); + Value *AShr = Builder->CreateAShr(X, NewShAmt); + return new ZExtInst(AShr, Ty); + } + } + if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) { unsigned AmtSum = ShAmt + ShOp1->getZExtValue(); // Oversized shifts are simplified to zero in InstSimplify. @@ -702,7 +730,8 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { return replaceInstUsesWith(I, V); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) + if (Value *V = + SimplifyAShrInst(Op0, Op1, I.isExact(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); if (Instruction *R = commonShiftTransforms(I)) @@ -760,7 +789,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { } // See if we can turn a signed shr into an unsigned shr. - if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I)) + if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) return BinaryOperator::CreateLShr(Op0, Op1); return nullptr; diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 934fcfe78cb3058f626350411ef5b1ad53c90901..03841164b58dee7b8ddf3fdccd5cda7cb0380f22 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -26,7 +27,7 @@ using namespace llvm::PatternMatch; /// constant integer. If so, check to see if there are any bits set in the /// constant that are not demanded. If so, shrink the constant and return true. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded) { + const APInt &Demanded) { assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -37,13 +38,11 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, return false; // If there are no bits set that aren't demanded, nothing to do. - Demanded = Demanded.zextOrTrunc(C->getBitWidth()); - if ((~Demanded & *C) == 0) + if (C->isSubsetOf(Demanded)) return false; // This instruction is producing bits that are not demanded. Shrink the RHS. - Demanded &= *C; - I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded)); + I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded)); return true; } @@ -54,10 +53,10 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, /// the instruction has any properties that allow us to simplify its operands. bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { unsigned BitWidth = Inst.getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + KnownBits Known(BitWidth); APInt DemandedMask(APInt::getAllOnesValue(BitWidth)); - Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne, + Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known, 0, &Inst); if (!V) return false; if (V == &Inst) return true; @@ -70,11 +69,11 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { /// change and false otherwise. bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, - APInt &KnownZero, APInt &KnownOne, + KnownBits &Known, unsigned Depth) { Use &U = I->getOperandUse(OpNo); - Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, - KnownOne, Depth, I); + Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known, + Depth, I); if (!NewVal) return false; U = NewVal; return true; @@ -88,15 +87,16 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, /// with a constant or one of its operands. In such cases, this function does /// the replacement and returns true. In all other cases, it returns false after /// analyzing the expression and setting KnownOne and known to be one in the -/// expression. KnownZero contains all the bits that are known to be zero in the -/// expression. These are provided to potentially allow the caller (which might -/// recursively be SimplifyDemandedBits itself) to simplify the expression. -/// KnownOne and KnownZero always follow the invariant that: -/// KnownOne & KnownZero == 0. -/// That is, a bit can't be both 1 and 0. Note that the bits in KnownOne and -/// KnownZero may only be accurate for those bits set in DemandedMask. Note also -/// that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all be the -/// same. +/// expression. Known.Zero contains all the bits that are known to be zero in +/// the expression. These are provided to potentially allow the caller (which +/// might recursively be SimplifyDemandedBits itself) to simplify the +/// expression. +/// Known.One and Known.Zero always follow the invariant that: +/// Known.One & Known.Zero == 0. +/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and +/// Known.Zero may only be accurate for those bits set in DemandedMask. Note +/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all +/// be the same. /// /// This returns null if it did not change anything and it permits no /// simplification. This returns V itself if it did some simplification of V's @@ -104,8 +104,7 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo, /// some other non-null value if it found out that V is equal to another value /// in the context where the specified bits are demanded, but not for all users. Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth, + KnownBits &Known, unsigned Depth, Instruction *CxtI) { assert(V != nullptr && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); @@ -113,122 +112,34 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Type *VTy = V->getType(); assert( (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && - KnownZero.getBitWidth() == BitWidth && - KnownOne.getBitWidth() == BitWidth && - "Value *V, DemandedMask, KnownZero and KnownOne " - "must have same BitWidth"); - const APInt *C; - if (match(V, m_APInt(C))) { - // We know all of the bits for a scalar constant or a splat vector constant! - KnownOne = *C & DemandedMask; - KnownZero = ~KnownOne & DemandedMask; - return nullptr; - } - if (isa(V)) { - // We know all of the bits for a constant! - KnownOne.clearAllBits(); - KnownZero = DemandedMask; + Known.getBitWidth() == BitWidth && + "Value *V, DemandedMask and Known must have same BitWidth"); + + if (isa(V)) { + computeKnownBits(V, Known, Depth, CxtI); return nullptr; } - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - if (DemandedMask == 0) { // Not demanding any bits from V. - if (isa(V)) - return nullptr; + Known.resetAll(); + if (DemandedMask.isNullValue()) // Not demanding any bits from V. return UndefValue::get(VTy); - } if (Depth == 6) // Limit search depth. return nullptr; - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); - Instruction *I = dyn_cast(V); if (!I) { - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); return nullptr; // Only analyze instructions. } // If there are multiple uses of this value and we aren't at the root, then // we can't do any simplifications of the operands, because DemandedMask // only reflects the bits demanded by *one* of the users. - if (Depth != 0 && !I->hasOneUse()) { - // Despite the fact that we can't simplify this instruction in all User's - // context, we can at least compute the knownzero/knownone bits, and we can - // do simplifications that apply to *just* the one user if we know that - // this instruction has a simpler value in that context. - if (I->getOpcode() == Instruction::And) { - // If either the LHS or the RHS are Zero, the result is zero. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - CxtI); - - // If all of the demanded bits are known 1 on one side, return the other. - // These bits cannot contribute to the result of the 'and' in this - // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) - return I->getOperand(1); + if (Depth != 0 && !I->hasOneUse()) + return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI); - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - - } else if (I->getOpcode() == Instruction::Or) { - // We can simplify (X|Y) -> X or Y in the user's context if we know that - // only bits from X or Y are demanded. - - // If either the LHS or the RHS are One, the result is One. - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - CxtI); - - // If all of the demanded bits are known zero on one side, return the - // other. These bits cannot contribute to the result of the 'or' in this - // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) - return I->getOperand(1); - } else if (I->getOpcode() == Instruction::Xor) { - // We can simplify (X^Y) -> X or Y in the user's context if we know that - // only bits from X or Y are demanded. - - computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1, - CxtI); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - CxtI); - - // If all of the demanded bits are known zero on one side, return the - // other. - if ((DemandedMask & RHSKnownZero) == DemandedMask) - return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) - return I->getOperand(1); - } - - // Compute the KnownZero/KnownOne bits to simplify things downstream. - computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); - return nullptr; - } + KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth); // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedMask to all bits so that we can try to simplify the @@ -239,121 +150,106 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(I, Known, Depth, CxtI); break; - case Instruction::And: + case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownZero, LHSKnownZero, - LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown, + Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); + + // Output known-0 are known to be clear if zero in either the LHS | RHS. + APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; + // Output known-1 bits are only known if set in both the LHS & RHS. + APInt IKnownOne = RHSKnown.One & LHSKnown.One; // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)| - (RHSKnownOne & LHSKnownOne))) == DemandedMask) - return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne); + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) + return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) return I->getOperand(1); - // If all of the demanded bits in the inputs are known zeros, return zero. - if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Constant::getNullValue(VTy); - // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero)) return I; - // Output known-1 bits are only known if set in both the LHS & RHS. - KnownOne = RHSKnownOne & LHSKnownOne; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - KnownZero = RHSKnownZero | LHSKnownZero; + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); break; - case Instruction::Or: + } + case Instruction::Or: { // If either the LHS or the RHS are One, the result is One. - if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownOne, LHSKnownZero, - LHSKnownOne, Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown, + Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero; + // Output known-1 are known. to be set if s.et in either the LHS | RHS. + APInt IKnownOne = RHSKnown.One | LHSKnown.One; // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)| - (RHSKnownOne | LHSKnownOne))) == DemandedMask) - return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne); + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) + return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) + if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) + if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; - // Output known-0 bits are only known if clear in both the LHS & RHS. - KnownZero = RHSKnownZero & LHSKnownZero; - // Output known-1 are known to be set if set in either the LHS | RHS. - KnownOne = RHSKnownOne | LHSKnownOne; + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); break; + } case Instruction::Xor: { - if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 0, DemandedMask, LHSKnownZero, LHSKnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt IKnownZero = (RHSKnownZero & LHSKnownZero) | - (RHSKnownOne & LHSKnownOne); + APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) | + (RHSKnown.One & LHSKnown.One); // Output known-1 are known to be set if set in only one of the LHS, RHS. - APInt IKnownOne = (RHSKnownZero & LHSKnownOne) | - (RHSKnownOne & LHSKnownZero); + APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) | + (RHSKnown.One & LHSKnown.Zero); // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. - if ((DemandedMask & RHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { + if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) { Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); @@ -364,14 +260,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - // all known - if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); - Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); - return InsertNewInstWith(And, *I); - } + if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) && + RHSKnown.One.isSubsetOf(LHSKnown.One)) { + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnown.One & DemandedMask); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + return InsertNewInstWith(And, *I); } // If the RHS is a constant, see if we can simplify it. @@ -387,10 +281,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && isa(I->getOperand(1)) && isa(LHSInst->getOperand(1)) && - (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { + (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) { ConstantInt *AndRHS = cast(LHSInst->getOperand(1)); ConstantInt *XorRHS = cast(I->getOperand(1)); - APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); + APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask); Constant *AndC = ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); @@ -404,9 +298,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne); + Known.Zero = std::move(IKnownZero); // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero); + Known.One = std::move(IKnownOne); break; } case Instruction::Select: @@ -416,13 +310,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN) return nullptr; - if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnownZero, RHSKnownOne, - Depth + 1) || - SimplifyDemandedBits(I, 1, DemandedMask, LHSKnownZero, LHSKnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?"); - assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(I, 1, DemandedMask) || @@ -430,21 +322,22 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I; // Only known if known in both the LHS and RHS. - KnownOne = RHSKnownOne & LHSKnownOne; - KnownZero = RHSKnownZero & LHSKnownZero; + Known.One = RHSKnown.One & LHSKnown.One; + Known.Zero = RHSKnown.Zero & LHSKnown.Zero; break; + case Instruction::ZExt: case Instruction::Trunc: { - unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask = DemandedMask.zext(truncBf); - KnownZero = KnownZero.zext(truncBf); - KnownOne = KnownOne.zext(truncBf); - if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, - Depth + 1)) + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); + + APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth); + KnownBits InputKnown(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1)) return I; - DemandedMask = DemandedMask.trunc(BitWidth); - KnownZero = KnownZero.trunc(BitWidth); - KnownOne = KnownOne.trunc(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + Known = Known.zextOrTrunc(BitWidth); + // Any top bits are known to be zero. + if (BitWidth > SrcBitWidth) + Known.Zero.setBitsFrom(SrcBitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case Instruction::BitCast: @@ -464,65 +357,38 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Don't touch a vector-to-scalar bitcast. return nullptr; - if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; - case Instruction::ZExt: { - // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - DemandedMask = DemandedMask.trunc(SrcBitWidth); - KnownZero = KnownZero.trunc(SrcBitWidth); - KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne, - Depth + 1)) - return I; - DemandedMask = DemandedMask.zext(BitWidth); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - // The top bits are known to be zero. - KnownZero.setBitsFrom(SrcBitWidth); - break; - } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - APInt InputDemandedBits = DemandedMask & - APInt::getLowBitsSet(BitWidth, SrcBitWidth); + APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth); - APInt NewBits(APInt::getBitsSetFrom(BitWidth, SrcBitWidth)); // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. - if ((NewBits & DemandedMask) != 0) + if (DemandedMask.getActiveBits() > SrcBitWidth) InputDemandedBits.setBit(SrcBitWidth-1); - InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); - KnownZero = KnownZero.trunc(SrcBitWidth); - KnownOne = KnownOne.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, InputDemandedBits, KnownZero, KnownOne, - Depth + 1)) + KnownBits InputKnown(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1)) return I; - InputDemandedBits = InputDemandedBits.zext(BitWidth); - KnownZero = KnownZero.zext(BitWidth); - KnownOne = KnownOne.zext(BitWidth); - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. // If the input sign bit is known zero, or if the NewBits are not demanded // convert this into a zero extension. - if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { - // Convert to ZExt cast + if (InputKnown.isNonNegative() || + DemandedMask.getActiveBits() <= SrcBitWidth) { + // Convert to ZExt cast. CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); return InsertNewInstWith(NewCast, *I); - } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set - KnownOne |= NewBits; - } + } + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + Known = InputKnown.sext(BitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case Instruction::Add: @@ -535,11 +401,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // significant bit and all those below it. APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ)); if (ShrinkDemandedConstant(I, 0, DemandedFromOps) || - SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnownZero, LHSKnownOne, - Depth + 1) || + SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) || ShrinkDemandedConstant(I, 1, DemandedFromOps) || - SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnownZero, RHSKnownOne, - Depth + 1)) { + SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) { // Disable the nsw and nuw flags here: We can no longer guarantee that // we won't wrap after simplification. Removing the nsw/nuw flags is // legal here because the top bit is not demanded. @@ -551,30 +415,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If we are known to be adding/subtracting zeros to every bit below // the highest demanded bit, we just return the other side. - if ((DemandedFromOps & RHSKnownZero) == DemandedFromOps) + if (DemandedFromOps.isSubsetOf(RHSKnown.Zero)) return I->getOperand(0); // We can't do this with the LHS for subtraction. if (I->getOpcode() == Instruction::Add && - (DemandedFromOps & LHSKnownZero) == DemandedFromOps) + DemandedFromOps.isSubsetOf(LHSKnown.Zero)) return I->getOperand(1); } // Otherwise just hand the add/sub off to computeKnownBits to fill in // the known zeros and ones. - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); break; } - case Instruction::Shl: - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - { - Value *VarX; ConstantInt *C1; - if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { - Instruction *Shr = cast(I->getOperand(0)); - Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, - KnownZero, KnownOne); - if (R) - return R; - } + case Instruction::Shl: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { + const APInt *ShrAmt; + if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) { + Instruction *Shr = cast(I->getOperand(0)); + if (Value *R = simplifyShrShlDemandedBits( + Shr, *ShrAmt, I, *SA, DemandedMask, Known)) + return R; } uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); @@ -587,20 +449,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, else if (IOp->hasNoUnsignedWrap()) DemandedMaskIn.setHighBits(ShiftAmt); - if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + Known.Zero <<= ShiftAmt; + Known.One <<= ShiftAmt; // low bits known zero. if (ShiftAmt) - KnownZero.setLowBits(ShiftAmt); + Known.Zero.setLowBits(ShiftAmt); } break; - case Instruction::LShr: - // For a logical shift right - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + } + case Instruction::LShr: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Unsigned shift right. @@ -611,22 +473,22 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (cast(I)->isExact()) DemandedMaskIn.setLowBits(ShiftAmt); - if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); - KnownZero = KnownZero.lshr(ShiftAmt); - KnownOne = KnownOne.lshr(ShiftAmt); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + Known.Zero.lshrInPlace(ShiftAmt); + Known.One.lshrInPlace(ShiftAmt); if (ShiftAmt) - KnownZero.setHighBits(ShiftAmt); // high bits known zero. + Known.Zero.setHighBits(ShiftAmt); // high bits known zero. } break; - case Instruction::AShr: + } + case Instruction::AShr: { // If this is an arithmetic shift right and only the low-bit is set, we can // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedMask == 1) { + if (DemandedMask.isOneValue()) { // Perform the logical shift right. Instruction *NewVal = BinaryOperator::CreateLShr( I->getOperand(0), I->getOperand(1), I->getName()); @@ -635,52 +497,53 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the sign bit is the only bit demanded by this ashr, then there is no // need to do it, the shift doesn't change the high bit. - if (DemandedMask.isSignBit()) + if (DemandedMask.isSignMask()) return I->getOperand(0); - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - // If any of the "high bits" are demanded, we should set the sign bit as + // If any of the high bits are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) - DemandedMaskIn.setBit(BitWidth-1); + DemandedMaskIn.setSignBit(); // If the shift is exact, then it does demand the low bits (and knows that // they are zero). if (cast(I)->isExact()) DemandedMaskIn.setLowBits(ShiftAmt); - if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, - Depth + 1)) + if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - KnownZero = KnownZero.lshr(ShiftAmt); - KnownOne = KnownOne.lshr(ShiftAmt); + Known.Zero.lshrInPlace(ShiftAmt); + Known.One.lshrInPlace(ShiftAmt); // Handle the sign bits. - APInt SignBit(APInt::getSignBit(BitWidth)); + APInt SignMask(APInt::getSignMask(BitWidth)); // Adjust to where it is now in the mask. - SignBit = SignBit.lshr(ShiftAmt); + SignMask.lshrInPlace(ShiftAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || - (HighBits & ~DemandedMask) == HighBits) { - // Perform the logical shift right. - BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), - SA, I->getName()); - NewVal->setIsExact(cast(I)->isExact()); - return InsertNewInstWith(NewVal, *I); - } else if ((KnownOne & SignBit) != 0) { // New bits are known one. - KnownOne |= HighBits; + if (BitWidth <= ShiftAmt || Known.Zero[BitWidth-ShiftAmt-1] || + !DemandedMask.intersects(HighBits)) { + BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0), + I->getOperand(1)); + LShr->setIsExact(cast(I)->isExact()); + return InsertNewInstWith(LShr, *I); + } else if (Known.One.intersects(SignMask)) { // New bits are known one. + Known.One |= HighBits; } } break; + } case Instruction::SRem: if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { // X % -1 demands all the bits because we don't want to introduce @@ -693,49 +556,47 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I->getOperand(0); APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne, - Depth + 1)) + APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); + if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1)) return I; // The low bits of LHS are unchanged by the srem. - KnownZero = LHSKnownZero & LowBits; - KnownOne = LHSKnownOne & LowBits; + Known.Zero = LHSKnown.Zero & LowBits; + Known.One = LHSKnown.One & LowBits; // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits)) - KnownZero |= ~LowBits; + if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero)) + Known.Zero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0)) - KnownOne |= ~LowBits; + if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One)) + Known.One |= ~LowBits; - assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + break; } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { - APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1, - CxtI); + if (DemandedMask.isSignBitSet()) { + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI); // If it's known zero, our sign bit is also zero. - if (LHSKnownZero.isNegative()) - KnownZero.setSignBit(); + if (LHSKnown.isNonNegative()) + Known.makeNonNegative(); } break; case Instruction::URem: { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + KnownBits Known2(BitWidth); APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I, 0, AllOnes, KnownZero2, KnownOne2, Depth + 1) || - SimplifyDemandedBits(I, 1, AllOnes, KnownZero2, KnownOne2, Depth + 1)) + if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) || + SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1)) return I; - unsigned Leaders = KnownZero2.countLeadingOnes(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; + unsigned Leaders = Known2.countMinLeadingZeros(); + Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; break; } case Instruction::Call: @@ -795,29 +656,156 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If we don't need any of low bits then return zero, // we know that DemandedMask is non-zero already. APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth); - if (DemandedElts == 0) + if (DemandedElts.isNullValue()) return ConstantInt::getNullValue(VTy); // We know that the upper bits are set to zero. - KnownZero.setBitsFrom(ArgWidth); + Known.Zero.setBitsFrom(ArgWidth); return nullptr; } case Intrinsic::x86_sse42_crc32_64_64: - KnownZero.setBitsFrom(32); + Known.Zero.setBitsFrom(32); return nullptr; } } - computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); + computeKnownBits(V, Known, Depth, CxtI); break; } // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) - return Constant::getIntegerValue(VTy, KnownOne); + if (DemandedMask.isSubsetOf(Known.Zero|Known.One)) + return Constant::getIntegerValue(VTy, Known.One); + return nullptr; +} + +/// Helper routine of SimplifyDemandedUseBits. It computes Known +/// bits. It also tries to handle simplifications that can be done based on +/// DemandedMask, but without modifying the Instruction. +Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I, + const APInt &DemandedMask, + KnownBits &Known, + unsigned Depth, + Instruction *CxtI) { + unsigned BitWidth = DemandedMask.getBitWidth(); + Type *ITy = I->getType(); + + KnownBits LHSKnown(BitWidth); + KnownBits RHSKnown(BitWidth); + + // Despite the fact that we can't simplify this instruction in all User's + // context, we can at least compute the known bits, and we can + // do simplifications that apply to *just* the one user if we know that + // this instruction has a simpler value in that context. + switch (I->getOpcode()) { + case Instruction::And: { + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, + CxtI); + + // Output known-0 are known to be clear if zero in either the LHS | RHS. + APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; + // Output known-1 bits are only known if set in both the LHS & RHS. + APInt IKnownOne = RHSKnown.One & LHSKnown.One; + + // If the client is only demanding bits that we know, return the known + // constant. + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) + return Constant::getIntegerValue(ITy, IKnownOne); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) + return I->getOperand(0); + if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) + return I->getOperand(1); + + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); + break; + } + case Instruction::Or: { + // We can simplify (X|Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + // If either the LHS or the RHS are One, the result is One. + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, + CxtI); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero; + // Output known-1 are known to be set if set in either the LHS | RHS. + APInt IKnownOne = RHSKnown.One | LHSKnown.One; + + // If the client is only demanding bits that we know, return the known + // constant. + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) + return Constant::getIntegerValue(ITy, IKnownOne); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) + return I->getOperand(0); + if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) + return I->getOperand(1); + + Known.Zero = std::move(IKnownZero); + Known.One = std::move(IKnownOne); + break; + } + case Instruction::Xor: { + // We can simplify (X^Y) -> X or Y in the user's context if we know that + // only bits from X or Y are demanded. + + computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI); + computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, + CxtI); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) | + (RHSKnown.One & LHSKnown.One); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) | + (RHSKnown.One & LHSKnown.Zero); + + // If the client is only demanding bits that we know, return the known + // constant. + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) + return Constant::getIntegerValue(ITy, IKnownOne); + + // If all of the demanded bits are known zero on one side, return the + // other. + if (DemandedMask.isSubsetOf(RHSKnown.Zero)) + return I->getOperand(0); + if (DemandedMask.isSubsetOf(LHSKnown.Zero)) + return I->getOperand(1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + Known.Zero = std::move(IKnownZero); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + Known.One = std::move(IKnownOne); + break; + } + default: + // Compute the Known bits to simplify things downstream. + computeKnownBits(I, Known, Depth, CxtI); + + // If this user is only demanding bits that we know, return the known + // constant. + if (DemandedMask.isSubsetOf(Known.Zero|Known.One)) + return Constant::getIntegerValue(ITy, Known.One); + + break; + } + return nullptr; } + /// Helper routine of SimplifyDemandedUseBits. It tries to simplify /// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into /// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign @@ -835,29 +823,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, /// /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was /// not successful. -Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, - Instruction *Shl, - const APInt &DemandedMask, - APInt &KnownZero, - APInt &KnownOne) { - - const APInt &ShlOp1 = cast(Shl->getOperand(1))->getValue(); - const APInt &ShrOp1 = cast(Shr->getOperand(1))->getValue(); +Value * +InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, + Instruction *Shl, const APInt &ShlOp1, + const APInt &DemandedMask, + KnownBits &Known) { if (!ShlOp1 || !ShrOp1) - return nullptr; // Noop. + return nullptr; // No-op. Value *VarX = Shr->getOperand(0); Type *Ty = VarX->getType(); - unsigned BitWidth = Ty->getIntegerBitWidth(); + unsigned BitWidth = Ty->getScalarSizeInBits(); if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) return nullptr; // Undef. unsigned ShlAmt = ShlOp1.getZExtValue(); unsigned ShrAmt = ShrOp1.getZExtValue(); - KnownOne.clearAllBits(); - KnownZero.setLowBits(ShlAmt - 1); - KnownZero &= DemandedMask; + Known.One.clearAllBits(); + Known.Zero.setLowBits(ShlAmt - 1); + Known.Zero &= DemandedMask; APInt BitMask1(APInt::getAllOnesValue(BitWidth)); APInt BitMask2(APInt::getAllOnesValue(BitWidth)); @@ -923,7 +908,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, return nullptr; } - if (DemandedElts == 0) { // If nothing is demanded, provide undef. + if (DemandedElts.isNullValue()) { // If nothing is demanded, provide undef. UndefElts = EltMask; return UndefValue::get(V->getType()); } @@ -1529,7 +1514,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane); LaneElts = LaneElts.getLoBits(InnerVWidthPerLane); - LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum)); + LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum); UndefElts |= LaneElts; } } @@ -1566,7 +1551,52 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts.setHighBits(VWidth / 2); break; case Intrinsic::amdgcn_buffer_load: - case Intrinsic::amdgcn_buffer_load_format: { + case Intrinsic::amdgcn_buffer_load_format: + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + case Intrinsic::amdgcn_image_getlod: { if (VWidth == 1 || !DemandedElts.isMask()) return nullptr; @@ -1581,8 +1611,17 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts); - Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(), - NewTy); + auto IID = II->getIntrinsicID(); + + bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load || + IID == Intrinsic::amdgcn_buffer_load_format; + + Function *NewIntrin = IsBuffer ? + Intrinsic::getDeclaration(M, IID, NewTy) : + // Samplers have 3 mangled types. + Intrinsic::getDeclaration(M, IID, + { NewTy, II->getArgOperand(0)->getType(), + II->getArgOperand(1)->getType()}); SmallVector Args; for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I) @@ -1594,6 +1633,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, CallInst *NewCall = Builder->CreateCall(NewIntrin, Args); NewCall->takeName(II); NewCall->copyMetadata(*II); + + if (!IsBuffer) { + ConstantInt *DMask = dyn_cast(NewCall->getArgOperand(3)); + if (DMask) { + unsigned DMaskVal = DMask->getZExtValue() & 0xf; + + unsigned PopCnt = 0; + unsigned NewDMask = 0; + for (unsigned I = 0; I < 4; ++I) { + const unsigned Bit = 1 << I; + if (!!(DMaskVal & Bit)) { + if (++PopCnt > NewNumElts) + break; + + NewDMask |= Bit; + } + } + + NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask)); + } + } + + if (NewNumElts == 1) { return Builder->CreateInsertElement(UndefValue::get(V->getType()), NewCall, static_cast(0)); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e89b400a4afc8f3f71f0a1642b537fcdba71ecde..926e46655eb860c301063ee1e68764f9e7a38c5b 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -144,8 +144,9 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { } Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { - if (Value *V = SimplifyExtractElementInst( - EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(), + EI.getIndexOperand(), + SQ.getWithInstruction(&EI))) return replaceInstUsesWith(EI, V); // If vector val is constant with all elements the same, replace EI with @@ -440,7 +441,7 @@ static void replaceExtractElements(InsertElementInst *InsElt, if (!OldExt || OldExt->getParent() != WideVec->getParent()) continue; auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); - NewExt->insertAfter(WideVec); + NewExt->insertAfter(OldExt); IC.replaceInstUsesWith(*OldExt, NewExt); } } @@ -1140,8 +1141,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SmallVector Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); - if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), - SVI.getType(), DL, &TLI, &DT, &AC)) + if (auto *V = SimplifyShuffleVectorInst( + LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI))) return replaceInstUsesWith(SVI, V); bool MadeChange = false; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index f8b930f577125df9dada35cf6002a3b2e432b4ba..a2dd27d282a5f051d9ab9b82dba99c2c9360f948 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -33,7 +33,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/InstCombine/InstCombine.h" #include "InstCombineInternal.h" #include "llvm-c/Initialization.h" #include "llvm/ADT/SmallPtrSet.h" @@ -60,7 +59,9 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -148,9 +149,9 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) { bool Overflow = false; if (Opcode == Instruction::Add) - BVal->sadd_ov(*CVal, Overflow); + (void)BVal->sadd_ov(*CVal, Overflow); else - BVal->ssub_ov(*CVal, Overflow); + (void)BVal->ssub_ov(*CVal, Overflow); return !Overflow; } @@ -255,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "B op C" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "A op V". I.setOperand(0, A); I.setOperand(1, V); @@ -284,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op C". I.setOperand(0, V); I.setOperand(1, C); @@ -312,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = I.getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "V op B". I.setOperand(0, V); I.setOperand(1, B); @@ -332,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, SQ.getWithInstruction(&I))) { // It simplifies to V. Form "B op V". I.setOperand(0, B); I.setOperand(1, V); @@ -471,8 +472,7 @@ static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) { static Instruction::BinaryOps getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, BinaryOperator *Op, Value *&LHS, Value *&RHS) { - if (!Op) - return Instruction::BinaryOpsEnd; + assert(Op && "Expected a binary operator"); LHS = Op->getOperand(0); RHS = Op->getOperand(1); @@ -498,15 +498,11 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). -static Value *tryFactorization(InstCombiner::BuilderTy *Builder, - const DataLayout &DL, BinaryOperator &I, - Instruction::BinaryOps InnerOpcode, Value *A, - Value *B, Value *C, Value *D) { - - // If any of A, B, C, D are null, we can not factor I, return early. - // Checking A and C should be enough. - if (!A || !C || !B || !D) - return nullptr; +Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, + BinaryOperator &I, + Instruction::BinaryOps InnerOpcode, + Value *A, Value *B, Value *C, Value *D) { + assert(A && B && C && D && "All values must be provided"); Value *V = nullptr; Value *SimplifiedInst = nullptr; @@ -525,7 +521,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "A op' (B op D)". // If "B op D" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, B, D, DL); + V = SimplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I)); // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) @@ -544,7 +540,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, std::swap(C, D); // Consider forming "(A op C) op' B". // If "A op C" simplifies then it can be formed with no cost. - V = SimplifyBinOp(TopLevelOpcode, A, C, DL); + V = SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. @@ -600,31 +596,39 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); - // Factorization. - Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; - auto TopLevelOpcode = I.getOpcode(); - auto LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B); - auto RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D); - - // The instruction has the form "(A op' B) op (C op' D)". Try to factorize - // a common term. - if (LHSOpcode == RHSOpcode) { - if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) - return V; - } - - // The instruction has the form "(A op' B) op (C)". Try to factorize common - // term. - if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, - getIdentityValue(LHSOpcode, RHS))) - return V; + { + // Factorization. + Value *A, *B, *C, *D; + Instruction::BinaryOps LHSOpcode, RHSOpcode; + if (Op0) + LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B); + if (Op1) + RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D); + + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + if (Op0 && Op1 && LHSOpcode == RHSOpcode) + if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D)) + return V; + + // The instruction has the form "(A op' B) op (C)". Try to factorize common + // term. + if (Op0) + if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) + if (Value *V = + tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident)) + return V; - // The instruction has the form "(B) op (C op' D)". Try to factorize common - // term. - if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, - getIdentityValue(RHSOpcode, LHS), C, D)) - return V; + // The instruction has the form "(B) op (C op' D)". Try to factorize common + // term. + if (Op1) + if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) + if (Value *V = + tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D)) + return V; + } // Expansion. if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { @@ -634,18 +638,12 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) { + if (Value *L = + SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) + if (Value *R = + SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I))) { // They do! Return "L op' R". ++NumExpand; - // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. - if ((L == A && R == B) || - (Instruction::isCommutative(InnerOpcode) && L == B && R == A)) - return Op0; - // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL)) - return V; - // Otherwise, create a new instruction. C = Builder->CreateBinOp(InnerOpcode, L, R); C->takeName(&I); return C; @@ -659,18 +657,12 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op' // Do "A op B" and "A op C" both simplify? - if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL)) - if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) { + if (Value *L = + SimplifyBinOp(TopLevelOpcode, A, B, SQ.getWithInstruction(&I))) + if (Value *R = + SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) { // They do! Return "L op' R". ++NumExpand; - // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. - if ((L == B && R == C) || - (Instruction::isCommutative(InnerOpcode) && L == C && R == B)) - return Op1; - // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL)) - return V; - // Otherwise, create a new instruction. A = Builder->CreateBinOp(InnerOpcode, L, R); A->takeName(&I); return A; @@ -683,15 +675,17 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (auto *SI1 = dyn_cast(RHS)) { if (SI0->getCondition() == SI1->getCondition()) { Value *SI = nullptr; - if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue(), DL, &TLI, &DT, &AC)) + if (Value *V = + SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue(), SQ.getWithInstruction(&I))) SI = Builder->CreateSelect(SI0->getCondition(), Builder->CreateBinOp(TopLevelOpcode, SI0->getTrueValue(), SI1->getTrueValue()), V); - if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), - SI1->getTrueValue(), DL, &TLI, &DT, &AC)) + if (Value *V = + SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), + SI1->getTrueValue(), SQ.getWithInstruction(&I))) SI = Builder->CreateSelect( SI0->getCondition(), V, Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), @@ -836,8 +830,29 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } -Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { - PHINode *PN = cast(I.getOperand(0)); +static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, + InstCombiner *IC) { + bool ConstIsRHS = isa(I->getOperand(1)); + Constant *C = cast(I->getOperand(ConstIsRHS)); + + if (auto *InC = dyn_cast(InV)) { + if (ConstIsRHS) + return ConstantExpr::get(I->getOpcode(), InC, C); + return ConstantExpr::get(I->getOpcode(), C, InC); + } + + Value *Op0 = InV, *Op1 = C; + if (!ConstIsRHS) + std::swap(Op0, Op1); + + Value *RI = IC->Builder->CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); + auto *FPInst = dyn_cast(RI); + if (FPInst && isa(FPInst)) + FPInst->copyFastMathFlags(I); + return RI; +} + +Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { unsigned NumPHIValues = PN->getNumIncomingValues(); if (NumPHIValues == 0) return nullptr; @@ -924,9 +939,19 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { // `TrueVInPred`. if (InC && !isa(InC) && isa(InC)) InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; - else + else { + // Generate the select in the same block as PN's current incoming block. + // Note: ThisBB need not be the NonConstBB because vector constants + // which are constants by definition are handled here. + // FIXME: This can lead to an increase in IR generation because we might + // generate selects for vector constant phi operand, that could not be + // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For + // non-vector phis, this transformation was always profitable because + // the select would be generated exactly once in the NonConstBB. + Builder->SetInsertPoint(ThisBB->getTerminator()); InV = Builder->CreateSelect(PN->getIncomingValue(i), TrueVInPred, FalseVInPred, "phitmp"); + } NewPN->addIncoming(InV, ThisBB); } } else if (CmpInst *CI = dyn_cast(&I)) { @@ -943,19 +968,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } - } else if (I.getNumOperands() == 2) { - Constant *C = cast(I.getOperand(1)); + } else if (auto *BO = dyn_cast(&I)) { for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = nullptr; - if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { - InV = ConstantExpr::get(I.getOpcode(), InC, C); - } else { - InV = Builder->CreateBinOp(cast(I).getOpcode(), - PN->getIncomingValue(i), C, "phitmp"); - auto *FPInst = dyn_cast(InV); - if (FPInst && isa(FPInst)) - FPInst->copyFastMathFlags(&I); - } + Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), this); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { @@ -987,8 +1002,8 @@ Instruction *InstCombiner::foldOpWithConstantIntoOperand(BinaryOperator &I) { if (auto *Sel = dyn_cast(I.getOperand(0))) { if (Instruction *NewSel = FoldOpIntoSelect(I, Sel)) return NewSel; - } else if (isa(I.getOperand(0))) { - if (Instruction *NewPhi = FoldOpIntoPhi(I)) + } else if (auto *PN = dyn_cast(I.getOperand(0))) { + if (Instruction *NewPhi = foldOpIntoPhi(I, PN)) return NewPhi; } return nullptr; @@ -1400,8 +1415,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { SmallVector Ops(GEP.op_begin(), GEP.op_end()); - if (Value *V = - SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, + SQ.getWithInstruction(&GEP))) return replaceInstUsesWith(GEP, V); Value *PtrOp = GEP.getOperand(0); @@ -1590,7 +1605,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (SO1->getType() != GO1->getType()) return nullptr; - Value* Sum = SimplifyAddInst(GO1, SO1, false, false, DL, &TLI, &DT, &AC); + Value *Sum = + SimplifyAddInst(GO1, SO1, false, false, SQ.getWithInstruction(&GEP)); // Only do the combine when we are sure the cost after the // merge is never more than that before the merge. if (Sum == nullptr) @@ -1950,9 +1966,9 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI, return isAllocLikeFn(V, TLI) && V != AI; } -static bool -isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, - const TargetLibraryInfo *TLI) { +static bool isAllocSiteRemovable(Instruction *AI, + SmallVectorImpl &Users, + const TargetLibraryInfo *TLI) { SmallVector Worklist; Worklist.push_back(AI); @@ -1965,6 +1981,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, // Give up the moment we see something we can't handle. return false; + case Instruction::AddrSpaceCast: case Instruction::BitCast: case Instruction::GetElementPtr: Users.emplace_back(I); @@ -2036,7 +2053,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. - SmallVector Users; + SmallVector Users; if (isAllocSiteRemovable(&MI, Users, &TLI)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { // Lowering all @llvm.objectsize calls first because they may @@ -2066,7 +2083,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { replaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()), C->isFalseWhenEqual())); - } else if (isa(I) || isa(I)) { + } else if (isa(I) || isa(I) || + isa(I)) { replaceInstUsesWith(*I, UndefValue::get(I->getType())); } eraseInstFromFunction(*I); @@ -2182,11 +2200,9 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) { // There might be assume intrinsics dominating this return that completely // determine the value. If so, constant fold it. - unsigned BitWidth = VTy->getPrimitiveSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(ResultOp, KnownZero, KnownOne, 0, &RI); - if ((KnownZero|KnownOne).isAllOnesValue()) - RI.setOperand(0, Constant::getIntegerValue(VTy, KnownOne)); + KnownBits Known = computeKnownBits(ResultOp, 0, &RI); + if (Known.isConstant()) + RI.setOperand(0, Constant::getIntegerValue(VTy, Known.getConstant())); return nullptr; } @@ -2213,37 +2229,18 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { return &BI; } - // Canonicalize fcmp_one -> fcmp_oeq - FCmpInst::Predicate FPred; Value *Y; - if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), - TrueDest, FalseDest)) && - BI.getCondition()->hasOneUse()) - if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || - FPred == FCmpInst::FCMP_OGE) { - FCmpInst *Cond = cast(BI.getCondition()); - Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); - - // Swap Destinations and condition. - BI.swapSuccessors(); - Worklist.Add(Cond); - return &BI; - } - - // Canonicalize icmp_ne -> icmp_eq - ICmpInst::Predicate IPred; - if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)), - TrueDest, FalseDest)) && - BI.getCondition()->hasOneUse()) - if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE || - IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || - IPred == ICmpInst::ICMP_SGE) { - ICmpInst *Cond = cast(BI.getCondition()); - Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); - // Swap Destinations and condition. - BI.swapSuccessors(); - Worklist.Add(Cond); - return &BI; - } + // Canonicalize, for example, icmp_ne -> icmp_eq or fcmp_one -> fcmp_oeq. + CmpInst::Predicate Pred; + if (match(&BI, m_Br(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), TrueDest, + FalseDest)) && + !isCanonicalPredicate(Pred)) { + // Swap destinations and condition. + CmpInst *Cond = cast(BI.getCondition()); + Cond->setPredicate(CmpInst::getInversePredicate(Pred)); + BI.swapSuccessors(); + Worklist.Add(Cond); + return &BI; + } return nullptr; } @@ -2264,11 +2261,9 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { return &SI; } - unsigned BitWidth = cast(Cond->getType())->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI); - unsigned LeadingKnownZeros = KnownZero.countLeadingOnes(); - unsigned LeadingKnownOnes = KnownOne.countLeadingOnes(); + KnownBits Known = computeKnownBits(Cond, 0, &SI); + unsigned LeadingKnownZeros = Known.countMinLeadingZeros(); + unsigned LeadingKnownOnes = Known.countMinLeadingOnes(); // Compute the number of leading bits we can ignore. // TODO: A better way to determine this would use ComputeNumSignBits(). @@ -2279,12 +2274,12 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes()); } - unsigned NewWidth = BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes); + unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes); // Shrink the condition operand if the new type is smaller than the old type. // This may produce a non-standard type for the switch, but that's ok because // the backend should extend back to a legal type for the target. - if (NewWidth > 0 && NewWidth < BitWidth) { + if (NewWidth > 0 && NewWidth < Known.getBitWidth()) { IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); Builder->SetInsertPoint(&SI); Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc"); @@ -2306,8 +2301,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (!EV.hasIndices()) return replaceInstUsesWith(EV, Agg); - if (Value *V = - SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT, &AC)) + if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), + SQ.getWithInstruction(&EV))) return replaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast(Agg)) { @@ -2864,12 +2859,9 @@ bool InstCombiner::run() { // a value even when the operands are not all constants. Type *Ty = I->getType(); if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { - unsigned BitWidth = Ty->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I); - if ((KnownZero | KnownOne).isAllOnesValue()) { - Constant *C = ConstantInt::get(Ty, KnownOne); + KnownBits Known = computeKnownBits(I, /*Depth*/0, I); + if (Known.isConstant()) { + Constant *C = ConstantInt::get(Ty, Known.getConstant()); DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C << " from: " << *I << '\n'); @@ -3057,7 +3049,10 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, } } - InstrsForInstCombineWorklist.push_back(Inst); + // Skip processing debug intrinsics in InstCombine. Processing these call instructions + // consumes non-trivial amount of time and provides no value for the optimization. + if (!isa(Inst)) + InstrsForInstCombineWorklist.push_back(Inst); } // Recursively visit successors. If this is a branch or switch on a @@ -3146,7 +3141,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, // Lower dbg.declare intrinsics otherwise their value may be clobbered // by instcombiner. - bool DbgDeclaresChanged = LowerDbgDeclare(F); + bool MadeIRChange = LowerDbgDeclare(F); // Iterate while there is work to do. int Iteration = 0; @@ -3155,18 +3150,17 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); - bool Changed = prepareICWorklistFromFunction(F, DL, &TLI, Worklist); + MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines, AA, AC, TLI, DT, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; - Changed |= IC.run(); - if (!Changed) + if (!IC.run()) break; } - return DbgDeclaresChanged || Iteration > 1; + return MadeIRChange || Iteration > 1; } PreservedAnalyses InstCombinePass::run(Function &F, diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 94cfc69ed5551342f6d37545d9e3be01df0b5b46..7eea44d6aca03a670378611a58dd653e62fd1880 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -101,6 +101,10 @@ static const char *const kAsanRegisterImageGlobalsName = "__asan_register_image_globals"; static const char *const kAsanUnregisterImageGlobalsName = "__asan_unregister_image_globals"; +static const char *const kAsanRegisterElfGlobalsName = + "__asan_register_elf_globals"; +static const char *const kAsanUnregisterElfGlobalsName = + "__asan_unregister_elf_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *const kAsanInitName = "__asan_init"; @@ -120,8 +124,11 @@ static const char *const kAsanPoisonStackMemoryName = "__asan_poison_stack_memory"; static const char *const kAsanUnpoisonStackMemoryName = "__asan_unpoison_stack_memory"; + +// ASan version script has __asan_* wildcard. Triple underscore prevents a +// linker (gold) warning about attempting to export a local symbol. static const char *const kAsanGlobalsRegisteredFlagName = - "__asan_globals_registered"; + "___asan_globals_registered"; static const char *const kAsanOptionDetectUseAfterReturn = "__asan_option_detect_stack_use_after_return"; @@ -265,11 +272,17 @@ static cl::opt cl::Hidden, cl::init(false)); static cl::opt - ClUseMachOGlobalsSection("asan-globals-live-support", - cl::desc("Use linker features to support dead " - "code stripping of globals " - "(Mach-O only)"), - cl::Hidden, cl::init(true)); + ClUseGlobalsGC("asan-globals-live-support", + cl::desc("Use linker features to support dead " + "code stripping of globals"), + cl::Hidden, cl::init(true)); + +// This is on by default even though there is a bug in gold: +// https://sourceware.org/bugzilla/show_bug.cgi?id=19002 +static cl::opt + ClWithComdat("asan-with-comdat", + cl::desc("Place ASan constructors in comdat sections"), + cl::Hidden, cl::init(true)); // Debug flags. static cl::opt ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, @@ -594,22 +607,36 @@ struct AddressSanitizer : public FunctionPass { }; class AddressSanitizerModule : public ModulePass { - public: +public: explicit AddressSanitizerModule(bool CompileKernel = false, - bool Recover = false) + bool Recover = false, + bool UseGlobalsGC = true) : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan), - Recover(Recover || ClRecover) {} + Recover(Recover || ClRecover), + UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), + // Not a typo: ClWithComdat is almost completely pointless without + // ClUseGlobalsGC (because then it only works on modules without + // globals, which are rare); it is a prerequisite for ClUseGlobalsGC; + // and both suffer from gold PR19002 for which UseGlobalsGC constructor + // argument is designed as workaround. Therefore, disable both + // ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to + // do globals-gc. + UseCtorComdat(UseGlobalsGC && ClWithComdat) {} bool runOnModule(Module &M) override; - static char ID; // Pass identification, replacement for typeid + static char ID; // Pass identification, replacement for typeid StringRef getPassName() const override { return "AddressSanitizerModule"; } private: void initializeCallbacks(Module &M); - bool InstrumentGlobals(IRBuilder<> &IRB, Module &M); + bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat); void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M, ArrayRef ExtendedGlobals, ArrayRef MetadataInitializers); + void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M, + ArrayRef ExtendedGlobals, + ArrayRef MetadataInitializers, + const std::string &UniqueModuleId); void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M, ArrayRef ExtendedGlobals, ArrayRef MetadataInitializers); @@ -620,7 +647,8 @@ private: GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer, StringRef OriginalName); - void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata); + void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata, + StringRef InternalSuffix); IRBuilder<> CreateAsanModuleDtor(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); @@ -635,6 +663,8 @@ private: GlobalsMetadata GlobalsMD; bool CompileKernel; bool Recover; + bool UseGlobalsGC; + bool UseCtorComdat; Type *IntptrTy; LLVMContext *C; Triple TargetTriple; @@ -645,6 +675,11 @@ private: Function *AsanUnregisterGlobals; Function *AsanRegisterImageGlobals; Function *AsanUnregisterImageGlobals; + Function *AsanRegisterElfGlobals; + Function *AsanUnregisterElfGlobals; + + Function *AsanCtorFunction = nullptr; + Function *AsanDtorFunction = nullptr; }; // Stack poisoning does not play well with exception handling. @@ -913,9 +948,10 @@ INITIALIZE_PASS( "ModulePass", false, false) ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel, - bool Recover) { + bool Recover, + bool UseGlobalsGC) { assert(!CompileKernel || Recover); - return new AddressSanitizerModule(CompileKernel, Recover); + return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -1428,8 +1464,13 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, void AddressSanitizerModule::createInitializerPoisonCalls( Module &M, GlobalValue *ModuleName) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (!GV) + return; + + ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return; - ConstantArray *CA = cast(GV->getInitializer()); for (Use &OP : CA->operands()) { if (isa(OP)) continue; ConstantStruct *CS = cast(OP); @@ -1537,9 +1578,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // binary in order to allow the linker to properly dead strip. This is only // supported on recent versions of ld64. bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const { - if (!ClUseMachOGlobalsSection) - return false; - if (!TargetTriple.isOSBinFormatMachO()) return false; @@ -1594,12 +1632,22 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { checkSanitizerInterfaceFunction(M.getOrInsertFunction( kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy)); AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage); + + AsanRegisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage); + + AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy)); + AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage); } // Put the metadata and the instrumented global in the same group. This ensures // that the metadata is discarded if the instrumented global is discarded. void AddressSanitizerModule::SetComdatForGlobalMetadata( - GlobalVariable *G, GlobalVariable *Metadata) { + GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) { Module &M = *G->getParent(); Comdat *C = G->getComdat(); if (!C) { @@ -1609,7 +1657,15 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata( assert(G->hasLocalLinkage()); G->setName(Twine(kAsanGenPrefix) + "_anon_global"); } - C = M.getOrInsertComdat(G->getName()); + + if (!InternalSuffix.empty() && G->hasLocalLinkage()) { + std::string Name = G->getName(); + Name += InternalSuffix; + C = M.getOrInsertComdat(Name); + } else { + C = M.getOrInsertComdat(G->getName()); + } + // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF. if (TargetTriple.isOSBinFormatCOFF()) C->setSelectionKind(Comdat::NoDuplicates); @@ -1630,17 +1686,16 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer, : GlobalVariable::PrivateLinkage; GlobalVariable *Metadata = new GlobalVariable( M, Initializer->getType(), false, Linkage, Initializer, - Twine("__asan_global_") + GlobalValue::getRealLinkageName(OriginalName)); + Twine("__asan_global_") + GlobalValue::dropLLVMManglingEscape(OriginalName)); Metadata->setSection(getGlobalMetadataSection()); return Metadata; } IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) { - Function *AsanDtorFunction = + AsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*C), false), GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB)); } @@ -1665,8 +1720,67 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF( "global metadata will not be padded appropriately"); Metadata->setAlignment(SizeOfGlobalStruct); - SetComdatForGlobalMetadata(G, Metadata); + SetComdatForGlobalMetadata(G, Metadata, ""); + } +} + +void AddressSanitizerModule::InstrumentGlobalsELF( + IRBuilder<> &IRB, Module &M, ArrayRef ExtendedGlobals, + ArrayRef MetadataInitializers, + const std::string &UniqueModuleId) { + assert(ExtendedGlobals.size() == MetadataInitializers.size()); + + SmallVector MetadataGlobals(ExtendedGlobals.size()); + for (size_t i = 0; i < ExtendedGlobals.size(); i++) { + GlobalVariable *G = ExtendedGlobals[i]; + GlobalVariable *Metadata = + CreateMetadataGlobal(M, MetadataInitializers[i], G->getName()); + MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G)); + Metadata->setMetadata(LLVMContext::MD_associated, MD); + MetadataGlobals[i] = Metadata; + + SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId); } + + // Update llvm.compiler.used, adding the new metadata globals. This is + // needed so that during LTO these variables stay alive. + if (!MetadataGlobals.empty()) + appendToCompilerUsed(M, MetadataGlobals); + + // RegisteredFlag serves two purposes. First, we can pass it to dladdr() + // to look up the loaded image that contains it. Second, we can store in it + // whether registration has already occurred, to prevent duplicate + // registration. + // + // Common linkage ensures that there is only one global per shared library. + GlobalVariable *RegisteredFlag = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::CommonLinkage, + ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); + RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); + + // Create start and stop symbols. + GlobalVariable *StartELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__start_" + getGlobalMetadataSection()); + StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + GlobalVariable *StopELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__stop_" + getGlobalMetadataSection()); + StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + + // Create a call to register the globals with the runtime. + IRB.CreateCall(AsanRegisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); + + // We also need to unregister globals at the end, e.g., when a shared library + // gets closed. + IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); + IRB_Dtor.CreateCall(AsanUnregisterElfGlobals, + {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), + IRB.CreatePointerCast(StartELFMetadata, IntptrTy), + IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); } void AddressSanitizerModule::InstrumentGlobalsMachO( @@ -1677,7 +1791,7 @@ void AddressSanitizerModule::InstrumentGlobalsMachO( // On recent Mach-O platforms, use a structure which binds the liveness of // the global variable to the metadata struct. Keep the list of "Liveness" GV // created to be added to llvm.compiler.used - StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr); + StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy); SmallVector LivenessGlobals(ExtendedGlobals.size()); for (size_t i = 0; i < ExtendedGlobals.size(); i++) { @@ -1688,9 +1802,9 @@ void AddressSanitizerModule::InstrumentGlobalsMachO( // On recent Mach-O platforms, we emit the global metadata in a way that // allows the linker to properly strip dead globals. - auto LivenessBinder = ConstantStruct::get( - LivenessTy, Initializer->getAggregateElement(0u), - ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr); + auto LivenessBinder = + ConstantStruct::get(LivenessTy, Initializer->getAggregateElement(0u), + ConstantExpr::getPointerCast(Metadata, IntptrTy)); GlobalVariable *Liveness = new GlobalVariable( M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder, Twine("__asan_binder_") + G->getName()); @@ -1756,7 +1870,10 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray( // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. -bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { +// Sets *CtorComdat to true if the global registration code emitted into the +// asan constructor is comdat-compatible. +bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) { + *CtorComdat = false; GlobalsMD.init(M); SmallVector GlobalsToChange; @@ -1766,7 +1883,10 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { } size_t n = GlobalsToChange.size(); - if (n == 0) return false; + if (n == 0) { + *CtorComdat = true; + return false; + } auto &DL = M.getDataLayout(); @@ -1782,7 +1902,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, - IntptrTy, IntptrTy, IntptrTy, nullptr); + IntptrTy, IntptrTy, IntptrTy); SmallVector NewGlobals(n); SmallVector Initializers(n); @@ -1818,10 +1938,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); - StructType *NewTy = StructType::get(Ty, RightRedZoneTy, nullptr); - Constant *NewInitializer = - ConstantStruct::get(NewTy, G->getInitializer(), - Constant::getNullValue(RightRedZoneTy), nullptr); + StructType *NewTy = StructType::get(Ty, RightRedZoneTy); + Constant *NewInitializer = ConstantStruct::get( + NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy)); // Create a new global variable with enough space for a redzone. GlobalValue::LinkageTypes Linkage = G->getLinkage(); @@ -1902,7 +2021,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { ConstantExpr::getPointerCast(Name, IntptrTy), ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, MD.IsDynInit), SourceLoc, - ConstantExpr::getPointerCast(ODRIndicator, IntptrTy), nullptr); + ConstantExpr::getPointerCast(ODRIndicator, IntptrTy)); if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true; @@ -1911,9 +2030,16 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { Initializers[i] = Initializer; } - if (TargetTriple.isOSBinFormatCOFF()) { + std::string ELFUniqueModuleId = + (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M) + : ""; + + if (!ELFUniqueModuleId.empty()) { + InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId); + *CtorComdat = true; + } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers); - } else if (ShouldUseMachOGlobalsSection()) { + } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) { InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); } else { InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers); @@ -1938,17 +2064,36 @@ bool AddressSanitizerModule::runOnModule(Module &M) { if (CompileKernel) return false; - Function *AsanCtorFunction; + // Create a module constructor. A destructor is created lazily because not all + // platforms, and not all modules need it. std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions( M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName); - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + bool CtorComdat = true; bool Changed = false; // TODO(glider): temporarily disabled globals instrumentation for KASan. if (ClGlobals) { IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); - Changed |= InstrumentGlobals(IRB, M); + Changed |= InstrumentGlobals(IRB, M, &CtorComdat); + } + + // Put the constructor and destructor in comdat if both + // (1) global instrumentation is not TU-specific + // (2) target is ELF. + if (UseCtorComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { + AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority, + AsanCtorFunction); + if (AsanDtorFunction) { + AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName)); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority, + AsanDtorFunction); + } + } else { + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); + if (AsanDtorFunction) + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); } return Changed; @@ -2586,7 +2731,7 @@ void FunctionStackPoisoner::processStaticAllocas() { Value *NewAllocaPtr = IRB.CreateIntToPtr( IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); - replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/true); + replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref); AI->replaceAllUsesWith(NewAllocaPtr); } diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp index d4c8369fa9d3bf309bf923add5de693ab7548e8d..a193efe902cf5e1bb462db85b9b715ca500c3078 100644 --- a/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetFolder.h" @@ -25,6 +24,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" using namespace llvm; #define DEBUG_TYPE "bounds-checking" diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 7ff69b9eb7f4277364f7e38de091c010e6784710..f2806e278e6e19d18df257bab9d3e59b739ea504 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMInstrumentation Instrumentation.cpp InstrProfiling.cpp PGOInstrumentation.cpp + PGOMemOPSizeOpt.cpp SanitizerCoverage.cpp ThreadSanitizer.cpp EfficiencySanitizer.cpp diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 4e454f0c95b6598b69921a79b1f25ba592988216..a33490f6e4acf5141935d120cf13737cc19d42f9 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -44,15 +44,14 @@ /// For more information, please refer to the design document: /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" @@ -63,6 +62,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/SpecialCaseList.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -254,7 +254,7 @@ class DataFlowSanitizer : public ModulePass { MDNode *ColdCallWeights; DFSanABIList ABIList; DenseMap UnwrappedFnMap; - AttributeList ReadOnlyNoneAttrs; + AttrBuilder ReadOnlyNoneAttrs; bool DFSanRuntimeShadowMask; Value *getShadowAddress(Value *Addr, Instruction *Pos); @@ -388,7 +388,7 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { ArgTypes.push_back(ShadowPtrTy); Type *RetType = T->getReturnType(); if (!RetType->isVoidTy()) - RetType = StructType::get(RetType, ShadowTy, (Type *)nullptr); + RetType = StructType::get(RetType, ShadowTy); return FunctionType::get(RetType, ArgTypes, T->isVarArg()); } @@ -476,16 +476,14 @@ bool DataFlowSanitizer::doInitialization(Module &M) { GetArgTLS = ConstantExpr::getIntToPtr( ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)), PointerType::getUnqual( - FunctionType::get(PointerType::getUnqual(ArgTLSTy), - (Type *)nullptr))); + FunctionType::get(PointerType::getUnqual(ArgTLSTy), false))); } if (GetRetvalTLSPtr) { RetvalTLS = nullptr; GetRetvalTLS = ConstantExpr::getIntToPtr( ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)), PointerType::getUnqual( - FunctionType::get(PointerType::getUnqual(ShadowTy), - (Type *)nullptr))); + FunctionType::get(PointerType::getUnqual(ShadowTy), false))); } ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); @@ -544,16 +542,12 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, NewF->copyAttributesFrom(F); NewF->removeAttributes( AttributeList::ReturnIndex, - AttributeList::get( - F->getContext(), AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewFT->getReturnType()))); + AttributeFuncs::typeIncompatible(NewFT->getReturnType())); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); if (F->isVarArg()) { - NewF->removeAttributes( - AttributeList::FunctionIndex, - AttributeList().addAttribute(*Ctx, AttributeList::FunctionIndex, - "split-stack")); + NewF->removeAttributes(AttributeList::FunctionIndex, + AttrBuilder().addAttribute("split-stack")); CallInst::Create(DFSanVarargWrapperFn, IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", BB); @@ -629,16 +623,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) { F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - F->addAttribute(1, Attribute::ZExt); - F->addAttribute(2, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); + F->addParamAttr(1, Attribute::ZExt); } DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy); if (Function *F = dyn_cast(DFSanCheckedUnionFn)) { F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - F->addAttribute(1, Attribute::ZExt); - F->addAttribute(2, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); + F->addParamAttr(1, Attribute::ZExt); } DFSanUnionLoadFn = Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy); @@ -652,7 +646,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { DFSanSetLabelFn = Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy); if (Function *F = dyn_cast(DFSanSetLabelFn)) { - F->addAttribute(1, Attribute::ZExt); + F->addParamAttr(0, Attribute::ZExt); } DFSanNonzeroLabelFn = Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); @@ -698,9 +692,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { } } - AttrBuilder B; - B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); - ReadOnlyNoneAttrs = AttributeList::get(*Ctx, AttributeList::FunctionIndex, B); + ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly) + .addAttribute(Attribute::ReadNone); // First, change the ABI of every function in the module. ABI-listed // functions keep their original ABI and get a wrapper function. @@ -722,9 +715,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { NewF->copyAttributesFrom(&F); NewF->removeAttributes( AttributeList::ReturnIndex, - AttributeList::get( - NewF->getContext(), AttributeList::ReturnIndex, - AttributeFuncs::typeIncompatible(NewFT->getReturnType()))); + AttributeFuncs::typeIncompatible(NewFT->getReturnType())); for (Function::arg_iterator FArg = F.arg_begin(), NewFArg = NewF->arg_begin(), FArgEnd = F.arg_end(); @@ -989,8 +980,8 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { if (AvoidNewBlocks) { CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2}); Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - Call->addAttribute(1, Attribute::ZExt); - Call->addAttribute(2, Attribute::ZExt); + Call->addParamAttr(0, Attribute::ZExt); + Call->addParamAttr(1, Attribute::ZExt); CCS.Block = Pos->getParent(); CCS.Shadow = Call; @@ -1002,8 +993,8 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { IRBuilder<> ThenIRB(BI); CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2}); Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt); - Call->addAttribute(1, Attribute::ZExt); - Call->addAttribute(2, Attribute::ZExt); + Call->addParamAttr(0, Attribute::ZExt); + Call->addParamAttr(1, Attribute::ZExt); BasicBlock *Tail = BI->getSuccessor(0); PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); diff --git a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp index 7dea1dee756acd6747630c1d307d2dbdf3f9cfd2..6864d295525c3e3c70316c0125044f4400981b5a 100644 --- a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp +++ b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp @@ -18,7 +18,6 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -32,6 +31,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -398,8 +398,8 @@ GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( // u64 *ArrayCounter; // }; auto *StructInfoTy = - StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy, - Int8PtrPtrTy, Int64PtrTy, Int64PtrTy, nullptr); + StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy, + Int8PtrPtrTy, Int64PtrTy, Int64PtrTy); auto *StructInfoPtrTy = StructInfoTy->getPointerTo(); // This structure should be kept consistent with the CacheFragInfo struct // in the runtime library. @@ -408,8 +408,7 @@ GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( // u32 NumStructs; // StructInfo *Structs; // }; - auto *CacheFragInfoTy = - StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy, nullptr); + auto *CacheFragInfoTy = StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy); std::vector Vec = M.getIdentifiedStructTypes(); unsigned NumStructs = 0; @@ -457,24 +456,23 @@ GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0); ArrayCounterIdx[1] = ConstantInt::get(Int32Ty, getArrayCounterIdx(StructTy)); - Initializers.push_back( - ConstantStruct::get( - StructInfoTy, - ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy), - ConstantInt::get(Int32Ty, - DL.getStructLayout(StructTy)->getSizeInBytes()), - ConstantInt::get(Int32Ty, StructTy->getNumElements()), - Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) : - ConstantExpr::getPointerCast(Offset, Int32PtrTy), - Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) : - ConstantExpr::getPointerCast(Size, Int32PtrTy), - TypeName == nullptr ? ConstantPointerNull::get(Int8PtrPtrTy) : - ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy), - ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, - FieldCounterIdx), - ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, - ArrayCounterIdx), - nullptr)); + Initializers.push_back(ConstantStruct::get( + StructInfoTy, + ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy), + ConstantInt::get(Int32Ty, + DL.getStructLayout(StructTy)->getSizeInBytes()), + ConstantInt::get(Int32Ty, StructTy->getNumElements()), + Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy) + : ConstantExpr::getPointerCast(Offset, Int32PtrTy), + Size == nullptr ? ConstantPointerNull::get(Int32PtrTy) + : ConstantExpr::getPointerCast(Size, Int32PtrTy), + TypeName == nullptr + ? ConstantPointerNull::get(Int8PtrPtrTy) + : ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy), + ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, + FieldCounterIdx), + ConstantExpr::getGetElementPtr(CounterArrayTy, Counters, + ArrayCounterIdx))); } // Structs. Constant *StructInfo; @@ -491,11 +489,8 @@ GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV( auto *CacheFragInfoGV = new GlobalVariable( M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage, - ConstantStruct::get(CacheFragInfoTy, - UnitName, - ConstantInt::get(Int32Ty, NumStructs), - StructInfo, - nullptr)); + ConstantStruct::get(CacheFragInfoTy, UnitName, + ConstantInt::get(Int32Ty, NumStructs), StructInfo)); return CacheFragInfoGV; } diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index 61d627673c907076dee6d15df4125bb6523332cf..0d308810009d5c33d7d8853ce9016f0c5c3f6bbe 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -56,8 +56,6 @@ using namespace llvm; STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions."); STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites."); -STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized."); -STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated."); // Command line option to disable indirect-call promotion with the default as // false. This is for debug purpose. @@ -70,13 +68,13 @@ static cl::opt DisableICP("disable-icp", cl::init(false), cl::Hidden, // For debug use only. static cl::opt ICPCutOff("icp-cutoff", cl::init(0), cl::Hidden, cl::ZeroOrMore, - cl::desc("Max number of promotions for this compilaiton")); + cl::desc("Max number of promotions for this compilation")); // If ICPCSSkip is non zero, the first ICPCSSkip callsites will be skipped. // For debug use only. static cl::opt ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden, cl::ZeroOrMore, - cl::desc("Skip Callsite up to this number for this compilaiton")); + cl::desc("Skip Callsite up to this number for this compilation")); // Set if the pass is called in LTO optimization. The difference for LTO mode // is the pass won't prefix the source module name to the internal linkage @@ -111,44 +109,6 @@ static cl::opt ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden, cl::desc("Dump IR after transformation happens")); -// The minimum call count to optimize memory intrinsic calls. -static cl::opt - MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, - cl::init(1000), - cl::desc("The minimum count to optimize memory " - "intrinsic calls")); - -// Command line option to disable memory intrinsic optimization. The default is -// false. This is for debug purpose. -static cl::opt DisableMemOPOPT("disable-memop-opt", cl::init(false), - cl::Hidden, cl::desc("Disable optimize")); - -// The percent threshold to optimize memory intrinsic calls. -static cl::opt - MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), - cl::Hidden, cl::ZeroOrMore, - cl::desc("The percentage threshold for the " - "memory intrinsic calls optimization")); - -// Maximum number of versions for optimizing memory intrinsic call. -static cl::opt - MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, - cl::ZeroOrMore, - cl::desc("The max version for the optimized memory " - " intrinsic calls")); - -// Scale the counts from the annotation using the BB count value. -static cl::opt - MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, - cl::desc("Scale the memop size counts using the basic " - " block count value")); - -// This option sets the rangge of precise profile memop sizes. -extern cl::opt MemOPSizeRange; - -// This option sets the value that groups large memop sizes -extern cl::opt MemOPSizeLarge; - namespace { class PGOIndirectCallPromotionLegacyPass : public ModulePass { public: @@ -173,24 +133,6 @@ private: // the promoted direct call. bool SamplePGO; }; - -class PGOMemOPSizeOptLegacyPass : public FunctionPass { -public: - static char ID; - - PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) { - initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - StringRef getPassName() const override { return "PGOMemOPSize"; } - -private: - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - } -}; } // end anonymous namespace char PGOIndirectCallPromotionLegacyPass::ID = 0; @@ -204,19 +146,6 @@ ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO); } -char PGOMemOPSizeOptLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", - "Optimize memory intrinsic using its size value profile", - false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", - "Optimize memory intrinsic using its size value profile", - false, false) - -FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { - return new PGOMemOPSizeOptLegacyPass(); -} - namespace { // The class for main data structure to promote indirect calls to conditional // direct calls. @@ -749,277 +678,3 @@ PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, return PreservedAnalyses::none(); } - -namespace { -class MemOPSizeOpt : public InstVisitor { -public: - MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI) - : Func(Func), BFI(BFI), Changed(false) { - ValueDataArray = - llvm::make_unique(MemOPMaxVersion + 2); - // Get the MemOPSize range information from option MemOPSizeRange, - getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart, - PreciseRangeLast); - } - bool isChanged() const { return Changed; } - void perform() { - WorkList.clear(); - visit(Func); - - for (auto &MI : WorkList) { - ++NumOfPGOMemOPAnnotate; - if (perform(MI)) { - Changed = true; - ++NumOfPGOMemOPOpt; - DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName() - << "is Transformed.\n"); - } - } - } - - void visitMemIntrinsic(MemIntrinsic &MI) { - Value *Length = MI.getLength(); - // Not perform on constant length calls. - if (dyn_cast(Length)) - return; - WorkList.push_back(&MI); - } - -private: - Function &Func; - BlockFrequencyInfo &BFI; - bool Changed; - std::vector WorkList; - // Start of the previse range. - int64_t PreciseRangeStart; - // Last value of the previse range. - int64_t PreciseRangeLast; - // The space to read the profile annotation. - std::unique_ptr ValueDataArray; - bool perform(MemIntrinsic *MI); - - // This kind shows which group the value falls in. For PreciseValue, we have - // the profile count for that value. LargeGroup groups the values that are in - // range [LargeValue, +inf). NonLargeGroup groups the rest of values. - enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup }; - - MemOPSizeKind getMemOPSizeKind(int64_t Value) const { - if (Value == MemOPSizeLarge && MemOPSizeLarge != 0) - return LargeGroup; - if (Value == PreciseRangeLast + 1) - return NonLargeGroup; - return PreciseValue; - } -}; - -static const char *getMIName(const MemIntrinsic *MI) { - switch (MI->getIntrinsicID()) { - case Intrinsic::memcpy: - return "memcpy"; - case Intrinsic::memmove: - return "memmove"; - case Intrinsic::memset: - return "memset"; - default: - return "unknown"; - } -} - -static bool isProfitable(uint64_t Count, uint64_t TotalCount) { - assert(Count <= TotalCount); - if (Count < MemOPCountThreshold) - return false; - if (Count < TotalCount * MemOPPercentThreshold / 100) - return false; - return true; -} - -static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, - uint64_t Denom) { - if (!MemOPScaleCount) - return Count; - bool Overflowed; - uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed); - return ScaleCount / Denom; -} - -bool MemOPSizeOpt::perform(MemIntrinsic *MI) { - assert(MI); - if (MI->getIntrinsicID() == Intrinsic::memmove) - return false; - - uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; - uint64_t TotalCount; - if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, - ValueDataArray.get(), NumVals, TotalCount)) - return false; - - uint64_t ActualCount = TotalCount; - uint64_t SavedTotalCount = TotalCount; - if (MemOPScaleCount) { - auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); - if (!BBEdgeCount) - return false; - ActualCount = *BBEdgeCount; - } - - if (ActualCount < MemOPCountThreshold) - return false; - - ArrayRef VDs(ValueDataArray.get(), NumVals); - TotalCount = ActualCount; - if (MemOPScaleCount) - DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount - << " denominator = " << SavedTotalCount << "\n"); - - // Keeping track of the count of the default case: - uint64_t RemainCount = TotalCount; - SmallVector SizeIds; - SmallVector CaseCounts; - uint64_t MaxCount = 0; - unsigned Version = 0; - // Default case is in the front -- save the slot here. - CaseCounts.push_back(0); - for (auto &VD : VDs) { - int64_t V = VD.Value; - uint64_t C = VD.Count; - if (MemOPScaleCount) - C = getScaledCount(C, ActualCount, SavedTotalCount); - - // Only care precise value here. - if (getMemOPSizeKind(V) != PreciseValue) - continue; - - // ValueCounts are sorted on the count. Break at the first un-profitable - // value. - if (!isProfitable(C, RemainCount)) - break; - - SizeIds.push_back(V); - CaseCounts.push_back(C); - if (C > MaxCount) - MaxCount = C; - - assert(RemainCount >= C); - RemainCount -= C; - - if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0) - break; - } - - if (Version == 0) - return false; - - CaseCounts[0] = RemainCount; - if (RemainCount > MaxCount) - MaxCount = RemainCount; - - uint64_t SumForOpt = TotalCount - RemainCount; - DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs " - << TotalCount << "\n"); - DEBUG( - for (auto &VD - : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); - - DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version - << " Versions\n"); - - // mem_op(..., size) - // ==> - // switch (size) { - // case s1: - // mem_op(..., s1); - // goto merge_bb; - // case s2: - // mem_op(..., s2); - // goto merge_bb; - // ... - // default: - // mem_op(..., size); - // goto merge_bb; - // } - // merge_bb: - - BasicBlock *BB = MI->getParent(); - DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); - DEBUG(dbgs() << *BB << "\n"); - - BasicBlock *DefaultBB = SplitBlock(BB, MI); - BasicBlock::iterator It(*MI); - ++It; - assert(It != DefaultBB->end()); - BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); - DefaultBB->setName("MemOP.Default"); - MergeBB->setName("MemOP.Merge"); - - auto &Ctx = Func.getContext(); - IRBuilder<> IRB(BB); - BB->getTerminator()->eraseFromParent(); - Value *SizeVar = MI->getLength(); - SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); - - // Clear the value profile data. - MI->setMetadata(LLVMContext::MD_prof, nullptr); - - DEBUG(dbgs() << "\n\n== Basic Block After==\n"); - - for (uint64_t SizeId : SizeIds) { - ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId); - BasicBlock *CaseBB = BasicBlock::Create( - Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); - Instruction *NewInst = MI->clone(); - // Fix the argument. - dyn_cast(NewInst)->setLength(CaseSizeId); - CaseBB->getInstList().push_back(NewInst); - IRBuilder<> IRBCase(CaseBB); - IRBCase.CreateBr(MergeBB); - SI->addCase(CaseSizeId, CaseBB); - DEBUG(dbgs() << *CaseBB << "\n"); - } - setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); - - DEBUG(dbgs() << *BB << "\n"); - DEBUG(dbgs() << *DefaultBB << "\n"); - DEBUG(dbgs() << *MergeBB << "\n"); - - emitOptimizationRemark(Func.getContext(), "memop-opt", Func, - MI->getDebugLoc(), - Twine("optimize ") + getMIName(MI) + " with count " + - Twine(SumForOpt) + " out of " + Twine(TotalCount) + - " for " + Twine(Version) + " versions"); - - return true; -} -} // namespace - -static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) { - if (DisableMemOPOPT) - return false; - - if (F.hasFnAttribute(Attribute::OptimizeForSize)) - return false; - MemOPSizeOpt MemOPSizeOpt(F, BFI); - MemOPSizeOpt.perform(); - return MemOPSizeOpt.isChanged(); -} - -bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { - BlockFrequencyInfo &BFI = - getAnalysis().getBFI(); - return PGOMemOPSizeOptImpl(F, BFI); -} - -namespace llvm { -char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID; - -PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, - FunctionAnalysisManager &FAM) { - auto &BFI = FAM.getResult(F); - bool Changed = PGOMemOPSizeOptImpl(F, BFI); - if (!Changed) - return PreservedAnalyses::all(); - auto PA = PreservedAnalyses(); - PA.preserve(); - return PA; -} -} // namespace llvm diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 1f8bcb9a330ea1e3f32f4e9142f5e9bf17cc7096..37f88d5f95f18006eaf459e6962247f86f631bee 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -28,10 +28,10 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" @@ -140,30 +140,6 @@ llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) { return new InstrProfilingLegacyPass(Options); } -bool InstrProfiling::isMachO() const { - return Triple(M->getTargetTriple()).isOSBinFormatMachO(); -} - -/// Get the section name for the counter variables. -StringRef InstrProfiling::getCountersSection() const { - return getInstrProfCountersSectionName(isMachO()); -} - -/// Get the section name for the name variables. -StringRef InstrProfiling::getNameSection() const { - return getInstrProfNameSectionName(isMachO()); -} - -/// Get the section name for the profile data variables. -StringRef InstrProfiling::getDataSection() const { - return getInstrProfDataSectionName(isMachO()); -} - -/// Get the section name for the coverage mapping data. -StringRef InstrProfiling::getCoverageSection() const { - return getInstrProfCoverageSectionName(isMachO()); -} - static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { InstrProfIncrementInst *Inc = dyn_cast(Instr); if (Inc) @@ -182,6 +158,7 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) { UsedVars.clear(); getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart, MemOPSizeRangeLast); + TT = Triple(M.getTargetTriple()); // We did not know how many value sites there would be inside // the instrumented function. This is counting the number of instrumented @@ -264,7 +241,7 @@ static Constant *getOrInsertValueProfilingCall(Module &M, if (Function *FunRes = dyn_cast(Res)) { if (auto AK = TLI.getExtAttrForI32Param(false)) - FunRes->addAttribute(3, AK); + FunRes->addParamAttr(2, AK); } return Res; } @@ -315,7 +292,7 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args); } if (auto AK = TLI->getExtAttrForI32Param(false)) - Call->addAttribute(3, AK); + Call->addParamAttr(2, AK); Ind->replaceAllUsesWith(Call); Ind->eraseFromParent(); } @@ -366,14 +343,24 @@ static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) { static inline bool shouldRecordFunctionAddr(Function *F) { // Check the linkage + bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage(); if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && - !F->hasAvailableExternallyLinkage()) + !HasAvailableExternallyLinkage) return true; + + // A function marked 'alwaysinline' with available_externally linkage can't + // have its address taken. Doing so would create an undefined external ref to + // the function, which would fail to link. + if (HasAvailableExternallyLinkage && + F->hasFnAttribute(Attribute::AlwaysInline)) + return false; + // Prohibit function address recording if the function is both internal and // COMDAT. This avoids the profile data variable referencing internal symbols // in COMDAT. if (F->hasLocalLinkage() && F->hasComdat()) return false; + // Check uses of this function for other than direct calls or invokes to it. // Inline virtual functions have linkeOnceODR linkage. When a key method // exists, the vtable will only be emitted in the TU where the key method @@ -442,7 +429,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { Constant::getNullValue(CounterTy), getVarName(Inc, getInstrProfCountersVarPrefix())); CounterPtr->setVisibility(NamePtr->getVisibility()); - CounterPtr->setSection(getCountersSection()); + CounterPtr->setSection( + getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat())); CounterPtr->setAlignment(8); CounterPtr->setComdat(ProfileVarsComdat); @@ -462,7 +450,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { Constant::getNullValue(ValuesTy), getVarName(Inc, getInstrProfValuesVarPrefix())); ValuesVar->setVisibility(NamePtr->getVisibility()); - ValuesVar->setSection(getInstrProfValuesSectionName(isMachO())); + ValuesVar->setSection( + getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); ValuesVar->setAlignment(8); ValuesVar->setComdat(ProfileVarsComdat); ValuesPtrExpr = @@ -495,7 +484,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { ConstantStruct::get(DataTy, DataVals), getVarName(Inc, getInstrProfDataVarPrefix())); Data->setVisibility(NamePtr->getVisibility()); - Data->setSection(getDataSection()); + Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat())); Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT); Data->setComdat(ProfileVarsComdat); @@ -557,7 +546,8 @@ void InstrProfiling::emitVNodes() { auto *VNodesVar = new GlobalVariable( *M, VNodesTy, false, GlobalValue::PrivateLinkage, Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName()); - VNodesVar->setSection(getInstrProfVNodesSectionName(isMachO())); + VNodesVar->setSection( + getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat())); UsedVars.push_back(VNodesVar); } @@ -580,7 +570,8 @@ void InstrProfiling::emitNameData() { GlobalValue::PrivateLinkage, NamesVal, getInstrProfNamesVarName()); NamesSize = CompressedNameStr.size(); - NamesVar->setSection(getNameSection()); + NamesVar->setSection( + getInstrProfSectionName(IPSK_name, TT.getObjectFormat())); UsedVars.push_back(NamesVar); for (auto *NamePtr : ReferencedNames) @@ -676,7 +667,6 @@ void InstrProfiling::emitInitialization() { GlobalVariable *ProfileNameVar = new GlobalVariable( *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage, ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)); - Triple TT(M->getTargetTriple()); if (TT.supportsCOMDAT()) { ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); ProfileNameVar->setComdat(M->getOrInsertComdat( diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index ddc594bed8a4233deee7dcdd44bff9b18c458093..df4ee9969c02f826be16cce64b0f269b5c38d997 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1576,13 +1576,16 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy, bool Signed = false) { Type *srcTy = V->getType(); + size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy); + size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy); + if (srcSizeInBits > 1 && dstSizeInBits == 1) + return IRB.CreateICmpNE(V, getCleanShadow(V)); + if (dstTy->isIntegerTy() && srcTy->isIntegerTy()) return IRB.CreateIntCast(V, dstTy, Signed); if (dstTy->isVectorTy() && srcTy->isVectorTy() && dstTy->getVectorNumElements() == srcTy->getVectorNumElements()) return IRB.CreateIntCast(V, dstTy, Signed); - size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy); - size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy); Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits)); Value *V2 = IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed); @@ -2084,6 +2087,7 @@ struct MemorySanitizerVisitor : public InstVisitor { switch (I.getNumArgOperands()) { case 3: assert(isa(I.getArgOperand(2)) && "Invalid rounding mode"); + LLVM_FALLTHROUGH; case 2: CopyOp = I.getArgOperand(0); ConvertOp = I.getArgOperand(1); @@ -2607,10 +2611,7 @@ struct MemorySanitizerVisitor : public InstVisitor { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - Func->removeAttributes(AttributeList::FunctionIndex, - AttributeList::get(Func->getContext(), - AttributeList::FunctionIndex, - B)); + Func->removeAttributes(AttributeList::FunctionIndex, B); } maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI); @@ -2638,12 +2639,12 @@ struct MemorySanitizerVisitor : public InstVisitor { " Shadow: " << *ArgShadow << "\n"); bool ArgIsInitialized = false; const DataLayout &DL = F.getParent()->getDataLayout(); - if (CS.paramHasAttr(i + 1, Attribute::ByVal)) { + if (CS.paramHasAttr(i, Attribute::ByVal)) { assert(A->getType()->isPointerTy() && "ByVal argument is not a pointer!"); Size = DL.getTypeAllocSize(A->getType()->getPointerElementType()); if (ArgOffset + Size > kParamTLSSize) break; - unsigned ParamAlignment = CS.getParamAlignment(i + 1); + unsigned ParamAlignment = CS.getParamAlignment(i); unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); Store = IRB.CreateMemCpy(ArgShadowBase, getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), @@ -2976,7 +2977,7 @@ struct VarArgAMD64Helper : public VarArgHelper { Value *A = *ArgIt; unsigned ArgNo = CS.getArgumentNo(ArgIt); bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams(); - bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal); + bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal); if (IsByVal) { // ByVal arguments always go to the overflow area. // Fixed arguments passed through the overflow area will be stepped @@ -3497,12 +3498,12 @@ struct VarArgPowerPC64Helper : public VarArgHelper { Value *A = *ArgIt; unsigned ArgNo = CS.getArgumentNo(ArgIt); bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams(); - bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal); + bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal); if (IsByVal) { assert(A->getType()->isPointerTy()); Type *RealTy = A->getType()->getPointerElementType(); uint64_t ArgSize = DL.getTypeAllocSize(RealTy); - uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1); + uint64_t ArgAlign = CS.getParamAlignment(ArgNo); if (ArgAlign < 8) ArgAlign = 8; VAArgOffset = alignTo(VAArgOffset, ArgAlign); @@ -3659,9 +3660,7 @@ bool MemorySanitizer::runOnFunction(Function &F) { AttrBuilder B; B.addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::ReadNone); - F.removeAttributes( - AttributeList::FunctionIndex, - AttributeList::get(F.getContext(), AttributeList::FunctionIndex, B)); + F.removeAttributes(AttributeList::FunctionIndex, B); return Visitor.runOnFunction(); } diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 990bcec109de7e3768df763fe5ff2270090abab7..b2d95271479c301e924933f301710e4c0405f6e2 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -180,7 +180,15 @@ static cl::opt static cl::opt PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " - "memory instrinsic size profiling.")); + "memory intrinsic size profiling.")); + +// Emit branch probability as optimization remarks. +static cl::opt + EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden, + cl::desc("When this option is on, the annotated " + "branch probability will be emitted as " + " optimization remarks: -Rpass-analysis=" + "pgo-instr-use")); // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -192,6 +200,39 @@ extern cl::opt ViewBlockFreqFuncName; namespace { +// Return a string describing the branch condition that can be +// used in static branch probability heuristics: +std::string getBranchCondString(Instruction *TI) { + BranchInst *BI = dyn_cast(TI); + if (!BI || !BI->isConditional()) + return std::string(); + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast(Cond); + if (!CI) + return std::string(); + + std::string result; + raw_string_ostream OS(result); + OS << CmpInst::getPredicateName(CI->getPredicate()) << "_"; + CI->getOperand(0)->getType()->print(OS, true); + + Value *RHS = CI->getOperand(1); + ConstantInt *CV = dyn_cast(RHS); + if (CV) { + if (CV->isZero()) + OS << "_Zero"; + else if (CV->isOne()) + OS << "_One"; + else if (CV->isAllOnesValue()) + OS << "_MinusOne"; + else + OS << "_Const"; + } + OS.flush(); + return result; +} + /// The select instruction visitor plays three roles specified /// by the mode. In \c VM_counting mode, it simply counts the number of /// select instructions. In \c VM_instrument mode, it inserts code to count @@ -1424,6 +1465,29 @@ void setProfMetadata(Module *M, Instruction *TI, ArrayRef EdgeCounts, for (const auto &W : Weights) { dbgs() << W << " "; } dbgs() << "\n";); TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + if (EmitBranchProbability) { + std::string BrCondStr = getBranchCondString(TI); + if (BrCondStr.empty()) + return; + + unsigned WSum = + std::accumulate(Weights.begin(), Weights.end(), 0, + [](unsigned w1, unsigned w2) { return w1 + w2; }); + uint64_t TotalCount = + std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), 0, + [](uint64_t c1, uint64_t c2) { return c1 + c2; }); + BranchProbability BP(Weights[0], WSum); + std::string BranchProbStr; + raw_string_ostream OS(BranchProbStr); + OS << BP; + OS << " (total count : " << TotalCount << ")"; + OS.flush(); + Function *F = TI->getParent()->getParent(); + emitOptimizationRemarkAnalysis( + F->getContext(), "pgo-use-annot", *F, TI->getDebugLoc(), + Twine(BrCondStr) + + " is true with probability : " + Twine(BranchProbStr)); + } } template <> struct GraphTraits { diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0bc9ddfbe4d338de0480e1e434149600446dbfb0 --- /dev/null +++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -0,0 +1,419 @@ +//===-- PGOMemOPSizeOpt.cpp - Optimizations based on value profiling ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the transformation that optimizes memory intrinsics +// such as memcpy using the size value profile. When memory intrinsic size +// value profile metadata is available, a single memory intrinsic is expanded +// to a sequence of guarded specialized versions that are called with the +// hottest size(s), for later expansion into more optimal inline sequences. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/PGOInstrumentation.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "pgo-memop-opt" + +STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized."); +STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated."); + +// The minimum call count to optimize memory intrinsic calls. +static cl::opt + MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore, + cl::init(1000), + cl::desc("The minimum count to optimize memory " + "intrinsic calls")); + +// Command line option to disable memory intrinsic optimization. The default is +// false. This is for debug purpose. +static cl::opt DisableMemOPOPT("disable-memop-opt", cl::init(false), + cl::Hidden, cl::desc("Disable optimize")); + +// The percent threshold to optimize memory intrinsic calls. +static cl::opt + MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40), + cl::Hidden, cl::ZeroOrMore, + cl::desc("The percentage threshold for the " + "memory intrinsic calls optimization")); + +// Maximum number of versions for optimizing memory intrinsic call. +static cl::opt + MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden, + cl::ZeroOrMore, + cl::desc("The max version for the optimized memory " + " intrinsic calls")); + +// Scale the counts from the annotation using the BB count value. +static cl::opt + MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden, + cl::desc("Scale the memop size counts using the basic " + " block count value")); + +// This option sets the rangge of precise profile memop sizes. +extern cl::opt MemOPSizeRange; + +// This option sets the value that groups large memop sizes +extern cl::opt MemOPSizeLarge; + +namespace { +class PGOMemOPSizeOptLegacyPass : public FunctionPass { +public: + static char ID; + + PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) { + initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "PGOMemOPSize"; } + +private: + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + } +}; +} // end anonymous namespace + +char PGOMemOPSizeOptLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", + "Optimize memory intrinsic using its size value profile", + false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt", + "Optimize memory intrinsic using its size value profile", + false, false) + +FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() { + return new PGOMemOPSizeOptLegacyPass(); +} + +namespace { +class MemOPSizeOpt : public InstVisitor { +public: + MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI) + : Func(Func), BFI(BFI), Changed(false) { + ValueDataArray = + llvm::make_unique(MemOPMaxVersion + 2); + // Get the MemOPSize range information from option MemOPSizeRange, + getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart, + PreciseRangeLast); + } + bool isChanged() const { return Changed; } + void perform() { + WorkList.clear(); + visit(Func); + + for (auto &MI : WorkList) { + ++NumOfPGOMemOPAnnotate; + if (perform(MI)) { + Changed = true; + ++NumOfPGOMemOPOpt; + DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName() + << "is Transformed.\n"); + } + } + } + + void visitMemIntrinsic(MemIntrinsic &MI) { + Value *Length = MI.getLength(); + // Not perform on constant length calls. + if (dyn_cast(Length)) + return; + WorkList.push_back(&MI); + } + +private: + Function &Func; + BlockFrequencyInfo &BFI; + bool Changed; + std::vector WorkList; + // Start of the previse range. + int64_t PreciseRangeStart; + // Last value of the previse range. + int64_t PreciseRangeLast; + // The space to read the profile annotation. + std::unique_ptr ValueDataArray; + bool perform(MemIntrinsic *MI); + + // This kind shows which group the value falls in. For PreciseValue, we have + // the profile count for that value. LargeGroup groups the values that are in + // range [LargeValue, +inf). NonLargeGroup groups the rest of values. + enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup }; + + MemOPSizeKind getMemOPSizeKind(int64_t Value) const { + if (Value == MemOPSizeLarge && MemOPSizeLarge != 0) + return LargeGroup; + if (Value == PreciseRangeLast + 1) + return NonLargeGroup; + return PreciseValue; + } +}; + +static const char *getMIName(const MemIntrinsic *MI) { + switch (MI->getIntrinsicID()) { + case Intrinsic::memcpy: + return "memcpy"; + case Intrinsic::memmove: + return "memmove"; + case Intrinsic::memset: + return "memset"; + default: + return "unknown"; + } +} + +static bool isProfitable(uint64_t Count, uint64_t TotalCount) { + assert(Count <= TotalCount); + if (Count < MemOPCountThreshold) + return false; + if (Count < TotalCount * MemOPPercentThreshold / 100) + return false; + return true; +} + +static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num, + uint64_t Denom) { + if (!MemOPScaleCount) + return Count; + bool Overflowed; + uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed); + return ScaleCount / Denom; +} + +bool MemOPSizeOpt::perform(MemIntrinsic *MI) { + assert(MI); + if (MI->getIntrinsicID() == Intrinsic::memmove) + return false; + + uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2; + uint64_t TotalCount; + if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions, + ValueDataArray.get(), NumVals, TotalCount)) + return false; + + uint64_t ActualCount = TotalCount; + uint64_t SavedTotalCount = TotalCount; + if (MemOPScaleCount) { + auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent()); + if (!BBEdgeCount) + return false; + ActualCount = *BBEdgeCount; + } + + ArrayRef VDs(ValueDataArray.get(), NumVals); + DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount + << "\n"); + DEBUG( + for (auto &VD + : VDs) { dbgs() << " (" << VD.Value << "," << VD.Count << ")\n"; }); + + if (ActualCount < MemOPCountThreshold) + return false; + // Skip if the total value profiled count is 0, in which case we can't + // scale up the counts properly (and there is no profitable transformation). + if (TotalCount == 0) + return false; + + TotalCount = ActualCount; + if (MemOPScaleCount) + DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount + << " denominator = " << SavedTotalCount << "\n"); + + // Keeping track of the count of the default case: + uint64_t RemainCount = TotalCount; + uint64_t SavedRemainCount = SavedTotalCount; + SmallVector SizeIds; + SmallVector CaseCounts; + uint64_t MaxCount = 0; + unsigned Version = 0; + // Default case is in the front -- save the slot here. + CaseCounts.push_back(0); + for (auto &VD : VDs) { + int64_t V = VD.Value; + uint64_t C = VD.Count; + if (MemOPScaleCount) + C = getScaledCount(C, ActualCount, SavedTotalCount); + + // Only care precise value here. + if (getMemOPSizeKind(V) != PreciseValue) + continue; + + // ValueCounts are sorted on the count. Break at the first un-profitable + // value. + if (!isProfitable(C, RemainCount)) + break; + + SizeIds.push_back(V); + CaseCounts.push_back(C); + if (C > MaxCount) + MaxCount = C; + + assert(RemainCount >= C); + RemainCount -= C; + assert(SavedRemainCount >= VD.Count); + SavedRemainCount -= VD.Count; + + if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0) + break; + } + + if (Version == 0) + return false; + + CaseCounts[0] = RemainCount; + if (RemainCount > MaxCount) + MaxCount = RemainCount; + + uint64_t SumForOpt = TotalCount - RemainCount; + + DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version + << " Versions (covering " << SumForOpt << " out of " + << TotalCount << ")\n"); + + // mem_op(..., size) + // ==> + // switch (size) { + // case s1: + // mem_op(..., s1); + // goto merge_bb; + // case s2: + // mem_op(..., s2); + // goto merge_bb; + // ... + // default: + // mem_op(..., size); + // goto merge_bb; + // } + // merge_bb: + + BasicBlock *BB = MI->getParent(); + DEBUG(dbgs() << "\n\n== Basic Block Before ==\n"); + DEBUG(dbgs() << *BB << "\n"); + auto OrigBBFreq = BFI.getBlockFreq(BB); + + BasicBlock *DefaultBB = SplitBlock(BB, MI); + BasicBlock::iterator It(*MI); + ++It; + assert(It != DefaultBB->end()); + BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It)); + MergeBB->setName("MemOP.Merge"); + BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency()); + DefaultBB->setName("MemOP.Default"); + + auto &Ctx = Func.getContext(); + IRBuilder<> IRB(BB); + BB->getTerminator()->eraseFromParent(); + Value *SizeVar = MI->getLength(); + SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size()); + + // Clear the value profile data. + MI->setMetadata(LLVMContext::MD_prof, nullptr); + // If all promoted, we don't need the MD.prof metadata. + if (SavedRemainCount > 0 || Version != NumVals) + // Otherwise we need update with the un-promoted records back. + annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version), + SavedRemainCount, IPVK_MemOPSize, NumVals); + + DEBUG(dbgs() << "\n\n== Basic Block After==\n"); + + for (uint64_t SizeId : SizeIds) { + ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId); + BasicBlock *CaseBB = BasicBlock::Create( + Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB); + Instruction *NewInst = MI->clone(); + // Fix the argument. + dyn_cast(NewInst)->setLength(CaseSizeId); + CaseBB->getInstList().push_back(NewInst); + IRBuilder<> IRBCase(CaseBB); + IRBCase.CreateBr(MergeBB); + SI->addCase(CaseSizeId, CaseBB); + DEBUG(dbgs() << *CaseBB << "\n"); + } + setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount); + + DEBUG(dbgs() << *BB << "\n"); + DEBUG(dbgs() << *DefaultBB << "\n"); + DEBUG(dbgs() << *MergeBB << "\n"); + + emitOptimizationRemark(Func.getContext(), "memop-opt", Func, + MI->getDebugLoc(), + Twine("optimize ") + getMIName(MI) + " with count " + + Twine(SumForOpt) + " out of " + Twine(TotalCount) + + " for " + Twine(Version) + " versions"); + + return true; +} +} // namespace + +static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) { + if (DisableMemOPOPT) + return false; + + if (F.hasFnAttribute(Attribute::OptimizeForSize)) + return false; + MemOPSizeOpt MemOPSizeOpt(F, BFI); + MemOPSizeOpt.perform(); + return MemOPSizeOpt.isChanged(); +} + +bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) { + BlockFrequencyInfo &BFI = + getAnalysis().getBFI(); + return PGOMemOPSizeOptImpl(F, BFI); +} + +namespace llvm { +char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID; + +PreservedAnalyses PGOMemOPSizeOpt::run(Function &F, + FunctionAnalysisManager &FAM) { + auto &BFI = FAM.getResult(F); + bool Changed = PGOMemOPSizeOptImpl(F, BFI); + if (!Changed) + return PreservedAnalyses::all(); + auto PA = PreservedAnalyses(); + PA.preserve(); + return PA; +} +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index fa0c7cc5a4c53710a15cbea956b06de98e184ba9..e3c36c98ab0db4fd5d0fc115aa596211375d0e3e 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -7,24 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Coverage instrumentation that works with AddressSanitizer -// and potentially with other Sanitizers. -// -// We create a Guard variable with the same linkage -// as the function and inject this code into the entry block (SCK_Function) -// or all blocks (SCK_BB): -// if (Guard < 0) { -// __sanitizer_cov(&Guard); -// } -// The accesses to Guard are atomic. The rest of the logic is -// in __sanitizer_cov (it's fine to call it more than once). -// -// With SCK_Edge we also split critical edges this effectively -// instrumenting all edges. -// -// This coverage implementation provides very limited data: -// it only tells if a given function (block) was ever executed. No counters. -// But for many use cases this is what we need and the added slowdown small. +// Coverage instrumentation done on LLVM IR level, works with Sanitizers. // //===----------------------------------------------------------------------===// @@ -56,16 +39,8 @@ using namespace llvm; #define DEBUG_TYPE "sancov" -static const char *const SanCovModuleInitName = "__sanitizer_cov_module_init"; -static const char *const SanCovName = "__sanitizer_cov"; -static const char *const SanCovWithCheckName = "__sanitizer_cov_with_check"; -static const char *const SanCovIndirCallName = "__sanitizer_cov_indir_call16"; static const char *const SanCovTracePCIndirName = "__sanitizer_cov_trace_pc_indir"; -static const char *const SanCovTraceEnterName = - "__sanitizer_cov_trace_func_enter"; -static const char *const SanCovTraceBBName = - "__sanitizer_cov_trace_basic_block"; static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1"; static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2"; @@ -82,34 +57,30 @@ static const char *const SanCovTracePCGuardName = "__sanitizer_cov_trace_pc_guard"; static const char *const SanCovTracePCGuardInitName = "__sanitizer_cov_trace_pc_guard_init"; +static const char *const SanCov8bitCountersInitName = + "__sanitizer_cov_8bit_counters_init"; + +static const char *const SanCovGuardsSectionName = "sancov_guards"; +static const char *const SanCovCountersSectionName = "sancov_cntrs"; static cl::opt ClCoverageLevel( "sanitizer-coverage-level", cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, " - "3: all blocks and critical edges, " - "4: above plus indirect calls"), - cl::Hidden, cl::init(0)); - -static cl::opt ClCoverageBlockThreshold( - "sanitizer-coverage-block-threshold", - cl::desc("Use a callback with a guard check inside it if there are" - " more than this number of blocks."), + "3: all blocks and critical edges"), cl::Hidden, cl::init(0)); -static cl::opt - ClExperimentalTracing("sanitizer-coverage-experimental-tracing", - cl::desc("Experimental basic-block tracing: insert " - "callbacks at every basic block"), - cl::Hidden, cl::init(false)); - -static cl::opt ClExperimentalTracePC("sanitizer-coverage-trace-pc", - cl::desc("Experimental pc tracing"), - cl::Hidden, cl::init(false)); +static cl::opt ClTracePC("sanitizer-coverage-trace-pc", + cl::desc("Experimental pc tracing"), cl::Hidden, + cl::init(false)); static cl::opt ClTracePCGuard("sanitizer-coverage-trace-pc-guard", cl::desc("pc tracing with a guard"), cl::Hidden, cl::init(false)); +static cl::opt ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters", + cl::desc("increments 8-bit counter for every edge"), + cl::Hidden, cl::init(false)); + static cl::opt ClCMPTracing("sanitizer-coverage-trace-compares", cl::desc("Tracing of CMP and similar instructions"), @@ -128,16 +99,6 @@ static cl::opt cl::desc("Reduce the number of instrumented blocks"), cl::Hidden, cl::init(true)); -// Experimental 8-bit counters used as an additional search heuristic during -// coverage-guided fuzzing. -// The counters are not thread-friendly: -// - contention on these counters may cause significant slowdown; -// - the counter updates are racy and the results may be inaccurate. -// They are also inaccurate due to 8-bit integer overflow. -static cl::opt ClUse8bitCounters("sanitizer-coverage-8bit-counters", - cl::desc("Experimental 8-bit counters"), - cl::Hidden, cl::init(false)); - namespace { SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) { @@ -168,13 +129,15 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel); Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType); Options.IndirectCalls |= CLOpts.IndirectCalls; - Options.TraceBB |= ClExperimentalTracing; Options.TraceCmp |= ClCMPTracing; Options.TraceDiv |= ClDIVTracing; Options.TraceGep |= ClGEPTracing; - Options.Use8bitCounters |= ClUse8bitCounters; - Options.TracePC |= ClExperimentalTracePC; + Options.TracePC |= ClTracePC; Options.TracePCGuard |= ClTracePCGuard; + Options.Inline8bitCounters |= ClInline8bitCounters; + if (!Options.TracePCGuard && !Options.TracePC && !Options.Inline8bitCounters) + Options.TracePCGuard = true; // TracePCGuard is default. + Options.NoPrune |= !ClPruneBlocks; return Options; } @@ -206,43 +169,71 @@ private: void InjectTraceForSwitch(Function &F, ArrayRef SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef AllBlocks); - void CreateFunctionGuardArray(size_t NumGuards, Function &F); - void SetNoSanitizeMetadata(Instruction *I); - void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, - bool UseCalls); - unsigned NumberOfInstrumentedBlocks() { - return SanCovFunction->getNumUses() + - SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() + - SanCovTraceEnter->getNumUses(); + GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements, + Function &F, Type *Ty, + const char *Section); + void CreateFunctionLocalArrays(size_t NumGuards, Function &F); + void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx); + void CreateInitCallForSection(Module &M, const char *InitFunctionName, + Type *Ty, const std::string &Section); + + void SetNoSanitizeMetadata(Instruction *I) { + I->setMetadata(I->getModule()->getMDKindID("nosanitize"), + MDNode::get(*C, None)); } - StringRef getSanCovTracePCGuardSection() const; - StringRef getSanCovTracePCGuardSectionStart() const; - StringRef getSanCovTracePCGuardSectionEnd() const; - Function *SanCovFunction; - Function *SanCovWithCheckFunction; - Function *SanCovIndirCallFunction, *SanCovTracePCIndir; - Function *SanCovTraceEnter, *SanCovTraceBB, *SanCovTracePC, *SanCovTracePCGuard; + + std::string getSectionName(const std::string &Section) const; + std::string getSectionStart(const std::string &Section) const; + std::string getSectionEnd(const std::string &Section) const; + Function *SanCovTracePCIndir; + Function *SanCovTracePC, *SanCovTracePCGuard; Function *SanCovTraceCmpFunction[4]; Function *SanCovTraceDivFunction[2]; Function *SanCovTraceGepFunction; Function *SanCovTraceSwitchFunction; InlineAsm *EmptyAsm; - Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy; + Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy, + *Int8Ty, *Int8PtrTy; Module *CurModule; Triple TargetTriple; LLVMContext *C; const DataLayout *DL; - GlobalVariable *GuardArray; GlobalVariable *FunctionGuardArray; // for trace-pc-guard. - GlobalVariable *EightBitCounterArray; - bool HasSancovGuardsSection; + GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters. SanitizerCoverageOptions Options; }; } // namespace +void SanitizerCoverageModule::CreateInitCallForSection( + Module &M, const char *InitFunctionName, Type *Ty, + const std::string &Section) { + IRBuilder<> IRB(M.getContext()); + Function *CtorFunc; + GlobalVariable *SecStart = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, + getSectionStart(Section)); + SecStart->setVisibility(GlobalValue::HiddenVisibility); + GlobalVariable *SecEnd = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, + nullptr, getSectionEnd(Section)); + SecEnd->setVisibility(GlobalValue::HiddenVisibility); + + std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( + M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty}, + {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)}); + + if (TargetTriple.supportsCOMDAT()) { + // Use comdat to dedup CtorFunc. + CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName)); + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); + } else { + appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); + } +} + bool SanitizerCoverageModule::runOnModule(Module &M) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; @@ -250,26 +241,21 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { DL = &M.getDataLayout(); CurModule = &M; TargetTriple = Triple(M.getTargetTriple()); - HasSancovGuardsSection = false; + FunctionGuardArray = nullptr; + Function8bitCounterArray = nullptr; IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); IntptrPtrTy = PointerType::getUnqual(IntptrTy); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); - Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Int64Ty = IRB.getInt64Ty(); Int32Ty = IRB.getInt32Ty(); + Int8Ty = IRB.getInt8Ty(); - SanCovFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy)); - SanCovWithCheckFunction = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy)); SanCovTracePCIndir = checkSanitizerInterfaceFunction( M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy)); - SanCovIndirCallFunction = - checkSanitizerInterfaceFunction(M.getOrInsertFunction( - SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy)); SanCovTraceCmpFunction[0] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty())); @@ -305,105 +291,16 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { M.getOrInsertFunction(SanCovTracePCName, VoidTy)); SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction( SanCovTracePCGuardName, VoidTy, Int32PtrTy)); - SanCovTraceEnter = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy)); - SanCovTraceBB = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(SanCovTraceBBName, VoidTy, Int32PtrTy)); - - // At this point we create a dummy array of guards because we don't - // know how many elements we will need. - Type *Int32Ty = IRB.getInt32Ty(); - Type *Int8Ty = IRB.getInt8Ty(); - - if (!Options.TracePCGuard) - GuardArray = - new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, - nullptr, "__sancov_gen_cov_tmp"); - if (Options.Use8bitCounters) - EightBitCounterArray = - new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage, - nullptr, "__sancov_gen_cov_tmp"); for (auto &F : M) runOnFunction(F); - auto N = NumberOfInstrumentedBlocks(); - - GlobalVariable *RealGuardArray = nullptr; - if (!Options.TracePCGuard) { - // Now we know how many elements we need. Create an array of guards - // with one extra element at the beginning for the size. - Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1); - RealGuardArray = new GlobalVariable( - M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov"); - - // Replace the dummy array with the real one. - GuardArray->replaceAllUsesWith( - IRB.CreatePointerCast(RealGuardArray, Int32PtrTy)); - GuardArray->eraseFromParent(); - } - - GlobalVariable *RealEightBitCounterArray; - if (Options.Use8bitCounters) { - // Make sure the array is 16-aligned. - static const int CounterAlignment = 16; - Type *Int8ArrayNTy = ArrayType::get(Int8Ty, alignTo(N, CounterAlignment)); - RealEightBitCounterArray = new GlobalVariable( - M, Int8ArrayNTy, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Int8ArrayNTy), "__sancov_gen_cov_counter"); - RealEightBitCounterArray->setAlignment(CounterAlignment); - EightBitCounterArray->replaceAllUsesWith( - IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy)); - EightBitCounterArray->eraseFromParent(); - } - - // Create variable for module (compilation unit) name - Constant *ModNameStrConst = - ConstantDataArray::getString(M.getContext(), M.getName(), true); - GlobalVariable *ModuleName = new GlobalVariable( - M, ModNameStrConst->getType(), true, GlobalValue::PrivateLinkage, - ModNameStrConst, "__sancov_gen_modname"); - if (Options.TracePCGuard) { - if (HasSancovGuardsSection) { - Function *CtorFunc; - GlobalVariable *SecStart = new GlobalVariable( - M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr, - getSanCovTracePCGuardSectionStart()); - SecStart->setVisibility(GlobalValue::HiddenVisibility); - GlobalVariable *SecEnd = new GlobalVariable( - M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr, - getSanCovTracePCGuardSectionEnd()); - SecEnd->setVisibility(GlobalValue::HiddenVisibility); - - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, SanCovModuleCtorName, SanCovTracePCGuardInitName, - {Int32PtrTy, Int32PtrTy}, - {IRB.CreatePointerCast(SecStart, Int32PtrTy), - IRB.CreatePointerCast(SecEnd, Int32PtrTy)}); - - if (TargetTriple.supportsCOMDAT()) { - // Use comdat to dedup CtorFunc. - CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName)); - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc); - } else { - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); - } - } - } else if (!Options.TracePC) { - Function *CtorFunc; - std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions( - M, SanCovModuleCtorName, SanCovModuleInitName, - {Int32PtrTy, IntptrTy, Int8PtrTy, Int8PtrTy}, - {IRB.CreatePointerCast(RealGuardArray, Int32PtrTy), - ConstantInt::get(IntptrTy, N), - Options.Use8bitCounters - ? IRB.CreatePointerCast(RealEightBitCounterArray, Int8PtrTy) - : Constant::getNullValue(Int8PtrTy), - IRB.CreatePointerCast(ModuleName, Int8PtrTy)}); - - appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority); - } + if (FunctionGuardArray) + CreateInitCallForSection(M, SanCovTracePCGuardInitName, Int32PtrTy, + SanCovGuardsSectionName); + if (Function8bitCounterArray) + CreateInitCallForSection(M, SanCov8bitCountersInitName, Int8PtrTy, + SanCovCountersSectionName); return true; } @@ -435,8 +332,10 @@ static bool isFullPostDominator(const BasicBlock *BB, return true; } -static bool shouldInstrumentBlock(const Function& F, const BasicBlock *BB, const DominatorTree *DT, - const PostDominatorTree *PDT) { +static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB, + const DominatorTree *DT, + const PostDominatorTree *PDT, + const SanitizerCoverageOptions &Options) { // Don't insert coverage for unreachable blocks: we will never call // __sanitizer_cov() for them, so counting them in // NumberOfInstrumentedBlocks() might complicate calculation of code coverage @@ -450,10 +349,13 @@ static bool shouldInstrumentBlock(const Function& F, const BasicBlock *BB, const if (BB->getFirstInsertionPt() == BB->end()) return false; - if (!ClPruneBlocks || &F.getEntryBlock() == BB) + if (Options.NoPrune || &F.getEntryBlock() == BB) return true; - return !(isFullDominator(BB, DT) || isFullPostDominator(BB, PDT)); + // Do not instrument full dominators, or full post-dominators with multiple + // predecessors. + return !isFullDominator(BB, DT) + && !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor()); } bool SanitizerCoverageModule::runOnFunction(Function &F) { @@ -489,7 +391,7 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { &getAnalysis(F).getPostDomTree(); for (auto &BB : F) { - if (shouldInstrumentBlock(F, &BB, DT, PDT)) + if (shouldInstrumentBlock(F, &BB, DT, PDT, Options)) BlocksToInstrument.push_back(&BB); for (auto &Inst : BB) { if (Options.IndirectCalls) { @@ -522,17 +424,26 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { InjectTraceForGep(F, GepTraceTargets); return true; } -void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards, - Function &F) { - if (!Options.TracePCGuard) return; - HasSancovGuardsSection = true; - ArrayType *ArrayOfInt32Ty = ArrayType::get(Int32Ty, NumGuards); - FunctionGuardArray = new GlobalVariable( - *CurModule, ArrayOfInt32Ty, false, GlobalVariable::PrivateLinkage, - Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_"); + +GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( + size_t NumElements, Function &F, Type *Ty, const char *Section) { + ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); + auto Array = new GlobalVariable( + *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage, + Constant::getNullValue(ArrayTy), "__sancov_gen_"); if (auto Comdat = F.getComdat()) - FunctionGuardArray->setComdat(Comdat); - FunctionGuardArray->setSection(getSanCovTracePCGuardSection()); + Array->setComdat(Comdat); + Array->setSection(getSectionName(Section)); + return Array; +} +void SanitizerCoverageModule::CreateFunctionLocalArrays(size_t NumGuards, + Function &F) { + if (Options.TracePCGuard) + FunctionGuardArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int32Ty, SanCovGuardsSectionName); + if (Options.Inline8bitCounters) + Function8bitCounterArray = CreateFunctionLocalArrayInSection( + NumGuards, F, Int8Ty, SanCovCountersSectionName); } bool SanitizerCoverageModule::InjectCoverage(Function &F, @@ -542,14 +453,13 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F, case SanitizerCoverageOptions::SCK_None: return false; case SanitizerCoverageOptions::SCK_Function: - CreateFunctionGuardArray(1, F); - InjectCoverageAtBlock(F, F.getEntryBlock(), 0, false); + CreateFunctionLocalArrays(1, F); + InjectCoverageAtBlock(F, F.getEntryBlock(), 0); return true; default: { - bool UseCalls = ClCoverageBlockThreshold < AllBlocks.size(); - CreateFunctionGuardArray(AllBlocks.size(), F); + CreateFunctionLocalArrays(AllBlocks.size(), F); for (size_t i = 0, N = AllBlocks.size(); i < N; i++) - InjectCoverageAtBlock(F, *AllBlocks[i], i, UseCalls); + InjectCoverageAtBlock(F, *AllBlocks[i], i); return true; } } @@ -566,26 +476,14 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef IndirCalls) { if (IndirCalls.empty()) return; - const int CacheSize = 16; - const int CacheAlignment = 64; // Align for better performance. - Type *Ty = ArrayType::get(IntptrTy, CacheSize); + assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters); for (auto I : IndirCalls) { IRBuilder<> IRB(I); CallSite CS(I); Value *Callee = CS.getCalledValue(); if (isa(Callee)) continue; - GlobalVariable *CalleeCache = new GlobalVariable( - *F.getParent(), Ty, false, GlobalValue::PrivateLinkage, - Constant::getNullValue(Ty), "__sancov_gen_callee_cache"); - CalleeCache->setAlignment(CacheAlignment); - if (Options.TracePC || Options.TracePCGuard) - IRB.CreateCall(SanCovTracePCIndir, - IRB.CreatePointerCast(Callee, IntptrTy)); - else - IRB.CreateCall(SanCovIndirCallFunction, - {IRB.CreatePointerCast(Callee, IntptrTy), - IRB.CreatePointerCast(CalleeCache, IntptrTy)}); + IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy)); } } @@ -685,13 +583,8 @@ void SanitizerCoverageModule::InjectTraceForCmp( } } -void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) { - I->setMetadata(I->getModule()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); -} - void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, - size_t Idx, bool UseCalls) { + size_t Idx) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; @@ -711,83 +604,48 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, if (Options.TracePC) { IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC. IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } else if (Options.TracePCGuard) { + } + if (Options.TracePCGuard) { auto GuardPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), ConstantInt::get(IntptrTy, Idx * 4)), Int32PtrTy); - if (!UseCalls) { - auto GuardLoad = IRB.CreateLoad(GuardPtr); - GuardLoad->setAtomic(AtomicOrdering::Monotonic); - GuardLoad->setAlignment(8); - SetNoSanitizeMetadata(GuardLoad); // Don't instrument with e.g. asan. - auto Cmp = IRB.CreateICmpNE( - GuardLoad, Constant::getNullValue(GuardLoad->getType())); - auto Ins = SplitBlockAndInsertIfThen( - Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); - IRB.SetInsertPoint(Ins); - IRB.SetCurrentDebugLocation(EntryLoc); - } IRB.CreateCall(SanCovTracePCGuard, GuardPtr); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } else { - Value *GuardP = IRB.CreateAdd( - IRB.CreatePointerCast(GuardArray, IntptrTy), - ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); - GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (Options.TraceBB) { - IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); - } else if (UseCalls) { - IRB.CreateCall(SanCovWithCheckFunction, GuardP); - } else { - LoadInst *Load = IRB.CreateLoad(GuardP); - Load->setAtomic(AtomicOrdering::Monotonic); - Load->setAlignment(4); - SetNoSanitizeMetadata(Load); - Value *Cmp = - IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); - Instruction *Ins = SplitBlockAndInsertIfThen( - Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); - IRB.SetInsertPoint(Ins); - IRB.SetCurrentDebugLocation(EntryLoc); - // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. - IRB.CreateCall(SanCovFunction, GuardP); - IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. - } } - - if (Options.Use8bitCounters) { - IRB.SetInsertPoint(&*IP); - Value *P = IRB.CreateAdd( - IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), - ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); - P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); - LoadInst *LI = IRB.CreateLoad(P); - Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); - StoreInst *SI = IRB.CreateStore(Inc, P); - SetNoSanitizeMetadata(LI); - SetNoSanitizeMetadata(SI); + if (Options.Inline8bitCounters) { + auto CounterPtr = IRB.CreateGEP( + Function8bitCounterArray, + {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); + auto Load = IRB.CreateLoad(CounterPtr); + auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1)); + auto Store = IRB.CreateStore(Inc, CounterPtr); + SetNoSanitizeMetadata(Load); + SetNoSanitizeMetadata(Store); } } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const { +std::string +SanitizerCoverageModule::getSectionName(const std::string &Section) const { if (TargetTriple.getObjectFormat() == Triple::COFF) return ".SCOV$M"; if (TargetTriple.isOSBinFormatMachO()) - return "__DATA,__sancov_guards"; - return "__sancov_guards"; + return "__DATA,__" + Section; + return "__" + Section; } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionStart() const { +std::string +SanitizerCoverageModule::getSectionStart(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) - return "\1section$start$__DATA$__sancov_guards"; - return "__start___sancov_guards"; + return "\1section$start$__DATA$__" + Section; + return "__start___" + Section; } -StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionEnd() const { +std::string +SanitizerCoverageModule::getSectionEnd(const std::string &Section) const { if (TargetTriple.isOSBinFormatMachO()) - return "\1section$end$__DATA$__sancov_guards"; - return "__stop___sancov_guards"; + return "\1section$end$__DATA$__" + Section; + return "__stop___" + Section; } diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 2ec6d09594dee4207d966858b7c1a3a06d2c5e94..a991792bf5a3983127e849765d0e6f95f9e11824 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -19,7 +19,6 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -42,6 +41,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Transforms/Utils/Local.h" @@ -272,7 +272,7 @@ static bool isVtableAccess(Instruction *I) { // Do not instrument known races/"benign races" that come from compiler // instrumentatin. The user has no way of suppressing them. -static bool shouldInstrumentReadWriteFromAddress(Value *Addr) { +static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) { // Peel off GEPs and BitCasts. Addr = Addr->stripInBoundsOffsets(); @@ -280,8 +280,9 @@ static bool shouldInstrumentReadWriteFromAddress(Value *Addr) { if (GV->hasSection()) { StringRef SectionName = GV->getSection(); // Check if the global is in the PGO counters section. - if (SectionName.endswith(getInstrProfCountersSectionName( - /*AddSegment=*/false))) + auto OF = Triple(M->getTargetTriple()).getObjectFormat(); + if (SectionName.endswith( + getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false))) return false; } @@ -343,13 +344,13 @@ void ThreadSanitizer::chooseInstructionsToInstrument( for (Instruction *I : reverse(Local)) { if (StoreInst *Store = dyn_cast(I)) { Value *Addr = Store->getPointerOperand(); - if (!shouldInstrumentReadWriteFromAddress(Addr)) + if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) continue; WriteTargets.insert(Addr); } else { LoadInst *Load = cast(I); Value *Addr = Load->getPointerOperand(); - if (!shouldInstrumentReadWriteFromAddress(Addr)) + if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) continue; if (WriteTargets.count(Addr)) { // We will write to this temp, so no reason to analyze the read. diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index c541fa4c8bee7255f760cb2a77523ce7b98cfda9..cb3b5757f8d0c387591cfda4eb9848eeb7b92ac4 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -163,7 +163,7 @@ private: AttributeList Attr = AttributeList().addAttribute( C, AttributeList::FunctionIndex, Attribute::NoUnwind); - Attr = Attr.addAttribute(C, 1, Attribute::NoCapture); + Attr = Attr.addParamAttribute(C, 0, Attribute::NoCapture); FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false); diff --git a/lib/Transforms/ObjCARC/BlotMapVector.h b/lib/Transforms/ObjCARC/BlotMapVector.h index ef075bdccbfed08e695b9887ac99349e6f662b16..9c5cf6f5f5ab0e06a5c9f526519b94d1ac259494 100644 --- a/lib/Transforms/ObjCARC/BlotMapVector.h +++ b/lib/Transforms/ObjCARC/BlotMapVector.h @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" -#include #include +#include namespace llvm { /// \brief An associative container with fast insertion-order (deterministic) diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 9d78e5ae3b9b6cd2217461761f76ef9332663e78..464805051c65fd3e63fa41f48fccdddeffb923ba 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -20,8 +20,8 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "DependencyAnalysis.h" +#include "ObjCARC.h" #include "ProvenanceAnalysis.h" #include "llvm/IR/CFG.h" diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index f02b75f0b45600df64c31d7c7bdba7d291d2469c..cd9b3d96a14f724d28de4b6fd0393523b9dccfcf 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -69,6 +69,19 @@ static inline void EraseInstruction(Instruction *CI) { RecursivelyDeleteTriviallyDeadInstructions(OldArg); } +/// If Inst is a ReturnRV and its operand is a call or invoke, return the +/// operand. Otherwise return null. +static inline const Instruction *getreturnRVOperand(const Instruction &Inst, + ARCInstKind Class) { + if (Class != ARCInstKind::RetainRV) + return nullptr; + + const auto *Opnd = Inst.getOperand(0)->stripPointerCasts(); + if (const auto *C = dyn_cast(Opnd)) + return C; + return dyn_cast(Opnd); +} + } // end namespace objcarc } // end namespace llvm diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index a86eaaec76412ede8872866eb1351d479c650083..e70e7591f6a703b48d55e82542e0b499bb71c5dd 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -26,9 +26,9 @@ // TODO: ObjCARCContract could insert PHI nodes when uses aren't // dominated by single calls. -#include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "DependencyAnalysis.h" +#include "ObjCARC.h" #include "ProvenanceAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Dominators.h" diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 3c73376c990680db13eceb113ca3797624f7c12d..8f3a33f66c7f5f400a36ddcde575a9f48fc31437 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -24,10 +24,10 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "BlotMapVector.h" #include "DependencyAnalysis.h" +#include "ObjCARC.h" #include "ProvenanceAnalysis.h" #include "PtrState.h" #include "llvm/ADT/DenseMap.h" diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp index 9ffdfb4f7f9c6a1c24f94f0f451464c0dcf3c0a0..62fc52f6d091b984dd2219ac04fa444f70619f59 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp @@ -22,8 +22,8 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" #include "ProvenanceAnalysis.h" +#include "ObjCARC.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp index c274e8182fb5a0d5137972a390e93a6fbfc5ded0..870a5f600fd84af1742ea50641f2c342502474c9 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp @@ -8,13 +8,13 @@ //===----------------------------------------------------------------------===// #include "ProvenanceAnalysis.h" -#include "llvm/Pass.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index a5afc8ad977cb23302a507681b0a0edf340d0c6a..d13e941044f14835bbef5158ac066c88d8569c9c 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -244,6 +244,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, ARCInstKind Class) { + auto SetSeqAndInsertReverseInsertPt = [&](Sequence NewSeq){ + assert(!HasReverseInsertPts()); + SetSeq(NewSeq); + // If this is an invoke instruction, we're scanning it as part of + // one of its successor blocks, since we can't insert code after it + // in its own block, and we don't want to split critical edges. + if (isa(Inst)) + InsertReverseInsertPt(&*BB->getFirstInsertionPt()); + else + InsertReverseInsertPt(&*++Inst->getIterator()); + }; + // Check for possible direct uses. switch (GetSeq()) { case S_Release: @@ -251,26 +263,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, if (CanUse(Inst, Ptr, PA, Class)) { DEBUG(dbgs() << " CanUse: Seq: " << GetSeq() << "; " << *Ptr << "\n"); - assert(!HasReverseInsertPts()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. - if (isa(Inst)) - InsertReverseInsertPt(&*BB->getFirstInsertionPt()); - else - InsertReverseInsertPt(&*++Inst->getIterator()); - SetSeq(S_Use); + SetSeqAndInsertReverseInsertPt(S_Use); } else if (Seq == S_Release && IsUser(Class)) { DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; " << *Ptr << "\n"); // Non-movable releases depend on any possible objc pointer use. - SetSeq(S_Stop); - assert(!HasReverseInsertPts()); - // As above; handle invoke specially. - if (isa(Inst)) - InsertReverseInsertPt(&*BB->getFirstInsertionPt()); - else - InsertReverseInsertPt(&*++Inst->getIterator()); + SetSeqAndInsertReverseInsertPt(S_Stop); + } else if (const auto *Call = getreturnRVOperand(*Inst, Class)) { + if (CanUse(Call, Ptr, PA, GetBasicARCInstKind(Call))) { + DEBUG(dbgs() << " ReleaseUse: Seq: " << GetSeq() << "; " + << *Ptr << "\n"); + SetSeqAndInsertReverseInsertPt(S_Stop); + } } break; case S_Stop: @@ -351,8 +355,10 @@ bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, ARCInstKind Class) { - // Check for possible releases. - if (!CanAlterRefCount(Inst, Ptr, PA, Class)) + // Check for possible releases. Treat clang.arc.use as a releasing instruction + // to prevent sinking a retain past it. + if (!CanAlterRefCount(Inst, Ptr, PA, Class) && + Class != ARCInstKind::IntrinsicUser) return false; DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h index 9749e44822b2d8aeedd10cf8ff86c9fe3824d348..87298fa59bfdc9a4d3bc800e216b94c823b0e5ea 100644 --- a/lib/Transforms/ObjCARC/PtrState.h +++ b/lib/Transforms/ObjCARC/PtrState.h @@ -21,8 +21,8 @@ #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { namespace objcarc { diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index fd931c521c8f1786297e3015ee9c120f2dfbce94..99480f12da9e3fdb69e67b197cdca8f5c6bc0411 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -19,12 +19,11 @@ #define AA_NAME "alignment-from-assumptions" #define DEBUG_TYPE AA_NAME #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -35,6 +34,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; STATISTIC(NumLoadAlignChanged, diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b323ab3bd443c4b1996cc55806c052e47af2b06a..f5196cc461815240a395d3954f26f3e5eae51ef2 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_library(LLVMScalarOpts GuardWidening.cpp GVN.cpp GVNHoist.cpp + GVNSink.cpp IVUsersPrinter.cpp InductiveRangeCheckElimination.cpp IndVarSimplify.cpp @@ -55,6 +56,7 @@ add_llvm_library(LLVMScalarOpts Scalar.cpp Scalarizer.cpp SeparateConstOffsetFromGEP.cpp + SimpleLoopUnswitch.cpp SimplifyCFGPass.cpp Sink.cpp SpeculativeExecution.cpp diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index ee6333e88716b8920cdb94402b0c8d8d6958c03b..c3810366bf22d376adbb1182020c6468cbd24e40 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -53,6 +53,12 @@ using namespace consthoist; STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); +static cl::opt ConstHoistWithBlockFrequency( + "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + cl::desc("Enable the use of the block frequency analysis to reduce the " + "chance to execute const materialization more frequently than " + "without hoisting.")); + namespace { /// \brief The constant hoisting pass. class ConstantHoistingLegacyPass : public FunctionPass { @@ -68,6 +74,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + if (ConstHoistWithBlockFrequency) + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -82,6 +90,7 @@ private: char ConstantHoistingLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist", @@ -99,9 +108,13 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) { DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n"); DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n'); - bool MadeChange = Impl.runImpl( - Fn, getAnalysis().getTTI(Fn), - getAnalysis().getDomTree(), Fn.getEntryBlock()); + bool MadeChange = + Impl.runImpl(Fn, getAnalysis().getTTI(Fn), + getAnalysis().getDomTree(), + ConstHoistWithBlockFrequency + ? &getAnalysis().getBFI() + : nullptr, + Fn.getEntryBlock()); if (MadeChange) { DEBUG(dbgs() << "********** Function after Constant Hoisting: " @@ -148,33 +161,142 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst, return IDom->getBlock()->getTerminator(); } +/// \brief Given \p BBs as input, find another set of BBs which collectively +/// dominates \p BBs and have the minimal sum of frequencies. Return the BB +/// set found in \p BBs. +static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, + BasicBlock *Entry, + SmallPtrSet &BBs) { + assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); + // Nodes on the current path to the root. + SmallPtrSet Path; + // Candidates includes any block 'BB' in set 'BBs' that is not strictly + // dominated by any other blocks in set 'BBs', and all nodes in the path + // in the dominator tree from Entry to 'BB'. + SmallPtrSet Candidates; + for (auto BB : BBs) { + Path.clear(); + // Walk up the dominator tree until Entry or another BB in BBs + // is reached. Insert the nodes on the way to the Path. + BasicBlock *Node = BB; + // The "Path" is a candidate path to be added into Candidates set. + bool isCandidate = false; + do { + Path.insert(Node); + if (Node == Entry || Candidates.count(Node)) { + isCandidate = true; + break; + } + assert(DT.getNode(Node)->getIDom() && + "Entry doens't dominate current Node"); + Node = DT.getNode(Node)->getIDom()->getBlock(); + } while (!BBs.count(Node)); + + // If isCandidate is false, Node is another Block in BBs dominating + // current 'BB'. Drop the nodes on the Path. + if (!isCandidate) + continue; + + // Add nodes on the Path into Candidates. + Candidates.insert(Path.begin(), Path.end()); + } + + // Sort the nodes in Candidates in top-down order and save the nodes + // in Orders. + unsigned Idx = 0; + SmallVector Orders; + Orders.push_back(Entry); + while (Idx != Orders.size()) { + BasicBlock *Node = Orders[Idx++]; + for (auto ChildDomNode : DT.getNode(Node)->getChildren()) { + if (Candidates.count(ChildDomNode->getBlock())) + Orders.push_back(ChildDomNode->getBlock()); + } + } + + // Visit Orders in bottom-up order. + typedef std::pair, BlockFrequency> + InsertPtsCostPair; + // InsertPtsMap is a map from a BB to the best insertion points for the + // subtree of BB (subtree not including the BB itself). + DenseMap InsertPtsMap; + InsertPtsMap.reserve(Orders.size() + 1); + for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) { + BasicBlock *Node = *RIt; + bool NodeInBBs = BBs.count(Node); + SmallPtrSet &InsertPts = InsertPtsMap[Node].first; + BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second; + + // Return the optimal insert points in BBs. + if (Node == Entry) { + BBs.clear(); + if (InsertPtsFreq > BFI.getBlockFreq(Node)) + BBs.insert(Entry); + else + BBs.insert(InsertPts.begin(), InsertPts.end()); + break; + } + + BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock(); + // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child + // will update its parent's ParentInsertPts and ParentPtsFreq. + SmallPtrSet &ParentInsertPts = InsertPtsMap[Parent].first; + BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; + // Choose to insert in Node or in subtree of Node. + if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) { + ParentInsertPts.insert(Node); + ParentPtsFreq += BFI.getBlockFreq(Node); + } else { + ParentInsertPts.insert(InsertPts.begin(), InsertPts.end()); + ParentPtsFreq += InsertPtsFreq; + } + } +} + /// \brief Find an insertion point that dominates all uses. -Instruction *ConstantHoistingPass::findConstantInsertionPoint( +SmallPtrSet ConstantHoistingPass::findConstantInsertionPoint( const ConstantInfo &ConstInfo) const { assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry."); // Collect all basic blocks. SmallPtrSet BBs; + SmallPtrSet InsertPts; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); - if (BBs.count(Entry)) - return &Entry->front(); + if (BBs.count(Entry)) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } + + if (BFI) { + findBestInsertionSet(*DT, *BFI, Entry, BBs); + for (auto BB : BBs) { + BasicBlock::iterator InsertPt = BB->begin(); + for (; isa(InsertPt) || InsertPt->isEHPad(); ++InsertPt) + ; + InsertPts.insert(&*InsertPt); + } + return InsertPts; + } while (BBs.size() >= 2) { BasicBlock *BB, *BB1, *BB2; BB1 = *BBs.begin(); BB2 = *std::next(BBs.begin()); BB = DT->findNearestCommonDominator(BB1, BB2); - if (BB == Entry) - return &Entry->front(); + if (BB == Entry) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } BBs.erase(BB1); BBs.erase(BB2); BBs.insert(BB); } assert((BBs.size() == 1) && "Expected only one element."); Instruction &FirstInst = (*BBs.begin())->front(); - return findMatInsertPt(&FirstInst); + InsertPts.insert(findMatInsertPt(&FirstInst)); + return InsertPts; } @@ -557,29 +679,54 @@ bool ConstantHoistingPass::emitBaseConstants() { bool MadeChange = false; for (auto const &ConstInfo : ConstantVec) { // Hoist and hide the base constant behind a bitcast. - Instruction *IP = findConstantInsertionPoint(ConstInfo); - IntegerType *Ty = ConstInfo.BaseConstant->getType(); - Instruction *Base = - new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); - DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ") to BB " - << IP->getParent()->getName() << '\n' << *Base << '\n'); - NumConstantsHoisted++; + SmallPtrSet IPSet = findConstantInsertionPoint(ConstInfo); + assert(!IPSet.empty() && "IPSet is empty"); + + unsigned UsesNum = 0; + unsigned ReBasesNum = 0; + for (Instruction *IP : IPSet) { + IntegerType *Ty = ConstInfo.BaseConstant->getType(); + Instruction *Base = + new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); + DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant + << ") to BB " << IP->getParent()->getName() << '\n' + << *Base << '\n'); + + // Emit materialization code for all rebased constants. + unsigned Uses = 0; + for (auto const &RCI : ConstInfo.RebasedConstants) { + for (auto const &U : RCI.Uses) { + Uses++; + BasicBlock *OrigMatInsertBB = + findMatInsertPt(U.Inst, U.OpndIdx)->getParent(); + // If Base constant is to be inserted in multiple places, + // generate rebase for U using the Base dominating U. + if (IPSet.size() == 1 || + DT->dominates(Base->getParent(), OrigMatInsertBB)) { + emitBaseConstants(Base, RCI.Offset, U); + ReBasesNum++; + } + } + } + UsesNum = Uses; - // Emit materialization code for all rebased constants. - for (auto const &RCI : ConstInfo.RebasedConstants) { - NumConstantsRebased++; - for (auto const &U : RCI.Uses) - emitBaseConstants(Base, RCI.Offset, U); + // Use the same debug location as the last user of the constant. + assert(!Base->use_empty() && "The use list is empty!?"); + assert(isa(Base->user_back()) && + "All uses should be instructions."); + Base->setDebugLoc(cast(Base->user_back())->getDebugLoc()); } + (void)UsesNum; + (void)ReBasesNum; + // Expect all uses are rebased after rebase is done. + assert(UsesNum == ReBasesNum && "Not all uses are rebased"); + + NumConstantsHoisted++; - // Use the same debug location as the last user of the constant. - assert(!Base->use_empty() && "The use list is empty!?"); - assert(isa(Base->user_back()) && - "All uses should be instructions."); - Base->setDebugLoc(cast(Base->user_back())->getDebugLoc()); + // Base constant is also included in ConstInfo.RebasedConstants, so + // deduct 1 from ConstInfo.RebasedConstants.size(). + NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1; - // Correct for base constant, which we counted above too. - NumConstantsRebased--; MadeChange = true; } return MadeChange; @@ -595,9 +742,11 @@ void ConstantHoistingPass::deleteDeadCastInst() const { /// \brief Optimize expensive integer constants in the given function. bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, - DominatorTree &DT, BasicBlock &Entry) { + DominatorTree &DT, BlockFrequencyInfo *BFI, + BasicBlock &Entry) { this->TTI = &TTI; this->DT = &DT; + this->BFI = BFI; this->Entry = &Entry; // Collect all constant candidates. collectConstantCandidates(Fn); @@ -628,7 +777,10 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &TTI = AM.getResult(F); - if (!runImpl(F, TTI, DT, F.getEntryBlock())) + auto BFI = ConstHoistWithBlockFrequency + ? &AM.getResult(F) + : nullptr; + if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock())) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 9e982194bac7ed874bad047ba8ccbd1f3b0487dc..4fa27891a97432151c1e138fa12a544b34f48c14 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -18,15 +18,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constant.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/Pass.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index ed5ad002f601348b6ff74809b7825c5e8d240085..2a4c9526dfcd934460d21f98cfd83445995a7046 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" @@ -26,6 +26,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -95,7 +96,8 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) { return true; } -static bool processPHI(PHINode *P, LazyValueInfo *LVI) { +static bool processPHI(PHINode *P, LazyValueInfo *LVI, + const SimplifyQuery &SQ) { bool Changed = false; BasicBlock *BB = P->getParent(); @@ -149,9 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI) { Changed = true; } - // FIXME: Provide TLI, DT, AT to SimplifyInstruction. - const DataLayout &DL = BB->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(P, DL)) { + if (Value *V = SimplifyInstruction(P, SQ)) { P->replaceAllUsesWith(V); P->eraseFromParent(); Changed = true; @@ -305,7 +305,7 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) { /// Infer nonnull attributes for the arguments at the specified callsite. static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { - SmallVector Indices; + SmallVector ArgNos; unsigned ArgNo = 0; for (Value *V : CS.args()) { @@ -313,23 +313,24 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { // Try to mark pointer typed parameters as non-null. We skip the // relatively expensive analysis for constants which are obviously either // null or non-null to start with. - if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) && + if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) && !isa(V) && LVI->getPredicateAt(ICmpInst::ICMP_EQ, V, ConstantPointerNull::get(Type), CS.getInstruction()) == LazyValueInfo::False) - Indices.push_back(ArgNo + 1); + ArgNos.push_back(ArgNo); ArgNo++; } assert(ArgNo == CS.arg_size() && "sanity check"); - if (Indices.empty()) + if (ArgNos.empty()) return false; AttributeList AS = CS.getAttributes(); LLVMContext &Ctx = CS.getInstruction()->getContext(); - AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull)); + AS = AS.addParamAttribute(Ctx, ArgNos, + Attribute::get(Ctx, Attribute::NonNull)); CS.setAttributes(AS); return true; @@ -442,9 +443,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { bool Changed = false; if (!NUW) { - ConstantRange NUWRange = - LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange, - OBO::NoUnsignedWrap); + ConstantRange NUWRange = ConstantRange::makeGuaranteedNoWrapRegion( + BinaryOperator::Add, LRange, OBO::NoUnsignedWrap); if (!NUWRange.isEmptySet()) { bool NewNUW = NUWRange.contains(LazyRRange()); AddOp->setHasNoUnsignedWrap(NewNUW); @@ -452,9 +452,8 @@ static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { } } if (!NSW) { - ConstantRange NSWRange = - LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange, - OBO::NoSignedWrap); + ConstantRange NSWRange = ConstantRange::makeGuaranteedNoWrapRegion( + BinaryOperator::Add, LRange, OBO::NoSignedWrap); if (!NSWRange.isEmptySet()) { bool NewNSW = NSWRange.contains(LazyRRange()); AddOp->setHasNoSignedWrap(NewNSW); @@ -488,9 +487,8 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) { ConstantInt::getFalse(C->getContext()); } -static bool runImpl(Function &F, LazyValueInfo *LVI) { +static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) { bool FnChanged = false; - // Visiting in a pre-order depth-first traversal causes us to simplify early // blocks before querying later blocks (which require us to analyze early // blocks). Eagerly simplifying shallow blocks means there is strictly less @@ -505,7 +503,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI) { BBChanged |= processSelect(cast(II), LVI); break; case Instruction::PHI: - BBChanged |= processPHI(cast(II), LVI); + BBChanged |= processPHI(cast(II), LVI, SQ); break; case Instruction::ICmp: case Instruction::FCmp: @@ -553,7 +551,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI) { BBChanged = true; } } - }; + } FnChanged |= BBChanged; } @@ -566,14 +564,14 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { return false; LazyValueInfo *LVI = &getAnalysis().getLVI(); - return runImpl(F, LVI); + return runImpl(F, LVI, getBestSimplifyQuery(*this, F)); } PreservedAnalyses CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) { LazyValueInfo *LVI = &AM.getResult(F); - bool Changed = runImpl(F, LVI); + bool Changed = runImpl(F, LVI, getBestSimplifyQuery(AM, F)); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 07a0ba9b12221c4901af3ae1c41cd597462b2e55..fa4806e884c30efd061dfbfc5d6f7ef26f0e846e 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -19,10 +19,10 @@ #include "llvm/Transforms/Scalar/DCE.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/Pass.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 04479b6e49ac8513a468c14ba8ededc9712fcbd9..0f92760a874b54b0361abb18c6b6674635d3668e 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -15,6 +15,7 @@ #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -253,6 +254,7 @@ public: const TargetTransformInfo &TTI; DominatorTree &DT; AssumptionCache &AC; + const SimplifyQuery SQ; MemorySSA *MSSA; std::unique_ptr MSSAUpdater; typedef RecyclingAllocator< @@ -315,9 +317,10 @@ public: unsigned CurrentGeneration; /// \brief Set up the EarlyCSE runner for a particular function. - EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, - DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) - : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), + EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI, + const TargetTransformInfo &TTI, DominatorTree &DT, + AssumptionCache &AC, MemorySSA *MSSA) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA), MSSAUpdater(make_unique(MSSA)), CurrentGeneration(0) { } @@ -504,7 +507,7 @@ private: if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) { // Optimize MemoryPhi nodes that may become redundant by having all the // same input values once MA is removed. - SmallVector PhisToCheck; + SmallSetVector PhisToCheck; SmallVector WorkQueue; WorkQueue.push_back(MA); // Process MemoryPhi nodes in FIFO order using a ever-growing vector since @@ -515,7 +518,7 @@ private: for (auto *U : WI->users()) if (MemoryPhi *MP = dyn_cast(U)) - PhisToCheck.push_back(MP); + PhisToCheck.insert(MP); MSSAUpdater->removeMemoryAccess(WI); @@ -604,7 +607,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (unsigned Count = replaceDominatedUsesWith( CondInst, TorF, DT, BasicBlockEdge(Pred, BB))) { Changed = true; - NumCSECVP = NumCSECVP + Count; + NumCSECVP += Count; } } } @@ -616,8 +619,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { /// stores which can occur in bitfield code among other things. Instruction *LastStore = nullptr; - const DataLayout &DL = BB->getModule()->getDataLayout(); - // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { @@ -635,10 +636,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Skip assume intrinsics, they don't really have side effects (although // they're marked as such to ensure preservation of control dependencies), - // and this pass will not disturb any of the assumption's control - // dependencies. + // and this pass will not bother with its removal. However, we should mark + // its condition as true for all dominated blocks. if (match(Inst, m_Intrinsic())) { - DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); + auto *CondI = + dyn_cast(cast(Inst)->getArgOperand(0)); + if (CondI && SimpleValue::canHandle(CondI)) { + DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst << '\n'); + AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } else + DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); continue; } @@ -658,10 +665,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (match(Inst, m_Intrinsic())) { if (auto *CondI = dyn_cast(cast(Inst)->getArgOperand(0))) { - // The condition we're on guarding here is true for all dominated - // locations. - if (SimpleValue::canHandle(CondI)) + if (SimpleValue::canHandle(CondI)) { + // Do we already know the actual value of this condition? + if (auto *KnownCond = AvailableValues.lookup(CondI)) { + // Is the condition known to be true? + if (isa(KnownCond) && + cast(KnownCond)->isOneValue()) { + DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); + removeMSSA(Inst); + Inst->eraseFromParent(); + Changed = true; + continue; + } else + // Use the known value if it wasn't true. + cast(Inst)->setArgOperand(0, KnownCond); + } + // The condition we're on guarding here is true for all dominated + // locations. AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); + } } // Guard intrinsics read all memory, but don't write any memory. @@ -673,7 +695,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) { + if (Value *V = SimplifyInstruction(Inst, SQ)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); bool Killed = false; if (!Inst->use_empty()) { @@ -964,7 +986,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, auto *MSSA = UseMemorySSA ? &AM.getResult(F).getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); if (!CSE.run()) return PreservedAnalyses::all(); @@ -1008,7 +1030,7 @@ public: auto *MSSA = UseMemorySSA ? &getAnalysis().getMSSA() : nullptr; - EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); return CSE.run(); } diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index 185cdbdda37811501f5eb2e0ffaec5876a931011..063df779a30bb9b9f93f6f9d48c20ba444d859b3 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/CFG.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp index 8a5af6195f1b5718ab4eecd2d492d2b048cb32ba..b105ece8dc7c47a3e34b8024057aae2f7b06698b 100644 --- a/lib/Transforms/Scalar/Float2Int.cpp +++ b/lib/Transforms/Scalar/Float2Int.cpp @@ -137,13 +137,13 @@ void Float2IntPass::findRoots(Function &F, SmallPtrSet &Roots) { } // Helper - mark I as having been traversed, having range R. -ConstantRange Float2IntPass::seen(Instruction *I, ConstantRange R) { +void Float2IntPass::seen(Instruction *I, ConstantRange R) { DEBUG(dbgs() << "F2I: " << *I << ":" << R << "\n"); - if (SeenInsts.find(I) != SeenInsts.end()) - SeenInsts.find(I)->second = R; + auto IT = SeenInsts.find(I); + if (IT != SeenInsts.end()) + IT->second = std::move(R); else - SeenInsts.insert(std::make_pair(I, R)); - return R; + SeenInsts.insert(std::make_pair(I, std::move(R))); } // Helper - get a range representing a poison value. diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index be696df548d52f5681ebe3f0b6844b5f2b1bcb97..0490d93f64553808f7ac922d1be42c5941739c8a 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1687,7 +1687,7 @@ bool GVN::processInstruction(Instruction *I) { // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. const DataLayout &DL = I->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { + if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) { bool Changed = false; if (!I->use_empty()) { I->replaceAllUsesWith(V); @@ -2057,7 +2057,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { // If we failed insertion, make sure we remove the instruction. DEBUG(verifyRemoved(PREInstr)); - delete PREInstr; + PREInstr->deleteValue(); return false; } } diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp index 6adfe130d148b6e41ae488cb0f5e41d34dfea3da..29de792bd248cf828f895087d32fc31c1030db4e 100644 --- a/lib/Transforms/Scalar/GVNHoist.cpp +++ b/lib/Transforms/Scalar/GVNHoist.cpp @@ -41,14 +41,15 @@ // ret void //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -1010,6 +1011,7 @@ public: AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); } }; } // namespace @@ -1026,6 +1028,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { PreservedAnalyses PA; PA.preserve(); PA.preserve(); + PA.preserve(); return PA; } diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5fd2dfc118b4b44d72ee2cefd092d3468efba16e --- /dev/null +++ b/lib/Transforms/Scalar/GVNSink.cpp @@ -0,0 +1,883 @@ +//===- GVNSink.cpp - sink expressions into successors -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file GVNSink.cpp +/// This pass attempts to sink instructions into successors, reducing static +/// instruction count and enabling if-conversion. +/// +/// We use a variant of global value numbering to decide what can be sunk. +/// Consider: +/// +/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ] +/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ] +/// \ / +/// [ %e = phi i32 %a2, %c2 ] +/// [ add i32 %e, 4 ] +/// +/// +/// GVN would number %a1 and %c1 differently because they compute different +/// results - the VN of an instruction is a function of its opcode and the +/// transitive closure of its operands. This is the key property for hoisting +/// and CSE. +/// +/// What we want when sinking however is for a numbering that is a function of +/// the *uses* of an instruction, which allows us to answer the question "if I +/// replace %a1 with %c1, will it contribute in an equivalent way to all +/// successive instructions?". The PostValueTable class in GVN provides this +/// mapping. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/GVNExpression.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "gvn-sink" + +STATISTIC(NumRemoved, "Number of instructions removed"); + +namespace llvm { +namespace GVNExpression { + +LLVM_DUMP_METHOD void Expression::dump() const { + print(dbgs()); + dbgs() << "\n"; +} + +} +} + +namespace { + +static bool isMemoryInst(const Instruction *I) { + return isa(I) || isa(I) || + (isa(I) && !cast(I)->doesNotAccessMemory()) || + (isa(I) && !cast(I)->doesNotAccessMemory()); +} + +/// Iterates through instructions in a set of blocks in reverse order from the +/// first non-terminator. For example (assume all blocks have size n): +/// LockstepReverseIterator I([B1, B2, B3]); +/// *I-- = [B1[n], B2[n], B3[n]]; +/// *I-- = [B1[n-1], B2[n-1], B3[n-1]]; +/// *I-- = [B1[n-2], B2[n-2], B3[n-2]]; +/// ... +/// +/// It continues until all blocks have been exhausted. Use \c getActiveBlocks() +/// to +/// determine which blocks are still going and the order they appear in the +/// list returned by operator*. +class LockstepReverseIterator { + ArrayRef Blocks; + SmallPtrSet ActiveBlocks; + SmallVector Insts; + bool Fail; + +public: + LockstepReverseIterator(ArrayRef Blocks) : Blocks(Blocks) { + reset(); + } + + void reset() { + Fail = false; + ActiveBlocks.clear(); + for (BasicBlock *BB : Blocks) + ActiveBlocks.insert(BB); + Insts.clear(); + for (BasicBlock *BB : Blocks) { + if (BB->size() <= 1) { + // Block wasn't big enough - only contained a terminator. + ActiveBlocks.erase(BB); + continue; + } + Insts.push_back(BB->getTerminator()->getPrevNode()); + } + if (Insts.empty()) + Fail = true; + } + + bool isValid() const { return !Fail; } + ArrayRef operator*() const { return Insts; } + SmallPtrSet &getActiveBlocks() { return ActiveBlocks; } + + void restrictToBlocks(SmallPtrSetImpl &Blocks) { + for (auto II = Insts.begin(); II != Insts.end();) { + if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) == + Blocks.end()) { + ActiveBlocks.erase((*II)->getParent()); + II = Insts.erase(II); + } else { + ++II; + } + } + } + + void operator--() { + if (Fail) + return; + SmallVector NewInsts; + for (auto *Inst : Insts) { + if (Inst == &Inst->getParent()->front()) + ActiveBlocks.erase(Inst->getParent()); + else + NewInsts.push_back(Inst->getPrevNode()); + } + if (NewInsts.empty()) { + Fail = true; + return; + } + Insts = NewInsts; + } +}; + +//===----------------------------------------------------------------------===// + +/// Candidate solution for sinking. There may be different ways to +/// sink instructions, differing in the number of instructions sunk, +/// the number of predecessors sunk from and the number of PHIs +/// required. +struct SinkingInstructionCandidate { + unsigned NumBlocks; + unsigned NumInstructions; + unsigned NumPHIs; + unsigned NumMemoryInsts; + int Cost = -1; + SmallVector Blocks; + + void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) { + unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs; + unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0; + Cost = (NumInstructions * (NumBlocks - 1)) - + (NumExtraPHIs * + NumExtraPHIs) // PHIs are expensive, so make sure they're worth it. + - SplitEdgeCost; + } + bool operator>(const SinkingInstructionCandidate &Other) const { + return Cost > Other.Cost; + } +}; + +#ifndef NDEBUG +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SinkingInstructionCandidate &C) { + OS << ""; + return OS; +} +#endif + +//===----------------------------------------------------------------------===// + +/// Describes a PHI node that may or may not exist. These track the PHIs +/// that must be created if we sunk a sequence of instructions. It provides +/// a hash function for efficient equality comparisons. +class ModelledPHI { + SmallVector Values; + SmallVector Blocks; + +public: + ModelledPHI() {} + ModelledPHI(const PHINode *PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) + Blocks.push_back(PN->getIncomingBlock(I)); + std::sort(Blocks.begin(), Blocks.end()); + + // This assumes the PHI is already well-formed and there aren't conflicting + // incoming values for the same block. + for (auto *B : Blocks) + Values.push_back(PN->getIncomingValueForBlock(B)); + } + /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI + /// without the same ID. + /// \note This is specifically for DenseMapInfo - do not use this! + static ModelledPHI createDummy(size_t ID) { + ModelledPHI M; + M.Values.push_back(reinterpret_cast(ID)); + return M; + } + + /// Create a PHI from an array of incoming values and incoming blocks. + template + ModelledPHI(const VArray &V, const BArray &B) { + std::copy(V.begin(), V.end(), std::back_inserter(Values)); + std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); + } + + /// Create a PHI from [I[OpNum] for I in Insts]. + template + ModelledPHI(ArrayRef Insts, unsigned OpNum, const BArray &B) { + std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); + for (auto *I : Insts) + Values.push_back(I->getOperand(OpNum)); + } + + /// Restrict the PHI's contents down to only \c NewBlocks. + /// \c NewBlocks must be a subset of \c this->Blocks. + void restrictToBlocks(const SmallPtrSetImpl &NewBlocks) { + auto BI = Blocks.begin(); + auto VI = Values.begin(); + while (BI != Blocks.end()) { + assert(VI != Values.end()); + if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) == + NewBlocks.end()) { + BI = Blocks.erase(BI); + VI = Values.erase(VI); + } else { + ++BI; + ++VI; + } + } + assert(Blocks.size() == NewBlocks.size()); + } + + ArrayRef getValues() const { return Values; } + + bool areAllIncomingValuesSame() const { + return all_of(Values, [&](Value *V) { return V == Values[0]; }); + } + bool areAllIncomingValuesSameType() const { + return all_of( + Values, [&](Value *V) { return V->getType() == Values[0]->getType(); }); + } + bool areAnyIncomingValuesConstant() const { + return any_of(Values, [&](Value *V) { return isa(V); }); + } + // Hash functor + unsigned hash() const { + return (unsigned)hash_combine_range(Values.begin(), Values.end()); + } + bool operator==(const ModelledPHI &Other) const { + return Values == Other.Values && Blocks == Other.Blocks; + } +}; + +template struct DenseMapInfo { + static inline ModelledPHI &getEmptyKey() { + static ModelledPHI Dummy = ModelledPHI::createDummy(0); + return Dummy; + } + static inline ModelledPHI &getTombstoneKey() { + static ModelledPHI Dummy = ModelledPHI::createDummy(1); + return Dummy; + } + static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); } + static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) { + return LHS == RHS; + } +}; + +typedef DenseSet> ModelledPHISet; + +//===----------------------------------------------------------------------===// +// ValueTable +//===----------------------------------------------------------------------===// +// This is a value number table where the value number is a function of the +// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know +// that the program would be equivalent if we replaced A with PHI(A, B). +//===----------------------------------------------------------------------===// + +/// A GVN expression describing how an instruction is used. The operands +/// field of BasicExpression is used to store uses, not operands. +/// +/// This class also contains fields for discriminators used when determining +/// equivalence of instructions with sideeffects. +class InstructionUseExpr : public GVNExpression::BasicExpression { + unsigned MemoryUseOrder = -1; + bool Volatile = false; + +public: + InstructionUseExpr(Instruction *I, ArrayRecycler &R, + BumpPtrAllocator &A) + : GVNExpression::BasicExpression(I->getNumUses()) { + allocateOperands(R, A); + setOpcode(I->getOpcode()); + setType(I->getType()); + + for (auto &U : I->uses()) + op_push_back(U.getUser()); + std::sort(op_begin(), op_end()); + } + void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; } + void setVolatile(bool V) { Volatile = V; } + + virtual hash_code getHashValue() const { + return hash_combine(GVNExpression::BasicExpression::getHashValue(), + MemoryUseOrder, Volatile); + } + + template hash_code getHashValue(Function MapFn) { + hash_code H = + hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile); + for (auto *V : operands()) + H = hash_combine(H, MapFn(V)); + return H; + } +}; + +class ValueTable { + DenseMap ValueNumbering; + DenseMap ExpressionNumbering; + DenseMap HashNumbering; + BumpPtrAllocator Allocator; + ArrayRecycler Recycler; + uint32_t nextValueNumber; + + /// Create an expression for I based on its opcode and its uses. If I + /// touches or reads memory, the expression is also based upon its memory + /// order - see \c getMemoryUseOrder(). + InstructionUseExpr *createExpr(Instruction *I) { + InstructionUseExpr *E = + new (Allocator) InstructionUseExpr(I, Recycler, Allocator); + if (isMemoryInst(I)) + E->setMemoryUseOrder(getMemoryUseOrder(I)); + + if (CmpInst *C = dyn_cast(I)) { + CmpInst::Predicate Predicate = C->getPredicate(); + E->setOpcode((C->getOpcode() << 8) | Predicate); + } + return E; + } + + /// Helper to compute the value number for a memory instruction + /// (LoadInst/StoreInst), including checking the memory ordering and + /// volatility. + template InstructionUseExpr *createMemoryExpr(Inst *I) { + if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic()) + return nullptr; + InstructionUseExpr *E = createExpr(I); + E->setVolatile(I->isVolatile()); + return E; + } + +public: + /// Returns the value number for the specified value, assigning + /// it a new number if it did not have one before. + uint32_t lookupOrAdd(Value *V) { + auto VI = ValueNumbering.find(V); + if (VI != ValueNumbering.end()) + return VI->second; + + if (!isa(V)) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + Instruction *I = cast(V); + InstructionUseExpr *exp = nullptr; + switch (I->getOpcode()) { + case Instruction::Load: + exp = createMemoryExpr(cast(I)); + break; + case Instruction::Store: + exp = createMemoryExpr(cast(I)); + break; + case Instruction::Call: + case Instruction::Invoke: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + case Instruction::Select: + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + case Instruction::InsertValue: + case Instruction::GetElementPtr: + exp = createExpr(I); + break; + default: + break; + } + + if (!exp) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + uint32_t e = ExpressionNumbering[exp]; + if (!e) { + hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); }); + auto I = HashNumbering.find(H); + if (I != HashNumbering.end()) { + e = I->second; + } else { + e = nextValueNumber++; + HashNumbering[H] = e; + ExpressionNumbering[exp] = e; + } + } + ValueNumbering[V] = e; + return e; + } + + /// Returns the value number of the specified value. Fails if the value has + /// not yet been numbered. + uint32_t lookup(Value *V) const { + auto VI = ValueNumbering.find(V); + assert(VI != ValueNumbering.end() && "Value not numbered?"); + return VI->second; + } + + /// Removes all value numberings and resets the value table. + void clear() { + ValueNumbering.clear(); + ExpressionNumbering.clear(); + HashNumbering.clear(); + Recycler.clear(Allocator); + nextValueNumber = 1; + } + + ValueTable() : nextValueNumber(1) {} + + /// \c Inst uses or touches memory. Return an ID describing the memory state + /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2), + /// the exact same memory operations happen after I1 and I2. + /// + /// This is a very hard problem in general, so we use domain-specific + /// knowledge that we only ever check for equivalence between blocks sharing a + /// single immediate successor that is common, and when determining if I1 == + /// I2 we will have already determined that next(I1) == next(I2). This + /// inductive property allows us to simply return the value number of the next + /// instruction that defines memory. + uint32_t getMemoryUseOrder(Instruction *Inst) { + auto *BB = Inst->getParent(); + for (auto I = std::next(Inst->getIterator()), E = BB->end(); + I != E && !I->isTerminator(); ++I) { + if (!isMemoryInst(&*I)) + continue; + if (isa(&*I)) + continue; + CallInst *CI = dyn_cast(&*I); + if (CI && CI->onlyReadsMemory()) + continue; + InvokeInst *II = dyn_cast(&*I); + if (II && II->onlyReadsMemory()) + continue; + return lookupOrAdd(&*I); + } + return 0; + } +}; + +//===----------------------------------------------------------------------===// + +class GVNSink { +public: + GVNSink() : VN() {} + bool run(Function &F) { + DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n"); + + unsigned NumSunk = 0; + ReversePostOrderTraversal RPOT(&F); + for (auto *N : RPOT) + NumSunk += sinkBB(N); + + return NumSunk > 0; + } + +private: + ValueTable VN; + + bool isInstructionBlacklisted(Instruction *I) { + // These instructions may change or break semantics if moved. + if (isa(I) || I->isEHPad() || isa(I) || + I->getType()->isTokenTy()) + return true; + return false; + } + + /// The main heuristic function. Analyze the set of instructions pointed to by + /// LRI and return a candidate solution if these instructions can be sunk, or + /// None otherwise. + Optional analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl &PHIContents); + + /// Create a ModelledPHI for each PHI in BB, adding to PHIs. + void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs, + SmallPtrSetImpl &PHIContents) { + for (auto &I : *BB) { + auto *PN = dyn_cast(&I); + if (!PN) + return; + + auto MPHI = ModelledPHI(PN); + PHIs.insert(MPHI); + for (auto *V : MPHI.getValues()) + PHIContents.insert(V); + } + } + + /// The main instruction sinking driver. Set up state and try and sink + /// instructions into BBEnd from its predecessors. + unsigned sinkBB(BasicBlock *BBEnd); + + /// Perform the actual mechanics of sinking an instruction from Blocks into + /// BBEnd, which is their only successor. + void sinkLastInstruction(ArrayRef Blocks, BasicBlock *BBEnd); + + /// Remove PHIs that all have the same incoming value. + void foldPointlessPHINodes(BasicBlock *BB) { + auto I = BB->begin(); + while (PHINode *PN = dyn_cast(I++)) { + if (!all_of(PN->incoming_values(), + [&](const Value *V) { return V == PN->getIncomingValue(0); })) + continue; + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->eraseFromParent(); + } + } +}; + +Optional GVNSink::analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl &PHIContents) { + auto Insts = *LRI; + DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I + : Insts) { + I->dump(); + } dbgs() << " ]\n";); + + DenseMap VNums; + for (auto *I : Insts) { + uint32_t N = VN.lookupOrAdd(I); + DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); + if (N == ~0U) + return None; + VNums[N]++; + } + unsigned VNumToSink = + std::max_element(VNums.begin(), VNums.end(), + [](const std::pair &I, + const std::pair &J) { + return I.second < J.second; + }) + ->first; + + if (VNums[VNumToSink] == 1) + // Can't sink anything! + return None; + + // Now restrict the number of incoming blocks down to only those with + // VNumToSink. + auto &ActivePreds = LRI.getActiveBlocks(); + unsigned InitialActivePredSize = ActivePreds.size(); + SmallVector NewInsts; + for (auto *I : Insts) { + if (VN.lookup(I) != VNumToSink) + ActivePreds.erase(I->getParent()); + else + NewInsts.push_back(I); + } + for (auto *I : NewInsts) + if (isInstructionBlacklisted(I)) + return None; + + // If we've restricted the incoming blocks, restrict all needed PHIs also + // to that set. + bool RecomputePHIContents = false; + if (ActivePreds.size() != InitialActivePredSize) { + ModelledPHISet NewNeededPHIs; + for (auto P : NeededPHIs) { + P.restrictToBlocks(ActivePreds); + NewNeededPHIs.insert(P); + } + NeededPHIs = NewNeededPHIs; + LRI.restrictToBlocks(ActivePreds); + RecomputePHIContents = true; + } + + // The sunk instruction's results. + ModelledPHI NewPHI(NewInsts, ActivePreds); + + // Does sinking this instruction render previous PHIs redundant? + if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) { + NeededPHIs.erase(NewPHI); + RecomputePHIContents = true; + } + + if (RecomputePHIContents) { + // The needed PHIs have changed, so recompute the set of all needed + // values. + PHIContents.clear(); + for (auto &PHI : NeededPHIs) + PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); + } + + // Is this instruction required by a later PHI that doesn't match this PHI? + // if so, we can't sink this instruction. + for (auto *V : NewPHI.getValues()) + if (PHIContents.count(V)) + // V exists in this PHI, but the whole PHI is different to NewPHI + // (else it would have been removed earlier). We cannot continue + // because this isn't representable. + return None; + + // Which operands need PHIs? + // FIXME: If any of these fail, we should partition up the candidates to + // try and continue making progress. + Instruction *I0 = NewInsts[0]; + for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) { + ModelledPHI PHI(NewInsts, OpNum, ActivePreds); + if (PHI.areAllIncomingValuesSame()) + continue; + if (!canReplaceOperandWithVariable(I0, OpNum)) + // We can 't create a PHI from this instruction! + return None; + if (NeededPHIs.count(PHI)) + continue; + if (!PHI.areAllIncomingValuesSameType()) + return None; + // Don't create indirect calls! The called value is the final operand. + if ((isa(I0) || isa(I0)) && OpNum == E - 1 && + PHI.areAnyIncomingValuesConstant()) + return None; + + NeededPHIs.reserve(NeededPHIs.size()); + NeededPHIs.insert(PHI); + PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); + } + + if (isMemoryInst(NewInsts[0])) + ++MemoryInstNum; + + SinkingInstructionCandidate Cand; + Cand.NumInstructions = ++InstNum; + Cand.NumMemoryInsts = MemoryInstNum; + Cand.NumBlocks = ActivePreds.size(); + Cand.NumPHIs = NeededPHIs.size(); + for (auto *C : ActivePreds) + Cand.Blocks.push_back(C); + + return Cand; +} + +unsigned GVNSink::sinkBB(BasicBlock *BBEnd) { + DEBUG(dbgs() << "GVNSink: running on basic block "; + BBEnd->printAsOperand(dbgs()); dbgs() << "\n"); + SmallVector Preds; + for (auto *B : predecessors(BBEnd)) { + auto *T = B->getTerminator(); + if (isa(T) || isa(T)) + Preds.push_back(B); + else + return 0; + } + if (Preds.size() < 2) + return 0; + std::sort(Preds.begin(), Preds.end()); + + unsigned NumOrigPreds = Preds.size(); + // We can only sink instructions through unconditional branches. + for (auto I = Preds.begin(); I != Preds.end();) { + if ((*I)->getTerminator()->getNumSuccessors() != 1) + I = Preds.erase(I); + else + ++I; + } + + LockstepReverseIterator LRI(Preds); + SmallVector Candidates; + unsigned InstNum = 0, MemoryInstNum = 0; + ModelledPHISet NeededPHIs; + SmallPtrSet PHIContents; + analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents); + unsigned NumOrigPHIs = NeededPHIs.size(); + + while (LRI.isValid()) { + auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum, + NeededPHIs, PHIContents); + if (!Cand) + break; + Cand->calculateCost(NumOrigPHIs, Preds.size()); + Candidates.emplace_back(*Cand); + --LRI; + } + + std::stable_sort( + Candidates.begin(), Candidates.end(), + [](const SinkingInstructionCandidate &A, + const SinkingInstructionCandidate &B) { return A > B; }); + DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C + : Candidates) dbgs() + << " " << C << "\n";); + + // Pick the top candidate, as long it is positive! + if (Candidates.empty() || Candidates.front().Cost <= 0) + return 0; + auto C = Candidates.front(); + + DEBUG(dbgs() << " -- Sinking: " << C << "\n"); + BasicBlock *InsertBB = BBEnd; + if (C.Blocks.size() < NumOrigPreds) { + DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs()); + dbgs() << "\n"); + InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split"); + if (!InsertBB) { + DEBUG(dbgs() << " -- FAILED to split edge!\n"); + // Edge couldn't be split. + return 0; + } + } + + for (unsigned I = 0; I < C.NumInstructions; ++I) + sinkLastInstruction(C.Blocks, InsertBB); + + return C.NumInstructions; +} + +void GVNSink::sinkLastInstruction(ArrayRef Blocks, + BasicBlock *BBEnd) { + SmallVector Insts; + for (BasicBlock *BB : Blocks) + Insts.push_back(BB->getTerminator()->getPrevNode()); + Instruction *I0 = Insts.front(); + + SmallVector NewOperands; + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { + bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { + return I->getOperand(O) != I0->getOperand(O); + }); + if (!NeedPHI) { + NewOperands.push_back(I0->getOperand(O)); + continue; + } + + // Create a new PHI in the successor block and populate it. + auto *Op = I0->getOperand(O); + assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); + auto *PN = PHINode::Create(Op->getType(), Insts.size(), + Op->getName() + ".sink", &BBEnd->front()); + for (auto *I : Insts) + PN->addIncoming(I->getOperand(O), I->getParent()); + NewOperands.push_back(PN); + } + + // Arbitrarily use I0 as the new "common" instruction; remap its operands + // and move it to the start of the successor block. + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) + I0->getOperandUse(O).set(NewOperands[O]); + I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + // Update metadata and IR flags. + for (auto *I : Insts) + if (I != I0) { + combineMetadataForCSE(I0, I); + I0->andIRFlags(I); + } + + for (auto *I : Insts) + if (I != I0) + I->replaceAllUsesWith(I0); + foldPointlessPHINodes(BBEnd); + + // Finally nuke all instructions apart from the common instruction. + for (auto *I : Insts) + if (I != I0) + I->eraseFromParent(); + + NumRemoved += Insts.size() - 1; +} + +//////////////////////////////////////////////////////////////////////////////// +// Pass machinery / boilerplate + +class GVNSinkLegacyPass : public FunctionPass { +public: + static char ID; + + GVNSinkLegacyPass() : FunctionPass(ID) { + initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + GVNSink G; + return G.run(F); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved(); + } +}; +} // namespace + +PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { + GVNSink G; + if (!G.run(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + +char GVNSinkLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) + +FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); } diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp index 7019287954a15ff7aac442139dc77c834f3cb33c..fb7c6e15758d350a9465056b99ceff3011647d3f 100644 --- a/lib/Transforms/Scalar/GuardWidening.cpp +++ b/lib/Transforms/Scalar/GuardWidening.cpp @@ -40,7 +40,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/GuardWidening.h" -#include "llvm/Pass.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/Analysis/LoopInfo.h" @@ -50,7 +49,9 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -536,10 +537,8 @@ bool GuardWideningImpl::parseRangeChecks( Changed = true; } else if (match(Check.getBase(), m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { - unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(OpLHS, KnownZero, KnownOne, DL); - if ((OpRHS->getValue() & KnownZero) == OpRHS->getValue()) { + KnownBits Known = computeKnownBits(OpLHS, DL); + if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { Check.setBase(OpLHS); APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); Check.setOffset(ConstantInt::get(Ctx, NewOffset)); @@ -612,16 +611,16 @@ bool GuardWideningImpl::combineRangeChecks( // We have a series of f+1 checks as: // // I+k_0 u< L ... Chk_0 - // I_k_1 u< L ... Chk_1 + // I+k_1 u< L ... Chk_1 // ... - // I_k_f u< L ... Chk_(f+1) + // I+k_f u< L ... Chk_f // - // with forall i in [0,f): k_f-k_i u< k_f-k_0 ... Precond_0 + // with forall i in [0,f]: k_f-k_i u< k_f-k_0 ... Precond_0 // k_f-k_0 u< INT_MIN+k_f ... Precond_1 // k_f != k_0 ... Precond_2 // // Claim: - // Chk_0 AND Chk_(f+1) implies all the other checks + // Chk_0 AND Chk_f implies all the other checks // // Informal proof sketch: // diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index dcb2a4a0c6e6bba92857e2850a7bdeddd568395e..10782963177c6eab2037fe3152a147fa25c408a2 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -86,6 +86,10 @@ static cl::opt UsePostIncrementRanges( cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), cl::init(true)); +static cl::opt +DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), + cl::desc("Disable Linear Function Test Replace optimization")); + namespace { struct RewritePhi; @@ -97,7 +101,7 @@ class IndVarSimplify { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; - SmallVector DeadInsts; + SmallVector DeadInsts; bool Changed = false; bool isValidRewrite(Value *FromVal, Value *ToVal); @@ -415,8 +419,8 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { Compare->getName()); // In the following deletions, PN may become dead and may be deleted. - // Use a WeakVH to observe whether this happens. - WeakVH WeakPH = PN; + // Use a WeakTrackingVH to observe whether this happens. + WeakTrackingVH WeakPH = PN; // Delete the old floating point exit comparison. The branch starts using the // new comparison. @@ -451,7 +455,7 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { // BasicBlock *Header = L->getHeader(); - SmallVector PHIs; + SmallVector PHIs; for (BasicBlock::iterator I = Header->begin(); PHINode *PN = dyn_cast(I); ++I) PHIs.push_back(PN); @@ -901,7 +905,7 @@ class WidenIV { PHINode *WidePhi; Instruction *WideInc; const SCEV *WideIncExpr; - SmallVectorImpl &DeadInsts; + SmallVectorImpl &DeadInsts; SmallPtrSet Widened; SmallVector NarrowIVUsers; @@ -941,20 +945,13 @@ class WidenIV { } public: - WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, - ScalarEvolution *SEv, DominatorTree *DTree, - SmallVectorImpl &DI, bool HasGuards) : - OrigPhi(WI.NarrowIV), - WideType(WI.WidestNativeType), - LI(LInfo), - L(LI->getLoopFor(OrigPhi->getParent())), - SE(SEv), - DT(DTree), - HasGuards(HasGuards), - WidePhi(nullptr), - WideInc(nullptr), - WideIncExpr(nullptr), - DeadInsts(DI) { + WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, + DominatorTree *DTree, SmallVectorImpl &DI, + bool HasGuards) + : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), + L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), + HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr), + WideIncExpr(nullptr), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended; } @@ -1830,6 +1827,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { // An IV counter must preserve its type. if (IncI->getNumOperands() == 2) break; + LLVM_FALLTHROUGH; default: return nullptr; } @@ -2419,7 +2417,8 @@ bool IndVarSimplify::run(Loop *L) { // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - if (canExpandBackedgeTakenCount(L, SE, Rewriter) && needsLFTR(L, DT)) { + if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) && + needsLFTR(L, DT)) { PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT); if (IndVar) { // Check preconditions for proper SCEVExpander operation. SCEV does not diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 85db6e5e11052ad483026755ef3c44ab260d64be..2f96c3064b8617aece88b90cbf7bb7894930eba9 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -59,8 +59,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -1228,7 +1228,12 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef BBs) { Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent, ValueToValueMapTy &VM) { - Loop &New = LPM.addLoop(Parent); + Loop &New = *new Loop(); + if (Parent) + Parent->addChildLoop(&New); + else + LI.addTopLevelLoop(&New); + LPM.addLoop(New); // Add all of the blocks in Original to the new loop. for (auto *BB : Original->blocks()) @@ -1366,28 +1371,35 @@ bool LoopConstrainer::run() { DT.recalculate(F); + // We need to first add all the pre and post loop blocks into the loop + // structures (as part of createClonedLoopStructure), and then update the + // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating + // LI when LoopSimplifyForm is generated. + Loop *PreL = nullptr, *PostL = nullptr; if (!PreLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PreL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map); - formLCSSARecursively(*L, DT, &LI, &SE); - simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Pre loops are slow paths, we do not need to perform any loop - // optimizations on them. - DisableAllLoopOptsOnLoop(*L); } if (!PostLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PostL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map); + } + + // This function canonicalizes the loop into Loop-Simplify and LCSSA forms. + auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) { formLCSSARecursively(*L, DT, &LI, &SE); simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Post loops are slow paths, we do not need to perform any loop + // Pre/post loops are slow paths, we do not need to perform any loop // optimizations on them. - DisableAllLoopOptsOnLoop(*L); - } - - formLCSSARecursively(OriginalLoop, DT, &LI, &SE); - simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true); + if (!IsOriginalLoop) + DisableAllLoopOptsOnLoop(*L); + }; + if (PreL) + CanonicalizeLoop(PreL, false); + if (PostL) + CanonicalizeLoop(PostL, false); + CanonicalizeLoop(&OriginalLoop, true); return true; } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 5d8701431a2ce7242eeb4877c3995425e41e6623..3c8fbd35bf8c1bfe41f9064670f224c8240c56a7 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -89,7 +89,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" @@ -100,6 +99,7 @@ #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -138,7 +138,7 @@ private: // Tries to infer the specific address space of each address expression in // Postorder. - void inferAddressSpaces(const std::vector &Postorder, + void inferAddressSpaces(ArrayRef Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const; bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; @@ -147,22 +147,22 @@ private: // address spaces if InferredAddrSpace says so. Postorder is the postorder of // all flat expressions in the use-def graph of function F. bool - rewriteWithNewAddressSpaces(const std::vector &Postorder, + rewriteWithNewAddressSpaces(ArrayRef Postorder, const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const; void appendsFlatAddressExpressionToPostorderStack( - Value *V, std::vector> *PostorderStack, - DenseSet *Visited) const; + Value *V, std::vector> &PostorderStack, + DenseSet &Visited) const; bool rewriteIntrinsicOperands(IntrinsicInst *II, Value *OldV, Value *NewV) const; void collectRewritableIntrinsicOperands( IntrinsicInst *II, - std::vector> *PostorderStack, - DenseSet *Visited) const; + std::vector> &PostorderStack, + DenseSet &Visited) const; - std::vector collectFlatAddressExpressions(Function &F) const; + std::vector collectFlatAddressExpressions(Function &F) const; Value *cloneValueWithNewAddressSpace( Value *V, unsigned NewAddrSpace, @@ -204,7 +204,6 @@ static bool isAddressExpression(const Value &V) { // // Precondition: V is an address expression. static SmallVector getPointerOperands(const Value &V) { - assert(isAddressExpression(V)); const Operator &Op = cast(V); switch (Op.getOpcode()) { case Instruction::PHI: { @@ -254,8 +253,8 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, // TODO: Move logic to TTI? void InferAddressSpaces::collectRewritableIntrinsicOperands( - IntrinsicInst *II, std::vector> *PostorderStack, - DenseSet *Visited) const { + IntrinsicInst *II, std::vector> &PostorderStack, + DenseSet &Visited) const { switch (II->getIntrinsicID()) { case Intrinsic::objectsize: case Intrinsic::amdgcn_atomic_inc: @@ -272,19 +271,39 @@ void InferAddressSpaces::collectRewritableIntrinsicOperands( // If V is an unvisited flat address expression, appends V to PostorderStack // and marks it as visited. void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack( - Value *V, std::vector> *PostorderStack, - DenseSet *Visited) const { + Value *V, std::vector> &PostorderStack, + DenseSet &Visited) const { assert(V->getType()->isPointerTy()); + + // Generic addressing expressions may be hidden in nested constant + // expressions. + if (ConstantExpr *CE = dyn_cast(V)) { + // TODO: Look in non-address parts, like icmp operands. + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.push_back(std::make_pair(CE, false)); + + return; + } + if (isAddressExpression(*V) && V->getType()->getPointerAddressSpace() == FlatAddrSpace) { - if (Visited->insert(V).second) - PostorderStack->push_back(std::make_pair(V, false)); + if (Visited.insert(V).second) { + PostorderStack.push_back(std::make_pair(V, false)); + + Operator *Op = cast(V); + for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) { + if (ConstantExpr *CE = dyn_cast(Op->getOperand(I))) { + if (isAddressExpression(*CE) && Visited.insert(CE).second) + PostorderStack.emplace_back(CE, false); + } + } + } } } // Returns all flat address expressions in function F. The elements are ordered // ordered in postorder. -std::vector +std::vector InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // This function implements a non-recursive postorder traversal of a partial // use-def graph of function F. @@ -293,14 +312,18 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { DenseSet Visited; auto PushPtrOperand = [&](Value *Ptr) { - appendsFlatAddressExpressionToPostorderStack(Ptr, &PostorderStack, - &Visited); + appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack, + Visited); }; - // We only explore address expressions that are reachable from loads and - // stores for now because we aim at generating faster loads and stores. + // Look at operations that may be interesting accelerate by moving to a known + // address space. We aim at generating after loads and stores, but pure + // addressing calculations may also be faster. for (Instruction &I : instructions(F)) { - if (auto *LI = dyn_cast(&I)) + if (auto *GEP = dyn_cast(&I)) { + if (!GEP->getType()->isVectorTy()) + PushPtrOperand(GEP->getPointerOperand()); + } else if (auto *LI = dyn_cast(&I)) PushPtrOperand(LI->getPointerOperand()); else if (auto *SI = dyn_cast(&I)) PushPtrOperand(SI->getPointerOperand()); @@ -316,30 +339,34 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { if (auto *MTI = dyn_cast(MI)) PushPtrOperand(MTI->getRawSource()); } else if (auto *II = dyn_cast(&I)) - collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited); + collectRewritableIntrinsicOperands(II, PostorderStack, Visited); else if (ICmpInst *Cmp = dyn_cast(&I)) { // FIXME: Handle vectors of pointers if (Cmp->getOperand(0)->getType()->isPointerTy()) { PushPtrOperand(Cmp->getOperand(0)); PushPtrOperand(Cmp->getOperand(1)); } + } else if (auto *ASC = dyn_cast(&I)) { + if (!ASC->getType()->isVectorTy()) + PushPtrOperand(ASC->getPointerOperand()); } } - std::vector Postorder; // The resultant postorder. + std::vector Postorder; // The resultant postorder. while (!PostorderStack.empty()) { + Value *TopVal = PostorderStack.back().first; // If the operands of the expression on the top are already explored, // adds that expression to the resultant postorder. if (PostorderStack.back().second) { - Postorder.push_back(PostorderStack.back().first); + Postorder.push_back(TopVal); PostorderStack.pop_back(); continue; } // Otherwise, adds its operands to the stack and explores them. PostorderStack.back().second = true; - for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) { - appendsFlatAddressExpressionToPostorderStack(PtrOperand, &PostorderStack, - &Visited); + for (Value *PtrOperand : getPointerOperands(*TopVal)) { + appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack, + Visited); } } return Postorder; @@ -473,6 +500,7 @@ static Value *cloneConstantExprWithNewAddressSpace( } // Computes the operands of the new constant expression. + bool IsNew = false; SmallVector NewOperands; for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) { Constant *Operand = CE->getOperand(Index); @@ -482,6 +510,7 @@ static Value *cloneConstantExprWithNewAddressSpace( // bitcast, and getelementptr) do not incur cycles in the data flow graph // and (2) this function is called on constant expressions in postorder. if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) { + IsNew = true; NewOperands.push_back(cast(NewOperand)); } else { // Otherwise, reuses the old operand. @@ -489,6 +518,11 @@ static Value *cloneConstantExprWithNewAddressSpace( } } + // If !IsNew, we will replace the Value with itself. However, replaced values + // are assumed to wrapped in a addrspace cast later so drop it now. + if (!IsNew) + return nullptr; + if (CE->getOpcode() == Instruction::GetElementPtr) { // Needs to specify the source type while constructing a getelementptr // constant expression. @@ -556,7 +590,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return false; // Collects all flat address expressions in postorder. - std::vector Postorder = collectFlatAddressExpressions(F); + std::vector Postorder = collectFlatAddressExpressions(F); // Runs a data-flow analysis to refine the address spaces of every expression // in Postorder. @@ -568,8 +602,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) { return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F); } +// Constants need to be tracked through RAUW to handle cases with nested +// constant expressions, so wrap values in WeakTrackingVH. void InferAddressSpaces::inferAddressSpaces( - const std::vector &Postorder, + ArrayRef Postorder, ValueToAddrSpaceMapTy *InferredAddrSpace) const { SetVector Worklist(Postorder.begin(), Postorder.end()); // Initially, all expressions are in the uninitialized address space. @@ -781,8 +817,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I, } bool InferAddressSpaces::rewriteWithNewAddressSpaces( - const std::vector &Postorder, - const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { + ArrayRef Postorder, + const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { // For each address expression to be modified, creates a clone of it with its // pointer operands converted to the new address space. Since the pointer // operands are converted, the clone is naturally in the new address space by @@ -809,8 +845,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get())); } + SmallVector DeadInstructions; + // Replaces the uses of the old address expressions with the new ones. - for (Value *V : Postorder) { + for (const WeakTrackingVH &WVH : Postorder) { + assert(WVH && "value was unexpectedly deleted"); + Value *V = WVH; Value *NewV = ValueWithNewAddrSpace.lookup(V); if (NewV == nullptr) continue; @@ -818,6 +858,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( DEBUG(dbgs() << "Replacing the uses of " << *V << "\n with\n " << *NewV << '\n'); + if (Constant *C = dyn_cast(V)) { + Constant *Replace = ConstantExpr::getAddrSpaceCast(cast(NewV), + C->getType()); + if (C != Replace) { + DEBUG(dbgs() << "Inserting replacement const cast: " + << Replace << ": " << *Replace << '\n'); + C->replaceAllUsesWith(Replace); + V = Replace; + } + } + Value::use_iterator I, E, Next; for (I = V->use_begin(), E = V->use_end(); I != E; ) { Use &U = *I; @@ -878,6 +929,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } + if (AddrSpaceCastInst *ASC = dyn_cast(CurUser)) { + unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + if (ASC->getDestAddressSpace() == NewAS) { + ASC->replaceAllUsesWith(NewV); + DeadInstructions.push_back(ASC); + continue; + } + } + // Otherwise, replaces the use with flat(NewV). if (Instruction *I = dyn_cast(V)) { BasicBlock::iterator InsertPos = std::next(I->getIterator()); @@ -891,10 +951,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( } } - if (V->use_empty()) - RecursivelyDeleteTriviallyDeadInstructions(V); + if (V->use_empty()) { + if (Instruction *I = dyn_cast(V)) + DeadInstructions.push_back(I); + } } + for (Instruction *I : DeadInstructions) + RecursivelyDeleteTriviallyDeadInstructions(I); + return true; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 08eb95a1a3d3e8396777b85b57dd39f433be4f01..c120036464d0a4fb24f3d85cc6debb899f358830 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -12,16 +12,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/JumpThreading.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" @@ -36,6 +35,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -132,7 +132,7 @@ bool JumpThreading::runOnFunction(Function &F) { bool HasProfileData = F.getEntryCount().hasValue(); if (HasProfileData) { LoopInfo LI{DominatorTree(F)}; - BPI.reset(new BranchProbabilityInfo(F, LI)); + BPI.reset(new BranchProbabilityInfo(F, LI, TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } @@ -152,7 +152,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, bool HasProfileData = F.getEntryCount().hasValue(); if (HasProfileData) { LoopInfo LI{DominatorTree(F)}; - BPI.reset(new BranchProbabilityInfo(F, LI)); + BPI.reset(new BranchProbabilityInfo(F, LI, &TLI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } @@ -253,6 +253,35 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, return EverChanged; } +// Replace uses of Cond with ToVal when safe to do so. If all uses are +// replaced, we can remove Cond. We cannot blindly replace all uses of Cond +// because we may incorrectly replace uses when guards/assumes are uses of +// of `Cond` and we used the guards/assume to reason about the `Cond` value +// at the end of block. RAUW unconditionally replaces all uses +// including the guards/assumes themselves and the uses before the +// guard/assume. +static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) { + assert(Cond->getType() == ToVal->getType()); + auto *BB = Cond->getParent(); + // We can unconditionally replace all uses in non-local blocks (i.e. uses + // strictly dominated by BB), since LVI information is true from the + // terminator of BB. + replaceNonLocalUsesWith(Cond, ToVal); + for (Instruction &I : reverse(*BB)) { + // Reached the Cond whose uses we are trying to replace, so there are no + // more uses. + if (&I == Cond) + break; + // We only replace uses in instructions that are guaranteed to reach the end + // of BB, where we know Cond is ToVal. + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + break; + I.replaceUsesOfWith(Cond, ToVal); + } + if (Cond->use_empty() && !Cond->mayHaveSideEffects()) + Cond->eraseFromParent(); +} + /// Return the cost of duplicating a piece of this block from first non-phi /// and before StopAt instruction to thread across it. Stop scanning the block /// when exceeding the threshold. If duplication is impossible, returns ~0U. @@ -557,7 +586,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( Value *LHS = PN->getIncomingValue(i); Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); - Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL); + Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL}); if (!Res) { if (!isa(RHS)) continue; @@ -580,17 +609,17 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. - if (isa(Cmp->getOperand(1)) && Cmp->getType()->isIntegerTy()) { + if (isa(Cmp->getOperand(1)) && !Cmp->getType()->isVectorTy()) { + Constant *CmpConst = cast(Cmp->getOperand(1)); + if (!isa(Cmp->getOperand(0)) || cast(Cmp->getOperand(0))->getParent() != BB) { - Constant *RHSCst = cast(Cmp->getOperand(1)); - for (BasicBlock *P : predecessors(BB)) { // If the value is known by LazyValueInfo to be a constant in a // predecessor, use that information to try to thread this block. LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0), - RHSCst, P, BB, CxtI ? CxtI : Cmp); + CmpConst, P, BB, CxtI ? CxtI : Cmp); if (Res == LazyValueInfo::Unknown) continue; @@ -603,21 +632,19 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // Try to find a constant value for the LHS of a comparison, // and evaluate it statically if we can. - if (Constant *CmpConst = dyn_cast(Cmp->getOperand(1))) { - PredValueInfoTy LHSVals; - ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, - WantInteger, CxtI); - - for (const auto &LHSVal : LHSVals) { - Constant *V = LHSVal.first; - Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), - V, CmpConst); - if (Constant *KC = getKnownConstant(Folded, WantInteger)) - Result.push_back(std::make_pair(KC, LHSVal.second)); - } + PredValueInfoTy LHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, + WantInteger, CxtI); - return !Result.empty(); + for (const auto &LHSVal : LHSVals) { + Constant *V = LHSVal.first; + Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(), + V, CmpConst); + if (Constant *KC = getKnownConstant(Folded, WantInteger)) + Result.push_back(std::make_pair(KC, LHSVal.second)); } + + return !Result.empty(); } } @@ -835,14 +862,18 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { CondBr->eraseFromParent(); if (CondCmp->use_empty()) CondCmp->eraseFromParent(); + // We can safely replace *some* uses of the CondInst if it has + // exactly one value as returned by LVI. RAUW is incorrect in the + // presence of guards and assumes, that have the `Cond` as the use. This + // is because we use the guards/assume to reason about the `Cond` value + // at the end of block, but RAUW unconditionally replaces all uses + // including the guards/assumes themselves and the uses before the + // guard/assume. else if (CondCmp->getParent() == BB) { - // If the fact we just learned is true for all uses of the - // condition, replace it with a constant value auto *CI = Ret == LazyValueInfo::True ? ConstantInt::getTrue(CondCmp->getType()) : ConstantInt::getFalse(CondCmp->getType()); - CondCmp->replaceAllUsesWith(CI); - CondCmp->eraseFromParent(); + ReplaceFoldableUses(CondCmp, CI); } return true; } @@ -1250,37 +1281,53 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; + Constant *OnlyVal = nullptr; + Constant *MultipleVal = (Constant *)(intptr_t)~0ULL; + unsigned PredWithKnownDest = 0; for (const auto &PredValue : PredValues) { BasicBlock *Pred = PredValue.second; if (!SeenPreds.insert(Pred).second) continue; // Duplicate predecessor entry. - // If the predecessor ends with an indirect goto, we can't change its - // destination. - if (isa(Pred->getTerminator())) - continue; - Constant *Val = PredValue.first; BasicBlock *DestBB; if (isa(Val)) DestBB = nullptr; - else if (BranchInst *BI = dyn_cast(BB->getTerminator())) + else if (BranchInst *BI = dyn_cast(BB->getTerminator())) { + assert(isa(Val) && "Expecting a constant integer"); DestBB = BI->getSuccessor(cast(Val)->isZero()); - else if (SwitchInst *SI = dyn_cast(BB->getTerminator())) { + } else if (SwitchInst *SI = dyn_cast(BB->getTerminator())) { + assert(isa(Val) && "Expecting a constant integer"); DestBB = SI->findCaseValue(cast(Val))->getCaseSuccessor(); } else { assert(isa(BB->getTerminator()) && "Unexpected terminator"); + assert(isa(Val) && "Expecting a constant blockaddress"); DestBB = cast(Val)->getBasicBlock(); } // If we have exactly one destination, remember it for efficiency below. - if (PredToDestList.empty()) + if (PredToDestList.empty()) { OnlyDest = DestBB; - else if (OnlyDest != DestBB) - OnlyDest = MultipleDestSentinel; + OnlyVal = Val; + } else { + if (OnlyDest != DestBB) + OnlyDest = MultipleDestSentinel; + // It possible we have same destination, but different value, e.g. default + // case in switchinst. + if (Val != OnlyVal) + OnlyVal = MultipleVal; + } + + // We know where this predecessor is going. + ++PredWithKnownDest; + + // If the predecessor ends with an indirect goto, we can't change its + // destination. + if (isa(Pred->getTerminator())) + continue; PredToDestList.push_back(std::make_pair(Pred, DestBB)); } @@ -1289,6 +1336,45 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (PredToDestList.empty()) return false; + // If all the predecessors go to a single known successor, we want to fold, + // not thread. By doing so, we do not need to duplicate the current block and + // also miss potential opportunities in case we dont/cant duplicate. + if (OnlyDest && OnlyDest != MultipleDestSentinel) { + if (PredWithKnownDest == + (size_t)std::distance(pred_begin(BB), pred_end(BB))) { + bool SeenFirstBranchToOnlyDest = false; + for (BasicBlock *SuccBB : successors(BB)) { + if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest) + SeenFirstBranchToOnlyDest = true; // Don't modify the first branch. + else + SuccBB->removePredecessor(BB, true); // This is unreachable successor. + } + + // Finally update the terminator. + TerminatorInst *Term = BB->getTerminator(); + BranchInst::Create(OnlyDest, Term); + Term->eraseFromParent(); + + // If the condition is now dead due to the removal of the old terminator, + // erase it. + if (auto *CondInst = dyn_cast(Cond)) { + if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) + CondInst->eraseFromParent(); + // We can safely replace *some* uses of the CondInst if it has + // exactly one value as returned by LVI. RAUW is incorrect in the + // presence of guards and assumes, that have the `Cond` as the use. This + // is because we use the guards/assume to reason about the `Cond` value + // at the end of block, but RAUW unconditionally replaces all uses + // including the guards/assumes themselves and the uses before the + // guard/assume. + else if (OnlyVal && OnlyVal != MultipleVal && + CondInst->getParent() == BB) + ReplaceFoldableUses(CondInst, OnlyVal); + } + return true; + } + } + // Determine which is the most common successor. If we have many inputs and // this block is a switch, we want to start by threading the batch that goes // to the most popular destination first. If we only know about one @@ -1853,11 +1939,12 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred( // If this instruction can be simplified after the operands are updated, // just use the simplified value instead. This frequently happens due to // phi translation. - if (Value *IV = - SimplifyInstruction(New, BB->getModule()->getDataLayout())) { + if (Value *IV = SimplifyInstruction( + New, + {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) { ValueMapping[&*BI] = IV; if (!New->mayHaveSideEffects()) { - delete New; + New->deleteValue(); New = nullptr; } } else { diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 340c81fed0fdacaddaa6da457968abdbc3cf9e9f..37b9c4b1094e0b5d0119f91541364e992b5b6be5 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -546,7 +546,7 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, // If there are escaping uses of invariant.start instruction, the load maybe // non-invariant. if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || - II->hasNUsesOrMore(1)) + !II->use_empty()) continue; unsigned InvariantSizeInBits = cast(II->getArgOperand(0))->getSExtValue() * 8; diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 02215d3450c23f80d032cd0dbec30d41ed99c80b..025ba1bfedc187b28801bd0b52ba876a487f699c 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -11,7 +11,6 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -28,6 +27,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -228,7 +228,7 @@ bool LoadCombine::combineLoads(SmallVectorImpl &Loads) { L.Load->replaceAllUsesWith(V); } - NumLoadsCombined = NumLoadsCombined + Loads.size(); + NumLoadsCombined += Loads.size(); return true; } diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 73e8ce0e1d93cfbe769f1c26848df2e89b67e90f..3151ccd279c41e73da82fa1b18415e31b0edeadc 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -29,6 +30,21 @@ using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); +/// This function deletes dead loops. The caller of this function needs to +/// guarantee that the loop is infact dead. Here we handle two kinds of dead +/// loop. The first kind (\p isLoopDead) is where only invariant values from +/// within the loop are used outside of it. The second kind (\p +/// isLoopNeverExecuted) is where the loop is provably never executed. We can +/// always remove never executed loops since they will not cause any +/// difference to program behaviour. +/// +/// This also updates the relevant analysis information in \p DT, \p SE, and \p +/// LI. It also updates the loop PM if an updater struct is provided. +// TODO: This function will be used by loop-simplifyCFG as well. So, move this +// to LoopUtils.cpp +static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, bool LoopIsNeverExecuted, + LPMUpdater *Updater = nullptr); /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -84,12 +100,44 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, return true; } +/// This function returns true if there is no viable path from the +/// entry block to the header of \p L. Right now, it only does +/// a local search to save compile time. +static bool isLoopNeverExecuted(Loop *L) { + using namespace PatternMatch; + + auto *Preheader = L->getLoopPreheader(); + // TODO: We can relax this constraint, since we just need a loop + // predecessor. + assert(Preheader && "Needs preheader!"); + + if (Preheader == &Preheader->getParent()->getEntryBlock()) + return false; + // All predecessors of the preheader should have a constant conditional + // branch, with the loop's preheader as not-taken. + for (auto *Pred: predecessors(Preheader)) { + BasicBlock *Taken, *NotTaken; + ConstantInt *Cond; + if (!match(Pred->getTerminator(), + m_Br(m_ConstantInt(Cond), Taken, NotTaken))) + return false; + if (!Cond->getZExtValue()) + std::swap(Taken, NotTaken); + if (Taken == Preheader) + return false; + } + assert(!pred_empty(Preheader) && + "Preheader should have predecessors at this point!"); + // All the predecessors have the loop preheader as not-taken target. + return true; +} + /// Remove a loop if it is dead. /// /// A loop is considered dead if it does not impact the observable behavior of /// the program other than finite running time. This never removes a loop that -/// might be infinite, as doing so could change the halting/non-halting nature -/// of a program. +/// might be infinite (unless it is never executed), as doing so could change +/// the halting/non-halting nature of a program. /// /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in /// order to make various safety checks work. @@ -97,9 +145,6 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE, /// \returns true if any changes were made. This may mutate the loop even if it /// is unable to delete it due to hoisting trivially loop invariant /// instructions out of the loop. -/// -/// This also updates the relevant analysis information in \p DT, \p SE, and \p -/// LI. It also updates the loop PM if an updater struct is provided. static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, LPMUpdater *Updater = nullptr) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); @@ -119,6 +164,17 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, if (L->begin() != L->end()) return false; + + BasicBlock *ExitBlock = L->getUniqueExitBlock(); + + if (ExitBlock && isLoopNeverExecuted(L)) { + deleteDeadLoop(L, DT, SE, LI, true /* LoopIsNeverExecuted */, Updater); + ++NumDeleted; + return true; + } + + // The remaining checks below are for a loop being dead because all statements + // in the loop are invariant. SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -126,7 +182,6 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. - BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (!ExitBlock) return false; @@ -141,6 +196,19 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, if (isa(S)) return Changed; + deleteDeadLoop(L, DT, SE, LI, false /* LoopIsNeverExecuted */, Updater); + ++NumDeleted; + + return true; +} + +static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, bool LoopIsNeverExecuted, + LPMUpdater *Updater) { + assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); + auto *Preheader = L->getLoopPreheader(); + assert(Preheader && "Preheader should exist!"); + // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. // @@ -156,17 +224,29 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // to determine what it needs to clean up. SE.forgetLoop(L); + auto *ExitBlock = L->getUniqueExitBlock(); + assert(ExitBlock && "Should have a unique exit block!"); + // Connect the preheader directly to the exit block. - TerminatorInst *TI = Preheader->getTerminator(); - TI->replaceUsesOfWith(L->getHeader(), ExitBlock); + // Even when the loop is never executed, we cannot remove the edge from the + // source block to the exit block. Consider the case where the unexecuted loop + // branches back to an outer loop. If we deleted the loop and removed the edge + // coming to this inner loop, this will break the outer loop structure (by + // deleting the backedge of the outer loop). If the outer loop is indeed a + // non-loop, it will be deleted in a future iteration of loop deletion pass. + Preheader->getTerminator()->replaceUsesOfWith(L->getHeader(), ExitBlock); - // Rewrite phis in the exit block to get their inputs from - // the preheader instead of the exiting block. + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + // Rewrite phis in the exit block to get their inputs from the Preheader + // instead of the exiting block. BasicBlock *ExitingBlock = ExitingBlocks[0]; BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast(BI)) { int j = P->getBasicBlockIndex(ExitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); + if (LoopIsNeverExecuted) + P->setIncomingValue(j, UndefValue::get(P->getType())); P->setIncomingBlock(j, Preheader); for (unsigned i = 1; i < ExitingBlocks.size(); ++i) P->removeIncomingValue(ExitingBlocks[i]); @@ -211,9 +291,6 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // The last step is to update LoopInfo now that we've eliminated this loop. LI.markAsRemoved(L); - ++NumDeleted; - - return true; } PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, @@ -254,7 +331,6 @@ Pass *llvm::createLoopDeletionPass() { return new LoopDeletionLegacyPass(); } bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) { if (skipLoop(L)) return false; - DominatorTree &DT = getAnalysis().getDomTree(); ScalarEvolution &SE = getAnalysis().getSE(); LoopInfo &LI = getAnalysis().getLoopInfo(); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 946d85d7360fd021b0885b5af08c52a474dd316b..8b435050ac769b3051cce5051d53d4b4f6900bc1 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -110,6 +110,16 @@ private: bool HasMemset; bool HasMemsetPattern; bool HasMemcpy; + /// Return code for isLegalStore() + enum LegalStoreKind { + None = 0, + Memset, + MemsetPattern, + Memcpy, + UnorderedAtomicMemcpy, + DontUse // Dummy retval never to be used. Allows catching errors in retval + // handling. + }; /// \name Countable Loop Idiom Handling /// @{ @@ -119,8 +129,7 @@ private: SmallVectorImpl &ExitBlocks); void collectStores(BasicBlock *BB); - bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern, - bool &ForMemcpy); + LegalStoreKind isLegalStore(StoreInst *SI); bool processLoopStores(SmallVectorImpl &SL, const SCEV *BECount, bool ForMemset); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); @@ -144,6 +153,10 @@ private: bool recognizePopcount(); void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var); + bool recognizeAndInsertCTLZ(); + void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst, + PHINode *CntPhi, Value *Var, const DebugLoc DL, + bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); /// @} }; @@ -339,15 +352,24 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { return ConstantArray::get(AT, std::vector(ArraySize, C)); } -bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, - bool &ForMemsetPattern, bool &ForMemcpy) { +LoopIdiomRecognize::LegalStoreKind +LoopIdiomRecognize::isLegalStore(StoreInst *SI) { + // Don't touch volatile stores. - if (!SI->isSimple()) - return false; + if (SI->isVolatile()) + return LegalStoreKind::None; + // We only want simple or unordered-atomic stores. + if (!SI->isUnordered()) + return LegalStoreKind::None; + + // Don't convert stores of non-integral pointer types to memsets (which stores + // integers). + if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType())) + return LegalStoreKind::None; // Avoid merging nontemporal stores. if (SI->getMetadata(LLVMContext::MD_nontemporal)) - return false; + return LegalStoreKind::None; Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); @@ -355,7 +377,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, // Reject stores that are so large that they overflow an unsigned. uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) - return false; + return LegalStoreKind::None; // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a @@ -363,11 +385,11 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, const SCEVAddRecExpr *StoreEv = dyn_cast(SE->getSCEV(StorePtr)); if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) - return false; + return LegalStoreKind::None; // Check to see if we have a constant stride. if (!isa(StoreEv->getOperand(1))) - return false; + return LegalStoreKind::None; // See if the store can be turned into a memset. @@ -378,22 +400,23 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; + // Note: memset and memset_pattern on unordered-atomic is yet not supported + bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple(); + // If we're allowed to form a memset, and the stored value would be // acceptable for memset, use it. - if (HasMemset && SplatValue && + if (!UnorderedAtomic && HasMemset && SplatValue && // Verify that the stored value is loop invariant. If not, we can't // promote the memset. CurLoop->isLoopInvariant(SplatValue)) { // It looks like we can use SplatValue. - ForMemset = true; - return true; - } else if (HasMemsetPattern && + return LegalStoreKind::Memset; + } else if (!UnorderedAtomic && HasMemsetPattern && // Don't create memset_pattern16s with address spaces. StorePtr->getType()->getPointerAddressSpace() == 0 && (PatternValue = getMemSetPatternValue(StoredVal, DL))) { // It looks like we can use PatternValue! - ForMemsetPattern = true; - return true; + return LegalStoreKind::MemsetPattern; } // Otherwise, see if the store can be turned into a memcpy. @@ -403,12 +426,17 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, APInt Stride = getStoreStride(StoreEv); unsigned StoreSize = getStoreSizeInBytes(SI, DL); if (StoreSize != Stride && StoreSize != -Stride) - return false; + return LegalStoreKind::None; // The store must be feeding a non-volatile load. LoadInst *LI = dyn_cast(SI->getValueOperand()); - if (!LI || !LI->isSimple()) - return false; + + // Only allow non-volatile loads + if (!LI || LI->isVolatile()) + return LegalStoreKind::None; + // Only allow simple or unordered-atomic loads + if (!LI->isUnordered()) + return LegalStoreKind::None; // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a @@ -416,18 +444,19 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset, const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(LI->getPointerOperand())); if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) - return false; + return LegalStoreKind::None; // The store and load must share the same stride. if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) - return false; + return LegalStoreKind::None; // Success. This store can be converted into a memcpy. - ForMemcpy = true; - return true; + UnorderedAtomic = UnorderedAtomic || LI->isAtomic(); + return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy + : LegalStoreKind::Memcpy; } // This store can't be transformed into a memset/memcpy. - return false; + return LegalStoreKind::None; } void LoopIdiomRecognize::collectStores(BasicBlock *BB) { @@ -439,24 +468,29 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) { if (!SI) continue; - bool ForMemset = false; - bool ForMemsetPattern = false; - bool ForMemcpy = false; // Make sure this is a strided store with a constant stride. - if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy)) - continue; - - // Save the store locations. - if (ForMemset) { + switch (isLegalStore(SI)) { + case LegalStoreKind::None: + // Nothing to do + break; + case LegalStoreKind::Memset: { // Find the base pointer. Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL); StoreRefsForMemset[Ptr].push_back(SI); - } else if (ForMemsetPattern) { + } break; + case LegalStoreKind::MemsetPattern: { // Find the base pointer. Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL); StoreRefsForMemsetPattern[Ptr].push_back(SI); - } else if (ForMemcpy) + } break; + case LegalStoreKind::Memcpy: + case LegalStoreKind::UnorderedAtomicMemcpy: StoreRefsForMemcpy.push_back(SI); + break; + default: + assert(false && "unhandled return value"); + break; + } } } @@ -494,7 +528,7 @@ bool LoopIdiomRecognize::runOnLoopBlock( Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast(Inst)) { - WeakVH InstPtr(&*I); + WeakTrackingVH InstPtr(&*I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; @@ -778,6 +812,11 @@ bool LoopIdiomRecognize::processLoopStridedStore( if (NegStride) Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE); + // TODO: ideally we should still be able to generate memset if SCEV expander + // is taught to generate the dependencies at the latest point. + if (!isSafeToExpand(Start, *SE)) + return false; + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -809,6 +848,11 @@ bool LoopIdiomRecognize::processLoopStridedStore( SCEV::FlagNUW); } + // TODO: ideally we should still be able to generate memset if SCEV expander + // is taught to generate the dependencies at the latest point. + if (!isSafeToExpand(NumBytesS, *SE)) + return false; + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); @@ -851,10 +895,10 @@ bool LoopIdiomRecognize::processLoopStridedStore( /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like -/// for (i) A[i] = B[i]; +/// for (i) A[i] = B[i]; bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount) { - assert(SI->isSimple() && "Expected only non-volatile stores."); + assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores."); Value *StorePtr = SI->getPointerOperand(); const SCEVAddRecExpr *StoreEv = cast(SE->getSCEV(StorePtr)); @@ -864,7 +908,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, // The store must be feeding a non-volatile load. LoadInst *LI = cast(SI->getValueOperand()); - assert(LI->isSimple() && "Expected only non-volatile stores."); + assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads."); // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a @@ -938,6 +982,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); + if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW); @@ -945,9 +990,37 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); - CallInst *NewCall = - Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, - std::min(SI->getAlignment(), LI->getAlignment())); + unsigned Align = std::min(SI->getAlignment(), LI->getAlignment()); + CallInst *NewCall = nullptr; + // Check whether to generate an unordered atomic memcpy: + // If the load or store are atomic, then they must neccessarily be unordered + // by previous checks. + if (!SI->isAtomic() && !LI->isAtomic()) + NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align); + else { + // We cannot allow unaligned ops for unordered load/store, so reject + // anything where the alignment isn't at least the element size. + if (Align < StoreSize) + return false; + + // If the element.atomic memcpy is not lowered into explicit + // loads/stores later, then it will be lowered into an element-size + // specific lib call. If the lib call doesn't exist for our store size, then + // we shouldn't generate the memcpy. + if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize()) + return false; + + NewCall = Builder.CreateElementUnorderedAtomicMemCpy( + StoreBasePtr, LoadBasePtr, NumBytes, StoreSize); + + // Propagate alignment info onto the pointer args. Note that unordered + // atomic loads/stores are *required* by the spec to have an alignment + // but non-atomic loads/stores may not. + NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(), + SI->getAlignment())); + NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(), + LI->getAlignment())); + } NewCall->setDebugLoc(SI->getDebugLoc()); DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" @@ -979,7 +1052,7 @@ bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, } bool LoopIdiomRecognize::runOnNoncountableLoop() { - return recognizePopcount(); + return recognizePopcount() || recognizeAndInsertCTLZ(); } /// Check if the given conditional branch is based on the comparison between @@ -1007,6 +1080,17 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) { return nullptr; } +// Check if the recurrence variable `VarX` is in the right form to create +// the idiom. Returns the value coerced to a PHINode if so. +static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX, + BasicBlock *LoopEntry) { + auto *PhiX = dyn_cast(VarX); + if (PhiX && PhiX->getParent() == LoopEntry && + (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX)) + return PhiX; + return nullptr; +} + /// Return true iff the idiom is detected in the loop. /// /// Additionally: @@ -1082,13 +1166,9 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, } // step 3: Check the recurrence of variable X - { - PhiX = dyn_cast(VarX1); - if (!PhiX || - (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { - return false; - } - } + PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry); + if (!PhiX) + return false; // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 { @@ -1104,8 +1184,8 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, if (!Inc || !Inc->isOne()) continue; - PHINode *Phi = dyn_cast(Inst->getOperand(0)); - if (!Phi || Phi->getParent() != LoopEntry) + PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); + if (!Phi) continue; // Check if the result of the instruction is live of the loop. @@ -1144,6 +1224,169 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, return true; } +/// Return true if the idiom is detected in the loop. +/// +/// Additionally: +/// 1) \p CntInst is set to the instruction Counting Leading Zeros (CTLZ) +/// or nullptr if there is no such. +/// 2) \p CntPhi is set to the corresponding phi node +/// or nullptr if there is no such. +/// 3) \p Var is set to the value whose CTLZ could be used. +/// 4) \p DefX is set to the instruction calculating Loop exit condition. +/// +/// The core idiom we are trying to detect is: +/// \code +/// if (x0 == 0) +/// goto loop-exit // the precondition of the loop +/// cnt0 = init-val; +/// do { +/// x = phi (x0, x.next); //PhiX +/// cnt = phi(cnt0, cnt.next); +/// +/// cnt.next = cnt + 1; +/// ... +/// x.next = x >> 1; // DefX +/// ... +/// } while(x.next != 0); +/// +/// loop-exit: +/// \endcode +static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, + Instruction *&CntInst, PHINode *&CntPhi, + Instruction *&DefX) { + BasicBlock *LoopEntry; + Value *VarX = nullptr; + + DefX = nullptr; + PhiX = nullptr; + CntInst = nullptr; + CntPhi = nullptr; + LoopEntry = *(CurLoop->block_begin()); + + // step 1: Check if the loop-back branch is in desirable form. + if (Value *T = matchCondition( + dyn_cast(LoopEntry->getTerminator()), LoopEntry)) + DefX = dyn_cast(T); + else + return false; + + // step 2: detect instructions corresponding to "x.next = x >> 1" + if (!DefX || DefX->getOpcode() != Instruction::AShr) + return false; + if (ConstantInt *Shft = dyn_cast(DefX->getOperand(1))) + if (!Shft || !Shft->isOne()) + return false; + VarX = DefX->getOperand(0); + + // step 3: Check the recurrence of variable X + PhiX = getRecurrenceVar(VarX, DefX, LoopEntry); + if (!PhiX) + return false; + + // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1 + // TODO: We can skip the step. If loop trip count is known (CTLZ), + // then all uses of "cnt.next" could be optimized to the trip count + // plus "cnt0". Currently it is not optimized. + // This step could be used to detect POPCNT instruction: + // cnt.next = cnt + (x.next & 1) + for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(), + IterE = LoopEntry->end(); + Iter != IterE; Iter++) { + Instruction *Inst = &*Iter; + if (Inst->getOpcode() != Instruction::Add) + continue; + + ConstantInt *Inc = dyn_cast(Inst->getOperand(1)); + if (!Inc || !Inc->isOne()) + continue; + + PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); + if (!Phi) + continue; + + CntInst = Inst; + CntPhi = Phi; + break; + } + if (!CntInst) + return false; + + return true; +} + +/// Recognize CTLZ idiom in a non-countable loop and convert the loop +/// to countable (with CTLZ trip count). +/// If CTLZ inserted as a new trip count returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizeAndInsertCTLZ() { + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + Instruction *CntInst, *DefX; + PHINode *CntPhi, *PhiX; + if (!detectCTLZIdiom(CurLoop, PhiX, CntInst, CntPhi, DefX)) + return false; + + bool IsCntPhiUsedOutsideLoop = false; + for (User *U : CntPhi->users()) + if (!CurLoop->contains(dyn_cast(U))) { + IsCntPhiUsedOutsideLoop = true; + break; + } + bool IsCntInstUsedOutsideLoop = false; + for (User *U : CntInst->users()) + if (!CurLoop->contains(dyn_cast(U))) { + IsCntInstUsedOutsideLoop = true; + break; + } + // If both CntInst and CntPhi are used outside the loop the profitability + // is questionable. + if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop) + return false; + + // For some CPUs result of CTLZ(X) intrinsic is undefined + // when X is 0. If we can not guarantee X != 0, we need to check this + // when expand. + bool ZeroCheck = false; + // It is safe to assume Preheader exist as it was checked in + // parent function RunOnLoop. + BasicBlock *PH = CurLoop->getLoopPreheader(); + Value *InitX = PhiX->getIncomingValueForBlock(PH); + // If we check X != 0 before entering the loop we don't need a zero + // check in CTLZ intrinsic, but only if Cnt Phi is not used outside of the + // loop (if it is used we count CTLZ(X >> 1)). + if (!IsCntPhiUsedOutsideLoop) + if (BasicBlock *PreCondBB = PH->getSinglePredecessor()) + if (BranchInst *PreCondBr = + dyn_cast(PreCondBB->getTerminator())) { + if (matchCondition(PreCondBr, PH) == InitX) + ZeroCheck = true; + } + + // Check if CTLZ intrinsic is profitable. Assume it is always profitable + // if we delete the loop (the loop has only 6 instructions): + // %n.addr.0 = phi [ %n, %entry ], [ %shr, %while.cond ] + // %i.0 = phi [ %i0, %entry ], [ %inc, %while.cond ] + // %shr = ashr %n.addr.0, 1 + // %tobool = icmp eq %shr, 0 + // %inc = add nsw %i.0, 1 + // br i1 %tobool + + IRBuilder<> Builder(PH->getTerminator()); + SmallVector Ops = + {InitX, ZeroCheck ? Builder.getTrue() : Builder.getFalse()}; + ArrayRef Args(Ops); + if (CurLoop->getHeader()->size() != 6 && + TTI->getIntrinsicCost(Intrinsic::ctlz, InitX->getType(), Args) > + TargetTransformInfo::TCC_Basic) + return false; + + const DebugLoc DL = DefX->getDebugLoc(); + transformLoopToCountable(PH, CntInst, CntPhi, InitX, DL, ZeroCheck, + IsCntPhiUsedOutsideLoop); + return true; +} + /// Recognizes a population count idiom in a non-countable loop. /// /// If detected, transforms the relevant code to issue the popcount intrinsic @@ -1207,6 +1450,134 @@ static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, return CI; } +static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, + const DebugLoc &DL, bool ZeroCheck) { + Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()}; + Type *Tys[] = {Val->getType()}; + + Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); + Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys); + CallInst *CI = IRBuilder.CreateCall(Func, Ops); + CI->setDebugLoc(DL); + + return CI; +} + +/// Transform the following loop: +/// loop: +/// CntPhi = PHI [Cnt0, CntInst] +/// PhiX = PHI [InitX, DefX] +/// CntInst = CntPhi + 1 +/// DefX = PhiX >> 1 +// LOOP_BODY +/// Br: loop if (DefX != 0) +/// Use(CntPhi) or Use(CntInst) +/// +/// Into: +/// If CntPhi used outside the loop: +/// CountPrev = BitWidth(InitX) - CTLZ(InitX >> 1) +/// Count = CountPrev + 1 +/// else +/// Count = BitWidth(InitX) - CTLZ(InitX) +/// loop: +/// CntPhi = PHI [Cnt0, CntInst] +/// PhiX = PHI [InitX, DefX] +/// PhiCount = PHI [Count, Dec] +/// CntInst = CntPhi + 1 +/// DefX = PhiX >> 1 +/// Dec = PhiCount - 1 +/// LOOP_BODY +/// Br: loop if (Dec != 0) +/// Use(CountPrev + Cnt0) // Use(CntPhi) +/// or +/// Use(Count + Cnt0) // Use(CntInst) +/// +/// If LOOP_BODY is empty the loop will be deleted. +/// If CntInst and DefX are not used in LOOP_BODY they will be removed. +void LoopIdiomRecognize::transformLoopToCountable( + BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX, + const DebugLoc DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) { + BranchInst *PreheaderBr = dyn_cast(Preheader->getTerminator()); + + // Step 1: Insert the CTLZ instruction at the end of the preheader block + // Count = BitWidth - CTLZ(InitX); + // If there are uses of CntPhi create: + // CountPrev = BitWidth - CTLZ(InitX >> 1); + IRBuilder<> Builder(PreheaderBr); + Builder.SetCurrentDebugLocation(DL); + Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext; + + if (IsCntPhiUsedOutsideLoop) + InitXNext = Builder.CreateAShr(InitX, + ConstantInt::get(InitX->getType(), 1)); + else + InitXNext = InitX; + CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck); + Count = Builder.CreateSub( + ConstantInt::get(CTLZ->getType(), + CTLZ->getType()->getIntegerBitWidth()), + CTLZ); + if (IsCntPhiUsedOutsideLoop) { + CountPrev = Count; + Count = Builder.CreateAdd( + CountPrev, + ConstantInt::get(CountPrev->getType(), 1)); + } + if (IsCntPhiUsedOutsideLoop) + NewCount = Builder.CreateZExtOrTrunc(CountPrev, + cast(CntInst->getType())); + else + NewCount = Builder.CreateZExtOrTrunc(Count, + cast(CntInst->getType())); + + // If the CTLZ counter's initial value is not zero, insert Add Inst. + Value *CntInitVal = CntPhi->getIncomingValueForBlock(Preheader); + ConstantInt *InitConst = dyn_cast(CntInitVal); + if (!InitConst || !InitConst->isZero()) + NewCount = Builder.CreateAdd(NewCount, CntInitVal); + + // Step 2: Insert new IV and loop condition: + // loop: + // ... + // PhiCount = PHI [Count, Dec] + // ... + // Dec = PhiCount - 1 + // ... + // Br: loop if (Dec != 0) + BasicBlock *Body = *(CurLoop->block_begin()); + auto *LbBr = dyn_cast(Body->getTerminator()); + ICmpInst *LbCond = cast(LbBr->getCondition()); + Type *Ty = Count->getType(); + + PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front()); + + Builder.SetInsertPoint(LbCond); + Instruction *TcDec = cast( + Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1), + "tcdec", false, true)); + + TcPhi->addIncoming(Count, Preheader); + TcPhi->addIncoming(TcDec, Body); + + CmpInst::Predicate Pred = + (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; + LbCond->setPredicate(Pred); + LbCond->setOperand(0, TcDec); + LbCond->setOperand(1, ConstantInt::get(Ty, 0)); + + // Step 3: All the references to the original counter outside + // the loop are replaced with the NewCount -- the value returned from + // __builtin_ctlz(x). + if (IsCntPhiUsedOutsideLoop) + CntPhi->replaceUsesOutsideBlock(NewCount, Body); + else + CntInst->replaceUsesOutsideBlock(NewCount, Body); + + // step 4: Forget the "non-computable" trip-count SCEV associated with the + // loop. The loop would otherwise not be deleted even if it becomes empty. + SE->forgetLoop(CurLoop); +} + void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, PHINode *CntPhi, Value *Var) { diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 28e71ca05436cb8d2e692e831815a8930e0b6027..af095560cc0259c1a368581096701b2569771d38 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -77,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI, // Don't bother simplifying unused instructions. if (!I->use_empty()) { - Value *V = SimplifyInstruction(I, DL, TLI, DT, AC); + Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC}); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp index cf63cb660db8cd87a70623feb7c74a6c7c72bcf4..20b37c4b70e6d0dabcdbb3b17e9f8c30a63bdf04 100644 --- a/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -197,8 +197,7 @@ public: continue; // Only progagate the value if they are of the same type. - if (Store->getPointerOperand()->getType() != - Load->getPointerOperand()->getType()) + if (Store->getPointerOperandType() != Load->getPointerOperandType()) continue; Candidates.emplace_front(Load, Store); diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp index 0ce60442932615cdb6f8ee6190dacf10b9efc9b0..9b12ba180444b58989430f1811ce29b023acf2b9 100644 --- a/lib/Transforms/Scalar/LoopPredication.cpp +++ b/lib/Transforms/Scalar/LoopPredication.cpp @@ -37,7 +37,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopPredication.h" -#include "llvm/Pass.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -48,6 +47,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -58,12 +58,30 @@ using namespace llvm; namespace { class LoopPredication { + /// Represents an induction variable check: + /// icmp Pred, , + struct LoopICmp { + ICmpInst::Predicate Pred; + const SCEVAddRecExpr *IV; + const SCEV *Limit; + LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV, + const SCEV *Limit) + : Pred(Pred), IV(IV), Limit(Limit) {} + LoopICmp() {} + }; + ScalarEvolution *SE; Loop *L; const DataLayout *DL; BasicBlock *Preheader; + Optional parseLoopICmp(ICmpInst *ICI); + + Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder, + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, + Instruction *InsertAt); + Optional widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, IRBuilder<> &Builder); bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander); @@ -116,16 +134,10 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, return getLoopPassPreservedAnalyses(); } -/// If ICI can be widened to a loop invariant condition emits the loop -/// invariant condition in the loop preheader and return it, otherwise -/// returns None. -Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, - SCEVExpander &Expander, - IRBuilder<> &Builder) { - DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); - DEBUG(ICI->dump()); - +Optional +LoopPredication::parseLoopICmp(ICmpInst *ICI) { ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); const SCEV *LHSS = SE->getSCEV(LHS); @@ -135,17 +147,54 @@ Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, if (isa(RHSS)) return None; - // Canonicalize RHS to be loop invariant bound, LHS - a loop computable index + // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV if (SE->isLoopInvariant(LHSS, L)) { std::swap(LHS, RHS); std::swap(LHSS, RHSS); Pred = ICmpInst::getSwappedPredicate(Pred); } - if (!SE->isLoopInvariant(RHSS, L) || !isSafeToExpand(RHSS, *SE)) + + const SCEVAddRecExpr *AR = dyn_cast(LHSS); + if (!AR || AR->getLoop() != L) return None; - const SCEVAddRecExpr *IndexAR = dyn_cast(LHSS); - if (!IndexAR || IndexAR->getLoop() != L) + return LoopICmp(Pred, AR, RHSS); +} + +Value *LoopPredication::expandCheck(SCEVExpander &Expander, + IRBuilder<> &Builder, + ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, Instruction *InsertAt) { + Type *Ty = LHS->getType(); + assert(Ty == RHS->getType() && "expandCheck operands have different types?"); + Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt); + Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt); + return Builder.CreateICmp(Pred, LHSV, RHSV); +} + +/// If ICI can be widened to a loop invariant condition emits the loop +/// invariant condition in the loop preheader and return it, otherwise +/// returns None. +Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, + SCEVExpander &Expander, + IRBuilder<> &Builder) { + DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); + DEBUG(ICI->dump()); + + auto RangeCheck = parseLoopICmp(ICI); + if (!RangeCheck) { + DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); + return None; + } + + ICmpInst::Predicate Pred = RangeCheck->Pred; + const SCEVAddRecExpr *IndexAR = RangeCheck->IV; + const SCEV *RHSS = RangeCheck->Limit; + + auto CanExpand = [this](const SCEV *S) { + return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); + }; + if (!CanExpand(RHSS)) return None; DEBUG(dbgs() << "IndexAR: "); @@ -170,17 +219,13 @@ Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, DEBUG(dbgs() << "NewLHSS: "); DEBUG(NewLHSS->dump()); - if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE)) + if (!CanExpand(NewLHSS)) return None; DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n"); - Type *Ty = LHS->getType(); Instruction *InsertAt = Preheader->getTerminator(); - assert(Ty == RHS->getType() && "icmp operands have different types?"); - Value *NewLHS = Expander.expandCodeFor(NewLHSS, Ty, InsertAt); - Value *NewRHS = Expander.expandCodeFor(RHSS, Ty, InsertAt); - return Builder.CreateICmp(Pred, NewLHS, NewRHS); + return expandCheck(Expander, Builder, Pred, NewLHSS, RHSS, InsertAt); } bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, @@ -272,6 +317,9 @@ bool LoopPredication::runOnLoop(Loop *Loop) { if (II->getIntrinsicID() == Intrinsic::experimental_guard) Guards.push_back(II); + if (Guards.empty()) + return false; + SCEVExpander Expander(*SE, *DL, "loop-predication"); bool Changed = false; diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 86058fe0b1aaab64753528dec24d1018bafdaf8e..fc0216e76a5bba00625206def2f20fd76ab520a2 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -11,10 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -31,6 +30,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -557,7 +557,7 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) { Instruction *UUser = dyn_cast(UU); // Skip SExt if we are extending an nsw value // TODO: Allow ZExt too - if (BO->hasNoSignedWrap() && UUser && UUser->getNumUses() == 1 && + if (BO->hasNoSignedWrap() && UUser && UUser->hasOneUse() && isa(UUser)) UUser = dyn_cast(*(UUser->user_begin())); if (!isCompareUsedByBranch(UUser)) @@ -852,7 +852,7 @@ collectPossibleRoots(Instruction *Base, std::map &Roots) { for (auto &KV : Roots) { if (KV.first == 0) continue; - if (KV.second->getNumUses() != NumBaseUses) { + if (!KV.second->hasNUses(NumBaseUses)) { DEBUG(dbgs() << "LRR: Aborting - Root and Base #users not the same: " << "#Base=" << NumBaseUses << ", #Root=" << KV.second->getNumUses() << "\n"); @@ -867,7 +867,7 @@ void LoopReroll::DAGRootTracker:: findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) { // Does the user look like it could be part of a root set? // All its users must be simple arithmetic ops. - if (I->getNumUses() > IL_MaxRerollIterations) + if (I->hasNUsesOrMore(IL_MaxRerollIterations + 1)) return; if (I != IV && findRootsBase(I, SubsumedInsts)) diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index e5689368de80d38073e51fd376630911e5a139cb..7312d97f8efe126d580eb1d5c18b6583d373c318 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -58,13 +58,14 @@ class LoopRotate { AssumptionCache *AC; DominatorTree *DT; ScalarEvolution *SE; + const SimplifyQuery &SQ; public: LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, - DominatorTree *DT, ScalarEvolution *SE) - : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE) { - } + DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ) + : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), + SQ(SQ) {} bool processLoop(Loop *L); private: @@ -311,8 +312,6 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { for (; PHINode *PN = dyn_cast(I); ++I) ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - // For the rest of the instructions, either hoist to the OrigPreheader if // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); @@ -342,14 +341,13 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // With the operands remapped, see if the instruction constant folds or is // otherwise simplifyable. This commonly occurs because the entry from PHI // nodes allows icmps and other instructions to fold. - // FIXME: Provide TLI, DT, AC to SimplifyInstruction. - Value *V = SimplifyInstruction(C, DL); + Value *V = SimplifyInstruction(C, SQ); if (V && LI->replacementPreservesLCSSAForm(C, V)) { // If so, then delete the temporary instruction and stick the folded value // in the map. ValueMap[Inst] = V; if (!C->mayHaveSideEffects()) { - delete C; + C->deleteValue(); C = nullptr; } } else { @@ -671,7 +669,10 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0; - LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE); + const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); + const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL); + LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, + SQ); bool Changed = LR.processLoop(&L); if (!Changed) @@ -714,7 +715,8 @@ public: auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *SEWP = getAnalysisIfAvailable(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; - LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE); + const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); + LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ); return LR.processLoop(L); } }; diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index a5a81c33a8ebd1f36044faca0c29c1877154e0b5..35c05e84fd68d85b73f7f9c4291920f7ef677b15 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -40,7 +40,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) { bool Changed = false; // Copy blocks into a temporary array to avoid iterator invalidation issues // as we remove them. - SmallVector Blocks(L.blocks()); + SmallVector Blocks(L.blocks()); for (auto &Block : Blocks) { // Attempt to merge blocks in the trivial case. Don't modify blocks which diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 1dad080efbff44ee51edf5ff471b13576d01847f..b027278b24f2ed48035759d46d1c6c8410df510c 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -131,7 +131,7 @@ static cl::opt EnablePhiElim( // The flag adds instruction count to solutions cost comparision. static cl::opt InsnsCost( - "lsr-insns-cost", cl::Hidden, cl::init(false), + "lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model")); // Flag to choose how to narrow complex lsr solution @@ -900,7 +900,7 @@ static bool isHighCostExpansion(const SCEV *S, /// If any of the instructions is the specified set are trivially dead, delete /// them and see if this makes any of their operands subsequently dead. static bool -DeleteTriviallyDeadInstructions(SmallVectorImpl &DeadInsts) { +DeleteTriviallyDeadInstructions(SmallVectorImpl &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { @@ -950,39 +950,37 @@ namespace { /// This class is used to measure and compare candidate formulae. class Cost { - /// TODO: Some of these could be merged. Also, a lexical ordering - /// isn't always optimal. - unsigned Insns; - unsigned NumRegs; - unsigned AddRecCost; - unsigned NumIVMuls; - unsigned NumBaseAdds; - unsigned ImmCost; - unsigned SetupCost; - unsigned ScaleCost; + TargetTransformInfo::LSRCost C; public: - Cost() - : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), - ImmCost(0), SetupCost(0), ScaleCost(0) {} + Cost() { + C.Insns = 0; + C.NumRegs = 0; + C.AddRecCost = 0; + C.NumIVMuls = 0; + C.NumBaseAdds = 0; + C.ImmCost = 0; + C.SetupCost = 0; + C.ScaleCost = 0; + } - bool operator<(const Cost &Other) const; + bool isLess(Cost &Other, const TargetTransformInfo &TTI); void Lose(); #ifndef NDEBUG // Once any of the metrics loses, they must all remain losers. bool isValid() { - return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds - | ImmCost | SetupCost | ScaleCost) != ~0u) - || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds - & ImmCost & SetupCost & ScaleCost) == ~0u); + return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds + | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u) + || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds + & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u); } #endif bool isLoser() { assert(isValid() && "invalid cost"); - return NumRegs == ~0u; + return C.NumRegs == ~0u; } void RateFormula(const TargetTransformInfo &TTI, @@ -1170,10 +1168,10 @@ void Cost::RateRegister(const SCEV *Reg, } // Otherwise, it will be an invariant with respect to Loop L. - ++NumRegs; + ++C.NumRegs; return; } - AddRecCost += 1; /// TODO: This should be a function of the stride. + C.AddRecCost += 1; /// TODO: This should be a function of the stride. // Add the step value register, if it needs one. // TODO: The non-affine case isn't precisely modeled here. @@ -1185,7 +1183,7 @@ void Cost::RateRegister(const SCEV *Reg, } } } - ++NumRegs; + ++C.NumRegs; // Rough heuristic; favor registers which don't require extra setup // instructions in the preheader. @@ -1194,9 +1192,9 @@ void Cost::RateRegister(const SCEV *Reg, !(isa(Reg) && (isa(cast(Reg)->getStart()) || isa(cast(Reg)->getStart())))) - ++SetupCost; + ++C.SetupCost; - NumIVMuls += isa(Reg) && + C.NumIVMuls += isa(Reg) && SE.hasComputableLoopEvolution(Reg, L); } @@ -1229,9 +1227,9 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, SmallPtrSetImpl *LoserRegs) { assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula"); // Tally up the registers. - unsigned PrevAddRecCost = AddRecCost; - unsigned PrevNumRegs = NumRegs; - unsigned PrevNumBaseAdds = NumBaseAdds; + unsigned PrevAddRecCost = C.AddRecCost; + unsigned PrevNumRegs = C.NumRegs; + unsigned PrevNumBaseAdds = C.NumBaseAdds; if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { Lose(); @@ -1251,45 +1249,51 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, return; } - // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as - // additional instruction (at least fill). - unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1; - if (NumRegs > TTIRegNum) { - // Cost already exceeded TTIRegNum, then only newly added register can add - // new instructions. - if (PrevNumRegs > TTIRegNum) - Insns += (NumRegs - PrevNumRegs); - else - Insns += (NumRegs - TTIRegNum); - } - // Determine how many (unfolded) adds we'll need inside the loop. size_t NumBaseParts = F.getNumRegs(); if (NumBaseParts > 1) // Do not count the base and a possible second register if the target // allows to fold 2 registers. - NumBaseAdds += + C.NumBaseAdds += NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F))); - NumBaseAdds += (F.UnfoldedOffset != 0); + C.NumBaseAdds += (F.UnfoldedOffset != 0); // Accumulate non-free scaling amounts. - ScaleCost += getScalingFactorCost(TTI, LU, F, *L); + C.ScaleCost += getScalingFactorCost(TTI, LU, F, *L); // Tally up the non-zero immediates. for (const LSRFixup &Fixup : LU.Fixups) { int64_t O = Fixup.Offset; int64_t Offset = (uint64_t)O + F.BaseOffset; if (F.BaseGV) - ImmCost += 64; // Handle symbolic values conservatively. + C.ImmCost += 64; // Handle symbolic values conservatively. // TODO: This should probably be the pointer size. else if (Offset != 0) - ImmCost += APInt(64, Offset, true).getMinSignedBits(); + C.ImmCost += APInt(64, Offset, true).getMinSignedBits(); // Check with target if this offset with this instruction is // specifically not supported. if ((isa(Fixup.UserInst) || isa(Fixup.UserInst)) && !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset)) - NumBaseAdds++; + C.NumBaseAdds++; + } + + // If we don't count instruction cost exit here. + if (!InsnsCost) { + assert(isValid() && "invalid cost"); + return; + } + + // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as + // additional instruction (at least fill). + unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1; + if (C.NumRegs > TTIRegNum) { + // Cost already exceeded TTIRegNum, then only newly added register can add + // new instructions. + if (PrevNumRegs > TTIRegNum) + C.Insns += (C.NumRegs - PrevNumRegs); + else + C.Insns += (C.NumRegs - TTIRegNum); } // If ICmpZero formula ends with not 0, it could not be replaced by @@ -1302,55 +1306,54 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, // For {-10, +, 1}: // i = i + 1; if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd()) - Insns++; + C.Insns++; // Each new AddRec adds 1 instruction to calculation. - Insns += (AddRecCost - PrevAddRecCost); + C.Insns += (C.AddRecCost - PrevAddRecCost); // BaseAdds adds instructions for unfolded registers. if (LU.Kind != LSRUse::ICmpZero) - Insns += NumBaseAdds - PrevNumBaseAdds; + C.Insns += C.NumBaseAdds - PrevNumBaseAdds; assert(isValid() && "invalid cost"); } /// Set this cost to a losing value. void Cost::Lose() { - Insns = ~0u; - NumRegs = ~0u; - AddRecCost = ~0u; - NumIVMuls = ~0u; - NumBaseAdds = ~0u; - ImmCost = ~0u; - SetupCost = ~0u; - ScaleCost = ~0u; + C.Insns = ~0u; + C.NumRegs = ~0u; + C.AddRecCost = ~0u; + C.NumIVMuls = ~0u; + C.NumBaseAdds = ~0u; + C.ImmCost = ~0u; + C.SetupCost = ~0u; + C.ScaleCost = ~0u; } /// Choose the lower cost. -bool Cost::operator<(const Cost &Other) const { - if (InsnsCost && Insns != Other.Insns) - return Insns < Other.Insns; - return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost, - ImmCost, SetupCost) < - std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls, - Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost, - Other.SetupCost); +bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) { + if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && + C.Insns != Other.C.Insns) + return C.Insns < Other.C.Insns; + return TTI.isLSRCostLess(C, Other.C); } void Cost::print(raw_ostream &OS) const { - OS << Insns << " instruction" << (Insns == 1 ? " " : "s "); - OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s"); - if (AddRecCost != 0) - OS << ", with addrec cost " << AddRecCost; - if (NumIVMuls != 0) - OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s"); - if (NumBaseAdds != 0) - OS << ", plus " << NumBaseAdds << " base add" - << (NumBaseAdds == 1 ? "" : "s"); - if (ScaleCost != 0) - OS << ", plus " << ScaleCost << " scale cost"; - if (ImmCost != 0) - OS << ", plus " << ImmCost << " imm cost"; - if (SetupCost != 0) - OS << ", plus " << SetupCost << " setup cost"; + if (InsnsCost) + OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s "); + OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s"); + if (C.AddRecCost != 0) + OS << ", with addrec cost " << C.AddRecCost; + if (C.NumIVMuls != 0) + OS << ", plus " << C.NumIVMuls << " IV mul" + << (C.NumIVMuls == 1 ? "" : "s"); + if (C.NumBaseAdds != 0) + OS << ", plus " << C.NumBaseAdds << " base add" + << (C.NumBaseAdds == 1 ? "" : "s"); + if (C.ScaleCost != 0) + OS << ", plus " << C.ScaleCost << " scale cost"; + if (C.ImmCost != 0) + OS << ", plus " << C.ImmCost << " imm cost"; + if (C.SetupCost != 0) + OS << ", plus " << C.SetupCost << " setup cost"; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1845,7 +1848,7 @@ class LSRInstance { void FinalizeChain(IVChain &Chain); void CollectChains(); void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts); + SmallVectorImpl &DeadInsts); void CollectInterestingTypesAndFactors(); void CollectFixupsAndInitialFormulae(); @@ -1920,19 +1923,15 @@ class LSRInstance { const LSRUse &LU, SCEVExpander &Rewriter) const; - Value *Expand(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, - SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts) const; + Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, + BasicBlock::iterator IP, SCEVExpander &Rewriter, + SmallVectorImpl &DeadInsts) const; void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts) const; - void Rewrite(const LSRUse &LU, const LSRFixup &LF, - const Formula &F, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl &DeadInsts) const; + void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts) const; + SmallVectorImpl &DeadInsts) const; void ImplementSolution(const SmallVectorImpl &Solution); public: @@ -3014,7 +3013,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, /// Generate an add or subtract for each IVInc in a chain to materialize the IV /// user's operand from the previous IV user's operand. void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts) { + SmallVectorImpl &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced // by LSR. const IVInc &Head = Chain.Incs[0]; @@ -3160,8 +3159,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. - N = TransformForPostIncUse(Normalize, N, CI, nullptr, - TmpPostIncLoops, SE, DT); + N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); } @@ -3810,6 +3808,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses)) continue; + F.canonicalize(*L); (void)InsertFormula(LU, LUIdx, F); } } @@ -3907,8 +3906,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // Compute the difference between the two. int64_t Imm = (uint64_t)JImm - M->first; - for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1; - LUIdx = UsedByIndices.find_next(LUIdx)) + for (unsigned LUIdx : UsedByIndices.set_bits()) // Make a memo of this use, offset, and register tuple. if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second) WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg)); @@ -4110,7 +4108,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { Cost CostBest; Regs.clear(); CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU); - if (CostF < CostBest) + if (CostF.isLess(CostBest, TTI)) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" @@ -4578,7 +4576,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, NewCost = CurCost; NewRegs = CurRegs; NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU); - if (NewCost < SolutionCost) { + if (NewCost.isLess(SolutionCost, TTI)) { Workspace.push_back(&F); if (Workspace.size() != Uses.size()) { SolveRecurse(Solution, SolutionCost, Workspace, NewCost, @@ -4760,12 +4758,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"). -Value *LSRInstance::Expand(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - BasicBlock::iterator IP, +Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, BasicBlock::iterator IP, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts) const { + SmallVectorImpl &DeadInsts) const { if (LU.RigidFormula) return LF.OperandValToReplace; @@ -4799,11 +4795,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, assert(!Reg->isZero() && "Zero allocated in a base register!"); // If we're expanding for a post-inc user, make the post-inc adjustment. - PostIncLoopSet &Loops = const_cast(LF.PostIncLoops); - Reg = TransformForPostIncUse(Denormalize, Reg, - LF.UserInst, LF.OperandValToReplace, - Loops, SE, DT); - + Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE); Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr))); } @@ -4814,9 +4806,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, // If we're expanding for a post-inc user, make the post-inc adjustment. PostIncLoopSet &Loops = const_cast(LF.PostIncLoops); - ScaledS = TransformForPostIncUse(Denormalize, ScaledS, - LF.UserInst, LF.OperandValToReplace, - Loops, SE, DT); + ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE); if (LU.Kind == LSRUse::ICmpZero) { // Expand ScaleReg as if it was part of the base regs. @@ -4946,12 +4936,9 @@ Value *LSRInstance::Expand(const LSRUse &LU, /// Helper for Rewrite. PHI nodes are special because the use of their operands /// effectively happens in their predecessor blocks, so the expression may need /// to be expanded in multiple places. -void LSRInstance::RewriteForPHI(PHINode *PN, - const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts) const { +void LSRInstance::RewriteForPHI( + PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, + SCEVExpander &Rewriter, SmallVectorImpl &DeadInsts) const { DenseMap Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -5023,11 +5010,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN, /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"), and update the UserInst to reference the newly /// expanded value. -void LSRInstance::Rewrite(const LSRUse &LU, - const LSRFixup &LF, - const Formula &F, - SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts) const { +void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, + const Formula &F, SCEVExpander &Rewriter, + SmallVectorImpl &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast(LF.UserInst)) { @@ -5065,7 +5050,7 @@ void LSRInstance::ImplementSolution( const SmallVectorImpl &Solution) { // Keep track of instructions we may have made dead, so that // we can remove them after we are done working. - SmallVector DeadInsts; + SmallVector DeadInsts; SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(), "lsr"); @@ -5315,7 +5300,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { - SmallVector DeadInsts; + SmallVector DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); SCEVExpander Rewriter(SE, DL, "lsr"); #ifndef NDEBUG diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index a99c9999c61912cfbc5bb9bbda52e6642178c6c4..d0c96fa627a4722c8273d72fdc9add419f4df017 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This pass transforms loops that contain branches on loop-invariant conditions -// to have multiple loops. For example, it turns the left into the right code: +// to multiple loops. For example, it turns the left into the right code: // // for (...) if (lic) // A for (...) @@ -26,34 +26,34 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Support/BranchProbability.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -831,7 +831,12 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, /// mapping the blocks with the specified map. static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM) { - Loop &New = LPM->addLoop(PL); + Loop &New = *new Loop(); + if (PL) + PL->addChildLoop(&New); + else + LI->addTopLevelLoop(&New); + LPM->addLoop(New); // Add all of the blocks in L to the new loop. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); @@ -1231,11 +1236,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, LoopProcessWorklist.push_back(NewLoop); redoLoop = true; - // Keep a WeakVH holding onto LIC. If the first call to RewriteLoopBody + // Keep a WeakTrackingVH holding onto LIC. If the first call to + // RewriteLoopBody // deletes the instruction (for example by simplifying a PHI that feeds into // the condition that we're unswitching on), we don't rewrite the second // iteration. - WeakVH LICHandle(LIC); + WeakTrackingVH LICHandle(LIC); // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. @@ -1262,7 +1268,7 @@ static void RemoveFromWorklist(Instruction *I, static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector &Worklist, Loop *L, LPPassManager *LPM) { - DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); + DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n"); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -1275,7 +1281,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, LPM->deleteSimpleAnalysisValue(I, L); RemoveFromWorklist(I, Worklist); I->replaceAllUsesWith(V); - I->eraseFromParent(); + if (!I->mayHaveSideEffects()) + I->eraseFromParent(); ++NumSimplify; } @@ -1431,7 +1438,7 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { // Simple DCE. if (isInstructionTriviallyDead(I)) { - DEBUG(dbgs() << "Remove dead instruction '" << *I); + DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n"); // Add uses to the worklist, which may be dead now. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index a143b9a3c645fc29774b469eee0d8b306ef9b964..46f8a356426562cd00c5ce690e2c7515e7eb52d3 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -83,6 +84,151 @@ static bool handleSwitchExpect(SwitchInst &SI) { return true; } +/// Handler for PHINodes that define the value argument to an +/// @llvm.expect call. +/// +/// If the operand of the phi has a constant value and it 'contradicts' +/// with the expected value of phi def, then the corresponding incoming +/// edge of the phi is unlikely to be taken. Using that information, +/// the branch probability info for the originating branch can be inferred. +static void handlePhiDef(CallInst *Expect) { + Value &Arg = *Expect->getArgOperand(0); + ConstantInt *ExpectedValue = dyn_cast(Expect->getArgOperand(1)); + if (!ExpectedValue) + return; + const APInt &ExpectedPhiValue = ExpectedValue->getValue(); + + // Walk up in backward a list of instructions that + // have 'copy' semantics by 'stripping' the copies + // until a PHI node or an instruction of unknown kind + // is reached. Negation via xor is also handled. + // + // C = PHI(...); + // B = C; + // A = B; + // D = __builtin_expect(A, 0); + // + Value *V = &Arg; + SmallVector Operations; + while (!isa(V)) { + if (ZExtInst *ZExt = dyn_cast(V)) { + V = ZExt->getOperand(0); + Operations.push_back(ZExt); + continue; + } + + if (SExtInst *SExt = dyn_cast(V)) { + V = SExt->getOperand(0); + Operations.push_back(SExt); + continue; + } + + BinaryOperator *BinOp = dyn_cast(V); + if (!BinOp || BinOp->getOpcode() != Instruction::Xor) + return; + + ConstantInt *CInt = dyn_cast(BinOp->getOperand(1)); + if (!CInt) + return; + + V = BinOp->getOperand(0); + Operations.push_back(BinOp); + } + + // Executes the recorded operations on input 'Value'. + auto ApplyOperations = [&](const APInt &Value) { + APInt Result = Value; + for (auto Op : llvm::reverse(Operations)) { + switch (Op->getOpcode()) { + case Instruction::Xor: + Result ^= cast(Op->getOperand(1))->getValue(); + break; + case Instruction::ZExt: + Result = Result.zext(Op->getType()->getIntegerBitWidth()); + break; + case Instruction::SExt: + Result = Result.sext(Op->getType()->getIntegerBitWidth()); + break; + default: + llvm_unreachable("Unexpected operation"); + } + } + return Result; + }; + + auto *PhiDef = dyn_cast(V); + + // Get the first dominating conditional branch of the operand + // i's incoming block. + auto GetDomConditional = [&](unsigned i) -> BranchInst * { + BasicBlock *BB = PhiDef->getIncomingBlock(i); + BranchInst *BI = dyn_cast(BB->getTerminator()); + if (BI && BI->isConditional()) + return BI; + BB = BB->getSinglePredecessor(); + if (!BB) + return nullptr; + BI = dyn_cast(BB->getTerminator()); + if (!BI || BI->isUnconditional()) + return nullptr; + return BI; + }; + + // Now walk through all Phi operands to find phi oprerands with values + // conflicting with the expected phi output value. Any such operand + // indicates the incoming edge to that operand is unlikely. + for (unsigned i = 0, e = PhiDef->getNumIncomingValues(); i != e; ++i) { + + Value *PhiOpnd = PhiDef->getIncomingValue(i); + ConstantInt *CI = dyn_cast(PhiOpnd); + if (!CI) + continue; + + // Not an interesting case when IsUnlikely is false -- we can not infer + // anything useful when the operand value matches the expected phi + // output. + if (ExpectedPhiValue == ApplyOperations(CI->getValue())) + continue; + + BranchInst *BI = GetDomConditional(i); + if (!BI) + continue; + + MDBuilder MDB(PhiDef->getContext()); + + // There are two situations in which an operand of the PhiDef comes + // from a given successor of a branch instruction BI. + // 1) When the incoming block of the operand is the successor block; + // 2) When the incoming block is BI's enclosing block and the + // successor is the PhiDef's enclosing block. + // + // Returns true if the operand which comes from OpndIncomingBB + // comes from outgoing edge of BI that leads to Succ block. + auto *OpndIncomingBB = PhiDef->getIncomingBlock(i); + auto IsOpndComingFromSuccessor = [&](BasicBlock *Succ) { + if (OpndIncomingBB == Succ) + // If this successor is the incoming block for this + // Phi operand, then this successor does lead to the Phi. + return true; + if (OpndIncomingBB == BI->getParent() && Succ == PhiDef->getParent()) + // Otherwise, if the edge is directly from the branch + // to the Phi, this successor is the one feeding this + // Phi operand. + return true; + return false; + }; + + if (IsOpndComingFromSuccessor(BI->getSuccessor(1))) + BI->setMetadata( + LLVMContext::MD_prof, + MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight)); + else if (IsOpndComingFromSuccessor(BI->getSuccessor(0))) + BI->setMetadata( + LLVMContext::MD_prof, + MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight)); + } +} + // Handle both BranchInst and SelectInst. template static bool handleBrSelExpect(BrSelInst &BSI) { @@ -98,10 +244,18 @@ template static bool handleBrSelExpect(BrSelInst &BSI) { CallInst *CI; ICmpInst *CmpI = dyn_cast(BSI.getCondition()); + CmpInst::Predicate Predicate; + ConstantInt *CmpConstOperand = nullptr; if (!CmpI) { CI = dyn_cast(BSI.getCondition()); + Predicate = CmpInst::ICMP_NE; } else { - if (CmpI->getPredicate() != CmpInst::ICMP_NE) + Predicate = CmpI->getPredicate(); + if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ) + return false; + + CmpConstOperand = dyn_cast(CmpI->getOperand(1)); + if (!CmpConstOperand) return false; CI = dyn_cast(CmpI->getOperand(0)); } @@ -109,6 +263,13 @@ template static bool handleBrSelExpect(BrSelInst &BSI) { if (!CI) return false; + uint64_t ValueComparedTo = 0; + if (CmpConstOperand) { + if (CmpConstOperand->getBitWidth() > 64) + return false; + ValueComparedTo = CmpConstOperand->getZExtValue(); + } + Function *Fn = CI->getCalledFunction(); if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) return false; @@ -121,9 +282,8 @@ template static bool handleBrSelExpect(BrSelInst &BSI) { MDBuilder MDB(CI->getContext()); MDNode *Node; - // If expect value is equal to 1 it means that we are more likely to take - // branch 0, in other case more likely is branch 1. - if (ExpectedValue->isOne()) + if ((ExpectedValue->getZExtValue() == ValueComparedTo) == + (Predicate == CmpInst::ICMP_EQ)) Node = MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight); else Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight); @@ -173,6 +333,10 @@ static bool lowerExpectIntrinsic(Function &F) { Function *Fn = CI->getCalledFunction(); if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) { + // Before erasing the llvm.expect, walk backward to find + // phi that define llvm.expect's first arg, and + // infer branch probability: + handlePhiDef(CI); Value *Exp = CI->getArgOperand(0); CI->replaceAllUsesWith(Exp); CI->eraseFromParent(); diff --git a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp index 4f413715ffe68f52e34acbe7908ff8cd1ec2973b..070114a84cc50c0921d75e38ae5c11ba564a7a19 100644 --- a/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp @@ -17,10 +17,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a3f3f25c1e0f6c6b4cfe7eb56a7f922617453881..7896396f0898d15871f23fafcc594731f65778d4 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -12,11 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" @@ -31,12 +32,12 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" @@ -49,7 +50,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/Transforms/Utils/Local.h" #include #include @@ -1323,7 +1323,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // Get the alignment of the byval. If the call doesn't specify the alignment, // then it is some target specific value that we can't know. - unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); + unsigned ByValAlign = CS.getParamAlignment(ArgNo); if (ByValAlign == 0) return false; // If it is greater than the memcpy, then we check to see if we can force the diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index c5bf2f28d1852fb406f33a1a88c9b65c593ce1e3..d0bfe3603897306b8a92fef7a1d8fab18c5fda61 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -211,7 +211,8 @@ bool NaryReassociatePass::doOneIteration(Function &F) { Changed = true; SE->forgetValue(&*I); I->replaceAllUsesWith(NewI); - // If SeenExprs constains I's WeakVH, that entry will be replaced with + // If SeenExprs constains I's WeakTrackingVH, that entry will be + // replaced with // nullptr. RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI); I = NewI->getIterator(); @@ -219,7 +220,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // Add the rewritten instruction to SeenExprs; the original instruction // is deleted. const SCEV *NewSCEV = SE->getSCEV(&*I); - SeenExprs[NewSCEV].push_back(WeakVH(&*I)); + SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I)); // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I) // is equivalent to I. However, ScalarEvolution::getSCEV may // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose @@ -239,7 +240,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) { // // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll. if (NewSCEV != OldSCEV) - SeenExprs[OldSCEV].push_back(WeakVH(&*I)); + SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I)); } } } @@ -494,7 +495,8 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr, // future instruction either. Therefore, we pop it out of the stack. This // optimization makes the algorithm O(n). while (!Candidates.empty()) { - // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed + // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's + // removed // during rewriting. if (Value *Candidate = Candidates.back()) { Instruction *CandidateInstruction = cast(Candidate); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 6e58b5f8128309e002f1efff4f6ad42d716c27f7..cbbd55512c9f51b936fe34506196b59c90cecf39 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -30,9 +30,19 @@ /// tracks what operations have a given value number (IE it also tracks the /// reverse mapping from value number -> operations with that value number), so /// that it only needs to reprocess the instructions that are affected when -/// something's value number changes. The rest of the algorithm is devoted to -/// performing symbolic evaluation, forward propagation, and simplification of -/// operations based on the value numbers deduced so far. +/// something's value number changes. The vast majority of complexity and code +/// in this file is devoted to tracking what value numbers could change for what +/// instructions when various things happen. The rest of the algorithm is +/// devoted to performing symbolic evaluation, forward propagation, and +/// simplification of operations based on the value numbers deduced so far +/// +/// In order to make the GVN mostly-complete, we use a technique derived from +/// "Detection of Redundant Expressions: A Complete and Polynomial-time +/// Algorithm in SSA" by R.R. Pai. The source of incompleteness in most SSA +/// based GVN algorithms is related to their inability to detect equivalence +/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)). +/// We resolve this issue by generating the equivalent "phi of ops" form for +/// each op of phis we see, in a way that only takes polynomial time to resolve. /// /// We also do not perform elimination by using any published algorithm. All /// published algorithms are O(Instructions). Instead, we use a technique that @@ -51,7 +61,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -63,6 +72,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -81,7 +91,6 @@ #include "llvm/Transforms/Scalar/GVNExpression.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Analysis/MemorySSA.h" #include "llvm/Transforms/Utils/PredicateInfo.h" #include "llvm/Transforms/Utils/VNCoercion.h" #include @@ -104,12 +113,14 @@ STATISTIC(NumGVNLeaderChanges, "Number of leader changes"); STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes"); STATISTIC(NumGVNAvoidedSortedLeaderChanges, "Number of avoided sorted leader changes"); -STATISTIC(NumGVNNotMostDominatingLeader, - "Number of times a member dominated it's new classes' leader"); STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated"); +STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created"); +STATISTIC(NumGVNPHIOfOpsEliminations, + "Number of things eliminated using PHI of ops"); DEBUG_COUNTER(VNCounter, "newgvn-vn", "Controls which instructions are value numbered") - +DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi", + "Controls which instructions we create phi of ops for") // Currently store defining access refinement is too slow due to basicaa being // egregiously slow. This flag lets us keep it working while we work on this // issue. @@ -133,6 +144,79 @@ PHIExpression::~PHIExpression() = default; } } +// Tarjan's SCC finding algorithm with Nuutila's improvements +// SCCIterator is actually fairly complex for the simple thing we want. +// It also wants to hand us SCC's that are unrelated to the phi node we ask +// about, and have us process them there or risk redoing work. +// Graph traits over a filter iterator also doesn't work that well here. +// This SCC finder is specialized to walk use-def chains, and only follows +// instructions, +// not generic values (arguments, etc). +struct TarjanSCC { + + TarjanSCC() : Components(1) {} + + void Start(const Instruction *Start) { + if (Root.lookup(Start) == 0) + FindSCC(Start); + } + + const SmallPtrSetImpl &getComponentFor(const Value *V) const { + unsigned ComponentID = ValueToComponent.lookup(V); + + assert(ComponentID > 0 && + "Asking for a component for a value we never processed"); + return Components[ComponentID]; + } + +private: + void FindSCC(const Instruction *I) { + Root[I] = ++DFSNum; + // Store the DFS Number we had before it possibly gets incremented. + unsigned int OurDFS = DFSNum; + for (auto &Op : I->operands()) { + if (auto *InstOp = dyn_cast(Op)) { + if (Root.lookup(Op) == 0) + FindSCC(InstOp); + if (!InComponent.count(Op)) + Root[I] = std::min(Root.lookup(I), Root.lookup(Op)); + } + } + // See if we really were the root of a component, by seeing if we still have + // our DFSNumber. If we do, we are the root of the component, and we have + // completed a component. If we do not, we are not the root of a component, + // and belong on the component stack. + if (Root.lookup(I) == OurDFS) { + unsigned ComponentID = Components.size(); + Components.resize(Components.size() + 1); + auto &Component = Components.back(); + Component.insert(I); + DEBUG(dbgs() << "Component root is " << *I << "\n"); + InComponent.insert(I); + ValueToComponent[I] = ComponentID; + // Pop a component off the stack and label it. + while (!Stack.empty() && Root.lookup(Stack.back()) >= OurDFS) { + auto *Member = Stack.back(); + DEBUG(dbgs() << "Component member is " << *Member << "\n"); + Component.insert(Member); + InComponent.insert(Member); + ValueToComponent[Member] = ComponentID; + Stack.pop_back(); + } + } else { + // Part of a component, push to stack + Stack.push_back(I); + } + } + unsigned int DFSNum = 1; + SmallPtrSet InComponent; + DenseMap Root; + SmallVector Stack; + // Store the components as vector of ptr sets, because we need the topo order + // of SCC's, but not individual member order + SmallVector, 8> Components; + DenseMap ValueToComponent; +}; // Congruence classes represent the set of expressions/instructions // that are all the same *during some scope in the function*. // That is, because of the way we perform equality propagation, and @@ -209,7 +293,6 @@ public: // Forward propagation info const Expression *getDefiningExpr() const { return DefiningExpr; } - void setDefiningExpr(const Expression *E) { DefiningExpr = E; } // Value member set bool empty() const { return Members.empty(); } @@ -243,6 +326,9 @@ public: --StoreCount; } + // True if this class has no memory members. + bool definesNoMemory() const { return StoreCount == 0 && memory_empty(); } + // Return true if two congruence classes are equivalent to each other. This // means // that every field but the ID number and the dead field are equivalent. @@ -292,6 +378,15 @@ private: }; namespace llvm { +struct ExactEqualsExpression { + const Expression &E; + explicit ExactEqualsExpression(const Expression &E) : E(E) {} + hash_code getComputedHash() const { return E.getComputedHash(); } + bool operator==(const Expression &Other) const { + return E.exactlyEquals(Other); + } +}; + template <> struct DenseMapInfo { static const Expression *getEmptyKey() { auto Val = static_cast(-1); @@ -303,15 +398,30 @@ template <> struct DenseMapInfo { Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return reinterpret_cast(Val); } - static unsigned getHashValue(const Expression *V) { - return static_cast(V->getHashValue()); + static unsigned getHashValue(const Expression *E) { + return E->getComputedHash(); } + static unsigned getHashValue(const ExactEqualsExpression &E) { + return E.getComputedHash(); + } + static bool isEqual(const ExactEqualsExpression &LHS, const Expression *RHS) { + if (RHS == getTombstoneKey() || RHS == getEmptyKey()) + return false; + return LHS == *RHS; + } + static bool isEqual(const Expression *LHS, const Expression *RHS) { if (LHS == RHS) return true; if (LHS == getTombstoneKey() || RHS == getTombstoneKey() || LHS == getEmptyKey() || RHS == getEmptyKey()) return false; + // Compare hashes before equality. This is *not* what the hashtable does, + // since it is computing it modulo the number of buckets, whereas we are + // using the full hash keyspace. Since the hashes are precomputed, this + // check is *much* faster than equality. + if (LHS->getComputedHash() != RHS->getComputedHash()) + return false; return *LHS == *RHS; } }; @@ -321,19 +431,26 @@ namespace { class NewGVN { Function &F; DominatorTree *DT; - AssumptionCache *AC; const TargetLibraryInfo *TLI; AliasAnalysis *AA; MemorySSA *MSSA; MemorySSAWalker *MSSAWalker; const DataLayout &DL; std::unique_ptr PredInfo; - BumpPtrAllocator ExpressionAllocator; - ArrayRecycler ArgRecycler; + + // These are the only two things the create* functions should have + // side-effects on due to allocating memory. + mutable BumpPtrAllocator ExpressionAllocator; + mutable ArrayRecycler ArgRecycler; + mutable TarjanSCC SCCFinder; + const SimplifyQuery SQ; // Number of function arguments, used by ranking unsigned int NumFuncArgs; + // RPOOrdering of basic blocks + DenseMap RPOOrdering; + // Congruence class info. // This class is called INITIAL in the paper. It is the class everything @@ -347,16 +464,44 @@ class NewGVN { // Value Mappings. DenseMap ValueToClass; DenseMap ValueToExpression; + // Value PHI handling, used to make equivalence between phi(op, op) and + // op(phi, phi). + // These mappings just store various data that would normally be part of the + // IR. + DenseSet PHINodeUses; + // Map a temporary instruction we created to a parent block. + DenseMap TempToBlock; + // Map between the temporary phis we created and the real instructions they + // are known equivalent to. + DenseMap RealToTemp; + // In order to know when we should re-process instructions that have + // phi-of-ops, we track the set of expressions that they needed as + // leaders. When we discover new leaders for those expressions, we process the + // associated phi-of-op instructions again in case they have changed. The + // other way they may change is if they had leaders, and those leaders + // disappear. However, at the point they have leaders, there are uses of the + // relevant operands in the created phi node, and so they will get reprocessed + // through the normal user marking we perform. + mutable DenseMap> AdditionalUsers; + DenseMap> + ExpressionToPhiOfOps; + // Map from basic block to the temporary operations we created + DenseMap> PHIOfOpsPHIs; + // Map from temporary operation to MemoryAccess. + DenseMap TempToMemory; + // Set of all temporary instructions we created. + DenseSet AllTempInstructions; // Mapping from predicate info we used to the instructions we used it with. // In order to correctly ensure propagation, we must keep track of what // comparisons we used, so that when the values of the comparisons change, we // propagate the information to the places we used the comparison. - DenseMap> PredicateToUsers; - // Mapping from MemoryAccess we used to the MemoryAccess we used it with. Has + mutable DenseMap> + PredicateToUsers; // the same reasoning as PredicateToUsers. When we skip MemoryAccesses for // stores, we no longer can rely solely on the def-use chains of MemorySSA. - DenseMap> MemoryToUsers; + mutable DenseMap> + MemoryToUsers; // A table storing which memorydefs/phis represent a memory state provably // equivalent to another memory state. @@ -378,10 +523,19 @@ class NewGVN { enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique }; DenseMap MemoryPhiState; + enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle }; + mutable DenseMap InstCycleState; // Expression to class mapping. using ExpressionClassMap = DenseMap; ExpressionClassMap ExpressionToClass; + // We have a single expression that represents currently DeadExpressions. + // For dead expressions we can prove will stay dead, we mark them with + // DFS number zero. However, it's possible in the case of phi nodes + // for us to assume/prove all arguments are dead during fixpointing. + // We use DeadExpression for that case. + DeadExpression *SingletonDeadExpression = nullptr; + // Which values have changed as a result of leader changes. SmallPtrSet LeaderChanges; @@ -424,26 +578,32 @@ public: NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA, const DataLayout &DL) - : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), - PredInfo(make_unique(F, *DT, *AC)) {} + : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), + PredInfo(make_unique(F, *DT, *AC)), SQ(DL, TLI, DT, AC) { + } bool runGVN(); private: // Expression handling. - const Expression *createExpression(Instruction *); - const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *); - PHIExpression *createPHIExpression(Instruction *); - const VariableExpression *createVariableExpression(Value *); - const ConstantExpression *createConstantExpression(Constant *); - const Expression *createVariableOrConstant(Value *V); - const UnknownExpression *createUnknownExpression(Instruction *); + const Expression *createExpression(Instruction *) const; + const Expression *createBinaryExpression(unsigned, Type *, Value *, + Value *) const; + PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge, + bool &OriginalOpsConstant) const; + const DeadExpression *createDeadExpression() const; + const VariableExpression *createVariableExpression(Value *) const; + const ConstantExpression *createConstantExpression(Constant *) const; + const Expression *createVariableOrConstant(Value *V) const; + const UnknownExpression *createUnknownExpression(Instruction *) const; const StoreExpression *createStoreExpression(StoreInst *, - const MemoryAccess *); + const MemoryAccess *) const; LoadExpression *createLoadExpression(Type *, Value *, LoadInst *, - const MemoryAccess *); - const CallExpression *createCallExpression(CallInst *, const MemoryAccess *); - const AggregateValueExpression *createAggregateValueExpression(Instruction *); - bool setBasicExpressionInfo(Instruction *, BasicExpression *); + const MemoryAccess *) const; + const CallExpression *createCallExpression(CallInst *, + const MemoryAccess *) const; + const AggregateValueExpression * + createAggregateValueExpression(Instruction *) const; + bool setBasicExpressionInfo(Instruction *, BasicExpression *) const; // Congruence class handling. CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) { @@ -471,6 +631,9 @@ private: return CClass; } void initializeCongruenceClasses(Function &F); + const Expression *makePossiblePhiOfOps(Instruction *, + SmallPtrSetImpl &); + void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue); // Value number an Instruction or MemoryPhi. void valueNumberMemoryPhi(MemoryPhi *); @@ -478,17 +641,19 @@ private: // Symbolic evaluation. const Expression *checkSimplificationResults(Expression *, Instruction *, - Value *); - const Expression *performSymbolicEvaluation(Value *); + Value *) const; + const Expression *performSymbolicEvaluation(Value *, + SmallPtrSetImpl &) const; const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *, - Instruction *, MemoryAccess *); - const Expression *performSymbolicLoadEvaluation(Instruction *); - const Expression *performSymbolicStoreEvaluation(Instruction *); - const Expression *performSymbolicCallEvaluation(Instruction *); - const Expression *performSymbolicPHIEvaluation(Instruction *); - const Expression *performSymbolicAggrValueEvaluation(Instruction *); - const Expression *performSymbolicCmpEvaluation(Instruction *); - const Expression *performSymbolicPredicateInfoEvaluation(Instruction *); + Instruction *, + MemoryAccess *) const; + const Expression *performSymbolicLoadEvaluation(Instruction *) const; + const Expression *performSymbolicStoreEvaluation(Instruction *) const; + const Expression *performSymbolicCallEvaluation(Instruction *) const; + const Expression *performSymbolicPHIEvaluation(Instruction *) const; + const Expression *performSymbolicAggrValueEvaluation(Instruction *) const; + const Expression *performSymbolicCmpEvaluation(Instruction *) const; + const Expression *performSymbolicPredicateInfoEvaluation(Instruction *) const; // Congruence finding. bool someEquivalentDominates(const Instruction *, const Instruction *) const; @@ -503,7 +668,7 @@ private: bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To); CongruenceClass *getMemoryClass(const MemoryAccess *MA) const; const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const; - bool isMemoryAccessTop(const MemoryAccess *) const; + bool isMemoryAccessTOP(const MemoryAccess *) const; // Ranking unsigned int getRank(const Value *) const; @@ -527,19 +692,26 @@ private: void replaceInstruction(Instruction *, Value *); void markInstructionForDeletion(Instruction *); void deleteInstructionsInBlock(BasicBlock *); + Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const; // New instruction creation. void handleNewInstruction(Instruction *){}; // Various instruction touch utilities + template + void for_each_found(Map &, const KeyType &, Func); + template + void touchAndErase(Map &, const KeyType &); void markUsersTouched(Value *); void markMemoryUsersTouched(const MemoryAccess *); void markMemoryDefTouched(const MemoryAccess *); void markPredicateUsersTouched(Instruction *); void markValueLeaderChangeTouched(CongruenceClass *CC); void markMemoryLeaderChangeTouched(CongruenceClass *CC); - void addPredicateUsers(const PredicateBase *, Instruction *); - void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U); + void markPhiOfOpsChanged(const Expression *E); + void addPredicateUsers(const PredicateBase *, Instruction *) const; + void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const; + void addAdditionalUsers(Value *To, Value *User) const; // Main loop of value numbering void iterateTouchedInstructions(); @@ -547,12 +719,18 @@ private: // Utilities. void cleanupTables(); std::pair assignDFSNumbers(BasicBlock *, unsigned); - void updateProcessedCount(Value *V); + void updateProcessedCount(const Value *V); void verifyMemoryCongruency() const; void verifyIterationSettled(Function &F); - bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const; + void verifyStoreExpressions() const; + bool singleReachablePHIPath(SmallPtrSet &, + const MemoryAccess *, const MemoryAccess *) const; BasicBlock *getBlockForValue(Value *V) const; - void deleteExpression(const Expression *E); + void deleteExpression(const Expression *E) const; + MemoryUseOrDef *getMemoryAccess(const Instruction *) const; + MemoryAccess *getDefiningAccess(const MemoryAccess *) const; + MemoryPhi *getMemoryAccess(const BasicBlock *) const; + template T *getMinDFSOfRange(const Range &) const; unsigned InstrToDFSNum(const Value *V) const { assert(isa(V) && "This should not be used for MemoryAccesses"); return InstrDFS.lookup(V); @@ -572,8 +750,8 @@ private: ? InstrToDFSNum(cast(MA)->getMemoryInst()) : InstrDFS.lookup(MA); } - - template T *getMinDFSOfRange(const Range &) const; + bool isCycleFree(const Instruction *) const; + bool isBackedge(BasicBlock *From, BasicBlock *To) const; // Debug counter info. When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. std::pair StartingVNCounter; @@ -601,34 +779,60 @@ bool StoreExpression::equals(const Expression &Other) const { return true; } +// Determine if the edge From->To is a backedge +bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const { + if (From == To) + return true; + auto *FromDTN = DT->getNode(From); + auto *ToDTN = DT->getNode(To); + return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN); +} + #ifndef NDEBUG static std::string getBlockName(const BasicBlock *B) { return DOTGraphTraits::getSimpleNodeLabel(B, nullptr); } #endif +// Get a MemoryAccess for an instruction, fake or real. +MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const { + auto *Result = MSSA->getMemoryAccess(I); + return Result ? Result : TempToMemory.lookup(I); +} + +// Get a MemoryPhi for a basic block. These are all real. +MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const { + return MSSA->getMemoryAccess(BB); +} + // Get the basic block from an instruction/memory value. BasicBlock *NewGVN::getBlockForValue(Value *V) const { - if (auto *I = dyn_cast(V)) - return I->getParent(); - else if (auto *MP = dyn_cast(V)) - return MP->getBlock(); - llvm_unreachable("Should have been able to figure out a block for our value"); - return nullptr; + if (auto *I = dyn_cast(V)) { + auto *Parent = I->getParent(); + if (Parent) + return Parent; + Parent = TempToBlock.lookup(V); + assert(Parent && "Every fake instruction should have a block"); + return Parent; + } + + auto *MP = dyn_cast(V); + assert(MP && "Should have been an instruction or a MemoryPhi"); + return MP->getBlock(); } // Delete a definitely dead expression, so it can be reused by the expression // allocator. Some of these are not in creation functions, so we have to accept // const versions. -void NewGVN::deleteExpression(const Expression *E) { +void NewGVN::deleteExpression(const Expression *E) const { assert(isa(E)); auto *BE = cast(E); const_cast(BE)->deallocateOperands(ArgRecycler); ExpressionAllocator.Deallocate(E); } - -PHIExpression *NewGVN::createPHIExpression(Instruction *I) { - BasicBlock *PHIBlock = I->getParent(); +PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, + bool &OriginalOpsConstant) const { + BasicBlock *PHIBlock = getBlockForValue(I); auto *PN = cast(I); auto *E = new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); @@ -637,24 +841,49 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) { E->setType(I->getType()); E->setOpcode(I->getOpcode()); + // NewGVN assumes the operands of a PHI node are in a consistent order across + // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix + // this in LLVM at some point we don't want GVN to find wrong congruences. + // Therefore, here we sort uses in predecessor order. + // We're sorting the values by pointer. In theory this might be cause of + // non-determinism, but here we don't rely on the ordering for anything + // significant, e.g. we don't create new instructions based on it so we're + // fine. + SmallVector PHIOperands; + for (const Use &U : PN->operands()) + PHIOperands.push_back(&U); + std::sort(PHIOperands.begin(), PHIOperands.end(), + [&](const Use *U1, const Use *U2) { + return PN->getIncomingBlock(*U1) < PN->getIncomingBlock(*U2); + }); + // Filter out unreachable phi operands. - auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) { - return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock}); + auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) { + if (*U == PN) + return false; + if (!ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock})) + return false; + // Things in TOPClass are equivalent to everything. + if (ValueToClass.lookup(*U) == TOPClass) + return false; + if (lookupOperandLeader(*U) == PN) + return false; + return true; }); - std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), - [&](const Use &U) -> Value * { - // Don't try to transform self-defined phis. - if (U == PN) - return PN; - return lookupOperandLeader(U); + [&](const Use *U) -> Value * { + auto *BB = PN->getIncomingBlock(*U); + HasBackedge = HasBackedge || isBackedge(BB, PHIBlock); + OriginalOpsConstant = + OriginalOpsConstant && isa(*U); + return lookupOperandLeader(*U); }); return E; } // Set basic expression info (Arguments, type, opcode) for Expression // E from Instruction I in block B. -bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) { +bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) const { bool AllConstant = true; if (auto *GEP = dyn_cast(I)) E->setType(GEP->getSourceElementType()); @@ -667,7 +896,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) { // whether all members are constant. std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) { auto Operand = lookupOperandLeader(O); - AllConstant &= isa(Operand); + AllConstant = AllConstant && isa(Operand); return Operand; }); @@ -675,7 +904,8 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) { } const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, - Value *Arg1, Value *Arg2) { + Value *Arg1, + Value *Arg2) const { auto *E = new (ExpressionAllocator) BasicExpression(2); E->setType(T); @@ -692,8 +922,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, E->op_push_back(lookupOperandLeader(Arg1)); E->op_push_back(lookupOperandLeader(Arg2)); - Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI, - DT, AC); + Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V)) return SimplifiedE; return E; @@ -705,7 +934,8 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, // TODO: Once finished, this should not take an Instruction, we only // use it for printing. const Expression *NewGVN::checkSimplificationResults(Expression *E, - Instruction *I, Value *V) { + Instruction *I, + Value *V) const { if (!V) return nullptr; if (auto *C = dyn_cast(V)) { @@ -727,9 +957,17 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E, CongruenceClass *CC = ValueToClass.lookup(V); if (CC && CC->getDefiningExpr()) { + // If we simplified to something else, we need to communicate + // that we're users of the value we simplified to. + if (I != V) { + // Don't add temporary instructions to the user lists. + if (!AllTempInstructions.count(I)) + addAdditionalUsers(V, I); + } + if (I) DEBUG(dbgs() << "Simplified " << *I << " to " - << " expression " << *V << "\n"); + << " expression " << *CC->getDefiningExpr() << "\n"); NumGVNOpsSimplified++; deleteExpression(E); return CC->getDefiningExpr(); @@ -737,7 +975,7 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E, return nullptr; } -const Expression *NewGVN::createExpression(Instruction *I) { +const Expression *NewGVN::createExpression(Instruction *I) const { auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands()); bool AllConstant = setBasicExpressionInfo(I, E); @@ -774,8 +1012,8 @@ const Expression *NewGVN::createExpression(Instruction *I) { "Wrong types on cmp instruction"); assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() && E->getOperand(1)->getType() == I->getOperand(1)->getType())); - Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa(I)) { @@ -784,23 +1022,23 @@ const Expression *NewGVN::createExpression(Instruction *I) { assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() && E->getOperand(2)->getType() == I->getOperand(2)->getType()); Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), - E->getOperand(2), DL, TLI, DT, AC); + E->getOperand(2), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } } else if (I->isBinaryOp()) { - Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), - DL, TLI, DT, AC); + Value *V = + SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (auto *BI = dyn_cast(I)) { - Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC); + Value *V = + SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (isa(I)) { - Value *V = SimplifyGEPInst(E->getType(), - ArrayRef(E->op_begin(), E->op_end()), - DL, TLI, DT, AC); + Value *V = SimplifyGEPInst( + E->getType(), ArrayRef(E->op_begin(), E->op_end()), SQ); if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) return SimplifiedE; } else if (AllConstant) { @@ -823,7 +1061,7 @@ const Expression *NewGVN::createExpression(Instruction *I) { } const AggregateValueExpression * -NewGVN::createAggregateValueExpression(Instruction *I) { +NewGVN::createAggregateValueExpression(Instruction *I) const { if (auto *II = dyn_cast(I)) { auto *E = new (ExpressionAllocator) AggregateValueExpression(I->getNumOperands(), II->getNumIndices()); @@ -842,32 +1080,38 @@ NewGVN::createAggregateValueExpression(Instruction *I) { llvm_unreachable("Unhandled type of aggregate value operation"); } -const VariableExpression *NewGVN::createVariableExpression(Value *V) { +const DeadExpression *NewGVN::createDeadExpression() const { + // DeadExpression has no arguments and all DeadExpression's are the same, + // so we only need one of them. + return SingletonDeadExpression; +} + +const VariableExpression *NewGVN::createVariableExpression(Value *V) const { auto *E = new (ExpressionAllocator) VariableExpression(V); E->setOpcode(V->getValueID()); return E; } -const Expression *NewGVN::createVariableOrConstant(Value *V) { +const Expression *NewGVN::createVariableOrConstant(Value *V) const { if (auto *C = dyn_cast(V)) return createConstantExpression(C); return createVariableExpression(V); } -const ConstantExpression *NewGVN::createConstantExpression(Constant *C) { +const ConstantExpression *NewGVN::createConstantExpression(Constant *C) const { auto *E = new (ExpressionAllocator) ConstantExpression(C); E->setOpcode(C->getValueID()); return E; } -const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) { +const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const { auto *E = new (ExpressionAllocator) UnknownExpression(I); E->setOpcode(I->getOpcode()); return E; } -const CallExpression *NewGVN::createCallExpression(CallInst *CI, - const MemoryAccess *MA) { +const CallExpression * +NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const { // FIXME: Add operand bundles for calls. auto *E = new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA); @@ -913,7 +1157,7 @@ bool NewGVN::someEquivalentDominates(const Instruction *Inst, Value *NewGVN::lookupOperandLeader(Value *V) const { CongruenceClass *CC = ValueToClass.lookup(V); if (CC) { - // Everything in TOP is represneted by undef, as it can be any value. + // Everything in TOP is represented by undef, as it can be any value. // We do have to make sure we get the type right though, so we can't set the // RepLeader to undef. if (CC == TOPClass) @@ -927,22 +1171,21 @@ Value *NewGVN::lookupOperandLeader(Value *V) const { const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const { auto *CC = getMemoryClass(MA); assert(CC->getMemoryLeader() && - "Every MemoryAccess should be mapped to a " - "congruence class with a represenative memory " - "access"); + "Every MemoryAccess should be mapped to a congruence class with a " + "representative memory access"); return CC->getMemoryLeader(); } // Return true if the MemoryAccess is really equivalent to everything. This is // equivalent to the lattice value "TOP" in most lattices. This is the initial // state of all MemoryAccesses. -bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const { +bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const { return getMemoryClass(MA) == TOPClass; } LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, LoadInst *LI, - const MemoryAccess *MA) { + const MemoryAccess *MA) const { auto *E = new (ExpressionAllocator) LoadExpression(1, LI, lookupMemoryLeader(MA)); E->allocateOperands(ArgRecycler, ExpressionAllocator); @@ -960,8 +1203,8 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, return E; } -const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, - const MemoryAccess *MA) { +const StoreExpression * +NewGVN::createStoreExpression(StoreInst *SI, const MemoryAccess *MA) const { auto *StoredValueLeader = lookupOperandLeader(SI->getValueOperand()); auto *E = new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, StoredValueLeader, MA); @@ -978,11 +1221,11 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, return E; } -const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const { // Unlike loads, we never try to eliminate stores, so we do not check if they // are simple and avoid value numbering them. auto *SI = cast(I); - auto *StoreAccess = MSSA->getMemoryAccess(SI); + auto *StoreAccess = getMemoryAccess(SI); // Get the expression, if any, for the RHS of the MemoryDef. const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess(); if (EnableStoreRefinement) @@ -990,7 +1233,6 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) { // If we bypassed the use-def chains, make sure we add a use. if (StoreRHS != StoreAccess->getDefiningAccess()) addMemoryUsers(StoreRHS, StoreAccess); - StoreRHS = lookupMemoryLeader(StoreRHS); // If we are defined by ourselves, use the live on entry def. if (StoreRHS == StoreAccess) @@ -1019,9 +1261,9 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) { dyn_cast(lookupOperandLeader(SI->getValueOperand()))) { if ((lookupOperandLeader(LI->getPointerOperand()) == lookupOperandLeader(SI->getPointerOperand())) && - (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) == + (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) == StoreRHS)) - return createVariableExpression(LI); + return createStoreExpression(SI, StoreRHS); } } @@ -1036,7 +1278,7 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) { const Expression * NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, LoadInst *LI, Instruction *DepInst, - MemoryAccess *DefiningAccess) { + MemoryAccess *DefiningAccess) const { assert((!LI || LI->isSimple()) && "Not a simple load"); if (auto *DepSI = dyn_cast(DepInst)) { // Can't forward from non-atomic to atomic without violating memory model. @@ -1111,7 +1353,7 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr, return nullptr; } -const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const { auto *LI = cast(I); // We can eliminate in favor of non-simple loads, but we won't be able to @@ -1123,8 +1365,9 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) { // Load of undef is undef. if (isa(LoadAddressLeader)) return createConstantExpression(UndefValue::get(LI->getType())); - - MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I); + MemoryAccess *OriginalAccess = getMemoryAccess(I); + MemoryAccess *DefiningAccess = + MSSAWalker->getClobberingMemoryAccess(OriginalAccess); if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { if (auto *MD = dyn_cast(DefiningAccess)) { @@ -1149,7 +1392,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) { } const Expression * -NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { +NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { auto *PI = PredInfo->getPredicateInfoFor(I); if (!PI) return nullptr; @@ -1194,7 +1437,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { return nullptr; if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) { - DEBUG(dbgs() << "Copy is not of any condition operands!"); + DEBUG(dbgs() << "Copy is not of any condition operands!\n"); return nullptr; } Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0)); @@ -1213,6 +1456,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { // operands are equal, because assumes must always be true. if (CmpInst::isTrueWhenEqual(Predicate)) { addPredicateUsers(PI, I); + addAdditionalUsers(Cmp->getOperand(0), I); return createVariableOrConstant(FirstOp); } } @@ -1225,6 +1469,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) || (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) { addPredicateUsers(PI, I); + addAdditionalUsers(Cmp->getOperand(0), I); return createVariableOrConstant(FirstOp); } // Handle the special case of floating point. @@ -1232,6 +1477,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) && isa(FirstOp) && !cast(FirstOp)->isZero()) { addPredicateUsers(PI, I); + addAdditionalUsers(Cmp->getOperand(0), I); return createConstantExpression(cast(FirstOp)); } } @@ -1239,7 +1485,7 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) { } // Evaluate read only and pure calls, and create an expression result. -const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) const { auto *CI = cast(I); if (auto *II = dyn_cast(I)) { // Instrinsics with the returned attribute are copies of arguments. @@ -1276,8 +1522,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, DEBUG(dbgs() << "Setting " << *From); DEBUG(dbgs() << " equivalent to congruence class "); DEBUG(dbgs() << NewClass->getID() << " with current MemoryAccess leader "); - DEBUG(dbgs() << *NewClass->getMemoryLeader()); - DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << *NewClass->getMemoryLeader() << "\n"); auto LookupResult = MemoryAccessToClass.find(From); bool Changed = false; @@ -1291,7 +1536,7 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, NewClass->memory_insert(MP); // This may have killed the class if it had no non-memory members if (OldClass->getMemoryLeader() == From) { - if (OldClass->memory_empty()) { + if (OldClass->definesNoMemory()) { OldClass->setMemoryLeader(nullptr); } else { OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); @@ -1313,35 +1558,80 @@ bool NewGVN::setMemoryClass(const MemoryAccess *From, return Changed; } +// Determine if a instruction is cycle-free. That means the values in the +// instruction don't depend on any expressions that can change value as a result +// of the instruction. For example, a non-cycle free instruction would be v = +// phi(0, v+1). +bool NewGVN::isCycleFree(const Instruction *I) const { + // In order to compute cycle-freeness, we do SCC finding on the instruction, + // and see what kind of SCC it ends up in. If it is a singleton, it is + // cycle-free. If it is not in a singleton, it is only cycle free if the + // other members are all phi nodes (as they do not compute anything, they are + // copies). + auto ICS = InstCycleState.lookup(I); + if (ICS == ICS_Unknown) { + SCCFinder.Start(I); + auto &SCC = SCCFinder.getComponentFor(I); + // It's cycle free if it's size 1 or or the SCC is *only* phi nodes. + if (SCC.size() == 1) + InstCycleState.insert({I, ICS_CycleFree}); + else { + bool AllPhis = + llvm::all_of(SCC, [](const Value *V) { return isa(V); }); + ICS = AllPhis ? ICS_CycleFree : ICS_Cycle; + for (auto *Member : SCC) + if (auto *MemberPhi = dyn_cast(Member)) + InstCycleState.insert({MemberPhi, ICS}); + } + } + if (ICS == ICS_Cycle) + return false; + return true; +} + // Evaluate PHI nodes symbolically, and create an expression result. -const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { - auto *E = cast(createPHIExpression(I)); +const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { + // True if one of the incoming phi edges is a backedge. + bool HasBackedge = false; + // All constant tracks the state of whether all the *original* phi operands + // This is really shorthand for "this phi cannot cycle due to forward + // change in value of the phi is guaranteed not to later change the value of + // the phi. IE it can't be v = phi(undef, v+1) + bool AllConstant = true; + auto *E = + cast(createPHIExpression(I, HasBackedge, AllConstant)); // We match the semantics of SimplifyPhiNode from InstructionSimplify here. - - // See if all arguaments are the same. + // See if all arguments are the same. // We track if any were undef because they need special handling. bool HasUndef = false; - auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) { - if (Arg == I) - return false; + auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) { if (isa(Arg)) { HasUndef = true; return false; } return true; }); - // If we are left with no operands, it's undef + // If we are left with no operands, it's dead. if (Filtered.begin() == Filtered.end()) { - DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef" - << "\n"); + // If it has undef at this point, it means there are no-non-undef arguments, + // and thus, the value of the phi node must be undef. + if (HasUndef) { + DEBUG(dbgs() << "PHI Node " << *I + << " has no non-undef arguments, valuing it as undef\n"); + return createConstantExpression(UndefValue::get(I->getType())); + } + + DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n"); deleteExpression(E); - return createConstantExpression(UndefValue::get(I->getType())); + return createDeadExpression(); } + unsigned NumOps = 0; Value *AllSameValue = *(Filtered.begin()); ++Filtered.begin(); // Can't use std::equal here, sadly, because filter.begin moves. - if (llvm::all_of(Filtered, [AllSameValue](const Value *V) { - return V == AllSameValue; + if (llvm::all_of(Filtered, [&](Value *Arg) { + ++NumOps; + return Arg == AllSameValue; })) { // In LLVM's non-standard representation of phi nodes, it's possible to have // phi nodes with cycles (IE dependent on other phis that are .... dependent @@ -1353,12 +1643,27 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { // We also special case undef, so that if we have an undef, we can't use the // common value unless it dominates the phi block. if (HasUndef) { + // If we have undef and at least one other value, this is really a + // multivalued phi, and we need to know if it's cycle free in order to + // evaluate whether we can ignore the undef. The other parts of this are + // just shortcuts. If there is no backedge, or all operands are + // constants, or all operands are ignored but the undef, it also must be + // cycle free. + if (!AllConstant && HasBackedge && NumOps > 0 && + !isa(AllSameValue) && !isCycleFree(I)) + return E; + // Only have to check for instructions if (auto *AllSameInst = dyn_cast(AllSameValue)) if (!someEquivalentDominates(AllSameInst, I)) return E; } - + // Can't simplify to something that comes later in the iteration. + // Otherwise, when and if it changes congruence class, we will never catch + // up. We will always be a class behind it. + if (isa(AllSameValue) && + InstrToDFSNum(AllSameValue) > InstrToDFSNum(I)) + return E; NumGVNPhisAllSame++; DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue << "\n"); @@ -1368,7 +1673,8 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) { return E; } -const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) { +const Expression * +NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const { if (auto *EI = dyn_cast(I)) { auto *II = dyn_cast(EI->getAggregateOperand()); if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) { @@ -1406,7 +1712,7 @@ const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) { return createAggregateValueExpression(I); } -const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { +const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { auto *CI = dyn_cast(I); // See if our operands are equal to those of a previous predicate, and if so, // if it implies true or false. @@ -1483,15 +1789,15 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { if (PBranch->TrueEdge) { // If we know the previous predicate is true and we are in the true // edge then we may be implied true or false. - if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getTrue(CI->getType())); } - if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate, - BranchPredicate)) { + if (CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate, + OurPredicate)) { addPredicateUsers(PI, I); return createConstantExpression( ConstantInt::getFalse(CI->getType())); @@ -1520,8 +1826,18 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) { return createExpression(I); } +// Return true if V is a value that will always be available (IE can +// be placed anywhere) in the function. We don't do globals here +// because they are often worse to put in place. +// TODO: Separate cost from availability +static bool alwaysAvailable(Value *V) { + return isa(V) || isa(V); +} + // Substitute and symbolize the value before value numbering. -const Expression *NewGVN::performSymbolicEvaluation(Value *V) { +const Expression * +NewGVN::performSymbolicEvaluation(Value *V, + SmallPtrSetImpl &Visited) const { const Expression *E = nullptr; if (auto *C = dyn_cast(V)) E = createConstantExpression(C); @@ -1599,15 +1915,43 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V) { return E; } +// Look up a container in a map, and then call a function for each thing in the +// found container. +template +void NewGVN::for_each_found(Map &M, const KeyType &Key, Func F) { + const auto Result = M.find_as(Key); + if (Result != M.end()) + for (typename Map::mapped_type::value_type Mapped : Result->second) + F(Mapped); +} + +// Look up a container of values/instructions in a map, and touch all the +// instructions in the container. Then erase value from the map. +template +void NewGVN::touchAndErase(Map &M, const KeyType &Key) { + const auto Result = M.find_as(Key); + if (Result != M.end()) { + for (const typename Map::mapped_type::value_type Mapped : Result->second) + TouchedInstructions.set(InstrToDFSNum(Mapped)); + M.erase(Result); + } +} + +void NewGVN::addAdditionalUsers(Value *To, Value *User) const { + if (isa(To)) + AdditionalUsers[To].insert(User); +} + void NewGVN::markUsersTouched(Value *V) { // Now mark the users as touched. for (auto *User : V->users()) { assert(isa(User) && "Use of value not within an instruction?"); TouchedInstructions.set(InstrToDFSNum(User)); } + touchAndErase(AdditionalUsers, V); } -void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) { +void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const { DEBUG(dbgs() << "Adding memory user " << *U << " to " << *To << "\n"); MemoryToUsers[To].insert(U); } @@ -1621,16 +1965,15 @@ void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) { return; for (auto U : MA->users()) TouchedInstructions.set(MemoryToDFSNum(U)); - const auto Result = MemoryToUsers.find(MA); - if (Result != MemoryToUsers.end()) { - for (auto *User : Result->second) - TouchedInstructions.set(MemoryToDFSNum(User)); - MemoryToUsers.erase(Result); - } + touchAndErase(MemoryToUsers, MA); } // Add I to the set of users of a given predicate. -void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) { +void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const { + // Don't add temporary instructions to the user lists. + if (AllTempInstructions.count(I)) + return; + if (auto *PBranch = dyn_cast(PB)) PredicateToUsers[PBranch->Condition].insert(I); else if (auto *PAssume = dyn_cast(PB)) @@ -1639,12 +1982,7 @@ void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) { // Touch all the predicates that depend on this instruction. void NewGVN::markPredicateUsersTouched(Instruction *I) { - const auto Result = PredicateToUsers.find(I); - if (Result != PredicateToUsers.end()) { - for (auto *User : Result->second) - TouchedInstructions.set(InstrToDFSNum(User)); - PredicateToUsers.erase(Result); - } + touchAndErase(PredicateToUsers, I); } // Mark users affected by a memory leader change. @@ -1683,15 +2021,14 @@ const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const { // TODO: If this ends up to slow, we can maintain a next memory leader like we // do for regular leaders. // Make sure there will be a leader to find - assert((CC->getStoreCount() > 0 || !CC->memory_empty()) && - "Can't get next leader if there is none"); + assert(!CC->definesNoMemory() && "Can't get next leader if there is none"); if (CC->getStoreCount() > 0) { if (auto *NL = dyn_cast_or_null(CC->getNextLeader().first)) - return MSSA->getMemoryAccess(NL); + return getMemoryAccess(NL); // Find the store with the minimum DFS number. auto *V = getMinDFSOfRange(make_filter_range( *CC, [&](const Value *V) { return isa(V); })); - return MSSA->getMemoryAccess(cast(V)); + return getMemoryAccess(cast(V)); } assert(CC->getStoreCount() == 0); @@ -1756,7 +2093,7 @@ void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, setMemoryClass(InstMA, NewClass); // Now, fixup the old class if necessary if (OldClass->getMemoryLeader() == InstMA) { - if (OldClass->getStoreCount() != 0 || !OldClass->memory_empty()) { + if (!OldClass->definesNoMemory()) { OldClass->setMemoryLeader(getNextMemoryLeader(OldClass)); DEBUG(dbgs() << "Memory class leader change for class " << OldClass->getID() << " to " @@ -1776,31 +2113,11 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (I == OldClass->getNextLeader().first) OldClass->resetNextLeader(); - // It's possible, though unlikely, for us to discover equivalences such - // that the current leader does not dominate the old one. - // This statistic tracks how often this happens. - // We assert on phi nodes when this happens, currently, for debugging, because - // we want to make sure we name phi node cycles properly. - if (isa(NewClass->getLeader()) && NewClass->getLeader() && - I != NewClass->getLeader()) { - auto *IBB = I->getParent(); - auto *NCBB = cast(NewClass->getLeader())->getParent(); - bool Dominated = - IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader()); - Dominated = Dominated || DT->properlyDominates(IBB, NCBB); - if (Dominated) { - ++NumGVNNotMostDominatingLeader; - assert( - !isa(I) && - "New class for instruction should not be dominated by instruction"); - } - } + OldClass->erase(I); + NewClass->insert(I); if (NewClass->getLeader() != I) NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)}); - - OldClass->erase(I); - NewClass->insert(I); // Handle our special casing of stores. if (auto *SI = dyn_cast(I)) { OldClass->decStoreCount(); @@ -1814,10 +2131,8 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (NewClass->getStoreCount() == 0 && !NewClass->getStoredValue()) { // If it's a store expression we are using, it means we are not equivalent // to something earlier. - if (isa(E)) { - assert(lookupOperandLeader(SI->getValueOperand()) != - NewClass->getLeader()); - NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + if (auto *SE = dyn_cast(E)) { + NewClass->setStoredValue(SE->getStoredValue()); markValueLeaderChangeTouched(NewClass); // Shift the new class leader to be the store DEBUG(dbgs() << "Changing leader of congruence class " @@ -1835,17 +2150,26 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, // instructions before. // If it's not a memory use, set the MemoryAccess equivalence - auto *InstMA = dyn_cast_or_null(MSSA->getMemoryAccess(I)); - bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA; + auto *InstMA = dyn_cast_or_null(getMemoryAccess(I)); if (InstMA) moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass); ValueToClass[I] = NewClass; // See if we destroyed the class or need to swap leaders. if (OldClass->empty() && OldClass != TOPClass) { if (OldClass->getDefiningExpr()) { - DEBUG(dbgs() << "Erasing expression " << OldClass->getDefiningExpr() + DEBUG(dbgs() << "Erasing expression " << *OldClass->getDefiningExpr() << " from table\n"); - ExpressionToClass.erase(OldClass->getDefiningExpr()); + // We erase it as an exact expression to make sure we don't just erase an + // equivalent one. + auto Iter = ExpressionToClass.find_as( + ExactEqualsExpression(*OldClass->getDefiningExpr())); + if (Iter != ExpressionToClass.end()) + ExpressionToClass.erase(Iter); +#ifdef EXPENSIVE_CHECKS + assert( + (*OldClass->getDefiningExpr() != *E || ExpressionToClass.lookup(E)) && + "We erased the expression we just inserted, which should not happen"); +#endif } } else if (OldClass->getLeader() == I) { // When the leader changes, the value numbering of @@ -1862,52 +2186,35 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, if (OldClass->getStoredValue()) OldClass->setStoredValue(nullptr); } - // If we destroy the old access leader and it's a store, we have to - // effectively destroy the congruence class. When it comes to scalars, - // anything with the same value is as good as any other. That means that - // one leader is as good as another, and as long as you have some leader for - // the value, you are good.. When it comes to *memory states*, only one - // particular thing really represents the definition of a given memory - // state. Once it goes away, we need to re-evaluate which pieces of memory - // are really still equivalent. The best way to do this is to re-value - // number things. The only way to really make that happen is to destroy the - // rest of the class. In order to effectively destroy the class, we reset - // ExpressionToClass for each by using the ValueToExpression mapping. The - // members later get marked as touched due to the leader change. We will - // create new congruence classes, and the pieces that are still equivalent - // will end back together in a new class. If this becomes too expensive, it - // is possible to use a versioning scheme for the congruence classes to - // avoid the expressions finding this old class. Note that the situation is - // different for memory phis, becuase they are evaluated anew each time, and - // they become equal not by hashing, but by seeing if all operands are the - // same (or only one is reachable). - if (OldClass->getStoreCount() > 0 && InstWasMemoryLeader) { - DEBUG(dbgs() << "Kicking everything out of class " << OldClass->getID() - << " because MemoryAccess leader changed"); - for (auto Member : *OldClass) - ExpressionToClass.erase(ValueToExpression.lookup(Member)); - } OldClass->setLeader(getNextValueLeader(OldClass)); OldClass->resetNextLeader(); markValueLeaderChangeTouched(OldClass); } } +// For a given expression, mark the phi of ops instructions that could have +// changed as a result. +void NewGVN::markPhiOfOpsChanged(const Expression *E) { + touchAndErase(ExpressionToPhiOfOps, ExactEqualsExpression(*E)); +} + // Perform congruence finding on a given value numbering expression. void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { - ValueToExpression[I] = E; // This is guaranteed to return something, since it will at least find // TOP. - CongruenceClass *IClass = ValueToClass[I]; + CongruenceClass *IClass = ValueToClass.lookup(I); assert(IClass && "Should have found a IClass"); // Dead classes should have been eliminated from the mapping. assert(!IClass->isDead() && "Found a dead class"); - CongruenceClass *EClass; + CongruenceClass *EClass = nullptr; if (const auto *VE = dyn_cast(E)) { - EClass = ValueToClass[VE->getVariableValue()]; - } else { + EClass = ValueToClass.lookup(VE->getVariableValue()); + } else if (isa(E)) { + EClass = TOPClass; + } + if (!EClass) { auto lookupResult = ExpressionToClass.insert({E, nullptr}); // If it's not in the value table, create a new congruence class. @@ -1922,7 +2229,7 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { } else if (const auto *SE = dyn_cast(E)) { StoreInst *SI = SE->getStoreInst(); NewClass->setLeader(SI); - NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand())); + NewClass->setStoredValue(SE->getStoredValue()); // The RepMemoryAccess field will be filled in properly by the // moveValueToNewCongruenceClass call. } else { @@ -1957,14 +2264,34 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) { if (ClassChanged || LeaderChanged) { DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E << "\n"); - if (ClassChanged) + if (ClassChanged) { moveValueToNewCongruenceClass(I, E, IClass, EClass); + markPhiOfOpsChanged(E); + } + markUsersTouched(I); - if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + if (MemoryAccess *MA = getMemoryAccess(I)) markMemoryUsersTouched(MA); if (auto *CI = dyn_cast(I)) markPredicateUsersTouched(CI); } + // If we changed the class of the store, we want to ensure nothing finds the + // old store expression. In particular, loads do not compare against stored + // value, so they will find old store expressions (and associated class + // mappings) if we leave them in the table. + if (ClassChanged && isa(I)) { + auto *OldE = ValueToExpression.lookup(I); + // It could just be that the old class died. We don't want to erase it if we + // just moved classes. + if (OldE && isa(OldE) && *E != *OldE) { + // Erase this as an exact expression to ensure we don't erase expressions + // equivalent to it. + auto Iter = ExpressionToClass.find_as(ExactEqualsExpression(*OldE)); + if (Iter != ExpressionToClass.end()) + ExpressionToClass.erase(Iter); + } + } + ValueToExpression[I] = E; } // Process the fact that Edge (from, to) is reachable, including marking @@ -1986,7 +2313,7 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { // impact predicates. Otherwise, only mark the phi nodes as touched, as // they are the only thing that depend on new edges. Anything using their // values will get propagated to if necessary. - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To)) + if (MemoryAccess *MemPhi = getMemoryAccess(To)) TouchedInstructions.set(InstrToDFSNum(MemPhi)); auto BI = To->begin(); @@ -1994,6 +2321,9 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { TouchedInstructions.set(InstrToDFSNum(&*BI)); ++BI; } + for_each_found(PHIOfOpsPHIs, To, [&](const PHINode *I) { + TouchedInstructions.set(InstrToDFSNum(I)); + }); } } } @@ -2083,7 +2413,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { // This also may be a memory defining terminator, in which case, set it // equivalent only to itself. // - auto *MA = MSSA->getMemoryAccess(TI); + auto *MA = getMemoryAccess(TI); if (MA && !isa(MA)) { auto *CC = ensureLeaderOfMemoryClass(MA); if (setMemoryClass(MA, CC)) @@ -2092,6 +2422,149 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { } } +void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB, + Instruction *ExistingValue) { + InstrDFS[Op] = InstrToDFSNum(ExistingValue); + AllTempInstructions.insert(Op); + PHIOfOpsPHIs[BB].push_back(Op); + TempToBlock[Op] = BB; + if (ExistingValue) + RealToTemp[ExistingValue] = Op; +} + +static bool okayForPHIOfOps(const Instruction *I) { + return isa(I) || isa(I) || isa(I) || + isa(I); +} + +// When we see an instruction that is an op of phis, generate the equivalent phi +// of ops form. +const Expression * +NewGVN::makePossiblePhiOfOps(Instruction *I, + SmallPtrSetImpl &Visited) { + if (!okayForPHIOfOps(I)) + return nullptr; + + if (!Visited.insert(I).second) + return nullptr; + // For now, we require the instruction be cycle free because we don't + // *always* create a phi of ops for instructions that could be done as phi + // of ops, we only do it if we think it is useful. If we did do it all the + // time, we could remove the cycle free check. + if (!isCycleFree(I)) + return nullptr; + + unsigned IDFSNum = InstrToDFSNum(I); + SmallPtrSet ProcessedPHIs; + // TODO: We don't do phi translation on memory accesses because it's + // complicated. For a load, we'd need to be able to simulate a new memoryuse, + // which we don't have a good way of doing ATM. + auto *MemAccess = getMemoryAccess(I); + // If the memory operation is defined by a memory operation this block that + // isn't a MemoryPhi, transforming the pointer backwards through a scalar phi + // can't help, as it would still be killed by that memory operation. + if (MemAccess && !isa(MemAccess->getDefiningAccess()) && + MemAccess->getDefiningAccess()->getBlock() == I->getParent()) + return nullptr; + + // Convert op of phis to phi of ops + for (auto &Op : I->operands()) { + // TODO: We can't handle expressions that must be recursively translated + // IE + // a = phi (b, c) + // f = use a + // g = f + phi of something + // To properly make a phi of ops for g, we'd have to properly translate and + // use the instruction for f. We should add this by splitting out the + // instruction creation we do below. + if (isa(Op) && PHINodeUses.count(cast(Op))) + return nullptr; + if (!isa(Op)) + continue; + auto *OpPHI = cast(Op); + // No point in doing this for one-operand phis. + if (OpPHI->getNumOperands() == 1) + continue; + if (!DebugCounter::shouldExecute(PHIOfOpsCounter)) + return nullptr; + SmallVector, 4> Ops; + auto *PHIBlock = getBlockForValue(OpPHI); + for (auto PredBB : OpPHI->blocks()) { + Value *FoundVal = nullptr; + // We could just skip unreachable edges entirely but it's tricky to do + // with rewriting existing phi nodes. + if (ReachableEdges.count({PredBB, PHIBlock})) { + // Clone the instruction, create an expression from it, and see if we + // have a leader. + Instruction *ValueOp = I->clone(); + if (MemAccess) + TempToMemory.insert({ValueOp, MemAccess}); + + for (auto &Op : ValueOp->operands()) { + Op = Op->DoPHITranslation(PHIBlock, PredBB); + // When this operand changes, it could change whether there is a + // leader for us or not. + addAdditionalUsers(Op, I); + } + // Make sure it's marked as a temporary instruction. + AllTempInstructions.insert(ValueOp); + // and make sure anything that tries to add it's DFS number is + // redirected to the instruction we are making a phi of ops + // for. + InstrDFS.insert({ValueOp, IDFSNum}); + const Expression *E = performSymbolicEvaluation(ValueOp, Visited); + InstrDFS.erase(ValueOp); + AllTempInstructions.erase(ValueOp); + ValueOp->deleteValue(); + if (MemAccess) + TempToMemory.erase(ValueOp); + if (!E) + return nullptr; + FoundVal = findPhiOfOpsLeader(E, PredBB); + if (!FoundVal) { + ExpressionToPhiOfOps[E].insert(I); + return nullptr; + } + if (auto *SI = dyn_cast(FoundVal)) + FoundVal = SI->getValueOperand(); + } else { + DEBUG(dbgs() << "Skipping phi of ops operand for incoming block " + << getBlockName(PredBB) + << " because the block is unreachable\n"); + FoundVal = UndefValue::get(I->getType()); + } + + Ops.push_back({FoundVal, PredBB}); + DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in " + << getBlockName(PredBB) << "\n"); + } + auto *ValuePHI = RealToTemp.lookup(I); + bool NewPHI = false; + if (!ValuePHI) { + ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands()); + addPhiOfOps(ValuePHI, PHIBlock, I); + NewPHI = true; + NumGVNPHIOfOpsCreated++; + } + if (NewPHI) { + for (auto PHIOp : Ops) + ValuePHI->addIncoming(PHIOp.first, PHIOp.second); + } else { + unsigned int i = 0; + for (auto PHIOp : Ops) { + ValuePHI->setIncomingValue(i, PHIOp.first); + ValuePHI->setIncomingBlock(i, PHIOp.second); + ++i; + } + } + + DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I + << "\n"); + return performSymbolicEvaluation(ValuePHI, Visited); + } + return nullptr; +} + // The algorithm initially places the values of the routine in the TOP // congruence class. The leader of TOP is the undetermined value `undef`. // When the algorithm has finished, values still in TOP are unreachable. @@ -2112,12 +2585,13 @@ void NewGVN::initializeCongruenceClasses(Function &F) { MemoryAccessToClass[MSSA->getLiveOnEntryDef()] = createMemoryClass(MSSA->getLiveOnEntryDef()); - for (auto &B : F) { + for (auto DTN : nodes(DT)) { + BasicBlock *BB = DTN->getBlock(); // All MemoryAccesses are equivalent to live on entry to start. They must // be initialized to something so that initial changes are noticed. For // the maximal answer, we initialize them all to be the same as // liveOnEntry. - auto *MemoryBlockDefs = MSSA->getBlockDefs(&B); + auto *MemoryBlockDefs = MSSA->getBlockDefs(BB); if (MemoryBlockDefs) for (const auto &Def : *MemoryBlockDefs) { MemoryAccessToClass[&Def] = TOPClass; @@ -2132,7 +2606,13 @@ void NewGVN::initializeCongruenceClasses(Function &F) { if (MD && isa(MD->getMemoryInst())) TOPClass->incStoreCount(); } - for (auto &I : B) { + for (auto &I : *BB) { + // TODO: Move to helper + if (isa(&I)) + for (auto *U : I.users()) + if (auto *UInst = dyn_cast(U)) + if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst)) + PHINodeUses.insert(UInst); // Don't insert void terminators into the class. We don't value number // them, and they just end up sitting in TOP. if (isa(I) && I.getType()->isVoidTy()) @@ -2157,12 +2637,35 @@ void NewGVN::cleanupTables() { CongruenceClasses[i] = nullptr; } + // Destroy the value expressions + SmallVector TempInst(AllTempInstructions.begin(), + AllTempInstructions.end()); + AllTempInstructions.clear(); + + // We have to drop all references for everything first, so there are no uses + // left as we delete them. + for (auto *I : TempInst) { + I->dropAllReferences(); + } + + while (!TempInst.empty()) { + auto *I = TempInst.back(); + TempInst.pop_back(); + I->deleteValue(); + } + ValueToClass.clear(); ArgRecycler.clear(ExpressionAllocator); ExpressionAllocator.Reset(); CongruenceClasses.clear(); ExpressionToClass.clear(); ValueToExpression.clear(); + RealToTemp.clear(); + AdditionalUsers.clear(); + ExpressionToPhiOfOps.clear(); + TempToBlock.clear(); + TempToMemory.clear(); + PHIOfOpsPHIs.clear(); ReachableBlocks.clear(); ReachableEdges.clear(); #ifndef NDEBUG @@ -2178,14 +2681,17 @@ void NewGVN::cleanupTables() { MemoryToUsers.clear(); } +// Assign local DFS number mapping to instructions, and leave space for Value +// PHI's. std::pair NewGVN::assignDFSNumbers(BasicBlock *B, unsigned Start) { unsigned End = Start; - if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) { + if (MemoryAccess *MemPhi = getMemoryAccess(B)) { InstrDFS[MemPhi] = End++; DFSToInstr.emplace_back(MemPhi); } + // Then the real block goes next. for (auto &I : *B) { // There's no need to call isInstructionTriviallyDead more than once on // an instruction. Therefore, once we know that an instruction is dead @@ -2196,7 +2702,6 @@ std::pair NewGVN::assignDFSNumbers(BasicBlock *B, markInstructionForDeletion(&I); continue; } - InstrDFS[&I] = End++; DFSToInstr.emplace_back(&I); } @@ -2207,7 +2712,7 @@ std::pair NewGVN::assignDFSNumbers(BasicBlock *B, return std::make_pair(Start, End); } -void NewGVN::updateProcessedCount(Value *V) { +void NewGVN::updateProcessedCount(const Value *V) { #ifndef NDEBUG if (ProcessedCount.count(V) == 0) { ProcessedCount.insert({V, 1}); @@ -2221,12 +2726,13 @@ void NewGVN::updateProcessedCount(Value *V) { // Evaluate MemoryPhi nodes symbolically, just like PHI nodes void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { // If all the arguments are the same, the MemoryPhi has the same value as the - // argument. - // Filter out unreachable blocks and self phis from our operands. + // argument. Filter out unreachable blocks and self phis from our operands. + // TODO: We could do cycle-checking on the memory phis to allow valueizing for + // self-phi checking. const BasicBlock *PHIBlock = MP->getBlock(); auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) { - return lookupMemoryLeader(cast(U)) != MP && - !isMemoryAccessTop(cast(U)) && + return cast(U) != MP && + !isMemoryAccessTOP(cast(U)) && ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock}); }); // If all that is left is nothing, our memoryphi is undef. We keep it as @@ -2279,18 +2785,25 @@ void NewGVN::valueNumberInstruction(Instruction *I) { DEBUG(dbgs() << "Processing instruction " << *I << "\n"); if (!I->isTerminator()) { const Expression *Symbolized = nullptr; + SmallPtrSet Visited; if (DebugCounter::shouldExecute(VNCounter)) { - Symbolized = performSymbolicEvaluation(I); + Symbolized = performSymbolicEvaluation(I, Visited); + // Make a phi of ops if necessary + if (Symbolized && !isa(Symbolized) && + !isa(Symbolized) && PHINodeUses.count(I)) { + auto *PHIE = makePossiblePhiOfOps(I, Visited); + if (PHIE) + Symbolized = PHIE; + } + } else { // Mark the instruction as unused so we don't value number it again. InstrDFS[I] = 0; } // If we couldn't come up with a symbolic expression, use the unknown // expression - if (Symbolized == nullptr) { + if (Symbolized == nullptr) Symbolized = createUnknownExpression(I); - } - performCongruenceFinding(I, Symbolized); } else { // Handle terminators that return values. All of them produce values we @@ -2306,13 +2819,23 @@ void NewGVN::valueNumberInstruction(Instruction *I) { // Check if there is a path, using single or equal argument phi nodes, from // First to Second. -bool NewGVN::singleReachablePHIPath(const MemoryAccess *First, - const MemoryAccess *Second) const { +bool NewGVN::singleReachablePHIPath( + SmallPtrSet &Visited, const MemoryAccess *First, + const MemoryAccess *Second) const { if (First == Second) return true; if (MSSA->isLiveOnEntryDef(First)) return false; + // This is not perfect, but as we're just verifying here, we can live with + // the loss of precision. The real solution would be that of doing strongly + // connected component finding in this routine, and it's probably not worth + // the complexity for the time being. So, we just keep a set of visited + // MemoryAccess and return true when we hit a cycle. + if (Visited.count(First)) + return true; + Visited.insert(First); + const auto *EndDef = First; for (auto *ChainDef : optimized_def_chain(First)) { if (ChainDef == Second) @@ -2335,7 +2858,8 @@ bool NewGVN::singleReachablePHIPath(const MemoryAccess *First, Okay = std::equal(OperandList.begin(), OperandList.end(), OperandList.begin()); if (Okay) - return singleReachablePHIPath(cast(OperandList[0]), Second); + return singleReachablePHIPath(Visited, cast(OperandList[0]), + Second); return false; } @@ -2351,12 +2875,11 @@ void NewGVN::verifyMemoryCongruency() const { continue; if (CC->getStoreCount() != 0) { assert((CC->getStoredValue() || !isa(CC->getLeader())) && - "Any class with a store as a " - "leader should have a " - "representative stored value\n"); + "Any class with a store as a leader should have a " + "representative stored value"); assert(CC->getMemoryLeader() && - "Any congruence class with a store should " - "have a representative access\n"); + "Any congruence class with a store should have a " + "representative access"); } if (CC->getMemoryLeader()) @@ -2376,30 +2899,40 @@ void NewGVN::verifyMemoryCongruency() const { auto ReachableAccessPred = [&](const std::pair Pair) { bool Result = ReachableBlocks.count(Pair.first->getBlock()); - if (!Result) + if (!Result || MSSA->isLiveOnEntryDef(Pair.first) || + MemoryToDFSNum(Pair.first) == 0) return false; - if (MSSA->isLiveOnEntryDef(Pair.first)) - return true; if (auto *MemDef = dyn_cast(Pair.first)) return !isInstructionTriviallyDead(MemDef->getMemoryInst()); - if (MemoryToDFSNum(Pair.first) == 0) + + // We could have phi nodes which operands are all trivially dead, + // so we don't process them. + if (auto *MemPHI = dyn_cast(Pair.first)) { + for (auto &U : MemPHI->incoming_values()) { + if (Instruction *I = dyn_cast(U.get())) { + if (!isInstructionTriviallyDead(I)) + return true; + } + } return false; + } + return true; }; auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred); for (auto KV : Filtered) { - assert(KV.second != TOPClass && - "Memory not unreachable but ended up in TOP"); if (auto *FirstMUD = dyn_cast(KV.first)) { auto *SecondMUD = dyn_cast(KV.second->getMemoryLeader()); - if (FirstMUD && SecondMUD) - assert((singleReachablePHIPath(FirstMUD, SecondMUD) || + if (FirstMUD && SecondMUD) { + SmallPtrSet VisitedMAS; + assert((singleReachablePHIPath(VisitedMAS, FirstMUD, SecondMUD) || ValueToClass.lookup(FirstMUD->getMemoryInst()) == ValueToClass.lookup(SecondMUD->getMemoryInst())) && "The instructions for these memory operations should have " "been in the same congruence class or reachable through" "a single argument phi"); + } } else if (auto *FirstMP = dyn_cast(KV.first)) { // We can only sanely verify that MemoryDefs in the operand list all have // the same class. @@ -2474,6 +3007,30 @@ void NewGVN::verifyIterationSettled(Function &F) { #endif } +// Verify that for each store expression in the expression to class mapping, +// only the latest appears, and multiple ones do not appear. +// Because loads do not use the stored value when doing equality with stores, +// if we don't erase the old store expressions from the table, a load can find +// a no-longer valid StoreExpression. +void NewGVN::verifyStoreExpressions() const { +#ifndef NDEBUG + DenseSet> StoreExpressionSet; + for (const auto &KV : ExpressionToClass) { + if (auto *SE = dyn_cast(KV.first)) { + // Make sure a version that will conflict with loads is not already there + auto Res = + StoreExpressionSet.insert({SE->getOperand(0), SE->getMemoryLeader()}); + assert(Res.second && + "Stored expression conflict exists in expression table"); + auto *ValueExpr = ValueToExpression.lookup(SE->getStoreInst()); + assert(ValueExpr && ValueExpr->equals(*SE) && + "StoreExpression in ExpressionToClass is not latest " + "StoreExpression for value"); + } + } +#endif +} + // This is the main value numbering loop, it iterates over the initial touched // instruction set, propagating value numbers, marking things touched, etc, // until the set of touched instructions is completely empty. @@ -2484,15 +3041,14 @@ void NewGVN::iterateTouchedInstructions() { // Nothing set, nothing to iterate, just return. if (FirstInstr == -1) return; - BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); + const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr)); while (TouchedInstructions.any()) { ++Iterations; // Walk through all the instructions in all the blocks in RPO. // TODO: As we hit a new block, we should push and pop equalities into a // table lookupOperandLeader can use, to catch things PredicateInfo // might miss, like edge-only equivalences. - for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; - InstrNum = TouchedInstructions.find_next(InstrNum)) { + for (unsigned InstrNum : TouchedInstructions.set_bits()) { // This instruction was found to be dead. We don't bother looking // at it again. @@ -2502,7 +3058,7 @@ void NewGVN::iterateTouchedInstructions() { } Value *V = InstrFromDFSNum(InstrNum); - BasicBlock *CurrBlock = getBlockForValue(V); + const BasicBlock *CurrBlock = getBlockForValue(V); // If we hit a new block, do reachability processing. if (CurrBlock != LastBlock) { @@ -2520,6 +3076,9 @@ void NewGVN::iterateTouchedInstructions() { } updateProcessedCount(CurrBlock); } + // Reset after processing (because we may mark ourselves as touched when + // we propagate equalities). + TouchedInstructions.reset(InstrNum); if (auto *MP = dyn_cast(V)) { DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n"); @@ -2530,9 +3089,6 @@ void NewGVN::iterateTouchedInstructions() { llvm_unreachable("Should have been a MemoryPhi or Instruction"); } updateProcessedCount(V); - // Reset after processing (because we may mark ourselves as touched when - // we propagate equalities). - TouchedInstructions.reset(InstrNum); } } NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations); @@ -2545,6 +3101,7 @@ bool NewGVN::runGVN() { bool Changed = false; NumFuncArgs = F.arg_size(); MSSAWalker = MSSA->getWalker(); + SingletonDeadExpression = new (ExpressionAllocator) DeadExpression(); // Count number of instructions for sizing of hash tables, and come // up with a global dfs numbering for instructions. @@ -2559,7 +3116,6 @@ bool NewGVN::runGVN() { // The dominator tree does guarantee that, for a given dom tree node, it's // parent must occur before it in the RPO ordering. Thus, we only need to sort // the siblings. - DenseMap RPOOrdering; ReversePostOrderTraversal RPOT(&F); unsigned Counter = 0; for (auto &B : RPOT) { @@ -2572,30 +3128,19 @@ bool NewGVN::runGVN() { auto *Node = DT->getNode(B); if (Node->getChildren().size() > 1) std::sort(Node->begin(), Node->end(), - [&RPOOrdering](const DomTreeNode *A, const DomTreeNode *B) { + [&](const DomTreeNode *A, const DomTreeNode *B) { return RPOOrdering[A] < RPOOrdering[B]; }); } // Now a standard depth first ordering of the domtree is equivalent to RPO. - auto DFI = df_begin(DT->getRootNode()); - for (auto DFE = df_end(DT->getRootNode()); DFI != DFE; ++DFI) { - BasicBlock *B = DFI->getBlock(); + for (auto DTN : depth_first(DT->getRootNode())) { + BasicBlock *B = DTN->getBlock(); const auto &BlockRange = assignDFSNumbers(B, ICount); BlockInstRange.insert({B, BlockRange}); ICount += BlockRange.second - BlockRange.first; } - - // Handle forward unreachable blocks and figure out which blocks - // have single preds. - for (auto &B : F) { - // Assign numbers to unreachable blocks. - if (!DFI.nodeVisited(DT->getNode(&B))) { - const auto &BlockRange = assignDFSNumbers(&B, ICount); - BlockInstRange.insert({&B, BlockRange}); - ICount += BlockRange.second - BlockRange.first; - } - } + initializeCongruenceClasses(F); TouchedInstructions.resize(ICount); // Ensure we don't end up resizing the expressionToClass map, as @@ -2606,12 +3151,14 @@ bool NewGVN::runGVN() { // Initialize the touched instructions to include the entry block. const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock()); TouchedInstructions.set(InstRange.first, InstRange.second); + DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock()) + << " marked reachable\n"); ReachableBlocks.insert(&F.getEntryBlock()); - initializeCongruenceClasses(F); iterateTouchedInstructions(); verifyMemoryCongruency(); verifyIterationSettled(F); + verifyStoreExpressions(); Changed |= eliminateInstructions(F); @@ -2620,7 +3167,8 @@ bool NewGVN::runGVN() { if (!ToErase->use_empty()) ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType())); - ToErase->eraseFromParent(); + if (ToErase->getParent()) + ToErase->eraseFromParent(); } // Delete all unreachable blocks. @@ -2639,14 +3187,6 @@ bool NewGVN::runGVN() { return Changed; } -// Return true if V is a value that will always be available (IE can -// be placed anywhere) in the function. We don't do globals here -// because they are often worse to put in place. -// TODO: Separate cost from availability -static bool alwaysAvailable(Value *V) { - return isa(V) || isa(V); -} - struct NewGVN::ValueDFS { int DFSIn = 0; int DFSOut = 0; @@ -2736,9 +3276,21 @@ void NewGVN::convertClassToDFSOrdered( } assert(isa(D) && "The dense set member should always be an instruction"); - VDDef.LocalNum = InstrToDFSNum(D); - DFSOrderedSet.emplace_back(VDDef); Instruction *Def = cast(D); + VDDef.LocalNum = InstrToDFSNum(D); + DFSOrderedSet.push_back(VDDef); + // If there is a phi node equivalent, add it + if (auto *PN = RealToTemp.lookup(Def)) { + auto *PHIE = + dyn_cast_or_null(ValueToExpression.lookup(Def)); + if (PHIE) { + VDDef.Def.setInt(false); + VDDef.Def.setPointer(PN); + VDDef.LocalNum = 0; + DFSOrderedSet.push_back(VDDef); + } + } + unsigned int UseCount = 0; // Now add the uses. for (auto &U : Def->uses()) { @@ -2755,7 +3307,7 @@ void NewGVN::convertClassToDFSOrdered( // they are from. VDUse.LocalNum = InstrDFS.size() + 1; } else { - IBlock = I->getParent(); + IBlock = getBlockForValue(I); VDUse.LocalNum = InstrToDFSNum(I); } @@ -2925,6 +3477,37 @@ private: }; } +// Given a value and a basic block we are trying to see if it is available in, +// see if the value has a leader available in that block. +Value *NewGVN::findPhiOfOpsLeader(const Expression *E, + const BasicBlock *BB) const { + // It would already be constant if we could make it constant + if (auto *CE = dyn_cast(E)) + return CE->getConstantValue(); + if (auto *VE = dyn_cast(E)) + return VE->getVariableValue(); + + auto *CC = ExpressionToClass.lookup(E); + if (!CC) + return nullptr; + if (alwaysAvailable(CC->getLeader())) + return CC->getLeader(); + + for (auto Member : *CC) { + auto *MemberInst = dyn_cast(Member); + // Anything that isn't an instruction is always available. + if (!MemberInst) + return Member; + // If we are looking for something in the same block as the member, it must + // be a leader because this function is looking for operands for a phi node. + if (MemberInst->getParent() == BB || + DT->dominates(MemberInst->getParent(), BB)) { + return Member; + } + } + return nullptr; +} + bool NewGVN::eliminateInstructions(Function &F) { // This is a non-standard eliminator. The normal way to eliminate is // to walk the dominator tree in order, keeping track of available @@ -2955,25 +3538,43 @@ bool NewGVN::eliminateInstructions(Function &F) { // DFS numbers are updated, we compute some ourselves. DT->updateDFSNumbers(); - for (auto &B : F) { - if (!ReachableBlocks.count(&B)) { - for (const auto S : successors(&B)) { - for (auto II = S->begin(); isa(II); ++II) { - auto &Phi = cast(*II); - DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block " - << getBlockName(&B) - << " with undef due to it being unreachable\n"); - for (auto &Operand : Phi.incoming_values()) - if (Phi.getIncomingBlock(Operand) == &B) - Operand.set(UndefValue::get(Phi.getType())); - } + // Go through all of our phi nodes, and kill the arguments associated with + // unreachable edges. + auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) { + for (auto &Operand : PHI.incoming_values()) + if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) { + DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block " + << getBlockName(PHI.getIncomingBlock(Operand)) + << " with undef due to it being unreachable\n"); + Operand.set(UndefValue::get(PHI.getType())); + } + }; + SmallPtrSet BlocksWithPhis; + for (auto &B : F) + if ((!B.empty() && isa(*B.begin())) || + (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end())) + BlocksWithPhis.insert(&B); + DenseMap ReachablePredCount; + for (auto KV : ReachableEdges) + ReachablePredCount[KV.getEnd()]++; + for (auto *BB : BlocksWithPhis) + // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in + // the block and subtract the pred count, but it's more complicated. + if (ReachablePredCount.lookup(BB) != + unsigned(std::distance(pred_begin(BB), pred_end(BB)))) { + for (auto II = BB->begin(); isa(II); ++II) { + auto &PHI = cast(*II); + ReplaceUnreachablePHIArgs(PHI, BB); } + for_each_found(PHIOfOpsPHIs, BB, [&](PHINode *PHI) { + ReplaceUnreachablePHIArgs(*PHI, BB); + }); } - } // Map to store the use counts DenseMap UseCounts; - for (CongruenceClass *CC : reverse(CongruenceClasses)) { + for (auto *CC : reverse(CongruenceClasses)) { + DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() << "\n"); // Track the equivalent store info so we can decide whether to try // dead store elimination. SmallVector PossibleDeadStores; @@ -2982,13 +3583,15 @@ bool NewGVN::eliminateInstructions(Function &F) { continue; // Everything still in the TOP class is unreachable or dead. if (CC == TOPClass) { -#ifndef NDEBUG - for (auto M : *CC) + for (auto M : *CC) { + auto *VTE = ValueToExpression.lookup(M); + if (VTE && isa(VTE)) + markInstructionForDeletion(cast(M)); assert((!ReachableBlocks.count(cast(M)->getParent()) || InstructionsToErase.count(cast(M))) && "Everything in TOP should be unreachable or dead at this " "point"); -#endif + } continue; } @@ -3018,10 +3621,8 @@ bool NewGVN::eliminateInstructions(Function &F) { } CC->swap(MembersLeft); } else { - DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() - << "\n"); // If this is a singleton, we can skip it. - if (CC->size() != 1) { + if (CC->size() != 1 || RealToTemp.lookup(Leader)) { // This is a stack because equality replacement/etc may place // constants in the middle of the member list, and we want to use // those constant values in preference to the current leader, over @@ -3043,6 +3644,22 @@ bool NewGVN::eliminateInstructions(Function &F) { // We ignore void things because we can't get a value from them. if (Def && Def->getType()->isVoidTy()) continue; + auto *DefInst = dyn_cast_or_null(Def); + if (DefInst && AllTempInstructions.count(DefInst)) { + auto *PN = cast(DefInst); + + // If this is a value phi and that's the expression we used, insert + // it into the program + // remove from temp instruction list. + AllTempInstructions.erase(PN); + auto *DefBlock = getBlockForValue(Def); + DEBUG(dbgs() << "Inserting fully real phi of ops" << *Def + << " into block " + << getBlockName(getBlockForValue(Def)) << "\n"); + PN->insertBefore(&DefBlock->front()); + Def = PN; + NumGVNPHIOfOpsEliminations++; + } if (EliminationStack.empty()) { DEBUG(dbgs() << "Elimination Stack is empty\n"); @@ -3127,6 +3744,10 @@ bool NewGVN::eliminateInstructions(Function &F) { Value *DominatingLeader = EliminationStack.back(); + auto *II = dyn_cast(DominatingLeader); + if (II && II->getIntrinsicID() == Intrinsic::ssa_copy) + DominatingLeader = II->getOperand(0); + // Don't replace our existing users with ourselves. if (U->get() == DominatingLeader) continue; @@ -3147,6 +3768,8 @@ bool NewGVN::eliminateInstructions(Function &F) { // It's about to be alive again. if (LeaderUseCount == 0 && isa(DominatingLeader)) ProbablyDead.erase(cast(DominatingLeader)); + if (LeaderUseCount == 0 && II) + ProbablyDead.insert(II); ++LeaderUseCount; AnythingReplaced = true; } @@ -3201,7 +3824,6 @@ bool NewGVN::eliminateInstructions(Function &F) { } } } - return AnythingReplaced; } @@ -3211,19 +3833,23 @@ bool NewGVN::eliminateInstructions(Function &F) { // we will simplify an operation with all constants so that it doesn't matter // what order they appear in. unsigned int NewGVN::getRank(const Value *V) const { - // Prefer undef to anything else + // Prefer constants to undef to anything else + // Undef is a constant, have to check it first. + // Prefer smaller constants to constantexprs + if (isa(V)) + return 2; if (isa(V)) - return 0; - if (isa(V)) return 1; + if (isa(V)) + return 0; else if (auto *A = dyn_cast(V)) - return 2 + A->getArgNo(); + return 3 + A->getArgNo(); // Need to shift the instruction DFS by number of arguments + 3 to account for // the constant and argument ranking above. unsigned Result = InstrToDFSNum(V); if (Result > 0) - return 3 + NumFuncArgs + Result; + return 4 + NumFuncArgs + Result; // Unreachable or something else, just return a really large number. return ~0; } @@ -3237,6 +3863,7 @@ bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const { return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B); } +namespace { class NewGVNLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. @@ -3256,6 +3883,7 @@ private: AU.addPreserved(); } }; +} // namespace bool NewGVNLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 3dcab609078960115a4bcac956f02a1a1d751e03..a20890b22603ea5e812f54c66a36dec188232a1e 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -982,7 +982,7 @@ static unsigned FindInOperandList(SmallVectorImpl &Ops, unsigned i, /// Emit a tree of add instructions, summing Ops together /// and returning the result. Insert the tree before I. static Value *EmitAddTreeOfValues(Instruction *I, - SmallVectorImpl &Ops){ + SmallVectorImpl &Ops) { if (Ops.size() == 1) return Ops.back(); Value *V1 = Ops.back(); @@ -1559,7 +1559,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); - SmallVector NewMulOps; + SmallVector NewMulOps; for (unsigned i = 0; i != Ops.size(); ++i) { // Only try to remove factors from expressions we're allowed to. BinaryOperator *BOp = @@ -1582,7 +1582,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I, } // No need for extra uses anymore. - delete DummyInst; + DummyInst->deleteValue(); unsigned NumAddedValues = NewMulOps.size(); Value *V = EmitAddTreeOfValues(I, NewMulOps); @@ -1922,7 +1922,7 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) { // User must be a binary operator with one or more uses. Instruction *User = I->user_back(); - if (!isa(User) || !User->hasNUsesOrMore(1)) + if (!isa(User) || User->use_empty()) return nullptr; unsigned UserOpcode = User->getOpcode(); diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 615029dd161bb552d2a0bfed464093e82b20a204..96295683314cfaf14f65f9dfcf262122151d5b8c 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -25,6 +24,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f344eb151464a6c82c5deab93256c8fa4c03afd3..a52739bb76f71ecf3e237870c139af8c2047e937 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -7,20 +7,19 @@ // //===----------------------------------------------------------------------===// // -// Rewrite an existing set of gc.statepoints such that they make potential -// relocations performed by the garbage collector explicit in the IR. +// Rewrite call/invoke instructions so as to make potential relocations +// performed by the garbage collector explicit in the IR. // //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/MapVector.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Dominators.h" @@ -28,15 +27,16 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -89,10 +89,10 @@ struct RewriteStatepointsForGC : public ModulePass { Changed |= runOnFunction(F); if (Changed) { - // stripNonValidAttributes asserts that shouldRewriteStatepointsIn + // stripNonValidAttributesAndMetadata asserts that shouldRewriteStatepointsIn // returns true for at least one function in the module. Since at least // one function changed, we know that the precondition is satisfied. - stripNonValidAttributes(M); + stripNonValidAttributesAndMetadata(M); } return Changed; @@ -105,20 +105,24 @@ struct RewriteStatepointsForGC : public ModulePass { AU.addRequired(); } - /// The IR fed into RewriteStatepointsForGC may have had attributes implying - /// dereferenceability that are no longer valid/correct after - /// RewriteStatepointsForGC has run. This is because semantically, after + /// The IR fed into RewriteStatepointsForGC may have had attributes and + /// metadata implying dereferenceability that are no longer valid/correct after + /// RewriteStatepointsForGC has run. This is because semantically, after /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire - /// heap. stripNonValidAttributes (conservatively) restores correctness - /// by erasing all attributes in the module that externally imply - /// dereferenceability. - /// Similar reasoning also applies to the noalias attributes. gc.statepoint - /// can touch the entire heap including noalias objects. - void stripNonValidAttributes(Module &M); - - // Helpers for stripNonValidAttributes - void stripNonValidAttributesFromBody(Function &F); + /// heap. stripNonValidAttributesAndMetadata (conservatively) restores + /// correctness by erasing all attributes in the module that externally imply + /// dereferenceability. Similar reasoning also applies to the noalias + /// attributes and metadata. gc.statepoint can touch the entire heap including + /// noalias objects. + void stripNonValidAttributesAndMetadata(Module &M); + + // Helpers for stripNonValidAttributesAndMetadata + void stripNonValidAttributesAndMetadataFromBody(Function &F); void stripNonValidAttributesFromPrototype(Function &F); + // Certain metadata on instructions are invalid after running RS4GC. + // Optimizations that run after RS4GC can incorrectly use this metadata to + // optimize functions. We drop such metadata on the instruction. + void stripInvalidMetadataFromInstruction(Instruction &I); }; } // namespace @@ -1128,39 +1132,23 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. -static AttributeList legalizeCallAttributes(AttributeList AS) { - AttributeList Ret; - - for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { - unsigned Index = AS.getSlotIndex(Slot); - - if (Index == AttributeList::ReturnIndex || - Index == AttributeList::FunctionIndex) { - - for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) { - - // Do not allow certain attributes - just skip them - // Safepoint can not be read only or read none. - if (Attr.hasAttribute(Attribute::ReadNone) || - Attr.hasAttribute(Attribute::ReadOnly)) - continue; - - // These attributes control the generation of the gc.statepoint call / - // invoke itself; and once the gc.statepoint is in place, they're of no - // use. - if (isStatepointDirectiveAttr(Attr)) - continue; - - Ret = Ret.addAttributes( - AS.getContext(), Index, - AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr))); - } - } - - // Just skip parameter attributes for now - } - - return Ret; +static AttributeList legalizeCallAttributes(AttributeList AL) { + if (AL.isEmpty()) + return AL; + + // Remove the readonly, readnone, and statepoint function attributes. + AttrBuilder FnAttrs = AL.getFnAttributes(); + FnAttrs.removeAttribute(Attribute::ReadNone); + FnAttrs.removeAttribute(Attribute::ReadOnly); + for (Attribute A : AL.getFnAttributes()) { + if (isStatepointDirectiveAttr(A)) + FnAttrs.remove(A); + } + + // Just skip parameter and return attributes for now + LLVMContext &Ctx = AL.getContext(); + return AttributeList::get(Ctx, AttributeList::FunctionIndex, + AttributeSet::get(Ctx, FnAttrs)); } /// Helper function to place all gc relocates necessary for the given @@ -1402,13 +1390,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Call->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. - Call->setAttributes(AttributeList::get(Call->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. In case if we can handle this set of attributes - + // set up function attrs directly on statepoint and return attrs later for + // gc_result intrinsic. + Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Call; @@ -1431,13 +1416,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */ Invoke->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain - // function attributes. - AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); - // In case if we can handle this set of attributes - set up function attrs - // directly on statepoint and return attrs later for gc_result intrinsic. - Invoke->setAttributes(AttributeList::get(Invoke->getContext(), - AttributeList::FunctionIndex, - NewAttrs.getFnAttributes())); + // function attributes. In case if we can handle this set of attributes - + // set up function attrs directly on statepoint and return attrs later for + // gc_result intrinsic. + Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes())); Token = Invoke; @@ -2116,9 +2098,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, // live in the IR. We'll remove all of these when done. SmallVector Holders; - // Insert a dummy call with all of the arguments to the vm_state we'll need - // for the actual safepoint insertion. This ensures reference arguments in - // the deopt argument list are considered live through the safepoint (and + // Insert a dummy call with all of the deopt operands we'll need for the + // actual safepoint insertion as arguments. This ensures reference operands + // in the deopt argument list are considered live through the safepoint (and // thus makes sure they get relocated.) for (CallSite CS : ToUpdate) { SmallVector DeoptValues; @@ -2308,12 +2290,11 @@ static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH, if (AH.getDereferenceableOrNullBytes(Index)) R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull, AH.getDereferenceableOrNullBytes(Index))); - if (AH.doesNotAlias(Index)) + if (AH.getAttributes().hasAttribute(Index, Attribute::NoAlias)) R.addAttribute(Attribute::NoAlias); if (!R.empty()) - AH.setAttributes(AH.getAttributes().removeAttributes( - Ctx, Index, AttributeList::get(Ctx, Index, R))); + AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R)); } void @@ -2322,19 +2303,51 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) { for (Argument &A : F.args()) if (isa(A.getType())) - RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1); + RemoveNonValidAttrAtIndex(Ctx, F, + A.getArgNo() + AttributeList::FirstArgIndex); if (isa(F.getReturnType())) RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex); } -void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { +void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) { + + if (!isa(I) && !isa(I)) + return; + // These are the attributes that are still valid on loads and stores after + // RS4GC. + // The metadata implying dereferenceability and noalias are (conservatively) + // dropped. This is because semantically, after RewriteStatepointsForGC runs, + // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can + // touch the entire heap including noalias objects. Note: The reasoning is + // same as stripping the dereferenceability and noalias attributes that are + // analogous to the metadata counterparts. + // We also drop the invariant.load metadata on the load because that metadata + // implies the address operand to the load points to memory that is never + // changed once it became dereferenceable. This is no longer true after RS4GC. + // Similar reasoning applies to invariant.group metadata, which applies to + // loads within a group. + unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_alias_scope, + LLVMContext::MD_nontemporal, + LLVMContext::MD_nonnull, + LLVMContext::MD_align, + LLVMContext::MD_type}; + + // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC. + I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC); + +} + +void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Function &F) { if (F.empty()) return; LLVMContext &Ctx = F.getContext(); MDBuilder Builder(Ctx); + for (Instruction &I : instructions(F)) { if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) { assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!"); @@ -2355,10 +2368,12 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) { I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA); } + stripInvalidMetadataFromInstruction(I); + if (CallSite CS = CallSite(&I)) { for (int i = 0, e = CS.arg_size(); i != e; i++) if (isa(CS.getArgument(i)->getType())) - RemoveNonValidAttrAtIndex(Ctx, CS, i + 1); + RemoveNonValidAttrAtIndex(Ctx, CS, i + AttributeList::FirstArgIndex); if (isa(CS.getType())) RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex); } @@ -2379,7 +2394,7 @@ static bool shouldRewriteStatepointsIn(Function &F) { return false; } -void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) { +void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) { #ifndef NDEBUG assert(any_of(M, shouldRewriteStatepointsIn) && "precondition!"); #endif @@ -2388,7 +2403,7 @@ void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) { stripNonValidAttributesFromPrototype(F); for (Function &F : M) - stripNonValidAttributesFromBody(F); + stripNonValidAttributesAndMetadataFromBody(F); } bool RewriteStatepointsForGC::runOnFunction(Function &F) { diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 8908dae2f5459a42c843cbd326ee181a09484f8d..7a6fa1711411d62ac8ffc383bf4eedf368f85a11 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -515,10 +515,6 @@ private: void visitCmpInst(CmpInst &I); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); - void visitLandingPadInst(LandingPadInst &I) { markOverdefined(&I); } - void visitFuncletPadInst(FuncletPadInst &FPI) { - markOverdefined(&FPI); - } void visitCatchSwitchInst(CatchSwitchInst &CPI) { markOverdefined(&CPI); visitTerminatorInst(CPI); @@ -539,17 +535,11 @@ private: void visitResumeInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitFenceInst (FenceInst &I) { /*returns void*/ } - void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { - markOverdefined(&I); - } - void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); } - void visitAllocaInst (Instruction &I) { markOverdefined(&I); } - void visitVAArgInst (Instruction &I) { markOverdefined(&I); } - void visitInstruction(Instruction &I) { - // If a new instruction is added to LLVM that we don't handle. + // All the instructions we don't do any special handling for just + // go to overdefined. DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n'); - markOverdefined(&I); // Just in case + markOverdefined(&I); } }; @@ -1117,7 +1107,7 @@ CallOverdefined: // Otherwise, if we have a single return value case, and if the function is // a declaration, maybe we can constant fold it. if (F && F->isDeclaration() && !I->getType()->isStructTy() && - canConstantFoldCallTo(F)) { + canConstantFoldCallTo(CS, F)) { SmallVector Operands; for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end(); @@ -1137,7 +1127,7 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands, TLI)) { + if (Constant *C = ConstantFoldCall(CS, F, Operands, TLI)) { // call -> undef. if (isa(C)) return; @@ -1779,8 +1769,9 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. if (F.hasLocalLinkage()) { - if (AddressIsTaken(&F)) + if (F.hasAddressTaken()) { AddressTakenFunctions.insert(&F); + } else { Solver.AddArgumentTrackedFunction(&F); continue; @@ -1824,15 +1815,11 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, if (F.isDeclaration()) continue; - if (Solver.isBlockExecutable(&F.front())) { + if (Solver.isBlockExecutable(&F.front())) for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E; - ++AI) { - if (AI->use_empty()) - continue; - if (tryToReplaceWithConstant(Solver, &*AI)) + ++AI) + if (!AI->use_empty() && tryToReplaceWithConstant(Solver, &*AI)) ++IPNumArgsElimed; - } - } for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (!Solver.isBlockExecutable(&*BB)) { diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d01e91a7f2356f4261193881a19b02605579372b..1527f15f18a3317f992ba322dcc8f04a88455fbd 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -25,6 +25,7 @@ #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -325,7 +326,7 @@ private: /// partition. uint64_t BeginOffset, EndOffset; - /// \brief The start end end iterators of this partition. + /// \brief The start and end iterators of this partition. iterator SI, SJ; /// \brief A collection of split slice tails overlapping the partition. @@ -2186,8 +2187,8 @@ class llvm::sroa::AllocaSliceRewriter Instruction *OldPtr; // Track post-rewrite users which are PHI nodes and Selects. - SmallPtrSetImpl &PHIUsers; - SmallPtrSetImpl &SelectUsers; + SmallSetVector &PHIUsers; + SmallSetVector &SelectUsers; // Utility IR builder, whose name prefix is setup for each visited use, and // the insertion point is set to point to the user. @@ -2199,8 +2200,8 @@ public: uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, - SmallPtrSetImpl &PHIUsers, - SmallPtrSetImpl &SelectUsers) + SmallSetVector &PHIUsers, + SmallSetVector &SelectUsers) : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), @@ -2442,7 +2443,7 @@ private: "insert"); LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); - delete Placeholder; + Placeholder->deleteValue(); } else { LI.replaceAllUsesWith(V); } @@ -3625,10 +3626,12 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { auto *PartPtrTy = PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); + auto AS = SI->getPointerAddressSpace(); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, StoreBasePtr->getName() + "."), + PLoad, + getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(AS), PartOffset), + PartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); @@ -3697,7 +3700,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { int Idx = 0, Size = Offsets.Splits.size(); for (;;) { auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8); - auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); + auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace()); + auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); // Either lookup a split load or create one. LoadInst *PLoad; @@ -3705,20 +3709,23 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PLoad = (*SplitLoads)[Idx]; } else { IRB.SetInsertPoint(LI); + auto AS = LI->getPointerAddressSpace(); PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, LoadBasePtr->getName() + "."), + APInt(DL.getPointerSizeInBits(AS), PartOffset), + LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); } // And store this partition. IRB.SetInsertPoint(SI); + auto AS = SI->getPointerAddressSpace(); StoreInst *PStore = IRB.CreateAlignedStore( - PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(), PartOffset), - PartPtrTy, StoreBasePtr->getName() + "."), + PLoad, + getAdjustedPtr(IRB, DL, StoreBasePtr, + APInt(DL.getPointerSizeInBits(AS), PartOffset), + StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); // Now build a new slice for the alloca. @@ -3880,8 +3887,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // fact scheduled for promotion. unsigned PPWOldSize = PostPromotionWorklist.size(); unsigned NumUses = 0; - SmallPtrSet PHIUsers; - SmallPtrSet SelectUsers; + SmallSetVector PHIUsers; + SmallSetVector SelectUsers; AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, @@ -3897,24 +3904,20 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, } NumAllocaPartitionUses += NumUses; - MaxUsesPerAllocaPartition = - std::max(NumUses, MaxUsesPerAllocaPartition); + MaxUsesPerAllocaPartition.updateMax(NumUses); // Now that we've processed all the slices in the new partition, check if any // PHIs or Selects would block promotion. - for (SmallPtrSetImpl::iterator I = PHIUsers.begin(), - E = PHIUsers.end(); - I != E; ++I) - if (!isSafePHIToSpeculate(**I)) { + for (PHINode *PHI : PHIUsers) + if (!isSafePHIToSpeculate(*PHI)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); break; } - for (SmallPtrSetImpl::iterator I = SelectUsers.begin(), - E = SelectUsers.end(); - I != E; ++I) - if (!isSafeSelectToSpeculate(**I)) { + + for (SelectInst *Sel : SelectUsers) + if (!isSafeSelectToSpeculate(*Sel)) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); @@ -4018,8 +4021,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { } NumAllocaPartitions += NumPartitions; - MaxPartitionsPerAlloca = - std::max(NumPartitions, MaxPartitionsPerAlloca); + MaxPartitionsPerAlloca.updateMax(NumPartitions); // Migrate debug information from the old alloca to the new alloca(s) // and the individual partitions. diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 00e3c95f6f06dc7171efa3593b12a0d8e9ae65fe..850a01114eeba817d98c655a68bd94bc3cbde9a6 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -20,11 +20,12 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" -#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" using namespace llvm; @@ -47,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSEMemSSALegacyPassPass(Registry); initializeGVNHoistLegacyPassPass(Registry); + initializeGVNSinkLegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); initializeInductiveRangeCheckEliminationPass(Registry); initializeIndVarSimplifyLegacyPassPass(Registry); @@ -83,6 +85,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeCFGSimplifyPassPass(Registry); initializeLateCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); + initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index c0c09a7e43fe93abb054d4c70d4dcac1d8ff955f..d11855f2f3a93e709806e7aa893b59571bc7c9ee 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,12 +14,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4d594532c3651a99f9f2ebb08147e4eacf135daa..84675f41cdd5eb0f8e25f5bf860e31c60dc1d7ab 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -156,27 +156,27 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/IR/IRBuilder.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -1138,7 +1138,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) { // Add I to DominatingExprs if it's an add/sub that can't sign overflow. if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) || match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) { - if (isKnownNotFullPoison(I)) { + if (programUndefinedIfFullPoison(I)) { const SCEV *Key = SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS)); DominatingExprs[Key].push_back(I); diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aaab5857e0f102c9cad5adbf56f96e223cb68408 --- /dev/null +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -0,0 +1,808 @@ +//===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include +#include +#include + +#define DEBUG_TYPE "simple-loop-unswitch" + +using namespace llvm; + +STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumTrivial, "Number of unswitches that are trivial"); + +static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, + Constant &Replacement) { + assert(!isa(LIC) && "Why are we unswitching on a constant?"); + + // Replace uses of LIC in the loop with the given constant. + for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) { + // Grab the use and walk past it so we can clobber it in the use list. + Use *U = &*UI++; + Instruction *UserI = dyn_cast(U->getUser()); + if (!UserI || !L.contains(UserI)) + continue; + + // Replace this use within the loop body. + *U = &Replacement; + } +} + +/// Update the dominator tree after removing one exiting predecessor of a loop +/// exit block. +static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, + DominatorTree &DT) { + assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && + "Cannot have empty predecessors of the loop exit block if we split " + "off a block to unswitch!"); + + BasicBlock *IDom = *pred_begin(LoopExitBB); + // Walk all of the other predecessors finding the nearest common dominator + // until all predecessors are covered or we reach the loop header. The loop + // header necessarily dominates all loop exit blocks in loop simplified form + // so we can early-exit the moment we hit that block. + for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB); + PI != PE && IDom != L.getHeader(); ++PI) + IDom = DT.findNearestCommonDominator(IDom, *PI); + + DT.changeImmediateDominator(LoopExitBB, IDom); +} + +/// Update the dominator tree after unswitching a particular former exit block. +/// +/// This handles the full update of the dominator tree after hoisting a block +/// that previously was an exit block (or split off of an exit block) up to be +/// reached from the new immediate dominator of the preheader. +/// +/// The common case is simple -- we just move the unswitched block to have an +/// immediate dominator of the old preheader. But in complex cases, there may +/// be other blocks reachable from the unswitched block that are immediately +/// dominated by some node between the unswitched one and the old preheader. +/// All of these also need to be hoisted in the dominator tree. We also want to +/// minimize queries to the dominator tree because each step of this +/// invalidates any DFS numbers that would make queries fast. +static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, + DominatorTree &DT) { + DomTreeNode *OldPHNode = DT[OldPH]; + DomTreeNode *UnswitchedNode = DT[UnswitchedBB]; + // If the dominator tree has already been updated for this unswitched node, + // we're done. This makes it easier to use this routine if there are multiple + // paths to the same unswitched destination. + if (UnswitchedNode->getIDom() == OldPHNode) + return; + + // First collect the domtree nodes that we are hoisting over. These are the + // set of nodes which may have children that need to be hoisted as well. + SmallPtrSet DomChain; + for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode; + IDom = IDom->getIDom()) + DomChain.insert(IDom); + + // The unswitched block ends up immediately dominated by the old preheader -- + // regardless of whether it is the loop exit block or split off of the loop + // exit block. + DT.changeImmediateDominator(UnswitchedNode, OldPHNode); + + // For everything that moves up the dominator tree, we need to examine the + // dominator frontier to see if it additionally should move up the dominator + // tree. This lambda appends the dominator frontier for a node on the + // worklist. + // + // Note that we don't currently use the IDFCalculator here for two reasons: + // 1) It computes dominator tree levels for the entire function on each run + // of 'compute'. While this isn't terrible, given that we expect to update + // relatively small subtrees of the domtree, it isn't necessarily the right + // tradeoff. + // 2) The interface doesn't fit this usage well. It doesn't operate in + // append-only, and builds several sets that we don't need. + // + // FIXME: Neither of these issues are a big deal and could be addressed with + // some amount of refactoring of IDFCalculator. That would allow us to share + // the core logic here (which is solving the same core problem). + SmallSetVector Worklist; + SmallVector DomNodes; + SmallPtrSet DomSet; + auto AppendDomFrontier = [&](DomTreeNode *Node) { + assert(DomNodes.empty() && "Must start with no dominator nodes."); + assert(DomSet.empty() && "Must start with an empty dominator set."); + + // First flatten this subtree into sequence of nodes by doing a pre-order + // walk. + DomNodes.push_back(Node); + // We intentionally re-evaluate the size as each node can add new children. + // Because this is a tree walk, this cannot add any duplicates. + for (int i = 0; i < (int)DomNodes.size(); ++i) + DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end()); + + // Now create a set of the basic blocks so we can quickly test for + // dominated successors. We could in theory use the DFS numbers of the + // dominator tree for this, but we want this to remain predictably fast + // even while we mutate the dominator tree in ways that would invalidate + // the DFS numbering. + for (DomTreeNode *InnerN : DomNodes) + DomSet.insert(InnerN->getBlock()); + + // Now re-walk the nodes, appending every successor of every node that isn't + // in the set. Note that we don't append the node itself, even though if it + // is a successor it does not strictly dominate itself and thus it would be + // part of the dominance frontier. The reason we don't append it is that + // the node passed in came *from* the worklist and so it has already been + // processed. + for (DomTreeNode *InnerN : DomNodes) + for (BasicBlock *SuccBB : successors(InnerN->getBlock())) + if (!DomSet.count(SuccBB)) + Worklist.insert(SuccBB); + + DomNodes.clear(); + DomSet.clear(); + }; + + // Append the initial dom frontier nodes. + AppendDomFrontier(UnswitchedNode); + + // Walk the worklist. We grow the list in the loop and so must recompute size. + for (int i = 0; i < (int)Worklist.size(); ++i) { + auto *BB = Worklist[i]; + + DomTreeNode *Node = DT[BB]; + assert(!DomChain.count(Node) && + "Cannot be dominated by a block you can reach!"); + + // If this block had an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree needs to move as it is + // reachable from a node hoisted over this chain. + if (!DomChain.count(Node->getIDom())) + continue; + + DT.changeImmediateDominator(Node, OldPHNode); + + // Now add this node's dominator frontier to the worklist as well. + AppendDomFrontier(Node); + } +} + +/// Check that all the LCSSA PHI nodes in the loop exit block have trivial +/// incoming values along this edge. +static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB, + BasicBlock &ExitBB) { + for (Instruction &I : ExitBB) { + auto *PN = dyn_cast(&I); + if (!PN) + // No more PHIs to check. + return true; + + // If the incoming value for this edge isn't loop invariant the unswitch + // won't be trivial. + if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB))) + return false; + } + llvm_unreachable("Basic blocks should never be empty!"); +} + +/// Rewrite the PHI nodes in an unswitched loop exit basic block. +/// +/// Requires that the loop exit and unswitched basic block are the same, and +/// that the exiting block was a unique predecessor of that block. Rewrites the +/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial +/// PHI nodes from the old preheader that now contains the unswitched +/// terminator. +static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB, + BasicBlock &OldExitingBB, + BasicBlock &OldPH) { + for (Instruction &I : UnswitchedBB) { + auto *PN = dyn_cast(&I); + if (!PN) + // No more PHIs to check. + break; + + // When the loop exit is directly unswitched we just need to update the + // incoming basic block. We loop to handle weird cases with repeated + // incoming blocks, but expect to typically only have one operand here. + for (auto i : seq(0, PN->getNumOperands())) { + assert(PN->getIncomingBlock(i) == &OldExitingBB && + "Found incoming block different from unique predecessor!"); + PN->setIncomingBlock(i, &OldPH); + } + } +} + +/// Rewrite the PHI nodes in the loop exit basic block and the split off +/// unswitched block. +/// +/// Because the exit block remains an exit from the loop, this rewrites the +/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI +/// nodes into the unswitched basic block to select between the value in the +/// old preheader and the loop exit. +static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB, + BasicBlock &UnswitchedBB, + BasicBlock &OldExitingBB, + BasicBlock &OldPH) { + assert(&ExitBB != &UnswitchedBB && + "Must have different loop exit and unswitched blocks!"); + Instruction *InsertPt = &*UnswitchedBB.begin(); + for (Instruction &I : ExitBB) { + auto *PN = dyn_cast(&I); + if (!PN) + // No more PHIs to check. + break; + + auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2, + PN->getName() + ".split", InsertPt); + + // Walk backwards over the old PHI node's inputs to minimize the cost of + // removing each one. We have to do this weird loop manually so that we + // create the same number of new incoming edges in the new PHI as we expect + // each case-based edge to be included in the unswitched switch in some + // cases. + // FIXME: This is really, really gross. It would be much cleaner if LLVM + // allowed us to create a single entry for a predecessor block without + // having separate entries for each "edge" even though these edges are + // required to produce identical results. + for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) { + if (PN->getIncomingBlock(i) != &OldExitingBB) + continue; + + Value *Incoming = PN->removeIncomingValue(i); + NewPN->addIncoming(Incoming, &OldPH); + } + + // Now replace the old PHI with the new one and wire the old one in as an + // input to the new one. + PN->replaceAllUsesWith(NewPN); + NewPN->addIncoming(PN, &ExitBB); + } +} + +/// Unswitch a trivial branch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the branch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and one of the successors is a loop exit. This +/// allows us to unswitch without duplicating the loop, making it trivial. +/// +/// If this routine fails to unswitch the branch it returns false. +/// +/// If the branch can be unswitched, this routine splits the preheader and +/// hoists the branch above that split. Preserves loop simplified form +/// (splitting the exit block as necessary). It simplifies the branch within +/// the loop to an unconditional branch but doesn't remove it entirely. Further +/// cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, + LoopInfo &LI) { + assert(BI.isConditional() && "Can only unswitch a conditional branch!"); + DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n"); + + Value *LoopCond = BI.getCondition(); + + // Need a trivial loop condition to unswitch. + if (!L.isLoopInvariant(LoopCond)) + return false; + + // FIXME: We should compute this once at the start and update it! + SmallVector ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any + // side-effects. If so, determine the value of Cond that causes + // it to do this. + ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext()); + ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext()); + int LoopExitSuccIdx = 0; + auto *LoopExitBB = BI.getSuccessor(0); + if (!ExitBlockSet.count(LoopExitBB)) { + std::swap(CondVal, Replacement); + LoopExitSuccIdx = 1; + LoopExitBB = BI.getSuccessor(1); + if (!ExitBlockSet.count(LoopExitBB)) + return false; + } + auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); + assert(L.contains(ContinueBB) && + "Cannot have both successors exit and still be in the loop!"); + + auto *ParentBB = BI.getParent(); + if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) + return false; + + DEBUG(dbgs() << " unswitching trivial branch when: " << CondVal + << " == " << LoopCond << "\n"); + + // Split the preheader, so that we know that there is a safe place to insert + // the conditional branch. We will change the preheader to have a conditional + // branch on LoopCond. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + + // Now that we have a place to insert the conditional branch, create a place + // to branch to: this is the exit block out of the loop that we are + // unswitching. We need to split this if there are other loop predecessors. + // Because the loop is in simplified form, *any* other predecessor is enough. + BasicBlock *UnswitchedBB; + if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { + (void)PredBB; + assert(PredBB == BI.getParent() && + "A branch's parent isn't a predecessor!"); + UnswitchedBB = LoopExitBB; + } else { + UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); + } + + // Now splice the branch to gate reaching the new preheader and re-point its + // successors. + OldPH->getInstList().splice(std::prev(OldPH->end()), + BI.getParent()->getInstList(), BI); + OldPH->getTerminator()->eraseFromParent(); + BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); + BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); + + // Create a new unconditional branch that will continue the loop as a new + // terminator. + BranchInst::Create(ContinueBB, ParentBB); + + // Rewrite the relevant PHI nodes. + if (UnswitchedBB == LoopExitBB) + rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH); + else + rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB, + *ParentBB, *OldPH); + + // Now we need to update the dominator tree. + updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + // But if we split something off of the loop exit block then we also removed + // one of the predecessors for the loop exit block and may need to update its + // idom. + if (UnswitchedBB != LoopExitBB) + updateLoopExitIDom(LoopExitBB, L, DT); + + // Since this is an i1 condition we can also trivially replace uses of it + // within the loop with a constant. + replaceLoopUsesWithConstant(L, *LoopCond, *Replacement); + + ++NumTrivial; + ++NumBranches; + return true; +} + +/// Unswitch a trivial switch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the switch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and that at least one of the successors is a loop +/// exit. This allows us to unswitch without duplicating the loop, making it +/// trivial. +/// +/// If this routine fails to unswitch the switch it returns false. +/// +/// If the switch can be unswitched, this routine splits the preheader and +/// copies the switch above that split. If the default case is one of the +/// exiting cases, it copies the non-exiting cases and points them at the new +/// preheader. If the default case is not exiting, it copies the exiting cases +/// and points the default at the preheader. It preserves loop simplified form +/// (splitting the exit blocks as necessary). It simplifies the switch within +/// the loop by removing now-dead cases. If the default case is one of those +/// unswitched, it replaces its destination with a new basic block containing +/// only unreachable. Such basic blocks, while technically loop exits, are not +/// considered for unswitching so this is a stable transform and the same +/// switch will not be revisited. If after unswitching there is only a single +/// in-loop successor, the switch is further simplified to an unconditional +/// branch. Still more cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, + LoopInfo &LI) { + DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n"); + Value *LoopCond = SI.getCondition(); + + // If this isn't switching on an invariant condition, we can't unswitch it. + if (!L.isLoopInvariant(LoopCond)) + return false; + + auto *ParentBB = SI.getParent(); + + // FIXME: We should compute this once at the start and update it! + SmallVector ExitBlocks; + L.getExitBlocks(ExitBlocks); + SmallPtrSet ExitBlockSet(ExitBlocks.begin(), + ExitBlocks.end()); + + SmallVector ExitCaseIndices; + for (auto Case : SI.cases()) { + auto *SuccBB = Case.getCaseSuccessor(); + if (ExitBlockSet.count(SuccBB) && + areLoopExitPHIsLoopInvariant(L, *ParentBB, *SuccBB)) + ExitCaseIndices.push_back(Case.getCaseIndex()); + } + BasicBlock *DefaultExitBB = nullptr; + if (ExitBlockSet.count(SI.getDefaultDest()) && + areLoopExitPHIsLoopInvariant(L, *ParentBB, *SI.getDefaultDest()) && + !isa(SI.getDefaultDest()->getTerminator())) + DefaultExitBB = SI.getDefaultDest(); + else if (ExitCaseIndices.empty()) + return false; + + DEBUG(dbgs() << " unswitching trivial cases...\n"); + + SmallVector, 4> ExitCases; + ExitCases.reserve(ExitCaseIndices.size()); + // We walk the case indices backwards so that we remove the last case first + // and don't disrupt the earlier indices. + for (unsigned Index : reverse(ExitCaseIndices)) { + auto CaseI = SI.case_begin() + Index; + // Save the value of this case. + ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()}); + // Delete the unswitched cases. + SI.removeCase(CaseI); + } + + // Check if after this all of the remaining cases point at the same + // successor. + BasicBlock *CommonSuccBB = nullptr; + if (SI.getNumCases() > 0 && + std::all_of(std::next(SI.case_begin()), SI.case_end(), + [&SI](const SwitchInst::CaseHandle &Case) { + return Case.getCaseSuccessor() == + SI.case_begin()->getCaseSuccessor(); + })) + CommonSuccBB = SI.case_begin()->getCaseSuccessor(); + + if (DefaultExitBB) { + // We can't remove the default edge so replace it with an edge to either + // the single common remaining successor (if we have one) or an unreachable + // block. + if (CommonSuccBB) { + SI.setDefaultDest(CommonSuccBB); + } else { + BasicBlock *UnreachableBB = BasicBlock::Create( + ParentBB->getContext(), + Twine(ParentBB->getName()) + ".unreachable_default", + ParentBB->getParent()); + new UnreachableInst(ParentBB->getContext(), UnreachableBB); + SI.setDefaultDest(UnreachableBB); + DT.addNewBlock(UnreachableBB, ParentBB); + } + } else { + // If we're not unswitching the default, we need it to match any cases to + // have a common successor or if we have no cases it is the common + // successor. + if (SI.getNumCases() == 0) + CommonSuccBB = SI.getDefaultDest(); + else if (SI.getDefaultDest() != CommonSuccBB) + CommonSuccBB = nullptr; + } + + // Split the preheader, so that we know that there is a safe place to insert + // the switch. + BasicBlock *OldPH = L.getLoopPreheader(); + BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + OldPH->getTerminator()->eraseFromParent(); + + // Now add the unswitched switch. + auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); + + // Rewrite the IR for the unswitched basic blocks. This requires two steps. + // First, we split any exit blocks with remaining in-loop predecessors. Then + // we update the PHIs in one of two ways depending on if there was a split. + // We walk in reverse so that we split in the same order as the cases + // appeared. This is purely for convenience of reading the resulting IR, but + // it doesn't cost anything really. + SmallPtrSet UnswitchedExitBBs; + SmallDenseMap SplitExitBBMap; + // Handle the default exit if necessary. + // FIXME: It'd be great if we could merge this with the loop below but LLVM's + // ranges aren't quite powerful enough yet. + if (DefaultExitBB) { + if (pred_empty(DefaultExitBB)) { + UnswitchedExitBBs.insert(DefaultExitBB); + rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH); + } else { + auto *SplitBB = + SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); + rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB, + *ParentBB, *OldPH); + updateLoopExitIDom(DefaultExitBB, L, DT); + DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; + } + } + // Note that we must use a reference in the for loop so that we update the + // container. + for (auto &CasePair : reverse(ExitCases)) { + // Grab a reference to the exit block in the pair so that we can update it. + BasicBlock *ExitBB = CasePair.second; + + // If this case is the last edge into the exit block, we can simply reuse it + // as it will no longer be a loop exit. No mapping necessary. + if (pred_empty(ExitBB)) { + // Only rewrite once. + if (UnswitchedExitBBs.insert(ExitBB).second) + rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH); + continue; + } + + // Otherwise we need to split the exit block so that we retain an exit + // block from the loop and a target for the unswitched condition. + BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; + if (!SplitExitBB) { + // If this is the first time we see this, do the split and remember it. + SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); + rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB, + *ParentBB, *OldPH); + updateLoopExitIDom(ExitBB, L, DT); + } + // Update the case pair to point to the split block. + CasePair.second = SplitExitBB; + } + + // Now add the unswitched cases. We do this in reverse order as we built them + // in reverse order. + for (auto CasePair : reverse(ExitCases)) { + ConstantInt *CaseVal = CasePair.first; + BasicBlock *UnswitchedBB = CasePair.second; + + NewSI->addCase(CaseVal, UnswitchedBB); + updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); + } + + // If the default was unswitched, re-point it and add explicit cases for + // entering the loop. + if (DefaultExitBB) { + NewSI->setDefaultDest(DefaultExitBB); + updateDTAfterUnswitch(DefaultExitBB, OldPH, DT); + + // We removed all the exit cases, so we just copy the cases to the + // unswitched switch. + for (auto Case : SI.cases()) + NewSI->addCase(Case.getCaseValue(), NewPH); + } + + // If we ended up with a common successor for every path through the switch + // after unswitching, rewrite it to an unconditional branch to make it easy + // to recognize. Otherwise we potentially have to recognize the default case + // pointing at unreachable and other complexity. + if (CommonSuccBB) { + BasicBlock *BB = SI.getParent(); + SI.eraseFromParent(); + BranchInst::Create(CommonSuccBB, BB); + } + + DT.verifyDomTree(); + ++NumTrivial; + ++NumSwitches; + return true; +} + +/// This routine scans the loop to find a branch or switch which occurs before +/// any side effects occur. These can potentially be unswitched without +/// duplicating the loop. If a branch or switch is successfully unswitched the +/// scanning continues to see if subsequent branches or switches have become +/// trivial. Once all trivial candidates have been unswitched, this routine +/// returns. +/// +/// The return value indicates whether anything was unswitched (and therefore +/// changed). +static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, + LoopInfo &LI) { + bool Changed = false; + + // If loop header has only one reachable successor we should keep looking for + // trivial condition candidates in the successor as well. An alternative is + // to constant fold conditions and merge successors into loop header (then we + // only need to check header's terminator). The reason for not doing this in + // LoopUnswitch pass is that it could potentially break LoopPassManager's + // invariants. Folding dead branches could either eliminate the current loop + // or make other loops unreachable. LCSSA form might also not be preserved + // after deleting branches. The following code keeps traversing loop header's + // successors until it finds the trivial condition candidate (condition that + // is not a constant). Since unswitching generates branches with constant + // conditions, this scenario could be very common in practice. + BasicBlock *CurrentBB = L.getHeader(); + SmallPtrSet Visited; + Visited.insert(CurrentBB); + do { + // Check if there are any side-effecting instructions (e.g. stores, calls, + // volatile loads) in the part of the loop that the code *would* execute + // without unswitching. + if (llvm::any_of(*CurrentBB, + [](Instruction &I) { return I.mayHaveSideEffects(); })) + return Changed; + + TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); + + if (auto *SI = dyn_cast(CurrentTerm)) { + // Don't bother trying to unswitch past a switch with a constant + // condition. This should be removed prior to running this pass by + // simplify-cfg. + if (isa(SI->getCondition())) + return Changed; + + if (!unswitchTrivialSwitch(L, *SI, DT, LI)) + // Coludn't unswitch this one so we're done. + return Changed; + + // Mark that we managed to unswitch something. + Changed = true; + + // If unswitching turned the terminator into an unconditional branch then + // we can continue. The unswitching logic specifically works to fold any + // cases it can into an unconditional branch to make it easier to + // recognize here. + auto *BI = dyn_cast(CurrentBB->getTerminator()); + if (!BI || BI->isConditional()) + return Changed; + + CurrentBB = BI->getSuccessor(0); + continue; + } + + auto *BI = dyn_cast(CurrentTerm); + if (!BI) + // We do not understand other terminator instructions. + return Changed; + + // Don't bother trying to unswitch past an unconditional branch or a branch + // with a constant value. These should be removed by simplify-cfg prior to + // running this pass. + if (!BI->isConditional() || isa(BI->getCondition())) + return Changed; + + // Found a trivial condition candidate: non-foldable conditional branch. If + // we fail to unswitch this, we can't do anything else that is trivial. + if (!unswitchTrivialBranch(L, *BI, DT, LI)) + return Changed; + + // Mark that we managed to unswitch something. + Changed = true; + + // We unswitched the branch. This should always leave us with an + // unconditional branch that we can follow now. + BI = cast(CurrentBB->getTerminator()); + assert(!BI->isConditional() && + "Cannot form a conditional branch by unswitching1"); + CurrentBB = BI->getSuccessor(0); + + // When continuing, if we exit the loop or reach a previous visited block, + // then we can not reach any trivial condition candidates (unfoldable + // branch instructions or switch instructions) and no unswitch can happen. + } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); + + return Changed; +} + +/// Unswitch control flow predicated on loop invariant conditions. +/// +/// This first hoists all branches or switches which are trivial (IE, do not +/// require duplicating any part of the loop) out of the loop body. It then +/// looks at other loop invariant control flows and tries to unswitch those as +/// well by cloning the loop if the result is small enough. +static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, + AssumptionCache &AC) { + assert(L.isLCSSAForm(DT) && + "Loops must be in LCSSA form before unswitching."); + bool Changed = false; + + // Must be in loop simplified form: we need a preheader and dedicated exits. + if (!L.isLoopSimplifyForm()) + return false; + + // Try trivial unswitch first before loop over other basic blocks in the loop. + Changed |= unswitchAllTrivialConditions(L, DT, LI); + + // FIXME: Add support for non-trivial unswitching by cloning the loop. + + return Changed; +} + +PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + Function &F = *L.getHeader()->getParent(); + (void)F; + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); + + if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC)) + return PreservedAnalyses::all(); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds. + AR.DT.verifyDomTree(); +#endif + return getLoopPassPreservedAnalyses(); +} + +namespace { + +class SimpleLoopUnswitchLegacyPass : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + + explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) { + initializeSimpleLoopUnswitchLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + getLoopAnalysisUsage(AU); + } +}; + +} // end anonymous namespace + +bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipLoop(L)) + return false; + + Function &F = *L->getHeader()->getParent(); + + DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n"); + + auto &DT = getAnalysis().getDomTree(); + auto &LI = getAnalysis().getLoopInfo(); + auto &AC = getAnalysis().getAssumptionCache(F); + + bool Changed = unswitchLoop(*L, DT, LI, AC); + +#ifndef NDEBUG + // Historically this pass has had issues with the dominator tree so verify it + // in asserts builds. + DT.verifyDomTree(); +#endif + return Changed; +} + +char SimpleLoopUnswitchLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", + "Simple unswitch loops", false, false) + +Pass *llvm::createSimpleLoopUnswitchLegacyPass() { + return new SimpleLoopUnswitchLegacyPass(); +} diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 102e9eaeab772e8eb58be4454ae5f4706fe7c71b..5210f165b8742171050ea395870f36d41e389ea1 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -114,7 +114,7 @@ static bool IsAcceptableTarget(Instruction *Inst, BasicBlock *SuccToSinkTo, if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. - if (!isSafeToSpeculativelyExecute(Inst)) + if (isa(Inst)) return false; // We don't want to sink across a critical edge if we don't dominate the diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 2be3f5c533b9a607ec2d56c64849bb7ba46590ad..8b8d6590aa6a08dcd9d6ca671aa5f606c39e277d 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -693,7 +693,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { UnlinkedInst->setOperand(I, nullptr); RecursivelyDeleteTriviallyDeadInstructions(Op); } - delete UnlinkedInst; + UnlinkedInst->deleteValue(); } bool Ret = !UnlinkedInstructions.empty(); UnlinkedInstructions.clear(); diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 49ce0262c97b01fdb1406a2f65e8a5dba64bafdc..486f3e5a43d49a2d854f94273470228bafa2acf9 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SCCIterator.h" @@ -20,6 +19,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index a6b9fee1d8acbc5bbc6669cddb93b1f20421d744..3e5993618c4c0cead9b4ad017abdf9c9a7cc348b 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -51,13 +51,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/TailRecursionElimination.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -76,6 +75,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -492,11 +492,10 @@ static CallInst *findTRECandidate(Instruction *TI, return CI; } -static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, - BasicBlock *&OldEntry, - bool &TailCallsAreMarkedTail, - SmallVectorImpl &ArgumentPHIs, - bool CannotTailCallElimCallsMarkedTail) { +static bool +eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, + bool &TailCallsAreMarkedTail, + SmallVectorImpl &ArgumentPHIs) { // If we are introducing accumulator recursion to eliminate operations after // the call instruction that are both associative and commutative, the initial // value for the accumulator is placed in this variable. If this value is set @@ -707,8 +706,7 @@ static bool foldReturnAndProcessPred(BasicBlock *BB, ReturnInst *Ret, BB->eraseFromParent(); eliminateRecursiveTailCall(CI, RI, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, - CannotTailCallElimCallsMarkedTail); + ArgumentPHIs); ++NumRetDuped; Change = true; } @@ -727,8 +725,7 @@ static bool processReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, return false; return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, - CannotTailCallElimCallsMarkedTail); + ArgumentPHIs); } static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI) { diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 22af21d55c019767ddb6b6030b6b1ad01e4030d5..3d5cbfc93f2e60f03685e79b9cdc1044c1aa7bd0 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -78,8 +78,8 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted - // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. - SmallVector PHIs; + // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete. + SmallVector PHIs; for (BasicBlock::iterator I = BB->begin(); PHINode *PN = dyn_cast(I); ++I) PHIs.push_back(PN); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 6cd9f1614991afa37fd0322b4f781dd24b32d166..ebde1f9a17dd6b0e459c795a5f967843ee2b32a2 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -58,7 +58,7 @@ static bool setOnlyReadsMemory(Function &F) { static bool setOnlyAccessesArgMemory(Function &F) { if (F.onlyAccessesArgMemory()) return false; - F.setOnlyAccessesArgMemory (); + F.setOnlyAccessesArgMemory(); ++NumArgMemOnly; return true; } @@ -71,37 +71,36 @@ static bool setDoesNotThrow(Function &F) { return true; } -static bool setDoesNotCapture(Function &F, unsigned n) { - if (F.doesNotCapture(n)) +static bool setRetDoesNotAlias(Function &F) { + if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias)) return false; - F.setDoesNotCapture(n); - ++NumNoCapture; + F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + ++NumNoAlias; return true; } -static bool setOnlyReadsMemory(Function &F, unsigned n) { - if (F.onlyReadsMemory(n)) +static bool setDoesNotCapture(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::NoCapture)) return false; - F.setOnlyReadsMemory(n); - ++NumReadOnlyArg; + F.addParamAttr(ArgNo, Attribute::NoCapture); + ++NumNoCapture; return true; } -static bool setDoesNotAlias(Function &F, unsigned n) { - if (F.doesNotAlias(n)) +static bool setOnlyReadsMemory(Function &F, unsigned ArgNo) { + if (F.hasParamAttribute(ArgNo, Attribute::ReadOnly)) return false; - F.setDoesNotAlias(n); - ++NumNoAlias; + F.addParamAttr(ArgNo, Attribute::ReadOnly); + ++NumReadOnlyArg; return true; } -static bool setNonNull(Function &F, unsigned n) { - assert( - (n != AttributeList::ReturnIndex || F.getReturnType()->isPointerTy()) && - "nonnull applies only to pointers"); - if (F.getAttributes().hasAttribute(n, Attribute::NonNull)) +static bool setRetNonNull(Function &F) { + assert(F.getReturnType()->isPointerTy() && + "nonnull applies only to pointers"); + if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull)) return false; - F.addAttribute(n, Attribute::NonNull); + F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull); ++NumNonNull; return true; } @@ -114,9 +113,10 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { bool Changed = false; switch (TheLibFunc) { case LibFunc_strlen: + case LibFunc_wcslen: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strchr: case LibFunc_strrchr: @@ -131,8 +131,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strtold: case LibFunc_strtoull: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_strcpy: case LibFunc_stpcpy: @@ -141,14 +141,14 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strncpy: case LibFunc_stpncpy: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_strxfrm: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_strcmp: // 0,1 case LibFunc_strspn: // 0,1 @@ -159,84 +159,84 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_strncasecmp: // Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_strstr: case LibFunc_strpbrk: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_strtok: case LibFunc_strtok_r: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_scanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_setbuf: case LibFunc_setvbuf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_strdup: case LibFunc_strndup: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_stat: case LibFunc_statvfs: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_sscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_sprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_snprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_setitimer: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); Changed |= setDoesNotCapture(F, 2); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_system: // May throw; "system" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_malloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_memcmp: Changed |= setOnlyReadsMemory(F); Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_memchr: case LibFunc_memrchr: @@ -247,100 +247,100 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_modff: case LibFunc_modfl: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_memcpy: case LibFunc_mempcpy: case LibFunc_memccpy: case LibFunc_memmove: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_memcpy_chk: Changed |= setDoesNotThrow(F); return Changed; case LibFunc_memalign: - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_mkdir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_mktime: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_realloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_read: // May throw; "read" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_rewind: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_rmdir: case LibFunc_remove: case LibFunc_realpath: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_rename: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_readlink: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_write: // May throw; "write" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_bcopy: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_bcmp: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_bzero: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_calloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_chmod: case LibFunc_chown: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_ctermid: case LibFunc_clearerr: case LibFunc_closedir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_atoi: case LibFunc_atol: @@ -348,26 +348,26 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_atoll: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_access: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_fopen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_fdopen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_feof: case LibFunc_free: @@ -384,11 +384,11 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_funlockfile: case LibFunc_ftrylockfile: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_ferror: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); Changed |= setOnlyReadsMemory(F); return Changed; case LibFunc_fputc: @@ -398,51 +398,51 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_frexpl: case LibFunc_fstatvfs: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_fgets: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 3); + Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_fread: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_fwrite: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 3); // FIXME: readonly #1? return Changed; case LibFunc_fputs: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_fscanf: case LibFunc_fprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_fgetpos: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_getc: case LibFunc_getlogin_r: case LibFunc_getc_unlocked: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_getenv: Changed |= setDoesNotThrow(F); Changed |= setOnlyReadsMemory(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_gets: case LibFunc_getchar: @@ -450,132 +450,132 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_getitimer: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_getpwnam: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_ungetc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_uname: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_unlink: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_unsetenv: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_utime: case LibFunc_utimes: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_putc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_puts: case LibFunc_printf: case LibFunc_perror: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_pread: // May throw; "pread" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_pwrite: // May throw; "pwrite" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_putchar: Changed |= setDoesNotThrow(F); return Changed; case LibFunc_popen: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_pclose: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_vscanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vsscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_vfscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_valloc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_vprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_vfprintf: case LibFunc_vsprintf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_vsnprintf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 3); - Changed |= setOnlyReadsMemory(F, 3); + Changed |= setDoesNotCapture(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_open: // May throw; "open" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_opendir: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_tmpfile: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_times: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_htonl: case LibFunc_htons: @@ -586,93 +586,93 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { return Changed; case LibFunc_lstat: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_lchown: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_qsort: // May throw; places call through function pointer. - Changed |= setDoesNotCapture(F, 4); + Changed |= setDoesNotCapture(F, 3); return Changed; case LibFunc_dunder_strdup: case LibFunc_dunder_strndup: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_dunder_strtok_r: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); return Changed; case LibFunc_under_IO_getc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_under_IO_putc: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_dunder_isoc99_scanf: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_stat64: case LibFunc_lstat64: case LibFunc_statvfs64: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_dunder_isoc99_sscanf: Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_fopen64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 0); Changed |= setOnlyReadsMemory(F, 1); - Changed |= setOnlyReadsMemory(F, 2); return Changed; case LibFunc_fseeko64: case LibFunc_ftello64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 0); return Changed; case LibFunc_tmpfile64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotAlias(F, 0); + Changed |= setRetDoesNotAlias(F); return Changed; case LibFunc_fstat64: case LibFunc_fstatvfs64: Changed |= setDoesNotThrow(F); - Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 1); return Changed; case LibFunc_open64: // May throw; "open" is a valid pthread cancellation point. - Changed |= setDoesNotCapture(F, 1); - Changed |= setOnlyReadsMemory(F, 1); + Changed |= setDoesNotCapture(F, 0); + Changed |= setOnlyReadsMemory(F, 0); return Changed; case LibFunc_gettimeofday: // Currently some platforms have the restrict keyword on the arguments to // gettimeofday. To be conservative, do not add noalias to gettimeofday's // arguments. Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); return Changed; case LibFunc_Znwj: // new(unsigned int) case LibFunc_Znwm: // new(unsigned long) @@ -683,17 +683,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) { case LibFunc_msvc_new_array_int: // new[](unsigned int) case LibFunc_msvc_new_array_longlong: // new[](unsigned long long) // Operator new always returns a nonnull noalias pointer - Changed |= setNonNull(F, AttributeList::ReturnIndex); - Changed |= setDoesNotAlias(F, AttributeList::ReturnIndex); + Changed |= setRetNonNull(F); + Changed |= setRetDoesNotAlias(F); return Changed; //TODO: add LibFunc entries for: //case LibFunc_memset_pattern4: //case LibFunc_memset_pattern8: case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); + Changed |= setDoesNotCapture(F, 0); Changed |= setDoesNotCapture(F, 1); - Changed |= setDoesNotCapture(F, 2); - Changed |= setOnlyReadsMemory(F, 2); + Changed |= setOnlyReadsMemory(F, 1); return Changed; // int __nvvm_reflect(const char *) case LibFunc_nvvm_reflect: @@ -889,7 +889,13 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType()); CallInst *CI = B.CreateCall(Callee, Op, Name); - CI->setAttributes(Attrs); + + // The incoming attribute set may have come from a speculatable intrinsic, but + // is being replaced with a library call which is not allowed to be + // speculatable. + CI->setAttributes(Attrs.removeAttribute(B.getContext(), + AttributeList::FunctionIndex, + Attribute::Speculatable)); if (const Function *F = dyn_cast(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 1cfe3bd536482fb1567b4dafec32c4ee724f4f4b..83ec7f55d1afdbac7e8044fb499cc50e5d60a684 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -256,14 +257,14 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V, unsigned HiBits = LongLen - ShortLen; const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); - APInt Zeros(LongLen, 0), Ones(LongLen, 0); + KnownBits Known(LongLen); - computeKnownBits(V, Zeros, Ones, DL); + computeKnownBits(V, Known, DL); - if (Zeros.countLeadingOnes() >= HiBits) + if (Known.countMinLeadingZeros() >= HiBits) return VALRNG_KNOWN_SHORT; - if (Ones.countLeadingZeros() < HiBits) + if (Known.countMaxLeadingZeros() < HiBits) return VALRNG_LIKELY_LONG; // Long integer divisions are often used in hashtable implementations. It's diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 7a21c03da221ab57d6c6234354379ea3f6e5bdb3..83bc05d0311caebbc841dfe8ac11c4b29152a6b5 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -37,6 +37,7 @@ add_llvm_library(LLVMTransformUtils MetaRenamer.cpp ModuleUtils.cpp NameAnonGlobals.cpp + OrderedInstructions.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp StripGCRelocates.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index ae58d6133d91f3e340a0f9b978d09b38a9a433c0..314c990293cc5701a6824e6b5ebf5a64c9d17dda 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" @@ -31,16 +30,18 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include using namespace llvm; /// See comments in Cloning.h. -BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, - ValueToValueMapTy &VMap, +BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, - ClonedCodeInfo *CodeInfo) { + ClonedCodeInfo *CodeInfo, + DebugInfoFinder *DIFinder) { + DenseMap Cache; BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); @@ -49,6 +50,10 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, // Loop over all instructions, and copy them over. for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { + + if (DIFinder && F->getParent() && II->getDebugLoc()) + DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get()); + Instruction *NewInst = II->clone(); if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); @@ -103,32 +108,52 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer)); - SmallVector AttrVec(NewFunc->arg_size() + 2); + SmallVector NewArgAttrs(NewFunc->arg_size()); AttributeList OldAttrs = OldFunc->getAttributes(); - // Copy the return attributes. - AttrVec[0] = OldAttrs.getRetAttributes(); - // Clone any argument attributes that are present in the VMap. - for (const Argument &OldArg : OldFunc->args()) + for (const Argument &OldArg : OldFunc->args()) { if (Argument *NewArg = dyn_cast(VMap[&OldArg])) { - AttrVec[NewArg->getArgNo() + 1] = - OldAttrs.getParamAttributes(OldArg.getArgNo() + 1); + NewArgAttrs[NewArg->getArgNo()] = + OldAttrs.getParamAttributes(OldArg.getArgNo()); } + } - // Copy any function attributes. - AttrVec.back() = OldAttrs.getFnAttributes(); - - NewFunc->setAttributes(AttributeList::get(NewFunc->getContext(), AttrVec)); + NewFunc->setAttributes( + AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(), + OldAttrs.getRetAttributes(), NewArgAttrs)); + + bool MustCloneSP = + OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent(); + DISubprogram *SP = OldFunc->getSubprogram(); + if (SP) { + assert(!MustCloneSP || ModuleLevelChanges); + // Add mappings for some DebugInfo nodes that we don't want duplicated + // even if they're distinct. + auto &MD = VMap.MD(); + MD[SP->getUnit()].reset(SP->getUnit()); + MD[SP->getType()].reset(SP->getType()); + MD[SP->getFile()].reset(SP->getFile()); + // If we're not cloning into the same module, no need to clone the + // subprogram + if (!MustCloneSP) + MD[SP].reset(SP); + } SmallVector, 1> MDs; OldFunc->getAllMetadata(MDs); - for (auto MD : MDs) + for (auto MD : MDs) { NewFunc->addMetadata( MD.first, *MapMetadata(MD.second, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer)); + } + + // When we remap instructions, we want to avoid duplicating inlined + // DISubprograms, so record all subprograms we find as we duplicate + // instructions and then freeze them in the MD map. + DebugInfoFinder DIFinder; // Loop over all of the basic blocks in the function, cloning them as // appropriate. Note that we save BE this way in order to handle cloning of @@ -139,7 +164,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, const BasicBlock &BB = *BI; // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, + SP ? &DIFinder : nullptr); // Add basic block mapping. VMap[&BB] = CBB; @@ -161,6 +187,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Returns.push_back(RI); } + for (DISubprogram *ISP : DIFinder.subprograms()) { + if (ISP != SP) { + VMap.MD()[ISP].reset(ISP); + } + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. for (Function::iterator BB = @@ -209,7 +241,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap, } SmallVector Returns; // Ignore returns cloned. - CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "", + CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "", CodeInfo); return NewF; @@ -248,7 +280,7 @@ namespace { void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BasicBlock::const_iterator StartingInst, std::vector &ToClone){ - WeakVH &BBEntry = VMap[BB]; + WeakTrackingVH &BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -300,7 +332,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (!NewInst->mayHaveSideEffects()) { VMap[&*II] = V; - delete NewInst; + NewInst->deleteValue(); continue; } } @@ -550,7 +582,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Make a second pass over the PHINodes now that all of them have been // remapped into the new function, simplifying the PHINode and performing any // recursive simplifications exposed. This will transparently update the - // WeakVH in the VMap. Notably, we rely on that so that if we coalesce + // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce // two PHINodes, the iteration over the old PHIs remains valid, and the // mapping will just map us to the new node (which may not even be a PHI // node). diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 4e9d67252d6c5bd50a9559ab6152526212b890cd..d27cb45c7d7f5c12c3d87e0aa3a799a4b3a5c029 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm-c/Core.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm-c/Core.h" using namespace llvm; static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) { @@ -96,7 +96,7 @@ std::unique_ptr llvm::CloneModule( else GV = new GlobalVariable( *New, I->getValueType(), false, GlobalValue::ExternalLinkage, - (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr, + nullptr, I->getName(), nullptr, I->getThreadLocalMode(), I->getType()->getAddressSpace()); VMap[&*I] = GV; // We do not copy attributes (mainly because copying between different diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp index 60ae3745c8357247d87379c8a9a3c3bf2cfd489f..9f4d9c7e3981072cc69233e4abc0e208c2d843a2 100644 --- a/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -73,17 +73,17 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred, default: return false; case ICmpInst::ICMP_SLT: - // X < 0 is equivalent to (X & SignBit) != 0. + // X < 0 is equivalent to (X & SignMask) != 0. if (!C->isZero()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SGT: - // X > -1 is equivalent to (X & SignBit) == 0. + // X > -1 is equivalent to (X & SignMask) == 0. if (!C->isAllOnesValue()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULT: diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 644d93b727b3de9fc4d8ff923c675307460724f5..5d57ed9718fb68a34017baab447dec7226d9052d 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" @@ -73,24 +74,26 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) { } /// \brief Build a set of blocks to extract if the input blocks are viable. -template -static SetVector buildExtractionBlockSet(IteratorT BBBegin, - IteratorT BBEnd) { +static SetVector +buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT) { + assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; - assert(BBBegin != BBEnd); - // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. - do { - if (!Result.insert(*BBBegin)) - llvm_unreachable("Repeated basic blocks in extraction input"); + for (BasicBlock *BB : BBs) { + + // If this block is dead, don't process it. + if (DT && !DT->isReachableFromEntry(BB)) + continue; - if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) { + if (!Result.insert(BB)) + llvm_unreachable("Repeated basic blocks in extraction input"); + if (!CodeExtractor::isBlockValidForExtraction(*BB)) { Result.clear(); return Result; } - } while (++BBBegin != BBEnd); + } #ifndef NDEBUG for (SetVector::iterator I = std::next(Result.begin()), @@ -106,49 +109,19 @@ static SetVector buildExtractionBlockSet(IteratorT BBBegin, return Result; } -/// \brief Helper to call buildExtractionBlockSet with an ArrayRef. -static SetVector -buildExtractionBlockSet(ArrayRef BBs) { - return buildExtractionBlockSet(BBs.begin(), BBs.end()); -} - -/// \brief Helper to call buildExtractionBlockSet with a RegionNode. -static SetVector -buildExtractionBlockSet(const RegionNode &RN) { - if (!RN.isSubRegion()) - // Just a single BasicBlock. - return buildExtractionBlockSet(RN.getNodeAs()); - - const Region &R = *RN.getNodeAs(); - - return buildExtractionBlockSet(R.block_begin(), R.block_end()); -} - -CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs, - BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} - CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} + BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())), + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), NumExitBlocks(~0U) {} -CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} - /// definedInRegion - Return true if the specified value is defined in the /// extracted region. static bool definedInRegion(const SetVector &Blocks, Value *V) { @@ -169,16 +142,255 @@ static bool definedInCaller(const SetVector &Blocks, Value *V) { return false; } -void CodeExtractor::findInputsOutputs(ValueSet &Inputs, - ValueSet &Outputs) const { +static BasicBlock *getCommonExitBlock(const SetVector &Blocks) { + BasicBlock *CommonExitBlock = nullptr; + auto hasNonCommonExitSucc = [&](BasicBlock *Block) { + for (auto *Succ : successors(Block)) { + // Internal edges, ok. + if (Blocks.count(Succ)) + continue; + if (!CommonExitBlock) { + CommonExitBlock = Succ; + continue; + } + if (CommonExitBlock == Succ) + continue; + + return true; + } + return false; + }; + + if (any_of(Blocks, hasNonCommonExitSucc)) + return nullptr; + + return CommonExitBlock; +} + +bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( + Instruction *Addr) const { + AllocaInst *AI = cast(Addr->stripInBoundsConstantOffsets()); + Function *Func = (*Blocks.begin())->getParent(); + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + for (Instruction &II : BB) { + + if (isa(II)) + continue; + + unsigned Opcode = II.getOpcode(); + Value *MemAddr = nullptr; + switch (Opcode) { + case Instruction::Store: + case Instruction::Load: { + if (Opcode == Instruction::Store) { + StoreInst *SI = cast(&II); + MemAddr = SI->getPointerOperand(); + } else { + LoadInst *LI = cast(&II); + MemAddr = LI->getPointerOperand(); + } + // Global variable can not be aliased with locals. + if (dyn_cast(MemAddr)) + break; + Value *Base = MemAddr->stripInBoundsConstantOffsets(); + if (!dyn_cast(Base) || Base == AI) + return false; + break; + } + default: { + IntrinsicInst *IntrInst = dyn_cast(&II); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || + IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) + break; + return false; + } + // Treat all the other cases conservatively if it has side effects. + if (II.mayHaveSideEffects()) + return false; + } + } + } + } + + return true; +} + +BasicBlock * +CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { + BasicBlock *SinglePredFromOutlineRegion = nullptr; + assert(!Blocks.count(CommonExitBlock) && + "Expect a block outside the region!"); + for (auto *Pred : predecessors(CommonExitBlock)) { + if (!Blocks.count(Pred)) + continue; + if (!SinglePredFromOutlineRegion) { + SinglePredFromOutlineRegion = Pred; + } else if (SinglePredFromOutlineRegion != Pred) { + SinglePredFromOutlineRegion = nullptr; + break; + } + } + + if (SinglePredFromOutlineRegion) + return SinglePredFromOutlineRegion; + +#ifndef NDEBUG + auto getFirstPHI = [](BasicBlock *BB) { + BasicBlock::iterator I = BB->begin(); + PHINode *FirstPhi = nullptr; + while (I != BB->end()) { + PHINode *Phi = dyn_cast(I); + if (!Phi) + break; + if (!FirstPhi) { + FirstPhi = Phi; + break; + } + } + return FirstPhi; + }; + // If there are any phi nodes, the single pred either exists or has already + // be created before code extraction. + assert(!getFirstPHI(CommonExitBlock) && "Phi not expected"); +#endif + + BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock( + CommonExitBlock->getFirstNonPHI()->getIterator()); + + for (auto *Pred : predecessors(CommonExitBlock)) { + if (Blocks.count(Pred)) + continue; + Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock); + } + // Now add the old exit block to the outline region. + Blocks.insert(CommonExitBlock); + return CommonExitBlock; +} + +void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, + BasicBlock *&ExitBlock) const { + Function *Func = (*Blocks.begin())->getParent(); + ExitBlock = getCommonExitBlock(Blocks); + + for (BasicBlock &BB : *Func) { + if (Blocks.count(&BB)) + continue; + for (Instruction &II : BB) { + auto *AI = dyn_cast(&II); + if (!AI) + continue; + + // Find the pair of life time markers for address 'Addr' that are either + // defined inside the outline region or can legally be shrinkwrapped into + // the outline region. If there are not other untracked uses of the + // address, return the pair of markers if found; otherwise return a pair + // of nullptr. + auto GetLifeTimeMarkers = + [&](Instruction *Addr, bool &SinkLifeStart, + bool &HoistLifeEnd) -> std::pair { + Instruction *LifeStart = nullptr, *LifeEnd = nullptr; + + for (User *U : Addr->users()) { + IntrinsicInst *IntrInst = dyn_cast(U); + if (IntrInst) { + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) { + // Do not handle the case where AI has multiple start markers. + if (LifeStart) + return std::make_pair(nullptr, nullptr); + LifeStart = IntrInst; + } + if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) { + if (LifeEnd) + return std::make_pair(nullptr, nullptr); + LifeEnd = IntrInst; + } + continue; + } + // Find untracked uses of the address, bail. + if (!definedInRegion(Blocks, U)) + return std::make_pair(nullptr, nullptr); + } + + if (!LifeStart || !LifeEnd) + return std::make_pair(nullptr, nullptr); + + SinkLifeStart = !definedInRegion(Blocks, LifeStart); + HoistLifeEnd = !definedInRegion(Blocks, LifeEnd); + // Do legality Check. + if ((SinkLifeStart || HoistLifeEnd) && + !isLegalToShrinkwrapLifetimeMarkers(Addr)) + return std::make_pair(nullptr, nullptr); + + // Check to see if we have a place to do hoisting, if not, bail. + if (HoistLifeEnd && !ExitBlock) + return std::make_pair(nullptr, nullptr); + + return std::make_pair(LifeStart, LifeEnd); + }; + + bool SinkLifeStart = false, HoistLifeEnd = false; + auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd); + + if (Markers.first) { + if (SinkLifeStart) + SinkCands.insert(Markers.first); + SinkCands.insert(AI); + if (HoistLifeEnd) + HoistCands.insert(Markers.second); + continue; + } + + // Follow the bitcast. + Instruction *MarkerAddr = nullptr; + for (User *U : AI->users()) { + + if (U->stripInBoundsConstantOffsets() == AI) { + SinkLifeStart = false; + HoistLifeEnd = false; + Instruction *Bitcast = cast(U); + Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd); + if (Markers.first) { + MarkerAddr = Bitcast; + continue; + } + } + + // Found unknown use of AI. + if (!definedInRegion(Blocks, U)) { + MarkerAddr = nullptr; + break; + } + } + + if (MarkerAddr) { + if (SinkLifeStart) + SinkCands.insert(Markers.first); + if (!definedInRegion(Blocks, MarkerAddr)) + SinkCands.insert(MarkerAddr); + SinkCands.insert(AI); + if (HoistLifeEnd) + HoistCands.insert(Markers.second); + } + } + } +} + +void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, + const ValueSet &SinkCands) const { + for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. for (Instruction &II : *BB) { for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; - ++OI) - if (definedInCaller(Blocks, *OI)) - Inputs.insert(*OI); + ++OI) { + Value *V = *OI; + if (!SinkCands.count(V) && definedInCaller(Blocks, V)) + Inputs.insert(V); + } for (User *U : II.users()) if (!definedInRegion(Blocks, U)) { @@ -218,9 +430,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); - BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, - Header->getName()+".ce"); + BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. @@ -229,11 +439,6 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { Blocks.insert(NewBB); Header = NewBB; - // Okay, update dominator sets. The blocks that dominate the new one are the - // blocks that dominate TIBB plus the new block itself. - if (DT) - DT->splitBlock(NewBB); - // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { @@ -248,12 +453,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, PN->getName() + ".ce", &NewBB->front()); + PN->replaceAllUsesWith(NewPN); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they @@ -751,7 +958,8 @@ Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) return nullptr; - ValueSet inputs, outputs; + ValueSet inputs, outputs, SinkingCands, HoistingCands; + BasicBlock *CommonExit = nullptr; // Assumption: this is a single-entry code region, and the header is the first // block in the region. @@ -790,8 +998,23 @@ Function *CodeExtractor::extractCodeRegion() { "newFuncRoot"); newFuncRoot->getInstList().push_back(BranchInst::Create(header)); + findAllocas(SinkingCands, HoistingCands, CommonExit); + assert(HoistingCands.empty() || CommonExit); + // Find inputs to, outputs from the code region. - findInputsOutputs(inputs, outputs); + findInputsOutputs(inputs, outputs, SinkingCands); + + // Now sink all instructions which only have non-phi uses inside the region + for (auto *II : SinkingCands) + cast(II)->moveBefore(*newFuncRoot, + newFuncRoot->getFirstInsertionPt()); + + if (!HoistingCands.empty()) { + auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) + cast(II)->moveBefore(TI); + } // Calculate the exit blocks for the extracted region and the total exit // weights for each of those blocks. diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 0eee6e19efac604fe032195545d2cb6cb3043002..6d3d287defdb2fb2074ce7e3f85b636b76413f63 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/CFG.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; diff --git a/lib/Transforms/Utils/EscapeEnumerator.cpp b/lib/Transforms/Utils/EscapeEnumerator.cpp index 8c2386554da56c5271690b3fe684a2d6d6e702ba..78d7474e5b954280fc1de26034133d31860a183d 100644 --- a/lib/Transforms/Utils/EscapeEnumerator.cpp +++ b/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -67,8 +67,7 @@ IRBuilder<> *EscapeEnumerator::Next() { // Create a cleanup block. LLVMContext &C = F.getContext(); BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); - Type *ExnTy = - StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr); + Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); if (!F.hasPersonalityFn()) { Constant *PersFn = getDefaultPersonalityFn(F.getParent()); F.setPersonalityFn(PersFn); diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index 59f176e2f231d59f78f35363a36a3cdfef455e28..c97e544e620a9b672b11d489c4ccddb8a4f18b35 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -20,8 +20,8 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -439,7 +439,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { + if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) { InstResult = C; DEBUG(dbgs() << "Constant folded function call. Result: " << *InstResult << "\n"); diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 7b96fbb11a142143375ad2ccc9d87cd3eab2ee3c..435eff3bef47e0778246fe7387d31914e662612f 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" @@ -19,6 +18,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "flattencfg" diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index 73a0b2737e9572d523ba653a8a8463005fde6a0a..0457294361b562d7e82c0ad910dbbd3042bd8792 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -15,8 +15,8 @@ #include "llvm/Transforms/Utils/FunctionComparator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -76,12 +76,14 @@ int FunctionComparator::cmpMem(StringRef L, StringRef R) const { int FunctionComparator::cmpAttrs(const AttributeList L, const AttributeList R) const { - if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) + if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets())) return Res; - for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { - AttributeList::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), - RE = R.end(i); + for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) { + AttributeSet LAS = L.getAttributes(i); + AttributeSet RAS = R.getAttributes(i); + AttributeSet::iterator LI = LAS.begin(), LE = LAS.end(); + AttributeSet::iterator RI = RAS.begin(), RE = RAS.end(); for (; LI != LE && RI != RE; ++LI, ++RI) { Attribute LA = *LI; Attribute RA = *RI; diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index b00f4b14068a2f152f2e7b718439528d95678d57..a98d07237b47487698323f665f3219f511060959 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" using namespace llvm; diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index ba4b78ac758a64b788a6aef76a1f00698e70f4de..245fefb38ee88ab19491a636b65da856a917299e 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -18,7 +19,6 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 5d6fbc3325fff5ff6c71e6e3305fb90a74a1e396..2a18c140c7886a8cc832ac8604cb082f9d5b87b6 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -28,13 +27,13 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -43,6 +42,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -1302,41 +1302,6 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { return false; } -/// Rebuild the entire inlined-at chain for this instruction so that the top of -/// the chain now is inlined-at the new call site. -static DebugLoc -updateInlinedAtInfo(const DebugLoc &DL, DILocation *InlinedAtNode, - LLVMContext &Ctx, - DenseMap &IANodes) { - SmallVector InlinedAtLocations; - DILocation *Last = InlinedAtNode; - DILocation *CurInlinedAt = DL; - - // Gather all the inlined-at nodes - while (DILocation *IA = CurInlinedAt->getInlinedAt()) { - // Skip any we've already built nodes for - if (DILocation *Found = IANodes[IA]) { - Last = Found; - break; - } - - InlinedAtLocations.push_back(IA); - CurInlinedAt = IA; - } - - // Starting from the top, rebuild the nodes to point to the new inlined-at - // location (then rebuilding the rest of the chain behind it) and update the - // map of already-constructed inlined-at nodes. - for (const DILocation *MD : reverse(InlinedAtLocations)) { - Last = IANodes[MD] = DILocation::getDistinct( - Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); - } - - // And finally create the normal location for this instruction, referring to - // the new inlined-at chain. - return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), Last); -} - /// Return the result of AI->isStaticAlloca() if AI were moved to the entry /// block. Allocas used in inalloca calls and allocas of dynamic array size /// cannot be static. @@ -1364,14 +1329,16 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, // Cache the inlined-at nodes as they're built so they are reused, without // this every instruction's inlined-at chain would become distinct from each // other. - DenseMap IANodes; + DenseMap IANodes; for (; FI != Fn->end(); ++FI) { for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { if (DebugLoc DL = BI->getDebugLoc()) { - BI->setDebugLoc( - updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes)); + auto IA = DebugLoc::appendInlinedAt(DL, InlinedAtNode, BI->getContext(), + IANodes); + auto IDL = DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(), IA); + BI->setDebugLoc(IDL); continue; } @@ -1429,11 +1396,13 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, /// Update the branch metadata for cloned call instructions. static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, const Optional &CalleeEntryCount, - const Instruction *TheCall) { + const Instruction *TheCall, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *CallerBFI) { if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) return; Optional CallSiteCount = - ProfileSummaryInfo::getProfileCount(TheCall, nullptr); + PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; uint64_t CallCount = std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, CalleeEntryCount.getValue()); @@ -1456,16 +1425,16 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, /// The callsite's block count is subtracted from the callee's function entry /// count. static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB, - Instruction *CallInst, Function *Callee) { + Instruction *CallInst, Function *Callee, + ProfileSummaryInfo *PSI) { // If the callee has a original count of N, and the estimated count of // callsite is M, the new callee count is set to N - M. M is estimated from // the caller's entry count, its entry block frequency and the block frequency // of the callsite. Optional CalleeCount = Callee->getEntryCount(); - if (!CalleeCount.hasValue()) + if (!CalleeCount.hasValue() || !PSI) return; - Optional CallCount = - ProfileSummaryInfo::getProfileCount(CallInst, CallerBFI); + Optional CallCount = PSI->getProfileCount(CallInst, CallerBFI); if (!CallCount.hasValue()) return; // Since CallSiteCount is an estimate, it could exceed the original callee @@ -1640,7 +1609,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, - CalledFunc->getParamAlignment(ArgNo+1)); + CalledFunc->getParamAlignment(ArgNo)); if (ActualArg != *AI) ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } @@ -1668,9 +1637,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, CalledFunc->front()); - updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall); + updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, + IFI.PSI, IFI.CallerBFI); // Update the profile count of callee. - updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc); + updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI); // Inject byval arguments initialization. for (std::pair &Init : ByValInit) @@ -2302,7 +2272,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, AssumptionCache *AC = IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr; auto &DL = Caller->getParent()->getDataLayout(); - if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) { + if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 8a1973d1db0518b10c456abc47c3f1b39124cc6d..23ec45edb3efd812a2b54fdc1323ed8254751de7 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -14,10 +14,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; namespace { @@ -26,16 +26,15 @@ namespace { InstNamer() : FunctionPass(ID) { initializeInstNamerPass(*PassRegistry::getPassRegistry()); } - + void getAnalysisUsage(AnalysisUsage &Info) const override { Info.setPreservesAll(); } bool runOnFunction(Function &F) override { - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); - AI != AE; ++AI) - if (!AI->hasName() && !AI->getType()->isVoidTy()) - AI->setName("arg"); + for (auto &Arg : F.args()) + if (!Arg.hasName()) + Arg.setName("arg"); for (BasicBlock &BB : F) { if (!BB.hasName()) @@ -48,11 +47,11 @@ namespace { return true; } }; - + char InstNamer::ID = 0; } -INITIALIZE_PASS(InstNamer, "instnamer", +INITIALIZE_PASS(InstNamer, "instnamer", "Assign names to anonymous instructions", false, false) char &llvm::InstructionNamerID = InstNamer::ID; //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 2671840a8bb0972e787f8e35a7b7da5d380844d6..089f2b5f3b181166f3c63aaf1d941765a52cf834 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -85,9 +85,11 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, UsesToRewrite.clear(); Instruction *I = Worklist.pop_back_val(); + assert(!I->getType()->isTokenTy() && "Tokens shouldn't be in the worklist"); BasicBlock *InstBB = I->getParent(); Loop *L = LI.getLoopFor(InstBB); - if (!LoopExitBlocks.count(L)) + assert(L && "Instruction belongs to a BB that's not part of a loop"); + if (!LoopExitBlocks.count(L)) L->getExitBlocks(LoopExitBlocks[L]); assert(LoopExitBlocks.count(L)); const SmallVectorImpl &ExitBlocks = LoopExitBlocks[L]; @@ -95,17 +97,10 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, if (ExitBlocks.empty()) continue; - // Tokens cannot be used in PHI nodes, so we skip over them. - // We can run into tokens which are live out of a loop with catchswitch - // instructions in Windows EH if the catchswitch has one catchpad which - // is inside the loop and another which is not. - if (I->getType()->isTokenTy()) - continue; - for (Use &U : I->uses()) { Instruction *User = cast(U.getUser()); BasicBlock *UserBB = User->getParent(); - if (PHINode *PN = dyn_cast(User)) + if (auto *PN = dyn_cast(User)) UserBB = PN->getIncomingBlock(U); if (InstBB != UserBB && !L->contains(UserBB)) @@ -123,7 +118,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, // DomBB dominates the value, so adjust DomBB to the normal destination // block, which is effectively where the value is first usable. BasicBlock *DomBB = InstBB; - if (InvokeInst *Inv = dyn_cast(I)) + if (auto *Inv = dyn_cast(I)) DomBB = Inv->getNormalDest(); DomTreeNode *DomNode = DT.getNode(DomBB); @@ -188,7 +183,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, // block. Instruction *User = cast(UseToRewrite->getUser()); BasicBlock *UserBB = User->getParent(); - if (PHINode *PN = dyn_cast(User)) + if (auto *PN = dyn_cast(User)) UserBB = PN->getIncomingBlock(*UseToRewrite); if (isa(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { @@ -213,13 +208,9 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, // Post process PHI instructions that were inserted into another disjoint // loop and update their exits properly. - for (auto *PostProcessPN : PostProcessPHIs) { - if (PostProcessPN->use_empty()) - continue; - - // Reprocess each PHI instruction. - Worklist.push_back(PostProcessPN); - } + for (auto *PostProcessPN : PostProcessPHIs) + if (!PostProcessPN->use_empty()) + Worklist.push_back(PostProcessPN); // Keep track of PHI nodes that we want to remove because they did not have // any uses rewritten. @@ -237,40 +228,75 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, return Changed; } -/// Return true if the specified block dominates at least -/// one of the blocks in the specified list. -static bool -blockDominatesAnExit(BasicBlock *BB, - DominatorTree &DT, - const SmallVectorImpl &ExitBlocks) { - DomTreeNode *DomNode = DT.getNode(BB); - return any_of(ExitBlocks, [&](BasicBlock *EB) { - return DT.dominates(DomNode, DT.getNode(EB)); - }); +// Compute the set of BasicBlocks in the loop `L` dominating at least one exit. +static void computeBlocksDominatingExits( + Loop &L, DominatorTree &DT, SmallVector &ExitBlocks, + SmallSetVector &BlocksDominatingExits) { + SmallVector BBWorklist; + + // We start from the exit blocks, as every block trivially dominates itself + // (not strictly). + for (BasicBlock *BB : ExitBlocks) + BBWorklist.push_back(BB); + + while (!BBWorklist.empty()) { + BasicBlock *BB = BBWorklist.pop_back_val(); + + // Check if this is a loop header. If this is the case, we're done. + if (L.getHeader() == BB) + continue; + + // Otherwise, add its immediate predecessor in the dominator tree to the + // worklist, unless we visited it already. + BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock(); + + // Exit blocks can have an immediate dominator not beloinging to the + // loop. For an exit block to be immediately dominated by another block + // outside the loop, it implies not all paths from that dominator, to the + // exit block, go through the loop. + // Example: + // + // |---- A + // | | + // | B<-- + // | | | + // |---> C -- + // | + // D + // + // C is the exit block of the loop and it's immediately dominated by A, + // which doesn't belong to the loop. + if (!L.contains(IDomBB)) + continue; + + if (BlocksDominatingExits.insert(IDomBB)) + BBWorklist.push_back(IDomBB); + } } bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE) { bool Changed = false; - // Get the set of exiting blocks. SmallVector ExitBlocks; L.getExitBlocks(ExitBlocks); - if (ExitBlocks.empty()) return false; + SmallSetVector BlocksDominatingExits; + + // We want to avoid use-scanning leveraging dominance informations. + // If a block doesn't dominate any of the loop exits, the none of the values + // defined in the loop can be used outside. + // We compute the set of blocks fullfilling the conditions in advance + // walking the dominator tree upwards until we hit a loop header. + computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits); + SmallVector Worklist; // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. If so, put them into the worklist to rewrite those uses. - for (BasicBlock *BB : L.blocks()) { - // For large loops, avoid use-scanning by using dominance information: In - // particular, if a block does not dominate any of the loop exits, then none - // of the values defined in the block could be used outside the loop. - if (!blockDominatesAnExit(BB, DT, ExitBlocks)) - continue; - + for (BasicBlock *BB : BlocksDominatingExits) { for (Instruction &I : *BB) { // Reject two common cases fast: instructions with no uses (like stores) // and instructions with one use that is in the same block as this. @@ -279,6 +305,13 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, !isa(I.user_back()))) continue; + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. + if (I.getType()->isTokenTy()) + continue; + Worklist.push_back(&I); } } diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index fe93d6927c6388d87aca47d197f72e44d3d9009d..42aca757c2afda761e097df309faa42c3ec6179b 100644 --- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -48,16 +49,6 @@ using namespace llvm; STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted"); STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted"); -static cl::opt LibCallsShrinkWrapDoDomainError( - "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with domain errors")); -static cl::opt LibCallsShrinkWrapDoRangeError( - "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with range errors")); -static cl::opt LibCallsShrinkWrapDoPoleError( - "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden, - cl::desc("Perform shrink-wrap on lib calls with pole errors")); - namespace { class LibCallsShrinkWrapLegacyPass : public FunctionPass { public: @@ -82,10 +73,11 @@ INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap", namespace { class LibCallsShrinkWrap : public InstVisitor { public: - LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){}; - bool isChanged() const { return Changed; } + LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT) + : TLI(TLI), DT(DT){}; void visitCallInst(CallInst &CI) { checkCandidate(CI); } - void perform() { + bool perform() { + bool Changed = false; for (auto &CI : WorkList) { DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() << "\n"); @@ -94,6 +86,7 @@ public: DEBUG(dbgs() << "Transformed\n"); } } + return Changed; } private: @@ -134,8 +127,8 @@ private: } const TargetLibraryInfo &TLI; + DominatorTree *DT; SmallVector WorkList; - bool Changed; }; } // end anonymous namespace @@ -241,8 +234,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_atanhf: // Same as atanh case LibFunc_atanhl: // Same as atanh { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedTwoCond; Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f); break; @@ -262,8 +253,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_logbf: // Same as log case LibFunc_logbl: // Same as log { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f); break; @@ -274,8 +263,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, case LibFunc_log1pf: // Same as log1p case LibFunc_log1pl: // Same as log1p { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) - return false; ++NumWrappedOneCond; Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f); break; @@ -285,9 +272,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI, // RangeError: overflow or underflow case LibFunc_powf: case LibFunc_powl: { - if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError || - !LibCallsShrinkWrapDoRangeError) - return false; Cond = generateCondForPow(CI, Func); if (Cond == nullptr) return false; @@ -346,7 +330,7 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI, UpperBound = 11356.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedOneCond; @@ -410,7 +394,7 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI, UpperBound = 11383.0f; break; default: - llvm_unreachable("Should be reach here"); + llvm_unreachable("Unhandled library call!"); } ++NumWrappedTwoCond; @@ -499,14 +483,17 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI, // Wrap conditions that can potentially generate errno to the library call. void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { - assert(Cond != nullptr && "hrinkWrapCI is not expecting an empty call inst"); + assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst"); MDNode *BranchWeights = MDBuilder(CI->getContext()).createBranchWeights(1, 2000); + TerminatorInst *NewInst = - SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights); + SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT); BasicBlock *CallBB = NewInst->getParent(); CallBB->setName("cdce.call"); - CallBB->getSingleSuccessor()->setName("cdce.end"); + BasicBlock *SuccBB = CallBB->getSingleSuccessor(); + assert(SuccBB && "The split block should have a single successor"); + SuccBB->setName("cdce.end"); CI->removeFromParent(); CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI); DEBUG(dbgs() << "== Basic Block After =="); @@ -522,32 +509,38 @@ bool LibCallsShrinkWrap::perform(CallInst *CI) { TLI.getLibFunc(*Callee, Func); assert(Func && "perform() is not expecting an empty function"); - if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func)) - return true; - - if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func)) + if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func)) return true; - return performCallErrors(CI, Func); } void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); AU.addPreserved(); AU.addRequired(); } -static bool runImpl(Function &F, const TargetLibraryInfo &TLI) { +static bool runImpl(Function &F, const TargetLibraryInfo &TLI, + DominatorTree *DT) { if (F.hasFnAttribute(Attribute::OptimizeForSize)) return false; - LibCallsShrinkWrap CCDCE(TLI); + LibCallsShrinkWrap CCDCE(TLI, DT); CCDCE.visit(F); - CCDCE.perform(); - return CCDCE.isChanged(); + bool Changed = CCDCE.perform(); + +// Verify the dominator after we've updated it locally. +#ifndef NDEBUG + if (DT) + DT->verifyDomTree(); +#endif + return Changed; } bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) { auto &TLI = getAnalysis().getTLI(); - return runImpl(F, TLI); + auto *DTWP = getAnalysisIfAvailable(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + return runImpl(F, TLI, DT); } namespace llvm { @@ -561,11 +554,12 @@ FunctionPass *createLibCallsShrinkWrapPass() { PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F, FunctionAnalysisManager &FAM) { auto &TLI = FAM.getResult(F); - bool Changed = runImpl(F, TLI); - if (!Changed) + auto *DT = FAM.getCachedResult(F); + if (!runImpl(F, TLI, DT)) return PreservedAnalyses::all(); auto PA = PreservedAnalyses(); PA.preserve(); + PA.preserve(); return PA; } } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 18b29226c2ef5adc9983c61d5d0ab01b394c3fe7..2af671636cbdb11be6ce71162a715e6fa7bd48af 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -22,8 +22,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -45,6 +45,7 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -561,7 +562,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { // that can be removed. BB->removePredecessor(Pred, true); - WeakVH PhiIt = &BB->front(); + WeakTrackingVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast(PhiIt)); Value *OldPhiIt = PhiIt; @@ -1036,17 +1037,15 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT); - unsigned TrailZ = KnownZero.countTrailingOnes(); + KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT); + unsigned TrailZ = Known.countMinTrailingZeros(); // Avoid trouble with ridiculously large TrailZ values, such as // those computed from a null pointer. TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + unsigned Align = 1u << std::min(Known.getBitWidth() - 1, TrailZ); // LLVM doesn't support alignments larger than this currently. Align = std::min(Align, +Value::MaximumAlignment); @@ -1104,8 +1103,9 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar, void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI, DIBuilder &Builder) { auto *DIVar = DDI->getVariable(); - auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); + auto *DIExpr = DDI->getExpression(); + Value *DV = SI->getOperand(0); // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -1115,34 +1115,28 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (SExtInst *SExt = dyn_cast(SI->getOperand(0))) ExtendedArg = dyn_cast(SExt->getOperand(0)); if (ExtendedArg) { - // We're now only describing a subset of the variable. The fragment we're - // describing will always be smaller than the variable size, because - // VariableSize == Size of Alloca described by DDI. Since SI stores - // to the alloca described by DDI, if it's first operand is an extend, - // we're guaranteed that before extension, the value was narrower than - // the size of the alloca, hence the size of the described variable. - SmallVector Ops; - unsigned FragmentOffset = 0; - // If this already is a bit fragment, we drop the bit fragment from the - // expression and record the offset. - auto Fragment = DIExpr->getFragmentInfo(); - if (Fragment) { - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()-3); - FragmentOffset = Fragment->OffsetInBits; - } else { - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); + // If this DDI was already describing only a fragment of a variable, ensure + // that fragment is appropriately narrowed here. + // But if a fragment wasn't used, describe the value as the original + // argument (rather than the zext or sext) so that it remains described even + // if the sext/zext is optimized away. This widens the variable description, + // leaving it up to the consumer to know how the smaller value may be + // represented in a larger register. + if (auto Fragment = DIExpr->getFragmentInfo()) { + unsigned FragmentOffset = Fragment->OffsetInBits; + SmallVector Ops(DIExpr->elements_begin(), + DIExpr->elements_end() - 3); + Ops.push_back(dwarf::DW_OP_LLVM_fragment); + Ops.push_back(FragmentOffset); + const DataLayout &DL = DDI->getModule()->getDataLayout(); + Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); + DIExpr = Builder.createExpression(Ops); } - Ops.push_back(dwarf::DW_OP_LLVM_fragment); - Ops.push_back(FragmentOffset); - const DataLayout &DL = DDI->getModule()->getDataLayout(); - Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); - auto NewDIExpr = Builder.createExpression(Ops); - if (!LdStHasDebugValue(DIVar, NewDIExpr, SI)) - Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, NewDIExpr, - DDI->getDebugLoc(), SI); - } else if (!LdStHasDebugValue(DIVar, DIExpr, SI)) - Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr, - DDI->getDebugLoc(), SI); + DV = ExtendedArg; + } + if (!LdStHasDebugValue(DIVar, DIExpr, SI)) + Builder.insertDbgValueIntrinsic(DV, 0, DIVar, DIExpr, DDI->getDebugLoc(), + SI); } /// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value @@ -1227,13 +1221,9 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. - SmallVector NewDIExpr; - auto *DIExpr = DDI->getExpression(); - NewDIExpr.push_back(dwarf::DW_OP_deref); - NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), - DIB.createExpression(NewDIExpr), - DDI->getDebugLoc(), CI); + DDI->getExpression(), DDI->getDebugLoc(), + CI); } } DDI->eraseFromParent(); @@ -1262,33 +1252,6 @@ void llvm::findDbgValues(SmallVectorImpl &DbgValues, Value *V) { DbgValues.push_back(DVI); } -static void appendOffset(SmallVectorImpl &Ops, int64_t Offset) { - if (Offset > 0) { - Ops.push_back(dwarf::DW_OP_plus); - Ops.push_back(Offset); - } else if (Offset < 0) { - Ops.push_back(dwarf::DW_OP_minus); - Ops.push_back(-Offset); - } -} - -/// Prepend \p DIExpr with a deref and offset operation. -static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr, - bool Deref, int64_t Offset) { - if (!Deref && !Offset) - return DIExpr; - // Create a copy of the original DIDescriptor for user variable, prepending - // "deref" operation to a list of address elements, as new llvm.dbg.declare - // will take a value storing address of the memory for variable, not - // alloca itself. - SmallVector Ops; - if (Deref) - Ops.push_back(dwarf::DW_OP_deref); - appendOffset(Ops, Offset); - if (DIExpr) - Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); - return Builder.createExpression(Ops); -} bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, Instruction *InsertBefore, DIBuilder &Builder, @@ -1300,9 +1263,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, auto *DIVar = DDI->getVariable(); auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - - DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset); - + DIExpr = DIExpression::prepend(DIExpr, Deref, Offset); // Insert llvm.dbg.declare immediately after the original alloca, and remove // old llvm.dbg.declare. Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); @@ -1335,7 +1296,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress, if (Offset) { SmallVector Ops; Ops.push_back(dwarf::DW_OP_deref); - appendOffset(Ops, Offset); + DIExpression::appendOffset(Ops, Offset); Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end()); DIExpr = Builder.createExpression(Ops); } @@ -1378,12 +1339,16 @@ void llvm::salvageDebugInfo(Instruction &I) { unsigned BitWidth = M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); APInt Offset(BitWidth, 0); - // Rewrite a constant GEP into a DIExpression. + // Rewrite a constant GEP into a DIExpression. Since we are performing + // arithmetic to compute the variable's *value* in the DIExpression, we + // need to mark the expression with a DW_OP_stack_value. if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); - // GEP offsets are i32 and thus alwaus fit into an int64_t. - DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue()); + // GEP offsets are i32 and thus always fit into an int64_t. + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, + Offset.getSExtValue(), + DIExpression::WithStackValue); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1395,7 +1360,7 @@ void llvm::salvageDebugInfo(Instruction &I) { // Rewrite the load into DW_OP_deref. auto *DIExpr = DVI->getExpression(); DIBuilder DIB(M, /*AllowUnresolved*/ false); - DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr)); DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1504,7 +1469,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, II->setAttributes(CI->getAttributes()); // Make sure that anything using the call now uses the invoke! This also - // updates the CallGraph if present, because it uses a WeakVH. + // updates the CallGraph if present, because it uses a WeakTrackingVH. CI->replaceAllUsesWith(II); // Delete the original call @@ -1809,46 +1774,62 @@ void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) { combineMetadata(K, J, KnownIDs); } -unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, - DominatorTree &DT, - const BasicBlockEdge &Root) { +template +static unsigned replaceDominatedUsesWith(Value *From, Value *To, + const RootType &Root, + const DominatesFn &Dominates) { assert(From->getType() == To->getType()); - + unsigned Count = 0; for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); - UI != UE; ) { + UI != UE;) { Use &U = *UI++; - if (DT.dominates(Root, U)) { - U.set(To); - DEBUG(dbgs() << "Replace dominated use of '" - << From->getName() << "' as " - << *To << " in " << *U << "\n"); - ++Count; - } + if (!Dominates(Root, U)) + continue; + U.set(To); + DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " + << *To << " in " << *U << "\n"); + ++Count; } return Count; } -unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, - DominatorTree &DT, - const BasicBlock *BB) { - assert(From->getType() == To->getType()); +unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) { + assert(From->getType() == To->getType()); + auto *BB = From->getParent(); + unsigned Count = 0; - unsigned Count = 0; for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); UI != UE;) { Use &U = *UI++; auto *I = cast(U.getUser()); - if (DT.properlyDominates(BB, I->getParent())) { - U.set(To); - DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " - << *To << " in " << *U << "\n"); - ++Count; - } + if (I->getParent() == BB) + continue; + U.set(To); + ++Count; } return Count; } +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlockEdge &Root) { + auto Dominates = [&DT](const BasicBlockEdge &Root, const Use &U) { + return DT.dominates(Root, U); + }; + return ::replaceDominatedUsesWith(From, To, Root, Dominates); +} + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlock *BB) { + auto ProperlyDominates = [&DT](const BasicBlock *BB, const Use &U) { + auto *I = cast(U.getUser())->getParent(); + return DT.properlyDominates(BB, I); + }; + return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates); +} + bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { // Check if the function is specifically marked as a gc leaf function. if (CS.hasFnAttr("gc-leaf-function")) @@ -2128,3 +2109,48 @@ void llvm::maybeMarkSanitizerLibraryCallNoBuiltin( !F->doesNotAccessMemory()) CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin); } + +bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { + // We can't have a PHI with a metadata type. + if (I->getOperand(OpIdx)->getType()->isMetadataTy()) + return false; + + // Early exit. + if (!isa(I->getOperand(OpIdx))) + return true; + + switch (I->getOpcode()) { + default: + return true; + case Instruction::Call: + case Instruction::Invoke: + // Many arithmetic intrinsics have no issue taking a + // variable, however it's hard to distingish these from + // specials such as @llvm.frameaddress that require a constant. + if (isa(I)) + return false; + + // Constant bundle operands may need to retain their constant-ness for + // correctness. + if (ImmutableCallSite(I).isBundleOperand(OpIdx)) + return false; + return true; + case Instruction::ShuffleVector: + // Shufflevector masks are constant. + return OpIdx != 2; + case Instruction::ExtractValue: + case Instruction::InsertValue: + // All operands apart from the first are constant. + return OpIdx == 0; + case Instruction::Alloca: + return false; + case Instruction::GetElementPtr: + if (OpIdx == 0) + return true; + gep_type_iterator It = gep_type_begin(I); + for (auto E = std::next(It, OpIdx); It != E; ++It) + if (It.isStruct()) + return false; + return true; + } +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index e7ba19665d5917d0151559754cea897492f22d40..f3db278ef1e49d9d840ee1d89b12e426a000f49f 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -38,15 +38,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LoopSimplify.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -65,6 +64,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -210,7 +210,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) { PHINode *PN = cast(I); ++I; - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -628,7 +628,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast(I++)); ) - if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { if (SE) SE->forgetValue(PN); if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) { PN->replaceAllUsesWith(V); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 3c669ce644e204da4e283430cf463ab993a23ebb..f2527f89e83e5fabf43b792ff15a1a00680b98cc 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" @@ -39,6 +38,7 @@ #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" using namespace llvm; #define DEBUG_TYPE "loop-unroll" @@ -318,6 +318,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, return false; } + // The current loop unroll pass can only unroll loops with a single latch + // that's a conditional branch exiting the loop. + // FIXME: The implementation can be extended to work with more complicated + // cases, e.g. loops with multiple latches. BasicBlock *Header = L->getHeader(); BranchInst *BI = dyn_cast(LatchBlock->getTerminator()); @@ -328,6 +332,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, return false; } + auto CheckSuccessors = [&](unsigned S1, unsigned S2) { + return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2)); + }; + + if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { + DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" + " exiting the loop can be unrolled\n"); + return false; + } + if (Header->hasAddressTaken()) { // The loop-rotate pass can be helpful to avoid this in many cases. DEBUG(dbgs() << @@ -743,7 +757,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll && Count > 1) { - SmallVector DeadInsts; + SmallVector DeadInsts; simplifyLoopIVs(L, SE, DT, LI, DeadInsts); // Aggressively clean up dead instructions that simplifyLoopIVs already @@ -763,7 +777,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = &*I++; - if (Value *V = SimplifyInstruction(Inst, DL)) + if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) if (LI->replacementPreservesLCSSAForm(Inst, V)) Inst->replaceAllUsesWith(V); if (isInstructionTriviallyDead(Inst)) diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index 73c14f5606b73035e2f046136ddd03ec191ac5ea..5c21490793e794ac1779706bd62e2abb0d9e6ecd 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -46,6 +46,11 @@ static cl::opt UnrollForcePeelCount( "unroll-force-peel-count", cl::init(0), cl::Hidden, cl::desc("Force a peel count regardless of profiling information.")); +// Designates that a Phi is estimated to become invariant after an "infinite" +// number of loop iterations (i.e. only may become an invariant if the loop is +// fully unrolled). +static const unsigned InfiniteIterationsToInvariance = UINT_MAX; + // Check whether we are capable of peeling this loop. static bool canPeel(Loop *L) { // Make sure the loop is in simplified form @@ -66,10 +71,62 @@ static bool canPeel(Loop *L) { return true; } +// This function calculates the number of iterations after which the given Phi +// becomes an invariant. The pre-calculated values are memorized in the map. The +// function (shortcut is I) is calculated according to the following definition: +// Given %x = phi , ..., [%y, %back.edge]. +// If %y is a loop invariant, then I(%x) = 1. +// If %y is a Phi from the loop header, I(%x) = I(%y) + 1. +// Otherwise, I(%x) is infinite. +// TODO: Actually if %y is an expression that depends only on Phi %z and some +// loop invariants, we can estimate I(%x) = I(%z) + 1. The example +// looks like: +// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration. +// %y = phi(0, 5), +// %a = %y + 1. +static unsigned calculateIterationsToInvariance( + PHINode *Phi, Loop *L, BasicBlock *BackEdge, + SmallDenseMap &IterationsToInvariance) { + assert(Phi->getParent() == L->getHeader() && + "Non-loop Phi should not be checked for turning into invariant."); + assert(BackEdge == L->getLoopLatch() && "Wrong latch?"); + // If we already know the answer, take it from the map. + auto I = IterationsToInvariance.find(Phi); + if (I != IterationsToInvariance.end()) + return I->second; + + // Otherwise we need to analyze the input from the back edge. + Value *Input = Phi->getIncomingValueForBlock(BackEdge); + // Place infinity to map to avoid infinite recursion for cycled Phis. Such + // cycles can never stop on an invariant. + IterationsToInvariance[Phi] = InfiniteIterationsToInvariance; + unsigned ToInvariance = InfiniteIterationsToInvariance; + + if (L->isLoopInvariant(Input)) + ToInvariance = 1u; + else if (PHINode *IncPhi = dyn_cast(Input)) { + // Only consider Phis in header block. + if (IncPhi->getParent() != L->getHeader()) + return InfiniteIterationsToInvariance; + // If the input becomes an invariant after X iterations, then our Phi + // becomes an invariant after X + 1 iterations. + unsigned InputToInvariance = calculateIterationsToInvariance( + IncPhi, L, BackEdge, IterationsToInvariance); + if (InputToInvariance != InfiniteIterationsToInvariance) + ToInvariance = InputToInvariance + 1u; + } + + // If we found that this Phi lies in an invariant chain, update the map. + if (ToInvariance != InfiniteIterationsToInvariance) + IterationsToInvariance[Phi] = ToInvariance; + return ToInvariance; +} + // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount) { + assert(LoopSize > 0 && "Zero loop size is not allowed!"); UP.PeelCount = 0; if (!canPeel(L)) return; @@ -78,30 +135,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, if (!L->empty()) return; - // Try to find a Phi node that has the same loop invariant as an input from - // its only back edge. If there is such Phi, peeling 1 iteration from the - // loop is profitable, because starting from 2nd iteration we will have an - // invariant instead of this Phi. - if (LoopSize <= UP.Threshold) { + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N + // iterations of the loop. For this we compute the number for iterations after + // which every Phi is guaranteed to become an invariant, and try to peel the + // maximum number of iterations among these values, thus turning all those + // Phis into invariants. + // First, check that we can peel at least one iteration. + if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) { + // Store the pre-calculated values here. + SmallDenseMap IterationsToInvariance; + // Now go through all Phis to calculate their the number of iterations they + // need to become invariants. + unsigned DesiredPeelCount = 0; BasicBlock *BackEdge = L->getLoopLatch(); assert(BackEdge && "Loop is not in simplified form?"); - BasicBlock *Header = L->getHeader(); - // Iterate over Phis to find one with invariant input on back edge. - bool FoundCandidate = false; - PHINode *Phi; - for (auto BI = Header->begin(); isa(&*BI); ++BI) { - Phi = cast(&*BI); - Value *Input = Phi->getIncomingValueForBlock(BackEdge); - if (L->isLoopInvariant(Input)) { - FoundCandidate = true; - break; - } + for (auto BI = L->getHeader()->begin(); isa(&*BI); ++BI) { + PHINode *Phi = cast(&*BI); + unsigned ToInvariance = calculateIterationsToInvariance( + Phi, L, BackEdge, IterationsToInvariance); + if (ToInvariance != InfiniteIterationsToInvariance) + DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance); } - if (FoundCandidate) { - DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi - << " because starting from 2nd iteration it is always" - << " an invariant\n"); - UP.PeelCount = 1; + if (DesiredPeelCount > 0) { + // Pay respect to limitations implied by loop size and the max peel count. + unsigned MaxPeelCount = UnrollPeelMaxCount; + MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); + // Consider max peel count limitation. + assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); + DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn" + << " some Phis into invariants.\n"); + UP.PeelCount = DesiredPeelCount; return; } } diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 85db734fb182755cda9ea1025a5f01c3c58b5cfb..a920cd86a26a8c38d18b49658f0c43fa4695bde7 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" @@ -37,6 +36,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" #include using namespace llvm; @@ -512,6 +512,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, BasicBlock *Latch = L->getLoopLatch(); + // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the + // targets of the Latch be the single exit block out of the loop. This needs + // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. + BranchInst *LatchBR = cast(Latch->getTerminator()); + assert( + (LatchBR->getSuccessor(0) == Exit || LatchBR->getSuccessor(1) == Exit) && + "one of the loop latch successors should be " + "the exit block!"); + // Avoid warning of unused `LatchBR` variable in release builds. + (void)LatchBR; // Loop structure is the following: // // PreHeader diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 444bc16e0a1567ecc3d45ea9ba19fe588c2e6d76..412f6129407ed67a7b6e6d3a130cca96eaf659c1 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -15,13 +15,13 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -553,22 +553,13 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop, if (!Previous || !TheLoop->contains(Previous) || isa(Previous)) return false; + // Ensure every user of the phi node is dominated by the previous value. + // The dominance requirement ensures the loop vectorizer will not need to + // vectorize the initial value prior to the first iteration of the loop. for (User *U : Phi->users()) if (auto *I = dyn_cast(U)) { - // Ensure every user of the phi node is dominated by the previous value. - // The dominance requirement ensures the loop vectorizer will not need to - // vectorize the initial value prior to the first iteration of the loop. if (!DT->dominates(Previous, I)) return false; - // When the phi node has users outside the loop, the current logic for - // fixFirstOrderRecurrences may generate incorrect code. Specifically, we - // extract the last element from the vectorized phi, which would be the - // update to the phi before exiting the loop. However, what we want is the - // previous phi value before the update (i.e. the second last update - // before end of the vectorized loop). - // See added test cases in first-order-recurrence.ll - if (!TheLoop->contains(I)) - return false; } return true; @@ -1121,3 +1112,203 @@ Optional llvm::getLoopEstimatedTripCount(Loop *L) { else return (FalseVal + (TrueVal / 2)) / TrueVal; } + +/// \brief Adds a 'fast' flag to floating point operations. +static Value *addFastMathFlag(Value *V) { + if (isa(V)) { + FastMathFlags Flags; + Flags.setUnsafeAlgebra(); + cast(V)->setFastMathFlags(Flags); + } + return V; +} + +// Helper to generate a log2 shuffle reduction. +Value * +llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, + RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, + ArrayRef RedOps) { + unsigned VF = Src->getType()->getVectorNumElements(); + // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles + // and vector ops, reducing the set of values being computed by half each + // round. + assert(isPowerOf2_32(VF) && + "Reduction emission only supported for pow2 vectors!"); + Value *TmpVec = Src; + SmallVector ShuffleMask(VF, nullptr); + for (unsigned i = VF; i != 1; i >>= 1) { + // Move the upper half of the vector to the lower half. + for (unsigned j = 0; j != i / 2; ++j) + ShuffleMask[j] = Builder.getInt32(i / 2 + j); + + // Fill the rest of the mask with undef. + std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), + UndefValue::get(Builder.getInt32Ty())); + + Value *Shuf = Builder.CreateShuffleVector( + TmpVec, UndefValue::get(TmpVec->getType()), + ConstantVector::get(ShuffleMask), "rdx.shuf"); + + if (Op != Instruction::ICmp && Op != Instruction::FCmp) { + // Floating point operations had to be 'fast' to enable the reduction. + TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, + TmpVec, Shuf, "bin.rdx")); + } else { + assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && + "Invalid min/max"); + TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec, + Shuf); + } + if (!RedOps.empty()) + propagateIRFlags(TmpVec, RedOps); + } + // The result is in the first element of the vector. + return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); +} + +/// Create a simple vector reduction specified by an opcode and some +/// flags (if generating min/max reductions). +Value *llvm::createSimpleTargetReduction( + IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, + Value *Src, TargetTransformInfo::ReductionFlags Flags, + ArrayRef RedOps) { + assert(isa(Src->getType()) && "Type must be a vector"); + + Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType()); + std::function BuildFunc; + using RD = RecurrenceDescriptor; + RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; + // TODO: Support creating ordered reductions. + FastMathFlags FMFUnsafe; + FMFUnsafe.setUnsafeAlgebra(); + + switch (Opcode) { + case Instruction::Add: + BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; + break; + case Instruction::Mul: + BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; + break; + case Instruction::And: + BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; + break; + case Instruction::Or: + BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; + break; + case Instruction::Xor: + BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; + break; + case Instruction::FAdd: + BuildFunc = [&]() { + auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src); + cast(Rdx)->setFastMathFlags(FMFUnsafe); + return Rdx; + }; + break; + case Instruction::FMul: + BuildFunc = [&]() { + auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src); + cast(Rdx)->setFastMathFlags(FMFUnsafe); + return Rdx; + }; + break; + case Instruction::ICmp: + if (Flags.IsMaxOp) { + MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; + BuildFunc = [&]() { + return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); + }; + } else { + MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; + BuildFunc = [&]() { + return Builder.CreateIntMinReduce(Src, Flags.IsSigned); + }; + } + break; + case Instruction::FCmp: + if (Flags.IsMaxOp) { + MinMaxKind = RD::MRK_FloatMax; + BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; + } else { + MinMaxKind = RD::MRK_FloatMin; + BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; + } + break; + default: + llvm_unreachable("Unhandled opcode"); + break; + } + if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) + return BuildFunc(); + return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); +} + +/// Create a vector reduction using a given recurrence descriptor. +Value *llvm::createTargetReduction(IRBuilder<> &Builder, + const TargetTransformInfo *TTI, + RecurrenceDescriptor &Desc, Value *Src, + bool NoNaN) { + // TODO: Support in-order reductions based on the recurrence descriptor. + RecurrenceDescriptor::RecurrenceKind RecKind = Desc.getRecurrenceKind(); + TargetTransformInfo::ReductionFlags Flags; + Flags.NoNaN = NoNaN; + auto getSimpleRdx = [&](unsigned Opc) { + return createSimpleTargetReduction(Builder, TTI, Opc, Src, Flags); + }; + switch (RecKind) { + case RecurrenceDescriptor::RK_FloatAdd: + return getSimpleRdx(Instruction::FAdd); + case RecurrenceDescriptor::RK_FloatMult: + return getSimpleRdx(Instruction::FMul); + case RecurrenceDescriptor::RK_IntegerAdd: + return getSimpleRdx(Instruction::Add); + case RecurrenceDescriptor::RK_IntegerMult: + return getSimpleRdx(Instruction::Mul); + case RecurrenceDescriptor::RK_IntegerAnd: + return getSimpleRdx(Instruction::And); + case RecurrenceDescriptor::RK_IntegerOr: + return getSimpleRdx(Instruction::Or); + case RecurrenceDescriptor::RK_IntegerXor: + return getSimpleRdx(Instruction::Xor); + case RecurrenceDescriptor::RK_IntegerMinMax: { + switch (Desc.getMinMaxRecurrenceKind()) { + case RecurrenceDescriptor::MRK_SIntMax: + Flags.IsSigned = true; + Flags.IsMaxOp = true; + break; + case RecurrenceDescriptor::MRK_UIntMax: + Flags.IsMaxOp = true; + break; + case RecurrenceDescriptor::MRK_SIntMin: + Flags.IsSigned = true; + break; + case RecurrenceDescriptor::MRK_UIntMin: + break; + default: + llvm_unreachable("Unhandled MRK"); + } + return getSimpleRdx(Instruction::ICmp); + } + case RecurrenceDescriptor::RK_FloatMinMax: { + Flags.IsMaxOp = + Desc.getMinMaxRecurrenceKind() == RecurrenceDescriptor::MRK_FloatMax; + return getSimpleRdx(Instruction::FCmp); + } + default: + llvm_unreachable("Unhandled RecKind"); + } +} + +void llvm::propagateIRFlags(Value *I, ArrayRef VL) { + if (auto *VecOp = dyn_cast(I)) { + if (auto *I0 = dyn_cast(VL[0])) { + // VecOVp is initialized to the 0th scalar, so start counting from index + // '1'. + VecOp->copyIRFlags(I0); + for (int i = 1, e = VL.size(); i < e; ++i) { + if (auto *Scalar = dyn_cast(VL[i])) + VecOp->andIRFlags(Scalar); + } + } + } +} diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index c7cb561b5e21d0699a927870de7543fc57396c0c..0a51f9a0e4a26eee228a3595d70c5b371a3b0a3a 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index b375d51005d574319b19996d4175634a7f92a189..890afbc46e636b714545958ea4b3317553ed21d2 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -24,6 +23,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include @@ -403,6 +403,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, Value *Val = SI->getCondition(); // The value we are switching on... BasicBlock* Default = SI->getDefaultDest(); + // Don't handle unreachable blocks. If there are successors with phis, this + // would leave them behind with missing predecessors. + if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) || + CurBlock->getSinglePredecessor() == CurBlock) { + DeleteList.insert(CurBlock); + return; + } + // If there is only the default destination, just branch. if (!SI->getNumCases()) { BranchInst::Create(Default, CurBlock); diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 481c6aa29c3a1d12c38cb6f7a856d9108f6a1a10..9f2ad540c83dd0b8266a094f23e581a2f226a3f6 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -23,6 +22,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; namespace { diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index dbe42c201dd4f77e1a803eb7cb5d2375f2025204..2ef3d6336ae2b3224a0546113b3a4ee118846e59 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -35,7 +35,7 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, // Upgrade a 2-field global array type to the new 3-field format if needed. if (Data && OldEltTy->getNumElements() < 3) EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - IRB.getInt8PtrTy(), nullptr); + IRB.getInt8PtrTy()); else EltTy = OldEltTy; if (Constant *Init = GVCtor->getInitializer()) { @@ -44,10 +44,10 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, for (unsigned i = 0; i != n; ++i) { auto Ctor = cast(Init->getOperand(i)); if (EltTy != OldEltTy) - Ctor = ConstantStruct::get( - EltTy, Ctor->getAggregateElement((unsigned)0), - Ctor->getAggregateElement(1), - Constant::getNullValue(IRB.getInt8PtrTy()), nullptr); + Ctor = + ConstantStruct::get(EltTy, Ctor->getAggregateElement((unsigned)0), + Ctor->getAggregateElement(1), + Constant::getNullValue(IRB.getInt8PtrTy())); CurrentCtors.push_back(Ctor); } } @@ -55,7 +55,7 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, } else { // Use the new three-field struct if there isn't one already. EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - IRB.getInt8PtrTy(), nullptr); + IRB.getInt8PtrTy()); } // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. @@ -237,3 +237,35 @@ void llvm::filterDeadComdatFunctions( ComdatEntriesCovered.end(); }); } + +std::string llvm::getUniqueModuleId(Module *M) { + MD5 Md5; + bool ExportsSymbols = false; + auto AddGlobal = [&](GlobalValue &GV) { + if (GV.isDeclaration() || GV.getName().startswith("llvm.") || + !GV.hasExternalLinkage()) + return; + ExportsSymbols = true; + Md5.update(GV.getName()); + Md5.update(ArrayRef{0}); + }; + + for (auto &F : *M) + AddGlobal(F); + for (auto &GV : M->globals()) + AddGlobal(GV); + for (auto &GA : M->aliases()) + AddGlobal(GA); + for (auto &IF : M->ifuncs()) + AddGlobal(IF); + + if (!ExportsSymbols) + return ""; + + MD5::MD5Result R; + Md5.final(R); + + SmallString<32> Str; + MD5::stringifyResult(R, Str); + return ("$" + Str).str(); +} diff --git a/lib/Transforms/Utils/OrderedInstructions.cpp b/lib/Transforms/Utils/OrderedInstructions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2e67e0def5b9ec21e22e4531aa571ffc3795f653 --- /dev/null +++ b/lib/Transforms/Utils/OrderedInstructions.cpp @@ -0,0 +1,33 @@ +//===-- OrderedInstructions.cpp - Instruction dominance function ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines utility to check dominance relation of 2 instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/OrderedInstructions.h" +using namespace llvm; + +/// Given 2 instructions, use OrderedBasicBlock to check for dominance relation +/// if the instructions are in the same basic block, Otherwise, use dominator +/// tree. +bool OrderedInstructions::dominates(const Instruction *InstA, + const Instruction *InstB) const { + const BasicBlock *IBB = InstA->getParent(); + // Use ordered basic block to do dominance check in case the 2 instructions + // are in the same basic block. + if (IBB == InstB->getParent()) { + auto OBB = OBBMap.find(IBB); + if (OBB == OBBMap.end()) + OBB = OBBMap.insert({IBB, make_unique(IBB)}).first; + return OBB->second->dominates(InstA, InstB); + } else { + return DT->dominates(InstA->getParent(), InstB->getParent()); + } +} diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp index 8877aeafecdec8ebbce57a989a4c0286dce6a7d0..1260e35e934ded52717a4ecd774388708891113e 100644 --- a/lib/Transforms/Utils/PredicateInfo.cpp +++ b/lib/Transforms/Utils/PredicateInfo.cpp @@ -460,6 +460,9 @@ void PredicateInfo::buildPredicateInfo() { if (auto *BI = dyn_cast(BranchBB->getTerminator())) { if (!BI->isConditional()) continue; + // Can't insert conditional information if they all go to the same place. + if (BI->getSuccessor(0) == BI->getSuccessor(1)) + continue; processBranch(BI, BranchBB, OpsToRename); } else if (auto *SI = dyn_cast(BranchBB->getTerminator())) { processSwitch(SI, BranchBB, OpsToRename); @@ -541,7 +544,40 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter, // // TODO: Use this algorithm to perform fast single-variable renaming in // promotememtoreg and memoryssa. -void PredicateInfo::renameUses(SmallPtrSetImpl &OpsToRename) { +void PredicateInfo::renameUses(SmallPtrSetImpl &OpSet) { + // Sort OpsToRename since we are going to iterate it. + SmallVector OpsToRename(OpSet.begin(), OpSet.end()); + std::sort(OpsToRename.begin(), OpsToRename.end(), [&](const Value *A, + const Value *B) { + auto *ArgA = dyn_cast_or_null(A); + auto *ArgB = dyn_cast_or_null(B); + + // If A and B are args, order them based on their arg no. + if (ArgA && !ArgB) + return true; + if (ArgB && !ArgA) + return false; + if (ArgA && ArgB) + return ArgA->getArgNo() < ArgB->getArgNo(); + + // Else, A are B are instructions. + // If they belong to different BBs, order them by the dominance of BBs. + auto *AInst = cast(A); + auto *BInst = cast(B); + if (AInst->getParent() != BInst->getParent()) + return DT.dominates(AInst->getParent(), BInst->getParent()); + + // Else, A and B belong to the same BB. + // Order A and B by their dominance. + auto *BB = AInst->getParent(); + auto LookupResult = OBBMap.find(BB); + if (LookupResult != OBBMap.end()) + return LookupResult->second->dominates(AInst, BInst); + + auto Result = OBBMap.insert({BB, make_unique(BB)}); + return Result.first->second->dominates(AInst, BInst); + }); + ValueDFS_Compare Compare(OBBMap); // Compute liveness, and rename in O(uses) per Op. for (auto *Op : OpsToRename) { diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index a33b85c4ee69ae9d08324dade18fa77b4d613bc8..cdba982e6641fdcf0b4921eac381771f7d481f20 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -225,10 +225,10 @@ struct PromoteMem2Reg { std::vector Allocas; DominatorTree &DT; DIBuilder DIB; - /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; + const SimplifyQuery SQ; /// Reverse mapping of Allocas. DenseMap AllocaLookup; @@ -270,7 +270,8 @@ public: AssumptionCache *AC) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AC(AC) {} + AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), + nullptr, &DT, AC) {} void run(); @@ -673,8 +674,6 @@ void PromoteMem2Reg::run() { A->eraseFromParent(); } - const DataLayout &DL = F.getParent()->getDataLayout(); - // Remove alloca's dbg.declare instrinsics from the function. for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) @@ -699,7 +698,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) { + if (Value *V = SimplifyInstruction(PN, SQ)) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); NewPhiNodes.erase(I++); diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 8b6a2c3766d268cb20c9d28fbb265958fedc80a6..6ccf54e49dd31e9944c1295de713b5dba44263dd 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -30,7 +31,6 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" #include #include diff --git a/lib/Transforms/Utils/SanitizerStats.cpp b/lib/Transforms/Utils/SanitizerStats.cpp index 9afd175c10ed55be04bd622d4bb52ff813385161..8c23957ac43e915b27fb894a782f00b84ea8562a 100644 --- a/lib/Transforms/Utils/SanitizerStats.cpp +++ b/lib/Transforms/Utils/SanitizerStats.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SanitizerStats.h" -#include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 127a44df5344fec7d05023ae201ae481adabb8f9..0970c436e6655a0a49b8078f41118f6f7250ac90 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -15,13 +15,13 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" @@ -29,8 +29,8 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" @@ -55,11 +55,11 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -595,7 +595,7 @@ private: Span = Span.inverse(); // If there are a ton of values, we don't want to make a ginormous switch. - if (Span.getSetSize().ugt(8) || Span.isEmptySet()) { + if (Span.isSizeLargerThan(8) || Span.isEmptySet()) { return false; } @@ -1375,53 +1375,6 @@ HoistTerminator: return true; } -// Is it legal to place a variable in operand \c OpIdx of \c I? -// FIXME: This should be promoted to Instruction. -static bool canReplaceOperandWithVariable(const Instruction *I, - unsigned OpIdx) { - // We can't have a PHI with a metadata type. - if (I->getOperand(OpIdx)->getType()->isMetadataTy()) - return false; - - // Early exit. - if (!isa(I->getOperand(OpIdx))) - return true; - - switch (I->getOpcode()) { - default: - return true; - case Instruction::Call: - case Instruction::Invoke: - // FIXME: many arithmetic intrinsics have no issue taking a - // variable, however it's hard to distingish these from - // specials such as @llvm.frameaddress that require a constant. - if (isa(I)) - return false; - - // Constant bundle operands may need to retain their constant-ness for - // correctness. - if (ImmutableCallSite(I).isBundleOperand(OpIdx)) - return false; - - return true; - - case Instruction::ShuffleVector: - // Shufflevector masks are constant. - return OpIdx != 2; - case Instruction::ExtractValue: - case Instruction::InsertValue: - // All operands apart from the first are constant. - return OpIdx == 0; - case Instruction::Alloca: - return false; - case Instruction::GetElementPtr: - if (OpIdx == 0) - return true; - gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1); - return It.isSequential(); - } -} - // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common @@ -2230,11 +2183,11 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL, } // Check for trivial simplification. - if (Value *V = SimplifyInstruction(N, DL)) { + if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) { if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { - delete N; // Instruction folded away, don't need actual inst + N->deleteValue(); // Instruction folded away, don't need actual inst N = nullptr; } } else { @@ -2306,7 +2259,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); - if (Value *V = SimplifyInstruction(PN, DL)) { + if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); continue; @@ -3055,6 +3008,15 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { BasicBlock *QFB = QBI->getSuccessor(1); BasicBlock *PostBB = QFB->getSingleSuccessor(); + // Make sure we have a good guess for PostBB. If QTB's only successor is + // QFB, then QFB is a better PostBB. + if (QTB->getSingleSuccessor() == QFB) + PostBB = QFB; + + // If we couldn't find a good PostBB, stop. + if (!PostBB) + return false; + bool InvertPCond = false, InvertQCond = false; // Canonicalize fallthroughs to the true branches. if (PFB == QBI->getParent()) { @@ -3079,14 +3041,13 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; }; - if (!PostBB || - !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) return false; if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) return false; - if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2) + if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2)) return false; // OK, this is a sequence of two diamonds or triangles. @@ -3536,7 +3497,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - if (Value *V = SimplifyInstruction(ICI, DL)) { + if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } @@ -3746,7 +3707,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) { if (!isa(I)) return false; - SmallSet TrivialUnwindBlocks; + SmallSetVector TrivialUnwindBlocks; auto *PhiLPInst = cast(RI->getValue()); // Check incoming blocks to see if any of them are trivial. @@ -4359,8 +4320,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); - APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI); + KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) @@ -4371,8 +4331,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, // Gather dead cases. SmallVector DeadCases; for (auto &Case : SI->cases()) { - APInt CaseVal = Case.getCaseValue()->getValue(); - if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne || + const APInt &CaseVal = Case.getCaseValue()->getValue(); + if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); @@ -4386,7 +4346,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, bool HasDefault = !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); const unsigned NumUnknownBits = - Bits - (KnownZero | KnownOne).countPopulation(); + Bits - (Known.Zero | Known.One).countPopulation(); assert(NumUnknownBits <= Bits); if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && @@ -4937,7 +4897,7 @@ SwitchLookupTable::SwitchLookupTable( LinearMappingPossible = false; break; } - APInt Val = ConstVal->getValue(); + const APInt &Val = ConstVal->getValue(); if (I != 0) { APInt Dist = Val - PrevVal; if (I == 1) { diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index a4cc6a031ad4c5cc5f513f8d074e4f207eba8118..faa14046b1e3c2c89b96bb30526290b909080565 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -51,13 +51,13 @@ namespace { ScalarEvolution *SE; DominatorTree *DT; - SmallVectorImpl &DeadInsts; + SmallVectorImpl &DeadInsts; bool Changed; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI,SmallVectorImpl &Dead) + LoopInfo *LI, SmallVectorImpl &Dead) : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -352,7 +352,7 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { return false; typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)( - const SCEV *, const SCEV *, SCEV::NoWrapFlags); + const SCEV *, const SCEV *, SCEV::NoWrapFlags, unsigned); typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)( const SCEV *, Type *); @@ -406,10 +406,11 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); const SCEV *A = - (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy); + (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0u), + WideTy); const SCEV *B = (SE->*Operation)((SE->*Extension)(LHS, WideTy), - (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap); + (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap, 0u); if (A != B) return false; @@ -530,8 +531,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, return false; const SCEV *(ScalarEvolution::*GetExprForBO)(const SCEV *, const SCEV *, - SCEV::NoWrapFlags); - + SCEV::NoWrapFlags, unsigned); switch (BO->getOpcode()) { default: return false; @@ -560,7 +560,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, const SCEV *ExtendAfterOp = SE->getZeroExtendExpr(SE->getSCEV(BO), WideTy); const SCEV *OpAfterExtend = (SE->*GetExprForBO)( SE->getZeroExtendExpr(LHS, WideTy), SE->getZeroExtendExpr(RHS, WideTy), - SCEV::FlagAnyWrap); + SCEV::FlagAnyWrap, 0u); if (ExtendAfterOp == OpAfterExtend) { BO->setHasNoUnsignedWrap(); SE->forgetValue(BO); @@ -572,7 +572,7 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO, const SCEV *ExtendAfterOp = SE->getSignExtendExpr(SE->getSCEV(BO), WideTy); const SCEV *OpAfterExtend = (SE->*GetExprForBO)( SE->getSignExtendExpr(LHS, WideTy), SE->getSignExtendExpr(RHS, WideTy), - SCEV::FlagAnyWrap); + SCEV::FlagAnyWrap, 0u); if (ExtendAfterOp == OpAfterExtend) { BO->setHasNoSignedWrap(); SE->forgetValue(BO); @@ -701,7 +701,7 @@ void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead, + LoopInfo *LI, SmallVectorImpl &Dead, IVVisitor *V) { SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); SIV.simplifyUsers(CurrIV, V); @@ -711,7 +711,7 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead) { + LoopInfo *LI, SmallVectorImpl &Dead) { bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { Changed |= simplifyUsersOfIV(cast(I), SE, DT, LI, Dead); diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index f6070868de44e28f46aa719a087a33e98d425a8e..2ea15f65cef9aa6ba4ed5836b65b0d35387670d7 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -27,18 +27,16 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "instsimplify" STATISTIC(NumSimplified, "Number of redundant instructions removed"); -static bool runImpl(Function &F, const DominatorTree *DT, - const TargetLibraryInfo *TLI, AssumptionCache *AC, +static bool runImpl(Function &F, const SimplifyQuery &SQ, OptimizationRemarkEmitter *ORE) { - const DataLayout &DL = F.getParent()->getDataLayout(); SmallPtrSet S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; @@ -56,7 +54,7 @@ static bool runImpl(Function &F, const DominatorTree *DT, // Don't waste time simplifying unused instructions. if (!I->use_empty()) { - if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC, ORE)) { + if (Value *V = SimplifyInstruction(I, SQ, ORE)) { // Mark all uses for resimplification next time round the loop. for (User *U : I->users()) Next->insert(cast(U)); @@ -65,7 +63,7 @@ static bool runImpl(Function &F, const DominatorTree *DT, Changed = true; } } - if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) { + if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) { // RecursivelyDeleteTriviallyDeadInstruction can remove more than one // instruction, so simply incrementing the iterator does not work. // When instructions get deleted re-iterate instead. @@ -113,8 +111,9 @@ namespace { &getAnalysis().getAssumptionCache(F); OptimizationRemarkEmitter *ORE = &getAnalysis().getORE(); - - return runImpl(F, DT, TLI, AC, ORE); + const DataLayout &DL = F.getParent()->getDataLayout(); + const SimplifyQuery SQ(DL, TLI, DT, AC); + return runImpl(F, SQ, ORE); } }; } @@ -141,7 +140,9 @@ PreservedAnalyses InstSimplifierPass::run(Function &F, auto &TLI = AM.getResult(F); auto &AC = AM.getResult(F); auto &ORE = AM.getResult(F); - bool Changed = runImpl(F, &DT, &TLI, &AC, &ORE); + const DataLayout &DL = F.getParent()->getDataLayout(); + const SimplifyQuery SQ(DL, &TLI, &DT, &AC); + bool Changed = runImpl(F, SQ, &ORE); if (!Changed) return PreservedAnalyses::all(); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index aa71e3669ea27804b2b01e07d41a54a93fb96dd7..b723b65f35e594173a1c1c05ac52ae1ea10e2d30 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -30,16 +30,13 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace PatternMatch; -static cl::opt - ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden, - cl::desc("Treat error-reporting calls as cold")); - static cl::opt EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden, cl::init(false), @@ -88,20 +85,6 @@ static bool isCallingConvCCompatible(CallInst *CI) { return false; } -/// Return true if it only matters that the value is equal or not-equal to zero. -static bool isOnlyUsedInZeroEqualityComparison(Value *V) { - for (User *U : V->users()) { - if (ICmpInst *IC = dyn_cast(U)) - if (IC->isEquality()) - if (Constant *C = dyn_cast(IC->getOperand(1))) - if (C->isNullValue()) - continue; - // Unknown instruction. - return false; - } - return true; -} - /// Return true if it is only used in equality comparisons with With. static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { for (User *U : V->users()) { @@ -429,59 +412,68 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { return Dst; } -Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { +Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, + unsigned CharSize) { Value *Src = CI->getArgOperand(0); // Constant folding: strlen("xyz") -> 3 - if (uint64_t Len = GetStringLength(Src)) + if (uint64_t Len = GetStringLength(Src, CharSize)) return ConstantInt::get(CI->getType(), Len - 1); // If s is a constant pointer pointing to a string literal, we can fold - // strlen(s + x) to strlen(s) - x, when x is known to be in the range + // strlen(s + x) to strlen(s) - x, when x is known to be in the range // [0, strlen(s)] or the string has a single null terminator '\0' at the end. - // We only try to simplify strlen when the pointer s points to an array + // We only try to simplify strlen when the pointer s points to an array // of i8. Otherwise, we would need to scale the offset x before doing the - // subtraction. This will make the optimization more complex, and it's not - // very useful because calling strlen for a pointer of other types is + // subtraction. This will make the optimization more complex, and it's not + // very useful because calling strlen for a pointer of other types is // very uncommon. if (GEPOperator *GEP = dyn_cast(Src)) { - if (!isGEPBasedOnPointerToString(GEP)) + if (!isGEPBasedOnPointerToString(GEP, CharSize)) return nullptr; - StringRef Str; - if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) { - size_t NullTermIdx = Str.find('\0'); - - // If the string does not have '\0', leave it to strlen to compute - // its length. - if (NullTermIdx == StringRef::npos) - return nullptr; - + ConstantDataArraySlice Slice; + if (getConstantDataArrayInfo(GEP->getOperand(0), Slice, CharSize)) { + uint64_t NullTermIdx; + if (Slice.Array == nullptr) { + NullTermIdx = 0; + } else { + NullTermIdx = ~((uint64_t)0); + for (uint64_t I = 0, E = Slice.Length; I < E; ++I) { + if (Slice.Array->getElementAsInteger(I + Slice.Offset) == 0) { + NullTermIdx = I; + break; + } + } + // If the string does not have '\0', leave it to strlen to compute + // its length. + if (NullTermIdx == ~((uint64_t)0)) + return nullptr; + } + Value *Offset = GEP->getOperand(2); - unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI, - nullptr); - KnownZero.flipAllBits(); - size_t ArrSize = + KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr); + Known.Zero.flipAllBits(); + uint64_t ArrSize = cast(GEP->getSourceElementType())->getNumElements(); - // KnownZero's bits are flipped, so zeros in KnownZero now represent - // bits known to be zeros in Offset, and ones in KnowZero represent + // KnownZero's bits are flipped, so zeros in KnownZero now represent + // bits known to be zeros in Offset, and ones in KnowZero represent // bits unknown in Offset. Therefore, Offset is known to be in range - // [0, NullTermIdx] when the flipped KnownZero is non-negative and + // [0, NullTermIdx] when the flipped KnownZero is non-negative and // unsigned-less-than NullTermIdx. // - // If Offset is not provably in the range [0, NullTermIdx], we can still - // optimize if we can prove that the program has undefined behavior when - // Offset is outside that range. That is the case when GEP->getOperand(0) + // If Offset is not provably in the range [0, NullTermIdx], we can still + // optimize if we can prove that the program has undefined behavior when + // Offset is outside that range. That is the case when GEP->getOperand(0) // is a pointer to an object whose memory extent is NullTermIdx+1. - if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) || + if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) || (GEP->isInBounds() && isa(GEP->getOperand(0)) && - NullTermIdx == ArrSize - 1)) - return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), + NullTermIdx == ArrSize - 1)) { + Offset = B.CreateSExtOrTrunc(Offset, CI->getType()); + return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), Offset); + } } return nullptr; @@ -489,8 +481,8 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { // strlen(x?"foo":"bars") --> x ? 3 : 4 if (SelectInst *SI = dyn_cast(Src)) { - uint64_t LenTrue = GetStringLength(SI->getTrueValue()); - uint64_t LenFalse = GetStringLength(SI->getFalseValue()); + uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize); + uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize); if (LenTrue && LenFalse) { Function *Caller = CI->getParent()->getParent(); emitOptimizationRemark(CI->getContext(), "simplify-libcalls", *Caller, @@ -510,6 +502,17 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { return nullptr; } +Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) { + return optimizeStringLength(CI, B, 8); +} + +Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) { + Module &M = *CI->getParent()->getParent()->getParent(); + unsigned WCharSize = TLI->getWCharSize(M) * 8; + + return optimizeStringLength(CI, B, WCharSize); +} + Value *LibCallSimplifier::optimizeStrPBrk(CallInst *CI, IRBuilder<> &B) { StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -542,7 +545,7 @@ Value *LibCallSimplifier::optimizeStrTo(CallInst *CI, IRBuilder<> &B) { if (isa(EndPtr)) { // With a null EndPtr, this function won't capture the main argument. // It would be readonly too, except that it still may write to errno. - CI->addAttribute(1, Attribute::NoCapture); + CI->addParamAttr(0, Attribute::NoCapture); } return nullptr; @@ -735,8 +738,8 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { ConstantInt *LenC = dyn_cast(CI->getArgOperand(2)); if (!LenC) return nullptr; - uint64_t Len = LenC->getZExtValue(); + uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 return Constant::getNullValue(CI->getType()); @@ -846,6 +849,9 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B, // Is the inner call really malloc()? Function *InnerCallee = Malloc->getCalledFunction(); + if (!InnerCallee) + return nullptr; + LibFunc Func; if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) || Func != LibFunc_malloc) @@ -930,6 +936,24 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, if (V == nullptr) return nullptr; + // If call isn't an intrinsic, check that it isn't within a function with the + // same name as the float version of this call. + // + // e.g. inline float expf(float val) { return (float) exp((double) val); } + // + // A similar such definition exists in the MinGW-w64 math.h header file which + // when compiled with -O2 -ffast-math causes the generation of infinite loops + // where expf is called. + if (!Callee->isIntrinsic()) { + const Function *F = CI->getFunction(); + StringRef FName = F->getName(); + StringRef CalleeName = Callee->getName(); + if ((FName.size() == (CalleeName.size() + 1)) && + (FName.back() == 'f') && + FName.startswith(CalleeName)) + return nullptr; + } + // Propagate fast-math flags from the existing call to the new call. IRBuilder<>::FastMathFlagGuard Guard(B); B.setFastMathFlags(CI->getFastMathFlags()); @@ -1434,11 +1458,11 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, // x86_64 can't use {float, float} since that would be returned in both // xmm0 and xmm1, which isn't what a real struct would do. ResTy = T.getArch() == Triple::x86_64 - ? static_cast(VectorType::get(ArgTy, 2)) - : static_cast(StructType::get(ArgTy, ArgTy, nullptr)); + ? static_cast(VectorType::get(ArgTy, 2)) + : static_cast(StructType::get(ArgTy, ArgTy)); } else { Name = "__sincospi_stret"; - ResTy = StructType::get(ArgTy, ArgTy, nullptr); + ResTy = StructType::get(ArgTy, ArgTy); } Module *M = OrigCallee->getParent(); @@ -1632,7 +1656,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, } static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { - if (!ColdErrorCalls || !Callee || !Callee->isDeclaration()) + if (!Callee || !Callee->isDeclaration()) return false; if (StreamArg < 0) @@ -2010,6 +2034,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI, return optimizeMemMove(CI, Builder); case LibFunc_memset: return optimizeMemSet(CI, Builder); + case LibFunc_wcslen: + return optimizeWcslen(CI, Builder); default: break; } diff --git a/lib/Transforms/Utils/StripGCRelocates.cpp b/lib/Transforms/Utils/StripGCRelocates.cpp index f3d3fadb51e93a9c197316e10a310bf6c5d71c93..49dc15cf5e7c0bee4e3e278eb96801c5b81ec831 100644 --- a/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/lib/Transforms/Utils/StripGCRelocates.cpp @@ -20,8 +20,8 @@ #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; diff --git a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp index 66dbf335cb95396ca9d04dd703013f7e707ec5d0..cd0378e0140cd83e2f66f5891739a20573cf3596 100644 --- a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" using namespace llvm; namespace { diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 6d136636ce709c1e0565dc956eaa866b27db88c2..20107553665f62bf84b1ec18cabaa6c400e5962d 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -59,9 +59,9 @@ #define DEBUG_TYPE "symbol-rewriter" #include "llvm/Transforms/Utils/SymbolRewriter.h" -#include "llvm/Pass.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryBuffer.h" diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp index 7106483c3bd2a4037c2e458a34d07fad88bedc57..f6c7d1c4989eb8d686a85f3900690ef3a30f0aa2 100644 --- a/lib/Transforms/Utils/Utils.cpp +++ b/lib/Transforms/Utils/Utils.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" #include "llvm-c/Initialization.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp index 4aeea02b1b1bfce6d6971cdd69f00332e3f2b0eb..60d9ede2c4871fdc71b9fe7268c840e9df7fc18b 100644 --- a/lib/Transforms/Utils/VNCoercion.cpp +++ b/lib/Transforms/Utils/VNCoercion.cpp @@ -24,6 +24,11 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy)) return false; + // Don't coerce non-integral pointers to integers or vice versa. + if (DL.isNonIntegralPointerType(StoredVal->getType()) != + DL.isNonIntegralPointerType(LoadTy)) + return false; + return true; } @@ -298,6 +303,15 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, const DataLayout &DL) { LLVMContext &Ctx = SrcVal->getType()->getContext(); + // If two pointers are in the same address space, they have the same size, + // so we don't need to do any truncation, etc. This avoids introducing + // ptrtoint instructions for pointers that may be non-integral. + if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() && + cast(SrcVal->getType())->getAddressSpace() == + cast(LoadTy)->getAddressSpace()) { + return SrcVal; + } + uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8; uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; // Compute which bits of the stored value are being used by the load. Convert diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index f77c10b6dd4730f253f08289bdf4bc1183cd0d1d..930972924c3c04d36d622a205b5c6dff4aba14d6 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -121,6 +121,8 @@ public: void addFlags(RemapFlags Flags); + void remapGlobalObjectMetadata(GlobalObject &GO); + Value *mapValue(const Value *V); void remapInstruction(Instruction *I); void remapFunction(Function &F); @@ -802,6 +804,7 @@ void Mapper::flush() { switch (E.Kind) { case WorklistEntry::MapGlobalInit: E.Data.GVInit.GV->setInitializer(mapConstant(E.Data.GVInit.Init)); + remapGlobalObjectMetadata(*E.Data.GVInit.GV); break; case WorklistEntry::MapAppendingVar: { unsigned PrefixSize = AppendingInits.size() - E.AppendingGVNumNewMembers; @@ -892,6 +895,14 @@ void Mapper::remapInstruction(Instruction *I) { I->mutateType(TypeMapper->remapType(I->getType())); } +void Mapper::remapGlobalObjectMetadata(GlobalObject &GO) { + SmallVector, 8> MDs; + GO.getAllMetadata(MDs); + GO.clearMetadata(); + for (const auto &I : MDs) + GO.addMetadata(I.first, *cast(mapMetadata(I.second))); +} + void Mapper::remapFunction(Function &F) { // Remap the operands. for (Use &Op : F.operands()) @@ -899,11 +910,7 @@ void Mapper::remapFunction(Function &F) { Op = mapValue(Op); // Remap the metadata attachments. - SmallVector, 8> MDs; - F.getAllMetadata(MDs); - F.clearMetadata(); - for (const auto &I : MDs) - F.addMetadata(I.first, *cast(mapMetadata(I.second))); + remapGlobalObjectMetadata(F); // Remap the argument types. if (TypeMapper) @@ -942,11 +949,10 @@ void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix, Constant *NewV; if (IsOldCtorDtor) { auto *S = cast(V); - auto *E1 = mapValue(S->getOperand(0)); - auto *E2 = mapValue(S->getOperand(1)); - Value *Null = Constant::getNullValue(VoidPtrTy); - NewV = - ConstantStruct::get(cast(EltTy), E1, E2, Null, nullptr); + auto *E1 = cast(mapValue(S->getOperand(0))); + auto *E2 = cast(mapValue(S->getOperand(1))); + Constant *Null = Constant::getNullValue(VoidPtrTy); + NewV = ConstantStruct::get(cast(EltTy), E1, E2, Null); } else { NewV = cast_or_null(mapValue(V)); } diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index c83b3f7b225bc243b3ebb09517d1475d27fe66c3..78453aaa16ceb8bb967e1606ef0783b00d7decce 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -15,7 +15,6 @@ //===----------------------------------------------------------------------===// #define BBV_NAME "bb-vectorize" -#include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -50,6 +49,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Vectorize.h" #include using namespace llvm; diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 4409d7a404f8b8ddc1b4593c6bffa52ff8854a6f..9cf66382b5817a3e209a4373c78dbf13a66d1ff0 100644 --- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Vectorize.h" @@ -65,7 +66,9 @@ public: bool run(); private: - Value *getPointerOperand(Value *I); + Value *getPointerOperand(Value *I) const; + + GetElementPtrInst *getSourceGEP(Value *Src) const; unsigned getPointerAddressSpace(Value *I); @@ -215,7 +218,7 @@ bool Vectorizer::run() { return Changed; } -Value *Vectorizer::getPointerOperand(Value *I) { +Value *Vectorizer::getPointerOperand(Value *I) const { if (LoadInst *LI = dyn_cast(I)) return LI->getPointerOperand(); if (StoreInst *SI = dyn_cast(I)) @@ -231,6 +234,19 @@ unsigned Vectorizer::getPointerAddressSpace(Value *I) { return -1; } +GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const { + // First strip pointer bitcasts. Make sure pointee size is the same with + // and without casts. + // TODO: a stride set by the add instruction below can match the difference + // in pointee type size here. Currently it will not be vectorized. + Value *SrcPtr = getPointerOperand(Src); + Value *SrcBase = SrcPtr->stripPointerCasts(); + if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) == + DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType())) + SrcPtr = SrcBase; + return dyn_cast(SrcPtr); +} + // FIXME: Merge with llvm::isConsecutiveAccess bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { Value *PtrA = getPointerOperand(A); @@ -283,8 +299,8 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { // Look through GEPs after checking they're the same except for the last // index. - GetElementPtrInst *GEPA = dyn_cast(getPointerOperand(A)); - GetElementPtrInst *GEPB = dyn_cast(getPointerOperand(B)); + GetElementPtrInst *GEPA = getSourceGEP(A); + GetElementPtrInst *GEPB = getSourceGEP(B); if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands()) return false; unsigned FinalIndex = GEPA->getNumOperands() - 1; @@ -328,11 +344,9 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) { // If any bits are known to be zero other than the sign bit in OpA, we can // add 1 to it while guaranteeing no overflow of any sort. if (!Safe) { - APInt KnownZero(BitWidth, 0); - APInt KnownOne(BitWidth, 0); - computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, &DT); - KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1); - if (KnownZero != 0) + KnownBits Known(BitWidth); + computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT); + if (Known.countMaxTrailingOnes() < (BitWidth - 1)) Safe = true; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index b0ced809d94eb55d0bec394b7f86fd5b5bf87e47..1abdb2484850656486147667972988380e249268 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -391,13 +391,14 @@ public: TripCount(nullptr), VectorTripCount(nullptr), Legal(LVL), Cost(CM), AddedSafetyChecks(false) {} - // Perform the actual loop widening (vectorization). - void vectorize() { - // Create a new empty loop. Unlink the old loop and connect the new one. - createEmptyLoop(); - // Widen each instruction in the old loop to a new one in the new loop. - vectorizeLoop(); - } + /// Create a new empty loop. Unlink the old loop and connect the new one. + void createVectorizedLoopSkeleton(); + + /// Vectorize a single instruction within the innermost loop. + void vectorizeInstruction(Instruction &I); + + /// Fix the vectorized code, taking care of header phi's, live-outs, and more. + void fixVectorizedLoop(); // Return true if any runtime check is added. bool areSafetyChecksAdded() { return AddedSafetyChecks; } @@ -422,10 +423,8 @@ protected: // When we if-convert we need to create edge masks. We have to cache values // so that we don't end up with exponential recursion/IR. typedef DenseMap, VectorParts> - EdgeMaskCache; - - /// Create an empty loop, based on the loop ranges of the old loop. - void createEmptyLoop(); + EdgeMaskCacheTy; + typedef DenseMap BlockMaskCacheTy; /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, @@ -435,8 +434,6 @@ protected: /// Create a new induction variable inside L. PHINode *createInductionVariable(Loop *L, Value *Start, Value *End, Value *Step, Instruction *DL); - /// Copy and widen the instructions from the old loop. - virtual void vectorizeLoop(); /// Handle all cross-iteration phis in the header. void fixCrossIterationPHIs(); @@ -449,10 +446,10 @@ protected: /// vectorizing this phi node. void fixReduction(PHINode *Phi); - /// \brief The Loop exit block may have single value PHI nodes where the - /// incoming value is 'Undef'. While vectorizing we only handled real values - /// that were defined inside the loop. Here we fix the 'undef case'. - /// See PR14725. + /// \brief The Loop exit block may have single value PHI nodes with some + /// incoming value. While vectorizing we only handled real values + /// that were defined inside the loop and we should have one value for + /// each predecessor of its parent basic block. See PR14725. void fixLCSSAPHIs(); /// Iteratively sink the scalarized operands of a predicated instruction into @@ -463,10 +460,6 @@ protected: /// respective conditions. void predicateInstructions(); - /// Collect the instructions from the original loop that would be trivially - /// dead in the vectorized loop if generated. - void collectTriviallyDeadInstructions(); - /// Shrinks vector element sizes to the smallest bitwidth they can be legally /// represented as. void truncateToMinimalBitwidths(); @@ -479,9 +472,6 @@ protected: /// and DST. VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst); - /// A helper function to vectorize a single BB within the innermost loop. - void vectorizeBlockInLoop(BasicBlock *BB); - /// Vectorize a single PHINode in a block. This method handles the induction /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. @@ -495,8 +485,7 @@ protected: /// of scalars. If \p IfPredicateInstr is true we need to 'hide' each /// scalarized instruction behind an if block predicated on the control /// dependence of the instruction. - virtual void scalarizeInstruction(Instruction *Instr, - bool IfPredicateInstr = false); + void scalarizeInstruction(Instruction *Instr, bool IfPredicateInstr = false); /// Vectorize Load and Store instructions, virtual void vectorizeMemoryInstruction(Instruction *Instr); @@ -783,7 +772,8 @@ protected: /// Store instructions that should be predicated, as a pair /// SmallVector, 4> PredicatedInstructions; - EdgeMaskCache MaskCache; + EdgeMaskCacheTy EdgeMaskCache; + BlockMaskCacheTy BlockMaskCache; /// Trip count of the original loop. Value *TripCount; /// Trip count of the widened loop (TripCount - TripCount % (VF*UF)) @@ -798,14 +788,6 @@ protected: // Record whether runtime checks are added. bool AddedSafetyChecks; - // Holds instructions from the original loop whose counterparts in the - // vectorized loop would be trivially dead if generated. For example, - // original induction update instructions can become dead because we - // separately emit induction "steps" when generating code for the new loop. - // Similarly, we create a new latch condition when setting up the structure - // of the new loop, so the old one can become dead. - SmallPtrSet DeadInstructions; - // Holds the end values for each induction variable. We save the end values // so we can later fix-up the external users of the induction variables. DenseMap IVEndValues; @@ -1704,6 +1686,9 @@ public: /// access that can be widened. bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1); + // Returns true if the NoNaN attribute is set on the function. + bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } + private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -2106,6 +2091,10 @@ private: /// The data is collected per VF. DenseMap> Scalars; + /// Holds the instructions (address computations) that are forced to be + /// scalarized. + DenseMap> ForcedScalars; + /// Returns the expected difference in cost from scalarizing the expression /// feeding a predicated instruction \p PredInst. The instructions to /// scalarize and their scalar costs are collected in \p ScalarCosts. A @@ -2189,7 +2178,10 @@ public: /// passed Legality checks. class LoopVectorizationPlanner { public: - LoopVectorizationPlanner(LoopVectorizationCostModel &CM) : CM(CM) {} + LoopVectorizationPlanner(Loop *OrigLoop, LoopInfo *LI, + LoopVectorizationLegality *Legal, + LoopVectorizationCostModel &CM) + : OrigLoop(OrigLoop), LI(LI), Legal(Legal), CM(CM) {} ~LoopVectorizationPlanner() {} @@ -2197,7 +2189,25 @@ public: LoopVectorizationCostModel::VectorizationFactor plan(bool OptForSize, unsigned UserVF); + /// Generate the IR code for the vectorized loop. + void executePlan(InnerLoopVectorizer &ILV); + +protected: + /// Collect the instructions from the original loop that would be trivially + /// dead in the vectorized loop if generated. + void collectTriviallyDeadInstructions( + SmallPtrSetImpl &DeadInstructions); + private: + /// The loop that we evaluate. + Loop *OrigLoop; + + /// Loop Info analysis. + LoopInfo *LI; + + /// The legality analysis. + LoopVectorizationLegality *Legal; + /// The profitablity analysis. LoopVectorizationCostModel &CM; }; @@ -3040,7 +3050,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { if (CreateGatherScatter) { Value *MaskPart = Legal->isMaskRequired(LI) ? Mask[Part] : nullptr; NewLI = Builder.CreateMaskedGather(VectorGep[Part], Alignment, MaskPart, - 0, "wide.masked.gather"); + nullptr, "wide.masked.gather"); Entry[Part] = NewLI; } else { // Calculate the pointer for the specific unroll-part. @@ -3365,7 +3375,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { LVer->prepareNoAliasMetadata(); } -void InnerLoopVectorizer::createEmptyLoop() { +void InnerLoopVectorizer::createVectorizedLoopSkeleton() { /* In this function we generate a new loop. The new loop will contain the vectorized instructions while the old loop will continue to run the @@ -3590,8 +3600,12 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, IRBuilder<> B(MiddleBlock->getTerminator()); Value *CountMinusOne = B.CreateSub( CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1)); - Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(), - "cast.cmo"); + Value *CMO = + !II.getStep()->getType()->isIntegerTy() + ? B.CreateCast(Instruction::SIToFP, CountMinusOne, + II.getStep()->getType()) + : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType()); + CMO->setName("cast.cmo"); Value *Escape = II.transform(B, CMO, PSE.getSE(), DL); Escape->setName("ind.escape"); MissingVals[UI] = Escape; @@ -3800,7 +3814,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { if (auto *BO = dyn_cast(I)) { NewI = B.CreateBinOp(BO->getOpcode(), ShrinkOperand(BO->getOperand(0)), ShrinkOperand(BO->getOperand(1))); - cast(NewI)->copyIRFlags(I); + + // Any wrapping introduced by shrinking this operation shouldn't be + // considered undefined behavior. So, we can't unconditionally copy + // arithmetic wrapping flags to NewI. + cast(NewI)->copyIRFlags(I, /*IncludeWrapFlags=*/false); } else if (auto *CI = dyn_cast(I)) { NewI = B.CreateICmp(CI->getPredicate(), ShrinkOperand(CI->getOperand(0)), @@ -3883,29 +3901,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { } } -void InnerLoopVectorizer::vectorizeLoop() { - //===------------------------------------------------===// - // - // Notice: any optimization or new instruction that go - // into the code below should be also be implemented in - // the cost-model. - // - //===------------------------------------------------===// - - // Collect instructions from the original loop that will become trivially - // dead in the vectorized loop. We don't need to vectorize these - // instructions. - collectTriviallyDeadInstructions(); - - // Scan the loop in a topological order to ensure that defs are vectorized - // before users. - LoopBlocksDFS DFS(OrigLoop); - DFS.perform(LI); - - // Vectorize all of the blocks in the original loop. - for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) - vectorizeBlockInLoop(BB); - +void InnerLoopVectorizer::fixVectorizedLoop() { // Insert truncates and extends for any truncated instructions as hints to // InstCombine. if (VF > 1) @@ -4042,8 +4038,11 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { // Set the insertion point after the previous value if it is an instruction. // Note that the previous value may have been constant-folded so it is not - // guaranteed to be an instruction in the vector loop. - if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousParts[UF - 1])) + // guaranteed to be an instruction in the vector loop. Also, if the previous + // value is a phi node, we should insert after all the phi nodes to avoid + // breaking basic block verification. + if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousParts[UF - 1]) || + isa(PreviousParts[UF - 1])) Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt()); else Builder.SetInsertPoint( @@ -4077,24 +4076,34 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch()); // Extract the last vector element in the middle block. This will be the - // initial value for the recurrence when jumping to the scalar loop. - // FIXME: Note that the last vector element need not always be the correct one: - // consider a loop where we have phi uses outside the loop - we need the - // second last iteration value and not the last one). For now, we avoid - // considering such cases as firstOrderRecurrences (see - // isFirstOrderRecurrence). - auto *Extract = Incoming; + // initial value for the recurrence when jumping to the scalar loop. + auto *ExtractForScalar = Incoming; if (VF > 1) { Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); - Extract = Builder.CreateExtractElement(Extract, Builder.getInt32(VF - 1), - "vector.recur.extract"); - } + ExtractForScalar = Builder.CreateExtractElement( + ExtractForScalar, Builder.getInt32(VF - 1), "vector.recur.extract"); + } + // Extract the second last element in the middle block if the + // Phi is used outside the loop. We need to extract the phi itself + // and not the last element (the phi update in the current iteration). This + // will be the value when jumping to the exit block from the LoopMiddleBlock, + // when the scalar loop is not run at all. + Value *ExtractForPhiUsedOutsideLoop = nullptr; + if (VF > 1) + ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement( + Incoming, Builder.getInt32(VF - 2), "vector.recur.extract.for.phi"); + // When loop is unrolled without vectorizing, initialize + // ExtractForPhiUsedOutsideLoop with the value just prior to unrolled value of + // `Incoming`. This is analogous to the vectorized case above: extracting the + // second last element when VF > 1. + else if (UF > 1) + ExtractForPhiUsedOutsideLoop = PreviousParts[UF - 2]; // Fix the initial value of the original recurrence in the scalar loop. Builder.SetInsertPoint(&*LoopScalarPreHeader->begin()); auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init"); for (auto *BB : predecessors(LoopScalarPreHeader)) { - auto *Incoming = BB == LoopMiddleBlock ? Extract : ScalarInit; + auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit; Start->addIncoming(Incoming, BB); } @@ -4111,7 +4120,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { if (!LCSSAPhi) break; if (LCSSAPhi->getIncomingValue(0) == Phi) { - LCSSAPhi->addIncoming(Extract, LoopMiddleBlock); + LCSSAPhi->addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock); break; } } @@ -4189,7 +4198,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { cast(VecRdxPhi[part]) ->addIncoming(StartVal, LoopVectorPreHeader); cast(VecRdxPhi[part]) - ->addIncoming(Val[part], LoopVectorBody); + ->addIncoming(Val[part], LI->getLoopFor(LoopVectorBody)->getLoopLatch()); } // Before each round, move the insertion point right between @@ -4241,39 +4250,9 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { } if (VF > 1) { - // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles - // and vector ops, reducing the set of values being computed by half each - // round. - assert(isPowerOf2_32(VF) && - "Reduction emission only supported for pow2 vectors!"); - Value *TmpVec = ReducedPartRdx; - SmallVector ShuffleMask(VF, nullptr); - for (unsigned i = VF; i != 1; i >>= 1) { - // Move the upper half of the vector to the lower half. - for (unsigned j = 0; j != i / 2; ++j) - ShuffleMask[j] = Builder.getInt32(i / 2 + j); - - // Fill the rest of the mask with undef. - std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), - UndefValue::get(Builder.getInt32Ty())); - - Value *Shuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), - ConstantVector::get(ShuffleMask), "rdx.shuf"); - - if (Op != Instruction::ICmp && Op != Instruction::FCmp) - // Floating point operations had to be 'fast' to enable the reduction. - TmpVec = addFastMathFlag(Builder.CreateBinOp( - (Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx")); - else - TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, - TmpVec, Shuf); - } - - // The result is in the first element of the vector. + bool NoNaN = Legal->hasFunNoNaNAttr(); ReducedPartRdx = - Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); - + createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, NoNaN); // If the reduction can be performed in a smaller type, we need to extend // the reduction to the wider type before we branch to the original loop. if (Phi->getType() != RdxDesc.getRecurrenceType()) @@ -4328,32 +4307,11 @@ void InnerLoopVectorizer::fixLCSSAPHIs() { auto *LCSSAPhi = dyn_cast(&LEI); if (!LCSSAPhi) break; - if (LCSSAPhi->getNumIncomingValues() == 1) - LCSSAPhi->addIncoming(UndefValue::get(LCSSAPhi->getType()), - LoopMiddleBlock); - } -} - -void InnerLoopVectorizer::collectTriviallyDeadInstructions() { - BasicBlock *Latch = OrigLoop->getLoopLatch(); - - // We create new control-flow for the vectorized loop, so the original - // condition will be dead after vectorization if it's only used by the - // branch. - auto *Cmp = dyn_cast(Latch->getTerminator()->getOperand(0)); - if (Cmp && Cmp->hasOneUse()) - DeadInstructions.insert(Cmp); - - // We create new "steps" for induction variable updates to which the original - // induction variables map. An original update instruction will be dead if - // all its users except the induction variable are dead. - for (auto &Induction : *Legal->getInductionVars()) { - PHINode *Ind = Induction.first; - auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); - if (all_of(IndUpdate->users(), [&](User *U) -> bool { - return U == Ind || DeadInstructions.count(cast(U)); - })) - DeadInstructions.insert(IndUpdate); + if (LCSSAPhi->getNumIncomingValues() == 1) { + assert(OrigLoop->isLoopInvariant(LCSSAPhi->getIncomingValue(0)) && + "Incoming value isn't loop invariant"); + LCSSAPhi->addIncoming(LCSSAPhi->getIncomingValue(0), LoopMiddleBlock); + } } } @@ -4502,14 +4460,15 @@ void InnerLoopVectorizer::predicateInstructions() { for (auto KV : PredicatedInstructions) { BasicBlock::iterator I(KV.first); BasicBlock *Head = I->getParent(); - auto *BB = SplitBlock(Head, &*std::next(I), DT, LI); auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false, /*BranchWeights=*/nullptr, DT, LI); I->moveBefore(T); sinkScalarOperands(&*I); - I->getParent()->setName(Twine("pred.") + I->getOpcodeName() + ".if"); - BB->setName(Twine("pred.") + I->getOpcodeName() + ".continue"); + BasicBlock *PredicatedBlock = I->getParent(); + Twine BBNamePrefix = Twine("pred.") + I->getOpcodeName(); + PredicatedBlock->setName(BBNamePrefix + ".if"); + PredicatedBlock->getSingleSuccessor()->setName(BBNamePrefix + ".continue"); // If the instruction is non-void create a Phi node at reconvergence point. if (!I->getType()->isVoidTy()) { @@ -4548,8 +4507,8 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { // Look for cached value. std::pair Edge(Src, Dst); - EdgeMaskCache::iterator ECEntryIt = MaskCache.find(Edge); - if (ECEntryIt != MaskCache.end()) + EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge); + if (ECEntryIt != EdgeMaskCache.end()) return ECEntryIt->second; VectorParts SrcMask = createBlockInMask(Src); @@ -4568,11 +4527,11 @@ InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { for (unsigned part = 0; part < UF; ++part) EdgeMask[part] = Builder.CreateAnd(EdgeMask[part], SrcMask[part]); - MaskCache[Edge] = EdgeMask; + EdgeMaskCache[Edge] = EdgeMask; return EdgeMask; } - MaskCache[Edge] = SrcMask; + EdgeMaskCache[Edge] = SrcMask; return SrcMask; } @@ -4580,10 +4539,17 @@ InnerLoopVectorizer::VectorParts InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); + // Look for cached value. + BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB); + if (BCEntryIt != BlockMaskCache.end()) + return BCEntryIt->second; + // Loop incoming mask is all-one. if (OrigLoop->getHeader() == BB) { Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1); - return getVectorValue(C); + const VectorParts &BlockMask = getVectorValue(C); + BlockMaskCache[BB] = BlockMask; + return BlockMask; } // This is the block mask. We OR all incoming edges, and with zero. @@ -4597,6 +4563,7 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { BlockMask[part] = Builder.CreateOr(BlockMask[part], EM[part]); } + BlockMaskCache[BB] = BlockMask; return BlockMask; } @@ -4720,333 +4687,324 @@ static bool mayDivideByZero(Instruction &I) { return !CInt || CInt->isZero(); } -void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB) { - // For each instruction in the old loop. - for (Instruction &I : *BB) { - - // If the instruction will become trivially dead when vectorized, we don't - // need to generate it. - if (DeadInstructions.count(&I)) - continue; +void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) { + // Scalarize instructions that should remain scalar after vectorization. + if (VF > 1 && + !(isa(&I) || isa(&I) || isa(&I)) && + shouldScalarizeInstruction(&I)) { + scalarizeInstruction(&I, Legal->isScalarWithPredication(&I)); + return; + } - // Scalarize instructions that should remain scalar after vectorization. - if (VF > 1 && - !(isa(&I) || isa(&I) || - isa(&I)) && - shouldScalarizeInstruction(&I)) { - scalarizeInstruction(&I, Legal->isScalarWithPredication(&I)); - continue; - } + switch (I.getOpcode()) { + case Instruction::Br: + // Nothing to do for PHIs and BR, since we already took care of the + // loop control flow instructions. + break; + case Instruction::PHI: { + // Vectorize PHINodes. + widenPHIInstruction(&I, UF, VF); + break; + } // End of PHI. + case Instruction::GetElementPtr: { + // Construct a vector GEP by widening the operands of the scalar GEP as + // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP + // results in a vector of pointers when at least one operand of the GEP + // is vector-typed. Thus, to keep the representation compact, we only use + // vector-typed operands for loop-varying values. + auto *GEP = cast(&I); + VectorParts Entry(UF); - switch (I.getOpcode()) { - case Instruction::Br: - // Nothing to do for PHIs and BR, since we already took care of the - // loop control flow instructions. - continue; - case Instruction::PHI: { - // Vectorize PHINodes. - widenPHIInstruction(&I, UF, VF); - continue; - } // End of PHI. - case Instruction::GetElementPtr: { - // Construct a vector GEP by widening the operands of the scalar GEP as - // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP - // results in a vector of pointers when at least one operand of the GEP - // is vector-typed. Thus, to keep the representation compact, we only use - // vector-typed operands for loop-varying values. - auto *GEP = cast(&I); - VectorParts Entry(UF); + if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) { + // If we are vectorizing, but the GEP has only loop-invariant operands, + // the GEP we build (by only using vector-typed operands for + // loop-varying values) would be a scalar pointer. Thus, to ensure we + // produce a vector of pointers, we need to either arbitrarily pick an + // operand to broadcast, or broadcast a clone of the original GEP. + // Here, we broadcast a clone of the original. + // + // TODO: If at some point we decide to scalarize instructions having + // loop-invariant operands, this special case will no longer be + // required. We would add the scalarization decision to + // collectLoopScalars() and teach getVectorValue() to broadcast + // the lane-zero scalar value. + auto *Clone = Builder.Insert(GEP->clone()); + for (unsigned Part = 0; Part < UF; ++Part) + Entry[Part] = Builder.CreateVectorSplat(VF, Clone); + } else { + // If the GEP has at least one loop-varying operand, we are sure to + // produce a vector of pointers. But if we are only unrolling, we want + // to produce a scalar GEP for each unroll part. Thus, the GEP we + // produce with the code below will be scalar (if VF == 1) or vector + // (otherwise). Note that for the unroll-only case, we still maintain + // values in the vector mapping with initVector, as we do for other + // instructions. + for (unsigned Part = 0; Part < UF; ++Part) { - if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) { - // If we are vectorizing, but the GEP has only loop-invariant operands, - // the GEP we build (by only using vector-typed operands for - // loop-varying values) would be a scalar pointer. Thus, to ensure we - // produce a vector of pointers, we need to either arbitrarily pick an - // operand to broadcast, or broadcast a clone of the original GEP. - // Here, we broadcast a clone of the original. - // - // TODO: If at some point we decide to scalarize instructions having - // loop-invariant operands, this special case will no longer be - // required. We would add the scalarization decision to - // collectLoopScalars() and teach getVectorValue() to broadcast - // the lane-zero scalar value. - auto *Clone = Builder.Insert(GEP->clone()); - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = Builder.CreateVectorSplat(VF, Clone); - } else { - // If the GEP has at least one loop-varying operand, we are sure to - // produce a vector of pointers. But if we are only unrolling, we want - // to produce a scalar GEP for each unroll part. Thus, the GEP we - // produce with the code below will be scalar (if VF == 1) or vector - // (otherwise). Note that for the unroll-only case, we still maintain - // values in the vector mapping with initVector, as we do for other - // instructions. - for (unsigned Part = 0; Part < UF; ++Part) { - - // The pointer operand of the new GEP. If it's loop-invariant, we - // won't broadcast it. - auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand()) - ? GEP->getPointerOperand() - : getVectorValue(GEP->getPointerOperand())[Part]; - - // Collect all the indices for the new GEP. If any index is - // loop-invariant, we won't broadcast it. - SmallVector Indices; - for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) { - if (OrigLoop->isLoopInvariant(U.get())) - Indices.push_back(U.get()); - else - Indices.push_back(getVectorValue(U.get())[Part]); - } - - // Create the new GEP. Note that this GEP may be a scalar if VF == 1, - // but it should be a vector, otherwise. - auto *NewGEP = GEP->isInBounds() - ? Builder.CreateInBoundsGEP(Ptr, Indices) - : Builder.CreateGEP(Ptr, Indices); - assert((VF == 1 || NewGEP->getType()->isVectorTy()) && - "NewGEP is not a pointer vector"); - Entry[Part] = NewGEP; + // The pointer operand of the new GEP. If it's loop-invariant, we + // won't broadcast it. + auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand()) + ? GEP->getPointerOperand() + : getVectorValue(GEP->getPointerOperand())[Part]; + + // Collect all the indices for the new GEP. If any index is + // loop-invariant, we won't broadcast it. + SmallVector Indices; + for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) { + if (OrigLoop->isLoopInvariant(U.get())) + Indices.push_back(U.get()); + else + Indices.push_back(getVectorValue(U.get())[Part]); } + + // Create the new GEP. Note that this GEP may be a scalar if VF == 1, + // but it should be a vector, otherwise. + auto *NewGEP = GEP->isInBounds() + ? Builder.CreateInBoundsGEP(Ptr, Indices) + : Builder.CreateGEP(Ptr, Indices); + assert((VF == 1 || NewGEP->getType()->isVectorTy()) && + "NewGEP is not a pointer vector"); + Entry[Part] = NewGEP; } + } - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, GEP); + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, GEP); + break; + } + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::URem: + // Scalarize with predication if this instruction may divide by zero and + // block execution is conditional, otherwise fallthrough. + if (Legal->isScalarWithPredication(&I)) { + scalarizeInstruction(&I, true); break; } - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::URem: - // Scalarize with predication if this instruction may divide by zero and - // block execution is conditional, otherwise fallthrough. - if (Legal->isScalarWithPredication(&I)) { - scalarizeInstruction(&I, true); - continue; - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - // Just widen binops. - auto *BinOp = cast(&I); - setDebugLocFromInst(Builder, BinOp); - const VectorParts &A = getVectorValue(BinOp->getOperand(0)); - const VectorParts &B = getVectorValue(BinOp->getOperand(1)); - - // Use this vector value for all users of the original instruction. - VectorParts Entry(UF); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]); + LLVM_FALLTHROUGH; + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // Just widen binops. + auto *BinOp = cast(&I); + setDebugLocFromInst(Builder, BinOp); + const VectorParts &A = getVectorValue(BinOp->getOperand(0)); + const VectorParts &B = getVectorValue(BinOp->getOperand(1)); - if (BinaryOperator *VecOp = dyn_cast(V)) - VecOp->copyIRFlags(BinOp); + // Use this vector value for all users of the original instruction. + VectorParts Entry(UF); + for (unsigned Part = 0; Part < UF; ++Part) { + Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]); - Entry[Part] = V; - } + if (BinaryOperator *VecOp = dyn_cast(V)) + VecOp->copyIRFlags(BinOp); - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, BinOp); - break; + Entry[Part] = V; } - case Instruction::Select: { - // Widen selects. - // If the selector is loop invariant we can create a select - // instruction with a scalar condition. Otherwise, use vector-select. - auto *SE = PSE.getSE(); - bool InvariantCond = - SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop); - setDebugLocFromInst(Builder, &I); - - // The condition can be loop invariant but still defined inside the - // loop. This means that we can't just use the original 'cond' value. - // We have to take the 'vectorized' value and pick the first lane. - // Instcombine will make this a no-op. - const VectorParts &Cond = getVectorValue(I.getOperand(0)); - const VectorParts &Op0 = getVectorValue(I.getOperand(1)); - const VectorParts &Op1 = getVectorValue(I.getOperand(2)); - - auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0); - VectorParts Entry(UF); - for (unsigned Part = 0; Part < UF; ++Part) { - Entry[Part] = Builder.CreateSelect( - InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]); - } + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, BinOp); + break; + } + case Instruction::Select: { + // Widen selects. + // If the selector is loop invariant we can create a select + // instruction with a scalar condition. Otherwise, use vector-select. + auto *SE = PSE.getSE(); + bool InvariantCond = + SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop); + setDebugLocFromInst(Builder, &I); + + // The condition can be loop invariant but still defined inside the + // loop. This means that we can't just use the original 'cond' value. + // We have to take the 'vectorized' value and pick the first lane. + // Instcombine will make this a no-op. + const VectorParts &Cond = getVectorValue(I.getOperand(0)); + const VectorParts &Op0 = getVectorValue(I.getOperand(1)); + const VectorParts &Op1 = getVectorValue(I.getOperand(2)); + + auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0); - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, &I); - break; + VectorParts Entry(UF); + for (unsigned Part = 0; Part < UF; ++Part) { + Entry[Part] = Builder.CreateSelect( + InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]); } - case Instruction::ICmp: - case Instruction::FCmp: { - // Widen compares. Generate vector compares. - bool FCmp = (I.getOpcode() == Instruction::FCmp); - auto *Cmp = dyn_cast(&I); - setDebugLocFromInst(Builder, Cmp); - const VectorParts &A = getVectorValue(Cmp->getOperand(0)); - const VectorParts &B = getVectorValue(Cmp->getOperand(1)); - VectorParts Entry(UF); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *C = nullptr; - if (FCmp) { - C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]); - cast(C)->copyFastMathFlags(Cmp); - } else { - C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]); - } - Entry[Part] = C; + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, &I); + break; + } + + case Instruction::ICmp: + case Instruction::FCmp: { + // Widen compares. Generate vector compares. + bool FCmp = (I.getOpcode() == Instruction::FCmp); + auto *Cmp = dyn_cast(&I); + setDebugLocFromInst(Builder, Cmp); + const VectorParts &A = getVectorValue(Cmp->getOperand(0)); + const VectorParts &B = getVectorValue(Cmp->getOperand(1)); + VectorParts Entry(UF); + for (unsigned Part = 0; Part < UF; ++Part) { + Value *C = nullptr; + if (FCmp) { + C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]); + cast(C)->copyFastMathFlags(Cmp); + } else { + C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]); } + Entry[Part] = C; + } - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, &I); + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, &I); + break; + } + + case Instruction::Store: + case Instruction::Load: + vectorizeMemoryInstruction(&I); + break; + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + auto *CI = dyn_cast(&I); + setDebugLocFromInst(Builder, CI); + + // Optimize the special case where the source is a constant integer + // induction variable. Notice that we can only optimize the 'trunc' case + // because (a) FP conversions lose precision, (b) sext/zext may wrap, and + // (c) other casts depend on pointer size. + if (Cost->isOptimizableIVTruncate(CI, VF)) { + widenIntOrFpInduction(cast(CI->getOperand(0)), + cast(CI)); break; } - case Instruction::Store: - case Instruction::Load: - vectorizeMemoryInstruction(&I); + /// Vectorize casts. + Type *DestTy = + (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF); + + const VectorParts &A = getVectorValue(CI->getOperand(0)); + VectorParts Entry(UF); + for (unsigned Part = 0; Part < UF; ++Part) + Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy); + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, &I); + break; + } + + case Instruction::Call: { + // Ignore dbg intrinsics. + if (isa(I)) break; - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - auto *CI = dyn_cast(&I); - setDebugLocFromInst(Builder, CI); - - // Optimize the special case where the source is a constant integer - // induction variable. Notice that we can only optimize the 'trunc' case - // because (a) FP conversions lose precision, (b) sext/zext may wrap, and - // (c) other casts depend on pointer size. - if (Cost->isOptimizableIVTruncate(CI, VF)) { - widenIntOrFpInduction(cast(CI->getOperand(0)), - cast(CI)); - break; - } + setDebugLocFromInst(Builder, &I); - /// Vectorize casts. - Type *DestTy = - (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF); + Module *M = I.getParent()->getParent()->getParent(); + auto *CI = cast(&I); - const VectorParts &A = getVectorValue(CI->getOperand(0)); - VectorParts Entry(UF); - for (unsigned Part = 0; Part < UF; ++Part) - Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy); - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, &I); + StringRef FnName = CI->getCalledFunction()->getName(); + Function *F = CI->getCalledFunction(); + Type *RetTy = ToVectorTy(CI->getType(), VF); + SmallVector Tys; + for (Value *ArgOperand : CI->arg_operands()) + Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); + + Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); + if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || + ID == Intrinsic::lifetime_start)) { + scalarizeInstruction(&I); + break; + } + // The flag shows whether we use Intrinsic or a usual Call for vectorized + // version of the instruction. + // Is it beneficial to perform intrinsic call compared to lib call? + bool NeedToScalarize; + unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize); + bool UseVectorIntrinsic = + ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; + if (!UseVectorIntrinsic && NeedToScalarize) { + scalarizeInstruction(&I); break; } - case Instruction::Call: { - // Ignore dbg intrinsics. - if (isa(I)) - break; - setDebugLocFromInst(Builder, &I); - - Module *M = BB->getParent()->getParent(); - auto *CI = cast(&I); - - StringRef FnName = CI->getCalledFunction()->getName(); - Function *F = CI->getCalledFunction(); - Type *RetTy = ToVectorTy(CI->getType(), VF); - SmallVector Tys; - for (Value *ArgOperand : CI->arg_operands()) - Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); - - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); - if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || - ID == Intrinsic::lifetime_start)) { - scalarizeInstruction(&I); - break; - } - // The flag shows whether we use Intrinsic or a usual Call for vectorized - // version of the instruction. - // Is it beneficial to perform intrinsic call compared to lib call? - bool NeedToScalarize; - unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize); - bool UseVectorIntrinsic = - ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; - if (!UseVectorIntrinsic && NeedToScalarize) { - scalarizeInstruction(&I); - break; - } - - VectorParts Entry(UF); - for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector Args; - for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { - Value *Arg = CI->getArgOperand(i); - // Some intrinsics have a scalar argument - don't replace it with a - // vector. - if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) { - const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i)); - Arg = VectorArg[Part]; - } - Args.push_back(Arg); + VectorParts Entry(UF); + for (unsigned Part = 0; Part < UF; ++Part) { + SmallVector Args; + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { + Value *Arg = CI->getArgOperand(i); + // Some intrinsics have a scalar argument - don't replace it with a + // vector. + if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) { + const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i)); + Arg = VectorArg[Part]; } + Args.push_back(Arg); + } - Function *VectorF; - if (UseVectorIntrinsic) { - // Use vector version of the intrinsic. - Type *TysForDecl[] = {CI->getType()}; - if (VF > 1) - TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); - VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); - } else { - // Use vector version of the library call. - StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); - assert(!VFnName.empty() && "Vector function name is empty."); - VectorF = M->getFunction(VFnName); - if (!VectorF) { - // Generate a declaration - FunctionType *FTy = FunctionType::get(RetTy, Tys, false); - VectorF = - Function::Create(FTy, Function::ExternalLinkage, VFnName, M); - VectorF->copyAttributesFrom(F); - } + Function *VectorF; + if (UseVectorIntrinsic) { + // Use vector version of the intrinsic. + Type *TysForDecl[] = {CI->getType()}; + if (VF > 1) + TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); + VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); + } else { + // Use vector version of the library call. + StringRef VFnName = TLI->getVectorizedFunction(FnName, VF); + assert(!VFnName.empty() && "Vector function name is empty."); + VectorF = M->getFunction(VFnName); + if (!VectorF) { + // Generate a declaration + FunctionType *FTy = FunctionType::get(RetTy, Tys, false); + VectorF = + Function::Create(FTy, Function::ExternalLinkage, VFnName, M); + VectorF->copyAttributesFrom(F); } - assert(VectorF && "Can't create vector function."); - - SmallVector OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); - CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles); + } + assert(VectorF && "Can't create vector function."); - if (isa(V)) - V->copyFastMathFlags(CI); + SmallVector OpBundles; + CI->getOperandBundlesAsDefs(OpBundles); + CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles); - Entry[Part] = V; - } + if (isa(V)) + V->copyFastMathFlags(CI); - VectorLoopValueMap.initVector(&I, Entry); - addMetadata(Entry, &I); - break; + Entry[Part] = V; } - default: - // All other instructions are unsupported. Scalarize them. - scalarizeInstruction(&I); - break; - } // end of switch. - } // end of for_each instr. + VectorLoopValueMap.initVector(&I, Entry); + addMetadata(Entry, &I); + break; + } + + default: + // All other instructions are unsupported. Scalarize them. + scalarizeInstruction(&I); + break; + } // end of switch. } void InnerLoopVectorizer::updateAnalysis() { @@ -5057,11 +5015,10 @@ void InnerLoopVectorizer::updateAnalysis() { assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && "Entry does not dominate exit."); - // We don't predicate stores by this point, so the vector body should be a - // single loop. - DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader); - - DT->addNewBlock(LoopMiddleBlock, LoopVectorBody); + DT->addNewBlock(LI->getLoopFor(LoopVectorBody)->getHeader(), + LoopVectorPreHeader); + DT->addNewBlock(LoopMiddleBlock, + LI->getLoopFor(LoopVectorBody)->getLoopLatch()); DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]); @@ -5137,12 +5094,18 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { } bool LoopVectorizationLegality::canVectorize() { + // Store the result and return it at the end instead of exiting early, in case + // allowExtraAnalysis is used to report multiple reasons for not vectorizing. + bool Result = true; // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. if (!TheLoop->getLoopPreheader()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // FIXME: The code is currently dead, since the loop gets sent to @@ -5152,21 +5115,30 @@ bool LoopVectorizationLegality::canVectorize() { if (!TheLoop->empty()) { ORE->emit(createMissedAnalysis("NotInnermostLoop") << "loop is not the innermost loop"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single exiting block. if (!TheLoop->getExitingBlock()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We only handle bottom-tested loops, i.e. loop in which the condition is @@ -5175,7 +5147,10 @@ bool LoopVectorizationLegality::canVectorize() { if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We need to have a loop header. @@ -5186,28 +5161,28 @@ bool LoopVectorizationLegality::canVectorize() { unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); - return false; - } - - // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getBackedgeTakenCount(); - if (ExitCount == PSE.getSE()->getCouldNotCompute()) { - ORE->emit(createMissedAnalysis("CantComputeNumberOfIterations") - << "could not determine number of loop iterations"); - DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Go over each instruction and look at memory deps. if (!canVectorizeMemory()) { DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } DEBUG(dbgs() << "LV: We can vectorize this loop" @@ -5235,13 +5210,17 @@ bool LoopVectorizationLegality::canVectorize() { << "Too many SCEV assumptions need to be made and checked " << "at runtime"); DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } - // Okay! We can vectorize. At this point we don't have any other mem analysis + // Okay! We've done all the tests. If any have failed, return false. Otherwise + // we can vectorize, and at this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. - return true; + return Result; } static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { @@ -5605,6 +5584,13 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); } + // Insert the forced scalars. + // FIXME: Currently widenPHIInstruction() often creates a dead vector + // induction variable when the PHI user is scalarized. + if (ForcedScalars.count(VF)) + for (auto *I : ForcedScalars.find(VF)->second) + Worklist.insert(I); + // Expand the worklist by looking through any bitcasts and getelementptr // instructions we've already identified as scalar. This is similar to the // expansion step in collectLoopUniforms(); however, here we're only @@ -7162,7 +7148,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, if (VF == 1) { Type *ValTy = getMemInstValueType(I); unsigned Alignment = getMemInstAlignment(I); - unsigned AS = getMemInstAlignment(I); + unsigned AS = getMemInstAddressSpace(I); return TTI.getAddressComputationCost(ValTy) + TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I); @@ -7180,11 +7166,16 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { if (VF > 1 && isProfitableToScalarize(I, VF)) return VectorizationCostTy(InstsToScalarize[VF][I], false); + // Forced scalars do not have any scalarization overhead. + if (VF > 1 && ForcedScalars.count(VF) && + ForcedScalars.find(VF)->second.count(I)) + return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false); + Type *VectorTy; unsigned C = getInstructionCost(I, VF, VectorTy); bool TypeNotScalarized = - VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF; + VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF; return VectorizationCostTy(C, TypeNotScalarized); } @@ -7259,6 +7250,62 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { setWideningDecision(&I, VF, Decision, Cost); } } + + // Make sure that any load of address and any other address computation + // remains scalar unless there is gather/scatter support. This avoids + // inevitable extracts into address registers, and also has the benefit of + // activating LSR more, since that pass can't optimize vectorized + // addresses. + if (TTI.prefersVectorizedAddressing()) + return; + + // Start with all scalar pointer uses. + SmallPtrSet AddrDefs; + for (BasicBlock *BB : TheLoop->blocks()) + for (Instruction &I : *BB) { + Instruction *PtrDef = + dyn_cast_or_null(getPointerOperand(&I)); + if (PtrDef && TheLoop->contains(PtrDef) && + getWideningDecision(&I, VF) != CM_GatherScatter) + AddrDefs.insert(PtrDef); + } + + // Add all instructions used to generate the addresses. + SmallVector Worklist; + for (auto *I : AddrDefs) + Worklist.push_back(I); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + for (auto &Op : I->operands()) + if (auto *InstOp = dyn_cast(Op)) + if ((InstOp->getParent() == I->getParent()) && !isa(InstOp) && + AddrDefs.insert(InstOp).second == true) + Worklist.push_back(InstOp); + } + + for (auto *I : AddrDefs) { + if (isa(I)) { + // Setting the desired widening decision should ideally be handled in + // by cost functions, but since this involves the task of finding out + // if the loaded register is involved in an address computation, it is + // instead changed here when we know this is the case. + if (getWideningDecision(I, VF) == CM_Widen) + // Scalarize a widened load of address. + setWideningDecision(I, VF, CM_Scalarize, + (VF * getMemoryInstructionCost(I, 1))); + else if (auto Group = Legal->getInterleavedAccessGroup(I)) { + // Scalarize an interleave group of address loads. + for (unsigned I = 0; I < Group->getFactor(); ++I) { + if (Instruction *Member = Group->getMember(I)) + setWideningDecision(Member, VF, CM_Scalarize, + (VF * getMemoryInstructionCost(Member, 1))); + } + } + } else + // Make sure I gets scalarized and a cost estimate without + // scalarization overhead. + ForcedScalars[VF].insert(I); + } } unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, @@ -7267,7 +7314,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, Type *RetTy = I->getType(); if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); - VectorTy = ToVectorTy(RetTy, VF); + VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF); auto SE = PSE.getSE(); // TODO: We need to estimate the cost of intrinsic calls. @@ -7313,8 +7360,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, VectorTy, VF - 1, VectorTy); - // TODO: IF-converted IFs become selects. - return 0; + // Phi nodes in non-header blocks (not inductions, reductions, etc.) are + // converted into select instructions. We require N - 1 selects per phi + // node, where N is the number of incoming values. + if (VF > 1 && Phi->getParent() != TheLoop->getHeader()) + return (Phi->getNumIncomingValues() - 1) * + TTI.getCmpSelInstrCost( + Instruction::Select, ToVectorTy(Phi->getType(), VF), + ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)); + + return TTI.getCFInstrCost(Instruction::PHI); } case Instruction::UDiv: case Instruction::SDiv: @@ -7345,6 +7400,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, // likely. return Cost / getReciprocalPredBlockProb(); } + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -7392,9 +7448,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } else if (Legal->isUniform(Op2)) { Op2VK = TargetTransformInfo::OK_UniformValue; } - SmallVector Operands(I->operand_values()); - return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, - Op2VK, Op1VP, Op2VP, Operands); + SmallVector Operands(I->operand_values()); + unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; + return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, + Op2VK, Op1VP, Op2VP, Operands); } case Instruction::Select: { SelectInst *SI = cast(I); @@ -7417,7 +7474,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } case Instruction::Store: case Instruction::Load: { - VectorTy = ToVectorTy(getMemInstValueType(I), VF); + unsigned Width = VF; + if (Width > 1) { + InstWidening Decision = getWideningDecision(I, Width); + assert(Decision != CM_Unknown && + "CM decision should be taken at this point"); + if (Decision == CM_Scalarize) + Width = 1; + } + VectorTy = ToVectorTy(getMemInstValueType(I), Width); return getMemoryInstructionCost(I, VF); } case Instruction::ZExt: @@ -7442,7 +7507,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } Type *SrcScalarTy = I->getOperand(0)->getType(); - Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF); + Type *SrcVecTy = + VectorTy->isVectorTy() ? ToVectorTy(SrcScalarTy, VF) : SrcScalarTy; if (canTruncateToMinimalBitwidth(I, VF)) { // This cast is going to be shrunk. This may remove the cast or it might // turn it into slightly different cast. For example, if MinBW == 16, @@ -7462,7 +7528,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } } - return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); + unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; + return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); } case Instruction::Call: { bool NeedToScalarize; @@ -7553,6 +7620,72 @@ LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { return CM.selectVectorizationFactor(MaxVF); } +void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV) { + // Perform the actual loop transformation. + + // 1. Create a new empty loop. Unlink the old loop and connect the new one. + ILV.createVectorizedLoopSkeleton(); + + //===------------------------------------------------===// + // + // Notice: any optimization or new instruction that go + // into the code below should also be implemented in + // the cost-model. + // + //===------------------------------------------------===// + + // 2. Copy and widen instructions from the old loop into the new loop. + + // Collect instructions from the original loop that will become trivially dead + // in the vectorized loop. We don't need to vectorize these instructions. For + // example, original induction update instructions can become dead because we + // separately emit induction "steps" when generating code for the new loop. + // Similarly, we create a new latch condition when setting up the structure + // of the new loop, so the old one can become dead. + SmallPtrSet DeadInstructions; + collectTriviallyDeadInstructions(DeadInstructions); + + // Scan the loop in a topological order to ensure that defs are vectorized + // before users. + LoopBlocksDFS DFS(OrigLoop); + DFS.perform(LI); + + // Vectorize all instructions in the original loop that will not become + // trivially dead when vectorized. + for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) + for (Instruction &I : *BB) + if (!DeadInstructions.count(&I)) + ILV.vectorizeInstruction(I); + + // 3. Fix the vectorized code: take care of header phi's, live-outs, + // predication, updating analyses. + ILV.fixVectorizedLoop(); +} + +void LoopVectorizationPlanner::collectTriviallyDeadInstructions( + SmallPtrSetImpl &DeadInstructions) { + BasicBlock *Latch = OrigLoop->getLoopLatch(); + + // We create new control-flow for the vectorized loop, so the original + // condition will be dead after vectorization if it's only used by the + // branch. + auto *Cmp = dyn_cast(Latch->getTerminator()->getOperand(0)); + if (Cmp && Cmp->hasOneUse()) + DeadInstructions.insert(Cmp); + + // We create new "steps" for induction variable updates to which the original + // induction variables map. An original update instruction will be dead if + // all its users except the induction variable are dead. + for (auto &Induction : *Legal->getInductionVars()) { + PHINode *Ind = Induction.first; + auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); + if (all_of(IndUpdate->users(), [&](User *U) -> bool { + return U == Ind || DeadInstructions.count(cast(U)); + })) + DeadInstructions.insert(IndUpdate); + } +} + void InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr) { auto *SI = dyn_cast(Instr); bool IfPredicateInstr = (SI && Legal->blockNeedsPredication(SI->getParent())); @@ -7735,7 +7868,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { CM.collectValuesToIgnore(); // Use the planner for vectorization. - LoopVectorizationPlanner LVP(CM); + LoopVectorizationPlanner LVP(L, LI, &LVL, CM); // Get user vectorization factor. unsigned UserVF = Hints.getWidth(); @@ -7829,7 +7962,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // interleave it. InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM); - Unroller.vectorize(); + LVP.executePlan(Unroller); ORE->emit(OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), L->getHeader()) @@ -7839,7 +7972,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // If we decided that it is *legal* to vectorize the loop, then do it. InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, &LVL, &CM); - LB.vectorize(); + LVP.executePlan(LB); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there are diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index da3ac06ab464eb99ac4bd658da784836b5181a84..d1349535f2982fa98f669a7c86f97c4f4033bb14 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -40,7 +40,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Vectorize.h" #include #include @@ -212,23 +214,6 @@ static unsigned getSameOpcode(ArrayRef VL) { return Opcode; } -/// Get the intersection (logical and) of all of the potential IR flags -/// of each scalar operation (VL) that will be converted into a vector (I). -/// Flag set: NSW, NUW, exact, and all of fast-math. -static void propagateIRFlags(Value *I, ArrayRef VL) { - if (auto *VecOp = dyn_cast(I)) { - if (auto *I0 = dyn_cast(VL[0])) { - // VecOVp is initialized to the 0th scalar, so start counting from index - // '1'. - VecOp->copyIRFlags(I0); - for (int i = 1, e = VL.size(); i < e; ++i) { - if (auto *Scalar = dyn_cast(VL[i])) - VecOp->andIRFlags(Scalar); - } - } - } -} - /// \returns true if all of the values in \p VL have the same type or false /// otherwise. static bool allSameType(ArrayRef VL) { @@ -274,6 +259,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, if (hasVectorInstrinsicScalarOpd(ID, 1)) { return (CI->getArgOperand(1) == Scalar); } + LLVM_FALLTHROUGH; } default: return false; @@ -315,10 +301,10 @@ public: BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB, - const DataLayout *DL) + const DataLayout *DL, OptimizationRemarkEmitter *ORE) : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB), - DL(DL), Builder(Se->getContext()) { + DL(DL), ORE(ORE), Builder(Se->getContext()) { CodeMetrics::collectEphemeralValues(F, AC, EphValues); // Use the vector register size specified by the target unless overridden // by a command-line option. @@ -331,7 +317,10 @@ public: else MaxVecRegSize = TTI->getRegisterBitWidth(true); - MinVecRegSize = MinVectorRegSizeOption; + if (MinVectorRegSizeOption.getNumOccurrences()) + MinVecRegSize = MinVectorRegSizeOption; + else + MinVecRegSize = TTI->getMinVectorRegisterBitWidth(); } /// \brief Vectorize the tree that starts with the elements in \p VL. @@ -377,6 +366,8 @@ public: MinBWs.clear(); } + unsigned getTreeSize() const { return VectorizableTree.size(); } + /// \brief Perform LICM and CSE on the newly generated gather sequences. void optimizeGatherSequence(); @@ -415,6 +406,8 @@ public: /// vectorizable. We do not vectorize such trees. bool isTreeTinyAndNotFullyVectorizable(); + OptimizationRemarkEmitter *getORE() { return ORE; } + private: struct TreeEntry; @@ -586,12 +579,12 @@ private: void eraseInstruction(Instruction *I) { I->removeFromParent(); I->dropAllReferences(); - DeletedInstructions.push_back(std::unique_ptr(I)); + DeletedInstructions.emplace_back(I); } /// Temporary store for deleted instructions. Instructions will be deleted /// eventually when the BoUpSLP is destructed. - SmallVector, 8> DeletedInstructions; + SmallVector DeletedInstructions; /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User @@ -944,6 +937,8 @@ private: AssumptionCache *AC; DemandedBits *DB; const DataLayout *DL; + OptimizationRemarkEmitter *ORE; + unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt. unsigned MinVecRegSize; // Set by cl::opt (default: 128). /// Instruction builder to construct the vectorized tree. @@ -1835,11 +1830,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { CInt->getValue().isPowerOf2()) Op2VP = TargetTransformInfo::OP_PowerOf2; - int ScalarCost = VecTy->getNumElements() * - TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, - Op2VK, Op1VP, Op2VP); + SmallVector Operands(VL0->operand_values()); + int ScalarCost = + VecTy->getNumElements() * + TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK, Op1VP, + Op2VP, Operands); int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK, - Op1VP, Op2VP); + Op1VP, Op2VP, Operands); return VecCost - ScalarCost; } case Instruction::GetElementPtr: { @@ -3703,10 +3700,8 @@ void BoUpSLP::computeMinimumValueSizes() { // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. IsKnownPositive = all_of(TreeRoot, [&](Value *R) { - bool KnownZero = false; - bool KnownOne = false; - ComputeSignBit(R, KnownZero, KnownOne, *DL); - return KnownZero; + KnownBits Known = computeKnownBits(R, *DL); + return Known.isNonNegative(); }); // Determine the maximum number of bits required to store the scalar @@ -3786,8 +3781,9 @@ struct SLPVectorizer : public FunctionPass { auto *DT = &getAnalysis().getDomTree(); auto *AC = &getAnalysis().getAssumptionCache(F); auto *DB = &getAnalysis().getDemandedBits(); + auto *ORE = &getAnalysis().getORE(); - return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); + return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -3799,6 +3795,7 @@ struct SLPVectorizer : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -3817,8 +3814,9 @@ PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &A auto *DT = &AM.getResult(F); auto *AC = &AM.getResult(F); auto *DB = &AM.getResult(F); + auto *ORE = &AM.getResult(F); - bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB); + bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE); if (!Changed) return PreservedAnalyses::all(); @@ -3833,7 +3831,8 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_, TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_, DominatorTree *DT_, - AssumptionCache *AC_, DemandedBits *DB_) { + AssumptionCache *AC_, DemandedBits *DB_, + OptimizationRemarkEmitter *ORE_) { SE = SE_; TTI = TTI_; TLI = TLI_; @@ -3861,7 +3860,7 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, // Use the bottom up slp vectorizer to construct chains that start with // store instructions. - BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL); + BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL, ORE_); // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to // delete instructions. @@ -3899,11 +3898,13 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_, } /// \brief Check that the Values in the slice in VL array are still existent in -/// the WeakVH array. +/// the WeakTrackingVH array. /// Vectorization of part of the VL array may cause later values in the VL array -/// to become invalid. We track when this has happened in the WeakVH array. -static bool hasValueBeenRAUWed(ArrayRef VL, ArrayRef VH, - unsigned SliceBegin, unsigned SliceSize) { +/// to become invalid. We track when this has happened in the WeakTrackingVH +/// array. +static bool hasValueBeenRAUWed(ArrayRef VL, + ArrayRef VH, unsigned SliceBegin, + unsigned SliceSize) { VL = VL.slice(SliceBegin, SliceSize); VH = VH.slice(SliceBegin, SliceSize); return !std::equal(VL.begin(), VL.end(), VH.begin()); @@ -3921,7 +3922,7 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, return false; // Keep track of values that were deleted by vectorizing in the loop below. - SmallVector TrackValues(Chain.begin(), Chain.end()); + SmallVector TrackValues(Chain.begin(), Chain.end()); bool Changed = false; // Look for profitable vectorizable trees at all offsets, starting at zero. @@ -3948,6 +3949,13 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); + using namespace ore; + R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized", + cast(Chain[i])) + << "Stores SLP vectorized with cost " << NV("Cost", Cost) + << " and with tree size " + << NV("TreeSize", R.getTreeSize())); + R.vectorizeTree(); // Move to the next bundle. @@ -4107,7 +4115,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, bool Changed = false; // Keep track of values that were deleted by vectorizing in the loop below. - SmallVector TrackValues(VL.begin(), VL.end()); + SmallVector TrackValues(VL.begin(), VL.end()); unsigned NextInst = 0, MaxInst = VL.size(); for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; @@ -4146,8 +4154,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, if (AllowReorder && R.shouldReorder()) { // Conceptually, there is nothing actually preventing us from trying to // reorder a larger list. In fact, we do exactly this when vectorizing - // reductions. However, at this point, we only expect to get here from - // tryToVectorizePair(). + // reductions. However, at this point, we only expect to get here when + // there are exactly two operations. assert(Ops.size() == 2); assert(BuildVectorSlice.empty()); Value *ReorderedOps[] = {Ops[1], Ops[0]}; @@ -4161,6 +4169,12 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); + R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList", + cast(Ops[0])) + << "SLP vectorized with cost " << ore::NV("Cost", Cost) + << " and with tree size " + << ore::NV("TreeSize", R.getTreeSize())); + Value *VectorizedRoot = R.vectorizeTree(); // Reconstruct the build vector by extracting the vectorized root. This @@ -4504,6 +4518,12 @@ public: DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost << ". (HorRdx)\n"); + auto *I0 = cast(VL[0]); + V.getORE()->emit( + OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction", I0) + << "Vectorized horizontal reduction with cost " + << ore::NV("Cost", Cost) << " and with tree size " + << ore::NV("TreeSize", V.getTreeSize())); // Vectorize a tree. DebugLoc Loc = cast(ReducedVals[i])->getDebugLoc(); @@ -4511,7 +4531,7 @@ public: // Emit a reduction. Value *ReducedSubTree = - emitReduction(VectorizedRoot, Builder, ReduxWidth, ReductionOps); + emitReduction(VectorizedRoot, Builder, ReduxWidth, ReductionOps, TTI); if (VectorizedTree) { Builder.SetCurrentDebugLocation(Loc); VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree, @@ -4581,33 +4601,31 @@ private: /// \brief Emit a horizontal reduction of the vectorized value. Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder, - unsigned ReduxWidth, ArrayRef RedOps) { + unsigned ReduxWidth, ArrayRef RedOps, + const TargetTransformInfo *TTI) { assert(VectorizedValue && "Need to have a vectorized tree node"); assert(isPowerOf2_32(ReduxWidth) && "We only handle power-of-two reductions for now"); + if (!IsPairwiseReduction) + return createSimpleTargetReduction( + Builder, TTI, ReductionOpcode, VectorizedValue, + TargetTransformInfo::ReductionFlags(), RedOps); + Value *TmpVec = VectorizedValue; for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { - if (IsPairwiseReduction) { - Value *LeftMask = + Value *LeftMask = createRdxShuffleMask(ReduxWidth, i, true, true, Builder); - Value *RightMask = + Value *RightMask = createRdxShuffleMask(ReduxWidth, i, true, false, Builder); - Value *LeftShuf = Builder.CreateShuffleVector( + Value *LeftShuf = Builder.CreateShuffleVector( TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l"); - Value *RightShuf = Builder.CreateShuffleVector( + Value *RightShuf = Builder.CreateShuffleVector( TmpVec, UndefValue::get(TmpVec->getType()), (RightMask), "rdx.shuf.r"); - TmpVec = Builder.CreateBinOp(ReductionOpcode, LeftShuf, RightShuf, - "bin.rdx"); - } else { - Value *UpperHalf = - createRdxShuffleMask(ReduxWidth, i, false, false, Builder); - Value *Shuf = Builder.CreateShuffleVector( - TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf"); - TmpVec = Builder.CreateBinOp(ReductionOpcode, TmpVec, Shuf, "bin.rdx"); - } + TmpVec = + Builder.CreateBinOp(ReductionOpcode, LeftShuf, RightShuf, "bin.rdx"); propagateIRFlags(TmpVec, RedOps); } @@ -4732,56 +4750,18 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P, return nullptr; } -namespace { -/// Tracks instructons and its children. -class WeakVHWithLevel final : public CallbackVH { - /// Operand index of the instruction currently beeing analized. - unsigned Level = 0; - /// Is this the instruction that should be vectorized, or are we now - /// processing children (i.e. operands of this instruction) for potential - /// vectorization? - bool IsInitial = true; - -public: - explicit WeakVHWithLevel() = default; - WeakVHWithLevel(Value *V) : CallbackVH(V){}; - /// Restart children analysis each time it is repaced by the new instruction. - void allUsesReplacedWith(Value *New) override { - setValPtr(New); - Level = 0; - IsInitial = true; - } - /// Check if the instruction was not deleted during vectorization. - bool isValid() const { return !getValPtr(); } - /// Is the istruction itself must be vectorized? - bool isInitial() const { return IsInitial; } - /// Try to vectorize children. - void clearInitial() { IsInitial = false; } - /// Are all children processed already? - bool isFinal() const { - assert(getValPtr() && - (isa(getValPtr()) && - cast(getValPtr())->getNumOperands() >= Level)); - return getValPtr() && - cast(getValPtr())->getNumOperands() == Level; - } - /// Get next child operation. - Value *nextOperand() { - assert(getValPtr() && isa(getValPtr()) && - cast(getValPtr())->getNumOperands() > Level); - return cast(getValPtr())->getOperand(Level++); - } - virtual ~WeakVHWithLevel() = default; -}; -} // namespace - -/// \brief Attempt to reduce a horizontal reduction. -/// If it is legal to match a horizontal reduction feeding -/// the phi node P with reduction operators Root in a basic block BB, then check -/// if it can be done. -/// \returns true if a horizontal reduction was matched and reduced. -/// \returns false if a horizontal reduction was not matched. -static bool canBeVectorized( +/// Attempt to reduce a horizontal reduction. +/// If it is legal to match a horizontal reduction feeding the phi node \a P +/// with reduction operators \a Root (or one of its operands) in a basic block +/// \a BB, then check if it can be done. If horizontal reduction is not found +/// and root instruction is a binary operation, vectorization of the operands is +/// attempted. +/// \returns true if a horizontal reduction was matched and reduced or operands +/// of one of the binary instruction were vectorized. +/// \returns false if a horizontal reduction was not matched (or not possible) +/// or no vectorization of any binary operation feeding \a Root instruction was +/// performed. +static bool tryToVectorizeHorReductionOrInstOperands( PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI, const function_ref Vectorize) { @@ -4793,56 +4773,62 @@ static bool canBeVectorized( if (Root->getParent() != BB) return false; - SmallVector Stack(1, Root); + // Start analysis starting from Root instruction. If horizontal reduction is + // found, try to vectorize it. If it is not a horizontal reduction or + // vectorization is not possible or not effective, and currently analyzed + // instruction is a binary operation, try to vectorize the operands, using + // pre-order DFS traversal order. If the operands were not vectorized, repeat + // the same procedure considering each operand as a possible root of the + // horizontal reduction. + // Interrupt the process if the Root instruction itself was vectorized or all + // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. + SmallVector, 8> Stack(1, {Root, 0}); SmallSet VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Value *V = Stack.back(); - if (!V) { - Stack.pop_back(); + Value *V; + unsigned Level; + std::tie(V, Level) = Stack.pop_back_val(); + if (!V) continue; - } auto *Inst = dyn_cast(V); - if (!Inst || isa(Inst)) { - Stack.pop_back(); + if (!Inst || isa(Inst)) continue; - } - if (Stack.back().isInitial()) { - Stack.back().clearInitial(); - if (auto *BI = dyn_cast(Inst)) { - HorizontalReduction HorRdx; - if (HorRdx.matchAssociativeReduction(P, BI)) { - if (HorRdx.tryToReduce(R, TTI)) { - Res = true; - P = nullptr; - continue; - } - } - if (P) { - Inst = dyn_cast(BI->getOperand(0)); - if (Inst == P) - Inst = dyn_cast(BI->getOperand(1)); - if (!Inst) { - P = nullptr; - continue; - } + if (auto *BI = dyn_cast(Inst)) { + HorizontalReduction HorRdx; + if (HorRdx.matchAssociativeReduction(P, BI)) { + if (HorRdx.tryToReduce(R, TTI)) { + Res = true; + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. + P = nullptr; + continue; } } - P = nullptr; - if (Vectorize(dyn_cast(Inst), R)) { - Res = true; - continue; + if (P) { + Inst = dyn_cast(BI->getOperand(0)); + if (Inst == P) + Inst = dyn_cast(BI->getOperand(1)); + if (!Inst) { + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. + P = nullptr; + continue; + } } } - if (Stack.back().isFinal()) { - Stack.pop_back(); + // Set P to nullptr to avoid re-analysis of phi node in + // matchAssociativeReduction function unless this is the root node. + P = nullptr; + if (Vectorize(dyn_cast(Inst), R)) { + Res = true; continue; } - if (auto *NextV = dyn_cast(Stack.back().nextOperand())) - if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second && - Stack.size() < RecursionMaxDepth) - Stack.push_back(NextV); + // Try to vectorize operands. + if (++Level < RecursionMaxDepth) + for (auto *Op : Inst->operand_values()) + Stack.emplace_back(Op, Level); } return Res; } @@ -4859,10 +4845,10 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V, if (!isa(I)) P = nullptr; // Try to match and vectorize a horizontal reduction. - return canBeVectorized(P, I, BB, R, TTI, - [this](BinaryOperator *BI, BoUpSLP &R) -> bool { - return tryToVectorize(BI, R); - }); + return tryToVectorizeHorReductionOrInstOperands( + P, I, BB, R, TTI, [this](BinaryOperator *BI, BoUpSLP &R) -> bool { + return tryToVectorize(BI, R); + }); } bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { @@ -4904,7 +4890,13 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // Try to vectorize them. unsigned NumElts = (SameTypeIt - IncIt); DEBUG(errs() << "SLP: Trying to vectorize starting at PHIs (" << NumElts << ")\n"); - if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R)) { + // The order in which the phi nodes appear in the program does not matter. + // So allow tryToVectorizeList to reorder them if it is beneficial. This + // is done when there are exactly two elements since tryToVectorizeList + // asserts that there are only two values when AllowReorder is true. + bool AllowReorder = NumElts == 2; + if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, + None, AllowReorder)) { // Success start over because instructions might have been changed. HaveVectorizedPhiNodes = true; Changed = true; @@ -5063,7 +5055,8 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { SetVector Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, the WeakVHs will have nullified the + // initially collected them. If so, the WeakTrackingVHs will have + // nullified the // values, so remove them from the set of candidates. Candidates.remove(nullptr); @@ -5153,6 +5146,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) namespace llvm { diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp index 28e0b2eb9866693b13352cbf103f81bba40ed398..a219283178882e97af770c3de37079c0a9f6d393 100644 --- a/lib/Transforms/Vectorize/Vectorize.cpp +++ b/lib/Transforms/Vectorize/Vectorize.cpp @@ -17,9 +17,9 @@ #include "llvm-c/Initialization.h" #include "llvm-c/Transforms/Vectorize.h" #include "llvm/Analysis/Passes.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" -#include "llvm/IR/LegacyPassManager.h" using namespace llvm; diff --git a/lib/XRay/InstrumentationMap.cpp b/lib/XRay/InstrumentationMap.cpp index 431c251feb65ed045a8c3d18d6a1e5768adeca67..d9ce255bc68871eec4bb60d47b8a182d56140b26 100644 --- a/lib/XRay/InstrumentationMap.cpp +++ b/lib/XRay/InstrumentationMap.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/XRay/InstrumentationMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -22,7 +23,6 @@ #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/YAMLTraits.h" -#include "llvm/XRay/InstrumentationMap.h" #include #include #include diff --git a/lib/XRay/Trace.cpp b/lib/XRay/Trace.cpp index d2984697c8a9ea251c2de8ebe86ff4d78cff405c..6677063f944f6224e9d02927327d20d615aac594 100644 --- a/lib/XRay/Trace.cpp +++ b/lib/XRay/Trace.cpp @@ -115,6 +115,7 @@ struct FDRState { uint16_t CPUId; uint16_t ThreadId; uint64_t BaseTSC; + /// Encode some of the state transitions for the FDR log reader as explicit /// checks. These are expectations for the next Record in the stream. enum class Token { @@ -123,8 +124,10 @@ struct FDRState { NEW_CPU_ID_RECORD, FUNCTION_SEQUENCE, SCAN_TO_END_OF_THREAD_BUF, + CUSTOM_EVENT_DATA, }; Token Expects; + // Each threads buffer may have trailing garbage to scan over, so we track our // progress. uint64_t CurrentBufferSize; @@ -143,6 +146,8 @@ Twine fdrStateToTwine(const FDRState::Token &state) { return "FUNCTION_SEQUENCE"; case FDRState::Token::SCAN_TO_END_OF_THREAD_BUF: return "SCAN_TO_END_OF_THREAD_BUF"; + case FDRState::Token::CUSTOM_EVENT_DATA: + return "CUSTOM_EVENT_DATA"; } return "UNKNOWN"; } @@ -212,13 +217,32 @@ Error processFDRWallTimeRecord(FDRState &State, uint8_t RecordFirstByte, return Error::success(); } +/// State transition when a CustomEventMarker is encountered. +Error processCustomEventMarker(FDRState &State, uint8_t RecordFirstByte, + DataExtractor &RecordExtractor, + size_t &RecordSize) { + // We can encounter a CustomEventMarker anywhere in the log, so we can handle + // it regardless of the expectation. However, we do se the expectation to read + // a set number of fixed bytes, as described in the metadata. + uint32_t OffsetPtr = 1; // Read after the first byte. + uint32_t DataSize = RecordExtractor.getU32(&OffsetPtr); + uint64_t TSC = RecordExtractor.getU64(&OffsetPtr); + + // FIXME: Actually represent the record through the API. For now we only skip + // through the data. + (void)TSC; + RecordSize = 16 + DataSize; + return Error::success(); +} + /// Advances the state machine for reading the FDR record type by reading one /// Metadata Record and updating the State appropriately based on the kind of /// record encountered. The RecordKind is encoded in the first byte of the /// Record, which the caller should pass in because they have already read it /// to determine that this is a metadata record as opposed to a function record. Error processFDRMetadataRecord(FDRState &State, uint8_t RecordFirstByte, - DataExtractor &RecordExtractor) { + DataExtractor &RecordExtractor, + size_t &RecordSize) { // The remaining 7 bits are the RecordKind enum. uint8_t RecordKind = RecordFirstByte >> 1; switch (RecordKind) { @@ -247,6 +271,11 @@ Error processFDRMetadataRecord(FDRState &State, uint8_t RecordFirstByte, processFDRWallTimeRecord(State, RecordFirstByte, RecordExtractor)) return E; break; + case 5: // CustomEventMarker + if (auto E = processCustomEventMarker(State, RecordFirstByte, + RecordExtractor, RecordSize)) + return E; + break; default: // Widen the record type to uint16_t to prevent conversion to char. return make_error( @@ -400,7 +429,8 @@ Error loadFDRLog(StringRef Data, XRayFileHeader &FileHeader, bool isMetadataRecord = BitField & 0x01uL; if (isMetadataRecord) { RecordSize = 16; - if (auto E = processFDRMetadataRecord(State, BitField, RecordExtractor)) + if (auto E = processFDRMetadataRecord(State, BitField, RecordExtractor, + RecordSize)) return E; State.CurrentBufferConsumed += RecordSize; } else { // Process Function Record diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt index 79d8fc7df99bf86c7264c0141067363d4353b13a..9102efbdcb460baf89e682aa1046fbf295855009 100644 --- a/projects/CMakeLists.txt +++ b/projects/CMakeLists.txt @@ -22,7 +22,9 @@ endforeach(entry) if(${LLVM_BUILD_RUNTIME}) # MSVC isn't quite working with libc++ yet, disable it until issues are # fixed. - if(NOT MSVC) + # FIXME: LLVM_FORCE_BUILD_RUNTIME is currently used by libc++ to force + # enable the in-tree build when targeting clang-cl. + if(NOT MSVC OR LLVM_FORCE_BUILD_RUNTIME) # Add the projects in reverse order of their dependencies so that the # dependent projects can see the target names of their dependencies. add_llvm_external_project(libunwind) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 8be1d9e7c523425b35422d4ac196dc9342252b3d..6793a49a2ddc113b63d67da8937ca0d131893690 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -29,7 +29,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) # CMake module path. list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_SOURCE_DIR}/../cmake" - "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/Modules" + "${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules" "${LLVM_BINARY_DIR}/lib/cmake/llvm" ) @@ -65,10 +65,28 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) set(LLVM_ENABLE_LIBCXX ON CACHE BOOL "") endif() + set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(SAFE_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) + + include(CheckLibraryExists) + include(CheckCCompilerFlag) + + check_library_exists(c fopen "" LLVM_HAS_C_LIB) + check_c_compiler_flag(-nodefaultlibs LLVM_HAS_NODEFAULTLIBS_FLAG) + if(LLVM_HAS_NODEFAULTLIBS_FLAG) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs") + if(LLVM_HAS_C_LIB) + list(APPEND CMAKE_REQUIRED_LIBRARIES c) + endif() + endif() + # Handle common options used by all runtimes. include(AddLLVM) include(HandleLLVMOptions) + set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_LIBRARIES ${SAFE_CMAKE_REQUIRED_LIBRARIES}) + foreach(entry ${runtimes}) get_filename_component(projName ${entry} NAME) @@ -170,6 +188,7 @@ else() # if this is included from LLVM's CMake else() get_cmake_property(variableNames VARIABLES) add_custom_target(builtins) + add_custom_target(install-builtins) foreach(target ${LLVM_BUILTIN_TARGETS}) string(REPLACE "-" ";" builtin_target_list ${target}) foreach(item ${builtin_target_list}) @@ -200,6 +219,7 @@ else() # if this is included from LLVM's CMake USE_TOOLCHAIN ${EXTRA_ARGS}) add_dependencies(builtins builtins-${target}) + add_dependencies(install-builtins install-builtins-${target}) endforeach() endif() set(deps builtins) diff --git a/test/Analysis/AliasSet/unknown-inst-tracking.ll b/test/Analysis/AliasSet/unknown-inst-tracking.ll new file mode 100644 index 0000000000000000000000000000000000000000..da528fbae071aaa2b396d4f0ec4736f83daab283 --- /dev/null +++ b/test/Analysis/AliasSet/unknown-inst-tracking.ll @@ -0,0 +1,25 @@ +; RUN: opt -S -licm -loop-unswitch < %s | FileCheck %s + +; This test checks for a crash. See PR32587. + +@global = external global i32 + +declare i32 @f_1(i8, i32 returned) + +define i32 @f_0() { +; CHECK-LABEL: @f_0( +bb: + br label %bb1 + +bb1: ; preds = %bb3, %bb + %tmp = load i32, i32* @global + %tmp2 = select i1 false, i16 1, i16 0 + br label %bb3 + +bb3: ; preds = %bb3, %bb1 + %tmp4 = phi i8 [ 0, %bb1 ], [ %tmp6, %bb3 ] + %tmp5 = icmp eq i16 %tmp2, 0 + %tmp6 = select i1 %tmp5, i8 %tmp4, i8 1 + %tmp7 = tail call i32 @f_1(i8 %tmp6, i32 1) + br i1 false, label %bb1, label %bb3 +} diff --git a/test/Analysis/BasicAA/cs-cs-arm.ll b/test/Analysis/BasicAA/cs-cs-arm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1580af9ea826a9df31ecb426038ee3cdc4c960af --- /dev/null +++ b/test/Analysis/BasicAA/cs-cs-arm.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +; REQUIRES: arm + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "arm-apple-ios" + +declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind + +define <8 x i16> @test1(i8* %p, <8 x i16> %y) { +entry: + %q = getelementptr i8, i8* %p, i64 16 + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + %c = add <8 x i16> %a, %b + ret <8 x i16> %c + +; CHECK-LABEL: Function: test1: + +; CHECK: NoAlias: i8* %p, i8* %q +; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +} diff --git a/test/Analysis/BasicAA/cs-cs.ll b/test/Analysis/BasicAA/cs-cs.ll index 0f74dbd92bbdb250c86b7f5d641a982f7c39dc85..870794c251656eac9ff487232b8c1b7cfebdd25c 100644 --- a/test/Analysis/BasicAA/cs-cs.ll +++ b/test/Analysis/BasicAA/cs-cs.ll @@ -2,41 +2,12 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "arm-apple-ios" -declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind - declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind declare void @a_readonly_func(i8 *) noinline nounwind readonly declare void @a_writeonly_func(i8 *) noinline nounwind writeonly -define <8 x i16> @test1(i8* %p, <8 x i16> %y) { -entry: - %q = getelementptr i8, i8* %p, i64 16 - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind - call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind - %c = add <8 x i16> %a, %b - ret <8 x i16> %c - -; CHECK-LABEL: Function: test1: - -; CHECK: NoAlias: i8* %p, i8* %q -; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) -; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) -; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) -; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) -; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) -; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) -; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) -; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) -; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -} - define void @test2(i8* %P, i8* %Q) nounwind ssp { tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) @@ -247,9 +218,9 @@ define void @test7(i8* %P) nounwind ssp { ; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @a_writeonly_func(i8* %P) } -declare void @an_inaccessiblememonly_func() nounwind inaccessiblememonly -declare void @an_inaccessibleorargmemonly_func(i8 *) nounwind inaccessiblemem_or_argmemonly -declare void @an_argmemonly_func(i8 *) nounwind argmemonly +declare void @an_inaccessiblememonly_func() nounwind inaccessiblememonly +declare void @an_inaccessibleorargmemonly_func(i8 *) nounwind inaccessiblemem_or_argmemonly +declare void @an_argmemonly_func(i8 *) nounwind argmemonly define void @test8(i8* %p) { entry: @@ -260,7 +231,7 @@ entry: call void @an_inaccessiblememonly_func() call void @an_inaccessibleorargmemonly_func(i8* %q) call void @an_argmemonly_func(i8* %q) - ret void + ret void ; CHECK-LABEL: Function: test8 ; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessiblememonly_func() diff --git a/test/Analysis/BasicAA/intrinsics-arm.ll b/test/Analysis/BasicAA/intrinsics-arm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e15ce1c65c64d7d66ca9d123ccae534bcea2a061 --- /dev/null +++ b/test/Analysis/BasicAA/intrinsics-arm.ll @@ -0,0 +1,31 @@ +; RUN: opt -basicaa -gvn -S < %s | FileCheck %s +; REQUIRES: arm + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" + +; BasicAA should prove that these calls don't interfere, since we've +; specifically special cased exactly these two intrinsics in +; MemoryLocation::getForArgument. + +; CHECK: define <8 x i16> @test1(i8* %p, <8 x i16> %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %q = getelementptr i8, i8* %p, i64 16 +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[ATTR:#[0-9]+]] +; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %c = add <8 x i16> %a, %a +define <8 x i16> @test1(i8* %p, <8 x i16> %y) { +entry: + %q = getelementptr i8, i8* %p, i64 16 + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + %c = add <8 x i16> %a, %b + ret <8 x i16> %c +} + +declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind + +; CHECK: attributes #0 = { argmemonly nounwind readonly } +; CHECK: attributes #1 = { argmemonly nounwind } +; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/test/Analysis/BasicAA/intrinsics.ll b/test/Analysis/BasicAA/intrinsics.ll index 526a039ef7ac7ba6fd5bd4c51127a4d98d0260b1..68e59862bcc1fff3ceabad3fc58b9e7e2f70dd93 100644 --- a/test/Analysis/BasicAA/intrinsics.ll +++ b/test/Analysis/BasicAA/intrinsics.ll @@ -5,38 +5,22 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- ; BasicAA should prove that these calls don't interfere, since they are ; IntrArgReadMem and have noalias pointers. -; CHECK: define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { +; CHECK: define <8 x i16> @test0(<8 x i16>* noalias %p, <8 x i16>* noalias %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { ; CHECK-NEXT: entry: -; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[ATTR:#[0-9]+]] -; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %a = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %p, i32 16, <8 x i1> %m, <8 x i16> %pt) [[ATTR:#[0-9]+]] +; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %y, <8 x i16>* %q, i32 16, <8 x i1> %m) ; CHECK-NEXT: %c = add <8 x i16> %a, %a -define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { +define <8 x i16> @test0(<8 x i16>* noalias %p, <8 x i16>* noalias %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { entry: - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind - call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + %a = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %p, i32 16, <8 x i1> %m, <8 x i16> %pt) nounwind + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %y, <8 x i16>* %q, i32 16, <8 x i1> %m) + %b = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %p, i32 16, <8 x i1> %m, <8 x i16> %pt) nounwind %c = add <8 x i16> %a, %b ret <8 x i16> %c } -; CHECK: define <8 x i16> @test1(i8* %p, <8 x i16> %y) { -; CHECK-NEXT: entry: -; CHECK-NEXT: %q = getelementptr i8, i8* %p, i64 16 -; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[ATTR]] -; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK-NEXT: %c = add <8 x i16> %a, %a -define <8 x i16> @test1(i8* %p, <8 x i16> %y) { -entry: - %q = getelementptr i8, i8* %p, i64 16 - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind - call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind - %c = add <8 x i16> %a, %b - ret <8 x i16> %c -} - -declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind +declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) nounwind readonly +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind ; CHECK: attributes #0 = { argmemonly nounwind readonly } ; CHECK: attributes #1 = { argmemonly nounwind } diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll index 94ea5a3d1d8ea21a948c9388314348d851f17ee3..64e0a82456f11d42465e0c65f404bf2f183b21b5 100644 --- a/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/test/Analysis/BranchProbabilityInfo/basic.ll @@ -372,3 +372,228 @@ exit: ret i32 %result } +define i32 @test_unreachable_with_prof_greater(i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_prof_greater' +entry: + %cond = icmp eq i32 %a, 42 + br i1 %cond, label %exit, label %unr, !prof !4 + +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> unr probability is 0x00000001 / 0x80000000 = 0.00% + +unr: + unreachable + +exit: + ret i32 %b +} + +!4 = !{!"branch_weights", i32 0, i32 1} + +define i32 @test_unreachable_with_prof_equal(i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_prof_equal' +entry: + %cond = icmp eq i32 %a, 42 + br i1 %cond, label %exit, label %unr, !prof !5 + +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> unr probability is 0x00000001 / 0x80000000 = 0.00% + +unr: + unreachable + +exit: + ret i32 %b +} + +!5 = !{!"branch_weights", i32 2147483647, i32 1} + +define i32 @test_unreachable_with_prof_zero(i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_prof_zero' +entry: + %cond = icmp eq i32 %a, 42 + br i1 %cond, label %exit, label %unr, !prof !6 + +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> unr probability is 0x00000001 / 0x80000000 = 0.00% + +unr: + unreachable + +exit: + ret i32 %b +} + +!6 = !{!"branch_weights", i32 0, i32 0} + +define i32 @test_unreachable_with_prof_less(i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_prof_less' +entry: + %cond = icmp eq i32 %a, 42 + br i1 %cond, label %exit, label %unr, !prof !7 + +; CHECK: edge entry -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> unr probability is 0x00000000 / 0x80000000 = 0.00% + +unr: + unreachable + +exit: + ret i32 %b +} + +!7 = !{!"branch_weights", i32 1, i32 0} + +define i32 @test_unreachable_with_switch_prof1(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_switch_prof1' +entry: + switch i32 %i, label %case_a [ i32 1, label %case_b + i32 2, label %case_c + i32 3, label %case_d + i32 4, label %case_e ], !prof !8 +; CHECK: edge entry -> case_a probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_b probability is 0x07ffffff / 0x80000000 = 6.25% +; CHECK: edge entry -> case_c probability is 0x67ffffff / 0x80000000 = 81.25% [HOT edge] +; CHECK: edge entry -> case_d probability is 0x07ffffff / 0x80000000 = 6.25% +; CHECK: edge entry -> case_e probability is 0x07ffffff / 0x80000000 = 6.25% + +case_a: + unreachable + +case_b: + br label %exit +; CHECK: edge case_b -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +case_c: + br label %exit +; CHECK: edge case_c -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +case_d: + br label %exit +; CHECK: edge case_d -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +case_e: + br label %exit +; CHECK: edge case_e -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ %b, %case_b ], + [ %c, %case_c ], + [ %d, %case_d ], + [ %e, %case_e ] + ret i32 %result +} + +!8 = !{!"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4} + +define i32 @test_unreachable_with_switch_prof2(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_switch_prof2' +entry: + switch i32 %i, label %case_a [ i32 1, label %case_b + i32 2, label %case_c + i32 3, label %case_d + i32 4, label %case_e ], !prof !9 +; CHECK: edge entry -> case_a probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_b probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_c probability is 0x6aaaaaa9 / 0x80000000 = 83.33% [HOT edge] +; CHECK: edge entry -> case_d probability is 0x0aaaaaa9 / 0x80000000 = 8.33% +; CHECK: edge entry -> case_e probability is 0x0aaaaaa9 / 0x80000000 = 8.33% + +case_a: + unreachable + +case_b: + unreachable + +case_c: + br label %exit +; CHECK: edge case_c -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +case_d: + br label %exit +; CHECK: edge case_d -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +case_e: + br label %exit +; CHECK: edge case_e -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ %c, %case_c ], + [ %d, %case_d ], + [ %e, %case_e ] + ret i32 %result +} + +!9 = !{!"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4} + +define i32 @test_unreachable_with_switch_prof3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_switch_prof3' +entry: + switch i32 %i, label %case_a [ i32 1, label %case_b + i32 2, label %case_c + i32 3, label %case_d + i32 4, label %case_e ], !prof !10 +; CHECK: edge entry -> case_a probability is 0x00000000 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_b probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_c probability is 0x6e08fb82 / 0x80000000 = 85.96% [HOT edge] +; CHECK: edge entry -> case_d probability is 0x08fb823e / 0x80000000 = 7.02% +; CHECK: edge entry -> case_e probability is 0x08fb823e / 0x80000000 = 7.02% + +case_a: + unreachable + +case_b: + unreachable + +case_c: + br label %exit +; CHECK: edge case_c -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +case_d: + br label %exit +; CHECK: edge case_d -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +case_e: + br label %exit +; CHECK: edge case_e -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ %c, %case_c ], + [ %d, %case_d ], + [ %e, %case_e ] + ret i32 %result +} + +!10 = !{!"branch_weights", i32 0, i32 4, i32 64, i32 4, i32 4} + +define i32 @test_unreachable_with_switch_prof4(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +; CHECK: Printing analysis {{.*}} for function 'test_unreachable_with_switch_prof4' +entry: + switch i32 %i, label %case_a [ i32 1, label %case_b + i32 2, label %case_c + i32 3, label %case_d + i32 4, label %case_e ], !prof !11 +; CHECK: edge entry -> case_a probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge entry -> case_b probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge entry -> case_c probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge entry -> case_d probability is 0x1999999a / 0x80000000 = 20.00% +; CHECK: edge entry -> case_e probability is 0x1999999a / 0x80000000 = 20.00% + +case_a: + unreachable + +case_b: + unreachable + +case_c: + unreachable + +case_d: + unreachable + +case_e: + unreachable + +} + +!11 = !{!"branch_weights", i32 0, i32 4, i32 64, i32 4, i32 4} diff --git a/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll b/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll index faa09f9e8a0c489895afc85de54557b2ee31b827..c2681e5e7c8051545e9a5a78262f90fe2b3fb1f0 100644 --- a/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll +++ b/test/Analysis/BranchProbabilityInfo/deopt-intrinsic.ll @@ -9,8 +9,8 @@ entry: %cond = icmp eq i32 %a, 42 br i1 %cond, label %exit, label %deopt -; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> deopt probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> deopt probability is 0x00000001 / 0x80000000 = 0.00% deopt: %rval = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] diff --git a/test/Analysis/BranchProbabilityInfo/libfunc_call.ll b/test/Analysis/BranchProbabilityInfo/libfunc_call.ll new file mode 100644 index 0000000000000000000000000000000000000000..13bc0de90a6169f6842b6dfeb7736f1cd57090dc --- /dev/null +++ b/test/Analysis/BranchProbabilityInfo/libfunc_call.ll @@ -0,0 +1,264 @@ +; RUN: opt < %s -analyze -branch-prob | FileCheck %s +; RUN: opt < %s -analyze -lazy-branch-prob | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +declare i32 @strcmp(i8*, i8*) +declare i32 @strncmp(i8*, i8*, i32) +declare i32 @strcasecmp(i8*, i8*) +declare i32 @strncasecmp(i8*, i8*, i32) +declare i32 @memcmp(i8*, i8*) +declare i32 @nonstrcmp(i8*, i8*) + + +; Check that the result of strcmp is considered more likely to be nonzero than +; zero, and equally likely to be (nonzero) positive or negative. + +define i32 @test_strcmp_eq(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcmp_eq' +entry: + %val = call i32 @strcmp(i8* %p, i8* %q) + %cond = icmp eq i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strcmp_ne(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcmp_ne' +entry: + %val = call i32 @strcmp(i8* %p, i8* %q) + %cond = icmp ne i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strcmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcmp_sgt' +entry: + %val = call i32 @strcmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strcmp_slt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcmp_slt' +entry: + %val = call i32 @strcmp(i8* %p, i8* %q) + %cond = icmp slt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + + +; Similarly check other library functions that have the same behaviour + +define i32 @test_strncmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strncmp_sgt' +entry: + %val = call i32 @strncmp(i8* %p, i8* %q, i32 4) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strcasecmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strcasecmp_sgt' +entry: + %val = call i32 @strcasecmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_strncasecmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_strncasecmp_sgt' +entry: + %val = call i32 @strncasecmp(i8* %p, i8* %q, i32 4) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_memcmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_memcmp_sgt' +entry: + %val = call i32 @memcmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + + +; Check that for the result of a call to a non-library function the default +; heuristic is applied, i.e. positive more likely than negative, nonzero more +; likely than zero. + +define i32 @test_nonstrcmp_eq(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_eq' +entry: + %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %cond = icmp eq i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50% +; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_nonstrcmp_ne(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_ne' +entry: + %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %cond = icmp ne i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} + +define i32 @test_nonstrcmp_sgt(i8* %p, i8* %q) { +; CHECK: Printing analysis {{.*}} for function 'test_nonstrcmp_sgt' +entry: + %val = call i32 @nonstrcmp(i8* %p, i8* %q) + %cond = icmp sgt i32 %val, 0 + br i1 %cond, label %then, label %else +; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50% +; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50% + +then: + br label %exit +; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +else: + br label %exit +; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge] + +exit: + %result = phi i32 [ 0, %then ], [ 1, %else ] + ret i32 %result +} diff --git a/test/Analysis/BranchProbabilityInfo/noreturn.ll b/test/Analysis/BranchProbabilityInfo/noreturn.ll index 0c2fe863d034d0475afe33497681c4babadfed16..0566ca16c2f3aff6c4787dad825d216fd990fbec 100644 --- a/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -9,8 +9,8 @@ define i32 @test1(i32 %a, i32 %b) { entry: %cond = icmp eq i32 %a, 42 br i1 %cond, label %exit, label %abort -; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> abort probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> abort probability is 0x00000001 / 0x80000000 = 0.00% abort: call void @abort() noreturn @@ -27,11 +27,11 @@ entry: i32 2, label %case_b i32 3, label %case_c i32 4, label %case_d] -; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> case_a probability is 0x00000200 / 0x80000000 = 0.00% -; CHECK: edge entry -> case_b probability is 0x00000200 / 0x80000000 = 0.00% -; CHECK: edge entry -> case_c probability is 0x00000200 / 0x80000000 = 0.00% -; CHECK: edge entry -> case_d probability is 0x00000200 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7ffffffc / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> case_a probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_b probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_c probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> case_d probability is 0x00000001 / 0x80000000 = 0.00% case_a: br label %case_b @@ -56,8 +56,8 @@ define i32 @test3(i32 %a, i32 %b) { entry: %cond1 = icmp eq i32 %a, 42 br i1 %cond1, label %exit, label %dom -; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] -; CHECK: edge entry -> dom probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> dom probability is 0x00000001 / 0x80000000 = 0.00% dom: %cond2 = icmp ult i32 %a, 42 @@ -87,8 +87,8 @@ define i32 @throwSmallException(i32 %idx, i32 %limit) #0 personality i8* bitcast entry: %cmp = icmp sge i32 %idx, %limit br i1 %cmp, label %if.then, label %if.end -; CHECK: edge entry -> if.then probability is 0x00000800 / 0x80000000 = 0.00% -; CHECK: edge entry -> if.end probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge entry -> if.then probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> if.end probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] if.then: ; preds = %entry %exception = call i8* @__cxa_allocate_exception(i64 1) #0 diff --git a/test/Analysis/CFLAliasAnalysis/Andersen/struct.ll b/test/Analysis/CFLAliasAnalysis/Andersen/struct.ll new file mode 100644 index 0000000000000000000000000000000000000000..c1d25c1e3c2140d23f1240ccc9d333fe9fc730df --- /dev/null +++ b/test/Analysis/CFLAliasAnalysis/Andersen/struct.ll @@ -0,0 +1,18 @@ +; Ensures that our struct ops are sane. + +; RUN: opt < %s -disable-basicaa -cfl-anders-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -aa-pipeline=cfl-anders-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +; Since we ignore non-pointer values, we effectively ignore extractvalue +; instructions. This means that %c "doesn't exist" in test_structure's graph, +; so we currently get MayAlias. +; XFAIL: * + +; CHECK-LABEL: Function: test_structure +; CHECK: NoAlias: i64** %c, { i64**, i64** }* %a +define void @test_structure() { + %a = alloca {i64**, i64**}, align 8 + %b = load {i64**, i64**}, {i64**, i64**}* %a + %c = extractvalue {i64**, i64**} %b, 0 + ret void +} diff --git a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll index 50ad61a8f100a900cb7d47020c043d92828d0de1..16bc8a983e48ea6e62615f2c685372664ce9e7c2 100644 --- a/test/Analysis/ConstantFolding/gep-constanfolding-error.ll +++ b/test/Analysis/ConstantFolding/gep-constanfolding-error.ll @@ -44,7 +44,7 @@ entry: %9 = add i32 %f.promoted, %smax %10 = add i32 %9, 2 call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i32 1, i1 false) -; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i64 5, i64 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false) +; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false) ; CHECK-NOT: call void @llvm.memset.p0i8.i32(i8* getelementptr ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i64 1, i64 4, i64 4, i32 1) ret i32 0 } diff --git a/test/Analysis/CostModel/AArch64/falkor.ll b/test/Analysis/CostModel/AArch64/falkor.ll deleted file mode 100644 index e9563191f077b0f8899ce7b47911024c9c4594ec..0000000000000000000000000000000000000000 --- a/test/Analysis/CostModel/AArch64/falkor.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: opt < %s -cost-model -analyze -mcpu=falkor | FileCheck %s - -target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-gnu" - -; CHECK-LABEL: vectorInstrCost -define void @vectorInstrCost() { - - ; Vector extracts - extracting the first element should have a zero cost; - ; all other elements should have a cost of two. - ; - ; CHECK: cost of 0 {{.*}} extractelement <2 x i64> undef, i32 0 - ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 1 - %t1 = extractelement <2 x i64> undef, i32 0 - %t2 = extractelement <2 x i64> undef, i32 1 - - ; Vector inserts - inserting the first element should have a zero cost; all - ; other elements should have a cost of two. - ; - ; CHECK: cost of 0 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 0 - ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 1 - %t3 = insertelement <2 x i64> undef, i64 undef, i32 0 - %t4 = insertelement <2 x i64> undef, i64 undef, i32 1 - - ret void -} diff --git a/test/Analysis/CostModel/AArch64/free-widening-casts.ll b/test/Analysis/CostModel/AArch64/free-widening-casts.ll new file mode 100644 index 0000000000000000000000000000000000000000..07f32d1d8ba21a41d05f450d07a9dda1b094cb56 --- /dev/null +++ b/test/Analysis/CostModel/AArch64/free-widening-casts.ll @@ -0,0 +1,622 @@ +; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST +; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE + +; COST-LABEL: uaddl_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16> +; CODE-LABEL: uaddl_8h +; CODE: uaddl v0.8h, v0.8b, v1.8b +define <8 x i16> @uaddl_8h(<8 x i8> %a, <8 x i8> %b) { + %tmp0 = zext <8 x i8> %a to <8 x i16> + %tmp1 = zext <8 x i8> %b to <8 x i16> + %tmp2 = add <8 x i16> %tmp0, %tmp1 + ret <8 x i16> %tmp2 +} + +; COST-LABEL: uaddl_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32> +; CODE-LABEL: uaddl_4s +; CODE: uaddl v0.4s, v0.4h, v1.4h +define <4 x i32> @uaddl_4s(<4 x i16> %a, <4 x i16> %b) { + %tmp0 = zext <4 x i16> %a to <4 x i32> + %tmp1 = zext <4 x i16> %b to <4 x i32> + %tmp2 = add <4 x i32> %tmp0, %tmp1 + ret <4 x i32> %tmp2 +} + +; COST-LABEL: uaddl_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64> +; CODE-LABEL: uaddl_2d +; CODE: uaddl v0.2d, v0.2s, v1.2s +define <2 x i64> @uaddl_2d(<2 x i32> %a, <2 x i32> %b) { + %tmp0 = zext <2 x i32> %a to <2 x i64> + %tmp1 = zext <2 x i32> %b to <2 x i64> + %tmp2 = add <2 x i64> %tmp0, %tmp1 + ret <2 x i64> %tmp2 +} + +; COST-LABEL: uaddl2_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16> +; CODE-LABEL: uaddl2_8h +; CODE: uaddl2 v2.8h, v0.16b, v1.16b +; CODE-NEXT: uaddl v0.8h, v0.8b, v1.8b +define <16 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b) { + %tmp0 = zext <16 x i8> %a to <16 x i16> + %tmp1 = zext <16 x i8> %b to <16 x i16> + %tmp2 = add <16 x i16> %tmp0, %tmp1 + ret <16 x i16> %tmp2 +} + +; COST-LABEL: uaddl2_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32> +; CODE-LABEL: uaddl2_4s +; CODE: uaddl2 v2.4s, v0.8h, v1.8h +; CODE-NEXT: uaddl v0.4s, v0.4h, v1.4h +define <8 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b) { + %tmp0 = zext <8 x i16> %a to <8 x i32> + %tmp1 = zext <8 x i16> %b to <8 x i32> + %tmp2 = add <8 x i32> %tmp0, %tmp1 + ret <8 x i32> %tmp2 +} + +; COST-LABEL: uaddl2_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64> +; CODE-LABEL: uaddl2_2d +; CODE: uaddl2 v2.2d, v0.4s, v1.4s +; CODE-NEXT: uaddl v0.2d, v0.2s, v1.2s +define <4 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) { + %tmp0 = zext <4 x i32> %a to <4 x i64> + %tmp1 = zext <4 x i32> %b to <4 x i64> + %tmp2 = add <4 x i64> %tmp0, %tmp1 + ret <4 x i64> %tmp2 +} + +; COST-LABEL: saddl_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16> +; CODE-LABEL: saddl_8h +; CODE: saddl v0.8h, v0.8b, v1.8b +define <8 x i16> @saddl_8h(<8 x i8> %a, <8 x i8> %b) { + %tmp0 = sext <8 x i8> %a to <8 x i16> + %tmp1 = sext <8 x i8> %b to <8 x i16> + %tmp2 = add <8 x i16> %tmp0, %tmp1 + ret <8 x i16> %tmp2 +} + +; COST-LABEL: saddl_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32> +; CODE-LABEL: saddl_4s +; CODE: saddl v0.4s, v0.4h, v1.4h +define <4 x i32> @saddl_4s(<4 x i16> %a, <4 x i16> %b) { + %tmp0 = sext <4 x i16> %a to <4 x i32> + %tmp1 = sext <4 x i16> %b to <4 x i32> + %tmp2 = add <4 x i32> %tmp0, %tmp1 + ret <4 x i32> %tmp2 +} + +; COST-LABEL: saddl_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64> +; CODE-LABEL: saddl_2d +; CODE: saddl v0.2d, v0.2s, v1.2s +define <2 x i64> @saddl_2d(<2 x i32> %a, <2 x i32> %b) { + %tmp0 = sext <2 x i32> %a to <2 x i64> + %tmp1 = sext <2 x i32> %b to <2 x i64> + %tmp2 = add <2 x i64> %tmp0, %tmp1 + ret <2 x i64> %tmp2 +} + +; COST-LABEL: saddl2_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16> +; CODE-LABEL: saddl2_8h +; CODE: saddl2 v2.8h, v0.16b, v1.16b +; CODE-NEXT: saddl v0.8h, v0.8b, v1.8b +define <16 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b) { + %tmp0 = sext <16 x i8> %a to <16 x i16> + %tmp1 = sext <16 x i8> %b to <16 x i16> + %tmp2 = add <16 x i16> %tmp0, %tmp1 + ret <16 x i16> %tmp2 +} + +; COST-LABEL: saddl2_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32> +; CODE-LABEL: saddl2_4s +; CODE: saddl2 v2.4s, v0.8h, v1.8h +; CODE-NEXT: saddl v0.4s, v0.4h, v1.4h +define <8 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b) { + %tmp0 = sext <8 x i16> %a to <8 x i32> + %tmp1 = sext <8 x i16> %b to <8 x i32> + %tmp2 = add <8 x i32> %tmp0, %tmp1 + ret <8 x i32> %tmp2 +} + +; COST-LABEL: saddl2_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64> +; CODE-LABEL: saddl2_2d +; CODE: saddl2 v2.2d, v0.4s, v1.4s +; CODE-NEXT: saddl v0.2d, v0.2s, v1.2s +define <4 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) { + %tmp0 = sext <4 x i32> %a to <4 x i64> + %tmp1 = sext <4 x i32> %b to <4 x i64> + %tmp2 = add <4 x i64> %tmp0, %tmp1 + ret <4 x i64> %tmp2 +} + +; COST-LABEL: usubl_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16> +; CODE-LABEL: usubl_8h +; CODE: usubl v0.8h, v0.8b, v1.8b +define <8 x i16> @usubl_8h(<8 x i8> %a, <8 x i8> %b) { + %tmp0 = zext <8 x i8> %a to <8 x i16> + %tmp1 = zext <8 x i8> %b to <8 x i16> + %tmp2 = sub <8 x i16> %tmp0, %tmp1 + ret <8 x i16> %tmp2 +} + +; COST-LABEL: usubl_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32> +; CODE-LABEL: usubl_4s +; CODE: usubl v0.4s, v0.4h, v1.4h +define <4 x i32> @usubl_4s(<4 x i16> %a, <4 x i16> %b) { + %tmp0 = zext <4 x i16> %a to <4 x i32> + %tmp1 = zext <4 x i16> %b to <4 x i32> + %tmp2 = sub <4 x i32> %tmp0, %tmp1 + ret <4 x i32> %tmp2 +} + +; COST-LABEL: usubl_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64> +; CODE-LABEL: usubl_2d +; CODE: usubl v0.2d, v0.2s, v1.2s +define <2 x i64> @usubl_2d(<2 x i32> %a, <2 x i32> %b) { + %tmp0 = zext <2 x i32> %a to <2 x i64> + %tmp1 = zext <2 x i32> %b to <2 x i64> + %tmp2 = sub <2 x i64> %tmp0, %tmp1 + ret <2 x i64> %tmp2 +} + +; COST-LABEL: usubl2_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16> +; CODE-LABEL: usubl2_8h +; CODE: usubl2 v2.8h, v0.16b, v1.16b +; CODE-NEXT: usubl v0.8h, v0.8b, v1.8b +define <16 x i16> @usubl2_8h(<16 x i8> %a, <16 x i8> %b) { + %tmp0 = zext <16 x i8> %a to <16 x i16> + %tmp1 = zext <16 x i8> %b to <16 x i16> + %tmp2 = sub <16 x i16> %tmp0, %tmp1 + ret <16 x i16> %tmp2 +} + +; COST-LABEL: usubl2_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32> +; CODE-LABEL: usubl2_4s +; CODE: usubl2 v2.4s, v0.8h, v1.8h +; CODE-NEXT: usubl v0.4s, v0.4h, v1.4h +define <8 x i32> @usubl2_4s(<8 x i16> %a, <8 x i16> %b) { + %tmp0 = zext <8 x i16> %a to <8 x i32> + %tmp1 = zext <8 x i16> %b to <8 x i32> + %tmp2 = sub <8 x i32> %tmp0, %tmp1 + ret <8 x i32> %tmp2 +} + +; COST-LABEL: usubl2_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64> +; CODE-LABEL: usubl2_2d +; CODE: usubl2 v2.2d, v0.4s, v1.4s +; CODE-NEXT: usubl v0.2d, v0.2s, v1.2s +define <4 x i64> @usubl2_2d(<4 x i32> %a, <4 x i32> %b) { + %tmp0 = zext <4 x i32> %a to <4 x i64> + %tmp1 = zext <4 x i32> %b to <4 x i64> + %tmp2 = sub <4 x i64> %tmp0, %tmp1 + ret <4 x i64> %tmp2 +} + +; COST-LABEL: ssubl_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16> +; CODE-LABEL: ssubl_8h +; CODE: ssubl v0.8h, v0.8b, v1.8b +define <8 x i16> @ssubl_8h(<8 x i8> %a, <8 x i8> %b) { + %tmp0 = sext <8 x i8> %a to <8 x i16> + %tmp1 = sext <8 x i8> %b to <8 x i16> + %tmp2 = sub <8 x i16> %tmp0, %tmp1 + ret <8 x i16> %tmp2 +} + +; COST-LABEL: ssubl_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32> +; CODE-LABEL: ssubl_4s +; CODE: ssubl v0.4s, v0.4h, v1.4h +define <4 x i32> @ssubl_4s(<4 x i16> %a, <4 x i16> %b) { + %tmp0 = sext <4 x i16> %a to <4 x i32> + %tmp1 = sext <4 x i16> %b to <4 x i32> + %tmp2 = sub <4 x i32> %tmp0, %tmp1 + ret <4 x i32> %tmp2 +} + +; COST-LABEL: ssubl_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64> +; CODE-LABEL: ssubl_2d +; CODE: ssubl v0.2d, v0.2s, v1.2s +define <2 x i64> @ssubl_2d(<2 x i32> %a, <2 x i32> %b) { + %tmp0 = sext <2 x i32> %a to <2 x i64> + %tmp1 = sext <2 x i32> %b to <2 x i64> + %tmp2 = sub <2 x i64> %tmp0, %tmp1 + ret <2 x i64> %tmp2 +} + +; COST-LABEL: ssubl2_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16> +; CODE-LABEL: ssubl2_8h +; CODE: ssubl2 v2.8h, v0.16b, v1.16b +; CODE-NEXT: ssubl v0.8h, v0.8b, v1.8b +define <16 x i16> @ssubl2_8h(<16 x i8> %a, <16 x i8> %b) { + %tmp0 = sext <16 x i8> %a to <16 x i16> + %tmp1 = sext <16 x i8> %b to <16 x i16> + %tmp2 = sub <16 x i16> %tmp0, %tmp1 + ret <16 x i16> %tmp2 +} + +; COST-LABEL: ssubl2_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32> +; CODE-LABEL: ssubl2_4s +; CODE: ssubl2 v2.4s, v0.8h, v1.8h +; CODE-NEXT: ssubl v0.4s, v0.4h, v1.4h +define <8 x i32> @ssubl2_4s(<8 x i16> %a, <8 x i16> %b) { + %tmp0 = sext <8 x i16> %a to <8 x i32> + %tmp1 = sext <8 x i16> %b to <8 x i32> + %tmp2 = sub <8 x i32> %tmp0, %tmp1 + ret <8 x i32> %tmp2 +} + +; COST-LABEL: ssubl2_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64> +; CODE-LABEL: ssubl2_2d +; CODE: ssubl2 v2.2d, v0.4s, v1.4s +; CODE-NEXT: ssubl v0.2d, v0.2s, v1.2s +define <4 x i64> @ssubl2_2d(<4 x i32> %a, <4 x i32> %b) { + %tmp0 = sext <4 x i32> %a to <4 x i64> + %tmp1 = sext <4 x i32> %b to <4 x i64> + %tmp2 = sub <4 x i64> %tmp0, %tmp1 + ret <4 x i64> %tmp2 +} + +; COST-LABEL: uaddw_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> +; CODE-LABEL: uaddw_8h +; CODE: uaddw v0.8h, v1.8h, v0.8b +define <8 x i16> @uaddw_8h(<8 x i8> %a, <8 x i16> %b) { + %tmp0 = zext <8 x i8> %a to <8 x i16> + %tmp1 = add <8 x i16> %b, %tmp0 + ret <8 x i16> %tmp1 +} + +; COST-LABEL: uaddw_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32> +; CODE-LABEL: uaddw_4s +; CODE: uaddw v0.4s, v1.4s, v0.4h +define <4 x i32> @uaddw_4s(<4 x i16> %a, <4 x i32> %b) { + %tmp0 = zext <4 x i16> %a to <4 x i32> + %tmp1 = add <4 x i32> %b, %tmp0 + ret <4 x i32> %tmp1 +} + +; COST-LABEL: uaddw_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64> +; CODE-LABEL: uaddw_2d +; CODE: uaddw v0.2d, v1.2d, v0.2s +define <2 x i64> @uaddw_2d(<2 x i32> %a, <2 x i64> %b) { + %tmp0 = zext <2 x i32> %a to <2 x i64> + %tmp1 = add <2 x i64> %b, %tmp0 + ret <2 x i64> %tmp1 +} + +; COST-LABEL: uaddw2_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16> +; CODE-LABEL: uaddw2_8h +; CODE: uaddw2 v2.8h, v2.8h, v0.16b +; CODE-NEXT: uaddw v0.8h, v1.8h, v0.8b +define <16 x i16> @uaddw2_8h(<16 x i8> %a, <16 x i16> %b) { + %tmp0 = zext <16 x i8> %a to <16 x i16> + %tmp1 = add <16 x i16> %b, %tmp0 + ret <16 x i16> %tmp1 +} + +; COST-LABEL: uaddw2_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32> +; CODE-LABEL: uaddw2_4s +; CODE: uaddw2 v2.4s, v2.4s, v0.8h +; CODE-NEXT: uaddw v0.4s, v1.4s, v0.4h +define <8 x i32> @uaddw2_4s(<8 x i16> %a, <8 x i32> %b) { + %tmp0 = zext <8 x i16> %a to <8 x i32> + %tmp1 = add <8 x i32> %b, %tmp0 + ret <8 x i32> %tmp1 +} + +; COST-LABEL: uaddw2_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64> +; CODE-LABEL: uaddw2_2d +; CODE: uaddw2 v2.2d, v2.2d, v0.4s +; CODE-NEXT: uaddw v0.2d, v1.2d, v0.2s +define <4 x i64> @uaddw2_2d(<4 x i32> %a, <4 x i64> %b) { + %tmp0 = zext <4 x i32> %a to <4 x i64> + %tmp1 = add <4 x i64> %b, %tmp0 + ret <4 x i64> %tmp1 +} + +; COST-LABEL: saddw_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16> +; CODE-LABEL: saddw_8h +; CODE: saddw v0.8h, v1.8h, v0.8b +define <8 x i16> @saddw_8h(<8 x i8> %a, <8 x i16> %b) { + %tmp0 = sext <8 x i8> %a to <8 x i16> + %tmp1 = add <8 x i16> %b, %tmp0 + ret <8 x i16> %tmp1 +} + +; COST-LABEL: saddw_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32> +; CODE-LABEL: saddw_4s +; CODE: saddw v0.4s, v1.4s, v0.4h +define <4 x i32> @saddw_4s(<4 x i16> %a, <4 x i32> %b) { + %tmp0 = sext <4 x i16> %a to <4 x i32> + %tmp1 = add <4 x i32> %b, %tmp0 + ret <4 x i32> %tmp1 +} + +; COST-LABEL: saddw_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64> +; CODE-LABEL: saddw_2d +; CODE: saddw v0.2d, v1.2d, v0.2s +define <2 x i64> @saddw_2d(<2 x i32> %a, <2 x i64> %b) { + %tmp0 = sext <2 x i32> %a to <2 x i64> + %tmp1 = add <2 x i64> %b, %tmp0 + ret <2 x i64> %tmp1 +} + +; COST-LABEL: saddw2_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16> +; CODE-LABEL: saddw2_8h +; CODE: saddw2 v2.8h, v2.8h, v0.16b +; CODE-NEXT: saddw v0.8h, v1.8h, v0.8b +define <16 x i16> @saddw2_8h(<16 x i8> %a, <16 x i16> %b) { + %tmp0 = sext <16 x i8> %a to <16 x i16> + %tmp1 = add <16 x i16> %b, %tmp0 + ret <16 x i16> %tmp1 +} + +; COST-LABEL: saddw2_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32> +; CODE-LABEL: saddw2_4s +; CODE: saddw2 v2.4s, v2.4s, v0.8h +; CODE-NEXT: saddw v0.4s, v1.4s, v0.4h +define <8 x i32> @saddw2_4s(<8 x i16> %a, <8 x i32> %b) { + %tmp0 = sext <8 x i16> %a to <8 x i32> + %tmp1 = add <8 x i32> %b, %tmp0 + ret <8 x i32> %tmp1 +} + +; COST-LABEL: saddw2_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64> +; CODE-LABEL: saddw2_2d +; CODE: saddw2 v2.2d, v2.2d, v0.4s +; CODE-NEXT: saddw v0.2d, v1.2d, v0.2s +define <4 x i64> @saddw2_2d(<4 x i32> %a, <4 x i64> %b) { + %tmp0 = sext <4 x i32> %a to <4 x i64> + %tmp1 = add <4 x i64> %b, %tmp0 + ret <4 x i64> %tmp1 +} + +; COST-LABEL: usubw_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> +; CODE-LABEL: usubw_8h +; CODE: usubw v0.8h, v1.8h, v0.8b +define <8 x i16> @usubw_8h(<8 x i8> %a, <8 x i16> %b) { + %tmp0 = zext <8 x i8> %a to <8 x i16> + %tmp1 = sub <8 x i16> %b, %tmp0 + ret <8 x i16> %tmp1 +} + +; COST-LABEL: usubw_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32> +; CODE-LABEL: usubw_4s +; CODE: usubw v0.4s, v1.4s, v0.4h +define <4 x i32> @usubw_4s(<4 x i16> %a, <4 x i32> %b) { + %tmp0 = zext <4 x i16> %a to <4 x i32> + %tmp1 = sub <4 x i32> %b, %tmp0 + ret <4 x i32> %tmp1 +} + +; COST-LABEL: usubw_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64> +; CODE-LABEL: usubw_2d +; CODE: usubw v0.2d, v1.2d, v0.2s +define <2 x i64> @usubw_2d(<2 x i32> %a, <2 x i64> %b) { + %tmp0 = zext <2 x i32> %a to <2 x i64> + %tmp1 = sub <2 x i64> %b, %tmp0 + ret <2 x i64> %tmp1 +} + +; COST-LABEL: usubw2_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16> +; CODE-LABEL: usubw2_8h +; CODE: usubw2 v2.8h, v2.8h, v0.16b +; CODE-NEXT: usubw v0.8h, v1.8h, v0.8b +define <16 x i16> @usubw2_8h(<16 x i8> %a, <16 x i16> %b) { + %tmp0 = zext <16 x i8> %a to <16 x i16> + %tmp1 = sub <16 x i16> %b, %tmp0 + ret <16 x i16> %tmp1 +} + +; COST-LABEL: usubw2_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32> +; CODE-LABEL: usubw2_4s +; CODE: usubw2 v2.4s, v2.4s, v0.8h +; CODE-NEXT: usubw v0.4s, v1.4s, v0.4h +define <8 x i32> @usubw2_4s(<8 x i16> %a, <8 x i32> %b) { + %tmp0 = zext <8 x i16> %a to <8 x i32> + %tmp1 = sub <8 x i32> %b, %tmp0 + ret <8 x i32> %tmp1 +} + +; COST-LABEL: usubw2_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64> +; CODE-LABEL: usubw2_2d +; CODE: usubw2 v2.2d, v2.2d, v0.4s +; CODE-NEXT: usubw v0.2d, v1.2d, v0.2s +define <4 x i64> @usubw2_2d(<4 x i32> %a, <4 x i64> %b) { + %tmp0 = zext <4 x i32> %a to <4 x i64> + %tmp1 = sub <4 x i64> %b, %tmp0 + ret <4 x i64> %tmp1 +} + +; COST-LABEL: ssubw_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16> +; CODE-LABEL: ssubw_8h +; CODE: ssubw v0.8h, v1.8h, v0.8b +define <8 x i16> @ssubw_8h(<8 x i8> %a, <8 x i16> %b) { + %tmp0 = sext <8 x i8> %a to <8 x i16> + %tmp1 = sub <8 x i16> %b, %tmp0 + ret <8 x i16> %tmp1 +} + +; COST-LABEL: ssubw_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32> +; CODE-LABEL: ssubw_4s +; CODE: ssubw v0.4s, v1.4s, v0.4h +define <4 x i32> @ssubw_4s(<4 x i16> %a, <4 x i32> %b) { + %tmp0 = sext <4 x i16> %a to <4 x i32> + %tmp1 = sub <4 x i32> %b, %tmp0 + ret <4 x i32> %tmp1 +} + +; COST-LABEL: ssubw_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64> +; CODE-LABEL: ssubw_2d +; CODE: ssubw v0.2d, v1.2d, v0.2s +define <2 x i64> @ssubw_2d(<2 x i32> %a, <2 x i64> %b) { + %tmp0 = sext <2 x i32> %a to <2 x i64> + %tmp1 = sub <2 x i64> %b, %tmp0 + ret <2 x i64> %tmp1 +} + +; COST-LABEL: ssubw2_8h +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16> +; CODE-LABEL: ssubw2_8h +; CODE: ssubw2 v2.8h, v2.8h, v0.16b +; CODE-NEXT: ssubw v0.8h, v1.8h, v0.8b +define <16 x i16> @ssubw2_8h(<16 x i8> %a, <16 x i16> %b) { + %tmp0 = sext <16 x i8> %a to <16 x i16> + %tmp1 = sub <16 x i16> %b, %tmp0 + ret <16 x i16> %tmp1 +} + +; COST-LABEL: ssubw2_4s +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32> +; CODE-LABEL: ssubw2_4s +; CODE: ssubw2 v2.4s, v2.4s, v0.8h +; CODE-NEXT: ssubw v0.4s, v1.4s, v0.4h +define <8 x i32> @ssubw2_4s(<8 x i16> %a, <8 x i32> %b) { + %tmp0 = sext <8 x i16> %a to <8 x i32> + %tmp1 = sub <8 x i32> %b, %tmp0 + ret <8 x i32> %tmp1 +} + +; COST-LABEL: ssubw2_2d +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64> +; CODE-LABEL: ssubw2_2d +; CODE: ssubw2 v2.2d, v2.2d, v0.4s +; CODE-NEXT: ssubw v0.2d, v1.2d, v0.2s +define <4 x i64> @ssubw2_2d(<4 x i32> %a, <4 x i64> %b) { + %tmp0 = sext <4 x i32> %a to <4 x i64> + %tmp1 = sub <4 x i64> %b, %tmp0 + ret <4 x i64> %tmp1 +} + +; COST-LABEL: neg_wrong_operand_order +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> +define <8 x i16> @neg_wrong_operand_order(<8 x i8> %a, <8 x i16> %b) { + %tmp0 = zext <8 x i8> %a to <8 x i16> + %tmp1 = sub <8 x i16> %tmp0, %b + ret <8 x i16> %tmp1 +} + +; COST-LABEL: neg_non_widening_op +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> +define <8 x i16> @neg_non_widening_op(<8 x i8> %a, <8 x i16> %b) { + %tmp0 = zext <8 x i8> %a to <8 x i16> + %tmp1 = udiv <8 x i16> %b, %tmp0 + ret <8 x i16> %tmp1 +} + +; COST-LABEL: neg_dissimilar_operand_kind_0 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16> +define <8 x i16> @neg_dissimilar_operand_kind_0(<8 x i8> %a, <8 x i8> %b) { + %tmp0 = sext <8 x i8> %a to <8 x i16> + %tmp1 = zext <8 x i8> %b to <8 x i16> + %tmp2 = add <8 x i16> %tmp0, %tmp1 + ret <8 x i16> %tmp2 +} + +; COST-LABEL: neg_dissimilar_operand_kind_1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <4 x i8> %a to <4 x i32> +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32> +define <4 x i32> @neg_dissimilar_operand_kind_1(<4 x i8> %a, <4 x i16> %b) { + %tmp0 = zext <4 x i8> %a to <4 x i32> + %tmp1 = zext <4 x i16> %b to <4 x i32> + %tmp2 = add <4 x i32> %tmp0, %tmp1 + ret <4 x i32> %tmp2 +} + +; COST-LABEL: neg_illegal_vector_type_0 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <16 x i4> %a to <16 x i8> +define <16 x i8> @neg_illegal_vector_type_0(<16 x i4> %a, <16 x i8> %b) { + %tmp0 = zext <16 x i4> %a to <16 x i8> + %tmp1 = sub <16 x i8> %b, %tmp0 + ret <16 x i8> %tmp1 +} + +; COST-LABEL: neg_llegal_vector_type_1 +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <1 x i16> %a to <1 x i32> +define <1 x i32> @neg_llegal_vector_type_1(<1 x i16> %a, <1 x i32> %b) { + %tmp0 = zext <1 x i16> %a to <1 x i32> + %tmp1 = add <1 x i32> %b, %tmp0 + ret <1 x i32> %tmp1 +} + +; COST-LABEL: neg_llegal_vector_type_2 +; COST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i64> +define <4 x i64> @neg_llegal_vector_type_2(<4 x i16> %a, <4 x i64> %b) { + %tmp0 = zext <4 x i16> %a to <4 x i64> + %tmp1 = add <4 x i64> %b, %tmp0 + ret <4 x i64> %tmp1 +} + +; COST-LABEL: neg_llegal_vector_type_3 +; COST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp0 = zext <3 x i34> %a to <3 x i68> +define <3 x i68> @neg_llegal_vector_type_3(<3 x i34> %a, <3 x i68> %b) { + %tmp0 = zext <3 x i34> %a to <3 x i68> + %tmp1 = add <3 x i68> %b, %tmp0 + ret <3 x i68> %tmp1 +} diff --git a/test/Analysis/CostModel/AMDGPU/extractelement.ll b/test/Analysis/CostModel/AMDGPU/extractelement.ll index 1efbb5873acb279c3cefa6ee0d67de19d9f990fe..54c8b6c52365cea5e617d5b26395b6a58837bbe0 100644 --- a/test/Analysis/CostModel/AMDGPU/extractelement.ll +++ b/test/Analysis/CostModel/AMDGPU/extractelement.ll @@ -1,7 +1,9 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX9 %s -; CHECK: 'extractelement_v2i32' -; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x i32> +; GCN: 'extractelement_v2i32' +; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i32> define amdgpu_kernel void @extractelement_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) { %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr %elt = extractelement <2 x i32> %vec, i32 1 @@ -9,8 +11,8 @@ define amdgpu_kernel void @extractelement_v2i32(i32 addrspace(1)* %out, <2 x i32 ret void } -; CHECK: 'extractelement_v2f32' -; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x float> +; GCN: 'extractelement_v2f32' +; GCN: estimated cost of 0 for {{.*}} extractelement <2 x float> define amdgpu_kernel void @extractelement_v2f32(float addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) { %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr %elt = extractelement <2 x float> %vec, i32 1 @@ -18,8 +20,8 @@ define amdgpu_kernel void @extractelement_v2f32(float addrspace(1)* %out, <2 x f ret void } -; CHECK: 'extractelement_v3i32' -; CHECK: estimated cost of 0 for {{.*}} extractelement <3 x i32> +; GCN: 'extractelement_v3i32' +; GCN: estimated cost of 0 for {{.*}} extractelement <3 x i32> define amdgpu_kernel void @extractelement_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr) { %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr %elt = extractelement <3 x i32> %vec, i32 1 @@ -27,8 +29,8 @@ define amdgpu_kernel void @extractelement_v3i32(i32 addrspace(1)* %out, <3 x i32 ret void } -; CHECK: 'extractelement_v4i32' -; CHECK: estimated cost of 0 for {{.*}} extractelement <4 x i32> +; GCN: 'extractelement_v4i32' +; GCN: estimated cost of 0 for {{.*}} extractelement <4 x i32> define amdgpu_kernel void @extractelement_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr) { %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr %elt = extractelement <4 x i32> %vec, i32 1 @@ -36,8 +38,8 @@ define amdgpu_kernel void @extractelement_v4i32(i32 addrspace(1)* %out, <4 x i32 ret void } -; CHECK: 'extractelement_v8i32' -; CHECK: estimated cost of 0 for {{.*}} extractelement <8 x i32> +; GCN: 'extractelement_v8i32' +; GCN: estimated cost of 0 for {{.*}} extractelement <8 x i32> define amdgpu_kernel void @extractelement_v8i32(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr) { %vec = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr %elt = extractelement <8 x i32> %vec, i32 1 @@ -46,8 +48,8 @@ define amdgpu_kernel void @extractelement_v8i32(i32 addrspace(1)* %out, <8 x i32 } ; FIXME: Should be non-0 -; CHECK: 'extractelement_v8i32_dynindex' -; CHECK: estimated cost of 2 for {{.*}} extractelement <8 x i32> +; GCN: 'extractelement_v8i32_dynindex' +; GCN: estimated cost of 2 for {{.*}} extractelement <8 x i32> define amdgpu_kernel void @extractelement_v8i32_dynindex(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr, i32 %idx) { %vec = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr %elt = extractelement <8 x i32> %vec, i32 %idx @@ -55,8 +57,8 @@ define amdgpu_kernel void @extractelement_v8i32_dynindex(i32 addrspace(1)* %out, ret void } -; CHECK: 'extractelement_v2i64' -; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x i64> +; GCN: 'extractelement_v2i64' +; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i64> define amdgpu_kernel void @extractelement_v2i64(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) { %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr %elt = extractelement <2 x i64> %vec, i64 1 @@ -64,8 +66,8 @@ define amdgpu_kernel void @extractelement_v2i64(i64 addrspace(1)* %out, <2 x i64 ret void } -; CHECK: 'extractelement_v3i64' -; CHECK: estimated cost of 0 for {{.*}} extractelement <3 x i64> +; GCN: 'extractelement_v3i64' +; GCN: estimated cost of 0 for {{.*}} extractelement <3 x i64> define amdgpu_kernel void @extractelement_v3i64(i64 addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr) { %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr %elt = extractelement <3 x i64> %vec, i64 1 @@ -73,8 +75,8 @@ define amdgpu_kernel void @extractelement_v3i64(i64 addrspace(1)* %out, <3 x i64 ret void } -; CHECK: 'extractelement_v4i64' -; CHECK: estimated cost of 0 for {{.*}} extractelement <4 x i64> +; GCN: 'extractelement_v4i64' +; GCN: estimated cost of 0 for {{.*}} extractelement <4 x i64> define amdgpu_kernel void @extractelement_v4i64(i64 addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr) { %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr %elt = extractelement <4 x i64> %vec, i64 1 @@ -82,8 +84,8 @@ define amdgpu_kernel void @extractelement_v4i64(i64 addrspace(1)* %out, <4 x i64 ret void } -; CHECK: 'extractelement_v8i64' -; CHECK: estimated cost of 0 for {{.*}} extractelement <8 x i64> +; GCN: 'extractelement_v8i64' +; GCN: estimated cost of 0 for {{.*}} extractelement <8 x i64> define amdgpu_kernel void @extractelement_v8i64(i64 addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr) { %vec = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr %elt = extractelement <8 x i64> %vec, i64 1 @@ -91,8 +93,8 @@ define amdgpu_kernel void @extractelement_v8i64(i64 addrspace(1)* %out, <8 x i64 ret void } -; CHECK: 'extractelement_v4i8' -; CHECK: estimated cost of 0 for {{.*}} extractelement <4 x i8> +; GCN: 'extractelement_v4i8' +; GCN: estimated cost of 1 for {{.*}} extractelement <4 x i8> define amdgpu_kernel void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %vaddr) { %vec = load <4 x i8>, <4 x i8> addrspace(1)* %vaddr %elt = extractelement <4 x i8> %vec, i8 1 @@ -100,11 +102,31 @@ define amdgpu_kernel void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> a ret void } -; CHECK: 'extractelement_v2i16' -; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x i16> -define amdgpu_kernel void @extractelement_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { +; GCN: 'extractelement_0_v2i16': +; CI: estimated cost of 1 for {{.*}} extractelement <2 x i16> %vec, i16 0 +; VI: estimated cost of 0 for {{.*}} extractelement <2 x i16> +; GFX9: estimated cost of 0 for {{.*}} extractelement <2 x i16> +define amdgpu_kernel void @extractelement_0_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %elt = extractelement <2 x i16> %vec, i16 0 + store i16 %elt, i16 addrspace(1)* %out + ret void +} + +; GCN: 'extractelement_1_v2i16': +; GCN: estimated cost of 1 for {{.*}} extractelement <2 x i16> +define amdgpu_kernel void @extractelement_1_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %elt = extractelement <2 x i16> %vec, i16 1 store i16 %elt, i16 addrspace(1)* %out ret void } + +; GCN: 'extractelement_var_v2i16' +; GCN: estimated cost of 1 for {{.*}} extractelement <2 x i16> +define amdgpu_kernel void @extractelement_var_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, i32 %idx) { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %elt = extractelement <2 x i16> %vec, i32 %idx + store i16 %elt, i16 addrspace(1)* %out + ret void +} diff --git a/test/Analysis/CostModel/AMDGPU/insertelement.ll b/test/Analysis/CostModel/AMDGPU/insertelement.ll index 6f296a3e7a3454efe10f0b0246b4d03e2b9ec1e0..67ab2607acd5fd6aa7f28d4f65368bbce38c421b 100644 --- a/test/Analysis/CostModel/AMDGPU/insertelement.ll +++ b/test/Analysis/CostModel/AMDGPU/insertelement.ll @@ -1,37 +1,50 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX9 %s -; CHECK: 'insertelement_v2i32' -; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i32> +; GCN-LABEL: 'insertelement_v2i32' +; GCN: estimated cost of 0 for {{.*}} insertelement <2 x i32> define amdgpu_kernel void @insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) { %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr - %insert = insertelement <2 x i32> %vec, i32 1, i32 123 + %insert = insertelement <2 x i32> %vec, i32 123, i32 1 store <2 x i32> %insert, <2 x i32> addrspace(1)* %out ret void } -; CHECK: 'insertelement_v2i64' -; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i64> +; GCN-LABEL: 'insertelement_v2i64' +; GCN: estimated cost of 0 for {{.*}} insertelement <2 x i64> define amdgpu_kernel void @insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) { %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr - %insert = insertelement <2 x i64> %vec, i64 1, i64 123 + %insert = insertelement <2 x i64> %vec, i64 123, i64 1 store <2 x i64> %insert, <2 x i64> addrspace(1)* %out ret void } -; CHECK: 'insertelement_v2i16' -; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i16> -define amdgpu_kernel void @insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { +; GCN-LABEL: 'insertelement_0_v2i16' +; CI: estimated cost of 1 for {{.*}} insertelement <2 x i16> +; VI: estimated cost of 0 for {{.*}} insertelement <2 x i16> +; GFX9: estimated cost of 0 for {{.*}} insertelement <2 x i16> +define amdgpu_kernel void @insertelement_0_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr - %insert = insertelement <2 x i16> %vec, i16 1, i16 123 + %insert = insertelement <2 x i16> %vec, i16 123, i16 0 store <2 x i16> %insert, <2 x i16> addrspace(1)* %out ret void } -; CHECK: 'insertelement_v2i8' -; CHECK: estimated cost of 0 for {{.*}} insertelement <2 x i8> -define amdgpu_kernel void @insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %vaddr) { +; GCN-LABEL: 'insertelement_1_v2i16' +; GCN: estimated cost of 1 for {{.*}} insertelement <2 x i16> +define amdgpu_kernel void @insertelement_1_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %insert = insertelement <2 x i16> %vec, i16 123, i16 1 + store <2 x i16> %insert, <2 x i16> addrspace(1)* %out + ret void +} + +; GCN-LABEL: 'insertelement_1_v2i8' +; GCN: estimated cost of 1 for {{.*}} insertelement <2 x i8> +define amdgpu_kernel void @insertelement_1_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %vaddr) { %vec = load <2 x i8>, <2 x i8> addrspace(1)* %vaddr - %insert = insertelement <2 x i8> %vec, i8 1, i8 123 + %insert = insertelement <2 x i8> %vec, i8 123, i8 1 store <2 x i8> %insert, <2 x i8> addrspace(1)* %out ret void } diff --git a/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/test/Analysis/CostModel/AMDGPU/shufflevector.ll new file mode 100644 index 0000000000000000000000000000000000000000..cc756c82fed323f1684321cdb2e8d21ef5a80099 --- /dev/null +++ b/test/Analysis/CostModel/AMDGPU/shufflevector.ll @@ -0,0 +1,43 @@ +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GFX9,GCN %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=VI,GCN %s + +; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> zeroinitializer +define amdgpu_kernel void @shufflevector_00_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> zeroinitializer + store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out + ret void +} + +; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> +define amdgpu_kernel void @shufflevector_01_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> + store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out + ret void +} + +; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> +define amdgpu_kernel void @shufflevector_10_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> + store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out + ret void +} + +; GFX9: estimated cost of 0 for {{.*}} shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> +define amdgpu_kernel void @shufflevector_11_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <2 x i32> + store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out + ret void +} + +; GCN: estimated cost of 2 for {{.*}} shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> +define amdgpu_kernel void @shufflevector_02_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr0, <2 x i16> addrspace(1)* %vaddr1) { + %vec0 = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr0 + %vec1 = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr1 + %shuf = shufflevector <2 x i16> %vec0, <2 x i16> %vec1, <2 x i32> + store <2 x i16> %shuf, <2 x i16> addrspace(1)* %out + ret void +} diff --git a/test/Analysis/CostModel/SystemZ/div-pow2.ll b/test/Analysis/CostModel/SystemZ/div-pow2.ll new file mode 100644 index 0000000000000000000000000000000000000000..9ef2dd71e8fa747244bcc7328cb00edfa95f0612 --- /dev/null +++ b/test/Analysis/CostModel/SystemZ/div-pow2.ll @@ -0,0 +1,154 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s + +; Scalar sdiv + +define i64 @fun0(i64 %a) { + %r = sdiv i64 %a, 2 + ret i64 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i64 %a, 2 +} + +define i64 @fun1(i64 %a) { + %r = sdiv i64 %a, -4 + ret i64 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i64 %a, -4 +} + +define i32 @fun2(i32 %a) { + %r = sdiv i32 %a, 8 + ret i32 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i32 %a, 8 +} + +define i32 @fun3(i32 %a) { + %r = sdiv i32 %a, -16 + ret i32 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i32 %a, -16 +} + +define i16 @fun4(i16 %a) { + %r = sdiv i16 %a, 32 + ret i16 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i16 %a, 32 +} + +define i16 @fun5(i16 %a) { + %r = sdiv i16 %a, -64 + ret i16 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i16 %a, -64 +} + +define i8 @fun6(i8 %a) { + %r = sdiv i8 %a, 64 + ret i8 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i8 %a, 64 +} + +define i8 @fun7(i8 %a) { + %r = sdiv i8 %a, -128 + ret i8 %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv i8 %a, -128 +} + + +; Vector sdiv + +define <2 x i64> @fun8(<2 x i64> %a) { + %r = sdiv <2 x i64> %a, + ret <2 x i64> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, +} + +define <2 x i64> @fun9(<2 x i64> %a) { + %r = sdiv <2 x i64> %a, + ret <2 x i64> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <2 x i64> %a, +} + +define <4 x i32> @fun10(<4 x i32> %a) { + %r = sdiv <4 x i32> %a, + ret <4 x i32> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, +} + +define <4 x i32> @fun11(<4 x i32> %a) { + %r = sdiv <4 x i32> %a, + ret <4 x i32> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <4 x i32> %a, @fun12(<8 x i16> %a) { + %r = sdiv <8 x i16> %a, + ret <8 x i16> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, @fun13(<8 x i16> %a) { + %r = sdiv <8 x i16> %a, + ret <8 x i16> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <8 x i16> %a, @fun14(<16 x i8> %a) { + %r = sdiv <16 x i8> %a, + ret <16 x i8> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, @fun15(<16 x i8> %a) { + %r = sdiv <16 x i8> %a, + ret <16 x i8> %r +; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %r = sdiv <16 x i8> %a, @fun20(<2 x i64> %a) { + %r = udiv <2 x i64> %a, + ret <2 x i64> %r +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <2 x i64> %a, @fun21(<4 x i32> %a) { + %r = udiv <4 x i32> %a, + ret <4 x i32> %r +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <4 x i32> %a, @fun22(<8 x i16> %a) { + %r = udiv <8 x i16> %a, + ret <8 x i16> %r +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <8 x i16> %a, @fun23(<16 x i8> %a) { + %r = udiv <16 x i8> %a, + ret <16 x i8> %r +; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %r = udiv <16 x i8> %a, @llvm.bitreverse.v8i32(<8 x i32>) declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>) declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>) +declare <8 x i64> @llvm.bitreverse.v8i64(<8 x i64>) +declare <16 x i32> @llvm.bitreverse.v16i32(<16 x i32>) +declare <32 x i16> @llvm.bitreverse.v32i16(<32 x i16>) +declare <64 x i8> @llvm.bitreverse.v64i8(<64 x i8>) + define <2 x i64> @var_bitreverse_v2i64(<2 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v2i64': ; SSE2: Found an estimated cost of 29 for instruction: %bitreverse ; SSE42: Found an estimated cost of 5 for instruction: %bitreverse ; AVX: Found an estimated cost of 5 for instruction: %bitreverse ; AVX2: Found an estimated cost of 5 for instruction: %bitreverse +; AVX512: Found an estimated cost of 5 for instruction: %bitreverse ; XOP: Found an estimated cost of 1 for instruction: %bitreverse %bitreverse = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a) ret <2 x i64> %bitreverse @@ -79,19 +89,34 @@ define <4 x i64> @var_bitreverse_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v4i64': ; SSE2: Found an estimated cost of 58 for instruction: %bitreverse ; SSE42: Found an estimated cost of 10 for instruction: %bitreverse -; AVX: Found an estimated cost of 10 for instruction: %bitreverse +; AVX: Found an estimated cost of 12 for instruction: %bitreverse ; AVX2: Found an estimated cost of 5 for instruction: %bitreverse +; AVX512: Found an estimated cost of 5 for instruction: %bitreverse ; XOP: Found an estimated cost of 4 for instruction: %bitreverse %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ret <4 x i64> %bitreverse } +define <8 x i64> @var_bitreverse_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v8i64': +; SSE2: Found an estimated cost of 116 for instruction: %bitreverse +; SSE42: Found an estimated cost of 20 for instruction: %bitreverse +; AVX: Found an estimated cost of 24 for instruction: %bitreverse +; AVX2: Found an estimated cost of 10 for instruction: %bitreverse +; AVX512F: Found an estimated cost of 36 for instruction: %bitreverse +; AVX512BW: Found an estimated cost of 5 for instruction: %bitreverse +; XOP: Found an estimated cost of 8 for instruction: %bitreverse + %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) + ret <8 x i64> %bitreverse +} + define <4 x i32> @var_bitreverse_v4i32(<4 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v4i32': ; SSE2: Found an estimated cost of 27 for instruction: %bitreverse ; SSE42: Found an estimated cost of 5 for instruction: %bitreverse ; AVX: Found an estimated cost of 5 for instruction: %bitreverse ; AVX2: Found an estimated cost of 5 for instruction: %bitreverse +; AVX512: Found an estimated cost of 5 for instruction: %bitreverse ; XOP: Found an estimated cost of 1 for instruction: %bitreverse %bitreverse = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a) ret <4 x i32> %bitreverse @@ -101,19 +126,34 @@ define <8 x i32> @var_bitreverse_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v8i32': ; SSE2: Found an estimated cost of 54 for instruction: %bitreverse ; SSE42: Found an estimated cost of 10 for instruction: %bitreverse -; AVX: Found an estimated cost of 10 for instruction: %bitreverse +; AVX: Found an estimated cost of 12 for instruction: %bitreverse ; AVX2: Found an estimated cost of 5 for instruction: %bitreverse +; AVX512: Found an estimated cost of 5 for instruction: %bitreverse ; XOP: Found an estimated cost of 4 for instruction: %bitreverse %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ret <8 x i32> %bitreverse } +define <16 x i32> @var_bitreverse_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v16i32': +; SSE2: Found an estimated cost of 108 for instruction: %bitreverse +; SSE42: Found an estimated cost of 20 for instruction: %bitreverse +; AVX: Found an estimated cost of 24 for instruction: %bitreverse +; AVX2: Found an estimated cost of 10 for instruction: %bitreverse +; AVX512F: Found an estimated cost of 24 for instruction: %bitreverse +; AVX512BW: Found an estimated cost of 5 for instruction: %bitreverse +; XOP: Found an estimated cost of 8 for instruction: %bitreverse + %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) + ret <16 x i32> %bitreverse +} + define <8 x i16> @var_bitreverse_v8i16(<8 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v8i16': ; SSE2: Found an estimated cost of 27 for instruction: %bitreverse ; SSE42: Found an estimated cost of 5 for instruction: %bitreverse ; AVX: Found an estimated cost of 5 for instruction: %bitreverse ; AVX2: Found an estimated cost of 5 for instruction: %bitreverse +; AVX512: Found an estimated cost of 5 for instruction: %bitreverse ; XOP: Found an estimated cost of 1 for instruction: %bitreverse %bitreverse = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a) ret <8 x i16> %bitreverse @@ -123,19 +163,34 @@ define <16 x i16> @var_bitreverse_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v16i16': ; SSE2: Found an estimated cost of 54 for instruction: %bitreverse ; SSE42: Found an estimated cost of 10 for instruction: %bitreverse -; AVX: Found an estimated cost of 10 for instruction: %bitreverse +; AVX: Found an estimated cost of 12 for instruction: %bitreverse ; AVX2: Found an estimated cost of 5 for instruction: %bitreverse +; AVX512: Found an estimated cost of 5 for instruction: %bitreverse ; XOP: Found an estimated cost of 4 for instruction: %bitreverse %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ret <16 x i16> %bitreverse } +define <32 x i16> @var_bitreverse_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v32i16': +; SSE2: Found an estimated cost of 108 for instruction: %bitreverse +; SSE42: Found an estimated cost of 20 for instruction: %bitreverse +; AVX: Found an estimated cost of 24 for instruction: %bitreverse +; AVX2: Found an estimated cost of 10 for instruction: %bitreverse +; AVX512F: Found an estimated cost of 10 for instruction: %bitreverse +; AVX512BW: Found an estimated cost of 5 for instruction: %bitreverse +; XOP: Found an estimated cost of 8 for instruction: %bitreverse + %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) + ret <32 x i16> %bitreverse +} + define <16 x i8> @var_bitreverse_v16i8(<16 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v16i8': ; SSE2: Found an estimated cost of 20 for instruction: %bitreverse ; SSE42: Found an estimated cost of 5 for instruction: %bitreverse ; AVX: Found an estimated cost of 5 for instruction: %bitreverse ; AVX2: Found an estimated cost of 5 for instruction: %bitreverse +; AVX512: Found an estimated cost of 5 for instruction: %bitreverse ; XOP: Found an estimated cost of 1 for instruction: %bitreverse %bitreverse = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a) ret <16 x i8> %bitreverse @@ -145,9 +200,23 @@ define <32 x i8> @var_bitreverse_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v32i8': ; SSE2: Found an estimated cost of 40 for instruction: %bitreverse ; SSE42: Found an estimated cost of 10 for instruction: %bitreverse -; AVX: Found an estimated cost of 10 for instruction: %bitreverse +; AVX: Found an estimated cost of 12 for instruction: %bitreverse ; AVX2: Found an estimated cost of 5 for instruction: %bitreverse +; AVX512: Found an estimated cost of 5 for instruction: %bitreverse ; XOP: Found an estimated cost of 4 for instruction: %bitreverse %bitreverse = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a) ret <32 x i8> %bitreverse } + +define <64 x i8> @var_bitreverse_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_bitreverse_v64i8': +; SSE2: Found an estimated cost of 80 for instruction: %bitreverse +; SSE42: Found an estimated cost of 20 for instruction: %bitreverse +; AVX: Found an estimated cost of 24 for instruction: %bitreverse +; AVX2: Found an estimated cost of 10 for instruction: %bitreverse +; AVX512F: Found an estimated cost of 10 for instruction: %bitreverse +; AVX512BW: Found an estimated cost of 5 for instruction: %bitreverse +; XOP: Found an estimated cost of 8 for instruction: %bitreverse + %bitreverse = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %a) + ret <64 x i8> %bitreverse +} diff --git a/test/Analysis/CostModel/X86/ctbits-cost.ll b/test/Analysis/CostModel/X86/ctbits-cost.ll deleted file mode 100644 index 8c7fa9d73151d90de25be690fcf213c9a316b3f3..0000000000000000000000000000000000000000 --- a/test/Analysis/CostModel/X86/ctbits-cost.ll +++ /dev/null @@ -1,587 +0,0 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT - -; Verify the cost of scalar population count instructions. - -declare i64 @llvm.ctpop.i64(i64) -declare i32 @llvm.ctpop.i32(i32) -declare i16 @llvm.ctpop.i16(i16) -declare i8 @llvm.ctpop.i8(i8) - -define i64 @var_ctpop_i64(i64 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i64': -; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop -; POPCNT: Found an estimated cost of 1 for instruction: %ctpop - %ctpop = call i64 @llvm.ctpop.i64(i64 %a) - ret i64 %ctpop -} - -define i32 @var_ctpop_i32(i32 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i32': -; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop -; POPCNT: Found an estimated cost of 1 for instruction: %ctpop - %ctpop = call i32 @llvm.ctpop.i32(i32 %a) - ret i32 %ctpop -} - -define i16 @var_ctpop_i16(i16 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i16': -; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop -; POPCNT: Found an estimated cost of 1 for instruction: %ctpop - %ctpop = call i16 @llvm.ctpop.i16(i16 %a) - ret i16 %ctpop -} - -define i8 @var_ctpop_i8(i8 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i8': -; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop -; POPCNT: Found an estimated cost of 1 for instruction: %ctpop - %ctpop = call i8 @llvm.ctpop.i8(i8 %a) - ret i8 %ctpop -} - -; Verify the cost of vector population count instructions. - -declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) -declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) -declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) -declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) - -declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) -declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) -declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) -declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) - -define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v2i64': -; SSE2: Found an estimated cost of 12 for instruction: %ctpop -; SSE42: Found an estimated cost of 7 for instruction: %ctpop -; AVX: Found an estimated cost of 7 for instruction: %ctpop - %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) - ret <2 x i64> %ctpop -} - -define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i64': -; SSE2: Found an estimated cost of 24 for instruction: %ctpop -; SSE42: Found an estimated cost of 14 for instruction: %ctpop -; AVX1: Found an estimated cost of 14 for instruction: %ctpop -; AVX2: Found an estimated cost of 7 for instruction: %ctpop - %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a) - ret <4 x i64> %ctpop -} - -define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i32': -; SSE2: Found an estimated cost of 15 for instruction: %ctpop -; SSE42: Found an estimated cost of 11 for instruction: %ctpop -; AVX: Found an estimated cost of 11 for instruction: %ctpop - %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a) - ret <4 x i32> %ctpop -} - -define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i32': -; SSE2: Found an estimated cost of 30 for instruction: %ctpop -; SSE42: Found an estimated cost of 22 for instruction: %ctpop -; AVX1: Found an estimated cost of 22 for instruction: %ctpop -; AVX2: Found an estimated cost of 11 for instruction: %ctpop - %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a) - ret <8 x i32> %ctpop -} - -define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i16': -; SSE2: Found an estimated cost of 13 for instruction: %ctpop -; SSE42: Found an estimated cost of 9 for instruction: %ctpop -; AVX: Found an estimated cost of 9 for instruction: %ctpop - %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a) - ret <8 x i16> %ctpop -} - -define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i16': -; SSE2: Found an estimated cost of 26 for instruction: %ctpop -; SSE42: Found an estimated cost of 18 for instruction: %ctpop -; AVX1: Found an estimated cost of 18 for instruction: %ctpop -; AVX2: Found an estimated cost of 9 for instruction: %ctpop - %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a) - ret <16 x i16> %ctpop -} - -define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i8': -; SSE2: Found an estimated cost of 10 for instruction: %ctpop -; SSE42: Found an estimated cost of 6 for instruction: %ctpop -; AVX: Found an estimated cost of 6 for instruction: %ctpop - %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) - ret <16 x i8> %ctpop -} - -define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i8': -; SSE2: Found an estimated cost of 20 for instruction: %ctpop -; SSE42: Found an estimated cost of 12 for instruction: %ctpop -; AVX1: Found an estimated cost of 12 for instruction: %ctpop -; AVX2: Found an estimated cost of 6 for instruction: %ctpop - %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a) - ret <32 x i8> %ctpop -} - -; Verify the cost of scalar leading zero count instructions. - -declare i64 @llvm.ctlz.i64(i64, i1) -declare i32 @llvm.ctlz.i32(i32, i1) -declare i16 @llvm.ctlz.i16(i16, i1) -declare i8 @llvm.ctlz.i8(i8, i1) - -define i64 @var_ctlz_i64(i64 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64': -; CHECK: Found an estimated cost of 1 for instruction: %ctlz - %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 0) - ret i64 %ctlz -} - -define i64 @var_ctlz_i64u(i64 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64u': -; CHECK: Found an estimated cost of 1 for instruction: %ctlz - %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 1) - ret i64 %ctlz -} - -define i32 @var_ctlz_i32(i32 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32': -; CHECK: Found an estimated cost of 1 for instruction: %ctlz - %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 0) - ret i32 %ctlz -} - -define i32 @var_ctlz_i32u(i32 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32u': -; CHECK: Found an estimated cost of 1 for instruction: %ctlz - %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 1) - ret i32 %ctlz -} - -define i16 @var_ctlz_i16(i16 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16': -; CHECK: Found an estimated cost of 1 for instruction: %ctlz - %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 0) - ret i16 %ctlz -} - -define i16 @var_ctlz_i16u(i16 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16u': -; CHECK: Found an estimated cost of 1 for instruction: %ctlz - %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 1) - ret i16 %ctlz -} - -define i8 @var_ctlz_i8(i8 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8': -; CHECK: Found an estimated cost of 1 for instruction: %ctlz - %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 0) - ret i8 %ctlz -} - -define i8 @var_ctlz_i8u(i8 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8u': -; CHECK: Found an estimated cost of 1 for instruction: %ctlz - %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 1) - ret i8 %ctlz -} - -; Verify the cost of vector leading zero count instructions. - -declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) -declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) -declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) -declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) - -declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) -declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) -declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) -declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) - -define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64': -; SSE2: Found an estimated cost of 25 for instruction: %ctlz -; SSE42: Found an estimated cost of 23 for instruction: %ctlz -; AVX: Found an estimated cost of 23 for instruction: %ctlz - %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0) - ret <2 x i64> %ctlz -} - -define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64u': -; SSE2: Found an estimated cost of 25 for instruction: %ctlz -; SSE42: Found an estimated cost of 23 for instruction: %ctlz -; AVX: Found an estimated cost of 23 for instruction: %ctlz - %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1) - ret <2 x i64> %ctlz -} - -define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64': -; SSE2: Found an estimated cost of 50 for instruction: %ctlz -; SSE42: Found an estimated cost of 46 for instruction: %ctlz -; AVX1: Found an estimated cost of 46 for instruction: %ctlz -; AVX2: Found an estimated cost of 23 for instruction: %ctlz - %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0) - ret <4 x i64> %ctlz -} - -define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64u': -; SSE2: Found an estimated cost of 50 for instruction: %ctlz -; SSE42: Found an estimated cost of 46 for instruction: %ctlz -; AVX1: Found an estimated cost of 46 for instruction: %ctlz -; AVX2: Found an estimated cost of 23 for instruction: %ctlz - %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1) - ret <4 x i64> %ctlz -} - -define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32': -; SSE2: Found an estimated cost of 26 for instruction: %ctlz -; SSE42: Found an estimated cost of 18 for instruction: %ctlz -; AVX: Found an estimated cost of 18 for instruction: %ctlz - %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0) - ret <4 x i32> %ctlz -} - -define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32u': -; SSE2: Found an estimated cost of 26 for instruction: %ctlz -; SSE42: Found an estimated cost of 18 for instruction: %ctlz -; AVX: Found an estimated cost of 18 for instruction: %ctlz - %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1) - ret <4 x i32> %ctlz -} - -define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32': -; SSE2: Found an estimated cost of 52 for instruction: %ctlz -; SSE42: Found an estimated cost of 36 for instruction: %ctlz -; AVX1: Found an estimated cost of 36 for instruction: %ctlz -; AVX2: Found an estimated cost of 18 for instruction: %ctlz - %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0) - ret <8 x i32> %ctlz -} - -define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32u': -; SSE2: Found an estimated cost of 52 for instruction: %ctlz -; SSE42: Found an estimated cost of 36 for instruction: %ctlz -; AVX1: Found an estimated cost of 36 for instruction: %ctlz -; AVX2: Found an estimated cost of 18 for instruction: %ctlz - %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1) - ret <8 x i32> %ctlz -} - -define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16': -; SSE2: Found an estimated cost of 20 for instruction: %ctlz -; SSE42: Found an estimated cost of 14 for instruction: %ctlz -; AVX: Found an estimated cost of 14 for instruction: %ctlz - %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0) - ret <8 x i16> %ctlz -} - -define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16u': -; SSE2: Found an estimated cost of 20 for instruction: %ctlz -; SSE42: Found an estimated cost of 14 for instruction: %ctlz -; AVX: Found an estimated cost of 14 for instruction: %ctlz - %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1) - ret <8 x i16> %ctlz -} - -define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16': -; SSE2: Found an estimated cost of 40 for instruction: %ctlz -; SSE42: Found an estimated cost of 28 for instruction: %ctlz -; AVX1: Found an estimated cost of 28 for instruction: %ctlz -; AVX2: Found an estimated cost of 14 for instruction: %ctlz - %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0) - ret <16 x i16> %ctlz -} - -define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16u': -; SSE2: Found an estimated cost of 40 for instruction: %ctlz -; SSE42: Found an estimated cost of 28 for instruction: %ctlz -; AVX1: Found an estimated cost of 28 for instruction: %ctlz -; AVX2: Found an estimated cost of 14 for instruction: %ctlz - %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1) - ret <16 x i16> %ctlz -} - -define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8': -; SSE2: Found an estimated cost of 17 for instruction: %ctlz -; SSE42: Found an estimated cost of 9 for instruction: %ctlz -; AVX: Found an estimated cost of 9 for instruction: %ctlz - %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0) - ret <16 x i8> %ctlz -} - -define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8u': -; SSE2: Found an estimated cost of 17 for instruction: %ctlz -; SSE42: Found an estimated cost of 9 for instruction: %ctlz -; AVX: Found an estimated cost of 9 for instruction: %ctlz - %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1) - ret <16 x i8> %ctlz -} - -define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8': -; SSE2: Found an estimated cost of 34 for instruction: %ctlz -; SSE42: Found an estimated cost of 18 for instruction: %ctlz -; AVX1: Found an estimated cost of 18 for instruction: %ctlz -; AVX2: Found an estimated cost of 9 for instruction: %ctlz - %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0) - ret <32 x i8> %ctlz -} - -define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8u': -; SSE2: Found an estimated cost of 34 for instruction: %ctlz -; SSE42: Found an estimated cost of 18 for instruction: %ctlz -; AVX1: Found an estimated cost of 18 for instruction: %ctlz -; AVX2: Found an estimated cost of 9 for instruction: %ctlz - %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1) - ret <32 x i8> %ctlz -} - -; Verify the cost of scalar trailing zero count instructions. - -declare i64 @llvm.cttz.i64(i64, i1) -declare i32 @llvm.cttz.i32(i32, i1) -declare i16 @llvm.cttz.i16(i16, i1) -declare i8 @llvm.cttz.i8(i8, i1) - -define i64 @var_cttz_i64(i64 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64': -; CHECK: Found an estimated cost of 1 for instruction: %cttz - %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 0) - ret i64 %cttz -} - -define i64 @var_cttz_i64u(i64 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64u': -; CHECK: Found an estimated cost of 1 for instruction: %cttz - %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 1) - ret i64 %cttz -} - -define i32 @var_cttz_i32(i32 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32': -; CHECK: Found an estimated cost of 1 for instruction: %cttz - %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0) - ret i32 %cttz -} - -define i32 @var_cttz_i32u(i32 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32u': -; CHECK: Found an estimated cost of 1 for instruction: %cttz - %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 1) - ret i32 %cttz -} - -define i16 @var_cttz_i16(i16 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16': -; CHECK: Found an estimated cost of 1 for instruction: %cttz - %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 0) - ret i16 %cttz -} - -define i16 @var_cttz_i16u(i16 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16u': -; CHECK: Found an estimated cost of 1 for instruction: %cttz - %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 1) - ret i16 %cttz -} - -define i8 @var_cttz_i8(i8 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8': -; CHECK: Found an estimated cost of 1 for instruction: %cttz - %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 0) - ret i8 %cttz -} - -define i8 @var_cttz_i8u(i8 %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8u': -; CHECK: Found an estimated cost of 1 for instruction: %cttz - %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 1) - ret i8 %cttz -} - -; Verify the cost of vector trailing zero count instructions. - -declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) -declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) -declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1) -declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1) - -declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) -declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1) -declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1) -declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1) - -define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64': -; SSE2: Found an estimated cost of 14 for instruction: %cttz -; SSE42: Found an estimated cost of 10 for instruction: %cttz -; AVX: Found an estimated cost of 10 for instruction: %cttz - %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0) - ret <2 x i64> %cttz -} - -define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64u': -; SSE2: Found an estimated cost of 14 for instruction: %cttz -; SSE42: Found an estimated cost of 10 for instruction: %cttz -; AVX: Found an estimated cost of 10 for instruction: %cttz - %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1) - ret <2 x i64> %cttz -} - -define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64': -; SSE2: Found an estimated cost of 28 for instruction: %cttz -; SSE42: Found an estimated cost of 20 for instruction: %cttz -; AVX1: Found an estimated cost of 20 for instruction: %cttz -; AVX2: Found an estimated cost of 10 for instruction: %cttz - %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0) - ret <4 x i64> %cttz -} - -define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64u': -; SSE2: Found an estimated cost of 28 for instruction: %cttz -; SSE42: Found an estimated cost of 20 for instruction: %cttz -; AVX1: Found an estimated cost of 20 for instruction: %cttz -; AVX2: Found an estimated cost of 10 for instruction: %cttz - %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1) - ret <4 x i64> %cttz -} - -define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32': -; SSE2: Found an estimated cost of 18 for instruction: %cttz -; SSE42: Found an estimated cost of 14 for instruction: %cttz -; AVX: Found an estimated cost of 14 for instruction: %cttz - %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0) - ret <4 x i32> %cttz -} - -define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32u': -; SSE2: Found an estimated cost of 18 for instruction: %cttz -; SSE42: Found an estimated cost of 14 for instruction: %cttz -; AVX: Found an estimated cost of 14 for instruction: %cttz - %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1) - ret <4 x i32> %cttz -} - -define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32': -; SSE2: Found an estimated cost of 36 for instruction: %cttz -; SSE42: Found an estimated cost of 28 for instruction: %cttz -; AVX1: Found an estimated cost of 28 for instruction: %cttz -; AVX2: Found an estimated cost of 14 for instruction: %cttz - %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0) - ret <8 x i32> %cttz -} - -define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32u': -; SSE2: Found an estimated cost of 36 for instruction: %cttz -; SSE42: Found an estimated cost of 28 for instruction: %cttz -; AVX1: Found an estimated cost of 28 for instruction: %cttz -; AVX2: Found an estimated cost of 14 for instruction: %cttz - %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1) - ret <8 x i32> %cttz -} - -define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16': -; SSE2: Found an estimated cost of 16 for instruction: %cttz -; SSE42: Found an estimated cost of 12 for instruction: %cttz -; AVX: Found an estimated cost of 12 for instruction: %cttz - %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0) - ret <8 x i16> %cttz -} - -define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16u': -; SSE2: Found an estimated cost of 16 for instruction: %cttz -; SSE42: Found an estimated cost of 12 for instruction: %cttz -; AVX: Found an estimated cost of 12 for instruction: %cttz - %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1) - ret <8 x i16> %cttz -} - -define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16': -; SSE2: Found an estimated cost of 32 for instruction: %cttz -; SSE42: Found an estimated cost of 24 for instruction: %cttz -; AVX1: Found an estimated cost of 24 for instruction: %cttz -; AVX2: Found an estimated cost of 12 for instruction: %cttz - %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0) - ret <16 x i16> %cttz -} - -define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16u': -; SSE2: Found an estimated cost of 32 for instruction: %cttz -; SSE42: Found an estimated cost of 24 for instruction: %cttz -; AVX1: Found an estimated cost of 24 for instruction: %cttz -; AVX2: Found an estimated cost of 12 for instruction: %cttz - %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1) - ret <16 x i16> %cttz -} - -define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8': -; SSE2: Found an estimated cost of 13 for instruction: %cttz -; SSE42: Found an estimated cost of 9 for instruction: %cttz -; AVX: Found an estimated cost of 9 for instruction: %cttz - %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0) - ret <16 x i8> %cttz -} - -define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8u': -; SSE2: Found an estimated cost of 13 for instruction: %cttz -; SSE42: Found an estimated cost of 9 for instruction: %cttz -; AVX: Found an estimated cost of 9 for instruction: %cttz - %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1) - ret <16 x i8> %cttz -} - -define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8': -; SSE2: Found an estimated cost of 26 for instruction: %cttz -; SSE42: Found an estimated cost of 18 for instruction: %cttz -; AVX1: Found an estimated cost of 18 for instruction: %cttz -; AVX2: Found an estimated cost of 9 for instruction: %cttz - %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0) - ret <32 x i8> %cttz -} - -define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { -; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8u': -; SSE2: Found an estimated cost of 26 for instruction: %cttz -; SSE42: Found an estimated cost of 18 for instruction: %cttz -; AVX1: Found an estimated cost of 18 for instruction: %cttz -; AVX2: Found an estimated cost of 9 for instruction: %cttz - %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1) - ret <32 x i8> %cttz -} diff --git a/test/Analysis/CostModel/X86/ctlz.ll b/test/Analysis/CostModel/X86/ctlz.ll new file mode 100644 index 0000000000000000000000000000000000000000..769d73915e3683a6fa34e111549848ae8becd499 --- /dev/null +++ b/test/Analysis/CostModel/X86/ctlz.ll @@ -0,0 +1,377 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -mattr=-avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -mattr=-avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -mattr=+avx512cd -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512CD + +; Verify the cost of scalar leading zero count instructions. + +declare i64 @llvm.ctlz.i64(i64, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i16 @llvm.ctlz.i16(i16, i1) +declare i8 @llvm.ctlz.i8(i8, i1) + +define i64 @var_ctlz_i64(i64 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64': +; CHECK: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 0) + ret i64 %ctlz +} + +define i64 @var_ctlz_i64u(i64 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i64u': +; CHECK: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call i64 @llvm.ctlz.i64(i64 %a, i1 1) + ret i64 %ctlz +} + +define i32 @var_ctlz_i32(i32 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32': +; CHECK: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 0) + ret i32 %ctlz +} + +define i32 @var_ctlz_i32u(i32 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i32u': +; CHECK: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call i32 @llvm.ctlz.i32(i32 %a, i1 1) + ret i32 %ctlz +} + +define i16 @var_ctlz_i16(i16 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16': +; CHECK: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 0) + ret i16 %ctlz +} + +define i16 @var_ctlz_i16u(i16 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i16u': +; CHECK: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call i16 @llvm.ctlz.i16(i16 %a, i1 1) + ret i16 %ctlz +} + +define i8 @var_ctlz_i8(i8 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8': +; CHECK: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 0) + ret i8 %ctlz +} + +define i8 @var_ctlz_i8u(i8 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_i8u': +; CHECK: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call i8 @llvm.ctlz.i8(i8 %a, i1 1) + ret i8 %ctlz +} + +; Verify the cost of vector leading zero count instructions. + +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) + +declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) +declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) +declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) +declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) + +declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) +declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) +declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1) +declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1) + +define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64': +; SSE2: Found an estimated cost of 25 for instruction: %ctlz +; SSE42: Found an estimated cost of 23 for instruction: %ctlz +; AVX: Found an estimated cost of 23 for instruction: %ctlz +; AVX512: Found an estimated cost of 23 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0) + ret <2 x i64> %ctlz +} + +define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v2i64u': +; SSE2: Found an estimated cost of 25 for instruction: %ctlz +; SSE42: Found an estimated cost of 23 for instruction: %ctlz +; AVX: Found an estimated cost of 23 for instruction: %ctlz +; AVX512: Found an estimated cost of 23 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1) + ret <2 x i64> %ctlz +} + +define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64': +; SSE2: Found an estimated cost of 50 for instruction: %ctlz +; SSE42: Found an estimated cost of 46 for instruction: %ctlz +; AVX1: Found an estimated cost of 48 for instruction: %ctlz +; AVX2: Found an estimated cost of 23 for instruction: %ctlz +; AVX512: Found an estimated cost of 23 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0) + ret <4 x i64> %ctlz +} + +define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i64u': +; SSE2: Found an estimated cost of 50 for instruction: %ctlz +; SSE42: Found an estimated cost of 46 for instruction: %ctlz +; AVX1: Found an estimated cost of 48 for instruction: %ctlz +; AVX2: Found an estimated cost of 23 for instruction: %ctlz +; AVX512: Found an estimated cost of 23 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1) + ret <4 x i64> %ctlz +} + +define <8 x i64> @var_ctlz_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i64': +; SSE2: Found an estimated cost of 100 for instruction: %ctlz +; SSE42: Found an estimated cost of 92 for instruction: %ctlz +; AVX1: Found an estimated cost of 96 for instruction: %ctlz +; AVX2: Found an estimated cost of 46 for instruction: %ctlz +; AVX512F: Found an estimated cost of 29 for instruction: %ctlz +; AVX512BW: Found an estimated cost of 23 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 0) + ret <8 x i64> %ctlz +} + +define <8 x i64> @var_ctlz_v8i64u(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i64u': +; SSE2: Found an estimated cost of 100 for instruction: %ctlz +; SSE42: Found an estimated cost of 92 for instruction: %ctlz +; AVX1: Found an estimated cost of 96 for instruction: %ctlz +; AVX2: Found an estimated cost of 46 for instruction: %ctlz +; AVX512F: Found an estimated cost of 29 for instruction: %ctlz +; AVX512BW: Found an estimated cost of 23 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 1) + ret <8 x i64> %ctlz +} + +define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32': +; SSE2: Found an estimated cost of 26 for instruction: %ctlz +; SSE42: Found an estimated cost of 18 for instruction: %ctlz +; AVX: Found an estimated cost of 18 for instruction: %ctlz +; AVX512: Found an estimated cost of 18 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0) + ret <4 x i32> %ctlz +} + +define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v4i32u': +; SSE2: Found an estimated cost of 26 for instruction: %ctlz +; SSE42: Found an estimated cost of 18 for instruction: %ctlz +; AVX: Found an estimated cost of 18 for instruction: %ctlz +; AVX512: Found an estimated cost of 18 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1) + ret <4 x i32> %ctlz +} + +define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32': +; SSE2: Found an estimated cost of 52 for instruction: %ctlz +; SSE42: Found an estimated cost of 36 for instruction: %ctlz +; AVX1: Found an estimated cost of 38 for instruction: %ctlz +; AVX2: Found an estimated cost of 18 for instruction: %ctlz +; AVX512: Found an estimated cost of 18 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0) + ret <8 x i32> %ctlz +} + +define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i32u': +; SSE2: Found an estimated cost of 52 for instruction: %ctlz +; SSE42: Found an estimated cost of 36 for instruction: %ctlz +; AVX1: Found an estimated cost of 38 for instruction: %ctlz +; AVX2: Found an estimated cost of 18 for instruction: %ctlz +; AVX512: Found an estimated cost of 18 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1) + ret <8 x i32> %ctlz +} + +define <16 x i32> @var_ctlz_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i32': +; SSE2: Found an estimated cost of 104 for instruction: %ctlz +; SSE42: Found an estimated cost of 72 for instruction: %ctlz +; AVX1: Found an estimated cost of 76 for instruction: %ctlz +; AVX2: Found an estimated cost of 36 for instruction: %ctlz +; AVX512F: Found an estimated cost of 35 for instruction: %ctlz +; AVX512BW: Found an estimated cost of 22 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 0) + ret <16 x i32> %ctlz +} + +define <16 x i32> @var_ctlz_v16i32u(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i32u': +; SSE2: Found an estimated cost of 104 for instruction: %ctlz +; SSE42: Found an estimated cost of 72 for instruction: %ctlz +; AVX1: Found an estimated cost of 76 for instruction: %ctlz +; AVX2: Found an estimated cost of 36 for instruction: %ctlz +; AVX512F: Found an estimated cost of 35 for instruction: %ctlz +; AVX512BW: Found an estimated cost of 22 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 1 for instruction: %ctlz + %ctlz = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 1) + ret <16 x i32> %ctlz +} + +define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16': +; SSE2: Found an estimated cost of 20 for instruction: %ctlz +; SSE42: Found an estimated cost of 14 for instruction: %ctlz +; AVX: Found an estimated cost of 14 for instruction: %ctlz +; AVX512: Found an estimated cost of 14 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz + %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0) + ret <8 x i16> %ctlz +} + +define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v8i16u': +; SSE2: Found an estimated cost of 20 for instruction: %ctlz +; SSE42: Found an estimated cost of 14 for instruction: %ctlz +; AVX: Found an estimated cost of 14 for instruction: %ctlz +; AVX512: Found an estimated cost of 14 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz + %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1) + ret <8 x i16> %ctlz +} + +define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16': +; SSE2: Found an estimated cost of 40 for instruction: %ctlz +; SSE42: Found an estimated cost of 28 for instruction: %ctlz +; AVX1: Found an estimated cost of 30 for instruction: %ctlz +; AVX2: Found an estimated cost of 14 for instruction: %ctlz +; AVX512: Found an estimated cost of 14 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz + %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0) + ret <16 x i16> %ctlz +} + +define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i16u': +; SSE2: Found an estimated cost of 40 for instruction: %ctlz +; SSE42: Found an estimated cost of 28 for instruction: %ctlz +; AVX1: Found an estimated cost of 30 for instruction: %ctlz +; AVX2: Found an estimated cost of 14 for instruction: %ctlz +; AVX512: Found an estimated cost of 14 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz + %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1) + ret <16 x i16> %ctlz +} + +define <32 x i16> @var_ctlz_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i16': +; SSE2: Found an estimated cost of 80 for instruction: %ctlz +; SSE42: Found an estimated cost of 56 for instruction: %ctlz +; AVX1: Found an estimated cost of 60 for instruction: %ctlz +; AVX2: Found an estimated cost of 28 for instruction: %ctlz +; AVX512F: Found an estimated cost of 28 for instruction: %ctlz +; AVX512BW: Found an estimated cost of 18 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 8 for instruction: %ctlz + %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 0) + ret <32 x i16> %ctlz +} + +define <32 x i16> @var_ctlz_v32i16u(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i16u': +; SSE2: Found an estimated cost of 80 for instruction: %ctlz +; SSE42: Found an estimated cost of 56 for instruction: %ctlz +; AVX1: Found an estimated cost of 60 for instruction: %ctlz +; AVX2: Found an estimated cost of 28 for instruction: %ctlz +; AVX512F: Found an estimated cost of 28 for instruction: %ctlz +; AVX512BW: Found an estimated cost of 18 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 8 for instruction: %ctlz + %ctlz = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %a, i1 1) + ret <32 x i16> %ctlz +} + +define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8': +; SSE2: Found an estimated cost of 17 for instruction: %ctlz +; SSE42: Found an estimated cost of 9 for instruction: %ctlz +; AVX: Found an estimated cost of 9 for instruction: %ctlz +; AVX512: Found an estimated cost of 9 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz + %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0) + ret <16 x i8> %ctlz +} + +define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v16i8u': +; SSE2: Found an estimated cost of 17 for instruction: %ctlz +; SSE42: Found an estimated cost of 9 for instruction: %ctlz +; AVX: Found an estimated cost of 9 for instruction: %ctlz +; AVX512: Found an estimated cost of 9 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 4 for instruction: %ctlz + %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1) + ret <16 x i8> %ctlz +} + +define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8': +; SSE2: Found an estimated cost of 34 for instruction: %ctlz +; SSE42: Found an estimated cost of 18 for instruction: %ctlz +; AVX1: Found an estimated cost of 20 for instruction: %ctlz +; AVX2: Found an estimated cost of 9 for instruction: %ctlz +; AVX512: Found an estimated cost of 9 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 10 for instruction: %ctlz + %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0) + ret <32 x i8> %ctlz +} + +define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v32i8u': +; SSE2: Found an estimated cost of 34 for instruction: %ctlz +; SSE42: Found an estimated cost of 18 for instruction: %ctlz +; AVX1: Found an estimated cost of 20 for instruction: %ctlz +; AVX2: Found an estimated cost of 9 for instruction: %ctlz +; AVX512: Found an estimated cost of 9 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 10 for instruction: %ctlz + %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1) + ret <32 x i8> %ctlz +} + +define <64 x i8> @var_ctlz_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v64i8': +; SSE2: Found an estimated cost of 68 for instruction: %ctlz +; SSE42: Found an estimated cost of 36 for instruction: %ctlz +; AVX1: Found an estimated cost of 40 for instruction: %ctlz +; AVX2: Found an estimated cost of 18 for instruction: %ctlz +; AVX512F: Found an estimated cost of 18 for instruction: %ctlz +; AVX512BW: Found an estimated cost of 17 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 20 for instruction: %ctlz + %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 0) + ret <64 x i8> %ctlz +} + +define <64 x i8> @var_ctlz_v64i8u(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctlz_v64i8u': +; SSE2: Found an estimated cost of 68 for instruction: %ctlz +; SSE42: Found an estimated cost of 36 for instruction: %ctlz +; AVX1: Found an estimated cost of 40 for instruction: %ctlz +; AVX2: Found an estimated cost of 18 for instruction: %ctlz +; AVX512F: Found an estimated cost of 18 for instruction: %ctlz +; AVX512BW: Found an estimated cost of 17 for instruction: %ctlz +; AVX512CD: Found an estimated cost of 20 for instruction: %ctlz + %ctlz = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %a, i1 1) + ret <64 x i8> %ctlz +} diff --git a/test/Analysis/CostModel/X86/ctpop.ll b/test/Analysis/CostModel/X86/ctpop.ll new file mode 100644 index 0000000000000000000000000000000000000000..e6a14e98e37a5df21619adbf636f7490a429a8bc --- /dev/null +++ b/test/Analysis/CostModel/X86/ctpop.ll @@ -0,0 +1,196 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 -check-prefix=NOPOPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F -check-prefix=POPCNT +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW -check-prefix=POPCNT + +; Verify the cost of scalar population count instructions. + +declare i64 @llvm.ctpop.i64(i64) +declare i32 @llvm.ctpop.i32(i32) +declare i16 @llvm.ctpop.i16(i16) +declare i8 @llvm.ctpop.i8(i8) + +define i64 @var_ctpop_i64(i64 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i64': +; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop +; POPCNT: Found an estimated cost of 1 for instruction: %ctpop + %ctpop = call i64 @llvm.ctpop.i64(i64 %a) + ret i64 %ctpop +} + +define i32 @var_ctpop_i32(i32 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i32': +; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop +; POPCNT: Found an estimated cost of 1 for instruction: %ctpop + %ctpop = call i32 @llvm.ctpop.i32(i32 %a) + ret i32 %ctpop +} + +define i16 @var_ctpop_i16(i16 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i16': +; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop +; POPCNT: Found an estimated cost of 1 for instruction: %ctpop + %ctpop = call i16 @llvm.ctpop.i16(i16 %a) + ret i16 %ctpop +} + +define i8 @var_ctpop_i8(i8 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_i8': +; NOPOPCNT: Found an estimated cost of 4 for instruction: %ctpop +; POPCNT: Found an estimated cost of 1 for instruction: %ctpop + %ctpop = call i8 @llvm.ctpop.i8(i8 %a) + ret i8 %ctpop +} + +; Verify the cost of vector population count instructions. + +declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) +declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) + +declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) +declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) +declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) +declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) + +declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) +declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) +declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>) +declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>) + +define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v2i64': +; SSE2: Found an estimated cost of 12 for instruction: %ctpop +; SSE42: Found an estimated cost of 7 for instruction: %ctpop +; AVX: Found an estimated cost of 7 for instruction: %ctpop +; AVX512: Found an estimated cost of 7 for instruction: %ctpop + %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) + ret <2 x i64> %ctpop +} + +define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i64': +; SSE2: Found an estimated cost of 24 for instruction: %ctpop +; SSE42: Found an estimated cost of 14 for instruction: %ctpop +; AVX1: Found an estimated cost of 16 for instruction: %ctpop +; AVX2: Found an estimated cost of 7 for instruction: %ctpop +; AVX512: Found an estimated cost of 7 for instruction: %ctpop + %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a) + ret <4 x i64> %ctpop +} + +define <8 x i64> @var_ctpop_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i64': +; SSE2: Found an estimated cost of 48 for instruction: %ctpop +; SSE42: Found an estimated cost of 28 for instruction: %ctpop +; AVX1: Found an estimated cost of 32 for instruction: %ctpop +; AVX2: Found an estimated cost of 14 for instruction: %ctpop +; AVX512F: Found an estimated cost of 16 for instruction: %ctpop +; AVX512BW: Found an estimated cost of 7 for instruction: %ctpop + %ctpop = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a) + ret <8 x i64> %ctpop +} + +define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v4i32': +; SSE2: Found an estimated cost of 15 for instruction: %ctpop +; SSE42: Found an estimated cost of 11 for instruction: %ctpop +; AVX: Found an estimated cost of 11 for instruction: %ctpop +; AVX512: Found an estimated cost of 11 for instruction: %ctpop + %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a) + ret <4 x i32> %ctpop +} + +define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i32': +; SSE2: Found an estimated cost of 30 for instruction: %ctpop +; SSE42: Found an estimated cost of 22 for instruction: %ctpop +; AVX1: Found an estimated cost of 24 for instruction: %ctpop +; AVX2: Found an estimated cost of 11 for instruction: %ctpop +; AVX512: Found an estimated cost of 11 for instruction: %ctpop + %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a) + ret <8 x i32> %ctpop +} + +define <16 x i32> @var_ctpop_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i32': +; SSE2: Found an estimated cost of 60 for instruction: %ctpop +; SSE42: Found an estimated cost of 44 for instruction: %ctpop +; AVX1: Found an estimated cost of 48 for instruction: %ctpop +; AVX2: Found an estimated cost of 22 for instruction: %ctpop +; AVX512F: Found an estimated cost of 24 for instruction: %ctpop +; AVX512BW: Found an estimated cost of 11 for instruction: %ctpop + %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a) + ret <16 x i32> %ctpop +} + +define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v8i16': +; SSE2: Found an estimated cost of 13 for instruction: %ctpop +; SSE42: Found an estimated cost of 9 for instruction: %ctpop +; AVX: Found an estimated cost of 9 for instruction: %ctpop +; AVX512: Found an estimated cost of 9 for instruction: %ctpop + %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a) + ret <8 x i16> %ctpop +} + +define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i16': +; SSE2: Found an estimated cost of 26 for instruction: %ctpop +; SSE42: Found an estimated cost of 18 for instruction: %ctpop +; AVX1: Found an estimated cost of 20 for instruction: %ctpop +; AVX2: Found an estimated cost of 9 for instruction: %ctpop +; AVX512: Found an estimated cost of 9 for instruction: %ctpop + %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a) + ret <16 x i16> %ctpop +} + +define <32 x i16> @var_ctpop_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i16': +; SSE2: Found an estimated cost of 52 for instruction: %ctpop +; SSE42: Found an estimated cost of 36 for instruction: %ctpop +; AVX1: Found an estimated cost of 40 for instruction: %ctpop +; AVX2: Found an estimated cost of 18 for instruction: %ctpop +; AVX512F: Found an estimated cost of 18 for instruction: %ctpop +; AVX512BW: Found an estimated cost of 9 for instruction: %ctpop + %ctpop = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %a) + ret <32 x i16> %ctpop +} + +define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v16i8': +; SSE2: Found an estimated cost of 10 for instruction: %ctpop +; SSE42: Found an estimated cost of 6 for instruction: %ctpop +; AVX: Found an estimated cost of 6 for instruction: %ctpop +; AVX512: Found an estimated cost of 6 for instruction: %ctpop + %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) + ret <16 x i8> %ctpop +} + +define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v32i8': +; SSE2: Found an estimated cost of 20 for instruction: %ctpop +; SSE42: Found an estimated cost of 12 for instruction: %ctpop +; AVX1: Found an estimated cost of 14 for instruction: %ctpop +; AVX2: Found an estimated cost of 6 for instruction: %ctpop +; AVX512: Found an estimated cost of 6 for instruction: %ctpop + %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a) + ret <32 x i8> %ctpop +} + +define <64 x i8> @var_ctpop_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_ctpop_v64i8': +; SSE2: Found an estimated cost of 40 for instruction: %ctpop +; SSE42: Found an estimated cost of 24 for instruction: %ctpop +; AVX1: Found an estimated cost of 28 for instruction: %ctpop +; AVX2: Found an estimated cost of 12 for instruction: %ctpop +; AVX512F: Found an estimated cost of 12 for instruction: %ctpop +; AVX512BW: Found an estimated cost of 6 for instruction: %ctpop + %ctpop = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %a) + ret <64 x i8> %ctpop +} diff --git a/test/Analysis/CostModel/X86/cttz.ll b/test/Analysis/CostModel/X86/cttz.ll new file mode 100644 index 0000000000000000000000000000000000000000..e7a39781385e2fa8f951bf916d08a7c2a13a6e12 --- /dev/null +++ b/test/Analysis/CostModel/X86/cttz.ll @@ -0,0 +1,352 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512F +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX512 -check-prefix=AVX512BW + +; Verify the cost of scalar trailing zero count instructions. + +declare i64 @llvm.cttz.i64(i64, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i16 @llvm.cttz.i16(i16, i1) +declare i8 @llvm.cttz.i8(i8, i1) + +define i64 @var_cttz_i64(i64 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64': +; CHECK: Found an estimated cost of 1 for instruction: %cttz + %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 0) + ret i64 %cttz +} + +define i64 @var_cttz_i64u(i64 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_i64u': +; CHECK: Found an estimated cost of 1 for instruction: %cttz + %cttz = call i64 @llvm.cttz.i64(i64 %a, i1 1) + ret i64 %cttz +} + +define i32 @var_cttz_i32(i32 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32': +; CHECK: Found an estimated cost of 1 for instruction: %cttz + %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 0) + ret i32 %cttz +} + +define i32 @var_cttz_i32u(i32 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_i32u': +; CHECK: Found an estimated cost of 1 for instruction: %cttz + %cttz = call i32 @llvm.cttz.i32(i32 %a, i1 1) + ret i32 %cttz +} + +define i16 @var_cttz_i16(i16 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16': +; CHECK: Found an estimated cost of 1 for instruction: %cttz + %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 0) + ret i16 %cttz +} + +define i16 @var_cttz_i16u(i16 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_i16u': +; CHECK: Found an estimated cost of 1 for instruction: %cttz + %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 1) + ret i16 %cttz +} + +define i8 @var_cttz_i8(i8 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8': +; CHECK: Found an estimated cost of 1 for instruction: %cttz + %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 0) + ret i8 %cttz +} + +define i8 @var_cttz_i8u(i8 %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_i8u': +; CHECK: Found an estimated cost of 1 for instruction: %cttz + %cttz = call i8 @llvm.cttz.i8(i8 %a, i1 1) + ret i8 %cttz +} + +; Verify the cost of vector trailing zero count instructions. + +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) +declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1) +declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1) + +declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) +declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1) +declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1) +declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1) + +declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1) +declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) +declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1) +declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1) + +define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64': +; SSE2: Found an estimated cost of 14 for instruction: %cttz +; SSE42: Found an estimated cost of 10 for instruction: %cttz +; AVX: Found an estimated cost of 10 for instruction: %cttz +; AVX512: Found an estimated cost of 10 for instruction: %cttz + %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0) + ret <2 x i64> %cttz +} + +define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v2i64u': +; SSE2: Found an estimated cost of 14 for instruction: %cttz +; SSE42: Found an estimated cost of 10 for instruction: %cttz +; AVX: Found an estimated cost of 10 for instruction: %cttz +; AVX512: Found an estimated cost of 10 for instruction: %cttz + %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1) + ret <2 x i64> %cttz +} + +define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64': +; SSE2: Found an estimated cost of 28 for instruction: %cttz +; SSE42: Found an estimated cost of 20 for instruction: %cttz +; AVX1: Found an estimated cost of 22 for instruction: %cttz +; AVX2: Found an estimated cost of 10 for instruction: %cttz +; AVX512: Found an estimated cost of 10 for instruction: %cttz + %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0) + ret <4 x i64> %cttz +} + +define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i64u': +; SSE2: Found an estimated cost of 28 for instruction: %cttz +; SSE42: Found an estimated cost of 20 for instruction: %cttz +; AVX1: Found an estimated cost of 22 for instruction: %cttz +; AVX2: Found an estimated cost of 10 for instruction: %cttz +; AVX512: Found an estimated cost of 10 for instruction: %cttz + %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1) + ret <4 x i64> %cttz +} + +define <8 x i64> @var_cttz_v8i64(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i64': +; SSE2: Found an estimated cost of 56 for instruction: %cttz +; SSE42: Found an estimated cost of 40 for instruction: %cttz +; AVX1: Found an estimated cost of 44 for instruction: %cttz +; AVX2: Found an estimated cost of 20 for instruction: %cttz +; AVX512F: Found an estimated cost of 20 for instruction: %cttz +; AVX512BW: Found an estimated cost of 10 for instruction: %cttz + %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 0) + ret <8 x i64> %cttz +} + +define <8 x i64> @var_cttz_v8i64u(<8 x i64> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i64u': +; SSE2: Found an estimated cost of 56 for instruction: %cttz +; SSE42: Found an estimated cost of 40 for instruction: %cttz +; AVX1: Found an estimated cost of 44 for instruction: %cttz +; AVX2: Found an estimated cost of 20 for instruction: %cttz +; AVX512F: Found an estimated cost of 20 for instruction: %cttz +; AVX512BW: Found an estimated cost of 10 for instruction: %cttz + %cttz = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %a, i1 1) + ret <8 x i64> %cttz +} + +define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32': +; SSE2: Found an estimated cost of 18 for instruction: %cttz +; SSE42: Found an estimated cost of 14 for instruction: %cttz +; AVX: Found an estimated cost of 14 for instruction: %cttz +; AVX512: Found an estimated cost of 14 for instruction: %cttz + %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0) + ret <4 x i32> %cttz +} + +define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v4i32u': +; SSE2: Found an estimated cost of 18 for instruction: %cttz +; SSE42: Found an estimated cost of 14 for instruction: %cttz +; AVX: Found an estimated cost of 14 for instruction: %cttz +; AVX512: Found an estimated cost of 14 for instruction: %cttz + %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1) + ret <4 x i32> %cttz +} + +define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32': +; SSE2: Found an estimated cost of 36 for instruction: %cttz +; SSE42: Found an estimated cost of 28 for instruction: %cttz +; AVX1: Found an estimated cost of 30 for instruction: %cttz +; AVX2: Found an estimated cost of 14 for instruction: %cttz +; AVX512: Found an estimated cost of 14 for instruction: %cttz + %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0) + ret <8 x i32> %cttz +} + +define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i32u': +; SSE2: Found an estimated cost of 36 for instruction: %cttz +; SSE42: Found an estimated cost of 28 for instruction: %cttz +; AVX1: Found an estimated cost of 30 for instruction: %cttz +; AVX2: Found an estimated cost of 14 for instruction: %cttz +; AVX512: Found an estimated cost of 14 for instruction: %cttz + %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1) + ret <8 x i32> %cttz +} + +define <16 x i32> @var_cttz_v16i32(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i32': +; SSE2: Found an estimated cost of 72 for instruction: %cttz +; SSE42: Found an estimated cost of 56 for instruction: %cttz +; AVX1: Found an estimated cost of 60 for instruction: %cttz +; AVX2: Found an estimated cost of 28 for instruction: %cttz +; AVX512F: Found an estimated cost of 28 for instruction: %cttz +; AVX512BW: Found an estimated cost of 14 for instruction: %cttz + %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 0) + ret <16 x i32> %cttz +} + +define <16 x i32> @var_cttz_v16i32u(<16 x i32> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i32u': +; SSE2: Found an estimated cost of 72 for instruction: %cttz +; SSE42: Found an estimated cost of 56 for instruction: %cttz +; AVX1: Found an estimated cost of 60 for instruction: %cttz +; AVX2: Found an estimated cost of 28 for instruction: %cttz +; AVX512F: Found an estimated cost of 28 for instruction: %cttz +; AVX512BW: Found an estimated cost of 14 for instruction: %cttz + %cttz = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %a, i1 1) + ret <16 x i32> %cttz +} + +define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16': +; SSE2: Found an estimated cost of 16 for instruction: %cttz +; SSE42: Found an estimated cost of 12 for instruction: %cttz +; AVX: Found an estimated cost of 12 for instruction: %cttz +; AVX512: Found an estimated cost of 12 for instruction: %cttz + %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0) + ret <8 x i16> %cttz +} + +define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v8i16u': +; SSE2: Found an estimated cost of 16 for instruction: %cttz +; SSE42: Found an estimated cost of 12 for instruction: %cttz +; AVX: Found an estimated cost of 12 for instruction: %cttz +; AVX512: Found an estimated cost of 12 for instruction: %cttz + %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1) + ret <8 x i16> %cttz +} + +define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16': +; SSE2: Found an estimated cost of 32 for instruction: %cttz +; SSE42: Found an estimated cost of 24 for instruction: %cttz +; AVX1: Found an estimated cost of 26 for instruction: %cttz +; AVX2: Found an estimated cost of 12 for instruction: %cttz +; AVX512: Found an estimated cost of 12 for instruction: %cttz + %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0) + ret <16 x i16> %cttz +} + +define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i16u': +; SSE2: Found an estimated cost of 32 for instruction: %cttz +; SSE42: Found an estimated cost of 24 for instruction: %cttz +; AVX1: Found an estimated cost of 26 for instruction: %cttz +; AVX2: Found an estimated cost of 12 for instruction: %cttz +; AVX512: Found an estimated cost of 12 for instruction: %cttz + %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1) + ret <16 x i16> %cttz +} + +define <32 x i16> @var_cttz_v32i16(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i16': +; SSE2: Found an estimated cost of 64 for instruction: %cttz +; SSE42: Found an estimated cost of 48 for instruction: %cttz +; AVX1: Found an estimated cost of 52 for instruction: %cttz +; AVX2: Found an estimated cost of 24 for instruction: %cttz +; AVX512F: Found an estimated cost of 24 for instruction: %cttz +; AVX512BW: Found an estimated cost of 12 for instruction: %cttz + %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 0) + ret <32 x i16> %cttz +} + +define <32 x i16> @var_cttz_v32i16u(<32 x i16> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i16u': +; SSE2: Found an estimated cost of 64 for instruction: %cttz +; SSE42: Found an estimated cost of 48 for instruction: %cttz +; AVX1: Found an estimated cost of 52 for instruction: %cttz +; AVX2: Found an estimated cost of 24 for instruction: %cttz +; AVX512F: Found an estimated cost of 24 for instruction: %cttz +; AVX512BW: Found an estimated cost of 12 for instruction: %cttz + %cttz = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %a, i1 1) + ret <32 x i16> %cttz +} + +define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8': +; SSE2: Found an estimated cost of 13 for instruction: %cttz +; SSE42: Found an estimated cost of 9 for instruction: %cttz +; AVX: Found an estimated cost of 9 for instruction: %cttz +; AVX512: Found an estimated cost of 9 for instruction: %cttz + %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0) + ret <16 x i8> %cttz +} + +define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v16i8u': +; SSE2: Found an estimated cost of 13 for instruction: %cttz +; SSE42: Found an estimated cost of 9 for instruction: %cttz +; AVX: Found an estimated cost of 9 for instruction: %cttz +; AVX512: Found an estimated cost of 9 for instruction: %cttz + %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1) + ret <16 x i8> %cttz +} + +define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8': +; SSE2: Found an estimated cost of 26 for instruction: %cttz +; SSE42: Found an estimated cost of 18 for instruction: %cttz +; AVX1: Found an estimated cost of 20 for instruction: %cttz +; AVX2: Found an estimated cost of 9 for instruction: %cttz +; AVX512: Found an estimated cost of 9 for instruction: %cttz + %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0) + ret <32 x i8> %cttz +} + +define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v32i8u': +; SSE2: Found an estimated cost of 26 for instruction: %cttz +; SSE42: Found an estimated cost of 18 for instruction: %cttz +; AVX1: Found an estimated cost of 20 for instruction: %cttz +; AVX2: Found an estimated cost of 9 for instruction: %cttz +; AVX512: Found an estimated cost of 9 for instruction: %cttz + %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1) + ret <32 x i8> %cttz +} + +define <64 x i8> @var_cttz_v64i8(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v64i8': +; SSE2: Found an estimated cost of 52 for instruction: %cttz +; SSE42: Found an estimated cost of 36 for instruction: %cttz +; AVX1: Found an estimated cost of 40 for instruction: %cttz +; AVX2: Found an estimated cost of 18 for instruction: %cttz +; AVX512F: Found an estimated cost of 18 for instruction: %cttz +; AVX512BW: Found an estimated cost of 9 for instruction: %cttz + %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 0) + ret <64 x i8> %cttz +} + +define <64 x i8> @var_cttz_v64i8u(<64 x i8> %a) { +; CHECK: 'Cost Model Analysis' for function 'var_cttz_v64i8u': +; SSE2: Found an estimated cost of 52 for instruction: %cttz +; SSE42: Found an estimated cost of 36 for instruction: %cttz +; AVX1: Found an estimated cost of 40 for instruction: %cttz +; AVX2: Found an estimated cost of 18 for instruction: %cttz +; AVX512F: Found an estimated cost of 18 for instruction: %cttz +; AVX512BW: Found an estimated cost of 9 for instruction: %cttz + %cttz = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %a, i1 1) + ret <64 x i8> %cttz +} diff --git a/test/Analysis/CostModel/X86/div.ll b/test/Analysis/CostModel/X86/div.ll index 0ac06ff75ebec97bea587fc24cca9576a5602137..dabaaef3596ab4bdc70fed26640137b957263654 100644 --- a/test/Analysis/CostModel/X86/div.ll +++ b/test/Analysis/CostModel/X86/div.ll @@ -139,14 +139,14 @@ define i32 @sdiv_uniformconst() { ; SSE2: cost of 38 {{.*}} %V8i32 = sdiv ; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv ; SSE42: cost of 30 {{.*}} %V8i32 = sdiv - ; AVX1: cost of 30 {{.*}} %V8i32 = sdiv + ; AVX1: cost of 32 {{.*}} %V8i32 = sdiv ; AVX2: cost of 15 {{.*}} %V8i32 = sdiv ; AVX512: cost of 15 {{.*}} %V8i32 = sdiv %V8i32 = sdiv <8 x i32> undef, ; SSE2: cost of 76 {{.*}} %V16i32 = sdiv ; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv ; SSE42: cost of 60 {{.*}} %V16i32 = sdiv - ; AVX1: cost of 60 {{.*}} %V16i32 = sdiv + ; AVX1: cost of 64 {{.*}} %V16i32 = sdiv ; AVX2: cost of 30 {{.*}} %V16i32 = sdiv ; AVX512: cost of 15 {{.*}} %V16i32 = sdiv %V16i32 = sdiv <16 x i32> undef, @@ -157,12 +157,12 @@ define i32 @sdiv_uniformconst() { ; AVX: cost of 6 {{.*}} %V8i16 = sdiv %V8i16 = sdiv <8 x i16> undef, ; SSE: cost of 12 {{.*}} %V16i16 = sdiv - ; AVX1: cost of 12 {{.*}} %V16i16 = sdiv + ; AVX1: cost of 14 {{.*}} %V16i16 = sdiv ; AVX2: cost of 6 {{.*}} %V16i16 = sdiv ; AVX512: cost of 6 {{.*}} %V16i16 = sdiv %V16i16 = sdiv <16 x i16> undef, ; SSE: cost of 24 {{.*}} %V32i16 = sdiv - ; AVX1: cost of 24 {{.*}} %V32i16 = sdiv + ; AVX1: cost of 28 {{.*}} %V32i16 = sdiv ; AVX2: cost of 12 {{.*}} %V32i16 = sdiv ; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv ; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv @@ -203,12 +203,12 @@ define i32 @udiv_uniformconst() { ; AVX: cost of 15 {{.*}} %V4i32 = udiv %V4i32 = udiv <4 x i32> undef, ; SSE: cost of 30 {{.*}} %V8i32 = udiv - ; AVX1: cost of 30 {{.*}} %V8i32 = udiv + ; AVX1: cost of 32 {{.*}} %V8i32 = udiv ; AVX2: cost of 15 {{.*}} %V8i32 = udiv ; AVX512: cost of 15 {{.*}} %V8i32 = udiv %V8i32 = udiv <8 x i32> undef, ; SSE: cost of 60 {{.*}} %V16i32 = udiv - ; AVX1: cost of 60 {{.*}} %V16i32 = udiv + ; AVX1: cost of 64 {{.*}} %V16i32 = udiv ; AVX2: cost of 30 {{.*}} %V16i32 = udiv ; AVX512: cost of 15 {{.*}} %V16i32 = udiv %V16i32 = udiv <16 x i32> undef, @@ -219,12 +219,12 @@ define i32 @udiv_uniformconst() { ; AVX: cost of 6 {{.*}} %V8i16 = udiv %V8i16 = udiv <8 x i16> undef, ; SSE: cost of 12 {{.*}} %V16i16 = udiv - ; AVX1: cost of 12 {{.*}} %V16i16 = udiv + ; AVX1: cost of 14 {{.*}} %V16i16 = udiv ; AVX2: cost of 6 {{.*}} %V16i16 = udiv ; AVX512: cost of 6 {{.*}} %V16i16 = udiv %V16i16 = udiv <16 x i16> undef, ; SSE: cost of 24 {{.*}} %V32i16 = udiv - ; AVX1: cost of 24 {{.*}} %V32i16 = udiv + ; AVX1: cost of 28 {{.*}} %V32i16 = udiv ; AVX2: cost of 12 {{.*}} %V32i16 = udiv ; AVX512F: cost of 12 {{.*}} %V32i16 = udiv ; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv @@ -269,14 +269,14 @@ define i32 @sdiv_uniformconstpow2() { ; SSE2: cost of 38 {{.*}} %V8i32 = sdiv ; SSSE3: cost of 38 {{.*}} %V8i32 = sdiv ; SSE42: cost of 30 {{.*}} %V8i32 = sdiv - ; AVX1: cost of 30 {{.*}} %V8i32 = sdiv + ; AVX1: cost of 32 {{.*}} %V8i32 = sdiv ; AVX2: cost of 15 {{.*}} %V8i32 = sdiv ; AVX512: cost of 15 {{.*}} %V8i32 = sdiv %V8i32 = sdiv <8 x i32> undef, ; SSE2: cost of 76 {{.*}} %V16i32 = sdiv ; SSSE3: cost of 76 {{.*}} %V16i32 = sdiv ; SSE42: cost of 60 {{.*}} %V16i32 = sdiv - ; AVX1: cost of 60 {{.*}} %V16i32 = sdiv + ; AVX1: cost of 64 {{.*}} %V16i32 = sdiv ; AVX2: cost of 30 {{.*}} %V16i32 = sdiv ; AVX512: cost of 15 {{.*}} %V16i32 = sdiv %V16i32 = sdiv <16 x i32> undef, @@ -287,12 +287,12 @@ define i32 @sdiv_uniformconstpow2() { ; AVX: cost of 6 {{.*}} %V8i16 = sdiv %V8i16 = sdiv <8 x i16> undef, ; SSE: cost of 12 {{.*}} %V16i16 = sdiv - ; AVX1: cost of 12 {{.*}} %V16i16 = sdiv + ; AVX1: cost of 14 {{.*}} %V16i16 = sdiv ; AVX2: cost of 6 {{.*}} %V16i16 = sdiv ; AVX512: cost of 6 {{.*}} %V16i16 = sdiv %V16i16 = sdiv <16 x i16> undef, ; SSE: cost of 24 {{.*}} %V32i16 = sdiv - ; AVX1: cost of 24 {{.*}} %V32i16 = sdiv + ; AVX1: cost of 28 {{.*}} %V32i16 = sdiv ; AVX2: cost of 12 {{.*}} %V32i16 = sdiv ; AVX512F: cost of 12 {{.*}} %V32i16 = sdiv ; AVX512BW: cost of 6 {{.*}} %V32i16 = sdiv @@ -333,12 +333,12 @@ define i32 @udiv_uniformconstpow2() { ; AVX: cost of 15 {{.*}} %V4i32 = udiv %V4i32 = udiv <4 x i32> undef, ; SSE: cost of 30 {{.*}} %V8i32 = udiv - ; AVX1: cost of 30 {{.*}} %V8i32 = udiv + ; AVX1: cost of 32 {{.*}} %V8i32 = udiv ; AVX2: cost of 15 {{.*}} %V8i32 = udiv ; AVX512: cost of 15 {{.*}} %V8i32 = udiv %V8i32 = udiv <8 x i32> undef, ; SSE: cost of 60 {{.*}} %V16i32 = udiv - ; AVX1: cost of 60 {{.*}} %V16i32 = udiv + ; AVX1: cost of 64 {{.*}} %V16i32 = udiv ; AVX2: cost of 30 {{.*}} %V16i32 = udiv ; AVX512: cost of 15 {{.*}} %V16i32 = udiv %V16i32 = udiv <16 x i32> undef, @@ -349,12 +349,12 @@ define i32 @udiv_uniformconstpow2() { ; AVX: cost of 6 {{.*}} %V8i16 = udiv %V8i16 = udiv <8 x i16> undef, ; SSE: cost of 12 {{.*}} %V16i16 = udiv - ; AVX1: cost of 12 {{.*}} %V16i16 = udiv + ; AVX1: cost of 14 {{.*}} %V16i16 = udiv ; AVX2: cost of 6 {{.*}} %V16i16 = udiv ; AVX512: cost of 6 {{.*}} %V16i16 = udiv %V16i16 = udiv <16 x i16> undef, ; SSE: cost of 24 {{.*}} %V32i16 = udiv - ; AVX1: cost of 24 {{.*}} %V32i16 = udiv + ; AVX1: cost of 28 {{.*}} %V32i16 = udiv ; AVX2: cost of 12 {{.*}} %V32i16 = udiv ; AVX512F: cost of 12 {{.*}} %V32i16 = udiv ; AVX512BW: cost of 6 {{.*}} %V32i16 = udiv diff --git a/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index f2d3f3f0ce639640693f657f094c449ce58227cc..b673399e428ffb7f06e1ce20c858021802aff258 100644 --- a/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -78,10 +78,10 @@ define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x ; SKX-LABEL: test_gather_2f64 ; SKX: Found an estimated cost of 7 {{.*}}.gather -%res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) ret <2 x double> %res } -declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { @@ -94,7 +94,7 @@ define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> % ; SKX-LABEL: test_gather_4i32 ; SKX: Found an estimated cost of 6 {{.*}}.gather -%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ret <4 x i32> %res } @@ -109,10 +109,10 @@ define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) ; SKX-LABEL: test_gather_4i32_const_mask ; SKX: Found an estimated cost of 6 {{.*}}.gather -%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ret <4 x i32> %res } -declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0) define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) { @@ -128,7 +128,7 @@ define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ret <16 x float>%res } @@ -146,7 +146,7 @@ define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, < %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ret <16 x float>%res } @@ -164,7 +164,7 @@ define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i3 %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ret <16 x float>%res } @@ -185,7 +185,7 @@ define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind %sext_ind = sext <16 x i32> %ind to <16 x i64> %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind - %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ret <16 x float>%res } @@ -204,7 +204,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind %imask = bitcast i16 %mask to <16 x i1> - call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) + call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ret void } @@ -218,11 +218,11 @@ define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) { ; SKX-LABEL: test_scatter_8i32 ; SKX: Found an estimated cost of 10 {{.*}}.scatter - call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) + call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ret void } -declare void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask) +declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask) define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; AVX2-LABEL: test_scatter_4i32 @@ -234,7 +234,7 @@ define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SKX-LABEL: test_scatter_4i32 ; SKX: Found an estimated cost of 6 {{.*}}.scatter - call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ret void } @@ -252,7 +252,7 @@ define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) %sext_ind = sext <4 x i32> %ind to <4 x i64> %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind - %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ret <4 x float>%res } @@ -270,14 +270,14 @@ define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) { %sext_ind = sext <4 x i32> %ind to <4 x i64> %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind - %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ret <4 x float>%res } -declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> ) -declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask) -declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask) -declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>) +declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> ) +declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask) +declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask) +declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>) declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>) declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) diff --git a/test/Analysis/CostModel/X86/vector_gep.ll b/test/Analysis/CostModel/X86/vector_gep.ll index e49f25871d66e5d1190b39df58e995dd0b978684..17f70dfc7a7c59ab03d149e48c7c19a643e65f0f 100644 --- a/test/Analysis/CostModel/X86/vector_gep.ll +++ b/test/Analysis/CostModel/X86/vector_gep.ll @@ -3,7 +3,7 @@ %struct.S = type { [1000 x i32] } -declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){ %temp = insertelement <4 x i64> undef, i64 %base, i32 0 @@ -12,6 +12,6 @@ define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){ %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32] %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector - %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> , <4 x i32> undef) + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> , <4 x i32> undef) ret <4 x i32> %res } diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index a23b13fb2e259c6a140938c0cc87bb5a058112ee..eabc2330ddc6648c6439e375a668f68c2f077e1f 100644 --- a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -33,10 +33,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64': ; SSE2: Found an estimated cost of 24 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = ashr <4 x i64> %a, %b ret <4 x i64> %shift } @@ -45,10 +45,10 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64': ; SSE2: Found an estimated cost of 48 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 8 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = ashr <8 x i64> %a, %b ret <8 x i64> %shift } @@ -70,10 +70,10 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <8 x i32> %a, %b ret <8 x i32> %shift @@ -83,10 +83,10 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i32> %a, %b ret <16 x i32> %shift @@ -109,11 +109,11 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = ashr <16 x i16> %a, %b ret <16 x i16> %shift } @@ -122,11 +122,11 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = ashr <32 x i16> %a, %b ret <32 x i16> %shift } @@ -147,11 +147,11 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 50 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; AVX512F: Found an estimated cost of 24 for instruction: %shift ; AVX512BW: Found an estimated cost of 24 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = ashr <32 x i8> %a, %b ret <32 x i8> %shift } @@ -160,11 +160,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8': ; SSE2: Found an estimated cost of 216 for instruction: %shift ; SSE41: Found an estimated cost of 96 for instruction: %shift -; AVX: Found an estimated cost of 96 for instruction: %shift +; AVX: Found an estimated cost of 100 for instruction: %shift ; AVX2: Found an estimated cost of 48 for instruction: %shift ; AVX512F: Found an estimated cost of 48 for instruction: %shift ; AVX512BW: Found an estimated cost of 24 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = ashr <64 x i8> %a, %b ret <64 x i8> %shift } @@ -191,11 +191,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift -; AVX2: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift +; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift -; XOPAVX2: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %insert = insertelement <4 x i64> undef, i64 %b, i32 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer %shift = ashr <4 x i64> %a, %splat @@ -206,11 +205,10 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift -; AVX2: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift +; AVX2: Found an estimated cost of 8 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift -; XOPAVX2: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %insert = insertelement <8 x i64> undef, i64 %b, i32 0 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer %shift = ashr <8 x i64> %a, %splat @@ -235,10 +233,10 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %insert = insertelement <8 x i32> undef, i32 %b, i32 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer @@ -250,10 +248,10 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %insert = insertelement <16 x i32> undef, i32 %b, i32 0 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer @@ -279,10 +277,10 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %insert = insertelement <16 x i16> undef, i16 %b, i32 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer @@ -294,11 +292,11 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %insert = insertelement <32 x i16> undef, i16 %b, i32 0 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer @@ -324,10 +322,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 50 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; AVX512: Found an estimated cost of 24 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %insert = insertelement <32 x i8> undef, i8 %b, i32 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer %shift = ashr <32 x i8> %a, %splat @@ -338,11 +336,11 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8': ; SSE2: Found an estimated cost of 216 for instruction: %shift ; SSE41: Found an estimated cost of 96 for instruction: %shift -; AVX: Found an estimated cost of 96 for instruction: %shift +; AVX: Found an estimated cost of 100 for instruction: %shift ; AVX2: Found an estimated cost of 48 for instruction: %shift ; AVX512F: Found an estimated cost of 48 for instruction: %shift ; AVX512BW: Found an estimated cost of 24 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %insert = insertelement <64 x i8> undef, i8 %b, i32 0 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer %shift = ashr <64 x i8> %a, %splat @@ -369,10 +367,10 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64': ; SSE2: Found an estimated cost of 24 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = ashr <4 x i64> %a, ret <4 x i64> %shift } @@ -381,10 +379,10 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64': ; SSE2: Found an estimated cost of 48 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 8 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = ashr <8 x i64> %a, ret <8 x i64> %shift } @@ -406,10 +404,10 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <8 x i32> %a, ret <8 x i32> %shift @@ -419,10 +417,10 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i32> %a, ret <16 x i32> %shift @@ -445,11 +443,11 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = ashr <16 x i16> %a, ret <16 x i16> %shift } @@ -458,11 +456,11 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = ashr <32 x i16> %a, ret <32 x i16> %shift } @@ -483,10 +481,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 108 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 50 for instruction: %shift ; AVX2: Found an estimated cost of 24 for instruction: %shift ; AVX512: Found an estimated cost of 24 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = ashr <32 x i8> %a, ret <32 x i8> %shift } @@ -495,11 +493,11 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8': ; SSE2: Found an estimated cost of 216 for instruction: %shift ; SSE41: Found an estimated cost of 96 for instruction: %shift -; AVX: Found an estimated cost of 96 for instruction: %shift +; AVX: Found an estimated cost of 100 for instruction: %shift ; AVX2: Found an estimated cost of 48 for instruction: %shift ; AVX512F: Found an estimated cost of 48 for instruction: %shift ; AVX512BW: Found an estimated cost of 24 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = ashr <64 x i8> %a, ret <64 x i8> %shift } @@ -524,10 +522,11 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift +; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = ashr <4 x i64> %a, ret <4 x i64> %shift } @@ -536,10 +535,11 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 8 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift +; XOPAVX2: Found an estimated cost of 8 for instruction: %shift %shift = ashr <8 x i64> %a, ret <8 x i64> %shift } @@ -560,10 +560,10 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <8 x i32> %a, ret <8 x i32> %shift @@ -573,10 +573,10 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i32> %a, ret <16 x i32> %shift @@ -598,10 +598,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <16 x i16> %a, ret <16 x i16> %shift @@ -611,11 +611,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = ashr <32 x i16> %a, ret <32 x i16> %shift @@ -628,7 +628,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 4 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i8> %a, ret <16 x i8> %shift } @@ -637,10 +637,10 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 4 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, ret <32 x i8> %shift @@ -650,11 +650,11 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 8 for instruction: %shift ; AVX512F: Found an estimated cost of 8 for instruction: %shift ; AVX512BW: Found an estimated cost of 4 for instruction: %shift -; XOPAVX: Found an estimated cost of 16 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 8 for instruction: %shift %shift = ashr <64 x i8> %a, ret <64 x i8> %shift diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index 546b2bb50f26ff241f621e785b3d84cd6432f380..6e890369d677ecd27ba61a4d9c71d57920c1559b 100644 --- a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -34,10 +34,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i64> %a, %b ret <4 x i64> %shift @@ -47,10 +47,10 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <8 x i64> %a, %b ret <8 x i64> %shift @@ -73,10 +73,10 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <8 x i32> %a, %b ret <8 x i32> %shift @@ -86,10 +86,10 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i32> %a, %b ret <16 x i32> %shift @@ -112,11 +112,11 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = lshr <16 x i16> %a, %b ret <16 x i16> %shift } @@ -125,11 +125,11 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = lshr <32 x i16> %a, %b ret <32 x i16> %shift } @@ -150,10 +150,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = lshr <32 x i8> %a, %b ret <32 x i8> %shift } @@ -162,11 +162,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = lshr <64 x i8> %a, %b ret <64 x i8> %shift } @@ -193,10 +193,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %insert = insertelement <4 x i64> undef, i64 %b, i32 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer @@ -208,10 +208,10 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %insert = insertelement <8 x i64> undef, i64 %b, i32 0 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer @@ -237,10 +237,10 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %insert = insertelement <8 x i32> undef, i32 %b, i32 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer @@ -252,10 +252,10 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %insert = insertelement <16 x i32> undef, i32 %b, i32 0 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer @@ -281,10 +281,10 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %insert = insertelement <16 x i16> undef, i16 %b, i32 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer @@ -296,11 +296,11 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %insert = insertelement <32 x i16> undef, i16 %b, i32 0 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer @@ -326,10 +326,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %insert = insertelement <32 x i8> undef, i8 %b, i32 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer %shift = lshr <32 x i8> %a, %splat @@ -340,11 +340,11 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %insert = insertelement <64 x i8> undef, i8 %b, i32 0 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer %shift = lshr <64 x i8> %a, %splat @@ -372,10 +372,10 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i64> %a, ret <4 x i64> %shift @@ -385,10 +385,10 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <8 x i64> %a, ret <8 x i64> %shift @@ -411,10 +411,10 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32': ; SSE2: Found an estimated cost of 32 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <8 x i32> %a, ret <8 x i32> %shift @@ -424,10 +424,10 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i32': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i32> %a, ret <16 x i32> %shift @@ -450,11 +450,11 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = lshr <16 x i16> %a, ret <16 x i16> %shift } @@ -463,11 +463,11 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = lshr <32 x i16> %a, ret <32 x i16> %shift } @@ -488,10 +488,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 24 for instruction: %shift -; AVX: Found an estimated cost of 24 for instruction: %shift +; AVX: Found an estimated cost of 26 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 6 for instruction: %shift %shift = lshr <32 x i8> %a, ret <32 x i8> %shift } @@ -500,11 +500,11 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 48 for instruction: %shift -; AVX: Found an estimated cost of 48 for instruction: %shift +; AVX: Found an estimated cost of 52 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOP: Found an estimated cost of 12 for instruction: %shift %shift = lshr <64 x i8> %a, ret <64 x i8> %shift } @@ -529,10 +529,10 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <4 x i64> %a, ret <4 x i64> %shift @@ -542,10 +542,10 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <8 x i64> %a, ret <8 x i64> %shift @@ -567,10 +567,10 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <8 x i32> %a, ret <8 x i32> %shift @@ -580,10 +580,10 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <16 x i32> %a, ret <16 x i32> %shift @@ -605,10 +605,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <16 x i16> %a, ret <16 x i16> %shift @@ -618,11 +618,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <32 x i16> %a, ret <32 x i16> %shift @@ -644,10 +644,10 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 6 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <32 x i8> %a, ret <32 x i8> %shift @@ -657,11 +657,11 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512F: Found an estimated cost of 4 for instruction: %shift ; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = lshr <64 x i8> %a, ret <64 x i8> %shift diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll index 90356f5ce8be936fef8d1c47de3ca0d214d00d2e..5e604bb7983e3ee1c2795aa5c350b7adccb67e32 100644 --- a/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -35,10 +35,10 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <4 x i64> %a, %b ret <4 x i64> %shift @@ -48,10 +48,10 @@ define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <8 x i64> %a, %b ret <8 x i64> %shift @@ -74,10 +74,10 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32': ; SSE2: Found an estimated cost of 20 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i32> %a, %b ret <8 x i32> %shift @@ -87,10 +87,10 @@ define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i32': ; SSE2: Found an estimated cost of 40 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <16 x i32> %a, %b ret <16 x i32> %shift @@ -113,11 +113,11 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16': ; SSE2: Found an estimated cost of 64 for instruction: %shift ; SSE41: Found an estimated cost of 28 for instruction: %shift -; AVX: Found an estimated cost of 28 for instruction: %shift +; AVX: Found an estimated cost of 30 for instruction: %shift ; AVX2: Found an estimated cost of 10 for instruction: %shift ; AVX512F: Found an estimated cost of 10 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift %shift = shl <16 x i16> %a, %b ret <16 x i16> %shift } @@ -126,11 +126,11 @@ define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i16': ; SSE2: Found an estimated cost of 128 for instruction: %shift ; SSE41: Found an estimated cost of 56 for instruction: %shift -; AVX: Found an estimated cost of 56 for instruction: %shift +; AVX: Found an estimated cost of 60 for instruction: %shift ; AVX2: Found an estimated cost of 20 for instruction: %shift ; AVX512F: Found an estimated cost of 20 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift %shift = shl <32 x i16> %a, %b ret <32 x i16> %shift } @@ -151,10 +151,10 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift %shift = shl <32 x i8> %a, %b ret <32 x i8> %shift } @@ -163,11 +163,11 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK: 'Cost Model Analysis' for function 'var_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift %shift = shl <64 x i8> %a, %b ret <64 x i8> %shift } @@ -194,10 +194,10 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %insert = insertelement <4 x i64> undef, i64 %b, i32 0 %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer @@ -209,10 +209,10 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i64': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %insert = insertelement <8 x i64> undef, i64 %b, i32 0 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer @@ -238,10 +238,10 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %insert = insertelement <8 x i32> undef, i32 %b, i32 0 %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer @@ -253,10 +253,10 @@ define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i32': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %insert = insertelement <16 x i32> undef, i32 %b, i32 0 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer @@ -282,10 +282,10 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, i16 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %insert = insertelement <16 x i16> undef, i16 %b, i32 0 %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer @@ -297,11 +297,11 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i16': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %insert = insertelement <32 x i16> undef, i16 %b, i32 0 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer @@ -327,10 +327,10 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift %insert = insertelement <32 x i8> undef, i8 %b, i32 0 %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer %shift = shl <32 x i8> %a, %splat @@ -341,11 +341,11 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) { ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift %insert = insertelement <64 x i8> undef, i8 %b, i32 0 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer %shift = shl <64 x i8> %a, %splat @@ -373,10 +373,10 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <4 x i64> %a, ret <4 x i64> %shift @@ -386,10 +386,10 @@ define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i64': ; SSE2: Found an estimated cost of 16 for instruction: %shift ; SSE41: Found an estimated cost of 16 for instruction: %shift -; AVX: Found an estimated cost of 16 for instruction: %shift +; AVX: Found an estimated cost of 20 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <8 x i64> %a, ret <8 x i64> %shift @@ -415,7 +415,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) { ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i32> %a, ret <8 x i32> %shift @@ -428,7 +428,7 @@ define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) { ; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <16 x i32> %a, ret <16 x i32> %shift @@ -453,7 +453,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) { ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i16> %a, ret <16 x i16> %shift @@ -467,7 +467,7 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) { ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i16> %a, ret <32 x i16> %shift @@ -489,10 +489,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8': ; SSE2: Found an estimated cost of 52 for instruction: %shift ; SSE41: Found an estimated cost of 22 for instruction: %shift -; AVX: Found an estimated cost of 22 for instruction: %shift +; AVX: Found an estimated cost of 24 for instruction: %shift ; AVX2: Found an estimated cost of 11 for instruction: %shift ; AVX512: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 4 for instruction: %shift %shift = shl <32 x i8> %a, ret <32 x i8> %shift } @@ -501,11 +501,11 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v64i8': ; SSE2: Found an estimated cost of 104 for instruction: %shift ; SSE41: Found an estimated cost of 44 for instruction: %shift -; AVX: Found an estimated cost of 44 for instruction: %shift +; AVX: Found an estimated cost of 48 for instruction: %shift ; AVX2: Found an estimated cost of 22 for instruction: %shift ; AVX512F: Found an estimated cost of 22 for instruction: %shift ; AVX512BW: Found an estimated cost of 11 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 8 for instruction: %shift %shift = shl <64 x i8> %a, ret <64 x i8> %shift } @@ -531,10 +531,10 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <4 x i64> %a, ret <4 x i64> %shift @@ -544,10 +544,10 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i64': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <8 x i64> %a, ret <8 x i64> %shift @@ -570,10 +570,10 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <8 x i32> %a, ret <8 x i32> %shift @@ -583,10 +583,10 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i32': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <16 x i32> %a, ret <16 x i32> %shift @@ -608,10 +608,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16': ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift -; AVX: Found an estimated cost of 2 for instruction: %shift +; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 1 for instruction: %shift ; AVX512: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 2 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i16> %a, ret <16 x i16> %shift @@ -621,11 +621,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i16': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 8 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i16> %a, ret <32 x i16> %shift @@ -638,7 +638,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i8> %a, ret <16 x i8> %shift } @@ -647,7 +647,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8': ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift -; AVX: Found an estimated cost of 4 for instruction: %shift +; AVX: Found an estimated cost of 6 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 2 for instruction: %shift ; XOPAVX: Found an estimated cost of 4 for instruction: %shift @@ -660,7 +660,7 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v64i8': ; SSE2: Found an estimated cost of 8 for instruction: %shift ; SSE41: Found an estimated cost of 8 for instruction: %shift -; AVX: Found an estimated cost of 8 for instruction: %shift +; AVX: Found an estimated cost of 12 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512F: Found an estimated cost of 4 for instruction: %shift ; AVX512BW: Found an estimated cost of 2 for instruction: %shift @@ -761,7 +761,7 @@ define <16 x i16> @test6(<16 x i16> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shl ; AVX: Found an estimated cost of 4 for instruction: %shl ; AVX2: Found an estimated cost of 1 for instruction: %shl -; XOPAVX: Found an estimated cost of 2 for instruction: %shl +; XOPAVX: Found an estimated cost of 4 for instruction: %shl ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl @@ -778,7 +778,7 @@ define <8 x i32> @test7(<8 x i32> %a) { ; SSE41: Found an estimated cost of 2 for instruction: %shl ; AVX: Found an estimated cost of 4 for instruction: %shl ; AVX2: Found an estimated cost of 1 for instruction: %shl -; XOPAVX: Found an estimated cost of 2 for instruction: %shl +; XOPAVX: Found an estimated cost of 4 for instruction: %shl ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl @@ -794,9 +794,9 @@ define <4 x i64> @test8(<4 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'test8': ; SSE2: Found an estimated cost of 8 for instruction: %shl ; SSE41: Found an estimated cost of 8 for instruction: %shl -; AVX: Found an estimated cost of 8 for instruction: %shl +; AVX: Found an estimated cost of 10 for instruction: %shl ; AVX2: Found an estimated cost of 1 for instruction: %shl -; XOPAVX: Found an estimated cost of 2 for instruction: %shl +; XOPAVX: Found an estimated cost of 4 for instruction: %shl ; XOPAVX2: Found an estimated cost of 1 for instruction: %shl @@ -811,7 +811,7 @@ define <32 x i16> @test9(<32 x i16> %a) { ; SSE41: Found an estimated cost of 4 for instruction: %shl ; AVX: Found an estimated cost of 8 for instruction: %shl ; AVX2: Found an estimated cost of 2 for instruction: %shl -; XOPAVX: Found an estimated cost of 4 for instruction: %shl +; XOPAVX: Found an estimated cost of 8 for instruction: %shl ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl @@ -826,7 +826,7 @@ define <16 x i32> @test10(<16 x i32> %a) { ; SSE41: Found an estimated cost of 4 for instruction: %shl ; AVX: Found an estimated cost of 8 for instruction: %shl ; AVX2: Found an estimated cost of 2 for instruction: %shl -; XOPAVX: Found an estimated cost of 4 for instruction: %shl +; XOPAVX: Found an estimated cost of 8 for instruction: %shl ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl @@ -842,7 +842,7 @@ define <8 x i64> @test11(<8 x i64> %a) { ; CHECK: 'Cost Model Analysis' for function 'test11': ; SSE2: Found an estimated cost of 16 for instruction: %shl ; SSE41: Found an estimated cost of 16 for instruction: %shl -; AVX: Found an estimated cost of 16 for instruction: %shl +; AVX: Found an estimated cost of 20 for instruction: %shl ; AVX2: Found an estimated cost of 2 for instruction: %shl -; XOPAVX: Found an estimated cost of 4 for instruction: %shl +; XOPAVX: Found an estimated cost of 8 for instruction: %shl ; XOPAVX2: Found an estimated cost of 2 for instruction: %shl diff --git a/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/test/Analysis/Delinearization/constant_functions_multi_dim.ll new file mode 100644 index 0000000000000000000000000000000000000000..b44b900d3f5220dd50ee5cacdb5f1af8aae54ff9 --- /dev/null +++ b/test/Analysis/Delinearization/constant_functions_multi_dim.ll @@ -0,0 +1,80 @@ +; RUN: opt -delinearize -analyze < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK: Inst: %tmp = load float, float* %arrayidx, align 4 +; CHECK-NEXT: In Loop with Header: for.inc +; CHECK-NEXT: AccessFunction: {(4 * %N * %call),+,4}<%for.inc> +; CHECK-NEXT: Base offset: %A +; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. +; CHECK-NEXT: ArrayRef[%call][{0,+,1}<%for.inc>] + +; CHECK: Inst: %tmp5 = load float, float* %arrayidx4, align 4 +; CHECK-NEXT: In Loop with Header: for.inc +; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc> +; CHECK-NEXT: Base offset: %B +; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. +; CHECK-NEXT: ArrayRef[{0,+,1}<%for.inc>][%call1] + +; Function Attrs: noinline nounwind uwtable +define void @mat_mul(float* %C, float* %A, float* %B, i64 %N) #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %call = tail call i64 @_Z13get_global_idj(i32 0) #3 + %call1 = tail call i64 @_Z13get_global_idj(i32 1) #3 + %cmp1 = icmp sgt i64 %N, 0 + %mul = mul nsw i64 %call, %N + br i1 %cmp1, label %for.inc.lr.ph, label %for.end + +for.inc.lr.ph: ; preds = %entry.split + br label %for.inc + +for.inc: ; preds = %for.inc.lr.ph, %for.inc + %acc.03 = phi float [ 0.000000e+00, %for.inc.lr.ph ], [ %tmp6, %for.inc ] + %m.02 = phi i64 [ 0, %for.inc.lr.ph ], [ %inc, %for.inc ] + %add = add nsw i64 %m.02, %mul + %arrayidx = getelementptr inbounds float, float* %A, i64 %add + %tmp = load float, float* %arrayidx, align 4 + %mul2 = mul nsw i64 %m.02, %N + %add3 = add nsw i64 %mul2, %call1 + %arrayidx4 = getelementptr inbounds float, float* %B, i64 %add3 + %tmp5 = load float, float* %arrayidx4, align 4 + %tmp6 = tail call float @llvm.fmuladd.f32(float %tmp, float %tmp5, float %acc.03) + %inc = add nuw nsw i64 %m.02, 1 + %exitcond = icmp ne i64 %inc, %N + br i1 %exitcond, label %for.inc, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.inc + %.lcssa = phi float [ %tmp6, %for.inc ] + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split + %acc.0.lcssa = phi float [ %.lcssa, %for.cond.for.end_crit_edge ], [ 0.000000e+00, %entry.split ] + %add7 = add nsw i64 %mul, %call1 + %arrayidx8 = getelementptr inbounds float, float* %C, i64 %add7 + store float %acc.0.lcssa, float* %arrayidx8, align 4 + ret void +} + +; Function Attrs: nounwind readnone +declare i64 @_Z13get_global_idj(i32) #1 + +; Function Attrs: nounwind readnone speculatable +declare float @llvm.fmuladd.f32(float, float, float) #2 + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone speculatable } +attributes #3 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 5.0.0 (trunk 303846) (llvm/trunk 303834)"} +!2 = !{i32 1, i32 1, i32 1, i32 0} +!3 = !{!"none", !"none", !"none", !"none"} +!4 = !{!"float*", !"float*", !"float*", !"long"} +!5 = !{!"", !"", !"", !""} diff --git a/test/Analysis/DemandedBits/intrinsics.ll b/test/Analysis/DemandedBits/intrinsics.ll new file mode 100644 index 0000000000000000000000000000000000000000..5a6d17284a723c4547c628030ecbe8bf0f0c69cc --- /dev/null +++ b/test/Analysis/DemandedBits/intrinsics.ll @@ -0,0 +1,25 @@ +; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s +; RUN: opt -S -disable-output -passes="print" < %s 2>&1 | FileCheck %s + +; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1 +; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bitreverse.i32(i32 %1) +; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 +define i8 @test_bswap(i32 %x) { + %1 = or i32 %x, 1 + %2 = call i32 @llvm.bswap.i32(i32 %1) + %3 = trunc i32 %2 to i8 + ret i8 %3 +} +declare i32 @llvm.bswap.i32(i32) + +; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1 +; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bswap.i32(i32 %1) +; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 +define i8 @test_bitreverse(i32 %x) { + %1 = or i32 %x, 1 + %2 = call i32 @llvm.bitreverse.i32(i32 %1) + %3 = trunc i32 %2 to i8 + ret i8 %3 +} +declare i32 @llvm.bitreverse.i32(i32) + diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll index 0acb050c2519ecd83cdf93e109fd7c8d02628787..8ae44387c1da70c801501fa91b637160d8b71090 100644 --- a/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll +++ b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll @@ -1,5 +1,6 @@ ; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_ps': ; CHECK: DIVERGENT: ; CHECK-NOT: %arg0 ; CHECK-NOT: %arg1 @@ -9,7 +10,31 @@ ; CHECK: DIVERGENT: float %arg5 ; CHECK: DIVERGENT: i32 %arg6 -define amdgpu_ps void @main([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { +define amdgpu_ps void @test_amdgpu_ps([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { + ret void +} + +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_amdgpu_kernel': +; CHECK-NOT: %arg0 +; CHECK-NOT: %arg1 +; CHECK-NOT: %arg2 +; CHECK-NOT: %arg3 +; CHECK-NOT: %arg4 +; CHECK-NOT: %arg5 +; CHECK-NOT: %arg6 +define amdgpu_kernel void @test_amdgpu_kernel([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { + ret void +} + +; CHECK-LABEL: Printing analysis 'Divergence Analysis' for function 'test_c': +; CHECK: DIVERGENT: +; CHECK: DIVERGENT: +; CHECK: DIVERGENT: +; CHECK: DIVERGENT: +; CHECK: DIVERGENT: +; CHECK: DIVERGENT: +; CHECK: DIVERGENT: +define void @test_c([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 { ret void } diff --git a/test/Analysis/IVUsers/quadradic-exit-value.ll b/test/Analysis/IVUsers/quadradic-exit-value.ll index 214afcb2ffa7629384f2633dde8ff745bf994d91..afc2151982183e4a6b09c297502ba33e8ca4798b 100644 --- a/test/Analysis/IVUsers/quadradic-exit-value.ll +++ b/test/Analysis/IVUsers/quadradic-exit-value.ll @@ -30,13 +30,47 @@ exit: ret i64 %r } +; PR15470: LSR miscompile. The test1 function should return '1'. +; It is valid to fold SCEVUnknown into the recurrence because it +; was defined before the loop. +; +; SCEV does not know how to denormalize chained recurrences, so make +; sure they aren't marked as post-inc users. +; +; CHECK-LABEL: IV Users for loop %test1.loop +; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test1.loop> (post-inc with loop %test1.loop) in %f = ashr i32 %sext.us, 24 +define i32 @test1(i1 %cond) { +entry: + %sub.us = select i1 %cond, i32 0, i32 0 + br label %test1.loop + +test1.loop: + %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test1.loop ] + %inc11.us = add nsw i32 %inc1115.us, 1 + %cmp.us = icmp slt i32 %inc11.us, 2 + br i1 %cmp.us, label %test1.loop, label %for.end + +for.end: + %tobool.us = icmp eq i32 %inc1115.us, 0 + %mul.us = shl i32 %inc1115.us, 24 + %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us + %sext.us = mul i32 %mul.us, %sub.cond.us + %f = ashr i32 %sext.us, 24 + br label %exit + +exit: + ret i32 %f +} + ; PR15470: LSR miscompile. The test2 function should return '1'. +; It is illegal to fold SCEVUnknown (sext.us) into the recurrence +; because it is defined after the loop where this recurrence belongs. ; ; SCEV does not know how to denormalize chained recurrences, so make ; sure they aren't marked as post-inc users. ; ; CHECK-LABEL: IV Users for loop %test2.loop -; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test2.loop> in %f = ashr i32 %sext.us, 24 +; CHECK-NO-LCSSA: %sub.cond.us = ((-1 * %sub.us) + {0,+,1}<%test2.loop>) (post-inc with loop %test2.loop) in %sext.us = mul i32 %mul.us, %sub.cond.us define i32 @test2() { entry: br label %test2.loop diff --git a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll index 00ab21e46d5d76df7a77c0d61d19ba8b30a7be79..3a0ab0f03b99544eb14bab4cc00be9f53c4a4e52 100644 --- a/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll +++ b/test/Analysis/LazyValueAnalysis/lvi-after-jumpthreading.ll @@ -10,17 +10,23 @@ define i8 @test1(i32 %a, i32 %length) { ; CHECK-LABEL: LVI for function 'test1': entry: +; CHECK-LABEL: entry: +; CHECK-NEXT: ; LatticeVal for: 'i32 %a' is: overdefined +; CHECK-NEXT: ; LatticeVal for: 'i32 %length' is: overdefined br label %loop + ; CHECK-LABEL: backedge: -; CHECK-NEXT: ; CachedLatticeValues for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' -; CHECK-DAG: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<0, 400>' -; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] -; CHECK-NEXT: ; CachedLatticeValues for: ' %iv.next = add nsw i32 %iv, 1' -; CHECK-NEXT: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<1, 401>' -; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 +; CHECK-NEXT: ; LatticeVal for: 'i32 %a' is: overdefined +; CHECK-NEXT: ; LatticeVal for: 'i32 %length' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, 400> +; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<399, 400> +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] +; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, 401> +; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%exit' is: constantrange<400, 401> +; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 +; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %cont = icmp slt i32 %iv.next, 400' in BB: '%exit' is: constantrange<0, -1> ; CHECK-NEXT: %cont = icmp slt i32 %iv.next, 400 -; CHECK-NEXT: br i1 %cont, label %backedge, label %exit - ; CHECK-NOT: loop loop: %iv = phi i32 [0, %entry], [%iv.next, %backedge] @@ -36,46 +42,58 @@ exit: ret i8 0 } - ; Here JT does not transform the code, but LVICache is populated during the processing of blocks. define i8 @test2(i32 %n) { ; CHECK-LABEL: LVI for function 'test2': ; CHECK-LABEL: entry: -; CHECK-LABEL: ; OverDefined values for block are: -; CHECK-NEXT: ;i32 %n +; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined ; CHECK-NEXT: br label %loop entry: br label %loop ; CHECK-LABEL: loop: -; CHECK-LABEL: ; OverDefined values for block are: -; CHECK-NEXT: ; %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ] -; CHECK-NEXT: ; CachedLatticeValues for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' -; CHECK-DAG: ; at beginning of BasicBlock: '%loop' LatticeVal: 'constantrange<0, -2147483647>' -; CHECK-DAG: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<0, -2147483648>' -; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] -; CHECK: %cnd = and i1 %cnd1, %cnd2 -; CHECK: br i1 %cnd, label %backedge, label %exit +; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%loop' is: constantrange<0, -2147483647> +; CHECK-DAG: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%backedge' is: constantrange<0, -2147483648> +; CHECK-DAG: ; LatticeVal for: ' %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ]' in BB: '%exit' is: constantrange<0, -2147483647> +; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] loop: %iv = phi i32 [0, %entry], [%iv.next, %backedge] +; CHECK-NEXT: ; LatticeVal for: ' %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%loop' is: overdefined +; CHECK-DAG: ; LatticeVal for: ' %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%backedge' is: constantrange<1, -2147483648> +; CHECK-DAG: ; LatticeVal for: ' %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ]' in BB: '%exit' is: overdefined +; CHECK-NEXT: %iv2 = phi i32 [ %n, %entry ], [ %iv2.next, %backedge ] %iv2 = phi i32 [%n, %entry], [%iv2.next, %backedge] + +; CHECK-NEXT: ; LatticeVal for: ' %cnd1 = icmp sge i32 %iv, 0' in BB: '%loop' is: overdefined +; CHECK-DAG: ; LatticeVal for: ' %cnd1 = icmp sge i32 %iv, 0' in BB: '%backedge' is: overdefined +; CHECK-DAG: ; LatticeVal for: ' %cnd1 = icmp sge i32 %iv, 0' in BB: '%exit' is: overdefined +; CHECK-NEXT: %cnd1 = icmp sge i32 %iv, 0 %cnd1 = icmp sge i32 %iv, 0 %cnd2 = icmp sgt i32 %iv2, 0 +; CHECK: %cnd2 = icmp sgt i32 %iv2, 0 +; CHECK: ; LatticeVal for: ' %cnd = and i1 %cnd1, %cnd2' in BB: '%loop' is: overdefined +; CHECK-DAG: ; LatticeVal for: ' %cnd = and i1 %cnd1, %cnd2' in BB: '%backedge' is: constantrange<-1, 0> +; CHECK-DAG: ; LatticeVal for: ' %cnd = and i1 %cnd1, %cnd2' in BB: '%exit' is: overdefined +; CHECK-NEXT: %cnd = and i1 %cnd1, %cnd2 %cnd = and i1 %cnd1, %cnd2 br i1 %cnd, label %backedge, label %exit ; CHECK-LABEL: backedge: -; CHECK-NEXT: ; CachedLatticeValues for: ' %iv.next = add nsw i32 %iv, 1' -; CHECK-NEXT: ; at beginning of BasicBlock: '%backedge' LatticeVal: 'constantrange<1, -2147483647>' -; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 -; CHECK-NEXT: %iv2.next = sub nsw i32 %iv2, 1 -; CHECK: %cont = and i1 %cont1, %cont2 -; CHECK: br i1 %cont, label %loop, label %exit +; CHECK-NEXT: ; LatticeVal for: 'i32 %n' is: overdefined +; CHECK-NEXT: ; LatticeVal for: ' %iv.next = add nsw i32 %iv, 1' in BB: '%backedge' is: constantrange<1, -2147483647> +; CHECK-NEXT: %iv.next = add nsw i32 %iv, 1 backedge: %iv.next = add nsw i32 %iv, 1 %iv2.next = sub nsw i32 %iv2, 1 +; CHECK: ; LatticeVal for: ' %cont1 = icmp slt i32 %iv.next, 400' in BB: '%backedge' is: overdefined +; CHECK-NEXT: %cont1 = icmp slt i32 %iv.next, 400 %cont1 = icmp slt i32 %iv.next, 400 +; CHECK-NEXT: ; LatticeVal for: ' %cont2 = icmp sgt i32 %iv2.next, 0' in BB: '%backedge' is: overdefined +; CHECK-NEXT: %cont2 = icmp sgt i32 %iv2.next, 0 %cont2 = icmp sgt i32 %iv2.next, 0 +; CHECK-NEXT: ; LatticeVal for: ' %cont = and i1 %cont1, %cont2' in BB: '%backedge' is: overdefined +; CHECK-NEXT: %cont = and i1 %cont1, %cont2 %cont = and i1 %cont1, %cont2 br i1 %cont, label %loop, label %exit diff --git a/test/Analysis/MemorySSA/invariant-groups.ll b/test/Analysis/MemorySSA/invariant-groups.ll index 6e94ae178dbbfc5936decee9fb4ca01b16a59e3a..26a0a32e86f77d97253819b43b638f7415fa7820 100644 --- a/test/Analysis/MemorySSA/invariant-groups.ll +++ b/test/Analysis/MemorySSA/invariant-groups.ll @@ -16,6 +16,8 @@ define i32 @foo(i32* %a) { store i32 1, i32* @g, align 4 %1 = bitcast i32* %a to i8* +; CHECK: MemoryUse(2) +; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -33,6 +35,8 @@ define i32 @skipBarrier(i32* %a) { store i32 0, i32* %a, align 4, !invariant.group !0 %1 = bitcast i32* %a to i8* +; CHECK: MemoryUse(1) +; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -50,6 +54,8 @@ define i32 @skipBarrier2(i32* %a) { %v = load i32, i32* %a, align 4, !invariant.group !0 %1 = bitcast i32* %a to i8* +; CHECK: MemoryUse(liveOnEntry) +; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -79,6 +85,8 @@ define i32 @handleInvariantGroups(i32* %a) { ; CHECK-NEXT: store i32 1 store i32 1, i32* @g, align 4 %1 = bitcast i32* %a to i8* +; CHECK: MemoryUse(2) +; CHECK-NEXT: %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a8 = call i8* @llvm.invariant.group.barrier(i8* %1) %a32 = bitcast i8* %a8 to i32* @@ -135,6 +143,9 @@ entry: ; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) + +; CHECK: MemoryUse(2) +; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier(i8* %p) %after = call i8* @llvm.invariant.group.barrier(i8* %p) br i1 undef, label %Loop.Body, label %Loop.End @@ -179,6 +190,9 @@ entry: ; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) + +; CHECK: MemoryUse(2) +; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier(i8* %p) %after = call i8* @llvm.invariant.group.barrier(i8* %p) br i1 undef, label %Loop.Body, label %Loop.End @@ -238,6 +252,8 @@ entry: ; CHECK: 2 = MemoryDef(1) ; CHECK-NEXT: call void @clobber call void @clobber8(i8* %p) +; CHECK: MemoryUse(2) +; CHECK-NEXT: %after = call i8* @llvm.invariant.group.barrier(i8* %p) %after = call i8* @llvm.invariant.group.barrier(i8* %p) br i1 undef, label %Loop.Pre, label %Loop.End diff --git a/test/Analysis/ScalarEvolution/ZeroStep.ll b/test/Analysis/ScalarEvolution/ZeroStep.ll new file mode 100644 index 0000000000000000000000000000000000000000..fc6ed018e90332fb8700230c6822e3f011f24510 --- /dev/null +++ b/test/Analysis/ScalarEvolution/ZeroStep.ll @@ -0,0 +1,18 @@ +; RUN: opt -analyze -scalar-evolution < %s -o - -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.9.0" + +; Test that SCEV is capable of figuring out value of 'IV' that actually does not change. +; CHECK: Classifying expressions for: @foo +; CHECK: %iv.i = phi i64 +; CHECK: -5 U: [-5,-4) S: [-5,-4) Exits: -5 LoopDispositions: { %loop: Invariant } +define void @foo() { +entry: + br label %loop + +loop: + %iv.i = phi i64 [ -5, %entry ], [ %iv.next.i, %loop ] + %iv.next.i = add nsw i64 %iv.i, 0 + br label %loop +} diff --git a/test/Analysis/ScalarEvolution/different-loops-recs.ll b/test/Analysis/ScalarEvolution/different-loops-recs.ll new file mode 100644 index 0000000000000000000000000000000000000000..6b88f09e936fbe66039706e21c349291357986d5 --- /dev/null +++ b/test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -0,0 +1,512 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +; This test set ensures that we can correctly operate with recurrencies from +; different loops. + +; Check that we can evaluate a sum of phis from two different loops in any +; order. + +define void @test_00() { + +; CHECK-LABEL: Classifying expressions for: @test_00 +; CHECK: %sum1 = add i32 %phi1, %phi2 +; CHECK-NEXT: --> {14,+,3}<%loop1> +; CHECK: %sum2 = add i32 %sum1, %phi3 +; CHECK-NEXT: --> {20,+,6}<%loop1> +; CHECK: %sum3 = add i32 %phi4, %phi5 +; CHECK-NEXT: --> {116,+,3}<%loop2> +; CHECK: %sum4 = add i32 %sum3, %phi6 +; CHECK-NEXT: --> {159,+,6}<%loop2> +; CHECK: %s1 = add i32 %phi1, %phi4 +; CHECK-NEXT: --> {{{{}}73,+,1}<%loop1>,+,1}<%loop2> +; CHECK: %s2 = add i32 %phi5, %phi2 +; CHECK-NEXT: --> {{{{}}57,+,2}<%loop1>,+,2}<%loop2> +; CHECK: %s3 = add i32 %sum1, %sum3 +; CHECK-NEXT: --> {{{{}}130,+,3}<%loop1>,+,3}<%loop2> +; CHECK: %s4 = add i32 %sum4, %sum2 +; CHECK-NEXT: --> {{{{}}179,+,6}<%loop1>,+,6}<%loop2> +; CHECK: %s5 = add i32 %phi3, %sum3 +; CHECK-NEXT: --> {{{{}}122,+,3}<%loop1>,+,3}<%loop2> +; CHECK: %s6 = add i32 %sum2, %phi6 +; CHECK-NEXT: --> {{{{}}63,+,6}<%loop1>,+,3}<%loop2> + +entry: + br label %loop1 + +loop1: + %phi1 = phi i32 [ 10, %entry ], [ %phi1.inc, %loop1 ] + %phi2 = phi i32 [ 4, %entry ], [ %phi2.inc, %loop1 ] + %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ] + %phi1.inc = add i32 %phi1, 1 + %phi2.inc = add i32 %phi2, 2 + %phi3.inc = add i32 %phi3, 3 + %sum1 = add i32 %phi1, %phi2 + %sum2 = add i32 %sum1, %phi3 + %cond1 = icmp ult i32 %sum2, 1000 + br i1 %cond1, label %loop1, label %loop2 + +loop2: + %phi4 = phi i32 [ 63, %loop1 ], [ %phi4.inc, %loop2 ] + %phi5 = phi i32 [ 53, %loop1 ], [ %phi5.inc, %loop2 ] + %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ] + %phi4.inc = add i32 %phi4, 1 + %phi5.inc = add i32 %phi5, 2 + %phi6.inc = add i32 %phi6, 3 + %sum3 = add i32 %phi4, %phi5 + %sum4 = add i32 %sum3, %phi6 + %cond2 = icmp ult i32 %sum4, 1000 + br i1 %cond2, label %loop2, label %exit + +exit: + %s1 = add i32 %phi1, %phi4 + %s2 = add i32 %phi5, %phi2 + %s3 = add i32 %sum1, %sum3 + %s4 = add i32 %sum4, %sum2 + %s5 = add i32 %phi3, %sum3 + %s6 = add i32 %sum2, %phi6 + ret void +} + +; Check that we can evaluate a sum of phis+invariants from two different loops +; in any order. + +define void @test_01(i32 %a, i32 %b) { + +; CHECK-LABEL: Classifying expressions for: @test_01 +; CHECK: %sum1 = add i32 %phi1, %phi2 +; CHECK-NEXT: --> {(%a + %b),+,3}<%loop1> +; CHECK: %sum2 = add i32 %sum1, %phi3 +; CHECK-NEXT: --> {(6 + %a + %b),+,6}<%loop1> +; CHECK: %is1 = add i32 %sum2, %a +; CHECK-NEXT: --> {(6 + (2 * %a) + %b),+,6}<%loop1> +; CHECK: %sum3 = add i32 %phi4, %phi5 +; CHECK-NEXT: --> {116,+,3}<%loop2> +; CHECK: %sum4 = add i32 %sum3, %phi6 +; CHECK-NEXT: --> {159,+,6}<%loop2> +; CHECK: %is2 = add i32 %sum4, %b +; CHECK-NEXT: --> {(159 + %b),+,6}<%loop2> +; CHECK: %ec2 = add i32 %is1, %is2 +; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> +; CHECK: %s1 = add i32 %phi1, %is1 +; CHECK-NEXT: --> {(6 + (3 * %a) + %b),+,7}<%loop1> +; CHECK: %s2 = add i32 %is2, %phi4 +; CHECK-NEXT: --> {(222 + %b),+,7}<%loop2> +; CHECK: %s3 = add i32 %is1, %phi5 +; CHECK-NEXT: --> {{{{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<%loop2> +; CHECK: %s4 = add i32 %phi2, %is2 +; CHECK-NEXT: --> {{{{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2> +; CHECK: %s5 = add i32 %is1, %is2 +; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> +; CHECK: %s6 = add i32 %is2, %is1 +; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> + +entry: + br label %loop1 + +loop1: + %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ] + %phi2 = phi i32 [ %b, %entry ], [ %phi2.inc, %loop1 ] + %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ] + %phi1.inc = add i32 %phi1, 1 + %phi2.inc = add i32 %phi2, 2 + %phi3.inc = add i32 %phi3, 3 + %sum1 = add i32 %phi1, %phi2 + %sum2 = add i32 %sum1, %phi3 + %is1 = add i32 %sum2, %a + %cond1 = icmp ult i32 %is1, 1000 + br i1 %cond1, label %loop1, label %loop2 + +loop2: + %phi4 = phi i32 [ 63, %loop1 ], [ %phi4.inc, %loop2 ] + %phi5 = phi i32 [ 53, %loop1 ], [ %phi5.inc, %loop2 ] + %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ] + %phi4.inc = add i32 %phi4, 1 + %phi5.inc = add i32 %phi5, 2 + %phi6.inc = add i32 %phi6, 3 + %sum3 = add i32 %phi4, %phi5 + %sum4 = add i32 %sum3, %phi6 + %is2 = add i32 %sum4, %b + %ec2 = add i32 %is1, %is2 + %cond2 = icmp ult i32 %ec2, 1000 + br i1 %cond2, label %loop2, label %exit + +exit: + %s1 = add i32 %phi1, %is1 + %s2 = add i32 %is2, %phi4 + %s3 = add i32 %is1, %phi5 + %s4 = add i32 %phi2, %is2 + %s5 = add i32 %is1, %is2 + %s6 = add i32 %is2, %is1 + ret void +} + +; Check that we can correctly evaluate a sum of phis+variants from two different +; loops in any order. + +define void @test_02(i32 %a, i32 %b, i32* %p) { + +; CHECK-LABEL: Classifying expressions for: @test_02 +; CHECK: %sum1 = add i32 %phi1, %phi2 +; CHECK-NEXT: --> {(%a + %b),+,3}<%loop1> +; CHECK: %sum2 = add i32 %sum1, %phi3 +; CHECK-NEXT: --> {(6 + %a + %b),+,6}<%loop1> +; CHECK: %is1 = add i32 %sum2, %v1 +; CHECK-NEXT: --> ({(6 + %a + %b),+,6}<%loop1> + %v1) +; CHECK: %sum3 = add i32 %phi4, %phi5 +; CHECK-NEXT: --> {(%a + %b),+,3}<%loop2> +; CHECK: %sum4 = add i32 %sum3, %phi6 +; CHECK-NEXT: --> {(43 + %a + %b),+,6}<%loop2> +; CHECK: %is2 = add i32 %sum4, %v2 +; CHECK-NEXT: --> ({(43 + %a + %b),+,6}<%loop2> + %v2) +; CHECK: %is3 = add i32 %v1, %sum2 +; CHECK-NEXT: --> ({(6 + %a + %b),+,6}<%loop1> + %v1) +; CHECK: %ec2 = add i32 %is1, %is3 +; CHECK-NEXT: --> (2 * ({(6 + %a + %b),+,6}<%loop1> + %v1)) +; CHECK: %s1 = add i32 %phi1, %is1 +; CHECK-NEXT: --> ({(6 + (2 * %a) + %b),+,7}<%loop1> + %v1) +; CHECK: %s2 = add i32 %is2, %phi4 +; CHECK-NEXT: --> ({(43 + (2 * %a) + %b),+,7}<%loop2> + %v2) +; CHECK: %s3 = add i32 %is1, %phi5 +; CHECK-NEXT: --> {({(6 + (2 * %b) + %a),+,6}<%loop1> + %v1),+,2}<%loop2> +; CHECK: %s4 = add i32 %phi2, %is2 +; CHECK-NEXT: --> ({{{{}}(43 + (2 * %b) + %a),+,2}<%loop1>,+,6}<%loop2> + %v2) +; CHECK: %s5 = add i32 %is1, %is2 +; CHECK-NEXT: --> ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2) +; CHECK: %s6 = add i32 %is2, %is1 +; CHECK-NEXT: --> ({({(49 + (2 * %a) + (2 * %b)),+,6}<%loop1> + %v1),+,6}<%loop2> + %v2) + +entry: + br label %loop1 + +loop1: + %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ] + %phi2 = phi i32 [ %b, %entry ], [ %phi2.inc, %loop1 ] + %phi3 = phi i32 [ 6, %entry ], [ %phi3.inc, %loop1 ] + %phi1.inc = add i32 %phi1, 1 + %phi2.inc = add i32 %phi2, 2 + %phi3.inc = add i32 %phi3, 3 + %v1 = load i32, i32* %p + %sum1 = add i32 %phi1, %phi2 + %sum2 = add i32 %sum1, %phi3 + %is1 = add i32 %sum2, %v1 + %cond1 = icmp ult i32 %is1, 1000 + br i1 %cond1, label %loop1, label %loop2 + +loop2: + %phi4 = phi i32 [ %a, %loop1 ], [ %phi4.inc, %loop2 ] + %phi5 = phi i32 [ %b, %loop1 ], [ %phi5.inc, %loop2 ] + %phi6 = phi i32 [ 43, %loop1 ], [ %phi6.inc, %loop2 ] + %phi4.inc = add i32 %phi4, 1 + %phi5.inc = add i32 %phi5, 2 + %phi6.inc = add i32 %phi6, 3 + %v2 = load i32, i32* %p + %sum3 = add i32 %phi4, %phi5 + %sum4 = add i32 %sum3, %phi6 + %is2 = add i32 %sum4, %v2 + %is3 = add i32 %v1, %sum2 + %ec2 = add i32 %is1, %is3 + %cond2 = icmp ult i32 %ec2, 1000 + br i1 %cond2, label %loop2, label %exit + +exit: + %s1 = add i32 %phi1, %is1 + %s2 = add i32 %is2, %phi4 + %s3 = add i32 %is1, %phi5 + %s4 = add i32 %phi2, %is2 + %s5 = add i32 %is1, %is2 + %s6 = add i32 %is2, %is1 + ret void +} + +; Mix of previous use cases that demonstrates %s3 can be incorrectly treated as +; a recurrence of loop1 because of operands order if we pick recurrencies in an +; incorrect order. It also shows that we cannot safely fold v1 (SCEVUnknown) +; because we cannot prove for sure that it doesn't use Phis of loop 2. + +define void @test_03(i32 %a, i32 %b, i32 %c, i32* %p) { + +; CHECK-LABEL: Classifying expressions for: @test_03 +; CHECK: %v1 = load i32, i32* %p +; CHECK-NEXT: --> %v1 +; CHECK: %s1 = add i32 %phi1, %v1 +; CHECK-NEXT: --> ({%a,+,1}<%loop1> + %v1) +; CHECK: %s2 = add i32 %s1, %b +; CHECK-NEXT: --> ({(%a + %b),+,1}<%loop1> + %v1) +; CHECK: %s3 = add i32 %s2, %phi2 +; CHECK-NEXT: --> ({{{{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1) + +entry: + br label %loop1 + +loop1: + %phi1 = phi i32 [ %a, %entry ], [ %phi1.inc, %loop1 ] + %phi1.inc = add i32 %phi1, 1 + %cond1 = icmp ult i32 %phi1, %c + br i1 %cond1, label %loop1, label %loop2 + +loop2: + %phi2 = phi i32 [ %a, %loop1 ], [ %phi2.inc, %loop2 ] + %phi2.inc = add i32 %phi2, 2 + %v1 = load i32, i32* %p + %s1 = add i32 %phi1, %v1 + %s2 = add i32 %s1, %b + %s3 = add i32 %s2, %phi2 + %cond2 = icmp ult i32 %s3, %c + br i1 %cond2, label %loop2, label %exit + +exit: + + ret void +} + +; Another mix of previous use cases that demonstrates that incorrect picking of +; a loop for a recurrence may cause a crash of SCEV analysis. +define void @test_04() { + +; CHECK-LABEL: Classifying expressions for: @test_04 +; CHECK: %tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ] +; CHECK-NEXT: --> {2,+,1}<%loop1> +; CHECK: %tmp2 = trunc i64 %tmp to i32 +; CHECK-NEXT: --> {2,+,1}<%loop1> +; CHECK: %tmp4 = add nuw nsw i64 %tmp, 1 +; CHECK-NEXT: --> {3,+,1}<%loop1> +; CHECK: %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ] +; CHECK-NEXT: --> {2,+,1}<%loop2> +; CHECK: %tmp10 = sub i64 %tmp9, %tmp7 +; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {-2,+,-1}<%loop2>) +; CHECK: %tmp11 = add i64 %tmp10, undef +; CHECK-NEXT: --> ((sext i8 %tmp8 to i64) + {(-2 + undef),+,-1}<%loop2>) +; CHECK: %tmp13 = trunc i64 %tmp11 to i32 +; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {(trunc i64 (-2 + undef) to i32),+,-1}<%loop2>) +; CHECK: %tmp14 = sub i32 %tmp13, %tmp2 +; CHECK-NEXT: --> ((sext i8 %tmp8 to i32) + {{{{}}(-2 + (trunc i64 (-2 + undef) to i32)),+,-1}<%loop1>,+,-1}<%loop2>) +; CHECK: %tmp15 = add nuw nsw i64 %tmp7, 1 +; CHECK-NEXT: --> {3,+,1}<%loop2> + +bb: + br label %loop1 + +loop1: + %tmp = phi i64 [ 2, %bb ], [ %tmp4, %bb3 ] + %tmp2 = trunc i64 %tmp to i32 + br i1 undef, label %loop2, label %bb3 + +bb3: + %tmp4 = add nuw nsw i64 %tmp, 1 + br label %loop1 + +bb5: + ret void + +loop2: + %tmp7 = phi i64 [ %tmp15, %loop2 ], [ 2, %loop1 ] + %tmp8 = load i8, i8 addrspace(1)* undef, align 1 + %tmp9 = sext i8 %tmp8 to i64 + %tmp10 = sub i64 %tmp9, %tmp7 + %tmp11 = add i64 %tmp10, undef + %tmp13 = trunc i64 %tmp11 to i32 + %tmp14 = sub i32 %tmp13, %tmp2 + %tmp15 = add nuw nsw i64 %tmp7, 1 + %tmp16 = icmp slt i64 %tmp15, %tmp + br i1 %tmp16, label %loop2, label %bb5 +} + +@A = weak global [1000 x i32] zeroinitializer, align 32 + +; Demonstrate a situation when we can add two recs with different degrees from +; the same loop. +define void @test_05(i32 %N) { + +; CHECK-LABEL: Classifying expressions for: @test_05 +; CHECK: %SQ = mul i32 %i.0, %i.0 +; CHECK-NEXT: --> {4,+,5,+,2}<%bb3> +; CHECK: %tmp4 = mul i32 %i.0, 2 +; CHECK-NEXT: --> {4,+,2}<%bb3> +; CHECK: %tmp5 = sub i32 %SQ, %tmp4 +; CHECK-NEXT: --> {0,+,3,+,2}<%bb3> + +entry: + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i32 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp2 = add i32 %i.0, 1 ; [#uses=1] + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i32 [ 2, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %SQ = mul i32 %i.0, %i.0 + %tmp4 = mul i32 %i.0, 2 + %tmp5 = sub i32 %SQ, %tmp4 + %tmp3 = icmp sle i32 %tmp5, 9999 ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} + +; Check that we can add Phis from different loops with different nesting, nested +; loop comes first. +define void @test_06() { + +; CHECK-LABEL: Classifying expressions for: @test_06 +; CHECK: %s1 = add i32 %phi1, %phi2 +; CHECK-NEXT: --> {{{{}}30,+,1}<%loop1>,+,2}<%loop2> +; CHECK: %s2 = add i32 %phi2, %phi1 +; CHECK-NEXT: --> {{{{}}30,+,1}<%loop1>,+,2}<%loop2> +; CHECK: %s3 = add i32 %phi1, %phi3 +; CHECK-NEXT: --> {{{{}}40,+,1}<%loop1>,+,3}<%loop3> +; CHECK: %s4 = add i32 %phi3, %phi1 +; CHECK-NEXT: --> {{{{}}40,+,1}<%loop1>,+,3}<%loop3> +; CHECK: %s5 = add i32 %phi2, %phi3 +; CHECK-NEXT: --> {{{{}}50,+,2}<%loop2>,+,3}<%loop3> +; CHECK: %s6 = add i32 %phi3, %phi2 +; CHECK-NEXT: --> {{{{}}50,+,2}<%loop2>,+,3}<%loop3> + +entry: + br label %loop1 + +loop1: + %phi1 = phi i32 [ 10, %entry ], [ %phi1.inc, %loop1.exit ] + br label %loop2 + +loop2: + %phi2 = phi i32 [ 20, %loop1 ], [ %phi2.inc, %loop2 ] + %phi2.inc = add i32 %phi2, 2 + %cond2 = icmp ult i32 %phi2.inc, 1000 + br i1 %cond2, label %loop2, label %loop1.exit + +loop1.exit: + %phi1.inc = add i32 %phi1, 1 + %cond1 = icmp ult i32 %phi1.inc, 1000 + br i1 %cond1, label %loop1, label %loop3 + +loop3: + %phi3 = phi i32 [ 30, %loop1.exit ], [ %phi3.inc, %loop3 ] + %phi3.inc = add i32 %phi3, 3 + %cond3 = icmp ult i32 %phi3.inc, 1000 + br i1 %cond3, label %loop3, label %exit + +exit: + %s1 = add i32 %phi1, %phi2 + %s2 = add i32 %phi2, %phi1 + %s3 = add i32 %phi1, %phi3 + %s4 = add i32 %phi3, %phi1 + %s5 = add i32 %phi2, %phi3 + %s6 = add i32 %phi3, %phi2 + ret void +} + +; Check that we can add Phis from different loops with different nesting, nested +; loop comes second. +define void @test_07() { + +; CHECK-LABEL: Classifying expressions for: @test_07 +; CHECK: %s1 = add i32 %phi1, %phi2 +; CHECK-NEXT: --> {{{{}}30,+,1}<%loop1>,+,2}<%loop2> +; CHECK: %s2 = add i32 %phi2, %phi1 +; CHECK-NEXT: --> {{{{}}30,+,1}<%loop1>,+,2}<%loop2> +; CHECK: %s3 = add i32 %phi1, %phi3 +; CHECK-NEXT: --> {{{{}}40,+,3}<%loop3>,+,1}<%loop1> +; CHECK: %s4 = add i32 %phi3, %phi1 +; CHECK-NEXT: --> {{{{}}40,+,3}<%loop3>,+,1}<%loop1> +; CHECK: %s5 = add i32 %phi2, %phi3 +; CHECK-NEXT: --> {{{{}}50,+,3}<%loop3>,+,2}<%loop2> +; CHECK: %s6 = add i32 %phi3, %phi2 +; CHECK-NEXT: --> {{{{}}50,+,3}<%loop3>,+,2}<%loop2> + +entry: + br label %loop3 + +loop3: + %phi3 = phi i32 [ 30, %entry ], [ %phi3.inc, %loop3 ] + %phi3.inc = add i32 %phi3, 3 + %cond3 = icmp ult i32 %phi3.inc, 1000 + br i1 %cond3, label %loop3, label %loop1 + +loop1: + %phi1 = phi i32 [ 10, %loop3 ], [ %phi1.inc, %loop1.exit ] + br label %loop2 + +loop2: + %phi2 = phi i32 [ 20, %loop1 ], [ %phi2.inc, %loop2 ] + %phi2.inc = add i32 %phi2, 2 + %cond2 = icmp ult i32 %phi2.inc, 1000 + br i1 %cond2, label %loop2, label %loop1.exit + +loop1.exit: + %phi1.inc = add i32 %phi1, 1 + %cond1 = icmp ult i32 %phi1.inc, 1000 + br i1 %cond1, label %exit, label %loop1 + +exit: + %s1 = add i32 %phi1, %phi2 + %s2 = add i32 %phi2, %phi1 + %s3 = add i32 %phi1, %phi3 + %s4 = add i32 %phi3, %phi1 + %s5 = add i32 %phi2, %phi3 + %s6 = add i32 %phi3, %phi2 + ret void +} + +; Make sure that a complicated Phi does not get folded with rec's start value +; of a loop which is above. +define void @test_08() { + +; CHECK-LABEL: Classifying expressions for: @test_08 +; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1 +; CHECK-NEXT: --> ({0,+,-1}<%loop_2> + %iv.2.1) +; CHECK: %tmp12 = trunc i64 %tmp11 to i32 +; CHECK-NEXT: --> (trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32) +; CHECK: %tmp14 = mul i32 %tmp12, %tmp7 +; CHECK-NEXT: --> ((trunc i64 ({0,+,-1}<%loop_2> + %iv.2.1) to i32) * {-1,+,-1}<%loop_1>) +; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1 +; CHECK-NEXT: --> ({2,+,1}<%loop_1> * %iv.2.1) + +entry: + br label %loop_1 + +loop_1: + %iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ] + %iv.1.2 = phi i32 [ -1, %entry ], [ %iv.1.2.next, %loop_1_back_branch ] + br label %loop_1_exit + +dead: + br label %loop_1_exit + +loop_1_exit: + %tmp5 = icmp sgt i64 %iv.1.1, 2 + br i1 %tmp5, label %loop_2_preheader, label %loop_1_back_branch + +loop_1_back_branch: + %iv.1.1.next = add nuw nsw i64 %iv.1.1, 1 + %iv.1.2.next = add nsw i32 %iv.1.2, 1 + br label %loop_1 + +loop_2_preheader: + %tmp6 = sub i64 1, %iv.1.1 + %tmp7 = trunc i64 %tmp6 to i32 + br label %loop_2 + +loop_2: + %iv.2.1 = phi i64 [ 0, %loop_2_preheader ], [ %tmp16, %loop_2 ] + %iv.2.2 = phi i64 [ 0, %loop_2_preheader ], [ %iv.2.2.next, %loop_2 ] + %iv.2.3 = phi i64 [ 2, %loop_2_preheader ], [ %iv.2.3.next, %loop_2 ] + %tmp11 = add i64 %iv.2.2, %iv.2.1 + %tmp12 = trunc i64 %tmp11 to i32 + %tmp14 = mul i32 %tmp12, %tmp7 + %tmp16 = mul i64 %iv.2.1, %iv.1.1 + %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1 + %iv.2.2.next = add nsw i64 %iv.2.2, -1 + %tmp17 = icmp slt i64 %iv.2.3.next, %iv.1.1 + br i1 %tmp17, label %loop_2, label %exit + +exit: + %tmp10 = add i32 %iv.1.2, 3 + ret void +} diff --git a/test/Analysis/ScalarEvolution/exponential-behavior.ll b/test/Analysis/ScalarEvolution/exponential-behavior.ll new file mode 100644 index 0000000000000000000000000000000000000000..919521a58b6d1a2c7cab37ffd5ccce2321672a4a --- /dev/null +++ b/test/Analysis/ScalarEvolution/exponential-behavior.ll @@ -0,0 +1,57 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'f': + +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is 0 +; CHECK: Loop %loop: Unpredictable predicated backedge-taken count. + + +define void @f(i32 %n, i32* %ptr) { +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ] + %iv.inc = add i32 %iv, 1 + %unswitch_cond_root = icmp ne i32 %iv.inc, 42 + %us.0 = and i1 %unswitch_cond_root, %unswitch_cond_root + %us.1 = and i1 %us.0, %us.0 + %us.2 = and i1 %us.1, %us.1 + %us.3 = and i1 %us.2, %us.2 + %us.4 = and i1 %us.3, %us.3 + %us.5 = and i1 %us.4, %us.4 + %us.6 = and i1 %us.5, %us.5 + %us.7 = and i1 %us.6, %us.6 + %us.8 = and i1 %us.7, %us.7 + %us.9 = and i1 %us.8, %us.8 + %us.10 = and i1 %us.9, %us.9 + %us.11 = and i1 %us.10, %us.10 + %us.12 = and i1 %us.11, %us.11 + %us.13 = and i1 %us.12, %us.12 + %us.14 = and i1 %us.13, %us.13 + %us.15 = and i1 %us.14, %us.14 + %us.16 = and i1 %us.15, %us.15 + %us.17 = and i1 %us.16, %us.16 + %us.18 = and i1 %us.17, %us.17 + %us.19 = and i1 %us.18, %us.18 + %us.20 = and i1 %us.19, %us.19 + %us.21 = and i1 %us.20, %us.20 + %us.22 = and i1 %us.21, %us.21 + %us.23 = and i1 %us.22, %us.22 + %us.24 = and i1 %us.23, %us.23 + %us.25 = and i1 %us.24, %us.24 + %us.26 = and i1 %us.25, %us.25 + %us.27 = and i1 %us.26, %us.26 + %us.28 = and i1 %us.27, %us.27 + %us.29 = and i1 %us.28, %us.28 + br i1 %us.29, label %leave, label %be + +be: + store volatile i32 0, i32* %ptr + %becond = icmp ult i32 %iv.inc, %n + br i1 %becond, label %leave, label %loop + +leave: + ret void +} diff --git a/test/Analysis/ScalarEvolution/flags-from-poison.ll b/test/Analysis/ScalarEvolution/flags-from-poison.ll index 44ee830d9c62f313c798b551d31b8617ca451ffa..15c679a5f105a0a956d64c5a1dfd6107f61ac00f 100644 --- a/test/Analysis/ScalarEvolution/flags-from-poison.ll +++ b/test/Analysis/ScalarEvolution/flags-from-poison.ll @@ -205,7 +205,7 @@ exit: ret void } -; Demonstrate why we need a Visited set in llvm::isKnownNotFullPoison. +; Demonstrate why we need a Visited set in llvm::programUndefinedIfFullPoison. define void @test-add-not-header5(float* %input, i32 %offset) { ; CHECK-LABEL: @test-add-not-header5 entry: diff --git a/test/Analysis/ScalarEvolution/limit-depth.ll b/test/Analysis/ScalarEvolution/limit-depth.ll new file mode 100644 index 0000000000000000000000000000000000000000..5a35bfefd20a00b48f678383405a3b698bbeb0df --- /dev/null +++ b/test/Analysis/ScalarEvolution/limit-depth.ll @@ -0,0 +1,44 @@ +; RUN: opt -scalar-evolution-max-arith-depth=0 -analyze -scalar-evolution < %s | FileCheck %s + +; Check that depth set to 0 prevents getAddExpr and getMulExpr from making +; transformations in SCEV. We expect the result to be very straightforward. + +define void @test_add(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { +; CHECK-LABEL: @test_add +; CHECK: %s2 = add i32 %s1, %p3 +; CHECK-NEXT: --> (%a + %a + %b + %b + %c + %c + %d + %d + %e + %e + %f + %f) + %tmp0 = add i32 %a, %b + %tmp1 = add i32 %b, %c + %tmp2 = add i32 %c, %d + %tmp3 = add i32 %d, %e + %tmp4 = add i32 %e, %f + %tmp5 = add i32 %f, %a + + %p1 = add i32 %tmp0, %tmp3 + %p2 = add i32 %tmp1, %tmp4 + %p3 = add i32 %tmp2, %tmp5 + + %s1 = add i32 %p1, %p2 + %s2 = add i32 %s1, %p3 + ret void +} + +define void @test_mul(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { +; CHECK-LABEL: @test_mul +; CHECK: %s2 = mul i32 %s1, %p3 +; CHECK-NEXT: --> (2 * 3 * 4 * 5 * 6 * 7 * %a * %b * %c * %d * %e * %f) + %tmp0 = mul i32 %a, 2 + %tmp1 = mul i32 %b, 3 + %tmp2 = mul i32 %c, 4 + %tmp3 = mul i32 %d, 5 + %tmp4 = mul i32 %e, 6 + %tmp5 = mul i32 %f, 7 + + %p1 = mul i32 %tmp0, %tmp3 + %p2 = mul i32 %tmp1, %tmp4 + %p3 = mul i32 %tmp2, %tmp5 + + %s1 = mul i32 %p1, %p2 + %s2 = mul i32 %s1, %p3 + ret void +} diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll index a3752919d334d5229744af32a77f0f95d14a79bc..39b958d3ea0e552b0d6fe6f3184fcb2d9145c076 100644 --- a/test/Analysis/ScalarEvolution/nsw.ll +++ b/test/Analysis/ScalarEvolution/nsw.ll @@ -102,7 +102,7 @@ for.body.i.i: ; preds = %entry, %for.body.i. %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i ; CHECK: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4) -; CHECK: Loop %for.body.i.i: max backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4) +; CHECK: Loop %for.body.i.i: max backedge-taken count is 4611686018427387903 _ZSt4fillIPiiEvT_S1_RKT0_.exit: ; preds = %for.body.i.i, %entry ret void } diff --git a/test/Analysis/ScalarEvolution/trip-count-pow2.ll b/test/Analysis/ScalarEvolution/trip-count-pow2.ll index 8d053060b50c026e12ff8a4dae725814b8bae628..04d1b9544ab2c040b00723fff02fec72cb63e7f0 100644 --- a/test/Analysis/ScalarEvolution/trip-count-pow2.ll +++ b/test/Analysis/ScalarEvolution/trip-count-pow2.ll @@ -14,7 +14,7 @@ exit: ; CHECK-LABEL: @test1 ; CHECK: Loop %loop: backedge-taken count is ((-32 + (96 * %n)) /u 32) -; CHECK: Loop %loop: max backedge-taken count is ((-32 + (96 * %n)) /u 32) +; CHECK: Loop %loop: max backedge-taken count is 134217727 } ; PR19183 @@ -32,7 +32,7 @@ exit: ; CHECK-LABEL: @test2 ; CHECK: Loop %loop: backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32) -; CHECK: Loop %loop: max backedge-taken count is ((-32 + (32 * (%n /u 32))) /u 32) +; CHECK: Loop %loop: max backedge-taken count is 134217727 } define void @test3(i32 %n) { @@ -49,7 +49,7 @@ exit: ; CHECK-LABEL: @test3 ; CHECK: Loop %loop: backedge-taken count is ((-32 + (32 * %n)) /u 32) -; CHECK: Loop %loop: max backedge-taken count is ((-32 + (32 * %n)) /u 32) +; CHECK: Loop %loop: max backedge-taken count is 134217727 } define void @test4(i32 %n) { @@ -66,7 +66,7 @@ exit: ; CHECK-LABEL: @test4 ; CHECK: Loop %loop: backedge-taken count is ((-4 + (-1431655764 * %n)) /u 4) -; CHECK: Loop %loop: max backedge-taken count is ((-4 + (-1431655764 * %n)) /u 4) +; CHECK: Loop %loop: max backedge-taken count is 1073741823 } define void @test5(i32 %n) { @@ -83,5 +83,5 @@ exit: ; CHECK-LABEL: @test5 ; CHECK: Loop %loop: backedge-taken count is ((-4 + (4 * %n)) /u 4) -; CHECK: Loop %loop: max backedge-taken count is ((-4 + (4 * %n)) /u 4) +; CHECK: Loop %loop: max backedge-taken count is 1073741823 } diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll index eab314eaa9c27cf7b5b52f09c28736cdf0a89cde..655d4558a5e1211a629beaa8f5a2fdd3a014e957 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll @@ -5,22 +5,22 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- ; TBAA should prove that these calls don't interfere, since they are ; IntrArgReadMem and have TBAA metadata. -; CHECK: define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) { +; CHECK: define <8 x i16> @test0(<8 x i16>* %p, <8 x i16>* %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { ; CHECK-NEXT: entry: -; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[NUW:#[0-9]+]] -; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %a = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %p, i32 16, <8 x i1> %m, <8 x i16> %pt) [[NUW:#[0-9]+]] +; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %y, <8 x i16>* %q, i32 16, <8 x i1> %m) ; CHECK-NEXT: %c = add <8 x i16> %a, %a -define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) { +define <8 x i16> @test0(<8 x i16>* %p, <8 x i16>* %q, <8 x i16> %y, <8 x i1> %m, <8 x i16> %pt) { entry: - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind, !tbaa !2 - call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16), !tbaa !1 - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind, !tbaa !2 + %a = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %p, i32 16, <8 x i1> %m, <8 x i16> %pt) nounwind, !tbaa !2 + call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %y, <8 x i16>* %q, i32 16, <8 x i1> %m), !tbaa !1 + %b = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %p, i32 16, <8 x i1> %m, <8 x i16> %pt) nounwind, !tbaa !2 %c = add <8 x i16> %a, %b ret <8 x i16> %c } -declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind +declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) nounwind readonly +declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind ; CHECK: attributes #0 = { argmemonly nounwind readonly } ; CHECK: attributes #1 = { argmemonly nounwind } diff --git a/test/Assembler/auto_upgrade_intrinsics.ll b/test/Assembler/auto_upgrade_intrinsics.ll index d00fe5882bcd5dead6f6dccade125ccd45e7d456..87ad371deaa556e0e062c2b132050c9fdddd6127 100644 --- a/test/Assembler/auto_upgrade_intrinsics.ll +++ b/test/Assembler/auto_upgrade_intrinsics.ll @@ -85,6 +85,23 @@ define void @tests.masked.store(<2 x double>* %ptr, <2 x i1> %mask, <2 x double> ret void } +declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0) + +define <2 x double> @tests.masked.gather(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %passthru) { +; CHECK-LABEL: @tests.masked.gather( +; CHECK: @llvm.masked.gather.v2f64.v2p0f64 + %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptr, i32 1, <2 x i1> %mask, <2 x double> %passthru) + ret <2 x double> %res +} + +declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask) + +define void @tests.masked.scatter(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %val) { +; CHECK-LABEL: @tests.masked.scatter( +; CHECK: @llvm.masked.scatter.v2f64.v2p0f64 + call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptr, i32 3, <2 x i1> %mask) + ret void +} declare {}* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly declare void @llvm.invariant.end({}*, i64, i8* nocapture) nounwind diff --git a/test/Assembler/diexpression.ll b/test/Assembler/diexpression.ll index c2fa3ee14c23482cee863b6c3479b8e6054cbff4..39f4be70145af3cc7b43e41d416f2684db48e534 100644 --- a/test/Assembler/diexpression.ll +++ b/test/Assembler/diexpression.ll @@ -1,18 +1,20 @@ ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s ; RUN: verify-uselistorder %s -; CHECK: !named = !{!0, !1, !2, !3, !4, !5} -!named = !{!0, !1, !2, !3, !4, !5} +; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6} +!named = !{!0, !1, !2, !3, !4, !5, !6} ; CHECK: !0 = !DIExpression() ; CHECK-NEXT: !1 = !DIExpression(DW_OP_deref) -; CHECK-NEXT: !2 = !DIExpression(DW_OP_plus, 3) +; CHECK-NEXT: !2 = !DIExpression(DW_OP_constu, 3, DW_OP_plus) ; CHECK-NEXT: !3 = !DIExpression(DW_OP_LLVM_fragment, 3, 7) -; CHECK-NEXT: !4 = !DIExpression(DW_OP_deref, DW_OP_plus, 3, DW_OP_LLVM_fragment, 3, 7) +; CHECK-NEXT: !4 = !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 3, DW_OP_LLVM_fragment, 3, 7) ; CHECK-NEXT: !5 = !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef) +; CHECK-NEXT: !6 = !DIExpression(DW_OP_plus_uconst, 3) !0 = !DIExpression() !1 = !DIExpression(DW_OP_deref) -!2 = !DIExpression(DW_OP_plus, 3) +!2 = !DIExpression(DW_OP_constu, 3, DW_OP_plus) !3 = !DIExpression(DW_OP_LLVM_fragment, 3, 7) -!4 = !DIExpression(DW_OP_deref, DW_OP_plus, 3, DW_OP_LLVM_fragment, 3, 7) +!4 = !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 3, DW_OP_LLVM_fragment, 3, 7) !5 = !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef) +!6 = !DIExpression(DW_OP_plus_uconst, 3) diff --git a/test/Assembler/dinamespace.ll b/test/Assembler/dinamespace.ll index 346fcfb111174d6d1c0871d869105356b45c2a21..af20c19f1316057ab171cc0c12a282b0a984a49d 100644 --- a/test/Assembler/dinamespace.ll +++ b/test/Assembler/dinamespace.ll @@ -8,11 +8,11 @@ !1 = distinct !{} !2 = !DIFile(filename: "path/to/file", directory: "/path/to/dir") -; CHECK: !3 = !DINamespace(name: "Namespace", scope: !0, file: !2, line: 7) -!3 = !DINamespace(name: "Namespace", scope: !0, file: !2, line: 7) +; CHECK: !3 = !DINamespace(name: "Namespace", scope: !0) +!3 = !DINamespace(name: "Namespace", scope: !0) ; CHECK: !4 = !DINamespace(scope: !0) -!4 = !DINamespace(name: "", scope: !0, file: null, line: 0) +!4 = !DINamespace(name: "", scope: !0) !5 = !DINamespace(scope: !0) !6 = !DINamespace(scope: !0, exportSymbols: false) ; CHECK: !5 = !DINamespace(scope: !0, exportSymbols: true) diff --git a/test/Assembler/disubprogram.ll b/test/Assembler/disubprogram.ll index f6352a5e82c1b5bf43d6938d963308f49f8650c0..8a3a60aa079b5332be357defe6c4f95810dbdce9 100644 --- a/test/Assembler/disubprogram.ll +++ b/test/Assembler/disubprogram.ll @@ -6,8 +6,8 @@ define void @_Z3foov() !dbg !9 { ret void } -; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12} -!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12} +; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14} +!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14} !0 = !{null} !1 = distinct !DICompositeType(tag: DW_TAG_structure_type) @@ -61,6 +61,14 @@ define void @_Z3foov() !dbg !9 { unit: !8, templateParams: !5, declaration: !9, variables: !6) -!13 = !{i32 1, !"Debug Info Version", i32 3} -!llvm.module.flags = !{!13} +!13 = !{!4} +; CHECK: !13 = !{!4} +; CHECK: !14 = distinct !DISubprogram(name: "foo", scope: !1, file: !2, line: 1, type: !3, isLocal: true, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !8, thrownTypes: !13) +!14 = distinct !DISubprogram(name: "foo", scope: !1, + file: !2, line: 1, type: !3, isLocal: true, + isDefinition: true, scopeLine: 2, isOptimized: false, + unit: !8, thrownTypes: !13) + +!15 = !{i32 1, !"Debug Info Version", i32 3} +!llvm.module.flags = !{!15} !llvm.dbg.cu = !{!8} diff --git a/test/Assembler/globalvariable-attributes.ll b/test/Assembler/globalvariable-attributes.ll new file mode 100644 index 0000000000000000000000000000000000000000..64227a451c256ea33f834bd8fff4a9fc3bfcea55 --- /dev/null +++ b/test/Assembler/globalvariable-attributes.ll @@ -0,0 +1,19 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s + +@g1 = global i32 7 "key" = "value" "key2" = "value2" +@g2 = global i32 2, align 4 "key3" = "value3" +@g3 = global i32 2 #0 +@g4 = global i32 2, align 4 "key5" = "value5" #0 + +attributes #0 = { "string" = "value" nobuiltin norecurse } + +; CHECK: @g1 = global i32 7 #0 +; CHECK: @g2 = global i32 2, align 4 #1 +; CHECK: @g3 = global i32 2 #2 +; CHECK: @g4 = global i32 2, align 4 #3 + +; CHECK: attributes #0 = { "key"="value" "key2"="value2" } +; CHECK: attributes #1 = { "key3"="value3" } +; CHECK: attributes #2 = { nobuiltin norecurse "string"="value" } +; CHECK: attributes #3 = { nobuiltin norecurse "key5"="value5" "string"="value" } + diff --git a/test/Bindings/OCaml/core.ml b/test/Bindings/OCaml/core.ml index 105f1bc4f732e11403558339cfb7eb12137f58ca..802baa0b80b25d002c0c27e62cb0f39f5da7d391 100644 --- a/test/Bindings/OCaml/core.ml +++ b/test/Bindings/OCaml/core.ml @@ -66,6 +66,16 @@ let suite name f = let filename = Sys.argv.(1) let m = create_module context filename +(*===-- Contained types --------------------------------------------------===*) + +let test_contained_types () = + let pointer_i32 = pointer_type i32_type in + insist (i32_type = (Array.get (subtypes pointer_i32) 0)); + + let ar = struct_type context [| i32_type; i8_type |] in + insist (i32_type = (Array.get (subtypes ar)) 0); + insist (i8_type = (Array.get (subtypes ar)) 1) + (*===-- Conversion --------------------------------------------------------===*) @@ -1533,6 +1543,7 @@ let test_writer () = (*===-- Driver ------------------------------------------------------------===*) let _ = + suite "contained types" test_contained_types; suite "conversion" test_conversion; suite "target" test_target; suite "constants" test_constants; diff --git a/test/Bitcode/DIExpression-aggresult.ll b/test/Bitcode/DIExpression-aggresult.ll new file mode 100644 index 0000000000000000000000000000000000000000..5ce936d7074da2f7434a9d20148d744b862372cb --- /dev/null +++ b/test/Bitcode/DIExpression-aggresult.ll @@ -0,0 +1,36 @@ +; RUN: llvm-dis -o - %s.bc | FileCheck %s +%class.A = type { i32, i32, i32, i32 } + +define void @_Z3fooi(%class.A* sret %agg.result) #0 !dbg !3 { + ; CHECK: call void @llvm.dbg.declare({{.*}}, metadata ![[EXPR:[0-9]+]]), !dbg + ; CHECK: ![[EXPR]] = !DIExpression() + call void @llvm.dbg.declare(metadata %class.A* %agg.result, metadata !13, metadata !16), !dbg !17 + ret void, !dbg !17 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { ssp } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "a.cc", directory: "/tmp") +!2 = !{i32 1, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 4, type: !4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!4 = !DISubroutineType(types: !5) +!5 = !{!6} +!6 = !DICompositeType(tag: DW_TAG_class_type, name: "A", scope: !0, file: !1, line: 2, size: 128, align: 32, elements: !7) +!7 = !{!8, !10, !11, !12} +!8 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !1, file: !1, line: 2, baseType: !9, size: 32, align: 32) +!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!10 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !1, file: !1, line: 2, baseType: !9, size: 32, align: 32, offset: 32) +!11 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !1, file: !1, line: 2, baseType: !9, size: 32, align: 32, offset: 64) +!12 = !DIDerivedType(tag: DW_TAG_member, name: "o", scope: !1, file: !1, line: 2, baseType: !9, size: 32, align: 32, offset: 96) +!13 = !DILocalVariable(name: "my_a", scope: !14, file: !1, line: 9, type: !15) +!14 = distinct !DILexicalBlock(scope: !3, file: !1, line: 4, column: 14) +!15 = !DIDerivedType(tag: DW_TAG_reference_type, file: !1, baseType: !6) +!16 = !DIExpression(DW_OP_deref) +!17 = !DILocation(line: 9, column: 5, scope: !3) diff --git a/test/Bitcode/DIExpression-aggresult.ll.bc b/test/Bitcode/DIExpression-aggresult.ll.bc new file mode 100644 index 0000000000000000000000000000000000000000..bcf6e175b4d04de249adadc1c26db03ba490ac85 Binary files /dev/null and b/test/Bitcode/DIExpression-aggresult.ll.bc differ diff --git a/test/Bitcode/DIExpression-deref.ll b/test/Bitcode/DIExpression-deref.ll new file mode 100644 index 0000000000000000000000000000000000000000..a03d6016523e66eb386f4c49eeb8d8f271726e22 --- /dev/null +++ b/test/Bitcode/DIExpression-deref.ll @@ -0,0 +1,27 @@ +; RUN: llvm-dis -o - %s.bc | FileCheck %s + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!20, !21} + +!0 = distinct !DIGlobalVariable(name: "g", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true) +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang (llvm/trunk 288154)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4) +!2 = !DIFile(filename: "a.c", directory: "/") +!3 = !{} +!4 = !{!10, !11, !12, !13} +!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; DW_OP_deref should be moved to the back of the expression. +; +; CHECK: !DIExpression(DW_OP_plus_uconst, 0, DW_OP_deref, DW_OP_LLVM_fragment, 8, 32) +!6 = !DIExpression(DW_OP_deref, DW_OP_plus, 0, DW_OP_LLVM_fragment, 8, 32) +; CHECK: !DIExpression(DW_OP_plus_uconst, 0, DW_OP_deref) +!7 = !DIExpression(DW_OP_deref, DW_OP_plus, 0) +; CHECK: !DIExpression(DW_OP_plus_uconst, 1, DW_OP_deref) +!8 = !DIExpression(DW_OP_plus, 1, DW_OP_deref) +; CHECK: !DIExpression(DW_OP_deref) +!9 = !DIExpression(DW_OP_deref) +!10 = !DIGlobalVariableExpression(var: !0, expr: !6) +!11 = !DIGlobalVariableExpression(var: !0, expr: !7) +!12 = !DIGlobalVariableExpression(var: !0, expr: !8) +!13 = !DIGlobalVariableExpression(var: !0, expr: !9) +!20 = !{i32 2, !"Dwarf Version", i32 4} +!21 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/test/Bitcode/DIExpression-deref.ll.bc b/test/Bitcode/DIExpression-deref.ll.bc new file mode 100644 index 0000000000000000000000000000000000000000..5297bf9f17b494f5ecb08ad65f1c169303efc621 Binary files /dev/null and b/test/Bitcode/DIExpression-deref.ll.bc differ diff --git a/test/Bitcode/DIExpression-minus-upgrade.ll b/test/Bitcode/DIExpression-minus-upgrade.ll new file mode 100644 index 0000000000000000000000000000000000000000..1f26eba6f98c0b2dedce25440775e22868030377 --- /dev/null +++ b/test/Bitcode/DIExpression-minus-upgrade.ll @@ -0,0 +1,16 @@ +; RUN: llvm-dis -o - %s.bc | FileCheck %s + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!8, !9} + +!0 = distinct !DIGlobalVariable(name: "g", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true) +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang (llvm/trunk 304286)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4) +!2 = !DIFile(filename: "a.c", directory: "/") +!3 = !{} +!4 = !{!7} +!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; CHECK: !DIExpression(DW_OP_constu, 42, DW_OP_minus) +!6 = !DIExpression(DW_OP_minus, 42) +!7 = !DIGlobalVariableExpression(var: !0, expr: !6) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/test/Bitcode/DIExpression-minus-upgrade.ll.bc b/test/Bitcode/DIExpression-minus-upgrade.ll.bc new file mode 100644 index 0000000000000000000000000000000000000000..354ba6454c3febe5a5fe01e25ffe9769d85ff978 Binary files /dev/null and b/test/Bitcode/DIExpression-minus-upgrade.ll.bc differ diff --git a/test/Bitcode/DIGlobalVariableExpression.ll b/test/Bitcode/DIGlobalVariableExpression.ll index f6796bbdb7a054fdbe6e95c1a57a4c91fe549a8c..31c3fda1b00adcf56dfb131eb28a256a467521bb 100644 --- a/test/Bitcode/DIGlobalVariableExpression.ll +++ b/test/Bitcode/DIGlobalVariableExpression.ll @@ -14,7 +14,7 @@ ; CHECK: ![[HVAR:[0-9]+]] = distinct !DIGlobalVariable(name: "h", ; CHECK: ![[IMPORTS]] = !{![[CIMPORT:[0-9]+]]} ; CHECK: ![[CIMPORT]] = !DIImportedEntity({{.*}}entity: ![[HVAR]] -; CHECK: ![[GEXPR]] = !DIExpression(DW_OP_plus, 1) +; CHECK: ![[GEXPR]] = !DIExpression(DW_OP_plus_uconst, 1) ; CHECK: ![[H]] = {{.*}}!DIGlobalVariableExpression(var: ![[HVAR]]) @g = common global i32 0, align 4, !dbg !0 diff --git a/test/Bitcode/DINamespace.ll b/test/Bitcode/DINamespace.ll index 2807cb02d3ddcc4269d34a1089098e48118db3e6..e3a04fbc87243a29198af829a28d995d53bcd15b 100644 --- a/test/Bitcode/DINamespace.ll +++ b/test/Bitcode/DINamespace.ll @@ -10,8 +10,8 @@ target triple = "x86_64-apple-macosx10.12.0" !0 = distinct !DIGlobalVariable(name: "i", linkageName: "_ZN1N1iE", scope: !1, file: !2, line: 2, type: !3, isLocal: false, isDefinition: true) ; Test bitcode upgrade for DINamespace without an exportSymbols field. -; CHECK: !DINamespace(name: "N", scope: null, file: !{{[0-9]+}}, line: 1) -!1 = !DINamespace(name: "N", scope: null, file: !2, line: 1) +; CHECK: !DINamespace(name: "N", scope: null) +!1 = !DINamespace(name: "N", scope: null) !2 = !DIFile(filename: "dinamespace.cpp", directory: "/") !3 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !2, producer: "clang version 4.0.0 (trunk 283228) (llvm/trunk 283225)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !5, globals: !6) diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll index 9fdf54b7b30856eb2491a783f779fa2dee778ee0..18aa12c7af97e8da4a0a201aa3bb89a78e1fa1eb 100644 --- a/test/Bitcode/attributes.ll +++ b/test/Bitcode/attributes.ll @@ -204,7 +204,7 @@ define void @f34() ; CHECK: define void @f34() { call void @nobuiltin() nobuiltin -; CHECK: call void @nobuiltin() #33 +; CHECK: call void @nobuiltin() #34 ret void; } @@ -334,6 +334,11 @@ define void @f56() writeonly ret void } +; CHECK: define void @f57() #33 +define void @f57() speculatable { + ret void +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } @@ -367,4 +372,5 @@ define void @f56() writeonly ; CHECK: attributes #30 = { allocsize(0) } ; CHECK: attributes #31 = { allocsize(0,1) } ; CHECK: attributes #32 = { writeonly } -; CHECK: attributes #33 = { nobuiltin } +; CHECK: attributes #33 = { speculatable } +; CHECK: attributes #34 = { nobuiltin } diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll index b1f52bbe059fd7e55b920756040821603b53f291..ec69344947c5e515dac1aa8af6fc32cc47276b40 100644 --- a/test/Bitcode/compatibility.ll +++ b/test/Bitcode/compatibility.ll @@ -3,7 +3,7 @@ ; Please update this file when making any IR changes. Information on the ; release process for this file is available here: ; -; http://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility +; http://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s ; RUN-PR24755: verify-uselistorder < %s @@ -472,6 +472,10 @@ declare cc91 void @f.cc91() ; CHECK: declare amdgpu_kernel void @f.cc91() declare amdgpu_kernel void @f.amdgpu_kernel() ; CHECK: declare amdgpu_kernel void @f.amdgpu_kernel() +declare cc93 void @f.cc93() +; CHECK: declare amdgpu_hs void @f.cc93() +declare amdgpu_hs void @f.amdgpu_hs() +; CHECK: declare amdgpu_hs void @f.amdgpu_hs() declare cc1023 void @f.cc1023() ; CHECK: declare cc1023 void @f.cc1023() @@ -1246,7 +1250,7 @@ exit: ; CHECK: select <2 x i1> , <2 x i8> , <2 x i8> call void @f.nobuiltin() builtin - ; CHECK: call void @f.nobuiltin() #41 + ; CHECK: call void @f.nobuiltin() #42 call fastcc noalias i32* @f.noalias() noinline ; CHECK: call fastcc noalias i32* @f.noalias() #12 @@ -1613,6 +1617,9 @@ normal: declare void @f.writeonly() writeonly ; CHECK: declare void @f.writeonly() #40 +declare void @f.speculatable() speculatable +; CHECK: declare void @f.speculatable() #41 + ;; Constant Expressions define i8** @constexpr() { @@ -1661,7 +1668,8 @@ define i8** @constexpr() { ; CHECK: attributes #38 = { nounwind readonly } ; CHECK: attributes #39 = { inaccessiblemem_or_argmemonly nounwind } ; CHECK: attributes #40 = { writeonly } -; CHECK: attributes #41 = { builtin } +; CHECK: attributes #41 = { speculatable } +; CHECK: attributes #42 = { builtin } ;; Metadata diff --git a/test/Bitcode/globalvariable-attributes.ll b/test/Bitcode/globalvariable-attributes.ll new file mode 100644 index 0000000000000000000000000000000000000000..cbab3b71e58a32855756df076ba8b95edd058d66 --- /dev/null +++ b/test/Bitcode/globalvariable-attributes.ll @@ -0,0 +1,19 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +@g1 = global i32 7 "key" = "value" "key2" = "value2" +@g2 = global i32 2, align 4 "key3" = "value3" +@g3 = global i32 2 #0 +@g4 = global i32 2, align 4 "key5" = "value5" #0 + +attributes #0 = { "string" = "value" nobuiltin norecurse } + +; CHECK: @g1 = global i32 7 #0 +; CHECK: @g2 = global i32 2, align 4 #1 +; CHECK: @g3 = global i32 2 #2 +; CHECK: @g4 = global i32 2, align 4 #3 + +; CHECK: attributes #0 = { "key"="value" "key2"="value2" } +; CHECK: attributes #1 = { "key3"="value3" } +; CHECK: attributes #2 = { nobuiltin norecurse "string"="value" } +; CHECK: attributes #3 = { nobuiltin norecurse "key5"="value5" "string"="value" } + diff --git a/test/Bitcode/ptest-old.ll b/test/Bitcode/ptest-old.ll index c1e1cae373684d37519c3a0b612854852162db08..184f72e9856e007d8f5a437ae727900197d1f007 100644 --- a/test/Bitcode/ptest-old.ll +++ b/test/Bitcode/ptest-old.ll @@ -1,5 +1,6 @@ ; RUN: llvm-as < %s | llvm-dis | FileCheck %s ; RUN: verify-uselistorder < %s +; REQUIRES: x86-registered-target define i32 @foo(<4 x float> %bar) nounwind { entry: diff --git a/test/Bitcode/thinlto-alias.ll b/test/Bitcode/thinlto-alias.ll index cfdf8f7b0bd96ca3b218e6ea16951986d092c76e..2c235f0620ecb2c45a9334bab401730803e1e79c 100644 --- a/test/Bitcode/thinlto-alias.ll +++ b/test/Bitcode/thinlto-alias.ll @@ -5,33 +5,31 @@ ; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; CHECK: +; See if the call to func is registered. +; The value id 1 matches the second FUNCTION record above. +; CHECK-NEXT: ; CHECK-NEXT: -; CHECK-NEXT: + +; CHECK: -; Followed by the alias and aliasee +; See if the call to analias is registered, using the expected value id. +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll index 713e36dd14d609f39dff878c44b3b5232fb0a4b2..7f9d6d95f506bbe3a2086b5472c6c4014d0b69f5 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll @@ -10,31 +10,27 @@ ; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-pgo.1.bc | FileCheck %s --check-prefix=OLD ; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-pgo-combined.1.bc | FileCheck %s --check-prefix=OLD-COMBINED +; CHECK: +; See if the call to func is registered, using the expected hotness type. +; CHECK-NEXT: ; CHECK-NEXT: -; CHECK-NEXT: +; CHECK: +; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll index 3a5adea202e2a0da2b963f851d7ae4dedf168ad4..b64d5bd52bfca41edc8a36e8e15fbbbeaadfc00a 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll @@ -6,27 +6,45 @@ ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; CHECK: +; CHECK-NEXT: +; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 +; CHECK-NEXT: ; CHECK-NEXT: -; CHECK-LABEL: -; CHECK-LABEL: + +; CHECK: +; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 +; CHECK-NEXT: +; CHECK-NEXT: + +; CHECK: +; COMBINED_NEXT: + + +; ModuleID = 'thinlto-function-summary-callgraph.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; This function have high profile count, so entry block is hot. +define void @hot_function(i1 %a, i1 %a2) !prof !20 { +entry: + call void @hot1() + br i1 %a, label %Cold, label %Hot, !prof !41 +Cold: ; 1/1000 goes here + call void @cold() + call void @hot2() + call void @hot4(), !prof !15 + call void @none1() + br label %exit +Hot: ; 999/1000 goes here + call void @hot2() + call void @hot3() + br i1 %a2, label %None1, label %None2, !prof !42 +None1: ; half goes here + call void @none1() + call void @none2() + br label %exit +None2: ; half goes here + call void @none3() + br label %exit +exit: + ret void +} + +declare void @hot1() #1 +declare void @hot2() #1 +declare void @hot3() #1 +declare void @hot4() #1 +declare void @cold() #1 +declare void @none1() #1 +declare void @none2() #1 +declare void @none3() #1 + + +!41 = !{!"branch_weights", i32 1, i32 1000} +!42 = !{!"branch_weights", i32 1, i32 1} + + + +!llvm.module.flags = !{!1} +!20 = !{!"function_entry_count", i64 110, i64 123} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"branch_weights", i32 100} diff --git a/test/Bitcode/thinlto-function-summary-callgraph.ll b/test/Bitcode/thinlto-function-summary-callgraph.ll index c00907b7fb291169a60974f301d1f0b0005abe9b..566f3a077e7bf9c3e6f657f4da3f0984d1a8092e 100644 --- a/test/Bitcode/thinlto-function-summary-callgraph.ll +++ b/test/Bitcode/thinlto-function-summary-callgraph.ll @@ -10,30 +10,30 @@ ; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph.1.bc | FileCheck %s --check-prefix=OLD ; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-combined.1.bc | FileCheck %s --check-prefix=OLD-COMBINED +; CHECK: +; See if the call to func is registered. +; CHECK-NEXT: -; CHECK-NEXT: +; CHECK: +; COMBINED-NEXT: +; See if the call to func is registered. +; COMBINED-NEXT: ; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -43,10 +43,12 @@ target triple = "x86_64-unknown-linux-gnu" define i32 @main() #0 { entry: call void (...) @func() - ret i32 0 + %u = load i32, i32* @undefinedglob + ret i32 %u } declare void @func(...) #1 +@undefinedglob = external global i32 ; OLD: Index {{.*}} contains 1 nodes (1 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls) -; OLD-COMBINED: Index {{.*}} contains 2 nodes (2 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls) \ No newline at end of file +; OLD-COMBINED: Index {{.*}} contains 2 nodes (2 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls) diff --git a/test/Bitcode/thinlto-function-summary-originalnames.ll b/test/Bitcode/thinlto-function-summary-originalnames.ll index 8777bd91607655410c07301bc56de0604334d823..afc9772484ef0051755022483517c30b01a8248c 100644 --- a/test/Bitcode/thinlto-function-summary-originalnames.ll +++ b/test/Bitcode/thinlto-function-summary-originalnames.ll @@ -5,6 +5,9 @@ ; COMBINED: +; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-DAG: ; COMBINED-DAG: ; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: source_filename = "/path/to/source.c" diff --git a/test/Bitcode/thinlto-function-summary-refgraph.ll b/test/Bitcode/thinlto-function-summary-refgraph.ll index 882f86509ab1a0571b7e00ba53bf82ad11bed2f7..b52fce79179110eaf5ebc460985757aabcf84365 100644 --- a/test/Bitcode/thinlto-function-summary-refgraph.ll +++ b/test/Bitcode/thinlto-function-summary-refgraph.ll @@ -2,6 +2,32 @@ ; RUN: opt -module-summary %s -o %t.o ; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s +; CHECK: +; op0=main op4=func op5=func +; CHECK-DAG: ; Function W contains a call to func3 as well as a reference to globalvar: -; CHECK-DAG: +; op0=W op4=globalvar op5=func3 +; CHECK-DAG: ; Function X contains call to foo, as well as address reference to foo ; which is in the same instruction as the call: -; CHECK-DAG: +; op0=X op4=foo op5=foo +; CHECK-DAG: ; Function Y contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while earlier analyzing the phi using its ; return value: -; CHECK-DAG: +; op0=Y op4=func2 +; CHECK-DAG: ; Function Z contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while analyzing subsequent use of its return ; value: -; CHECK-DAG: +; op0=Z op4=func2 +; CHECK-DAG: ; Variable bar initialization contains address reference to func: -; CHECK-DAG: +; op0=bar op2=func +; CHECK-DAG: ; CHECK: -; CHECK-NEXT: +; CHECK: record string = 'variadic' -; BC-NEXT: record string = 'foo' -; BC-NEXT: record string = 'bar' -; BC-NEXT: record string = 'f' -; BC-NEXT: record string = 'anon. +; BC: &1 | FileCheck --check-prefix=ERROR %s + +; CHECK: !llvm.linker.options = !{!2, !3} +; CHECK: !2 = !{!"/DEFAULTLIB:libcmtd.lib"} +; CHECK: !3 = !{!"/DEFAULTLIB:oldnames.lib"} + +; ERROR: 'Linker Options' named metadata no longer supported + +!0 = !{i32 6, !"Linker Options", !1} +!1 = !{!2, !3} +!2 = !{!"/DEFAULTLIB:libcmtd.lib"} +!3 = !{!"/DEFAULTLIB:oldnames.lib"} + +!llvm.module.flags = !{!0} diff --git a/test/BugPoint/compile-custom.ll b/test/BugPoint/compile-custom.ll index d152f08626f8bd8184da1ae1c64f6b4407e61074..847d1184f016b6360355f59527dc87de89da46bb 100644 --- a/test/BugPoint/compile-custom.ll +++ b/test/BugPoint/compile-custom.ll @@ -1,4 +1,4 @@ -; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext --compile-custom --compile-command="%python %s.py arg1 arg2" --output-prefix %t %s | FileCheck %s +; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext --compile-custom --compile-command="%python %/s.py arg1 arg2" --output-prefix %t %s | FileCheck %s ; REQUIRES: loadable_module ; Test that arguments are correctly passed in --compile-command. The output diff --git a/test/BugPoint/unsymbolized.ll b/test/BugPoint/unsymbolized.ll new file mode 100644 index 0000000000000000000000000000000000000000..8547f220ea26062f4393ff4110d98b4c84c2988d --- /dev/null +++ b/test/BugPoint/unsymbolized.ll @@ -0,0 +1,21 @@ +; REQUIRES: loadable_module +; RUN: echo "import sys" > %t.py +; RUN: echo "print('args = ' + str(sys.argv))" >> %t.py +; RUN: echo "exit(1)" >> %t.py +; RUN: not bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -opt-command="%python" -opt-args %t.py | FileCheck %s +; RUN: not --crash opt -load %llvmshlibdir/BugpointPasses%shlibext %s -bugpoint-crashcalls -disable-symbolication 2>&1 | FileCheck --check-prefix=CRASH %s + +; Test that bugpoint disables symbolication on the opt tool to reduce runtime overhead when opt crashes +; CHECK: args = {{.*}}'-disable-symbolication' + +; Test that opt, when it crashes & is passed -disable-symbolication, doesn't symbolicate. +; In theory this test should maybe be in test/tools/opt or +; test/Transforms, but since there doesn't seem to be another convenient way to +; crash opt, apart from the BugpointPasses dynamic plugin, this is the spot for +; now. +; CRASH-NOT: Signals.inc + +define void @f() { + call void @f() + ret void +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c1667049f80fc07358f8d9aaa90fba446b228db3..b52b6018e0263710b8d9429910ec94df12d1032e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -45,6 +45,7 @@ set(LLVM_TEST_DEPENDS llvm-config llvm-cov llvm-cxxdump + llvm-cvtres llvm-diff llvm-dis llvm-dsymutil @@ -60,7 +61,7 @@ set(LLVM_TEST_DEPENDS llvm-nm llvm-objdump llvm-opt-report - llvm-pdbdump + llvm-pdbutil llvm-profdata llvm-ranlib llvm-readobj diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index e40199d82c9ddb5df8e97be4d9cdb96cfa094764..0298315a55105df058f00ec854baad7ddd9a3f16 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -43,7 +43,7 @@ define [1 x double] @constant() { ; The key problem here is that we may fail to create an MBB referenced by a ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things ; happen. -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: G_STORE %vreg4, %vreg2; mem:ST4[%addr] GPR:%vreg4,%vreg2 (in function: pending_phis) +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: cannot select: G_STORE %vreg5, %vreg2; mem:ST4[%addr] GPR:%vreg5,%vreg2 (in function: pending_phis) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for pending_phis ; FALLBACK-WITH-REPORT-OUT-LABEL: pending_phis: define i32 @pending_phis(i1 %tst, i32 %val, i32* %addr) { @@ -154,3 +154,19 @@ continue: define fp128 @test_quad_dump() { ret fp128 0xL00000000000000004000000000000000 } + +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %vreg0(p0) = G_EXTRACT_VECTOR_ELT %vreg1, %vreg2; (in function: vector_of_pointers_extractelement) +; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_extractelement +; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_extractelement: +define void @vector_of_pointers_extractelement() { + %dummy = extractelement <2 x i16*> undef, i32 0 + ret void +} + +; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %vreg0(<2 x p0>) = G_INSERT_VECTOR_ELT %vreg1, %vreg2, %vreg3; (in function: vector_of_pointers_insertelement +; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement +; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement: +define void @vector_of_pointers_insertelement() { + %dummy = insertelement <2 x i16*> undef, i16* null, i32 0 + ret void +} diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll index 0063086411843edad4492cfca2b99d0a7c6be635..cd3ea9715e0fd3a77a0deb438528eb06ac92b3e5 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll @@ -4,7 +4,7 @@ ; CHECK: name: test_stack_guard ; CHECK: stack: -; CHECK: - { id: 0, name: StackGuardSlot, offset: 0, size: 8, alignment: 8 } +; CHECK: - { id: 0, name: StackGuardSlot, type: default, offset: 0, size: 8, alignment: 8, ; CHECK-NOT: id: 1 ; CHECK: [[GUARD_SLOT:%[0-9]+]](p0) = G_FRAME_INDEX %stack.0.StackGuardSlot diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 02848021dbc09a038349e90f470c042dd3ebf70f..65b8ba57070116efdee691f4190ce2365ed2ec7e 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -31,10 +31,13 @@ define i64 @muli64(i64 %arg1, i64 %arg2) { ; Tests for alloca ; CHECK-LABEL: name: allocai64 ; CHECK: stack: -; CHECK-NEXT: - { id: 0, name: ptr1, offset: 0, size: 8, alignment: 8 } -; CHECK-NEXT: - { id: 1, name: ptr2, offset: 0, size: 8, alignment: 1 } -; CHECK-NEXT: - { id: 2, name: ptr3, offset: 0, size: 128, alignment: 8 } -; CHECK-NEXT: - { id: 3, name: ptr4, offset: 0, size: 1, alignment: 8 } +; CHECK-NEXT: - { id: 0, name: ptr1, type: default, offset: 0, size: 8, alignment: 8, +; CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +; CHECK-NEXT: - { id: 1, name: ptr2, type: default, offset: 0, size: 8, alignment: 1, +; CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +; CHECK-NEXT: - { id: 2, name: ptr3, type: default, offset: 0, size: 128, alignment: 8, +; CHECK-NEXT: callee-saved-register: '', di-variable: '', di-expression: '', di-location: '' } +; CHECK-NEXT: - { id: 3, name: ptr4, type: default, offset: 0, size: 1, alignment: 8, ; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.0.ptr1 ; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.1.ptr2 ; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.2.ptr3 @@ -1541,3 +1544,24 @@ define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2) %res = shufflevector <8 x i8> %arg1, <8 x i8> %arg2, <16 x i32> ret <16 x i8> %res } + +; CHECK-LABEL: test_constant_vector +; CHECK: [[UNDEF:%[0-9]+]](s16) = IMPLICIT_DEF +; CHECK: [[F:%[0-9]+]](s16) = G_FCONSTANT half 0xH3C00 +; CHECK: [[M:%[0-9]+]](<4 x s16>) = G_MERGE_VALUES [[UNDEF]](s16), [[UNDEF]](s16), [[UNDEF]](s16), [[F]](s16) +; CHECK: %d0 = COPY [[M]](<4 x s16>) +define <4 x half> @test_constant_vector() { + ret <4 x half> +} + +define i32 @test_target_mem_intrinsic(i32* %addr) { +; CHECK-LABEL: name: test_target_mem_intrinsic +; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]](s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load 4 from %ir.addr) +; CHECK: G_TRUNC [[VAL]](s64) + %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) + %trunc = trunc i64 %val to i32 + ret i32 %trunc +} + +declare i64 @llvm.aarch64.ldxr.p0i32(i32*) nounwind diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir index 739fdd5cb4c54b16be80b17bb99f08efaf92b98c..296f65c041a177bfdec4b95076a5f00f3633f8e6 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir +++ b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir @@ -74,6 +74,21 @@ %res = bitcast <2 x i32> %vres to i64 ret i64 %res } + + define i64 @floatingPointLoad(i64 %arg1, double* %addr) { + %varg1 = bitcast i64 %arg1 to double + %varg2 = load double, double* %addr + %vres = fadd double %varg1, %varg2 + %res = bitcast double %vres to i64 + ret i64 %res + } + + define void @floatingPointStore(i64 %arg1, double* %addr) { + %varg1 = bitcast i64 %arg1 to double + %vres = fadd double %varg1, %varg1 + store double %vres, double* %addr + ret void + } ... --- @@ -83,8 +98,8 @@ name: defaultMapping legalized: true # CHECK-LABEL: name: defaultMapping # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -104,8 +119,8 @@ name: defaultMappingVector legalized: true # CHECK-LABEL: name: defaultMappingVector # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -126,10 +141,10 @@ name: defaultMapping1Repair legalized: true # CHECK-LABEL: name: defaultMapping1Repair # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -151,10 +166,10 @@ name: defaultMapping2Repairs legalized: true # CHECK-LABEL: name: defaultMapping2Repairs # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -178,9 +193,9 @@ name: defaultMappingDefRepair legalized: true # CHECK-LABEL: name: defaultMappingDefRepair # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: fpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: fpr } @@ -200,11 +215,11 @@ name: phiPropagation legalized: true tracksRegLiveness: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64sp } -# CHECK-NEXT: - { id: 2, class: gpr32 } -# CHECK-NEXT: - { id: 3, class: gpr } -# CHECK-NEXT: - { id: 4, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr32 } - { id: 1, class: gpr64sp } @@ -239,10 +254,10 @@ name: defaultMappingUseRepairPhysReg legalized: true # CHECK-LABEL: name: defaultMappingUseRepairPhysReg # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: fpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -265,8 +280,8 @@ name: defaultMappingDefRepairPhysReg legalized: true # CHECK-LABEL: name: defaultMappingDefRepairPhysReg # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -288,18 +303,18 @@ name: greedyMappingOr legalized: true # CHECK-LABEL: name: greedyMappingOr # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } # Fast mode maps vector instruction on FPR. -# FAST-NEXT: - { id: 2, class: fpr } +# FAST-NEXT: - { id: 2, class: fpr, preferred-register: '' } # Fast mode needs two extra copies. -# FAST-NEXT: - { id: 3, class: fpr } -# FAST-NEXT: - { id: 4, class: fpr } +# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' } # Greedy mode coalesce the computation on the GPR register # because it is the cheapest. -# GREEDY-NEXT: - { id: 2, class: gpr } +# GREEDY-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } @@ -335,18 +350,18 @@ name: greedyMappingOrWithConstraints legalized: true # CHECK-LABEL: name: greedyMappingOrWithConstraints # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: fpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } # Fast mode maps vector instruction on FPR. # Fast mode needs two extra copies. -# FAST-NEXT: - { id: 3, class: fpr } -# FAST-NEXT: - { id: 4, class: fpr } +# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' } # Greedy mode coalesce the computation on the GPR register because it # is the cheapest, but will need one extra copy to materialize %2 into a FPR. -# GREEDY-NEXT: - { id: 3, class: gpr } +# GREEDY-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } @@ -381,8 +396,8 @@ body: | name: ignoreTargetSpecificInst legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr64 } - { id: 1, class: gpr64 } @@ -419,8 +434,8 @@ name: bitcast_s32_gpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -442,8 +457,8 @@ name: bitcast_s32_fpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# CHECK-NEXT: - { id: 1, class: fpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -465,9 +480,9 @@ name: bitcast_s32_gpr_fpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# FAST-NEXT: - { id: 1, class: fpr } -# GREEDY-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: fpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -489,9 +504,9 @@ name: bitcast_s32_fpr_gpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# FAST-NEXT: - { id: 1, class: gpr } -# GREEDY-NEXT: - { id: 1, class: fpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -513,8 +528,8 @@ name: bitcast_s64_gpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -536,8 +551,8 @@ name: bitcast_s64_fpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# CHECK-NEXT: - { id: 1, class: fpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -559,9 +574,9 @@ name: bitcast_s64_gpr_fpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# FAST-NEXT: - { id: 1, class: fpr } -# GREEDY-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: fpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -582,9 +597,9 @@ name: bitcast_s64_fpr_gpr legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr } -# FAST-NEXT: - { id: 1, class: gpr } -# GREEDY-NEXT: - { id: 1, class: fpr } +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -609,15 +624,15 @@ name: greedyWithChainOfComputation legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# FAST-NEXT: - { id: 2, class: fpr } -# FAST-NEXT: - { id: 3, class: fpr } -# FAST-NEXT: - { id: 4, class: fpr } -# GREEDY-NEXT: - { id: 2, class: gpr } -# GREEDY-NEXT: - { id: 3, class: gpr } -# GREEDY-NEXT: - { id: 4, class: gpr } -# CHECK-NEXT: - { id: 5, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 2, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# GREEDY-NEXT: - { id: 4, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -650,3 +665,84 @@ body: | RET_ReallyLR implicit %x0 ... + +--- +# Make sure we map what looks like floating point +# loads to floating point register bank. +# CHECK-LABEL: name: floatingPointLoad +name: floatingPointLoad +legalized: true + +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + +# No repairing should be necessary for both modes. +# CHECK: %0(s64) = COPY %x0 +# CHECK-NEXT: %1(p0) = COPY %x1 +# CHECK-NEXT: %2(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr) +# %0 has been mapped to GPR, we need to repair to match FPR. +# CHECK-NEXT: %4(s64) = COPY %0 +# CHECK-NEXT: %3(s64) = G_FADD %4, %2 +# CHECK-NEXT: %x0 = COPY %3(s64) +# CHECK-NEXT: RET_ReallyLR implicit %x0 + +body: | + bb.0: + liveins: %x0, %x1 + + %0(s64) = COPY %x0 + %1(p0) = COPY %x1 + %2(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr) + %3(s64) = G_FADD %0, %2 + %x0 = COPY %3(s64) + RET_ReallyLR implicit %x0 + +... + +--- +# Make sure we map what looks like floating point +# stores to floating point register bank. +# CHECK-LABEL: name: floatingPointStore +name: floatingPointStore +legalized: true + +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + +# CHECK: %0(s64) = COPY %x0 +# CHECK-NEXT: %1(p0) = COPY %x1 +# %0 has been mapped to GPR, we need to repair to match FPR. +# CHECK-NEXT: %3(s64) = COPY %0 +# CHECK-NEXT: %4(s64) = COPY %0 +# CHECK-NEXT: %2(s64) = G_FADD %3, %4 +# CHECK-NEXT: G_STORE %2(s64), %1(p0) :: (store 8 into %ir.addr) +# CHECK-NEXT: RET_ReallyLR + +body: | + bb.0: + liveins: %x0, %x1 + + %0(s64) = COPY %x0 + %1(p0) = COPY %x1 + %2(s64) = G_FADD %0, %0 + G_STORE %2(s64), %1(p0) :: (store 8 into %ir.addr) + RET_ReallyLR + +... diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll index 4e6b9cad4c3dbe9fc007bcfce3f8acad85ed24a4..38a90bbfbbd9ae7c952bbc4d2df49a1d4007faea 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll @@ -3,8 +3,8 @@ ; CHECK-LABEL: name: test_stack_slots ; CHECK: fixedStack: -; CHECK-DAG: - { id: [[STACK0:[0-9]+]], offset: 0, size: 1 -; CHECK-DAG: - { id: [[STACK8:[0-9]+]], offset: 1, size: 1 +; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, +; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 1, size: 1, ; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; CHECK: [[LHS:%[0-9]+]](s8) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0) ; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll index f8d95c88cc8f3eec9b9ae18c63cd58da0c7efa2a..e923a0b2847f834eeb1e8f4bdb261052e257963b 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s ; CHECK-LABEL: name: test_trivial_call -; CHECK: ADJCALLSTACKDOWN 0, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def %sp, implicit %sp ; CHECK: BL @trivial_callee, csr_aarch64_aapcs, implicit-def %lr ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def %sp, implicit %sp declare void @trivial_callee() @@ -35,7 +35,7 @@ define void @test_simple_arg(i32 %in) { ; CHECK-LABEL: name: test_indirect_call ; CHECK: registers: ; Make sure the register feeding the indirect call is properly constrained. -; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64 } +; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64, preferred-register: '' } ; CHECK: %[[FUNC]](p0) = COPY %x0 ; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def %lr, implicit %sp ; CHECK: RET_ReallyLR @@ -165,9 +165,9 @@ define zeroext i8 @test_abi_zext_ret(i8* %addr) { ; CHECK-LABEL: name: test_stack_slots ; CHECK: fixedStack: -; CHECK-DAG: - { id: [[STACK0:[0-9]+]], offset: 0, size: 8 -; CHECK-DAG: - { id: [[STACK8:[0-9]+]], offset: 8, size: 8 -; CHECK-DAG: - { id: [[STACK16:[0-9]+]], offset: 16, size: 8 +; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, +; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, +; CHECK-DAG: - { id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 8, ; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; CHECK: [[LHS:%[0-9]+]](s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0) ; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] @@ -186,7 +186,7 @@ define void @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs, i64* %addr) { ; CHECK: [[C42:%[0-9]+]](s64) = G_CONSTANT i64 42 ; CHECK: [[C12:%[0-9]+]](s64) = G_CONSTANT i64 12 ; CHECK: [[PTR:%[0-9]+]](p0) = G_CONSTANT i64 0 -; CHECK: ADJCALLSTACKDOWN 24, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 24, 0, implicit-def %sp, implicit %sp ; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp ; CHECK: [[C42_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 0 ; CHECK: [[C42_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C42_OFFS]](s64) @@ -208,7 +208,7 @@ define void @test_call_stack() { ; CHECK-LABEL: name: test_mem_i1 ; CHECK: fixedStack: -; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false } +; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true, ; CHECK: [[ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[SLOT]] ; CHECK: {{%[0-9]+}}(s1) = G_LOAD [[ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[SLOT]], align 0) define void @test_mem_i1([8 x i64], i1 %in) { diff --git a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll index 5a76661180f229c1c829b78bee4028675e6800b5..e832ba953241784e086176bb7e31e95d8396755f 100644 --- a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll +++ b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll @@ -3,8 +3,8 @@ ; CHECK-LABEL: name: debug_declare ; CHECK: stack: -; CHECK: - { id: {{.*}}, name: in.addr, offset: {{.*}}, size: {{.*}}, alignment: {{.*}}, di-variable: '!11', -; CHECK-NEXT: di-expression: '!12', di-location: '!13' } +; CHECK: - { id: {{.*}}, name: in.addr, type: default, offset: 0, size: {{.*}}, alignment: {{.*}}, +; CHECK-NEXT: callee-saved-register: '', di-variable: '!11', di-expression: '!12', ; CHECK: DBG_VALUE debug-use %0(s32), debug-use _, !11, !12, debug-location !13 define void @debug_declare(i32 %in) #0 !dbg !7 { entry: @@ -12,33 +12,33 @@ entry: store i32 %in, i32* %in.addr, align 4 call void @llvm.dbg.declare(metadata i32* %in.addr, metadata !11, metadata !12), !dbg !13 call void @llvm.dbg.declare(metadata i32 %in, metadata !11, metadata !12), !dbg !13 - ret void, !dbg !14 + ret void, !dbg !13 } ; CHECK-LABEL: name: debug_declare_vla -; CHECK: DBG_VALUE debug-use %{{[0-9]+}}(p0), debug-use _, !11, !12, debug-location !13 -define void @debug_declare_vla(i32 %in) #0 !dbg !7 { +; CHECK: DBG_VALUE debug-use %{{[0-9]+}}(p0), debug-use _, !15, !12, debug-location !16 +define void @debug_declare_vla(i32 %in) #0 !dbg !14 { entry: %vla.addr = alloca i32, i32 %in - call void @llvm.dbg.declare(metadata i32* %vla.addr, metadata !11, metadata !12), !dbg !13 - ret void, !dbg !14 + call void @llvm.dbg.declare(metadata i32* %vla.addr, metadata !15, metadata !12), !dbg !16 + ret void, !dbg !16 } ; CHECK-LABEL: name: debug_value ; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0 -define void @debug_value(i32 %in) #0 !dbg !7 { +define void @debug_value(i32 %in) #0 !dbg !17 { %addr = alloca i32 -; CHECK: DBG_VALUE debug-use [[IN]](s32), debug-use _, !11, !12, debug-location !13 - call void @llvm.dbg.value(metadata i32 %in, i64 0, metadata !11, metadata !12), !dbg !13 +; CHECK: DBG_VALUE debug-use [[IN]](s32), debug-use _, !18, !12, debug-location !19 + call void @llvm.dbg.value(metadata i32 %in, i64 0, metadata !18, metadata !12), !dbg !19 store i32 %in, i32* %addr -; CHECK: DBG_VALUE debug-use %1(p0), debug-use _, !11, !15, debug-location !13 - call void @llvm.dbg.value(metadata i32* %addr, i64 0, metadata !11, metadata !15), !dbg !13 -; CHECK: DBG_VALUE 123, 0, !11, !12, debug-location !13 - call void @llvm.dbg.value(metadata i32 123, i64 0, metadata !11, metadata !12), !dbg !13 -; CHECK: DBG_VALUE float 1.000000e+00, 0, !11, !12, debug-location !13 - call void @llvm.dbg.value(metadata float 1.000000e+00, i64 0, metadata !11, metadata !12), !dbg !13 -; CHECK: DBG_VALUE _, 0, !11, !12, debug-location !13 - call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !11, metadata !12), !dbg !13 +; CHECK: DBG_VALUE debug-use %1(p0), debug-use _, !18, !20, debug-location !19 + call void @llvm.dbg.value(metadata i32* %addr, i64 0, metadata !18, metadata !20), !dbg !19 +; CHECK: DBG_VALUE 123, 0, !18, !12, debug-location !19 + call void @llvm.dbg.value(metadata i32 123, i64 0, metadata !18, metadata !12), !dbg !19 +; CHECK: DBG_VALUE float 1.000000e+00, 0, !18, !12, debug-location !19 + call void @llvm.dbg.value(metadata float 1.000000e+00, i64 0, metadata !18, metadata !12), !dbg !19 +; CHECK: DBG_VALUE _, 0, !18, !12, debug-location !19 + call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !18, metadata !12), !dbg !19 ret void } @@ -64,5 +64,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) !11 = !DILocalVariable(name: "in", arg: 1, scope: !7, file: !1, line: 1, type: !10) !12 = !DIExpression() !13 = !DILocation(line: 1, column: 14, scope: !7) -!14 = !DILocation(line: 2, column: 1, scope: !7) -!15 = !DIExpression(DW_OP_deref) +!14 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!15 = !DILocalVariable(name: "in", arg: 1, scope: !14, file: !1, line: 1, type: !10) +!16 = !DILocation(line: 1, column: 14, scope: !14) +!17 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!18 = !DILocalVariable(name: "in", arg: 1, scope: !17, file: !1, line: 1, type: !10) +!19 = !DILocation(line: 1, column: 14, scope: !17) +!20 = !DIExpression(DW_OP_deref) diff --git a/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll index 3ecdb7bbedfb5b422eacbfde07f0bbf0d126fa02..0972840de47bbceb4e0446eea2af0b783e37c19c 100644 --- a/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ b/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -1,10 +1,10 @@ ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ ; RUN: -O0 -aarch64-enable-global-isel-at-O=0 \ -; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix NOFALLBACK +; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O0 --check-prefix NOFALLBACK ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ ; RUN: -O0 -aarch64-enable-global-isel-at-O=0 -global-isel-abort=2 \ -; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix FALLBACK +; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O0 --check-prefix FALLBACK ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \ ; RUN: -global-isel \ @@ -32,6 +32,7 @@ ; ENABLED: IRTranslator ; ENABLED-NEXT: Legalizer ; ENABLED-NEXT: RegBankSelect +; ENABLED-O0-NEXT: Localizer ; ENABLED-NEXT: InstructionSelect ; ENABLED-NEXT: ResetMachineFunction diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir index c806b4a7060d1cbd0844f33d35cfcb271c631010..ce913d211ae27155cb5160359e8f759525a42827 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -53,9 +53,7 @@ body: | ; CHECK: %7(<2 x s32>) = G_LOAD %0(p0) :: (load 8 from %ir.addr) %7(<2 x s32>) = G_LOAD %0(p0) :: (load 8 from %ir.addr) - ; CHECK: [[OFFSET0:%[0-9]+]](s64) = G_CONSTANT i64 0 - ; CHECK: [[GEP0:%[0-9]+]](p0) = G_GEP %0, [[OFFSET0]](s64) - ; CHECK: [[LOAD0:%[0-9]+]](s64) = G_LOAD [[GEP0]](p0) :: (load 16 from %ir.addr) + ; CHECK: [[LOAD0:%[0-9]+]](s64) = G_LOAD %0(p0) :: (load 16 from %ir.addr) ; CHECK: [[OFFSET1:%[0-9]+]](s64) = G_CONSTANT i64 8 ; CHECK: [[GEP1:%[0-9]+]](p0) = G_GEP %0, [[OFFSET1]](s64) ; CHECK: [[LOAD1:%[0-9]+]](s64) = G_LOAD [[GEP1]](p0) :: (load 16 from %ir.addr) @@ -105,9 +103,7 @@ body: | ; CHECK: G_STORE %0(p0), %0(p0) :: (store 8 into %ir.addr) G_STORE %0(p0), %0(p0) :: (store 8 into %ir.addr) - ; CHECK: [[OFFSET0:%[0-9]+]](s64) = G_CONSTANT i64 0 - ; CHECK: [[GEP0:%[0-9]+]](p0) = G_GEP %0, [[OFFSET0]](s64) - ; CHECK: G_STORE %5(s64), [[GEP0]](p0) :: (store 16 into %ir.addr) + ; CHECK: G_STORE %5(s64), %0(p0) :: (store 16 into %ir.addr) ; CHECK: [[OFFSET1:%[0-9]+]](s64) = G_CONSTANT i64 8 ; CHECK: [[GEP1:%[0-9]+]](p0) = G_GEP %0, [[OFFSET1]](s64) ; CHECK: G_STORE %6(s64), [[GEP1]](p0) :: (store 16 into %ir.addr) diff --git a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir new file mode 100644 index 0000000000000000000000000000000000000000..28c926b5d062dcce66d4c8f9f75b2f5f8bb566a9 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir @@ -0,0 +1,96 @@ +# RUN: llc -O0 -mtriple aarch64-apple-ios %s -global-isel -start-after regbankselect \ +# RUN: -stop-before instruction-select -o - | FileCheck --check-prefix=CHECK --check-prefix=OPTNONE %s +# RUN: llc -mtriple aarch64-apple-ios %s -global-isel -start-after regbankselect \ +# RUN: -stop-before instruction-select -o - | FileCheck --check-prefix=CHECK --check-prefix=OPT %s +# +# Check that we are only running the localizer at O0 and that it runs +# between the regbankselect pass and the instruction-select. +# Moreover, check that it does what we expect. +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-apple-ios" + + define float @foo(float %arg, i1 %cond) { + br i1 %cond, label %true, label %false + + true: ; preds = %0 + br label %end + + false: ; preds = %0 + br label %end + + end: ; preds = %false, %true + %val = phi float [ 1.000000e+00, %true ], [ 2.000000e+00, %false ] + %res = fadd float %arg, %val + ret float %res + } + +... +--- +# CHECK-LABEL: name: foo +name: foo +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: +# CHECK: registers: +# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: fpr, preferred-register: '' } +# The localizer will create two new values to materialize the constants. +# OPTNONE-NEXT: - { id: 6, class: fpr, preferred-register: '' } +# OPTNONE-NEXT: - { id: 7, class: fpr, preferred-register: '' } + - { id: 0, class: fpr } + - { id: 1, class: gpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } + - { id: 5, class: fpr } + +# First block remains untouched +# CHECK: body +# CHECK: %4(s32) = G_FCONSTANT float 1.000000e+00 +# CHECK: %5(s32) = G_FCONSTANT float 2.000000e+00 + +# Second block will get the constant 1.0 when the localizer is enabled. +# CHECK: bb.1.true: +# OPT-NOT: G_FCONSTANT +# OPTNONE: [[FONE:%[0-9]+]](s32) = G_FCONSTANT float 1.000000e+00 +# CHECK: G_BR %bb.3.end + +# Thrid block will get the constant 2.0 when the localizer is enabled. +# CHECK: bb.2.false: +# OPT-NOT: G_FCONSTANT +# OPTNONE: [[FTWO:%[0-9]+]](s32) = G_FCONSTANT float 2.000000e+00 + +# CHECK: bb.3.end +# OPTNONE: %2(s32) = PHI [[FONE]](s32), %bb.1.true, [[FTWO]](s32), %bb.2.false +# OPT: %2(s32) = PHI %4(s32), %bb.1.true, %5(s32), %bb.2.false +# CHECK-NEXT: G_FADD %0, %2 +body: | + bb.0 (%ir-block.0): + liveins: %s0, %w0 + + %0(s32) = COPY %s0 + %1(s1) = COPY %w0 + %4(s32) = G_FCONSTANT float 1.000000e+00 + %5(s32) = G_FCONSTANT float 2.000000e+00 + G_BRCOND %1(s1), %bb.1.true + G_BR %bb.2.false + + bb.1.true: + G_BR %bb.3.end + + bb.2.false: + + bb.3.end: + %2(s32) = PHI %4(s32), %bb.1.true, %5(s32), %bb.2.false + %3(s32) = G_FADD %0, %2 + %s0 = COPY %3(s32) + RET_ReallyLR implicit %s0 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/localizer.mir b/test/CodeGen/AArch64/GlobalISel/localizer.mir new file mode 100644 index 0000000000000000000000000000000000000000..afe2c13f025da4ab4897fa635e23d14c96de3f05 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/localizer.mir @@ -0,0 +1,361 @@ +# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=localizer -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefix=CHECK + +# Test the localizer. + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + define void @local_use() { ret void } + define void @non_local_1use() { ret void } + define void @non_local_2uses() { ret void } + define void @non_local_phi_use() { ret void } + define void @non_local_phi_use_followed_by_use() { ret void } + define void @non_local_phi_use_followed_by_use_fi() { ret void } + define void @float_non_local_phi_use_followed_by_use_fi() { ret void } + define void @non_local_phi() { ret void } +... + +--- +# CHECK-LABEL: name: local_use +name: local_use +legalized: true +regBankSelected: true + +# CHECK: registers: +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + +# CHECK: body: +# CHECK: %0(s32) = G_CONSTANT 1 +# CHECK-NEXT: %1(s32) = G_ADD %0, %0 +body: | + bb.0: + %0(s32) = G_CONSTANT 1 + %1(s32) = G_ADD %0, %0 +... + +--- +# CHECK-LABEL: name: non_local_1use +name: non_local_1use +legalized: true +regBankSelected: true + +# CHECK: registers: +# Existing registers should be left untouched +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# The newly created reg should be on the same regbank/regclass as its origin. +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } + +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + +# CHECK: body: +# CHECK: %0(s32) = G_CONSTANT 1 +# CHECK-NEXT: %1(s32) = G_ADD %0, %0 + +# CHECK: bb.1: +# CHECK: %3(s32) = G_CONSTANT 1 +# CHECK-NEXT: %2(s32) = G_ADD %3, %1 +body: | + bb.0: + successors: %bb.1 + + %0(s32) = G_CONSTANT 1 + %1(s32) = G_ADD %0, %0 + + bb.1: + %2(s32) = G_ADD %0, %1 +... + + +--- +# CHECK-LABEL: name: non_local_2uses +name: non_local_2uses +legalized: true +regBankSelected: true + +# CHECK: registers: +# Existing registers should be left untouched +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# The newly created reg should be on the same regbank/regclass as its origin. +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } + +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + +# CHECK: body: +# CHECK: %0(s32) = G_CONSTANT 1 +# CHECK-NEXT: %1(s32) = G_ADD %0, %0 + +# CHECK: bb.1: +# CHECK: %3(s32) = G_CONSTANT 1 +# CHECK-NEXT: %2(s32) = G_ADD %3, %3 +body: | + bb.0: + successors: %bb.1 + + %0(s32) = G_CONSTANT 1 + %1(s32) = G_ADD %0, %0 + + bb.1: + %2(s32) = G_ADD %0, %0 +... + +--- +# CHECK-LABEL: name: non_local_phi_use +name: non_local_phi_use +legalized: true +regBankSelected: true +tracksRegLiveness: true + +# CHECK: registers: +# Existing registers should be left untouched +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } +# The newly created reg should be on the same regbank/regclass as its origin. +#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } + +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + +# CHECK: body: +# CHECK: %0(s32) = G_CONSTANT 1 +# CHECK-NEXT: %1(s32) = G_ADD %0, %0 + +# CHECK: bb.1: +# CHECK: %5(s32) = G_CONSTANT 1 + +# CHECK: bb.2: +# CHECK: %3(s32) = PHI %5(s32), %bb.1 +body: | + bb.0: + successors: %bb.1 + + %0(s32) = G_CONSTANT 1 + %1(s32) = G_ADD %0, %0 + + bb.1: + successors: %bb.2 + + bb.2: + %3(s32) = PHI %0(s32), %bb.1 + %2(s32) = G_ADD %3, %3 +... + +--- +# CHECK-LABEL: name: non_local_phi_use_followed_by_use +name: non_local_phi_use_followed_by_use +legalized: true +regBankSelected: true +tracksRegLiveness: true + +# CHECK: registers: +# Existing registers should be left untouched +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } +# The newly created regs should be on the same regbank/regclass as its origin. +#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 6, class: gpr, preferred-register: '' } + +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + +# CHECK: body: +# CHECK: %0(s32) = G_CONSTANT 1 +# CHECK-NEXT: %1(s32) = G_ADD %0, %0 + +# CHECK: bb.1: +# CHECK: %5(s32) = G_CONSTANT 1 + +# CHECK: bb.2: +# CHECK: %3(s32) = PHI %5(s32), %bb.1 +# CHECK-NEXT: %6(s32) = G_CONSTANT 1 +# CHECK-NEXT: %2(s32) = G_ADD %3, %6 +body: | + bb.0: + successors: %bb.1 + + %0(s32) = G_CONSTANT 1 + %1(s32) = G_ADD %0, %0 + + bb.1: + successors: %bb.2 + + bb.2: + %3(s32) = PHI %0(s32), %bb.1 + %2(s32) = G_ADD %3, %0 +... + +--- +# CHECK-LABEL: name: non_local_phi_use_followed_by_use_fi +name: non_local_phi_use_followed_by_use_fi +legalized: true +regBankSelected: true +tracksRegLiveness: true + +# CHECK: registers: +# Existing registers should be left untouched +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } +# The newly created reg should be on the same regbank/regclass as its origin. +#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } +#CHECK-NEXT: - { id: 6, class: gpr, preferred-register: '' } + +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + +# CHECK: body: +# CHECK: %0(s32) = G_FRAME_INDEX 1 +# CHECK-NEXT: %1(s32) = G_ADD %0, %0 + +# CHECK: bb.1: +# CHECK: %5(s32) = G_FRAME_INDEX 1 + +# CHECK: bb.2: +# CHECK: %3(s32) = PHI %5(s32), %bb.1 +# CHECK-NEXT: %6(s32) = G_FRAME_INDEX 1 +# CHECK-NEXT: %2(s32) = G_ADD %3, %6 +body: | + bb.0: + successors: %bb.1 + + %0(s32) = G_FRAME_INDEX 1 + %1(s32) = G_ADD %0, %0 + + bb.1: + successors: %bb.2 + + bb.2: + %3(s32) = PHI %0(s32), %bb.1 + %2(s32) = G_ADD %3, %0 +... + +--- +# CHECK-LABEL: name: float_non_local_phi_use_followed_by_use_fi +name: float_non_local_phi_use_followed_by_use_fi +legalized: true +regBankSelected: true +tracksRegLiveness: true + +# CHECK: registers: +# Existing registers should be left untouched +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } +# The newly created reg should be on the same regbank/regclass as its origin. +#CHECK-NEXT: - { id: 5, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 6, class: fpr, preferred-register: '' } + +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + - { id: 4, class: fpr } + +# CHECK: body: +# CHECK: %0(s32) = G_FCONSTANT float 1.0 +# CHECK-NEXT: %1(s32) = G_FADD %0, %0 + +# CHECK: bb.1: +# CHECK: %5(s32) = G_FCONSTANT float 1.0 + +# CHECK: bb.2: +# CHECK: %3(s32) = PHI %5(s32), %bb.1 +# CHECK-NEXT: %6(s32) = G_FCONSTANT float 1.0 +# CHECK-NEXT: %2(s32) = G_FADD %3, %6 +body: | + bb.0: + successors: %bb.1 + + %0(s32) = G_FCONSTANT float 1.0 + %1(s32) = G_FADD %0, %0 + + bb.1: + successors: %bb.2 + + bb.2: + %3(s32) = PHI %0(s32), %bb.1 + %2(s32) = G_FADD %3, %0 +... + +--- +# Make sure we don't insert a constant before PHIs. +# This used to happen for loops of one basic block. +# CHECK-LABEL: name: non_local_phi +name: non_local_phi +legalized: true +regBankSelected: true +tracksRegLiveness: true + +# CHECK: registers: +# Existing registers should be left untouched +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } +#CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } +# The newly created reg should be on the same regbank/regclass as its origin. +#CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } + +registers: + - { id: 0, class: fpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: fpr } + +# CHECK: body: +# CHECK: %0(s32) = G_FCONSTANT float 1.0 +# CHECK-NEXT: %1(s32) = G_FADD %0, %0 + +# CHECK: bb.1: +# CHECK: %3(s32) = PHI %1(s32), %bb.0, %4(s32), %bb.1 +# CHECK: %4(s32) = G_FCONSTANT float 1.0 + +# CHECK-NEXT: %2(s32) = G_FADD %3, %1 +body: | + bb.0: + successors: %bb.1 + + %0(s32) = G_FCONSTANT float 1.0 + %1(s32) = G_FADD %0, %0 + + bb.1: + successors: %bb.1 + + %3(s32) = PHI %1(s32), %bb.0, %0(s32), %bb.1 + %2(s32) = G_FADD %3, %1 + G_BR %bb.1 +... diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir index 73d4d20547292598862bb99040ae85cb085a36eb..c8a8266e8b2800da09996dd6c7d7879950a44f88 100644 --- a/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir +++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-dbg-value.mir @@ -32,7 +32,7 @@ name: test_dbg_value legalized: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } body: | bb.0: liveins: %w0 diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir index 14ee40c941bf08c5953870fce4881ed8c4d9cb63..b8468d8cf55f44d0920da3da27a3b7e321b512b9 100644 --- a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir +++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir @@ -73,8 +73,8 @@ name: test_add_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -92,8 +92,8 @@ body: | name: test_add_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -111,8 +111,8 @@ body: | name: test_sub_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -130,8 +130,8 @@ body: | name: test_sub_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -149,8 +149,8 @@ body: | name: test_mul_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -168,8 +168,8 @@ body: | name: test_mul_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -187,8 +187,8 @@ body: | name: test_and_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -206,8 +206,8 @@ body: | name: test_and_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -225,8 +225,8 @@ body: | name: test_or_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -244,8 +244,8 @@ body: | name: test_or_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -263,8 +263,8 @@ body: | name: test_xor_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -282,8 +282,8 @@ body: | name: test_xor_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -301,8 +301,8 @@ body: | name: test_shl_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -320,8 +320,8 @@ body: | name: test_shl_v4s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -339,8 +339,8 @@ body: | name: test_lshr_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -358,8 +358,8 @@ body: | name: test_ashr_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -377,8 +377,8 @@ body: | name: test_sdiv_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -396,8 +396,8 @@ body: | name: test_udiv_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -415,8 +415,8 @@ body: | name: test_anyext_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -434,8 +434,8 @@ body: | name: test_sext_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -453,8 +453,8 @@ body: | name: test_zext_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -472,8 +472,8 @@ body: | name: test_trunc_s32_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -491,7 +491,7 @@ body: | name: test_constant_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } body: | @@ -505,7 +505,7 @@ body: | name: test_constant_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } body: | @@ -519,8 +519,8 @@ body: | name: test_icmp_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -538,8 +538,8 @@ body: | name: test_icmp_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -557,7 +557,7 @@ body: | name: test_frame_index_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } stack: @@ -573,8 +573,8 @@ body: | name: test_ptrtoint_s64_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -592,8 +592,8 @@ body: | name: test_inttoptr_p0_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -611,8 +611,8 @@ body: | name: test_load_s32_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -630,8 +630,8 @@ body: | name: test_store_s32_p0 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -651,8 +651,8 @@ body: | name: test_fadd_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -670,8 +670,8 @@ body: | name: test_fsub_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -689,8 +689,8 @@ body: | name: test_fmul_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -708,8 +708,8 @@ body: | name: test_fdiv_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -727,8 +727,8 @@ body: | name: test_fpext_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -746,8 +746,8 @@ body: | name: test_fptrunc_s32_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -765,7 +765,7 @@ body: | name: test_fconstant_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } body: | @@ -779,8 +779,8 @@ body: | name: test_fcmp_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -798,8 +798,8 @@ body: | name: test_sitofp_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -817,8 +817,8 @@ body: | name: test_uitofp_s32_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: fpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -836,8 +836,8 @@ body: | name: test_fptosi_s64_s32 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -855,8 +855,8 @@ body: | name: test_fptoui_s32_s64 legalized: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: fpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/AArch64/GlobalISel/select-binop.mir b/test/CodeGen/AArch64/GlobalISel/select-binop.mir index 8ae2e1b2eb7d2da8859d3dce8c13d353f6d3c874..70cda516d5f10535b258710cfd85c03f7814d360 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-binop.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-binop.mir @@ -64,9 +64,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -94,9 +94,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -123,9 +123,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr32sp } +# CHECK-NEXT: - { id: 0, class: gpr32sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32sp, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -151,9 +151,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr64sp } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64sp, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -179,9 +179,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr32sp } +# CHECK-NEXT: - { id: 0, class: gpr32sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32sp, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -213,9 +213,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -243,9 +243,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -273,9 +273,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -303,9 +303,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -333,9 +333,9 @@ legalized: true regBankSelected: true # # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -365,9 +365,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -395,9 +395,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -425,9 +425,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -455,9 +455,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -485,9 +485,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -515,9 +515,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -545,9 +545,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -575,9 +575,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -606,9 +606,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -636,9 +636,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -666,10 +666,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } -# CHECK-NEXT: - { id: 3, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr64, preferred-register: '' } # CHECK: body: # CHECK: %0 = COPY %x0 @@ -696,9 +696,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -726,9 +726,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -756,9 +756,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -786,9 +786,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -816,9 +816,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -845,9 +845,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -874,9 +874,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -903,9 +903,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -932,9 +932,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -961,9 +961,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -990,9 +990,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -1019,9 +1019,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir index 5ca63dbc214d55ba8ce03ce3be0f3af6d6297dc9..d871a80661a8d352bbbb307a874ad61e174915a1 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir @@ -19,8 +19,8 @@ name: bitcast_s32_gpr legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all } -# CHECK-NEXT: - { id: 1, class: gpr32all } +# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -44,8 +44,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -69,8 +69,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -94,8 +94,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32all } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -119,8 +119,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64all } -# CHECK-NEXT: - { id: 1, class: gpr64all } +# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -144,8 +144,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -169,8 +169,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64all } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -193,8 +193,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64all } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir index 2f36ec8d2aaa9261f36e42fbb264b8fbe9b2dbe9..790cd6517dd3ac09c4107a0c66ae973415a286d8 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir @@ -9,8 +9,8 @@ ret void } - define void @test_dbg_value_dead(i32 %a) !dbg !5 { - call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !9), !dbg !10 + define void @test_dbg_value_dead(i32 %a) !dbg !11 { + call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !12, metadata !9), !dbg !13 ret void } @@ -30,6 +30,9 @@ !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !9 = !DIExpression() !10 = !DILocation(line: 1, column: 1, scope: !5) + !11 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) + !12 = !DILocalVariable(name: "in", arg: 1, scope: !11, file: !1, line: 1, type: !8) + !13 = !DILocation(line: 1, column: 1, scope: !11) ... --- diff --git a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir index fbb11a1c7a4c6c8520ccac36651a3670cdb3f3e0..34c3da3a536945c266c7f88a3db8e8182c979f9e 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir @@ -34,8 +34,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr64 } -# CHECK: - { id: 1, class: fpr32 } +# CHECK: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -59,8 +59,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK: - { id: 0, class: fpr32 } -# CHECK: - { id: 1, class: fpr64 } +# CHECK: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: fpr } @@ -84,8 +84,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -109,8 +109,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -134,8 +134,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -159,8 +159,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -184,8 +184,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -209,8 +209,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -234,8 +234,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -259,8 +259,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -284,8 +284,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -309,8 +309,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -334,8 +334,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -359,8 +359,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -384,8 +384,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -409,8 +409,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -434,8 +434,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -459,8 +459,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir index 2ba8b7366252eff4a02cd94d707c54b012167794..5f29f8b62fab154459e099a6d7882aee0a465a22 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir @@ -24,9 +24,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all } -# CHECK-NEXT: - { id: 1, class: gpr64all } -# CHECK-NEXT: - { id: 2, class: gpr64all } +# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -51,8 +51,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all } -# CHECK-NEXT: - { id: 1, class: gpr32all } +# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -76,9 +76,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -103,8 +103,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -128,8 +128,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -153,8 +153,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -178,9 +178,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -205,8 +205,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -230,8 +230,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -255,8 +255,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir index 6537408f6d9874cc6a256351992310105de02350..b71a9a3d731ebc54ede5a1ef4e2c577496b56707 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir @@ -18,8 +18,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64all } -# CHECK-NEXT: - { id: 1, class: gpr64all } +# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -41,8 +41,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -64,8 +64,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -87,8 +87,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -110,8 +110,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -133,8 +133,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-load.mir b/test/CodeGen/AArch64/GlobalISel/select-load.mir index 9188e2b0c0fcc3b10666f9ef20df17f04394c659..d00b98d148be5d6c9486c6e308593d3b9dcabfec 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-load.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-load.mir @@ -37,8 +37,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -62,8 +62,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -87,8 +87,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -112,8 +112,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -137,8 +137,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -165,10 +165,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -197,10 +197,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -229,10 +229,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -261,10 +261,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -293,8 +293,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -318,8 +318,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -343,8 +343,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr16 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -368,8 +368,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr8 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -393,10 +393,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -425,10 +425,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -457,10 +457,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: fpr16 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -489,10 +489,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: fpr8 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: fpr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir index 7d5b43bc16d5be187a63d72ea6b997d6550c5489..cd7a79f17d952198bfb57c20ae082ecd2ab61ab0 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir @@ -13,13 +13,13 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } -# CHECK-NEXT: - { id: 3, class: gpr } -# CHECK-NEXT: - { id: 4, class: gpr } -# CHECK-NEXT: - { id: 5, class: gpr } -# CHECK-NEXT: - { id: 6, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 6, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir b/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir new file mode 100644 index 0000000000000000000000000000000000000000..c35d1719f84c8151b4534cd76b5134021a4d7d6b --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir @@ -0,0 +1,64 @@ +# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s + +--- | + define i32 @main() { + entry: + ret i32 0 + } + + declare i32 @printf(i8*, ...) +... +--- +# CHECK-LABEL: name: main +name: main +alignment: 2 +exposesReturnsTwice: false +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } + - { id: 9, class: gpr } + - { id: 10, class: gpr } + - { id: 11, class: gpr } + - { id: 12, class: gpr } + - { id: 13, class: gpr } + - { id: 14, class: gpr } + - { id: 15, class: gpr } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: true + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +# CHECK: body: +# CHECK: %1 = COPY %w0 +# CHECK-NOT: %2 = ORNWrr %wzr, %1 +# CHECK: %4 = EONWrr %1, %3 +body: | + bb.1.entry: + liveins: %w0 + %0(s32) = G_CONSTANT i32 -1 + %3(s32) = G_CONSTANT i32 1 + %1(s32) = COPY %w0 + %2(s32) = G_XOR %1, %0 + %4(s32) = G_XOR %2, %3 + %w0 = COPY %4(s32) +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-store.mir b/test/CodeGen/AArch64/GlobalISel/select-store.mir index 9b8f5c566ce0de3d5a11cb0cdd09a1f8fdb51f07..536e236c27387a3590b8c679056f2bfb7dc5cd4c 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-store.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-store.mir @@ -35,8 +35,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -62,8 +62,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -89,8 +89,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -116,8 +116,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -143,8 +143,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -169,8 +169,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -195,8 +195,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -223,10 +223,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -255,10 +255,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -287,10 +287,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -319,10 +319,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -351,8 +351,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -378,8 +378,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -405,10 +405,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr64 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } @@ -437,10 +437,10 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } -# CHECK-NEXT: - { id: 1, class: fpr32 } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir index fc3546e777f703f60c28df53ff5cadea942e3a94..5559e2d3a0d123b217661af113b2e21be29526cf 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir @@ -15,8 +15,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -39,8 +39,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -63,8 +63,8 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select-xor.mir b/test/CodeGen/AArch64/GlobalISel/select-xor.mir index e787849c8d1bf4dcd55ec93863269b723faefe02..7190fda15b8eed5f9d9f522c061c24577a6d45f2 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-xor.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-xor.mir @@ -20,9 +20,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -50,9 +50,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr64 } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -81,9 +81,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -110,9 +110,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64 } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -139,9 +139,9 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/select.mir b/test/CodeGen/AArch64/GlobalISel/select.mir index 8bffa085fdca6348227c1e9ef59e4818efaeeffa..5e52bc761a8420c769f1253f13ef5c2ee00fdc00 100644 --- a/test/CodeGen/AArch64/GlobalISel/select.mir +++ b/test/CodeGen/AArch64/GlobalISel/select.mir @@ -35,7 +35,7 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp } +# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } registers: - { id: 0, class: gpr } @@ -132,12 +132,12 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr64 } -# CHECK-NEXT: - { id: 3, class: gpr32 } -# CHECK-NEXT: - { id: 4, class: gpr64 } -# CHECK-NEXT: - { id: 5, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -180,12 +180,12 @@ legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: fpr64 } -# CHECK-NEXT: - { id: 3, class: gpr32 } -# CHECK-NEXT: - { id: 4, class: gpr32 } -# CHECK-NEXT: - { id: 5, class: gpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -223,9 +223,9 @@ regBankSelected: true tracksRegLiveness: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: fpr32 } +# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } @@ -260,16 +260,16 @@ regBankSelected: true tracksRegLiveness: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32 } -# CHECK-NEXT: - { id: 1, class: gpr32 } -# CHECK-NEXT: - { id: 2, class: gpr32 } -# CHECK-NEXT: - { id: 3, class: gpr32 } -# CHECK-NEXT: - { id: 4, class: gpr64 } -# CHECK-NEXT: - { id: 5, class: gpr64 } -# CHECK-NEXT: - { id: 6, class: gpr64 } -# CHECK-NEXT: - { id: 7, class: gpr64 } -# CHECK-NEXT: - { id: 8, class: gpr64 } -# CHECK-NEXT: - { id: 9, class: gpr64 } +# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 5, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 6, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 7, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 8, class: gpr64, preferred-register: '' } +# CHECK-NEXT: - { id: 9, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll index 3bd56fa4cebca92cd9c65f7e31cf6ea26e8acda4..af0ab57b0b9fe35832523ad17d1c25db159d7980 100644 --- a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll +++ b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll @@ -4,7 +4,7 @@ define void @test_varargs_sentinel(i8* %list, i64, i64, i64, i64, i64, i64, i64, i32, ...) { ; CHECK-LABEL: name: test_varargs_sentinel ; CHECK: fixedStack: -; CHECK: - { id: [[VARARGS_SLOT:[0-9]+]], offset: 8 +; CHECK: - { id: [[VARARGS_SLOT:[0-9]+]], type: default, offset: 8 ; CHECK: body: ; CHECK: [[LIST:%[0-9]+]] = COPY %x0 ; CHECK: [[VARARGS_AREA:%[0-9]+]] = ADDXri %fixed-stack.[[VARARGS_SLOT]], 0, 0 diff --git a/test/CodeGen/AArch64/aarch64-addv.ll b/test/CodeGen/AArch64/aarch64-addv.ll index 91797c062b8810cb9299a312c7df0399efa418e8..e65992e9913d93be64ed4a54452ee3514a2ac318 100644 --- a/test/CodeGen/AArch64/aarch64-addv.ll +++ b/test/CodeGen/AArch64/aarch64-addv.ll @@ -1,18 +1,16 @@ ; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s +; Function Attrs: nounwind readnone +declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>) +declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>) +declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>) +declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>) + define i8 @add_B(<16 x i8>* %arr) { ; CHECK-LABEL: add_B ; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b %bin.rdx = load <16 x i8>, <16 x i8>* %arr - %rdx.shuf0 = shufflevector <16 x i8> %bin.rdx, <16 x i8> undef, <16 x i32> - %bin.rdx0 = add <16 x i8> %bin.rdx, %rdx.shuf0 - %rdx.shuf = shufflevector <16 x i8> %bin.rdx0, <16 x i8> undef, <16 x i32> - %bin.rdx11 = add <16 x i8> %bin.rdx0, %rdx.shuf - %rdx.shuf12 = shufflevector <16 x i8> %bin.rdx11, <16 x i8> undef, <16 x i32> - %bin.rdx13 = add <16 x i8> %bin.rdx11, %rdx.shuf12 - %rdx.shuf13 = shufflevector <16 x i8> %bin.rdx13, <16 x i8> undef, <16 x i32> - %bin.rdx14 = add <16 x i8> %bin.rdx13, %rdx.shuf13 - %r = extractelement <16 x i8> %bin.rdx14, i32 0 + %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %bin.rdx) ret i8 %r } @@ -20,13 +18,7 @@ define i16 @add_H(<8 x i16>* %arr) { ; CHECK-LABEL: add_H ; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h %bin.rdx = load <8 x i16>, <8 x i16>* %arr - %rdx.shuf = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> - %bin.rdx11 = add <8 x i16> %bin.rdx, %rdx.shuf - %rdx.shuf12 = shufflevector <8 x i16> %bin.rdx11, <8 x i16> undef, <8 x i32> - %bin.rdx13 = add <8 x i16> %bin.rdx11, %rdx.shuf12 - %rdx.shuf13 = shufflevector <8 x i16> %bin.rdx13, <8 x i16> undef, <8 x i32> - %bin.rdx14 = add <8 x i16> %bin.rdx13, %rdx.shuf13 - %r = extractelement <8 x i16> %bin.rdx14, i32 0 + %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %bin.rdx) ret i16 %r } @@ -34,11 +26,7 @@ define i32 @add_S( <4 x i32>* %arr) { ; CHECK-LABEL: add_S ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s %bin.rdx = load <4 x i32>, <4 x i32>* %arr - %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> - %bin.rdx11 = add <4 x i32> %bin.rdx, %rdx.shuf - %rdx.shuf12 = shufflevector <4 x i32> %bin.rdx11, <4 x i32> undef, <4 x i32> - %bin.rdx13 = add <4 x i32> %bin.rdx11, %rdx.shuf12 - %r = extractelement <4 x i32> %bin.rdx13, i32 0 + %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %bin.rdx) ret i32 %r } @@ -46,12 +34,12 @@ define i64 @add_D(<2 x i64>* %arr) { ; CHECK-LABEL: add_D ; CHECK-NOT: addv %bin.rdx = load <2 x i64>, <2 x i64>* %arr - %rdx.shuf0 = shufflevector <2 x i64> %bin.rdx, <2 x i64> undef, <2 x i32> - %bin.rdx0 = add <2 x i64> %bin.rdx, %rdx.shuf0 - %r = extractelement <2 x i64> %bin.rdx0, i32 0 + %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %bin.rdx) ret i64 %r } +declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>) + define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) { ; CHECK-LABEL: oversized_ADDV_256 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s @@ -66,33 +54,16 @@ entry: %7 = icmp slt <8 x i32> %6, zeroinitializer %8 = sub nsw <8 x i32> zeroinitializer, %6 %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6 - %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> - %bin.rdx = add <8 x i32> %9, %rdx.shuf - %rdx.shuf1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> - %bin.rdx2 = add <8 x i32> %bin.rdx, %rdx.shuf1 - %rdx.shuf3 = shufflevector <8 x i32> %bin.rdx2, <8 x i32> undef, <8 x i32> - %bin.rdx4 = add <8 x i32> %bin.rdx2, %rdx.shuf3 - %10 = extractelement <8 x i32> %bin.rdx4, i32 0 - ret i32 %10 + %r = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %9) + ret i32 %r } +declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>) + define i32 @oversized_ADDV_512(<16 x i32>* %arr) { ; CHECK-LABEL: oversized_ADDV_512 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s %bin.rdx = load <16 x i32>, <16 x i32>* %arr - - %rdx.shuf0 = shufflevector <16 x i32> %bin.rdx, <16 x i32> undef, <16 x i32> - %bin.rdx0 = add <16 x i32> %bin.rdx, %rdx.shuf0 - - %rdx.shuf = shufflevector <16 x i32> %bin.rdx0, <16 x i32> undef, <16 x i32> - %bin.rdx11 = add <16 x i32> %bin.rdx0, %rdx.shuf - - %rdx.shuf12 = shufflevector <16 x i32> %bin.rdx11, <16 x i32> undef, <16 x i32> - %bin.rdx13 = add <16 x i32> %bin.rdx11, %rdx.shuf12 - - %rdx.shuf13 = shufflevector <16 x i32> %bin.rdx13, <16 x i32> undef, <16 x i32> - %bin.rdx14 = add <16 x i32> %bin.rdx13, %rdx.shuf13 - - %r = extractelement <16 x i32> %bin.rdx14, i32 0 + %r = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %bin.rdx) ret i32 %r } diff --git a/test/CodeGen/AArch64/aarch64-minmaxv.ll b/test/CodeGen/AArch64/aarch64-minmaxv.ll index 9a56cd6ae7c0498f967c56a22f4db88c2f745375..760a8f8419f9e2620f6f8aa7f14355aed22f35cc 100644 --- a/test/CodeGen/AArch64/aarch64-minmaxv.ll +++ b/test/CodeGen/AArch64/aarch64-minmaxv.ll @@ -2,344 +2,148 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>) +declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>) +declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>) +declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>) +declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>) +declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>) + +declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>) +declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>) +declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>) +declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>) +declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>) +declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>) + +declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>) +declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>) + ; CHECK-LABEL: smax_B ; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b define i8 @smax_B(<16 x i8>* nocapture readonly %arr) { %arr.load = load <16 x i8>, <16 x i8>* %arr - %rdx.shuf = shufflevector <16 x i8> %arr.load, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp sgt <16 x i8> %arr.load, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %arr.load, <16 x i8> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp sgt <16 x i8> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp sgt <16 x i8> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp sgt <16 x i8> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %arr.load) ret i8 %r } ; CHECK-LABEL: smax_H ; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h define i16 @smax_H(<8 x i16>* nocapture readonly %arr) { - %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr - %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp23 = icmp sgt <8 x i16> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf - %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp26 = icmp sgt <8 x i16> %rdx.minmax.select24, %rdx.shuf25 - %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 - %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp29 = icmp sgt <8 x i16> %rdx.minmax.select27, %rdx.shuf28 - %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 - %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 - %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 - %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + %arr.load = load <8 x i16>, <8 x i16>* %arr + %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %arr.load) ret i16 %r } ; CHECK-LABEL: smax_S ; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s define i32 @smax_S(<4 x i32> * nocapture readonly %arr) { - %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr - %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> - %rdx.minmax.cmp18 = icmp sgt <4 x i32> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf - %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> - %rdx.minmax.cmp21 = icmp sgt <4 x i32> %rdx.minmax.select19, %rdx.shuf20 - %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 - %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 - %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 - %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + %arr.load = load <4 x i32>, <4 x i32>* %arr + %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %arr.load) ret i32 %r } -; CHECK-LABEL: smax_D -; CHECK-NOT: smaxv -define i64 @smax_D(<2 x i64>* nocapture readonly %arr) { - %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr - %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> - %rdx.minmax.cmp18 = icmp sgt <2 x i64> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 - %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 - %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 - %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt - ret i64 %r -} - - ; CHECK-LABEL: umax_B ; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b define i8 @umax_B(<16 x i8>* nocapture readonly %arr) { - %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr - %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp ugt <16 x i8> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp ugt <16 x i8> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp ugt <16 x i8> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp ugt <16 x i8> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + %arr.load = load <16 x i8>, <16 x i8>* %arr + %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %arr.load) ret i8 %r } ; CHECK-LABEL: umax_H ; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h define i16 @umax_H(<8 x i16>* nocapture readonly %arr) { - %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr - %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp23 = icmp ugt <8 x i16> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf - %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp26 = icmp ugt <8 x i16> %rdx.minmax.select24, %rdx.shuf25 - %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 - %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp29 = icmp ugt <8 x i16> %rdx.minmax.select27, %rdx.shuf28 - %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 - %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 - %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 - %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + %arr.load = load <8 x i16>, <8 x i16>* %arr + %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %arr.load) ret i16 %r } ; CHECK-LABEL: umax_S ; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s define i32 @umax_S(<4 x i32>* nocapture readonly %arr) { - %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr - %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> - %rdx.minmax.cmp18 = icmp ugt <4 x i32> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf - %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> - %rdx.minmax.cmp21 = icmp ugt <4 x i32> %rdx.minmax.select19, %rdx.shuf20 - %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 - %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 - %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 - %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + %arr.load = load <4 x i32>, <4 x i32>* %arr + %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %arr.load) ret i32 %r } -; CHECK-LABEL: umax_D -; CHECK-NOT: umaxv -define i64 @umax_D(<2 x i64>* nocapture readonly %arr) { - %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr - %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> - %rdx.minmax.cmp18 = icmp ugt <2 x i64> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 - %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 - %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 - %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt - ret i64 %r -} - - ; CHECK-LABEL: smin_B ; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b define i8 @smin_B(<16 x i8>* nocapture readonly %arr) { - %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr - %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp slt <16 x i8> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp slt <16 x i8> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp slt <16 x i8> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp slt <16 x i8> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + %arr.load = load <16 x i8>, <16 x i8>* %arr + %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %arr.load) ret i8 %r } ; CHECK-LABEL: smin_H ; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h define i16 @smin_H(<8 x i16>* nocapture readonly %arr) { - %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr - %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp23 = icmp slt <8 x i16> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf - %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp26 = icmp slt <8 x i16> %rdx.minmax.select24, %rdx.shuf25 - %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 - %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp29 = icmp slt <8 x i16> %rdx.minmax.select27, %rdx.shuf28 - %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 - %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 - %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 - %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + %arr.load = load <8 x i16>, <8 x i16>* %arr + %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %arr.load) ret i16 %r } ; CHECK-LABEL: smin_S ; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s define i32 @smin_S(<4 x i32>* nocapture readonly %arr) { - %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr - %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> - %rdx.minmax.cmp18 = icmp slt <4 x i32> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf - %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> - %rdx.minmax.cmp21 = icmp slt <4 x i32> %rdx.minmax.select19, %rdx.shuf20 - %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 - %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 - %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 - %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + %arr.load = load <4 x i32>, <4 x i32>* %arr + %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %arr.load) ret i32 %r } -; CHECK-LABEL: smin_D -; CHECK-NOT: sminv -define i64 @smin_D(<2 x i64>* nocapture readonly %arr) { - %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr - %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> - %rdx.minmax.cmp18 = icmp slt <2 x i64> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 - %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 - %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 - %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt - ret i64 %r -} - - ; CHECK-LABEL: umin_B ; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b define i8 @umin_B(<16 x i8>* nocapture readonly %arr) { - %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr - %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp ult <16 x i8> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp ult <16 x i8> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp ult <16 x i8> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp ult <16 x i8> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + %arr.load = load <16 x i8>, <16 x i8>* %arr + %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %arr.load) ret i8 %r } ; CHECK-LABEL: umin_H ; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h define i16 @umin_H(<8 x i16>* nocapture readonly %arr) { - %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr - %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp23 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf - %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp26 = icmp ult <8 x i16> %rdx.minmax.select24, %rdx.shuf25 - %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 - %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> - %rdx.minmax.cmp29 = icmp ult <8 x i16> %rdx.minmax.select27, %rdx.shuf28 - %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 - %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 - %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 - %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + %arr.load = load <8 x i16>, <8 x i16>* %arr + %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %arr.load) ret i16 %r } ; CHECK-LABEL: umin_S ; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s define i32 @umin_S(<4 x i32>* nocapture readonly %arr) { - %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr - %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> - %rdx.minmax.cmp18 = icmp ult <4 x i32> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf - %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> - %rdx.minmax.cmp21 = icmp ult <4 x i32> %rdx.minmax.select19, %rdx.shuf20 - %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 - %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 - %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 - %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + %arr.load = load <4 x i32>, <4 x i32>* %arr + %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %arr.load) ret i32 %r } -; CHECK-LABEL: umin_D -; CHECK-NOT: uminv -define i64 @umin_D(<2 x i64>* nocapture readonly %arr) { - %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr - %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> - %rdx.minmax.cmp18 = icmp ult <2 x i64> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 - %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 - %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 - %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt - ret i64 %r -} - ; CHECK-LABEL: fmaxnm_S ; CHECK: fmaxnmv define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) { - %rdx.minmax.select = load <4 x float>, <4 x float>* %arr - %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> - %rdx.minmax.cmp = fcmp fast oge <4 x float> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> - %rdx.minmax.cmp1 = fcmp fast oge <4 x float> %rdx.minmax.select1, %rdx.shuf1 - %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0 - %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0 - %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1 - %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt + %arr.load = load <4 x float>, <4 x float>* %arr + %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %arr.load) ret float %r } ; CHECK-LABEL: fminnm_S ; CHECK: fminnmv define float @fminnm_S(<4 x float>* nocapture readonly %arr) { - %rdx.minmax.select = load <4 x float>, <4 x float>* %arr - %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> - %rdx.minmax.cmp = fcmp fast ole <4 x float> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> - %rdx.minmax.cmp1 = fcmp fast ole <4 x float> %rdx.minmax.select1, %rdx.shuf1 - %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0 - %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0 - %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1 - %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt + %arr.load = load <4 x float>, <4 x float>* %arr + %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %arr.load) ret float %r } +declare i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16>) + define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_umax_256 ; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: umaxv {{h[0-9]+}}, [[V0]] - %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr - %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp ugt <16 x i16> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp ugt <16 x i16> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp ugt <16 x i16> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp ugt <16 x i16> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + %arr.load = load <16 x i16>, <16 x i16>* %arr + %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v16i16(<16 x i16> %arr.load) ret i16 %r } +declare i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32>) + define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_umax_512 ; CHECK: umax v @@ -347,47 +151,23 @@ define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]] %arr.load = load <16 x i32>, <16 x i32>* %arr - %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp ugt <16 x i32> %arr.load, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp ugt <16 x i32> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp ugt <16 x i32> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp ugt <16 x i32> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v16i32(<16 x i32> %arr.load) ret i32 %r } +declare i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16>) + define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_umin_256 ; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: uminv {{h[0-9]+}}, [[V0]] - %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr - %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp ult <16 x i16> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp ult <16 x i16> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp ult <16 x i16> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp ult <16 x i16> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + %arr.load = load <16 x i16>, <16 x i16>* %arr + %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v16i16(<16 x i16> %arr.load) ret i16 %r } +declare i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32>) + define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_umin_512 ; CHECK: umin v @@ -395,47 +175,23 @@ define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]] %arr.load = load <16 x i32>, <16 x i32>* %arr - %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp ult <16 x i32> %arr.load, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp ult <16 x i32> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp ult <16 x i32> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp ult <16 x i32> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v16i32(<16 x i32> %arr.load) ret i32 %r } +declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>) + define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_smax_256 ; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: smaxv {{h[0-9]+}}, [[V0]] %arr.load = load <16 x i16>, <16 x i16>* %arr - %rdx.shuf = shufflevector <16 x i16> %arr.load, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp sgt <16 x i16> %arr.load, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %arr.load, <16 x i16> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp sgt <16 x i16> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp sgt <16 x i16> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp sgt <16 x i16> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %arr.load) ret i16 %r } +declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>) + define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_smax_512 ; CHECK: smax v @@ -443,47 +199,23 @@ define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]] %arr.load = load <16 x i32>, <16 x i32>* %arr - %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp sgt <16 x i32> %arr.load, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp sgt <16 x i32> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp sgt <16 x i32> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp sgt <16 x i32> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %arr.load) ret i32 %r } +declare i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16>) + define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_smin_256 ; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h ; CHECK: sminv {{h[0-9]+}}, [[V0]] - %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr - %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp slt <16 x i16> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp slt <16 x i16> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp slt <16 x i16> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + %arr.load = load <16 x i16>, <16 x i16>* %arr + %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v16i16(<16 x i16> %arr.load) ret i16 %r } +declare i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32>) + define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-LABEL: oversized_smin_512 ; CHECK: smin v @@ -491,20 +223,6 @@ define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) { ; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s ; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]] %arr.load = load <16 x i32>, <16 x i32>* %arr - %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp22 = icmp slt <16 x i32> %arr.load, %rdx.shuf - %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf - %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp25 = icmp slt <16 x i32> %rdx.minmax.select23, %rdx.shuf24 - %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 - %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp28 = icmp slt <16 x i32> %rdx.minmax.select26, %rdx.shuf27 - %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 - %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> - %rdx.minmax.cmp31 = icmp slt <16 x i32> %rdx.minmax.select29, %rdx.shuf30 - %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 - %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 - %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 - %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v16i32(<16 x i32> %arr.load) ret i32 %r } diff --git a/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/test/CodeGen/AArch64/aarch64-stp-cluster.ll index fe5abbf15eff4a5ec9b0fe5df38d4fd686b4d8be..25cf313b81e7aec9ba4c1e3155b108268269dd50 100644 --- a/test/CodeGen/AArch64/aarch64-stp-cluster.ll +++ b/test/CodeGen/AArch64/aarch64-stp-cluster.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -aarch64-enable-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -aarch64-enable-stp-suppress=false -o - 2>&1 > /dev/null | FileCheck %s ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: stp_i64_scale:BB#0 diff --git a/test/CodeGen/AArch64/addcarry-crash.ll b/test/CodeGen/AArch64/addcarry-crash.ll new file mode 100644 index 0000000000000000000000000000000000000000..ba833e0b5873c0dc2932adead12f339efefeac47 --- /dev/null +++ b/test/CodeGen/AArch64/addcarry-crash.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s | FileCheck %s +target triple = "arm64-apple-ios7.0" + +define i64 @foo(i64* nocapture readonly %ptr, i64 %a, i64 %b, i64 %c) local_unnamed_addr #0 { +; CHECK: ldr w8, [x0, #4] +; CHECK: lsr x9, x1, #32 +; CHECK: cmn x3, x2 +; CHECK: mul x8, x8, x9 +; CHECK: cinc x0, x8, hs +; CHECK: ret +entry: + %0 = lshr i64 %a, 32 + %1 = load i64, i64* %ptr, align 8 + %2 = lshr i64 %1, 32 + %3 = mul nuw i64 %2, %0 + %4 = add i64 %c, %b + %5 = icmp ult i64 %4, %c + %6 = zext i1 %5 to i64 + %7 = add i64 %3, %6 + ret i64 %7 +} + +attributes #0 = { norecurse nounwind readonly } diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll index 6cf0ab35b9b527c12b29d7b0365810a366ed7718..5be84b7d493b7998923023b33a4b5e065e8e0988 100644 --- a/test/CodeGen/AArch64/arm64-abi.ll +++ b/test/CodeGen/AArch64/arm64-abi.ll @@ -43,9 +43,7 @@ entry: ; CHECK-LABEL: i8i16caller ; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5. ; They are i8, i16, i8 and i8. -; CHECK-DAG: strb {{w[0-9]+}}, [sp, #5] -; CHECK-DAG: strb {{w[0-9]+}}, [sp, #4] -; CHECK-DAG: strh {{w[0-9]+}}, [sp, #2] +; CHECK-DAG: stur {{w[0-9]+}}, [sp, #2] ; CHECK-DAG: strb {{w[0-9]+}}, [sp] ; CHECK: bl ; FAST-LABEL: i8i16caller diff --git a/test/CodeGen/AArch64/arm64-anyregcc.ll b/test/CodeGen/AArch64/arm64-anyregcc.ll index 1af3103832432eff169c3ceb12a1977cf4c12e18..10989a07990c5aff510172ac7a206adac39aca19 100644 --- a/test/CodeGen/AArch64/arm64-anyregcc.ll +++ b/test/CodeGen/AArch64/arm64-anyregcc.ll @@ -4,7 +4,7 @@ ; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps ; CHECK-NEXT: __LLVM_StackMaps: ; Header -; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 3 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 0 ; Num Functions @@ -48,18 +48,24 @@ ; CHECK-NEXT: .short 3 ; Loc 0: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 4 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 1: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 4 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 2: Constant 3 ; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .byte 8 -; CHECK-NEXT: .short 0 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 3 define i64 @test() nounwind ssp uwtable { entry: @@ -69,18 +75,22 @@ entry: ; property access 1 - %obj is an anyreg call argument and should therefore be in a register ; CHECK-LABEL: .long L{{.*}}-_property_access1 -; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 ; 2 locations ; CHECK-NEXT: .short 2 ; Loc 0: Register <-- this is the return register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 1: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define i64 @property_access1(i8* %obj) nounwind ssp uwtable { entry: @@ -91,18 +101,22 @@ entry: ; property access 2 - %obj is an anyreg call argument and should therefore be in a register ; CHECK-LABEL: .long L{{.*}}-_property_access2 -; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 ; 2 locations ; CHECK-NEXT: .short 2 ; Loc 0: Register <-- this is the return register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 1: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define i64 @property_access2() nounwind ssp uwtable { entry: @@ -114,18 +128,22 @@ entry: ; property access 3 - %obj is a frame index ; CHECK-LABEL: .long L{{.*}}-_property_access3 -; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 ; 2 locations ; CHECK-NEXT: .short 2 ; Loc 0: Register <-- this is the return register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 1: Direct FP - 8 ; CHECK-NEXT: .byte 2 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 29 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long -8 define i64 @property_access3() nounwind ssp uwtable { entry: @@ -137,78 +155,106 @@ entry: ; anyreg_test1 ; CHECK-LABEL: .long L{{.*}}-_anyreg_test1 -; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 ; 14 locations ; CHECK-NEXT: .short 14 ; Loc 0: Register <-- this is the return register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 1: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 2: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 3: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 4: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 5: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 6: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 7: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 8: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 9: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 10: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 11: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 12: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 13: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable { entry: @@ -219,78 +265,106 @@ entry: ; anyreg_test2 ; CHECK-LABEL: .long L{{.*}}-_anyreg_test2 -; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 ; 14 locations ; CHECK-NEXT: .short 14 ; Loc 0: Register <-- this is the return register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 1: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 2: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 3: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 4: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 5: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 6: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 7: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 8: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 9: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 10: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 11: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 12: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 13: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable { entry: @@ -308,18 +382,24 @@ entry: ; CHECK-NEXT: .short 3 ; Loc 0: Register (some register that will be spilled to the stack) ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 1: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 1: Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { entry: @@ -337,28 +417,38 @@ entry: ; CHECK-NEXT: .short 5 ; Loc 0: Return a register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 1: Arg0 in a Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 2: Arg1 in a Register ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; Loc 3: Arg2 spilled to FP -96 ; CHECK-NEXT: .byte 3 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 29 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long -96 ; Loc 4: Arg3 spilled to FP - 88 ; CHECK-NEXT: .byte 3 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 29 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long -88 define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { entry: diff --git a/test/CodeGen/AArch64/arm64-csldst-mmo.ll b/test/CodeGen/AArch64/arm64-csldst-mmo.ll index 4930c493d62c5c22af850b46013bd3b4760ab8d6..cfb8e3a38c492077cfc02748bc34d048839ef6dc 100644 --- a/test/CodeGen/AArch64/arm64-csldst-mmo.ll +++ b/test/CodeGen/AArch64/arm64-csldst-mmo.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched=0 -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched=0 -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; REQUIRES: asserts @G = external global [0 x i32], align 4 diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll new file mode 100644 index 0000000000000000000000000000000000000000..16a02de79a9177d8208d22cbfbf9b7544d06d275 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -0,0 +1,131 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=arm64-eabi < %s | FileCheck --enable-var-scope %s + +; Test fptosi +define i32 @fptosi_wh(half %a) nounwind ssp { +entry: +; CHECK-LABEL: fptosi_wh +; CHECK: fcvt s1, h0 +; CHECK: fcvtzs [[REG:w[0-9]+]], s1 +; CHECK: mov w0, [[REG]] + %conv = fptosi half %a to i32 + ret i32 %conv +} + +; Test fptoui +define i32 @fptoui_swh(half %a) nounwind ssp { +entry: +; CHECK-LABEL: fptoui_swh +; CHECK: fcvt s1, h0 +; CHECK: fcvtzu [[REG:w[0-9]+]], s1 +; CHECK: mov w0, [[REG]] + %conv = fptoui half %a to i32 + ret i32 %conv +} + +; Test sitofp +define half @sitofp_hw_i1(i1 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hw_i1 +; CHECK: sbfx w0, w0, #0, #1 +; CHECK: scvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = sitofp i1 %a to half + ret half %conv +} + +; Test sitofp +define half @sitofp_hw_i8(i8 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hw_i8 +; CHECK: sxtb w0, w0 +; CHECK: scvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = sitofp i8 %a to half + ret half %conv +} + +; Test sitofp +define half @sitofp_hw_i16(i16 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hw_i16 +; CHECK: sxth w0, w0 +; CHECK: scvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = sitofp i16 %a to half + ret half %conv +} + +; Test sitofp +define half @sitofp_hw_i32(i32 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hw_i32 +; CHECK: scvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = sitofp i32 %a to half + ret half %conv +} + +; Test sitofp +define half @sitofp_hx(i64 %a) nounwind ssp { +entry: +; CHECK-LABEL: sitofp_hx +; CHECK: scvtf s0, x0 +; CHECK: fcvt h0, s0 + %conv = sitofp i64 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hw_i1(i1 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hw_i1 +; CHECK: and w0, w0, #0x1 +; CHECK: ucvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = uitofp i1 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hw_i8(i8 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hw_i8 +; CHECK: and w0, w0, #0xff +; CHECK: ucvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = uitofp i8 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hw_i16(i16 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hw_i16 +; CHECK: and w0, w0, #0xffff +; CHECK: ucvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = uitofp i16 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hw_i32(i32 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hw_i32 +; CHECK: ucvtf s0, w0 +; CHECK: fcvt h0, s0 + %conv = uitofp i32 %a to half + ret half %conv +} + +; Test uitofp +define half @uitofp_hx(i64 %a) nounwind ssp { +entry: +; CHECK-LABEL: uitofp_hx +; CHECK: ucvtf s0, x0 +; CHECK: fcvt h0, s0 + %conv = uitofp i64 %a to half + ret half %conv +} + + diff --git a/test/CodeGen/AArch64/arm64-fml-combines.ll b/test/CodeGen/AArch64/arm64-fml-combines.ll index 840d1dcbf0609e756bade09953a07af13c69242a..f9749882527913ecff15a171749c38ea34561a9a 100644 --- a/test/CodeGen/AArch64/arm64-fml-combines.ll +++ b/test/CodeGen/AArch64/arm64-fml-combines.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -fp-contract=fast | FileCheck %s + define void @foo_2d(double* %src) { entry: %arrayidx1 = getelementptr inbounds double, double* %src, i64 5 @@ -126,3 +128,23 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void } + +; CHECK-LABEL: test1: +; CHECK: fnmadd s0, s0, s1, s2 +define float @test1(float %a, float %b, float %c) { +entry: + %0 = fmul float %a, %b + %mul = fsub float -0.000000e+00, %0 + %sub1 = fsub float %mul, %c + ret float %sub1 +} + +; CHECK-LABEL: test2: +; CHECK: fnmadd d0, d0, d1, d2 +define double @test2(double %a, double %b, double %c) { +entry: + %0 = fmul double %a, %b + %mul = fsub double -0.000000e+00, %0 + %sub1 = fsub double %mul, %c + ret double %sub1 +} diff --git a/test/CodeGen/AArch64/arm64-hello.ll b/test/CodeGen/AArch64/arm64-hello.ll index caaf8615cd4abb70a62104120dfa718469737be2..a8d1c24825206e2439f84339888eb6a899c41a96 100644 --- a/test/CodeGen/AArch64/arm64-hello.ll +++ b/test/CodeGen/AArch64/arm64-hello.ll @@ -6,8 +6,8 @@ ; CHECK-NEXT: stp x29, x30, [sp, #16] ; CHECK-NEXT: add x29, sp, #16 ; CHECK-NEXT: stur wzr, [x29, #-4] -; CHECK: adrp x0, L_.str@PAGE -; CHECK: add x0, x0, L_.str@PAGEOFF +; CHECK: adrp x0, l_.str@PAGE +; CHECK: add x0, x0, l_.str@PAGEOFF ; CHECK-NEXT: bl _puts ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; CHECK-NEXT: add sp, sp, #32 diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index d344084ef62d825a9c707ae563f3a27f85673328..a502800923fdc8197251b1cc0cebf04180b066f7 100644 --- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -6238,3 +6238,84 @@ define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, store <2 x i32> %sub, <2 x i32>* %out, align 16 ret void } + +; CHECK-LABEL: test_ld1lane_build_i16: +; CHECK-DAG: ldr h[[REGNUM1:[0-9]+]], [x0] +; CHECK-DAG: ld1.h { v[[REGNUM1]] }[1], [x1] +; CHECK-DAG: ld1.h { v[[REGNUM1]] }[2], [x2] +; CHECK-DAG: ld1.h { v[[REGNUM1]] }[3], [x3] +; CHECK: sub.4h v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0 +; CHECK-NEXT: str d[[REGNUM2]], [x4] +; CHECK-NEXT: ret +define void @test_ld1lane_build_i16(i16* %a, i16* %b, i16* %c, i16* %d, <4 x i16> %e, <4 x i16>* %p) { + %ld.a = load i16, i16* %a + %ld.b = load i16, i16* %b + %ld.c = load i16, i16* %c + %ld.d = load i16, i16* %d + %v.a = insertelement <4 x i16> undef, i16 %ld.a, i64 0 + %v.b = insertelement <4 x i16> %v.a, i16 %ld.b, i64 1 + %v.c = insertelement <4 x i16> %v.b, i16 %ld.c, i64 2 + %v = insertelement <4 x i16> %v.c, i16 %ld.d, i64 3 + %sub = sub nsw <4 x i16> %v, %e + store <4 x i16> %sub, <4 x i16>* %p + ret void +} + +; CHECK-LABEL: test_ld1lane_build_half: +; CHECK-DAG: ldr h[[REGNUM1:[0-9]+]], [x0] +; CHECK-DAG: ld1.h { v[[REGNUM1]] }[1], [x1] +; CHECK-DAG: ld1.h { v[[REGNUM1]] }[2], [x2] +; CHECK-DAG: ld1.h { v[[REGNUM1]] }[3], [x3] +; CHECK-DAG: fcvtl v[[REGNUM01:[0-9]+]].4s, v0.4h +; CHECK-DAG: fcvtl v[[REGNUM11:[0-9]+]].4s, v[[REGNUM1]].4h +; CHECK: fsub.4s v[[REGNUM2:[0-9]+]], v[[REGNUM11]], v[[REGNUM01]] +; CHECK-DAG: fcvtn v[[REGNUM3:[0-9]+]].4h, v[[REGNUM2]].4s +; CHECK-NEXT: str d[[REGNUM2]], [x4] +; CHECK-NEXT: ret +define void @test_ld1lane_build_half(half* %a, half* %b, half* %c, half* %d, <4 x half> %e, <4 x half>* %p) { + %ld.a = load half, half* %a + %ld.b = load half, half* %b + %ld.c = load half, half* %c + %ld.d = load half, half* %d + %v.a = insertelement <4 x half> undef, half %ld.a, i64 0 + %v.b = insertelement <4 x half> %v.a, half %ld.b, i64 1 + %v.c = insertelement <4 x half> %v.b, half %ld.c, i64 2 + %v = insertelement <4 x half> %v.c, half %ld.d, i64 3 + %sub = fsub <4 x half> %v, %e + store <4 x half> %sub, <4 x half>* %p + ret void +} + +; CHECK-LABEL: test_ld1lane_build_i8: +; CHECK-DAG: ldr b[[REGNUM1:[0-9]+]], [x0] +; CHECK-DAG: ld1.b { v[[REGNUM1]] }[1], [x1] +; CHECK-DAG: ld1.b { v[[REGNUM1]] }[2], [x2] +; CHECK-DAG: ld1.b { v[[REGNUM1]] }[3], [x3] +; CHECK-DAG: ld1.b { v[[REGNUM1]] }[4], [x4] +; CHECK-DAG: ld1.b { v[[REGNUM1]] }[5], [x5] +; CHECK-DAG: ld1.b { v[[REGNUM1]] }[6], [x6] +; CHECK-DAG: ld1.b { v[[REGNUM1]] }[7], [x7] +; CHECK: sub.8b v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0 +; CHECK-NEXT: str d[[REGNUM2]], [x +; CHECK-NEXT: ret +define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* %f, i8* %g, i8* %h, <8 x i8> %v, <8 x i8>* %p) { + %ld.a = load i8, i8* %a + %ld.b = load i8, i8* %b + %ld.c = load i8, i8* %c + %ld.d = load i8, i8* %d + %ld.e = load i8, i8* %e + %ld.f = load i8, i8* %f + %ld.g = load i8, i8* %g + %ld.h = load i8, i8* %h + %v.a = insertelement <8 x i8> undef, i8 %ld.a, i64 0 + %v.b = insertelement <8 x i8> %v.a, i8 %ld.b, i64 1 + %v.c = insertelement <8 x i8> %v.b, i8 %ld.c, i64 2 + %v.d = insertelement <8 x i8> %v.c, i8 %ld.d, i64 3 + %v.e = insertelement <8 x i8> %v.d, i8 %ld.e, i64 4 + %v.f = insertelement <8 x i8> %v.e, i8 %ld.f, i64 5 + %v.g = insertelement <8 x i8> %v.f, i8 %ld.g, i64 6 + %v1 = insertelement <8 x i8> %v.g, i8 %ld.h, i64 7 + %sub = sub nsw <8 x i8> %v1, %v + store <8 x i8> %sub, <8 x i8>* %p + ret void +} diff --git a/test/CodeGen/AArch64/arm64-inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll index f28d0ab07c5ac7aefe18fa33de1682872b21f8ff..f849df2a51ec20617bff981455beeafc75ce9bda 100644 --- a/test/CodeGen/AArch64/arm64-inline-asm.ll +++ b/test/CodeGen/AArch64/arm64-inline-asm.ll @@ -254,3 +254,10 @@ define void @test_constraint_w(i32 %a) { tail call void asm sideeffect "sqxtn h0, ${0:s}\0A", "w"(i32 %a) ret void } + +define void @test_inline_modifier_a(i8* %ptr) nounwind { + ; CHECK-LABEL: test_inline_modifier_a: + tail call void asm sideeffect "prfm pldl1keep, ${0:a}\0A", "r"(i8* %ptr) + ; CHECK: prfm pldl1keep, [x0] + ret void +} diff --git a/test/CodeGen/AArch64/arm64-ldp-cluster.ll b/test/CodeGen/AArch64/arm64-ldp-cluster.ll index 0cfbe5958f4df7b43acc02a21b672f1c94bf754e..64e535ca74990d5535d75c744da22d21bc47cf63 100644 --- a/test/CodeGen/AArch64/arm64-ldp-cluster.ll +++ b/test/CodeGen/AArch64/arm64-ldp-cluster.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m1 -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck --check-prefix=EXYNOS %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=exynos-m1 -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck --check-prefix=EXYNOS %s ; Test ldr clustering. ; CHECK: ********** MI Scheduling ********** diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll index 41287a17da86ebb8181d27dc08529f211c402640..307d1ec1aa8cc09333f3495f9f89134ff5224ad6 100644 --- a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll +++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -disable-machine-dce -o - 2>&1 > /dev/null | FileCheck %s -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -disable-machine-dce -o - -misched-limit=2 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=machine-scheduler -disable-machine-dce -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=machine-scheduler -disable-machine-dce -o - -misched-limit=2 2>&1 > /dev/null | FileCheck %s ; ; The Cortex-A53 machine model will cause the MADD instruction to be scheduled ; much higher than the ADD instructions in order to hide latency. When not diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll index fac5f8ad2e9f9882a91c242fecf19f1aa26fc0d5..82ba18ce72ca56f23343b9dd855e81adefdf553e 100644 --- a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll +++ b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll @@ -6,7 +6,7 @@ ; the loads to avoid unnecessary stalls. The generic machine model schedules 4 ; loads consecutively for this case and will cause stalls. ; -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; CHECK: ********** MI Scheduling ********** ; CHECK: main:BB#2 ; CHECK: LDR diff --git a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll index 0ee74d1f782e2df4340a36ba6d4395274359ea9d..cde62fcb3f95c80cc8cd4531a4f197c6a6599d38 100644 --- a/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll +++ b/test/CodeGen/AArch64/arm64-misched-forwarding-A53.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; ; For Cortex-A53, shiftable operands that are not actually shifted ; are not needed for an additional two cycles. diff --git a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll index 0ec754f97ec729e78646010251f3a0f258ed2fed..748a4762d82f45aff80097d188e86891e0ae38e3 100644 --- a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll +++ b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; ; Test for bug in misched memory dependency calculation. ; diff --git a/test/CodeGen/AArch64/arm64-misched-multimmo.ll b/test/CodeGen/AArch64/arm64-misched-multimmo.ll index 3593668e0156863d32e6f361ab64c9ba5daf9f84..75f45da0e48ff7b031641c8ed4b037c3f0fefd6b 100644 --- a/test/CodeGen/AArch64/arm64-misched-multimmo.ll +++ b/test/CodeGen/AArch64/arm64-misched-multimmo.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched=0 -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched=0 -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s @G1 = common global [100 x i32] zeroinitializer, align 4 diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll index 8d9a8c06aa3c58b5b14cd0638600431c25b08c3b..a7b95e717910dcecce686388c8b78a7a12f104ea 100644 --- a/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -906,7 +906,7 @@ define <8 x i8> @getl(<16 x i8> %x) #0 { ; CHECK: str q0 ; CHECK-DAG: and [[MASKED_IDX:x[0-9]+]], x0, #0x7 ; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #3 -; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], {{\[}}[[PTR]]{{\]}} +; CHECK-DAG: ldr h[[R:[0-9]+]], {{\[}}[[PTR]]{{\]}} ; CHECK-DAG: ins v[[R]].h[1], v0.h[1] ; CHECK-DAG: ins v[[R]].h[2], v0.h[2] ; CHECK-DAG: ins v[[R]].h[3], v0.h[3] diff --git a/test/CodeGen/AArch64/arm64-sincos.ll b/test/CodeGen/AArch64/arm64-sincos.ll index 06157b2580c4701a2a0d29da250f143b1b9f4bfb..98876dbe87b0350e6ccddf3e218a97b693b88027 100644 --- a/test/CodeGen/AArch64/arm64-sincos.ll +++ b/test/CodeGen/AArch64/arm64-sincos.ll @@ -1,7 +1,9 @@ ; RUN: llc < %s -mtriple=arm64-apple-ios7 | FileCheck %s --check-prefix CHECK-IOS ; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix CHECK-LINUX -; Combine sin / cos into a single call. +; Combine sin / cos into a single call unless they may write errno (as +; captured by readnone attrbiute, controlled by clang -fmath-errno +; setting). ; rdar://12856873 define float @test1(float %x) nounwind { @@ -11,11 +13,26 @@ entry: ; CHECK-IOS: fadd s0, s0, s1 ; CHECK-LINUX-LABEL: test1: +; CHECK-LINUX: bl sincosf + + %call = tail call float @sinf(float %x) readnone + %call1 = tail call float @cosf(float %x) readnone + %add = fadd float %call, %call1 + ret float %add +} + +define float @test1_errno(float %x) nounwind { +entry: +; CHECK-IOS-LABEL: test1_errno: +; CHECK-IOS: bl _sinf +; CHECK-IOS: bl _cosf + +; CHECK-LINUX-LABEL: test1_errno: ; CHECK-LINUX: bl sinf ; CHECK-LINUX: bl cosf - %call = tail call float @sinf(float %x) nounwind readnone - %call1 = tail call float @cosf(float %x) nounwind readnone + %call = tail call float @sinf(float %x) + %call1 = tail call float @cosf(float %x) %add = fadd float %call, %call1 ret float %add } @@ -27,16 +44,31 @@ entry: ; CHECK-IOS: fadd d0, d0, d1 ; CHECK-LINUX-LABEL: test2: +; CHECK-LINUX: bl sincos + + %call = tail call double @sin(double %x) readnone + %call1 = tail call double @cos(double %x) readnone + %add = fadd double %call, %call1 + ret double %add +} + +define double @test2_errno(double %x) nounwind { +entry: +; CHECK-IOS-LABEL: test2_errno: +; CHECK-IOS: bl _sin +; CHECK-IOS: bl _cos + +; CHECK-LINUX-LABEL: test2_errno: ; CHECK-LINUX: bl sin ; CHECK-LINUX: bl cos - %call = tail call double @sin(double %x) nounwind readnone - %call1 = tail call double @cos(double %x) nounwind readnone + %call = tail call double @sin(double %x) + %call1 = tail call double @cos(double %x) %add = fadd double %call, %call1 ret double %add } -declare float @sinf(float) readonly -declare double @sin(double) readonly -declare float @cosf(float) readonly -declare double @cos(double) readonly +declare float @sinf(float) +declare double @sin(double) +declare float @cosf(float) +declare double @cos(double) diff --git a/test/CodeGen/AArch64/arm64-stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll index 0b2e9776263dc0a0ae407df6d737bf7b5de558e3..e12a35a93e2243e24f3bbcf5df276261dc022bf8 100644 --- a/test/CodeGen/AArch64/arm64-stackmap.ll +++ b/test/CodeGen/AArch64/arm64-stackmap.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps ; CHECK-NEXT: __LLVM_StackMaps: ; Header -; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 3 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 0 ; Num Functions @@ -67,22 +67,30 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-NEXT: .short 4 ; SmallConstant ; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 65535 ; SmallConstant ; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 65536 ; SmallConstant ; CHECK-NEXT: .byte 5 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; LargeConstant at index 0 ; CHECK-NEXT: .byte 5 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 1 @@ -99,12 +107,16 @@ entry: ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 2 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define void @osrinline(i64 %a, i64 %b) { entry: @@ -123,12 +135,16 @@ entry: ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 2 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define void @osrcold(i64 %a, i64 %b) { entry: @@ -163,12 +179,16 @@ entry: ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 2 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) { entry: @@ -185,12 +205,16 @@ entry: ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 2 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) { entry: @@ -207,12 +231,16 @@ entry: ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 2 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 ; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short {{[0-9]+}} +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 0 define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) { entry: @@ -233,8 +261,11 @@ entry: ; Check that at least one is a spilled entry from RBP. ; Location: Indirect FP + ... ; CHECK: .byte 3 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short ; CHECK-NEXT: .short 29 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) { entry: call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 20, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) @@ -252,8 +283,11 @@ entry: ; Check that at least one is a spilled entry from RBP. ; Location: Indirect FP + ... ; CHECK: .byte 3 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short ; CHECK-NEXT: .short 29 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .long define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) { entry: call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) @@ -269,7 +303,9 @@ entry: ; CHECK-NEXT: .short 1 ; Loc 0: SmallConstant ; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .byte 8 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 8 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long 33 @@ -286,8 +322,10 @@ define void @liveConstant() { ; CHECK-NEXT: .short 1 ; Loc 0: Indirect FP (r29) - offset ; CHECK-NEXT: .byte 3 -; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .short 4 ; CHECK-NEXT: .short 29 +; CHECK-NEXT: .short 0 ; CHECK-NEXT: .long -{{[0-9]+}} define void @clobberLR(i32 %a) { tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x31}"() nounwind diff --git a/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll index 88700a1534376412ebda0b1dbe01a1e78cccf879..17979f4036cb608c786fb6f4242433fc09079668 100644 --- a/test/CodeGen/AArch64/arm64-tls-dynamics.ll +++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll @@ -30,13 +30,13 @@ define i32 @test_generaldynamic() { ; CHECK-NOLD: ldr w0, [x[[TP]], x0] ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12 +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } @@ -56,13 +56,13 @@ define i32* @test_generaldynamic_addr() { ; CHECK: add x0, [[TP]], x0 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12 +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } @@ -95,15 +95,15 @@ define i32 @test_localdynamic() { ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12 +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL ; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12 ; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } @@ -131,15 +131,15 @@ define i32* @test_localdynamic_addr() { ret i32* @local_dynamic_var ; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12 +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL ; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12 ; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21 -; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC -; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12 +; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12 ; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL } diff --git a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll index c7b0c33550d017179240af155caef899ddeacf4d..6b754b0a169ecda091f0066f3953d1b12310c796 100644 --- a/test/CodeGen/AArch64/arm64-vabs.ll +++ b/test/CodeGen/AArch64/arm64-vabs.ll @@ -33,7 +33,7 @@ define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: sabdl2_8h: -;CHECK: sabdl2.8h +;CHECK: sabdl.8h %load1 = load <16 x i8>, <16 x i8>* %A %load2 = load <16 x i8>, <16 x i8>* %B %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> @@ -45,7 +45,7 @@ define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: sabdl2_4s: -;CHECK: sabdl2.4s +;CHECK: sabdl.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> @@ -57,7 +57,7 @@ define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: sabdl2_2d: -;CHECK: sabdl2.2d +;CHECK: sabdl.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> @@ -99,7 +99,7 @@ define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: uabdl2_8h: -;CHECK: uabdl2.8h +;CHECK: uabdl.8h %load1 = load <16 x i8>, <16 x i8>* %A %load2 = load <16 x i8>, <16 x i8>* %B %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> @@ -112,7 +112,7 @@ define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: uabdl2_4s: -;CHECK: uabdl2.4s +;CHECK: uabdl.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> @@ -124,7 +124,7 @@ define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: uabdl2_2d: -;CHECK: uabdl2.2d +;CHECK: uabdl.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> @@ -134,8 +134,10 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { ret <2 x i64> %tmp4 } -define i16 @uabdl8h_log2_shuffle(<16 x i8>* %a, <16 x i8>* %b) { -; CHECK-LABEL: uabdl8h_log2_shuffle +declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>) + +define i16 @uabdl8h_rdx(<16 x i8>* %a, <16 x i8>* %b) { +; CHECK-LABEL: uabdl8h_rdx ; CHECK: uabdl2.8h ; CHECK: uabdl.8h %aload = load <16 x i8>, <16 x i8>* %a, align 1 @@ -146,20 +148,14 @@ define i16 @uabdl8h_log2_shuffle(<16 x i8>* %a, <16 x i8>* %b) { %abcmp = icmp slt <16 x i16> %abdiff, zeroinitializer %ababs = sub nsw <16 x i16> zeroinitializer, %abdiff %absel = select <16 x i1> %abcmp, <16 x i16> %ababs, <16 x i16> %abdiff - %rdx.shuf = shufflevector <16 x i16> %absel, <16 x i16> undef, <16 x i32> - %bin1.rdx = add <16 x i16> %absel, %rdx.shuf - %rdx.shufx = shufflevector <16 x i16> %bin1.rdx, <16 x i16> undef, <16 x i32> - %bin.rdx = add <16 x i16> %bin1.rdx, %rdx.shufx - %rdx.shuf136 = shufflevector <16 x i16> %bin.rdx, <16 x i16> undef, <16 x i32> - %bin.rdx137 = add <16 x i16> %bin.rdx, %rdx.shuf136 - %rdx.shuf138 = shufflevector <16 x i16> %bin.rdx137, <16 x i16> undef, <16 x i32> - %bin.rdx139 = add <16 x i16> %bin.rdx137, %rdx.shuf138 - %reduced_v = extractelement <16 x i16> %bin.rdx139, i16 0 + %reduced_v = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %absel) ret i16 %reduced_v } -define i32 @uabdl4s_log2_shuffle(<8 x i16>* %a, <8 x i16>* %b) { -; CHECK-LABEL: uabdl4s_log2_shuffle +declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>) + +define i32 @uabdl4s_rdx(<8 x i16>* %a, <8 x i16>* %b) { +; CHECK-LABEL: uabdl4s_rdx ; CHECK: uabdl2.4s ; CHECK: uabdl.4s %aload = load <8 x i16>, <8 x i16>* %a, align 1 @@ -170,18 +166,14 @@ define i32 @uabdl4s_log2_shuffle(<8 x i16>* %a, <8 x i16>* %b) { %abcmp = icmp slt <8 x i32> %abdiff, zeroinitializer %ababs = sub nsw <8 x i32> zeroinitializer, %abdiff %absel = select <8 x i1> %abcmp, <8 x i32> %ababs, <8 x i32> %abdiff - %rdx.shuf = shufflevector <8 x i32> %absel, <8 x i32> undef, <8 x i32> - %bin.rdx = add <8 x i32> %absel, %rdx.shuf - %rdx.shuf136 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> - %bin.rdx137 = add <8 x i32> %bin.rdx, %rdx.shuf136 - %rdx.shuf138 = shufflevector <8 x i32> %bin.rdx137, <8 x i32> undef, <8 x i32> - %bin.rdx139 = add <8 x i32> %bin.rdx137, %rdx.shuf138 - %reduced_v = extractelement <8 x i32> %bin.rdx139, i32 0 + %reduced_v = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %absel) ret i32 %reduced_v } -define i64 @uabdl2d_log2_shuffle(<4 x i32>* %a, <4 x i32>* %b, i32 %h) { -; CHECK: uabdl2d_log2_shuffle +declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>) + +define i64 @uabdl2d_rdx(<4 x i32>* %a, <4 x i32>* %b, i32 %h) { +; CHECK: uabdl2d_rdx ; CHECK: uabdl2.2d ; CHECK: uabdl.2d %aload = load <4 x i32>, <4 x i32>* %a, align 1 @@ -192,11 +184,7 @@ define i64 @uabdl2d_log2_shuffle(<4 x i32>* %a, <4 x i32>* %b, i32 %h) { %abcmp = icmp slt <4 x i64> %abdiff, zeroinitializer %ababs = sub nsw <4 x i64> zeroinitializer, %abdiff %absel = select <4 x i1> %abcmp, <4 x i64> %ababs, <4 x i64> %abdiff - %rdx.shuf136 = shufflevector <4 x i64> %absel, <4 x i64> undef, <4 x i32> - %bin.rdx137 = add <4 x i64> %absel, %rdx.shuf136 - %rdx.shuf138 = shufflevector <4 x i64> %bin.rdx137, <4 x i64> undef, <4 x i32> - %bin.rdx139 = add <4 x i64> %bin.rdx137, %rdx.shuf138 - %reduced_v = extractelement <4 x i64> %bin.rdx139, i16 0 + %reduced_v = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> %absel) ret i64 %reduced_v } @@ -573,7 +561,7 @@ define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind { ;CHECK-LABEL: sabal2_8h: -;CHECK: sabal2.8h +;CHECK: sabal.8h %load1 = load <16 x i8>, <16 x i8>* %A %load2 = load <16 x i8>, <16 x i8>* %B %tmp3 = load <8 x i16>, <8 x i16>* %C @@ -587,7 +575,7 @@ define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwin define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: sabal2_4s: -;CHECK: sabal2.4s +;CHECK: sabal.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp3 = load <4 x i32>, <4 x i32>* %C @@ -601,7 +589,7 @@ define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwin define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind { ;CHECK-LABEL: sabal2_2d: -;CHECK: sabal2.2d +;CHECK: sabal.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp3 = load <2 x i64>, <2 x i64>* %C @@ -651,7 +639,7 @@ define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind { ;CHECK-LABEL: uabal2_8h: -;CHECK: uabal2.8h +;CHECK: uabal.8h %load1 = load <16 x i8>, <16 x i8>* %A %load2 = load <16 x i8>, <16 x i8>* %B %tmp3 = load <8 x i16>, <8 x i16>* %C @@ -665,7 +653,7 @@ define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwin define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: uabal2_4s: -;CHECK: uabal2.4s +;CHECK: uabal.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp3 = load <4 x i32>, <4 x i32>* %C @@ -679,7 +667,7 @@ define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwin define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind { ;CHECK-LABEL: uabal2_2d: -;CHECK: uabal2.2d +;CHECK: uabal.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp3 = load <2 x i64>, <2 x i64>* %C diff --git a/test/CodeGen/AArch64/arm64-vadd.ll b/test/CodeGen/AArch64/arm64-vadd.ll index 9d09251524eaec3df828d998c7fc5bc5b54fa5fb..2a25538250e43823188af1e9b72a8a030e69d264 100644 --- a/test/CodeGen/AArch64/arm64-vadd.ll +++ b/test/CodeGen/AArch64/arm64-vadd.ll @@ -318,7 +318,7 @@ define <2 x i64> @uaddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: uaddw2_8h: -;CHECK: uaddw2.8h +;CHECK: uaddw.8h %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B @@ -331,7 +331,7 @@ define <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: uaddw2_4s: -;CHECK: uaddw2.4s +;CHECK: uaddw.4s %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B @@ -344,7 +344,7 @@ define <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @uaddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: uaddw2_2d: -;CHECK: uaddw2.2d +;CHECK: uaddw.2d %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B @@ -387,7 +387,7 @@ define <2 x i64> @saddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: saddw2_8h: -;CHECK: saddw2.8h +;CHECK: saddw.8h %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B @@ -400,7 +400,7 @@ define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: saddw2_4s: -;CHECK: saddw2.4s +;CHECK: saddw.4s %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B @@ -413,7 +413,7 @@ define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @saddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: saddw2_2d: -;CHECK: saddw2.2d +;CHECK: saddw.2d %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B diff --git a/test/CodeGen/AArch64/arm64-vmul.ll b/test/CodeGen/AArch64/arm64-vmul.ll index a5fa78abb92f4cdb248ef0f6c23130e105130c6c..f70ed9a43427869581674ccca76545039152e88d 100644 --- a/test/CodeGen/AArch64/arm64-vmul.ll +++ b/test/CodeGen/AArch64/arm64-vmul.ll @@ -83,7 +83,7 @@ define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: sqdmull2_4s: -;CHECK: sqdmull2.4s +;CHECK: sqdmull.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> @@ -94,7 +94,7 @@ define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: sqdmull2_2d: -;CHECK: sqdmull2.2d +;CHECK: sqdmull.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> @@ -324,7 +324,7 @@ define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwin define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: sqdmlal2_4s: -;CHECK: sqdmlal2.4s +;CHECK: sqdmlal.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp3 = load <4 x i32>, <4 x i32>* %C @@ -337,7 +337,7 @@ define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounw define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind { ;CHECK-LABEL: sqdmlal2_2d: -;CHECK: sqdmlal2.2d +;CHECK: sqdmlal.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp3 = load <2 x i64>, <2 x i64>* %C @@ -372,7 +372,7 @@ define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwin define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: sqdmlsl2_4s: -;CHECK: sqdmlsl2.4s +;CHECK: sqdmlsl.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp3 = load <4 x i32>, <4 x i32>* %C @@ -385,7 +385,7 @@ define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounw define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind { ;CHECK-LABEL: sqdmlsl2_2d: -;CHECK: sqdmlsl2.2d +;CHECK: sqdmlsl.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp3 = load <2 x i64>, <2 x i64>* %C @@ -874,7 +874,7 @@ define <2 x i64> @sqdmull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: sqdmull2_lane_4s: ;CHECK-NOT: dup -;CHECK: sqdmull2.4s +;CHECK: sqdmull.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> @@ -886,7 +886,7 @@ define <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @sqdmull2_lane_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: sqdmull2_lane_2d: ;CHECK-NOT: dup -;CHECK: sqdmull2.2d +;CHECK: sqdmull.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> @@ -994,7 +994,7 @@ define <2 x i64> @sqdmlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) n define <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: sqdmlal2_lane_4s: ;CHECK-NOT: dup -;CHECK: sqdmlal2.4s +;CHECK: sqdmlal.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp3 = load <4 x i32>, <4 x i32>* %C @@ -1008,7 +1008,7 @@ define <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) define <2 x i64> @sqdmlal2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind { ;CHECK-LABEL: sqdmlal2_lane_2d: ;CHECK-NOT: dup -;CHECK: sqdmlal2.2d +;CHECK: sqdmlal.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp3 = load <2 x i64>, <2 x i64>* %C @@ -1147,7 +1147,7 @@ define <2 x i64> @sqdmlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) n define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind { ;CHECK-LABEL: sqdmlsl2_lane_4s: ;CHECK-NOT: dup -;CHECK: sqdmlsl2.4s +;CHECK: sqdmlsl.4s %load1 = load <8 x i16>, <8 x i16>* %A %load2 = load <8 x i16>, <8 x i16>* %B %tmp3 = load <4 x i32>, <4 x i32>* %C @@ -1161,7 +1161,7 @@ define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind { ;CHECK-LABEL: sqdmlsl2_lane_2d: ;CHECK-NOT: dup -;CHECK: sqdmlsl2.2d +;CHECK: sqdmlsl.2d %load1 = load <4 x i32>, <4 x i32>* %A %load2 = load <4 x i32>, <4 x i32>* %B %tmp3 = load <2 x i64>, <2 x i64>* %C @@ -1201,35 +1201,35 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou ; Scalar FMULX define float @fmulxs(float %a, float %b) nounwind { ; CHECK-LABEL: fmulxs: -; CHECKNEXT: fmulx s0, s0, s1 +; CHECK-NEXT: fmulx s0, s0, s1 %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind -; CHECKNEXT: ret +; CHECK-NEXT: ret ret float %fmulx.i } define double @fmulxd(double %a, double %b) nounwind { ; CHECK-LABEL: fmulxd: -; CHECKNEXT: fmulx d0, d0, d1 +; CHECK-NEXT: fmulx d0, d0, d1 %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind -; CHECKNEXT: ret +; CHECK-NEXT: ret ret double %fmulx.i } define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind { ; CHECK-LABEL: fmulxs_lane: -; CHECKNEXT: fmulx.s s0, s0, v1[3] +; CHECK-NEXT: fmulx.s s0, s0, v1[3] %b = extractelement <4 x float> %vec, i32 3 %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind -; CHECKNEXT: ret +; CHECK-NEXT: ret ret float %fmulx.i } define double @fmulxd_lane(double %a, <2 x double> %vec) nounwind { ; CHECK-LABEL: fmulxd_lane: -; CHECKNEXT: fmulx d0, d0, v1[1] +; CHECK-NEXT: fmulx.d d0, d0, v1[1] %b = extractelement <2 x double> %vec, i32 1 %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind -; CHECKNEXT: ret +; CHECK-NEXT: ret ret double %fmulx.i } diff --git a/test/CodeGen/AArch64/arm64-vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll index c1c4649bd6a48593502d497eb1fc139645a32180..6b0fe40b5a090f532eb64e46c8625de5e64d8e45 100644 --- a/test/CodeGen/AArch64/arm64-vshift.ll +++ b/test/CodeGen/AArch64/arm64-vshift.ll @@ -1164,7 +1164,7 @@ define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind { define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind { ;CHECK-LABEL: ushll2_8h: -;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1 +;CHECK: ushll.8h v0, {{v[0-9]+}}, #1 %load1 = load <16 x i8>, <16 x i8>* %A %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> @@ -1174,7 +1174,7 @@ define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind { define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind { ;CHECK-LABEL: ushll2_4s: -;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1 +;CHECK: ushll.4s v0, {{v[0-9]+}}, #1 %load1 = load <8 x i16>, <8 x i16>* %A %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> @@ -1184,7 +1184,7 @@ define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind { define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind { ;CHECK-LABEL: ushll2_2d: -;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1 +;CHECK: ushll.2d v0, {{v[0-9]+}}, #1 %load1 = load <4 x i32>, <4 x i32>* %A %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> @@ -1221,7 +1221,7 @@ define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind { define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind { ;CHECK-LABEL: sshll2_8h: -;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1 +;CHECK: sshll.8h v0, {{v[0-9]+}}, #1 %load1 = load <16 x i8>, <16 x i8>* %A %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> @@ -1231,7 +1231,7 @@ define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind { define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind { ;CHECK-LABEL: sshll2_4s: -;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1 +;CHECK: sshll.4s v0, {{v[0-9]+}}, #1 %load1 = load <8 x i16>, <8 x i16>* %A %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> @@ -1241,7 +1241,7 @@ define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind { define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind { ;CHECK-LABEL: sshll2_2d: -;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1 +;CHECK: sshll.2d v0, {{v[0-9]+}}, #1 %load1 = load <4 x i32>, <4 x i32>* %A %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> diff --git a/test/CodeGen/AArch64/arm64-vsub.ll b/test/CodeGen/AArch64/arm64-vsub.ll index 7af69118347e2e7287ad5cadae84b87215b51f26..6746e49989cbda3350f5fb690bef37ffa877bda5 100644 --- a/test/CodeGen/AArch64/arm64-vsub.ll +++ b/test/CodeGen/AArch64/arm64-vsub.ll @@ -157,7 +157,7 @@ define <2 x i64> @ssubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @ssubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: ssubl2_8h: -;CHECK: ssubl2.8h +;CHECK: ssubl.8h %tmp1 = load <16 x i8>, <16 x i8>* %A %high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> %ext1 = sext <8 x i8> %high1 to <8 x i16> @@ -172,7 +172,7 @@ define <8 x i16> @ssubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @ssubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: ssubl2_4s: -;CHECK: ssubl2.4s +;CHECK: ssubl.4s %tmp1 = load <8 x i16>, <8 x i16>* %A %high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> %ext1 = sext <4 x i16> %high1 to <4 x i32> @@ -187,7 +187,7 @@ define <4 x i32> @ssubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @ssubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: ssubl2_2d: -;CHECK: ssubl2.2d +;CHECK: ssubl.2d %tmp1 = load <4 x i32>, <4 x i32>* %A %high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> %ext1 = sext <2 x i32> %high1 to <2 x i64> @@ -235,7 +235,7 @@ define <2 x i64> @usubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @usubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: usubl2_8h: -;CHECK: usubl2.8h +;CHECK: usubl.8h %tmp1 = load <16 x i8>, <16 x i8>* %A %high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> %ext1 = zext <8 x i8> %high1 to <8 x i16> @@ -250,7 +250,7 @@ define <8 x i16> @usubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @usubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: usubl2_4s: -;CHECK: usubl2.4s +;CHECK: usubl.4s %tmp1 = load <8 x i16>, <8 x i16>* %A %high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> %ext1 = zext <4 x i16> %high1 to <4 x i32> @@ -265,7 +265,7 @@ define <4 x i32> @usubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @usubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: usubl2_2d: -;CHECK: usubl2.2d +;CHECK: usubl.2d %tmp1 = load <4 x i32>, <4 x i32>* %A %high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> %ext1 = zext <2 x i32> %high1 to <2 x i64> @@ -310,7 +310,7 @@ define <2 x i64> @ssubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @ssubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: ssubw2_8h: -;CHECK: ssubw2.8h +;CHECK: ssubw.8h %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B @@ -323,7 +323,7 @@ define <8 x i16> @ssubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @ssubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: ssubw2_4s: -;CHECK: ssubw2.4s +;CHECK: ssubw.4s %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B @@ -336,7 +336,7 @@ define <4 x i32> @ssubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @ssubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: ssubw2_2d: -;CHECK: ssubw2.2d +;CHECK: ssubw.2d %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B @@ -379,7 +379,7 @@ define <2 x i64> @usubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind { define <8 x i16> @usubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind { ;CHECK-LABEL: usubw2_8h: -;CHECK: usubw2.8h +;CHECK: usubw.8h %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B @@ -392,7 +392,7 @@ define <8 x i16> @usubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind { define <4 x i32> @usubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind { ;CHECK-LABEL: usubw2_4s: -;CHECK: usubw2.4s +;CHECK: usubw.4s %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B @@ -405,7 +405,7 @@ define <4 x i32> @usubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind { define <2 x i64> @usubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind { ;CHECK-LABEL: usubw2_2d: -;CHECK: usubw2.2d +;CHECK: usubw.2d %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B diff --git a/test/CodeGen/AArch64/asm-print-comments.ll b/test/CodeGen/AArch64/asm-print-comments.ll new file mode 100644 index 0000000000000000000000000000000000000000..e997dce23583aad311e31b7e97c985b9630e7c98 --- /dev/null +++ b/test/CodeGen/AArch64/asm-print-comments.ll @@ -0,0 +1,17 @@ +; RUN: llc %s -mtriple=arm64-apple-darwin -o - | FileCheck %s + +; CHECK-LABEL: ; -- Begin function foo +; CHECK: foo: +define hidden i32 @foo() { + entry: + ret i32 30 +} +; CHECK: ; -- End function + +; CHECK-LABEL: ; -- Begin function bar +; CHECK: bar: +define i32 @bar() { + entry: + ret i32 30 +} +; CHECK: ; -- End function diff --git a/test/CodeGen/AArch64/cmpxchg-O0.ll b/test/CodeGen/AArch64/cmpxchg-O0.ll index 8432b15ea523e8c8b3e1eb4ff5b1dc857de8f7fd..1bfbcf851c0e9df7537649c454aa869884ce6470 100644 --- a/test/CodeGen/AArch64/cmpxchg-O0.ll +++ b/test/CodeGen/AArch64/cmpxchg-O0.ll @@ -3,10 +3,11 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_8: ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: +; CHECK: mov [[STATUS:w[3-9]+]], #0 ; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0] ; CHECK: cmp [[OLD]], w1, uxtb ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxrb [[STATUS:w[3-9]]], w2, [x0] +; CHECK: stlxrb [[STATUS]], w2, [x0] ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 @@ -18,6 +19,7 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_16: ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: +; CHECK: mov [[STATUS:w[3-9]+]], #0 ; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0] ; CHECK: cmp [[OLD]], w1, uxth ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] @@ -33,10 +35,11 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_32: ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: +; CHECK: mov [[STATUS:w[3-9]+]], #0 ; CHECK: ldaxr [[OLD:w[0-9]+]], [x0] ; CHECK: cmp [[OLD]], w1 ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS:w[3-9]]], w2, [x0] +; CHECK: stlxr [[STATUS]], w2, [x0] ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 @@ -48,10 +51,11 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_64: ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: +; CHECK: mov [[STATUS:w[3-9]+]], #0 ; CHECK: ldaxr [[OLD:x[0-9]+]], [x0] ; CHECK: cmp [[OLD]], x1 ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS:w[3-9]]], x2, [x0] +; CHECK: stlxr [[STATUS]], x2, [x0] ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{x[0-9]+}}, [[OLD]], x1 diff --git a/test/CodeGen/AArch64/concat_vector-scalar-combine.ll b/test/CodeGen/AArch64/concat_vector-scalar-combine.ll index 1c64af636cb3530323c087832a53d89e7e9bceec..3abb14241ea07ccbea7f8f0cb818de6c1ca1122c 100644 --- a/test/CodeGen/AArch64/concat_vector-scalar-combine.ll +++ b/test/CodeGen/AArch64/concat_vector-scalar-combine.ll @@ -38,7 +38,7 @@ entry: define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 { entry: ; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8: -; CHECK-NEXT: ins.h v0[0], w0 +; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: ins.h v0[1], w1 ; CHECK-NEXT: ins.h v0[3], w1 ; CHECK-NEXT: ret @@ -84,7 +84,7 @@ define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy, i32 %x, entry: ; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8: ; CHECK-NEXT: fmov s[[X:[0-9]+]], w0 -; CHECK-NEXT: ins.h v0[0], v[[X]][0] +; CHECK-NEXT: mov.16b v0, v[[X]] ; CHECK-NEXT: ins.h v0[1], v1[0] ; CHECK-NEXT: ins.h v0[2], v[[X]][0] ; CHECK-NEXT: ins.h v0[3], v1[0] @@ -99,7 +99,7 @@ entry: define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy, half %x, half %y) #0 { entry: ; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8: -; CHECK-NEXT: ins.h v0[0], v1[0] +; CHECK-NEXT: mov.16b v0, v1 ; CHECK-NEXT: ins.h v0[1], v2[0] ; CHECK-NEXT: ins.h v0[2], v1[0] ; CHECK-NEXT: ins.h v0[3], v2[0] diff --git a/test/CodeGen/AArch64/fadd-combines.ll b/test/CodeGen/AArch64/fadd-combines.ll new file mode 100644 index 0000000000000000000000000000000000000000..c106f293ccffb99bb2d99da98e66001bb2d6a49f --- /dev/null +++ b/test/CodeGen/AArch64/fadd-combines.ll @@ -0,0 +1,78 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: test1: +; CHECK: fadd d1, d1, d1 +; CHECK: fsub d0, d0, d1 +define double @test1(double %a, double %b) local_unnamed_addr #0 { +entry: + %mul = fmul double %b, -2.000000e+00 + %add1 = fadd double %a, %mul + ret double %add1 +} + +; DAGCombine will canonicalize 'a - 2.0*b' to 'a + -2.0*b' +; CHECK-LABEL: test2: +; CHECK: fadd d1, d1, d1 +; CHECK: fsub d0, d0, d1 +define double @test2(double %a, double %b) local_unnamed_addr #0 { +entry: + %mul = fmul double %b, 2.000000e+00 + %add1 = fsub double %a, %mul + ret double %add1 +} + +; CHECK-LABEL: test3: +; CHECK: fmul d0, d0, d1 +; CHECK: fadd d1, d2, d2 +; CHECK: fsub d0, d0, d1 +define double @test3(double %a, double %b, double %c) local_unnamed_addr #0 { +entry: + %mul = fmul double %a, %b + %mul1 = fmul double %c, 2.000000e+00 + %sub = fsub double %mul, %mul1 + ret double %sub +} + +; CHECK-LABEL: test4: +; CHECK: fmul d0, d0, d1 +; CHECK: fadd d1, d2, d2 +; CHECK: fsub d0, d0, d1 +define double @test4(double %a, double %b, double %c) local_unnamed_addr #0 { +entry: + %mul = fmul double %a, %b + %mul1 = fmul double %c, -2.000000e+00 + %add2 = fadd double %mul, %mul1 + ret double %add2 +} + +; CHECK-LABEL: test5: +; CHECK: fadd v1.4s, v1.4s, v1.4s +; CHECK: fsub v0.4s, v0.4s, v1.4s +define <4 x float> @test5(<4 x float> %a, <4 x float> %b) { + %mul = fmul <4 x float> %b, + %add = fadd <4 x float> %a, %mul + ret <4 x float> %add +} + +; CHECK-LABEL: test6: +; CHECK: fadd v1.4s, v1.4s, v1.4s +; CHECK: fsub v0.4s, v0.4s, v1.4s +define <4 x float> @test6(<4 x float> %a, <4 x float> %b) { + %mul = fmul <4 x float> %b, + %add = fsub <4 x float> %a, %mul + ret <4 x float> %add +} + +; Don't fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) if the fmul has +; multiple uses. +; CHECK-LABEL: test7: +; CHECK: fmul +define double @test7(double %a, double %b) local_unnamed_addr #0 { +entry: + %mul = fmul double %b, -2.000000e+00 + %add1 = fadd double %a, %mul + call void @use(double %mul) + ret double %add1 +} + +declare void @use(double) diff --git a/test/CodeGen/AArch64/fast-isel-cmpxchg.ll b/test/CodeGen/AArch64/fast-isel-cmpxchg.ll index aa78210fae744f739f4aeaddedcca1403924ee23..7ef625abab20f742a465261d6d3535df553095e5 100644 --- a/test/CodeGen/AArch64/fast-isel-cmpxchg.ll +++ b/test/CodeGen/AArch64/fast-isel-cmpxchg.ll @@ -2,11 +2,12 @@ ; CHECK-LABEL: cmpxchg_monotonic_32: ; CHECK: [[RETRY:.LBB[0-9_]+]]: +; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 ; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] ; CHECK-NEXT: cmp [[OLD]], w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // BB#2: -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, [x0] +; CHECK-NEXT: stlxr [[STATUS]], w2, [x0] ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: ; CHECK-NEXT: cmp [[OLD]], w1 @@ -27,11 +28,12 @@ define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 { ; CHECK: // BB#0: ; CHECK: ldr [[NEW:w[0-9]+]], [x2] ; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]: +; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 ; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] ; CHECK-NEXT: cmp [[OLD]], w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // BB#2: -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0] +; CHECK-NEXT: stlxr [[STATUS]], [[NEW]], [x0] ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: ; CHECK-NEXT: cmp [[OLD]], w1 @@ -51,11 +53,12 @@ define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 ; CHECK-LABEL: cmpxchg_seq_cst_64: ; CHECK: [[RETRY:.LBB[0-9_]+]]: +; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 ; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0] ; CHECK-NEXT: cmp [[OLD]], x1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // BB#2: -; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], x2, [x0] +; CHECK-NEXT: stlxr [[STATUS]], x2, [x0] ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: ; CHECK-NEXT: cmp [[OLD]], x1 diff --git a/test/CodeGen/AArch64/fast-isel-sp-adjust.ll b/test/CodeGen/AArch64/fast-isel-sp-adjust.ll new file mode 100644 index 0000000000000000000000000000000000000000..9201d1be6a9ce3bcb22883735c4b534be4bb7d1d --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-sp-adjust.ll @@ -0,0 +1,288 @@ +; RUN: llc -O0 -mtriple=aarch64-apple-ios -o - %s | FileCheck %s +; RUN: not llc -O0 -mtriple=aarch64-apple-ios -o /dev/null -fast-isel-abort=3 %s 2> %t +; RUN: FileCheck %s --check-prefix=CHECK-ERRORS < %t + +; The issue here is that FastISel cannot emit an ADDrr where one of the inputs +; is SP. This only ever crops up with function calls, and then only if the +; argument is at an offset > 2^12 * size from SP. + +; If FastISel ever starts coping with this and emits an "add xD, sp, xM" it's +; critical to check the encoding as well as the textual assembly. An ADDXrs with +; SP as an operand will still print with SP, but will actually mean XZR. + +; CHECK-ERRORS: LLVM ERROR: FastISel missed call + +; CHECK-LABEL: foo: +; CHECK-DAG: mov x[[SP:[0-9]+]], sp +; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104 +; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]] +; CHECK: strb w0, [x[[SP]], x[[OFFSET]]] + +define void @foo(i8 %in) { + call void @bar(i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, ; All regs gone. + i64 undef, i64 undef, i64 undef, i64 undef, ; sp + 32 + i64 undef, i64 undef, i64 undef, i64 undef, ; sp + 64 + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, ; sp + 128 + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, ; sp + 256 + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, ; sp + 512 + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, ; sp + 1024 + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, ; sp + 2048 + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, + i64 undef, i64 undef, i64 undef, i64 undef, ; sp + 4096 + i64 undef, ; sp + 4104 (i.e. not uimm12 or uimm12 << 12). + i8 %in) + ret void +} + +declare void @bar(i64, i64, i64, i64, + i64, i64, i64, i64, ; All regs gone. + i64, i64, i64, i64, ; sp + 32 + i64, i64, i64, i64, ; sp + 64 + i64, i64, i64, i64, + i64, i64, i64, i64, ; sp + 128 + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, ; sp + 256 + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, ; sp + 512 + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, ; sp + 1024 + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, ; sp + 2048 + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, + i64, i64, i64, i64, ; sp + 4096 + i64, + i8) diff --git a/test/CodeGen/AArch64/fence-singlethread.ll b/test/CodeGen/AArch64/fence-singlethread.ll new file mode 100644 index 0000000000000000000000000000000000000000..2ed744277385a732281941daf0513ff094c5215f --- /dev/null +++ b/test/CodeGen/AArch64/fence-singlethread.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s --check-prefix=LINUX +; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=IOS +; RUN: llc -mtriple=aarch64-linux-gnueabihf %s -filetype=obj -o %t +; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=OBJ + +; OBJ-NOT: dmb + +define void @fence_singlethread() { +; LINUX-LABEL: fence_singlethread: +; LINUX-NOT: dmb +; LINUX: // COMPILER BARRIER +; LINUX-NOT: dmb + +; IOS-LABEL: fence_singlethread: +; IOS-NOT: dmb +; IOS: ; COMPILER BARRIER +; IOS-NOT: dmb + + fence singlethread seq_cst + ret void +} diff --git a/test/CodeGen/AArch64/ldst-zero.ll b/test/CodeGen/AArch64/ldst-zero.ll index 95b92ac70879ffb34282f7d5183d3d878b316991..7d443a631f914b0b49d6429b9f5c2aac95eb76fd 100644 --- a/test/CodeGen/AArch64/ldst-zero.ll +++ b/test/CodeGen/AArch64/ldst-zero.ll @@ -9,9 +9,9 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) ; Original test case which exhibited the bug define void @test1(%struct.tree_common* %t, i32 %code, i8* %type) { ; CHECK-LABEL: test1: -; CHECK: stp xzr, xzr, [x0, #8] -; CHECK: stp xzr, x2, [x0] -; CHECK: str w1, [x0, #16] +; CHECK-DAG: stp x2, xzr, [x0, #8] +; CHECK-DAG: str w1, [x0, #16] +; CHECK-DAG: str xzr, [x0] entry: %0 = bitcast %struct.tree_common* %t to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false) @@ -25,10 +25,8 @@ entry: ; Store to each struct element instead of using memset define void @test2(%struct.tree_common* %t, i32 %code, i8* %type) { ; CHECK-LABEL: test2: -; CHECK: stp xzr, xzr, [x0] -; CHECK: str wzr, [x0, #16] -; CHECK: str w1, [x0, #16] -; CHECK: str x2, [x0, #8] +; CHECK-DAG: str w1, [x0, #16] +; CHECK-DAG: stp xzr, x2, [x0] entry: %0 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 0 %1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1 @@ -44,9 +42,9 @@ entry: ; Vector store instead of memset define void @test3(%struct.tree_common* %t, i32 %code, i8* %type) { ; CHECK-LABEL: test3: -; CHECK: stp xzr, xzr, [x0, #8] -; CHECK: stp xzr, x2, [x0] -; CHECK: str w1, [x0, #16] +; CHECK-DAG: stp x2, xzr, [x0, #8] +; CHECK-DAG: str w1, [x0, #16] +; CHECK-DAG: str xzr, [x0] entry: %0 = bitcast %struct.tree_common* %t to <3 x i64>* store <3 x i64> zeroinitializer, <3 x i64>* %0, align 8 @@ -60,9 +58,8 @@ entry: ; Vector store, then store to vector elements define void @test4(<3 x i64>* %p, i64 %x, i64 %y) { ; CHECK-LABEL: test4: -; CHECK: stp xzr, xzr, [x0, #8] -; CHECK: stp xzr, x2, [x0] -; CHECK: str x1, [x0, #16] +; CHECK-DAG: stp x2, x1, [x0, #8] +; CHECK-DAG: str xzr, [x0] entry: store <3 x i64> zeroinitializer, <3 x i64>* %p, align 8 %0 = bitcast <3 x i64>* %p to i64* diff --git a/test/CodeGen/AArch64/live-interval-analysis.mir b/test/CodeGen/AArch64/live-interval-analysis.mir index d44300973566cba1f6cacde2e00be4889f11aed0..93dfcf507fff39353aa3947f7e452b66f477d564 100644 --- a/test/CodeGen/AArch64/live-interval-analysis.mir +++ b/test/CodeGen/AArch64/live-interval-analysis.mir @@ -6,7 +6,7 @@ --- # CHECK-LABEL: ********** INTERVALS ********** # W29 is reserved, so we should only see dead defs -# CHECK-DAG: W29 [0B,0d:{{[0-9]+}})[32r,32d:{{[0-9]+}})[64r,64d:{{[0-9]+}}) +# CHECK-DAG: W29 [32r,32d:{{[0-9]+}})[64r,64d:{{[0-9]+}}) # For normal registers like x28 we should see the full intervals # CHECK-DAG: W28 [0B,16r:{{[0-9]+}})[32r,48r:{{[0-9]+}})[48r,48d:{{[0-9]+}}) # CHECK: # End machine code for function reserved_reg_liveness. @@ -14,7 +14,7 @@ name: reserved_reg_liveness tracksRegLiveness: true body: | bb.0: - liveins: %x28_fp + liveins: %x28 %6 : xseqpairsclass = COPY %x28_fp %x28_fp = COPY %6 %x28 = COPY %x28 diff --git a/test/CodeGen/AArch64/loh.mir b/test/CodeGen/AArch64/loh.mir index 1d08ebdc5790adc20d64d937ceb8631c576dc662..6e4bb5cfaee6d4492580869c0e5dd9f6bc7febe0 100644 --- a/test/CodeGen/AArch64/loh.mir +++ b/test/CodeGen/AArch64/loh.mir @@ -180,7 +180,6 @@ body: | %x9 = ADRP target-flags(aarch64-page, aarch64-got) @g5 bb.13: - successors: %bb.14 ; Cannot produce a LOH for multiple users ; CHECK-NOT: MCLOH_AdrpAdd %x10 = ADRP target-flags(aarch64-page) @g0 diff --git a/test/CodeGen/AArch64/machine-copy-remove.mir b/test/CodeGen/AArch64/machine-copy-remove.mir index 6f2d3a3009b021da302983369683f84be3703dbb..50c03ddb40374ca5f52ebc5c1a8d894693ba5c75 100644 --- a/test/CodeGen/AArch64/machine-copy-remove.mir +++ b/test/CodeGen/AArch64/machine-copy-remove.mir @@ -7,20 +7,16 @@ name: test1 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1 %x0 = COPY %x1 CBNZX %x1, %bb.2 bb.1: - successors: %bb.3 - %x0 = COPY %xzr B %bb.3 bb.2: - successors: %bb.3 liveins: %x1 %x0 = LDRXui %x1, 0 @@ -38,20 +34,16 @@ name: test2 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1 %x1 = COPY %x0 CBNZX %x1, %bb.2 bb.1: - successors: %bb.3 - %x0 = COPY %xzr B %bb.3 bb.2: - successors: %bb.3 liveins: %x1 %x0 = LDRXui %x1, 0 @@ -69,7 +61,6 @@ name: test3 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1, %x2 %x0 = COPY %x1 @@ -77,13 +68,10 @@ body: | CBNZX %x1, %bb.2 bb.1: - successors: %bb.3 - %x0 = COPY %xzr B %bb.3 bb.2: - successors: %bb.3 liveins: %x1 %x0 = LDRXui %x1, 0 @@ -101,7 +89,6 @@ name: test4 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1, %x2 %x1 = COPY %x0 @@ -109,13 +96,10 @@ body: | CBNZX %x1, %bb.2 bb.1: - successors: %bb.3 - %x0 = COPY %xzr B %bb.3 bb.2: - successors: %bb.3 liveins: %x1 %x0 = LDRXui %x1, 0 @@ -133,7 +117,6 @@ name: test5 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1, %x2 %x1 = COPY %x0 @@ -141,13 +124,10 @@ body: | CBNZX %x1, %bb.2 bb.1: - successors: %bb.3 - %x0 = COPY %xzr B %bb.3 bb.2: - successors: %bb.3 liveins: %x1 %x0 = LDRXui %x1, 0 @@ -165,7 +145,6 @@ name: test6 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1, %x2 %x2 = COPY %x0 @@ -173,13 +152,10 @@ body: | CBNZX %x1, %bb.2 bb.1: - successors: %bb.3 - %x0 = COPY %xzr B %bb.3 bb.2: - successors: %bb.3 liveins: %x1 %x0 = LDRXui %x1, 0 @@ -197,7 +173,6 @@ name: test7 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1, %x2 %x2 = COPY %x0 @@ -206,13 +181,10 @@ body: | CBNZX %x1, %bb.2 bb.1: - successors: %bb.3 - %x0 = COPY %xzr B %bb.3 bb.2: - successors: %bb.3 liveins: %x1 %x0 = LDRXui %x1, 0 @@ -232,14 +204,12 @@ name: test8 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1 %x1 = COPY %x0 CBNZX %x1, %bb.2 bb.1: - successors: %bb.3 liveins: %x0, %x2 %x0, %x1 = LDPXi %x2, 0 @@ -248,7 +218,6 @@ body: | B %bb.3 bb.2: - successors: %bb.3 liveins: %x1 %x0 = LDRXui %x1, 0 @@ -267,20 +236,17 @@ name: test9 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1 CBNZX %x0, %bb.2 bb.1: - successors: %bb.3 liveins: %x0, %x2 %x0 = COPY %xzr B %bb.3 bb.2: - successors: %bb.1, %bb.3 liveins: %x1 %x0 = LDRXui %x1, 0 @@ -304,7 +270,6 @@ name: test10 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %w0, %x1 dead %wzr = SUBSWri killed %w0, 7, 0, implicit-def %nzcv @@ -312,7 +277,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w0 = MOVi32imm 7 @@ -332,7 +296,6 @@ name: test11 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %x0, %x1 dead %xzr = SUBSXri killed %x0, 7, 0, implicit-def %nzcv @@ -340,7 +303,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w0 = MOVi32imm 7, implicit-def %x0 @@ -360,7 +322,6 @@ name: test12 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %x0, %x1 dead %xzr = SUBSXri killed %x0, 7, 0, implicit-def %nzcv @@ -368,7 +329,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w0 = MOVi32imm 7 @@ -388,7 +348,6 @@ name: test13 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %w0, %x1 dead %wzr = SUBSWri killed %w0, 7, 0, implicit-def %nzcv @@ -396,7 +355,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w0 = MOVi32imm 7, implicit-def %x0 @@ -413,7 +371,6 @@ name: test14 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %w0, %x1, %x2 dead %wzr = SUBSWri killed %w0, 7, 0, implicit-def %nzcv @@ -423,7 +380,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w0 = MOVi32imm 7 @@ -440,7 +396,6 @@ name: test15 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %w0, %x1, %x2 dead %wzr = SUBSWri killed %w0, 7, 0, implicit-def %nzcv @@ -448,7 +403,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1, %x2 %w0 = LDRWui %x1, 0 @@ -467,7 +421,6 @@ name: test16 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %w0, %x1 dead %wzr = SUBSWri %w0, 7, 0, implicit-def %nzcv @@ -476,7 +429,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w2 = MOVi32imm 7 @@ -493,7 +445,6 @@ name: test17 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %w0, %x1 dead %w0 = SUBSWri killed %w0, 7, 0, implicit-def %nzcv @@ -501,7 +452,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w0 = MOVi32imm 7 @@ -520,14 +470,12 @@ name: test18 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %x0, %x1 CBNZX killed %x0, %bb.2 B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %x0 = MOVi64imm 4252017623040 @@ -547,7 +495,6 @@ name: test19 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %w0, %x1 dead %wzr = ADDSWri killed %w0, 1, 0, implicit-def %nzcv @@ -555,7 +502,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w0 = MOVi32imm -1 @@ -575,7 +521,6 @@ name: test20 tracksRegLiveness: true body: | bb.0: - successors: %bb.1, %bb.2 liveins: %x0, %x1 dead %xzr = ADDSXri killed %x0, 1, 0, implicit-def %nzcv @@ -583,7 +528,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %x0 = MOVi64imm -1 @@ -603,7 +547,6 @@ name: test21 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %x0, %x1 dead %xzr = ADDSXri killed %x0, 1, 0, implicit-def %nzcv @@ -611,7 +554,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w0 = MOVi32imm -1 @@ -629,7 +571,6 @@ name: test22 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %w0, %x1 dead %wzr = ADDSWri killed %w0, 1, 0, implicit-def %nzcv @@ -637,7 +578,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %x0 = MOVi64imm -1 @@ -654,7 +594,6 @@ name: test23 tracksRegLiveness: true body: | bb.0.entry: - successors: %bb.1, %bb.2 liveins: %w0, %x1 dead %wzr = SUBSWri killed %w0, 1, 12, implicit-def %nzcv @@ -662,7 +601,6 @@ body: | B %bb.1 bb.1: - successors: %bb.2 liveins: %x1 %w0 = MOVi32imm 4096 diff --git a/test/CodeGen/AArch64/machine-sink-zr.mir b/test/CodeGen/AArch64/machine-sink-zr.mir index 535fba0dc63bc5a4cdb45fb7d98c6f70fd99b3ce..2cf2bc488237f2f2be94802cc0935e80a10332d8 100644 --- a/test/CodeGen/AArch64/machine-sink-zr.mir +++ b/test/CodeGen/AArch64/machine-sink-zr.mir @@ -17,7 +17,6 @@ body: | ; CHECK-LABEL: bb.0: ; CHECK-NOT: COPY %wzr bb.0: - successors: %bb.3, %bb.1 liveins: %w0 %0 = COPY %w0 @@ -28,13 +27,9 @@ body: | ; CHECK: COPY %wzr bb.1: - successors: %bb.2 - B %bb.2 bb.2: - successors: %bb.3, %bb.2 - %2 = PHI %0, %bb.1, %4, %bb.2 %w0 = COPY %1 %3 = SUBSWri %2, 1, 0, implicit-def dead %nzcv diff --git a/test/CodeGen/AArch64/macho-global-symbols.ll b/test/CodeGen/AArch64/macho-global-symbols.ll new file mode 100644 index 0000000000000000000000000000000000000000..d68abad57ccd66f0efaa8aaa4b809d9cd4e0a2de --- /dev/null +++ b/test/CodeGen/AArch64/macho-global-symbols.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=arm64-apple-ios %s -o - | FileCheck %s + +; All global symbols must be at-most linker-private for AArch64 because we don't +; use section-relative relocations in MachO. + +define i8* @private_sym() { +; CHECK-LABEL: private_sym: +; CHECK: adrp [[HIBITS:x[0-9]+]], l_var@PAGE +; CHECK: add x0, [[HIBITS]], l_var@PAGEOFF + + ret i8* getelementptr([2 x i8], [2 x i8]* @var, i32 0, i32 0) +} + +; CHECK: .section __TEXT,__cstring +; CHECK: l_var: +; CHECK: .asciz "\002" +@var = private unnamed_addr constant [2 x i8] [i8 2, i8 0] diff --git a/test/CodeGen/AArch64/misched-fusion-aes.ll b/test/CodeGen/AArch64/misched-fusion-aes.ll index f29dfb3a9802159dbcd3218b0f6f42cc7287724d..8ee4dbcee52b6387c7c3eb65eaf563d46136bb64 100644 --- a/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -1,4 +1,9 @@ -; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKA57 +; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKM1 declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k) @@ -71,39 +76,41 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ret void ; CHECK-LABEL: aesea: -; CHECKA57: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesmc {{v[0-7].16b}}, [[VC]] -; CHECKA57: aesmc {{v[0-7].16b}}, [[VA]] -; CHECKA57: aese [[VD:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesmc {{v[0-7].16b}}, [[VD]] -; CHECKA57: aesmc {{v[0-7].16b}}, [[VB]] -; CHECKA57: aese [[VE:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesmc {{v[0-7].16b}}, [[VE]] -; CHECKA57: aese [[VF:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesmc {{v[0-7].16b}}, [[VF]] -; CHECKA57: aese [[VG:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesmc {{v[0-7].16b}}, [[VG]] -; CHECKA57: aese [[VH:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesmc {{v[0-7].16b}}, [[VH]] +; CHECKFUSEALLPAIRS: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VA]] +; CHECKFUSEALLPAIRS: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VB]] +; CHECKFUSEALLPAIRS: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VC]] +; CHECKFUSEALLPAIRS: aese [[VD:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VD]] +; CHECKFUSEALLPAIRS: aese [[VE:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VE]] +; CHECKFUSEALLPAIRS: aese [[VF:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VF]] +; CHECKFUSEALLPAIRS: aese [[VG:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VG]] +; CHECKFUSEALLPAIRS: aese [[VH:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VH]] +; CHECKFUSEALLPAIRS-NOT: aesmc + ; CHECKM1: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECKM1: aesmc {{v[0-7].16b}}, [[VA]] +; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VA]] +; CHECKM1: aese [[VH:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VB]] ; CHECKM1: aese {{v[0-7].16b}}, {{v[0-7].16b}} ; CHECKM1: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VC]] ; CHECKM1: aese [[VD:v[0-7].16b]], {{v[0-7].16b}} -; CHECKM1: aesmc {{v[0-7].16b}}, [[VD]] +; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VD]] +; CHECKM1: aesmc {{v[0-7].16b}}, [[VH]] ; CHECKM1: aese [[VE:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VE]] ; CHECKM1: aese [[VF:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VF]] ; CHECKM1: aese [[VG:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VG]] -; CHECKM1: aese [[VH:v[0-7].16b]], {{v[0-7].16b}} -; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VH]] } define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) { @@ -171,37 +178,67 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ret void ; CHECK-LABEL: aesda: -; CHECKA57: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesimc {{v[0-7].16b}}, [[VC]] -; CHECKA57: aesimc {{v[0-7].16b}}, [[VA]] -; CHECKA57: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesimc {{v[0-7].16b}}, [[VD]] -; CHECKA57: aesimc {{v[0-7].16b}}, [[VB]] -; CHECKA57: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesimc {{v[0-7].16b}}, [[VE]] -; CHECKA57: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesimc {{v[0-7].16b}}, [[VF]] -; CHECKA57: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesimc {{v[0-7].16b}}, [[VG]] -; CHECKA57: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}} -; CHECKA57-NEXT: aesimc {{v[0-7].16b}}, [[VH]] +; CHECKFUSEALLPAIRS: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VA]] +; CHECKFUSEALLPAIRS: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VB]] +; CHECKFUSEALLPAIRS: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VC]] +; CHECKFUSEALLPAIRS: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VD]] +; CHECKFUSEALLPAIRS: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VE]] +; CHECKFUSEALLPAIRS: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VF]] +; CHECKFUSEALLPAIRS: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VG]] +; CHECKFUSEALLPAIRS: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}} +; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VH]] +; CHECKFUSEALLPAIRS-NOT: aesimc + ; CHECKM1: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECKM1: aesimc {{v[0-7].16b}}, [[VA]] +; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VA]] +; CHECKM1: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VB]] ; CHECKM1: aesd {{v[0-7].16b}}, {{v[0-7].16b}} ; CHECKM1: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VC]] ; CHECKM1: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}} -; CHECKM1: aesimc {{v[0-7].16b}}, [[VD]] +; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VD]] +; CHECKM1: aesimc {{v[0-7].16b}}, [[VH]] ; CHECKM1: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VE]] ; CHECKM1: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VF]] ; CHECKM1: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VG]] -; CHECKM1: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}} -; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VH]] +} + +define void @aes_load_store(<16 x i8> *%p1, <16 x i8> *%p2 , <16 x i8> *%p3) { +entry: + %x1 = alloca <16 x i8>, align 16 + %x2 = alloca <16 x i8>, align 16 + %x3 = alloca <16 x i8>, align 16 + %x4 = alloca <16 x i8>, align 16 + %x5 = alloca <16 x i8>, align 16 + %in1 = load <16 x i8>, <16 x i8>* %p1, align 16 + store <16 x i8> %in1, <16 x i8>* %x1, align 16 + %aese1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in1) #2 + store <16 x i8> %aese1, <16 x i8>* %x2, align 16 + %in2 = load <16 x i8>, <16 x i8>* %p2, align 16 + %aesmc1= call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese1) #2 + store <16 x i8> %aesmc1, <16 x i8>* %x3, align 16 + %aese2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in2) #2 + store <16 x i8> %aese2, <16 x i8>* %x4, align 16 + %aesmc2= call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese2) #2 + store <16 x i8> %aesmc2, <16 x i8>* %x5, align 16 + ret void + +; CHECK-LABEL: aes_load_store: +; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} +; CHECK-NEXT: aesmc {{v[0-7].16b}}, [[VA]] +; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} +; CHECK-NEXT: aesmc {{v[0-7].16b}}, [[VB]] +; CHECK-NOT: aesmc } diff --git a/test/CodeGen/AArch64/misched-stp.ll b/test/CodeGen/AArch64/misched-stp.ll index 4ea481cae68ef617b85278020893f97ddb269abd..1c9ea68834c23aacdaabe7aa68d788fa2eca25cc 100644 --- a/test/CodeGen/AArch64/misched-stp.ll +++ b/test/CodeGen/AArch64/misched-stp.ll @@ -1,20 +1,18 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa -enable-misched -verify-misched -o - | FileCheck %s ; Tests to check that the scheduler dependencies derived from alias analysis are ; correct when we have loads that have been split up so that they can later be ; merged into STP. -; CHECK: ********** MI Scheduling ********** -; CHECK: test_splat:BB#0 entry -; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%3+8] -; CHECK: Successors: -; CHECK-NEXT: ord [[SU1:SU\([0-9]+\)]] -; CHECK: SU({{[0-9]+}}): STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%3+4] -; CHECK: Successors: -; CHECK-NEXT: ord [[SU2:SU\([0-9]+\)]] -; CHECK: [[SU1]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 3; mem:ST4[%2] -; CHECK: [[SU2]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 2; mem:ST4[%1] +; Now that overwritten stores are elided in SelectionDAG, dependencies +; are resolved and removed before MISCHED. Check that we have +; equivalent pair of stp calls as a baseline. + +; CHECK-LABEL: test_splat +; CHECK: ldr [[REG:w[0-9]+]], [x2] +; CHECK-DAG: stp w0, [[REG]], [x2, #12] +; CHECK-DAG: stp [[REG]], w1, [x2, #4] define void @test_splat(i32 %x, i32 %y, i32* %p) { entry: %val = load i32, i32* %p, align 4 @@ -35,16 +33,11 @@ entry: declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) %struct.tree_common = type { i8*, i8*, i32 } -; CHECK: ********** MI Scheduling ********** -; CHECK: test_zero:BB#0 entry -; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 2; mem:ST8[%0+16] -; CHECK: Successors: -; CHECK-NEXT: ord [[SU3:SU\([0-9]+\)]] -; CHECK: SU({{[0-9]+}}): STRXui %XZR, %vreg{{[0-9]+}}, 1; mem:ST8[%0+8] -; CHECK: Successors: -; CHECK-NEXT: ord [[SU4:SU\([0-9]+\)]] -; CHECK: [[SU3]]: STRWui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 4; mem:ST4[%code1] -; CHECK: [[SU4]]: STRXui %vreg{{[0-9]+}}, %vreg{{[0-9]+}}, 1; mem:ST8[%type2] +; CHECK-LABEL: test_zero +; CHECK-DAG: stp x2, xzr, [x0, #8] +; CHECK-DAG: str w1, [x0, #16] +; CHECK-DAG: str xzr, [x0] + define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) { entry: %0 = bitcast %struct.tree_common* %t to i8* diff --git a/test/CodeGen/AArch64/nonlazybind.ll b/test/CodeGen/AArch64/nonlazybind.ll new file mode 100644 index 0000000000000000000000000000000000000000..4355d45fe84da707e1afea3e376721a1dae410d5 --- /dev/null +++ b/test/CodeGen/AArch64/nonlazybind.ll @@ -0,0 +1,40 @@ +; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-enable-nonlazybind | FileCheck %s +; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=CHECK-NORMAL + +define void @local() nonlazybind { + ret void +} + +declare void @nonlocal() nonlazybind + +define void @test_laziness() { +; CHECK-LABEL: test_laziness: + +; CHECK: bl _local + +; CHECK: adrp x[[TMP:[0-9]+]], _nonlocal@GOTPAGE +; CHECK: ldr [[FUNC:x[0-9]+]], [x[[TMP]], _nonlocal@GOTPAGEOFF] +; CHECK: blr [[FUNC]] + +; CHECK-NORMAL-LABEL: test_laziness: +; CHECK-NORMAL: bl _local +; CHEKC-NORMAL: bl _nonlocal + + call void @local() + call void @nonlocal() + ret void +} + +define void @test_laziness_tail() { +; CHECK-LABEL: test_laziness_tail: + +; CHECK: adrp x[[TMP:[0-9]+]], _nonlocal@GOTPAGE +; CHECK: ldr [[FUNC:x[0-9]+]], [x[[TMP]], _nonlocal@GOTPAGEOFF] +; CHECK: br [[FUNC]] + +; CHECK-NORMAL-LABEL: test_laziness_tail: +; CHECK-NORMAL: b _nonlocal + + tail call void @nonlocal() + ret void +} diff --git a/test/CodeGen/AArch64/optimize-imm.ll b/test/CodeGen/AArch64/optimize-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f960a3a95fc919b30c745f1b4be7160c1d36d653 --- /dev/null +++ b/test/CodeGen/AArch64/optimize-imm.ll @@ -0,0 +1,83 @@ +; RUN: llc -o - %s -mtriple=aarch64-- | FileCheck %s + +; CHECK-LABEL: and1: +; CHECK: and {{w[0-9]+}}, w0, #0xfffffffd + +define void @and1(i32 %a, i8* nocapture %p) { +entry: + %and = and i32 %a, 253 + %conv = trunc i32 %and to i8 + store i8 %conv, i8* %p, align 1 + ret void +} + +; (a & 0x3dfd) | 0xffffc000 +; +; CHECK-LABEL: and2: +; CHECK: and {{w[0-9]+}}, w0, #0xfdfdfdfd + +define i32 @and2(i32 %a) { +entry: + %and = and i32 %a, 15869 + %or = or i32 %and, -16384 + ret i32 %or +} + +; (a & 0x19) | 0xffffffc0 +; +; CHECK-LABEL: and3: +; CHECK: and {{w[0-9]+}}, w0, #0x99999999 + +define i32 @and3(i32 %a) { +entry: + %and = and i32 %a, 25 + %or = or i32 %and, -64 + ret i32 %or +} + +; (a & 0xc5600) | 0xfff1f1ff +; +; CHECK-LABEL: and4: +; CHECK: and {{w[0-9]+}}, w0, #0xfffc07ff + +define i32 @and4(i32 %a) { +entry: + %and = and i32 %a, 787968 + %or = or i32 %and, -921089 + ret i32 %or +} + +; Make sure we don't shrink or optimize an XOR's immediate operand if the +; immediate is -1. Instruction selection turns (and ((xor $mask, -1), $v0)) into +; a BIC. + +; CHECK-LABEL: xor1: +; CHECK: orr [[R0:w[0-9]+]], wzr, #0x38 +; CHECK: bic {{w[0-9]+}}, [[R0]], w0, lsl #3 + +define i32 @xor1(i32 %a) { +entry: + %shl = shl i32 %a, 3 + %xor = and i32 %shl, 56 + %and = xor i32 %xor, 56 + ret i32 %and +} + +; Check that, when (and %t1, 129) is transformed to (and %t0, 0), +; (xor %arg, 129) doesn't get transformed to (xor %arg, 0). +; +; CHECK-LABEL: PR33100: +; CHECK: mov w[[R0:[0-9]+]], #129 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, x[[R0]] + +define i64 @PR33100(i64 %arg) { +entry: + %alloca0 = alloca i64 + store i64 8, i64* %alloca0, align 4 + %t0 = load i64, i64* %alloca0, align 4 + %t1 = shl i64 %arg, %t0 + %and0 = and i64 %t1, 129 + %xor0 = xor i64 %arg, 129 + %t2 = add i64 %and0, %xor0 + ret i64 %t2 +} diff --git a/test/CodeGen/AArch64/pr33172.ll b/test/CodeGen/AArch64/pr33172.ll new file mode 100644 index 0000000000000000000000000000000000000000..1e1da78b28ff4d536ac4398e9e5b6ca116b0a659 --- /dev/null +++ b/test/CodeGen/AArch64/pr33172.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s | FileCheck %s + +; CHECK-LABEL: pr33172 +; CHECK: ldp +; CHECK: stp + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios10.3.0" + +@main.b = external global [200 x float], align 8 +@main.x = external global [200 x float], align 8 + +; Function Attrs: nounwind ssp +define void @pr33172() local_unnamed_addr { +entry: + %wide.load8281058.3 = load i64, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.b, i64 0, i64 12) to i64*), align 8 + %wide.load8291059.3 = load i64, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.b, i64 0, i64 14) to i64*), align 8 + store i64 %wide.load8281058.3, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 12) to i64*), align 8 + store i64 %wide.load8291059.3, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 14) to i64*), align 8 + %wide.load8281058.4 = load i64, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.b, i64 0, i64 16) to i64*), align 8 + %wide.load8291059.4 = load i64, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.b, i64 0, i64 18) to i64*), align 8 + store i64 %wide.load8281058.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 16) to i64*), align 8 + store i64 %wide.load8291059.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 18) to i64*), align 8 + tail call void @llvm.memset.p0i8.i64(i8* bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i32 8, i1 false) #2 + unreachable +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1 + +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind } diff --git a/test/CodeGen/AArch64/reg-scavenge-frame.mir b/test/CodeGen/AArch64/reg-scavenge-frame.mir new file mode 100644 index 0000000000000000000000000000000000000000..f79e75e248b9681a3886ffa8170271cf4ce182ce --- /dev/null +++ b/test/CodeGen/AArch64/reg-scavenge-frame.mir @@ -0,0 +1,86 @@ +# RUN: llc -run-pass=prologepilog -verify-machineinstrs %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-linux-gnu" + define void @ScavengeForFrameWithoutOffset() { ret void } +... +--- +name: ScavengeForFrameWithoutOffset +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, offset: 0, size: 32, alignment: 8 } +body: | + bb.0: + liveins: %d16_d17_d18_d19 + %x0 = COPY %xzr + %x1 = COPY %xzr + %x2 = COPY %xzr + %x3 = COPY %xzr + %x4 = COPY %xzr + %x5 = COPY %xzr + %x6 = COPY %xzr + %x7 = COPY %xzr + %x8 = COPY %xzr + %x9 = COPY %xzr + %x10 = COPY %xzr + %x11 = COPY %xzr + %x12 = COPY %xzr + %x13 = COPY %xzr + %x14 = COPY %xzr + %x15 = COPY %xzr + %x16 = COPY %xzr + %x17 = COPY %xzr + %x18 = COPY %xzr + %x19 = COPY %xzr + %x20 = COPY %xzr + %x21 = COPY %xzr + %x22 = COPY %xzr + %x23 = COPY %xzr + %x24 = COPY %xzr + %x25 = COPY %xzr + %x26 = COPY %xzr + %x27 = COPY %xzr + %x28 = COPY %xzr + %fp = COPY %xzr + %lr = COPY %xzr + ST1Fourv1d killed %d16_d17_d18_d19, %stack.0 :: (store 32 into %stack.0, align 8) + ; CHECK: STRXui killed %[[SCAVREG:x[0-9]+|fp|lr]], %sp, [[SPOFFSET:[0-9]+]] :: (store 8 into %stack.1) + ; CHECK-NEXT: %[[SCAVREG]] = ADDXri %sp, {{[0-9]+}}, 0 + ; CHECK-NEXT: ST1Fourv1d killed %d16_d17_d18_d19, killed %[[SCAVREG]] :: (store 32 into %stack.0, align 8) + ; CHECK-NEXT: %[[SCAVREG]] = LDRXui %sp, [[SPOFFSET]] :: (load 8 from %stack.1) + + HINT 0, implicit %x0 + HINT 0, implicit %x1 + HINT 0, implicit %x2 + HINT 0, implicit %x3 + HINT 0, implicit %x4 + HINT 0, implicit %x5 + HINT 0, implicit %x6 + HINT 0, implicit %x7 + HINT 0, implicit %x8 + HINT 0, implicit %x9 + HINT 0, implicit %x10 + HINT 0, implicit %x11 + HINT 0, implicit %x12 + HINT 0, implicit %x13 + HINT 0, implicit %x14 + HINT 0, implicit %x15 + HINT 0, implicit %x16 + HINT 0, implicit %x17 + HINT 0, implicit %x18 + HINT 0, implicit %x19 + HINT 0, implicit %x20 + HINT 0, implicit %x21 + HINT 0, implicit %x22 + HINT 0, implicit %x23 + HINT 0, implicit %x24 + HINT 0, implicit %x25 + HINT 0, implicit %x26 + HINT 0, implicit %x27 + HINT 0, implicit %x28 + HINT 0, implicit %fp + HINT 0, implicit %lr + + RET_ReallyLR +... diff --git a/test/CodeGen/AArch64/regcoal-physreg.mir b/test/CodeGen/AArch64/regcoal-physreg.mir index 813106366968d8e742851916c423377ef6817998..f88b7482acacfbb7d5789860f06fdc9fbe886333 100644 --- a/test/CodeGen/AArch64/regcoal-physreg.mir +++ b/test/CodeGen/AArch64/regcoal-physreg.mir @@ -93,7 +93,6 @@ body: | name: func1 body: | bb.0: - successors: %bb.1, %bb.2 ; Cannot coalesce physreg because we have reads on other CFG paths (we ; currently abort for any control flow) ; CHECK-NOT: %fp = SUBXri @@ -117,7 +116,6 @@ body: | name: func2 body: | bb.0: - successors: %bb.1, %bb.2 ; We can coalesce copies from physreg to vreg across multiple blocks. ; CHECK-NOT: COPY ; CHECK: CBZX undef %x0, %bb.1 diff --git a/test/CodeGen/AArch64/scheduledag-constreg.mir b/test/CodeGen/AArch64/scheduledag-constreg.mir index 23c785504f01cc9048476280f8c77899507f2467..6b83dc715e0af2483ed2c1f08fd7011a7c2b5e84 100644 --- a/test/CodeGen/AArch64/scheduledag-constreg.mir +++ b/test/CodeGen/AArch64/scheduledag-constreg.mir @@ -1,4 +1,4 @@ -# RUN: llc -o /dev/null %s -mtriple=aarch64-- -run-pass=machine-scheduler -enable-misched -debug-only=misched 2>&1 | FileCheck %s +# RUN: llc -o /dev/null %s -mtriple=aarch64-- -run-pass=machine-scheduler -enable-misched -debug-only=machine-scheduler 2>&1 | FileCheck %s # REQUIRES: asserts --- | define void @func() { ret void } diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll index c3a172dfb427e0b0f34a58511f487ce010534219..41ee40378b4fc84795000d28a998b489e866e632 100644 --- a/test/CodeGen/AArch64/sincos-expansion.ll +++ b/test/CodeGen/AArch64/sincos-expansion.ll @@ -1,8 +1,18 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s define float @test_sincos_f32(float %f) { +; CHECK-LABEL: test_sincos_f32: %sin = call float @sinf(float %f) readnone %cos = call float @cosf(float %f) readnone +; CHECK: bl sincosf + %val = fadd float %sin, %cos + ret float %val +} + +define float @test_sincos_f32_errno(float %f) { +; CHECK-LABEL: test_sincos_f32_errno: + %sin = call float @sinf(float %f) + %cos = call float @cosf(float %f) ; CHECK: bl sinf ; CHECK: bl cosf %val = fadd float %sin, %cos @@ -10,26 +20,46 @@ define float @test_sincos_f32(float %f) { } define double @test_sincos_f64(double %f) { +; CHECK-LABEL: test_sincos_f64: %sin = call double @sin(double %f) readnone %cos = call double @cos(double %f) readnone %val = fadd double %sin, %cos +; CHECK: bl sincos + ret double %val +} + +define double @test_sincos_f64_errno(double %f) { +; CHECK-LABEL: test_sincos_f64_errno: + %sin = call double @sin(double %f) + %cos = call double @cos(double %f) + %val = fadd double %sin, %cos ; CHECK: bl sin ; CHECK: bl cos ret double %val } define fp128 @test_sincos_f128(fp128 %f) { +; CHECK-LABEL: test_sincos_f128: %sin = call fp128 @sinl(fp128 %f) readnone %cos = call fp128 @cosl(fp128 %f) readnone %val = fadd fp128 %sin, %cos +; CHECK: bl sincosl + ret fp128 %val +} + +define fp128 @test_sincos_f128_errno(fp128 %f) { +; CHECK-LABEL: test_sincos_f128_errno: + %sin = call fp128 @sinl(fp128 %f) + %cos = call fp128 @cosl(fp128 %f) + %val = fadd fp128 %sin, %cos ; CHECK: bl sinl ; CHECK: bl cosl ret fp128 %val } -declare float @sinf(float) readonly -declare double @sin(double) readonly -declare fp128 @sinl(fp128) readonly -declare float @cosf(float) readonly -declare double @cos(double) readonly -declare fp128 @cosl(fp128) readonly +declare float @sinf(float) +declare double @sin(double) +declare fp128 @sinl(fp128) +declare float @cosf(float) +declare double @cos(double) +declare fp128 @cosl(fp128) diff --git a/test/CodeGen/AArch64/spill-undef.mir b/test/CodeGen/AArch64/spill-undef.mir new file mode 100644 index 0000000000000000000000000000000000000000..4294df286bd302779c38416c44b75b9c81168b9c --- /dev/null +++ b/test/CodeGen/AArch64/spill-undef.mir @@ -0,0 +1,67 @@ +# RUN: llc %s -run-pass greedy -o - | FileCheck %s +# Check that we don't insert spill code for undef values. +# Uninitialized memory for them is fine. +# PR33311 +--- | + ; ModuleID = 'stuff.ll' + target triple = "aarch64--" + + @g = external global i32 + + define void @foobar() { + ret void + } + +... +--- +name: foobar +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32all } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr64common } + - { id: 5, class: gpr32 } + - { id: 6, class: gpr64common } + - { id: 7, class: gpr32 } + - { id: 8, class: gpr32 } + - { id: 9, class: gpr64 } +body: | + bb.0: + liveins: %x0 + successors: %bb.1, %bb.2 + + ; %8 is going to be spilled. + ; But on that path, we don't care about its value. + ; Emit a simple KILL instruction instead of an + ; actual spill. + ; CHECK: [[UNDEF:%[0-9]+]] = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[UNDEF]] + %8 = IMPLICIT_DEF + ; %9 us going to be spilled. + ; But it is only partially undef. + ; Make sure we spill it properly + ; CHECK: [[NINE:%[0-9]+]] = COPY %x0 + ; CHECK: [[NINE]].sub_32 = IMPLICIT_DEF + ; CHECK-NEXT: STRXui [[NINE]] + %9 = COPY %x0 + %9.sub_32 = IMPLICIT_DEF + CBNZW %wzr, %bb.2 + B %bb.1 + + bb.1: + %4 = ADRP target-flags(aarch64-page) @g + %8 = LDRWui %4, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile dereferenceable load 4 from @g) + INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr + + bb.2: + INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr + %6 = ADRP target-flags(aarch64-page) @g + %w0 = MOVi32imm 42 + STRWui %8, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 4 into @g) + STRXui %9, %6, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile store 8 into @g) + RET_ReallyLR implicit killed %w0 + +... diff --git a/test/CodeGen/AArch64/stackmap-frame-setup.ll b/test/CodeGen/AArch64/stackmap-frame-setup.ll index 5646703fa4038c86d4cfff3af98848c75958afbd..677ff8dc25306fbb64778fee5b83d6339e246a8a 100644 --- a/test/CodeGen/AArch64/stackmap-frame-setup.ll +++ b/test/CodeGen/AArch64/stackmap-frame-setup.ll @@ -7,11 +7,11 @@ entry: store i64 11, i64* %metadata store i64 12, i64* %metadata store i64 13, i64* %metadata -; ISEL: ADJCALLSTACKDOWN 0, implicit-def +; ISEL: ADJCALLSTACKDOWN 0, 0, implicit-def ; ISEL-NEXT: STACKMAP ; ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata) -; FAST-ISEL: ADJCALLSTACKDOWN 0, implicit-def +; FAST-ISEL: ADJCALLSTACKDOWN 0, 0, implicit-def ; FAST-ISEL-NEXT: STACKMAP ; FAST-ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def ret void diff --git a/test/CodeGen/AArch64/stackmap-liveness.ll b/test/CodeGen/AArch64/stackmap-liveness.ll index 4b04276ac226ba3608f29a35512a6a45f940c307..b66dbfae6c8a4f7d2914705c8d556c32a2e04b48 100644 --- a/test/CodeGen/AArch64/stackmap-liveness.ll +++ b/test/CodeGen/AArch64/stackmap-liveness.ll @@ -5,7 +5,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps ; CHECK-NEXT: __LLVM_StackMaps: ; Header -; CHECK-NEXT: .byte 2 +; CHECK-NEXT: .byte 3 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 0 ; Num Functions @@ -25,6 +25,7 @@ define i64 @stackmap_liveness(i1 %c) { ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 ; Padding +; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: .short 0 ; Num LiveOut Entries: 1 ; CHECK-NEXT: .short 2 diff --git a/test/CodeGen/AArch64/swifterror.ll b/test/CodeGen/AArch64/swifterror.ll index 69bf3510cc5a72594daf924763012347bebc845b..bc28f477c8104ca81d8d53d465b3e1b2d9460362 100644 --- a/test/CodeGen/AArch64/swifterror.ll +++ b/test/CodeGen/AArch64/swifterror.ll @@ -597,3 +597,30 @@ entry: tail call void @acallee(i8* null) ret void } + +declare swiftcc void @foo2(%swift_error** swifterror) + +; Make sure we properly assign registers during fast-isel. +; CHECK-O0-LABEL: testAssign +; CHECK-O0: mov [[TMP:x.*]], xzr +; CHECK-O0: mov x21, [[TMP]] +; CHECK-O0: bl _foo2 +; CHECK-O0: str x21, [s[[STK:.*]]] +; CHECK-O0: ldr x0, [s[[STK]]] + +; CHECK-APPLE-LABEL: testAssign +; CHECK-APPLE: mov x21, xzr +; CHECK-APPLE: bl _foo2 +; CHECK-APPLE: mov x0, x21 + +define swiftcc %swift_error* @testAssign(i8* %error_ref) { +entry: + %error_ptr = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr + call swiftcc void @foo2(%swift_error** swifterror %error_ptr) + br label %a + +a: + %error = load %swift_error*, %swift_error** %error_ptr + ret %swift_error* %error +} diff --git a/test/CodeGen/AArch64/swiftself-scavenger.ll b/test/CodeGen/AArch64/swiftself-scavenger.ll new file mode 100644 index 0000000000000000000000000000000000000000..6d02784409317459e5aefc23e98d8c1e846291ab --- /dev/null +++ b/test/CodeGen/AArch64/swiftself-scavenger.ll @@ -0,0 +1,82 @@ +; RUN: llc -o - %s | FileCheck %s +; Check that we reserve an emergency spill slot, even if we added an extra +; CSR spill for the values used by the swiftself parameter. +; CHECK-LABEL: func: +; CHECK: str [[REG:x[0-9]+]], [sp, #8] +; CHECK: add [[REG]], sp, #248 +; CHECK: str xzr, [{{\s*}}[[REG]], #32760] +; CHECK: ldr x30, [sp, #8] +target triple = "arm64-apple-ios" + +@ptr8 = external global i8* +@ptr64 = external global i64 + +define hidden swiftcc void @func(i8* swiftself %arg) #0 { +bb: + %stack0 = alloca i8*, i32 5000, align 8 + %stack1 = alloca i8*, i32 32, align 8 + + %v0 = load volatile i64, i64* @ptr64, align 8 + %v1 = load volatile i64, i64* @ptr64, align 8 + %v2 = load volatile i64, i64* @ptr64, align 8 + %v3 = load volatile i64, i64* @ptr64, align 8 + %v4 = load volatile i64, i64* @ptr64, align 8 + %v5 = load volatile i64, i64* @ptr64, align 8 + %v6 = load volatile i64, i64* @ptr64, align 8 + %v7 = load volatile i64, i64* @ptr64, align 8 + %v8 = load volatile i64, i64* @ptr64, align 8 + %v9 = load volatile i64, i64* @ptr64, align 8 + %v10 = load volatile i64, i64* @ptr64, align 8 + %v11 = load volatile i64, i64* @ptr64, align 8 + %v12 = load volatile i64, i64* @ptr64, align 8 + %v13 = load volatile i64, i64* @ptr64, align 8 + %v14 = load volatile i64, i64* @ptr64, align 8 + %v15 = load volatile i64, i64* @ptr64, align 8 + %v16 = load volatile i64, i64* @ptr64, align 8 + %v17 = load volatile i64, i64* @ptr64, align 8 + %v18 = load volatile i64, i64* @ptr64, align 8 + %v19 = load volatile i64, i64* @ptr64, align 8 + %v20 = load volatile i64, i64* @ptr64, align 8 + %v21 = load volatile i64, i64* @ptr64, align 8 + %v22 = load volatile i64, i64* @ptr64, align 8 + %v23 = load volatile i64, i64* @ptr64, align 8 + %v24 = load volatile i64, i64* @ptr64, align 8 + %v25 = load volatile i64, i64* @ptr64, align 8 + + ; this should exceed stack-relative addressing limits and need an emergency + ; spill slot. + %s = getelementptr inbounds i8*, i8** %stack0, i64 4092 + store volatile i8* null, i8** %s + store volatile i8* null, i8** %stack1 + + store volatile i64 %v0, i64* @ptr64, align 8 + store volatile i64 %v1, i64* @ptr64, align 8 + store volatile i64 %v2, i64* @ptr64, align 8 + store volatile i64 %v3, i64* @ptr64, align 8 + store volatile i64 %v4, i64* @ptr64, align 8 + store volatile i64 %v5, i64* @ptr64, align 8 + store volatile i64 %v6, i64* @ptr64, align 8 + store volatile i64 %v7, i64* @ptr64, align 8 + store volatile i64 %v8, i64* @ptr64, align 8 + store volatile i64 %v9, i64* @ptr64, align 8 + store volatile i64 %v10, i64* @ptr64, align 8 + store volatile i64 %v11, i64* @ptr64, align 8 + store volatile i64 %v12, i64* @ptr64, align 8 + store volatile i64 %v13, i64* @ptr64, align 8 + store volatile i64 %v14, i64* @ptr64, align 8 + store volatile i64 %v15, i64* @ptr64, align 8 + store volatile i64 %v16, i64* @ptr64, align 8 + store volatile i64 %v17, i64* @ptr64, align 8 + store volatile i64 %v18, i64* @ptr64, align 8 + store volatile i64 %v19, i64* @ptr64, align 8 + store volatile i64 %v20, i64* @ptr64, align 8 + store volatile i64 %v21, i64* @ptr64, align 8 + store volatile i64 %v22, i64* @ptr64, align 8 + store volatile i64 %v23, i64* @ptr64, align 8 + store volatile i64 %v24, i64* @ptr64, align 8 + store volatile i64 %v25, i64* @ptr64, align 8 + + ; use swiftself parameter late so it stays alive throughout the function. + store volatile i8* %arg, i8** @ptr8 + ret void +} diff --git a/test/CodeGen/AArch64/tailcall_misched_graph.ll b/test/CodeGen/AArch64/tailcall_misched_graph.ll index 0e4eb2b5fad9ed74f0ba2b4d0b96e15da6f5c501..4fbd8944f0322178591d9fa8f5ac04adb2936f5d 100644 --- a/test/CodeGen/AArch64/tailcall_misched_graph.ll +++ b/test/CodeGen/AArch64/tailcall_misched_graph.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=cyclone -debug-only=misched < %s 2>&1 | FileCheck %s +; RUN: llc -mcpu=cyclone -debug-only=machine-scheduler < %s 2>&1 | FileCheck %s ; REQUIRES: asserts diff --git a/test/CodeGen/AArch64/xray-attribute-instrumentation.ll b/test/CodeGen/AArch64/xray-attribute-instrumentation.ll index d0f5f40e156c9e6c96a819e3937c83907c002274..38b62a72a20f5ac7eaf8fe0c2b9c9be5d85db905 100644 --- a/test/CodeGen/AArch64/xray-attribute-instrumentation.ll +++ b/test/CodeGen/AArch64/xray-attribute-instrumentation.ll @@ -26,6 +26,7 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" } ; CHECK: .p2align 4 ; CHECK-NEXT: .xword .Lxray_synthetic_0 +; CHECK-NEXT: .xword .Lxray_fn_idx_synth_0 ; CHECK-NEXT: .section xray_instr_map,{{.*}} ; CHECK-LABEL: Lxray_synthetic_0: ; CHECK: .xword .Lxray_sled_0 diff --git a/test/CodeGen/AArch64/xray-tail-call-sled.ll b/test/CodeGen/AArch64/xray-tail-call-sled.ll index 6ada3ce8d551b0ec72056a5d203ed84ebaca6b30..fb89950b99c844f0ee51a86a7bab3f4da87f7a4d 100644 --- a/test/CodeGen/AArch64/xray-tail-call-sled.ll +++ b/test/CodeGen/AArch64/xray-tail-call-sled.ll @@ -29,10 +29,16 @@ define i32 @callee() nounwind noinline uwtable "function-instrument"="xray-alway } ; CHECK: .p2align 4 ; CHECK-NEXT: .xword .Lxray_synthetic_0 +; CHECK-NEXT: .xword .Lxray_fn_idx_synth_0 ; CHECK-NEXT: .section xray_instr_map,{{.*}} ; CHECK-LABEL: Lxray_synthetic_0: ; CHECK: .xword .Lxray_sled_0 ; CHECK: .xword .Lxray_sled_1 +; CHECK-LABEL: Lxray_synthetic_end0: +; CHECK: .section xray_fn_idx,{{.*}} +; CHECK-LABEL: Lxray_fn_idx_synth_0: +; CHECK: .xword .Lxray_synthetic_0 +; CHECK-NEXT: .xword .Lxray_synthetic_end0 define i32 @caller() nounwind noinline uwtable "function-instrument"="xray-always" { ; CHECK: .p2align 2 @@ -63,7 +69,13 @@ define i32 @caller() nounwind noinline uwtable "function-instrument"="xray-alway } ; CHECK: .p2align 4 ; CHECK-NEXT: .xword .Lxray_synthetic_1 +; CHECK-NEXT: .xword .Lxray_fn_idx_synth_1 ; CHECK-NEXT: .section xray_instr_map,{{.*}} ; CHECK-LABEL: Lxray_synthetic_1: ; CHECK: .xword .Lxray_sled_2 ; CHECK: .xword .Lxray_sled_3 +; CHECK-LABEL: Lxray_synthetic_end1: +; CHECK: .section xray_fn_idx,{{.*}} +; CHECK-LABEL: Lxray_fn_idx_synth_1: +; CHECK: .xword .Lxray_synthetic_1 +; CHECK-NEXT: .xword .Lxray_synthetic_end1 diff --git a/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll b/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll index 5b78009961335d860050fa6b8fa05bae0b8c79bd..cdfb667c26bd784bd823b8f0aa8f94cbc02a875b 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll +++ b/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll @@ -6,7 +6,8 @@ ; Tests for add. ; CHECK: name: addi32 ; CHECK: {{%[0-9]+}}(s32) = G_ADD -define i32 @addi32(i32 %arg1, i32 %arg2) { +define amdgpu_kernel void @addi32(i32 %arg1, i32 %arg2) { %res = add i32 %arg1, %arg2 - ret i32 %res + store i32 %res, i32 addrspace(1)* undef + ret void } diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir new file mode 100644 index 0000000000000000000000000000000000000000..f10c896a7af66d069a51c8bf8728f620882811ef --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -0,0 +1,22 @@ +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- | + define void @test_add() { ret void } +... + +--- +name: test_add +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %vgpr0, %vgpr1 + ; CHECK-LABEL: name: test_add + ; CHECK: %2(s32) = G_ADD %0, %1 + + %0(s32) = COPY %vgpr0 + %1(s32) = COPY %vgpr1 + %2(s32) = G_ADD %0, %1 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir new file mode 100644 index 0000000000000000000000000000000000000000..b3e41c7751c53e0155283e52050a112a4f6eaa27 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir @@ -0,0 +1,49 @@ +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- | + define void @test_constant() { + entry: + ret void + } + + define void @test_fconstant() { + entry: + ret void + } + + declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 + + attributes #1 = { nounwind } + +... + +--- +name: test_constant +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_constant + ; CHECK: %0(s32) = G_CONSTANT i32 5 + ; CHECK: %1(s1) = G_CONSTANT i1 false + + %0(s32) = G_CONSTANT i32 5 + %1(s1) = G_CONSTANT i1 0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %0, %0, %0, %0, %0, %1, %1; +... + +--- +name: test_fconstant +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0.entry: + ; CHECK-LABEL: name: test_fconstant + ; CHECK: %0(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: %1(s32) = G_FCONSTANT float 7.5 + + %0(s32) = G_FCONSTANT float 1.0 + %1(s32) = G_FCONSTANT float 7.5 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir new file mode 100644 index 0000000000000000000000000000000000000000..ebd473d769b37073b4d7e76634cd5445c079c8de --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -0,0 +1,24 @@ +# RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- | + define void @test_icmp() { + entry: + ret void + } +... + +--- +name: test_icmp +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0.entry: + liveins: %vgpr0 + %0(s32) = G_CONSTANT i32 0 + %1(s32) = COPY %vgpr0 + + ; CHECK: %2(s1) = G_ICMP intpred(ne), %0(s32), %1 + %2(s1) = G_ICMP intpred(ne), %0, %1 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir new file mode 100644 index 0000000000000000000000000000000000000000..d11130936bd9b3d7e2bfbcd961229138eed9c8af --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -0,0 +1,28 @@ +# RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s + +--- | + define void @test_select() { ret void } +... + +--- +name: test_select +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } +body: | + bb.0: + liveins: %vgpr0 + %0(s32) = G_CONSTANT i32 0 + %1(s32) = COPY %vgpr0 + + %2(s1) = G_ICMP intpred(ne), %0, %1 + %3(s32) = G_CONSTANT i32 1 + %4(s32) = G_CONSTANT i32 2 + ; CHECK: %5(s32) = G_SELECT %2(s1), %3, %4 + %5(s32) = G_SELECT %2, %3, %4 + +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/lit.local.cfg b/test/CodeGen/AMDGPU/GlobalISel/lit.local.cfg new file mode 100644 index 0000000000000000000000000000000000000000..e99d1bb8446ce4ea3f995c967620e5163d400e52 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'global-isel' in config.root.available_features: + config.unsupported = True diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir index 3496b1ab71fe66ad28dbd7a3195d9e0e652cc263..902f1e6c67255e6bae3c6301abe0002a258a1e7f 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir @@ -24,8 +24,8 @@ legalized: true # CHECK-LABEL: name: load_constant # CHECK: registers: -# CHECK: - { id: 0, class: sgpr } -# CHECK: - { id: 1, class: sgpr } +# CHECK: - { id: 0, class: sgpr, preferred-register: '' } +# CHECK: - { id: 1, class: sgpr, preferred-register: '' } body: | bb.0: @@ -40,8 +40,8 @@ legalized: true # CHECK-LABEL: name: load_global_uniform # CHECK: registers: -# CHECK: - { id: 0, class: sgpr } -# CHECK: - { id: 1, class: sgpr } +# CHECK: - { id: 0, class: sgpr, preferred-register: '' } +# CHECK: - { id: 1, class: sgpr, preferred-register: '' } body: | bb.0: @@ -56,9 +56,9 @@ legalized: true # CHECK-LABEL: name: load_global_non_uniform # CHECK: registers: -# CHECK: - { id: 0, class: sgpr } -# CHECK: - { id: 1, class: vgpr } -# CHECK: - { id: 2, class: vgpr } +# CHECK: - { id: 0, class: sgpr, preferred-register: '' } +# CHECK: - { id: 1, class: vgpr, preferred-register: '' } +# CHECK: - { id: 2, class: vgpr, preferred-register: '' } body: | diff --git a/test/CodeGen/AMDGPU/add.i16.ll b/test/CodeGen/AMDGPU/add.i16.ll index b65e79f14deb71238a05ad350aaa308a6195b31d..bee13d8c17f1d34d4cf7fe3dfc2a95d4f08d8180 100644 --- a/test/CodeGen/AMDGPU/add.i16.ll +++ b/test/CodeGen/AMDGPU/add.i16.ll @@ -87,8 +87,7 @@ define amdgpu_kernel void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i1 ; VI: flat_load_ushort [[A:v[0-9]+]] ; VI: flat_load_ushort [[B:v[0-9]+]] ; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]] -; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0 -; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} +; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:{{[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} define amdgpu_kernel void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %tid diff --git a/test/CodeGen/AMDGPU/add.v2i16.ll b/test/CodeGen/AMDGPU/add.v2i16.ll index e137ef4bc23672555769fb1c53b9d3fba0b763dc..76f724c2b90bac74e93b3a53363369023a510ea7 100644 --- a/test/CodeGen/AMDGPU/add.v2i16.ll +++ b/test/CodeGen/AMDGPU/add.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; FIXME: Need to handle non-uniform case for function below (load without gep). @@ -23,7 +23,7 @@ define amdgpu_kernel void @v_test_add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i ; GFX9: s_load_dword [[VAL0:s[0-9]+]] ; GFX9: s_load_dword [[VAL1:s[0-9]+]] ; GFX9: v_mov_b32_e32 [[VVAL1:v[0-9]+]] -; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[VVAL1]], [[VAL0]] +; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[VAL0]], [[VVAL1]] ; VI: s_add_i32 ; VI: s_add_i32 @@ -50,7 +50,7 @@ define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, < ; FIXME: VI should not scalarize arg access. ; GCN-LABEL: {{^}}s_test_add_v2i16_kernarg: -; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} +; GFX9: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; VI: v_add_i32 ; VI: v_add_i32_sdwa @@ -62,10 +62,11 @@ define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out ; GCN-LABEL: {{^}}v_test_add_v2i16_constant: ; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0x1c8007b{{$}} -; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[CONST]], v{{[0-9]+}} +; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]] ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x7b, v{{[0-9]+}} -; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x1c8, v{{[0-9]+}} +; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0x1c8 +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid @@ -79,10 +80,11 @@ define amdgpu_kernel void @v_test_add_v2i16_constant(<2 x i16> addrspace(1)* %ou ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_add_v2i16_neg_constant: ; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0xfc21fcb3{{$}} -; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[CONST]], v{{[0-9]+}} +; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]] ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffcb3, v{{[0-9]+}} -; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffc21, v{{[0-9]+}} +; VI-DAG: v_mov_b32_e32 v[[SCONST:[0-9]+]], 0xfffffc21 +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid @@ -96,11 +98,11 @@ define amdgpu_kernel void @v_test_add_v2i16_neg_constant(<2 x i16> addrspace(1)* ; GCN-LABEL: {{^}}v_test_add_v2i16_inline_neg1: ; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, -1{{$}} +; VI: v_mov_b32_e32 v[[SCONST:[0-9]+]], -1 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]] ; VI: flat_load_ushort [[LOAD1:v[0-9]+]] -; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD0]] +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], v[[SCONST]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, -1, [[LOAD1]] -; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_or_b32_e32 define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -114,7 +116,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(<2 x i16> addrspace(1)* ; GCN-LABEL: {{^}}v_test_add_v2i16_inline_lo_zero_hi: ; GFX9: s_mov_b32 [[K:s[0-9]+]], 32{{$}} -; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}{{$}} +; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]{{$}} ; VI-NOT: v_add_u16 ; VI: v_add_u16_e32 v{{[0-9]+}}, 32, v{{[0-9]+}} @@ -134,12 +136,12 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_lo_zero_hi(<2 x i16> addrspac ; The high element gives fp ; GCN-LABEL: {{^}}v_test_add_v2i16_inline_fp_split: ; GFX9: s_mov_b32 [[K:s[0-9]+]], 1.0 -; GFX9: v_pk_add_u16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}{{$}} +; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]{{$}} ; VI-NOT: v_add_u16 -; VI: v_add_u16_e32 v{{[0-9]+}}, 0x3f80, v{{[0-9]+}} +; VI: v_mov_b32_e32 v[[K:[0-9]+]], 0x3f80 +; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NOT: v_add_u16 -; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_or_b32_e32 define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -191,21 +193,19 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1) ; GFX9: flat_load_dword [[A:v[0-9]+]] ; GFX9: flat_load_dword [[B:v[0-9]+]] -; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]] ; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]] ; GFX9: buffer_store_dwordx4 +; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; VI: flat_load_ushort v[[A_LO:[0-9]+]] ; VI: flat_load_ushort v[[A_HI:[0-9]+]] ; VI: flat_load_ushort v[[B_LO:[0-9]+]] ; VI: flat_load_ushort v[[B_HI:[0-9]+]] -; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; VI: v_add_u16_e32 -; VI: v_add_u16_e32 +; VI-DAG: v_add_u16_e32 +; VI-DAG: v_add_u16_e32 ; VI: buffer_store_dwordx4 define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 { diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll index 6ec93c72ec527d4ad0f51570bd46cd8349031ce2..b1e71722d80c5c2193ac41b2d2115c367364b449 100644 --- a/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/test/CodeGen/AMDGPU/addrspacecast.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=CI %s -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=HSA -check-prefix=GFX9 %s ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast: ; HSA: enable_sgpr_private_segment_buffer = 1 @@ -223,9 +223,8 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { } ; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast: -; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}} -; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} -; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen +; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} +; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 { %cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)* store volatile i32 7, i32* %cast diff --git a/test/CodeGen/AMDGPU/always-uniform.ll b/test/CodeGen/AMDGPU/always-uniform.ll new file mode 100644 index 0000000000000000000000000000000000000000..4ba57fba81bc059e43c897dea4fd5b7470c269fa --- /dev/null +++ b/test/CodeGen/AMDGPU/always-uniform.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.workitem.id.x() +declare i32 @llvm.amdgcn.readfirstlane(i32) + +; GCN-LABEL: readfirstlane_uniform +; GCN: s_load_dwordx2 s{{\[}}[[IN_ADDR:[0-9]+]]:1{{\]}}, s[4:5], 0x0 +; GCN: v_readfirstlane_b32 s[[SCALAR:[0-9]+]], v0 +; GCN: s_add_u32 s[[LOAD_ADDR:[0-9]+]], s[[IN_ADDR]], s[[SCALAR]] +; GCN: s_load_dword s{{[0-9]+}}, s{{\[}}[[LOAD_ADDR]] + +define amdgpu_kernel void @readfirstlane_uniform(float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture readonly) { + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() + %scalar = tail call i32 @llvm.amdgcn.readfirstlane(i32 %tid) + %idx = zext i32 %scalar to i64 + %gep0 = getelementptr inbounds float, float addrspace(1)* %0, i64 %idx + %val = load float, float addrspace(1)* %gep0, align 4 + %gep1 = getelementptr inbounds float, float addrspace(1)* %1, i64 10 + store float %val, float addrspace(1)* %gep1, align 4 + ret void +} diff --git a/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll b/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll index 95a206e1dd00d3910c42f8faad0993e2002677a3..8e5a512dd3c9178f9ac25ea3fd14f010ecd83650 100644 --- a/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll +++ b/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll @@ -3,333 +3,358 @@ ; GCN-LABEL: @add_i3( ; SI: %r = add i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @add_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @add_i3(i3 %a, i3 %b) { %r = add i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nsw_i3( ; SI: %r = add nsw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @add_nsw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @add_nsw_i3(i3 %a, i3 %b) { %r = add nsw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nuw_i3( ; SI: %r = add nuw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @add_nuw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @add_nuw_i3(i3 %a, i3 %b) { %r = add nuw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nuw_nsw_i3( ; SI: %r = add nuw nsw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @add_nuw_nsw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @add_nuw_nsw_i3(i3 %a, i3 %b) { %r = add nuw nsw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_i3( ; SI: %r = sub i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @sub_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @sub_i3(i3 %a, i3 %b) { %r = sub i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nsw_i3( ; SI: %r = sub nsw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @sub_nsw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @sub_nsw_i3(i3 %a, i3 %b) { %r = sub nsw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nuw_i3( ; SI: %r = sub nuw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @sub_nuw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @sub_nuw_i3(i3 %a, i3 %b) { %r = sub nuw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nuw_nsw_i3( ; SI: %r = sub nuw nsw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @sub_nuw_nsw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @sub_nuw_nsw_i3(i3 %a, i3 %b) { %r = sub nuw nsw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_i3( ; SI: %r = mul i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @mul_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @mul_i3(i3 %a, i3 %b) { %r = mul i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nsw_i3( ; SI: %r = mul nsw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @mul_nsw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @mul_nsw_i3(i3 %a, i3 %b) { %r = mul nsw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nuw_i3( ; SI: %r = mul nuw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @mul_nuw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @mul_nuw_i3(i3 %a, i3 %b) { %r = mul nuw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nuw_nsw_i3( ; SI: %r = mul nuw nsw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @mul_nuw_nsw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @mul_nuw_nsw_i3(i3 %a, i3 %b) { %r = mul nuw nsw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @urem_i3( ; SI: %r = urem i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @urem_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @urem_i3(i3 %a, i3 %b) { %r = urem i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @srem_i3( ; SI: %r = srem i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @srem_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @srem_i3(i3 %a, i3 %b) { %r = srem i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_i3( ; SI: %r = shl i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @shl_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @shl_i3(i3 %a, i3 %b) { %r = shl i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nsw_i3( ; SI: %r = shl nsw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @shl_nsw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @shl_nsw_i3(i3 %a, i3 %b) { %r = shl nsw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nuw_i3( ; SI: %r = shl nuw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @shl_nuw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @shl_nuw_i3(i3 %a, i3 %b) { %r = shl nuw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nuw_nsw_i3( ; SI: %r = shl nuw nsw i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @shl_nuw_nsw_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @shl_nuw_nsw_i3(i3 %a, i3 %b) { %r = shl nuw nsw i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @lshr_i3( ; SI: %r = lshr i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @lshr_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @lshr_i3(i3 %a, i3 %b) { %r = lshr i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @lshr_exact_i3( ; SI: %r = lshr exact i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @lshr_exact_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @lshr_exact_i3(i3 %a, i3 %b) { %r = lshr exact i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @ashr_i3( ; SI: %r = ashr i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @ashr_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @ashr_i3(i3 %a, i3 %b) { %r = ashr i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @ashr_exact_i3( ; SI: %r = ashr exact i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = sext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @ashr_exact_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @ashr_exact_i3(i3 %a, i3 %b) { %r = ashr exact i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @and_i3( ; SI: %r = and i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @and_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @and_i3(i3 %a, i3 %b) { %r = and i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @or_i3( ; SI: %r = or i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @or_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @or_i3(i3 %a, i3 %b) { %r = or i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @xor_i3( ; SI: %r = xor i3 %a, %b -; SI-NEXT: ret i3 %r +; SI-NEXT: store volatile i3 %r ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[R_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @xor_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @xor_i3(i3 %a, i3 %b) { %r = xor i3 %a, %b - ret i3 %r + store volatile i3 %r, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_eq_i3( ; SI: %cmp = icmp eq i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]] @@ -337,17 +362,18 @@ define i3 @xor_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_eq_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_eq_i3(i3 %a, i3 %b) { %cmp = icmp eq i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ne_i3( ; SI: %cmp = icmp ne i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]] @@ -355,17 +381,18 @@ define i3 @select_eq_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_ne_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_ne_i3(i3 %a, i3 %b) { %cmp = icmp ne i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ugt_i3( ; SI: %cmp = icmp ugt i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]] @@ -373,17 +400,18 @@ define i3 @select_ne_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_ugt_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_ugt_i3(i3 %a, i3 %b) { %cmp = icmp ugt i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_uge_i3( ; SI: %cmp = icmp uge i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]] @@ -391,17 +419,18 @@ define i3 @select_ugt_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_uge_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_uge_i3(i3 %a, i3 %b) { %cmp = icmp uge i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ult_i3( ; SI: %cmp = icmp ult i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]] @@ -409,17 +438,18 @@ define i3 @select_uge_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_ult_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_ult_i3(i3 %a, i3 %b) { %cmp = icmp ult i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ule_i3( ; SI: %cmp = icmp ule i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]] @@ -427,17 +457,18 @@ define i3 @select_ult_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_ule_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_ule_i3(i3 %a, i3 %b) { %cmp = icmp ule i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sgt_i3( ; SI: %cmp = icmp sgt i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]] @@ -445,17 +476,18 @@ define i3 @select_ule_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_sgt_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_sgt_i3(i3 %a, i3 %b) { %cmp = icmp sgt i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sge_i3( ; SI: %cmp = icmp sge i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]] @@ -463,17 +495,18 @@ define i3 @select_sgt_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_sge_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_sge_i3(i3 %a, i3 %b) { %cmp = icmp sge i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_slt_i3( ; SI: %cmp = icmp slt i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]] @@ -481,17 +514,18 @@ define i3 @select_sge_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_slt_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_slt_i3(i3 %a, i3 %b) { %cmp = icmp slt i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sle_i3( ; SI: %cmp = icmp sle i3 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i3 %a, i3 %b -; SI-NEXT: ret i3 %sel +; SI-NEXT: store volatile i3 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i3 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]] @@ -499,384 +533,415 @@ define i3 @select_slt_i3(i3 %a, i3 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i3 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_3:[0-9]+]] = trunc i32 %[[SEL_32]] to i3 -; VI-NEXT: ret i3 %[[SEL_3]] -define i3 @select_sle_i3(i3 %a, i3 %b) { +; VI-NEXT: store volatile i3 %[[SEL_3]] +define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) { %cmp = icmp sle i3 %a, %b %sel = select i1 %cmp, i3 %a, i3 %b - ret i3 %sel + store volatile i3 %sel, i3 addrspace(1)* undef + ret void } declare i3 @llvm.bitreverse.i3(i3) ; GCN-LABEL: @bitreverse_i3( ; SI: %brev = call i3 @llvm.bitreverse.i3(i3 %a) -; SI-NEXT: ret i3 %brev +; SI-NEXT: store volatile i3 %brev ; VI: %[[A_32:[0-9]+]] = zext i3 %a to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]]) ; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 29 ; VI-NEXT: %[[R_3:[0-9]+]] = trunc i32 %[[S_32]] to i3 -; VI-NEXT: ret i3 %[[R_3]] -define i3 @bitreverse_i3(i3 %a) { +; VI-NEXT: store volatile i3 %[[R_3]] +define amdgpu_kernel void @bitreverse_i3(i3 %a) { %brev = call i3 @llvm.bitreverse.i3(i3 %a) - ret i3 %brev + store volatile i3 %brev, i3 addrspace(1)* undef + ret void } ; GCN-LABEL: @add_i16( ; SI: %r = add i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @add_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @add_i16(i16 %a, i16 %b) { %r = add i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @constant_add_i16( -; VI: ret i16 3 -define i16 @constant_add_i16() { +; VI: store volatile i16 3 +define amdgpu_kernel void @constant_add_i16() { %r = add i16 1, 2 - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @constant_add_nsw_i16( -; VI: ret i16 3 -define i16 @constant_add_nsw_i16() { +; VI: store volatile i16 3 +define amdgpu_kernel void @constant_add_nsw_i16() { %r = add nsw i16 1, 2 - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @constant_add_nuw_i16( -; VI: ret i16 3 -define i16 @constant_add_nuw_i16() { +; VI: store volatile i16 3 +define amdgpu_kernel void @constant_add_nuw_i16() { %r = add nsw i16 1, 2 - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nsw_i16( ; SI: %r = add nsw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @add_nsw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @add_nsw_i16(i16 %a, i16 %b) { %r = add nsw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nuw_i16( ; SI: %r = add nuw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @add_nuw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @add_nuw_i16(i16 %a, i16 %b) { %r = add nuw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nuw_nsw_i16( ; SI: %r = add nuw nsw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @add_nuw_nsw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @add_nuw_nsw_i16(i16 %a, i16 %b) { %r = add nuw nsw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_i16( ; SI: %r = sub i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @sub_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @sub_i16(i16 %a, i16 %b) { %r = sub i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nsw_i16( ; SI: %r = sub nsw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @sub_nsw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @sub_nsw_i16(i16 %a, i16 %b) { %r = sub nsw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nuw_i16( ; SI: %r = sub nuw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @sub_nuw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @sub_nuw_i16(i16 %a, i16 %b) { %r = sub nuw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nuw_nsw_i16( ; SI: %r = sub nuw nsw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @sub_nuw_nsw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @sub_nuw_nsw_i16(i16 %a, i16 %b) { %r = sub nuw nsw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_i16( ; SI: %r = mul i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @mul_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @mul_i16(i16 %a, i16 %b) { %r = mul i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nsw_i16( ; SI: %r = mul nsw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @mul_nsw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @mul_nsw_i16(i16 %a, i16 %b) { %r = mul nsw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nuw_i16( ; SI: %r = mul nuw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @mul_nuw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @mul_nuw_i16(i16 %a, i16 %b) { %r = mul nuw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nuw_nsw_i16( ; SI: %r = mul nuw nsw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @mul_nuw_nsw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @mul_nuw_nsw_i16(i16 %a, i16 %b) { %r = mul nuw nsw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @urem_i16( ; SI: %r = urem i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = urem i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @urem_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @urem_i16(i16 %a, i16 %b) { %r = urem i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @srem_i16( ; SI: %r = srem i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = srem i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @srem_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @srem_i16(i16 %a, i16 %b) { %r = srem i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_i16( ; SI: %r = shl i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @shl_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @shl_i16(i16 %a, i16 %b) { %r = shl i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nsw_i16( ; SI: %r = shl nsw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @shl_nsw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @shl_nsw_i16(i16 %a, i16 %b) { %r = shl nsw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nuw_i16( ; SI: %r = shl nuw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @shl_nuw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @shl_nuw_i16(i16 %a, i16 %b) { %r = shl nuw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nuw_nsw_i16( ; SI: %r = shl nuw nsw i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @shl_nuw_nsw_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @shl_nuw_nsw_i16(i16 %a, i16 %b) { %r = shl nuw nsw i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @lshr_i16( ; SI: %r = lshr i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @lshr_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @lshr_i16(i16 %a, i16 %b) { %r = lshr i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @lshr_exact_i16( ; SI: %r = lshr exact i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @lshr_exact_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @lshr_exact_i16(i16 %a, i16 %b) { %r = lshr exact i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @ashr_i16( ; SI: %r = ashr i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @ashr_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @ashr_i16(i16 %a, i16 %b) { %r = ashr i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @ashr_exact_i16( ; SI: %r = ashr exact i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = sext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @ashr_exact_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @ashr_exact_i16(i16 %a, i16 %b) { %r = ashr exact i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @constant_lshr_exact_i16( -; VI: ret i16 2 -define i16 @constant_lshr_exact_i16(i16 %a, i16 %b) { +; VI: store volatile i16 2 +define amdgpu_kernel void @constant_lshr_exact_i16(i16 %a, i16 %b) { %r = lshr exact i16 4, 1 - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @and_i16( ; SI: %r = and i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = and i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @and_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @and_i16(i16 %a, i16 %b) { %r = and i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @or_i16( ; SI: %r = or i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = or i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @or_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @or_i16(i16 %a, i16 %b) { %r = or i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @xor_i16( ; SI: %r = xor i16 %a, %b -; SI-NEXT: ret i16 %r +; SI-NEXT: store volatile i16 %r ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = xor i32 %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[R_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @xor_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @xor_i16(i16 %a, i16 %b) { %r = xor i16 %a, %b - ret i16 %r + store volatile i16 %r, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_eq_i16( ; SI: %cmp = icmp eq i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq i32 %[[A_32_0]], %[[B_32_0]] @@ -884,17 +949,18 @@ define i16 @xor_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_eq_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_eq_i16(i16 %a, i16 %b) { %cmp = icmp eq i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ne_i16( ; SI: %cmp = icmp ne i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne i32 %[[A_32_0]], %[[B_32_0]] @@ -902,17 +968,18 @@ define i16 @select_eq_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_ne_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_ne_i16(i16 %a, i16 %b) { %cmp = icmp ne i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ugt_i16( ; SI: %cmp = icmp ugt i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt i32 %[[A_32_0]], %[[B_32_0]] @@ -920,17 +987,18 @@ define i16 @select_ne_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_ugt_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_ugt_i16(i16 %a, i16 %b) { %cmp = icmp ugt i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_uge_i16( ; SI: %cmp = icmp uge i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge i32 %[[A_32_0]], %[[B_32_0]] @@ -938,17 +1006,18 @@ define i16 @select_ugt_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_uge_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_uge_i16(i16 %a, i16 %b) { %cmp = icmp uge i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ult_i16( ; SI: %cmp = icmp ult i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult i32 %[[A_32_0]], %[[B_32_0]] @@ -956,17 +1025,18 @@ define i16 @select_uge_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_ult_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_ult_i16(i16 %a, i16 %b) { %cmp = icmp ult i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ule_i16( ; SI: %cmp = icmp ule i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule i32 %[[A_32_0]], %[[B_32_0]] @@ -974,17 +1044,18 @@ define i16 @select_ult_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_ule_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_ule_i16(i16 %a, i16 %b) { %cmp = icmp ule i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sgt_i16( ; SI: %cmp = icmp sgt i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt i32 %[[A_32_0]], %[[B_32_0]] @@ -992,17 +1063,18 @@ define i16 @select_ule_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_sgt_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_sgt_i16(i16 %a, i16 %b) { %cmp = icmp sgt i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sge_i16( ; SI: %cmp = icmp sge i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge i32 %[[A_32_0]], %[[B_32_0]] @@ -1010,17 +1082,18 @@ define i16 @select_sgt_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_sge_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_sge_i16(i16 %a, i16 %b) { %cmp = icmp sge i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_slt_i16( ; SI: %cmp = icmp slt i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt i32 %[[A_32_0]], %[[B_32_0]] @@ -1028,17 +1101,18 @@ define i16 @select_sge_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_slt_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_slt_i16(i16 %a, i16 %b) { %cmp = icmp slt i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sle_i16( ; SI: %cmp = icmp sle i16 %a, %b ; SI-NEXT: %sel = select i1 %cmp, i16 %a, i16 %b -; SI-NEXT: ret i16 %sel +; SI-NEXT: store volatile i16 %sel ; VI: %[[A_32_0:[0-9]+]] = sext i16 %a to i32 ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle i32 %[[A_32_0]], %[[B_32_0]] @@ -1046,356 +1120,384 @@ define i16 @select_slt_i16(i16 %a, i16 %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext i16 %b to i32 ; VI-NEXT: %[[SEL_32:[0-9]+]] = select i1 %[[CMP]], i32 %[[A_32_1]], i32 %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc i32 %[[SEL_32]] to i16 -; VI-NEXT: ret i16 %[[SEL_16]] -define i16 @select_sle_i16(i16 %a, i16 %b) { +; VI-NEXT: store volatile i16 %[[SEL_16]] +define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) { %cmp = icmp sle i16 %a, %b %sel = select i1 %cmp, i16 %a, i16 %b - ret i16 %sel + store volatile i16 %sel, i16 addrspace(1)* undef + ret void } declare i16 @llvm.bitreverse.i16(i16) + ; GCN-LABEL: @bitreverse_i16( ; SI: %brev = call i16 @llvm.bitreverse.i16(i16 %a) -; SI-NEXT: ret i16 %brev +; SI-NEXT: store volatile i16 %brev ; VI: %[[A_32:[0-9]+]] = zext i16 %a to i32 ; VI-NEXT: %[[R_32:[0-9]+]] = call i32 @llvm.bitreverse.i32(i32 %[[A_32]]) ; VI-NEXT: %[[S_32:[0-9]+]] = lshr i32 %[[R_32]], 16 ; VI-NEXT: %[[R_16:[0-9]+]] = trunc i32 %[[S_32]] to i16 -; VI-NEXT: ret i16 %[[R_16]] -define i16 @bitreverse_i16(i16 %a) { +; VI-NEXT: store volatile i16 %[[R_16]] +define amdgpu_kernel void @bitreverse_i16(i16 %a) { %brev = call i16 @llvm.bitreverse.i16(i16 %a) - ret i16 %brev + store volatile i16 %brev, i16 addrspace(1)* undef + ret void } ; GCN-LABEL: @add_3xi15( ; SI: %r = add <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @add_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @add_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = add <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nsw_3xi15( ; SI: %r = add nsw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @add_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = add nsw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nuw_3xi15( ; SI: %r = add nuw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @add_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = add nuw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nuw_nsw_3xi15( ; SI: %r = add nuw nsw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @add_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = add nuw nsw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_3xi15( ; SI: %r = sub <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @sub_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @sub_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = sub <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nsw_3xi15( ; SI: %r = sub nsw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @sub_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = sub nsw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nuw_3xi15( ; SI: %r = sub nuw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @sub_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = sub nuw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nuw_nsw_3xi15( ; SI: %r = sub nuw nsw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @sub_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = sub nuw nsw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_3xi15( ; SI: %r = mul <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @mul_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @mul_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = mul <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nsw_3xi15( ; SI: %r = mul nsw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @mul_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = mul nsw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nuw_3xi15( ; SI: %r = mul nuw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @mul_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = mul nuw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nuw_nsw_3xi15( ; SI: %r = mul nuw nsw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @mul_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = mul nuw nsw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @urem_3xi15( ; SI: %r = urem <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @urem_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @urem_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = urem <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @srem_3xi15( ; SI: %r = srem <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @srem_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @srem_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = srem <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_3xi15( ; SI: %r = shl <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @shl_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @shl_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = shl <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nsw_3xi15( ; SI: %r = shl nsw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @shl_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = shl nsw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nuw_3xi15( ; SI: %r = shl nuw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @shl_nuw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = shl nuw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nuw_nsw_3xi15( ; SI: %r = shl nuw nsw <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @shl_nuw_nsw_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = shl nuw nsw <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @lshr_3xi15( ; SI: %r = lshr <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @lshr_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = lshr <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @lshr_exact_3xi15( ; SI: %r = lshr exact <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @lshr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = lshr exact <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @ashr_3xi15( ; SI: %r = ashr <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @ashr_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = ashr <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @ashr_exact_3xi15( ; SI: %r = ashr exact <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = sext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @ashr_exact_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = ashr exact <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @and_3xi15( ; SI: %r = and <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @and_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @and_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = and <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @or_3xi15( ; SI: %r = or <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @or_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @or_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = or <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @xor_3xi15( ; SI: %r = xor <3 x i15> %a, %b -; SI-NEXT: ret <3 x i15> %r +; SI-NEXT: store volatile <3 x i15> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @xor_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @xor_3xi15(<3 x i15> %a, <3 x i15> %b) { %r = xor <3 x i15> %a, %b - ret <3 x i15> %r + store volatile <3 x i15> %r, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_eq_3xi15( ; SI: %cmp = icmp eq <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1403,17 +1505,18 @@ define <3 x i15> @xor_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp eq <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ne_3xi15( ; SI: %cmp = icmp ne <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1421,17 +1524,18 @@ define <3 x i15> @select_eq_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp ne <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ugt_3xi15( ; SI: %cmp = icmp ugt <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1439,17 +1543,18 @@ define <3 x i15> @select_ne_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp ugt <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_uge_3xi15( ; SI: %cmp = icmp uge <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1457,17 +1562,18 @@ define <3 x i15> @select_ugt_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp uge <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ult_3xi15( ; SI: %cmp = icmp ult <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1475,17 +1581,18 @@ define <3 x i15> @select_uge_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp ult <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ule_3xi15( ; SI: %cmp = icmp ule <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1493,17 +1600,18 @@ define <3 x i15> @select_ult_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp ule <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sgt_3xi15( ; SI: %cmp = icmp sgt <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1511,17 +1619,18 @@ define <3 x i15> @select_ule_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp sgt <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sge_3xi15( ; SI: %cmp = icmp sge <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1529,17 +1638,18 @@ define <3 x i15> @select_sgt_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp sge <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_slt_3xi15( ; SI: %cmp = icmp slt <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1547,17 +1657,18 @@ define <3 x i15> @select_sge_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp slt <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sle_3xi15( ; SI: %cmp = icmp sle <3 x i15> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b -; SI-NEXT: ret <3 x i15> %sel +; SI-NEXT: store volatile <3 x i15> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1565,356 +1676,383 @@ define <3 x i15> @select_slt_3xi15(<3 x i15> %a, <3 x i15> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i15> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_15:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[SEL_15]] -define <3 x i15> @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) { +; VI-NEXT: store volatile <3 x i15> %[[SEL_15]] +define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) { %cmp = icmp sle <3 x i15> %a, %b %sel = select <3 x i1> %cmp, <3 x i15> %a, <3 x i15> %b - ret <3 x i15> %sel + store volatile <3 x i15> %sel, <3 x i15> addrspace(1)* undef + ret void } declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>) ; GCN-LABEL: @bitreverse_3xi15( ; SI: %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a) -; SI-NEXT: ret <3 x i15> %brev +; SI-NEXT: store volatile <3 x i15> %brev ; VI: %[[A_32:[0-9]+]] = zext <3 x i15> %a to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]]) ; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], ; VI-NEXT: %[[R_15:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i15> -; VI-NEXT: ret <3 x i15> %[[R_15]] -define <3 x i15> @bitreverse_3xi15(<3 x i15> %a) { +; VI-NEXT: store volatile <3 x i15> %[[R_15]] +define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) { %brev = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> %a) - ret <3 x i15> %brev + store volatile <3 x i15> %brev, <3 x i15> addrspace(1)* undef + ret void } ; GCN-LABEL: @add_3xi16( ; SI: %r = add <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @add_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @add_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = add <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nsw_3xi16( ; SI: %r = add nsw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @add_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = add nsw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nuw_3xi16( ; SI: %r = add nuw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @add_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = add nuw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @add_nuw_nsw_3xi16( ; SI: %r = add nuw nsw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = add nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @add_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = add nuw nsw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_3xi16( ; SI: %r = sub <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @sub_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @sub_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = sub <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nsw_3xi16( ; SI: %r = sub nsw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = sub nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @sub_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = sub nsw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nuw_3xi16( ; SI: %r = sub nuw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @sub_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = sub nuw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @sub_nuw_nsw_3xi16( ; SI: %r = sub nuw nsw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = sub nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @sub_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = sub nuw nsw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_3xi16( ; SI: %r = mul <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @mul_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @mul_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = mul <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nsw_3xi16( ; SI: %r = mul nsw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @mul_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = mul nsw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nuw_3xi16( ; SI: %r = mul nuw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @mul_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = mul nuw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @mul_nuw_nsw_3xi16( ; SI: %r = mul nuw nsw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = mul nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @mul_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = mul nuw nsw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @urem_3xi16( ; SI: %r = urem <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = urem <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @urem_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @urem_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = urem <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @srem_3xi16( ; SI: %r = srem <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = srem <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @srem_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @srem_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = srem <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_3xi16( ; SI: %r = shl <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @shl_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @shl_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = shl <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nsw_3xi16( ; SI: %r = shl nsw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @shl_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = shl nsw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nuw_3xi16( ; SI: %r = shl nuw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @shl_nuw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = shl nuw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @shl_nuw_nsw_3xi16( ; SI: %r = shl nuw nsw <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = shl nuw nsw <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @shl_nuw_nsw_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = shl nuw nsw <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @lshr_3xi16( ; SI: %r = lshr <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = lshr <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @lshr_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = lshr <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @lshr_exact_3xi16( ; SI: %r = lshr exact <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = lshr exact <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @lshr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = lshr exact <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @ashr_3xi16( ; SI: %r = ashr <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = ashr <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @ashr_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = ashr <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @ashr_exact_3xi16( ; SI: %r = ashr exact <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = sext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = ashr exact <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @ashr_exact_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = ashr exact <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @and_3xi16( ; SI: %r = and <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = and <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @and_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @and_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = and <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @or_3xi16( ; SI: %r = or <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = or <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @or_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @or_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = or <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @xor_3xi16( ; SI: %r = xor <3 x i16> %a, %b -; SI-NEXT: ret <3 x i16> %r +; SI-NEXT: store volatile <3 x i16> %r ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = xor <3 x i32> %[[A_32]], %[[B_32]] ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[R_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @xor_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @xor_3xi16(<3 x i16> %a, <3 x i16> %b) { %r = xor <3 x i16> %a, %b - ret <3 x i16> %r + store volatile <3 x i16> %r, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_eq_3xi16( ; SI: %cmp = icmp eq <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp eq <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1922,17 +2060,18 @@ define <3 x i16> @xor_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp eq <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ne_3xi16( ; SI: %cmp = icmp ne <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ne <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1940,17 +2079,18 @@ define <3 x i16> @select_eq_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp ne <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ugt_3xi16( ; SI: %cmp = icmp ugt <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ugt <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1958,17 +2098,18 @@ define <3 x i16> @select_ne_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp ugt <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_uge_3xi16( ; SI: %cmp = icmp uge <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp uge <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1976,17 +2117,18 @@ define <3 x i16> @select_ugt_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp uge <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ult_3xi16( ; SI: %cmp = icmp ult <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ult <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -1994,17 +2136,18 @@ define <3 x i16> @select_uge_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp ult <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_ule_3xi16( ; SI: %cmp = icmp ule <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp ule <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -2012,17 +2155,18 @@ define <3 x i16> @select_ult_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = zext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp ule <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sgt_3xi16( ; SI: %cmp = icmp sgt <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sgt <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -2030,17 +2174,18 @@ define <3 x i16> @select_ule_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp sgt <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sge_3xi16( ; SI: %cmp = icmp sge <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sge <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -2048,17 +2193,18 @@ define <3 x i16> @select_sgt_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp sge <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_slt_3xi16( ; SI: %cmp = icmp slt <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp slt <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -2066,17 +2212,18 @@ define <3 x i16> @select_sge_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp slt <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } ; GCN-LABEL: @select_sle_3xi16( ; SI: %cmp = icmp sle <3 x i16> %a, %b ; SI-NEXT: %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b -; SI-NEXT: ret <3 x i16> %sel +; SI-NEXT: store volatile <3 x i16> %sel ; VI: %[[A_32_0:[0-9]+]] = sext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[B_32_0:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[CMP:[0-9]+]] = icmp sle <3 x i32> %[[A_32_0]], %[[B_32_0]] @@ -2084,23 +2231,26 @@ define <3 x i16> @select_slt_3xi16(<3 x i16> %a, <3 x i16> %b) { ; VI-NEXT: %[[B_32_1:[0-9]+]] = sext <3 x i16> %b to <3 x i32> ; VI-NEXT: %[[SEL_32:[0-9]+]] = select <3 x i1> %[[CMP]], <3 x i32> %[[A_32_1]], <3 x i32> %[[B_32_1]] ; VI-NEXT: %[[SEL_16:[0-9]+]] = trunc <3 x i32> %[[SEL_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[SEL_16]] -define <3 x i16> @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) { +; VI-NEXT: store volatile <3 x i16> %[[SEL_16]] +define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) { %cmp = icmp sle <3 x i16> %a, %b %sel = select <3 x i1> %cmp, <3 x i16> %a, <3 x i16> %b - ret <3 x i16> %sel + store volatile <3 x i16> %sel, <3 x i16> addrspace(1)* undef + ret void } declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>) + ; GCN-LABEL: @bitreverse_3xi16( ; SI: %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a) -; SI-NEXT: ret <3 x i16> %brev +; SI-NEXT: store volatile <3 x i16> %brev ; VI: %[[A_32:[0-9]+]] = zext <3 x i16> %a to <3 x i32> ; VI-NEXT: %[[R_32:[0-9]+]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> %[[A_32]]) ; VI-NEXT: %[[S_32:[0-9]+]] = lshr <3 x i32> %[[R_32]], ; VI-NEXT: %[[R_16:[0-9]+]] = trunc <3 x i32> %[[S_32]] to <3 x i16> -; VI-NEXT: ret <3 x i16> %[[R_16]] -define <3 x i16> @bitreverse_3xi16(<3 x i16> %a) { +; VI-NEXT: store volatile <3 x i16> %[[R_16]] +define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) { %brev = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> %a) - ret <3 x i16> %brev + store volatile <3 x i16> %brev, <3 x i16> addrspace(1)* undef + ret void } diff --git a/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/test/CodeGen/AMDGPU/amdgpu.private-memory.ll index 97cb9067f29a977c8717b1901e69f2e7d8bbddff..1f4b1eaa209a0f7ddffe614b7a43e4f6bca32bab 100644 --- a/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -27,8 +27,6 @@ ; HSA-PROMOTE: workgroup_group_segment_byte_size = 5120 ; HSA-PROMOTE: .end_amd_kernel_code_t -; FIXME: These should be merged -; HSA-PROMOTE: s_load_dword s{{[0-9]+}}, s[4:5], 0x1 ; HSA-PROMOTE: s_load_dword s{{[0-9]+}}, s[4:5], 0x2 ; SI-PROMOTE: ds_write_b32 @@ -58,9 +56,9 @@ ; HSAOPT: [[LDZU:%[0-9]+]] = load i32, i32 addrspace(2)* [[GEP1]], align 4, !range !1, !invariant.load !0 ; HSAOPT: [[EXTRACTY:%[0-9]+]] = lshr i32 [[LDXY]], 16 -; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !1 -; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.y(), !range !1 -; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.z(), !range !1 +; HSAOPT: [[WORKITEM_ID_X:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.x(), !range !2 +; HSAOPT: [[WORKITEM_ID_Y:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.y(), !range !2 +; HSAOPT: [[WORKITEM_ID_Z:%[0-9]+]] = call i32 @llvm.amdgcn.workitem.id.z(), !range !2 ; HSAOPT: [[Y_SIZE_X_Z_SIZE:%[0-9]+]] = mul nuw nsw i32 [[EXTRACTY]], [[LDZU]] ; HSAOPT: [[YZ_X_XID:%[0-9]+]] = mul i32 [[Y_SIZE_X_Z_SIZE]], [[WORKITEM_ID_X]] @@ -77,9 +75,9 @@ ; NOHSAOPT: call i32 @llvm.r600.read.local.size.y(), !range !0 ; NOHSAOPT: call i32 @llvm.r600.read.local.size.z(), !range !0 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !0 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !0 -; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !0 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.x(), !range !1 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.y(), !range !1 +; NOHSAOPT: call i32 @llvm.amdgcn.workitem.id.z(), !range !1 define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { entry: %stack = alloca [5 x i32], align 4 @@ -557,6 +555,8 @@ entry: attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" } ; HSAOPT: !0 = !{} -; HSAOPT: !1 = !{i32 0, i32 2048} +; HSAOPT: !1 = !{i32 0, i32 257} +; HSAOPT: !2 = !{i32 0, i32 256} -; NOHSAOPT: !0 = !{i32 0, i32 2048} +; NOHSAOPT: !0 = !{i32 0, i32 257} +; NOHSAOPT: !1 = !{i32 0, i32 256} diff --git a/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index e2620ce353c607f042d93bdc75373bb4cd7c4889..f7461b925ca15da8dafad898363479c6a07f3d49 100644 --- a/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -221,10 +221,10 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* ret void } -attributes #0 = { nounwind readnone } +attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind } -; HSA: attributes #0 = { nounwind readnone } +; HSA: attributes #0 = { nounwind readnone speculatable } ; HSA: attributes #1 = { nounwind } ; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" } ; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" } diff --git a/test/CodeGen/AMDGPU/ashr.v2i16.ll b/test/CodeGen/AMDGPU/ashr.v2i16.ll index 96a5e3b23758a68ae22187929f326c357e112abb..dd96e6264418ae49033197e5d18b3a51ebfe54ef 100644 --- a/test/CodeGen/AMDGPU/ashr.v2i16.ll +++ b/test/CodeGen/AMDGPU/ashr.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s @@ -9,7 +9,7 @@ ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[VLHS]] ; VI: v_ashrrev_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; CI: v_ashrrev_i32_e32 ; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll index ac2f7b4a4a4b343802288b455520c22387a59066..822ea803194d8683266c96268895a1c48f6b96b2 100644 --- a/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll +++ b/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll @@ -39,44 +39,49 @@ define amdgpu_kernel void @max_9_sgprs(i32 addrspace(1)* %out1, ; features when the number of registers is frozen), this ends up using ; more than expected. -; ALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs: -; TOSGPR: SGPRBlocks: 1 -; TOSGPR: NumSGPRsForWavesPerEU: 16 +; XALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs: +; XTOSGPR: SGPRBlocks: 1 +; XTOSGPR: NumSGPRsForWavesPerEU: 16 -; TOSMEM: s_mov_b64 s[10:11], s[2:3] -; TOSMEM: s_mov_b64 s[8:9], s[0:1] -; TOSMEM: s_mov_b32 s7, s13 +; XTOSMEM: s_mov_b64 s[10:11], s[2:3] +; XTOSMEM: s_mov_b64 s[8:9], s[0:1] +; XTOSMEM: s_mov_b32 s7, s13 -; TOSMEM: SGPRBlocks: 1 -; TOSMEM: NumSGPRsForWavesPerEU: 16 -define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1, - i32 addrspace(1)* %out2, - i32 addrspace(1)* %out3, - i32 addrspace(1)* %out4, - i32 %one, i32 %two, i32 %three, i32 %four) #2 { - %x.0 = call i32 @llvm.amdgcn.workgroup.id.x() - %x.1 = call i32 @llvm.amdgcn.workgroup.id.y() - %x.2 = call i32 @llvm.amdgcn.workgroup.id.z() - %x.3 = call i64 @llvm.amdgcn.dispatch.id() - %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() - %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr() - store volatile i32 0, i32* undef - br label %stores - -stores: - store volatile i32 %x.0, i32 addrspace(1)* undef - store volatile i32 %x.0, i32 addrspace(1)* undef - store volatile i32 %x.0, i32 addrspace(1)* undef - store volatile i64 %x.3, i64 addrspace(1)* undef - store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef - store volatile i8 addrspace(2)* %x.5, i8 addrspace(2)* addrspace(1)* undef - - store i32 %one, i32 addrspace(1)* %out1 - store i32 %two, i32 addrspace(1)* %out2 - store i32 %three, i32 addrspace(1)* %out3 - store i32 %four, i32 addrspace(1)* %out4 - ret void -} +; XTOSMEM: SGPRBlocks: 1 +; XTOSMEM: NumSGPRsForWavesPerEU: 16 +; +; This test case is disabled: When calculating the spillslot addresses AMDGPU +; creates an extra vreg to save/restore m0 which in a point of maximum register +; pressure would trigger an endless loop; the compiler aborts earlier with +; "Incomplete scavenging after 2nd pass" in practice. +;define amdgpu_kernel void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1, +; i32 addrspace(1)* %out2, +; i32 addrspace(1)* %out3, +; i32 addrspace(1)* %out4, +; i32 %one, i32 %two, i32 %three, i32 %four) #2 { +; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x() +; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y() +; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z() +; %x.3 = call i64 @llvm.amdgcn.dispatch.id() +; %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() +; %x.5 = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr() +; store volatile i32 0, i32* undef +; br label %stores +; +;stores: +; store volatile i32 %x.0, i32 addrspace(1)* undef +; store volatile i32 %x.0, i32 addrspace(1)* undef +; store volatile i32 %x.0, i32 addrspace(1)* undef +; store volatile i64 %x.3, i64 addrspace(1)* undef +; store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef +; store volatile i8 addrspace(2)* %x.5, i8 addrspace(2)* addrspace(1)* undef +; +; store i32 %one, i32 addrspace(1)* %out1 +; store i32 %two, i32 addrspace(1)* %out2 +; store i32 %three, i32 addrspace(1)* %out3 +; store i32 %four, i32 addrspace(1)* %out4 +; ret void +;} ; The following test is commented out for now; http://llvm.org/PR31230 ; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}} diff --git a/test/CodeGen/AMDGPU/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll index e245e4296df2af6f742bacebbd2bcde0a6ac3de9..d8f9e4f51ff44dbd78404390db5703a940b8704d 100644 --- a/test/CodeGen/AMDGPU/basic-branch.ll +++ b/test/CodeGen/AMDGPU/basic-branch.ll @@ -34,8 +34,6 @@ end: ; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]] ; GCN: buffer_store_dword -; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; TODO: This waitcnt can be eliminated ; GCN: {{^}}[[END]]: ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/bfe-combine.ll b/test/CodeGen/AMDGPU/bfe-combine.ll new file mode 100644 index 0000000000000000000000000000000000000000..6035e3bf4a5fe504a6c39075d0c53b524f07c5af --- /dev/null +++ b/test/CodeGen/AMDGPU/bfe-combine.ll @@ -0,0 +1,49 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sdwa-peephole=0 < %s | FileCheck --check-prefix=GCN --check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck --check-prefix=GCN --check-prefix=VI-SDWA %s +; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GCN --check-prefix=CI %s + +; GCN-LABEL: {{^}}bfe_combine8: +; VI: v_bfe_u32 v[[BFE:[0-9]+]], v{{[0-9]+}}, 8, 8 +; VI: v_lshlrev_b32_e32 v[[ADDRBASE:[0-9]+]], 2, v[[BFE]] +; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 2 +; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 6, v{{[0-9]+}} +; CI: v_and_b32_e32 v[[ADDRLO:[0-9]+]], 0x3fc, v[[SHR]] +; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] +; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] +; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]: +define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x) { + %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2 + %idx = add i32 %x, %id + %srl = lshr i32 %idx, 8 + %and = and i32 %srl, 255 + %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %and + %val = load i32, i32 addrspace(1)* %ptr, align 4 + store i32 %val, i32 addrspace(1)* %arg, align 4 + ret void +} + +; GCN-LABEL: {{^}}bfe_combine16: +; VI: v_bfe_u32 v[[BFE:[0-9]+]], v{{[0-9]+}}, 16, 16 +; VI: v_lshlrev_b32_e32 v[[ADDRBASE:[0-9]+]], {{[^,]+}}, v[[BFE]] +; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 15 +; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE1:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-SDWA: v_lshlrev_b64 v{{\[}}[[ADDRBASE:[0-9]+]]:{{[^\]+}}], 2, v{{\[}}[[ADDRBASE1]]:{{[^\]+}}] +; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] +; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 1, v{{[0-9]+}} +; CI: v_and_b32_e32 v[[AND:[0-9]+]], 0x7fff8000, v[[SHR]] +; CI: v_lshl_b64 v{{\[}}[[ADDRLO:[0-9]+]]:{{[^\]+}}], v{{\[}}[[AND]]:{{[^\]+}}], 2 +; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] +; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]: +define amdgpu_kernel void @bfe_combine16(i32 addrspace(1)* nocapture %arg, i32 %x) { + %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2 + %idx = add i32 %x, %id + %srl = lshr i32 %idx, 1 + %and = and i32 %srl, 2147450880 + %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %and + %val = load i32, i32 addrspace(1)* %ptr, align 4 + store i32 %val, i32 addrspace(1)* %arg, align 4 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/test/CodeGen/AMDGPU/bfe-patterns.ll b/test/CodeGen/AMDGPU/bfe-patterns.ll index 5e39a6c6774b116ceb2e06c062b92510c741761d..907c8c2216b76965a4fda96f877f50c4b7cb673a 100644 --- a/test/CodeGen/AMDGPU/bfe-patterns.ll +++ b/test/CodeGen/AMDGPU/bfe-patterns.ll @@ -50,7 +50,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, ; GCN-LABEL: {{^}}s_ubfe_sub_i32: ; GCN: s_load_dword [[SRC:s[0-9]+]] ; GCN: s_load_dword [[WIDTH:s[0-9]+]] -; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]] +; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]] ; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]] define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -128,7 +128,7 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, ; GCN-LABEL: {{^}}s_sbfe_sub_i32: ; GCN: s_load_dword [[SRC:s[0-9]+]] ; GCN: s_load_dword [[WIDTH:s[0-9]+]] -; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]] +; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]] ; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]] define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() diff --git a/test/CodeGen/AMDGPU/branch-condition-and.ll b/test/CodeGen/AMDGPU/branch-condition-and.ll index 68b77ea3490e5ccc42efc77b943946677e834713..662ea37a2b997c3cf8e520f8e319f28120cafa82 100644 --- a/test/CodeGen/AMDGPU/branch-condition-and.ll +++ b/test/CodeGen/AMDGPU/branch-condition-and.ll @@ -19,9 +19,8 @@ ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %bb4 ; GCN: ds_write_b32 -; GCN: s_waitcnt -; GCN-NEXT: [[BB5]] +; GCN: [[BB5]] ; GCN: s_or_b64 exec, exec ; GCN-NEXT: s_endpgm ; GCN-NEXT: .Lfunc_end diff --git a/test/CodeGen/AMDGPU/branch-relax-spill.ll b/test/CodeGen/AMDGPU/branch-relax-spill.ll index ede15559c4ffa545892d9d443bec488839799da8..db476c21636fc58645b9fda8ffa51581d545ac97 100644 --- a/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -7,110 +7,110 @@ define amdgpu_kernel void @spill(i32 addrspace(1)* %arg, i32 %cnd) #0 { entry: - %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={SGPR0}"() #0 - %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={SGPR1}"() #0 - %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={SGPR2}"() #0 - %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={SGPR3}"() #0 - %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={SGPR4}"() #0 - %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={SGPR5}"() #0 - %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={SGPR6}"() #0 - %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={SGPR7}"() #0 - %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={SGPR8}"() #0 - %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={SGPR9}"() #0 - %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={SGPR10}"() #0 - %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={SGPR11}"() #0 - %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={SGPR12}"() #0 - %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={SGPR13}"() #0 - %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={SGPR14}"() #0 - %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={SGPR15}"() #0 - %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={SGPR16}"() #0 - %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={SGPR17}"() #0 - %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={SGPR18}"() #0 - %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={SGPR19}"() #0 - %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={SGPR20}"() #0 - %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={SGPR21}"() #0 - %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={SGPR22}"() #0 - %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={SGPR23}"() #0 - %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={SGPR24}"() #0 - %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={SGPR25}"() #0 - %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={SGPR26}"() #0 - %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={SGPR27}"() #0 - %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={SGPR28}"() #0 - %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={SGPR29}"() #0 - %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={SGPR30}"() #0 - %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={SGPR31}"() #0 - %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={SGPR32}"() #0 - %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={SGPR33}"() #0 - %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={SGPR34}"() #0 - %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={SGPR35}"() #0 - %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={SGPR36}"() #0 - %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={SGPR37}"() #0 - %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={SGPR38}"() #0 - %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={SGPR39}"() #0 - %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={SGPR40}"() #0 - %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={SGPR41}"() #0 - %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={SGPR42}"() #0 - %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={SGPR43}"() #0 - %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={SGPR44}"() #0 - %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={SGPR45}"() #0 - %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={SGPR46}"() #0 - %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={SGPR47}"() #0 - %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={SGPR48}"() #0 - %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={SGPR49}"() #0 - %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={SGPR50}"() #0 - %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={SGPR51}"() #0 - %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={SGPR52}"() #0 - %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={SGPR53}"() #0 - %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={SGPR54}"() #0 - %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={SGPR55}"() #0 - %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={SGPR56}"() #0 - %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={SGPR57}"() #0 - %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={SGPR58}"() #0 - %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={SGPR59}"() #0 - %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={SGPR60}"() #0 - %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={SGPR61}"() #0 - %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={SGPR62}"() #0 - %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={SGPR63}"() #0 - %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={SGPR64}"() #0 - %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={SGPR65}"() #0 - %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={SGPR66}"() #0 - %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={SGPR67}"() #0 - %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={SGPR68}"() #0 - %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={SGPR69}"() #0 - %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={SGPR70}"() #0 - %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={SGPR71}"() #0 - %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={SGPR72}"() #0 - %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={SGPR73}"() #0 - %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={SGPR74}"() #0 - %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={SGPR75}"() #0 - %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={SGPR76}"() #0 - %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={SGPR77}"() #0 - %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={SGPR78}"() #0 - %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={SGPR79}"() #0 - %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={SGPR80}"() #0 - %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={SGPR81}"() #0 - %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={SGPR82}"() #0 - %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={SGPR83}"() #0 - %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={SGPR84}"() #0 - %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={SGPR85}"() #0 - %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={SGPR86}"() #0 - %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={SGPR87}"() #0 - %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={SGPR88}"() #0 - %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={SGPR89}"() #0 - %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={SGPR90}"() #0 - %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={SGPR91}"() #0 - %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={SGPR92}"() #0 - %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={SGPR93}"() #0 - %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={SGPR94}"() #0 - %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={SGPR95}"() #0 - %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={SGPR96}"() #0 - %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={SGPR97}"() #0 - %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={SGPR98}"() #0 - %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={SGPR99}"() #0 - %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={SGPR100}"() #0 - %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={SGPR101}"() #0 - %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={SGPR102}"() #0 - %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={SGPR103}"() #0 + %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 + %sgpr1 = tail call i32 asm sideeffect "s_mov_b32 s1, 0", "={s1}"() #0 + %sgpr2 = tail call i32 asm sideeffect "s_mov_b32 s2, 0", "={s2}"() #0 + %sgpr3 = tail call i32 asm sideeffect "s_mov_b32 s3, 0", "={s3}"() #0 + %sgpr4 = tail call i32 asm sideeffect "s_mov_b32 s4, 0", "={s4}"() #0 + %sgpr5 = tail call i32 asm sideeffect "s_mov_b32 s5, 0", "={s5}"() #0 + %sgpr6 = tail call i32 asm sideeffect "s_mov_b32 s6, 0", "={s6}"() #0 + %sgpr7 = tail call i32 asm sideeffect "s_mov_b32 s7, 0", "={s7}"() #0 + %sgpr8 = tail call i32 asm sideeffect "s_mov_b32 s8, 0", "={s8}"() #0 + %sgpr9 = tail call i32 asm sideeffect "s_mov_b32 s9, 0", "={s9}"() #0 + %sgpr10 = tail call i32 asm sideeffect "s_mov_b32 s10, 0", "={s10}"() #0 + %sgpr11 = tail call i32 asm sideeffect "s_mov_b32 s11, 0", "={s11}"() #0 + %sgpr12 = tail call i32 asm sideeffect "s_mov_b32 s12, 0", "={s12}"() #0 + %sgpr13 = tail call i32 asm sideeffect "s_mov_b32 s13, 0", "={s13}"() #0 + %sgpr14 = tail call i32 asm sideeffect "s_mov_b32 s14, 0", "={s14}"() #0 + %sgpr15 = tail call i32 asm sideeffect "s_mov_b32 s15, 0", "={s15}"() #0 + %sgpr16 = tail call i32 asm sideeffect "s_mov_b32 s16, 0", "={s16}"() #0 + %sgpr17 = tail call i32 asm sideeffect "s_mov_b32 s17, 0", "={s17}"() #0 + %sgpr18 = tail call i32 asm sideeffect "s_mov_b32 s18, 0", "={s18}"() #0 + %sgpr19 = tail call i32 asm sideeffect "s_mov_b32 s19, 0", "={s19}"() #0 + %sgpr20 = tail call i32 asm sideeffect "s_mov_b32 s20, 0", "={s20}"() #0 + %sgpr21 = tail call i32 asm sideeffect "s_mov_b32 s21, 0", "={s21}"() #0 + %sgpr22 = tail call i32 asm sideeffect "s_mov_b32 s22, 0", "={s22}"() #0 + %sgpr23 = tail call i32 asm sideeffect "s_mov_b32 s23, 0", "={s23}"() #0 + %sgpr24 = tail call i32 asm sideeffect "s_mov_b32 s24, 0", "={s24}"() #0 + %sgpr25 = tail call i32 asm sideeffect "s_mov_b32 s25, 0", "={s25}"() #0 + %sgpr26 = tail call i32 asm sideeffect "s_mov_b32 s26, 0", "={s26}"() #0 + %sgpr27 = tail call i32 asm sideeffect "s_mov_b32 s27, 0", "={s27}"() #0 + %sgpr28 = tail call i32 asm sideeffect "s_mov_b32 s28, 0", "={s28}"() #0 + %sgpr29 = tail call i32 asm sideeffect "s_mov_b32 s29, 0", "={s29}"() #0 + %sgpr30 = tail call i32 asm sideeffect "s_mov_b32 s30, 0", "={s30}"() #0 + %sgpr31 = tail call i32 asm sideeffect "s_mov_b32 s31, 0", "={s31}"() #0 + %sgpr32 = tail call i32 asm sideeffect "s_mov_b32 s32, 0", "={s32}"() #0 + %sgpr33 = tail call i32 asm sideeffect "s_mov_b32 s33, 0", "={s33}"() #0 + %sgpr34 = tail call i32 asm sideeffect "s_mov_b32 s34, 0", "={s34}"() #0 + %sgpr35 = tail call i32 asm sideeffect "s_mov_b32 s35, 0", "={s35}"() #0 + %sgpr36 = tail call i32 asm sideeffect "s_mov_b32 s36, 0", "={s36}"() #0 + %sgpr37 = tail call i32 asm sideeffect "s_mov_b32 s37, 0", "={s37}"() #0 + %sgpr38 = tail call i32 asm sideeffect "s_mov_b32 s38, 0", "={s38}"() #0 + %sgpr39 = tail call i32 asm sideeffect "s_mov_b32 s39, 0", "={s39}"() #0 + %sgpr40 = tail call i32 asm sideeffect "s_mov_b32 s40, 0", "={s40}"() #0 + %sgpr41 = tail call i32 asm sideeffect "s_mov_b32 s41, 0", "={s41}"() #0 + %sgpr42 = tail call i32 asm sideeffect "s_mov_b32 s42, 0", "={s42}"() #0 + %sgpr43 = tail call i32 asm sideeffect "s_mov_b32 s43, 0", "={s43}"() #0 + %sgpr44 = tail call i32 asm sideeffect "s_mov_b32 s44, 0", "={s44}"() #0 + %sgpr45 = tail call i32 asm sideeffect "s_mov_b32 s45, 0", "={s45}"() #0 + %sgpr46 = tail call i32 asm sideeffect "s_mov_b32 s46, 0", "={s46}"() #0 + %sgpr47 = tail call i32 asm sideeffect "s_mov_b32 s47, 0", "={s47}"() #0 + %sgpr48 = tail call i32 asm sideeffect "s_mov_b32 s48, 0", "={s48}"() #0 + %sgpr49 = tail call i32 asm sideeffect "s_mov_b32 s49, 0", "={s49}"() #0 + %sgpr50 = tail call i32 asm sideeffect "s_mov_b32 s50, 0", "={s50}"() #0 + %sgpr51 = tail call i32 asm sideeffect "s_mov_b32 s51, 0", "={s51}"() #0 + %sgpr52 = tail call i32 asm sideeffect "s_mov_b32 s52, 0", "={s52}"() #0 + %sgpr53 = tail call i32 asm sideeffect "s_mov_b32 s53, 0", "={s53}"() #0 + %sgpr54 = tail call i32 asm sideeffect "s_mov_b32 s54, 0", "={s54}"() #0 + %sgpr55 = tail call i32 asm sideeffect "s_mov_b32 s55, 0", "={s55}"() #0 + %sgpr56 = tail call i32 asm sideeffect "s_mov_b32 s56, 0", "={s56}"() #0 + %sgpr57 = tail call i32 asm sideeffect "s_mov_b32 s57, 0", "={s57}"() #0 + %sgpr58 = tail call i32 asm sideeffect "s_mov_b32 s58, 0", "={s58}"() #0 + %sgpr59 = tail call i32 asm sideeffect "s_mov_b32 s59, 0", "={s59}"() #0 + %sgpr60 = tail call i32 asm sideeffect "s_mov_b32 s60, 0", "={s60}"() #0 + %sgpr61 = tail call i32 asm sideeffect "s_mov_b32 s61, 0", "={s61}"() #0 + %sgpr62 = tail call i32 asm sideeffect "s_mov_b32 s62, 0", "={s62}"() #0 + %sgpr63 = tail call i32 asm sideeffect "s_mov_b32 s63, 0", "={s63}"() #0 + %sgpr64 = tail call i32 asm sideeffect "s_mov_b32 s64, 0", "={s64}"() #0 + %sgpr65 = tail call i32 asm sideeffect "s_mov_b32 s65, 0", "={s65}"() #0 + %sgpr66 = tail call i32 asm sideeffect "s_mov_b32 s66, 0", "={s66}"() #0 + %sgpr67 = tail call i32 asm sideeffect "s_mov_b32 s67, 0", "={s67}"() #0 + %sgpr68 = tail call i32 asm sideeffect "s_mov_b32 s68, 0", "={s68}"() #0 + %sgpr69 = tail call i32 asm sideeffect "s_mov_b32 s69, 0", "={s69}"() #0 + %sgpr70 = tail call i32 asm sideeffect "s_mov_b32 s70, 0", "={s70}"() #0 + %sgpr71 = tail call i32 asm sideeffect "s_mov_b32 s71, 0", "={s71}"() #0 + %sgpr72 = tail call i32 asm sideeffect "s_mov_b32 s72, 0", "={s72}"() #0 + %sgpr73 = tail call i32 asm sideeffect "s_mov_b32 s73, 0", "={s73}"() #0 + %sgpr74 = tail call i32 asm sideeffect "s_mov_b32 s74, 0", "={s74}"() #0 + %sgpr75 = tail call i32 asm sideeffect "s_mov_b32 s75, 0", "={s75}"() #0 + %sgpr76 = tail call i32 asm sideeffect "s_mov_b32 s76, 0", "={s76}"() #0 + %sgpr77 = tail call i32 asm sideeffect "s_mov_b32 s77, 0", "={s77}"() #0 + %sgpr78 = tail call i32 asm sideeffect "s_mov_b32 s78, 0", "={s78}"() #0 + %sgpr79 = tail call i32 asm sideeffect "s_mov_b32 s79, 0", "={s79}"() #0 + %sgpr80 = tail call i32 asm sideeffect "s_mov_b32 s80, 0", "={s80}"() #0 + %sgpr81 = tail call i32 asm sideeffect "s_mov_b32 s81, 0", "={s81}"() #0 + %sgpr82 = tail call i32 asm sideeffect "s_mov_b32 s82, 0", "={s82}"() #0 + %sgpr83 = tail call i32 asm sideeffect "s_mov_b32 s83, 0", "={s83}"() #0 + %sgpr84 = tail call i32 asm sideeffect "s_mov_b32 s84, 0", "={s84}"() #0 + %sgpr85 = tail call i32 asm sideeffect "s_mov_b32 s85, 0", "={s85}"() #0 + %sgpr86 = tail call i32 asm sideeffect "s_mov_b32 s86, 0", "={s86}"() #0 + %sgpr87 = tail call i32 asm sideeffect "s_mov_b32 s87, 0", "={s87}"() #0 + %sgpr88 = tail call i32 asm sideeffect "s_mov_b32 s88, 0", "={s88}"() #0 + %sgpr89 = tail call i32 asm sideeffect "s_mov_b32 s89, 0", "={s89}"() #0 + %sgpr90 = tail call i32 asm sideeffect "s_mov_b32 s90, 0", "={s90}"() #0 + %sgpr91 = tail call i32 asm sideeffect "s_mov_b32 s91, 0", "={s91}"() #0 + %sgpr92 = tail call i32 asm sideeffect "s_mov_b32 s92, 0", "={s92}"() #0 + %sgpr93 = tail call i32 asm sideeffect "s_mov_b32 s93, 0", "={s93}"() #0 + %sgpr94 = tail call i32 asm sideeffect "s_mov_b32 s94, 0", "={s94}"() #0 + %sgpr95 = tail call i32 asm sideeffect "s_mov_b32 s95, 0", "={s95}"() #0 + %sgpr96 = tail call i32 asm sideeffect "s_mov_b32 s96, 0", "={s96}"() #0 + %sgpr97 = tail call i32 asm sideeffect "s_mov_b32 s97, 0", "={s97}"() #0 + %sgpr98 = tail call i32 asm sideeffect "s_mov_b32 s98, 0", "={s98}"() #0 + %sgpr99 = tail call i32 asm sideeffect "s_mov_b32 s99, 0", "={s99}"() #0 + %sgpr100 = tail call i32 asm sideeffect "s_mov_b32 s100, 0", "={s100}"() #0 + %sgpr101 = tail call i32 asm sideeffect "s_mov_b32 s101, 0", "={s101}"() #0 + %sgpr102 = tail call i32 asm sideeffect "s_mov_b32 s102, 0", "={s102}"() #0 + %sgpr103 = tail call i32 asm sideeffect "s_mov_b32 s103, 0", "={s103}"() #0 %vcc_lo = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_LO}"() #0 %vcc_hi = tail call i32 asm sideeffect "s_mov_b32 $0, 0", "={VCC_HI}"() #0 %cmp = icmp eq i32 %cnd, 0 @@ -126,112 +126,112 @@ bb2: ; 28 bytes br label %bb3 bb3: - tail call void asm sideeffect "; reg use $0", "{SGPR0}"(i32 %sgpr0) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR1}"(i32 %sgpr1) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR2}"(i32 %sgpr2) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR3}"(i32 %sgpr3) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR4}"(i32 %sgpr4) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR5}"(i32 %sgpr5) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR6}"(i32 %sgpr6) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR7}"(i32 %sgpr7) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR8}"(i32 %sgpr8) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR9}"(i32 %sgpr9) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR10}"(i32 %sgpr10) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR11}"(i32 %sgpr11) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR12}"(i32 %sgpr12) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR13}"(i32 %sgpr13) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR14}"(i32 %sgpr14) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR15}"(i32 %sgpr15) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR16}"(i32 %sgpr16) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR17}"(i32 %sgpr17) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR18}"(i32 %sgpr18) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR19}"(i32 %sgpr19) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR20}"(i32 %sgpr20) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR21}"(i32 %sgpr21) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR22}"(i32 %sgpr22) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR23}"(i32 %sgpr23) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR24}"(i32 %sgpr24) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR25}"(i32 %sgpr25) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR26}"(i32 %sgpr26) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR27}"(i32 %sgpr27) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR28}"(i32 %sgpr28) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR29}"(i32 %sgpr29) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR30}"(i32 %sgpr30) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR31}"(i32 %sgpr31) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR32}"(i32 %sgpr32) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR33}"(i32 %sgpr33) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR34}"(i32 %sgpr34) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR35}"(i32 %sgpr35) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR36}"(i32 %sgpr36) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR37}"(i32 %sgpr37) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR38}"(i32 %sgpr38) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR39}"(i32 %sgpr39) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR40}"(i32 %sgpr40) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR41}"(i32 %sgpr41) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR42}"(i32 %sgpr42) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR43}"(i32 %sgpr43) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR44}"(i32 %sgpr44) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR45}"(i32 %sgpr45) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR46}"(i32 %sgpr46) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR47}"(i32 %sgpr47) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR48}"(i32 %sgpr48) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR49}"(i32 %sgpr49) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR50}"(i32 %sgpr50) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR51}"(i32 %sgpr51) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR52}"(i32 %sgpr52) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR53}"(i32 %sgpr53) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR54}"(i32 %sgpr54) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR55}"(i32 %sgpr55) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR56}"(i32 %sgpr56) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR57}"(i32 %sgpr57) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR58}"(i32 %sgpr58) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR59}"(i32 %sgpr59) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR60}"(i32 %sgpr60) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR61}"(i32 %sgpr61) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR62}"(i32 %sgpr62) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR63}"(i32 %sgpr63) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR64}"(i32 %sgpr64) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR65}"(i32 %sgpr65) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR66}"(i32 %sgpr66) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR67}"(i32 %sgpr67) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR68}"(i32 %sgpr68) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR69}"(i32 %sgpr69) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR70}"(i32 %sgpr70) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR71}"(i32 %sgpr71) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR72}"(i32 %sgpr72) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR73}"(i32 %sgpr73) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR74}"(i32 %sgpr74) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR75}"(i32 %sgpr75) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR76}"(i32 %sgpr76) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR77}"(i32 %sgpr77) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR78}"(i32 %sgpr78) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR79}"(i32 %sgpr79) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR80}"(i32 %sgpr80) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR81}"(i32 %sgpr81) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR82}"(i32 %sgpr82) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR83}"(i32 %sgpr83) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR84}"(i32 %sgpr84) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR85}"(i32 %sgpr85) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR86}"(i32 %sgpr86) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR87}"(i32 %sgpr87) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR88}"(i32 %sgpr88) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR89}"(i32 %sgpr89) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR90}"(i32 %sgpr90) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR91}"(i32 %sgpr91) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR92}"(i32 %sgpr92) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR93}"(i32 %sgpr93) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR94}"(i32 %sgpr94) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR95}"(i32 %sgpr95) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR96}"(i32 %sgpr96) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR97}"(i32 %sgpr97) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR98}"(i32 %sgpr98) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR99}"(i32 %sgpr99) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR100}"(i32 %sgpr100) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR101}"(i32 %sgpr101) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR102}"(i32 %sgpr102) #0 - tail call void asm sideeffect "; reg use $0", "{SGPR103}"(i32 %sgpr103) #0 - tail call void asm sideeffect "; reg use $0", "{VCC_LO}"(i32 %vcc_lo) #0 - tail call void asm sideeffect "; reg use $0", "{VCC_HI}"(i32 %vcc_hi) #0 + tail call void asm sideeffect "; reg use $0", "{s0}"(i32 %sgpr0) #0 + tail call void asm sideeffect "; reg use $0", "{s1}"(i32 %sgpr1) #0 + tail call void asm sideeffect "; reg use $0", "{s2}"(i32 %sgpr2) #0 + tail call void asm sideeffect "; reg use $0", "{s3}"(i32 %sgpr3) #0 + tail call void asm sideeffect "; reg use $0", "{s4}"(i32 %sgpr4) #0 + tail call void asm sideeffect "; reg use $0", "{s5}"(i32 %sgpr5) #0 + tail call void asm sideeffect "; reg use $0", "{s6}"(i32 %sgpr6) #0 + tail call void asm sideeffect "; reg use $0", "{s7}"(i32 %sgpr7) #0 + tail call void asm sideeffect "; reg use $0", "{s8}"(i32 %sgpr8) #0 + tail call void asm sideeffect "; reg use $0", "{s9}"(i32 %sgpr9) #0 + tail call void asm sideeffect "; reg use $0", "{s10}"(i32 %sgpr10) #0 + tail call void asm sideeffect "; reg use $0", "{s11}"(i32 %sgpr11) #0 + tail call void asm sideeffect "; reg use $0", "{s12}"(i32 %sgpr12) #0 + tail call void asm sideeffect "; reg use $0", "{s13}"(i32 %sgpr13) #0 + tail call void asm sideeffect "; reg use $0", "{s14}"(i32 %sgpr14) #0 + tail call void asm sideeffect "; reg use $0", "{s15}"(i32 %sgpr15) #0 + tail call void asm sideeffect "; reg use $0", "{s16}"(i32 %sgpr16) #0 + tail call void asm sideeffect "; reg use $0", "{s17}"(i32 %sgpr17) #0 + tail call void asm sideeffect "; reg use $0", "{s18}"(i32 %sgpr18) #0 + tail call void asm sideeffect "; reg use $0", "{s19}"(i32 %sgpr19) #0 + tail call void asm sideeffect "; reg use $0", "{s20}"(i32 %sgpr20) #0 + tail call void asm sideeffect "; reg use $0", "{s21}"(i32 %sgpr21) #0 + tail call void asm sideeffect "; reg use $0", "{s22}"(i32 %sgpr22) #0 + tail call void asm sideeffect "; reg use $0", "{s23}"(i32 %sgpr23) #0 + tail call void asm sideeffect "; reg use $0", "{s24}"(i32 %sgpr24) #0 + tail call void asm sideeffect "; reg use $0", "{s25}"(i32 %sgpr25) #0 + tail call void asm sideeffect "; reg use $0", "{s26}"(i32 %sgpr26) #0 + tail call void asm sideeffect "; reg use $0", "{s27}"(i32 %sgpr27) #0 + tail call void asm sideeffect "; reg use $0", "{s28}"(i32 %sgpr28) #0 + tail call void asm sideeffect "; reg use $0", "{s29}"(i32 %sgpr29) #0 + tail call void asm sideeffect "; reg use $0", "{s30}"(i32 %sgpr30) #0 + tail call void asm sideeffect "; reg use $0", "{s31}"(i32 %sgpr31) #0 + tail call void asm sideeffect "; reg use $0", "{s32}"(i32 %sgpr32) #0 + tail call void asm sideeffect "; reg use $0", "{s33}"(i32 %sgpr33) #0 + tail call void asm sideeffect "; reg use $0", "{s34}"(i32 %sgpr34) #0 + tail call void asm sideeffect "; reg use $0", "{s35}"(i32 %sgpr35) #0 + tail call void asm sideeffect "; reg use $0", "{s36}"(i32 %sgpr36) #0 + tail call void asm sideeffect "; reg use $0", "{s37}"(i32 %sgpr37) #0 + tail call void asm sideeffect "; reg use $0", "{s38}"(i32 %sgpr38) #0 + tail call void asm sideeffect "; reg use $0", "{s39}"(i32 %sgpr39) #0 + tail call void asm sideeffect "; reg use $0", "{s40}"(i32 %sgpr40) #0 + tail call void asm sideeffect "; reg use $0", "{s41}"(i32 %sgpr41) #0 + tail call void asm sideeffect "; reg use $0", "{s42}"(i32 %sgpr42) #0 + tail call void asm sideeffect "; reg use $0", "{s43}"(i32 %sgpr43) #0 + tail call void asm sideeffect "; reg use $0", "{s44}"(i32 %sgpr44) #0 + tail call void asm sideeffect "; reg use $0", "{s45}"(i32 %sgpr45) #0 + tail call void asm sideeffect "; reg use $0", "{s46}"(i32 %sgpr46) #0 + tail call void asm sideeffect "; reg use $0", "{s47}"(i32 %sgpr47) #0 + tail call void asm sideeffect "; reg use $0", "{s48}"(i32 %sgpr48) #0 + tail call void asm sideeffect "; reg use $0", "{s49}"(i32 %sgpr49) #0 + tail call void asm sideeffect "; reg use $0", "{s50}"(i32 %sgpr50) #0 + tail call void asm sideeffect "; reg use $0", "{s51}"(i32 %sgpr51) #0 + tail call void asm sideeffect "; reg use $0", "{s52}"(i32 %sgpr52) #0 + tail call void asm sideeffect "; reg use $0", "{s53}"(i32 %sgpr53) #0 + tail call void asm sideeffect "; reg use $0", "{s54}"(i32 %sgpr54) #0 + tail call void asm sideeffect "; reg use $0", "{s55}"(i32 %sgpr55) #0 + tail call void asm sideeffect "; reg use $0", "{s56}"(i32 %sgpr56) #0 + tail call void asm sideeffect "; reg use $0", "{s57}"(i32 %sgpr57) #0 + tail call void asm sideeffect "; reg use $0", "{s58}"(i32 %sgpr58) #0 + tail call void asm sideeffect "; reg use $0", "{s59}"(i32 %sgpr59) #0 + tail call void asm sideeffect "; reg use $0", "{s60}"(i32 %sgpr60) #0 + tail call void asm sideeffect "; reg use $0", "{s61}"(i32 %sgpr61) #0 + tail call void asm sideeffect "; reg use $0", "{s62}"(i32 %sgpr62) #0 + tail call void asm sideeffect "; reg use $0", "{s63}"(i32 %sgpr63) #0 + tail call void asm sideeffect "; reg use $0", "{s64}"(i32 %sgpr64) #0 + tail call void asm sideeffect "; reg use $0", "{s65}"(i32 %sgpr65) #0 + tail call void asm sideeffect "; reg use $0", "{s66}"(i32 %sgpr66) #0 + tail call void asm sideeffect "; reg use $0", "{s67}"(i32 %sgpr67) #0 + tail call void asm sideeffect "; reg use $0", "{s68}"(i32 %sgpr68) #0 + tail call void asm sideeffect "; reg use $0", "{s69}"(i32 %sgpr69) #0 + tail call void asm sideeffect "; reg use $0", "{s70}"(i32 %sgpr70) #0 + tail call void asm sideeffect "; reg use $0", "{s71}"(i32 %sgpr71) #0 + tail call void asm sideeffect "; reg use $0", "{s72}"(i32 %sgpr72) #0 + tail call void asm sideeffect "; reg use $0", "{s73}"(i32 %sgpr73) #0 + tail call void asm sideeffect "; reg use $0", "{s74}"(i32 %sgpr74) #0 + tail call void asm sideeffect "; reg use $0", "{s75}"(i32 %sgpr75) #0 + tail call void asm sideeffect "; reg use $0", "{s76}"(i32 %sgpr76) #0 + tail call void asm sideeffect "; reg use $0", "{s77}"(i32 %sgpr77) #0 + tail call void asm sideeffect "; reg use $0", "{s78}"(i32 %sgpr78) #0 + tail call void asm sideeffect "; reg use $0", "{s79}"(i32 %sgpr79) #0 + tail call void asm sideeffect "; reg use $0", "{s80}"(i32 %sgpr80) #0 + tail call void asm sideeffect "; reg use $0", "{s81}"(i32 %sgpr81) #0 + tail call void asm sideeffect "; reg use $0", "{s82}"(i32 %sgpr82) #0 + tail call void asm sideeffect "; reg use $0", "{s83}"(i32 %sgpr83) #0 + tail call void asm sideeffect "; reg use $0", "{s84}"(i32 %sgpr84) #0 + tail call void asm sideeffect "; reg use $0", "{s85}"(i32 %sgpr85) #0 + tail call void asm sideeffect "; reg use $0", "{s86}"(i32 %sgpr86) #0 + tail call void asm sideeffect "; reg use $0", "{s87}"(i32 %sgpr87) #0 + tail call void asm sideeffect "; reg use $0", "{s88}"(i32 %sgpr88) #0 + tail call void asm sideeffect "; reg use $0", "{s89}"(i32 %sgpr89) #0 + tail call void asm sideeffect "; reg use $0", "{s90}"(i32 %sgpr90) #0 + tail call void asm sideeffect "; reg use $0", "{s91}"(i32 %sgpr91) #0 + tail call void asm sideeffect "; reg use $0", "{s92}"(i32 %sgpr92) #0 + tail call void asm sideeffect "; reg use $0", "{s93}"(i32 %sgpr93) #0 + tail call void asm sideeffect "; reg use $0", "{s94}"(i32 %sgpr94) #0 + tail call void asm sideeffect "; reg use $0", "{s95}"(i32 %sgpr95) #0 + tail call void asm sideeffect "; reg use $0", "{s96}"(i32 %sgpr96) #0 + tail call void asm sideeffect "; reg use $0", "{s97}"(i32 %sgpr97) #0 + tail call void asm sideeffect "; reg use $0", "{s98}"(i32 %sgpr98) #0 + tail call void asm sideeffect "; reg use $0", "{s99}"(i32 %sgpr99) #0 + tail call void asm sideeffect "; reg use $0", "{s100}"(i32 %sgpr100) #0 + tail call void asm sideeffect "; reg use $0", "{s101}"(i32 %sgpr101) #0 + tail call void asm sideeffect "; reg use $0", "{s102}"(i32 %sgpr102) #0 + tail call void asm sideeffect "; reg use $0", "{s103}"(i32 %sgpr103) #0 + tail call void asm sideeffect "; reg use $0", "{vcc_lo}"(i32 %vcc_lo) #0 + tail call void asm sideeffect "; reg use $0", "{vcc_hi}"(i32 %vcc_hi) #0 ret void } diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll index 263059d4a6ed0014f781b144434e517fce439023..d3f835bdf1632cd49258b15791328be6d0bd0d9f 100644 --- a/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -223,7 +223,6 @@ bb3: ; GCN-NEXT: [[BB2]]: ; %bb2 ; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17 ; GCN: buffer_store_dword [[BB2_K]] -; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: [[LONG_JUMP1:BB[0-9]+_[0-9]+]]: ; %bb2 ; GCN-NEXT: s_getpc_b64 vcc @@ -393,7 +392,6 @@ bb3: ; GCN-NEXT: ; BB#2: ; %if_uniform ; GCN: buffer_store_dword -; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: [[ENDIF]]: ; %endif ; GCN-NEXT: s_or_b64 exec, exec, [[MASK]] diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll index cbdcf6aeaf429650fb1f996f9705619003e023af..5dec3e35ab3d0d74a51bd887295e754f5e7a4adc 100644 --- a/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll +++ b/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -1,12 +1,19 @@ -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s -; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s -; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI -check-prefix=OPT-CIVI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI -check-prefix=OPT-CIVI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-GFX9 %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; OPT-LABEL: @test_no_sink_flat_small_offset_i32( -; OPT: getelementptr i32, i32 addrspace(4)* %in -; OPT: br i1 -; OPT-NOT: ptrtoint +; OPT-CIVI: getelementptr i32, i32 addrspace(4)* %in +; OPT-CIVI: br i1 +; OPT-CIVI-NOT: ptrtoint + +; OPT-GFX9: br +; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %0, i64 28 +; OPT-GFX9: %1 = bitcast i8 addrspace(4)* %sunkaddr to i32 addrspace(4)* +; OPT-GFX9: load i32, i32 addrspace(4)* %1 ; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32: ; GCN: flat_load_dword @@ -96,3 +103,105 @@ endif: done: ret void } + +; OPT-LABEL: @test_sink_flat_small_max_flat_offset( +; OPT-CIVI: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095 +; OPT-CIVI: br +; OPT-CIVI-NOT: getelementptr +; OPT-CIVI: load i8, i8 addrspace(4)* %in.gep + +; OPT-GFX9: br +; OPT-GFX9: %sunkaddr = getelementptr i8, i8 addrspace(4)* %in, i64 4095 +; OPT-GFX9: load i8, i8 addrspace(4)* %sunkaddr + +; GCN-LABEL: {{^}}test_sink_flat_small_max_flat_offset: +; GFX9: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}} +; CIVI: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @test_sink_flat_small_max_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024 + %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4095 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i8, i8 addrspace(4)* %in.gep + %tmp2 = sext i8 %tmp1 to i32 + br label %endif + +endif: + %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset( +; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096 +; OPT: br +; OPT-NOT: getelementptr +; OPT: load i8, i8 addrspace(4)* %in.gep + +; GCN-LABEL: {{^}}test_sink_flat_small_max_plus_1_flat_offset: +; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @test_sink_flat_small_max_plus_1_flat_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in) #1 { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 99999 + %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 4096 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i8, i8 addrspace(4)* %in.gep + %tmp2 = sext i8 %tmp1 to i32 + br label %endif + +endif: + %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_no_sink_flat_reg_offset( +; OPT: %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg +; OPT: br + +; OPT-NOT: getelementptr +; OPT: load i8, i8 addrspace(4)* %in.gep + +; GCN-LABEL: {{^}}test_no_sink_flat_reg_offset: +; GCN: flat_load_sbyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]$}} +define amdgpu_kernel void @test_no_sink_flat_reg_offset(i32 addrspace(4)* %out, i8 addrspace(4)* %in, i64 %reg) #1 { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024 + %in.gep = getelementptr i8, i8 addrspace(4)* %in, i64 %reg + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i8, i8 addrspace(4)* %in.gep + %tmp2 = sext i8 %tmp1 to i32 + br label %endif + +endif: + %x = phi i32 [ %tmp2, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} + +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } +attributes #2 = { nounwind argmemonly } diff --git a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir index fbfd0fbf930884a180204a1c847ae29717fafc68..6ecf75c1acec37a225aa68e02763d86439082d17 100644 --- a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir +++ b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir @@ -24,6 +24,10 @@ ret void } + define amdgpu_ps void @v_max_reg_imm_f32() #0 { + ret void + } + attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" } ... @@ -422,3 +426,19 @@ body: | S_ENDPGM ... +--- + +# Pass used to crash with immediate second operand of max +name: v_max_reg_imm_f32 +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } +body: | + bb.0 (%ir-block.0): + liveins: %vgpr0 + + %0 = COPY %vgpr0 + %1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit %exec + +... diff --git a/test/CodeGen/AMDGPU/coalescer_distribute.ll b/test/CodeGen/AMDGPU/coalescer_distribute.ll index 7ca2612598c8493f2ed874b4107b1822e93eb2d6..d0276a3fb59c3186ff43b5365617b888fbb51596 100644 --- a/test/CodeGen/AMDGPU/coalescer_distribute.ll +++ b/test/CodeGen/AMDGPU/coalescer_distribute.ll @@ -5,7 +5,7 @@ target triple = "amdgcn--" define spir_kernel void @hoge() { bb: - %tmp = tail call i32 @llvm.r600.read.tidig.x() + %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() br i1 undef, label %bb2, label %bb23 bb2: @@ -50,4 +50,4 @@ bb34: ret void } -declare i32 @llvm.r600.read.tidig.x() +declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/test/CodeGen/AMDGPU/code-object-metadata-deduce-ro-arg.ll b/test/CodeGen/AMDGPU/code-object-metadata-deduce-ro-arg.ll new file mode 100644 index 0000000000000000000000000000000000000000..a33c3646e253e5f9da59bb55ce2b8d6bc8db8519 --- /dev/null +++ b/test/CodeGen/AMDGPU/code-object-metadata-deduce-ro-arg.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck %s + +; CHECK: - Name: test_ro_arg +; CHECK: Args: +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: GlobalBuffer +; CHECK-NEXT: ValueType: F32 +; CHECK-NEXT: AccQual: ReadOnly +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: IsConst: true +; CHECK-NEXT: IsRestrict: true +; CHECK-NEXT: TypeName: 'float*' + +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: GlobalBuffer +; CHECK-NEXT: ValueType: F32 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: TypeName: 'float*' + +define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, float addrspace(1)* %out) + !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 + !kernel_arg_base_type !2 !kernel_arg_type_qual !3 { + ret void +} + +!0 = !{i32 1, i32 1} +!1 = !{!"none", !"none"} +!2 = !{!"float*", !"float*"} +!3 = !{!"const restrict", !""} + diff --git a/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll b/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll index 88ba310a92caef8b7d4addc768149c07a3f75852..a68ddabd95609b750f2d497172966827128bf8df 100644 --- a/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll +++ b/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll @@ -1253,8 +1253,8 @@ define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, ; NOTES-NEXT: Owner Data size Description ; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001) ; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003) -; GFX700: AMD 0x00009171 Unknown note type: (0x0000000a) -; GFX800: AMD 0x00009190 Unknown note type: (0x0000000a) -; GFX900: AMD 0x00009171 Unknown note type: (0x0000000a) +; GFX700: AMD 0x00008b06 Unknown note type: (0x0000000a) +; GFX800: AMD 0x00008e6a Unknown note type: (0x0000000a) +; GFX900: AMD 0x00008b06 Unknown note type: (0x0000000a) ; PARSER: AMDGPU Code Object Metadata Parser Test: PASS diff --git a/test/CodeGen/AMDGPU/code-object-metadata-images.ll b/test/CodeGen/AMDGPU/code-object-metadata-images.ll new file mode 100644 index 0000000000000000000000000000000000000000..918560469852b695a39109266b050be674bc92e2 --- /dev/null +++ b/test/CodeGen/AMDGPU/code-object-metadata-images.ll @@ -0,0 +1,80 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s + +%opencl.image1d_t = type opaque +%opencl.image1d_array_t = type opaque +%opencl.image1d_buffer_t = type opaque +%opencl.image2d_t = type opaque +%opencl.image2d_array_t = type opaque +%opencl.image2d_array_depth_t = type opaque +%opencl.image2d_array_msaa_t = type opaque +%opencl.image2d_array_msaa_depth_t = type opaque +%opencl.image2d_depth_t = type opaque +%opencl.image2d_msaa_t = type opaque +%opencl.image2d_msaa_depth_t = type opaque +%opencl.image3d_t = type opaque + +; CHECK: --- +; CHECK: Version: [ 1, 0 ] + +; CHECK: Kernels: +; CHECK: - Name: test +; CHECK: Args: +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image1d_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image1d_array_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image1d_buffer_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_array_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_array_depth_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_array_msaa_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_array_msaa_depth_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_depth_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_msaa_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image2d_msaa_depth_t +; CHECK: - Size: 8 +; CHECK: ValueKind: Image +; CHECK: TypeName: image3d_t +define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a, + %opencl.image1d_array_t addrspace(1)* %b, + %opencl.image1d_buffer_t addrspace(1)* %c, + %opencl.image2d_t addrspace(1)* %d, + %opencl.image2d_array_t addrspace(1)* %e, + %opencl.image2d_array_depth_t addrspace(1)* %f, + %opencl.image2d_array_msaa_t addrspace(1)* %g, + %opencl.image2d_array_msaa_depth_t addrspace(1)* %h, + %opencl.image2d_depth_t addrspace(1)* %i, + %opencl.image2d_msaa_t addrspace(1)* %j, + %opencl.image2d_msaa_depth_t addrspace(1)* %k, + %opencl.image3d_t addrspace(1)* %l) + !kernel_arg_type !1 !kernel_arg_base_type !1 { + ret void +} + +!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t", + !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t", + !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t", + !"image2d_depth_t", !"image2d_msaa_t", !"image2d_msaa_depth_t", + !"image3d_t"} diff --git a/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll b/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll index 801029be8cb9fa86c1832d20b0280a8ef53f43e8..0ffc922031539da8578af8b95b2a1d656993797a 100644 --- a/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll +++ b/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll @@ -12,7 +12,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK: DebugProps: ; CHECK: DebuggerABIVersion: [ 1, 0 ] ; CHECK: ReservedNumVGPRs: 4 -; CHECK: ReservedFirstVGPR: 11 +; GFX700: ReservedFirstVGPR: 8 +; GFX800: ReservedFirstVGPR: 8 +; GFX9: ReservedFirstVGPR: 14 ; CHECK: PrivateSegmentBufferSGPR: 0 ; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11 define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 { diff --git a/test/CodeGen/AMDGPU/commute-compares.ll b/test/CodeGen/AMDGPU/commute-compares.ll index 973c4544d97a704bb113393db0829e4f84831173..caba83c504282c61855c3a55965671a63272333f 100644 --- a/test/CodeGen/AMDGPU/commute-compares.ll +++ b/test/CodeGen/AMDGPU/commute-compares.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() #0 @@ -35,7 +35,7 @@ define amdgpu_kernel void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspa ; FIXME: Why isn't this being folded as a constant? ; GCN-LABEL: {{^}}commute_ne_litk_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039 -; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}} +; GCN: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, [[K]] define amdgpu_kernel void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -99,11 +99,9 @@ define amdgpu_kernel void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrsp ret void } -; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm - ; GCN-LABEL: {{^}}commute_ule_64_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}} -; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}} +; GCN: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, [[K]] define amdgpu_kernel void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid @@ -702,7 +700,7 @@ define amdgpu_kernel void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double ad ; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}} -; GCN: v_cmp_eq_u32_e32 vcc, [[FI]], v{{[0-9]+}} +; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]] define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 { entry: %stack0 = alloca i32 diff --git a/test/CodeGen/AMDGPU/commute_modifiers.ll b/test/CodeGen/AMDGPU/commute_modifiers.ll index 8820e4fd80e56d6df48c251d50c9d104e7c74f55..f38c1f8aa6edb8de2b328434d7ec8950d40fe56b 100644 --- a/test/CodeGen/AMDGPU/commute_modifiers.ll +++ b/test/CodeGen/AMDGPU/commute_modifiers.ll @@ -51,7 +51,7 @@ define amdgpu_kernel void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, fl ; FUNC-LABEL: @commute_add_lit_fabs_f32 ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000 -; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[K]], |[[X]]| +; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]] ; SI: buffer_store_dword [[REG]] define amdgpu_kernel void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll b/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll index 0831d250b9e7dd2c3f0d0a3fa7287ef4683da914..8611cd080e15d05752ea79ad7a0eb5ffd109a200 100644 --- a/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll +++ b/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll @@ -25,7 +25,7 @@ define amdgpu_kernel void @fold_mi_s_and_0(i32 addrspace(1)* %out, i32 %x) #0 { } ; GCN-LABEL: {{^}}fold_mi_v_or_0: -; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]] +; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]] ; GCN-NOT: [[RESULT]] ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @fold_mi_v_or_0(i32 addrspace(1)* %out) { @@ -50,7 +50,7 @@ define amdgpu_kernel void @fold_mi_s_or_0(i32 addrspace(1)* %out, i32 %x) #0 { } ; GCN-LABEL: {{^}}fold_mi_v_xor_0: -; GCN: v_mbcnt_lo_u32_b32_e64 [[RESULT:v[0-9]+]] +; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]] ; GCN-NOT: [[RESULT]] ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @fold_mi_v_xor_0(i32 addrspace(1)* %out) { @@ -86,8 +86,8 @@ define amdgpu_kernel void @fold_mi_s_not_0(i32 addrspace(1)* %out, i32 %x) #0 { } ; GCN-LABEL: {{^}}fold_mi_v_not_0: -; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}} -; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}} +; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}} ; GCN-NEXT: v_not_b32_e32 v[[RESULT_LO]] ; GCN-NEXT: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], -1{{$}} ; GCN-NEXT: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} @@ -104,8 +104,8 @@ define amdgpu_kernel void @fold_mi_v_not_0(i64 addrspace(1)* %out) { ; GCN: buffer_load_dwordx2 ; GCN: buffer_load_dwordx2 v{{\[}}[[VREG1_LO:[0-9]+]]:[[VREG1_HI:[0-9]+]]{{\]}} -; GCN: v_bcnt_u32_b32_e64 v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}} -; GCN: v_bcnt_u32_b32_e{{[0-9]+}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, 0{{$}} +; GCN: v_bcnt_u32_b32{{(_e32)*(_e64)*}} v[[RESULT_LO:[0-9]+]], v{{[0-9]+}}, v[[RESULT_LO]]{{$}} ; GCN-DAG: v_not_b32_e32 v[[RESULT_LO]], v[[RESULT_LO]] ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO]], v[[VREG1_LO]], v[[RESULT_LO]] ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], v[[VREG1_HI]] diff --git a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index d3e6c11ef908460fbda2c95901a776c8d9fefa02..79d9b169187830a2aa2d15fe4c2e7d8a829b6348 100644 --- a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -37,22 +37,21 @@ ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: {{^}}BB{{[0-9]+}}_1: ; %if ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD1:v[0-9]+]] +; GCN: s_waitcnt lgkmcnt(0) ; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload -; GCN: s_waitcnt vmcnt(0) ; Spill val register ; GCN: v_add_i32_e32 [[VAL:v[0-9]+]], vcc, [[LOAD1]], [[RELOAD_LOAD0]] ; GCN: buffer_store_dword [[VAL]], off, s[0:3], s7 offset:[[VAL_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_waitcnt vmcnt(0) ; VMEM: [[ENDIF]]: ; Reload and restore exec mask +; VGPR: s_waitcnt lgkmcnt(0) ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -119,7 +118,6 @@ endif: ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]] ; GCN-NEXT: s_cbranch_execz [[END]] @@ -130,7 +128,6 @@ endif: ; GCN: v_cmp_ne_u32_e32 vcc, ; GCN: s_and_b64 vcc, exec, vcc ; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_cbranch_vccnz [[LOOP]] @@ -197,7 +194,6 @@ end: ; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: s_mov_b64 exec, [[CMP0]] -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; FIXME: It makes no sense to put this skip here ; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]] @@ -235,7 +231,6 @@ end: ; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}} -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN-NEXT: s_cbranch_execz [[ENDIF]] @@ -245,14 +240,12 @@ end: ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: [[ELSE]]: ; %else ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload ; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_waitcnt vmcnt(0) expcnt(0) ; GCN-NEXT: s_branch [[FLOW]] ; GCN: [[ENDIF]]: diff --git a/test/CodeGen/AMDGPU/copy-illegal-type.ll b/test/CodeGen/AMDGPU/copy-illegal-type.ll index 026dd7ca6c870dc532be57b23ec15923ca200d97..d772d1b679369800398bdf64a83a11a321e6b16b 100644 --- a/test/CodeGen/AMDGPU/copy-illegal-type.ll +++ b/test/CodeGen/AMDGPU/copy-illegal-type.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-sdwa-peephole=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone diff --git a/test/CodeGen/AMDGPU/ctlz.ll b/test/CodeGen/AMDGPU/ctlz.ll index e252971e3f427e47acac077bfc7813d12a95e2c8..149c50685b1db3a02fe7c68a06a3bbc09c4e7df2 100644 --- a/test/CodeGen/AMDGPU/ctlz.ll +++ b/test/CodeGen/AMDGPU/ctlz.ll @@ -135,7 +135,6 @@ define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 } ; FUNC-LABEL: {{^}}v_ctlz_i64: -; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} ; GCN-DAG: v_cmp_eq_u32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]] ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]] @@ -145,7 +144,7 @@ define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 ; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[HI]], v[[LO]] ; GCN-DAG: v_cmp_ne_u32_e32 vcc, 0, [[OR]] ; GCN-DAG: v_cndmask_b32_e32 v[[CLTZ_LO:[0-9]+]], 64, v[[CTLZ:[0-9]+]], vcc -; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI]]{{\]}} +; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CLTZ_LO]]:[[CTLZ_HI:[0-9]+]]{{\]}} define amdgpu_kernel void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid diff --git a/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll index 87ba563a740f8539a75af5322e86bbd095bad0c5..48f3e4401f1a8b0136f78378ca80eb8e9f34f9af 100644 --- a/test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -121,8 +121,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias ; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]] ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]] ; GCN-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]] -; GCN-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}} -; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}} +; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}} define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid diff --git a/test/CodeGen/AMDGPU/ctpop.ll b/test/CodeGen/AMDGPU/ctpop.ll index a29e72ea57cb37321875d52bb1efa1a30b3c3c60..aa913ad406d2bc9af48b13859d38257b7cc6c117 100644 --- a/test/CodeGen/AMDGPU/ctpop.ll +++ b/test/CodeGen/AMDGPU/ctpop.ll @@ -25,7 +25,7 @@ define amdgpu_kernel void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) ; XXX - Why 0 in register? ; FUNC-LABEL: {{^}}v_ctpop_i32: ; GCN: buffer_load_dword [[VAL:v[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 0 ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -40,9 +40,9 @@ define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrs ; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32: ; GCN: buffer_load_dword [[VAL1:v[0-9]+]], ; GCN: buffer_load_dword [[VAL0:v[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], [[VAL1]], 0 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] -; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] +; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -61,7 +61,7 @@ define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, ; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32: ; GCN: buffer_load_dword [[VAL0:v[0-9]+]], ; GCN: s_waitcnt -; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} +; GCN-NEXT: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}} ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind { @@ -73,8 +73,8 @@ define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, } ; FUNC-LABEL: {{^}}v_ctpop_v2i32: -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} ; GCN: s_endpgm ; EG: BCNT_INT @@ -87,10 +87,10 @@ define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, < } ; FUNC-LABEL: {{^}}v_ctpop_v4i32: -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} ; GCN: s_endpgm ; EG: BCNT_INT @@ -105,14 +105,14 @@ define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, < } ; FUNC-LABEL: {{^}}v_ctpop_v8i32: -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} ; GCN: s_endpgm ; EG: BCNT_INT @@ -131,22 +131,22 @@ define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, < } ; FUNC-LABEL: {{^}}v_ctpop_v16i32: -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 -; GCN: v_bcnt_u32_b32_e64 +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} +; GCN: v_bcnt_u32_b32{{(_e64)*}} ; GCN: s_endpgm ; EG: BCNT_INT @@ -174,7 +174,7 @@ define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant: ; GCN: buffer_load_dword [[VAL:v[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4 ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -189,7 +189,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noa ; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv: ; GCN: buffer_load_dword [[VAL:v[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], 4 ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -206,7 +206,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], ; GCN-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] -; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] +; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { @@ -220,7 +220,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %ou ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var: ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], ; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -236,7 +236,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i ; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv: ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], ; GCN-DAG: s_load_dword [[VAR:s[0-9]+]], -; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -253,7 +253,7 @@ define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %ou ; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}} ; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] -; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] +; VI: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/ctpop64.ll b/test/CodeGen/AMDGPU/ctpop64.ll index 2610684ad9ee7c1009ee777ac773793004ae4a57..f18bd9fd8174b1c5fdc9e1d38397b1683932dab6 100644 --- a/test/CodeGen/AMDGPU/ctpop64.ll +++ b/test/CodeGen/AMDGPU/ctpop64.ll @@ -26,9 +26,9 @@ define amdgpu_kernel void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) ; FUNC-LABEL: {{^}}v_ctpop_i64: ; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] -; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { @@ -41,9 +41,9 @@ define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrs ; FUNC-LABEL: {{^}}v_ctpop_i64_user: ; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, -; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 +; GCN: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 ; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] -; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; VI-NEXT: v_bcnt_u32_b32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] ; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]] ; GCN-DAG: v_mov_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}} ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} @@ -171,11 +171,11 @@ define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val) ; FUNC-LABEL: {{^}}v_ctpop_i128: ; GCN: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: v_bcnt_u32_b32_e64 [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0 -; GCN-DAG: v_bcnt_u32_b32{{_e32|_e64}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]] +; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT0:v[0-9]+]], v{{[0-9]+}}, 0 +; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT1:v[0-9]+]], v[[VAL3]], [[MIDRESULT0]] -; GCN-DAG: v_bcnt_u32_b32_e64 [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0 -; GCN-DAG: v_bcnt_u32_b32{{_e32|_e64}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]] +; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0 +; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]] ; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]] diff --git a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index e16daa6fad9d09129b90913709c2839e6430b022..0328ce31002df9aace971435d844ea1cd52e69fc 100644 --- a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -94,7 +94,6 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1) ; GCN-DAG: v_cvt_f32_ubyte3_e32 ; GCN-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 24 -; GCN-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 16 ; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16 ; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 8 diff --git a/test/CodeGen/AMDGPU/detect-dead-lanes.mir b/test/CodeGen/AMDGPU/detect-dead-lanes.mir index 32e6f7cc0cdc71135f5952aadfbaad5a28c0d28b..3148b9b8ff9dbbf7bfabab5461907dbe80be9e1c 100644 --- a/test/CodeGen/AMDGPU/detect-dead-lanes.mir +++ b/test/CodeGen/AMDGPU/detect-dead-lanes.mir @@ -294,7 +294,6 @@ registers: - { id: 5, class: sreg_128 } body: | bb.0: - successors: %bb.1 S_NOP 0, implicit-def %0 S_NOP 0, implicit-def %1 S_NOP 0, implicit-def %2 @@ -302,7 +301,6 @@ body: | S_BRANCH %bb.1 bb.1: - successors: %bb.1, %bb.2 %4 = PHI %3, %bb.0, %5, %bb.1 ; let's swiffle some lanes around for fun... @@ -348,7 +346,6 @@ registers: - { id: 6, class: sreg_128 } body: | bb.0: - successors: %bb.1 S_NOP 0, implicit-def %0 S_NOP 0, implicit-def %1 S_NOP 0, implicit-def dead %2 @@ -357,7 +354,6 @@ body: | S_BRANCH %bb.1 bb.1: - successors: %bb.1, %bb.2 %5 = PHI %4, %bb.0, %6, %bb.1 ; rotate lanes, but skip sub2 lane... @@ -396,13 +392,11 @@ registers: - { id: 3, class: sreg_128 } body: | bb.0: - successors: %bb.1 S_NOP 0, implicit-def %0 %1 = REG_SEQUENCE %0, %subreg.sub0 S_BRANCH %bb.1 bb.1: - successors: %bb.1, %bb.2 %2 = PHI %1, %bb.0, %3, %bb.1 ; rotate subreg lanes, skipping sub1 diff --git a/test/CodeGen/AMDGPU/ds-combine-large-stride.ll b/test/CodeGen/AMDGPU/ds-combine-large-stride.ll new file mode 100644 index 0000000000000000000000000000000000000000..a723b0210adebb106545a7969cd76cec8657540d --- /dev/null +++ b/test/CodeGen/AMDGPU/ds-combine-large-stride.ll @@ -0,0 +1,412 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s + +; GCN-LABEL: ds_read32_combine_stride_400: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100 +; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 +; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 +; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100 +define amdgpu_kernel void @ds_read32_combine_stride_400(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +bb: + %tmp = load float, float addrspace(3)* %arg, align 4 + %tmp2 = fadd float %tmp, 0.000000e+00 + %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 + %tmp4 = load float, float addrspace(3)* %tmp3, align 4 + %tmp5 = fadd float %tmp2, %tmp4 + %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 + %tmp7 = load float, float addrspace(3)* %tmp6, align 4 + %tmp8 = fadd float %tmp5, %tmp7 + %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 + %tmp10 = load float, float addrspace(3)* %tmp9, align 4 + %tmp11 = fadd float %tmp8, %tmp10 + %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 + %tmp13 = load float, float addrspace(3)* %tmp12, align 4 + %tmp14 = fadd float %tmp11, %tmp13 + %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 + %tmp16 = load float, float addrspace(3)* %tmp15, align 4 + %tmp17 = fadd float %tmp14, %tmp16 + %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 + %tmp19 = load float, float addrspace(3)* %tmp18, align 4 + %tmp20 = fadd float %tmp17, %tmp19 + %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 + %tmp22 = load float, float addrspace(3)* %tmp21, align 4 + %tmp23 = fadd float %tmp20, %tmp22 + store float %tmp23, float *%arg1, align 4 + ret void +} + +; GCN-LABEL: ds_read32_combine_stride_400_back: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100 +; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 +; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 +; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:100 +define amdgpu_kernel void @ds_read32_combine_stride_400_back(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +bb: + %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 + %tmp2 = load float, float addrspace(3)* %tmp, align 4 + %tmp3 = fadd float %tmp2, 0.000000e+00 + %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 + %tmp5 = load float, float addrspace(3)* %tmp4, align 4 + %tmp6 = fadd float %tmp3, %tmp5 + %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 + %tmp8 = load float, float addrspace(3)* %tmp7, align 4 + %tmp9 = fadd float %tmp6, %tmp8 + %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 + %tmp11 = load float, float addrspace(3)* %tmp10, align 4 + %tmp12 = fadd float %tmp9, %tmp11 + %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 + %tmp14 = load float, float addrspace(3)* %tmp13, align 4 + %tmp15 = fadd float %tmp12, %tmp14 + %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 + %tmp17 = load float, float addrspace(3)* %tmp16, align 4 + %tmp18 = fadd float %tmp15, %tmp17 + %tmp19 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 + %tmp20 = load float, float addrspace(3)* %tmp19, align 4 + %tmp21 = fadd float %tmp18, %tmp20 + %tmp22 = load float, float addrspace(3)* %arg, align 4 + %tmp23 = fadd float %tmp21, %tmp22 + store float %tmp23, float *%arg1, align 4 + ret void +} + +; GCN-LABEL: ds_read32_combine_stride_8192: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:32 +; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:64 offset1:96 +; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:128 offset1:160 +; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:192 offset1:224 +define amdgpu_kernel void @ds_read32_combine_stride_8192(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +bb: + %tmp = load float, float addrspace(3)* %arg, align 4 + %tmp2 = fadd float %tmp, 0.000000e+00 + %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048 + %tmp4 = load float, float addrspace(3)* %tmp3, align 4 + %tmp5 = fadd float %tmp2, %tmp4 + %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096 + %tmp7 = load float, float addrspace(3)* %tmp6, align 4 + %tmp8 = fadd float %tmp5, %tmp7 + %tmp9 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144 + %tmp10 = load float, float addrspace(3)* %tmp9, align 4 + %tmp11 = fadd float %tmp8, %tmp10 + %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192 + %tmp13 = load float, float addrspace(3)* %tmp12, align 4 + %tmp14 = fadd float %tmp11, %tmp13 + %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240 + %tmp16 = load float, float addrspace(3)* %tmp15, align 4 + %tmp17 = fadd float %tmp14, %tmp16 + %tmp18 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288 + %tmp19 = load float, float addrspace(3)* %tmp18, align 4 + %tmp20 = fadd float %tmp17, %tmp19 + %tmp21 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336 + %tmp22 = load float, float addrspace(3)* %tmp21, align 4 + %tmp23 = fadd float %tmp20, %tmp22 + store float %tmp23, float *%arg1, align 4 + ret void +} + +; GCN-LABEL: ds_read32_combine_stride_8192_shifted: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32 +; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32 +; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32 +define amdgpu_kernel void @ds_read32_combine_stride_8192_shifted(float addrspace(3)* nocapture readonly %arg, float *nocapture %arg1) { +bb: + %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2 + %tmp2 = load float, float addrspace(3)* %tmp, align 4 + %tmp3 = fadd float %tmp2, 0.000000e+00 + %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2050 + %tmp5 = load float, float addrspace(3)* %tmp4, align 4 + %tmp6 = fadd float %tmp3, %tmp5 + %tmp7 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4098 + %tmp8 = load float, float addrspace(3)* %tmp7, align 4 + %tmp9 = fadd float %tmp6, %tmp8 + %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6146 + %tmp11 = load float, float addrspace(3)* %tmp10, align 4 + %tmp12 = fadd float %tmp9, %tmp11 + %tmp13 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8194 + %tmp14 = load float, float addrspace(3)* %tmp13, align 4 + %tmp15 = fadd float %tmp12, %tmp14 + %tmp16 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10242 + %tmp17 = load float, float addrspace(3)* %tmp16, align 4 + %tmp18 = fadd float %tmp15, %tmp17 + store float %tmp18, float *%arg1, align 4 + ret void +} + +; GCN-LABEL: ds_read64_combine_stride_400: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50 +; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150 +; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250 +; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:50 +define amdgpu_kernel void @ds_read64_combine_stride_400(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) { +bb: + %tmp = load double, double addrspace(3)* %arg, align 8 + %tmp2 = fadd double %tmp, 0.000000e+00 + %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 50 + %tmp4 = load double, double addrspace(3)* %tmp3, align 8 + %tmp5 = fadd double %tmp2, %tmp4 + %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100 + %tmp7 = load double, double addrspace(3)* %tmp6, align 8 + %tmp8 = fadd double %tmp5, %tmp7 + %tmp9 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150 + %tmp10 = load double, double addrspace(3)* %tmp9, align 8 + %tmp11 = fadd double %tmp8, %tmp10 + %tmp12 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200 + %tmp13 = load double, double addrspace(3)* %tmp12, align 8 + %tmp14 = fadd double %tmp11, %tmp13 + %tmp15 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250 + %tmp16 = load double, double addrspace(3)* %tmp15, align 8 + %tmp17 = fadd double %tmp14, %tmp16 + %tmp18 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300 + %tmp19 = load double, double addrspace(3)* %tmp18, align 8 + %tmp20 = fadd double %tmp17, %tmp19 + %tmp21 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350 + %tmp22 = load double, double addrspace(3)* %tmp21, align 8 + %tmp23 = fadd double %tmp20, %tmp22 + store double %tmp23, double *%arg1, align 8 + ret void +} + +; GCN-LABEL: ds_read64_combine_stride_8192_shifted: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16 +; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16 +; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16 +define amdgpu_kernel void @ds_read64_combine_stride_8192_shifted(double addrspace(3)* nocapture readonly %arg, double *nocapture %arg1) { +bb: + %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1 + %tmp2 = load double, double addrspace(3)* %tmp, align 8 + %tmp3 = fadd double %tmp2, 0.000000e+00 + %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025 + %tmp5 = load double, double addrspace(3)* %tmp4, align 8 + %tmp6 = fadd double %tmp3, %tmp5 + %tmp7 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049 + %tmp8 = load double, double addrspace(3)* %tmp7, align 8 + %tmp9 = fadd double %tmp6, %tmp8 + %tmp10 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073 + %tmp11 = load double, double addrspace(3)* %tmp10, align 8 + %tmp12 = fadd double %tmp9, %tmp11 + %tmp13 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097 + %tmp14 = load double, double addrspace(3)* %tmp13, align 8 + %tmp15 = fadd double %tmp12, %tmp14 + %tmp16 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121 + %tmp17 = load double, double addrspace(3)* %tmp16, align 8 + %tmp18 = fadd double %tmp15, %tmp17 + store double %tmp18, double *%arg1, align 8 + ret void +} + +; GCN-LABEL: ds_write32_combine_stride_400: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 +; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 +; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 +; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 +define amdgpu_kernel void @ds_write32_combine_stride_400(float addrspace(3)* nocapture %arg) { +bb: + store float 1.000000e+00, float addrspace(3)* %arg, align 4 + %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 + store float 1.000000e+00, float addrspace(3)* %tmp, align 4 + %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 + store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 + %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 + store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 + %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 + store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 + %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 + store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 + %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 + store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 + %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 + store float 1.000000e+00, float addrspace(3)* %tmp6, align 4 + ret void +} + +; GCN-LABEL: ds_write32_combine_stride_400_back: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 +; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 +; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 +; GCN-DAG: ds_write2_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 +define amdgpu_kernel void @ds_write32_combine_stride_400_back(float addrspace(3)* nocapture %arg) { +bb: + %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 700 + store float 1.000000e+00, float addrspace(3)* %tmp, align 4 + %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 600 + store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 + %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 500 + store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 + %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 400 + store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 + %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 300 + store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 + %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 200 + store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 + %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 100 + store float 1.000000e+00, float addrspace(3)* %tmp6, align 4 + store float 1.000000e+00, float addrspace(3)* %arg, align 4 + ret void +} + +; GCN-LABEL: ds_write32_combine_stride_8192: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 +; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:64 offset1:96 +; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:128 offset1:160 +; GCN-DAG: ds_write2st64_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset0:192 offset1:224 +define amdgpu_kernel void @ds_write32_combine_stride_8192(float addrspace(3)* nocapture %arg) { +bb: + store float 1.000000e+00, float addrspace(3)* %arg, align 4 + %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 2048 + store float 1.000000e+00, float addrspace(3)* %tmp, align 4 + %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4096 + store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 + %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6144 + store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 + %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8192 + store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 + %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10240 + store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 + %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 12288 + store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 + %tmp6 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14336 + store float 1.000000e+00, float addrspace(3)* %tmp6, align 4 + ret void +} + +; GCN-LABEL: ds_write32_combine_stride_8192_shifted: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]] +; GCN-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 +; GCN-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 +; GCN-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 +define amdgpu_kernel void @ds_write32_combine_stride_8192_shifted(float addrspace(3)* nocapture %arg) { +bb: + %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 1 + store float 1.000000e+00, float addrspace(3)* %tmp, align 4 + %tmp1 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2049 + store float 1.000000e+00, float addrspace(3)* %tmp1, align 4 + %tmp2 = getelementptr inbounds float, float addrspace(3)* %arg, i32 4097 + store float 1.000000e+00, float addrspace(3)* %tmp2, align 4 + %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6145 + store float 1.000000e+00, float addrspace(3)* %tmp3, align 4 + %tmp4 = getelementptr inbounds float, float addrspace(3)* %arg, i32 8193 + store float 1.000000e+00, float addrspace(3)* %tmp4, align 4 + %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10241 + store float 1.000000e+00, float addrspace(3)* %tmp5, align 4 + ret void +} + +; GCN-LABEL: ds_write64_combine_stride_400: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50 +; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150 +; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250 +; GCN-DAG: ds_write2_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50 +define amdgpu_kernel void @ds_write64_combine_stride_400(double addrspace(3)* nocapture %arg) { +bb: + store double 1.000000e+00, double addrspace(3)* %arg, align 8 + %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 50 + store double 1.000000e+00, double addrspace(3)* %tmp, align 8 + %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 100 + store double 1.000000e+00, double addrspace(3)* %tmp1, align 8 + %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 150 + store double 1.000000e+00, double addrspace(3)* %tmp2, align 8 + %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 200 + store double 1.000000e+00, double addrspace(3)* %tmp3, align 8 + %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 250 + store double 1.000000e+00, double addrspace(3)* %tmp4, align 8 + %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 300 + store double 1.000000e+00, double addrspace(3)* %tmp5, align 8 + %tmp6 = getelementptr inbounds double, double addrspace(3)* %arg, i32 350 + store double 1.000000e+00, double addrspace(3)* %tmp6, align 8 + ret void +} + +; GCN-LABEL: ds_write64_combine_stride_8192_shifted: +; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 +; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] +; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GCN-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 +; GCN-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 +; GCN-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 +define amdgpu_kernel void @ds_write64_combine_stride_8192_shifted(double addrspace(3)* nocapture %arg) { +bb: + %tmp = getelementptr inbounds double, double addrspace(3)* %arg, i32 1 + store double 1.000000e+00, double addrspace(3)* %tmp, align 8 + %tmp1 = getelementptr inbounds double, double addrspace(3)* %arg, i32 1025 + store double 1.000000e+00, double addrspace(3)* %tmp1, align 8 + %tmp2 = getelementptr inbounds double, double addrspace(3)* %arg, i32 2049 + store double 1.000000e+00, double addrspace(3)* %tmp2, align 8 + %tmp3 = getelementptr inbounds double, double addrspace(3)* %arg, i32 3073 + store double 1.000000e+00, double addrspace(3)* %tmp3, align 8 + %tmp4 = getelementptr inbounds double, double addrspace(3)* %arg, i32 4097 + store double 1.000000e+00, double addrspace(3)* %tmp4, align 8 + %tmp5 = getelementptr inbounds double, double addrspace(3)* %arg, i32 5121 + store double 1.000000e+00, double addrspace(3)* %tmp5, align 8 + ret void +} diff --git a/test/CodeGen/AMDGPU/ds_read2_superreg.ll b/test/CodeGen/AMDGPU/ds_read2_superreg.ll index fc85ec06f58df6e9f68baebc38a865e4bb752d32..3dfdaf3936a64b780e5454ad3fa653e2d47b6e24 100644 --- a/test/CodeGen/AMDGPU/ds_read2_superreg.ll +++ b/test/CodeGen/AMDGPU/ds_read2_superreg.ll @@ -150,7 +150,7 @@ define amdgpu_kernel void @simple_read2_v16f32_superreg(<16 x float> addrspace(1 ; Do scalar loads into the super register we need. ; CI-LABEL: {{^}}simple_read2_v2f32_superreg_scalar_loads_align4: ; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}} -; CI-NOT: v_mov +; CI-NOT: v_mov {{v[0-9]+}}, {{[sv][0-9]+}} ; CI: buffer_store_dwordx2 v{{\[}}[[REG_ELT0]]:[[REG_ELT1]]{{\]}} ; CI: s_endpgm define amdgpu_kernel void @simple_read2_v2f32_superreg_scalar_loads_align4(<2 x float> addrspace(1)* %out) #0 { @@ -173,7 +173,7 @@ define amdgpu_kernel void @simple_read2_v2f32_superreg_scalar_loads_align4(<2 x ; CI-LABEL: {{^}}simple_read2_v4f32_superreg_scalar_loads_align4: ; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT0:[0-9]+]]:[[REG_ELT1:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}} ; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT2:[0-9]+]]:[[REG_ELT3:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} -; CI-NOT: v_mov +; CI-NOT: v_mov {{v[0-9]+}}, {{[sv][0-9]+}} ; CI: buffer_store_dwordx4 v{{\[}}[[REG_ELT0]]:[[REG_ELT3]]{{\]}} ; CI: s_endpgm define amdgpu_kernel void @simple_read2_v4f32_superreg_scalar_loads_align4(<4 x float> addrspace(1)* %out) #0 { diff --git a/test/CodeGen/AMDGPU/ds_write2.ll b/test/CodeGen/AMDGPU/ds_write2.ll index ab1cf0ba25b5fdc12688e19000a7f142983269bc..0f49919a1d109e474adbbf9f30862a114278585d 100644 --- a/test/CodeGen/AMDGPU/ds_write2.ll +++ b/test/CodeGen/AMDGPU/ds_write2.ll @@ -266,8 +266,8 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* } ; SI-LABEL: @simple_write2_one_val_f64 -; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], -; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} +; SI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], +; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}} ; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8 ; SI: s_endpgm define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { diff --git a/test/CodeGen/AMDGPU/endcf-loop-header.ll b/test/CodeGen/AMDGPU/endcf-loop-header.ll index bd861e0c663edbfe1323dccb66084658f777be3d..3ae74abcb6cb7c7081a8ecff472769efd4632b75 100644 --- a/test/CodeGen/AMDGPU/endcf-loop-header.ll +++ b/test/CodeGen/AMDGPU/endcf-loop-header.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s ; This tests that the llvm.SI.end.cf intrinsic is not inserted into the ; loop block. This intrinsic will be lowered to s_or_b64 by the code @@ -14,7 +14,7 @@ ; CHECK: s_cbranch_execnz [[LOOP_LABEL]] define amdgpu_kernel void @test(i32 addrspace(1)* %out) { entry: - %cond = call i32 @llvm.r600.read.tidig.x() #0 + %cond = call i32 @llvm.amdgcn.workitem.id.x() #0 %tmp0 = icmp eq i32 %cond, 0 br i1 %tmp0, label %if, label %loop @@ -34,6 +34,6 @@ done: ret void } -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 -attributes #0 = { readnone } +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll index 40d115bfc06065b86be3c1cd61fdeb1ec6958712..13aafc24895df43f4311c09363eeddb26a8afd5b 100644 --- a/test/CodeGen/AMDGPU/exceed-max-sgprs.ll +++ b/test/CodeGen/AMDGPU/exceed-max-sgprs.ll @@ -2,97 +2,97 @@ ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_tahiti define amdgpu_kernel void @use_too_many_sgprs_tahiti() #0 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" () - call void asm sideeffect "", "~{VCC}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:103]}" () + call void asm sideeffect "", "~{vcc}" () ret void } ; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_bonaire define amdgpu_kernel void @use_too_many_sgprs_bonaire() #1 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" () - call void asm sideeffect "", "~{VCC}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:103]}" () + call void asm sideeffect "", "~{vcc}" () ret void } -; ERROR: error: scalar registers limit of 104 exceeded (106) in use_too_many_sgprs_bonaire_flat_scr +; ERROR: error: scalar registers limit of 104 exceeded (108) in use_too_many_sgprs_bonaire_flat_scr define amdgpu_kernel void @use_too_many_sgprs_bonaire_flat_scr() #1 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103}" () - call void asm sideeffect "", "~{VCC}" () - call void asm sideeffect "", "~{FLAT_SCR}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:103]}" () + call void asm sideeffect "", "~{vcc}" () + call void asm sideeffect "", "~{flat_scratch}" () ret void } ; ERROR: error: scalar registers limit of 96 exceeded (98) in use_too_many_sgprs_iceland define amdgpu_kernel void @use_too_many_sgprs_iceland() #2 { - call void asm sideeffect "", "~{VCC}" () - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () + call void asm sideeffect "", "~{vcc}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () ret void } ; ERROR: error: addressable scalar registers limit of 102 exceeded (103) in use_too_many_sgprs_fiji define amdgpu_kernel void @use_too_many_sgprs_fiji() #3 { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{SGPR96_SGPR97_SGPR98_SGPR99}" () - call void asm sideeffect "", "~{SGPR100_SGPR101}" () - call void asm sideeffect "", "~{SGPR102}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{s[96:99]}" () + call void asm sideeffect "", "~{s[100:101]}" () + call void asm sideeffect "", "~{s102}" () ret void } diff --git a/test/CodeGen/AMDGPU/extload-align.ll b/test/CodeGen/AMDGPU/extload-align.ll index 4644800421d86e0ef21230417194c69ee481a2e7..12cf27b918afcc1af67cf1d4266540f4870a38f0 100644 --- a/test/CodeGen/AMDGPU/extload-align.ll +++ b/test/CodeGen/AMDGPU/extload-align.ll @@ -1,4 +1,4 @@ -; RUN: llc -debug-only=misched -march=amdgcn -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s +; RUN: llc -debug-only=machine-scheduler -march=amdgcn -verify-machineinstrs %s -o - 2>&1| FileCheck -check-prefix=SI-NOHSA -check-prefix=FUNC -check-prefix=DEBUG %s ; REQUIRES: asserts ; Verify that the extload generated from %eval has the default @@ -20,4 +20,4 @@ define amdgpu_kernel void @extload_align(i32* %out, i32 %index) #0 { %eval = sext i16 %val to i32 store i32 %eval, i32* %out ret void -} \ No newline at end of file +} diff --git a/test/CodeGen/AMDGPU/fabs.f16.ll b/test/CodeGen/AMDGPU/fabs.f16.ll index d4ef7124a334c77ceaadf518f98c57de18129f25..4e2ec4b3054fed2c63d33ec7b1e65b0ef0abeff0 100644 --- a/test/CodeGen/AMDGPU/fabs.f16.ll +++ b/test/CodeGen/AMDGPU/fabs.f16.ll @@ -40,7 +40,7 @@ define amdgpu_kernel void @s_fabs_f16(half addrspace(1)* %out, half %in) { ; VI: flat_load_ushort [[LO:v[0-9]+]] ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}} ; VI-DAG: v_and_b32_e32 [[FABS_LO:v[0-9]+]], [[MASK]], [[HI]] -; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[MASK]], [[LO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_and_b32_sdwa [[FABS_HI:v[0-9]+]], [[LO]], [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, [[FABS_HI]], [[FABS_LO]] ; VI: flat_store_dword @@ -60,8 +60,8 @@ define amdgpu_kernel void @s_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half ; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]] ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}} -; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[MASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} ; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} @@ -128,7 +128,7 @@ define amdgpu_kernel void @fabs_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i ; CI: v_cvt_f16_f32 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, -; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} +; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI: v_mul_f16_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} ; GFX9: v_and_b32_e32 [[FABS:v[0-9]+]], 0x7fff7fff, [[VAL]] diff --git a/test/CodeGen/AMDGPU/fabs.f64.ll b/test/CodeGen/AMDGPU/fabs.f64.ll index 998e02f7bdf84f5ccb6cf7619f7b5ee69baf16e8..718176b80f0fbea0373c9a72a61e349e256ef398 100644 --- a/test/CodeGen/AMDGPU/fabs.f64.ll +++ b/test/CodeGen/AMDGPU/fabs.f64.ll @@ -55,7 +55,7 @@ define amdgpu_kernel void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x doub ; SI-LABEL: {{^}}fabs_fold_f64: ; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-NOT: and -; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]| +; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}} ; SI: s_endpgm define amdgpu_kernel void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) { %fabs = call double @llvm.fabs.f64(double %in0) @@ -67,7 +67,7 @@ define amdgpu_kernel void @fabs_fold_f64(double addrspace(1)* %out, double %in0, ; SI-LABEL: {{^}}fabs_fn_fold_f64: ; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-NOT: and -; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]| +; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}} ; SI: s_endpgm define amdgpu_kernel void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) { %fabs = call double @fabs(double %in0) diff --git a/test/CodeGen/AMDGPU/fabs.ll b/test/CodeGen/AMDGPU/fabs.ll index ac8fa3e45ef51e14211f5af8fc7680382ac357e5..600c6cd8230eba926f8b0a44aec638fc2ed3fa5c 100644 --- a/test/CodeGen/AMDGPU/fabs.ll +++ b/test/CodeGen/AMDGPU/fabs.ll @@ -75,7 +75,7 @@ define amdgpu_kernel void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb ; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c ; GCN-NOT: and -; GCN: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |[[ABS_VALUE]]| +; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) { %fabs = call float @fabs(float %in0) %fmul = fmul float %fabs, %in1 @@ -87,7 +87,7 @@ define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, fl ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb ; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c ; GCN-NOT: and -; GCN: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |[[ABS_VALUE]]| +; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}} define amdgpu_kernel void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) { %fabs = call float @llvm.fabs.f32(float %in0) %fmul = fmul float %fabs, %in1 diff --git a/test/CodeGen/AMDGPU/fadd.f16.ll b/test/CodeGen/AMDGPU/fadd.f16.ll index f76ecf58d9052fa5dd99b5c6db5ecca2c79e44ef..08199be144f4914b8d860e62587875faf9f77a04 100644 --- a/test/CodeGen/AMDGPU/fadd.f16.ll +++ b/test/CodeGen/AMDGPU/fadd.f16.ll @@ -78,7 +78,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_add_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] @@ -96,9 +96,9 @@ entry: } ; GCN-LABEL: {{^}}fadd_v2f16_imm_a: -; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] -; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] +; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] ; SI: v_add_f32_e32 v[[R_F32_0:[0-9]+]], 1.0, v[[B_F32_0]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] @@ -107,9 +107,9 @@ entry: ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] -; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[B_F16_1]] +; VI-DAG: v_mov_b32_e32 v[[CONST2:[0-9]+]], 0x4000 +; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]] -; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GCN: buffer_store_dword v[[R_V2_F16]] @@ -125,9 +125,9 @@ entry: } ; GCN-LABEL: {{^}}fadd_v2f16_imm_b: -; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] +; SI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] ; SI: v_add_f32_e32 v[[R_F32_0:[0-9]+]], 2.0, v[[A_F32_0]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] @@ -136,10 +136,10 @@ entry: ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] -; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[A_F16_1]] +; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00 +; VI-DAG: v_add_f16_sdwa v[[R_F16_0:[0-9]+]], v[[A_V2_F16]], v[[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[A_V2_F16]] -; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_0]] -; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_1]] +; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/fadd64.ll b/test/CodeGen/AMDGPU/fadd64.ll index 7eb7747de215cea1601b4e29fb3f3fb4391f4bbf..c936d98673ba1dbdd9ac2384950e6f2dc5acfe3a 100644 --- a/test/CodeGen/AMDGPU/fadd64.ll +++ b/test/CodeGen/AMDGPU/fadd64.ll @@ -13,7 +13,7 @@ define amdgpu_kernel void @v_fadd_f64(double addrspace(1)* %out, double addrspac } ; CHECK-LABEL: {{^}}s_fadd_f64: -; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @s_fadd_f64(double addrspace(1)* %out, double %r0, double %r1) { %r2 = fadd double %r0, %r1 store double %r2, double addrspace(1)* %out diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index f2686a5582dc6f052979eed2ca1bfb60aaede6d5..404358f0ecb98e5b4c9e58e4c6c5aa6ec3e6bfc3 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s declare half @llvm.fabs.f16(half) #0 declare half @llvm.canonicalize.f16(half) #0 @@ -205,9 +205,9 @@ define amdgpu_kernel void @test_fold_canonicalize_snan3_value_f16(half addrspace } ; GCN-LABEL: {{^}}v_test_canonicalize_var_v2f16: -; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, {{v[0-9]+}} +; VI: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00 +; VI-DAG: v_mul_f16_sdwa [[REG0:v[0-9]+]], v[[CONST1]], {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-DAG: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, {{v[0-9]+}} -; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI-NOT: v_and_b32 ; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+$}} @@ -223,7 +223,8 @@ define amdgpu_kernel void @v_test_canonicalize_var_v2f16(<2 x half> addrspace(1) ; GCN-LABEL: {{^}}v_test_canonicalize_fabs_var_v2f16: ; VI-DAG: v_bfe_u32 ; VI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0x7fff7fff, v{{[0-9]+}} -; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, v{{[0-9]+}} +; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00 +; VI: v_mul_f16_sdwa [[REG0:v[0-9]+]], v[[CONST1]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}} ; VI-NOT: 0xffff ; VI: v_or_b32 @@ -240,9 +241,10 @@ define amdgpu_kernel void @v_test_canonicalize_fabs_var_v2f16(<2 x half> addrspa } ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_fabs_var_v2f16: -; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}} -; VI: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, v{{[0-9]+}} -; VI: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}} +; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00 +; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}} +; VI-DAG: v_mul_f16_sdwa [[REG0:v[0-9]+]], v[[CONST1]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, v{{[0-9]+}} ; VI: v_or_b32 ; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, v{{[0-9]+}} @@ -259,11 +261,10 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_v2f16(<2 x half> ad ; FIXME: Fold modifier ; GCN-LABEL: {{^}}v_test_canonicalize_fneg_var_v2f16: -; VI: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}} -; VI-DAG: v_lshrrev_b32_e32 [[FNEG_HI:v[0-9]+]], 16, [[FNEG]] -; VI-DAG: v_mul_f16_e32 [[REG1:v[0-9]+]], 1.0, [[FNEG_HI]] +; VI-DAG: v_mov_b32_e32 v[[CONST1:[0-9]+]], 0x3c00 +; VI-DAG: v_xor_b32_e32 [[FNEG:v[0-9]+]], 0x80008000, v{{[0-9]+}} +; VI-DAG: v_mul_f16_sdwa [[REG1:v[0-9]+]], v[[CONST1]], [[FNEG]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-DAG: v_mul_f16_e32 [[REG0:v[0-9]+]], 1.0, [[FNEG]] -; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI-NOT: 0xffff ; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{v[0-9]+}} neg_lo:[0,1] neg_hi:[0,1]{{$}} @@ -277,9 +278,9 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa } ; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16: -; VI: v_mul_f16_e64 [[REG0:v[0-9]+]], 1.0, {{s[0-9]+}} -; VI-DAG: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}} -; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, +; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 0x3c00 +; VI: v_mul_f16_sdwa [[REG0:v[0-9]+]], [[ONE]], {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_mul_f16_e64 [[REG1:v[0-9]+]], 1.0, {{s[0-9]+}} ; VI-NOT: v_and_b32 ; GFX9: v_pk_mul_f16 [[REG:v[0-9]+]], 1.0, {{s[0-9]+$}} diff --git a/test/CodeGen/AMDGPU/fdiv.ll b/test/CodeGen/AMDGPU/fdiv.ll index b3a2b664372071a3b1301900104a2965283a9d0c..738a5adba14fb31e58ce83c732e13dc53931c6cf 100644 --- a/test/CodeGen/AMDGPU/fdiv.ll +++ b/test/CodeGen/AMDGPU/fdiv.ll @@ -85,10 +85,20 @@ entry: } ; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32: -; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}} -; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]] -; GCN-NOT: [[RESULT]] -; GCN: buffer_store_dword [[RESULT]] +; GCN: v_div_scale_f32 [[NUM_SCALE:v[0-9]+]] +; GCN-DAG: v_div_scale_f32 [[DEN_SCALE:v[0-9]+]] +; GCN-DAG: v_rcp_f32_e32 [[NUM_RCP:v[0-9]+]], [[NUM_SCALE]] + +; GCN-NOT: s_setreg +; GCN: v_fma_f32 [[A:v[0-9]+]], -[[NUM_SCALE]], [[NUM_RCP]], 1.0 +; GCN: v_fma_f32 [[B:v[0-9]+]], [[A]], [[NUM_RCP]], [[NUM_RCP]] +; GCN: v_mul_f32_e32 [[C:v[0-9]+]], [[B]], [[DEN_SCALE]] +; GCN: v_fma_f32 [[D:v[0-9]+]], -[[NUM_SCALE]], [[C]], [[DEN_SCALE]] +; GCN: v_fma_f32 [[E:v[0-9]+]], [[D]], [[B]], [[C]] +; GCN: v_fma_f32 [[F:v[0-9]+]], -[[NUM_SCALE]], [[E]], [[DEN_SCALE]] +; GCN-NOT: s_setreg +; GCN: v_div_fmas_f32 [[FMAS:v[0-9]+]], [[F]], [[B]], [[E]] +; GCN: v_div_fixup_f32 v{{[0-9]+}}, [[FMAS]], define amdgpu_kernel void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 { entry: %fdiv = fdiv fast float %a, %b diff --git a/test/CodeGen/AMDGPU/fence-amdgiz.ll b/test/CodeGen/AMDGPU/fence-amdgiz.ll new file mode 100644 index 0000000000000000000000000000000000000000..df675c9a8692e05db12905f36d4c8cf3a670831f --- /dev/null +++ b/test/CodeGen/AMDGPU/fence-amdgiz.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +target triple = "amdgcn-amd-amdhsa-amdgizcl" + +; CHECK_LABEL: atomic_fence +; CHECK: BB#0: +; CHECK: ATOMIC_FENCE 4, 1 +; CHECK: s_endpgm + +define amdgpu_kernel void @atomic_fence() { + fence acquire + ret void +} + diff --git a/test/CodeGen/AMDGPU/flat-address-space.ll b/test/CodeGen/AMDGPU/flat-address-space.ll index c867e4fca229503488a2f58ff6669554fd525178..e486b9c71a54d34c718e190e45f526089f6d7671 100644 --- a/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/test/CodeGen/AMDGPU/flat-address-space.ll @@ -1,6 +1,7 @@ -; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck %s -; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s +; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s ; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA %s +; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s ; Disable optimizations in case there are optimizations added that ; specialize away generic pointer accesses. @@ -172,6 +173,55 @@ define amdgpu_kernel void @flat_scratch_multidword_store() { ret void } +; CHECK-LABEL: {{^}}store_flat_i8_max_offset: +; CIVI: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}} +; GFX9: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:4095{{$}} +define amdgpu_kernel void @store_flat_i8_max_offset(i8 addrspace(4)* %fptr, i8 %x) #0 { + %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095 + store volatile i8 %x, i8 addrspace(4)* %fptr.offset + ret void +} + +; CHECK-LABEL: {{^}}store_flat_i8_max_offset_p1: +; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @store_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr, i8 %x) #0 { + %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096 + store volatile i8 %x, i8 addrspace(4)* %fptr.offset + ret void +} + +; CHECK-LABEL: {{^}}store_flat_i8_neg_offset: +; CHECK: flat_store_byte v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @store_flat_i8_neg_offset(i8 addrspace(4)* %fptr, i8 %x) #0 { + %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2 + store volatile i8 %x, i8 addrspace(4)* %fptr.offset + ret void +} + +; CHECK-LABEL: {{^}}load_flat_i8_max_offset: +; CIVI: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}} +; GFX9: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} offset:4095{{$}} +define amdgpu_kernel void @load_flat_i8_max_offset(i8 addrspace(4)* %fptr) #0 { + %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4095 + %val = load volatile i8, i8 addrspace(4)* %fptr.offset + ret void +} + +; CHECK-LABEL: {{^}}load_flat_i8_max_offset_p1: +; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}} +define amdgpu_kernel void @load_flat_i8_max_offset_p1(i8 addrspace(4)* %fptr) #0 { + %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 4096 + %val = load volatile i8, i8 addrspace(4)* %fptr.offset + ret void +} + +; CHECK-LABEL: {{^}}load_flat_i8_neg_offset: +; CHECK: flat_load_ubyte v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}{{$}} +define amdgpu_kernel void @load_flat_i8_neg_offset(i8 addrspace(4)* %fptr) #0 { + %fptr.offset = getelementptr inbounds i8, i8 addrspace(4)* %fptr, i64 -2 + %val = load volatile i8, i8 addrspace(4)* %fptr.offset + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind convergent } -attributes #3 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll index 23f40daf3d237b7123c482f04af3f9a239614820..a7664c399fbb0c505ffa1d82e71a65b4a0fd7f32 100644 --- a/test/CodeGen/AMDGPU/flat-scratch-reg.ll +++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -21,7 +21,7 @@ ; VI-XNACK: ; NumSgprs: 12 define amdgpu_kernel void @no_vcc_no_flat() { entry: - call void asm sideeffect "", "~{SGPR7}"() + call void asm sideeffect "", "~{s7}"() ret void } @@ -35,7 +35,7 @@ entry: ; VI-XNACK: ; NumSgprs: 12 define amdgpu_kernel void @vcc_no_flat() { entry: - call void asm sideeffect "", "~{SGPR7},~{VCC}"() + call void asm sideeffect "", "~{s7},~{vcc}"() ret void } @@ -44,15 +44,15 @@ entry: ; HSA-VI-NOXNACK: is_xnack_enabled = 0 ; HSA-VI-XNACK: is_xnack_enabled = 1 -; CI: ; NumSgprs: 8 -; VI-NOXNACK: ; NumSgprs: 8 -; VI-XNACK: ; NumSgprs: 12 -; HSA-CI: ; NumSgprs: 8 -; HSA-VI-NOXNACK: ; NumSgprs: 8 -; HSA-VI-XNACK: ; NumSgprs: 12 +; CI: ; NumSgprs: 12 +; VI-NOXNACK: ; NumSgprs: 14 +; VI-XNACK: ; NumSgprs: 14 +; HSA-CI: ; NumSgprs: 12 +; HSA-VI-NOXNACK: ; NumSgprs: 14 +; HSA-VI-XNACK: ; NumSgprs: 14 define amdgpu_kernel void @no_vcc_flat() { entry: - call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"() + call void asm sideeffect "", "~{s7},~{flat_scratch}"() ret void } @@ -60,14 +60,49 @@ entry: ; HSA-NOXNACK: is_xnack_enabled = 0 ; HSA-XNACK: is_xnack_enabled = 1 -; CI: ; NumSgprs: 10 -; VI-NOXNACK: ; NumSgprs: 10 -; VI-XNACK: ; NumSgprs: 12 -; HSA-CI: ; NumSgprs: 10 -; HSA-VI-NOXNACK: ; NumSgprs: 10 -; HSA-VI-XNACK: ; NumSgprs: 12 +; CI: ; NumSgprs: 12 +; VI-NOXNACK: ; NumSgprs: 14 +; VI-XNACK: ; NumSgprs: 14 +; HSA-CI: ; NumSgprs: 12 +; HSA-VI-NOXNACK: ; NumSgprs: 14 +; HSA-VI-XNACK: ; NumSgprs: 14 define amdgpu_kernel void @vcc_flat() { entry: - call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"() + call void asm sideeffect "", "~{s7},~{vcc},~{flat_scratch}"() + ret void +} + +; Make sure used SGPR count for flat_scr is correct when there is no +; scratch usage and implicit flat uses. + +; GCN-LABEL: {{^}}use_flat_scr: +; CI: NumSgprs: 4 +; VI-NOXNACK: NumSgprs: 6 +; VI-XNACK: NumSgprs: 6 +define amdgpu_kernel void @use_flat_scr() #0 { +entry: + call void asm sideeffect "; clobber ", "~{flat_scratch}"() + ret void +} + +; GCN-LABEL: {{^}}use_flat_scr_lo: +; CI: NumSgprs: 4 +; VI-NOXNACK: NumSgprs: 6 +; VI-XNACK: NumSgprs: 6 +define amdgpu_kernel void @use_flat_scr_lo() #0 { +entry: + call void asm sideeffect "; clobber ", "~{flat_scratch_lo}"() ret void } + +; GCN-LABEL: {{^}}use_flat_scr_hi: +; CI: NumSgprs: 4 +; VI-NOXNACK: NumSgprs: 6 +; VI-XNACK: NumSgprs: 6 +define amdgpu_kernel void @use_flat_scr_hi() #0 { +entry: + call void asm sideeffect "; clobber ", "~{flat_scratch_hi}"() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/flat_atomics.ll b/test/CodeGen/AMDGPU/flat_atomics.ll index cc95d80570e09f1e0bfef109987a2f6c2e8267f9..8e153181decb7cd9200bcd09443e30f3f8b72f8a 100644 --- a/test/CodeGen/AMDGPU/flat_atomics.ll +++ b/test/CodeGen/AMDGPU/flat_atomics.ll @@ -1,8 +1,10 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIVI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}atomic_add_i32_offset: -; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -10,8 +12,28 @@ entry: ret void } +; GCN-LABEL: {{^}}atomic_add_i32_max_offset: +; CIVI: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +; GFX9: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:4092{{$}} +define amdgpu_kernel void @atomic_add_i32_max_offset(i32 addrspace(4)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1023 + %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_add_i32_max_offset_p1: +; GCN: flat_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +define amdgpu_kernel void @atomic_add_i32_max_offset_p1(i32 addrspace(4)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(4)* %out, i32 1024 + %val = atomicrmw volatile add i32 addrspace(4)* %gep, i32 %in seq_cst + ret void +} + ; GCN-LABEL: {{^}}atomic_add_i32_ret_offset: -; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -22,7 +44,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset: -; GCN: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -32,7 +55,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset: -; GCN: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -82,7 +106,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_and_i32_offset: -; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -91,7 +116,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_and_i32_ret_offset: -; GCN: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -102,7 +128,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset: -; GCN: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -112,7 +139,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset: -; GCN: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -162,7 +190,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_sub_i32_offset: -; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -171,7 +200,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset: -; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -182,7 +212,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset: -; GCN: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -192,7 +223,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset: -; GCN: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -242,7 +274,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_max_i32_offset: -; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -251,7 +284,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_max_i32_ret_offset: -; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -262,7 +296,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset: -; GCN: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -272,7 +307,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset: -; GCN: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -322,7 +358,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_umax_i32_offset: -; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -331,7 +368,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset: -; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -342,7 +380,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset: -; GCN: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -352,7 +391,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset: -; GCN: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -402,7 +442,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_min_i32_offset: -; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -411,7 +452,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_min_i32_ret_offset: -; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -422,7 +464,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset: -; GCN: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -432,7 +475,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset: -; GCN: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -482,7 +526,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_umin_i32_offset: -; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -491,7 +536,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset: -; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -502,7 +548,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset: -; GCN: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -512,7 +559,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset: -; GCN: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -562,7 +610,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_or_i32_offset: -; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}} +; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -571,7 +620,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_or_i32_ret_offset: -; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -582,7 +632,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset: -; GCN: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}} +; CIVI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -592,7 +643,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset: -; GCN: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -642,7 +694,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_xchg_i32_offset: -; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}} +; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -651,7 +704,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset: -; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -662,7 +716,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset: -; GCN: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}} +; CIVI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -672,7 +727,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset: -; GCN: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -724,7 +780,8 @@ entry: ; CMP_SWAP ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset: -; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(4)* %out, i32 %in, i32 %old) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -733,7 +790,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset: -; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i32 %old) { entry: @@ -745,7 +803,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset: -; GCN: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; CIVI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GFX9: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}} define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index, i32 %old) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -755,7 +814,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset: -; GCN: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} +; CIVI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} +; GFX9: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RET]] define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index, i32 %old) { entry: @@ -808,7 +868,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_xor_i32_offset: -; GCN: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +; CIVI: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}} +; GFX9: flat_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(4)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -817,7 +878,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset: -; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in) { entry: @@ -828,7 +890,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset: -; GCN: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; CIVI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(4)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index @@ -838,7 +901,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: -; GCN: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; CIVI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %out2, i32 %in, i64 %index) { entry: @@ -888,7 +952,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_load_i32_offset: -; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out) { entry: @@ -909,7 +974,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset: -; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} +; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} +; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(4)* %in, i32 addrspace(4)* %out, i64 %index) { entry: @@ -932,7 +998,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_store_i32_offset: -; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} +; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} +; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(4)* %out) { entry: %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4 @@ -949,7 +1016,8 @@ entry: } ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset: -; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} +; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} +; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(4)* %out, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index diff --git a/test/CodeGen/AMDGPU/fmax3.ll b/test/CodeGen/AMDGPU/fmax3.ll index a96eb5db9e2a25dd0aeb80049191e319d6486c6b..2e6d3f3c1e8f4496ab4b872c196167e564d57e69 100644 --- a/test/CodeGen/AMDGPU/fmax3.ll +++ b/test/CodeGen/AMDGPU/fmax3.ll @@ -1,39 +1,92 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s - -declare float @llvm.maxnum.f32(float, float) nounwind readnone - -; SI-LABEL: {{^}}test_fmax3_olt_0: -; SI: buffer_load_dword [[REGC:v[0-9]+]] -; SI: buffer_load_dword [[REGB:v[0-9]+]] -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm -define amdgpu_kernel void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind { +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s + +; GCN-LABEL: {{^}}test_fmax3_olt_0_f32: +; GCN: buffer_load_dword [[REGC:v[0-9]+]] +; GCN: buffer_load_dword [[REGB:v[0-9]+]] +; GCN: buffer_load_dword [[REGA:v[0-9]+]] +; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm +define amdgpu_kernel void @test_fmax3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 { %a = load volatile float, float addrspace(1)* %aptr, align 4 %b = load volatile float, float addrspace(1)* %bptr, align 4 %c = load volatile float, float addrspace(1)* %cptr, align 4 - %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone - %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone + %f0 = call float @llvm.maxnum.f32(float %a, float %b) + %f1 = call float @llvm.maxnum.f32(float %f0, float %c) store float %f1, float addrspace(1)* %out, align 4 ret void } ; Commute operand of second fmax -; SI-LABEL: {{^}}test_fmax3_olt_1: -; SI: buffer_load_dword [[REGB:v[0-9]+]] -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: buffer_load_dword [[REGC:v[0-9]+]] -; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm -define amdgpu_kernel void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind { +; GCN-LABEL: {{^}}test_fmax3_olt_1_f32: +; GCN: buffer_load_dword [[REGB:v[0-9]+]] +; GCN: buffer_load_dword [[REGA:v[0-9]+]] +; GCN: buffer_load_dword [[REGC:v[0-9]+]] +; GCN: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm +define amdgpu_kernel void @test_fmax3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 { %a = load volatile float, float addrspace(1)* %aptr, align 4 %b = load volatile float, float addrspace(1)* %bptr, align 4 %c = load volatile float, float addrspace(1)* %cptr, align 4 - %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone - %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone + %f0 = call float @llvm.maxnum.f32(float %a, float %b) + %f1 = call float @llvm.maxnum.f32(float %c, float %f0) store float %f1, float addrspace(1)* %out, align 4 ret void } + +; GCN-LABEL: {{^}}test_fmax3_olt_0_f16: +; GCN: buffer_load_ushort [[REGC:v[0-9]+]] +; GCN: buffer_load_ushort [[REGB:v[0-9]+]] +; GCN: buffer_load_ushort [[REGA:v[0-9]+]] + +; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], +; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]] + +; VI: v_max_f16_e32 +; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], + +; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_short [[RESULT]], +define amdgpu_kernel void @test_fmax3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 { + %a = load volatile half, half addrspace(1)* %aptr, align 2 + %b = load volatile half, half addrspace(1)* %bptr, align 2 + %c = load volatile half, half addrspace(1)* %cptr, align 2 + %f0 = call half @llvm.maxnum.f16(half %a, half %b) + %f1 = call half @llvm.maxnum.f16(half %f0, half %c) + store half %f1, half addrspace(1)* %out, align 2 + ret void +} + +; Commute operand of second fmax +; GCN-LABEL: {{^}}test_fmax3_olt_1_f16: +; GCN: buffer_load_ushort [[REGB:v[0-9]+]] +; GCN: buffer_load_ushort [[REGA:v[0-9]+]] +; GCN: buffer_load_ushort [[REGC:v[0-9]+]] + +; SI: v_max3_f32 [[RESULT_F32:v[0-9]+]], +; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]] + +; VI: v_max_f16_e32 +; VI: v_max_f16_e32 [[RESULT:v[0-9]+]], + +; GFX9: v_max3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_short [[RESULT]], +define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 { + %a = load volatile half, half addrspace(1)* %aptr, align 2 + %b = load volatile half, half addrspace(1)* %bptr, align 2 + %c = load volatile half, half addrspace(1)* %cptr, align 2 + %f0 = call half @llvm.maxnum.f16(half %a, half %b) + %f1 = call half @llvm.maxnum.f16(half %c, half %f0) + store half %f1, half addrspace(1)* %out, align 2 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 +declare float @llvm.maxnum.f32(float, float) #1 +declare half @llvm.maxnum.f16(half, half) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/fmed3.ll b/test/CodeGen/AMDGPU/fmed3.ll index d2cfc713ed37c6dd5004c372e08734235c01c34f..27d9261b1fab822004bd307dd584f5c1965ec1d8 100644 --- a/test/CodeGen/AMDGPU/fmed3.ll +++ b/test/CodeGen/AMDGPU/fmed3.ll @@ -845,10 +845,10 @@ define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(float addrspace( ; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] ; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] ; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] -; GCN: v_min_f32 -; GCN: v_max_f32 -; GCN: v_min_f32 -; GCN: v_max_f32 +; GCN-DAG: v_min_f32 +; GCN-DAG: v_max_f32 +; GCN-DAG: v_min_f32 +; GCN-DAG: v_max_f32 define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid diff --git a/test/CodeGen/AMDGPU/fmin3.ll b/test/CodeGen/AMDGPU/fmin3.ll index 3183f77f090bd4bba7778b0d5b6b3047a5ccfb72..5fc5895c3ecb6c22b6ebb845dd2d497078fdf52d 100644 --- a/test/CodeGen/AMDGPU/fmin3.ll +++ b/test/CodeGen/AMDGPU/fmin3.ll @@ -1,40 +1,90 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s - -declare float @llvm.minnum.f32(float, float) nounwind readnone - -; SI-LABEL: {{^}}test_fmin3_olt_0: -; SI: buffer_load_dword [[REGC:v[0-9]+]] -; SI: buffer_load_dword [[REGB:v[0-9]+]] -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm -define amdgpu_kernel void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind { +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s + +; GCN-LABEL: {{^}}test_fmin3_olt_0_f32: +; GCN: buffer_load_dword [[REGC:v[0-9]+]] +; GCN: buffer_load_dword [[REGB:v[0-9]+]] +; GCN: buffer_load_dword [[REGA:v[0-9]+]] +; GCN: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_dword [[RESULT]], +define amdgpu_kernel void @test_fmin3_olt_0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 { %a = load volatile float, float addrspace(1)* %aptr, align 4 %b = load volatile float, float addrspace(1)* %bptr, align 4 %c = load volatile float, float addrspace(1)* %cptr, align 4 - %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone - %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone + %f0 = call float @llvm.minnum.f32(float %a, float %b) + %f1 = call float @llvm.minnum.f32(float %f0, float %c) store float %f1, float addrspace(1)* %out, align 4 ret void } ; Commute operand of second fmin -; SI-LABEL: {{^}}test_fmin3_olt_1: -; SI: buffer_load_dword [[REGB:v[0-9]+]] -; SI: buffer_load_dword [[REGA:v[0-9]+]] -; SI: buffer_load_dword [[REGC:v[0-9]+]] -; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm -define amdgpu_kernel void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind { +; GCN-LABEL: {{^}}test_fmin3_olt_1_f32: +; GCN: buffer_load_dword [[REGB:v[0-9]+]] +; GCN: buffer_load_dword [[REGA:v[0-9]+]] +; GCN: buffer_load_dword [[REGC:v[0-9]+]] +; GCN: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_dword [[RESULT]], +define amdgpu_kernel void @test_fmin3_olt_1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #0 { %a = load volatile float, float addrspace(1)* %aptr, align 4 %b = load volatile float, float addrspace(1)* %bptr, align 4 %c = load volatile float, float addrspace(1)* %cptr, align 4 - %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone - %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone + %f0 = call float @llvm.minnum.f32(float %a, float %b) + %f1 = call float @llvm.minnum.f32(float %c, float %f0) store float %f1, float addrspace(1)* %out, align 4 ret void } + +; GCN-LABEL: {{^}}test_fmin3_olt_0_f16: +; GCN: buffer_load_ushort [[REGC:v[0-9]+]] +; GCN: buffer_load_ushort [[REGB:v[0-9]+]] +; GCN: buffer_load_ushort [[REGA:v[0-9]+]] + +; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]], +; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]] + +; VI: v_min_f16_e32 +; VI: v_min_f16_e32 [[RESULT:v[0-9]+]], + +; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_short [[RESULT]], +define amdgpu_kernel void @test_fmin3_olt_0_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 { + %a = load volatile half, half addrspace(1)* %aptr, align 2 + %b = load volatile half, half addrspace(1)* %bptr, align 2 + %c = load volatile half, half addrspace(1)* %cptr, align 2 + %f0 = call half @llvm.minnum.f16(half %a, half %b) + %f1 = call half @llvm.minnum.f16(half %f0, half %c) + store half %f1, half addrspace(1)* %out, align 2 + ret void +} + +; Commute operand of second fmin +; GCN-LABEL: {{^}}test_fmin3_olt_1_f16: +; GCN: buffer_load_ushort [[REGB:v[0-9]+]] +; GCN: buffer_load_ushort [[REGA:v[0-9]+]] +; GCN: buffer_load_ushort [[REGC:v[0-9]+]] + +; SI: v_min3_f32 [[RESULT_F32:v[0-9]+]], +; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[RESULT]] + +; VI: v_min_f16_e32 +; VI: v_min_f16_e32 [[RESULT:v[0-9]+]], + +; GFX9: v_min3_f16 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]] +; GCN: buffer_store_short [[RESULT]], +define amdgpu_kernel void @test_fmin3_olt_1_f16(half addrspace(1)* %out, half addrspace(1)* %aptr, half addrspace(1)* %bptr, half addrspace(1)* %cptr) #0 { + %a = load volatile half, half addrspace(1)* %aptr, align 2 + %b = load volatile half, half addrspace(1)* %bptr, align 2 + %c = load volatile half, half addrspace(1)* %cptr, align 2 + %f0 = call half @llvm.minnum.f16(half %a, half %b) + %f1 = call half @llvm.minnum.f16(half %c, half %f0) + store half %f1, half addrspace(1)* %out, align 2 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 +declare float @llvm.minnum.f32(float, float) #1 +declare half @llvm.minnum.f16(half, half) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/fmul.f16.ll b/test/CodeGen/AMDGPU/fmul.f16.ll index 4e96091ae25639e0285794892ca9eda32494493d..cd86409e20384803f648d4062d5f6fee876894ce 100644 --- a/test/CodeGen/AMDGPU/fmul.f16.ll +++ b/test/CodeGen/AMDGPU/fmul.f16.ll @@ -78,7 +78,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_mul_f16_e32 v[[R_F16_LO:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] @@ -96,17 +96,18 @@ entry: } ; GCN-LABEL: {{^}}fmul_v2f16_imm_a: -; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] -; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] +; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] ; SI: v_mul_f32_e32 v[[R_F32_0:[0-9]+]], 0x40400000, v[[B_F32_0]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI-DAG: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]] +; VI-DAG: v_mov_b32_e32 v[[CONST4:[0-9]+]], 0x4400 +; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] -; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] +; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm @@ -121,17 +122,18 @@ entry: } ; GCN-LABEL: {{^}}fmul_v2f16_imm_b: -; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] +; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] ; SI: v_mul_f32_e32 v[[R_F32_0:[0-9]+]], 4.0, v[[A_F32_0]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI-DAG: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]] +; VI-DAG: v_mov_b32_e32 v[[CONST3:[0-9]+]], 0x4200 +; VI-DAG: v_mul_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] -; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] +; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/fmuladd.f32.ll b/test/CodeGen/AMDGPU/fmuladd.f32.ll index fb605dd2e4bd480a40c59b6229f33ae42d687374..e422550266924e542de644927d256573f0f9c416 100644 --- a/test/CodeGen/AMDGPU/fmuladd.f32.ll +++ b/test/CodeGen/AMDGPU/fmuladd.f32.ll @@ -191,8 +191,8 @@ define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out, ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]] -; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]] -; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]] +; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]] +; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]] ; SI-DENORM: buffer_store_dword [[RESULT]] ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] @@ -251,8 +251,8 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], 2.0, [[R2]] -; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]] -; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]] +; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]] +; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]] ; SI-DENORM: buffer_store_dword [[RESULT]] ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] diff --git a/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/test/CodeGen/AMDGPU/fmuladd.v2f16.ll index bdd3c04fd31894ecee4288752f2afe160989594e..624610096cbc5ea3f52ecd4975978fd12b8cc1db 100644 --- a/test/CodeGen/AMDGPU/fmuladd.v2f16.ll +++ b/test/CodeGen/AMDGPU/fmuladd.v2f16.ll @@ -1,12 +1,12 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s - -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-FLUSH,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-FLUSH,GFX9 %s + +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=on -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-STRICT,GFX9-DENORM-STRICT,GFX9-DENORM,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=+fp64-fp16-denormals -fp-contract=fast -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefixes=GCN,GCN-CONTRACT,GFX9-DENORM-CONTRACT,GFX9-DENORM,GFX9 %s declare i32 @llvm.amdgcn.workitem.id.x() #1 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1 diff --git a/test/CodeGen/AMDGPU/fneg-combines.ll b/test/CodeGen/AMDGPU/fneg-combines.ll index 1c0e9a2f13ceaade255c4508fbc513e6f29f8651..66bf9d0ffb00eb1fee2452669b61af2a882767a5 100644 --- a/test/CodeGen/AMDGPU/fneg-combines.ll +++ b/test/CodeGen/AMDGPU/fneg-combines.ll @@ -1471,11 +1471,10 @@ define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addr ; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]] -; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]] -; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]] -; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]] -; GCN: buffer_store_dword [[MUL]] +; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]] +; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0 +; GCN-NEXT: buffer_store_dword [[ADD]] +; GCN-NEXT: buffer_store_dword [[MUL]] define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index 555764c15519ead17f7b053554796af59b80055d..f4afaca2b7a7f5a063b0f6c65cdc884ed4ec1c23 100644 --- a/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=CIVI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN -check-prefix=CIVI %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX89 -check-prefix=GFX9 -check-prefix=GCN %s ; GCN-LABEL: {{^}}fneg_fabs_fadd_f16: ; CI: v_cvt_f32_f16_e32 @@ -71,7 +71,9 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa ; FIXME: single bit op ; GCN-LABEL: {{^}}s_fneg_fabs_v2f16: ; CIVI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} -; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]] +; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], ; CIVI: flat_store_dword @@ -85,10 +87,15 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x ; GCN-LABEL: {{^}}fneg_fabs_v4f16: ; CIVI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} -; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; CIVI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]] +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], ; GFX9: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000 ; GFX9: s_or_b32 s{{[0-9]+}}, [[MASK]], s{{[0-9]+}} @@ -109,7 +116,7 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h ; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} ; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0 -; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0 +; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 neg_lo:[1,0] neg_hi:[1,0] diff --git a/test/CodeGen/AMDGPU/fneg-fabs.f64.ll b/test/CodeGen/AMDGPU/fneg-fabs.f64.ll index 85f544032171c7275095bb413218b2d57b692aea..bc0e59980186fd637dcab19c00942b55276151c0 100644 --- a/test/CodeGen/AMDGPU/fneg-fabs.f64.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.f64.ll @@ -5,7 +5,7 @@ ; into 2 modifiers, although theoretically that should work. ; GCN-LABEL: {{^}}fneg_fabs_fadd_f64: -; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|, {{s\[[0-9]+:[0-9]+\]}} +; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}| define amdgpu_kernel void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) { %fabs = call double @llvm.fabs.f64(double %x) %fsub = fsub double -0.000000e+00, %fabs @@ -25,7 +25,7 @@ define amdgpu_kernel void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, doubl } ; GCN-LABEL: {{^}}fneg_fabs_fmul_f64: -; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|, {{s\[[0-9]+:[0-9]+\]}} +; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}| define amdgpu_kernel void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) { %fabs = call double @llvm.fabs.f64(double %x) %fsub = fsub double -0.000000e+00, %fabs diff --git a/test/CodeGen/AMDGPU/fneg-fabs.ll b/test/CodeGen/AMDGPU/fneg-fabs.ll index a0cf37b159dbbaedb0a543cd77c41d45f21d68e6..0a7346f410c943d7cb13c286e3bc622194976f70 100644 --- a/test/CodeGen/AMDGPU/fneg-fabs.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.ll @@ -4,7 +4,7 @@ ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32: ; SI-NOT: and -; SI: v_subrev_f32_e64 {{v[0-9]+}}, |{{v[0-9]+}}|, {{s[0-9]+}} +; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}| define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) { %fabs = call float @llvm.fabs.f32(float %x) %fsub = fsub float -0.000000e+00, %fabs @@ -15,7 +15,7 @@ define amdgpu_kernel void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x ; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32: ; SI-NOT: and -; SI: v_mul_f32_e64 {{v[0-9]+}}, -|{{v[0-9]+}}|, {{s[0-9]+}} +; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}| ; SI-NOT: and define amdgpu_kernel void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) { %fabs = call float @llvm.fabs.f32(float %x) diff --git a/test/CodeGen/AMDGPU/fneg.f16.ll b/test/CodeGen/AMDGPU/fneg.f16.ll index 626a0b50cce8ad58292c2cfc827f2062ff415c2d..59745a9352ce59b5060551d4f9e239a5425e0226 100644 --- a/test/CodeGen/AMDGPU/fneg.f16.ll +++ b/test/CodeGen/AMDGPU/fneg.f16.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=GFX89 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN -check-prefix=GFX89 %s ; FIXME: Should be able to do scalar op ; GCN-LABEL: {{^}}s_fneg_f16: @@ -117,7 +117,7 @@ define amdgpu_kernel void @fneg_free_v2f16(<2 x half> addrspace(1)* %out, i32 %i ; CI: v_cvt_f16_f32 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, -; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} +; VI: v_mul_f16_sdwa v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI: v_mul_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_pk_mul_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} neg_lo:[1,0] neg_hi:[1,0]{{$}} @@ -129,6 +129,43 @@ define amdgpu_kernel void @v_fneg_fold_v2f16(<2 x half> addrspace(1)* %out, <2 x ret void } +; GCN-LABEL: {{^}}v_extract_fneg_fold_v2f16: +; GCN-DAG: flat_load_dword [[VAL:v[0-9]+]] +; CI-DAG: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}} +; CI-DAG: v_sub_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}} + +; GFX9: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VAL]] +; GFX89-DAG: v_mul_f16_e32 v{{[0-9]+}}, -4.0, [[VAL]] +; GFX9-DAG: v_sub_f16_e32 v{{[0-9]+}}, 2.0, [[ELT1]] +; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000 +; VI-DAG: v_sub_f16_sdwa v{{[0-9]+}}, [[CONST2]], [[VAL]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +define amdgpu_kernel void @v_extract_fneg_fold_v2f16(<2 x half> addrspace(1)* %in) #0 { + %val = load <2 x half>, <2 x half> addrspace(1)* %in + %fneg = fsub <2 x half> , %val + %elt0 = extractelement <2 x half> %fneg, i32 0 + %elt1 = extractelement <2 x half> %fneg, i32 1 + + %fmul0 = fmul half %elt0, 4.0 + %fadd1 = fadd half %elt1, 2.0 + store volatile half %fmul0, half addrspace(1)* undef + store volatile half %fadd1, half addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}v_extract_fneg_no_fold_v2f16: +; GCN: flat_load_dword [[VAL:v[0-9]+]] +; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80008000, [[VAL]] +; GCN: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[NEG]] +define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(<2 x half> addrspace(1)* %in) #0 { + %val = load <2 x half>, <2 x half> addrspace(1)* %in + %fneg = fsub <2 x half> , %val + %elt0 = extractelement <2 x half> %fneg, i32 0 + %elt1 = extractelement <2 x half> %fneg, i32 1 + store volatile half %elt0, half addrspace(1)* undef + store volatile half %elt1, half addrspace(1)* undef + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/fptosi.f16.ll b/test/CodeGen/AMDGPU/fptosi.f16.ll index 50e56e08416ad4813a18d893e8a641b34dc2e16e..f310618d8bdb674ee6b1cfce512ef16704228f00 100644 --- a/test/CodeGen/AMDGPU/fptosi.f16.ll +++ b/test/CodeGen/AMDGPU/fptosi.f16.ll @@ -66,7 +66,7 @@ entry: ; VI: v_cvt_f32_f16_sdwa v[[A_F32_1:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI: v_cvt_i32_f32_e32 v[[R_I16_0:[0-9]+]], v[[A_F32_0]] ; VI: v_cvt_i32_f32_sdwa v[[R_I16_1:[0-9]+]], v[[A_F32_1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GCN: buffer_store_dword v[[R_V2_I16]] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/fptoui.f16.ll b/test/CodeGen/AMDGPU/fptoui.f16.ll index 2afa6111cf17477aad722a974a632cda17c5052d..7641c08e33c367b9eb07f5c479068b9c885da981 100644 --- a/test/CodeGen/AMDGPU/fptoui.f16.ll +++ b/test/CodeGen/AMDGPU/fptoui.f16.ll @@ -66,7 +66,7 @@ entry: ; VI-DAG: v_cvt_f32_f16_sdwa v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; VI: v_cvt_i32_f32_e32 v[[R_I16_1:[0-9]+]], v[[A_F32_1]] ; VI: v_cvt_i32_f32_sdwa v[[R_I16_0:[0-9]+]], v[[A_F32_0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_0]], v[[R_I16_1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v[[R_V2_I16:[0-9]+]], v[[R_I16_1]], v[[R_I16_0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GCN: buffer_store_dword v[[R_V2_I16]] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/fptrunc.f16.ll b/test/CodeGen/AMDGPU/fptrunc.f16.ll index cdcc7be8f2f8d3072475b19dad4c43ee0dda0896..bc72f4424c98f04a52eeb57cea308a2987940c18 100644 --- a/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -1,7 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SIVI %s ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=SIVI %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,+fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-FLUSH %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}fptrunc_f32_to_f16: ; GCN: buffer_load_dword v[[A_F32:[0-9]+]] @@ -45,10 +44,8 @@ entry: ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]] ; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]] -; GFX9-FLUSH: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] -; GFX9-FLUSH: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] - -; GFX9-DENORM: v_pack_b32_f16 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] +; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] +; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm @@ -74,10 +71,8 @@ entry: ; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]] -; GFX9-FLUSH: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] -; GFX9-FLUSH: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] - -; GFX9-DENORM: v_pack_b32_f16 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] +; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] +; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] diff --git a/test/CodeGen/AMDGPU/fract.f64.ll b/test/CodeGen/AMDGPU/fract.f64.ll index 7a5bcfffa3f3b2eb73d7c04051bc9dd5e8d841f4..9a56cbe983cdd4198b6a7c74509acc96f3ce41a1 100644 --- a/test/CodeGen/AMDGPU/fract.f64.ll +++ b/test/CodeGen/AMDGPU/fract.f64.ll @@ -12,7 +12,7 @@ declare double @llvm.floor.f64(double) #0 ; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]] ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1 ; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff -; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] +; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]] ; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3 ; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc ; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc @@ -39,7 +39,7 @@ define amdgpu_kernel void @fract_f64(double addrspace(1)* %out, double addrspace ; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]] ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1 ; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff -; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] +; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]] ; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3 ; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc ; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc @@ -67,7 +67,7 @@ define amdgpu_kernel void @fract_f64_neg(double addrspace(1)* %out, double addrs ; SI-DAG: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]| ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1 ; SI-DAG: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff -; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]] +; SI-DAG: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], [[FRC]], v{{\[}}[[UPLO]]:[[UPHI]]] ; SI-DAG: v_cmp_class_f64_e64 vcc, v{{\[}}[[LO]]:[[HI]]], 3 ; SI: v_cndmask_b32_e32 v[[RESLO:[0-9]+]], v[[MINLO]], v[[LO]], vcc ; SI: v_cndmask_b32_e32 v[[RESHI:[0-9]+]], v[[MINHI]], v[[HI]], vcc diff --git a/test/CodeGen/AMDGPU/frame-index-amdgiz.ll b/test/CodeGen/AMDGPU/frame-index-amdgiz.ll new file mode 100644 index 0000000000000000000000000000000000000000..dd46403b68af1019f3ebcbe6ea151f90e7046bfc --- /dev/null +++ b/test/CodeGen/AMDGPU/frame-index-amdgiz.ll @@ -0,0 +1,55 @@ +; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; +; The original OpenCL kernel: +; kernel void f(global int *a, int i, int j) { +; int x[100]; +; x[i] = 7; +; a[0] = x[j]; +; } +; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o - + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +target triple = "amdgcn---amdgiz" + +define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 { +entry: +; CHECK: s_load_dword s2, s[0:1], 0xb +; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; CHECK: s_load_dword s0, s[0:1], 0xc +; CHECK: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; CHECK: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; CHECK: s_mov_b32 s10, -1 +; CHECK: s_waitcnt lgkmcnt(0) +; CHECK: s_lshl_b32 s1, s2, 2 +; CHECK: v_mov_b32_e32 v0, 4 +; CHECK: s_mov_b32 s11, 0xe8f000 +; CHECK: v_add_i32_e32 v1, vcc, s1, v0 +; CHECK: v_mov_b32_e32 v2, 7 +; CHECK: s_lshl_b32 s0, s0, 2 +; CHECK: buffer_store_dword v2, v1, s[8:11], s3 offen +; CHECK: v_add_i32_e32 v0, vcc, s0, v0 +; CHECK: buffer_load_dword v0, v0, s[8:11], s3 offen +; CHECK: s_mov_b32 s7, 0xf000 +; CHECK: s_mov_b32 s6, -1 +; CHECK: s_waitcnt vmcnt(0) +; CHECK: buffer_store_dword v0, off, s[4:7], 0 +; CHECK: s_endpgm + + %x = alloca [100 x i32], align 4, addrspace(5) + %0 = bitcast [100 x i32] addrspace(5)* %x to i8 addrspace(5)* + call void @llvm.lifetime.start.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0 + %arrayidx = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %i + store i32 7, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %j + %1 = load i32, i32 addrspace(5)* %arrayidx2, align 4 + store i32 %1, i32 addrspace(1)* %a, align 4 + call void @llvm.lifetime.end.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0 + ret void +} + +declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1 + +declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } diff --git a/test/CodeGen/AMDGPU/frame-index-elimination.ll b/test/CodeGen/AMDGPU/frame-index-elimination.ll new file mode 100644 index 0000000000000000000000000000000000000000..eab73b9013010f760c1ce6037f663f8096476e2d --- /dev/null +++ b/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -0,0 +1,124 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; Test that non-entry function frame indices are expanded properly to +; give an index relative to the scratch wave offset register + +; Materialize into a mov. Make sure there isn't an unnecessary copy. +; GCN-LABEL: {{^}}func_mov_fi_i32: +; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN: s_sub_u32 vcc_hi, s5, s4 +; GCN-NEXT: s_lshr_b32 vcc_hi, vcc_hi, 6 +; GCN-NEXT: v_add_i32_e64 v0, vcc, vcc_hi, 4 +; GCN-NOT: v_mov +; GCN: ds_write_b32 v0, v0 +define void @func_mov_fi_i32() #0 { + %alloca = alloca i32 + store volatile i32* %alloca, i32* addrspace(3)* undef + ret void +} + +; Materialize into an add of a constant offset from the FI. +; FIXME: Should be able to merge adds + +; GCN-LABEL: {{^}}func_add_constant_to_fi_i32: +; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN: s_sub_u32 vcc_hi, s5, s4 +; GCN-NEXT: s_lshr_b32 vcc_hi, vcc_hi, 6 +; GCN-NEXT: v_add_i32_e64 v0, {{s\[[0-9]+:[0-9]+\]|vcc}}, vcc_hi, 4 +; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0 +; GCN-NOT: v_mov +; GCN: ds_write_b32 v0, v0 +define void @func_add_constant_to_fi_i32() #0 { + %alloca = alloca [2 x i32], align 4 + %gep0 = getelementptr inbounds [2 x i32], [2 x i32]* %alloca, i32 0, i32 1 + store volatile i32* %gep0, i32* addrspace(3)* undef + ret void +} + +; A user the materialized frame index can't be meaningfully folded +; into. + +; GCN-LABEL: {{^}}func_other_fi_user_i32: +; GCN: s_sub_u32 vcc_hi, s5, s4 +; GCN-NEXT: s_lshr_b32 vcc_hi, vcc_hi, 6 +; GCN-NEXT: v_add_i32_e64 v0, vcc, vcc_hi, 4 +; GCN-NEXT: v_mul_lo_i32 v0, v0, 9 +; GCN-NOT: v_mov +; GCN: ds_write_b32 v0, v0 +define void @func_other_fi_user_i32() #0 { + %alloca = alloca [2 x i32], align 4 + %ptrtoint = ptrtoint [2 x i32]* %alloca to i32 + %mul = mul i32 %ptrtoint, 9 + store volatile i32 %mul, i32 addrspace(3)* undef + ret void +} + +; GCN-LABEL: {{^}}func_store_private_arg_i32_ptr: +; GCN: v_mov_b32_e32 v1, 15{{$}} +; GCN: buffer_store_dword v1, v0, s[0:3], s4 offen{{$}} +define void @func_store_private_arg_i32_ptr(i32* %ptr) #0 { + store volatile i32 15, i32* %ptr + ret void +} + +; GCN-LABEL: {{^}}func_load_private_arg_i32_ptr: +; GCN: s_waitcnt +; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], s4 offen{{$}} +define void @func_load_private_arg_i32_ptr(i32* %ptr) #0 { + %val = load volatile i32, i32* %ptr + ret void +} + +; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr: +; GCN: s_waitcnt +; GCN-NEXT: s_sub_u32 vcc_hi, s5, s4 +; GCN-NEXT: v_lshr_b32_e64 v0, vcc_hi, 6 +; GCN-NEXT: v_add_i32_e32 v0, vcc, 4, v0 +; GCN-NOT: v_mov +; GCN: ds_write_b32 v0, v0 +define void @void_func_byval_struct_i8_i32_ptr({ i8, i32 }* byval %arg0) #0 { + %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1 + %load1 = load i32, i32* %gep1 + store volatile i32* %gep1, i32* addrspace(3)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_value: +; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: buffer_load_ubyte v0, off, s[0:3], s5 +; GCN_NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4 +define void @void_func_byval_struct_i8_i32_ptr_value({ i8, i32 }* byval %arg0) #0 { + %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1 + %load0 = load i8, i8* %gep0 + %load1 = load i32, i32* %gep1 + store volatile i8 %load0, i8 addrspace(3)* undef + store volatile i32 %load1, i32 addrspace(3)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_ptr_nonentry_block: +; GCN: s_sub_u32 vcc_hi, s5, s4 +; GCN: v_lshr_b32_e64 v1, vcc_hi, 6 +; GCN: s_and_saveexec_b64 + +; GCN: v_add_i32_e32 v0, vcc, 4, v1 +; GCN: buffer_load_dword v1, v1, s[0:3], s4 offen offset:4 +; GCN: ds_write_b32 +define void @void_func_byval_struct_i8_i32_ptr_nonentry_block({ i8, i32 }* byval %arg0, i32 %arg2) #0 { + %cmp = icmp eq i32 %arg2, 0 + br i1 %cmp, label %bb, label %ret + +bb: + %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1 + %load1 = load volatile i32, i32* %gep1 + store volatile i32* %gep1, i32* addrspace(3)* undef + br label %ret + +ret: + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/fsub.f16.ll b/test/CodeGen/AMDGPU/fsub.f16.ll index d3c5df3177713369bb780f7dbfdc62c0c3485b31..fa00c06546dbdcb4debde5d1a6c7f52649187bd5 100644 --- a/test/CodeGen/AMDGPU/fsub.f16.ll +++ b/test/CodeGen/AMDGPU/fsub.f16.ll @@ -78,7 +78,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_subrev_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_subrev_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] neg_lo:[0,1] neg_hi:[0,1] @@ -99,7 +99,7 @@ entry: } ; GCN-LABEL: {{^}}fsub_v2f16_imm_a: -; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] ; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] @@ -111,14 +111,13 @@ entry: ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] -; VI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] -; VI-DAG: v_sub_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[B_F16_1]] +; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000 +; VI-DAG: v_sub_f16_sdwa v[[R_F16_HI:[0-9]+]], [[CONST2]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-DAG: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]] -; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x40003c00 -; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], [[K]], v[[B_V2_F16]] neg_lo:[0,1] neg_hi:[0,1] +; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[B_V2_F16]], [[K]] neg_lo:[1,0] neg_hi:[1,0] ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm @@ -134,7 +133,7 @@ entry: } ; GCN-LABEL: {{^}}fsub_v2f16_imm_b: -; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] ; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] @@ -146,14 +145,13 @@ entry: ; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] -; VI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; VI-DAG: v_add_f16_e32 v[[R_F16_1:[0-9]+]], -1.0, v[[A_F16_1]] +; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00 +; VI-DAG: v_add_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[A_V2_F16]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]] -; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00c000 -; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], [[K]], v[[A_V2_F16]]{{$}} +; GFX9: v_pk_add_f16 v[[R_V2_F16:[0-9]+]], v[[A_V2_F16]], [[K]]{{$}} ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/fsub64.ll b/test/CodeGen/AMDGPU/fsub64.ll index 1b0879d098ee01fa572fc1666aa614dc95a5f2f0..dc332414a1527e742364268566b4a21679127f25 100644 --- a/test/CodeGen/AMDGPU/fsub64.ll +++ b/test/CodeGen/AMDGPU/fsub64.ll @@ -39,7 +39,7 @@ define amdgpu_kernel void @fsub_fabs_inv_f64(double addrspace(1)* %out, double a } ; SI-LABEL: {{^}}s_fsub_f64: -; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} define amdgpu_kernel void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) { %sub = fsub double %a, %b store double %sub, double addrspace(1)* %out diff --git a/test/CodeGen/AMDGPU/function-args.ll b/test/CodeGen/AMDGPU/function-args.ll new file mode 100644 index 0000000000000000000000000000000000000000..9b1368493ba5b93dad4d495e1befab006d643089 --- /dev/null +++ b/test/CodeGen/AMDGPU/function-args.ll @@ -0,0 +1,734 @@ +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s + +; GCN-LABEL: {{^}}void_func_i1: +; GCN: v_and_b32_e32 v0, 1, v0 +; GCN: buffer_store_byte v0, off +define void @void_func_i1(i1 %arg0) #0 { + store i1 %arg0, i1 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i1_zeroext: +; GCN: s_waitcnt +; GCN-NEXT: v_or_b32_e32 v0, 12, v0 +; GCN-NOT: v0 +; GCN: buffer_store_dword v0, off +define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { + %ext = zext i1 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i1_signext: +; GCN: s_waitcnt +; GCN-NEXT: v_add_i32_e32 v0, vcc, 12, v0 +; GCN-NOT: v0 +; GCN: buffer_store_dword v0, off +define void @void_func_i1_signext(i1 signext %arg0) #0 { + %ext = sext i1 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i8: +; GCN-NOT: v0 +; GCN: buffer_store_byte v0, off +define void @void_func_i8(i8 %arg0) #0 { + store i8 %arg0, i8 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i8_zeroext: +; GCN-NOT: and_b32 +; GCN: v_add_i32_e32 v0, vcc, 12, v0 +define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { + %ext = zext i8 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i8_signext: +; GCN-NOT: v_bfe_i32 +; GCN: v_add_i32_e32 v0, vcc, 12, v0 +define void @void_func_i8_signext(i8 signext %arg0) #0 { + %ext = sext i8 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i16: +; GCN: buffer_store_short v0, off +define void @void_func_i16(i16 %arg0) #0 { + store i16 %arg0, i16 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i16_zeroext: +; GCN-NOT: v0 +; GCN: v_add_i32_e32 v0, vcc, 12, v0 +define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { + %ext = zext i16 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i16_signext: +; GCN-NOT: v0 +; GCN: v_add_i32_e32 v0, vcc, 12, v0 +define void @void_func_i16_signext(i16 signext %arg0) #0 { + %ext = sext i16 %arg0 to i32 + %add = add i32 %ext, 12 + store i32 %add, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i32: +; GCN-NOT: v0 +; GCN: buffer_store_dword v0, off +define void @void_func_i32(i32 %arg0) #0 { + store i32 %arg0, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_i64: +; GCN-NOT: v[0:1] +; GCN-NOT: v0 +; GCN-NOT: v1 +; GCN: buffer_store_dwordx2 v[0:1], off +define void @void_func_i64(i64 %arg0) #0 { + store i64 %arg0, i64 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_f16: +; VI-NOT: v0 +; CI: v_cvt_f16_f32_e32 v0, v0 +; GCN: buffer_store_short v0, off +define void @void_func_f16(half %arg0) #0 { + store half %arg0, half addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_f32 +; GCN-NOT: v0 +; GCN: buffer_store_dword v0, off +define void @void_func_f32(float %arg0) #0 { + store float %arg0, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_f64: +; GCN-NOT: v[0:1] +; GCN-NOT: v0 +; GCN-NOT: v1 +; GCN: buffer_store_dwordx2 v[0:1], off +define void @void_func_f64(double %arg0) #0 { + store double %arg0, double addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v2i32: +; GCN-NOT: v[0:1] +; GCN-NOT: v0 +; GCN-NOT: v1 +; GCN: buffer_store_dwordx2 v[0:1], off +define void @void_func_v2i32(<2 x i32> %arg0) #0 { + store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v3i32: +; GCN-DAG: buffer_store_dword v2, off +; GCN-DAG: buffer_store_dwordx2 v[0:1], off +define void @void_func_v3i32(<3 x i32> %arg0) #0 { + store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v4i32: +; GCN: buffer_store_dwordx4 v[0:3], off +define void @void_func_v4i32(<4 x i32> %arg0) #0 { + store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v5i32: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dword v4, off +define void @void_func_v5i32(<5 x i32> %arg0) #0 { + store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v8i32: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +define void @void_func_v8i32(<8 x i32> %arg0) #0 { + store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v16i32: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx4 v[8:11], off +; GCN-DAG: buffer_store_dwordx4 v[12:15], off +define void @void_func_v16i32(<16 x i32> %arg0) #0 { + store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v32i32: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx4 v[8:11], off +; GCN-DAG: buffer_store_dwordx4 v[12:15], off +; GCN-DAG: buffer_store_dwordx4 v[16:19], off +; GCN-DAG: buffer_store_dwordx4 v[20:23], off +; GCN-DAG: buffer_store_dwordx4 v[24:27], off +; GCN-DAG: buffer_store_dwordx4 v[28:31], off +define void @void_func_v32i32(<32 x i32> %arg0) #0 { + store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + ret void +} + +; 1 over register limit +; GCN-LABEL: {{^}}void_func_v33i32: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx4 v[8:11], off +; GCN-DAG: buffer_store_dwordx4 v[12:15], off +; GCN-DAG: buffer_load_dword [[STACKLOAD:v[0-9]+]], off, s[0:3], s5 +; GCN-DAG: buffer_store_dwordx4 v[16:19], off +; GCN-DAG: buffer_store_dwordx4 v[20:23], off +; GCN-DAG: buffer_store_dwordx4 v[24:27], off +; GCN-DAG: buffer_store_dwordx4 v[28:31], off +; GCN: buffer_store_dword [[STACKLOAD]], off +define void @void_func_v33i32(<33 x i32> %arg0) #0 { + store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v2i64: +; GCN: buffer_store_dwordx4 v[0:3], off +define void @void_func_v2i64(<2 x i64> %arg0) #0 { + store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v3i64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx2 v[4:5], off +define void @void_func_v3i64(<3 x i64> %arg0) #0 { + store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v4i64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +define void @void_func_v4i64(<4 x i64> %arg0) #0 { + store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v5i64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx2 v[8:9], off +define void @void_func_v5i64(<5 x i64> %arg0) #0 { + store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v8i64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx4 v[8:11], off +; GCN-DAG: buffer_store_dwordx4 v[12:15], off +define void @void_func_v8i64(<8 x i64> %arg0) #0 { + store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v16i64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx4 v[8:11], off +; GCN-DAG: buffer_store_dwordx4 v[12:15], off +; GCN-DAG: buffer_store_dwordx4 v[16:19], off +; GCN-DAG: buffer_store_dwordx4 v[20:23], off +; GCN-DAG: buffer_store_dwordx4 v[24:27], off +; GCN-DAG: buffer_store_dwordx4 v[28:31], off +define void @void_func_v16i64(<16 x i64> %arg0) #0 { + store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v2i16: +; GFX9-NOT: v0 +; GFX9: buffer_store_dword v0, off +define void @void_func_v2i16(<2 x i16> %arg0) #0 { + store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v3i16: +; GCN-DAG: buffer_store_dword v0, off +; GCN-DAG: buffer_store_short v2, off +define void @void_func_v3i16(<3 x i16> %arg0) #0 { + store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v4i16: +; GFX9-NOT: v0 +; GFX9-NOT: v1 +; GFX9: buffer_store_dwordx2 v[0:1], off +define void @void_func_v4i16(<4 x i16> %arg0) #0 { + store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v5i16: +; GCN-DAG: buffer_store_short v4, off, +; GCN-DAG: buffer_store_dwordx2 v[1:2], off +define void @void_func_v5i16(<5 x i16> %arg0) #0 { + store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v8i16: +; GFX9-DAG: buffer_store_dwordx4 v[0:3], off +define void @void_func_v8i16(<8 x i16> %arg0) #0 { + store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v16i16: +; GFX9-DAG: buffer_store_dwordx4 v[0:3], off +; GFX9-DAG: buffer_store_dwordx4 v[4:7], off +define void @void_func_v16i16(<16 x i16> %arg0) #0 { + store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v2f32: +; GCN-NOT: v[0:1] +; GCN-NOT: v0 +; GCN-NOT: v1 +; GCN: buffer_store_dwordx2 v[0:1], off +define void @void_func_v2f32(<2 x float> %arg0) #0 { + store <2 x float> %arg0, <2 x float> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v3f32: +; GCN-DAG: buffer_store_dword v2, off +; GCN-DAG: buffer_store_dwordx2 v[0:1], off +define void @void_func_v3f32(<3 x float> %arg0) #0 { + store <3 x float> %arg0, <3 x float> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v4f32: +; GCN: buffer_store_dwordx4 v[0:3], off +define void @void_func_v4f32(<4 x float> %arg0) #0 { + store <4 x float> %arg0, <4 x float> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v8f32: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +define void @void_func_v8f32(<8 x float> %arg0) #0 { + store <8 x float> %arg0, <8 x float> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v16f32: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx4 v[8:11], off +; GCN-DAG: buffer_store_dwordx4 v[12:15], off +define void @void_func_v16f32(<16 x float> %arg0) #0 { + store <16 x float> %arg0, <16 x float> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v2f64: +; GCN: buffer_store_dwordx4 v[0:3], off +define void @void_func_v2f64(<2 x double> %arg0) #0 { + store <2 x double> %arg0, <2 x double> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v3f64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx2 v[4:5], off +define void @void_func_v3f64(<3 x double> %arg0) #0 { + store <3 x double> %arg0, <3 x double> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v4f64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +define void @void_func_v4f64(<4 x double> %arg0) #0 { + store <4 x double> %arg0, <4 x double> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v8f64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx4 v[8:11], off +; GCN-DAG: buffer_store_dwordx4 v[12:15], off +define void @void_func_v8f64(<8 x double> %arg0) #0 { + store <8 x double> %arg0, <8 x double> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v16f64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx4 v[8:11], off +; GCN-DAG: buffer_store_dwordx4 v[12:15], off +; GCN-DAG: buffer_store_dwordx4 v[16:19], off +; GCN-DAG: buffer_store_dwordx4 v[20:23], off +; GCN-DAG: buffer_store_dwordx4 v[24:27], off +; GCN-DAG: buffer_store_dwordx4 v[28:31], off +define void @void_func_v16f64(<16 x double> %arg0) #0 { + store <16 x double> %arg0, <16 x double> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v2f16: +; GFX9-NOT: v0 +; GFX9: buffer_store_dword v0, off +define void @void_func_v2f16(<2 x half> %arg0) #0 { + store <2 x half> %arg0, <2 x half> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v3f16: +; GFX9-NOT: v0 +; GCN-DAG: buffer_store_dword v0, off +; GCN-DAG: buffer_store_short v2, off +define void @void_func_v3f16(<3 x half> %arg0) #0 { + store <3 x half> %arg0, <3 x half> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v4f16: +; GFX9-NOT: v0 +; GFX9-NOT: v1 +; GFX9-NOT: v[0:1] +; GFX9: buffer_store_dwordx2 v[0:1], off +define void @void_func_v4f16(<4 x half> %arg0) #0 { + store <4 x half> %arg0, <4 x half> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v8f16: +; GFX9-NOT: v0 +; GFX9-NOT: v1 +; GFX9: buffer_store_dwordx4 v[0:3], off +define void @void_func_v8f16(<8 x half> %arg0) #0 { + store <8 x half> %arg0, <8 x half> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v16f16: +; GFX9-NOT: v0 +; GFX9-NOT: v1 +; GFX9-DAG: buffer_store_dwordx4 v[0:3], off +; GFX9-DAG: buffer_store_dwordx4 v[4:7], off +define void @void_func_v16f16(<16 x half> %arg0) #0 { + store <16 x half> %arg0, <16 x half> addrspace(1)* undef + ret void +} + +; Make sure there is no alignment requirement for passed vgprs. +; GCN-LABEL: {{^}}void_func_i32_i64_i32: +; GCN-NOT: v0 +; GCN: buffer_store_dword v0, off +; GCN: buffer_store_dwordx2 v[1:2] +; GCN: buffer_store_dword v3 +define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { + store volatile i32 %arg0, i32 addrspace(1)* undef + store volatile i64 %arg1, i64 addrspace(1)* undef + store volatile i32 %arg2, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_struct_i32: +; GCN-NOT: v0 +; GCN: buffer_store_dword v0, off +define void @void_func_struct_i32({ i32 } %arg0) #0 { + store { i32 } %arg0, { i32 } addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_struct_i8_i32: +; GCN-DAG: buffer_store_byte v0, off +; GCN-DAG: buffer_store_dword v1, off +define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { + store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32: +; GCN-DAG: buffer_load_ubyte v[[ELT0:[0-9]+]], off, s[0:3], s5{{$}} +; GCN-DAG: buffer_load_dword v[[ELT1:[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; GCN-DAG: buffer_store_dword v[[ELT1]] +; GCN-DAG: buffer_store_byte v[[ELT0]] +define void @void_func_byval_struct_i8_i32({ i8, i32 }* byval %arg0) #0 { + %arg0.load = load { i8, i32 }, { i8, i32 }* %arg0 + store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_byval_struct_i8_i32_x2: +; GCN: buffer_load_ubyte v[[ELT0_0:[0-9]+]], off, s[0:3], s5{{$}} +; GCN: buffer_load_dword v[[ELT1_0:[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; GCN: buffer_load_ubyte v[[ELT0_1:[0-9]+]], off, s[0:3], s5 offset:8{{$}} +; GCN: buffer_load_dword v[[ELT1_1:[0-9]+]], off, s[0:3], s5 offset:12{{$}} + +; GCN: ds_write_b32 v0, v0 +; GCN: s_setpc_b64 +define void @void_func_byval_struct_i8_i32_x2({ i8, i32 }* byval %arg0, { i8, i32 }* byval %arg1, i32 %arg2) #0 { + %arg0.load = load volatile { i8, i32 }, { i8, i32 }* %arg0 + %arg1.load = load volatile { i8, i32 }, { i8, i32 }* %arg1 + store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef + store volatile { i8, i32 } %arg1.load, { i8, i32 } addrspace(1)* undef + store volatile i32 %arg2, i32 addrspace(3)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_byval_i32_byval_i64: +; GCN-DAG: buffer_load_dword v[[ARG0_LOAD:[0-9]+]], off, s[0:3], s5{{$}} +; GCN-DAG: buffer_load_dword v[[ARG1_LOAD0:[0-9]+]], off, s[0:3], s5 offset:8{{$}} +; GCN-DAG: buffer_load_dword v[[ARG1_LOAD1:[0-9]+]], off, s[0:3], s5 offset:12{{$}} +; GCN-DAG: buffer_store_dword v[[ARG0_LOAD]], off +; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[ARG1_LOAD0]]:[[ARG1_LOAD1]]{{\]}}, off +define void @void_func_byval_i32_byval_i64(i32* byval %arg0, i64* byval %arg1) #0 { + %arg0.load = load i32, i32* %arg0 + %arg1.load = load i64, i64* %arg1 + store i32 %arg0.load, i32 addrspace(1)* undef + store i64 %arg1.load, i64 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v32i32_i32_i64: +; GCN-DAG: buffer_store_dwordx4 v[0:3], off +; GCN-DAG: buffer_store_dwordx4 v[4:7], off +; GCN-DAG: buffer_store_dwordx4 v[8:11], off +; GCN-DAG: buffer_store_dwordx4 v[12:15], off +; GCN-DAG: buffer_store_dwordx4 v[16:19], off +; GCN-DAG: buffer_store_dwordx4 v[20:23], off +; GCN-DAG: buffer_store_dwordx4 v[24:27], off +; GCN-DAG: buffer_store_dwordx4 v[28:31], off +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1:[0-9]+]], off, s[0:3], s5{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:4 +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:8 + +; GCN: buffer_store_dword v[[LOAD_ARG1]] +; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off +define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile i32 %arg1, i32 addrspace(1)* undef + store volatile i64 %arg2, i64 addrspace(1)* undef + ret void +} + +; FIXME: Different ext load types on CI vs. VI +; GCN-LABEL: {{^}}void_func_v32i32_i1_i8_i16: +; GCN-DAG: buffer_load_ubyte [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s5{{$}} +; VI-DAG: buffer_load_ushort [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; VI-DAG: buffer_load_ushort [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s5 offset:8{{$}} +; VI-DAG: buffer_load_ushort [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s5 offset:12{{$}} + +; CI-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; CI-DAG: buffer_load_dword [[LOAD_ARG3:v[0-9]+]], off, s[0:3], s5 offset:8{{$}} +; CI-DAG: buffer_load_dword [[LOAD_ARG4:v[0-9]+]], off, s[0:3], s5 offset:12{{$}} + +; GCN-DAG: v_and_b32_e32 [[TRUNC_ARG1_I1:v[0-9]+]], 1, [[LOAD_ARG1]] +; CI-DAG: v_cvt_f16_f32_e32 [[CVT_ARG4:v[0-9]+]], [[LOAD_ARG4]] + +; GCN: buffer_store_byte [[TRUNC_ARG1_I1]], off +; GCN: buffer_store_byte [[LOAD_ARG2]], off +; GCN: buffer_store_short [[LOAD_ARG3]], off +; VI: buffer_store_short [[LOAD_ARG4]], off + +; CI: buffer_store_short [[CVT_ARG4]], off +define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 { + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile i1 %arg1, i1 addrspace(1)* undef + store volatile i8 %arg2, i8 addrspace(1)* undef + store volatile i16 %arg3, i16 addrspace(1)* undef + store volatile half %arg4, half addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v32i32_v2i32_v2f32: +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:8{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:12{{$}} + +; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_1]]{{\]}}, off +; GCN: buffer_store_dwordx2 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_1]]{{\]}}, off +define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef + store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v32i32_v2i16_v2f16: +; GFX9-DAG: buffer_load_dword [[LOAD_ARG1:v[0-9]+]], off, s[0:3], s5{{$}} +; GFX9-DAG: buffer_load_dword [[LOAD_ARG2:v[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; GFX9: buffer_store_dword [[LOAD_ARG1]], off +; GFX9: buffer_store_short [[LOAD_ARG2]], off +define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 { + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef + store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v32i32_v2i64_v2f64: +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:8{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:12{{$}} + +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:16{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:20{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:24{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:28{{$}} + +; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off +; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off +define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef + store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v32i32_v4i32_v4f32: +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:8{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:12{{$}} + +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:16{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:20{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:24{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:28{{$}} + +; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off +; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off +define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef + store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v32i32_v8i32_v8f32: +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:8{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:12{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s5 offset:16{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s5 offset:20{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s5 offset:24{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s5 offset:28{{$}} + +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:32{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:36{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:40{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:44{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s5 offset:48{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s5 offset:52{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s5 offset:56{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s5 offset:60{{$}} + +; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_4]]:[[LOAD_ARG1_7]]{{\]}}, off +; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG1_0]]:[[LOAD_ARG1_3]]{{\]}}, off +; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_4]]:[[LOAD_ARG2_7]]{{\]}}, off +; GCN: buffer_store_dwordx4 v{{\[}}[[LOAD_ARG2_0]]:[[LOAD_ARG2_3]]{{\]}}, off +define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef + store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}void_func_v32i32_v16i32_v16f32: +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_0:[0-9]+]], off, s[0:3], s5{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_1:[0-9]+]], off, s[0:3], s5 offset:4{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_2:[0-9]+]], off, s[0:3], s5 offset:8{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_3:[0-9]+]], off, s[0:3], s5 offset:12{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_4:[0-9]+]], off, s[0:3], s5 offset:16{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_5:[0-9]+]], off, s[0:3], s5 offset:20{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_6:[0-9]+]], off, s[0:3], s5 offset:24{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_7:[0-9]+]], off, s[0:3], s5 offset:28{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_8:[0-9]+]], off, s[0:3], s5 offset:32{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_9:[0-9]+]], off, s[0:3], s5 offset:36{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_10:[0-9]+]], off, s[0:3], s5 offset:40{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_11:[0-9]+]], off, s[0:3], s5 offset:44{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_12:[0-9]+]], off, s[0:3], s5 offset:48{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_13:[0-9]+]], off, s[0:3], s5 offset:52{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_14:[0-9]+]], off, s[0:3], s5 offset:56{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG1_15:[0-9]+]], off, s[0:3], s5 offset:60{{$}} + +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_0:[0-9]+]], off, s[0:3], s5 offset:64{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_1:[0-9]+]], off, s[0:3], s5 offset:68{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_2:[0-9]+]], off, s[0:3], s5 offset:72{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_3:[0-9]+]], off, s[0:3], s5 offset:76{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_4:[0-9]+]], off, s[0:3], s5 offset:80{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_5:[0-9]+]], off, s[0:3], s5 offset:84{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_6:[0-9]+]], off, s[0:3], s5 offset:88{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_7:[0-9]+]], off, s[0:3], s5 offset:92{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_8:[0-9]+]], off, s[0:3], s5 offset:96{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_9:[0-9]+]], off, s[0:3], s5 offset:100{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_10:[0-9]+]], off, s[0:3], s5 offset:104{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_11:[0-9]+]], off, s[0:3], s5 offset:108{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_12:[0-9]+]], off, s[0:3], s5 offset:112{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_13:[0-9]+]], off, s[0:3], s5 offset:116{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_14:[0-9]+]], off, s[0:3], s5 offset:120{{$}} +; GCN-DAG: buffer_load_dword v[[LOAD_ARG2_15:[0-9]+]], off, s[0:3], s5 offset:124{{$}} +define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef + store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef + ret void +} + +; Check there is no crash. +; GCN-LABEL: {{^}}void_func_v16i8: +define void @void_func_v16i8(<16 x i8> %arg0) #0 { + store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef + ret void +} + +; Check there is no crash. +; GCN-LABEL: {{^}}void_func_v32i32_v16i8: +define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { + store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/function-returns.ll b/test/CodeGen/AMDGPU/function-returns.ll new file mode 100644 index 0000000000000000000000000000000000000000..f704d43a1742c0d9951d5376402eb4789ab2ae37 --- /dev/null +++ b/test/CodeGen/AMDGPU/function-returns.ll @@ -0,0 +1,514 @@ +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s + +; GCN-LABEL: {{^}}i1_func_void: +; GCN: buffer_load_ubyte v0, off +; GCN-NEXT: s_waitcnt +; GCN-NEXT: s_setpc_b64 +define i1 @i1_func_void() #0 { + %val = load i1, i1 addrspace(1)* undef + ret i1 %val +} + +; FIXME: Missing and? +; GCN-LABEL: {{^}}i1_zeroext_func_void: +; GCN: buffer_load_ubyte v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define zeroext i1 @i1_zeroext_func_void() #0 { + %val = load i1, i1 addrspace(1)* undef + ret i1 %val +} + +; GCN-LABEL: {{^}}i1_signext_func_void: +; GCN: buffer_load_ubyte v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}} +; GCN-NEXT: s_setpc_b64 +define signext i1 @i1_signext_func_void() #0 { + %val = load i1, i1 addrspace(1)* undef + ret i1 %val +} + +; GCN-LABEL: {{^}}i8_func_void: +; GCN: buffer_load_ubyte v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i8 @i8_func_void() #0 { + %val = load i8, i8 addrspace(1)* undef + ret i8 %val +} + +; GCN-LABEL: {{^}}i8_zeroext_func_void: +; GCN: buffer_load_ubyte v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define zeroext i8 @i8_zeroext_func_void() #0 { + %val = load i8, i8 addrspace(1)* undef + ret i8 %val +} + +; GCN-LABEL: {{^}}i8_signext_func_void: +; GCN: buffer_load_sbyte v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define signext i8 @i8_signext_func_void() #0 { + %val = load i8, i8 addrspace(1)* undef + ret i8 %val +} + +; GCN-LABEL: {{^}}i16_func_void: +; GCN: buffer_load_ushort v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i16 @i16_func_void() #0 { + %val = load i16, i16 addrspace(1)* undef + ret i16 %val +} + +; GCN-LABEL: {{^}}i16_zeroext_func_void: +; GCN: buffer_load_ushort v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define zeroext i16 @i16_zeroext_func_void() #0 { + %val = load i16, i16 addrspace(1)* undef + ret i16 %val +} + +; GCN-LABEL: {{^}}i16_signext_func_void: +; GCN: buffer_load_sshort v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define signext i16 @i16_signext_func_void() #0 { + %val = load i16, i16 addrspace(1)* undef + ret i16 %val +} + +; GCN-LABEL: {{^}}i32_func_void: +; GCN: buffer_load_dword v0, off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i32 @i32_func_void() #0 { + %val = load i32, i32 addrspace(1)* undef + ret i32 %val +} + +; GCN-LABEL: {{^}}i64_func_void: +; GCN: buffer_load_dwordx2 v[0:1], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define i64 @i64_func_void() #0 { + %val = load i64, i64 addrspace(1)* undef + ret i64 %val +} + +; GCN-LABEL: {{^}}f32_func_void: +; GCN: buffer_load_dword v0, off, s[8:11], 0 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define float @f32_func_void() #0 { + %val = load float, float addrspace(1)* undef + ret float %val +} + +; GCN-LABEL: {{^}}f64_func_void: +; GCN: buffer_load_dwordx2 v[0:1], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define double @f64_func_void() #0 { + %val = load double, double addrspace(1)* undef + ret double %val +} + +; GCN-LABEL: {{^}}v2i32_func_void: +; GCN: buffer_load_dwordx2 v[0:1], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <2 x i32> @v2i32_func_void() #0 { + %val = load <2 x i32>, <2 x i32> addrspace(1)* undef + ret <2 x i32> %val +} + +; GCN-LABEL: {{^}}v3i32_func_void: +; GCN: buffer_load_dwordx4 v[0:3], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <3 x i32> @v3i32_func_void() #0 { + %val = load <3 x i32>, <3 x i32> addrspace(1)* undef + ret <3 x i32> %val +} + +; GCN-LABEL: {{^}}v4i32_func_void: +; GCN: buffer_load_dwordx4 v[0:3], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <4 x i32> @v4i32_func_void() #0 { + %val = load <4 x i32>, <4 x i32> addrspace(1)* undef + ret <4 x i32> %val +} + +; GCN-LABEL: {{^}}v5i32_func_void: +; GCN-DAG: buffer_load_dword v4, off +; GCN-DAG: buffer_load_dwordx4 v[0:3], off +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <5 x i32> @v5i32_func_void() #0 { + %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef + ret <5 x i32> %val +} + +; GCN-LABEL: {{^}}v8i32_func_void: +; GCN-DAG: buffer_load_dwordx4 v[0:3], off +; GCN-DAG: buffer_load_dwordx4 v[4:7], off +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <8 x i32> @v8i32_func_void() #0 { + %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(2)* undef + %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr + ret <8 x i32> %val +} + +; GCN-LABEL: {{^}}v16i32_func_void: +; GCN-DAG: buffer_load_dwordx4 v[0:3], off +; GCN-DAG: buffer_load_dwordx4 v[4:7], off +; GCN-DAG: buffer_load_dwordx4 v[8:11], off +; GCN-DAG: buffer_load_dwordx4 v[12:15], off +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <16 x i32> @v16i32_func_void() #0 { + %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(2)* undef + %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr + ret <16 x i32> %val +} + +; GCN-LABEL: {{^}}v32i32_func_void: +; GCN-DAG: buffer_load_dwordx4 v[0:3], off +; GCN-DAG: buffer_load_dwordx4 v[4:7], off +; GCN-DAG: buffer_load_dwordx4 v[8:11], off +; GCN-DAG: buffer_load_dwordx4 v[12:15], off +; GCN-DAG: buffer_load_dwordx4 v[16:19], off +; GCN-DAG: buffer_load_dwordx4 v[20:23], off +; GCN-DAG: buffer_load_dwordx4 v[24:27], off +; GCN-DAG: buffer_load_dwordx4 v[28:31], off +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <32 x i32> @v32i32_func_void() #0 { + %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(2)* undef + %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr + ret <32 x i32> %val +} + +; GCN-LABEL: {{^}}v2i64_func_void: +; GCN: buffer_load_dwordx4 v[0:3], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <2 x i64> @v2i64_func_void() #0 { + %val = load <2 x i64>, <2 x i64> addrspace(1)* undef + ret <2 x i64> %val +} + +; GCN-LABEL: {{^}}v3i64_func_void: +; GCN-DAG: buffer_load_dwordx4 v[0:3], off +; GCN-DAG: buffer_load_dwordx4 v[4:7], off +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <3 x i64> @v3i64_func_void() #0 { + %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(2)* undef + %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr + ret <3 x i64> %val +} + +; GCN-LABEL: {{^}}v4i64_func_void: +; GCN: buffer_load_dwordx4 v[0:3], off +; GCN: buffer_load_dwordx4 v[4:7], off +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <4 x i64> @v4i64_func_void() #0 { + %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(2)* undef + %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr + ret <4 x i64> %val +} + +; GCN-LABEL: {{^}}v5i64_func_void: +; GCN-DAG: buffer_load_dwordx4 v[0:3], off +; GCN-DAG: buffer_load_dwordx4 v[4:7], off +; GCN-DAG: buffer_load_dwordx4 v[8:11], off +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <5 x i64> @v5i64_func_void() #0 { + %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(2)* undef + %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr + ret <5 x i64> %val +} + +; GCN-LABEL: {{^}}v8i64_func_void: +; GCN-DAG: buffer_load_dwordx4 v[0:3], off +; GCN-DAG: buffer_load_dwordx4 v[4:7], off +; GCN-DAG: buffer_load_dwordx4 v[8:11], off +; GCN-DAG: buffer_load_dwordx4 v[12:15], off +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <8 x i64> @v8i64_func_void() #0 { + %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(2)* undef + %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr + ret <8 x i64> %val +} + +; GCN-LABEL: {{^}}v16i64_func_void: +; GCN-DAG: buffer_load_dwordx4 v[0:3], off +; GCN-DAG: buffer_load_dwordx4 v[4:7], off +; GCN-DAG: buffer_load_dwordx4 v[8:11], off +; GCN-DAG: buffer_load_dwordx4 v[12:15], off +; GCN-DAG: buffer_load_dwordx4 v[16:19], off +; GCN-DAG: buffer_load_dwordx4 v[20:23], off +; GCN-DAG: buffer_load_dwordx4 v[24:27], off +; GCN-DAG: buffer_load_dwordx4 v[28:31], off +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <16 x i64> @v16i64_func_void() #0 { + %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(2)* undef + %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr + ret <16 x i64> %val +} + +; GCN-LABEL: {{^}}v2i16_func_void: +; GFX9: buffer_load_dword v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 +define <2 x i16> @v2i16_func_void() #0 { + %val = load <2 x i16>, <2 x i16> addrspace(1)* undef + ret <2 x i16> %val +} + +; GCN-LABEL: {{^}}v3i16_func_void: +; GFX9: buffer_load_dwordx2 v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 +define <3 x i16> @v3i16_func_void() #0 { + %val = load <3 x i16>, <3 x i16> addrspace(1)* undef + ret <3 x i16> %val +} + +; GCN-LABEL: {{^}}v4i16_func_void: +; GFX9: buffer_load_dwordx2 v[0:1], off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 +define <4 x i16> @v4i16_func_void() #0 { + %val = load <4 x i16>, <4 x i16> addrspace(1)* undef + ret <4 x i16> %val +} + +; FIXME: Should not scalarize +; GCN-LABEL: {{^}}v5i16_func_void: +; GFX9: buffer_load_dwordx2 v[0:1] +; GFX9: buffer_load_ushort v4 +; GFX9: v_lshrrev_b32_e32 v3, 16, v1 +; GFX9: v_mov_b32_e32 v2, v1 +; GFX9: v_lshrrev_b32_e32 v3, 16, v0 +; GCN: s_setpc_b64 +define <5 x i16> @v5i16_func_void() #0 { + %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(2)* undef + %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr + ret <5 x i16> %val +} + +; GCN-LABEL: {{^}}v8i16_func_void: +; GFX9-DAG: buffer_load_dwordx4 v[0:3], off +; GFX9: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 +define <8 x i16> @v8i16_func_void() #0 { + %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(2)* undef + %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + ret <8 x i16> %val +} + +; GCN-LABEL: {{^}}v16i16_func_void: +; GFX9: buffer_load_dwordx4 v[0:3], off +; GFX9: buffer_load_dwordx4 v[4:7], off +; GFX9: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 +define <16 x i16> @v16i16_func_void() #0 { + %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(2)* undef + %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + ret <16 x i16> %val +} + +; FIXME: Should pack +; GCN-LABEL: {{^}}v16i8_func_void: +; GCN-DAG: v12 +; GCN-DAG: v13 +; GCN-DAG: v14 +; GCN-DAG: v15 +define <16 x i8> @v16i8_func_void() #0 { + %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(2)* undef + %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + ret <16 x i8> %val +} + +; FIXME: Should pack +; GCN-LABEL: {{^}}v4i8_func_void: +; GCN: buffer_load_dword v0 +; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0 +; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0 +; CI-DAG: v_bfe_u32 v1, v0, 8, 8 +; VI-DAG: v_lshrrev_b16_e32 v1, 8, v0 +; GCN: s_setpc_b64 +define <4 x i8> @v4i8_func_void() #0 { + %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(2)* undef + %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + ret <4 x i8> %val +} + +; GCN-LABEL: {{^}}struct_i8_i32_func_void: +; GCN-DAG: buffer_load_dword v1 +; GCN-DAG: buffer_load_ubyte v0 +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define {i8, i32} @struct_i8_i32_func_void() #0 { + %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef + ret { i8, i32 } %val +} + +; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32: +; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]] +; GCN: buffer_load_dword [[VAL1:v[0-9]+]] +; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s4 offen{{$}} +; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s4 offen offset:4{{$}} +define void @void_func_sret_struct_i8_i32({ i8, i32 }* sret %arg0) #0 { + %val0 = load volatile i8, i8 addrspace(1)* undef + %val1 = load volatile i32, i32 addrspace(1)* undef + %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %arg0, i32 0, i32 1 + store i8 %val0, i8* %gep0 + store i32 %val1, i32* %gep1 + ret void +} + +; GCN-LABEL: {{^}}v33i32_func_void: +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}} +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define <33 x i32> @v33i32_func_void() #0 { + %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(2)* undef + %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr + ret <33 x i32> %val +} + +; GCN-LABEL: {{^}}struct_v32i32_i32_func_void: +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:4{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:8{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:12{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:16{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:20{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:24{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:28{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:32{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:36{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:40{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:44{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:48{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:52{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:56{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:60{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:64{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:68{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:72{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:76{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:80{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:84{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:88{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:92{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:96{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:100{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:104{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:108{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:112{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:116{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:120{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:124{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}} +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { + %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(2)* undef + %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr + ret { <32 x i32>, i32 }%val +} + +; GCN-LABEL: {{^}}struct_i32_v32i32_func_void: +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:128{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:132{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:136{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:140{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:144{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:148{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:152{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:156{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:160{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:164{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:168{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:172{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:176{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:180{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:184{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:188{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:192{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:196{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:200{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:204{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:208{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:212{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:216{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:220{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:224{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:228{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:232{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:236{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:240{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:244{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:248{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen offset:252{{$}} +; GCN: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 +define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { + %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(2)* undef + %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr + ret { i32, <32 x i32> }%val +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/global-constant.ll b/test/CodeGen/AMDGPU/global-constant.ll index 80acfcca70822ce1ad3680b75a60eb7f1608c703..1898c8fb63ea85e65f2afa64e8bb270c914bc438 100644 --- a/test/CodeGen/AMDGPU/global-constant.ll +++ b/test/CodeGen/AMDGPU/global-constant.ll @@ -29,10 +29,10 @@ define amdgpu_kernel void @private_test(i32 %index, float addrspace(1)* %out) { %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index %val = load float, float addrspace(2)* %ptr - store float %val, float addrspace(1)* %out + store volatile float %val, float addrspace(1)* %out %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index %val2 = load float, float addrspace(2)* %ptr2 - store float %val2, float addrspace(1)* %out + store volatile float %val2, float addrspace(1)* %out ret void } diff --git a/test/CodeGen/AMDGPU/global_smrd_cfg.ll b/test/CodeGen/AMDGPU/global_smrd_cfg.ll index a6a04151caa9892f54c4e70f267db0365ff07ce7..be6e3fd05ae7317859c8deb2b33dc938e0c3db99 100644 --- a/test/CodeGen/AMDGPU/global_smrd_cfg.ll +++ b/test/CodeGen/AMDGPU/global_smrd_cfg.ll @@ -72,6 +72,39 @@ bb22: ; preds = %bb20, %bb11 br i1 %tmp31, label %bb7, label %bb11 } +; one more test to ensure that aliasing store after the load +; is considered clobbering if load parent block is the same +; as a loop header block. + +; CHECK-LABEL: %bb1 + +; Load from %arg has alias store that is after the load +; but is considered clobbering because of the loop. + +; CHECK: flat_load_dword + +define amdgpu_kernel void @cfg_selfloop(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 { +bb: + br label %bb1 + +bb2: + ret void + +bb1: + %tmp13 = phi i32 [ %tmp25, %bb1 ], [ 0, %bb ] + %tmp14 = srem i32 %tmp13, %arg2 + %tmp15 = sext i32 %tmp14 to i64 + %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15 + %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0 + %tmp19 = sext i32 %tmp13 to i64 + %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19 + store i32 %tmp17, i32 addrspace(1)* %tmp21, align 4, !tbaa !0 + %tmp25 = add nuw nsw i32 %tmp13, 1 + %tmp31 = icmp eq i32 %tmp25, 100 + br i1 %tmp31, label %bb2, label %bb1 +} + + attributes #0 = { "target-cpu"="fiji" } !0 = !{!1, !1, i64 0} diff --git a/test/CodeGen/AMDGPU/hsa-func-align.ll b/test/CodeGen/AMDGPU/hsa-func-align.ll new file mode 100644 index 0000000000000000000000000000000000000000..a00f5e2669d1de77d9d776959f12463518cfdb81 --- /dev/null +++ b/test/CodeGen/AMDGPU/hsa-func-align.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=HSA %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj < %s | llvm-readobj -symbols -s -sd | FileCheck -check-prefix=ELF %s + +; ELF: Section { +; ELF: Name: .text +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_EXECINSTR (0x4) +; ELF: AddressAlignment: 32 +; ELF: } + +; HSA: .globl simple_align16 +; HSA: .p2align 5 +define void @simple_align16(i32 addrspace(1)* addrspace(2)* %ptr.out) align 32 { +entry: + %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/hsa-func.ll b/test/CodeGen/AMDGPU/hsa-func.ll index b4cdd4030d86a79d8d91747f6b13bbd0ca28ff08..35aeeeaa225ce6560f363051de9297875956f474 100644 --- a/test/CodeGen/AMDGPU/hsa-func.ll +++ b/test/CodeGen/AMDGPU/hsa-func.ll @@ -14,6 +14,7 @@ ; ELF: Flags [ (0x6) ; ELF: SHF_ALLOC (0x2) ; ELF: SHF_EXECINSTR (0x4) +; ELF: AddressAlignment: 4 ; ELF: } ; ELF: SHT_NOTE @@ -26,7 +27,7 @@ ; ELF: Symbol { ; ELF: Name: simple -; ELF: Size: 292 +; ELF: Size: 48 ; ELF: Type: Function (0x2) ; ELF: } @@ -36,25 +37,36 @@ ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" ; HSA-NOT: .amdgpu_hsa_kernel simple +; HSA: .globl simple +; HSA: .p2align 2 ; HSA: {{^}}simple: -; HSA: .amd_kernel_code_t -; HSA: enable_sgpr_private_segment_buffer = 1 -; HSA: enable_sgpr_kernarg_segment_ptr = 1 -; HSA: .end_amd_kernel_code_t -; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0 +; HSA-NOT: amd_kernel_code_t +; HSA-NOT: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0 ; Make sure we are setting the ATC bit: -; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000 +; HSA-CI: s_mov_b32 s[[HI:[0-9]+]], 0x100f000 ; On VI+ we also need to set MTYPE = 2 -; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000 +; HSA-VI: s_mov_b32 s[[HI:[0-9]+]], 0x1100f000 ; Make sure we generate flat store for HSA ; HSA: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} ; HSA: .Lfunc_end0: ; HSA: .size simple, .Lfunc_end0-simple +; HSA: ; Function info: +; HSA-NOT: COMPUTE_PGM_RSRC2 +define void @simple(i32 addrspace(1)* addrspace(2)* %ptr.out) { +entry: + %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out + store i32 0, i32 addrspace(1)* %out + ret void +} -define void @simple(i32 addrspace(1)* %out) { +; Ignore explicit alignment that is too low. +; HSA: .globl simple_align2 +; HSA: .p2align 2 +define void @simple_align2(i32 addrspace(1)* addrspace(2)* %ptr.out) align 2 { entry: + %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out store i32 0, i32 addrspace(1)* %out ret void } diff --git a/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/test/CodeGen/AMDGPU/hsa-note-no-func.ll index af63a4f8df760f7b0ce32e2f2af85c81378b4ba4..81d9ed2eba8cc63e6d924f06d5251e0efaa3a0d2 100644 --- a/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ b/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -1,6 +1,12 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx600 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI600 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx601 | FileCheck --check-prefix=HSA --check-prefix=HSA-SI601 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx700 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx701 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx702 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI702 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx703 | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=mullins | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=hawaii | FileCheck --check-prefix=HSA --check-prefix=HSA-CI701 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kabini | FileCheck --check-prefix=HSA --check-prefix=HSA-CI703 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA --check-prefix=HSA-CI700 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI801 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefix=HSA --check-prefix=HSA-VI802 %s @@ -15,11 +21,16 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx903 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX903 %s ; HSA: .hsa_code_object_version 2,1 +; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU" +; HSA-SI601: .hsa_code_object_isa 6,0,1,"AMD","AMDGPU" ; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" ; HSA-CI701: .hsa_code_object_isa 7,0,1,"AMD","AMDGPU" ; HSA-CI702: .hsa_code_object_isa 7,0,2,"AMD","AMDGPU" +; HSA-CI703: .hsa_code_object_isa 7,0,3,"AMD","AMDGPU" ; HSA-VI800: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU" ; HSA-VI801: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" ; HSA-VI802: .hsa_code_object_isa 8,0,2,"AMD","AMDGPU" @@ -28,3 +39,5 @@ ; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU" ; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" ; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU" +; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU" +; HSA-GFX903: .hsa_code_object_isa 9,0,3,"AMD","AMDGPU" diff --git a/test/CodeGen/AMDGPU/i1-copy-phi.ll b/test/CodeGen/AMDGPU/i1-copy-phi.ll index b160af86a2b6dbebb8798a8adc9b091125799481..4a0213dd1de55ed476ae9ba4fb66bc205cf29430 100644 --- a/test/CodeGen/AMDGPU/i1-copy-phi.ll +++ b/test/CodeGen/AMDGPU/i1-copy-phi.ll @@ -12,7 +12,7 @@ ; SI: s_endpgm define amdgpu_kernel void @br_i1_phi(i32 %arg) { bb: - %tidig = call i32 @llvm.r600.read.tidig.x() #0 + %tidig = call i32 @llvm.amdgcn.workitem.id.x() %cmp = trunc i32 %tidig to i1 br i1 %cmp, label %bb2, label %bb3 @@ -32,6 +32,6 @@ bb6: ; preds = %bb4, %bb3 ret void } -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 -attributes #0 = { readnone } +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll index 6e411ce5e017019684039a7dec1b59d059fdadbc..0c5b8fbda22294445eeddd31ca2df4899c3f52a8 100644 --- a/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll +++ b/test/CodeGen/AMDGPU/illegal-sgpr-to-vgpr-copy.ll @@ -5,40 +5,40 @@ ; GCN: ; illegal copy v1 to s9 define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_i32() #0 { - %vgpr = call i32 asm sideeffect "; def $0", "=${VGPR1}"() - call void asm sideeffect "; use $0", "${SGPR9}"(i32 %vgpr) + %vgpr = call i32 asm sideeffect "; def $0", "=${v1}"() + call void asm sideeffect "; use $0", "${s9}"(i32 %vgpr) ret void } ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v2i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:1] to s[10:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v2i32() #0 { - %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1}"() - call void asm sideeffect "; use $0", "${SGPR10_SGPR11}"(<2 x i32> %vgpr) + %vgpr = call <2 x i32> asm sideeffect "; def $0", "=${v[0:1]}"() + call void asm sideeffect "; use $0", "${s[10:11]}"(<2 x i32> %vgpr) ret void } ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v4i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:3] to s[8:11] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v4i32() #0 { - %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3}"() - call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11}"(<4 x i32> %vgpr) + %vgpr = call <4 x i32> asm sideeffect "; def $0", "=${v[0:3]}"() + call void asm sideeffect "; use $0", "${s[8:11]}"(<4 x i32> %vgpr) ret void } ; ERR: error: :0:0: in function illegal_vgpr_to_sgpr_copy_v8i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:7] to s[8:15] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v8i32() #0 { - %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}"() - call void asm sideeffect "; use $0", "${SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}"(<8 x i32> %vgpr) + %vgpr = call <8 x i32> asm sideeffect "; def $0", "=${v[0:7]}"() + call void asm sideeffect "; use $0", "${s[8:15]}"(<8 x i32> %vgpr) ret void } ; ERR error: :0:0: in function illegal_vgpr_to_sgpr_copy_v16i32 void (): illegal SGPR to VGPR copy ; GCN: ; illegal copy v[0:15] to s[16:31] define amdgpu_kernel void @illegal_vgpr_to_sgpr_copy_v16i32() #0 { - %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}"() - call void asm sideeffect "; use $0", "${SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23_SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}"(<16 x i32> %vgpr) + %vgpr = call <16 x i32> asm sideeffect "; def $0", "=${v[0:15]}"() + call void asm sideeffect "; use $0", "${s[16:31]}"(<16 x i32> %vgpr) ret void } diff --git a/test/CodeGen/AMDGPU/immv216.ll b/test/CodeGen/AMDGPU/immv216.ll index 85ad365d02a8975aa6ae6c0680e22c14b74f7e62..cd3502baee7bef7444cb5ba35703762fe813e3d5 100644 --- a/test/CodeGen/AMDGPU/immv216.ll +++ b/test/CodeGen/AMDGPU/immv216.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; FIXME: Merge into imm.ll @@ -123,7 +123,8 @@ define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -140,7 +141,8 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -157,7 +159,8 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -0.5, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -0.5, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -174,7 +177,8 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1.0, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1.0, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -191,7 +195,8 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1.0, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1.0, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -208,7 +213,8 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2.0, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -225,7 +231,8 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2.0, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2.0, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -242,7 +249,8 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 4.0, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 4.0, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -259,7 +267,8 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %ou ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -4.0, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -4.0, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -273,10 +282,10 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(<2 x half> addrspace(1)* ; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 ; GFX9: buffer_store_dword [[REG]] +; VI: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800 ; VI: buffer_load_dword ; VI-NOT: and -; VI: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}} +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}} ; VI: v_or_b32 ; VI: buffer_store_dword @@ -288,16 +297,16 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace } ; GCN-LABEL: {{^}}commute_add_literal_v2f16: -; GFX9: buffer_load_dword [[VAL:v[0-9]+]] -; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x64006400 -; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[K]], [[VAL]] +; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]] +; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}} +; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0]{{$}} ; GFX9: buffer_store_dword [[REG]] ; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x6400{{$}} ; VI-DAG: buffer_load_dword ; VI-NOT: and ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}} -; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[K]], v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: buffer_store_dword define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { @@ -315,7 +324,8 @@ define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %o ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 1, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -332,7 +342,8 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 2, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -349,7 +360,8 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 16, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 16, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -366,7 +378,8 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -1, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xffff +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -383,7 +396,8 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* % ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -2, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xfffe +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -400,7 +414,8 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* % ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, -16, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONSTM16:v[0-9]+]], 0xfff0 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONSTM16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -417,7 +432,8 @@ define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 63, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 63, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { @@ -434,7 +450,8 @@ define amdgpu_kernel void @add_inline_imm_63_v2f16(<2 x half> addrspace(1)* %out ; VI: buffer_load_ushort [[VAL0:v[0-9]+]] ; VI: buffer_load_ushort [[VAL1:v[0-9]+]] ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 64, [[VAL0]] -; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 64, [[VAL1]] +; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64 +; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[VAL1]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_or_b32 ; VI: buffer_store_dword define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 { diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index b18ae353ca4cb15d434f7db30abd155324addc8d..0d20c32a4770c35c07fc16866e67fcc4e6238bfb 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -120,8 +120,7 @@ entry: ; FIXME: The waitcnt for the argument load can go after the loop ; IDXMODE: s_set_gpr_idx_on 0, src0 ; GCN: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, exec -; GCN: s_waitcnt lgkmcnt(0) - +; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]: ; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v{{[0-9]+}} ; MOVREL: s_add_i32 m0, [[READLANE]], 0xfffffe0 @@ -250,8 +249,6 @@ entry: ; GCN-DAG: v_mov_b32_e32 [[VEC_ELT3:v[0-9]+]], 4{{$}} ; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec -; GCN: s_waitcnt lgkmcnt(0) - ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]: ; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]] @@ -290,7 +287,6 @@ entry: ; IDXMODE: s_set_gpr_idx_on 0, dst ; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec -; GCN: s_waitcnt lgkmcnt(0) ; The offset depends on the register that holds the first element of the vector. ; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]] @@ -330,9 +326,9 @@ entry: ; IDXMODE: s_set_gpr_idx_on 0, src0 ; GCN: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec -; GCN: s_waitcnt vmcnt(0) ; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]: +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]] ; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]] @@ -383,7 +379,7 @@ entry: %idx0 = load volatile i32, i32 addrspace(1)* %gep %idx1 = add i32 %idx0, 1 %val0 = extractelement <4 x i32> , i32 %idx0 - %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={SGPR4}" () + %live.out.reg = call i32 asm sideeffect "s_mov_b32 $0, 17", "={s4}" () %val1 = extractelement <4 x i32> , i32 %idx1 store volatile i32 %val0, i32 addrspace(1)* %out0 store volatile i32 %val1, i32 addrspace(1)* %out0 @@ -411,6 +407,7 @@ bb2: ; IDXMODE: s_set_gpr_idx_on 0, dst ; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]: +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]] ; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]] diff --git a/test/CodeGen/AMDGPU/infinite-loop.ll b/test/CodeGen/AMDGPU/infinite-loop.ll index 73482756b8c804ed7be0ae7a151e92bfb96d400d..3caffc342c7ed574773be6ac90f364dad8642c58 100644 --- a/test/CodeGen/AMDGPU/infinite-loop.ll +++ b/test/CodeGen/AMDGPU/infinite-loop.ll @@ -4,8 +4,8 @@ ; SI-LABEL: {{^}}infinite_loop: ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 ; SI: BB0_1: +; SI: s_waitcnt lgkmcnt(0) ; SI: buffer_store_dword [[REG]] -; SI: s_waitcnt vmcnt(0) expcnt(0) ; SI: s_branch BB0_1 define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) { entry: diff --git a/test/CodeGen/AMDGPU/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll index 5d49b11f0d416b39374e75c8e158bfbdae20c608..c0f5218efc16bf74655fe71f3bf2a1725459d594 100644 --- a/test/CodeGen/AMDGPU/inline-asm.ll +++ b/test/CodeGen/AMDGPU/inline-asm.ll @@ -191,8 +191,58 @@ entry: ; CHECK: v_mov_b32_e32 v0, s0 ; CHECK: v_mov_b32_e32 v1, s1 ; CHECK: use v[0:1] -define void @i64_imm_input_phys_vgpr() { +define amdgpu_kernel void @i64_imm_input_phys_vgpr() { entry: - call void asm sideeffect "; use $0 ", "{VGPR0_VGPR1}"(i64 123456) + call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456) + ret void +} + +; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr: +; CHECK: v_mov_b32_e32 v0, -1{{$}} +; CHECK: ; use v0 +define amdgpu_kernel void @i1_imm_input_phys_vgpr() { +entry: + call void asm sideeffect "; use $0 ", "{v0}"(i1 true) + ret void +} + +; CHECK-LABEL: {{^}}i1_input_phys_vgpr: +; CHECK: {{buffer|flat}}_load_ubyte [[LOAD:v[0-9]+]] +; CHECK: v_and_b32_e32 [[LOAD]], 1, [[LOAD]] +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, [[LOAD]] +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK: ; use v0 +define amdgpu_kernel void @i1_input_phys_vgpr() { +entry: + %val = load i1, i1 addrspace(1)* undef + call void asm sideeffect "; use $0 ", "{v0}"(i1 %val) + ret void +} + +; FIXME: Should be scheduled to shrink vcc +; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2: +; CHECK: v_cmp_eq_u32_e32 vcc, 1, v0 +; CHECK: v_cmp_eq_u32_e64 s[0:1], 1, v1 +; CHECK: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +define amdgpu_kernel void @i1_input_phys_vgpr_x2() { +entry: + %val0 = load volatile i1, i1 addrspace(1)* undef + %val1 = load volatile i1, i1 addrspace(1)* undef + call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1) + ret void +} + +; CHECK-LABEL: {{^}}muliple_def_phys_vgpr: +; CHECK: ; def v0 +; CHECK: v_mov_b32_e32 v1, v0 +; CHECK: ; def v0 +; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1 +define amdgpu_kernel void @muliple_def_phys_vgpr() { +entry: + %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"() + %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"() + %add = shl i32 %def0, %def1 + store i32 %add, i32 addrspace(1)* undef ret void } diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll index a3f82b8a011746f60a187a2ec9cdaaa445098a7b..86fc41a237720daf380df2b6f835de2fe5da1d1f 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -216,7 +216,7 @@ define amdgpu_kernel void @v_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out, ; CIVI-DAG: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], [[ELT0_SHIFT]], [[ELT1]] -; GFX9-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff{{$}} +; GFX9-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0000{{$}} ; GFX9-DAG: v_lshrrev_b32_e64 [[ELT0_SHIFT:v[0-9]+]], 16, [[ELT0]] ; GFX9: v_and_or_b32 [[RES:v[0-9]+]], [[VEC]], [[MASK]], [[ELT0_SHIFT]] @@ -258,8 +258,10 @@ define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace ; FIXME: fold lshl_or c0, c1, v0 -> or (c0 << c1), v0 ; GCN-LABEL: {{^}}v_insertelement_v2i16_1: +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e70000 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] -; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]] +; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]] +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] @@ -278,9 +280,12 @@ define amdgpu_kernel void @v_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, } ; GCN-LABEL: {{^}}v_insertelement_v2i16_1_inlineimm: +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xfff10000 ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GCN: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] -; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]] +; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]] +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], -15, 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { @@ -337,8 +342,10 @@ define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspac } ; GCN-LABEL: {{^}}v_insertelement_v2f16_1: +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x45000000 ; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] -; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]] +; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]] +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] @@ -357,9 +364,12 @@ define amdgpu_kernel void @v_insertelement_v2f16_1(<2 x half> addrspace(1)* %out } ; GCN-LABEL: {{^}}v_insertelement_v2f16_1_inlineimm: +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0x230000 ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GCN: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] -; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]] +; CI: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] +; CI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]] +; VI: v_or_b32_sdwa [[RES:v[0-9]+]], [[VEC]], [[K]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], 35, 16, [[ELT0]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { @@ -413,9 +423,9 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspac ; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr: ; GCN: flat_load_dword [[IDX:v[0-9]+]] ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 - ; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 + ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] ; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] @@ -440,9 +450,9 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspac ; GCN-LABEL: {{^}}v_insertelement_v2f16_dynamic_vgpr: ; GCN: flat_load_dword [[IDX:v[0-9]+]] ; GCN: flat_load_dword [[VEC:v[0-9]+]] -; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234 - ; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234 + ; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]] ; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]] diff --git a/test/CodeGen/AMDGPU/inserted-wait-states.mir b/test/CodeGen/AMDGPU/inserted-wait-states.mir index 1479303712d0f0299e03df5b34a06f2b52658650..c6fe6debd225a8cae3d9c0d015eb284e793394d8 100644 --- a/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -77,19 +77,16 @@ name: div_fmas body: | bb.0: - successors: %bb.1 %vcc = S_MOV_B64 0 %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec S_BRANCH %bb.1 bb.1: - successors: %bb.2 implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec S_BRANCH %bb.2 bb.2: - successors: %bb.3 %vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec S_BRANCH %bb.3 @@ -130,19 +127,16 @@ name: s_getreg body: | bb.0: - successors: %bb.1 S_SETREG_B32 %sgpr0, 1 %sgpr1 = S_GETREG_B32 1 S_BRANCH %bb.1 bb.1: - successors: %bb.2 S_SETREG_IMM32_B32 0, 1 %sgpr1 = S_GETREG_B32 1 S_BRANCH %bb.2 bb.2: - successors: %bb.3 S_SETREG_B32 %sgpr0, 1 %sgpr1 = S_MOV_B32 0 %sgpr2 = S_GETREG_B32 1 @@ -178,13 +172,11 @@ name: s_setreg body: | bb.0: - successors: %bb.1 S_SETREG_B32 %sgpr0, 1 S_SETREG_B32 %sgpr1, 1 S_BRANCH %bb.1 bb.1: - successors: %bb.2 S_SETREG_B32 %sgpr0, 64 S_SETREG_B32 %sgpr1, 128 S_BRANCH %bb.2 @@ -237,7 +229,6 @@ name: vmem_gt_8dw_store body: | bb.0: - successors: %bb.1 BUFFER_STORE_DWORD_OFFSET %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec %vgpr3 = V_MOV_B32_e32 0, implicit %exec BUFFER_STORE_DWORDX3_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec @@ -310,19 +301,16 @@ name: readwrite_lane body: | bb.0: - successors: %bb.1 %vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec %sgpr4 = V_READLANE_B32 %vgpr4, %sgpr0 S_BRANCH %bb.1 bb.1: - successors: %bb.2 %vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec %vgpr4 = V_WRITELANE_B32 %sgpr0, %sgpr0 S_BRANCH %bb.2 bb.2: - successors: %bb.3 %vgpr0,implicit %vcc = V_ADD_I32_e32 %vgpr1, %vgpr2, implicit %vcc, implicit %exec %sgpr4 = V_READLANE_B32 %vgpr4, %vcc_lo S_BRANCH %bb.3 @@ -352,7 +340,6 @@ name: rfe body: | bb.0: - successors: %bb.1 S_SETREG_B32 %sgpr0, 3 S_RFE_B64 %sgpr2_sgpr3 S_BRANCH %bb.1 @@ -382,7 +369,6 @@ name: s_mov_fed_b32 body: | bb.0: - successors: %bb.1 %sgpr0 = S_MOV_FED_B32 %sgpr0 %sgpr0 = S_MOV_B32 %sgpr0 S_BRANCH %bb.1 @@ -423,19 +409,16 @@ name: s_movrel body: | bb.0: - successors: %bb.1 %m0 = S_MOV_B32 0 %sgpr0 = S_MOVRELS_B32 %sgpr0, implicit %m0 S_BRANCH %bb.1 bb.1: - successors: %bb.2 %m0 = S_MOV_B32 0 %sgpr0_sgpr1 = S_MOVRELS_B64 %sgpr0_sgpr1, implicit %m0 S_BRANCH %bb.2 bb.2: - successors: %bb.3 %m0 = S_MOV_B32 0 %sgpr0 = S_MOVRELD_B32 %sgpr0, implicit %m0 S_BRANCH %bb.3 @@ -475,19 +458,16 @@ name: v_interp body: | bb.0: - successors: %bb.1 %m0 = S_MOV_B32 0 %vgpr0 = V_INTERP_P1_F32 %vgpr0, 0, 0, implicit %m0, implicit %exec S_BRANCH %bb.1 bb.1: - successors: %bb.2 %m0 = S_MOV_B32 0 %vgpr0 = V_INTERP_P2_F32 %vgpr0, %vgpr1, 0, 0, implicit %m0, implicit %exec S_BRANCH %bb.2 bb.2: - successors: %bb.3 %m0 = S_MOV_B32 0 %vgpr0 = V_INTERP_P1_F32_16bank %vgpr0, 0, 0, implicit %m0, implicit %exec S_BRANCH %bb.3 diff --git a/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir b/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir index bc1dafe0ea1e22be99e3b925f8a656be3c3a4759..67642282f75b0b0a3d4e7b9495ff505e7ea22df4 100644 --- a/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir +++ b/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir @@ -53,7 +53,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.entry: - successors: %bb.2.if, %bb.1.else liveins: %sgpr0_sgpr1 %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) @@ -62,7 +61,6 @@ body: | S_CBRANCH_VCCNZ %bb.2.if, implicit undef %vcc bb.1.else: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 100, implicit %exec @@ -71,7 +69,6 @@ body: | S_BRANCH %bb.3.done bb.2.if: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 9, implicit %exec diff --git a/test/CodeGen/AMDGPU/kernel-args.ll b/test/CodeGen/AMDGPU/kernel-args.ll index 6fa26cb3879357bdd09e25e2c45d6b129fb3b0aa..9441bf20882952845c1f75dc8c889418213dcdeb 100644 --- a/test/CodeGen/AMDGPU/kernel-args.ll +++ b/test/CodeGen/AMDGPU/kernel-args.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=SI,GCN,MESA-GCN,FUNC ; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,MESA-VI,MESA-GCN,FUNC ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,HSA-VI,FUNC -; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s --check-prefix=EG --check-prefix=FUNC +; RUN: llc < %s -march=r600 -mcpu=cayman -verify-machineinstrs | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; FUNC-LABEL: {{^}}i8_arg: ; HSA-VI: kernarg_segment_alignment = 4 diff --git a/test/CodeGen/AMDGPU/lds-size.ll b/test/CodeGen/AMDGPU/lds-size.ll index c65817abd489da4cdb5ec6e7aa13cb6dcdc48d70..ff78c3bcb18cf964dda224b38f5467ab11169c1e 100644 --- a/test/CodeGen/AMDGPU/lds-size.ll +++ b/test/CodeGen/AMDGPU/lds-size.ll @@ -1,4 +1,5 @@ ; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=ALL -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s ; This test makes sure we do not double count global values when they are @@ -11,6 +12,9 @@ ; EG-NEXT: .long 1 ; ALL: {{^}}test: +; HSA: granulated_lds_size = 0 +; HSA: workgroup_group_segment_byte_size = 4 + ; GCN: ; LDSByteSize: 4 bytes/workgroup (compile time only) @lds = internal unnamed_addr addrspace(3) global i32 undef, align 4 diff --git a/test/CodeGen/AMDGPU/limit-coalesce.mir b/test/CodeGen/AMDGPU/limit-coalesce.mir index 106a96e32dc3890a11affcd54dc23c0eddba28c1..d6b3d7b14cd21a686b33ffde1cffb92e2ef351c6 100644 --- a/test/CodeGen/AMDGPU/limit-coalesce.mir +++ b/test/CodeGen/AMDGPU/limit-coalesce.mir @@ -2,13 +2,13 @@ # Check that coalescer does not create wider register tuple than in source -# CHECK: - { id: 2, class: vreg_64 } -# CHECK: - { id: 3, class: vreg_64 } -# CHECK: - { id: 4, class: vreg_64 } -# CHECK: - { id: 5, class: vreg_96 } -# CHECK: - { id: 6, class: vreg_96 } -# CHECK: - { id: 7, class: vreg_128 } -# CHECK: - { id: 8, class: vreg_128 } +# CHECK: - { id: 2, class: vreg_64, preferred-register: '' } +# CHECK: - { id: 3, class: vreg_64, preferred-register: '' } +# CHECK: - { id: 4, class: vreg_64, preferred-register: '' } +# CHECK: - { id: 5, class: vreg_96, preferred-register: '' } +# CHECK: - { id: 6, class: vreg_96, preferred-register: '' } +# CHECK: - { id: 7, class: vreg_128, preferred-register: '' } +# CHECK: - { id: 8, class: vreg_128, preferred-register: '' } # No more registers shall be defined # CHECK-NEXT: liveins: # CHECK: FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %4, diff --git a/test/CodeGen/AMDGPU/liveness.mir b/test/CodeGen/AMDGPU/liveness.mir index 48762e3f2ab4255ee353ffb37633516bf695a60d..6fd8466492d08b108e8dfd7e1d0df593732fbafd 100644 --- a/test/CodeGen/AMDGPU/liveness.mir +++ b/test/CodeGen/AMDGPU/liveness.mir @@ -16,13 +16,11 @@ registers: - { id: 0, class: sreg_64 } body: | bb.0: - successors: %bb.1, %bb.2 S_NOP 0, implicit-def undef %0.sub0 S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc S_BRANCH %bb.2 bb.1: - successors: %bb.2 S_NOP 0, implicit-def %0.sub1 S_NOP 0, implicit %0.sub1 S_BRANCH %bb.2 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll new file mode 100644 index 0000000000000000000000000000000000000000..873a3f0f368fd267c90d750db8855dd9741103b2 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.alignb.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.alignbit(i32, i32, i32) #0 +declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0 + +; GCN-LABEL: {{^}}v_alignbit_b32: +; GCN: v_alignbit_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}} +define amdgpu_kernel void @v_alignbit_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 { + %val = call i32 @llvm.amdgcn.alignbit(i32 %src1, i32 %src2, i32 %src3) #0 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_alignbyte_b32: +; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}} +define amdgpu_kernel void @v_alignbyte_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 { + %val = call i32 @llvm.amdgcn.alignbyte(i32 %src1, i32 %src2, i32 %src3) #0 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll index 405534ea4b5736ea914602c0a8360d2cb84e5266..1fc77893e7e97300dc8ddd4751a3182ef2f42b11 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll @@ -234,8 +234,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64 addrspace(4)* %p } ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i64_offset_addr64: -; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -248,8 +248,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64 addrspace(4 } ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i64_offset_addr64: -; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}} define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -355,8 +355,9 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* } ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset_addr64: -; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}} ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { @@ -370,8 +371,9 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace } ; GCN-LABEL: {{^}}global_atomic_dec_noret_i64_offset_addr64: -; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; CI: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}} ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}} define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 { diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll index 8334c0c357befb62765595131bc6b2a19c1b8d78..eee8351de79befbfb9370b9ff4f48f74de1bbdb0 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @@ -206,8 +206,9 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* } ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset_addr64: -; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}} ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { @@ -221,8 +222,9 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace } ; GCN-LABEL: {{^}}global_atomic_inc_noret_i64_offset_addr64: -; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; CI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} +; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; CI: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}} ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}}{{$}} define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 { @@ -348,8 +350,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64 addrspace(4)* %p } ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i64_offset_addr64: -; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]}} glc{{$}} define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64 addrspace(4)* %out, i64 addrspace(4)* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -362,8 +364,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64 addrspace(4 } ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i64_offset_addr64: -; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 -; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 +; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[KLO]]:[[KHI]]{{\]$}} define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64 addrspace(4)* %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll index 555a1d23ebe972bff7fc316528def07d69381fd2..e50455f6f9a13c24b8a68de692d7f01de3cecacc 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll @@ -58,7 +58,7 @@ main_body: ; ;CHECK-LABEL: {{^}}buffer_store_wait: ;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen -;CHECK: s_waitcnt vmcnt(0) expcnt(0) +;CHECK: s_waitcnt expcnt(0) ;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll index 5ae255c7a26cf2181258e38c9e6de5b7395b41cc..81597516d5f2a238ca4d08c5828c828e7b45a6af 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll @@ -58,7 +58,7 @@ main_body: ; ;CHECK-LABEL: {{^}}buffer_store_wait: ;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen -;CHECK: s_waitcnt vmcnt(0) expcnt(0) +;CHECK: s_waitcnt expcnt(0) ;CHECK: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 idxen ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 idxen diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll index b92eb34750d9a258e0a0270089b6fbd449168130..7179d02fc6ddc6e0a16e913b6bb1d52b00fbdcdc 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll @@ -7,7 +7,7 @@ ; GCN-DAG: s_load_dword [[SY:s[0-9]+]], s[0:1], 0x{{c|30}} ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], [[SY]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[X]], [[VY]] -; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[VY]] +; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[X]], [[VY]] define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 { %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y) store <2 x half> %result, <2 x half> addrspace(1)* %out @@ -16,7 +16,7 @@ define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, ; GCN-LABEL: {{^}}s_cvt_pkrtz_samereg_v2f16_f32: ; GCN: s_load_dword [[X:s[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[X]], [[X]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]] define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float %x) #0 { %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x) store <2 x half> %result, <2 x half> addrspace(1)* %out @@ -39,7 +39,7 @@ define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]] -; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], [[B]] +; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -55,7 +55,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_reg_imm: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], 1.0 +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -70,7 +70,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1) ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]] -; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, 1.0, [[A]] +; GFX89: v_cvt_pkrtz_f16_f32 v{{[0-9]+}}, 1.0, [[A]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -85,7 +85,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1) ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], [[B]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -103,7 +103,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1) ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_hi: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], -[[B]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -121,7 +121,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1) ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo_hi: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], -[[B]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -140,7 +140,7 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] -; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -|[[A]]|, -[[B]] +; GCN: v_cvt_pkrtz_f16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll index e04d9e662cea315d43a70b70f98f5fe1bed07d27..3bb5e21d67ac0f8a091e80d8ca43433d517d76dc 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll @@ -27,7 +27,7 @@ entry: ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] ; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] ; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x4200{{$}} -; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]] +; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16_imm_a( @@ -46,7 +46,7 @@ entry: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] ; GCN: buffer_load_ushort v[[C_F16:[0-9]+]] ; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x4200{{$}} -; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]] +; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16_imm_b( @@ -65,7 +65,7 @@ entry: ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] ; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x4200{{$}} -; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]] +; VI: v_div_fixup_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16_imm_c( diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll index a86468b07a272f801e6ccc3ef9a6320f47a95647..2cc63ae74bf10c4b0603c03627be636c12790fcd 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll @@ -17,7 +17,7 @@ declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind re ; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]] ; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; GCN-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]] -; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VB]], [[VA]], [[VC]] +; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], [[VC]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll index a3a78d326a628a5790d50cb83608e2f8b0e62713..d97644262016ea23d424a9cd783d28a7517bcac9 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll @@ -4,8 +4,7 @@ declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0 ; FUNC-LABEL: {{^}}ds_swizzle: -; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:100 -; CHECK: s_waitcnt lgkmcnt +; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11") define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind { %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 store i32 %swizzle, i32 addrspace(1)* %out, align 4 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll index c9993ee88369c4399f4aad057e7bed3d2d601bab..737be5d0044786ff13cfe3d311f85710d167f0d8 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll @@ -14,7 +14,7 @@ define amdgpu_kernel void @v_fcmp_f32_dynamic_cc(i64 addrspace(1)* %out, float % } ; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs: -; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{v[0-9]+}}|, {{s[0-9]+}} +; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}| define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) { %temp = call float @llvm.fabs.f32(float %a) %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1) @@ -23,7 +23,7 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, floa } ; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs: -; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{v[0-9]+}}|, |{{s[0-9]+}}| +; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}| define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) { %temp = call float @llvm.fabs.f32(float %a) %src_input = call float @llvm.fabs.f32(float %src) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll index b47d2dbc744d447068c5301ba786a12e96def586..be8462d09064ad09bd89102eb286bcecd76fd1c4 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll @@ -27,7 +27,7 @@ define amdgpu_kernel void @test_mul_legacy_undef1_f32(float addrspace(1)* %out, } ; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32: -; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |s{{[0-9]+}}| +; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |v{{[0-9]+}}| define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 { %a.fabs = call float @llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll index c74c0fa15855db2fa5ffb0a06d374dab52b104f3..a289f7b0cfb1ee7fd4c48de170da35d7b84bd5f9 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll @@ -130,7 +130,7 @@ main_body: ; ; GCN-LABEL: {{^}}image_store_wait: ; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm -; GCN: s_waitcnt vmcnt(0) expcnt(0) +; GCN: s_waitcnt expcnt(0) ; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm ; GCN: s_waitcnt vmcnt(0) ; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll new file mode 100644 index 0000000000000000000000000000000000000000..617f1f19e360ae6e2550ec8287740bcc9a5691af --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll @@ -0,0 +1,80 @@ +;RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN + +; GCN-LABEL: {{^}}full_mask: +; GCN: s_mov_b64 exec, -1 +; GCN: v_add_f32_e32 v0, +define amdgpu_ps float @full_mask(float %a, float %b) { +main_body: + %s = fadd float %a, %b + call void @llvm.amdgcn.init.exec(i64 -1) + ret float %s +} + +; GCN-LABEL: {{^}}partial_mask: +; GCN: s_mov_b64 exec, 0x1e240 +; GCN: v_add_f32_e32 v0, +define amdgpu_ps float @partial_mask(float %a, float %b) { +main_body: + %s = fadd float %a, %b + call void @llvm.amdgcn.init.exec(i64 123456) + ret float %s +} + +; GCN-LABEL: {{^}}input_s3off8: +; GCN: s_bfe_u32 s0, s3, 0x70008 +; GCN: s_bfm_b64 exec, s0, 0 +; GCN: s_cmp_eq_u32 s0, 64 +; GCN: s_cmov_b64 exec, -1 +; GCN: v_add_f32_e32 v0, +define amdgpu_ps float @input_s3off8(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) { +main_body: + %s = fadd float %a, %b + call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8) + ret float %s +} + +; GCN-LABEL: {{^}}input_s0off19: +; GCN: s_bfe_u32 s0, s0, 0x70013 +; GCN: s_bfm_b64 exec, s0, 0 +; GCN: s_cmp_eq_u32 s0, 64 +; GCN: s_cmov_b64 exec, -1 +; GCN: v_add_f32_e32 v0, +define amdgpu_ps float @input_s0off19(i32 inreg %count, float %a, float %b) { +main_body: + %s = fadd float %a, %b + call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19) + ret float %s +} + +; GCN-LABEL: {{^}}reuse_input: +; GCN: s_bfe_u32 s1, s0, 0x70013 +; GCN: s_bfm_b64 exec, s1, 0 +; GCN: s_cmp_eq_u32 s1, 64 +; GCN: s_cmov_b64 exec, -1 +; GCN: v_add_i32_e32 v0, vcc, s0, v0 +define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) { +main_body: + call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19) + %s = add i32 %a, %count + %f = sitofp i32 %s to float + ret float %f +} + +; GCN-LABEL: {{^}}reuse_input2: +; GCN: s_bfe_u32 s1, s0, 0x70013 +; GCN: s_bfm_b64 exec, s1, 0 +; GCN: s_cmp_eq_u32 s1, 64 +; GCN: s_cmov_b64 exec, -1 +; GCN: v_add_i32_e32 v0, vcc, s0, v0 +define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) { +main_body: + %s = add i32 %a, %count + %f = sitofp i32 %s to float + call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19) + ret float %f +} + +declare void @llvm.amdgcn.init.exec(i64) #1 +declare void @llvm.amdgcn.init.exec.from.input(i32, i32) #1 + +attributes #1 = { convergent } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll index 055dddbfa8af2944ce0ba26e127aaeff026f6d4f..9a27809f37bb8e9fd4cd2c758821ff8cdf7d4b39 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -2,6 +2,8 @@ ; RUN: llc -mtriple=amdgcn--amdhsa-opencl -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-OPENCL %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s ; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-NOENV %s +; RUN: llc -mtriple=amdgcn--amdhsa-amdgizcl -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-OPENCL %s ; ALL-LABEL: {{^}}test: ; CO-V2: enable_sgpr_kernarg_segment_ptr = 1 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll index ab76c870796be87187821527b82c29ec1173242b..144c8f428ab0ff428faf7839431a364ff169537f 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll @@ -2,9 +2,9 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}mbcnt_intrinsics: -; GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0 +; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[LO:v[0-9]+]], -1, 0 ; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]] -; VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]] +; VI: v_mbcnt_hi_u32_b32 {{v[0-9]+}}, -1, [[LO]] define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, i32 inreg %arg3) { main_body: %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll index 3a2b87cd87f30586ca409902583b67e974a68cef..83bc8b2347245be26a79b84fd3b373153f6dd494 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll @@ -4,18 +4,28 @@ declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8: -; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v5, v1 +; GCN-DAG: v_mov_b32_e32 v4, v0 define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { - %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0 + %tmp1 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0 + %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0 + store i64 %tmp2, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate: -; GCN: v_mqsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_mqsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7] +; GCN-DAG: v_mov_b32_e32 v3, v1 +; GCN-DAG: v_mov_b32_e32 v2, v0 define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { - %result= call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0 + %tmp3 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0 + %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0 + store i64 %tmp4, i64 addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll index a8d03bf6bbac5087c7f1ee6bdaaa3dfc3ef02ebf..685b5e0f29c423991260a06bfca577edbd0464d6 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll @@ -3,45 +3,56 @@ declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0 -; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_non_inline_constant: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_use_non_inline_constant(<4 x i32> addrspace(1)* %out, i64 %src) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 100, <4 x i32> ) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 +; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] +define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> ) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %b) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 - ret void -} - -; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_fp_immediate: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> ) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> ) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_mqsad_u32_u8_use_sgpr_vgpr: -; GCN: v_mqsad_u32_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v0, v2 +; GCN-DAG: v_mov_b32_e32 v1, v3 +; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) { %in = load <4 x i32>, <4 x i32> addrspace(1) * %input - - %result = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %src, i32 %a, <4 x i32> %in) #0 - store <4 x i32> %result, <4 x i32> addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) #0 + %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 + store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll index be71225c5e06f3b232687af8ae961d3c843bf89e..1f46613a8db0d3724114de24668259bafd058976 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll @@ -4,18 +4,28 @@ declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8: -; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN-DAG: v_mov_b32_e32 v5, v1 +; GCN-DAG: v_mov_b32_e32 v4, v0 define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) { - %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 100, i64 100) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0 + %tmp1 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0 + %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0 + store i64 %tmp2, i64 addrspace(1)* %out, align 4 ret void } ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate: -; GCN: v_qsad_pk_u16_u8 v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: v_qsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7] +; GCN-DAG: v_mov_b32_e32 v3, v1 +; GCN-DAG: v_mov_b32_e32 v2, v0 define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) { - %result= call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %src, i32 %a, i64 %b) #0 - store i64 %result, i64 addrspace(1)* %out, align 4 + %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 + %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 + %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0 + %tmp3 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0 + %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0 + store i64 %tmp4, i64 addrspace(1)* %out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll index 5e892fad3741bf10863e65890e7757ade16af6be..cbd8f0a9c23a3bcd41e5d5afd9d0f9a1b02960e0 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll @@ -19,6 +19,20 @@ define amdgpu_kernel void @test_readlane_imm_sreg(i32 addrspace(1)* %out, i32 %s ret void } +; CHECK-LABEL: {{^}}test_readlane_vregs: +; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}} +; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, [[LANE]] +define amdgpu_kernel void @test_readlane_vregs(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep.in = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid + %args = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in + %value = extractelement <2 x i32> %args, i32 0 + %lane = extractelement <2 x i32> %args, i32 1 + %readlane = call i32 @llvm.amdgcn.readlane(i32 %value, i32 %lane) + store i32 %readlane, i32 addrspace(1)* %out, align 4 + ret void +} + ; TODO: m0 should be folded. ; CHECK-LABEL: {{^}}test_readlane_m0_sreg: ; CHECK: s_mov_b32 m0, -1 @@ -40,5 +54,8 @@ define amdgpu_kernel void @test_readlane_imm(i32 addrspace(1)* %out, i32 %src0) ret void } +declare i32 @llvm.amdgcn.workitem.id.x() #2 + attributes #0 = { nounwind readnone convergent } attributes #1 = { nounwind } +attributes #2 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll index 5f8ca28ec5f05d2d7af708996c3575a9a0d47e8e..3d815cca5be2d7b14db48ec235ff66f3877f558d 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -1,20 +1,21 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=NOAUTO %s +; RUN: llc -march=amdgcn -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=AUTO %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=NOAUTO %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=AUTO %s ; GCN-LABEL: {{^}}test_barrier: ; GFX8: buffer_store_dword -; GFX8: s_waitcnt ; GFX9: flat_store_dword -; GFX9-NOT: s_waitcnt +; NOAUTO: s_waitcnt +; AUTO-NOT: s_waitcnt ; GCN: s_barrier -define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp store i32 %tmp, i32 addrspace(1)* %tmp1 call void @llvm.amdgcn.s.barrier() - %tmp2 = call i32 @llvm.r600.read.local.size.x() - %tmp3 = sub i32 %tmp2, 1 + %tmp3 = sub i32 %size, 1 %tmp4 = sub i32 %tmp3, %tmp %tmp5 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp4 %tmp6 = load i32, i32 addrspace(1)* %tmp5 @@ -24,7 +25,6 @@ entry: declare void @llvm.amdgcn.s.barrier() #1 declare i32 @llvm.amdgcn.workitem.id.x() #2 -declare i32 @llvm.r600.read.local.size.x() #2 attributes #0 = { nounwind } attributes #1 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll index b488565c6b3ad5ddb2d3a06f5421da5f288a065b..224b2ed72e3bd6d6b888d9b777bc21b7b8aa6b96 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @test_s_dcache_inv() #0 { ; GCN: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_inv_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.inv() - call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 127) br label %end end: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll index a3a5c329f41115a43d37f2e8cc8bda103ed05423..f96d5db5794aca3b54fe2768bb98f3f2296ff099 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @test_s_dcache_inv_vol() #0 { ; GCN: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_inv_vol_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.inv.vol() - call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 127) br label %end end: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll index 909a85dda3e8a884db1fc1b578811b54c1edae70..99b6513504395c48b518acc88271bf4b28c7ff23 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll @@ -18,7 +18,7 @@ define amdgpu_kernel void @test_s_dcache_wb() #0 { ; VI: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_wb_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.wb() - call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 127) br label %end end: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll index 217bf97c41a4a6b4f909c740ad5bbc3344d230dd..844fcecdb48bfd0645068c3bcf8d175d10178cb9 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll @@ -18,7 +18,7 @@ define amdgpu_kernel void @test_s_dcache_wb_vol() #0 { ; VI: s_waitcnt lgkmcnt(0) ; encoding define amdgpu_kernel void @test_s_dcache_wb_vol_insert_wait() #0 { call void @llvm.amdgcn.s.dcache.wb.vol() - call void @llvm.amdgcn.s.waitcnt(i32 0) + call void @llvm.amdgcn.s.waitcnt(i32 127) br label %end end: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll new file mode 100644 index 0000000000000000000000000000000000000000..22e15e21680516a403a189ce5617a797f0f2ab90 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i64 @llvm.amdgcn.s.getpc() #0 + +; GCN-LABEL: {{^}}test_s_getpc: +; GCN: s_load_dwordx2 +; GCN-DAG: s_getpc_b64 s{{\[[0-9]+:[0-9]+\]}} +; GCN: buffer_store_dwordx2 +define amdgpu_kernel void @test_s_getpc(i64 addrspace(1)* %out) #0 { + %tmp = call i64 @llvm.amdgcn.s.getpc() #1 + store volatile i64 %tmp, i64 addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll index 6083ec885a868873c8bd92441c19ceaa26f86960..ee58d359a935934e729e5a2534ce652909e405ed 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll @@ -18,8 +18,8 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> ; ; CHECK-LABEL: {{^}}test2: ; CHECK: image_load -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: s_waitcnt +; CHECK-NEXT: s_waitcnt +; CHECK: s_waitcnt vmcnt(0){{$}} ; CHECK-NEXT: image_store define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) { %t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll new file mode 100644 index 0000000000000000000000000000000000000000..bafafa33016fac02657563790cbd15de80c71549 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll @@ -0,0 +1,9 @@ +; RUN: llc -march amdgcn %s -filetype=obj -o /dev/null +; RUN: llc -march amdgcn <%s | FileCheck %s +define amdgpu_kernel void @f() { + ; CHECK: ; divergent unreachable + call void @llvm.amdgcn.unreachable() + ret void +} + +declare void @llvm.amdgcn.unreachable() diff --git a/test/CodeGen/AMDGPU/llvm.fma.f16.ll b/test/CodeGen/AMDGPU/llvm.fma.f16.ll index 518fe8baaa7a1cfe84d8c5072141cb5ef02c687e..3f4fba7d8ead02180825c0248239d48d64fff2e0 100644 --- a/test/CodeGen/AMDGPU/llvm.fma.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.fma.f16.ll @@ -39,7 +39,7 @@ define amdgpu_kernel void @fma_f16( ; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] ; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x4200{{$}} -; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]] +; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_a( @@ -62,7 +62,7 @@ define amdgpu_kernel void @fma_f16_imm_a( ; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] ; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x4200{{$}} -; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]] +; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_b( @@ -85,7 +85,7 @@ define amdgpu_kernel void @fma_f16_imm_b( ; SI: v_fma_f32 v[[R_F32:[0-9]+]], v[[A_F32:[0-9]]], v[[B_F32:[0-9]]], v[[C_F32:[0-9]]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] ; VI: v_mov_b32_e32 v[[C_F16:[0-9]+]], 0x4200{{$}} -; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[B_F16]], v[[A_F16]], v[[C_F16]] +; VI: v_fma_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], v[[C_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fma_f16_imm_c( diff --git a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll index f30fd1d5820436f8d64a8b4d49d42433778d54ab..806723e5136ca618fad08e6260baa019a4a8645a 100644 --- a/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -50,7 +50,7 @@ define amdgpu_kernel void @fmuladd_f16( ; VI-FLUSH: buffer_store_short v[[C_F16]] ; VI-DENORM: v_mov_b32_e32 [[KA:v[0-9]+]], 0x4200 -; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[KA]], v[[B_F16]], v[[C_F16]] +; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], v[[B_F16]], [[KA]], v[[C_F16]] ; VI-DENORM: buffer_store_short [[RESULT]] ; GCN: s_endpgm @@ -78,7 +78,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_a( ; VI-FLUSH: buffer_store_short v[[C_F16]] ; VI-DENORM: v_mov_b32_e32 [[KA:v[0-9]+]], 0x4200 -; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], [[KA]], v[[A_F16]], v[[C_F16]] +; VI-DENORM: v_fma_f16 [[RESULT:v[0-9]+]], v[[A_F16]], [[KA]], v[[C_F16]] ; VI-DENORM buffer_store_short [[RESULT]] @@ -118,7 +118,7 @@ define amdgpu_kernel void @fmuladd_f16_imm_b( ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] ; VI-FLUSH: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-FLUSH-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[B_V2_F16]], v[[C_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-FLUSH-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]] ; VI-FLUSH-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]] ; VI-FLUSH-NOT: v_and_b32 diff --git a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll index 4c8dff52509a203de69035c0450d967986a7d928..8f4b314ffabb2b949b722e12aa20f29aeaf46880 100644 --- a/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -82,7 +82,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_max_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NOT: and ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]] @@ -101,18 +101,19 @@ entry: } ; GCN-LABEL: {{^}}maxnum_v2f16_imm_a: -; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] -; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] +; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] ; SI: v_max_f32_e32 v[[R_F32_0:[0-9]+]], 0x40400000, v[[B_F32_0]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 4.0, v[[B_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI-DAG: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]] +; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 +; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] -; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] +; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN-NOT: and ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GCN: buffer_store_dword v[[R_V2_F16]] @@ -128,18 +129,19 @@ entry: } ; GCN-LABEL: {{^}}maxnum_v2f16_imm_b: -; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] +; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] ; SI: v_max_f32_e32 v[[R_F32_0:[0-9]+]], 4.0, v[[A_F32_0]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI-DAG: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]] +; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200 +; VI-DAG: v_max_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] -; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] +; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN-NOT: and ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GCN: buffer_store_dword v[[R_V2_F16]] diff --git a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll index b8221356b664197b02b2869caa33edc9549b3456..1a86286f7136cffd37939bc547ef0ea07fb6acc9 100644 --- a/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -81,7 +81,7 @@ entry: ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] -; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[B_V2_F16]], v[[A_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_min_f16_sdwa v[[R_F16_1:[0-9]+]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-NOT: and ; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], v[[R_F16_0]] @@ -100,7 +100,7 @@ entry: } ; GCN-LABEL: {{^}}minnum_v2f16_imm_a: -; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[B_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] ; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] @@ -110,11 +110,11 @@ entry: ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] -; VI-DAG: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]] +; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400 +; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] -; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] +; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN-NOT: and ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GCN: buffer_store_dword v[[R_V2_F16]] @@ -130,18 +130,19 @@ entry: } ; GCN-LABEL: {{^}}minnum_v2f16_imm_b: -; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]] +; GCN-DAG: buffer_load_dword v[[A_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] +; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] ; SI: v_min_f32_e32 v[[R_F32_0:[0-9]+]], 4.0, v[[A_F32_0]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], 0x40400000, v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI-DAG: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]] +; VI-DAG: v_mov_b32_e32 [[CONST3:v[0-9]+]], 0x4200 +; VI-DAG: v_min_f16_sdwa v[[R_F16_HI:[0-9]+]], v[[B_V2_F16]], [[CONST3]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] -; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] +; SI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN-NOT: and ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_0]] ; GCN: buffer_store_dword v[[R_V2_F16]] diff --git a/test/CodeGen/AMDGPU/llvm.rint.f16.ll b/test/CodeGen/AMDGPU/llvm.rint.f16.ll index f56655630bebf61d638646e143ce282b8dfb173f..59e81a7acc0b04683aa8ce9299ee721829a9e5c8 100644 --- a/test/CodeGen/AMDGPU/llvm.rint.f16.ll +++ b/test/CodeGen/AMDGPU/llvm.rint.f16.ll @@ -44,7 +44,8 @@ entry: ; GFX9: v_rndne_f16_e32 v[[R_F16_0:[0-9]+]], v[[A_V2_F16]] ; GFX9: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] ; GFX9: v_rndne_f16_e32 v[[R_F16_1:[0-9]+]], v[[A_F16_1]] -; GFX9: v_pack_b32_f16 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_1]] +; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] +; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]] ; GCN: buffer_store_dword v[[R_V2_F16]] ; GCN: s_endpgm diff --git a/test/CodeGen/AMDGPU/llvm.round.ll b/test/CodeGen/AMDGPU/llvm.round.ll index d211ad8ec9f4aa67650e415866d3247e0e3c253a..ffe87977870baedf30d38fbfca3eda6491e59695 100644 --- a/test/CodeGen/AMDGPU/llvm.round.ll +++ b/test/CodeGen/AMDGPU/llvm.round.ll @@ -87,7 +87,8 @@ define amdgpu_kernel void @round_f16(half addrspace(1)* %out, i32 %x.arg) #0 { ; GFX89: v_bfi_b32 [[COPYSIGN0:v[0-9]+]], [[K]], [[BFI_K]], ; GFX89: v_bfi_b32 [[COPYSIGN1:v[0-9]+]], [[K]], [[BFI_K]], -; GFX9: v_pack_b32_f16 +; GFX9: v_and_b32_e32 +; GFX9: v_lshl_or_b32 define amdgpu_kernel void @round_v2f16(<2 x half> addrspace(1)* %out, i32 %in.arg) #0 { %in = bitcast i32 %in.arg to <2 x half> %result = call <2 x half> @llvm.round.v2f16(<2 x half> %in) diff --git a/test/CodeGen/AMDGPU/load-constant-i16.ll b/test/CodeGen/AMDGPU/load-constant-i16.ll index 5dd2efdf638210757f0d9d7063e2b3f3142f7349..72fde04ba39100bdb890b0adc91b2571e1bb632f 100644 --- a/test/CodeGen/AMDGPU/load-constant-i16.ll +++ b/test/CodeGen/AMDGPU/load-constant-i16.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}constant_load_i16: ; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/load-constant-i8.ll b/test/CodeGen/AMDGPU/load-constant-i8.ll index 6e56b9f9b6d62acbfc1327a8897bf289f858783f..bdfc3caf9d01922d9242ea1971d5ba4b1ac5b5fb 100644 --- a/test/CodeGen/AMDGPU/load-constant-i8.ll +++ b/test/CodeGen/AMDGPU/load-constant-i8.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}constant_load_i8: diff --git a/test/CodeGen/AMDGPU/load-global-i16.ll b/test/CodeGen/AMDGPU/load-global-i16.ll index dcdd1a947cd4dc799cfd8c46090e4b660b69130d..e3415b9c47dec596a0d083a81242ecaf1f3e88ae 100644 --- a/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/test/CodeGen/AMDGPU/load-global-i16.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=EGCM -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=EGCM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=EGCM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=EGCM -check-prefix=FUNC %s ; FIXME: r600 is broken because the bigger testcases spill and it's not implemented diff --git a/test/CodeGen/AMDGPU/load-global-i8.ll b/test/CodeGen/AMDGPU/load-global-i8.ll index 71adf090532fc961dd4f819576eb94702391915e..fc0cbf916b529c7a6ff612527aa7988f77b45440 100644 --- a/test/CodeGen/AMDGPU/load-global-i8.ll +++ b/test/CodeGen/AMDGPU/load-global-i8.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,SI,FUNC %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,SI,FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,VI,FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}global_load_i8: diff --git a/test/CodeGen/AMDGPU/load-local-i16.ll b/test/CodeGen/AMDGPU/load-local-i16.ll index bbbb34e8d3331abfa9f5adfc714ec799ed9b93fe..7de3f3b28c6dd7485a816f85cfd8f25341733346 100644 --- a/test/CodeGen/AMDGPU/load-local-i16.ll +++ b/test/CodeGen/AMDGPU/load-local-i16.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}local_load_i16: ; GCN: ds_read_u16 v{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/load-local-i8.ll b/test/CodeGen/AMDGPU/load-local-i8.ll index 731996ec6c4595f8e03a2262888e29f525f3c99f..16eb366a4b156b16d64da5741f215d26d7e860d0 100644 --- a/test/CodeGen/AMDGPU/load-local-i8.ll +++ b/test/CodeGen/AMDGPU/load-local-i8.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}local_load_i8: diff --git a/test/CodeGen/AMDGPU/local-memory.amdgcn.ll b/test/CodeGen/AMDGPU/local-memory.amdgcn.ll index 4ce9208eaddcf8ca5a38535064d7e404862e6b9e..47b6558241b9f8e5c6007c524d1b3e17f8324f61 100644 --- a/test/CodeGen/AMDGPU/local-memory.amdgcn.ll +++ b/test/CodeGen/AMDGPU/local-memory.amdgcn.ll @@ -45,11 +45,7 @@ entry: ; GCN-LABEL: {{^}}local_memory_two_objects: ; GCN: v_lshlrev_b32_e32 [[ADDRW:v[0-9]+]], 2, v0 ; CI-DAG: ds_write2_b32 [[ADDRW]], {{v[0-9]+}}, {{v[0-9]+}} offset1:4 - -; SI: v_add_i32_e32 [[ADDRW_OFF:v[0-9]+]], vcc, 16, [[ADDRW]] - -; SI-DAG: ds_write_b32 [[ADDRW]], -; SI-DAG: ds_write_b32 [[ADDRW_OFF]], +; SI-DAG: ds_write2_b32 [[ADDRW]], {{v[0-9]+}}, {{v[0-9]+}} offset1:4 ; GCN: s_barrier diff --git a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll b/test/CodeGen/AMDGPU/local-stack-slot-bug.ll deleted file mode 100644 index d3e0f0be4b5f32e45bf8d03655ad6bda24864aa2..0000000000000000000000000000000000000000 --- a/test/CodeGen/AMDGPU/local-stack-slot-bug.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck %s - -; This used to fail due to a v_add_i32 instruction with an illegal immediate -; operand that was created during Local Stack Slot Allocation. Test case derived -; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 -; -; CHECK-LABEL: {{^}}main: - -; CHECK-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x200 -; CHECK-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0x400{{$}} -; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 -; CHECK-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]] - -; CHECK-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[CLAMP_IDX]], [[K]] -; CHECK-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[CLAMP_IDX]], [[ZERO]] - -; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen -; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen -define amdgpu_ps float @main(i32 %idx) { -main_body: - %v1 = extractelement <81 x float> , i32 %idx - %v2 = extractelement <81 x float> , i32 %idx - %r = fadd float %v1, %v2 - ret float %r -} diff --git a/test/CodeGen/AMDGPU/loop_break.ll b/test/CodeGen/AMDGPU/loop_break.ll index 492472155ee6bce776ab86718c782076e9abbb94..b9df2cb779ad0a793e906f570f9dde2888d08a61 100644 --- a/test/CodeGen/AMDGPU/loop_break.ll +++ b/test/CodeGen/AMDGPU/loop_break.ll @@ -27,8 +27,9 @@ ; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1 ; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]] -; GCN: s_cmp_gt_i32 s{{[0-9]+}}, -1 -; GCN-NEXT: s_cbranch_scc1 [[FLOW:BB[0-9]+_[0-9]+]] +; GCN: v_cmp_lt_i32_e32 vcc, -1 +; GCN: s_and_b64 vcc, exec, vcc +; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]] ; GCN: ; BB#2: ; %bb4 ; GCN: buffer_load_dword diff --git a/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll b/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll index 4e0ecc0565e02370fa32b784bc0979025cf6b0dc..6f5f4ca13b5ed5cddbfa8cbef7a8fa9d6b186634 100644 --- a/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll +++ b/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll @@ -14,8 +14,8 @@ entry: } ; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range: -; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1ff, v0 -; CHECK: {{flat|buffer}}_store_dword {{.*}}[[MASKED]] +; CHECK-NOT: v_and_b32 +; CHECK: {{flat|buffer}}_store_dword {{.*}}v0 define amdgpu_kernel void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 { entry: %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 @@ -26,8 +26,8 @@ entry: ; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range_m1: ; CHECK-NOT: v0 -; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xff, v0 -; CHECK: {{flat|buffer}}_store_dword {{.*}}[[MASKED]] +; CHECK-NOT: v_and_b32 +; CHECK: {{flat|buffer}}_store_dword {{.*}}v0 define amdgpu_kernel void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 { entry: %id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1 diff --git a/test/CodeGen/AMDGPU/lshl64-to-32.ll b/test/CodeGen/AMDGPU/lshl64-to-32.ll new file mode 100644 index 0000000000000000000000000000000000000000..5ff6b71c1f02ebd7da01d64c35d04d336f0f48af --- /dev/null +++ b/test/CodeGen/AMDGPU/lshl64-to-32.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=amdgcn < %s | FileCheck %s + +; CHECK-LABEL: {{^}}zext_shl64_to_32: +; CHECK: s_lshl_b32 +; CHECK-NOT: s_lshl_b64 +define amdgpu_kernel void @zext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) { + %and = and i32 %x, 1073741823 + %ext = zext i32 %and to i64 + %shl = shl i64 %ext, 2 + store i64 %shl, i64 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: {{^}}sext_shl64_to_32: +; CHECK: s_lshl_b32 +; CHECK-NOT: s_lshl_b64 +define amdgpu_kernel void @sext_shl64_to_32(i64 addrspace(1)* nocapture %out, i32 %x) { + %and = and i32 %x, 536870911 + %ext = sext i32 %and to i64 + %shl = shl i64 %ext, 2 + store i64 %shl, i64 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: {{^}}zext_shl64_overflow: +; CHECK: s_lshl_b64 +; CHECK-NOT: s_lshl_b32 +define amdgpu_kernel void @zext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) { + %and = and i32 %x, 2147483647 + %ext = zext i32 %and to i64 + %shl = shl i64 %ext, 2 + store i64 %shl, i64 addrspace(1)* %out, align 4 + ret void +} + +; CHECK-LABEL: {{^}}sext_shl64_overflow: +; CHECK: s_lshl_b64 +; CHECK-NOT: s_lshl_b32 +define amdgpu_kernel void @sext_shl64_overflow(i64 addrspace(1)* nocapture %out, i32 %x) { + %and = and i32 %x, 2147483647 + %ext = sext i32 %and to i64 + %shl = shl i64 %ext, 2 + store i64 %shl, i64 addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/AMDGPU/lshr.v2i16.ll b/test/CodeGen/AMDGPU/lshr.v2i16.ll index e21d0d09bb415849fb106f6e956c87673972f9c1..6a90a7a9f2eb3235dfe0355f3c642959b84d5a0a 100644 --- a/test/CodeGen/AMDGPU/lshr.v2i16.ll +++ b/test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s diff --git a/test/CodeGen/AMDGPU/mad24-get-global-id.ll b/test/CodeGen/AMDGPU/mad24-get-global-id.ll index 1e78c4ebcc9f1a2e8b364970317ee1a1c0eb1a12..176d1d25f1962232adaf5a9bd52962408246f06b 100644 --- a/test/CodeGen/AMDGPU/mad24-get-global-id.ll +++ b/test/CodeGen/AMDGPU/mad24-get-global-id.ll @@ -10,7 +10,7 @@ declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 ; GCN-LABEL: {{^}}get_global_id_0: ; GCN: s_and_b32 [[WGSIZEX:s[0-9]+]], {{s[0-9]+}}, 0xffff ; GCN: v_mov_b32_e32 [[VWGSIZEX:v[0-9]+]], [[WGSIZEX]] -; GCN: v_mad_u32_u24 v{{[0-9]+}}, [[VWGSIZEX]], s8, v0 +; GCN: v_mad_u32_u24 v{{[0-9]+}}, s8, [[VWGSIZEX]], v0 define amdgpu_kernel void @get_global_id_0(i32 addrspace(1)* %out) #1 { %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() %cast.dispatch.ptr = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)* diff --git a/test/CodeGen/AMDGPU/madak.ll b/test/CodeGen/AMDGPU/madak.ll index eb4066a2a0a80ca07d2189741650ef236a8f51ea..8e0014911def87fff7659a4ba5ae29533a51df12 100644 --- a/test/CodeGen/AMDGPU/madak.ll +++ b/test/CodeGen/AMDGPU/madak.ll @@ -9,7 +9,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; GCN-LABEL: {{^}}madak_f32: ; GCN: buffer_load_dword [[VA:v[0-9]+]] ; GCN: buffer_load_dword [[VB:v[0-9]+]] -; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 +; GCN: v_madak_f32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 define amdgpu_kernel void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid @@ -63,7 +63,7 @@ define amdgpu_kernel void @madak_2_use_f32(float addrspace(1)* noalias %out, flo ; GCN-LABEL: {{^}}madak_m_inline_imm_f32: ; GCN: buffer_load_dword [[VA:v[0-9]+]] -; GCN: v_madak_f32_e32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000 +; GCN: v_madak_f32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000 define amdgpu_kernel void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid @@ -151,7 +151,7 @@ define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, flo ; GCN-LABEL: {{^}}no_madak_src0_modifier_f32: ; GCN: buffer_load_dword [[VA:v[0-9]+]] ; GCN: buffer_load_dword [[VB:v[0-9]+]] -; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}} +; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} ; GCN: s_endpgm define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -173,7 +173,7 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalia ; GCN-LABEL: {{^}}no_madak_src1_modifier_f32: ; GCN: buffer_load_dword [[VA:v[0-9]+]] ; GCN: buffer_load_dword [[VB:v[0-9]+]] -; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} +; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}} ; GCN: s_endpgm define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -198,7 +198,7 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalia ; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}} ; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]] ; GCN: buffer_load_dword [[VGPR:v[0-9]+]] -; GCN: v_madak_f32_e32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000 +; GCN: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VGPR]], [[MADAK]] ; GCN: buffer_store_dword [[MUL]] define amdgpu_kernel void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 { diff --git a/test/CodeGen/AMDGPU/madmk.ll b/test/CodeGen/AMDGPU/madmk.ll index 6e70e95383c97eeb8e00114ccf92c00f908f4bec..6bc40e82459bbdbd973e3a9ae10bcad827724893 100644 --- a/test/CodeGen/AMDGPU/madmk.ll +++ b/test/CodeGen/AMDGPU/madmk.ll @@ -129,7 +129,7 @@ define amdgpu_kernel void @scalar_vector_madmk_f32(float addrspace(1)* noalias % ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 -; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], |[[VA]]|, [[VB]] +; GCN: v_mad_f32 {{v[0-9]+}}, |[[VA]]|, [[VK]], [[VB]] define amdgpu_kernel void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid @@ -171,7 +171,7 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(float addrspace(1)* noalia ; GCN-LABEL: {{^}}madmk_add_inline_imm_f32: ; GCN: buffer_load_dword [[A:v[0-9]+]] ; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 -; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0 +; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[VK]], 2.0 define amdgpu_kernel void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid diff --git a/test/CodeGen/AMDGPU/max3.ll b/test/CodeGen/AMDGPU/max3.ll index 4bb4fd46becde1fd51c049ad2f27d7563c9ffc7f..46dcf8e340f47450af01accaf24934876c72122d 100644 --- a/test/CodeGen/AMDGPU/max3.ll +++ b/test/CodeGen/AMDGPU/max3.ll @@ -1,41 +1,94 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - -; FUNC-LABEL: @v_test_imax3_sgt_i32 -; SI: v_max3_i32 -define amdgpu_kernel void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind { - %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +; GCN-LABEL: {{^}}v_test_imax3_sgt_i32: +; GCN: v_max3_i32 +define amdgpu_kernel void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid - %a = load i32, i32 addrspace(1)* %gep0, align 4 - %b = load i32, i32 addrspace(1)* %gep1, align 4 - %c = load i32, i32 addrspace(1)* %gep2, align 4 + %a = load i32, i32 addrspace(1)* %gep0 + %b = load i32, i32 addrspace(1)* %gep1 + %c = load i32, i32 addrspace(1)* %gep2 %icmp0 = icmp sgt i32 %a, %b %i0 = select i1 %icmp0, i32 %a, i32 %b %icmp1 = icmp sgt i32 %i0, %c %i1 = select i1 %icmp1, i32 %i0, i32 %c - store i32 %i1, i32 addrspace(1)* %out, align 4 + store i32 %i1, i32 addrspace(1)* %out ret void } -; FUNC-LABEL: @v_test_umax3_ugt_i32 -; SI: v_max3_u32 -define amdgpu_kernel void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind { - %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +; GCN-LABEL: {{^}}v_test_umax3_ugt_i32: +; GCN: v_max3_u32 +define amdgpu_kernel void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid - %a = load i32, i32 addrspace(1)* %gep0, align 4 - %b = load i32, i32 addrspace(1)* %gep1, align 4 - %c = load i32, i32 addrspace(1)* %gep2, align 4 + %a = load i32, i32 addrspace(1)* %gep0 + %b = load i32, i32 addrspace(1)* %gep1 + %c = load i32, i32 addrspace(1)* %gep2 %icmp0 = icmp ugt i32 %a, %b %i0 = select i1 %icmp0, i32 %a, i32 %b %icmp1 = icmp ugt i32 %i0, %c %i1 = select i1 %icmp1, i32 %i0, i32 %c - store i32 %i1, i32 addrspace(1)* %out, align 4 + store i32 %i1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}v_test_imax3_sgt_i16: +; SI: v_max3_i32 + +; VI: v_max_i16 +; VI: v_max_i16 + +; GFX9: v_max3_i16 +define amdgpu_kernel void @v_test_imax3_sgt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0 + %b = load i16, i16 addrspace(1)* %gep1 + %c = load i16, i16 addrspace(1)* %gep2 + %icmp0 = icmp sgt i16 %a, %b + %i0 = select i1 %icmp0, i16 %a, i16 %b + %icmp1 = icmp sgt i16 %i0, %c + %i1 = select i1 %icmp1, i16 %i0, i16 %c + store i16 %i1, i16 addrspace(1)* %out ret void } + +; GCN-LABEL: {{^}}v_test_umax3_ugt_i16: +; SI: v_max3_u32 + +; VI: v_max_u16 +; VI: v_max_u16 + +; GFX9: v_max3_u16 +define amdgpu_kernel void @v_test_umax3_ugt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0 + %b = load i16, i16 addrspace(1)* %gep1 + %c = load i16, i16 addrspace(1)* %gep2 + %icmp0 = icmp ugt i16 %a, %b + %i0 = select i1 %icmp0, i16 %a, i16 %b + %icmp1 = icmp ugt i16 %i0, %c + %i1 = select i1 %icmp1, i16 %i0, i16 %c + store i16 %i1, i16 addrspace(1)* %out + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/merge-m0.mir b/test/CodeGen/AMDGPU/merge-m0.mir new file mode 100644 index 0000000000000000000000000000000000000000..720642ad1ddb905ff6285a7f3f283679d56c1098 --- /dev/null +++ b/test/CodeGen/AMDGPU/merge-m0.mir @@ -0,0 +1,131 @@ +# RUN: llc -march=amdgcn -amdgpu-enable-merge-m0 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck -check-prefix=GCN %s + +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 65536 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 65536 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.2: +# GCN: SI_INIT_M0 65536 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.3: +# GCN: SI_INIT_M0 3 + +# GCN: bb.4: +# GCN-NOT: SI_INIT_M0 +# GCN: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 4 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.5: +# GCN-NOT: SI_INIT_M0 +# GCN: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 4 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.6: +# GCN: SI_INIT_M0 -1, +# GCN-NEXT: DS_WRITE_B32 +# GCN: SI_INIT_M0 %2 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 %2 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 + +--- +name: test +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sreg_32_xm0 } +body: | + bb.0.entry: + successors: %bb.1, %bb.2 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 65536, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 65536, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 65536, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_CBRANCH_VCCZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + SI_INIT_M0 65536, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.3 + + bb.3: + successors: %bb.4, %bb.5 + S_CBRANCH_VCCZ %bb.4, implicit undef %vcc + S_BRANCH %bb.5 + + bb.4: + successors: %bb.6 + SI_INIT_M0 3, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 4, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.6 + + bb.5: + successors: %bb.6 + SI_INIT_M0 3, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 4, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.6 + + bb.6: + successors: %bb.0.entry, %bb.6 + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + %2 = IMPLICIT_DEF + SI_INIT_M0 %2, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 %2, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + SI_INIT_M0 -1, implicit-def %m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit %m0, implicit %exec + S_CBRANCH_VCCZ %bb.6, implicit undef %vcc + S_BRANCH %bb.0.entry + +... diff --git a/test/CodeGen/AMDGPU/min.ll b/test/CodeGen/AMDGPU/min.ll index e85a724c1567c98d788ee4b5feb953df4a852f4a..60e43f8fb2a7d9d64b5a157207487d68938cf476 100644 --- a/test/CodeGen/AMDGPU/min.ll +++ b/test/CodeGen/AMDGPU/min.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}v_test_imin_sle_i32: ; GCN: v_min_i32_e32 diff --git a/test/CodeGen/AMDGPU/min3.ll b/test/CodeGen/AMDGPU/min3.ll index 59d5d2cdb1aa36422c1bedb9db106dc1676c2c3a..e20fb81f2ecf412f5ee65227959a9f2a88a4aca7 100644 --- a/test/CodeGen/AMDGPU/min3.ll +++ b/test/CodeGen/AMDGPU/min3.ll @@ -1,50 +1,50 @@ -; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s - -declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - -; FUNC-LABEL: @v_test_imin3_slt_i32 -; SI: v_min3_i32 -define amdgpu_kernel void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind { - %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +; RUN: llc -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s + +; GCN-LABEL: {{^}}v_test_imin3_slt_i32: +; GCN: v_min3_i32 +define amdgpu_kernel void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid - %a = load i32, i32 addrspace(1)* %gep0, align 4 - %b = load i32, i32 addrspace(1)* %gep1, align 4 - %c = load i32, i32 addrspace(1)* %gep2, align 4 + %a = load i32, i32 addrspace(1)* %gep0 + %b = load i32, i32 addrspace(1)* %gep1 + %c = load i32, i32 addrspace(1)* %gep2 %icmp0 = icmp slt i32 %a, %b %i0 = select i1 %icmp0, i32 %a, i32 %b %icmp1 = icmp slt i32 %i0, %c %i1 = select i1 %icmp1, i32 %i0, i32 %c - store i32 %i1, i32 addrspace(1)* %outgep, align 4 + store i32 %i1, i32 addrspace(1)* %outgep ret void } -; FUNC-LABEL: @v_test_umin3_ult_i32 -; SI: v_min3_u32 -define amdgpu_kernel void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind { - %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +; GCN-LABEL: {{^}}v_test_umin3_ult_i32: +; GCN: v_min3_u32 +define amdgpu_kernel void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid - %a = load i32, i32 addrspace(1)* %gep0, align 4 - %b = load i32, i32 addrspace(1)* %gep1, align 4 - %c = load i32, i32 addrspace(1)* %gep2, align 4 + %a = load i32, i32 addrspace(1)* %gep0 + %b = load i32, i32 addrspace(1)* %gep1 + %c = load i32, i32 addrspace(1)* %gep2 %icmp0 = icmp ult i32 %a, %b %i0 = select i1 %icmp0, i32 %a, i32 %b %icmp1 = icmp ult i32 %i0, %c %i1 = select i1 %icmp1, i32 %i0, i32 %c - store i32 %i1, i32 addrspace(1)* %outgep, align 4 + store i32 %i1, i32 addrspace(1)* %outgep ret void } -; FUNC-LABEL: @v_test_umin_umin_umin -; SI: v_min_i32 -; SI: v_min3_i32 -define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind { - %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +; GCN-LABEL: {{^}}v_test_umin_umin_umin: +; GCN: v_min_i32 +; GCN: v_min3_i32 +define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid2 = mul i32 %tid, 2 %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid @@ -57,10 +57,10 @@ define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 add %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2 - %a = load i32, i32 addrspace(1)* %gep0, align 4 - %b = load i32, i32 addrspace(1)* %gep1, align 4 - %c = load i32, i32 addrspace(1)* %gep2, align 4 - %d = load i32, i32 addrspace(1)* %gep3, align 4 + %a = load i32, i32 addrspace(1)* %gep0 + %b = load i32, i32 addrspace(1)* %gep1 + %c = load i32, i32 addrspace(1)* %gep2 + %d = load i32, i32 addrspace(1)* %gep3 %icmp0 = icmp slt i32 %a, %b %i0 = select i1 %icmp0, i32 %a, i32 %b @@ -71,14 +71,14 @@ define amdgpu_kernel void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 add %icmp2 = icmp slt i32 %i0, %i1 %i2 = select i1 %icmp2, i32 %i0, i32 %i1 - store i32 %i2, i32 addrspace(1)* %outgep1, align 4 + store i32 %i2, i32 addrspace(1)* %outgep1 ret void } -; FUNC-LABEL: @v_test_umin3_2_uses -; SI-NOT: v_min3 -define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind { - %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +; GCN-LABEL: {{^}}v_test_umin3_2_uses: +; GCN-NOT: v_min3 +define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid2 = mul i32 %tid, 2 %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid @@ -91,10 +91,10 @@ define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrs %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2 - %a = load i32, i32 addrspace(1)* %gep0, align 4 - %b = load i32, i32 addrspace(1)* %gep1, align 4 - %c = load i32, i32 addrspace(1)* %gep2, align 4 - %d = load i32, i32 addrspace(1)* %gep3, align 4 + %a = load i32, i32 addrspace(1)* %gep0 + %b = load i32, i32 addrspace(1)* %gep1 + %c = load i32, i32 addrspace(1)* %gep2 + %d = load i32, i32 addrspace(1)* %gep3 %icmp0 = icmp slt i32 %a, %b %i0 = select i1 %icmp0, i32 %a, i32 %b @@ -105,7 +105,60 @@ define amdgpu_kernel void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrs %icmp2 = icmp slt i32 %i0, %c %i2 = select i1 %icmp2, i32 %i0, i32 %c - store i32 %i2, i32 addrspace(1)* %outgep0, align 4 - store i32 %i0, i32 addrspace(1)* %outgep1, align 4 + store i32 %i2, i32 addrspace(1)* %outgep0 + store i32 %i0, i32 addrspace(1)* %outgep1 ret void } + +; GCN-LABEL: {{^}}v_test_imin3_slt_i16: +; SI: v_min3_i32 + +; VI: v_min_i16 +; VI: v_min_i16 + +; GFX9: v_min3_i16 +define amdgpu_kernel void @v_test_imin3_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0 + %b = load i16, i16 addrspace(1)* %gep1 + %c = load i16, i16 addrspace(1)* %gep2 + %icmp0 = icmp slt i16 %a, %b + %i0 = select i1 %icmp0, i16 %a, i16 %b + %icmp1 = icmp slt i16 %i0, %c + %i1 = select i1 %icmp1, i16 %i0, i16 %c + store i16 %i1, i16 addrspace(1)* %outgep + ret void +} + +; GCN-LABEL: {{^}}v_test_umin3_ult_i16: +; SI: v_min3_u32 + +; VI: v_min_u16 +; VI: v_min_u16 + +; GFX9: v_min3_u16 +define amdgpu_kernel void @v_test_umin3_ult_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 addrspace(1)* %cptr) #0 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid + %gep2 = getelementptr i16, i16 addrspace(1)* %cptr, i32 %tid + %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid + %a = load i16, i16 addrspace(1)* %gep0 + %b = load i16, i16 addrspace(1)* %gep1 + %c = load i16, i16 addrspace(1)* %gep2 + %icmp0 = icmp ult i16 %a, %b + %i0 = select i1 %icmp0, i16 %a, i16 %b + %icmp1 = icmp ult i16 %i0, %c + %i1 = select i1 %icmp1, i16 %i0, i16 %c + store i16 %i1, i16 addrspace(1)* %outgep + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/mubuf-offset-private.ll b/test/CodeGen/AMDGPU/mubuf-offset-private.ll new file mode 100644 index 0000000000000000000000000000000000000000..3a0605fa182a37baf415d78003942490bf0e6c04 --- /dev/null +++ b/test/CodeGen/AMDGPU/mubuf-offset-private.ll @@ -0,0 +1,136 @@ +; RUN: llc -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s + +; Test addressing modes when the scratch base is not a frame index. + +; GCN-LABEL: {{^}}store_private_offset_i8: +; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_i8() #0 { + store volatile i8 5, i8* inttoptr (i32 8 to i8*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i16: +; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_i16() #0 { + store volatile i16 5, i16* inttoptr (i32 8 to i16*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i32: +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_i32() #0 { + store volatile i32 5, i32* inttoptr (i32 8 to i32*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_v2i32: +; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_v2i32() #0 { + store volatile <2 x i32> , <2 x i32>* inttoptr (i32 8 to <2 x i32>*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_v4i32: +; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @store_private_offset_v4i32() #0 { + store volatile <4 x i32> , <4 x i32>* inttoptr (i32 8 to <4 x i32>*) + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_i8: +; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_i8() #0 { + %load = load volatile i8, i8* inttoptr (i32 8 to i8*) + ret void +} + +; GCN-LABEL: {{^}}sextload_private_offset_i8: +; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 { + %load = load volatile i8, i8* inttoptr (i32 8 to i8*) + %sextload = sext i8 %load to i32 + store i32 %sextload, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}zextload_private_offset_i8: +; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 { + %load = load volatile i8, i8* inttoptr (i32 8 to i8*) + %zextload = zext i8 %load to i32 + store i32 %zextload, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_i16: +; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_i16() #0 { + %load = load volatile i16, i16* inttoptr (i32 8 to i16*) + ret void +} + +; GCN-LABEL: {{^}}sextload_private_offset_i16: +; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 { + %load = load volatile i16, i16* inttoptr (i32 8 to i16*) + %sextload = sext i16 %load to i32 + store i32 %sextload, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}zextload_private_offset_i16: +; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 { + %load = load volatile i16, i16* inttoptr (i32 8 to i16*) + %zextload = zext i16 %load to i32 + store i32 %zextload, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_i32: +; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_i32() #0 { + %load = load volatile i32, i32* inttoptr (i32 8 to i32*) + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_v2i32: +; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_v2i32() #0 { + %load = load volatile <2 x i32>, <2 x i32>* inttoptr (i32 8 to <2 x i32>*) + ret void +} + +; GCN-LABEL: {{^}}load_private_offset_v4i32: +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s8 offset:8 +define amdgpu_kernel void @load_private_offset_v4i32() #0 { + %load = load volatile <4 x i32>, <4 x i32>* inttoptr (i32 8 to <4 x i32>*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i8_max_offset: +; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s8 offset:4095 +define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 { + store volatile i8 5, i8* inttoptr (i32 4095 to i8*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1: +; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000 +; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen{{$}} +define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 { + store volatile i8 5, i8* inttoptr (i32 4096 to i8*) + ret void +} + +; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2: +; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000 +; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s8 offen offset:1{{$}} +define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 { + store volatile i8 5, i8* inttoptr (i32 4097 to i8*) + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/mul.ll b/test/CodeGen/AMDGPU/mul.ll index a72a6efb071198646e2a7ab6b959de48b725f7a6..57c50c9804e56a243338acf11d71b285e75fc2f4 100644 --- a/test/CodeGen/AMDGPU/mul.ll +++ b/test/CodeGen/AMDGPU/mul.ll @@ -211,10 +211,10 @@ endif: ; SI: s_mul_i32 ; SI: v_mul_hi_u32 ; SI: s_mul_i32 -; SI: s_mul_i32 -; SI: v_mul_hi_u32 -; SI: v_mul_hi_u32 -; SI: s_mul_i32 +; SI-DAG: s_mul_i32 +; SI-DAG: v_mul_hi_u32 +; SI-DAG: v_mul_hi_u32 +; SI-DAG: s_mul_i32 ; SI-DAG: s_mul_i32 ; SI-DAG: v_mul_hi_u32 ; SI: s_mul_i32 diff --git a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll index 9d0b6b395996b56ddff7dd92c2971878627e689f..82c27f204a47892b67882b616f9fd718a8e108ea 100644 --- a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -362,6 +362,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1 ; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock ; GCN-NEXT: s_or_b64 exec, exec +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN-NEXT: ; return define amdgpu_ps float @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(i32 inreg %sgpr, i32 %vgpr) #0 { diff --git a/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll b/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll new file mode 100644 index 0000000000000000000000000000000000000000..bced3c408c52bc67f2ae1b2c6cab3cd3259bc8e7 --- /dev/null +++ b/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: @volatile_load +; GCN: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0 +; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; GCN: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} + +define amdgpu_kernel void @volatile_load(i32 addrspace(1)* %arg, i32 addrspace(1)* nocapture %arg1) { +bb: + %tmp18 = load volatile i32, i32 addrspace(1)* %arg, align 4 + %tmp26 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 5 + store i32 %tmp18, i32 addrspace(1)* %tmp26, align 4 + ret void +} diff --git a/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir index 2de6b59e59e9669eea85850a06b50881684a9b3c..b5dc9d9dac8413f1af0613e65d3ddeb169d97271 100644 --- a/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir +++ b/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir @@ -176,7 +176,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -189,7 +188,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -236,7 +234,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -248,7 +245,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -295,7 +291,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -307,7 +302,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -356,7 +350,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -370,7 +363,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -418,7 +410,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr6 = S_MOV_B32 -1 @@ -433,7 +424,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 , %sgpr4_sgpr5_sgpr6_sgpr7 %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) @@ -480,7 +470,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -494,7 +483,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -544,7 +532,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -557,7 +544,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1, %sgpr2_sgpr3 S_SLEEP 0, implicit %sgpr2_sgpr3 %sgpr7 = S_MOV_B32 61440 @@ -606,7 +592,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -618,7 +603,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -665,7 +649,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -677,7 +660,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 @@ -724,7 +706,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.main_body: - successors: %bb.1.if, %bb.2.end liveins: %vgpr0 %sgpr0_sgpr1 = COPY %exec @@ -736,7 +717,6 @@ body: | S_BRANCH %bb.1.if bb.1.if: - successors: %bb.2.end liveins: %sgpr0_sgpr1 %sgpr7 = S_MOV_B32 61440 diff --git a/test/CodeGen/AMDGPU/pack.v2f16.ll b/test/CodeGen/AMDGPU/pack.v2f16.ll index b86215627131d22dab67400946253bcbd5fdc04e..5a07f7ca6ae86413ce6800f478976e5411d721c7 100644 --- a/test/CodeGen/AMDGPU/pack.v2f16.ll +++ b/test/CodeGen/AMDGPU/pack.v2f16.ll @@ -1,5 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-FLUSH %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s @@ -59,10 +58,9 @@ define amdgpu_kernel void @s_pack_v2f16_imm_hi(i32 addrspace(2)* %in0) #0 { ; GCN-LABEL: {{^}}v_pack_v2f16: ; GFX9: flat_load_dword [[VAL0:v[0-9]+]] ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]] -; GFX9-FLUSH: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] +; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -85,10 +83,9 @@ define amdgpu_kernel void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1) ; GCN-LABEL: {{^}}v_pack_v2f16_user: ; GFX9: flat_load_dword [[VAL0:v[0-9]+]] ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]] -; GFX9-FLUSH: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] +; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] ; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { @@ -112,11 +109,9 @@ define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspa ; GCN-LABEL: {{^}}v_pack_v2f16_imm_lo: ; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234{{$}} -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]] -; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}} -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] +; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}} +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -134,10 +129,9 @@ define amdgpu_kernel void @v_pack_v2f16_imm_lo(i32 addrspace(1)* %in1) #0 { ; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_lo: ; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], 4.0, [[VAL1]] -; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}} -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] +; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}} +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 { @@ -156,12 +150,10 @@ define amdgpu_kernel void @v_pack_v2f16_inline_imm_lo(i32 addrspace(1)* %in1) #0 ; GCN-LABEL: {{^}}v_pack_v2f16_imm_hi: ; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]] -; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]] -; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] +; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 { @@ -180,11 +172,10 @@ define amdgpu_kernel void @v_pack_v2f16_imm_hi(i32 addrspace(1)* %in0) #0 { ; GCN-LABEL: {{^}}v_pack_v2f16_inline_f16imm_hi: ; GFX9-DAG: flat_load_dword [[VAL:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 1.0 -; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3c00 -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] +; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3c00 +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[MASKED]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_inline_f16imm_hi(i32 addrspace(1)* %in0) #0 { @@ -203,10 +194,9 @@ define amdgpu_kernel void @v_pack_v2f16_inline_f16imm_hi(i32 addrspace(1)* %in0) ; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_hi: ; GFX9: flat_load_dword [[VAL:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 64 -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], 64, 16, [[MASKED]] +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 64, 16, [[MASKED]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_inline_imm_hi(i32 addrspace(1)* %in0) #0 { diff --git a/test/CodeGen/AMDGPU/pack.v2i16.ll b/test/CodeGen/AMDGPU/pack.v2i16.ll index 9ffd16754a1c43f719465957a213a5e559aad812..8515fbc6dbae51910fb775605ab1ab31e107ce75 100644 --- a/test/CodeGen/AMDGPU/pack.v2i16.ll +++ b/test/CodeGen/AMDGPU/pack.v2i16.ll @@ -1,5 +1,4 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,+fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-DENORM %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX9-FLUSH %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=-flat-for-global,-fp64-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s @@ -55,10 +54,9 @@ define amdgpu_kernel void @s_pack_v2i16_imm_hi(i32 addrspace(2)* %in0) #0 { ; GCN-LABEL: {{^}}v_pack_v2i16: ; GFX9: flat_load_dword [[VAL0:v[0-9]+]] ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]] -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -79,10 +77,9 @@ define amdgpu_kernel void @v_pack_v2i16(i32 addrspace(1)* %in0, i32 addrspace(1) ; GCN-LABEL: {{^}}v_pack_v2i16_user: ; GFX9: flat_load_dword [[VAL0:v[0-9]+]] ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[VAL1]] -; GFX9-FLUSH: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] +; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] ; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { @@ -105,10 +102,9 @@ define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspa ; GCN-LABEL: {{^}}v_pack_v2i16_imm_lo: ; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] ; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[K]], [[VAL1]] -; GFX9-FLUSH-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}} -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] +; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}} +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_imm_lo(i32 addrspace(1)* %in1) #0 { @@ -126,9 +122,8 @@ define amdgpu_kernel void @v_pack_v2i16_imm_lo(i32 addrspace(1)* %in1) #0 { ; GCN-LABEL: {{^}}v_pack_v2i16_inline_imm_lo: ; GFX9: flat_load_dword [[VAL1:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], 64, [[VAL1]] -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, 64 +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, 64 ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_inline_imm_lo(i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -145,10 +140,9 @@ define amdgpu_kernel void @v_pack_v2i16_inline_imm_lo(i32 addrspace(1)* %in1) #0 ; GCN-LABEL: {{^}}v_pack_v2i16_imm_hi: ; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL0]], [[K]] -; GFX9-FLUSH-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[VAL0]] +; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[VAL0]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_imm_hi(i32 addrspace(1)* %in0) #0 { @@ -166,8 +160,7 @@ define amdgpu_kernel void @v_pack_v2i16_imm_hi(i32 addrspace(1)* %in0) #0 { ; GCN-LABEL: {{^}}v_pack_v2i16_inline_imm_hi: ; GFX9: flat_load_dword [[VAL:v[0-9]+]] -; GFX9-DENORM: v_pack_b32_f16 [[PACKED:v[0-9]+]], [[VAL]], 7 -; GFX9-FLUSH: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL0]] +; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL0]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(i32 addrspace(1)* %in0) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/test/CodeGen/AMDGPU/packed-op-sel.ll b/test/CodeGen/AMDGPU/packed-op-sel.ll new file mode 100644 index 0000000000000000000000000000000000000000..4970375d40d3fad621a7460138d71cd90018eb5e --- /dev/null +++ b/test/CodeGen/AMDGPU/packed-op-sel.ll @@ -0,0 +1,693 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s + +; GCN-LABEL: {{^}}fma_vector_vector_scalar_lo: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0]{{$}} +define amdgpu_kernel void @fma_vector_vector_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2 + + %scalar0.vec = insertelement <2 x half> undef, half %scalar0, i32 0 + %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %scalar0.broadcast) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; Apply fneg to broadcasted vector +; GCN-LABEL: {{^}}fma_vector_vector_neg_broadcast_scalar_lo: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_neg_broadcast_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2 + + %scalar0.vec = insertelement <2 x half> undef, half %scalar0, i32 0 + %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %neg.scalar0.broadcast = fsub <2 x half> , %scalar0.broadcast + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; Apply fneg before broadcast +; GCN-LABEL: {{^}}fma_vector_vector_neg_scalar_lo: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2 + + %neg.scalar0 = fsub half -0.0, %scalar0 + %neg.scalar0.vec = insertelement <2 x half> undef, half %neg.scalar0, i32 0 + %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; Apply fneg before and after broadcast, and should cancel out. +; GCN-LABEL: {{^}}fma_vector_vector_neg_broadcast_neg_scalar_lo: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0]{{$}} +define amdgpu_kernel void @fma_vector_vector_neg_broadcast_neg_scalar_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2 + + %neg.scalar0 = fsub half -0.0, %scalar0 + %neg.scalar0.vec = insertelement <2 x half> undef, half %neg.scalar0, i32 0 + %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %neg.neg.scalar0.broadcast = fsub <2 x half> , %neg.scalar0.broadcast + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.neg.scalar0.broadcast) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; Add scalar, but negate low component +; GCN-LABEL: {{^}}fma_vector_vector_scalar_neg_lo: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_lo:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_scalar_neg_lo(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2 + + %neg.scalar0 = fsub half -0.0, %scalar0 + %neg.scalar0.vec = insertelement <2 x half> undef, half %neg.scalar0, i32 0 + %neg.scalar0.scalar0 = insertelement <2 x half> %neg.scalar0.vec, half %scalar0, i32 1 + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.scalar0) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; Add scalar, but negate high component +; GCN-LABEL: {{^}}fma_vector_vector_scalar_neg_hi: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[SCALAR0]] op_sel_hi:[1,1,0] neg_hi:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_scalar_neg_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2 + + %neg.scalar0 = fsub half -0.0, %scalar0 + %neg.scalar0.vec = insertelement <2 x half> undef, half %scalar0, i32 0 + %scalar0.neg.scalar0 = insertelement <2 x half> %neg.scalar0.vec, half %neg.scalar0, i32 1 + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %scalar0.neg.scalar0) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; Apply fneg before broadcast with bitcast +; GCN-LABEL: {{^}}add_vector_neg_bitcast_scalar_lo: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_add_u16 v{{[0-9]+}}, [[VEC0]], [[SCALAR0]] op_sel_hi:[1,0] neg_lo:[0,1] neg_hi:[0,1]{{$}} +define amdgpu_kernel void @add_vector_neg_bitcast_scalar_lo(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 { +bb: + %vec0 = load volatile <2 x i16>, <2 x i16> addrspace(3)* %lds, align 4 + %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2 + %neg.scalar0 = fsub half -0.0, %scalar0 + %neg.scalar0.bc = bitcast half %neg.scalar0 to i16 + + %neg.scalar0.vec = insertelement <2 x i16> undef, i16 %neg.scalar0.bc, i32 0 + %neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> undef, <2 x i32> zeroinitializer + + %result = add <2 x i16> %vec0, %neg.scalar0.broadcast + store <2 x i16> %result, <2 x i16> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_scalar_lo_neg_scalar_hi: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR1:v[0-9]+]] + +; FIXME: Remove and +; GCN: v_and_b32_e32 [[SCALAR0]], 0xffff, [[SCALAR0]] +; GCN: v_xor_b32_e32 [[SCALAR1]], 0x8000, [[SCALAR1]] +; GCN: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[SCALAR1]], 16, [[SCALAR0]] + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[PACKED]]{{$}} +define amdgpu_kernel void @fma_vector_vector_scalar_lo_neg_scalar_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %arg2.gep = getelementptr inbounds half, half addrspace(3)* %arg2, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + + %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2 + %scalar1 = load volatile half, half addrspace(3)* %arg2.gep, align 2 + + %neg.scalar1 = fsub half -0.0, %scalar1 + %vec.ins0 = insertelement <2 x half> undef, half %scalar0, i32 0 + %vec2 = insertelement <2 x half> %vec.ins0, half %neg.scalar1, i32 1 + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_neg_scalar_lo_scalar_hi: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR0:v[0-9]+]] +; GCN: ds_read_u16 [[SCALAR1:v[0-9]+]] + +; FIXME: Remove and +; GCN: v_and_b32_e32 [[SCALAR0]], 0xffff, [[SCALAR0]] +; GCN: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[SCALAR1]], 16, [[SCALAR0]] + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[PACKED]] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_neg_scalar_lo_scalar_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds, half addrspace(3)* %arg2) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %arg2.gep = getelementptr inbounds half, half addrspace(3)* %arg2, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + + %scalar0 = load volatile half, half addrspace(3)* %arg2, align 2 + %scalar1 = load volatile half, half addrspace(3)* %arg2.gep, align 2 + + %vec.ins0 = insertelement <2 x half> undef, half %scalar0, i32 0 + %vec2 = insertelement <2 x half> %vec.ins0, half %scalar1, i32 1 + %neg.vec2 = fsub <2 x half> , %vec2 + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.vec2) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_neg_vector_hi: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_neg_vector_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + + %vec2.fneg = fsub <2 x half> , %vec2 + %vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> undef, <2 x i32> + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.fneg.elt1.broadcast) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_vector_neg_hi: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] neg_hi:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_vector_neg_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + + %vec2.elt1 = extractelement <2 x half> %vec2, i32 1 + %neg.vec2.elt1 = fsub half -0.0, %vec2.elt1 + + %neg.vec2.elt1.insert = insertelement <2 x half> %vec2, half %neg.vec2.elt1, i32 1 + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.vec2.elt1.insert) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}add_vector_scalar_hi: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_add_u16 v{{[0-9]+}}, [[VEC0]], [[VEC1]] op_sel:[0,1]{{$}} +define amdgpu_kernel void @add_vector_scalar_hi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(3)* %lds, i32 1 + + %vec0 = load volatile <2 x i16>, <2 x i16> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x i16>, <2 x i16> addrspace(3)* %lds.gep1, align 4 + + %vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> undef, <2 x i32> + %result = add <2 x i16> %vec0, %vec1.elt1.broadcast + + store <2 x i16> %result, <2 x i16> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_scalar_hi: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_scalar_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + + %vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.elt1.broadcast) + + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_neg_vector_lo_neg_hi: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]]{{$}} +define amdgpu_kernel void @fma_vector_vector_neg_vector_lo_neg_hi(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + + %neg.vec2 = fsub <2 x half> , %vec2 + %neg.vec2.elt1 = extractelement <2 x half> %neg.vec2, i32 1 + %neg.neg.vec2.elt1 = fsub half -0.0, %neg.vec2.elt1 + %neg.neg.vec2.elt1.insert = insertelement <2 x half> %vec2, half %neg.neg.vec2.elt1, i32 1 + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.neg.vec2.elt1.insert) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_swap_vector: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] op_sel_hi:[1,1,0]{{$}} +define amdgpu_kernel void @fma_vector_vector_swap_vector(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + + %vec2.swap = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.swap) + + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_swap_neg_vector: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or +; GCN-NOT: xor + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] op_sel_hi:[1,1,0] neg_lo:[0,0,1] neg_hi:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_swap_neg_vector(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + %neg.vec2 = fsub <2 x half> , %vec2 + + %neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> undef, <2 x i32> + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.vec2.swap) + + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_blend_vector_neg_vector_0: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or +; GCN-NOT: xor + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] op_sel_hi:[1,1,0] neg_lo:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + %neg.vec2 = fsub <2 x half> , %vec2 + %combined = shufflevector <2 x half> %vec2, <2 x half> %neg.vec2, <2 x i32> + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %combined) + + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_blend_vector_neg_vector_1: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or +; GCN-NOT: xor + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] neg_lo:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + %neg.vec2 = fsub <2 x half> , %vec2 + %combined = shufflevector <2 x half> %vec2, <2 x half> %neg.vec2, <2 x i32> + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %combined) + + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_blend_vector_neg_vector_2: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or +; GCN-NOT: xor + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] neg_hi:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_2(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + %neg.vec2 = fsub <2 x half> , %vec2 + %combined = shufflevector <2 x half> %vec2, <2 x half> %neg.vec2, <2 x i32> + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %combined) + + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}fma_vector_vector_blend_vector_neg_vector_3: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: or +; GCN-NOT: xor + +; GCN: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[VEC2]] op_sel:[0,0,1] neg_lo:[0,0,1]{{$}} +define amdgpu_kernel void @fma_vector_vector_blend_vector_neg_vector_3(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + %neg.vec2 = fsub <2 x half> , %vec2 + %combined = shufflevector <2 x half> %vec2, <2 x half> %neg.vec2, <2 x i32> + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %combined) + + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}bitcast_fneg_f32: +; GCN: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+$}} +define amdgpu_kernel void @bitcast_fneg_f32(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %f32 = load volatile float, float addrspace(3)* undef, align 4 + %neg.f32 = fsub float -0.0, %f32 + %bc = bitcast float %neg.f32 to <2 x half> + %result = fadd <2 x half> %vec0, %bc + + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}shuffle_bitcast_fneg_f32: +; GCN: v_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} op_sel:[0,1] op_sel_hi:[1,0]{{$}} +define amdgpu_kernel void @shuffle_bitcast_fneg_f32(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + + %f32 = load volatile float, float addrspace(3)* undef, align 4 + %neg.f32 = fsub float -0.0, %f32 + %bc = bitcast float %neg.f32 to <2 x half> + %shuf = shufflevector <2 x half> %bc, <2 x half> undef, <2 x i32> + %result = fadd <2 x half> %vec0, %shuf + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}extract_from_i64: +; GCN: v_lshl_or_b32 +; GCN: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+$}} +define amdgpu_kernel void @extract_from_i64(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(3)* %lds) #0 { +bb: + %vec0 = load volatile <2 x i16>, <2 x i16> addrspace(3)* %lds, align 4 + %i64 = load volatile i64, i64 addrspace(1)* undef + + %elt0 = trunc i64 %i64 to i16 + %hi = lshr i64 %i64, 16 + %elt1 = trunc i64 %hi to i16 + + %ins0 = insertelement <2 x i16> undef, i16 %elt1, i32 0 + %ins1 = insertelement <2 x i16> %ins0, i16 %elt0, i32 1 + %result = add <2 x i16> %vec0, %ins1 + store <2 x i16> %result, <2 x i16> addrspace(1)* %out, align 4 + ret void +} + + +; Bitcast is final obstacle to identifying same source register +; GCN-LABEL: {{^}}bitcast_lo_elt_op_sel: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: _or + +; GCN: v_pk_add_f16 [[FADD:v[0-9]+]] +; GCN-NEXT: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[FADD]] op_sel:[0,0,1] op_sel_hi:[1,1,0]{{$}} +define amdgpu_kernel void @bitcast_lo_elt_op_sel(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + + %scalar0 = load volatile i16, i16 addrspace(1)* undef + %shl = shl i16 %scalar0, 1 + %shl.bc = bitcast i16 %shl to half + + %fadd = fadd <2 x half> %vec2, + %shuffle = shufflevector <2 x half> %fadd, <2 x half> %vec2, <2 x i32> + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %shuffle) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + + +; Bitcast is final obstacle to identifying same source register +; GCN-LABEL: {{^}}mix_elt_types_op_sel: +; GCN: ds_read_b32 [[VEC0:v[0-9]+]] +; GCN: ds_read_b32 [[VEC1:v[0-9]+]] +; GCN: ds_read_b32 [[VEC2:v[0-9]+]] + +; GCN-NOT: pack +; GCN-NOT: and +; GCN-NOT: shl +; GCN-NOT: _or + +; GCN: v_pk_add_f16 [[FADD:v[0-9]+]] +; GCN-NEXT: v_pk_fma_f16 v{{[0-9]+}}, [[VEC0]], [[VEC1]], [[FADD]] op_sel:[0,0,1] op_sel_hi:[1,1,0]{{$}} +define amdgpu_kernel void @mix_elt_types_op_sel(<2 x half> addrspace(1)* %out, <2 x half> addrspace(3)* %lds) #0 { +bb: + %lds.gep1 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 1 + %lds.gep2 = getelementptr inbounds <2 x half>, <2 x half> addrspace(3)* %lds, i32 2 + + %vec0 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds, align 4 + %vec1 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep1, align 4 + %vec2 = load volatile <2 x half>, <2 x half> addrspace(3)* %lds.gep2, align 4 + + %scalar0 = load volatile i16, i16 addrspace(1)* undef + %scalar1 = load volatile half, half addrspace(1)* undef + %shl = shl i16 %scalar0, 1 + %shl.bc = bitcast i16 %shl to half + + %insert0 = insertelement <2 x half> undef, half %shl.bc, i32 0 + + %fadd = fadd <2 x half> %vec2, + %insert1 = shufflevector <2 x half> %fadd, <2 x half> %insert0, <2 x i32> + + %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %insert1) + store <2 x half> %result, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/parallelandifcollapse.ll b/test/CodeGen/AMDGPU/parallelandifcollapse.ll index a90f200f79e3bdc2145b81b54351e5e2aea57af8..190d2b72ebafe8757f72aa6ea5c05ca4241ca15b 100644 --- a/test/CodeGen/AMDGPU/parallelandifcollapse.ll +++ b/test/CodeGen/AMDGPU/parallelandifcollapse.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck %s +; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck %s ; ; CFG flattening should use parallel-and mode to generate branch conditions and ; then merge if-regions with the same bodies. diff --git a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index 77d793201adc7679b8a09505d4676049e3d50c32..49f00e9447dab0f91e652a6ff5739fb9a0d6bba8 100644 --- a/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -608,11 +608,11 @@ ret: ; GCN: ;;#ASMSTART ; GCN: ; use s[0:1] define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { - call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" () #0 - call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" () #0 - call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19}"() #0 - call void asm sideeffect "", "~{VGPR20_VGPR21}"() #0 - call void asm sideeffect "", "~{VGPR22}"() #0 + call void asm sideeffect "", "~{v[0:7]}" () #0 + call void asm sideeffect "", "~{v[8:15]}" () #0 + call void asm sideeffect "", "~{v[16:19]}"() #0 + call void asm sideeffect "", "~{v[20:21]}"() #0 + call void asm sideeffect "", "~{v22}"() #0 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 diff --git a/test/CodeGen/AMDGPU/private-access-no-objects.ll b/test/CodeGen/AMDGPU/private-access-no-objects.ll index af268351029389f8547583e7c36d344550222877..dcb089010e99d778d9eef48b2ca4dc6510dd75d2 100644 --- a/test/CodeGen/AMDGPU/private-access-no-objects.ll +++ b/test/CodeGen/AMDGPU/private-access-no-objects.ll @@ -1,7 +1,7 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=OPTNONE %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI -check-prefix=OPT %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI -check-prefix=OPT %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=OPTNONE %s ; There are no stack objects, but still a private memory access. The ; private access regiters need to be correctly initialized anyway, and @@ -27,9 +27,9 @@ define amdgpu_kernel void @store_to_undef() #0 { ; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1] ; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3] ; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}} -; OPT: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}} +; OPT: buffer_store_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}} define amdgpu_kernel void @store_to_inttoptr() #0 { - store volatile i32 0, i32* inttoptr (i32 123 to i32*) + store volatile i32 0, i32* inttoptr (i32 124 to i32*) ret void } @@ -47,9 +47,9 @@ define amdgpu_kernel void @load_from_undef() #0 { ; OPT-DAG: s_mov_b64 s{{\[}}[[RSRC_LO:[0-9]+]]:{{[0-9]+\]}}, s[0:1] ; OPT-DAG: s_mov_b64 s{{\[[0-9]+}}:[[RSRC_HI:[0-9]+]]{{\]}}, s[2:3] ; OPT-DAG: s_mov_b32 [[SOFFSET:s[0-9]+]], s7{{$}} -; OPT: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offen{{$}} +; OPT: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[RSRC_LO]]:[[RSRC_HI]]{{\]}}, [[SOFFSET]] offset:124{{$}} define amdgpu_kernel void @load_from_inttoptr() #0 { - %ld = load volatile i32, i32* inttoptr (i32 123 to i32*) + %ld = load volatile i32, i32* inttoptr (i32 124 to i32*) ret void } diff --git a/test/CodeGen/AMDGPU/private-memory-r600.ll b/test/CodeGen/AMDGPU/private-memory-r600.ll index d07a0a02cbae4d620494a4953df407540d867cb2..866cd16ec3b53db63eee26e0513e818c517ead08 100644 --- a/test/CodeGen/AMDGPU/private-memory-r600.ll +++ b/test/CodeGen/AMDGPU/private-memory-r600.ll @@ -12,9 +12,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; OPT: call i32 @llvm.r600.read.local.size.y(), !range !0 ; OPT: call i32 @llvm.r600.read.local.size.z(), !range !0 -; OPT: call i32 @llvm.r600.read.tidig.x(), !range !0 -; OPT: call i32 @llvm.r600.read.tidig.y(), !range !0 -; OPT: call i32 @llvm.r600.read.tidig.z(), !range !0 +; OPT: call i32 @llvm.r600.read.tidig.x(), !range !1 +; OPT: call i32 @llvm.r600.read.tidig.y(), !range !1 +; OPT: call i32 @llvm.r600.read.tidig.z(), !range !1 define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { entry: @@ -295,6 +295,7 @@ define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { ret void } -; OPT: !0 = !{i32 0, i32 2048} +; OPT: !0 = !{i32 0, i32 257} +; OPT: !1 = !{i32 0, i32 256} attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" } diff --git a/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll new file mode 100644 index 0000000000000000000000000000000000000000..5b2da788a4052fbf7e636677112fc3ee96c05b80 --- /dev/null +++ b/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll @@ -0,0 +1,131 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-promote-alloca < %s | FileCheck --check-prefix=OPT %s + +; Make sure that array alloca loaded and stored as multi-element aggregates are handled correctly +; Strictly the promote-alloca pass shouldn't have to deal with this case as it is non-canonical, but +; the pass should handle it gracefully if it is +; The checks look for lines that previously caused issues in PromoteAlloca (non-canonical). Opt +; should now leave these unchanged + +; OPT-LABEL: @promote_1d_aggr( +; OPT: store [1 x float] %tmp3, [1 x float]* %f1 + +%Block = type { [1 x float], i32 } +%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] } + +@block = external addrspace(1) global %Block +@pv = external addrspace(1) global %gl_PerVertex + +define amdgpu_vs void @promote_1d_aggr() #0 { + %i = alloca i32 + %f1 = alloca [1 x float] + %tmp = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 1 + %tmp1 = load i32, i32 addrspace(1)* %tmp + store i32 %tmp1, i32* %i + %tmp2 = getelementptr %Block, %Block addrspace(1)* @block, i32 0, i32 0 + %tmp3 = load [1 x float], [1 x float] addrspace(1)* %tmp2 + store [1 x float] %tmp3, [1 x float]* %f1 + %tmp4 = load i32, i32* %i + %tmp5 = getelementptr [1 x float], [1 x float]* %f1, i32 0, i32 %tmp4 + %tmp6 = load float, float* %tmp5 + %tmp7 = alloca <4 x float> + %tmp8 = load <4 x float>, <4 x float>* %tmp7 + %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0 + %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1 + %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2 + %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3 + %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0 + store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13 + ret void +} + + +; OPT-LABEL: @promote_store_aggr( +; OPT: %tmp6 = load [2 x float], [2 x float]* %f1 + +%Block2 = type { i32, [2 x float] } +@block2 = external addrspace(1) global %Block2 + +define amdgpu_vs void @promote_store_aggr() #0 { + %i = alloca i32 + %f1 = alloca [2 x float] + %tmp = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 0 + %tmp1 = load i32, i32 addrspace(1)* %tmp + store i32 %tmp1, i32* %i + %tmp2 = load i32, i32* %i + %tmp3 = sitofp i32 %tmp2 to float + %tmp4 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 0 + store float %tmp3, float* %tmp4 + %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 1 + store float 2.000000e+00, float* %tmp5 + %tmp6 = load [2 x float], [2 x float]* %f1 + %tmp7 = getelementptr %Block2, %Block2 addrspace(1)* @block2, i32 0, i32 1 + store [2 x float] %tmp6, [2 x float] addrspace(1)* %tmp7 + %tmp8 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0 + store <4 x float> , <4 x float> addrspace(1)* %tmp8 + ret void +} + +; OPT-LABEL: @promote_load_from_store_aggr( +; OPT: store [2 x float] %tmp3, [2 x float]* %f1 + +%Block3 = type { [2 x float], i32 } +@block3 = external addrspace(1) global %Block3 + +define amdgpu_vs void @promote_load_from_store_aggr() #0 { + %i = alloca i32 + %f1 = alloca [2 x float] + %tmp = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 1 + %tmp1 = load i32, i32 addrspace(1)* %tmp + store i32 %tmp1, i32* %i + %tmp2 = getelementptr %Block3, %Block3 addrspace(1)* @block3, i32 0, i32 0 + %tmp3 = load [2 x float], [2 x float] addrspace(1)* %tmp2 + store [2 x float] %tmp3, [2 x float]* %f1 + %tmp4 = load i32, i32* %i + %tmp5 = getelementptr [2 x float], [2 x float]* %f1, i32 0, i32 %tmp4 + %tmp6 = load float, float* %tmp5 + %tmp7 = alloca <4 x float> + %tmp8 = load <4 x float>, <4 x float>* %tmp7 + %tmp9 = insertelement <4 x float> %tmp8, float %tmp6, i32 0 + %tmp10 = insertelement <4 x float> %tmp9, float %tmp6, i32 1 + %tmp11 = insertelement <4 x float> %tmp10, float %tmp6, i32 2 + %tmp12 = insertelement <4 x float> %tmp11, float %tmp6, i32 3 + %tmp13 = getelementptr %gl_PerVertex, %gl_PerVertex addrspace(1)* @pv, i32 0, i32 0 + store <4 x float> %tmp12, <4 x float> addrspace(1)* %tmp13 + ret void +} + +; OPT-LABEL: @promote_double_aggr( +; OPT: store [2 x double] %tmp5, [2 x double]* %s + +@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> } +@frag_color = external addrspace(1) global <4 x float> + +define amdgpu_ps void @promote_double_aggr() #0 { + %s = alloca [2 x double] + %tmp = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 0 + %tmp1 = load double, double addrspace(1)* %tmp + %tmp2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, { [4 x double], <2 x double>, <3 x double>, <4 x double> } addrspace(1)* @tmp_g, i32 0, i32 0, i32 1 + %tmp3 = load double, double addrspace(1)* %tmp2 + %tmp4 = insertvalue [2 x double] undef, double %tmp1, 0 + %tmp5 = insertvalue [2 x double] %tmp4, double %tmp3, 1 + store [2 x double] %tmp5, [2 x double]* %s + %tmp6 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1 + %tmp7 = load double, double* %tmp6 + %tmp8 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1 + %tmp9 = load double, double* %tmp8 + %tmp10 = fadd double %tmp7, %tmp9 + %tmp11 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0 + store double %tmp10, double* %tmp11 + %tmp12 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 0 + %tmp13 = load double, double* %tmp12 + %tmp14 = getelementptr [2 x double], [2 x double]* %s, i32 0, i32 1 + %tmp15 = load double, double* %tmp14 + %tmp16 = fadd double %tmp13, %tmp15 + %tmp17 = fptrunc double %tmp16 to float + %tmp18 = insertelement <4 x float> undef, float %tmp17, i32 0 + %tmp19 = insertelement <4 x float> %tmp18, float %tmp17, i32 1 + %tmp20 = insertelement <4 x float> %tmp19, float %tmp17, i32 2 + %tmp21 = insertelement <4 x float> %tmp20, float %tmp17, i32 3 + store <4 x float> %tmp21, <4 x float> addrspace(1)* @frag_color + ret void +} diff --git a/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll new file mode 100644 index 0000000000000000000000000000000000000000..a95e9f828b6159c6bbb8a560ff3c35c1b35896cc --- /dev/null +++ b/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll @@ -0,0 +1,74 @@ +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s + +; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 { +; IR: alloca [5 x i32] +; ASM-LABEL: {{^}}promote_alloca_shaders: +; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only) + +define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 { +entry: + %stack = alloca [5 x i32], align 4 + %tmp0 = load i32, i32 addrspace(1)* %in, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0 + store i32 4, i32* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 + %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1 + store i32 5, i32* %arrayidx3, align 4 + %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 + %tmp2 = load i32, i32* %arrayidx4, align 4 + store i32 %tmp2, i32 addrspace(1)* %out, align 4 + %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 + %tmp3 = load i32, i32* %arrayidx5 + %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 + store i32 %tmp3, i32 addrspace(1)* %arrayidx6 + ret void +} + +; OPT-LABEL: @promote_to_vector_call_c( +; OPT-NOT: alloca +; OPT: extractelement <2 x i32> %{{[0-9]+}}, i32 %in +; ASM-NOT: LDSByteSize +define void @promote_to_vector_call_c(i32 addrspace(1)* %out, i32 %in) #0 { +entry: + %tmp = alloca [2 x i32] + %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0 + %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1 + store i32 0, i32* %tmp1 + store i32 1, i32* %tmp2 + %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in + %tmp4 = load i32, i32* %tmp3 + %tmp5 = load volatile i32, i32 addrspace(1)* undef + %tmp6 = add i32 %tmp4, %tmp5 + store i32 %tmp6, i32 addrspace(1)* %out + ret void +} + +; OPT-LABEL: @no_promote_to_lds_c( +; OPT: alloca +; ASM-NOT: LDSByteSize +define void @no_promote_to_lds(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 { +entry: + %stack = alloca [5 x i32], align 4 + %0 = load i32, i32 addrspace(1)* %in, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 + store i32 4, i32* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 + %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 + store i32 5, i32* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 + %2 = load i32, i32* %arrayidx10, align 4 + store i32 %2, i32 addrspace(1)* %out, align 4 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 + %3 = load i32, i32* %arrayidx12 + %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 + store i32 %3, i32 addrspace(1)* %arrayidx13 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/promote-alloca-shaders.ll b/test/CodeGen/AMDGPU/promote-alloca-shaders.ll deleted file mode 100644 index d40fca9f4fd5e341741969eb6ab3e1b7aaa0dd15..0000000000000000000000000000000000000000 --- a/test/CodeGen/AMDGPU/promote-alloca-shaders.ll +++ /dev/null @@ -1,29 +0,0 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s - -; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 { -; IR: alloca [5 x i32] -; ASM-LABEL: {{^}}promote_alloca_shaders: -; ASM: ; LDSByteSize: 0 bytes/workgroup (compile time only) - -define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 { -entry: - %stack = alloca [5 x i32], align 4 - %tmp0 = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp0 - store i32 4, i32* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 - %tmp1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 - %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp1 - store i32 5, i32* %arrayidx3, align 4 - %arrayidx4 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 - %tmp2 = load i32, i32* %arrayidx4, align 4 - store i32 %tmp2, i32 addrspace(1)* %out, align 4 - %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 - %tmp3 = load i32, i32* %arrayidx5 - %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 - store i32 %tmp3, i32 addrspace(1)* %arrayidx6 - ret void -} - -attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" } diff --git a/test/CodeGen/AMDGPU/promote-alloca-volatile.ll b/test/CodeGen/AMDGPU/promote-alloca-volatile.ll index 9c43a6dc60f41e0aa2a639efe83c3a9a07f58d02..d7655993a2d9f0a46120f3b81c06183e90545f13 100644 --- a/test/CodeGen/AMDGPU/promote-alloca-volatile.ll +++ b/test/CodeGen/AMDGPU/promote-alloca-volatile.ll @@ -1,26 +1,26 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-promote-alloca < %s | FileCheck %s ; CHECK-LABEL: @volatile_load( -; CHECK: alloca [5 x i32] +; CHECK: alloca [4 x i32] ; CHECK: load volatile i32, i32* define amdgpu_kernel void @volatile_load(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [4 x i32], align 4 %tmp = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp + %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp %load = load volatile i32, i32* %arrayidx1 store i32 %load, i32 addrspace(1)* %out ret void } ; CHECK-LABEL: @volatile_store( -; CHECK: alloca [5 x i32] +; CHECK: alloca [4 x i32] ; CHECK: store volatile i32 %tmp, i32* define amdgpu_kernel void @volatile_store(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: - %stack = alloca [5 x i32], align 4 + %stack = alloca [4 x i32], align 4 %tmp = load i32, i32 addrspace(1)* %in, align 4 - %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %tmp + %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32]* %stack, i32 0, i32 %tmp store volatile i32 %tmp, i32* %arrayidx1 ret void } diff --git a/test/CodeGen/AMDGPU/r600.bitcast.ll b/test/CodeGen/AMDGPU/r600.bitcast.ll index acf7a66a357fc03c96909c5f3d6013fb4928f190..67431e6a4825c61249db5221273c6d8d7452eec0 100644 --- a/test/CodeGen/AMDGPU/r600.bitcast.ll +++ b/test/CodeGen/AMDGPU/r600.bitcast.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; This test just checks that the compiler doesn't crash. diff --git a/test/CodeGen/AMDGPU/readcyclecounter.ll b/test/CodeGen/AMDGPU/readcyclecounter.ll index 5c698c839fa686692831ab75352e5ae312e8efd6..d7b353cd25d38aa478f51d059f9c83328b1ddb9b 100644 --- a/test/CodeGen/AMDGPU/readcyclecounter.ll +++ b/test/CodeGen/AMDGPU/readcyclecounter.ll @@ -22,4 +22,18 @@ define amdgpu_kernel void @test_readcyclecounter(i64 addrspace(1)* %out) #0 { ret void } +; This test used to crash in ScheduleDAG. +; +; GCN-LABEL: {{^}}test_readcyclecounter_smem: +; SI-DAG: s_memtime +; VI-DAG: s_memrealtime +; GCN-DAG: s_load_dword +define amdgpu_cs i32 @test_readcyclecounter_smem(i64 addrspace(2)* inreg %in) #0 { + %cycle0 = call i64 @llvm.readcyclecounter() + %in.v = load i64, i64 addrspace(2)* %in + %r.64 = add i64 %cycle0, %in.v + %r.32 = trunc i64 %r.64 to i32 + ret i32 %r.32 +} + attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/regcoalesce-prune.mir b/test/CodeGen/AMDGPU/regcoalesce-prune.mir new file mode 100644 index 0000000000000000000000000000000000000000..7ad474bf0ed2f00d0ecc71385e193a8e259682b1 --- /dev/null +++ b/test/CodeGen/AMDGPU/regcoalesce-prune.mir @@ -0,0 +1,31 @@ +# RUN: llc -o - %s -mtriple=amdgcn-amd-amdhsa-opencl -run-pass=simple-register-coalescing | FileCheck %s +--- +# Checks for a bug where subregister liveranges were not properly pruned for +# an IMPLCITI_DEF that gets removed completely. +# +# CHECK-LABEL: name: func +# IMPLICIT_DEF should be gone without llc hitting assertion failures. +# CHECK-NOT: IMPLICIT_DEF +name: func +tracksRegLiveness: true +body: | + bb.0: + undef %5.sub1 = V_MOV_B32_e32 0, implicit %exec + %6 = COPY %5 + S_CBRANCH_VCCZ %bb.2, implicit undef %vcc + + bb.1: + %1 : sreg_32_xm0 = S_MOV_B32 0 + undef %0.sub0 : sreg_64 = COPY %1 + %0.sub1 = COPY %1 + %4 : vreg_64 = COPY killed %0 + %5 : vreg_64 = IMPLICIT_DEF + %6 : vreg_64 = COPY killed %4 + + bb.2: + %2 : vgpr_32 = V_CVT_F32_I32_e32 killed %5.sub1, implicit %exec + + bb.3: + %3 : vgpr_32 = V_CVT_F32_I32_e32 killed %6.sub1, implicit %exec + S_ENDPGM +... diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir new file mode 100644 index 0000000000000000000000000000000000000000..31024277871d86b0365f3dea413a27a404a3a32a --- /dev/null +++ b/test/CodeGen/AMDGPU/rename-independent-subregs-invalid-mac-operands.mir @@ -0,0 +1,69 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck -check-prefix=GCN %s +--- + +# GCN-LABEL: name: mac_invalid_operands +# GCN: undef %18.sub0 = V_MAC_F32_e32 undef %3, undef %9, undef %18.sub0, implicit %exec + +name: mac_invalid_operands +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_128 } + - { id: 1, class: vreg_128 } + - { id: 2, class: sgpr_64 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: vgpr_32 } + - { id: 5, class: vgpr_32 } + - { id: 6, class: vgpr_32 } + - { id: 7, class: sreg_64 } + - { id: 8, class: vgpr_32 } + - { id: 9, class: vgpr_32 } + - { id: 10, class: vreg_64 } + - { id: 11, class: vreg_64 } + - { id: 12, class: vreg_128 } + - { id: 13, class: vreg_128 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vreg_64 } + - { id: 16, class: vgpr_32 } + - { id: 17, class: vreg_128 } +body: | + bb.0: + successors: %bb.2, %bb.1 + + %7 = V_CMP_NEQ_F32_e64 0, 0, 0, undef %3, 0, 0, implicit %exec + %vcc = COPY killed %7 + S_CBRANCH_VCCZ %bb.2, implicit killed %vcc + + bb.1: + successors: %bb.3 + + %4 = V_ADD_F32_e32 undef %6, undef %5, implicit %exec + undef %12.sub0 = COPY killed %4 + %17 = COPY killed %12 + S_BRANCH %bb.3 + + bb.2: + successors: %bb.3 + + %8 = V_MAC_F32_e32 undef %3, undef %9, undef %8, implicit %exec + undef %13.sub0 = COPY %8 + %13.sub1 = COPY %8 + %13.sub2 = COPY killed %8 + %0 = COPY killed %13 + %17 = COPY killed %0 + + bb.3: + %1 = COPY killed %17 + FLAT_STORE_DWORD undef %10, %1.sub2, 0, 0, 0, implicit %exec, implicit %flat_scr + %14 = COPY %1.sub1 + %16 = COPY killed %1.sub0 + undef %15.sub0 = COPY killed %16 + %15.sub1 = COPY killed %14 + FLAT_STORE_DWORDX2 undef %11, killed %15, 0, 0, 0, implicit %exec, implicit %flat_scr + S_ENDPGM + +... diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs.mir b/test/CodeGen/AMDGPU/rename-independent-subregs.mir index fc2e4426ba48ff41669e5fe812e31070fc1ce58b..31ad26e76979699689568e31d5abc588fccd11ed 100644 --- a/test/CodeGen/AMDGPU/rename-independent-subregs.mir +++ b/test/CodeGen/AMDGPU/rename-independent-subregs.mir @@ -49,7 +49,6 @@ registers: - { id: 1, class: sreg_128 } body: | bb.0: - successors: %bb.1, %bb.2 S_NOP 0, implicit-def undef %0.sub2 S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc S_BRANCH %bb.2 diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll index f2fbacbab82e7dd1b58c0bb700ef89d7013aec60..e7a05d94cdc43c0ef7ba64fc698d6797f9a4e88f 100644 --- a/test/CodeGen/AMDGPU/ret_jump.ll +++ b/test/CodeGen/AMDGPU/ret_jump.ll @@ -65,7 +65,6 @@ ret.bb: ; preds = %else, %main_body ; GCN-NEXT: ; %unreachable.bb ; GCN: ds_write_b32 -; GCN: s_waitcnt ; GCN: ; divergent unreachable ; GCN: ; %ret.bb @@ -73,6 +72,7 @@ ret.bb: ; preds = %else, %main_body ; GCN: ; %UnifiedReturnBlock ; GCN-NEXT: s_or_b64 exec, exec +; GCN-NEXT: s_waitcnt ; GCN-NEXT: ; return ; GCN-NEXT: .Lfunc_end define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 { diff --git a/test/CodeGen/AMDGPU/schedule-regpressure.mir b/test/CodeGen/AMDGPU/schedule-regpressure.mir index c71de87eeecee686bc27bb8ef29608605e729372..3a20ec732e5bfde3b538c48fc14a926fe00ce82d 100644 --- a/test/CodeGen/AMDGPU/schedule-regpressure.mir +++ b/test/CodeGen/AMDGPU/schedule-regpressure.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler %s -o - -debug-only=misched 2>&1 | FileCheck %s +# RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler %s -o - -debug-only=machine-scheduler 2>&1 | FileCheck %s # REQUIRES: asserts # Check there is no SReg_32 pressure created by DS_* instructions because of M0 use diff --git a/test/CodeGen/AMDGPU/scratch-simple.ll b/test/CodeGen/AMDGPU/scratch-simple.ll new file mode 100644 index 0000000000000000000000000000000000000000..abd15f1fb47f889f740cc187556a8c2c89a4d3f4 --- /dev/null +++ b/test/CodeGen/AMDGPU/scratch-simple.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=amdgcn -mcpu=verde -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s + +; This used to fail due to a v_add_i32 instruction with an illegal immediate +; operand that was created during Local Stack Slot Allocation. Test case derived +; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 +; +; GCN-LABEL: {{^}}ps_main: + +; GCN-DAG: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GCN-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 +; GCN-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]] + +; GCN-DAG: v_mov_b32_e32 [[C200:v[0-9]+]], 0x200 +; GCN-DAG: v_mov_b32_e32 [[C400:v[0-9]+]], 0x400 +; GCN-DAG: v_or_b32_e32 [[LO_OFF:v[0-9]+]], [[C200]], [[CLAMP_IDX]] +; GCN-DAG: v_or_b32_e32 [[HI_OFF:v[0-9]+]], [[C400]], [[CLAMP_IDX]] + +; GCN: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_ps float @ps_main(i32 %idx) { + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}vs_main: +; GCN: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_vs float @vs_main(i32 %idx) { + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}cs_main: +; GCN: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_cs float @cs_main(i32 %idx) { + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}hs_main: +; SI: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_hs float @hs_main(i32 %idx) { + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}gs_main: +; SI: s_mov_b32 [[SWO:s[0-9]+]], s0 +; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +define amdgpu_gs float @gs_main(i32 %idx) { + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %r = fadd float %v1, %v2 + ret float %r +} + +; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset: +; SI: s_mov_b32 [[SWO:s[0-9]+]], s6 +; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: s_mov_b32 s2, s5 +define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %f = fadd float %v1, %v2 + %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2 + %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3 + ret <{i32, i32, i32, float}> %r2 +} + +; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset: +; SI: s_mov_b32 [[SWO:s[0-9]+]], s6 +; GFX9: s_mov_b32 [[SWO:s[0-9]+]], s5 +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[SWO]] offen +; GCN: s_mov_b32 s2, s5 +define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { + %v1 = extractelement <81 x float> , i32 %idx + %v2 = extractelement <81 x float> , i32 %idx + %f = fadd float %v1, %v2 + %r1 = insertvalue <{i32, i32, i32, float}> undef, i32 %swo, 2 + %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3 + ret <{i32, i32, i32, float}> %r2 +} diff --git a/test/CodeGen/AMDGPU/sdiv.ll b/test/CodeGen/AMDGPU/sdiv.ll index f9ac425be79428cc56f20e250fa045950e1a0842..7ec6ca809b685c31d46862a58e790ea24f1c865f 100644 --- a/test/CodeGen/AMDGPU/sdiv.ll +++ b/test/CodeGen/AMDGPU/sdiv.ll @@ -36,7 +36,7 @@ define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* ; FUNC-LABEL: {{^}}slow_sdiv_i32_3435: ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], ; SI-DAG: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b -; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]] +; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]] ; SI: v_add_i32 ; SI: v_lshrrev_b32 ; SI: v_ashrrev_i32 diff --git a/test/CodeGen/AMDGPU/sdwa-peephole.ll b/test/CodeGen/AMDGPU/sdwa-peephole.ll index 1e0ac3807528000e7ea1134770ef31d34746bbbb..66e166d283f7a6d303b306cb7bf2d78c7127e072 100644 --- a/test/CodeGen/AMDGPU/sdwa-peephole.ll +++ b/test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -74,7 +74,7 @@ entry: ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_LO:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL_HI:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_HI]], v[[DST_MUL_LO]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL_LO]], v[[DST_MUL_HI]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD define amdgpu_kernel void @mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) { entry: @@ -97,8 +97,8 @@ entry: ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL1:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL2:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL3:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD define amdgpu_kernel void @mul_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %ina, <4 x i16> addrspace(1)* %inb) { entry: @@ -125,10 +125,10 @@ entry: ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL5:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL6:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 ; SDWA-DAG: v_mul_u32_u24_sdwa v[[DST_MUL7:[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL7]], v[[DST_MUL6]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL5]], v[[DST_MUL4]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL3]], v[[DST_MUL2]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL1]], v[[DST_MUL0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL6]], v[[DST_MUL7]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL4]], v[[DST_MUL5]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL2]], v[[DST_MUL3]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_or_b32_sdwa v{{[0-9]+}}, v[[DST_MUL0]], v[[DST_MUL1]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD define amdgpu_kernel void @mul_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %ina, <8 x i16> addrspace(1)* %inb) { entry: @@ -345,7 +345,10 @@ entry: ; GCN-LABEL: {{^}}immediate_mul_v2i16: ; NOSDWA-NOT: v_mul_u32_u24_sdwa -; SDWA-NOT: v_mul_u32_u24_sdwa +; SDWA-DAG: v_mov_b32_e32 v[[M321:[0-9]+]], 0x141 +; SDWA-DAG: v_mov_b32_e32 v[[M123:[0-9]+]], 0x7b +; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M123]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDWA-DAG: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[M321]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD define amdgpu_kernel void @immediate_mul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { entry: @@ -364,7 +367,7 @@ entry: ; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; NOSDWA-NOT: v_mul_u32_u24_sdwa -; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; SDWA: v_mul_u32_u24_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD define amdgpu_kernel void @mulmul_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %ina, <2 x i16> addrspace(1)* %inb) { entry: @@ -393,3 +396,53 @@ store_label: store <2 x i16> %add, <2 x i16> addrspace(1)* %out, align 4 ret void } + + +; Check that "pulling out" SDWA operands works correctly. +; GCN-LABEL: {{^}}pulled_out_test: +; NOSDWA-DAG: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; NOSDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; NOSDWA-DAG: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; NOSDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; NOSDWA: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; NOSDWA-NOT: v_and_b32_sdwa +; NOSDWA-NOT: v_or_b32_sdwa + +; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; SDWA-DAG: v_and_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; SDWA-DAG: v_lshlrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} +; SDWA: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD + +define amdgpu_kernel void @pulled_out_test(<8 x i8> addrspace(1)* %sourceA, <8 x i8> addrspace(1)* %destValues) { +entry: + %idxprom = ashr exact i64 15, 32 + %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %sourceA, i64 %idxprom + %tmp = load <8 x i8>, <8 x i8> addrspace(1)* %arrayidx, align 8 + + %tmp1 = extractelement <8 x i8> %tmp, i32 0 + %tmp2 = extractelement <8 x i8> %tmp, i32 1 + %tmp3 = extractelement <8 x i8> %tmp, i32 2 + %tmp4 = extractelement <8 x i8> %tmp, i32 3 + %tmp5 = extractelement <8 x i8> %tmp, i32 4 + %tmp6 = extractelement <8 x i8> %tmp, i32 5 + %tmp7 = extractelement <8 x i8> %tmp, i32 6 + %tmp8 = extractelement <8 x i8> %tmp, i32 7 + + %tmp9 = insertelement <2 x i8> undef, i8 %tmp1, i32 0 + %tmp10 = insertelement <2 x i8> %tmp9, i8 %tmp2, i32 1 + %tmp11 = insertelement <2 x i8> undef, i8 %tmp3, i32 0 + %tmp12 = insertelement <2 x i8> %tmp11, i8 %tmp4, i32 1 + %tmp13 = insertelement <2 x i8> undef, i8 %tmp5, i32 0 + %tmp14 = insertelement <2 x i8> %tmp13, i8 %tmp6, i32 1 + %tmp15 = insertelement <2 x i8> undef, i8 %tmp7, i32 0 + %tmp16 = insertelement <2 x i8> %tmp15, i8 %tmp8, i32 1 + + %tmp17 = shufflevector <2 x i8> %tmp10, <2 x i8> %tmp12, <4 x i32> + %tmp18 = shufflevector <2 x i8> %tmp14, <2 x i8> %tmp16, <4 x i32> + %tmp19 = shufflevector <4 x i8> %tmp17, <4 x i8> %tmp18, <8 x i32> + + %arrayidx5 = getelementptr inbounds <8 x i8>, <8 x i8> addrspace(1)* %destValues, i64 %idxprom + store <8 x i8> %tmp19, <8 x i8> addrspace(1)* %arrayidx5, align 8 + ret void +} diff --git a/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir b/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir new file mode 100644 index 0000000000000000000000000000000000000000..ba937c927c706fac8811a8b7a1365f17b8c0517a --- /dev/null +++ b/test/CodeGen/AMDGPU/sdwa-scalar-ops.mir @@ -0,0 +1,408 @@ +# RUN: llc -march=amdgcn -mcpu=fiji -start-before si-peephole-sdwa -o - %s | FileCheck -check-prefix=GCN %s + +# GCN-LABEL: {{^}}sdwa_imm_operand: +# GCN: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 2 +# GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 2 +# GCN: BB0_1: +# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 + +# GCN-LABEL: {{^}}sdwa_sgpr_operand: +# GCN: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 2 +# GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 2 +# GCN: BB1_1: +# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +# GCN: v_lshlrev_b32_sdwa v{{[0-9]+}}, v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 + +--- | + ; ModuleID = 'sdwa-scalar-ops.opt.ll' + source_filename = "sdwa-scalar-ops.opt.ll" + target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + + define amdgpu_kernel void @sdwa_imm_operand(i32 addrspace(1)* nocapture %arg) { + bb: + br label %bb2 + + bb1: ; preds = %bb2 + ret void + + bb2: ; preds = %bb2, %bb + %lsr.iv = phi i64 [ %lsr.iv.next, %bb2 ], [ 0, %bb ] + %bc = bitcast i32 addrspace(1)* %arg to i8 addrspace(1)* + %uglygep4 = getelementptr i8, i8 addrspace(1)* %bc, i64 %lsr.iv + %uglygep45 = bitcast i8 addrspace(1)* %uglygep4 to i32 addrspace(1)* + %tmp5 = load i32, i32 addrspace(1)* %uglygep45, align 4 + %tmp6 = lshr i32 %tmp5, 8 + %tmp7 = and i32 %tmp6, 255 + %tmp8 = zext i32 %tmp7 to i64 + %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp8 + store i32 1, i32 addrspace(1)* %tmp9, align 4 + %scevgep = getelementptr i32, i32 addrspace(1)* %uglygep45, i64 1 + %tmp13 = load i32, i32 addrspace(1)* %scevgep, align 4 + %tmp14 = lshr i32 %tmp13, 8 + %tmp15 = and i32 %tmp14, 255 + %tmp16 = zext i32 %tmp15 to i64 + %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16 + store i32 1, i32 addrspace(1)* %tmp17, align 4 + %lsr.iv.next = add nuw nsw i64 %lsr.iv, 8 + %tmp1 = trunc i64 %lsr.iv.next to i32 + %tmp19 = icmp eq i32 %tmp1, 4096 + br i1 %tmp19, label %bb1, label %bb2 + } + + define amdgpu_kernel void @sdwa_sgpr_operand(i32 addrspace(1)* nocapture %arg) { + bb: + br label %bb2 + + bb1: ; preds = %bb2 + ret void + + bb2: ; preds = %bb2, %bb + %lsr.iv = phi i64 [ %lsr.iv.next, %bb2 ], [ 0, %bb ] + %bc = bitcast i32 addrspace(1)* %arg to i8 addrspace(1)* + %uglygep4 = getelementptr i8, i8 addrspace(1)* %bc, i64 %lsr.iv + %uglygep45 = bitcast i8 addrspace(1)* %uglygep4 to i32 addrspace(1)* + %tmp5 = load i32, i32 addrspace(1)* %uglygep45, align 4 + %tmp6 = lshr i32 %tmp5, 8 + %tmp7 = and i32 %tmp6, 255 + %tmp8 = zext i32 %tmp7 to i64 + %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp8 + store i32 1, i32 addrspace(1)* %tmp9, align 4 + %scevgep = getelementptr i32, i32 addrspace(1)* %uglygep45, i64 1 + %tmp13 = load i32, i32 addrspace(1)* %scevgep, align 4 + %tmp14 = lshr i32 %tmp13, 8 + %tmp15 = and i32 %tmp14, 255 + %tmp16 = zext i32 %tmp15 to i64 + %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16 + store i32 1, i32 addrspace(1)* %tmp17, align 4 + %lsr.iv.next = add nuw nsw i64 %lsr.iv, 8 + %tmp1 = trunc i64 %lsr.iv.next to i32 + %tmp19 = icmp eq i32 %tmp1, 4096 + br i1 %tmp19, label %bb1, label %bb2 + } + +... +--- +name: sdwa_imm_operand +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_64 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: sgpr_128 } + - { id: 4, class: sgpr_64 } + - { id: 5, class: sreg_32_xm0 } + - { id: 6, class: sgpr_32 } + - { id: 7, class: sreg_64 } + - { id: 8, class: sreg_64 } + - { id: 9, class: sreg_64_xexec } + - { id: 10, class: sreg_32_xm0 } + - { id: 11, class: sreg_32_xm0 } + - { id: 12, class: sreg_32_xm0 } + - { id: 13, class: sreg_32_xm0 } + - { id: 14, class: sreg_32_xm0 } + - { id: 15, class: sreg_32_xm0 } + - { id: 16, class: sreg_64 } + - { id: 17, class: vgpr_32 } + - { id: 18, class: vreg_64 } + - { id: 19, class: sreg_32_xm0 } + - { id: 20, class: sreg_32 } + - { id: 21, class: sreg_32_xm0 } + - { id: 22, class: sreg_32_xm0 } + - { id: 23, class: sreg_32_xm0 } + - { id: 24, class: sreg_64 } + - { id: 25, class: sreg_32_xm0 } + - { id: 26, class: sreg_32_xm0 } + - { id: 27, class: sreg_32_xm0 } + - { id: 28, class: sreg_32_xm0 } + - { id: 29, class: sreg_64 } + - { id: 30, class: vgpr_32 } + - { id: 31, class: vreg_64 } + - { id: 32, class: sreg_32_xm0 } + - { id: 33, class: sreg_32_xm0 } + - { id: 34, class: sreg_64 } + - { id: 35, class: sreg_32_xm0 } + - { id: 36, class: sreg_32_xm0 } + - { id: 37, class: sreg_32_xm0 } + - { id: 38, class: sreg_32_xm0 } + - { id: 39, class: vreg_64 } + - { id: 40, class: vgpr_32 } + - { id: 41, class: vreg_64 } + - { id: 42, class: sreg_32_xm0 } + - { id: 43, class: sreg_32 } + - { id: 44, class: sreg_32_xm0 } + - { id: 45, class: sreg_64 } + - { id: 46, class: sreg_32_xm0 } + - { id: 47, class: sreg_32_xm0 } + - { id: 48, class: sreg_32_xm0 } + - { id: 49, class: sreg_32_xm0 } + - { id: 50, class: sreg_64 } + - { id: 51, class: vreg_64 } + - { id: 52, class: sreg_64 } + - { id: 53, class: sreg_32_xm0 } + - { id: 54, class: sreg_32_xm0 } + - { id: 55, class: sreg_32_xm0 } + - { id: 56, class: sreg_32_xm0 } + - { id: 57, class: sreg_64 } + - { id: 58, class: sreg_32_xm0 } + - { id: 59, class: sreg_32_xm0 } + - { id: 60, class: vgpr_32 } + - { id: 61, class: vgpr_32 } + - { id: 62, class: vreg_64 } + - { id: 63, class: vgpr_32 } + - { id: 64, class: vgpr_32 } + - { id: 65, class: vgpr_32 } + - { id: 66, class: vgpr_32 } + - { id: 67, class: vreg_64 } + - { id: 68, class: vgpr_32 } + - { id: 69, class: vgpr_32 } + - { id: 70, class: vgpr_32 } + - { id: 71, class: vgpr_32 } + - { id: 72, class: vgpr_32 } + - { id: 73, class: vgpr_32 } + - { id: 74, class: vgpr_32 } + - { id: 75, class: vreg_64 } + - { id: 76, class: vgpr_32 } + - { id: 77, class: vgpr_32 } + - { id: 78, class: vgpr_32 } + - { id: 79, class: vgpr_32 } + - { id: 80, class: vreg_64 } + - { id: 81, class: vgpr_32 } + - { id: 82, class: vgpr_32 } + - { id: 83, class: vgpr_32 } +liveins: + - { reg: '%sgpr4_sgpr5', virtual-reg: '%4' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.bb: + successors: %bb.2.bb2(0x80000000) + liveins: %sgpr4_sgpr5 + + %4 = COPY %sgpr4_sgpr5 + %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) + %8 = S_MOV_B64 0 + %7 = COPY %9 + %30 = V_MOV_B32_e32 1, implicit %exec + S_BRANCH %bb.2.bb2 + + bb.1.bb1: + S_ENDPGM + + bb.2.bb2: + successors: %bb.1.bb1(0x04000000), %bb.2.bb2(0x7c000000) + + %0 = PHI %8, %bb.0.bb, %1, %bb.2.bb2 + %13 = COPY %7.sub1 + %14 = S_ADD_U32 %7.sub0, %0.sub0, implicit-def %scc + %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc + %16 = REG_SEQUENCE %14, 1, %15, 2 + %18 = COPY %16 + %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45) + %60 = V_BFE_U32 %17, 8, 8, implicit %exec + %61 = V_LSHLREV_B32_e32 2, killed %60, implicit %exec + %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec + %66 = COPY %13 + %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec + %67 = REG_SEQUENCE %70, 1, killed %65, 2 + FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9) + %37 = S_ADD_U32 %14, 4, implicit-def %scc + %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc + %71 = COPY killed %37 + %72 = COPY killed %38 + %41 = REG_SEQUENCE killed %71, 1, killed %72, 2 + %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep) + %73 = V_BFE_U32 %40, 8, 8, implicit %exec + %74 = V_LSHLREV_B32_e32 2, killed %73, implicit %exec + %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec + %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec + %80 = REG_SEQUENCE %83, 1, killed %78, 2 + FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17) + %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc + %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc + %57 = REG_SEQUENCE %55, 1, killed %56, 2 + %1 = COPY %57 + S_CMPK_EQ_I32 %55, 4096, implicit-def %scc + S_CBRANCH_SCC1 %bb.1.bb1, implicit %scc + S_BRANCH %bb.2.bb2 + +... +--- +name: sdwa_sgpr_operand +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_64 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: sgpr_128 } + - { id: 4, class: sgpr_64 } + - { id: 5, class: sreg_32_xm0 } + - { id: 6, class: sgpr_32 } + - { id: 7, class: sreg_64 } + - { id: 8, class: sreg_64 } + - { id: 9, class: sreg_64_xexec } + - { id: 10, class: sreg_32_xm0 } + - { id: 11, class: sreg_32_xm0 } + - { id: 12, class: sreg_32_xm0 } + - { id: 13, class: sreg_32_xm0 } + - { id: 14, class: sreg_32_xm0 } + - { id: 15, class: sreg_32_xm0 } + - { id: 16, class: sreg_64 } + - { id: 17, class: vgpr_32 } + - { id: 18, class: vreg_64 } + - { id: 19, class: sreg_32_xm0 } + - { id: 20, class: sreg_32 } + - { id: 21, class: sreg_32_xm0 } + - { id: 22, class: sreg_32_xm0 } + - { id: 23, class: sreg_32_xm0 } + - { id: 24, class: sreg_64 } + - { id: 25, class: sreg_32_xm0 } + - { id: 26, class: sreg_32_xm0 } + - { id: 27, class: sreg_32_xm0 } + - { id: 28, class: sreg_32_xm0 } + - { id: 29, class: sreg_64 } + - { id: 30, class: vgpr_32 } + - { id: 31, class: vreg_64 } + - { id: 32, class: sreg_32_xm0 } + - { id: 33, class: sreg_32_xm0 } + - { id: 34, class: sreg_64 } + - { id: 35, class: sreg_32_xm0 } + - { id: 36, class: sreg_32_xm0 } + - { id: 37, class: sreg_32_xm0 } + - { id: 38, class: sreg_32_xm0 } + - { id: 39, class: vreg_64 } + - { id: 40, class: vgpr_32 } + - { id: 41, class: vreg_64 } + - { id: 42, class: sreg_32_xm0 } + - { id: 43, class: sreg_32 } + - { id: 44, class: sreg_32_xm0 } + - { id: 45, class: sreg_64 } + - { id: 46, class: sreg_32_xm0 } + - { id: 47, class: sreg_32_xm0 } + - { id: 48, class: sreg_32_xm0 } + - { id: 49, class: sreg_32_xm0 } + - { id: 50, class: sreg_64 } + - { id: 51, class: vreg_64 } + - { id: 52, class: sreg_64 } + - { id: 53, class: sreg_32_xm0 } + - { id: 54, class: sreg_32_xm0 } + - { id: 55, class: sreg_32_xm0 } + - { id: 56, class: sreg_32_xm0 } + - { id: 57, class: sreg_64 } + - { id: 58, class: sreg_32_xm0 } + - { id: 59, class: sreg_32_xm0 } + - { id: 60, class: vgpr_32 } + - { id: 61, class: vgpr_32 } + - { id: 62, class: vreg_64 } + - { id: 63, class: vgpr_32 } + - { id: 64, class: vgpr_32 } + - { id: 65, class: vgpr_32 } + - { id: 66, class: vgpr_32 } + - { id: 67, class: vreg_64 } + - { id: 68, class: vgpr_32 } + - { id: 69, class: vgpr_32 } + - { id: 70, class: vgpr_32 } + - { id: 71, class: vgpr_32 } + - { id: 72, class: vgpr_32 } + - { id: 73, class: vgpr_32 } + - { id: 74, class: vgpr_32 } + - { id: 75, class: vreg_64 } + - { id: 76, class: vgpr_32 } + - { id: 77, class: vgpr_32 } + - { id: 78, class: vgpr_32 } + - { id: 79, class: vgpr_32 } + - { id: 80, class: vreg_64 } + - { id: 81, class: vgpr_32 } + - { id: 82, class: vgpr_32 } + - { id: 83, class: vgpr_32 } + - { id: 84, class: sreg_32_xm0 } +liveins: + - { reg: '%sgpr4_sgpr5', virtual-reg: '%4' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.bb: + successors: %bb.2.bb2(0x80000000) + liveins: %sgpr4_sgpr5 + + %4 = COPY %sgpr4_sgpr5 + %9 = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) + %8 = S_MOV_B64 0 + %7 = COPY %9 + %30 = V_MOV_B32_e32 1, implicit %exec + %84 = S_MOV_B32 2 + S_BRANCH %bb.2.bb2 + + bb.1.bb1: + S_ENDPGM + + bb.2.bb2: + successors: %bb.1.bb1(0x04000000), %bb.2.bb2(0x7c000000) + + %0 = PHI %8, %bb.0.bb, %1, %bb.2.bb2 + %13 = COPY %7.sub1 + %14 = S_ADD_U32 %7.sub0, %0.sub0, implicit-def %scc + %15 = S_ADDC_U32 %7.sub1, %0.sub1, implicit-def dead %scc, implicit %scc + %16 = REG_SEQUENCE %14, 1, %15, 2 + %18 = COPY %16 + %17 = FLAT_LOAD_DWORD %18, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.uglygep45) + %60 = V_BFE_U32 %17, 8, 8, implicit %exec + %61 = V_LSHLREV_B32_e32 %84, killed %60, implicit %exec + %70 = V_ADD_I32_e32 %7.sub0, %61, implicit-def %vcc, implicit %exec + %66 = COPY %13 + %65 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec + %67 = REG_SEQUENCE %70, 1, killed %65, 2 + FLAT_STORE_DWORD %67, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp9) + %37 = S_ADD_U32 %14, 4, implicit-def %scc + %38 = S_ADDC_U32 %15, 0, implicit-def dead %scc, implicit %scc + %71 = COPY killed %37 + %72 = COPY killed %38 + %41 = REG_SEQUENCE killed %71, 1, killed %72, 2 + %40 = FLAT_LOAD_DWORD killed %41, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.scevgep) + %73 = V_BFE_U32 %40, 8, 8, implicit %exec + %74 = V_LSHLREV_B32_e32 %84, killed %73, implicit %exec + %83 = V_ADD_I32_e32 %7.sub0, %74, implicit-def %vcc, implicit %exec + %78 = V_ADDC_U32_e32 0, %66, implicit-def %vcc, implicit %vcc, implicit %exec + %80 = REG_SEQUENCE %83, 1, killed %78, 2 + FLAT_STORE_DWORD %80, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4 into %ir.tmp17) + %55 = S_ADD_U32 %0.sub0, 8, implicit-def %scc + %56 = S_ADDC_U32 %0.sub1, 0, implicit-def dead %scc, implicit %scc + %57 = REG_SEQUENCE %55, 1, killed %56, 2 + %1 = COPY %57 + S_CMPK_EQ_I32 %55, 4096, implicit-def %scc + S_CBRANCH_SCC1 %bb.1.bb1, implicit %scc + S_BRANCH %bb.2.bb2 + +... diff --git a/test/CodeGen/AMDGPU/select-vectors.ll b/test/CodeGen/AMDGPU/select-vectors.ll index 8710fc8c7307bf28c32d832a308ff54702f4376d..4b00a48211ecf59c23d5e817796f4a301b0bde52 100644 --- a/test/CodeGen/AMDGPU/select-vectors.ll +++ b/test/CodeGen/AMDGPU/select-vectors.ll @@ -1,69 +1,186 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -verify-machineinstrs -march=amdgcn < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; Test expansion of scalar selects on vectors. ; Evergreen not enabled since it seems to be having problems with doubles. +; GCN-LABEL: {{^}}v_select_v2i8: +; SI: v_cndmask_b32 +; SI-NOT: cndmask -; FUNC-LABEL: {{^}}select_v4i8: -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind { +; GFX9: v_cndmask_b32 +; GFX9-NOT: cndmask + +; This is worse when i16 is legal and packed is not because +; SelectionDAGBuilder for some reason changes the select type. +; VI: v_cndmask_b32 +; VI: v_cndmask_b32 +define amdgpu_kernel void @v_select_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %a.ptr, <2 x i8> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <2 x i8>, <2 x i8> addrspace(1)* %a.ptr, align 2 + %b = load <2 x i8>, <2 x i8> addrspace(1)* %b.ptr, align 2 + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <2 x i8> %a, <2 x i8> %b + store <2 x i8> %select, <2 x i8> addrspace(1)* %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}v_select_v4i8: +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @v_select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %a.ptr, <4 x i8> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <4 x i8>, <4 x i8> addrspace(1)* %a.ptr + %b = load <4 x i8>, <4 x i8> addrspace(1)* %b.ptr + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b + store <4 x i8> %select, <4 x i8> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_select_v8i8: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @v_select_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %a.ptr, <8 x i8> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <8 x i8>, <8 x i8> addrspace(1)* %a.ptr + %b = load <8 x i8>, <8 x i8> addrspace(1)* %b.ptr + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <8 x i8> %a, <8 x i8> %b + store <8 x i8> %select, <8 x i8> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_select_v16i8: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @v_select_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %a.ptr, <16 x i8> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <16 x i8>, <16 x i8> addrspace(1)* %a.ptr + %b = load <16 x i8>, <16 x i8> addrspace(1)* %b.ptr + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <16 x i8> %a, <16 x i8> %b + store <16 x i8> %select, <16 x i8> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}select_v4i8: +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) #0 { %cmp = icmp eq i8 %c, 0 %select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b store <4 x i8> %select, <4 x i8> addrspace(1)* %out, align 4 ret void } -; FUNC-LABEL: {{^}}select_v4i16: -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 +; GCN-LABEL: {{^}}select_v2i16: +; GCN: v_cndmask_b32_e32 +; GCN-NOT: v_cndmask_b32 +define amdgpu_kernel void @select_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b, i32 %c) #0 { + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <2 x i16> %a, <2 x i16> %b + store <2 x i16> %select, <2 x i16> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_select_v2i16: +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @v_select_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <2 x i16>, <2 x i16> addrspace(1)* %a.ptr + %b = load <2 x i16>, <2 x i16> addrspace(1)* %b.ptr + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <2 x i16> %a, <2 x i16> %b + store <2 x i16> %select, <2 x i16> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_select_v3i16: ; SI: v_cndmask_b32_e32 -define amdgpu_kernel void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind { +; SI: cndmask +; SI-NOT: cndmask + +; GFX9: v_cndmask_b32_e32 +; GFX9: cndmask +; GFX9-NOT: cndmask + +; VI: v_cndmask_b32 +; VI: v_cndmask_b32 +; VI: v_cndmask_b32 +define amdgpu_kernel void @v_select_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <3 x i16>, <3 x i16> addrspace(1)* %a.ptr + %b = load <3 x i16>, <3 x i16> addrspace(1)* %b.ptr + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <3 x i16> %a, <3 x i16> %b + store <3 x i16> %select, <3 x i16> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_select_v4i16: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @v_select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %a.ptr, <4 x i16> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <4 x i16>, <4 x i16> addrspace(1)* %a.ptr + %b = load <4 x i16>, <4 x i16> addrspace(1)* %b.ptr %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b store <4 x i16> %select, <4 x i16> addrspace(1)* %out, align 4 ret void } +; GCN-LABEL: {{^}}v_select_v8i16: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @v_select_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %a.ptr, <8 x i16> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <8 x i16>, <8 x i16> addrspace(1)* %a.ptr + %b = load <8 x i16>, <8 x i16> addrspace(1)* %b.ptr + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <8 x i16> %a, <8 x i16> %b + store <8 x i16> %select, <8 x i16> addrspace(1)* %out, align 4 + ret void +} + ; FIXME: Expansion with bitwise operations may be better if doing a ; vector select with SGPR inputs. -; FUNC-LABEL: {{^}}s_select_v2i32: -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: buffer_store_dwordx2 -define amdgpu_kernel void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind { +; GCN-LABEL: {{^}}s_select_v2i32: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: buffer_store_dwordx2 +define amdgpu_kernel void @s_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b store <2 x i32> %select, <2 x i32> addrspace(1)* %out, align 8 ret void } -; FUNC-LABEL: {{^}}s_select_v4i32: -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: buffer_store_dwordx4 -define amdgpu_kernel void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind { +; GCN-LABEL: {{^}}s_select_v4i32: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: buffer_store_dwordx4 +define amdgpu_kernel void @s_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b store <4 x i32> %select, <4 x i32> addrspace(1)* %out, align 16 ret void } -; FUNC-LABEL: {{^}}v_select_v4i32: -; SI: buffer_load_dwordx4 -; SI: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32 -; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: buffer_store_dwordx4 +; GCN-LABEL: {{^}}v_select_v4i32: +; GCN: buffer_load_dwordx4 +; GCN: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32 +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; GCN: buffer_store_dwordx4 define amdgpu_kernel void @v_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in, i32 %cond) #0 { bb: %tmp2 = icmp ult i32 %cond, 32 @@ -73,68 +190,68 @@ bb: ret void } -; FUNC-LABEL: {{^}}select_v8i32: -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -define amdgpu_kernel void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind { +; GCN-LABEL: {{^}}select_v8i32: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +define amdgpu_kernel void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b store <8 x i32> %select, <8 x i32> addrspace(1)* %out, align 16 ret void } -; FUNC-LABEL: {{^}}s_select_v2f32: -; SI-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} -; SI-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}} +; GCN-LABEL: {{^}}s_select_v2f32: +; GCN-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}} +; GCN-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}} -; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]] -; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]] -; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]] -; SI-DAG: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]] +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]] +; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]] +; GCN-DAG: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} -; SI: v_cndmask_b32_e32 -; SI: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]] -; SI: v_cndmask_b32_e32 -; SI: buffer_store_dwordx2 -define amdgpu_kernel void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind { +; GCN: v_cndmask_b32_e32 +; GCN: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]] +; GCN: v_cndmask_b32_e32 +; GCN: buffer_store_dwordx2 +define amdgpu_kernel void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <2 x float> %a, <2 x float> %b store <2 x float> %select, <2 x float> addrspace(1)* %out, align 16 ret void } -; FUNC-LABEL: {{^}}s_select_v4f32: -; SI: s_load_dwordx4 -; SI: s_load_dwordx4 -; SI: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} +; GCN-LABEL: {{^}}s_select_v4f32: +; GCN: s_load_dwordx4 +; GCN: s_load_dwordx4 +; GCN: v_cmp_eq_u32_e64 vcc, s{{[0-9]+}}, 0{{$}} -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 -; SI: buffer_store_dwordx4 -define amdgpu_kernel void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind { +; GCN: buffer_store_dwordx4 +define amdgpu_kernel void @s_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <4 x float> %a, <4 x float> %b store <4 x float> %select, <4 x float> addrspace(1)* %out, align 16 ret void } -; FUNC-LABEL: {{^}}v_select_v4f32: -; SI: buffer_load_dwordx4 -; SI: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32 -; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: buffer_store_dwordx4 +; GCN-LABEL: {{^}}v_select_v4f32: +; GCN: buffer_load_dwordx4 +; GCN: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32 +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} +; GCN: buffer_store_dwordx4 define amdgpu_kernel void @v_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %cond) #0 { bb: %tmp2 = icmp ult i32 %cond, 32 @@ -144,74 +261,112 @@ bb: ret void } -; FUNC-LABEL: {{^}}select_v8f32: -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -define amdgpu_kernel void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind { +; GCN-LABEL: {{^}}select_v8f32: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +define amdgpu_kernel void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <8 x float> %a, <8 x float> %b store <8 x float> %select, <8 x float> addrspace(1)* %out, align 16 ret void } -; FUNC-LABEL: {{^}}select_v2f64: -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -define amdgpu_kernel void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind { +; GCN-LABEL: {{^}}select_v2f64: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +define amdgpu_kernel void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <2 x double> %a, <2 x double> %b store <2 x double> %select, <2 x double> addrspace(1)* %out, align 16 ret void } -; FUNC-LABEL: {{^}}select_v4f64: -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -define amdgpu_kernel void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind { +; GCN-LABEL: {{^}}select_v4f64: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +define amdgpu_kernel void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <4 x double> %a, <4 x double> %b store <4 x double> %select, <4 x double> addrspace(1)* %out, align 16 ret void } -; FUNC-LABEL: {{^}}select_v8f64: -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -; SI: v_cndmask_b32_e32 -define amdgpu_kernel void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind { +; GCN-LABEL: {{^}}select_v8f64: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +define amdgpu_kernel void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) #0 { %cmp = icmp eq i32 %c, 0 %select = select i1 %cmp, <8 x double> %a, <8 x double> %b store <8 x double> %select, <8 x double> addrspace(1)* %out, align 16 ret void } +; GCN-LABEL: {{^}}v_select_v2f16: +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @v_select_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %a.ptr, <2 x half> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <2 x half>, <2 x half> addrspace(1)* %a.ptr + %b = load <2 x half>, <2 x half> addrspace(1)* %b.ptr + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <2 x half> %a, <2 x half> %b + store <2 x half> %select, <2 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_select_v3f16: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @v_select_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %a.ptr, <3 x half> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <3 x half>, <3 x half> addrspace(1)* %a.ptr + %b = load <3 x half>, <3 x half> addrspace(1)* %b.ptr + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <3 x half> %a, <3 x half> %b + store <3 x half> %select, <3 x half> addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: {{^}}v_select_v4f16: +; GCN: v_cndmask_b32_e32 +; GCN: v_cndmask_b32_e32 +; GCN-NOT: cndmask +define amdgpu_kernel void @v_select_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %a.ptr, <4 x half> addrspace(1)* %b.ptr, i32 %c) #0 { + %a = load <4 x half>, <4 x half> addrspace(1)* %a.ptr + %b = load <4 x half>, <4 x half> addrspace(1)* %b.ptr + %cmp = icmp eq i32 %c, 0 + %select = select i1 %cmp, <4 x half> %a, <4 x half> %b + store <4 x half> %select, <4 x half> addrspace(1)* %out, align 4 + ret void +} + ; Function Attrs: nounwind readnone declare i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/test/CodeGen/AMDGPU/select.f16.ll b/test/CodeGen/AMDGPU/select.f16.ll index 2a7a9c9e0638f8c430011419099047580926dfd5..92ee2eb7f403f43254cd37066c8f2c69540d0af5 100644 --- a/test/CodeGen/AMDGPU/select.f16.ll +++ b/test/CodeGen/AMDGPU/select.f16.ll @@ -196,11 +196,11 @@ entry: ; SI: v_cvt_f32_f16_e32 ; SI: v_cvt_f32_f16_e32 ; SI: v_cvt_f32_f16_e32 -; SI: v_cmp_lt_f32_e64 -; SI: v_cmp_lt_f32_e32 vcc, 0.5 +; SI-DAG: v_cmp_gt_f32_e64 +; SI-DAG: v_cmp_lt_f32_e32 vcc, 0.5 ; VI: v_cmp_lt_f16_e32 -; VI: v_cmp_lt_f16_e64 +; VI: v_cmp_gt_f16_e64 ; GCN: v_cndmask_b32_e32 ; GCN: v_cndmask_b32_e64 ; SI: v_cvt_f16_f32_e32 @@ -228,11 +228,11 @@ entry: ; SI: v_cvt_f32_f16_e32 ; SI: v_cvt_f32_f16_e32 ; SI: v_cvt_f32_f16_e32 -; SI: v_cmp_gt_f32_e64 -; SI: v_cmp_gt_f32_e32 vcc, 0.5 +; SI-DAG: v_cmp_lt_f32_e64 +; SI-DAG: v_cmp_gt_f32_e32 vcc, 0.5 ; VI: v_cmp_gt_f16_e32 -; VI: v_cmp_gt_f16_e64 +; VI: v_cmp_lt_f16_e64 ; GCN: v_cndmask_b32_e32 ; GCN: v_cndmask_b32_e64 diff --git a/test/CodeGen/AMDGPU/setcc.ll b/test/CodeGen/AMDGPU/setcc.ll index add90e9c2f3a98994abd568e95708e8321153367..f63719d62a847854e5ad75b938d3d998ed4f5065 100644 --- a/test/CodeGen/AMDGPU/setcc.ll +++ b/test/CodeGen/AMDGPU/setcc.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll index b702e1c07200d6ec3f3e970b4aa04bb39a9472c8..160fb6a038fed9cdb2174d5a65b9d7ffee68b229 100644 --- a/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=GFX89 -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FIXME: i16 promotion pass ruins the scalar cases when legal. diff --git a/test/CodeGen/AMDGPU/sgprcopies.ll b/test/CodeGen/AMDGPU/sgprcopies.ll new file mode 100644 index 0000000000000000000000000000000000000000..68cd83bb6cf09c56dcdcc99f3cef32f8639c158c --- /dev/null +++ b/test/CodeGen/AMDGPU/sgprcopies.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}checkTwoBlocksWithUniformBranch +; GCN: BB0_2 +; GCN: v_add +define amdgpu_kernel void @checkTwoBlocksWithUniformBranch(i32 addrspace(1)* nocapture %out, i32 %width, float %xPos, float %yPos, float %xStep, float %yStep, i32 %maxIter) { +entry: + %conv = call i32 @llvm.amdgcn.workitem.id.x() #1 + %rem = urem i32 %conv, %width + %div = udiv i32 %conv, %width + %conv1 = sitofp i32 %rem to float + %x = tail call float @llvm.fmuladd.f32(float %xStep, float %conv1, float %xPos) + %conv2 = sitofp i32 %div to float + %y = tail call float @llvm.fmuladd.f32(float %yStep, float %conv2, float %yPos) + %yy = fmul float %y, %y + %xy = tail call float @llvm.fmuladd.f32(float %x, float %x, float %yy) + %cmp01 = fcmp ole float %xy, 4.000000e+00 + %cmp02 = icmp ne i32 %maxIter, 0 + %cond01 = and i1 %cmp02, %cmp01 + br i1 %cond01, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %x_val = phi float [ %call8, %for.body ], [ %x, %for.body.preheader ] + %iter_val = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %y_val = phi float [ %call9, %for.body ], [ %y, %for.body.preheader ] + %sub = fsub float -0.000000e+00, %y_val + %call7 = tail call float @llvm.fmuladd.f32(float %x_val, float %x_val, float %x) #1 + %call8 = tail call float @llvm.fmuladd.f32(float %sub, float %y_val, float %call7) #1 + %mul = fmul float %x_val, 2.000000e+00 + %call9 = tail call float @llvm.fmuladd.f32(float %mul, float %y_val, float %y) #1 + %inc = add nuw i32 %iter_val, 1 + %mul3 = fmul float %call9, %call9 + %0 = tail call float @llvm.fmuladd.f32(float %call8, float %call8, float %mul3) + %cmp = fcmp ole float %0, 4.000000e+00 + %cmp5 = icmp ult i32 %inc, %maxIter + %or.cond = and i1 %cmp5, %cmp + br i1 %or.cond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + %iter.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %for.end.loopexit ] + %idxprom = ashr exact i32 %conv, 32 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %idxprom + store i32 %iter.0.lcssa, i32 addrspace(1)* %arrayidx, align 4 + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare float @llvm.fmuladd.f32(float, float, float) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { readnone } diff --git a/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll index b3cb19ad05e2992622ec249a36573019e47443da..4f7b61adc91d57df611ba1ecd52b3a4e1e68bec1 100644 --- a/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll +++ b/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll @@ -5,14 +5,14 @@ ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} -; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} ; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} ; GCN: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] ; GCN: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { - %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x %ld.64 = load i128, i128 addrspace(1)* %in.gep @@ -24,14 +24,15 @@ define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 ; Extract the high bit of the 2nd quarter ; GCN-LABEL: {{^}}v_uextract_bit_63_i128: -; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} ; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} +; GCN: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}} ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] -; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO3]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -49,14 +50,14 @@ define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} -; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], 0{{$}} +; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} ; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] ; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { - %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x %ld.64 = load i128, i128 addrspace(1)* %in.gep @@ -68,14 +69,15 @@ define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 ; Extract the high bit of the 4th quarter ; GCN-LABEL: {{^}}v_uextract_bit_127_i128: -; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} -; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} ; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} +; GCN: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}} ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] -; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO3]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -90,15 +92,16 @@ define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 ; Spans more than 2 dword boundaries ; GCN-LABEL: {{^}}v_uextract_bit_34_100_i128: -; GCN: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN-DAG: v_lshl_b64 v{{\[}}[[SHLLO:[0-9]+]]:[[SHLHI:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, 30 ; GCN-DAG: v_lshrrev_b32_e32 v[[ELT1PART:[0-9]+]], 2, v{{[[0-9]+}} ; GCN-DAG: v_bfe_u32 v[[ELT2PART:[0-9]+]], v[[VAL3]], 2, 2{{$}} ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[SHLLO]], v[[ELT1PART]] +; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}} -; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[OR0]]:[[ZERO]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[OR0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -113,5 +116,7 @@ define amdgpu_kernel void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @llvm.amdgcn.workgroup.id.x() #0 + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll index 744c1c2b682be983b53cbeb2d1a21ebdbeaae786..a6026785b1739d12c36b955d5da12f23a1ee4900 100644 --- a/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll +++ b/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll @@ -9,7 +9,7 @@ ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}} define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { - %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x %ld.64 = load i64, i64 addrspace(1)* %in.gep @@ -21,10 +21,11 @@ define amdgpu_kernel void @v_uextract_bit_31_i64(i64 addrspace(1)* %out, i64 add ; Extract the high bit of the high half ; GCN-LABEL: {{^}}v_uextract_bit_63_i64: +; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] -; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] +; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x @@ -42,7 +43,7 @@ define amdgpu_kernel void @v_uextract_bit_63_i64(i64 addrspace(1)* %out, i64 add ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}} define amdgpu_kernel void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { - %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x %ld.64 = load i64, i64 addrspace(1)* %in.gep @@ -58,7 +59,7 @@ define amdgpu_kernel void @v_uextract_bit_1_i64(i64 addrspace(1)* %out, i64 addr ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}} define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { - %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x %ld.64 = load i64, i64 addrspace(1)* %in.gep @@ -69,10 +70,11 @@ define amdgpu_kernel void @v_uextract_bit_20_i64(i64 addrspace(1)* %out, i64 add } ; GCN-LABEL: {{^}}v_uextract_bit_32_i64: -; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 1, [[VAL]] ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}} +; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x @@ -85,10 +87,11 @@ define amdgpu_kernel void @v_uextract_bit_32_i64(i64 addrspace(1)* %out, i64 add } ; GCN-LABEL: {{^}}v_uextract_bit_33_i64: +; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 1{{$}} -; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] +; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x @@ -106,7 +109,7 @@ define amdgpu_kernel void @v_uextract_bit_33_i64(i64 addrspace(1)* %out, i64 add ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}} define amdgpu_kernel void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { - %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x %ld.64 = load i64, i64 addrspace(1)* %in.gep @@ -122,7 +125,7 @@ define amdgpu_kernel void @v_uextract_bit_20_21_i64(i64 addrspace(1)* %out, i64 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}} define amdgpu_kernel void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { - %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x %ld.64 = load i64, i64 addrspace(1)* %in.gep @@ -138,7 +141,7 @@ define amdgpu_kernel void @v_uextract_bit_1_30_i64(i64 addrspace(1)* %out, i64 a ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}} define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { - %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x %ld.64 = load i64, i64 addrspace(1)* %in.gep @@ -156,7 +159,7 @@ define amdgpu_kernel void @v_uextract_bit_1_31_i64(i64 addrspace(1)* %out, i64 a ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}} define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { - %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() + %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id.x %ld.64 = load i64, i64 addrspace(1)* %in.gep @@ -167,10 +170,11 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64(i64 addrspace(1)* %out, i64 } ; GCN-LABEL: {{^}}v_uextract_bit_32_33_i64: +; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 2 -; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] +; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x @@ -183,11 +187,12 @@ define amdgpu_kernel void @v_uextract_bit_32_33_i64(i64 addrspace(1)* %out, i64 } ; GCN-LABEL: {{^}}v_uextract_bit_30_60_i64: +; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] ; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 30 ; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 0x3fffffff, v[[SHRLO]]{{$}} -; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] +; GCN: buffer_store_dwordx2 v{{\[}}[[AND]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x @@ -200,10 +205,11 @@ define amdgpu_kernel void @v_uextract_bit_30_60_i64(i64 addrspace(1)* %out, i64 } ; GCN-LABEL: {{^}}v_uextract_bit_33_63_i64: +; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 30 -; GCN-DAG: v_mov_b32_e32 v[[BFE:[0-9]+]], 0{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]] +; GCN: buffer_store_dwordx2 v{{\[}}[[SHIFT]]:[[ZERO1]]{{\]}} define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %id.x @@ -216,9 +222,10 @@ define amdgpu_kernel void @v_uextract_bit_33_63_i64(i64 addrspace(1)* %out, i64 } ; GCN-LABEL: {{^}}v_uextract_bit_31_63_i64: +; GCN: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} ; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] ; GCN: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 31 -; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], 0{{$}} +; GCN-NEXT: v_mov_b32_e32 v[[SHRHI]], v[[ZERO]] ; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}} define amdgpu_kernel void @v_uextract_bit_31_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -299,10 +306,11 @@ define amdgpu_kernel void @v_uextract_bit_31_32_i64_trunc_i32(i32 addrspace(1)* } ; GCN-LABEL: {{^}}and_not_mask_i64: -; GCN: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}} -; GCN: v_mov_b32_e32 v[[SHRHI]], 0{{$}} +; GCN-DAG: buffer_load_dwordx2 v{{\[}}[[VALLO:[0-9]+]]:[[VALHI:[0-9]+]]{{\]}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; GCN-DAG: v_mov_b32_e32 v[[SHRHI:[0-9]+]], v[[ZERO]]{{$}} ; GCN: v_lshrrev_b32_e32 [[SHR:v[0-9]+]], 20, v[[VALLO]] -; GCN-DAG: v_and_b32_e32 v[[SHRLO]], 4, [[SHR]] +; GCN-DAG: v_and_b32_e32 v[[SHRLO:[0-9]+]], 4, [[SHR]] ; GCN-NOT: v[[SHRLO]] ; GCN-NOT: v[[SHRHI]] ; GCN: buffer_store_dwordx2 v{{\[}}[[SHRLO]]:[[SHRHI]]{{\]}} @@ -321,7 +329,7 @@ define amdgpu_kernel void @and_not_mask_i64(i64 addrspace(1)* %out, i64 addrspac ; keeping the 32-bit and has a smaller encoding size than the bfe. ; GCN-LABEL: {{^}}v_uextract_bit_27_29_multi_use_shift_i64: -; GCN: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] +; GCN-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]] ; GCN-DAG: v_lshr_b64 v{{\[}}[[SHRLO:[0-9]+]]:[[SHRHI:[0-9]+]]{{\]}}, [[VAL]], 27 ; GCN-DAG: v_and_b32_e32 v[[AND:[0-9]+]], 3, v[[SHRLO]] ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} @@ -340,8 +348,8 @@ define amdgpu_kernel void @v_uextract_bit_27_29_multi_use_shift_i64(i64 addrspac } ; GCN-LABEL: {{^}}v_uextract_bit_34_37_multi_use_shift_i64: -; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} -; GCN: v_mov_b32_e32 v[[ZERO_SHR:[0-9]+]], 0{{$}} +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; GCN-DAG: v_mov_b32_e32 v[[ZERO_SHR:[0-9]+]], 0{{$}} ; GCN: v_mov_b32_e32 v[[ZERO_BFE:[0-9]+]], v[[ZERO_SHR]] ; GCN-DAG: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 2, [[VAL]] ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 2, 3 @@ -360,10 +368,10 @@ define amdgpu_kernel void @v_uextract_bit_34_37_multi_use_shift_i64(i64 addrspac } ; GCN-LABEL: {{^}}v_uextract_bit_33_36_use_upper_half_shift_i64: -; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; GCN-DAG: v_bfe_u32 v[[BFE:[0-9]+]], [[VAL]], 1, 3 ; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:[[ZERO]]{{\]}} +; GCN: buffer_store_dwordx2 v{{\[}}[[BFE]]:{{[0-9]+\]}} ; GCN: buffer_store_dword v[[ZERO]] define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 addrspace(1)* %out0, i32 addrspace(1)* %out1, i64 addrspace(1)* %in) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -383,5 +391,7 @@ define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(i64 add declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @llvm.amdgcn.workgroup.id.x() #0 + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll b/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll new file mode 100644 index 0000000000000000000000000000000000000000..1cdfec9fdb597f9db8c9aa905ed02f3742c70568 --- /dev/null +++ b/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck %s + +; Check transformation shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1) +; Only one shift if expected, GEP shall not produce a separate shift + +; CHECK-LABEL: {{^}}add_const_offset: +; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0 +; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]] +; CHECK-NOT: v_lshl +; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]] +; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]: +define amdgpu_kernel void @add_const_offset(i32 addrspace(1)* nocapture %arg) { +bb: + %id = tail call i32 @llvm.amdgcn.workitem.id.x() + %add = add i32 %id, 200 + %shl = shl i32 %add, 2 + %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %shl + %val = load i32, i32 addrspace(1)* %ptr, align 4 + store i32 %val, i32 addrspace(1)* %arg, align 4 + ret void +} + +; CHECK-LABEL: {{^}}or_const_offset: +; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0 +; CHECK: v_or_b32_e32 v[[OR:[0-9]+]], 0x1000, v[[SHL]] +; CHECK-NOT: v_lshl +; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]] +; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]: +define amdgpu_kernel void @or_const_offset(i32 addrspace(1)* nocapture %arg) { +bb: + %id = tail call i32 @llvm.amdgcn.workitem.id.x() + %add = or i32 %id, 256 + %shl = shl i32 %add, 2 + %ptr = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %shl + %val = load i32, i32 addrspace(1)* %ptr, align 4 + store i32 %val, i32 addrspace(1)* %arg, align 4 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/test/CodeGen/AMDGPU/shl.ll b/test/CodeGen/AMDGPU/shl.ll index f6520eeb4fd6913f927e861b192881791e95a167..edc313ee323bd5dfa8978f7dafae63f881ff949c 100644 --- a/test/CodeGen/AMDGPU/shl.ll +++ b/test/CodeGen/AMDGPU/shl.ll @@ -1,9 +1,11 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s ; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tgid.x() #0 + ;EG: {{^}}shl_v2i32: ;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -288,7 +290,7 @@ define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, i64 %a) { ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 0{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[LO_A]]{{\]}} define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.r600.read.tgid.x() #0 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %a = load i64, i64 addrspace(1)* %gep.in diff --git a/test/CodeGen/AMDGPU/shl.v2i16.ll b/test/CodeGen/AMDGPU/shl.v2i16.ll index eac29bad7cf23347f307c1ef7eda250a67fe11cf..839854fd575bd711e08069722810555ba59fcc81 100644 --- a/test/CodeGen/AMDGPU/shl.v2i16.ll +++ b/test/CodeGen/AMDGPU/shl.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=CIVI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=CIVI %s @@ -10,7 +10,7 @@ ; VI: v_lshlrev_b32_e32 ; VI: v_lshlrev_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; CI: v_lshlrev_b32_e32 ; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}} diff --git a/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir index 20052e865a54ea21a34d0dcca382ebe3a17e811c..18176de53793bf3801b71345ec24050c85402193 100644 --- a/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir +++ b/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir @@ -20,12 +20,10 @@ body: | ; GCN: V_ADD_I32 bb.0: liveins: %vgpr0 - successors: %bb.1 %7 = COPY %vgpr0 %8 = S_MOV_B32 0 bb.1: - successors: %bb.1, %bb.2 %0 = PHI %8, %bb.0, %0, %bb.1, %2, %bb.2 %9 = V_MOV_B32_e32 9, implicit %exec %10 = V_CMP_EQ_U32_e64 %7, %9, implicit %exec @@ -33,7 +31,6 @@ body: | S_BRANCH %bb.1 bb.2: - successors: %bb.1 SI_END_CF %1, implicit-def %exec, implicit-def %scc, implicit %exec %11 = S_MOV_B32 1 %2 = S_ADD_I32 %0, %11, implicit-def %scc diff --git a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll index cb010cf153001668fc07f29c0ab551d16b1ee04d..5b0d5274d5bc17f68050b82253cea8af31768157 100644 --- a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll +++ b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll @@ -9,7 +9,6 @@ ; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %unreachable ; GCN: ds_write_b32 ; GCN: ; divergent unreachable -; GCN: s_waitcnt ; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock ; GCN-NEXT: s_or_b64 exec, exec @@ -38,7 +37,6 @@ ret: ; GCN-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %unreachable ; GCN: ds_write_b32 ; GCN: ; divergent unreachable -; GCN: s_waitcnt ; GCN: [[RETURN]]: ; GCN-NEXT: s_or_b64 exec, exec @@ -66,7 +64,6 @@ unreachable: ; GCN: [[UNREACHABLE]]: ; GCN: ds_write_b32 -; GCN: s_waitcnt define amdgpu_kernel void @uniform_lower_control_flow_unreachable_terminator(i32 %arg0) #0 { bb: %tmp63 = icmp eq i32 %arg0, 32 diff --git a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll index 114c97b61bd40bb343f76fd3fb6ab19c8367ddbe..a57e7b13453f2ef5aeba4d90e15ed1a8e3d80a70 100644 --- a/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ b/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll @@ -25,50 +25,50 @@ ; SMEM: s_dcache_wb ; ALL: s_endpgm define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) { - call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" () - call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" () - call void asm sideeffect "", "~{SGPR16_SGPR17_SGPR18_SGPR19_SGPR20_SGPR21_SGPR22_SGPR23}" () - call void asm sideeffect "", "~{SGPR24_SGPR25_SGPR26_SGPR27_SGPR28_SGPR29_SGPR30_SGPR31}" () - call void asm sideeffect "", "~{SGPR32_SGPR33_SGPR34_SGPR35_SGPR36_SGPR37_SGPR38_SGPR39}" () - call void asm sideeffect "", "~{SGPR40_SGPR41_SGPR42_SGPR43_SGPR44_SGPR45_SGPR46_SGPR47}" () - call void asm sideeffect "", "~{SGPR48_SGPR49_SGPR50_SGPR51_SGPR52_SGPR53_SGPR54_SGPR55}" () - call void asm sideeffect "", "~{SGPR56_SGPR57_SGPR58_SGPR59_SGPR60_SGPR61_SGPR62_SGPR63}" () - call void asm sideeffect "", "~{SGPR64_SGPR65_SGPR66_SGPR67_SGPR68_SGPR69_SGPR70_SGPR71}" () - call void asm sideeffect "", "~{SGPR72_SGPR73_SGPR74_SGPR75_SGPR76_SGPR77_SGPR78_SGPR79}" () - call void asm sideeffect "", "~{SGPR80_SGPR81_SGPR82_SGPR83_SGPR84_SGPR85_SGPR86_SGPR87}" () - call void asm sideeffect "", "~{SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95}" () - call void asm sideeffect "", "~{VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7}" () - call void asm sideeffect "", "~{VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15}" () - call void asm sideeffect "", "~{VGPR16_VGPR17_VGPR18_VGPR19_VGPR20_VGPR21_VGPR22_VGPR23}" () - call void asm sideeffect "", "~{VGPR24_VGPR25_VGPR26_VGPR27_VGPR28_VGPR29_VGPR30_VGPR31}" () - call void asm sideeffect "", "~{VGPR32_VGPR33_VGPR34_VGPR35_VGPR36_VGPR37_VGPR38_VGPR39}" () - call void asm sideeffect "", "~{VGPR40_VGPR41_VGPR42_VGPR43_VGPR44_VGPR45_VGPR46_VGPR47}" () - call void asm sideeffect "", "~{VGPR48_VGPR49_VGPR50_VGPR51_VGPR52_VGPR53_VGPR54_VGPR55}" () - call void asm sideeffect "", "~{VGPR56_VGPR57_VGPR58_VGPR59_VGPR60_VGPR61_VGPR62_VGPR63}" () - call void asm sideeffect "", "~{VGPR64_VGPR65_VGPR66_VGPR67_VGPR68_VGPR69_VGPR70_VGPR71}" () - call void asm sideeffect "", "~{VGPR72_VGPR73_VGPR74_VGPR75_VGPR76_VGPR77_VGPR78_VGPR79}" () - call void asm sideeffect "", "~{VGPR80_VGPR81_VGPR82_VGPR83_VGPR84_VGPR85_VGPR86_VGPR87}" () - call void asm sideeffect "", "~{VGPR88_VGPR89_VGPR90_VGPR91_VGPR92_VGPR93_VGPR94_VGPR95}" () - call void asm sideeffect "", "~{VGPR96_VGPR97_VGPR98_VGPR99_VGPR100_VGPR101_VGPR102_VGPR103}" () - call void asm sideeffect "", "~{VGPR104_VGPR105_VGPR106_VGPR107_VGPR108_VGPR109_VGPR110_VGPR111}" () - call void asm sideeffect "", "~{VGPR112_VGPR113_VGPR114_VGPR115_VGPR116_VGPR117_VGPR118_VGPR119}" () - call void asm sideeffect "", "~{VGPR120_VGPR121_VGPR122_VGPR123_VGPR124_VGPR125_VGPR126_VGPR127}" () - call void asm sideeffect "", "~{VGPR128_VGPR129_VGPR130_VGPR131_VGPR132_VGPR133_VGPR134_VGPR135}" () - call void asm sideeffect "", "~{VGPR136_VGPR137_VGPR138_VGPR139_VGPR140_VGPR141_VGPR142_VGPR143}" () - call void asm sideeffect "", "~{VGPR144_VGPR145_VGPR146_VGPR147_VGPR148_VGPR149_VGPR150_VGPR151}" () - call void asm sideeffect "", "~{VGPR152_VGPR153_VGPR154_VGPR155_VGPR156_VGPR157_VGPR158_VGPR159}" () - call void asm sideeffect "", "~{VGPR160_VGPR161_VGPR162_VGPR163_VGPR164_VGPR165_VGPR166_VGPR167}" () - call void asm sideeffect "", "~{VGPR168_VGPR169_VGPR170_VGPR171_VGPR172_VGPR173_VGPR174_VGPR175}" () - call void asm sideeffect "", "~{VGPR176_VGPR177_VGPR178_VGPR179_VGPR180_VGPR181_VGPR182_VGPR183}" () - call void asm sideeffect "", "~{VGPR184_VGPR185_VGPR186_VGPR187_VGPR188_VGPR189_VGPR190_VGPR191}" () - call void asm sideeffect "", "~{VGPR192_VGPR193_VGPR194_VGPR195_VGPR196_VGPR197_VGPR198_VGPR199}" () - call void asm sideeffect "", "~{VGPR200_VGPR201_VGPR202_VGPR203_VGPR204_VGPR205_VGPR206_VGPR207}" () - call void asm sideeffect "", "~{VGPR208_VGPR209_VGPR210_VGPR211_VGPR212_VGPR213_VGPR214_VGPR215}" () - call void asm sideeffect "", "~{VGPR216_VGPR217_VGPR218_VGPR219_VGPR220_VGPR221_VGPR222_VGPR223}" () - call void asm sideeffect "", "~{VGPR224_VGPR225_VGPR226_VGPR227_VGPR228_VGPR229_VGPR230_VGPR231}" () - call void asm sideeffect "", "~{VGPR232_VGPR233_VGPR234_VGPR235_VGPR236_VGPR237_VGPR238_VGPR239}" () - call void asm sideeffect "", "~{VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247}" () - call void asm sideeffect "", "~{VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255}" () + call void asm sideeffect "", "~{s[0:7]}" () + call void asm sideeffect "", "~{s[8:15]}" () + call void asm sideeffect "", "~{s[16:23]}" () + call void asm sideeffect "", "~{s[24:31]}" () + call void asm sideeffect "", "~{s[32:39]}" () + call void asm sideeffect "", "~{s[40:47]}" () + call void asm sideeffect "", "~{s[48:55]}" () + call void asm sideeffect "", "~{s[56:63]}" () + call void asm sideeffect "", "~{s[64:71]}" () + call void asm sideeffect "", "~{s[72:79]}" () + call void asm sideeffect "", "~{s[80:87]}" () + call void asm sideeffect "", "~{s[88:95]}" () + call void asm sideeffect "", "~{v[0:7]}" () + call void asm sideeffect "", "~{v[8:15]}" () + call void asm sideeffect "", "~{v[16:23]}" () + call void asm sideeffect "", "~{v[24:31]}" () + call void asm sideeffect "", "~{v[32:39]}" () + call void asm sideeffect "", "~{v[40:47]}" () + call void asm sideeffect "", "~{v[48:55]}" () + call void asm sideeffect "", "~{v[56:63]}" () + call void asm sideeffect "", "~{v[64:71]}" () + call void asm sideeffect "", "~{v[72:79]}" () + call void asm sideeffect "", "~{v[80:87]}" () + call void asm sideeffect "", "~{v[88:95]}" () + call void asm sideeffect "", "~{v[96:103]}" () + call void asm sideeffect "", "~{v[104:111]}" () + call void asm sideeffect "", "~{v[112:119]}" () + call void asm sideeffect "", "~{v[120:127]}" () + call void asm sideeffect "", "~{v[128:135]}" () + call void asm sideeffect "", "~{v[136:143]}" () + call void asm sideeffect "", "~{v[144:151]}" () + call void asm sideeffect "", "~{v[152:159]}" () + call void asm sideeffect "", "~{v[160:167]}" () + call void asm sideeffect "", "~{v[168:175]}" () + call void asm sideeffect "", "~{v[176:183]}" () + call void asm sideeffect "", "~{v[184:191]}" () + call void asm sideeffect "", "~{v[192:199]}" () + call void asm sideeffect "", "~{v[200:207]}" () + call void asm sideeffect "", "~{v[208:215]}" () + call void asm sideeffect "", "~{v[216:223]}" () + call void asm sideeffect "", "~{v[224:231]}" () + call void asm sideeffect "", "~{v[232:239]}" () + call void asm sideeffect "", "~{v[240:247]}" () + call void asm sideeffect "", "~{v[248:255]}" () store i32 %in, i32 addrspace(1)* %out ret void diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll index 3f53572ab44033373743f8be476e73566fbb53e1..ea8b87f1dee238cd8afc6d8fdfd26c9f9603de4c 100644 --- a/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -79,7 +79,7 @@ define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 { ; CHECK-NEXT: s_endpgm define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 { call void @llvm.AMDGPU.kill(float %x) - %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"() + %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"() call void @llvm.AMDGPU.kill(float %y) ret void } @@ -128,7 +128,7 @@ bb: v_nop_e64 v_nop_e64 v_nop_e64 - v_nop_e64", "={VGPR7}"() + v_nop_e64", "={v7}"() call void @llvm.AMDGPU.kill(float %var) br label %exit @@ -186,11 +186,11 @@ bb: v_nop_e64 v_nop_e64 v_nop_e64 - v_nop_e64", "={VGPR7}"() - %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"() + v_nop_e64", "={v7}"() + %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"() call void @llvm.AMDGPU.kill(float %var) store volatile float %live.across, float addrspace(1)* undef - %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"() + %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"() br label %exit exit: @@ -242,7 +242,7 @@ bb: v_nop_e64 v_nop_e64 v_nop_e64 - v_nop_e64", "={VGPR7}"() + v_nop_e64", "={v7}"() call void @llvm.AMDGPU.kill(float %var) %vgpr = load volatile i32, i32 addrspace(1)* undef %loop.cond = icmp eq i32 %vgpr, 0 diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/test/CodeGen/AMDGPU/sminmax.v2i16.ll index 4e093cdece212b41a23ba44d38e673a701dd1ca9..a9aac2d8abb75e12e973c2a1b3a06797c48c8c19 100644 --- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s @@ -10,11 +10,11 @@ ; VI: v_sub_i32_e32 ; VI-DAG: v_sub_i32_e32 -; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; VI: v_add_i32_e32 ; VI: v_add_i32_e32 -; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; CI: v_sub_i32_e32 ; CI-DAG: v_sub_i32_e32 @@ -40,13 +40,14 @@ define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> % ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]] ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 +; VI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 ; VI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_sub_u16_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} ; VI: v_sub_u16_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}} -; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}} +; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[TWO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NOT: v_and_b32 ; VI: v_or_b32_e32 define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 { @@ -206,7 +207,7 @@ define amdgpu_kernel void @v_min_max_v2i16_user(<2 x i16> addrspace(1)* %out0, < } ; GCN-LABEL: {{^}}u_min_max_v2i16: -; GFX9: v_pk_max_u16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} +; GFX9: v_pk_max_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_pk_min_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @u_min_max_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %val0, <2 x i16> %val1) nounwind { %cond0 = icmp ugt <2 x i16> %val0, %val1 diff --git a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll index 343211b0219cc8f235b679935d4cd38862b5ab07..333113e8a9b67d7e8092a51104eab3b40e4c55c6 100644 --- a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll +++ b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll @@ -5,7 +5,7 @@ ; GCN-FUNC: {{^}}vccz_workaround: ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0 ; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0{{$}} -; GCN: s_waitcnt lgkmcnt(0) +; VCCZ-BUG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VCCZ-BUG: s_mov_b64 vcc, vcc ; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc ; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]] diff --git a/test/CodeGen/AMDGPU/spill-m0.ll b/test/CodeGen/AMDGPU/spill-m0.ll index 0e715c453209e7d5edc9c7ed7143f1110e4c4dd4..7e8fa118c2c22f3678ecd360252b4919ff8c16b0 100644 --- a/test/CodeGen/AMDGPU/spill-m0.ll +++ b/test/CodeGen/AMDGPU/spill-m0.ll @@ -18,13 +18,11 @@ ; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 ; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]] ; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Spill -; TOVMEM: s_waitcnt vmcnt(0) ; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 ; TOSMEM: s_add_u32 m0, s3, 0x100{{$}} ; TOSMEM-NOT: [[M0_COPY]] ; TOSMEM: s_buffer_store_dword [[M0_COPY]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill -; TOSMEM: s_waitcnt lgkmcnt(0) ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]] @@ -69,19 +67,20 @@ endif: ; TOSMEM-NOT: s_m0 ; TOSMEM: s_add_u32 m0, s7, 0x100 ; TOSMEM-NEXT: s_buffer_store_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 4-byte Folded Spill -; TOSMEM-NOT: m0 +; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it +; FIXME-TOSMEM-NOT: m0 -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_add_u32 m0, s7, 0x200 ; TOSMEM: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_mov_b64 exec, ; TOSMEM: s_cbranch_execz ; TOSMEM: s_branch ; TOSMEM: BB{{[0-9]+_[0-9]+}}: -; TOSMEM-NEXT: s_add_u32 m0, s7, 0x200 +; TOSMEM: s_add_u32 m0, s7, 0x200 ; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload @@ -130,7 +129,7 @@ endif: ; preds = %else, %if ; TOSMEM: s_branch ; TOSMEM: BB{{[0-9]+_[0-9]+}}: -; TOSMEM-NEXT: s_add_u32 m0, s3, 0x100 +; TOSMEM: s_add_u32 m0, s3, 0x100 ; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload ; GCN-NOT: v_readlane_b32 m0 @@ -159,13 +158,14 @@ endif: ; GCN-LABEL: {{^}}restore_m0_lds: ; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] ; TOSMEM: s_cmp_eq_u32 -; TOSMEM-NOT: m0 +; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_add_u32 m0, s3, 0x100 ; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_add_u32 m0, s3, 0x300 ; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_cbranch_scc1 ; TOSMEM: s_mov_b32 m0, -1 @@ -178,10 +178,10 @@ endif: ; TOSMEM: ds_write_b64 -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_add_u32 m0, s3, 0x300 ; TOSMEM: s_buffer_load_dword s0, s[88:91], m0 ; 4-byte Folded Reload -; TOSMEM-NOT: m0 +; FIXME-TOSMEM-NOT: m0 ; TOSMEM: s_waitcnt lgkmcnt(0) ; TOSMEM-NOT: m0 ; TOSMEM: s_mov_b32 m0, s0 diff --git a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll index c05021a91ff059b6f099f10b4538b1305304d5a9..a23461a0a51489274d1b143b830164271c310a5b 100644 --- a/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -20,13 +20,13 @@ entry: %a = load <1280 x i32>, <1280 x i32> addrspace(1)* %aptr ; mark most VGPR registers as used to increase register pressure - call void asm sideeffect "", "~{VGPR4},~{VGPR8},~{VGPR12},~{VGPR16},~{VGPR20},~{VGPR24},~{VGPR28},~{VGPR32}" () - call void asm sideeffect "", "~{VGPR36},~{VGPR40},~{VGPR44},~{VGPR48},~{VGPR52},~{VGPR56},~{VGPR60},~{VGPR64}" () - call void asm sideeffect "", "~{VGPR68},~{VGPR72},~{VGPR76},~{VGPR80},~{VGPR84},~{VGPR88},~{VGPR92},~{VGPR96}" () - call void asm sideeffect "", "~{VGPR100},~{VGPR104},~{VGPR108},~{VGPR112},~{VGPR116},~{VGPR120},~{VGPR124},~{VGPR128}" () - call void asm sideeffect "", "~{VGPR132},~{VGPR136},~{VGPR140},~{VGPR144},~{VGPR148},~{VGPR152},~{VGPR156},~{VGPR160}" () - call void asm sideeffect "", "~{VGPR164},~{VGPR168},~{VGPR172},~{VGPR176},~{VGPR180},~{VGPR184},~{VGPR188},~{VGPR192}" () - call void asm sideeffect "", "~{VGPR196},~{VGPR200},~{VGPR204},~{VGPR208},~{VGPR212},~{VGPR216},~{VGPR220},~{VGPR224}" () + call void asm sideeffect "", "~{v4},~{v8},~{v12},~{v16},~{v20},~{v24},~{v28},~{v32}" () + call void asm sideeffect "", "~{v36},~{v40},~{v44},~{v48},~{v52},~{v56},~{v60},~{v64}" () + call void asm sideeffect "", "~{v68},~{v72},~{v76},~{v80},~{v84},~{v88},~{v92},~{v96}" () + call void asm sideeffect "", "~{v100},~{v104},~{v108},~{v112},~{v116},~{v120},~{v124},~{v128}" () + call void asm sideeffect "", "~{v132},~{v136},~{v140},~{v144},~{v148},~{v152},~{v156},~{v160}" () + call void asm sideeffect "", "~{v164},~{v168},~{v172},~{v176},~{v180},~{v184},~{v188},~{v192}" () + call void asm sideeffect "", "~{v196},~{v200},~{v204},~{v208},~{v212},~{v216},~{v220},~{v224}" () %outptr = getelementptr <1280 x i32>, <1280 x i32> addrspace(1)* %out, i32 %tid store <1280 x i32> %a, <1280 x i32> addrspace(1)* %outptr diff --git a/test/CodeGen/AMDGPU/sra.ll b/test/CodeGen/AMDGPU/sra.ll index b4355b76016a1ba380c35f010e94ab904928ff81..44cfdf6398aef42e82d1b61b1142a4cce7578e5e 100644 --- a/test/CodeGen/AMDGPU/sra.ll +++ b/test/CodeGen/AMDGPU/sra.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() #0 diff --git a/test/CodeGen/AMDGPU/srem.ll b/test/CodeGen/AMDGPU/srem.ll index c89f798397ae60c7321ed9dd654b34b29eebc346..e067258920892b8a1f4f14124412f88ac2627c31 100644 --- a/test/CodeGen/AMDGPU/srem.ll +++ b/test/CodeGen/AMDGPU/srem.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* ; FUNC-LABEL: {{^}}srem_i32_7: ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493 -; SI: v_mul_hi_i32 {{v[0-9]+}}, [[MAGIC]], +; SI: v_mul_hi_i32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]] ; SI: v_mul_lo_i32 ; SI: v_sub_i32 ; SI: s_endpgm diff --git a/test/CodeGen/AMDGPU/srl.ll b/test/CodeGen/AMDGPU/srl.ll index 1daf4bb33e819e2791bb66c3de49a22605dde420..cb40ecf2de1ca4493e01f98bc4f58311338b68e7 100644 --- a/test/CodeGen/AMDGPU/srl.ll +++ b/test/CodeGen/AMDGPU/srl.ll @@ -201,7 +201,8 @@ define amdgpu_kernel void @s_lshr_32_i64(i64 addrspace(1)* %out, i64 %a) { ; GCN-LABEL: {{^}}v_lshr_32_i64: ; GCN-DAG: buffer_load_dword v[[HI_A:[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0{{$}} +; GCN-DAG: v_mov_b32_e32 v[[VHI1:[0-9]+]], 0{{$}} +; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], v[[VHI1]]{{$}} ; GCN: buffer_store_dwordx2 v{{\[}}[[HI_A]]:[[VHI]]{{\]}} define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %tid = call i32 @llvm.r600.read.tidig.x() #0 diff --git a/test/CodeGen/AMDGPU/store-global.ll b/test/CodeGen/AMDGPU/store-global.ll index 160e921fc075995ddad68eeb0ca8460864782600..f61e524ee2e556d37a916fbce236ed2de15c5c45 100644 --- a/test/CodeGen/AMDGPU/store-global.ll +++ b/test/CodeGen/AMDGPU/store-global.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}store_i1: ; EG: MEM_RAT MSKOR diff --git a/test/CodeGen/AMDGPU/store-private.ll b/test/CodeGen/AMDGPU/store-private.ll index ab73ada370ea0a6700150aef860dcff5939eea1f..ce7656adc0b452a5dc70dac7b31a25b35c2fc787 100644 --- a/test/CodeGen/AMDGPU/store-private.ll +++ b/test/CodeGen/AMDGPU/store-private.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}store_i1: ; EG: MOVA_INT diff --git a/test/CodeGen/AMDGPU/sub.i16.ll b/test/CodeGen/AMDGPU/sub.i16.ll index ada72140563392cdff5e4d4a78d313b95dfca957..1d407ea9bcda648af3a89a558c0ecef83cfef868 100644 --- a/test/CodeGen/AMDGPU/sub.i16.ll +++ b/test/CodeGen/AMDGPU/sub.i16.ll @@ -85,10 +85,10 @@ define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i1 ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64: +; VI: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0 ; VI: flat_load_ushort [[A:v[0-9]+]] ; VI: flat_load_ushort [[B:v[0-9]+]] ; VI-DAG: v_subrev_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]] -; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0 ; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/test/CodeGen/AMDGPU/sub.ll b/test/CodeGen/AMDGPU/sub.ll index f366029fdea27bae180dbd6ab8e226e7d4e953ff..e7655df1552041bf75fed0c3ff58d30d49a62c0d 100644 --- a/test/CodeGen/AMDGPU/sub.ll +++ b/test/CodeGen/AMDGPU/sub.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() readnone diff --git a/test/CodeGen/AMDGPU/sub.v2i16.ll b/test/CodeGen/AMDGPU/sub.v2i16.ll index 69f0accef6282605e0542fe99f7165a603a89e02..ee923e2b8b611cf7bfe44d139faacf5fc4cdc9e4 100644 --- a/test/CodeGen/AMDGPU/sub.v2i16.ll +++ b/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -1,11 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx901 -mattr=-flat-for-global -verify-machineinstrs -enable-packed-inlinable-literals < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_sub_v2i16: ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI: v_subrev_u16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0, <2 x i16> addrspace(1)* %in1) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -23,7 +23,7 @@ define amdgpu_kernel void @v_test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i ; GFX9: s_load_dword [[VAL0:s[0-9]+]] ; GFX9: s_load_dword [[VAL1:s[0-9]+]] ; GFX9: v_mov_b32_e32 [[VVAL1:v[0-9]+]] -; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[VVAL1]], [[VAL0]] +; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[VAL0]], [[VVAL1]] ; VI: s_sub_i32 ; VI: s_sub_i32 @@ -47,7 +47,7 @@ define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, < ; FIXME: VI should not scalarize arg access. ; GCN-LABEL: {{^}}s_test_sub_v2i16_kernarg: -; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} +; GFX9: v_pk_sub_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; VI: v_subrev_i32_e32 ; VI: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD @@ -59,9 +59,10 @@ define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out ; GCN-LABEL: {{^}}v_test_sub_v2i16_constant: ; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0x1c8007b{{$}} -; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[CONST]], v{{[0-9]+}} +; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]] -; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xfffffe38, v{{[0-9]+}} +; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfffffe38 +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0xffffff85, v{{[0-9]+}} define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -76,9 +77,10 @@ define amdgpu_kernel void @v_test_sub_v2i16_constant(<2 x i16> addrspace(1)* %ou ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_sub_v2i16_neg_constant: ; GFX9: s_mov_b32 [[CONST:s[0-9]+]], 0xfc21fcb3{{$}} -; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[CONST]], v{{[0-9]+}} +; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[CONST]] -; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x3df, v{{[0-9]+}} +; VI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3df +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 0x34d, v{{[0-9]+}} define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -93,11 +95,11 @@ define amdgpu_kernel void @v_test_sub_v2i16_neg_constant(<2 x i16> addrspace(1)* ; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_neg1: ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, -1{{$}} +; VI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1 ; VI: flat_load_ushort [[LOAD0:v[0-9]+]] ; VI: flat_load_ushort [[LOAD1:v[0-9]+]] -; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 1, [[LOAD0]] +; VI-DAG: v_add_u16_sdwa v{{[0-9]+}}, [[LOAD0]], [[ONE]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-DAG: v_add_u16_e32 v{{[0-9]+}}, 1, [[LOAD1]] -; VI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_or_b32_e32 define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -111,7 +113,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(<2 x i16> addrspace(1)* ; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_lo_zero_hi: ; GFX9: s_mov_b32 [[K:s[0-9]+]], 32{{$}} -; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}{{$}} +; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] ; VI-NOT: v_subrev_i16 ; VI: v_add_u16_e32 v{{[0-9]+}}, 0xffffffe0, v{{[0-9]+}} @@ -131,12 +133,12 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_lo_zero_hi(<2 x i16> addrspac ; The high element gives fp ; GCN-LABEL: {{^}}v_test_sub_v2i16_inline_fp_split: ; GFX9: s_mov_b32 [[K:s[0-9]+]], 1.0 -; GFX9: v_pk_sub_i16 v{{[0-9]+}}, [[K]], v{{[0-9]+}}{{$}} +; GFX9: v_pk_sub_i16 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] ; VI-NOT: v_subrev_i16 -; VI: v_add_u16_e32 v{{[0-9]+}}, 0xffffc080, v{{[0-9]+}} +; VI: v_mov_b32_e32 [[K:v[0-9]+]], 0xffffc080 +; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[K]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NOT: v_subrev_i16 -; VI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, ; VI: v_or_b32_e32 define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in0) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -185,10 +187,10 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1) ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_sub_v2i16_zext_to_v2i64: +; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: flat_load_dword [[A:v[0-9]+]] ; GFX9: flat_load_dword [[B:v[0-9]+]] -; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]] ; GFX9-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]] @@ -199,8 +201,6 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i32(<2 x i32> addrspace(1) ; VI: flat_load_ushort v[[B_LO:[0-9]+]] ; VI: flat_load_ushort v[[B_HI:[0-9]+]] -; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} -; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; VI-DAG: v_subrev_u16_e32 ; VI-DAG: v_subrev_u16_e32 @@ -252,7 +252,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i32(<2 x i32> addrspace(1) ; GFX9: v_pk_sub_i16 ; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} -; VI: v_subrev_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI: v_sub_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI: v_subrev_u16_e32 ; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16 diff --git a/test/CodeGen/AMDGPU/subreg-intervals.mir b/test/CodeGen/AMDGPU/subreg-intervals.mir index c477fe9bc6d348bf4afe080816844a655cb9d286..62816da25b2c4366e8ce93c6567c96ddea5b8f28 100644 --- a/test/CodeGen/AMDGPU/subreg-intervals.mir +++ b/test/CodeGen/AMDGPU/subreg-intervals.mir @@ -31,17 +31,14 @@ registers: - { id: 0, class: sreg_64 } body: | bb.0: - successors: %bb.1, %bb.2 S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc S_BRANCH %bb.2 bb.1: - successors: %bb.3 S_NOP 0, implicit-def undef %0.sub0 S_BRANCH %bb.3 bb.2: - successors: %bb.3 S_NOP 0, implicit-def %0 S_BRANCH %bb.3 diff --git a/test/CodeGen/AMDGPU/subreg_interference.mir b/test/CodeGen/AMDGPU/subreg_interference.mir index 24d06a576c2a4c6bc1bb1e7a0d4d939e2aa60c69..6fc22c8d189f00fc797d605004dddc96ae7642be 100644 --- a/test/CodeGen/AMDGPU/subreg_interference.mir +++ b/test/CodeGen/AMDGPU/subreg_interference.mir @@ -1,4 +1,12 @@ # RUN: llc -o - %s -mtriple=amdgcn--amdhsa -verify-machineinstrs -run-pass=greedy,virtregrewriter | FileCheck %s +--- | + + define amdgpu_kernel void @func0() { + ret void + } + +... + --- # We should not detect any interference between v0/v1 here and only allocate # sgpr0-sgpr3. diff --git a/test/CodeGen/AMDGPU/trap.ll b/test/CodeGen/AMDGPU/trap.ll index 77ad895d0e86a18b03b0e6a1f674f258e963d299..51771c9723e00e7102644b77e9937be145e06518 100644 --- a/test/CodeGen/AMDGPU/trap.ll +++ b/test/CodeGen/AMDGPU/trap.ll @@ -80,4 +80,25 @@ define amdgpu_kernel void @trap() { ret void } +; GCN-LABEL: {{^}}non_entry_trap: +; TRAP-BIT: enable_trap_handler = 1 +; NO-TRAP-BIT: enable_trap_handler = 0 + +; HSA: BB{{[0-9]_[0-9]+]]: ; %trap +; HSA-TRAP: s_mov_b64 s[0:1], s[4:5] +; HSA-TRAP-NEXT: s_trap 2 +define amdgpu_kernel void @non_entry_trap(i32 addrspace(1)* nocapture readonly %arg0) local_unnamed_addr #1 { +entry: + %tmp29 = load volatile i32, i32 addrspace(1)* %arg0 + %cmp = icmp eq i32 %tmp29, -1 + br i1 %cmp, label %ret, label %trap + +trap: + call void @llvm.trap() + unreachable + +ret: + ret void +} + attributes #0 = { nounwind noreturn } diff --git a/test/CodeGen/AMDGPU/udiv.ll b/test/CodeGen/AMDGPU/udiv.ll index 2874a0cdbc05f81e68d6620be5c694fd28b4b3f6..d9dab0d40acf652702675959cd9be2531c891528 100644 --- a/test/CodeGen/AMDGPU/udiv.ll +++ b/test/CodeGen/AMDGPU/udiv.ll @@ -74,7 +74,7 @@ define amdgpu_kernel void @udiv_i32_div_pow2(i32 addrspace(1)* %out, i32 addrspa ; FUNC-LABEL: {{^}}udiv_i32_div_k_even: ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfabbd9c1 -; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[K]], [[VAL]] +; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[VAL]], [[K]] ; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 25, [[MULHI]] ; SI: buffer_store_dword [[RESULT]] define amdgpu_kernel void @udiv_i32_div_k_even(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -88,7 +88,7 @@ define amdgpu_kernel void @udiv_i32_div_k_even(i32 addrspace(1)* %out, i32 addrs ; FUNC-LABEL: {{^}}udiv_i32_div_k_odd: ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]] ; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7d5deca3 -; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[K]], [[VAL]] +; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[VAL]], [[K]] ; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 24, [[MULHI]] ; SI: buffer_store_dword [[RESULT]] define amdgpu_kernel void @udiv_i32_div_k_odd(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -176,7 +176,7 @@ define amdgpu_kernel void @test_udiv2(i32 %p) { ; FUNC-LABEL: {{^}}test_udiv_3_mulhu: ; SI: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab -; SI: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}} +; SI: v_mul_hi_u32 v0, {{s[0-9]+}}, {{v[0-9]+}} ; SI-NEXT: v_lshrrev_b32_e32 v0, 1, v0 define amdgpu_kernel void @test_udiv_3_mulhu(i32 %p) { %i = udiv i32 %p, 3 diff --git a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll index 3e80fcf85b529b1dc88117468c3408e1a02a3cac..1e08f51dabde2aa6533faf2ecdef9d2465a63e4c 100644 --- a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll +++ b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll @@ -73,14 +73,14 @@ bb11: ; preds = %bb9 ; CHECK: buffer_store_dwordx4 v{{\[}}[[OUTPUT_LO]]:[[OUTPUT_HI]]{{\]}} define amdgpu_kernel void @partially_undef_copy() #0 { - %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={VGPR5}"() - %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={VGPR6}"() + %tmp0 = call i32 asm sideeffect "v_mov_b32_e32 v5, 5", "={v5}"() + %tmp1 = call i32 asm sideeffect "v_mov_b32_e32 v6, 6", "={v6}"() %partially.undef.0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0 %partially.undef.1 = insertelement <4 x i32> %partially.undef.0, i32 %tmp1, i32 0 store volatile <4 x i32> %partially.undef.1, <4 x i32> addrspace(1)* undef, align 16 - tail call void asm sideeffect "v_nop", "v={VGPR5_VGPR6_VGPR7_VGPR8}"(<4 x i32> %partially.undef.0) + tail call void asm sideeffect "v_nop", "v={v[5:8]}"(<4 x i32> %partially.undef.0) ret void } diff --git a/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll index e0067f9f45acc5988de1b067085b2f41335e388c..8a08f9d8bb0d78964c0c24f7f4927547ee9fba70 100644 --- a/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll +++ b/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll @@ -7,11 +7,11 @@ ; CHECK: s_and_saveexec_b64 ; CHECK-NEXT: s_xor_b64 ; CHECK-NEXT: ; mask branch - +; CHECK-NEXT: s_cbranch_execz BB{{[0-9]+_[0-9]+}} ; CHECK-NEXT: BB{{[0-9]+_[0-9]+}}: ; %loop_body.preheader ; CHECK: [[LOOP_BODY_LABEL:BB[0-9]+_[0-9]+]]: -; CHECK: s_cbranch_scc0 [[LOOP_BODY_LABEL]] +; CHECK: s_cbranch_vccz [[LOOP_BODY_LABEL]] ; CHECK: s_endpgm define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <2 x i32> %addr.base, i32 %y, i32 %p) { diff --git a/test/CodeGen/AMDGPU/unknown-processor.ll b/test/CodeGen/AMDGPU/unknown-processor.ll index 25a700a943d2aa0b4a0bab9bc459ac483e2ff924..e25f2235993f54b18193f36914e22266e3a291f4 100644 --- a/test/CodeGen/AMDGPU/unknown-processor.ll +++ b/test/CodeGen/AMDGPU/unknown-processor.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=unknown < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s -; RUN: llc -march=r600 -mcpu=unknown < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s +; RUN: llc -march=amdgcn -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s +; RUN: llc -march=r600 -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s ; Should not crash when the processor is not recognized and the ; wavefront size feature not set. diff --git a/test/CodeGen/AMDGPU/urem.ll b/test/CodeGen/AMDGPU/urem.ll index fd7f8fa2efab573d47445461e607bf822afa6dc7..fb4eab43a2d66a660075688d6008e0ca8f565e71 100644 --- a/test/CodeGen/AMDGPU/urem.ll +++ b/test/CodeGen/AMDGPU/urem.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1 ; FUNC-LABEL: {{^}}test_urem_i32_7: ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925 -; SI: v_mul_hi_u32 {{v[0-9]+}}, [[MAGIC]] +; SI: v_mul_hi_u32 [[MAGIC]], {{v[0-9]+}} ; SI: v_subrev_i32 ; SI: v_mul_lo_i32 ; SI: v_sub_i32 diff --git a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll index f8e6b7edfe3583fc3d0d1bbc177a5143f9ca9485..e6bdb68a4f775a9b97d282b1166511fdae0d4a14 100644 --- a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll +++ b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll @@ -54,8 +54,8 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace( ; VI: buffer_load_dword [[VA0:v[0-9]+]] ; VI: buffer_load_dword [[VA1:v[0-9]+]] -; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VA0]], [[SA]], [[VB]] -; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VA1]], [[SA]], [[VB]] +; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SA]], [[VA0]], [[VB]] +; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SA]], [[VA1]], [[VB]] ; GCN: buffer_store_dword [[RESULT0]] ; GCN: buffer_store_dword [[RESULT1]] define amdgpu_kernel void @test_use_s_v_s(float addrspace(1)* %out, float %a, float %b, float addrspace(1)* %in) #0 { @@ -74,7 +74,7 @@ define amdgpu_kernel void @test_use_s_v_s(float addrspace(1)* %out, float %a, fl ; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c ; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] -; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]] +; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]] ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 { %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1 @@ -88,7 +88,7 @@ define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace( ; VI-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c ; VI-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]] -; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]] +; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]] ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 { %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1 @@ -228,7 +228,7 @@ define amdgpu_kernel void @test_literal_use_twice_ternary_op_s_k_k_x2(float addr ; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000 ; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], [[SGPR1]] -; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VS1]], [[SGPR0]], [[VK0]] +; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK0]] ; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000 ; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK1]] @@ -251,7 +251,7 @@ define amdgpu_kernel void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, ; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]] ; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]] -; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}} +; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[SGPR0]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}} ; Same zero component is re-used for half of each immediate. ; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000 diff --git a/test/CodeGen/AMDGPU/v_mac_f16.ll b/test/CodeGen/AMDGPU/v_mac_f16.ll index c45af522ec49bc8e80a1641555dc80adaecb45cb..ce4a69db350607300d857220fb26c61db8276ba7 100644 --- a/test/CodeGen/AMDGPU/v_mac_f16.ll +++ b/test/CodeGen/AMDGPU/v_mac_f16.ll @@ -304,14 +304,14 @@ entry: ; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]] -; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] -; SI-DAG: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] -; SI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] -; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]] -; SI-DAG: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] +; SI: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] +; SI: v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]] +; SI: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]] +; SI: v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]] +; SI: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] ; SI: v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]] +; SI-DAG: v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]] ; SI-DAG: v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]] ; SI-DAG: v_cvt_f16_f32_e32 v[[R_F16_LO:[0-9]+]], v[[C_F32_0]] ; SI-DAG: v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]] @@ -320,12 +320,12 @@ entry: ; VI-NOT: and ; SI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] -; VI-DAG: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]] -; VI-DAG: v_mac_f16_sdwa v[[A_F16_1]], v[[C_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; VI-DAG: v_mac_f16_e32 v[[A_V2_F16]], v[[C_V2_F16]], v[[B_V2_F16]] -; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[A_F16_1]] +; VI-DAG: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]] +; VI-DAG: v_mac_f16_sdwa v[[C_F16_1]], v[[A_V2_F16]], v[[B_V2_F16]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-DAG: v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]] +; VI-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]] ; VI-NOT: and -; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[A_V2_F16]] +; VI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[C_V2_F16]] ; GCN: {{buffer|flat}}_store_dword v[[R_V2_F16]] ; GCN: s_endpgm @@ -336,7 +336,9 @@ define amdgpu_kernel void @mac_v2f16( <2 x half> addrspace(1)* %c) #0 { entry: %a.val = load <2 x half>, <2 x half> addrspace(1)* %a + call void @llvm.amdgcn.s.barrier() #2 %b.val = load <2 x half>, <2 x half> addrspace(1)* %b + call void @llvm.amdgcn.s.barrier() #2 %c.val = load <2 x half>, <2 x half> addrspace(1)* %c %t.val = fmul <2 x half> %a.val, %b.val @@ -482,9 +484,10 @@ entry: ; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] ; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]] -; VI: v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}} -; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}} -; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 +; VI-DAG: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}} +; VI-DAG: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]] ; GCN: s_endpgm @@ -513,9 +516,10 @@ entry: ; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} ; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}} -; VI: v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}} +; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}} -; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-DAG: v_mac_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-DAG: v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}} ; GCN: s_endpgm @@ -544,8 +548,9 @@ entry: ; SI-DAG: v_mac_f32_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}} ; SI-DAG: v_mac_f32_e32 v[[NEG_A1]], v{{[0-9]+}}, v{{[0-9]+}} -; VI: v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}} +; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; VI: v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}} +; VI: v_sub_f16_sdwa v[[NEG_A0:[0-9]+]], [[ZERO]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-DAG: v_mac_f16_sdwa v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; VI-DAG: v_mac_f16_e32 v[[NEG_A1]], v{{[0-9]+}}, v{{[0-9]+}} @@ -667,5 +672,8 @@ entry: ret void } +declare void @llvm.amdgcn.s.barrier() #2 + attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" } attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" } +attributes #2 = { nounwind convergent } diff --git a/test/CodeGen/AMDGPU/v_madak_f16.ll b/test/CodeGen/AMDGPU/v_madak_f16.ll index bfb10503aaea211b7420b6a06c31df993753a92b..0148ff470b783586d1f0f57f029b2da579723c42 100644 --- a/test/CodeGen/AMDGPU/v_madak_f16.ll +++ b/test/CodeGen/AMDGPU/v_madak_f16.ll @@ -4,7 +4,7 @@ ; GCN-LABEL: {{^}}madak_f16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] -; VI: v_madak_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], 0x4900{{$}} +; VI: v_madak_f16 v[[R_F16:[0-9]+]], v[[A_F16]], v[[B_F16]], 0x4900{{$}} ; VI: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @madak_f16( diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll index aad260c3e3690a264914d59274105737343a07d5..a67f36d0a7e8ae6dcfca68569038fe25de4ff866 100644 --- a/test/CodeGen/AMDGPU/valu-i1.ll +++ b/test/CodeGen/AMDGPU/valu-i1.ll @@ -11,7 +11,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; SI: v_cmp_lt_i32_e32 vcc, 0, ; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc ; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]] -; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]] ; SI-NEXT: s_cbranch_execz [[FLOW_BB]] @@ -72,7 +71,6 @@ end: ; SI-NEXT: BB{{[0-9]+_[0-9]+}}: ; SI: buffer_store_dword -; SI-NEXT: s_waitcnt ; SI-NEXT: {{^}}[[EXIT]]: ; SI: s_or_b64 exec, exec, [[BR_SREG]] @@ -101,7 +99,6 @@ exit: ; SI-NEXT: BB{{[0-9]+_[0-9]+}}: ; SI: buffer_store_dword -; SI-NEXT: s_waitcnt ; SI-NEXT: {{^}}[[EXIT]]: ; SI: s_or_b64 exec, exec, [[BR_SREG]] @@ -132,7 +129,6 @@ exit: ; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %exit ; SI: ds_write_b32 -; SI: s_waitcnt ; SI-NEXT: {{^}}[[FLOW]]: ; SI-NEXT: s_or_saveexec_b64 @@ -140,8 +136,8 @@ exit: ; SI-NEXT: ; mask branch [[UNIFIED_RETURN:BB[0-9]+_[0-9]+]] ; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %then -; SI: buffer_store_dword -; SI-NEXT: s_waitcnt +; SI: s_waitcnt +; SI-NEXT: buffer_store_dword ; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock ; SI: s_or_b64 exec, exec @@ -172,8 +168,8 @@ exit: ; SI: [[LABEL_LOOP:BB[0-9]+_[0-9]+]]: ; SI: buffer_load_dword ; SI-DAG: buffer_store_dword -; SI-DAG: s_cmpk_eq_i32 s{{[0-9]+}}, 0x100 -; SI: s_cbranch_scc0 [[LABEL_LOOP]] +; SI-DAG: v_cmp_eq_u32_e32 vcc, 0x100 +; SI: s_cbranch_vccz [[LABEL_LOOP]] ; SI: [[LABEL_EXIT]]: ; SI: s_endpgm diff --git a/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir index 5e5465800c3a3c68ab4ebc044b1b27561d57fa32..6eb937e71b1b62b9aeb81be3ad939b560c3de541 100644 --- a/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ b/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -75,7 +75,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.entry: - successors: %bb.2.if, %bb.1.else liveins: %sgpr0_sgpr1 %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 9, 0 :: (non-temporal dereferenceable invariant load 4 from `float addrspace(2)* undef`) @@ -86,7 +85,6 @@ body: | S_CBRANCH_VCCZ %bb.1.else, implicit killed %vcc bb.2.if: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 9, implicit %exec @@ -95,7 +93,6 @@ body: | S_BRANCH %bb.3.done bb.1.else: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 100, implicit %exec @@ -141,7 +138,6 @@ frameInfo: hasMustTailInVarArgFunc: false body: | bb.0.entry: - successors: %bb.2.if, %bb.1.else liveins: %sgpr0_sgpr1 %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) @@ -150,7 +146,6 @@ body: | S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc bb.2.if: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 9, implicit %exec @@ -159,7 +154,6 @@ body: | S_BRANCH %bb.3.done bb.1.else: - successors: %bb.3.done liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 %vgpr0 = V_MOV_B32_e32 100, implicit %exec diff --git a/test/CodeGen/AMDGPU/vector-alloca.ll b/test/CodeGen/AMDGPU/vector-alloca.ll index 03cf725601b79dd2801439ef6364afae6a5742e2..a0aac8c1d9ba527bacce4f5c543f1aa04526546e 100644 --- a/test/CodeGen/AMDGPU/vector-alloca.ll +++ b/test/CodeGen/AMDGPU/vector-alloca.ll @@ -138,3 +138,25 @@ entry: store float %tmp2, float addrspace(1)* %out ret void } + +; The pointer arguments in local address space should not affect promotion to vector. + +; OPT-LABEL: @vector_read_with_local_arg( +; OPT: %0 = extractelement <4 x i32> , i32 %index +; OPT: store i32 %0, i32 addrspace(1)* %out, align 4 +define amdgpu_kernel void @vector_read_with_local_arg(i32 addrspace(3)* %stopper, i32 addrspace(1)* %out, i32 %index) { +entry: + %tmp = alloca [4 x i32] + %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 + %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 + %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 + %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 + store i32 0, i32* %x + store i32 1, i32* %y + store i32 2, i32* %z + store i32 3, i32* %w + %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index + %tmp2 = load i32, i32* %tmp1 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll index e82e548f23cda7a5f394fed5bccb6d379d6e77ef..135f02ac205a23b15e5a33aabc4a02b68560d32a 100644 --- a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll +++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll @@ -186,7 +186,7 @@ bb12: ; preds = %bb145, %bb %tmp140 = phi float [ 0.000000e+00, %bb ], [ %tmp405, %bb145 ] %tmp141 = phi float [ 0.000000e+00, %bb ], [ %tmp406, %bb145 ] %tmp142 = bitcast float %tmp95 to i32 - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tmp143 = icmp sgt i32 %tmp142, %tid br i1 %tmp143, label %bb144, label %bb145 @@ -593,7 +593,7 @@ bb145: ; preds = %bb12 br label %bb12 } -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/waitcnt-looptest.ll b/test/CodeGen/AMDGPU/waitcnt-looptest.ll new file mode 100644 index 0000000000000000000000000000000000000000..2a3ce4dfd191badbcb94003aaef24441b80941a6 --- /dev/null +++ b/test/CodeGen/AMDGPU/waitcnt-looptest.ll @@ -0,0 +1,146 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global | FileCheck --check-prefix=GCN %s + +; Check that the waitcnt insertion algorithm correctly propagates wait counts +; from before a loop to the loop header. + +; GCN-LABEL: {{^}}testKernel +; GCN: BB0_1: +; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_f32_e64 +; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_f32_e32 +; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: v_cmp_eq_f32_e32 + +@data_generic = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4 +@data_reference = addrspace(1) global [100 x float] [float 0.000000e+00, float 0x3FB99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD99999A0000000, float 5.000000e-01, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000, float 0x3FECCCCCC0000000, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000, float 1.500000e+00, float 0x3FF99999A0000000, float 0x3FFB333340000000, float 0x3FFCCCCCC0000000, float 0x3FFE666660000000, float 2.000000e+00, float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000, float 2.500000e+00, float 0x4004CCCCC0000000, float 0x40059999A0000000, float 0x4006666660000000, float 0x4007333340000000, float 3.000000e+00, float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000, float 3.500000e+00, float 0x400CCCCCC0000000, float 0x400D9999A0000000, float 0x400E666660000000, float 0x400F333340000000, float 4.000000e+00, float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000, float 4.500000e+00, float 0x4012666660000000, float 0x4012CCCCC0000000, float 0x4013333340000000, float 0x40139999A0000000, float 5.000000e+00, float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000, float 5.500000e+00, float 0x4016666660000000, float 0x4016CCCCC0000000, float 0x4017333340000000, float 0x40179999A0000000, float 6.000000e+00, float 0x4018666660000000, float 0x4018CCCCC0000000, float 0x4019333340000000, float 0x40199999A0000000, float 6.500000e+00, float 0x401A666660000000, float 0x401ACCCCC0000000, float 0x401B333340000000, float 0x401B9999A0000000, float 7.000000e+00, float 0x401C666660000000, float 0x401CCCCCC0000000, float 0x401D333340000000, float 0x401D9999A0000000, float 7.500000e+00, float 0x401E666660000000, float 0x401ECCCCC0000000, float 0x401F333340000000, float 0x401F9999A0000000, float 8.000000e+00, float 0x4020333340000000, float 0x4020666660000000, float 0x40209999A0000000, float 0x4020CCCCC0000000, float 8.500000e+00, float 0x4021333340000000, float 0x4021666660000000, float 0x40219999A0000000, float 0x4021CCCCC0000000, float 9.000000e+00, float 0x4022333340000000, float 0x4022666660000000, float 0x40229999A0000000, float 0x4022CCCCC0000000, float 9.500000e+00, float 0x4023333340000000, float 0x4023666660000000, float 0x40239999A0000000, float 0x4023CCCCC0000000], align 4 + +define amdgpu_kernel void @testKernel(i32 addrspace(1)* nocapture %arg) local_unnamed_addr #0 { +bb: + store <2 x float> , <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_generic to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4 + store <2 x float> , <2 x float> addrspace(4)* bitcast (float addrspace(4)* getelementptr ([100 x float], [100 x float] addrspace(4)* addrspacecast ([100 x float] addrspace(1)* @data_reference to [100 x float] addrspace(4)*), i64 0, i64 4) to <2 x float> addrspace(4)*), align 4 + br label %bb18 + +bb1: ; preds = %bb18 + %tmp = tail call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() + %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp3 = tail call i32 @llvm.amdgcn.workgroup.id.x() + %tmp4 = getelementptr inbounds i8, i8 addrspace(2)* %tmp, i64 4 + %tmp5 = bitcast i8 addrspace(2)* %tmp4 to i16 addrspace(2)* + %tmp6 = load i16, i16 addrspace(2)* %tmp5, align 4 + %tmp7 = zext i16 %tmp6 to i32 + %tmp8 = mul i32 %tmp3, %tmp7 + %tmp9 = add i32 %tmp8, %tmp2 + %tmp10 = tail call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() + %tmp11 = zext i32 %tmp9 to i64 + %tmp12 = bitcast i8 addrspace(2)* %tmp10 to i64 addrspace(2)* + %tmp13 = load i64, i64 addrspace(2)* %tmp12, align 8 + %tmp14 = add i64 %tmp13, %tmp11 + %tmp15 = zext i1 %tmp99 to i32 + %tmp16 = and i64 %tmp14, 4294967295 + %tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp16 + store i32 %tmp15, i32 addrspace(1)* %tmp17, align 4 + ret void + +bb18: ; preds = %bb18, %bb + %tmp19 = phi i64 [ 0, %bb ], [ %tmp102, %bb18 ] + %tmp20 = phi i32 [ 0, %bb ], [ %tmp100, %bb18 ] + %tmp21 = phi i1 [ true, %bb ], [ %tmp99, %bb18 ] + %tmp22 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp19 + %tmp23 = load float, float addrspace(1)* %tmp22, align 4 + %tmp24 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp19 + %tmp25 = load float, float addrspace(1)* %tmp24, align 4 + %tmp26 = fcmp oeq float %tmp23, %tmp25 + %tmp27 = and i1 %tmp21, %tmp26 + %tmp28 = or i32 %tmp20, 1 + %tmp29 = sext i32 %tmp28 to i64 + %tmp30 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp29 + %tmp31 = load float, float addrspace(1)* %tmp30, align 4 + %tmp32 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp29 + %tmp33 = load float, float addrspace(1)* %tmp32, align 4 + %tmp34 = fcmp oeq float %tmp31, %tmp33 + %tmp35 = and i1 %tmp27, %tmp34 + %tmp36 = add nuw nsw i32 %tmp20, 2 + %tmp37 = sext i32 %tmp36 to i64 + %tmp38 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp37 + %tmp39 = load float, float addrspace(1)* %tmp38, align 4 + %tmp40 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp37 + %tmp41 = load float, float addrspace(1)* %tmp40, align 4 + %tmp42 = fcmp oeq float %tmp39, %tmp41 + %tmp43 = and i1 %tmp35, %tmp42 + %tmp44 = add nuw nsw i32 %tmp20, 3 + %tmp45 = sext i32 %tmp44 to i64 + %tmp46 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp45 + %tmp47 = load float, float addrspace(1)* %tmp46, align 4 + %tmp48 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp45 + %tmp49 = load float, float addrspace(1)* %tmp48, align 4 + %tmp50 = fcmp oeq float %tmp47, %tmp49 + %tmp51 = and i1 %tmp43, %tmp50 + %tmp52 = add nuw nsw i32 %tmp20, 4 + %tmp53 = sext i32 %tmp52 to i64 + %tmp54 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp53 + %tmp55 = load float, float addrspace(1)* %tmp54, align 4 + %tmp56 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp53 + %tmp57 = load float, float addrspace(1)* %tmp56, align 4 + %tmp58 = fcmp oeq float %tmp55, %tmp57 + %tmp59 = and i1 %tmp51, %tmp58 + %tmp60 = add nuw nsw i32 %tmp20, 5 + %tmp61 = sext i32 %tmp60 to i64 + %tmp62 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp61 + %tmp63 = load float, float addrspace(1)* %tmp62, align 4 + %tmp64 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp61 + %tmp65 = load float, float addrspace(1)* %tmp64, align 4 + %tmp66 = fcmp oeq float %tmp63, %tmp65 + %tmp67 = and i1 %tmp59, %tmp66 + %tmp68 = add nuw nsw i32 %tmp20, 6 + %tmp69 = sext i32 %tmp68 to i64 + %tmp70 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp69 + %tmp71 = load float, float addrspace(1)* %tmp70, align 4 + %tmp72 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp69 + %tmp73 = load float, float addrspace(1)* %tmp72, align 4 + %tmp74 = fcmp oeq float %tmp71, %tmp73 + %tmp75 = and i1 %tmp67, %tmp74 + %tmp76 = add nuw nsw i32 %tmp20, 7 + %tmp77 = sext i32 %tmp76 to i64 + %tmp78 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp77 + %tmp79 = load float, float addrspace(1)* %tmp78, align 4 + %tmp80 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp77 + %tmp81 = load float, float addrspace(1)* %tmp80, align 4 + %tmp82 = fcmp oeq float %tmp79, %tmp81 + %tmp83 = and i1 %tmp75, %tmp82 + %tmp84 = add nuw nsw i32 %tmp20, 8 + %tmp85 = sext i32 %tmp84 to i64 + %tmp86 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp85 + %tmp87 = load float, float addrspace(1)* %tmp86, align 4 + %tmp88 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp85 + %tmp89 = load float, float addrspace(1)* %tmp88, align 4 + %tmp90 = fcmp oeq float %tmp87, %tmp89 + %tmp91 = and i1 %tmp83, %tmp90 + %tmp92 = add nuw nsw i32 %tmp20, 9 + %tmp93 = sext i32 %tmp92 to i64 + %tmp94 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_generic, i64 0, i64 %tmp93 + %tmp95 = load float, float addrspace(1)* %tmp94, align 4 + %tmp96 = getelementptr inbounds [100 x float], [100 x float] addrspace(1)* @data_reference, i64 0, i64 %tmp93 + %tmp97 = load float, float addrspace(1)* %tmp96, align 4 + %tmp98 = fcmp oeq float %tmp95, %tmp97 + %tmp99 = and i1 %tmp91, %tmp98 + %tmp100 = add nuw nsw i32 %tmp20, 10 + %tmp101 = icmp eq i32 %tmp100, 100 + %tmp102 = sext i32 %tmp100 to i64 + br i1 %tmp101, label %bb1, label %bb18 +} + +; Function Attrs: nounwind readnone speculatable +declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workgroup.id.x() #1 + +; Function Attrs: nounwind readnone speculatable +declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #1 + +attributes #0 = { "target-cpu"="fiji" "target-features"="-flat-for-global" } +attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/waitcnt-permute.mir b/test/CodeGen/AMDGPU/waitcnt-permute.mir new file mode 100644 index 0000000000000000000000000000000000000000..44dbd38f2d3009811fea1c8f620df2d7a248e713 --- /dev/null +++ b/test/CodeGen/AMDGPU/waitcnt-permute.mir @@ -0,0 +1,33 @@ +# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s + +--- | + define float @waitcnt-permute(i32 %x, i32 %y) { + entry: + %0 = call i32 @llvm.amdgcn.ds.bpermute(i32 %x, i32 %y) + %1 = bitcast i32 %0 to float + %2 = fadd float 1.000000e+00, %1 + ret float %2 + } + + declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) + +... +--- +# CHECK-LABEL: name: waitcnt-permute{{$}} +# CHECK: DS_BPERMUTE_B32 +# CHECK-NEXT: S_WAITCNT 127 + +name: waitcnt-permute +liveins: + - { reg: '%vgpr0' } + - { reg: '%vgpr1' } + - { reg: '%sgpr30_sgpr31' } +body: | + bb.0: + liveins: %vgpr0, %vgpr1, %sgpr30_sgpr31 + + %vgpr0 = DS_BPERMUTE_B32 killed %vgpr0, killed %vgpr1, 0, implicit %exec + %vgpr0 = V_ADD_F32_e32 1065353216, killed %vgpr0, implicit %exec + S_SETPC_B64_return killed %sgpr30_sgpr31, implicit killed %vgpr0 + +... diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll index 9f277b2c9a59da7620e1ddfae46888e595528caf..133aaa35981e178989d7e26078936b102690a850 100644 --- a/test/CodeGen/AMDGPU/wqm.ll +++ b/test/CodeGen/AMDGPU/wqm.ll @@ -349,7 +349,7 @@ main_body: ; CHECK: [[LOOPHDR:BB[0-9]+_[0-9]+]]: ; %body ; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]] -; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]] +; CHECK: v_cmp_gt_f32_e32 vcc, [[CTR]], [[SEVEN]] ; CHECK: s_cbranch_vccz [[LOOPHDR]] ; CHECK: ; %break diff --git a/test/CodeGen/AMDGPU/zext-lid.ll b/test/CodeGen/AMDGPU/zext-lid.ll new file mode 100644 index 0000000000000000000000000000000000000000..066f29277270a8789ad4c04b10c8d57f5c34919d --- /dev/null +++ b/test/CodeGen/AMDGPU/zext-lid.ll @@ -0,0 +1,84 @@ +; RUN: llc -march=amdgcn < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-intrinsics < %s | FileCheck -check-prefix=OPT %s + +; CHECK-NOT: and_b32 + +; OPT-LABEL: @zext_grp_size_128 +; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 +; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !0 +; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !0 +define amdgpu_kernel void @zext_grp_size_128(i32 addrspace(1)* nocapture %arg) #0 { +bb: + %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp1 = and i32 %tmp, 127 + store i32 %tmp1, i32 addrspace(1)* %arg, align 4 + %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() + %tmp3 = and i32 %tmp2, 127 + %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 + store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4 + %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() + %tmp6 = and i32 %tmp5, 127 + %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 + store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4 + ret void +} + +; OPT-LABEL: @zext_grp_size_32x4x1 +; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !2 +; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !3 +; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !4 +define amdgpu_kernel void @zext_grp_size_32x4x1(i32 addrspace(1)* nocapture %arg) #0 !reqd_work_group_size !0 { +bb: + %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp1 = and i32 %tmp, 31 + store i32 %tmp1, i32 addrspace(1)* %arg, align 4 + %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() + %tmp3 = and i32 %tmp2, 3 + %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 + store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4 + %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() + %tmp6 = and i32 %tmp5, 1 + %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 + store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4 + ret void +} + +; OPT-LABEL: @zext_grp_size_512 +; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !5 +; OPT: tail call i32 @llvm.amdgcn.workitem.id.y(), !range !5 +; OPT: tail call i32 @llvm.amdgcn.workitem.id.z(), !range !5 +define amdgpu_kernel void @zext_grp_size_512(i32 addrspace(1)* nocapture %arg) #1 { +bb: + %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp1 = and i32 %tmp, 65535 + store i32 %tmp1, i32 addrspace(1)* %arg, align 4 + %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.y() + %tmp3 = and i32 %tmp2, 65535 + %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1 + store i32 %tmp3, i32 addrspace(1)* %tmp4, align 4 + %tmp5 = tail call i32 @llvm.amdgcn.workitem.id.z() + %tmp6 = and i32 %tmp5, 65535 + %tmp7 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2 + store i32 %tmp6, i32 addrspace(1)* %tmp7, align 4 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #2 + +declare i32 @llvm.amdgcn.workitem.id.y() #2 + +declare i32 @llvm.amdgcn.workitem.id.z() #2 + +attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" } +attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" } +attributes #2 = { nounwind readnone speculatable } +attributes #3 = { nounwind readnone } + +!0 = !{i32 32, i32 4, i32 1} + +; OPT: !0 = !{i32 0, i32 128} +; OPT: !1 = !{i32 32, i32 4, i32 1} +; OPT: !2 = !{i32 0, i32 32} +; OPT: !3 = !{i32 0, i32 4} +; OPT: !4 = !{i32 0, i32 1} +; OPT: !5 = !{i32 0, i32 512} diff --git a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll index 8403dd991360e627858b0ed9a070d0c896b6ee56..777eccb00b02b7b1afefc6052cc66958e03cfbd8 100644 --- a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll +++ b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll @@ -20,7 +20,7 @@ bb3: ; preds = %bb, %entry bb8: ; preds = %bb3 %1 = getelementptr inbounds i8, i8* %0, i32 0 - store i8 0, i8* %1, align 1 + store volatile i8 0, i8* %1, align 1 %2 = call i32 @ptou() nounwind ; CHECK: umull [[REGISTER:lr|r[0-9]+]], ; CHECK-NOT: [[REGISTER]], @@ -35,7 +35,7 @@ bb8: ; preds = %bb3 %7 = or i8 %6, 48 %8 = add i8 %6, 87 %iftmp.5.0.1 = select i1 %5, i8 %7, i8 %8 - store i8 %iftmp.5.0.1, i8* %p8, align 1 + store volatile i8 %iftmp.5.0.1, i8* %p8, align 1 ; CHECK: umull [[REGISTER:lr|r[0-9]+]], ; CHECK-NOT: [[REGISTER]], ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} @@ -49,7 +49,7 @@ bb8: ; preds = %bb3 %13 = or i8 %12, 48 %14 = add i8 %12, 87 %iftmp.5.0.2 = select i1 %11, i8 %13, i8 %14 - store i8 %iftmp.5.0.2, i8* %p8, align 1 + store volatile i8 %iftmp.5.0.2, i8* %p8, align 1 ; CHECK: umull [[REGISTER:lr|r[0-9]+]], ; CHECK-NOT: [[REGISTER]], ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} @@ -63,7 +63,7 @@ bb8: ; preds = %bb3 %19 = or i8 %18, 48 %20 = add i8 %18, 87 %iftmp.5.0.4 = select i1 %17, i8 %19, i8 %20 - store i8 %iftmp.5.0.4, i8* null, align 1 + store volatile i8 %iftmp.5.0.4, i8* null, align 1 ; CHECK: umull [[REGISTER:lr|r[0-9]+]], ; CHECK-NOT: [[REGISTER]], ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} @@ -74,7 +74,7 @@ bb8: ; preds = %bb3 %22 = urem i32 %21, 10 %23 = icmp ult i32 %22, 10 %iftmp.5.0.5 = select i1 %23, i8 0, i8 %val8 - store i8 %iftmp.5.0.5, i8* %p8, align 1 + store volatile i8 %iftmp.5.0.5, i8* %p8, align 1 ; CHECK: umull [[REGISTER:lr|r[0-9]+]], ; CHECK-NOT: [[REGISTER]], ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} @@ -88,7 +88,7 @@ bb8: ; preds = %bb3 %28 = or i8 %27, 48 %29 = add i8 %27, 87 %iftmp.5.0.6 = select i1 %26, i8 %28, i8 %29 - store i8 %iftmp.5.0.6, i8* %p8, align 1 + store volatile i8 %iftmp.5.0.6, i8* %p8, align 1 ; CHECK: umull [[REGISTER:lr|r[0-9]+]], ; CHECK-NOT: [[REGISTER]], ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} @@ -102,7 +102,7 @@ bb8: ; preds = %bb3 %34 = or i8 %33, 48 %35 = add i8 %33, 87 %iftmp.5.0.7 = select i1 %32, i8 %34, i8 %35 - store i8 %iftmp.5.0.7, i8* %p8, align 1 + store volatile i8 %iftmp.5.0.7, i8* %p8, align 1 ; CHECK: umull [[REGISTER:lr|r[0-9]+]], ; CHECK-NOT: [[REGISTER]], ; CHECK: {{lr|r[0-9]+}}, {{lr|r[0-9]+$}} @@ -116,7 +116,7 @@ bb8: ; preds = %bb3 %40 = or i8 %39, 48 %41 = add i8 %39, 87 %iftmp.5.0.8 = select i1 %38, i8 %40, i8 %41 - store i8 %iftmp.5.0.8, i8* null, align 1 + store volatile i8 %iftmp.5.0.8, i8* null, align 1 br label %bb46 bb46: ; preds = %bb3 diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll index 2a5af6199a345cb8286bc67a00f3afa9e2a5060d..954860219d194bc190bc910f59341dd55ceec6fb 100644 --- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll +++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll @@ -13,7 +13,7 @@ entry: ; CHECK: sub sp, sp, #12 ; CHECK: sub sp, sp, #4 ; CHECK: add r0, sp, #4 -; CHECK: stm sp, {r0, r1, r2, r3} +; CHECK: stmib sp, {r1, r2, r3} %g = alloca i8* %g1 = bitcast i8** %g to i8* call void @llvm.va_start(i8* %g1) diff --git a/test/CodeGen/ARM/ARMLoadStoreDBG.mir b/test/CodeGen/ARM/ARMLoadStoreDBG.mir index 0e6f80bfb48bc0f62467d75c5e7f6ffee1bb322b..cf5388ac1ccb9662a3ae946fe2b2d13d23963755 100644 --- a/test/CodeGen/ARM/ARMLoadStoreDBG.mir +++ b/test/CodeGen/ARM/ARMLoadStoreDBG.mir @@ -118,7 +118,6 @@ stack: - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '%r7' } body: | bb.0.entry: - successors: %bb.1, %bb.2.if.end liveins: %r0, %r1, %r2, %r3, %lr, %r7 DBG_VALUE debug-use %r0, debug-use _, !18, !27, debug-location !28 diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir new file mode 100644 index 0000000000000000000000000000000000000000..d7f208d4cf5953201e9976e64b008c654a2a18d6 --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir @@ -0,0 +1,149 @@ +# RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +--- | + define void @test_mla() #0 { ret void } + define void @test_mla_v5() #1 { ret void } + + define void @test_mls() #2 { ret void } + define void @test_no_mls() { ret void } + + attributes #0 = { "target-features"="+v6" } + attributes #1 = { "target-features"="-v6" } + attributes #2 = { "target-features"="+v6t2" } +... +--- +name: test_mla +# CHECK-LABEL: name: test_mla +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_ADD %3, %2 + ; CHECK: [[VREGR:%[0-9]+]] = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mla_v5 +# CHECK-LABEL: name: test_mla_v5 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_ADD %3, %2 + ; CHECK: [[VREGR:%[0-9]+]] = MLAv5 [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mls +# CHECK-LABEL: name: test_mls +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_SUB %2, %3 + ; CHECK: [[VREGR:%[0-9]+]] = MLS [[VREGX]], [[VREGY]], [[VREGZ]], 14, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_no_mls +# CHECK-LABEL: name: test_no_mls +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } +body: | + bb.0: + liveins: %r0, %r1, %r2 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = COPY %r2 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2 + + %3(s32) = G_MUL %0, %1 + %4(s32) = G_SUB %2, %3 + ; CHECK: [[VREGM:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGR:%[0-9]+]] = SUBrr [[VREGZ]], [[VREGM]], 14, _, _ + + %r0 = COPY %4(s32) + ; CHECK: %r0 = COPY [[VREGR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir index 66d9033a6d7cba987b1827ad3086534b51cf7f7e..16642d85d9cfd014289624b0119231035f49bc45 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -4,6 +4,10 @@ define void @test_sext_s1() { ret void } define void @test_sext_s8() { ret void } define void @test_zext_s16() { ret void } + define void @test_anyext_s8() { ret void } + define void @test_anyext_s16() { ret void } + + define void @test_trunc_s32_16() { ret void } define void @test_add_s8() { ret void } define void @test_add_s16() { ret void } @@ -12,6 +16,22 @@ define void @test_fadd_s32() #0 { ret void } define void @test_fadd_s64() #0 { ret void } + define void @test_sub_s8() { ret void } + define void @test_sub_s16() { ret void } + define void @test_sub_s32() { ret void } + + define void @test_mul_s8() #1 { ret void } + define void @test_mul_s16() #1 { ret void } + define void @test_mul_s32() #1 { ret void } + define void @test_mulv5_s32() { ret void } + + define void @test_sdiv_s32() #2 { ret void } + define void @test_udiv_s32() #2 { ret void } + + define void @test_and_s32() { ret void } + define void @test_or_s32() { ret void } + define void @test_xor_s32() { ret void } + define void @test_load_from_stack() { ret void } define void @test_load_f32() #0 { ret void } define void @test_load_f64() #0 { ret void } @@ -19,11 +39,14 @@ define void @test_stores() #0 { ret void } define void @test_gep() { ret void } - define void @test_constants() { ret void } + define void @test_constant_imm() { ret void } + define void @test_constant_cimm() { ret void } define void @test_soft_fp_double() #0 { ret void } attributes #0 = { "target-features"="+vfp2,-neonfp" } + attributes #1 = { "target-features"="+v6" } + attributes #2 = { "target-features"="+hwdiv-arm" } ... --- name: test_zext_s1 @@ -132,6 +155,86 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... --- +name: test_anyext_s8 +# CHECK-LABEL: name: test_anyext_s8 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +body: | + bb.0: + liveins: %r0 + + %0(s8) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = G_ANYEXT %0(s8) + ; CHECK: [[VREGEXT:%[0-9]+]] = COPY [[VREGX]] + + %r0 = COPY %1(s32) + ; CHECK: %r0 = COPY [[VREGEXT]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_anyext_s16 +# CHECK-LABEL: name: test_anyext_s16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +body: | + bb.0: + liveins: %r0 + + %0(s16) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = G_ANYEXT %0(s16) + ; CHECK: [[VREGEXT:%[0-9]+]] = COPY [[VREGX]] + + %r0 = COPY %1(s32) + ; CHECK: %r0 = COPY [[VREGEXT]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_trunc_s32_16 +# CHECK-LABEL: name: test_trunc_s32_16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +# CHECK-DAG: id: 0, class: gpr +# CHECK-DAG: id: 1, class: gpr +body: | + bb.0: + liveins: %r0 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s16) = G_TRUNC %0(s32) + ; CHECK: [[VREGTRUNC:%[0-9]+]] = COPY [[VREGX]] + + %r0 = COPY %1(s16) + ; CHECK: %r0 = COPY [[VREGTRUNC]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- name: test_add_s8 # CHECK-LABEL: name: test_add_s8 legalized: true @@ -142,9 +245,15 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } # CHECK-DAG: id: 0, class: gpr # CHECK-DAG: id: 1, class: gpr # CHECK-DAG: id: 2, class: gpr +# CHECK-DAG: id: 3, class: gpr +# CHECK-DAG: id: 4, class: gpr +# CHECK-DAG: id: 5, class: gpr body: | bb.0: liveins: %r0, %r1 @@ -155,11 +264,20 @@ body: | %1(s8) = COPY %r1 ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - %2(s8) = G_ADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]] = ADDrr [[VREGX]], [[VREGY]], 14, _, _ + %2(s32) = G_ANYEXT %0(s8) + ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] - %r0 = COPY %2(s8) - ; CHECK: %r0 = COPY [[VREGSUM]] + %3(s32) = G_ANYEXT %1(s8) + ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + + %4(s32) = G_ADD %2, %3 + ; CHECK: [[VREGSUM:%[0-9]+]] = ADDrr [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + + %5(s8) = G_TRUNC %4(s32) + ; CHECK: [[VREGSUMTR:%[0-9]+]] = COPY [[VREGSUM]] + + %r0 = COPY %5(s8) + ; CHECK: %r0 = COPY [[VREGSUMTR]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 @@ -175,9 +293,15 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } # CHECK-DAG: id: 0, class: gpr # CHECK-DAG: id: 1, class: gpr # CHECK-DAG: id: 2, class: gpr +# CHECK-DAG: id: 3, class: gpr +# CHECK-DAG: id: 4, class: gpr +# CHECK-DAG: id: 5, class: gpr body: | bb.0: liveins: %r0, %r1 @@ -188,11 +312,20 @@ body: | %1(s16) = COPY %r1 ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - %2(s16) = G_ADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]] = ADDrr [[VREGX]], [[VREGY]], 14, _, _ + %2(s32) = G_ANYEXT %0(s16) + ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] - %r0 = COPY %2(s16) - ; CHECK: %r0 = COPY [[VREGSUM]] + %3(s32) = G_ANYEXT %1(s16) + ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + + %4(s32) = G_ADD %2, %3 + ; CHECK: [[VREGSUM:%[0-9]+]] = ADDrr [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + + %5(s16) = G_TRUNC %4(s32) + ; CHECK: [[VREGSUMTR:%[0-9]+]] = COPY [[VREGSUM]] + + %r0 = COPY %5(s16) + ; CHECK: %r0 = COPY [[VREGSUMTR]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 @@ -297,6 +430,462 @@ body: | ; CHECK: BX_RET 14, _, implicit %d0 ... --- +name: test_sub_s8 +# CHECK-LABEL: name: test_sub_s8 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } +# CHECK-DAG: id: 0, class: gpr +# CHECK-DAG: id: 1, class: gpr +# CHECK-DAG: id: 2, class: gpr +# CHECK-DAG: id: 3, class: gpr +# CHECK-DAG: id: 4, class: gpr +# CHECK-DAG: id: 5, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s8) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_ANYEXT %0(s8) + ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] + + %3(s32) = G_ANYEXT %1(s8) + ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + + %4(s32) = G_SUB %2, %3 + ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + + %5(s8) = G_TRUNC %4(s32) + ; CHECK: [[VREGRESTR:%[0-9]+]] = COPY [[VREGRES]] + + %r0 = COPY %5(s8) + ; CHECK: %r0 = COPY [[VREGRESTR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_sub_s16 +# CHECK-LABEL: name: test_sub_s16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } +# CHECK-DAG: id: 0, class: gpr +# CHECK-DAG: id: 1, class: gpr +# CHECK-DAG: id: 2, class: gpr +# CHECK-DAG: id: 3, class: gpr +# CHECK-DAG: id: 4, class: gpr +# CHECK-DAG: id: 5, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s16) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_ANYEXT %0(s16) + ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] + + %3(s32) = G_ANYEXT %1(s16) + ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + + %4(s32) = G_SUB %2, %3 + ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + + %5(s16) = G_TRUNC %4(s32) + ; CHECK: [[VREGRESTR:%[0-9]+]] = COPY [[VREGRES]] + + %r0 = COPY %5(s16) + ; CHECK: %r0 = COPY [[VREGRESTR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_sub_s32 +# CHECK-LABEL: name: test_sub_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_SUB %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mul_s8 +# CHECK-LABEL: name: test_mul_s8 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } +# CHECK-DAG: id: 0, class: gpr +# CHECK-DAG: id: 1, class: gpr +# CHECK-DAG: id: 2, class: gprnopc +# CHECK-DAG: id: 3, class: gprnopc +# CHECK-DAG: id: 4, class: gprnopc +# CHECK-DAG: id: 5, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s8) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_ANYEXT %0(s8) + ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] + + %3(s32) = G_ANYEXT %1(s8) + ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + + %4(s32) = G_MUL %2, %3 + ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + + %5(s8) = G_TRUNC %4(s32) + ; CHECK: [[VREGRESTR:%[0-9]+]] = COPY [[VREGRES]] + + %r0 = COPY %5(s8) + ; CHECK: %r0 = COPY [[VREGRESTR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mul_s16 +# CHECK-LABEL: name: test_mul_s16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } + - { id: 3, class: gprb } + - { id: 4, class: gprb } + - { id: 5, class: gprb } +# CHECK-DAG: id: 0, class: gpr +# CHECK-DAG: id: 1, class: gpr +# CHECK-DAG: id: 2, class: gprnopc +# CHECK-DAG: id: 3, class: gprnopc +# CHECK-DAG: id: 4, class: gprnopc +# CHECK-DAG: id: 5, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s16) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_ANYEXT %0(s16) + ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] + + %3(s32) = G_ANYEXT %1(s16) + ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + + %4(s32) = G_MUL %2, %3 + ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + + %5(s16) = G_TRUNC %4(s32) + ; CHECK: [[VREGRESTR:%[0-9]+]] = COPY [[VREGRES]] + + %r0 = COPY %5(s16) + ; CHECK: %r0 = COPY [[VREGRESTR]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mul_s32 +# CHECK-LABEL: name: test_mul_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gprnopc +# CHECK: id: 1, class: gprnopc +# CHECK: id: 2, class: gprnopc +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_MUL %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_mulv5_s32 +# CHECK-LABEL: name: test_mulv5_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gprnopc +# CHECK: id: 1, class: gprnopc +# CHECK: id: 2, class: gprnopc +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_MUL %0, %1 + ; CHECK: early-clobber [[VREGRES:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_sdiv_s32 +# CHECK-LABEL: name: test_sdiv_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_SDIV %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = SDIV [[VREGX]], [[VREGY]], 14, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_udiv_s32 +# CHECK-LABEL: name: test_udiv_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_UDIV %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = UDIV [[VREGX]], [[VREGY]], 14, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_and_s32 +# CHECK-LABEL: name: test_and_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_AND %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = ANDrr [[VREGX]], [[VREGY]], 14, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_or_s32 +# CHECK-LABEL: name: test_or_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_OR %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = ORRrr [[VREGX]], [[VREGY]], 14, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- +name: test_xor_s32 +# CHECK-LABEL: name: test_xor_s32 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } + - { id: 2, class: gprb } +# CHECK: id: 0, class: gpr +# CHECK: id: 1, class: gpr +# CHECK: id: 2, class: gpr +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + + %2(s32) = G_XOR %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]] = EORrr [[VREGX]], [[VREGY]], 14, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- name: test_load_from_stack # CHECK-LABEL: name: test_load_from_stack legalized: true @@ -316,8 +905,8 @@ fixedStack: - { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false } - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } - { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false } -# CHECK-DAG: id: [[FI1:[0-9]+]], offset: 0 -# CHECK-DAG: id: [[FI32:[0-9]+]], offset: 8 +# CHECK-DAG: id: [[FI1:[0-9]+]], type: default, offset: 0, size: 1 +# CHECK-DAG: id: [[FI32:[0-9]+]], type: default, offset: 8 body: | bb.0: liveins: %r0, %r1, %r2, %r3 @@ -473,8 +1062,8 @@ body: | BX_RET 14, _, implicit %r0 ... --- -name: test_constants -# CHECK-LABEL: name: test_constants +name: test_constant_imm +# CHECK-LABEL: name: test_constant_imm legalized: true regBankSelected: true selected: false @@ -491,6 +1080,26 @@ body: | BX_RET 14, _, implicit %r0 ... --- +name: test_constant_cimm +# CHECK-LABEL: name: test_constant_cimm +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } +# CHECK: id: [[C:[0-9]+]], class: gpr +body: | + bb.0: + ; Adding a type on G_CONSTANT changes its operand from an Imm into a CImm. + ; We still want to see the same thing in the output though. + %0(s32) = G_CONSTANT i32 42 + ; CHECK: %[[C]] = MOVi 42, 14, _, _ + + %r0 = COPY %0(s32) + BX_RET 14, _, implicit %r0 +... +--- name: test_soft_fp_double # CHECK-LABEL: name: test_soft_fp_double legalized: true @@ -518,13 +1127,11 @@ body: | %1(s32) = COPY %r3 ; CHECK: [[IN2:%[0-9]+]] = COPY %r3 - %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 1 + %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) ; CHECK: %[[DREG]] = VMOVDRR [[IN1]], [[IN2]] - %3(s32) = G_EXTRACT %2(s64), 0 - %4(s32) = G_EXTRACT %2(s64), 32 - ; CHECK: [[OUT1:%[0-9]+]] = VGETLNi32 %[[DREG]], 0 - ; CHECK: [[OUT2:%[0-9]+]] = VGETLNi32 %[[DREG]], 1 + %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64) + ; CHECK: [[OUT1:%[0-9]+]], [[OUT2:%[0-9]+]] = VMOVRRD %[[DREG]] %r0 = COPY %3 ; CHECK: %r0 = COPY [[OUT1]] diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index a7f5ec33bee3cde982dfe20fc86097ae49a9f8a2..6663a9210b87005f4b4a4b5cb6a737a5207868bf 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE -; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG +; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE +; RUN: llc -mtriple armeb-unknown -mattr=+vfp2 -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG define void @test_void_return() { ; CHECK-LABEL: name: test_void_return @@ -35,6 +35,19 @@ entry: ret i8 %sum } +define i8 @test_sub_i8(i8 %x, i8 %y) { +; CHECK-LABEL: name: test_sub_i8 +; CHECK: liveins: %r0, %r1 +; CHECK-DAG: [[VREGX:%[0-9]+]](s8) = COPY %r0 +; CHECK-DAG: [[VREGY:%[0-9]+]](s8) = COPY %r1 +; CHECK: [[RES:%[0-9]+]](s8) = G_SUB [[VREGX]], [[VREGY]] +; CHECK: %r0 = COPY [[RES]](s8) +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %res = sub i8 %x, %y + ret i8 %res +} + define signext i8 @test_return_sext_i8(i8 %x) { ; CHECK-LABEL: name: test_return_sext_i8 ; CHECK: liveins: %r0 @@ -59,6 +72,19 @@ entry: ret i16 %sum } +define i16 @test_sub_i16(i16 %x, i16 %y) { +; CHECK-LABEL: name: test_sub_i16 +; CHECK: liveins: %r0, %r1 +; CHECK-DAG: [[VREGX:%[0-9]+]](s16) = COPY %r0 +; CHECK-DAG: [[VREGY:%[0-9]+]](s16) = COPY %r1 +; CHECK: [[RES:%[0-9]+]](s16) = G_SUB [[VREGX]], [[VREGY]] +; CHECK: %r0 = COPY [[RES]](s16) +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %res = sub i16 %x, %y + ret i16 %res +} + define zeroext i16 @test_return_zext_i16(i16 %x) { ; CHECK-LABEL: name: test_return_zext_i16 ; CHECK: liveins: %r0 @@ -83,16 +109,29 @@ entry: ret i32 %sum } +define i32 @test_sub_i32(i32 %x, i32 %y) { +; CHECK-LABEL: name: test_sub_i32 +; CHECK: liveins: %r0, %r1 +; CHECK-DAG: [[VREGX:%[0-9]+]](s32) = COPY %r0 +; CHECK-DAG: [[VREGY:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[RES:%[0-9]+]](s32) = G_SUB [[VREGX]], [[VREGY]] +; CHECK: %r0 = COPY [[RES]](s32) +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %res = sub i32 %x, %y + ret i32 %res +} + define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { ; CHECK-LABEL: name: test_stack_args ; CHECK: fixedStack: ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 4 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 4 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[VREGP2:%[0-9]+]]{{.*}} = COPY %r2 -; CHECK: [[FIP5:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]]{{.*}} = G_LOAD [[FIP5]] -; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP2]], [[VREGP5]] +; CHECK: [[VREGP2:%[0-9]+]](s32) = COPY %r2 +; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]]{{.*}}load 4 +; CHECK: [[SUM:%[0-9]+]](s32) = G_ADD [[VREGP2]], [[VREGP5]] ; CHECK: %r0 = COPY [[SUM]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -107,10 +146,11 @@ define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[VREGP1:%[0-9]+]]{{.*}} = COPY %r1 -; CHECK: [[FIP5:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]]{{.*}} = G_LOAD [[FIP5]](p0) -; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP1]], [[VREGP5]] +; CHECK: [[VREGP1:%[0-9]+]](s16) = COPY %r1 +; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5EXT:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 +; CHECK: [[VREGP5:%[0-9]+]](s16) = G_TRUNC [[VREGP5EXT]] +; CHECK: [[SUM:%[0-9]+]](s16) = G_ADD [[VREGP1]], [[VREGP5]] ; CHECK: %r0 = COPY [[SUM]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -125,10 +165,11 @@ define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[VREGP2:%[0-9]+]]{{.*}} = COPY %r2 -; CHECK: [[FIP4:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P4]] -; CHECK: [[VREGP4:%[0-9]+]]{{.*}} = G_LOAD [[FIP4]](p0) -; CHECK: [[SUM:%[0-9]+]]{{.*}} = G_ADD [[VREGP2]], [[VREGP4]] +; CHECK: [[VREGP2:%[0-9]+]](s8) = COPY %r2 +; CHECK: [[FIP4:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P4]] +; CHECK: [[VREGP4EXT:%[0-9]+]](s32) = G_LOAD [[FIP4]](p0){{.*}}load 4 +; CHECK: [[VREGP4:%[0-9]+]](s8) = G_TRUNC [[VREGP4EXT]] +; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[VREGP2]], [[VREGP4]] ; CHECK: %r0 = COPY [[SUM]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -136,11 +177,46 @@ entry: ret i8 %sum } +define i8 @test_stack_args_noext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, + i8 %p4, i16 %p5) { +; CHECK-LABEL: name: test_stack_args_noext +; CHECK: fixedStack: +; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 +; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[VREGP2:%[0-9]+]](s8) = COPY %r2 +; CHECK: [[FIP4:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P4]] +; CHECK: [[VREGP4:%[0-9]+]](s8) = G_LOAD [[FIP4]](p0){{.*}}load 1 +; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[VREGP2]], [[VREGP4]] +; CHECK: %r0 = COPY [[SUM]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %sum = add i8 %p2, %p4 + ret i8 %sum +} + +define zeroext i16 @test_stack_args_extend_the_extended(i32 %p0, i16 %p1, i8 %p2, i1 %p3, + i8 signext %p4, i16 signext %p5) { +; CHECK-LABEL: name: test_stack_args_extend_the_extended +; CHECK: fixedStack: +; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 +; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5SEXT:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 +; CHECK: [[VREGP5:%[0-9]+]](s16) = G_TRUNC [[VREGP5SEXT]] +; CHECK: [[VREGP5ZEXT:%[0-9]+]](s32) = G_ZEXT [[VREGP5]] +; CHECK: %r0 = COPY [[VREGP5ZEXT]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + ret i16 %p5 +} + define i16 @test_ptr_arg(i16* %p) { ; CHECK-LABEL: name: test_ptr_arg ; CHECK: liveins: %r0 ; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0 -; CHECK: [[VREGV:%[0-9]+]](s16) = G_LOAD [[VREGP]](p0) +; CHECK: [[VREGV:%[0-9]+]](s16) = G_LOAD [[VREGP]](p0){{.*}}load 2 entry: %v = load i16, i16* %p ret i16 %v @@ -151,7 +227,7 @@ define i32* @test_ptr_ret(i32** %p) { ; CHECK-LABEL: name: test_ptr_ret ; CHECK: liveins: %r0 ; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0 -; CHECK: [[VREGV:%[0-9]+]](p0) = G_LOAD [[VREGP]](p0) +; CHECK: [[VREGV:%[0-9]+]](p0) = G_LOAD [[VREGP]](p0){{.*}}load 4 ; CHECK: %r0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -164,9 +240,9 @@ define i32 @test_ptr_arg_on_stack(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32* %p) { ; CHECK: fixedStack: ; CHECK: id: [[P:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[FIP:%[0-9]+]]{{.*}} = G_FRAME_INDEX %fixed-stack.[[P]] -; CHECK: [[VREGP:%[0-9]+]](p0) = G_LOAD [[FIP]](p0) -; CHECK: [[VREGV:%[0-9]+]](s32) = G_LOAD [[VREGP]](p0) +; CHECK: [[FIP:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P]] +; CHECK: [[VREGP:%[0-9]+]](p0) = G_LOAD [[FIP]](p0){{.*}}load 4 +; CHECK: [[VREGV:%[0-9]+]](s32) = G_LOAD [[VREGP]](p0){{.*}}load 4 ; CHECK: %r0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -183,7 +259,7 @@ define arm_aapcscc float @test_float_aapcscc(float %p0, float %p1, float %p2, ; CHECK: liveins: %r0, %r1, %r2, %r3 ; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %r1 ; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0) +; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 ; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGP5]] ; CHECK: %r0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %r0 @@ -212,7 +288,7 @@ define arm_aapcs_vfpcc float @test_float_vfpcc(float %p0, float %p1, float %p2, ; CHECK: liveins: %s0, %s1, %s2, %s3, %s4, %s5, %s6, %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15 ; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %s1 ; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] -; CHECK: [[VREGQ1:%[0-9]+]](s32) = G_LOAD [[FIQ1]](p0) +; CHECK: [[VREGQ1:%[0-9]+]](s32) = G_LOAD [[FIQ1]](p0){{.*}}load 4 ; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: %s0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %s0 @@ -233,7 +309,7 @@ define arm_aapcs_vfpcc double @test_double_vfpcc(double %p0, double %p1, double ; CHECK: liveins: %d0, %d1, %d2, %d3, %d4, %d5, %d6, %d7 ; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d1 ; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] -; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0) +; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: %d0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %d0 @@ -253,15 +329,13 @@ define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p ; CHECK: liveins: %r0, %r1, %r2, %r3 ; CHECK-DAG: [[VREGP1LO:%[0-9]+]](s32) = COPY %r2 ; CHECK-DAG: [[VREGP1HI:%[0-9]+]](s32) = COPY %r3 -; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1LO]](s32), 0, [[VREGP1HI]](s32), 32 -; BIG: [[VREGP1:%[0-9]+]](s64) = G_SEQUENCE [[VREGP1HI]](s32), 0, [[VREGP1LO]](s32), 32 +; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1LO]](s32), [[VREGP1HI]](s32) +; BIG: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1HI]](s32), [[VREGP1LO]](s32) ; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0) +; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGP5]] -; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 -; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 @@ -283,7 +357,7 @@ define arm_aapcs_vfpcc double @test_double_gap_vfpcc(double %p0, float %filler, ; CHECK: liveins: %d0, %d2, %d3, %d4, %d5, %d6, %d7, %s2 ; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d2 ; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] -; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0) +; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: %d0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %d0 @@ -300,15 +374,13 @@ define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0, ; CHECK: liveins: %r0, %r2, %r3 ; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r2 ; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r3 -; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32 -; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32 +; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) +; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) ; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]] -; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0) +; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]] -; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 -; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 @@ -325,15 +397,13 @@ define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler, ; CHECK: liveins: %r0, %r1, %r2 ; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r0 ; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r1 -; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0LO]](s32), 0, [[VREGP0HI]](s32), 32 -; BIG: [[VREGP0:%[0-9]+]](s64) = G_SEQUENCE [[VREGP0HI]](s32), 0, [[VREGP0LO]](s32), 32 +; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) +; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) ; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]] -; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0) +; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8 ; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]] -; LITTLE: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; LITTLE: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 -; BIG: [[VREGVHI:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 0 -; BIG: [[VREGVLO:%[0-9]+]](s32) = G_EXTRACT [[VREGV]](s64), 32 +; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 @@ -344,9 +414,11 @@ entry: define arm_aapcscc void @test_indirect_call(void() *%fptr) { ; CHECK-LABEL: name: test_indirect_call -; CHECK: [[FPTR:%[0-9]+]](p0) = COPY %r0 -; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BLX [[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp +; CHECK: registers: +; CHECK-NEXT: id: [[FPTR:[0-9]+]], class: gpr +; CHECK: %[[FPTR]](p0) = COPY %r0 +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: BLX %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp ; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp entry: notail call arm_aapcscc void %fptr() @@ -357,7 +429,7 @@ declare arm_aapcscc void @call_target() define arm_aapcscc void @test_direct_call() { ; CHECK-LABEL: name: test_direct_call -; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp ; CHECK: BLX @call_target, csr_aapcs, implicit-def %lr, implicit %sp ; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp entry: @@ -371,7 +443,7 @@ define arm_aapcscc i32* @test_call_simple_reg_params(i32 *%a, i32 %b) { ; CHECK-LABEL: name: test_call_simple_reg_params ; CHECK-DAG: [[AVREG:%[0-9]+]](p0) = COPY %r0 ; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r1 -; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp ; CHECK-DAG: %r0 = COPY [[BVREG]] ; CHECK-DAG: %r1 = COPY [[AVREG]] ; CHECK: BLX @simple_reg_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0 @@ -390,7 +462,7 @@ define arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) { ; CHECK-LABEL: name: test_call_simple_stack_params ; CHECK-DAG: [[AVREG:%[0-9]+]](p0) = COPY %r0 ; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r1 -; CHECK: ADJCALLSTACKDOWN 8, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 8, 0, 14, _, implicit-def %sp, implicit %sp ; CHECK-DAG: %r0 = COPY [[BVREG]] ; CHECK-DAG: %r1 = COPY [[AVREG]] ; CHECK-DAG: %r2 = COPY [[BVREG]] @@ -420,7 +492,7 @@ define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) { ; CHECK-DAG: [[AVREG:%[0-9]+]](s8) = COPY %r0 ; CHECK-DAG: [[BVREG:%[0-9]+]](s16) = COPY %r1 ; CHECK-DAG: [[CVREG:%[0-9]+]](s1) = COPY %r2 -; CHECK: ADJCALLSTACKDOWN 20, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 20, 0, 14, _, implicit-def %sp, implicit %sp ; CHECK: [[SEXTA:%[0-9]+]](s32) = G_SEXT [[AVREG]](s8) ; CHECK: %r0 = COPY [[SEXTA]] ; CHECK: [[ZEXTA:%[0-9]+]](s32) = G_ZEXT [[AVREG]](s8) @@ -471,7 +543,7 @@ define arm_aapcs_vfpcc double @test_call_vfpcc_fp_params(double %a, float %b) { ; CHECK-LABEL: name: test_call_vfpcc_fp_params ; CHECK-DAG: [[AVREG:%[0-9]+]](s64) = COPY %d0 ; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %s2 -; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp ; CHECK-DAG: %s0 = COPY [[BVREG]] ; CHECK-DAG: %d1 = COPY [[AVREG]] ; CHECK: BLX @vfpcc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %d1, implicit-def %d0 @@ -490,13 +562,12 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { ; CHECK-LABEL: name: test_call_aapcs_fp_params ; CHECK-DAG: [[A1:%[0-9]+]](s32) = COPY %r0 ; CHECK-DAG: [[A2:%[0-9]+]](s32) = COPY %r1 -; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A1]](s32), 0, [[A2]](s32), 32 -; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_SEQUENCE [[A2]](s32), 0, [[A1]](s32), 32 +; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A1]](s32), [[A2]](s32) +; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A2]](s32), [[A1]](s32) ; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r2 -; CHECK: ADJCALLSTACKDOWN 16, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 16, 0, 14, _, implicit-def %sp, implicit %sp ; CHECK-DAG: %r0 = COPY [[BVREG]] -; CHECK-DAG: [[A1:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 0 -; CHECK-DAG: [[A2:%[0-9]+]](s32) = G_EXTRACT [[AVREG]](s64), 32 +; CHECK-DAG: [[A1:%[0-9]+]](s32), [[A2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[AVREG]](s64) ; LITTLE-DAG: %r2 = COPY [[A1]] ; LITTLE-DAG: %r3 = COPY [[A2]] ; BIG-DAG: %r2 = COPY [[A2]] @@ -512,11 +583,10 @@ define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { ; CHECK: BLX @aapcscc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 ; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r0 ; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r1 -; LITTLE: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R1]](s32), 0, [[R2]](s32), 32 -; BIG: [[RVREG:%[0-9]+]](s64) = G_SEQUENCE [[R2]](s32), 0, [[R1]](s32), 32 +; LITTLE: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R1]](s32), [[R2]](s32) +; BIG: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R2]](s32), [[R1]](s32) ; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 0 -; CHECK: [[R2:%[0-9]+]](s32) = G_EXTRACT [[RVREG]](s64), 32 +; CHECK: [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[RVREG]](s64) ; LITTLE-DAG: %r0 = COPY [[R1]] ; LITTLE-DAG: %r1 = COPY [[R2]] ; BIG-DAG: %r0 = COPY [[R2]] @@ -532,7 +602,7 @@ declare arm_aapcscc float @different_call_conv_target(float) define arm_aapcs_vfpcc float @test_call_different_call_conv(float %x) { ; CHECK-LABEL: name: test_call_different_call_conv ; CHECK: [[X:%[0-9]+]](s32) = COPY %s0 -; CHECK: ADJCALLSTACKDOWN 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp ; CHECK: %r0 = COPY [[X]] ; CHECK: BLX @different_call_conv_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit-def %r0 ; CHECK: [[R:%[0-9]+]](s32) = COPY %r0 @@ -544,6 +614,299 @@ entry: ret float %r } +declare arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32]) + +define arm_aapcscc [3 x i32] @test_tiny_int_arrays([2 x i32] %arr) { +; CHECK-LABEL: name: test_tiny_int_arrays +; CHECK: liveins: %r0, %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[ARG_ARR:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR]](s64) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: BLX @tiny_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[R2:%[0-9]+]](s32) = COPY %r2 +; CHECK: [[RES_ARR:%[0-9]+]](s96) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32) +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[RES_ARR]](s96) +; FIXME: This doesn't seem correct with regard to the AAPCS docs (which say +; that composite types larger than 4 bytes should be passed through memory), +; but it's what DAGISel does. We should fix it in the common code for both. +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: %r2 = COPY [[R2]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1, implicit %r2 +entry: + %r = notail call arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32] %arr) + ret [3 x i32] %r +} + +declare arm_aapcscc void @multiple_int_arrays_target([2 x i32], [2 x i32]) + +define arm_aapcscc void @test_multiple_int_arrays([2 x i32] %arr0, [2 x i32] %arr1) { +; CHECK-LABEL: name: test_multiple_int_arrays +; CHECK: liveins: %r0, %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[R2:%[0-9]+]](s32) = COPY %r2 +; CHECK: [[R3:%[0-9]+]](s32) = COPY %r3 +; CHECK: [[ARG_ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: [[ARG_ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[R2]](s32), [[R3]](s32) +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR0]](s64) +; CHECK: [[R2:%[0-9]+]](s32), [[R3:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR1]](s64) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: %r2 = COPY [[R2]] +; CHECK: %r3 = COPY [[R3]] +; CHECK: BLX @multiple_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: BX_RET 14, _ +entry: + notail call arm_aapcscc void @multiple_int_arrays_target([2 x i32] %arr0, [2 x i32] %arr1) + ret void +} + +declare arm_aapcscc void @large_int_arrays_target([20 x i32]) + +define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) { +; CHECK-LABEL: name: test_large_int_arrays +; CHECK: fixedStack: +; The parameters live in separate stack locations, one for each element that +; doesn't fit in the registers. +; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4, +; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], type: default, offset: 60, size: 4 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r2 +; CHECK-DAG: [[R3:%[0-9]+]](s32) = COPY %r3 +; CHECK: [[FIRST_STACK_ELEMENT_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[FIRST_STACK_ID]] +; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]](s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]] +; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] +; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]](s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] +; CHECK: [[ARG_ARR:%[0-9]+]](s640) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) +; CHECK: ADJCALLSTACKDOWN 64, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32), [[R3:%[0-9]+]](s32), [[FIRST_STACK_ELEMENT:%[0-9]+]](s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR]](s640) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: %r2 = COPY [[R2]] +; CHECK: %r3 = COPY [[R3]] +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_FIRST_ELEMENT]](s32) +; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store 4 +; Match the second-to-last offset, so we can get the correct SP for the last element +; CHECK: G_CONSTANT i32 56 +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 60 +; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_LAST_ELEMENT]](s32) +; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4 +; CHECK: BLX @large_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 +; CHECK: ADJCALLSTACKUP 64, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: BX_RET 14, _ +entry: + notail call arm_aapcscc void @large_int_arrays_target([20 x i32] %arr) + ret void +} + +declare arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double]) + +define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { +; CHECK-LABEL: name: test_fp_arrays_aapcs +; CHECK: fixedStack: +; CHECK: id: [[ARR2_ID:[0-9]+]], type: default, offset: 0, size: 8, +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[ARR0_0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[ARR0_1:%[0-9]+]](s32) = COPY %r1 +; LITTLE: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_0]](s32), [[ARR0_1]](s32) +; BIG: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_1]](s32), [[ARR0_0]](s32) +; CHECK: [[ARR1_0:%[0-9]+]](s32) = COPY %r2 +; CHECK: [[ARR1_1:%[0-9]+]](s32) = COPY %r3 +; LITTLE: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_0]](s32), [[ARR1_1]](s32) +; BIG: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_1]](s32), [[ARR1_0]](s32) +; CHECK: [[ARR2_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]] +; CHECK: [[ARR2:%[0-9]+]](s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]] +; CHECK: [[ARR_MERGED:%[0-9]+]](s192) = G_MERGE_VALUES [[ARR0]](s64), [[ARR1]](s64), [[ARR2]](s64) +; CHECK: ADJCALLSTACKDOWN 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[ARR0:%[0-9]+]](s64), [[ARR1:%[0-9]+]](s64), [[ARR2:%[0-9]+]](s64) = G_UNMERGE_VALUES [[ARR_MERGED]](s192) +; CHECK: [[ARR0_0:%[0-9]+]](s32), [[ARR0_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR0]](s64) +; LITTLE: %r0 = COPY [[ARR0_0]](s32) +; LITTLE: %r1 = COPY [[ARR0_1]](s32) +; BIG: %r0 = COPY [[ARR0_1]](s32) +; BIG: %r1 = COPY [[ARR0_0]](s32) +; CHECK: [[ARR1_0:%[0-9]+]](s32), [[ARR1_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR1]](s64) +; LITTLE: %r2 = COPY [[ARR1_0]](s32) +; LITTLE: %r3 = COPY [[ARR1_1]](s32) +; BIG: %r2 = COPY [[ARR1_1]](s32) +; BIG: %r3 = COPY [[ARR1_0]](s32) +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[ARR2_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK: [[ARR2_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[ARR2_OFFSET]](s32) +; CHECK: G_STORE [[ARR2]](s64), [[ARR2_ADDR]](p0){{.*}}store 8 +; CHECK: BLX @fp_arrays_aapcs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[R_MERGED:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[R_MERGED]](s64) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +entry: + %r = notail call arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double] %arr) + ret [2 x float] %r +} + +declare arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double], [3 x float], [4 x double]) + +define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) { +; CHECK-LABEL: name: test_fp_arrays_aapcs_vfp +; CHECK: fixedStack: +; CHECK-DAG: id: [[Z0_ID:[0-9]+]], type: default, offset: 0, size: 8, +; CHECK-DAG: id: [[Z1_ID:[0-9]+]], type: default, offset: 8, size: 8, +; CHECK-DAG: id: [[Z2_ID:[0-9]+]], type: default, offset: 16, size: 8, +; CHECK-DAG: id: [[Z3_ID:[0-9]+]], type: default, offset: 24, size: 8, +; CHECK: liveins: %d0, %d1, %d2, %s6, %s7, %s8 +; CHECK: [[X0:%[0-9]+]](s64) = COPY %d0 +; CHECK: [[X1:%[0-9]+]](s64) = COPY %d1 +; CHECK: [[X2:%[0-9]+]](s64) = COPY %d2 +; CHECK: [[Y0:%[0-9]+]](s32) = COPY %s6 +; CHECK: [[Y1:%[0-9]+]](s32) = COPY %s7 +; CHECK: [[Y2:%[0-9]+]](s32) = COPY %s8 +; CHECK: [[Z0_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Z0_ID]] +; CHECK: [[Z0:%[0-9]+]](s64) = G_LOAD [[Z0_FI]]{{.*}}load 8 +; CHECK: [[Z1_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Z1_ID]] +; CHECK: [[Z1:%[0-9]+]](s64) = G_LOAD [[Z1_FI]]{{.*}}load 8 +; CHECK: [[Z2_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Z2_ID]] +; CHECK: [[Z2:%[0-9]+]](s64) = G_LOAD [[Z2_FI]]{{.*}}load 8 +; CHECK: [[Z3_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Z3_ID]] +; CHECK: [[Z3:%[0-9]+]](s64) = G_LOAD [[Z3_FI]]{{.*}}load 8 +; CHECK: [[X_ARR:%[0-9]+]](s192) = G_MERGE_VALUES [[X0]](s64), [[X1]](s64), [[X2]](s64) +; CHECK: [[Y_ARR:%[0-9]+]](s96) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32), [[Y2]](s32) +; CHECK: [[Z_ARR:%[0-9]+]](s256) = G_MERGE_VALUES [[Z0]](s64), [[Z1]](s64), [[Z2]](s64), [[Z3]](s64) +; CHECK: ADJCALLSTACKDOWN 32, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[X0:%[0-9]+]](s64), [[X1:%[0-9]+]](s64), [[X2:%[0-9]+]](s64) = G_UNMERGE_VALUES [[X_ARR]](s192) +; CHECK: [[Y0:%[0-9]+]](s32), [[Y1:%[0-9]+]](s32), [[Y2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[Y_ARR]](s96) +; CHECK: [[Z0:%[0-9]+]](s64), [[Z1:%[0-9]+]](s64), [[Z2:%[0-9]+]](s64), [[Z3:%[0-9]+]](s64) = G_UNMERGE_VALUES [[Z_ARR]](s256) +; CHECK: %d0 = COPY [[X0]](s64) +; CHECK: %d1 = COPY [[X1]](s64) +; CHECK: %d2 = COPY [[X2]](s64) +; CHECK: %s6 = COPY [[Y0]](s32) +; CHECK: %s7 = COPY [[Y1]](s32) +; CHECK: %s8 = COPY [[Y2]](s32) +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[Z0_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK: [[Z0_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z0_OFFSET]](s32) +; CHECK: G_STORE [[Z0]](s64), [[Z0_ADDR]](p0){{.*}}store 8 +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[Z1_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 8 +; CHECK: [[Z1_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z1_OFFSET]](s32) +; CHECK: G_STORE [[Z1]](s64), [[Z1_ADDR]](p0){{.*}}store 8 +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[Z2_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 16 +; CHECK: [[Z2_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z2_OFFSET]](s32) +; CHECK: G_STORE [[Z2]](s64), [[Z2_ADDR]](p0){{.*}}store 8 +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[Z3_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 24 +; CHECK: [[Z3_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z3_OFFSET]](s32) +; CHECK: G_STORE [[Z3]](s64), [[Z3_ADDR]](p0){{.*}}store 8 +; CHECK: BLX @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit %d2, implicit %s6, implicit %s7, implicit %s8, implicit-def %s0, implicit-def %s1, implicit-def %s2, implicit-def %s3 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %s0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %s1 +; CHECK: [[R2:%[0-9]+]](s32) = COPY %s2 +; CHECK: [[R3:%[0-9]+]](s32) = COPY %s3 +; CHECK: [[R_MERGED:%[0-9]+]](s128) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32) +; CHECK: ADJCALLSTACKUP 32, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32), [[R3:%[0-9]+]](s32) = G_UNMERGE_VALUES [[R_MERGED]](s128) +; CHECK: %s0 = COPY [[R0]] +; CHECK: %s1 = COPY [[R1]] +; CHECK: %s2 = COPY [[R2]] +; CHECK: %s3 = COPY [[R3]] +; CHECK: BX_RET 14, _, implicit %s0, implicit %s1, implicit %s2, implicit %s3 +entry: + %r = notail call arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double] %x, [3 x float] %y, [4 x double] %z) + ret [4 x float] %r +} + +declare arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr) + +define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { +; CHECK-LABEL: name: test_tough_arrays +; CHECK: fixedStack: +; The parameters live in separate stack locations, one for each element that +; doesn't fit in the registers. +; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4, +; CHECK-DAG: id: [[LAST_STACK_ID:[-0]+]], type: default, offset: 76, size: 4 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r2 +; CHECK-DAG: [[R3:%[0-9]+]](s32) = COPY %r3 +; CHECK: [[FIRST_STACK_ELEMENT_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[FIRST_STACK_ID]] +; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]](s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]] +; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] +; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]](s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] +; CHECK: [[ARG_ARR:%[0-9]+]](s768) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) +; CHECK: ADJCALLSTACKDOWN 80, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32), [[R3:%[0-9]+]](s32), [[FIRST_STACK_ELEMENT:%[0-9]+]](s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR]](s768) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: %r2 = COPY [[R2]] +; CHECK: %r3 = COPY [[R3]] +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_FIRST_ELEMENT]](s32) +; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store 4 +; Match the second-to-last offset, so we can get the correct SP for the last element +; CHECK: G_CONSTANT i32 72 +; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp +; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 76 +; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_LAST_ELEMENT]](s32) +; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4 +; CHECK: BLX @tough_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[RES_ARR:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: ADJCALLSTACKUP 80, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[RES_ARR]](s64) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +entry: + %r = notail call arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr) + ret [2 x i32*] %r +} + +declare arm_aapcscc {i32, i32} @structs_target({i32, i32}) + +define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x) { +; CHECK-LABEL: test_structs +; CHECK: liveins: %r0, %r1 +; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 +; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[X0:%[0-9]+]](s32), [[X1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[X]](s64) +; CHECK-DAG: %r0 = COPY [[X0]](s32) +; CHECK-DAG: %r1 = COPY [[X1]](s32) +; CHECK: BLX @structs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 +; CHECK: [[R:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[R]](s64) +; CHECK: %r0 = COPY [[R0]](s32) +; CHECK: %r1 = COPY [[R1]](s32) +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 + %r = notail call arm_aapcscc {i32, i32} @structs_target({i32, i32} %x) + ret {i32, i32} %r +} + define i32 @test_shufflevector_s32_v2s32(i32 %arg) { ; CHECK-LABEL: name: test_shufflevector_s32_v2s32 ; CHECK: [[ARG:%[0-9]+]](s32) = COPY %r0 @@ -623,3 +986,33 @@ define i32 @test_shufflevector_v4s32_v2s32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %res = extractelement <2 x i32> %shuffle, i32 0 ret i32 %res } + +%struct.v2s32 = type { <2 x i32> } + +define i32 @test_constantstruct_v2s32() { +; CHECK-LABEL: name: test_constantstruct_v2s32 +; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 +; CHECK: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 +; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) +; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>) + %vec = extractvalue %struct.v2s32 {<2 x i32>}, 0 + %elt = extractelement <2 x i32> %vec, i32 0 + ret i32 %elt +} + +%struct.v2s32.s32.s32 = type { <2 x i32>, i32, i32 } + +define i32 @test_constantstruct_v2s32_s32_s32() { +; CHECK-LABEL: name: test_constantstruct_v2s32_s32_s32 +; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 +; CHECK: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 +; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) +; CHECK: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 +; CHECK: [[C4:%[0-9]+]](s32) = G_CONSTANT i32 4 +; CHECK: [[CS:%[0-9]+]](s128) = G_SEQUENCE [[VEC]](<2 x s32>), 0, [[C3]](s32), 64, [[C4]](s32), 96 +; CHECK: [[EXT:%[0-9]+]](<2 x s32>) = G_EXTRACT [[CS]](s128), 0 +; CHECK: G_EXTRACT_VECTOR_ELT [[EXT]](<2 x s32>) + %vec = extractvalue %struct.v2s32.s32.s32 {<2 x i32>, i32 3, i32 4}, 0 + %elt = extractelement <2 x i32> %vec, i32 0 + ret i32 %elt +} diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel-divmod.ll b/test/CodeGen/ARM/GlobalISel/arm-isel-divmod.ll new file mode 100644 index 0000000000000000000000000000000000000000..c778caacd0f45b8b14f4c8925f466f2b10f7d612 --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-isel-divmod.ll @@ -0,0 +1,89 @@ +; We use V6 ops so we can easily check for the extensions (sxth vs bit tricks). +; RUN: llc -mtriple arm-gnueabi -mattr=+v6,+hwdiv-arm -global-isel %s -o - | FileCheck %s -check-prefixes=CHECK,HWDIV +; RUN: llc -mtriple arm-gnueabi -mattr=+v6,-hwdiv-arm -global-isel %s -o - | FileCheck %s -check-prefixes=CHECK,SOFT-AEABI +; RUN: llc -mtriple arm-gnu -mattr=+v6,+hwdiv-arm -global-isel %s -o - | FileCheck %s -check-prefixes=CHECK,HWDIV +; RUN: llc -mtriple arm-gnu -mattr=+v6,-hwdiv-arm -global-isel %s -o - | FileCheck %s -check-prefixes=CHECK,SOFT-DEFAULT + +define arm_aapcscc i32 @test_sdiv_i32(i32 %a, i32 %b) { +; CHECK-LABEL: test_sdiv_i32: +; HWDIV: sdiv r0, r0, r1 +; SOFT-AEABI: blx __aeabi_idiv +; SOFT-DEFAULT: blx __divsi3 + %r = sdiv i32 %a, %b + ret i32 %r +} + +define arm_aapcscc i32 @test_udiv_i32(i32 %a, i32 %b) { +; CHECK-LABEL: test_udiv_i32: +; HWDIV: udiv r0, r0, r1 +; SOFT-AEABI: blx __aeabi_uidiv +; SOFT-DEFAULT: blx __udivsi3 + %r = udiv i32 %a, %b + ret i32 %r +} + +define arm_aapcscc i16 @test_sdiv_i16(i16 %a, i16 %b) { +; CHECK-LABEL: test_sdiv_i16: +; CHECK-DAG: sxth r0, r0 +; CHECK-DAG: sxth r1, r1 +; HWDIV: sdiv r0, r0, r1 +; SOFT-AEABI: blx __aeabi_idiv +; SOFT-DEFAULT: blx __divsi3 + %r = sdiv i16 %a, %b + ret i16 %r +} + +define arm_aapcscc i16 @test_udiv_i16(i16 %a, i16 %b) { +; CHECK-LABEL: test_udiv_i16: +; CHECK-DAG: uxth r0, r0 +; CHECK-DAG: uxth r1, r1 +; HWDIV: udiv r0, r0, r1 +; SOFT-AEABI: blx __aeabi_uidiv +; SOFT-DEFAULT: blx __udivsi3 + %r = udiv i16 %a, %b + ret i16 %r +} + +define arm_aapcscc i8 @test_sdiv_i8(i8 %a, i8 %b) { +; CHECK-LABEL: test_sdiv_i8: +; CHECK-DAG: sxtb r0, r0 +; CHECK-DAG: sxtb r1, r1 +; HWDIV: sdiv r0, r0, r1 +; SOFT-AEABI: blx __aeabi_idiv +; SOFT-DEFAULT: blx __divsi3 + %r = sdiv i8 %a, %b + ret i8 %r +} + +define arm_aapcscc i8 @test_udiv_i8(i8 %a, i8 %b) { +; CHECK-LABEL: test_udiv_i8: +; CHECK-DAG: uxtb r0, r0 +; CHECK-DAG: uxtb r1, r1 +; HWDIV: udiv r0, r0, r1 +; SOFT-AEABI: blx __aeabi_uidiv +; SOFT-DEFAULT: blx __udivsi3 + %r = udiv i8 %a, %b + ret i8 %r +} + +define arm_aapcscc i32 @test_srem_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_srem_i32: +; HWDIV: sdiv [[Q:r[0-9]+]], r0, r1 +; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 +; HWDIV: sub r0, r0, [[P]] +; SOFT-AEABI: blx __aeabi_idivmod +; SOFT-DEFAULT: blx __modsi3 + %r = srem i32 %x, %y + ret i32 %r +} + +define arm_aapcscc i32 @test_urem_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_urem_i32: +; HWDIV: udiv [[Q:r[0-9]+]], r0, r1 +; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 +; HWDIV: sub r0, r0, [[P]] +; SOFT-AEABI: blx __aeabi_uidivmod +; SOFT-DEFAULT: blx __umodsi3 + %r = urem i32 %x, %y + ret i32 %r +} diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll index 236dcbeb84c524652e911262cd6b28d07d80d10f..6ddc29a3bbbae776c40804103788a67ac0f09ca2 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple arm-unknown -mattr=+vfp2 -global-isel %s -o - | FileCheck %s +; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v6 -global-isel %s -o - | FileCheck %s define void @test_void_return() { ; CHECK-LABEL: test_void_return: @@ -7,6 +7,14 @@ entry: ret void } +define i32 @test_constant_return_i32() { +; CHECK-LABEL: test_constant_return_i32: +; CHECK: mov r0, #42 +; CHECK: bx lr +entry: + ret i32 42 +} + define zeroext i1 @test_zext_i1(i1 %x) { ; CHECK-LABEL: test_zext_i1 ; CHECK: and r0, r0, #1 @@ -40,6 +48,30 @@ entry: ret i16 %x } +define void @test_trunc_i32_i16(i32 %v, i16 *%p) { +; CHECK-LABEL: test_trunc_i32_i16: +; The trunc doesn't result in any instructions, but we +; expect the store to be explicitly 16-bit. +; CHECK: strh r0, [r1] +; CHECK: bx lr +entry: + %v16 = trunc i32 %v to i16 + store i16 %v16, i16 *%p + ret void +} + +define void @test_trunc_i32_i8(i32 %v, i8 *%p) { +; CHECK-LABEL: test_trunc_i32_i8: +; The trunc doesn't result in any instructions, but we +; expect the store to be explicitly 8-bit. +; CHECK: strb r0, [r1] +; CHECK: bx lr +entry: + %v8 = trunc i32 %v to i8 + store i8 %v8, i8 *%p + ret void +} + define i8 @test_add_i8(i8 %x, i8 %y) { ; CHECK-LABEL: test_add_i8: ; CHECK: add r0, r0, r1 @@ -67,6 +99,141 @@ entry: ret i32 %sum } +define i8 @test_sub_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_sub_i8: +; CHECK: sub r0, r0, r1 +; CHECK: bx lr +entry: + %sum = sub i8 %x, %y + ret i8 %sum +} + +define i16 @test_sub_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_sub_i16: +; CHECK: sub r0, r0, r1 +; CHECK: bx lr +entry: + %sum = sub i16 %x, %y + ret i16 %sum +} + +define i32 @test_sub_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_sub_i32: +; CHECK: sub r0, r0, r1 +; CHECK: bx lr +entry: + %sum = sub i32 %x, %y + ret i32 %sum +} + +define i8 @test_mul_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_mul_i8: +; CHECK: mul r0, r0, r1 +; CHECK: bx lr +entry: + %sum = mul i8 %x, %y + ret i8 %sum +} + +define i16 @test_mul_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_mul_i16: +; CHECK: mul r0, r0, r1 +; CHECK: bx lr +entry: + %sum = mul i16 %x, %y + ret i16 %sum +} + +define i32 @test_mul_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_mul_i32: +; CHECK: mul r0, r0, r1 +; CHECK: bx lr +entry: + %sum = mul i32 %x, %y + ret i32 %sum +} + +define i8 @test_and_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_and_i8: +; CHECK: and r0, r0, r1 +; CHECK: bx lr +entry: + %sum = and i8 %x, %y + ret i8 %sum +} + +define i16 @test_and_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_and_i16: +; CHECK: and r0, r0, r1 +; CHECK: bx lr +entry: + %sum = and i16 %x, %y + ret i16 %sum +} + +define i32 @test_and_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_and_i32: +; CHECK: and r0, r0, r1 +; CHECK: bx lr +entry: + %sum = and i32 %x, %y + ret i32 %sum +} + +define i8 @test_or_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_or_i8: +; CHECK: orr r0, r0, r1 +; CHECK: bx lr +entry: + %sum = or i8 %x, %y + ret i8 %sum +} + +define i16 @test_or_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_or_i16: +; CHECK: orr r0, r0, r1 +; CHECK: bx lr +entry: + %sum = or i16 %x, %y + ret i16 %sum +} + +define i32 @test_or_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_or_i32: +; CHECK: orr r0, r0, r1 +; CHECK: bx lr +entry: + %sum = or i32 %x, %y + ret i32 %sum +} + +define i8 @test_xor_i8(i8 %x, i8 %y) { +; CHECK-LABEL: test_xor_i8: +; CHECK: eor r0, r0, r1 +; CHECK: bx lr +entry: + %sum = xor i8 %x, %y + ret i8 %sum +} + +define i16 @test_xor_i16(i16 %x, i16 %y) { +; CHECK-LABEL: test_xor_i16: +; CHECK: eor r0, r0, r1 +; CHECK: bx lr +entry: + %sum = xor i16 %x, %y + ret i16 %sum +} + +define i32 @test_xor_i32(i32 %x, i32 %y) { +; CHECK-LABEL: test_xor_i32: +; CHECK: eor r0, r0, r1 +; CHECK: bx lr +entry: + %sum = xor i32 %x, %y + ret i32 %sum +} + define i32 @test_stack_args_i32(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) { ; CHECK-LABEL: test_stack_args_i32: ; CHECK: add [[P5ADDR:r[0-9]+]], sp, #4 @@ -111,6 +278,17 @@ entry: ret i8 %sum } +define i8 @test_stack_args_noext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, i8 %p4) { +; CHECK-LABEL: test_stack_args_noext: +; CHECK: mov [[P4ADDR:r[0-9]+]], sp +; CHECK: ldrb [[P4:r[0-9]+]], {{.*}}[[P4ADDR]] +; CHECK: add r0, r2, [[P4]] +; CHECK: bx lr +entry: + %sum = add i8 %p2, %p4 + ret i8 %sum +} + define i32 @test_ptr_arg_in_reg(i32* %p) { ; CHECK-LABEL: test_ptr_arg_in_reg: ; CHECK: ldr r0, [r0] @@ -175,8 +353,7 @@ define arm_aapcscc double @test_double_softfp(double %f0, double %f1) { ; CHECK-DAG: vmov [[F0:d[0-9]+]], r0, r1 ; CHECK-DAG: vmov [[F1:d[0-9]+]], r2, r3 ; CHECK: vadd.f64 [[FV:d[0-9]+]], [[F0]], [[F1]] -; CHECK: vmov.32 r0, [[FV]][0] -; CHECK: vmov.32 r1, [[FV]][1] +; CHECK: vmov r0, r1, [[FV]] ; CHECK: bx lr entry: %v = fadd double %f0, %f1 diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir new file mode 100644 index 0000000000000000000000000000000000000000..c93e7fa0ec560dc3791e9440b767710038a3e6eb --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir @@ -0,0 +1,305 @@ +# RUN: llc -mtriple arm-linux-gnueabi -mattr=+hwdiv-arm -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,HWDIV +# RUN: llc -mtriple arm-linux-gnueabi -mattr=-hwdiv-arm -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,SOFT,SOFT-AEABI +# RUN: llc -mtriple arm-linux-gnu -mattr=+hwdiv-arm -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,HWDIV +# RUN: llc -mtriple arm-linux-gnu -mattr=-hwdiv-arm -global-isel -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,SOFT,SOFT-DEFAULT +--- | + define void @test_sdiv_i32() { ret void } + define void @test_udiv_i32() { ret void } + + define void @test_sdiv_i16() { ret void } + define void @test_udiv_i16() { ret void } + + define void @test_sdiv_i8() { ret void } + define void @test_udiv_i8() { ret void } + + define void @test_srem_i32() { ret void } + define void @test_urem_i32() { ret void } +... +--- +name: test_sdiv_i32 +# CHECK-LABEL: name: test_sdiv_i32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; HWDIV: [[R:%[0-9]+]](s32) = G_SDIV [[X]], [[Y]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X]] + ; SOFT-DAG: %r1 = COPY [[Y]] + ; SOFT-AEABI: BLX $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT-DEFAULT: BLX $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + %2(s32) = G_SDIV %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_udiv_i32 +# CHECK-LABEL: name: test_udiv_i32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; HWDIV: [[R:%[0-9]+]](s32) = G_UDIV [[X]], [[Y]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X]] + ; SOFT-DAG: %r1 = COPY [[Y]] + ; SOFT-AEABI: BLX $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT-DEFAULT: BLX $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + %2(s32) = G_UDIV %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_sdiv_i16 +# CHECK-LABEL: name: test_sdiv_i16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s16) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s16) = COPY %r1 + ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_SEXT [[X]](s16) + ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_SEXT [[Y]](s16) + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + ; HWDIV: [[R32:%[0-9]+]](s32) = G_SDIV [[X32]], [[Y32]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X32]] + ; SOFT-DAG: %r1 = COPY [[Y32]] + ; SOFT-AEABI: BLX $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-DEFAULT: BLX $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + ; CHECK: [[R:%[0-9]+]](s16) = G_TRUNC [[R32]] + %2(s16) = G_SDIV %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 +... +--- +name: test_udiv_i16 +# CHECK-LABEL: name: test_udiv_i16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s16) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s16) = COPY %r1 + ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_ZEXT [[X]](s16) + ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_ZEXT [[Y]](s16) + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + ; HWDIV: [[R32:%[0-9]+]](s32) = G_UDIV [[X32]], [[Y32]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X32]] + ; SOFT-DAG: %r1 = COPY [[Y32]] + ; SOFT-AEABI: BLX $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-DEFAULT: BLX $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + ; CHECK: [[R:%[0-9]+]](s16) = G_TRUNC [[R32]] + %2(s16) = G_UDIV %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 +... +--- +name: test_sdiv_i8 +# CHECK-LABEL: name: test_sdiv_i8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s8) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s8) = COPY %r1 + ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_SEXT [[X]](s8) + ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_SEXT [[Y]](s8) + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + ; HWDIV: [[R32:%[0-9]+]](s32) = G_SDIV [[X32]], [[Y32]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X32]] + ; SOFT-DAG: %r1 = COPY [[Y32]] + ; SOFT-AEABI: BLX $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-DEFAULT: BLX $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + ; CHECK: [[R:%[0-9]+]](s8) = G_TRUNC [[R32]] + %2(s8) = G_SDIV %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_udiv_i8 +# CHECK-LABEL: name: test_udiv_i8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s8) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s8) = COPY %r1 + ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_ZEXT [[X]](s8) + ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_ZEXT [[Y]](s8) + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + ; HWDIV: [[R32:%[0-9]+]](s32) = G_UDIV [[X32]], [[Y32]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X32]] + ; SOFT-DAG: %r1 = COPY [[Y32]] + ; SOFT-AEABI: BLX $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-DEFAULT: BLX $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + ; CHECK: [[R:%[0-9]+]](s8) = G_TRUNC [[R32]] + %2(s8) = G_UDIV %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_srem_i32 +# CHECK-LABEL: name: test_srem_i32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; HWDIV: [[Q:%[0-9]+]](s32) = G_SDIV [[X]], [[Y]] + ; HWDIV: [[P:%[0-9]+]](s32) = G_MUL [[Q]], [[Y]] + ; HWDIV: [[R:%[0-9]+]](s32) = G_SUB [[X]], [[P]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X]] + ; SOFT-DAG: %r1 = COPY [[Y]] + ; SOFT-AEABI: BLX $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r1 + ; SOFT-DEFAULT: BLX $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + %2(s32) = G_SREM %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_urem_i32 +# CHECK-LABEL: name: test_urem_i32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + ; HWDIV: [[Q:%[0-9]+]](s32) = G_UDIV [[X]], [[Y]] + ; HWDIV: [[P:%[0-9]+]](s32) = G_MUL [[Q]], [[Y]] + ; HWDIV: [[R:%[0-9]+]](s32) = G_SUB [[X]], [[P]] + ; SOFT: ADJCALLSTACKDOWN + ; SOFT-DAG: %r0 = COPY [[X]] + ; SOFT-DAG: %r1 = COPY [[Y]] + ; SOFT-AEABI: BLX $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r1 + ; SOFT-DEFAULT: BLX $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT: ADJCALLSTACKUP + %2(s32) = G_UREM %0, %1 + ; CHECK: %r0 = COPY [[R]] + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 +... diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir index d154b4887c195ea74f3ef59105a984f6988e0f44..803135ba595e4e409d2a2811ff503392e89c5b28 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir @@ -82,10 +82,10 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]] - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]] - %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 - %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32 + ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]] + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) ; CHECK: ADJCALLSTACKDOWN ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] @@ -97,8 +97,7 @@ body: | ; HARD: BLX $fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: ADJCALLSTACKUP %6(s64) = G_FREM %4, %5 - %7(s32) = G_EXTRACT %6(s64), 0 - %8(s32) = G_EXTRACT %6(s64), 32 + %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) BX_RET 14, _, implicit %r0, implicit %r1 @@ -174,10 +173,10 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]] - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]] - %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 - %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32 + ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]] + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) ; CHECK: ADJCALLSTACKDOWN ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] @@ -189,8 +188,7 @@ body: | ; HARD: BLX $pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: ADJCALLSTACKUP %6(s64) = G_FPOW %4, %5 - %7(s32) = G_EXTRACT %6(s64), 0 - %8(s32) = G_EXTRACT %6(s64), 32 + %7(s32), %8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) BX_RET 14, _, implicit %r0, implicit %r1 @@ -258,10 +256,10 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_SEQUENCE [[X0]] - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_SEQUENCE [[Y0]] - %4(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 - %5(s64) = G_SEQUENCE %2(s32), 0, %3(s32), 32 + ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]] + %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) ; HARD: [[R:%[0-9]+]](s64) = G_FADD [[X]], [[Y]] ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] @@ -272,10 +270,8 @@ body: | ; SOFT-DEFAULT: BLX $__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 ; SOFT: ADJCALLSTACKUP %6(s64) = G_FADD %4, %5 - ; HARD-DAG: G_EXTRACT [[R]](s64), 0 - ; HARD-DAG: G_EXTRACT [[R]](s64), 32 - %7(s32) = G_EXTRACT %6(s64), 0 - %8(s32) = G_EXTRACT %6(s64), 32 + ; HARD-DAG: G_UNMERGE_VALUES [[R]](s64) + %7(s32),%8(s32) = G_UNMERGE_VALUES %6(s64) %r0 = COPY %7(s32) %r1 = COPY %8(s32) BX_RET 14, _, implicit %r0, implicit %r1 diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index cbff7e12fb77c872bbb61c41720e8a7c622bbff0..c6f6ca81c27953efee6ddf491bc9c795b8dcf801 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -7,6 +7,26 @@ define void @test_add_s16() { ret void } define void @test_add_s32() { ret void } + define void @test_sub_s8() { ret void } + define void @test_sub_s16() { ret void } + define void @test_sub_s32() { ret void } + + define void @test_mul_s8() { ret void } + define void @test_mul_s16() { ret void } + define void @test_mul_s32() { ret void } + + define void @test_and_s8() { ret void } + define void @test_and_s16() { ret void } + define void @test_and_s32() { ret void } + + define void @test_or_s8() { ret void } + define void @test_or_s16() { ret void } + define void @test_or_s32() { ret void } + + define void @test_xor_s8() { ret void } + define void @test_xor_s16() { ret void } + define void @test_xor_s32() { ret void } + define void @test_load_from_stack() { ret void } define void @test_legal_loads() #0 { ret void } define void @test_legal_stores() #0 { ret void } @@ -83,8 +103,9 @@ body: | %0(s8) = COPY %r0 %1(s8) = COPY %r1 %2(s8) = G_ADD %0, %1 - ; G_ADD with s8 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s8) = G_ADD {{%[0-9]+, %[0-9]+}} + ; G_ADD with s8 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_ADD {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s8) = G_ADD {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s8) BX_RET 14, _, implicit %r0 ... @@ -107,8 +128,9 @@ body: | %0(s16) = COPY %r0 %1(s16) = COPY %r1 %2(s16) = G_ADD %0, %1 - ; G_ADD with s16 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s16) = G_ADD {{%[0-9]+, %[0-9]+}} + ; G_ADD with s16 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_ADD {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s16) = G_ADD {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s16) BX_RET 14, _, implicit %r0 @@ -137,6 +159,386 @@ body: | %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 +... +--- +name: test_sub_s8 +# CHECK-LABEL: name: test_sub_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_SUB %0, %1 + ; G_SUB with s8 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_SUB {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s8) = G_SUB {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_sub_s16 +# CHECK-LABEL: name: test_sub_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_SUB %0, %1 + ; G_SUB with s16 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_SUB {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s16) = G_SUB {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_sub_s32 +# CHECK-LABEL: name: test_sub_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_SUB %0, %1 + ; G_SUB with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_SUB {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s8 +# CHECK-LABEL: name: test_mul_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_MUL %0, %1 + ; G_MUL with s8 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_MUL {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s8) = G_MUL {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_mul_s16 +# CHECK-LABEL: name: test_mul_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_MUL %0, %1 + ; G_MUL with s16 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_MUL {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s16) = G_MUL {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s32 +# CHECK-LABEL: name: test_mul_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_MUL %0, %1 + ; G_MUL with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_MUL {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_and_s8 +# CHECK-LABEL: name: test_and_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_AND %0, %1 + ; G_AND with s8 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s8) = G_AND {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_and_s16 +# CHECK-LABEL: name: test_and_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_AND %0, %1 + ; G_AND with s16 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s16) = G_AND {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_and_s32 +# CHECK-LABEL: name: test_and_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_AND %0, %1 + ; G_AND with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_or_s8 +# CHECK-LABEL: name: test_or_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_OR %0, %1 + ; G_OR with s8 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s8) = G_OR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_or_s16 +# CHECK-LABEL: name: test_or_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_OR %0, %1 + ; G_OR with s16 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s16) = G_OR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_or_s32 +# CHECK-LABEL: name: test_or_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_OR %0, %1 + ; G_OR with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_xor_s8 +# CHECK-LABEL: name: test_xor_s8 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s8) = G_XOR %0, %1 + ; G_XOR with s8 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s8) = G_XOR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s8) + BX_RET 14, _, implicit %r0 +... +--- +name: test_xor_s16 +# CHECK-LABEL: name: test_xor_s16 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s16) = G_XOR %0, %1 + ; G_XOR with s16 should widen + ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}(s16) = G_XOR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_xor_s32 +# CHECK-LABEL: name: test_xor_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_XOR %0, %1 + ; G_XOR with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + ... --- name: test_load_from_stack @@ -155,7 +557,7 @@ fixedStack: - { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false } - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } - { id: 2, offset: 8, size: 4, alignment: 4, isImmutable: true, isAliased: false } - # CHECK: id: [[FRAME_INDEX:[0-9]+]], offset: 8 + # CHECK: id: [[FRAME_INDEX:[0-9]+]], type: default, offset: 8 body: | bb.0: liveins: %r0, %r1, %r2, %r3 diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index fbf8d81322f8f3767f4aaf98389aeb3c578a2810..cc1df80c60191b581b81dcb2fcc1c3825ff43566 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -5,6 +5,21 @@ define void @test_add_s8() { ret void } define void @test_add_s1() { ret void } + define void @test_sub_s32() { ret void } + define void @test_sub_s16() { ret void } + define void @test_sub_s8() { ret void } + + define void @test_mul_s32() { ret void } + define void @test_mul_s16() { ret void } + define void @test_mul_s8() { ret void } + + define void @test_sdiv_s32() #1 { ret void } + define void @test_udiv_s32() #1 { ret void } + + define void @test_and_s32() { ret void} + define void @test_or_s32() { ret void} + define void @test_xor_s32() { ret void} + define void @test_loads() #0 { ret void } define void @test_stores() #0 { ret void } @@ -14,12 +29,18 @@ define void @test_constants() { ret void } + define void @test_anyext_s8_32() { ret void } + define void @test_anyext_s16_32() { ret void } + + define void @test_trunc_s32_16() { ret void } + define void @test_fadd_s32() #0 { ret void } define void @test_fadd_s64() #0 { ret void } define void @test_soft_fp_s64() #0 { ret void } attributes #0 = { "target-features"="+vfp2"} + attributes #1 = { "target-features"="+hwdiv-arm" } ... --- name: test_add_s32 @@ -28,9 +49,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -54,22 +75,31 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 %0(s16) = COPY %r0 %1(s16) = COPY %r1 - %2(s16) = G_ADD %0, %1 - %r0 = COPY %2(s16) + %2(s32) = G_ANYEXT %0(s16) + %3(s32) = G_ANYEXT %1(s16) + %4(s32) = G_ADD %2, %3 + %5(s16) = G_TRUNC %4(s32) + %r0 = COPY %5(s16) BX_RET 14, _, implicit %r0 ... @@ -80,22 +110,31 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 %0(s8) = COPY %r0 %1(s8) = COPY %r1 - %2(s8) = G_ADD %0, %1 - %r0 = COPY %2(s8) + %2(s32) = G_ANYEXT %0(s8) + %3(s32) = G_ANYEXT %1(s8) + %4(s32) = G_ADD %2, %3 + %5(s8) = G_TRUNC %4(s32) + %r0 = COPY %5(s8) BX_RET 14, _, implicit %r0 ... @@ -106,22 +145,353 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 %0(s1) = COPY %r0 %1(s1) = COPY %r1 - %2(s1) = G_ADD %0, %1 - %r0 = COPY %2(s1) + %2(s32) = G_ANYEXT %0(s1) + %3(s32) = G_ANYEXT %1(s1) + %4(s32) = G_ADD %2, %3 + %5(s1) = G_TRUNC %4(s32) + %r0 = COPY %5(s1) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_sub_s32 +# CHECK-LABEL: name: test_sub_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_SUB %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_sub_s16 +# CHECK-LABEL: name: test_sub_s16 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s32) = G_ANYEXT %0(s16) + %3(s32) = G_ANYEXT %1(s16) + %4(s32) = G_SUB %2, %3 + %5(s16) = G_TRUNC %4(s32) + %r0 = COPY %5(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_sub_s8 +# CHECK-LABEL: name: test_sub_s8 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s32) = G_ANYEXT %0(s8) + %3(s32) = G_ANYEXT %1(s8) + %4(s32) = G_SUB %2, %3 + %5(s8) = G_TRUNC %4(s32) + %r0 = COPY %5(s8) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s32 +# CHECK-LABEL: name: test_mul_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_MUL %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s16 +# CHECK-LABEL: name: test_mul_s16 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s16) = COPY %r0 + %1(s16) = COPY %r1 + %2(s32) = G_ANYEXT %0(s16) + %3(s32) = G_ANYEXT %1(s16) + %4(s32) = G_MUL %2, %3 + %5(s16) = G_TRUNC %4(s32) + %r0 = COPY %5(s16) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_mul_s8 +# CHECK-LABEL: name: test_mul_s8 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s8) = COPY %r0 + %1(s8) = COPY %r1 + %2(s32) = G_ANYEXT %0(s8) + %3(s32) = G_ANYEXT %1(s8) + %4(s32) = G_MUL %2, %3 + %5(s8) = G_TRUNC %4(s32) + %r0 = COPY %5(s8) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_sdiv_s32 +# CHECK-LABEL: name: test_sdiv_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_SDIV %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_udiv_s32 +# CHECK-LABEL: name: test_udiv_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_UDIV %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_and_s32 +# CHECK-LABEL: name: test_and_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_AND %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_or_s32 +# CHECK-LABEL: name: test_or_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_OR %0, %1 + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_xor_s32 +# CHECK-LABEL: name: test_xor_s32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_XOR %0, %1 + %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... @@ -132,13 +502,13 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } -# CHECK: - { id: 6, class: fprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } +# CHECK: - { id: 6, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -168,13 +538,13 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } -# CHECK: - { id: 5, class: gprb } -# CHECK: - { id: 6, class: fprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } +# CHECK: - { id: 5, class: gprb, preferred-register: '' } +# CHECK: - { id: 6, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -210,11 +580,11 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -243,9 +613,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -268,7 +638,7 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } body: | @@ -278,15 +648,78 @@ body: | BX_RET 14, _, implicit %r0 ... --- +name: test_anyext_s8_32 +# CHECK-LABEL: name: test_anyext_s8_32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + %0(s8) = COPY %r0 + %1(s32) = G_ANYEXT %0(s8) + %r0 = COPY %1(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_anyext_s16_32 +# CHECK-LABEL: name: test_anyext_s16_32 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + %0(s16) = COPY %r0 + %1(s32) = G_ANYEXT %0(s16) + %r0 = COPY %1(s32) + BX_RET 14, _, implicit %r0 +... +--- +name: test_trunc_s32_16 +# CHECK-LABEL: name: test_trunc_s32_16 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + %0(s32) = COPY %r0 + %1(s16) = G_TRUNC %0(s32) + %r0 = COPY %1(s16) + BX_RET 14, _, implicit %r0 +... +--- name: test_fadd_s32 # CHECK-LABEL: name: test_fadd_s32 legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: fprb } -# CHECK: - { id: 1, class: fprb } -# CHECK: - { id: 2, class: fprb } +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +# CHECK: - { id: 2, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -310,9 +743,9 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: fprb } -# CHECK: - { id: 1, class: fprb } -# CHECK: - { id: 2, class: fprb } +# CHECK: - { id: 0, class: fprb, preferred-register: '' } +# CHECK: - { id: 1, class: fprb, preferred-register: '' } +# CHECK: - { id: 2, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -336,11 +769,11 @@ legalized: true regBankSelected: false selected: false # CHECK: registers: -# CHECK: - { id: 0, class: gprb } -# CHECK: - { id: 1, class: gprb } -# CHECK: - { id: 2, class: fprb } -# CHECK: - { id: 3, class: gprb } -# CHECK: - { id: 4, class: gprb } +# CHECK: - { id: 0, class: gprb, preferred-register: '' } +# CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: fprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -354,9 +787,8 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s64) = G_SEQUENCE %0(s32), 0, %1(s32), 32 - %3(s32) = G_EXTRACT %2(s64), 0 - %4(s32) = G_EXTRACT %2(s64), 32 + %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) + %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64) %r0 = COPY %3(s32) %r1 = COPY %4(s32) BX_RET 14, _, implicit %r0, implicit %r1 diff --git a/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll b/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll new file mode 100644 index 0000000000000000000000000000000000000000..f2f9c5d2a81dfe7809ce7e1bbac3a28de93b4099 --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll @@ -0,0 +1,95 @@ +; RUN: llc -mtriple arm-unknown -verify-machineinstrs -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s + +; This file checks that we use the fallback path for things that are known to +; be unsupported on the ARM target. It should progressively shrink in size. + +define <4 x i32> @test_int_vectors(<4 x i32> %a, <4 x i32> %b) { +; CHECK: remark: {{.*}} unable to lower arguments: <4 x i32> (<4 x i32>, <4 x i32>)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_int_vectors + %res = add <4 x i32> %a, %b + ret <4 x i32> %res +} + +define <4 x float> @test_float_vectors(<4 x float> %a, <4 x float> %b) { +; CHECK: remark: {{.*}} unable to lower arguments: <4 x float> (<4 x float>, <4 x float>)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_float_vectors + %res = fadd <4 x float> %a, %b + ret <4 x float> %res +} + +define i64 @test_i64(i64 %a, i64 %b) { +; CHECK: remark: {{.*}} unable to lower arguments: i64 (i64, i64)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_i64 + %res = add i64 %a, %b + ret i64 %res +} + +define i128 @test_i128(i128 %a, i128 %b) { +; CHECK: remark: {{.*}} unable to lower arguments: i128 (i128, i128)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_i128 + %res = add i128 %a, %b + ret i128 %res +} + +define i17 @test_funny_ints(i17 %a, i17 %b) { +; CHECK: remark: {{.*}} unable to lower arguments: i17 (i17, i17)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_funny_ints + %res = add i17 %a, %b + ret i17 %res +} + +define half @test_half(half %a, half %b) { +; CHECK: remark: {{.*}} unable to lower arguments: half (half, half)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_half + %res = fadd half %a, %b + ret half %res +} + +declare [16 x i32] @ret_demotion_target() + +define [16 x i32] @test_ret_demotion() { +; CHECK: remark: {{.*}} unable to translate instruction: call{{.*}} @ret_demotion_target +; CHECK-LABEL: warning: Instruction selection used fallback path for test_ret_demotion + %res = call [16 x i32] @ret_demotion_target() + ret [16 x i32] %res +} + +%large.struct = type { i32, i32, i32, i32, i32} ; Doesn't fit in R0-R3 + +declare %large.struct @large_struct_return_target() + +define %large.struct @test_large_struct_return() { +; CHECK: remark: {{.*}} unable to translate instruction: call{{.*}} @large_struct_return_target +; CHECK-LABEL: warning: Instruction selection used fallback path for test_large_struct_return + %r = call %large.struct @large_struct_return_target() + ret %large.struct %r +} + +%mixed.struct = type {i32*, float, i32} + +define %mixed.struct @test_mixed_struct(%mixed.struct %x) { +; CHECK: remark: {{.*}} unable to lower arguments: %mixed.struct (%mixed.struct)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_mixed_struct + ret %mixed.struct %x +} + +define void @test_vararg_definition(i32 %a, ...) { +; CHECK: remark: {{.*}} unable to lower arguments: void (i32, ...)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_vararg_definition + ret void +} + +define void @test_vararg_call(i32 %a) { +; CHECK: remark: {{.*}} unable to translate instruction: call +; CHECK-LABEL: warning: Instruction selection used fallback path for test_vararg_call + call void(i32, ...) @test_vararg_definition(i32 %a, i32 %a, i32 %a) + ret void +} + +define i32 @test_thumb(i32 %a) #0 { +; CHECK: remark: {{.*}} unable to lower arguments: i32 (i32)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_thumb + ret i32 %a +} + +attributes #0 = { "target-features"="+thumb-mode" } diff --git a/test/CodeGen/ARM/acle-intrinsics-v5.ll b/test/CodeGen/ARM/acle-intrinsics-v5.ll new file mode 100644 index 0000000000000000000000000000000000000000..407bea1488630800dde67a56cca53796f4ccb567 --- /dev/null +++ b/test/CodeGen/ARM/acle-intrinsics-v5.ll @@ -0,0 +1,110 @@ +; RUN: llc -O1 -mtriple=armv5te-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=armv6-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=armv7-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv7-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv6t2-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv7em-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv8m.main-none-none-eabi -mattr=+dsp %s -o - | FileCheck %s +define i32 @smulbb(i32 %a, i32 %b) { +; CHECK-LABEL: smulbb +; CHECK: smulbb r0, r0, r1 + %tmp = call i32 @llvm.arm.smulbb(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smulbt(i32 %a, i32 %b) { +; CHECK-LABEL: smulbt +; CHECK: smulbt r0, r0, r1 + %tmp = call i32 @llvm.arm.smulbt(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smultb(i32 %a, i32 %b) { +; CHECK-LABEL: smultb +; CHECK: smultb r0, r0, r1 + %tmp = call i32 @llvm.arm.smultb(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smultt(i32 %a, i32 %b) { +; CHECK-LABEL: smultt +; CHECK: smultt r0, r0, r1 + %tmp = call i32 @llvm.arm.smultt(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smulwb(i32 %a, i32 %b) { +; CHECK-LABEL: smulwb +; CHECK: smulwb r0, r0, r1 + %tmp = call i32 @llvm.arm.smulwb(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smulwt(i32 %a, i32 %b) { +; CHECK-LABEL: smulwt +; CHECK: smulwt r0, r0, r1 + %tmp = call i32 @llvm.arm.smulwt(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @acc_mults(i32 %a, i32 %b, i32 %acc) { +; CHECK-LABEL: acc_mults +; CHECK: smlabb r2, r0, r1, r2 +; CHECK: smlabt r2, r0, r1, r2 +; CHECK: smlatb r2, r0, r1, r2 +; CHECK: smlatt r2, r0, r1, r2 +; CHECK: smlawb r2, r0, r1, r2 +; CHECK: smlawt r0, r0, r1, r2 + %acc1 = call i32 @llvm.arm.smlabb(i32 %a, i32 %b, i32 %acc) + %acc2 = call i32 @llvm.arm.smlabt(i32 %a, i32 %b, i32 %acc1) + %acc3 = call i32 @llvm.arm.smlatb(i32 %a, i32 %b, i32 %acc2) + %acc4 = call i32 @llvm.arm.smlatt(i32 %a, i32 %b, i32 %acc3) + %acc5 = call i32 @llvm.arm.smlawb(i32 %a, i32 %b, i32 %acc4) + %acc6 = call i32 @llvm.arm.smlawt(i32 %a, i32 %b, i32 %acc5) + ret i32 %acc6 +} + +define i32 @qadd(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qadd +; CHECK: qadd r0, r0, r1 + %tmp = call i32 @llvm.arm.qadd(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qsub(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qsub +; CHECK: qsub r0, r0, r1 + %tmp = call i32 @llvm.arm.qsub(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qdadd(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qdadd +; CHECK: qdadd r0, r0, r1 + %dbl = call i32 @llvm.arm.qadd(i32 %a, i32 %a) + %add = call i32 @llvm.arm.qadd(i32 %dbl, i32 %b) + ret i32 %add +} + +define i32 @qdsub(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qdsub +; CHECK: qdsub r0, r0, r1 + %dbl = call i32 @llvm.arm.qadd(i32 %b, i32 %b) + %add = call i32 @llvm.arm.qsub(i32 %a, i32 %dbl) + ret i32 %add +} + +declare i32 @llvm.arm.smulbb(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smulbt(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smultb(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smultt(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smulwb(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smulwt(i32 %a, i32 %b) nounwind readnone +declare i32 @llvm.arm.smlabb(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlabt(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlatb(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlatt(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlawb(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlawt(i32, i32, i32) nounwind +declare i32 @llvm.arm.qadd(i32, i32) nounwind +declare i32 @llvm.arm.qsub(i32, i32) nounwind diff --git a/test/CodeGen/ARM/acle-intrinsics.ll b/test/CodeGen/ARM/acle-intrinsics.ll new file mode 100644 index 0000000000000000000000000000000000000000..0c20744e126bdc6ab08fa41469524aa4c2c943fe --- /dev/null +++ b/test/CodeGen/ARM/acle-intrinsics.ll @@ -0,0 +1,481 @@ +; RUN: llc -O1 -mtriple=armv6-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=armv7-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv7-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv6t2-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv7em-none-none-eabi %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple=thumbv8m.main-none-none-eabi -mattr=+dsp %s -o - | FileCheck %s + + +; upper-bound of the immediate argument +define i32 @ssat1(i32 %a) nounwind { +; CHECK-LABEL: ssat1 +; CHECK: ssat r0, #32, r0 + %tmp = call i32 @llvm.arm.ssat(i32 %a, i32 32) + ret i32 %tmp +} + +; lower-bound of the immediate argument +define i32 @ssat2(i32 %a) nounwind { +; CHECK-LABEL: ssat2 +; CHECK: ssat r0, #1, r0 + %tmp = call i32 @llvm.arm.ssat(i32 %a, i32 1) + ret i32 %tmp +} + +; upper-bound of the immediate argument +define i32 @usat1(i32 %a) nounwind { +; CHECK-LABEL: usat1 +; CHECK: usat r0, #31, r0 + %tmp = call i32 @llvm.arm.usat(i32 %a, i32 31) + ret i32 %tmp +} + +; lower-bound of the immediate argument +define i32 @usat2(i32 %a) nounwind { +; CHECK-LABEL: usat2 +; CHECK: usat r0, #0, r0 + %tmp = call i32 @llvm.arm.usat(i32 %a, i32 0) + ret i32 %tmp +} + +define i32 @ssat16 (i32 %a) nounwind { +; CHECK-LABEL: ssat16 +; CHECK: ssat16 r0, #1, r0 +; CHECK: ssat16 r0, #16, r0 + %tmp = call i32 @llvm.arm.ssat16(i32 %a, i32 1) + %tmp2 = call i32 @llvm.arm.ssat16(i32 %tmp, i32 16) + ret i32 %tmp2 +} + +define i32 @usat16(i32 %a) nounwind { +; CHECK-LABEL: usat16 +; CHECK: usat16 r0, #0, r0 +; CHECK: usat16 r0, #15, r0 + %tmp = call i32 @llvm.arm.usat16(i32 %a, i32 0) + %tmp2 = call i32 @llvm.arm.usat16(i32 %tmp, i32 15) + ret i32 %tmp2 +} + +define i32 @pack_unpack(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: pack_unpack +; CHECK: sxtab16 r0, r0, r1 +; CHECK: sxtb16 r0, r0 +; CHECK: uxtab16 r0, r1, r0 +; CHECK: uxtb16 r0, r0 + %tmp = call i32 @llvm.arm.sxtab16(i32 %a, i32 %b) + %tmp1 = call i32 @llvm.arm.sxtb16(i32 %tmp) + %tmp2 = call i32 @llvm.arm.uxtab16(i32 %b, i32 %tmp1) + %tmp3 = call i32 @llvm.arm.uxtb16(i32 %tmp2) + ret i32 %tmp3 +} + +define i32 @sel(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: sel +; CHECK sel r0, r0, r1 + %tmp = call i32 @llvm.arm.sel(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qadd8 +; CHECK: qadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.qadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qsub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qsub8 +; CHECK: qsub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.qsub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @sadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: sadd8 +; CHECK: sadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.sadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shadd8 +; CHECK: shadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.shadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shsub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shsub8 +; CHECK: shsub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.shsub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @ssub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: ssub8 +; CHECK: ssub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.ssub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uadd8 +; CHECK: uadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhadd8 +; CHECK: uhadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uhadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhsub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhsub8 +; CHECK: uhsub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uhsub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqadd8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqadd8 +; CHECK: uqadd8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uqadd8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqsub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqsub8 +; CHECK: uqsub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.uqsub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @usub8(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: usub8 +; CHECK: usub8 r0, r0, r1 + %tmp = call i32 @llvm.arm.usub8(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @usad(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: usad +; CHECK: usad8 r0, r0, r1 +; CHECK: usada8 r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.usad8(i32 %a, i32 %b) + %tmp1 = call i32 @llvm.arm.usada8(i32 %tmp, i32 %b, i32 %c) + ret i32 %tmp1 +} + +define i32 @qadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qadd16 +; CHECK: qadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.qadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qasx +; CHECK: qasx r0, r0, r1 + %tmp = call i32 @llvm.arm.qasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qsax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qsax +; CHECK: qsax r0, r0, r1 + %tmp = call i32 @llvm.arm.qsax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @qsub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: qsub16 +; CHECK: qsub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.qsub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @sadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: sadd16 +; CHECK: sadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.sadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @sasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: sasx +; CHECK: sasx r0, r0, r1 + %tmp = call i32 @llvm.arm.sasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shadd16 +; CHECK: shadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.shadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shasx +; CHECK: shasx r0, r0, r1 + %tmp = call i32 @llvm.arm.shasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shsax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shsax +; CHECK: shsax r0, r0, r1 + %tmp = call i32 @llvm.arm.shsax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @shsub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: shsub16 +; CHECK: shsub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.shsub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @ssax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: ssax +; CHECK: ssax r0, r0, r1 + %tmp = call i32 @llvm.arm.ssax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @ssub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: ssub16 +; CHECK: ssub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.ssub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uadd16 +; CHECK: uadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uasx +; CHECK: uasx r0, r0, r1 + %tmp = call i32 @llvm.arm.uasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhadd16 +; CHECK: uhadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uhadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhasx +; CHECK: uhasx r0, r0, r1 + %tmp = call i32 @llvm.arm.uhasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhsax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhsax +; CHECK: uhsax r0, r0, r1 + %tmp = call i32 @llvm.arm.uhsax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uhsub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uhsub16 +; CHECK: uhsub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uhsub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqadd16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqadd16 +; CHECK: uqadd16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uqadd16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqasx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqasx +; CHECK: uqasx r0, r0, r1 + %tmp = call i32 @llvm.arm.uqasx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqsax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqsax +; CHECK: uqsax r0, r0, r1 + %tmp = call i32 @llvm.arm.uqsax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @uqsub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: uqsub16 +; CHECK: uqsub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.uqsub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @usax(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: usax +; CHECK: usax r0, r0, r1 + %tmp = call i32 @llvm.arm.usax(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @usub16(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: usub16 +; CHECK: usub16 r0, r0, r1 + %tmp = call i32 @llvm.arm.usub16(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smlad(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: smlad +; CHECK: smlad r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.smlad(i32 %a, i32 %b, i32 %c) + ret i32 %tmp +} + +define i32 @smladx(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: smladx +; CHECK: smladx r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.smladx(i32 %a, i32 %b, i32 %c) + ret i32 %tmp +} + +define i64 @smlald(i32 %a, i32 %b, i64 %c) nounwind { +; CHECK-LABEL: smlald +; CHECK: smlald r2, r3, r0, r1 + %tmp = call i64 @llvm.arm.smlald(i32 %a, i32 %b, i64 %c) + ret i64 %tmp +} + +define i64 @smlaldx(i32 %a, i32 %b, i64 %c) nounwind { +; CHECK-LABEL: smlaldx +; CHECK: smlaldx r2, r3, r0, r1 + %tmp = call i64 @llvm.arm.smlaldx(i32 %a, i32 %b, i64 %c) + ret i64 %tmp +} + +define i32 @smlsd(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: smlsd +; CHECK: smlsd r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.smlsd(i32 %a, i32 %b, i32 %c) + ret i32 %tmp +} + +define i32 @smlsdx(i32 %a, i32 %b, i32 %c) nounwind { +; CHECK-LABEL: smlsdx +; CHECK: smlsdx r0, r0, r1, r2 + %tmp = call i32 @llvm.arm.smlsdx(i32 %a, i32 %b, i32 %c) + ret i32 %tmp +} + +define i64 @smlsld(i32 %a, i32 %b, i64 %c) nounwind { +; CHECK-LABEL: smlsld +; CHECK: smlsld r2, r3, r0, r1 + %tmp = call i64 @llvm.arm.smlsld(i32 %a, i32 %b, i64 %c) + ret i64 %tmp +} + +define i64 @smlsldx(i32 %a, i32 %b, i64 %c) nounwind { +; CHECK-LABEL: smlsldx +; CHECK: smlsldx r2, r3, r0, r1 + %tmp = call i64 @llvm.arm.smlsldx(i32 %a, i32 %b, i64 %c) + ret i64 %tmp +} + +define i32 @smuad(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: smuad +; CHECK: smuad r0, r0, r1 + %tmp = call i32 @llvm.arm.smuad(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smuadx(i32 %a, i32 %b) nounwind { +;CHECK-LABEL: smuadx +; CHECK: smuadx r0, r0, r1 + %tmp = call i32 @llvm.arm.smuadx(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smusd(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: smusd +; CHECK: smusd r0, r0, r1 + %tmp = call i32 @llvm.arm.smusd(i32 %a, i32 %b) + ret i32 %tmp +} + +define i32 @smusdx(i32 %a, i32 %b) nounwind { +; CHECK-LABEL: smusdx +; CHECK: smusdx r0, r0, r1 + %tmp = call i32 @llvm.arm.smusdx(i32 %a, i32 %b) + ret i32 %tmp +} +declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone +declare i32 @llvm.arm.usat(i32, i32) nounwind readnone +declare i32 @llvm.arm.ssat16(i32, i32) nounwind +declare i32 @llvm.arm.usat16(i32, i32) nounwind +declare i32 @llvm.arm.sxtab16(i32, i32) +declare i32 @llvm.arm.sxtb16(i32) +declare i32 @llvm.arm.uxtab16(i32, i32) +declare i32 @llvm.arm.uxtb16(i32) +declare i32 @llvm.arm.sel(i32, i32) nounwind +declare i32 @llvm.arm.qadd8(i32, i32) nounwind +declare i32 @llvm.arm.qsub8(i32, i32) nounwind +declare i32 @llvm.arm.sadd8(i32, i32) nounwind +declare i32 @llvm.arm.shadd8(i32, i32) nounwind +declare i32 @llvm.arm.shsub8(i32, i32) nounwind +declare i32 @llvm.arm.ssub8(i32, i32) nounwind +declare i32 @llvm.arm.uadd8(i32, i32) nounwind +declare i32 @llvm.arm.uhadd8(i32, i32) nounwind +declare i32 @llvm.arm.uhsub8(i32, i32) nounwind +declare i32 @llvm.arm.uqadd8(i32, i32) nounwind +declare i32 @llvm.arm.uqsub8(i32, i32) nounwind +declare i32 @llvm.arm.usub8(i32, i32) nounwind +declare i32 @llvm.arm.usad8(i32, i32) nounwind readnone +declare i32 @llvm.arm.usada8(i32, i32, i32) nounwind readnone +declare i32 @llvm.arm.qadd16(i32, i32) nounwind +declare i32 @llvm.arm.qasx(i32, i32) nounwind +declare i32 @llvm.arm.qsax(i32, i32) nounwind +declare i32 @llvm.arm.qsub16(i32, i32) nounwind +declare i32 @llvm.arm.sadd16(i32, i32) nounwind +declare i32 @llvm.arm.sasx(i32, i32) nounwind +declare i32 @llvm.arm.shadd16(i32, i32) nounwind +declare i32 @llvm.arm.shasx(i32, i32) nounwind +declare i32 @llvm.arm.shsax(i32, i32) nounwind +declare i32 @llvm.arm.shsub16(i32, i32) nounwind +declare i32 @llvm.arm.ssax(i32, i32) nounwind +declare i32 @llvm.arm.ssub16(i32, i32) nounwind +declare i32 @llvm.arm.uadd16(i32, i32) nounwind +declare i32 @llvm.arm.uasx(i32, i32) nounwind +declare i32 @llvm.arm.usax(i32, i32) nounwind +declare i32 @llvm.arm.uhadd16(i32, i32) nounwind +declare i32 @llvm.arm.uhasx(i32, i32) nounwind +declare i32 @llvm.arm.uhsax(i32, i32) nounwind +declare i32 @llvm.arm.uhsub16(i32, i32) nounwind +declare i32 @llvm.arm.uqadd16(i32, i32) nounwind +declare i32 @llvm.arm.uqasx(i32, i32) nounwind +declare i32 @llvm.arm.uqsax(i32, i32) nounwind +declare i32 @llvm.arm.uqsub16(i32, i32) nounwind +declare i32 @llvm.arm.usub16(i32, i32) nounwind +declare i32 @llvm.arm.smlad(i32, i32, i32) nounwind +declare i32 @llvm.arm.smladx(i32, i32, i32) nounwind +declare i64 @llvm.arm.smlald(i32, i32, i64) nounwind +declare i64 @llvm.arm.smlaldx(i32, i32, i64) nounwind +declare i32 @llvm.arm.smlsd(i32, i32, i32) nounwind +declare i32 @llvm.arm.smlsdx(i32, i32, i32) nounwind +declare i64 @llvm.arm.smlsld(i32, i32, i64) nounwind +declare i64 @llvm.arm.smlsldx(i32, i32, i64) nounwind +declare i32 @llvm.arm.smuad(i32, i32) nounwind +declare i32 @llvm.arm.smuadx(i32, i32) nounwind +declare i32 @llvm.arm.smusd(i32, i32) nounwind +declare i32 @llvm.arm.smusdx(i32, i32) nounwind diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll index 0e077b3aee5a10870818ea75026e7f2d711e1829..64c279b0f218793622cb83ff9d6f72cc379d1618 100644 --- a/test/CodeGen/ARM/alloc-no-stack-realign.ll +++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -7,31 +7,32 @@ define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" { entry: -; CHECK-LABEL: test1 -; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] -; CHECK: add r[[R2:[0-9]+]], r1, #48 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: mov r[[R2:[0-9]+]], r[[R1]] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: mov r[[R1:[0-9]+]], sp -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #48 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] +; CHECK-LABEL: test1: +; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], #32 +; CHECK: mov r[[R2:[0-9]+]], sp +; CHECK: mov r[[R3:[0-9]+]], r[[R2]] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]] +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval @@ -42,30 +43,32 @@ entry: define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp { entry: -; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: mov r[[R2:[0-9]+]], r[[R1]] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: mov r[[R1:[0-9]+]], sp -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #48 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] +; CHECK-LABEL: test2: +; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], #32 +; CHECK: mov r[[R2:[0-9]+]], sp +; CHECK: mov r[[R3:[0-9]+]], r[[R2]] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]] +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 diff --git a/test/CodeGen/ARM/alloca-align.ll b/test/CodeGen/ARM/alloca-align.ll new file mode 100644 index 0000000000000000000000000000000000000000..6186d137ef7fdd64421a03db1b8cffa603b70fc0 --- /dev/null +++ b/test/CodeGen/ARM/alloca-align.ll @@ -0,0 +1,24 @@ +; RUN: llc -o - %s | FileCheck %s +target triple="arm--" + +@glob = external global i32* + +declare void @bar(i32*, [20000 x i8]* byval) + +; CHECK-LABEL: foo: +; We should see the stack getting additional alignment +; CHECK: sub sp, sp, #16 +; CHECK: bic sp, sp, #31 +; And a base pointer getting used. +; CHECK: mov r6, sp +; Which is passed to the call +; CHECK: add [[REG:r[0-9]+|lr]], r6, #19456 +; CHECK: add r0, [[REG]], #536 +; CHECK: bl bar +define void @foo([20000 x i8]* %addr) { + %tmp = alloca [4 x i32], align 32 + %tmp0 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 + call void @bar(i32* %tmp0, [20000 x i8]* byval %addr) + ret void +} + diff --git a/test/CodeGen/ARM/arm-shrink-wrapping.ll b/test/CodeGen/ARM/arm-shrink-wrapping.ll index 9cce1941704731739a91ef1e6d48499a649050e5..1985ff9b4a27c537081dd7a4b88a795e55950cfe 100644 --- a/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ b/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -656,6 +656,9 @@ declare double @llvm.pow.f64(double, double) ; ; DISABLE: pop ; +; FIXME: This is flakey passing by finding 'bl' somewhere amongst the debug +; info (like labels named 'line_table) not because it's found a bl instruction. +; ; CHECK: bl define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) "no-frame-pointer-elim"="true" { bb: @@ -681,7 +684,9 @@ bb13: ; preds = %bb3, %bb !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3} -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "LLVM", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "LLVM", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !4, globals: !2, imports: !2) !1 = !DIFile(filename: "a.cpp", directory: "b") !2 = !{} !3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{!5} +!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) diff --git a/test/CodeGen/ARM/bool-ext-inc.ll b/test/CodeGen/ARM/bool-ext-inc.ll index fe43f1b2ef93dc13333c5206665bef348e8e9612..5f2ba8b109a7627702777500e0952937501b93c8 100644 --- a/test/CodeGen/ARM/bool-ext-inc.ll +++ b/test/CodeGen/ARM/bool-ext-inc.ll @@ -4,7 +4,7 @@ define i32 @sext_inc(i1 zeroext %x) { ; CHECK-LABEL: sext_inc: ; CHECK: @ BB#0: -; CHECK-NEXT: rsb r0, r0, #1 +; CHECK-NEXT: eor r0, r0, #1 ; CHECK-NEXT: mov pc, lr %ext = sext i1 %x to i32 %add = add i32 %ext, 1 @@ -14,14 +14,12 @@ define i32 @sext_inc(i1 zeroext %x) { define <4 x i32> @sext_inc_vec(<4 x i1> %x) { ; CHECK-LABEL: sext_inc_vec: ; CHECK: @ BB#0: -; CHECK-NEXT: vmov d16, r0, r1 -; CHECK-NEXT: vmov.i32 q9, #0x1f -; CHECK-NEXT: vmov.i32 q10, #0x1 +; CHECK-NEXT: vmov.i16 d16, #0x1 +; CHECK-NEXT: vmov d17, r0, r1 +; CHECK-NEXT: vmov.i32 q9, #0x1 +; CHECK-NEXT: veor d16, d17, d16 ; CHECK-NEXT: vmovl.u16 q8, d16 -; CHECK-NEXT: vneg.s32 q9, q9 -; CHECK-NEXT: vshl.i32 q8, q8, #31 -; CHECK-NEXT: vshl.s32 q8, q8, q9 -; CHECK-NEXT: vadd.i32 q8, q8, q10 +; CHECK-NEXT: vand q8, q8, q9 ; CHECK-NEXT: vmov r0, r1, d16 ; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: mov pc, lr @@ -30,3 +28,41 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) { ret <4 x i32> %add } +define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: cmpgt_sext_inc_vec: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r12, sp +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: vmov.i32 q10, #0x1 +; CHECK-NEXT: vld1.64 {d16, d17}, [r12] +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vcge.s32 q8, q8, q9 +; CHECK-NEXT: vand q8, q8, q10 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %cmp = icmp sgt <4 x i32> %x, %y + %ext = sext <4 x i1> %cmp to <4 x i32> + %add = add <4 x i32> %ext, + ret <4 x i32> %add +} + +define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: cmpne_sext_inc_vec: +; CHECK: @ BB#0: +; CHECK-NEXT: mov r12, sp +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: vmov.i32 q10, #0x1 +; CHECK-NEXT: vld1.64 {d16, d17}, [r12] +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vceq.i32 q8, q9, q8 +; CHECK-NEXT: vand q8, q8, q10 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: mov pc, lr + %cmp = icmp ne <4 x i32> %x, %y + %ext = sext <4 x i1> %cmp to <4 x i32> + %add = add <4 x i32> %ext, + ret <4 x i32> %add +} + diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index fc85a3a2e6834d794d2840ac66786b45ec478163..bef7bbe01bff5e31a1407dbe0653a06dcbadd19a 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ b/test/CodeGen/ARM/build-attributes.ll @@ -199,7 +199,8 @@ ; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN ; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; ARMv6k -; RUN: llc < %s -mtriple=armv6k-none-netbsd-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN +; RUN: llc < %s -mtriple=armv6k-none-netbsd-gnueabi -mcpu=arm1176j-s 2> %t | FileCheck %s --check-prefix=NO-STRICT-ALIGN +; RUN: FileCheck %s < %t --allow-empty --check-prefix=CPU-SUPPORTED ; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN ; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; ARMv6m @@ -222,6 +223,8 @@ ; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; RUN: llc < %s -mtriple=thumbv8-none-none-eabi -mcpu=cortex-m33 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN +; CPU-SUPPORTED-NOT: is not a recognized processor for this target + ; XSCALE: .eabi_attribute 6, 5 ; XSCALE: .eabi_attribute 8, 1 ; XSCALE: .eabi_attribute 9, 1 @@ -231,6 +234,11 @@ ; V6: .eabi_attribute 6, 6 ; V6: .eabi_attribute 8, 1 ;; We assume round-to-nearest by default (matches GCC) +; V6-NOT: .eabi_attribute 27 +; V6-NOT: .eabi_attribute 36 +; V6-NOT: .eabi_attribute 42 +; V6-NOT: .eabi_attribute 44 +; V6-NOT: .eabi_attribute 68 ; V6-NOT: .eabi_attribute 19 ;; The default choice made by llc is for a V6 CPU without an FPU. ;; This is not an interesting detail, but for such CPUs, the default intention is to use @@ -242,13 +250,8 @@ ; V6: .eabi_attribute 23, 3 ; V6: .eabi_attribute 24, 1 ; V6: .eabi_attribute 25, 1 -; V6-NOT: .eabi_attribute 27 ; V6-NOT: .eabi_attribute 28 -; V6-NOT: .eabi_attribute 36 ; V6: .eabi_attribute 38, 1 -; V6-NOT: .eabi_attribute 42 -; V6-NOT: .eabi_attribute 44 -; V6-NOT: .eabi_attribute 68 ; V6-FAST-NOT: .eabi_attribute 19 ;; Despite the V6 CPU having no FPU by default, we chose to flush to @@ -262,9 +265,14 @@ ;; We emit 6, 12 for both v6-M and v6S-M, technically this is incorrect for ;; V6-M, however we don't model the OS extension so this is fine. ; V6M: .eabi_attribute 6, 12 -; V6M-NOT: .eabi_attribute 7 +; V6M: .eabi_attribute 7, 77 ; V6M: .eabi_attribute 8, 0 ; V6M: .eabi_attribute 9, 1 +; V6M-NOT: .eabi_attribute 27 +; V6M-NOT: .eabi_attribute 36 +; V6M-NOT: .eabi_attribute 42 +; V6M-NOT: .eabi_attribute 44 +; V6M-NOT: .eabi_attribute 68 ; V6M-NOT: .eabi_attribute 19 ;; The default choice made by llc is for a V6M CPU without an FPU. ;; This is not an interesting detail, but for such CPUs, the default intention is to use @@ -276,13 +284,8 @@ ; V6M: .eabi_attribute 23, 3 ; V6M: .eabi_attribute 24, 1 ; V6M: .eabi_attribute 25, 1 -; V6M-NOT: .eabi_attribute 27 ; V6M-NOT: .eabi_attribute 28 -; V6M-NOT: .eabi_attribute 36 ; V6M: .eabi_attribute 38, 1 -; V6M-NOT: .eabi_attribute 42 -; V6M-NOT: .eabi_attribute 44 -; V6M-NOT: .eabi_attribute 68 ; V6M-FAST-NOT: .eabi_attribute 19 ;; Despite the V6M CPU having no FPU by default, we chose to flush to @@ -298,6 +301,11 @@ ; ARM1156T2F-S: .eabi_attribute 8, 1 ; ARM1156T2F-S: .eabi_attribute 9, 2 ; ARM1156T2F-S: .fpu vfpv2 +; ARM1156T2F-S-NOT: .eabi_attribute 27 +; ARM1156T2F-S-NOT: .eabi_attribute 36 +; ARM1156T2F-S-NOT: .eabi_attribute 42 +; ARM1156T2F-S-NOT: .eabi_attribute 44 +; ARM1156T2F-S-NOT: .eabi_attribute 68 ; ARM1156T2F-S-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; ARM1156T2F-S: .eabi_attribute 20, 1 @@ -306,13 +314,8 @@ ; ARM1156T2F-S: .eabi_attribute 23, 3 ; ARM1156T2F-S: .eabi_attribute 24, 1 ; ARM1156T2F-S: .eabi_attribute 25, 1 -; ARM1156T2F-S-NOT: .eabi_attribute 27 ; ARM1156T2F-S-NOT: .eabi_attribute 28 -; ARM1156T2F-S-NOT: .eabi_attribute 36 ; ARM1156T2F-S: .eabi_attribute 38, 1 -; ARM1156T2F-S-NOT: .eabi_attribute 42 -; ARM1156T2F-S-NOT: .eabi_attribute 44 -; ARM1156T2F-S-NOT: .eabi_attribute 68 ; ARM1156T2F-S-FAST-NOT: .eabi_attribute 19 ;; V6 cores default to flush to positive zero (value 0). Note that value 2 is also equally @@ -327,6 +330,11 @@ ; V7M: .eabi_attribute 7, 77 ; V7M: .eabi_attribute 8, 0 ; V7M: .eabi_attribute 9, 2 +; V7M-NOT: .eabi_attribute 27 +; V7M-NOT: .eabi_attribute 36 +; V7M-NOT: .eabi_attribute 42 +; V7M-NOT: .eabi_attribute 44 +; V7M-NOT: .eabi_attribute 68 ; V7M-NOT: .eabi_attribute 19 ;; The default choice made by llc is for a V7M CPU without an FPU. ;; This is not an interesting detail, but for such CPUs, the default intention is to use @@ -338,13 +346,8 @@ ; V7M: .eabi_attribute 23, 3 ; V7M: .eabi_attribute 24, 1 ; V7M: .eabi_attribute 25, 1 -; V7M-NOT: .eabi_attribute 27 ; V7M-NOT: .eabi_attribute 28 -; V7M-NOT: .eabi_attribute 36 ; V7M: .eabi_attribute 38, 1 -; V7M-NOT: .eabi_attribute 42 -; V7M-NOT: .eabi_attribute 44 -; V7M-NOT: .eabi_attribute 68 ; V7M-FAST-NOT: .eabi_attribute 19 ;; Despite the V7M CPU having no FPU by default, we chose to flush @@ -357,6 +360,11 @@ ; V7: .syntax unified ; V7: .eabi_attribute 6, 10 +; V7-NOT: .eabi_attribute 27 +; V7-NOT: .eabi_attribute 36 +; V7-NOT: .eabi_attribute 42 +; V7-NOT: .eabi_attribute 44 +; V7-NOT: .eabi_attribute 68 ; V7-NOT: .eabi_attribute 19 ;; In safe-maths mode we default to an IEEE 754 compliant choice. ; V7: .eabi_attribute 20, 1 @@ -365,13 +373,8 @@ ; V7: .eabi_attribute 23, 3 ; V7: .eabi_attribute 24, 1 ; V7: .eabi_attribute 25, 1 -; V7-NOT: .eabi_attribute 27 ; V7-NOT: .eabi_attribute 28 -; V7-NOT: .eabi_attribute 36 ; V7: .eabi_attribute 38, 1 -; V7-NOT: .eabi_attribute 42 -; V7-NOT: .eabi_attribute 44 -; V7-NOT: .eabi_attribute 68 ; V7-FAST-NOT: .eabi_attribute 19 ;; The default CPU does have an FPU and it must be VFPv3 or better, so it flushes @@ -386,6 +389,9 @@ ; V7VE: .eabi_attribute 7, 65 @ Tag_CPU_arch_profile ; V7VE: .eabi_attribute 8, 1 @ Tag_ARM_ISA_use ; V7VE: .eabi_attribute 9, 2 @ Tag_THUMB_ISA_use +; V7VE: .eabi_attribute 42, 1 @ Tag_MPextension_use +; V7VE: .eabi_attribute 44, 2 @ Tag_DIV_use +; V7VE: .eabi_attribute 68, 3 @ Tag_Virtualization_use ; V7VE: .eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use ; V7VE: .eabi_attribute 20, 1 @ Tag_ABI_FP_denormal ; V7VE: .eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions @@ -393,19 +399,16 @@ ; V7VE: .eabi_attribute 24, 1 @ Tag_ABI_align_needed ; V7VE: .eabi_attribute 25, 1 @ Tag_ABI_align_preserved ; V7VE: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format -; V7VE: .eabi_attribute 42, 1 @ Tag_MPextension_use -; V7VE: .eabi_attribute 44, 2 @ Tag_DIV_use -; V7VE: .eabi_attribute 68, 3 @ Tag_Virtualization_use ; V8: .syntax unified ; V8: .eabi_attribute 67, "2.09" ; V8: .eabi_attribute 6, 14 +; V8-NOT: .eabi_attribute 44 ; V8-NOT: .eabi_attribute 19 ; V8: .eabi_attribute 20, 1 ; V8: .eabi_attribute 21, 1 ; V8-NOT: .eabi_attribute 22 ; V8: .eabi_attribute 23, 3 -; V8-NOT: .eabi_attribute 44 ; V8-FAST-NOT: .eabi_attribute 19 ;; The default does have an FPU, and for V8-A, it flushes preserving sign. @@ -496,6 +499,30 @@ ; CORTEX-A7-FPUV4: .fpu vfpv4 ; CORTEX-A7-CHECK-NOT: .eabi_attribute 19 + +; Tag_FP_HP_extension +; CORTEX-A7-CHECK: .eabi_attribute 36, 1 +; CORTEX-A7-NOFPU-NOT: .eabi_attribute 36 +; CORTEX-A7-FPUV4: .eabi_attribute 36, 1 + +; Tag_MPextension_use +; CORTEX-A7-CHECK: .eabi_attribute 42, 1 +; CORTEX-A7-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A7-FPUV4: .eabi_attribute 42, 1 + +; Tag_DIV_use +; CORTEX-A7-CHECK: .eabi_attribute 44, 2 +; CORTEX-A7-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A7-FPUV4: .eabi_attribute 44, 2 + +; Tag_DSP_extension +; CORTEX-A7-CHECK-NOT: .eabi_attribute 46 + +; Tag_Virtualization_use +; CORTEX-A7-CHECK: .eabi_attribute 68, 3 +; CORTEX-A7-NOFPU: .eabi_attribute 68, 3 +; CORTEX-A7-FPUV4: .eabi_attribute 68, 3 + ; Tag_ABI_FP_denormal ;; We default to IEEE 754 compliance ; CORTEX-A7-CHECK: .eabi_attribute 20, 1 @@ -535,40 +562,20 @@ ; CORTEX-A7-NOFPU: .eabi_attribute 25, 1 ; CORTEX-A7-FPUV4: .eabi_attribute 25, 1 -; Tag_FP_HP_extension -; CORTEX-A7-CHECK: .eabi_attribute 36, 1 -; CORTEX-A7-NOFPU-NOT: .eabi_attribute 36 -; CORTEX-A7-FPUV4: .eabi_attribute 36, 1 - ; Tag_FP_16bit_format ; CORTEX-A7-CHECK: .eabi_attribute 38, 1 ; CORTEX-A7-NOFPU: .eabi_attribute 38, 1 ; CORTEX-A7-FPUV4: .eabi_attribute 38, 1 -; Tag_MPextension_use -; CORTEX-A7-CHECK: .eabi_attribute 42, 1 -; CORTEX-A7-NOFPU: .eabi_attribute 42, 1 -; CORTEX-A7-FPUV4: .eabi_attribute 42, 1 - -; Tag_DIV_use -; CORTEX-A7-CHECK: .eabi_attribute 44, 2 -; CORTEX-A7-NOFPU: .eabi_attribute 44, 2 -; CORTEX-A7-FPUV4: .eabi_attribute 44, 2 - -; Tag_DSP_extension -; CORTEX-A7-CHECK-NOT: .eabi_attribute 46 - -; Tag_Virtualization_use -; CORTEX-A7-CHECK: .eabi_attribute 68, 3 -; CORTEX-A7-NOFPU: .eabi_attribute 68, 3 -; CORTEX-A7-FPUV4: .eabi_attribute 68, 3 - ; CORTEX-A5-DEFAULT: .cpu cortex-a5 ; CORTEX-A5-DEFAULT: .eabi_attribute 6, 10 ; CORTEX-A5-DEFAULT: .eabi_attribute 7, 65 ; CORTEX-A5-DEFAULT: .eabi_attribute 8, 1 ; CORTEX-A5-DEFAULT: .eabi_attribute 9, 2 ; CORTEX-A5-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A5-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A5-DEFAULT-NOT: .eabi_attribute 44 +; CORTEX-A5-DEFAULT: .eabi_attribute 68, 1 ; CORTEX-A5-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A5-DEFAULT: .eabi_attribute 20, 1 @@ -577,9 +584,6 @@ ; CORTEX-A5-DEFAULT: .eabi_attribute 23, 3 ; CORTEX-A5-DEFAULT: .eabi_attribute 24, 1 ; CORTEX-A5-DEFAULT: .eabi_attribute 25, 1 -; CORTEX-A5-DEFAULT: .eabi_attribute 42, 1 -; CORTEX-A5-DEFAULT-NOT: .eabi_attribute 44 -; CORTEX-A5-DEFAULT: .eabi_attribute 68, 1 ; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 19 ;; The A5 defaults to a VFPv4 FPU, so it flushed preserving the sign when -ffast-math @@ -595,6 +599,8 @@ ; CORTEX-A5-NONEON: .eabi_attribute 8, 1 ; CORTEX-A5-NONEON: .eabi_attribute 9, 2 ; CORTEX-A5-NONEON: .fpu vfpv4-d16 +; CORTEX-A5-NONEON: .eabi_attribute 42, 1 +; CORTEX-A5-NONEON: .eabi_attribute 68, 1 ;; We default to IEEE 754 compliance ; CORTEX-A5-NONEON: .eabi_attribute 20, 1 ; CORTEX-A5-NONEON: .eabi_attribute 21, 1 @@ -602,8 +608,6 @@ ; CORTEX-A5-NONEON: .eabi_attribute 23, 3 ; CORTEX-A5-NONEON: .eabi_attribute 24, 1 ; CORTEX-A5-NONEON: .eabi_attribute 25, 1 -; CORTEX-A5-NONEON: .eabi_attribute 42, 1 -; CORTEX-A5-NONEON: .eabi_attribute 68, 1 ; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 19 ;; The A5 defaults to a VFPv4 FPU, so it flushed preserving sign when -ffast-math @@ -619,6 +623,8 @@ ; CORTEX-A5-NOFPU: .eabi_attribute 8, 1 ; CORTEX-A5-NOFPU: .eabi_attribute 9, 2 ; CORTEX-A5-NOFPU-NOT: .fpu +; CORTEX-A5-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A5-NOFPU: .eabi_attribute 68, 1 ; CORTEX-A5-NOFPU-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A5-NOFPU: .eabi_attribute 20, 1 @@ -627,8 +633,6 @@ ; CORTEX-A5-NOFPU: .eabi_attribute 23, 3 ; CORTEX-A5-NOFPU: .eabi_attribute 24, 1 ; CORTEX-A5-NOFPU: .eabi_attribute 25, 1 -; CORTEX-A5-NOFPU: .eabi_attribute 42, 1 -; CORTEX-A5-NOFPU: .eabi_attribute 68, 1 ; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -645,6 +649,11 @@ ; CORTEX-A8-SOFT: .eabi_attribute 8, 1 ; CORTEX-A8-SOFT: .eabi_attribute 9, 2 ; CORTEX-A8-SOFT: .fpu neon +; CORTEX-A8-SOFT-NOT: .eabi_attribute 27 +; CORTEX-A8-SOFT-NOT: .eabi_attribute 36, 1 +; CORTEX-A8-SOFT-NOT: .eabi_attribute 42, 1 +; CORTEX-A8-SOFT-NOT: .eabi_attribute 44 +; CORTEX-A8-SOFT: .eabi_attribute 68, 1 ; CORTEX-A8-SOFT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A8-SOFT: .eabi_attribute 20, 1 @@ -653,13 +662,8 @@ ; CORTEX-A8-SOFT: .eabi_attribute 23, 3 ; CORTEX-A8-SOFT: .eabi_attribute 24, 1 ; CORTEX-A8-SOFT: .eabi_attribute 25, 1 -; CORTEX-A8-SOFT-NOT: .eabi_attribute 27 ; CORTEX-A8-SOFT-NOT: .eabi_attribute 28 -; CORTEX-A8-SOFT-NOT: .eabi_attribute 36, 1 ; CORTEX-A8-SOFT: .eabi_attribute 38, 1 -; CORTEX-A8-SOFT-NOT: .eabi_attribute 42, 1 -; CORTEX-A8-SOFT-NOT: .eabi_attribute 44 -; CORTEX-A8-SOFT: .eabi_attribute 68, 1 ; CORTEX-A9-SOFT: .cpu cortex-a9 ; CORTEX-A9-SOFT: .eabi_attribute 6, 10 @@ -667,6 +671,11 @@ ; CORTEX-A9-SOFT: .eabi_attribute 8, 1 ; CORTEX-A9-SOFT: .eabi_attribute 9, 2 ; CORTEX-A9-SOFT: .fpu neon +; CORTEX-A9-SOFT-NOT: .eabi_attribute 27 +; CORTEX-A9-SOFT: .eabi_attribute 36, 1 +; CORTEX-A9-SOFT: .eabi_attribute 42, 1 +; CORTEX-A9-SOFT-NOT: .eabi_attribute 44 +; CORTEX-A9-SOFT: .eabi_attribute 68, 1 ; CORTEX-A9-SOFT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A9-SOFT: .eabi_attribute 20, 1 @@ -675,13 +684,8 @@ ; CORTEX-A9-SOFT: .eabi_attribute 23, 3 ; CORTEX-A9-SOFT: .eabi_attribute 24, 1 ; CORTEX-A9-SOFT: .eabi_attribute 25, 1 -; CORTEX-A9-SOFT-NOT: .eabi_attribute 27 ; CORTEX-A9-SOFT-NOT: .eabi_attribute 28 -; CORTEX-A9-SOFT: .eabi_attribute 36, 1 ; CORTEX-A9-SOFT: .eabi_attribute 38, 1 -; CORTEX-A9-SOFT: .eabi_attribute 42, 1 -; CORTEX-A9-SOFT-NOT: .eabi_attribute 44 -; CORTEX-A9-SOFT: .eabi_attribute 68, 1 ; CORTEX-A8-SOFT-FAST-NOT: .eabi_attribute 19 ; CORTEX-A9-SOFT-FAST-NOT: .eabi_attribute 19 @@ -699,6 +703,10 @@ ; CORTEX-A8-HARD: .eabi_attribute 8, 1 ; CORTEX-A8-HARD: .eabi_attribute 9, 2 ; CORTEX-A8-HARD: .fpu neon +; CORTEX-A8-HARD-NOT: .eabi_attribute 27 +; CORTEX-A8-HARD-NOT: .eabi_attribute 36, 1 +; CORTEX-A8-HARD-NOT: .eabi_attribute 42, 1 +; CORTEX-A8-HARD: .eabi_attribute 68, 1 ; CORTEX-A8-HARD-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A8-HARD: .eabi_attribute 20, 1 @@ -707,12 +715,8 @@ ; CORTEX-A8-HARD: .eabi_attribute 23, 3 ; CORTEX-A8-HARD: .eabi_attribute 24, 1 ; CORTEX-A8-HARD: .eabi_attribute 25, 1 -; CORTEX-A8-HARD-NOT: .eabi_attribute 27 ; CORTEX-A8-HARD: .eabi_attribute 28, 1 -; CORTEX-A8-HARD-NOT: .eabi_attribute 36, 1 ; CORTEX-A8-HARD: .eabi_attribute 38, 1 -; CORTEX-A8-HARD-NOT: .eabi_attribute 42, 1 -; CORTEX-A8-HARD: .eabi_attribute 68, 1 @@ -722,6 +726,10 @@ ; CORTEX-A9-HARD: .eabi_attribute 8, 1 ; CORTEX-A9-HARD: .eabi_attribute 9, 2 ; CORTEX-A9-HARD: .fpu neon +; CORTEX-A9-HARD-NOT: .eabi_attribute 27 +; CORTEX-A9-HARD: .eabi_attribute 36, 1 +; CORTEX-A9-HARD: .eabi_attribute 42, 1 +; CORTEX-A9-HARD: .eabi_attribute 68, 1 ; CORTEX-A9-HARD-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A9-HARD: .eabi_attribute 20, 1 @@ -730,12 +738,8 @@ ; CORTEX-A9-HARD: .eabi_attribute 23, 3 ; CORTEX-A9-HARD: .eabi_attribute 24, 1 ; CORTEX-A9-HARD: .eabi_attribute 25, 1 -; CORTEX-A9-HARD-NOT: .eabi_attribute 27 ; CORTEX-A9-HARD: .eabi_attribute 28, 1 -; CORTEX-A9-HARD: .eabi_attribute 36, 1 ; CORTEX-A9-HARD: .eabi_attribute 38, 1 -; CORTEX-A9-HARD: .eabi_attribute 42, 1 -; CORTEX-A9-HARD: .eabi_attribute 68, 1 ; CORTEX-A8-HARD-FAST-NOT: .eabi_attribute 19 ;; The A8 defaults to a VFPv3 FPU, so it flushes preserving the sign when @@ -759,6 +763,9 @@ ; CORTEX-A12-DEFAULT: .eabi_attribute 8, 1 ; CORTEX-A12-DEFAULT: .eabi_attribute 9, 2 ; CORTEX-A12-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A12-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A12-DEFAULT: .eabi_attribute 44, 2 +; CORTEX-A12-DEFAULT: .eabi_attribute 68, 3 ; CORTEX-A12-DEFAULT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A12-DEFAULT: .eabi_attribute 20, 1 @@ -767,9 +774,6 @@ ; CORTEX-A12-DEFAULT: .eabi_attribute 23, 3 ; CORTEX-A12-DEFAULT: .eabi_attribute 24, 1 ; CORTEX-A12-DEFAULT: .eabi_attribute 25, 1 -; CORTEX-A12-DEFAULT: .eabi_attribute 42, 1 -; CORTEX-A12-DEFAULT: .eabi_attribute 44, 2 -; CORTEX-A12-DEFAULT: .eabi_attribute 68, 3 ; CORTEX-A12-DEFAULT-FAST-NOT: .eabi_attribute 19 ;; The A12 defaults to a VFPv3 FPU, so it flushes preserving the sign when @@ -785,6 +789,9 @@ ; CORTEX-A12-NOFPU: .eabi_attribute 8, 1 ; CORTEX-A12-NOFPU: .eabi_attribute 9, 2 ; CORTEX-A12-NOFPU-NOT: .fpu +; CORTEX-A12-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A12-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A12-NOFPU: .eabi_attribute 68, 3 ; CORTEX-A12-NOFPU-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A12-NOFPU: .eabi_attribute 20, 1 @@ -793,9 +800,6 @@ ; CORTEX-A12-NOFPU: .eabi_attribute 23, 3 ; CORTEX-A12-NOFPU: .eabi_attribute 24, 1 ; CORTEX-A12-NOFPU: .eabi_attribute 25, 1 -; CORTEX-A12-NOFPU: .eabi_attribute 42, 1 -; CORTEX-A12-NOFPU: .eabi_attribute 44, 2 -; CORTEX-A12-NOFPU: .eabi_attribute 68, 3 ; CORTEX-A12-NOFPU-FAST-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -812,6 +816,11 @@ ; CORTEX-A15: .eabi_attribute 8, 1 ; CORTEX-A15: .eabi_attribute 9, 2 ; CORTEX-A15: .fpu neon-vfpv4 +; CORTEX-A15-NOT: .eabi_attribute 27 +; CORTEX-A15: .eabi_attribute 36, 1 +; CORTEX-A15: .eabi_attribute 42, 1 +; CORTEX-A15: .eabi_attribute 44, 2 +; CORTEX-A15: .eabi_attribute 68, 3 ; CORTEX-A15-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A15: .eabi_attribute 20, 1 @@ -820,13 +829,8 @@ ; CORTEX-A15: .eabi_attribute 23, 3 ; CORTEX-A15: .eabi_attribute 24, 1 ; CORTEX-A15: .eabi_attribute 25, 1 -; CORTEX-A15-NOT: .eabi_attribute 27 ; CORTEX-A15-NOT: .eabi_attribute 28 -; CORTEX-A15: .eabi_attribute 36, 1 ; CORTEX-A15: .eabi_attribute 38, 1 -; CORTEX-A15: .eabi_attribute 42, 1 -; CORTEX-A15: .eabi_attribute 44, 2 -; CORTEX-A15: .eabi_attribute 68, 3 ; CORTEX-A15-FAST-NOT: .eabi_attribute 19 ;; The A15 defaults to a VFPv3 FPU, so it flushes preserving the sign when @@ -842,6 +846,9 @@ ; CORTEX-A17-DEFAULT: .eabi_attribute 8, 1 ; CORTEX-A17-DEFAULT: .eabi_attribute 9, 2 ; CORTEX-A17-DEFAULT: .fpu neon-vfpv4 +; CORTEX-A17-DEFAULT: .eabi_attribute 42, 1 +; CORTEX-A17-DEFAULT: .eabi_attribute 44, 2 +; CORTEX-A17-DEFAULT: .eabi_attribute 68, 3 ; CORTEX-A17-DEFAULT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A17-DEFAULT: .eabi_attribute 20, 1 @@ -850,9 +857,6 @@ ; CORTEX-A17-DEFAULT: .eabi_attribute 23, 3 ; CORTEX-A17-DEFAULT: .eabi_attribute 24, 1 ; CORTEX-A17-DEFAULT: .eabi_attribute 25, 1 -; CORTEX-A17-DEFAULT: .eabi_attribute 42, 1 -; CORTEX-A17-DEFAULT: .eabi_attribute 44, 2 -; CORTEX-A17-DEFAULT: .eabi_attribute 68, 3 ; CORTEX-A17-FAST-NOT: .eabi_attribute 19 ;; The A17 defaults to a VFPv3 FPU, so it flushes preserving the sign when @@ -868,6 +872,9 @@ ; CORTEX-A17-NOFPU: .eabi_attribute 8, 1 ; CORTEX-A17-NOFPU: .eabi_attribute 9, 2 ; CORTEX-A17-NOFPU-NOT: .fpu +; CORTEX-A17-NOFPU: .eabi_attribute 42, 1 +; CORTEX-A17-NOFPU: .eabi_attribute 44, 2 +; CORTEX-A17-NOFPU: .eabi_attribute 68, 3 ; CORTEX-A17-NOFPU-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A17-NOFPU: .eabi_attribute 20, 1 @@ -876,9 +883,6 @@ ; CORTEX-A17-NOFPU: .eabi_attribute 23, 3 ; CORTEX-A17-NOFPU: .eabi_attribute 24, 1 ; CORTEX-A17-NOFPU: .eabi_attribute 25, 1 -; CORTEX-A17-NOFPU: .eabi_attribute 42, 1 -; CORTEX-A17-NOFPU: .eabi_attribute 44, 2 -; CORTEX-A17-NOFPU: .eabi_attribute 68, 3 ; CORTEX-A17-NOFPU-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -897,25 +901,25 @@ ; CORTEX-M0: .cpu cortex-m0 ; CORTEX-M0: .eabi_attribute 6, 12 -; CORTEX-M0-NOT: .eabi_attribute 7 +; CORTEX-M0: .eabi_attribute 7, 77 ; CORTEX-M0: .eabi_attribute 8, 0 ; CORTEX-M0: .eabi_attribute 9, 1 +; CORTEX-M0-NOT: .eabi_attribute 27 +; CORTEX-M0-NOT: .eabi_attribute 36 +; CORTEX-M0: .eabi_attribute 34, 0 +; CORTEX-M0-NOT: .eabi_attribute 42 +; CORTEX-M0-NOT: .eabi_attribute 44 +; CORTEX-M0-NOT: .eabi_attribute 68 ; CORTEX-M0-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M0: .eabi_attribute 20, 1 ; CORTEX-M0: .eabi_attribute 21, 1 ; CORTEX-M0-NOT: .eabi_attribute 22 ; CORTEX-M0: .eabi_attribute 23, 3 -; CORTEX-M0: .eabi_attribute 34, 0 ; CORTEX-M0: .eabi_attribute 24, 1 ; CORTEX-M0: .eabi_attribute 25, 1 -; CORTEX-M0-NOT: .eabi_attribute 27 ; CORTEX-M0-NOT: .eabi_attribute 28 -; CORTEX-M0-NOT: .eabi_attribute 36 ; CORTEX-M0: .eabi_attribute 38, 1 -; CORTEX-M0-NOT: .eabi_attribute 42 -; CORTEX-M0-NOT: .eabi_attribute 44 -; CORTEX-M0-NOT: .eabi_attribute 68 ; CORTEX-M0-FAST-NOT: .eabi_attribute 19 ;; Despite the M0 CPU having no FPU in this scenario, we chose to @@ -930,9 +934,14 @@ ; CORTEX-M0PLUS: .cpu cortex-m0plus ; CORTEX-M0PLUS: .eabi_attribute 6, 12 -; CORTEX-M0PLUS-NOT: .eabi_attribute 7 +; CORTEX-M0PLUS: .eabi_attribute 7, 77 ; CORTEX-M0PLUS: .eabi_attribute 8, 0 ; CORTEX-M0PLUS: .eabi_attribute 9, 1 +; CORTEX-M0PLUS-NOT: .eabi_attribute 27 +; CORTEX-M0PLUS-NOT: .eabi_attribute 36 +; CORTEX-M0PLUS-NOT: .eabi_attribute 42 +; CORTEX-M0PLUS-NOT: .eabi_attribute 44 +; CORTEX-M0PLUS-NOT: .eabi_attribute 68 ; CORTEX-M0PLUS-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M0PLUS: .eabi_attribute 20, 1 @@ -941,13 +950,8 @@ ; CORTEX-M0PLUS: .eabi_attribute 23, 3 ; CORTEX-M0PLUS: .eabi_attribute 24, 1 ; CORTEX-M0PLUS: .eabi_attribute 25, 1 -; CORTEX-M0PLUS-NOT: .eabi_attribute 27 ; CORTEX-M0PLUS-NOT: .eabi_attribute 28 -; CORTEX-M0PLUS-NOT: .eabi_attribute 36 ; CORTEX-M0PLUS: .eabi_attribute 38, 1 -; CORTEX-M0PLUS-NOT: .eabi_attribute 42 -; CORTEX-M0PLUS-NOT: .eabi_attribute 44 -; CORTEX-M0PLUS-NOT: .eabi_attribute 68 ; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 19 ;; Despite the M0+ CPU having no FPU in this scenario, we chose to @@ -962,9 +966,14 @@ ; CORTEX-M1: .cpu cortex-m1 ; CORTEX-M1: .eabi_attribute 6, 12 -; CORTEX-M1-NOT: .eabi_attribute 7 +; CORTEX-M1: .eabi_attribute 7, 77 ; CORTEX-M1: .eabi_attribute 8, 0 ; CORTEX-M1: .eabi_attribute 9, 1 +; CORTEX-M1-NOT: .eabi_attribute 27 +; CORTEX-M1-NOT: .eabi_attribute 36 +; CORTEX-M1-NOT: .eabi_attribute 42 +; CORTEX-M1-NOT: .eabi_attribute 44 +; CORTEX-M1-NOT: .eabi_attribute 68 ; CORTEX-M1-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M1: .eabi_attribute 20, 1 @@ -973,13 +982,8 @@ ; CORTEX-M1: .eabi_attribute 23, 3 ; CORTEX-M1: .eabi_attribute 24, 1 ; CORTEX-M1: .eabi_attribute 25, 1 -; CORTEX-M1-NOT: .eabi_attribute 27 ; CORTEX-M1-NOT: .eabi_attribute 28 -; CORTEX-M1-NOT: .eabi_attribute 36 ; CORTEX-M1: .eabi_attribute 38, 1 -; CORTEX-M1-NOT: .eabi_attribute 42 -; CORTEX-M1-NOT: .eabi_attribute 44 -; CORTEX-M1-NOT: .eabi_attribute 68 ; CORTEX-M1-FAST-NOT: .eabi_attribute 19 ;; Despite the M1 CPU having no FPU in this scenario, we chose to @@ -994,9 +998,13 @@ ; SC000: .cpu sc000 ; SC000: .eabi_attribute 6, 12 -; SC000-NOT: .eabi_attribute 7 +; SC000: .eabi_attribute 7, 77 ; SC000: .eabi_attribute 8, 0 ; SC000: .eabi_attribute 9, 1 +; SC000-NOT: .eabi_attribute 27 +; SC000-NOT: .eabi_attribute 42 +; SC000-NOT: .eabi_attribute 44 +; SC000-NOT: .eabi_attribute 68 ; SC000-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; SC000: .eabi_attribute 20, 1 @@ -1005,13 +1013,8 @@ ; SC000: .eabi_attribute 23, 3 ; SC000: .eabi_attribute 24, 1 ; SC000: .eabi_attribute 25, 1 -; SC000-NOT: .eabi_attribute 27 ; SC000-NOT: .eabi_attribute 28 -; SC000-NOT: .eabi_attribute 36 ; SC000: .eabi_attribute 38, 1 -; SC000-NOT: .eabi_attribute 42 -; SC000-NOT: .eabi_attribute 44 -; SC000-NOT: .eabi_attribute 68 ; SC000-FAST-NOT: .eabi_attribute 19 ;; Despite the SC000 CPU having no FPU in this scenario, we chose to @@ -1029,6 +1032,11 @@ ; CORTEX-M3: .eabi_attribute 7, 77 ; CORTEX-M3: .eabi_attribute 8, 0 ; CORTEX-M3: .eabi_attribute 9, 2 +; CORTEX-M3-NOT: .eabi_attribute 27 +; CORTEX-M3-NOT: .eabi_attribute 36 +; CORTEX-M3-NOT: .eabi_attribute 42 +; CORTEX-M3-NOT: .eabi_attribute 44 +; CORTEX-M3-NOT: .eabi_attribute 68 ; CORTEX-M3-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M3: .eabi_attribute 20, 1 @@ -1037,13 +1045,8 @@ ; CORTEX-M3: .eabi_attribute 23, 3 ; CORTEX-M3: .eabi_attribute 24, 1 ; CORTEX-M3: .eabi_attribute 25, 1 -; CORTEX-M3-NOT: .eabi_attribute 27 ; CORTEX-M3-NOT: .eabi_attribute 28 -; CORTEX-M3-NOT: .eabi_attribute 36 ; CORTEX-M3: .eabi_attribute 38, 1 -; CORTEX-M3-NOT: .eabi_attribute 42 -; CORTEX-M3-NOT: .eabi_attribute 44 -; CORTEX-M3-NOT: .eabi_attribute 68 ; CORTEX-M3-FAST-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -1059,6 +1062,11 @@ ; SC300: .eabi_attribute 7, 77 ; SC300: .eabi_attribute 8, 0 ; SC300: .eabi_attribute 9, 2 +; SC300-NOT: .eabi_attribute 27 +; SC300-NOT: .eabi_attribute 36 +; SC300-NOT: .eabi_attribute 42 +; SC300-NOT: .eabi_attribute 44 +; SC300-NOT: .eabi_attribute 68 ; SC300-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; SC300: .eabi_attribute 20, 1 @@ -1067,13 +1075,8 @@ ; SC300: .eabi_attribute 23, 3 ; SC300: .eabi_attribute 24, 1 ; SC300: .eabi_attribute 25, 1 -; SC300-NOT: .eabi_attribute 27 ; SC300-NOT: .eabi_attribute 28 -; SC300-NOT: .eabi_attribute 36 ; SC300: .eabi_attribute 38, 1 -; SC300-NOT: .eabi_attribute 42 -; SC300-NOT: .eabi_attribute 44 -; SC300-NOT: .eabi_attribute 68 ; SC300-FAST-NOT: .eabi_attribute 19 ;; Despite there being no FPU, we chose to flush to zero preserving @@ -1090,6 +1093,11 @@ ; CORTEX-M4-SOFT: .eabi_attribute 8, 0 ; CORTEX-M4-SOFT: .eabi_attribute 9, 2 ; CORTEX-M4-SOFT: .fpu fpv4-sp-d16 +; CORTEX-M4-SOFT: .eabi_attribute 27, 1 +; CORTEX-M4-SOFT: .eabi_attribute 36, 1 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 42 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 44 +; CORTEX-M4-SOFT-NOT: .eabi_attribute 68 ; CORTEX-M4-SOFT-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M4-SOFT: .eabi_attribute 20, 1 @@ -1098,13 +1106,8 @@ ; CORTEX-M4-SOFT: .eabi_attribute 23, 3 ; CORTEX-M4-SOFT: .eabi_attribute 24, 1 ; CORTEX-M4-SOFT: .eabi_attribute 25, 1 -; CORTEX-M4-SOFT: .eabi_attribute 27, 1 ; CORTEX-M4-SOFT-NOT: .eabi_attribute 28 -; CORTEX-M4-SOFT: .eabi_attribute 36, 1 ; CORTEX-M4-SOFT: .eabi_attribute 38, 1 -; CORTEX-M4-SOFT-NOT: .eabi_attribute 42 -; CORTEX-M4-SOFT-NOT: .eabi_attribute 44 -; CORTEX-M4-SOFT-NOT: .eabi_attribute 68 ; CORTEX-M4-SOFT-FAST-NOT: .eabi_attribute 19 ;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when @@ -1120,6 +1123,11 @@ ; CORTEX-M4-HARD: .eabi_attribute 8, 0 ; CORTEX-M4-HARD: .eabi_attribute 9, 2 ; CORTEX-M4-HARD: .fpu fpv4-sp-d16 +; CORTEX-M4-HARD: .eabi_attribute 27, 1 +; CORTEX-M4-HARD: .eabi_attribute 36, 1 +; CORTEX-M4-HARD-NOT: .eabi_attribute 42 +; CORTEX-M4-HARD-NOT: .eabi_attribute 44 +; CORTEX-M4-HARD-NOT: .eabi_attribute 68 ; CORTEX-M4-HARD-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-M4-HARD: .eabi_attribute 20, 1 @@ -1128,13 +1136,8 @@ ; CORTEX-M4-HARD: .eabi_attribute 23, 3 ; CORTEX-M4-HARD: .eabi_attribute 24, 1 ; CORTEX-M4-HARD: .eabi_attribute 25, 1 -; CORTEX-M4-HARD: .eabi_attribute 27, 1 ; CORTEX-M4-HARD: .eabi_attribute 28, 1 -; CORTEX-M4-HARD: .eabi_attribute 36, 1 ; CORTEX-M4-HARD: .eabi_attribute 38, 1 -; CORTEX-M4-HARD-NOT: .eabi_attribute 42 -; CORTEX-M4-HARD-NOT: .eabi_attribute 44 -; CORTEX-M4-HARD-NOT: .eabi_attribute 68 ; CORTEX-M4-HARD-FAST-NOT: .eabi_attribute 19 ;; The M4 defaults to a VFPv4 FPU, so it flushes preserving the sign when @@ -1152,6 +1155,11 @@ ; CORTEX-M7-SOFT-NOT: .fpu ; CORTEX-M7-SINGLE: .fpu fpv5-sp-d16 ; CORTEX-M7-DOUBLE: .fpu fpv5-d16 +; CORTEX-M7-SOFT-NOT: .eabi_attribute 27 +; CORTEX-M7-SINGLE: .eabi_attribute 27, 1 +; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27 +; CORTEX-M7: .eabi_attribute 36, 1 +; CORTEX-M7-NOT: .eabi_attribute 44 ; CORTEX-M7: .eabi_attribute 17, 1 ; CORTEX-M7-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance @@ -1161,12 +1169,7 @@ ; CORTEX-M7: .eabi_attribute 23, 3 ; CORTEX-M7: .eabi_attribute 24, 1 ; CORTEX-M7: .eabi_attribute 25, 1 -; CORTEX-M7-SOFT-NOT: .eabi_attribute 27 -; CORTEX-M7-SINGLE: .eabi_attribute 27, 1 -; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27 -; CORTEX-M7: .eabi_attribute 36, 1 ; CORTEX-M7: .eabi_attribute 38, 1 -; CORTEX-M7-NOT: .eabi_attribute 44 ; CORTEX-M7: .eabi_attribute 14, 0 ; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 19 @@ -1186,6 +1189,10 @@ ; CORTEX-R4: .eabi_attribute 8, 1 ; CORTEX-R4: .eabi_attribute 9, 2 ; CORTEX-R4-NOT: .fpu vfpv3-d16 +; CORTEX-R4-NOT: .eabi_attribute 36 +; CORTEX-R4-NOT: .eabi_attribute 42 +; CORTEX-R4-NOT: .eabi_attribute 44 +; CORTEX-R4-NOT: .eabi_attribute 68 ; CORTEX-R4-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R4: .eabi_attribute 20, 1 @@ -1195,11 +1202,7 @@ ; CORTEX-R4: .eabi_attribute 24, 1 ; CORTEX-R4: .eabi_attribute 25, 1 ; CORTEX-R4-NOT: .eabi_attribute 28 -; CORTEX-R4-NOT: .eabi_attribute 36 ; CORTEX-R4: .eabi_attribute 38, 1 -; CORTEX-R4-NOT: .eabi_attribute 42 -; CORTEX-R4-NOT: .eabi_attribute 44 -; CORTEX-R4-NOT: .eabi_attribute 68 ; CORTEX-R4F: .cpu cortex-r4f ; CORTEX-R4F: .eabi_attribute 6, 10 @@ -1207,6 +1210,11 @@ ; CORTEX-R4F: .eabi_attribute 8, 1 ; CORTEX-R4F: .eabi_attribute 9, 2 ; CORTEX-R4F: .fpu vfpv3-d16 +; CORTEX-R4F-NOT: .eabi_attribute 27, 1 +; CORTEX-R4F-NOT: .eabi_attribute 36 +; CORTEX-R4F-NOT: .eabi_attribute 42 +; CORTEX-R4F-NOT: .eabi_attribute 44 +; CORTEX-R4F-NOT: .eabi_attribute 68 ; CORTEX-R4F-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R4F: .eabi_attribute 20, 1 @@ -1215,13 +1223,8 @@ ; CORTEX-R4F: .eabi_attribute 23, 3 ; CORTEX-R4F: .eabi_attribute 24, 1 ; CORTEX-R4F: .eabi_attribute 25, 1 -; CORTEX-R4F-NOT: .eabi_attribute 27, 1 ; CORTEX-R4F-NOT: .eabi_attribute 28 -; CORTEX-R4F-NOT: .eabi_attribute 36 ; CORTEX-R4F: .eabi_attribute 38, 1 -; CORTEX-R4F-NOT: .eabi_attribute 42 -; CORTEX-R4F-NOT: .eabi_attribute 44 -; CORTEX-R4F-NOT: .eabi_attribute 68 ; CORTEX-R5: .cpu cortex-r5 ; CORTEX-R5: .eabi_attribute 6, 10 @@ -1229,6 +1232,11 @@ ; CORTEX-R5: .eabi_attribute 8, 1 ; CORTEX-R5: .eabi_attribute 9, 2 ; CORTEX-R5: .fpu vfpv3-d16 +; CORTEX-R5-NOT: .eabi_attribute 27, 1 +; CORTEX-R5-NOT: .eabi_attribute 36 +; CORTEX-R5: .eabi_attribute 44, 2 +; CORTEX-R5-NOT: .eabi_attribute 42 +; CORTEX-R5-NOT: .eabi_attribute 68 ; CORTEX-R5-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R5: .eabi_attribute 20, 1 @@ -1237,13 +1245,8 @@ ; CORTEX-R5: .eabi_attribute 23, 3 ; CORTEX-R5: .eabi_attribute 24, 1 ; CORTEX-R5: .eabi_attribute 25, 1 -; CORTEX-R5-NOT: .eabi_attribute 27, 1 ; CORTEX-R5-NOT: .eabi_attribute 28 -; CORTEX-R5-NOT: .eabi_attribute 36 ; CORTEX-R5: .eabi_attribute 38, 1 -; CORTEX-R5-NOT: .eabi_attribute 42 -; CORTEX-R5: .eabi_attribute 44, 2 -; CORTEX-R5-NOT: .eabi_attribute 68 ; CORTEX-R5-FAST-NOT: .eabi_attribute 19 ;; The R5 has the VFPv3 FP unit, which always flushes preserving sign. @@ -1258,6 +1261,10 @@ ; CORTEX-R7: .eabi_attribute 8, 1 ; CORTEX-R7: .eabi_attribute 9, 2 ; CORTEX-R7: .fpu vfpv3-d16-fp16 +; CORTEX-R7: .eabi_attribute 36, 1 +; CORTEX-R7: .eabi_attribute 42, 1 +; CORTEX-R7: .eabi_attribute 44, 2 +; CORTEX-R7-NOT: .eabi_attribute 68 ; CORTEX-R7-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R7: .eabi_attribute 20, 1 @@ -1267,11 +1274,7 @@ ; CORTEX-R7: .eabi_attribute 24, 1 ; CORTEX-R7: .eabi_attribute 25, 1 ; CORTEX-R7-NOT: .eabi_attribute 28 -; CORTEX-R7: .eabi_attribute 36, 1 ; CORTEX-R7: .eabi_attribute 38, 1 -; CORTEX-R7: .eabi_attribute 42, 1 -; CORTEX-R7: .eabi_attribute 44, 2 -; CORTEX-R7-NOT: .eabi_attribute 68 ; CORTEX-R7-FAST-NOT: .eabi_attribute 19 ;; The R7 has the VFPv3 FP unit, which always flushes preserving sign. @@ -1286,6 +1289,10 @@ ; CORTEX-R8: .eabi_attribute 8, 1 ; CORTEX-R8: .eabi_attribute 9, 2 ; CORTEX-R8: .fpu vfpv3-d16-fp16 +; CORTEX-R8: .eabi_attribute 36, 1 +; CORTEX-R8: .eabi_attribute 42, 1 +; CORTEX-R8: .eabi_attribute 44, 2 +; CORTEX-R8-NOT: .eabi_attribute 68 ; CORTEX-R8-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R8: .eabi_attribute 20, 1 @@ -1295,11 +1302,7 @@ ; CORTEX-R8: .eabi_attribute 24, 1 ; CORTEX-R8: .eabi_attribute 25, 1 ; CORTEX-R8-NOT: .eabi_attribute 28 -; CORTEX-R8: .eabi_attribute 36, 1 ; CORTEX-R8: .eabi_attribute 38, 1 -; CORTEX-R8: .eabi_attribute 42, 1 -; CORTEX-R8: .eabi_attribute 44, 2 -; CORTEX-R8-NOT: .eabi_attribute 68 ; CORTEX-R8-FAST-NOT: .eabi_attribute 19 ;; The R8 has the VFPv3 FP unit, which always flushes preserving sign. @@ -1315,6 +1318,11 @@ ; CORTEX-A32: .eabi_attribute 9, 2 ; CORTEX-A32: .fpu crypto-neon-fp-armv8 ; CORTEX-A32: .eabi_attribute 12, 3 +; CORTEX-A32-NOT: .eabi_attribute 27 +; CORTEX-A32: .eabi_attribute 36, 1 +; CORTEX-A32: .eabi_attribute 42, 1 +; CORTEX-A32-NOT: .eabi_attribute 44 +; CORTEX-A32: .eabi_attribute 68, 3 ; CORTEX-A32-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A32: .eabi_attribute 20, 1 @@ -1323,13 +1331,8 @@ ; CORTEX-A32: .eabi_attribute 23, 3 ; CORTEX-A32: .eabi_attribute 24, 1 ; CORTEX-A32: .eabi_attribute 25, 1 -; CORTEX-A32-NOT: .eabi_attribute 27 ; CORTEX-A32-NOT: .eabi_attribute 28 -; CORTEX-A32: .eabi_attribute 36, 1 ; CORTEX-A32: .eabi_attribute 38, 1 -; CORTEX-A32: .eabi_attribute 42, 1 -; CORTEX-A32-NOT: .eabi_attribute 44 -; CORTEX-A32: .eabi_attribute 68, 3 ; CORTEX-A32-FAST-NOT: .eabi_attribute 19 ;; The A32 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1343,20 +1346,20 @@ ; CORTEX-M23: .eabi_attribute 7, 77 ; CORTEX-M23: .eabi_attribute 8, 0 ; CORTEX-M23: .eabi_attribute 9, 3 +; CORTEX-M23-NOT: .eabi_attribute 27 +; CORTEX-M23: .eabi_attribute 34, 1 +; CORTEX-M23-NOT: .eabi_attribute 44 ; CORTEX-M23: .eabi_attribute 17, 1 ;; We default to IEEE 754 compliance ; CORTEX-M23-NOT: .eabi_attribute 19 ; CORTEX-M23: .eabi_attribute 20, 1 ; CORTEX-M23: .eabi_attribute 21, 1 ; CORTEX-M23: .eabi_attribute 23, 3 -; CORTEX-M23: .eabi_attribute 34, 1 ; CORTEX-M23: .eabi_attribute 24, 1 -; CORTEX-M23-NOT: .eabi_attribute 27 ; CORTEX-M23-NOT: .eabi_attribute 28 ; CORTEX-M23: .eabi_attribute 25, 1 ; CORTEX-M23: .eabi_attribute 38, 1 ; CORTEX-M23: .eabi_attribute 14, 0 -; CORTEX-M23-NOT: .eabi_attribute 44 ; CORTEX-M33: .cpu cortex-m33 ; CORTEX-M33: .eabi_attribute 6, 17 @@ -1364,21 +1367,21 @@ ; CORTEX-M33: .eabi_attribute 8, 0 ; CORTEX-M33: .eabi_attribute 9, 3 ; CORTEX-M33: .fpu fpv5-sp-d16 +; CORTEX-M33: .eabi_attribute 27, 1 +; CORTEX-M33: .eabi_attribute 36, 1 +; CORTEX-M33-NOT: .eabi_attribute 44 +; CORTEX-M33: .eabi_attribute 46, 1 +; CORTEX-M33: .eabi_attribute 34, 1 ; CORTEX-M33: .eabi_attribute 17, 1 ;; We default to IEEE 754 compliance ; CORTEX-M23-NOT: .eabi_attribute 19 ; CORTEX-M33: .eabi_attribute 20, 1 ; CORTEX-M33: .eabi_attribute 21, 1 ; CORTEX-M33: .eabi_attribute 23, 3 -; CORTEX-M33: .eabi_attribute 34, 1 ; CORTEX-M33: .eabi_attribute 24, 1 ; CORTEX-M33: .eabi_attribute 25, 1 -; CORTEX-M33: .eabi_attribute 27, 1 ; CORTEX-M33-NOT: .eabi_attribute 28 -; CORTEX-M33: .eabi_attribute 36, 1 ; CORTEX-M33: .eabi_attribute 38, 1 -; CORTEX-M33: .eabi_attribute 46, 1 -; CORTEX-M33-NOT: .eabi_attribute 44 ; CORTEX-M33: .eabi_attribute 14, 0 ; CORTEX-M33-FAST-NOT: .eabi_attribute 19 @@ -1394,6 +1397,11 @@ ; CORTEX-A35: .eabi_attribute 9, 2 ; CORTEX-A35: .fpu crypto-neon-fp-armv8 ; CORTEX-A35: .eabi_attribute 12, 3 +; CORTEX-A35-NOT: .eabi_attribute 27 +; CORTEX-A35: .eabi_attribute 36, 1 +; CORTEX-A35: .eabi_attribute 42, 1 +; CORTEX-A35-NOT: .eabi_attribute 44 +; CORTEX-A35: .eabi_attribute 68, 3 ; CORTEX-A35-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A35: .eabi_attribute 20, 1 @@ -1402,13 +1410,8 @@ ; CORTEX-A35: .eabi_attribute 23, 3 ; CORTEX-A35: .eabi_attribute 24, 1 ; CORTEX-A35: .eabi_attribute 25, 1 -; CORTEX-A35-NOT: .eabi_attribute 27 ; CORTEX-A35-NOT: .eabi_attribute 28 -; CORTEX-A35: .eabi_attribute 36, 1 ; CORTEX-A35: .eabi_attribute 38, 1 -; CORTEX-A35: .eabi_attribute 42, 1 -; CORTEX-A35-NOT: .eabi_attribute 44 -; CORTEX-A35: .eabi_attribute 68, 3 ; CORTEX-A35-FAST-NOT: .eabi_attribute 19 ;; The A35 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1424,6 +1427,11 @@ ; CORTEX-A53: .eabi_attribute 9, 2 ; CORTEX-A53: .fpu crypto-neon-fp-armv8 ; CORTEX-A53: .eabi_attribute 12, 3 +; CORTEX-A53-NOT: .eabi_attribute 27 +; CORTEX-A53: .eabi_attribute 36, 1 +; CORTEX-A53: .eabi_attribute 42, 1 +; CORTEX-A53-NOT: .eabi_attribute 44 +; CORTEX-A53: .eabi_attribute 68, 3 ; CORTEX-A53-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A53: .eabi_attribute 20, 1 @@ -1432,13 +1440,8 @@ ; CORTEX-A53: .eabi_attribute 23, 3 ; CORTEX-A53: .eabi_attribute 24, 1 ; CORTEX-A53: .eabi_attribute 25, 1 -; CORTEX-A53-NOT: .eabi_attribute 27 ; CORTEX-A53-NOT: .eabi_attribute 28 -; CORTEX-A53: .eabi_attribute 36, 1 ; CORTEX-A53: .eabi_attribute 38, 1 -; CORTEX-A53: .eabi_attribute 42, 1 -; CORTEX-A53-NOT: .eabi_attribute 44 -; CORTEX-A53: .eabi_attribute 68, 3 ; CORTEX-A53-FAST-NOT: .eabi_attribute 19 ;; The A53 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1454,6 +1457,11 @@ ; CORTEX-A57: .eabi_attribute 9, 2 ; CORTEX-A57: .fpu crypto-neon-fp-armv8 ; CORTEX-A57: .eabi_attribute 12, 3 +; CORTEX-A57-NOT: .eabi_attribute 27 +; CORTEX-A57: .eabi_attribute 36, 1 +; CORTEX-A57: .eabi_attribute 42, 1 +; CORTEX-A57-NOT: .eabi_attribute 44 +; CORTEX-A57: .eabi_attribute 68, 3 ; CORTEX-A57-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A57: .eabi_attribute 20, 1 @@ -1462,13 +1470,8 @@ ; CORTEX-A57: .eabi_attribute 23, 3 ; CORTEX-A57: .eabi_attribute 24, 1 ; CORTEX-A57: .eabi_attribute 25, 1 -; CORTEX-A57-NOT: .eabi_attribute 27 ; CORTEX-A57-NOT: .eabi_attribute 28 -; CORTEX-A57: .eabi_attribute 36, 1 ; CORTEX-A57: .eabi_attribute 38, 1 -; CORTEX-A57: .eabi_attribute 42, 1 -; CORTEX-A57-NOT: .eabi_attribute 44 -; CORTEX-A57: .eabi_attribute 68, 3 ; CORTEX-A57-FAST-NOT: .eabi_attribute 19 ;; The A57 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1484,6 +1487,11 @@ ; CORTEX-A72: .eabi_attribute 9, 2 ; CORTEX-A72: .fpu crypto-neon-fp-armv8 ; CORTEX-A72: .eabi_attribute 12, 3 +; CORTEX-A72-NOT: .eabi_attribute 27 +; CORTEX-A72: .eabi_attribute 36, 1 +; CORTEX-A72: .eabi_attribute 42, 1 +; CORTEX-A72-NOT: .eabi_attribute 44 +; CORTEX-A72: .eabi_attribute 68, 3 ; CORTEX-A72-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A72: .eabi_attribute 20, 1 @@ -1492,13 +1500,8 @@ ; CORTEX-A72: .eabi_attribute 23, 3 ; CORTEX-A72: .eabi_attribute 24, 1 ; CORTEX-A72: .eabi_attribute 25, 1 -; CORTEX-A72-NOT: .eabi_attribute 27 ; CORTEX-A72-NOT: .eabi_attribute 28 -; CORTEX-A72: .eabi_attribute 36, 1 ; CORTEX-A72: .eabi_attribute 38, 1 -; CORTEX-A72: .eabi_attribute 42, 1 -; CORTEX-A72-NOT: .eabi_attribute 44 -; CORTEX-A72: .eabi_attribute 68, 3 ; CORTEX-A72-FAST-NOT: .eabi_attribute 19 ;; The A72 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1514,6 +1517,11 @@ ; CORTEX-A73: .eabi_attribute 9, 2 ; CORTEX-A73: .fpu crypto-neon-fp-armv8 ; CORTEX-A73: .eabi_attribute 12, 3 +; CORTEX-A73-NOT: .eabi_attribute 27 +; CORTEX-A73: .eabi_attribute 36, 1 +; CORTEX-A73: .eabi_attribute 42, 1 +; CORTEX-A73-NOT: .eabi_attribute 44 +; CORTEX-A73: .eabi_attribute 68, 3 ; CORTEX-A73-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-A73: .eabi_attribute 20, 1 @@ -1522,14 +1530,9 @@ ; CORTEX-A73: .eabi_attribute 23, 3 ; CORTEX-A73: .eabi_attribute 24, 1 ; CORTEX-A73: .eabi_attribute 25, 1 -; CORTEX-A73-NOT: .eabi_attribute 27 ; CORTEX-A73-NOT: .eabi_attribute 28 -; CORTEX-A73: .eabi_attribute 36, 1 ; CORTEX-A73: .eabi_attribute 38, 1 -; CORTEX-A73: .eabi_attribute 42, 1 -; CORTEX-A73-NOT: .eabi_attribute 44 ; CORTEX-A73: .eabi_attribute 14, 0 -; CORTEX-A73: .eabi_attribute 68, 3 ; EXYNOS-M1: .cpu exynos-m1 ; EXYNOS-M1: .eabi_attribute 6, 14 @@ -1538,6 +1541,11 @@ ; EXYNOS-M1: .eabi_attribute 9, 2 ; EXYNOS-M1: .fpu crypto-neon-fp-armv8 ; EXYNOS-M1: .eabi_attribute 12, 3 +; EXYNOS-M1-NOT: .eabi_attribute 27 +; EXYNOS-M1: .eabi_attribute 36, 1 +; EXYNOS-M1: .eabi_attribute 42, 1 +; EXYNOS-M1-NOT: .eabi_attribute 44 +; EXYNOS-M1: .eabi_attribute 68, 3 ; EXYNOS-M1-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; EXYNOS-M1: .eabi_attribute 20, 1 @@ -1546,13 +1554,8 @@ ; EXYNOS-M1: .eabi_attribute 23, 3 ; EXYNOS-M1: .eabi_attribute 24, 1 ; EXYNOS-M1: .eabi_attribute 25, 1 -; EXYNOS-M1-NOT: .eabi_attribute 27 ; EXYNOS-M1-NOT: .eabi_attribute 28 -; EXYNOS-M1: .eabi_attribute 36, 1 ; EXYNOS-M1: .eabi_attribute 38, 1 -; EXYNOS-M1: .eabi_attribute 42, 1 -; EXYNOS-M1-NOT: .eabi_attribute 44 -; EXYNOS-M1: .eabi_attribute 68, 3 ; EXYNOS-M1-FAST-NOT: .eabi_attribute 19 ;; The exynos-m1 has the ARMv8 FP unit, which always flushes preserving sign. @@ -1568,6 +1571,11 @@ ; EXYNOS-M2: .eabi_attribute 9, 2 ; EXYNOS-M2: .fpu crypto-neon-fp-armv8 ; EXYNOS-M2: .eabi_attribute 12, 3 +; EXYNOS-M2-NOT: .eabi_attribute 27 +; EXYNOS-M2: .eabi_attribute 36, 1 +; EXYNOS-M2: .eabi_attribute 42, 1 +; EXYNOS-M2-NOT: .eabi_attribute 44 +; EXYNOS-M2: .eabi_attribute 68, 3 ; EXYNOS-M2-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; EXYNOS-M2: .eabi_attribute 20, 1 @@ -1576,13 +1584,8 @@ ; EXYNOS-M2: .eabi_attribute 23, 3 ; EXYNOS-M2: .eabi_attribute 24, 1 ; EXYNOS-M2: .eabi_attribute 25, 1 -; EXYNOS-M2-NOT: .eabi_attribute 27 ; EXYNOS-M2-NOT: .eabi_attribute 28 -; EXYNOS-M2: .eabi_attribute 36, 1 ; EXYNOS-M2: .eabi_attribute 38, 1 -; EXYNOS-M2: .eabi_attribute 42, 1 -; EXYNOS-M2-NOT: .eabi_attribute 44 -; EXYNOS-M2: .eabi_attribute 68, 3 ; EXYNOS-M3: .cpu exynos-m3 ; EXYNOS-M3: .eabi_attribute 6, 14 @@ -1591,6 +1594,11 @@ ; EXYNOS-M3: .eabi_attribute 9, 2 ; EXYNOS-M3: .fpu crypto-neon-fp-armv8 ; EXYNOS-M3: .eabi_attribute 12, 3 +; EXYNOS-M3-NOT: .eabi_attribute 27 +; EXYNOS-M3: .eabi_attribute 36, 1 +; EXYNOS-M3: .eabi_attribute 42, 1 +; EXYNOS-M3-NOT: .eabi_attribute 44 +; EXYNOS-M3: .eabi_attribute 68, 3 ; EXYNOS-M3-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; EXYNOS-M3: .eabi_attribute 20, 1 @@ -1599,13 +1607,8 @@ ; EXYNOS-M3: .eabi_attribute 23, 3 ; EXYNOS-M3: .eabi_attribute 24, 1 ; EXYNOS-M3: .eabi_attribute 25, 1 -; EXYNOS-M3-NOT: .eabi_attribute 27 ; EXYNOS-M3-NOT: .eabi_attribute 28 -; EXYNOS-M3: .eabi_attribute 36, 1 ; EXYNOS-M3: .eabi_attribute 38, 1 -; EXYNOS-M3: .eabi_attribute 42, 1 -; EXYNOS-M3-NOT: .eabi_attribute 44 -; EXYNOS-M3: .eabi_attribute 68, 3 ; GENERIC-FPU-VFPV3-FP16: .fpu vfpv3-fp16 ; GENERIC-FPU-VFPV3-D16-FP16: .fpu vfpv3-d16-fp16 @@ -1619,6 +1622,11 @@ ; GENERIC-ARMV8_1-A: .eabi_attribute 9, 2 ; GENERIC-ARMV8_1-A: .fpu crypto-neon-fp-armv8 ; GENERIC-ARMV8_1-A: .eabi_attribute 12, 4 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 27 +; GENERIC-ARMV8_1-A: .eabi_attribute 36, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 42, 1 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 44 +; GENERIC-ARMV8_1-A: .eabi_attribute 68, 3 ; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; GENERIC-ARMV8_1-A: .eabi_attribute 20, 1 @@ -1627,13 +1635,8 @@ ; GENERIC-ARMV8_1-A: .eabi_attribute 23, 3 ; GENERIC-ARMV8_1-A: .eabi_attribute 24, 1 ; GENERIC-ARMV8_1-A: .eabi_attribute 25, 1 -; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 27 ; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 28 -; GENERIC-ARMV8_1-A: .eabi_attribute 36, 1 ; GENERIC-ARMV8_1-A: .eabi_attribute 38, 1 -; GENERIC-ARMV8_1-A: .eabi_attribute 42, 1 -; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 44 -; GENERIC-ARMV8_1-A: .eabi_attribute 68, 3 ; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 19 ;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign. @@ -1670,23 +1673,16 @@ ; ARMv8R-SP-NOT: .eabi_attribute 12 ; ARMv8R-NEON: .fpu neon-fp-armv8 ; ARMv8R-NEON: .eabi_attribute 12, 3 @ Tag_Advanced_SIMD_arch -; ARMv8R: .eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use -; ARMv8R: .eabi_attribute 20, 1 @ Tag_ABI_FP_denormal -; ARMv8R: .eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions -; ARMv8R: .eabi_attribute 23, 3 @ Tag_ABI_FP_number_model -; ARMv8R: .eabi_attribute 34, 1 @ Tag_CPU_unaligned_access -; ARMv8R: .eabi_attribute 24, 1 @ Tag_ABI_align_needed -; ARMv8R: .eabi_attribute 25, 1 @ Tag_ABI_align_preserved ; ARMv8R-NOFPU-NOT: .eabi_attribute 27 ; ARMv8R-SP: .eabi_attribute 27, 1 @ Tag_ABI_HardFP_use ; ARMv8R-NEON-NOT: .eabi_attribute 27 ; ARMv8R-NOFPU-NOT: .eabi_attribute 36 ; ARMv8R-SP: .eabi_attribute 36, 1 @ Tag_FP_HP_extension ; ARMv8R-NEON: .eabi_attribute 36, 1 @ Tag_FP_HP_extension -; ARMv8R: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format ; ARMv8R: .eabi_attribute 42, 1 @ Tag_MPextension_use -; ARMv8R: .eabi_attribute 14, 0 @ Tag_ABI_PCS_R9_use ; ARMv8R: .eabi_attribute 68, 2 @ Tag_Virtualization_use +; ARMv8R: .eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format +; ARMv8R: .eabi_attribute 14, 0 @ Tag_ABI_PCS_R9_use define i32 @f(i64 %z) { ret i32 0 diff --git a/test/CodeGen/ARM/clang-section.ll b/test/CodeGen/ARM/clang-section.ll new file mode 100644 index 0000000000000000000000000000000000000000..343f0e721d7f435055b868149c4c216c4329dbd7 --- /dev/null +++ b/test/CodeGen/ARM/clang-section.ll @@ -0,0 +1,140 @@ +;RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s +;Test that global variables and functions are assigned to correct sections. + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7-arm-none-eabi" + +@a = global i32 0, align 4 #0 +@b = global i32 1, align 4 #0 +@c = global [4 x i32] zeroinitializer, align 4 #0 +@d = global [5 x i16] zeroinitializer, align 2 #0 +@e = global [6 x i16] [i16 0, i16 0, i16 1, i16 0, i16 0, i16 0], align 2 #0 +@f = constant i32 2, align 4 #0 +@h = global i32 0, align 4 #1 +@i = global i32 0, align 4 #2 +@j = constant i32 4, align 4 #2 +@k = global i32 0, align 4 #2 +@_ZZ3gooE7lstat_h = internal global i32 0, align 4 #2 +@_ZL1g = internal global [2 x i32] zeroinitializer, align 4 #0 +@l = global i32 5, align 4 #3 +@m = constant i32 6, align 4 #3 +@n = global i32 0, align 4 +@o = global i32 6, align 4 +@p = constant i32 7, align 4 + +; Function Attrs: noinline nounwind +define i32 @foo() #4 { +entry: + %0 = load i32, i32* @b, align 4 + ret i32 %0 +} + +; Function Attrs: noinline +define i32 @goo() #5 { +entry: + %call = call i32 @zoo(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZL1g, i32 0, i32 0), i32* @_ZZ3gooE7lstat_h) + ret i32 %call +} + +declare i32 @zoo(i32*, i32*) #6 + +; Function Attrs: noinline nounwind +define i32 @hoo() #7 { +entry: + %0 = load i32, i32* @b, align 4 + ret i32 %0 +} + +attributes #0 = { "bss-section"="my_bss.1" "data-section"="my_data.1" "rodata-section"="my_rodata.1" } +attributes #1 = { "data-section"="my_data.1" "rodata-section"="my_rodata.1" } +attributes #2 = { "bss-section"="my_bss.2" "rodata-section"="my_rodata.1" } +attributes #3 = { "bss-section"="my_bss.2" "data-section"="my_data.2" "rodata-section"="my_rodata.2" } +attributes #4 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "implicit-section-name"="my_text.1" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "implicit-section-name"="my_text.2" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #7 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a9" "target-features"="+dsp,+fp16,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"static_rwdata", i32 1} +!2 = !{i32 1, !"enumsize_buildattr", i32 2} +!3 = !{i32 1, !"armlib_unavailable", i32 0} + +;CHECK: .section my_text.1,"ax",%progbits +;CHECK: .type foo,%function +;CHECK: foo: + +;CHECK: .section my_text.2,"ax",%progbits +;CHECK: .type goo,%function +;CHECK: goo: + +;CHECK: .text +;CHECK: .type hoo,%function +;CHECK: hoo: + +;CHECK: .type a,%object +;CHECK: .section my_bss.1,"aw",%nobits +;CHECK: a: + +;CHECK: .type b,%object +;CHECK: .section my_data.1,"aw",%progbits +;CHECK: b: + +;CHECK: .type c,%object +;CHECK: .section my_bss.1,"aw",%nobits +;CHECK: c: + +;CHECK: .type d,%object +;CHECK: d: + +;CHECK: .type e,%object +;CHECK: .section my_data.1,"aw",%progbits +;CHECK: e: + +;CHECK: .type f,%object +;CHECK: .section my_rodata.1,"a",%progbits +;CHECK: f: + +;CHECK: .type h,%object +;CHECK: .bss +;CHECK: h: + +;CHECK: .type i,%object +;CHECK: .section my_bss.2,"aw",%nobits +;CHECK: i: + +;CHECK: .type j,%object +;CHECK: .section my_rodata.1,"a",%progbits +;CHECK: j: + +;CHECK: .type k,%object +;CHECK: .section my_bss.2,"aw",%nobits +;CHECK: k: + +;CHECK: .type _ZZ3gooE7lstat_h,%object @ @_ZZ3gooE7lstat_h +;CHECK: _ZZ3gooE7lstat_h: + +;CHECK: .type _ZL1g,%object +;CHECK: .section my_bss.1,"aw",%nobits +;CHECK: _ZL1g: + +;CHECK: .type l,%object +;CHECK: .section my_data.2,"aw",%progbits +;CHECK: l: + +;CHECK: .type m,%object +;CHECK: .section my_rodata.2,"a",%progbits +;CHECK: m: + +;CHECK: .type n,%object +;CHECK: .bss +;CHECK: n: + +;CHECK: .type o,%object +;CHECK: .data +;CHECK: o: + +;CHECK: .type p,%object +;CHECK: .section .rodata,"a",%progbits +;CHECK: p: diff --git a/test/CodeGen/ARM/cmp1-peephole-thumb.mir b/test/CodeGen/ARM/cmp1-peephole-thumb.mir index 5ace58fd06584fa52e928c5aee401cb902df1bfd..3e87ced0ee57de62db109387323e551cae7e12ea 100644 --- a/test/CodeGen/ARM/cmp1-peephole-thumb.mir +++ b/test/CodeGen/ARM/cmp1-peephole-thumb.mir @@ -55,7 +55,6 @@ frameInfo: # CHECK-NOT: tCMPi8 body: | bb.0.entry: - successors: %bb.1.entry(0x40000000), %bb.2.entry(0x40000000) liveins: %r0, %r1 %1 = COPY %r1 @@ -67,8 +66,6 @@ body: | tBcc %bb.2.entry, 0, %cpsr bb.1.entry: - successors: %bb.2.entry(0x80000000) - bb.2.entry: %5 = PHI %4, %bb.1.entry, %3, %bb.0.entry diff --git a/test/CodeGen/ARM/cmp2-peephole-thumb.mir b/test/CodeGen/ARM/cmp2-peephole-thumb.mir index 6e9ca70f1741d5f9e285c775cf325f90bca99dfe..a31086d2113ebf63d9f7087d0469648ff0276dfc 100644 --- a/test/CodeGen/ARM/cmp2-peephole-thumb.mir +++ b/test/CodeGen/ARM/cmp2-peephole-thumb.mir @@ -76,7 +76,6 @@ stack: # CHECK-NEXT: tCMPi8 body: | bb.0.entry: - successors: %bb.1.if.then(0x40000000), %bb.2.if.end(0x40000000) liveins: %r0, %r1 %1 = COPY %r1 @@ -88,15 +87,11 @@ body: | tB %bb.1.if.then, 14, _ bb.1.if.then: - successors: %bb.3.return(0x80000000) - %4, %cpsr = tMOVi8 42, 14, _ tSTRspi killed %4, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval) tB %bb.3.return, 14, _ bb.2.if.end: - successors: %bb.3.return(0x80000000) - %3, %cpsr = tMOVi8 1, 14, _ tSTRspi killed %3, %stack.0.retval, 0, 14, _ :: (store 4 into %ir.retval) diff --git a/test/CodeGen/ARM/cmpxchg-O0.ll b/test/CodeGen/ARM/cmpxchg-O0.ll index f8ad2bbbbe0e4170739a08c7506ef2acc4230c83..a3be72112c761ad2b947a021c84b85d0fb30800d 100644 --- a/test/CodeGen/ARM/cmpxchg-O0.ll +++ b/test/CodeGen/ARM/cmpxchg-O0.ll @@ -10,10 +10,11 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK: dmb ish ; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: +; CHECK: mov{{s?}} [[STATUS:r[0-9]+]], #0 ; CHECK: ldrexb [[OLD:r[0-9]+]], [r0] ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strexb [[STATUS]], r2, [r0] ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -29,10 +30,11 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind ; CHECK: dmb ish ; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: +; CHECK: mov{{s?}} [[STATUS:r[0-9]+]], #0 ; CHECK: ldrexh [[OLD:r[0-9]+]], [r0] ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strexh [[STATUS]], r2, [r0] ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -48,10 +50,11 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: +; CHECK: mov{{s?}} [[STATUS:r[0-9]+]], #0 ; CHECK: ldrex [[OLD:r[0-9]+]], [r0] ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strex [[STATUS]], r2, [r0] ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: diff --git a/test/CodeGen/ARM/constantpool-promote-dbg.ll b/test/CodeGen/ARM/constantpool-promote-dbg.ll index ae765d26dcac171b6c20e162b34dabbd01e4c902..84386d2975f0fc7d83b710afeac3ab4a61d885e6 100644 --- a/test/CodeGen/ARM/constantpool-promote-dbg.ll +++ b/test/CodeGen/ARM/constantpool-promote-dbg.ll @@ -1,4 +1,4 @@ -; RUN: llc -relocation-model=static < %s | FileCheck %s +; RUN: llc -relocation-model=static -arm-promote-constant < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7m--linux-gnu" diff --git a/test/CodeGen/ARM/constantpool-promote-ldrh.ll b/test/CodeGen/ARM/constantpool-promote-ldrh.ll index 9e369dc08c4b66d5dc2056aa735ddc00cc8fad58..59970495874be381f60155fb63a43367592cff32 100644 --- a/test/CodeGen/ARM/constantpool-promote-ldrh.ll +++ b/test/CodeGen/ARM/constantpool-promote-ldrh.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel=false | FileCheck %s -; RUN: llc < %s -O0 -fast-isel=false -filetype=obj +; RUN: llc < %s -O0 -fast-isel=false -arm-promote-constant | FileCheck %s +; RUN: llc < %s -O0 -fast-isel=false -filetype=obj -arm-promote-constant target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv6m-arm-linux-gnueabi" diff --git a/test/CodeGen/ARM/constantpool-promote.ll b/test/CodeGen/ARM/constantpool-promote.ll index 8df7e100c0514d2813b6be7a7f921d282dd4b8d8..d5361f33a98be9a9c5245133a301fba020f56b23 100644 --- a/test/CodeGen/ARM/constantpool-promote.ll +++ b/test/CodeGen/ARM/constantpool-promote.ll @@ -1,15 +1,15 @@ -; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=static < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM -; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM -; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=ropi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM -; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=rwpi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM -; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=static < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB -; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB -; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=ropi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB -; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=rwpi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB -; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=static < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M -; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=pic < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M -; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=ropi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M -; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=rwpi < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=static -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=pic -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=ropi -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM +; RUN: llc -mtriple armv7--linux-gnueabihf -relocation-model=rwpi -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7ARM +; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=static -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB +; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=pic -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB +; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=ropi -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB +; RUN: llc -mtriple thumbv7--linux-gnueabihf -relocation-model=rwpi -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V7,CHECK-V7THUMB +; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=static -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M +; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=pic -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M +; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=ropi -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M +; RUN: llc -mtriple thumbv6m--linux-gnueabihf -relocation-model=rwpi -arm-promote-constant < %s | FileCheck %s --check-prefixes=CHECK,CHECK-V6M @.str = private unnamed_addr constant [2 x i8] c"s\00", align 1 @.str1 = private unnamed_addr constant [69 x i8] c"this string is far too long to fit in a literal pool by far and away\00", align 1 diff --git a/test/CodeGen/ARM/cortex-a57-misched-alu.ll b/test/CodeGen/ARM/cortex-a57-misched-alu.ll new file mode 100644 index 0000000000000000000000000000000000000000..960ee87532b0bc0db8142616f0c16a0ae3184225 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-alu.ll @@ -0,0 +1,81 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; Check the latency for ALU shifted operand variants. +; +; CHECK: ********** MI Scheduling ********** +; CHECK: foo:BB#0 entry + +; ALU, basic - 1 cyc I0/I1 +; CHECK: EORrr +; CHECK: rdefs left +; CHECK-NEXT: Latency : 1 + +; ALU, shift by immed - 2 cyc M +; CHECK: ADDrsi +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +; ALU, shift by register, unconditional - 2 cyc M +; CHECK: RSBrsr +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +; ALU, shift by register, conditional - 2 cyc I0/I1 +; CHECK: ANDrsr +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +; Checking scheduling units + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; Skipping COPY +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: ANDrsr +; CHECK: Ready +; CHECK-NEXT: A57UnitI + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: CMPri +; CHECK: Ready +; CHECK-NEXT: A57UnitI + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: RSBrsr +; CHECK: Ready +; CHECK-NEXT: A57UnitM + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: ADDrsi +; CHECK: Ready +; CHECK-NEXT: A57UnitM + +; CHECK: ** ScheduleDAGMILive::schedule picking next node +; CHECK: Scheduling +; CHECK-SAME: EORrr +; CHECK: Ready +; CHECK-NEXT: A57UnitI + + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv8r-arm-none-eabi" + +; Function Attrs: norecurse nounwind readnone +define hidden i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %xor = xor i32 %a, %b + %xor_shl = shl i32 %xor, 2 + %add = add i32 %xor_shl, %d + %add_ashr = ashr i32 %add, %a + %sub = sub i32 %add_ashr, %a + %sub_lshr_pred = lshr i32 %sub, %c + %pred = icmp sgt i32 %a, 4 + %and = and i32 %sub_lshr_pred, %b + %rv = select i1 %pred, i32 %and, i32 %d + ret i32 %rv +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-basic.ll b/test/CodeGen/ARM/cortex-a57-misched-basic.ll new file mode 100644 index 0000000000000000000000000000000000000000..2ec50b9d3343c07db65304905a9da06a9ec7bd33 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-basic.ll @@ -0,0 +1,53 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=A57_SCHED +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC + +; Check the latency for instructions for both generic and cortex-a57. +; SDIV should be scheduled at the block's begin (20 cyc of independent M unit). +; +; CHECK: ********** MI Scheduling ********** +; CHECK: foo:BB#0 entry + +; GENERIC: SDIV +; GENERIC: Latency : 1 +; GENERIC: EORrr +; GENERIC: Latency : 1 +; GENERIC: LDRi12 +; GENERIC: Latency : 4 +; GENERIC: ADDrr +; GENERIC: Latency : 1 +; GENERIC: SUBrr +; GENERIC: Latency : 1 + +; A57_SCHED: SDIV +; A57_SCHED: Latency : 20 +; A57_SCHED: EORrr +; A57_SCHED: Latency : 1 +; A57_SCHED: LDRi12 +; A57_SCHED: Latency : 4 +; A57_SCHED: ADDrr +; A57_SCHED: Latency : 1 +; A57_SCHED: SUBrr +; A57_SCHED: Latency : 1 + +; CHECK: ** Final schedule for BB#0 *** +; GENERIC: LDRi12 +; GENERIC: SDIV +; A57_SCHED: SDIV +; A57_SCHED: LDRi12 +; CHECK: ********** INTERVALS ********** + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv8r-arm-none-eabi" + +; Function Attrs: norecurse nounwind readnone +define hidden i32 @foo(i32 %a, i32 %b, i32 %c, i32* %d) local_unnamed_addr #0 { +entry: + %xor = xor i32 %c, %b + %ld = load i32, i32* %d + %add = add nsw i32 %xor, %ld + %div = sdiv i32 %a, %b + %sub = sub i32 %div, %add + ret i32 %sub +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll new file mode 100644 index 0000000000000000000000000000000000000000..d54848a6bcf1981548b2aa08c64e4df22038cd11 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll @@ -0,0 +1,37 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; + +@a = global i32 0, align 4 +@b = global i32 0, align 4 +@c = global i32 0, align 4 + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have LDM instruction combined from single-loads +; CHECK: ********** MI Scheduling ********** +; CHECK: LDMIA_UPD +; CHECK: rdefs left +; CHECK-NEXT: Latency : 4 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=1 +; CHECK-NEXT: data +; CHECK-SAME: Latency=3 +; CHECK-NEXT: data +; CHECK-SAME: Latency=3 +; CHECK-NEXT: data +; CHECK-SAME: Latency=4 +define i32 @bar(i32 %a1, i32 %b1, i32 %c1) minsize optsize { + %1 = load i32, i32* @a, align 4 + %2 = load i32, i32* @b, align 4 + %3 = load i32, i32* @c, align 4 + + %ptr_after = getelementptr i32, i32* @a, i32 3 + + %ptr_val = ptrtoint i32* %ptr_after to i32 + %mul1 = mul i32 %ptr_val, %1 + %mul2 = mul i32 %mul1, %2 + %mul3 = mul i32 %mul2, %3 + ret i32 %mul3 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-ldm.ll b/test/CodeGen/ARM/cortex-a57-misched-ldm.ll new file mode 100644 index 0000000000000000000000000000000000000000..9cb076651f5b3ca3ad550df43597afd0e0135136 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-ldm.ll @@ -0,0 +1,28 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have LDM instruction combined from single-loads +; CHECK: ********** MI Scheduling ********** +; CHECK: LDMIA +; CHECK: rdefs left +; CHECK-NEXT: Latency : 3 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=3 +; CHECK-NEXT: data +; CHECK-SAME: Latency=3 + +define i32 @foo(i32* %a) nounwind optsize { +entry: + %b = getelementptr i32, i32* %a, i32 1 + %c = getelementptr i32, i32* %a, i32 2 + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %2 = load i32, i32* %c, align 4 + + %mul1 = mul i32 %0, %1 + %mul2 = mul i32 %mul1, %2 + ret i32 %mul2 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll new file mode 100644 index 0000000000000000000000000000000000000000..774b0a907e3993c88a1cda241f950685aed5fe77 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll @@ -0,0 +1,36 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; N=3 STMIA_UPD should have latency 2cyc and writeback latency 1cyc + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have STM instruction combined from single-stores +; CHECK: ********** MI Scheduling ********** +; CHECK: schedule starting +; CHECK: STMIA_UPD +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 +; CHECK: Successors +; CHECK: data +; CHECK-SAME: Latency=1 + +define i32 @bar(i32 %v0, i32 %v1, i32 %v2, i32* %addr) { + + %addr.1 = getelementptr i32, i32* %addr, i32 0 + store i32 %v0, i32* %addr.1 + + %addr.2 = getelementptr i32, i32* %addr, i32 1 + store i32 %v1, i32* %addr.2 + + %addr.3 = getelementptr i32, i32* %addr, i32 2 + store i32 %v2, i32* %addr.3 + + %ptr_after = getelementptr i32, i32* %addr, i32 3 + %val = ptrtoint i32* %ptr_after to i32 + + %rv1 = mul i32 %val, %v0 + %rv2 = mul i32 %rv1, %v1 + %rv3 = mul i32 %rv2, %v2 + + ret i32 %rv3 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-stm.ll b/test/CodeGen/ARM/cortex-a57-misched-stm.ll new file mode 100644 index 0000000000000000000000000000000000000000..474f39d84bae5b279f49062f608767ca9d99bcd3 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-stm.ll @@ -0,0 +1,29 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; N=3 STMIB should have latency 2cyc + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have STM instruction combined from single-stores +; CHECK: ********** MI Scheduling ********** +; CHECK: schedule starting +; CHECK: STMIB +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +define i32 @test_stm(i32 %v0, i32 %v1, i32* %addr) { + + %addr.1 = getelementptr i32, i32* %addr, i32 1 + store i32 %v0, i32* %addr.1 + + %addr.2 = getelementptr i32, i32* %addr, i32 2 + store i32 %v1, i32* %addr.2 + + %addr.3 = getelementptr i32, i32* %addr, i32 3 + %val = ptrtoint i32* %addr to i32 + store i32 %val, i32* %addr.3 + + %rv = add i32 %v0, %v1 + + ret i32 %rv +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll new file mode 100644 index 0000000000000000000000000000000000000000..e234e179ed071fcab4900cd01f57cf25b80e6242 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll @@ -0,0 +1,196 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST +; Check latencies of vmul/vfma accumulate chains. + +define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test1:BB#0 + +; CHECK: VMULS +; > VMULS common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; > VMULS read-advanced latency to VMLAS = 0 +; CHECK-SAME: Latency=0 + +; CHECK-DEFAULT: VMLAS +; CHECK-FAST: VFMAS +; > VMLAS common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAS read-advanced latency to the next VMLAS = 4 +; CHECK-SAME: Latency=4 + +; CHECK-DEFAULT: VMLAS +; CHECK-FAST: VFMAS +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAS not-optimized latency to VMOVRS = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLAS, VMLAS + %mul1 = fmul float %f1, %f2 + %mul2 = fmul float %f3, %f4 + %mul3 = fmul float %f5, %f6 + %add1 = fadd float %mul1, %mul2 + %add2 = fadd float %add1, %mul3 + ret float %add2 +} + +; ASIMD form +define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test2:BB#0 + +; CHECK: VMULfd +; > VMULfd common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; VMULfd read-advanced latency to VMLAfd = 0 +; CHECK-SAME: Latency=0 + +; CHECK-DEFAULT: VMLAfd +; CHECK-FAST: VFMAfd +; > VMLAfd common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAfd read-advanced latency to the next VMLAfd = 4 +; CHECK-SAME: Latency=4 + +; CHECK-DEFAULT: VMLAfd +; CHECK-FAST: VFMAfd +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAfd not-optimized latency to VMOVRRD = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLAS, VMLAS + %mul1 = fmul <2 x float> %f1, %f2 + %mul2 = fmul <2 x float> %f3, %f4 + %mul3 = fmul <2 x float> %f5, %f6 + %add1 = fadd <2 x float> %mul1, %mul2 + %add2 = fadd <2 x float> %add1, %mul3 + ret <2 x float> %add2 +} + +define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test3:BB#0 + +; CHECK: VMULS +; > VMULS common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; > VMULS read-advanced latency to VMLSS = 0 +; CHECK-SAME: Latency=0 + +; CHECK-DEFAULT: VMLSS +; CHECK-FAST: VFMSS +; > VMLSS common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSS read-advanced latency to the next VMLSS = 4 +; CHECK-SAME: Latency=4 + +; CHECK-DEFAULT: VMLSS +; CHECK-FAST: VFMSS +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSS not-optimized latency to VMOVRS = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLSS, VMLSS + %mul1 = fmul float %f1, %f2 + %mul2 = fmul float %f3, %f4 + %mul3 = fmul float %f5, %f6 + %sub1 = fsub float %mul1, %mul2 + %sub2 = fsub float %sub1, %mul3 + ret float %sub2 +} + +; ASIMD form +define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test4:BB#0 + +; CHECK: VMULfd +; > VMULfd common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; VMULfd read-advanced latency to VMLSfd = 0 +; CHECK-SAME: Latency=0 + +; CHECK-DEFAULT: VMLSfd +; CHECK-FAST: VFMSfd +; > VMLSfd common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSfd read-advanced latency to the next VMLSfd = 4 +; CHECK-SAME: Latency=4 + +; CHECK-DEFAULT: VMLSfd +; CHECK-FAST: VFMSfd +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSfd not-optimized latency to VMOVRRD = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLSS, VMLSS + %mul1 = fmul <2 x float> %f1, %f2 + %mul2 = fmul <2 x float> %f3, %f4 + %mul3 = fmul <2 x float> %f5, %f6 + %sub1 = fsub <2 x float> %mul1, %mul2 + %sub2 = fsub <2 x float> %sub1, %mul3 + ret <2 x float> %sub2 +} + +define float @Test5(float %f1, float %f2, float %f3) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test5:BB#0 + +; CHECK-DEFAULT: VNMLS +; CHECK-FAST: VFNMS +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAS not-optimized latency to VMOVRS = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 - f3 ==> VNMLS/VFNMS + %mul = fmul float %f1, %f2 + %sub = fsub float %mul, %f3 + ret float %sub +} + + +define float @Test6(float %f1, float %f2, float %f3) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test6:BB#0 + +; CHECK-DEFAULT: VNMLA +; CHECK-FAST: VFNMA +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLAS not-optimized latency to VMOVRS = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 - f3 ==> VNMLA/VFNMA + %mul = fmul float %f1, %f2 + %sub1 = fsub float -0.0, %mul + %sub2 = fsub float %sub1, %f2 + ret float %sub2 +} diff --git a/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll new file mode 100644 index 0000000000000000000000000000000000000000..6cfa823fb9694c09523e131497fedf84e012d358 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll @@ -0,0 +1,50 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; + +@a = global double 0.0, align 4 +@b = global double 0.0, align 4 +@c = global double 0.0, align 4 + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have VLDM instruction combined from single-loads +; CHECK: ********** MI Scheduling ********** +; CHECK: VLDMDIA_UPD +; CHECK: rdefs left +; CHECK-NEXT: Latency : 6 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=1 +; CHECK-NEXT: data +; CHECK-SAME: Latency=1 +; CHECK-NEXT: data +; CHECK-SAME: Latency=5 +; CHECK-NEXT: data +; CHECK-SAME: Latency=5 +; CHECK-NEXT: data +; CHECK-SAME: Latency=6 +define i32 @bar(i32* %iptr) minsize optsize { + %1 = load double, double* @a, align 8 + %2 = load double, double* @b, align 8 + %3 = load double, double* @c, align 8 + + %ptr_after = getelementptr double, double* @a, i32 3 + + %ptr_new_ival = ptrtoint double* %ptr_after to i32 + %ptr_new = inttoptr i32 %ptr_new_ival to i32* + + store i32 %ptr_new_ival, i32* %iptr, align 8 + + %v1 = fptoui double %1 to i32 + + %mul1 = mul i32 %ptr_new_ival, %v1 + + %v2 = fptoui double %2 to i32 + %v3 = fptoui double %3 to i32 + + %mul2 = mul i32 %mul1, %v2 + %mul3 = mul i32 %mul2, %v3 + + ret i32 %mul3 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-vldm.ll b/test/CodeGen/ARM/cortex-a57-misched-vldm.ll new file mode 100644 index 0000000000000000000000000000000000000000..218b5b41a7e43f8ed6015f4d53c791bad18555ab --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vldm.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have VLDM instruction combined from single-loads +; CHECK: ********** MI Scheduling ********** +; CHECK: VLDMDIA +; CHECK: rdefs left +; CHECK-NEXT: Latency : 6 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=5 +; CHECK-NEXT: data +; CHECK-SAME: Latency=5 +; CHECK-NEXT: data +; CHECK-SAME: Latency=6 + +define double @foo(double* %a) nounwind optsize { +entry: + %b = getelementptr double, double* %a, i32 1 + %c = getelementptr double, double* %a, i32 2 + %0 = load double, double* %a, align 4 + %1 = load double, double* %b, align 4 + %2 = load double, double* %c, align 4 + + %mul1 = fmul double %0, %1 + %mul2 = fmul double %mul1, %2 + ret double %mul2 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll new file mode 100644 index 0000000000000000000000000000000000000000..af1c469d44432945a1844f22ea712003c5808685 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll @@ -0,0 +1,43 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have VSTM instruction combined from single-stores +; CHECK: ********** MI Scheduling ********** +; CHECK: schedule starting +; CHECK: VSTMDIA_UPD +; CHECK: rdefs left +; CHECK-NEXT: Latency : 4 +; CHECK: Successors: +; CHECK: data +; CHECK-SAME: Latency=1 + +@a = global double 0.0, align 4 +@b = global double 0.0, align 4 +@c = global double 0.0, align 4 + +define i32 @bar(double* %vptr, i32 %iv1, i32* %iptr) minsize { + + %vp2 = getelementptr double, double* %vptr, i32 1 + %vp3 = getelementptr double, double* %vptr, i32 2 + + %v1 = load double, double* %vptr, align 8 + %v2 = load double, double* %vp2, align 8 + %v3 = load double, double* %vp3, align 8 + + store double %v1, double* @a, align 8 + store double %v2, double* @b, align 8 + store double %v3, double* @c, align 8 + + %ptr_after = getelementptr double, double* @a, i32 3 + + %ptr_new_ival = ptrtoint double* %ptr_after to i32 + %ptr_new = inttoptr i32 %ptr_new_ival to i32* + + store i32 %ptr_new_ival, i32* %iptr, align 8 + + %mul1 = mul i32 %ptr_new_ival, %iv1 + + ret i32 %mul1 +} + diff --git a/test/CodeGen/ARM/cortex-a57-misched-vstm.ll b/test/CodeGen/ARM/cortex-a57-misched-vstm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f31474f6655819791e8b9a5ca4a4ae43b2c62180 --- /dev/null +++ b/test/CodeGen/ARM/cortex-a57-misched-vstm.ll @@ -0,0 +1,23 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK: ********** MI Scheduling ********** +; We need second, post-ra scheduling to have VSTM instruction combined from single-stores +; CHECK: ********** MI Scheduling ********** +; CHECK: schedule starting +; CHECK: VSTMDIA +; CHECK: rdefs left +; CHECK-NEXT: Latency : 2 + +%bigVec = type [2 x double] + +@var = global %bigVec zeroinitializer + +define void @bar(%bigVec* %ptr) { + + %tmp = load %bigVec, %bigVec* %ptr + store %bigVec %tmp, %bigVec* @var + + ret void +} + diff --git a/test/CodeGen/ARM/cortexr52-misched-basic.ll b/test/CodeGen/ARM/cortexr52-misched-basic.ll index 3ccb34d9fc90e7a90a9c258056eb343d582dad88..eb2c29a3a5d1987543440f354a18054ac6221d3e 100644 --- a/test/CodeGen/ARM/cortexr52-misched-basic.ll +++ b/test/CodeGen/ARM/cortexr52-misched-basic.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=R52_SCHED -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=R52_SCHED +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; ; Check the latency for instructions for both generic and cortex-r52. ; Cortex-r52 machine model will cause the div to be sceduled before eor diff --git a/test/CodeGen/ARM/dag-combine-ldst.ll b/test/CodeGen/ARM/dag-combine-ldst.ll index c1960ee6c6e93d1ba2ec45c20a987541794dc70b..077754ef013dfea648948a44acfe8ac5792c8241 100644 --- a/test/CodeGen/ARM/dag-combine-ldst.ll +++ b/test/CodeGen/ARM/dag-combine-ldst.ll @@ -8,7 +8,7 @@ ; CHECK-LABEL: {{^}}main ; CHECK: mov [[TMP:r[0-9]+]], #0 ; CHECK-NEXT: str [[TMP]], [sp, #4] -; CHECK-NEXT: str [[TMP]], [sp] +; CHECK_O0: str [[TMP]], [sp] ; CHECK_O0: ldr [[TMP:r[0-9]+]], [sp] ; CHECK_O0-NEXT: add [[TMP]], [[TMP]], #2 ; CHECK_O1-NOT: ldr [[TMP:r[0-9]+]], [sp] diff --git a/test/CodeGen/ARM/darwin-tls-preserved.ll b/test/CodeGen/ARM/darwin-tls-preserved.ll new file mode 100644 index 0000000000000000000000000000000000000000..4969fabfd9b3c9b9508b0a2b311cf821726b1260 --- /dev/null +++ b/test/CodeGen/ARM/darwin-tls-preserved.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -arm-atomic-cfg-tidy=0 -o - %s | FileCheck %s + +@tls_var = thread_local global i32 0 + +; r9 and r12 can be live across the asm, but those get clobbered by the TLS +; access (in a different BB to order it). +define i32 @test_regs_preserved(i32* %ptr1, i32* %ptr2, i1 %tst1) { +; CHECK-LABEL: test_regs_preserved: +; CHECK: str {{.*}}, [sp +; CHECK: mov {{.*}}, r12 +entry: + call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r10},~{r11},~{r13},~{lr}"() + br i1 %tst1, label %get_tls, label %done + +get_tls: + %val = load i32, i32* @tls_var + br label %done + +done: + %res = phi i32 [%val, %get_tls], [0, %entry] + store i32 42, i32* %ptr1 + store i32 42, i32* %ptr2 + ret i32 %res +} diff --git a/test/CodeGen/ARM/dbg-range-extension.mir b/test/CodeGen/ARM/dbg-range-extension.mir index 466f6939694896649744a671e03b5b775c91f3aa..a79607705c1c7887475f0689a422a45c56bacaf3 100644 --- a/test/CodeGen/ARM/dbg-range-extension.mir +++ b/test/CodeGen/ARM/dbg-range-extension.mir @@ -209,7 +209,6 @@ stack: - { id: 5, type: spill-slot, offset: -24, size: 4, alignment: 4, callee-saved-register: '%r4' } body: | bb.0.entry: - successors: %bb.5.if.end, %bb.1.if.then liveins: %r0, %r4, %r5, %r6, %r7, %r11, %lr %sp = frame-setup STMDB_UPD %sp, 14, _, killed %r4, killed %r5, killed %r6, killed %r7, killed %r11, killed %lr @@ -232,7 +231,6 @@ body: | Bcc %bb.5.if.end, 0, killed %cpsr bb.1.if.then: - successors: %bb.3.for.cond liveins: %r4, %r5 %r0 = MOVi 12, 14, _, _, debug-location !26 @@ -245,7 +243,6 @@ body: | B %bb.3.for.cond bb.2.for.body: - successors: %bb.3.for.cond liveins: %r4, %r5, %r6, %r7 %r1 = ADDrr %r5, %r7, 14, _, _, debug-location !36 @@ -255,7 +252,6 @@ body: | DBG_VALUE debug-use %r7, debug-use _, !18, !20, debug-location !28 bb.3.for.cond: - successors: %bb.2.for.body, %bb.4.for.cond.cleanup liveins: %r4, %r5, %r6, %r7 DBG_VALUE debug-use %r7, debug-use _, !18, !20, debug-location !28 @@ -263,7 +259,6 @@ body: | Bcc %bb.2.for.body, 11, killed %cpsr, debug-location !33 bb.4.for.cond.cleanup: - successors: %bb.5.if.end liveins: %r4, %r5, %r6 %r0 = MOVr %r5, 14, _, _, debug-location !34 diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll index 1e9d890e933374de6e53c928a00cce27a7ec878b..6019a9410b0337a3a8a6da680043a396180cc63a 100644 --- a/test/CodeGen/ARM/debug-info-blocks.ll +++ b/test/CodeGen/ARM/debug-info-blocks.ll @@ -273,6 +273,6 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !160 = !DIFile(filename: "header.h", directory: "/Volumes/Sandbox/llvm") !161 = !{!"header2.h", !"/Volumes/Sandbox/llvm"} !162 = !{i32 1, !"Debug Info Version", i32 3} -!163 = !DIExpression(DW_OP_plus, 20, DW_OP_deref, DW_OP_plus, 4, DW_OP_deref, DW_OP_plus, 24) -!164 = !DIExpression(DW_OP_deref, DW_OP_plus, 24) -!165 = !DIExpression(DW_OP_deref, DW_OP_plus, 28) +!163 = !DIExpression(DW_OP_plus_uconst, 20, DW_OP_deref, DW_OP_plus_uconst, 4, DW_OP_deref, DW_OP_plus_uconst, 24) +!164 = !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 24) +!165 = !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 28) diff --git a/test/CodeGen/ARM/divmod-eabi.ll b/test/CodeGen/ARM/divmod-eabi.ll index ce5a1df05e3ff5e1862e4684a5344d6421b9f4b0..77ffc46e6a6914b5ab0dd2fb22d9ba24639c22f3 100644 --- a/test/CodeGen/ARM/divmod-eabi.ll +++ b/test/CodeGen/ARM/divmod-eabi.ll @@ -16,17 +16,15 @@ ; RUN: llc -mtriple armv7-linux-gnueabi %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefix=EABI ; RUN: llc -mtriple armv7-linux-musleabi %s -o - | FileCheck %s --check-prefix=EABI ; RUN: llc -mtriple armv7-linux-musleabi %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefix=EABI -; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefixes=DARWIN,DARWIN-DEFAULT -; RUN: llc -mtriple armv7-apple-darwin %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefixes=DARWIN,DARWIN-O0 -; FIXME: long-term, we will use "-apple-macho" and won't need this exception: -; RUN: llc -mtriple armv7-apple-darwin-eabi %s -o - | FileCheck %s --check-prefixes=DARWIN,DARWIN-DEFAULT -; RUN: llc -mtriple armv7-apple-darwin-eabi %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefixes=DARWIN,DARWIN-O0 +; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefixes=DARWIN +; RUN: llc -mtriple armv7-apple-darwin %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefix=DARWIN-O0 ; RUN: llc -mtriple thumbv7-windows %s -o - | FileCheck %s --check-prefixes=WINDOWS,WINDOWS-DEFAULT ; RUN: llc -mtriple thumbv7-windows %s -o - -O0 -optimize-regalloc | FileCheck %s --check-prefixes=WINDOWS,WINDOWS-O0 define signext i16 @f16(i16 signext %a, i16 signext %b) { ; EABI-LABEL: f16: ; DARWIN-LABEL: f16: +; DARWIN-O0-LABEL: f16: ; WINDOWS-LABEL: f16: entry: %conv = sext i16 %a to i32 @@ -36,11 +34,9 @@ entry: ; EABI: __aeabi_idivmod ; EABI: mov [[div:r[0-9]+]], r0 ; EABI: mov [[rem:r[0-9]+]], r1 -; DARWIN: ___divsi3 -; DARWIN: mov [[div:r[0-9]+]], r0 -; DARWIN: __modsi3 -; DARWIN-DEFAULT: add [[sum:r[0-9]+]], r0, [[div]] -; DARWIN-O0: mov [[rem:r[0-9]+]], r0 +; DARWIN: __divmodsi4 +; DARWIN-O0: __divsi3 +; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv ; WINDOWS: __rt_sdiv ; WINDOWS-DEFAULT: add [[sum:r[0-9]+]], r1 @@ -48,16 +44,13 @@ entry: %rem8 = srem i32 %conv1, %conv ; EABI: __aeabi_idivmod ; DARWIN: __modsi3 +; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv %add = add nsw i32 %rem, %div %add13 = add nsw i32 %add, %rem8 %conv14 = trunc i32 %add13 to i16 ; EABI: add r0{{.*}}r1 ; EABI: sxth r0, r0 -; DARWIN-DEFAULT: add [[res:r[0-9]+]], [[sum]], r0 -; DARWIN-O0: add [[sum:r[0-9]+]], [[rem]], [[div]] -; DARWIN-O0: add [[res:r[0-9]+]], [[sum]], r0 -; DARWIN: sxth r0, [[res]] ; WINDOWS-DEFAULT: adds [[sum1:r[0-9]+]], [[sum]], r1 ; WINDOWS-O0: adds [[sum:r[0-9]+]], [[rem]], ; WINDOWS-O0: add [[sum1:r[0-9]+]], r1 @@ -68,6 +61,7 @@ entry: define i32 @f32(i32 %a, i32 %b) { ; EABI-LABEL: f32: ; DARWIN-LABEL: f32: +; DARWIN-O0-LABEL: f32: ; WINDOWS-LABEL: f32: entry: %div = sdiv i32 %a, %b @@ -75,11 +69,9 @@ entry: ; EABI: __aeabi_idivmod ; EABI: mov [[div:r[0-9]+]], r0 ; EABI: mov [[rem:r[0-9]+]], r1 -; DARWIN: ___divsi3 -; DARWIN: mov [[div:r[0-9]+]], r0 -; DARWIN: __modsi3 -; DARWIN-DEFAULT: add [[sum:r[0-9]+]], r0, [[div]] -; DARWIN-O0: mov [[rem:r[0-9]+]], r0 +; DARWIN: ___divmodsi4 +; DARWIN-O0: __divsi3 +; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv ; WINDOWS: mov [[div:r[0-9]+]], r0 ; WINDOWS: __rt_sdiv @@ -87,13 +79,11 @@ entry: %rem1 = srem i32 %b, %a ; EABI: __aeabi_idivmod ; DARWIN: __modsi3 +; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv %add = add nsw i32 %rem, %div %add2 = add nsw i32 %add, %rem1 ; EABI: add r0{{.*}}r1 -; DARWIN-DEFAULT: add r0, [[sum]], r0 -; DARWIN-O0: add [[sum:r[0-9]+]], [[rem]], [[div]] -; DARWIN-O0: add [[res:r[0-9]+]], [[sum]], r0 ; WINDOWS-DEFAULT: adds r0, [[div]], r1 ; WINDOWS-O0: adds [[sum:r[0-9]+]], [[rem]], [[div]] ; WINDOWS-O0: add [[sum]], r1 @@ -103,16 +93,15 @@ entry: define i32 @uf(i32 %a, i32 %b) { ; EABI-LABEL: uf: ; DARWIN-LABEL: uf: +; DARWIN-O0-LABEL: uf: ; WINDOWS-LABEL: uf: entry: %div = udiv i32 %a, %b %rem = urem i32 %a, %b ; EABI: __aeabi_uidivmod -; DARWIN: ___udivsi3 -; DARWIN: mov [[div:r[0-9]+]], r0 -; DARWIN: __umodsi3 -; DARWIN-DEFAULT: add [[sum:r[0-9]+]], r0, [[div]] -; DARWIN-O0: mov [[rem:r[0-9]+]], r0 +; DARWIN: __udivmodsi4 +; DARWIN-O0: __udivsi3 +; DARWIN-O0: __umodsi3 ; WINDOWS: __rt_udiv ; WINDOWS: mov [[div:r[0-9]+]], r0 ; WINDOWS: __rt_udiv @@ -120,13 +109,11 @@ entry: %rem1 = urem i32 %b, %a ; EABI: __aeabi_uidivmod ; DARWIN: __umodsi3 +; DARWIN-O0: __umodsi3 ; WINDOWS: __rt_udiv %add = add nuw i32 %rem, %div %add2 = add nuw i32 %add, %rem1 ; EABI: add r0{{.*}}r1 -; DARWIN-DEFAULT: add r0, [[sum]], r0 -; DARWIN-O0: add [[sum:r[0-9]+]], [[rem]], [[div]] -; DARWIN-O0: add [[res:r[0-9]+]], [[sum]], r0 ; WINDOWS-DEFAULT: adds [[sum:r[0-9]+]], [[div]], r1 ; WINDOWS-O0: adds [[sum:r[0-9]+]], ; WINDOWS-O0: add [[sum]], r1 @@ -136,6 +123,7 @@ entry: define i64 @longf(i64 %a, i64 %b) { ; EABI-LABEL: longf: ; DARWIN-LABEL: longf: +; DARWIN-O0-LABEL: longf: ; WINDOWS-LABEL: longf: entry: %div = sdiv i64 %a, %b @@ -148,6 +136,8 @@ entry: ; DARWIN: mov [[div1:r[0-9]+]], r0 ; DARWIN: mov [[div2:r[0-9]+]], r1 ; DARWIN: __moddi3 +; DARWIN-O0: __divdi3 +; DARWIN-O0: __moddi3 ; WINDOWS: __rt_sdiv64 %add = add nsw i64 %rem, %div ; DARWIN: adds r0{{.*}}[[div1]] @@ -160,20 +150,19 @@ entry: define i16 @shortf(i16 %a, i16 %b) { ; EABI-LABEL: shortf: ; DARWIN-LABEL: shortf: +; DARWIN-O0-LABEL: shortf: ; WINDOWS-LABEL: shortf: entry: %div = sdiv i16 %a, %b %rem = srem i16 %a, %b ; EABI: __aeabi_idivmod -; DARWIN: ___divsi3 -; DARWIN: mov [[div1:r[0-9]+]], r0 -; DARWIN: __modsi3 +; DARWIN: ___divmodsi4 +; DARWIN-O0: __divmodsi4 ; WINDOWS: __rt_sdiv ; WINDOWS: mov [[div:r[0-9]+]], r0 ; WINDOWS: __rt_sdiv %add = add nsw i16 %rem, %div ; EABI: add r0, r1 -; DARWIN: add r0{{.*}}[[div1]] ; WINDOWS: adds r0, r1, [[div]] ret i16 %add } @@ -181,20 +170,20 @@ entry: define i32 @g1(i32 %a, i32 %b) { ; EABI-LABEL: g1: ; DARWIN-LABEL: g1: +; DARWIN-O0-LABEL: g1: ; WINDOWS-LABEL: g1: entry: %div = sdiv i32 %a, %b %rem = srem i32 %a, %b ; EABI: __aeabi_idivmod -; DARWIN: ___divsi3 -; DARWIN: mov [[sum:r[0-9]+]], r0 -; DARWIN: __modsi3 +; DARWIN: ___divmodsi4 +; DARWIN-O0: __divsi3 +; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv ; WINDOWS: mov [[div:r[0-9]+]], r0 ; WINDOWS: __rt_sdiv %add = add nsw i32 %rem, %div ; EABI: add r0{{.*}}r1 -; DARWIN: add r0{{.*}}[[sum]] ; WINDOWS: adds r0, r1, [[div]] ret i32 %add } @@ -203,11 +192,13 @@ entry: define i32 @g2(i32 %a, i32 %b) { ; EABI-LABEL: g2: ; DARWIN-LABEL: g2: +; DARWIN-O0-LABEL: g2: ; WINDOWS-LABEL: g2: entry: %rem = srem i32 %a, %b ; EABI: __aeabi_idivmod ; DARWIN: __modsi3 +; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv ret i32 %rem ; EABI: mov r0, r1 @@ -217,6 +208,7 @@ entry: define i32 @g3(i32 %a, i32 %b) { ; EABI-LABEL: g3: ; DARWIN-LABEL: g3: +; DARWIN-O0-LABEL: g3: ; WINDOWS-LABEL: g3: entry: %rem = srem i32 %a, %b @@ -224,11 +216,13 @@ entry: ; EABI: mov [[mod:r[0-9]+]], r1 ; DARWIN: __modsi3 ; DARWIN: mov [[sum:r[0-9]+]], r0 +; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv ; WINDOWS: mov [[rem:r[0-9]+]], r1 %rem1 = srem i32 %b, %rem ; EABI: __aeabi_idivmod ; DARWIN: __modsi3 +; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv %add = add nsw i32 %rem1, %rem ; EABI: add r0, r1, [[mod]] @@ -240,6 +234,7 @@ entry: define i32 @g4(i32 %a, i32 %b) { ; EABI-LABEL: g4: ; DARWIN-LABEL: g4: +; DARWIN-O0-LABEL: g4: ; WINDOWS-LABEL: g4: entry: %div = sdiv i32 %a, %b @@ -247,11 +242,13 @@ entry: ; EABI: mov [[div:r[0-9]+]], r0 ; DARWIN: ___divsi3 ; DARWIN: mov [[sum:r[0-9]+]], r0 +; DARWIN-O0: __divsi3 ; WINDOWS: __rt_sdiv ; WINDOWS: mov [[div:r[0-9]+]], r0 %rem = srem i32 %b, %div ; EABI: __aeabi_idivmod ; DARWIN: __modsi3 +; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv %add = add nsw i32 %rem, %div ; EABI: add r0, r1, [[div]] diff --git a/test/CodeGen/ARM/divmod-hwdiv.ll b/test/CodeGen/ARM/divmod-hwdiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..4cc316ffa3ea6aac689f0b396f52b75a14ec23a3 --- /dev/null +++ b/test/CodeGen/ARM/divmod-hwdiv.ll @@ -0,0 +1,37 @@ +; The hwdiv subtarget feature should only influence thumb, not arm. +; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV +; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV +; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv | FileCheck %s -check-prefixes=ALL,THUMB-HWDIV +; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV + +; The hwdiv-arm subtarget feature should only influence arm, not thumb. +; RUN: llc < %s -mtriple=arm-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,ARM-HWDIV +; RUN: llc < %s -mtriple=arm-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV +; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=+hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV +; RUN: llc < %s -mtriple=thumbv7-gnueabi -mattr=-hwdiv-arm | FileCheck %s -check-prefixes=ALL,AEABI-NOHWDIV + +define arm_aapcscc i32 @test_i32_srem(i32 %x, i32 %y) { +; ALL-LABEL: test_i32_srem: +; ARM-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1 +; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 +; ARM-HWDIV: sub r0, r0, [[P]] +; THUMB-HWDIV: sdiv [[Q:r[0-9]+]], r0, r1 +; THUMB-HWDIV: mls r0, [[Q]], r1, r0 +; AEABI-NOHWDIV: bl __aeabi_idivmod +; AEABI-NOHWDIV: mov r0, r1 + %r = srem i32 %x, %y + ret i32 %r +} + +define arm_aapcscc i32 @test_i32_urem(i32 %x, i32 %y) { +; ALL-LABEL: test_i32_urem: +; ARM-HWDIV: udiv [[Q:r[0-9]+]], r0, r1 +; ARM-HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 +; ARM-HWDIV: sub r0, r0, [[P]] +; THUMB-HWDIV: udiv [[Q:r[0-9]+]], r0, r1 +; THUMB-HWDIV: mls r0, [[Q]], r1, r0 +; AEABI-NOHWDIV: bl __aeabi_uidivmod +; AEABI-NOHWDIV: mov r0, r1 + %r = urem i32 %x, %y + ret i32 %r +} diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll index 9336d0c477d1b907ab410e722796a5a996b90c1a..ffc1ed09cbf0c3be3f4df17f39bd9136f544edfd 100644 --- a/test/CodeGen/ARM/divmod.ll +++ b/test/CodeGen/ARM/divmod.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 ; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=swift | FileCheck %s -check-prefix=SWIFT +; RUN: llc < %s -mtriple=thumbv7-apple-macho -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 ; rdar://12481395 diff --git a/test/CodeGen/ARM/execute-only-big-stack-frame.ll b/test/CodeGen/ARM/execute-only-big-stack-frame.ll index fb498a81e390a9e88f171297d92dad000f5a74c7..0fe67f9863a58bbea60839d871f1793e4b1f1c2b 100644 --- a/test/CodeGen/ARM/execute-only-big-stack-frame.ll +++ b/test/CodeGen/ARM/execute-only-big-stack-frame.ll @@ -10,10 +10,10 @@ define i8 @test_big_stack_frame() { ; CHECK-SUBW-ADDW-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-SUBW-ADDW: sub.w sp, sp, #65536 ; CHECK-SUBW-ADDW-NOT: ldr {{r[0-9]+}}, .{{.*}} -; CHECK-SUBW-ADDW: add.w [[REG1:r[0-9]+]], sp, #255 +; CHECK-SUBW-ADDW: add.w [[REG1:r[0-9]+|lr]], sp, #255 ; CHECK-SUBW-ADDW: add.w {{r[0-9]+}}, [[REG1]], #65280 ; CHECK-SUBW-ADDW-NOT: ldr {{r[0-9]+}}, .{{.*}} -; CHECK-SUBW-ADDW: add.w lr, sp, #61440 +; CHECK-SUBW-ADDW: add.w [[REGX:r[0-9]+|lr]], sp, #61440 ; CHECK-SUBW-ADDW-NOT: ldr {{r[0-9]+}}, .{{.*}} ; CHECK-SUBW-ADDW: add.w sp, sp, #65536 diff --git a/test/CodeGen/ARM/fastisel-thumb-litpool.ll b/test/CodeGen/ARM/fastisel-thumb-litpool.ll index aa9e7260fb2e662f5627309d16d93f72e608ff3e..53653a5a4f573f30d01bf9430f779c6c83144759 100644 --- a/test/CodeGen/ARM/fastisel-thumb-litpool.ll +++ b/test/CodeGen/ARM/fastisel-thumb-litpool.ll @@ -5,6 +5,7 @@ ; hence the CHECK-NOT. define i32 @test_thumb_ldrlit() minsize { +; CHECK-LABEL: test_thumb_ldrlit: ; CHECK: ldr r0, LCPI0_0 ; CHECK-NOT: ldr ret i32 12345678 diff --git a/test/CodeGen/ARM/fence-singlethread.ll b/test/CodeGen/ARM/fence-singlethread.ll new file mode 100644 index 0000000000000000000000000000000000000000..ec032ccac423c063e2a29a7fdb98da8614aee876 --- /dev/null +++ b/test/CodeGen/ARM/fence-singlethread.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-ios %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -filetype=obj -o %t +; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=OBJ + +; OBJ-NOT: dmb + +define void @fence_singlethread() { +; CHECK-LABEL: fence_singlethread: +; CHECK-NOT: dmb +; CHECK: @ COMPILER BARRIER +; CHECK-NOT: dmb + + fence singlethread seq_cst + ret void +} diff --git a/test/CodeGen/ARM/fpoffset_overflow.mir b/test/CodeGen/ARM/fpoffset_overflow.mir new file mode 100644 index 0000000000000000000000000000000000000000..4f3524bf7d117b63763514c2dfd65d22d9dd6568 --- /dev/null +++ b/test/CodeGen/ARM/fpoffset_overflow.mir @@ -0,0 +1,96 @@ +# RUN: llc -o - %s -mtriple=thumbv7-- -run-pass=stack-protector -run-pass=prologepilog | FileCheck %s +--- +# This should trigger an emergency spill in the register scavenger because the +# frame offset into the large argument is too large. +# CHECK-LABEL: name: func0 +# CHECK: t2STRi12 killed [[SPILLED:%r[0-9]+]], %sp, 0, 14, _ :: (store 4 into %stack.0) +# CHECK: [[SPILLED]] = t2ADDri killed %sp, 4096, 14, _, _ +# CHECK: %sp = t2LDRi12 killed [[SPILLED]], 40, 14, _ :: (load 4) +# CHECK: [[SPILLED]] = t2LDRi12 %sp, 0, 14, _ :: (load 4 from %stack.0) +name: func0 +tracksRegLiveness: true +fixedStack: + - { id: 0, offset: 4084, size: 4, alignment: 4, isImmutable: true, + isAliased: false } + - { id: 1, offset: -12, size: 4096, alignment: 4, isImmutable: false, + isAliased: false } +body: | + bb.0: + %r0 = IMPLICIT_DEF + %r1 = IMPLICIT_DEF + %r2 = IMPLICIT_DEF + %r3 = IMPLICIT_DEF + %r4 = IMPLICIT_DEF + %r5 = IMPLICIT_DEF + %r6 = IMPLICIT_DEF + %r7 = IMPLICIT_DEF + %r8 = IMPLICIT_DEF + %r9 = IMPLICIT_DEF + %r10 = IMPLICIT_DEF + %r11 = IMPLICIT_DEF + %r12 = IMPLICIT_DEF + %lr = IMPLICIT_DEF + + %sp = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4) + + KILL %r0 + KILL %r1 + KILL %r2 + KILL %r3 + KILL %r4 + KILL %r5 + KILL %r6 + KILL %r7 + KILL %r8 + KILL %r9 + KILL %r10 + KILL %r11 + KILL %r12 + KILL %lr +... +--- +# This should not trigger an emergency spill yet. +# CHECK-LABEL: name: func1 +# CHECK-NOT: t2STRi12 +# CHECK-NOT: t2ADDri +# CHECK: %r11 = t2LDRi12 %sp, 4092, 14, _ :: (load 4) +# CHECK-NOT: t2LDRi12 +name: func1 +tracksRegLiveness: true +fixedStack: + - { id: 0, offset: 4044, size: 4, alignment: 4, isImmutable: true, + isAliased: false } + - { id: 1, offset: -12, size: 4056, alignment: 4, isImmutable: false, + isAliased: false } +body: | + bb.0: + %r0 = IMPLICIT_DEF + %r1 = IMPLICIT_DEF + %r2 = IMPLICIT_DEF + %r3 = IMPLICIT_DEF + %r4 = IMPLICIT_DEF + %r5 = IMPLICIT_DEF + %r6 = IMPLICIT_DEF + %r8 = IMPLICIT_DEF + %r9 = IMPLICIT_DEF + %r10 = IMPLICIT_DEF + %r11 = IMPLICIT_DEF + %r12 = IMPLICIT_DEF + %lr = IMPLICIT_DEF + + %r11 = t2LDRi12 %fixed-stack.0, 0, 14, _ :: (load 4) + + KILL %r0 + KILL %r1 + KILL %r2 + KILL %r3 + KILL %r4 + KILL %r5 + KILL %r6 + KILL %r8 + KILL %r9 + KILL %r10 + KILL %r11 + KILL %r12 + KILL %lr +... diff --git a/test/CodeGen/ARM/global-merge-external.ll b/test/CodeGen/ARM/global-merge-external.ll index a9e0d199705a8b094b5bd39675c5c4ac6dbbe0be..03c977614320f3050a3a7b862a52caab8404665d 100644 --- a/test/CodeGen/ARM/global-merge-external.ll +++ b/test/CodeGen/ARM/global-merge-external.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-MERGE ; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefix=CHECK-NO-MERGE ; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefix=CHECK-NO-MERGE +; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefix=CHECK-NO-MERGE @x = global i32 0, align 4 @y = global i32 0, align 4 diff --git a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll index a44c9721d6c13583589da09b1b6f5a7a2168eafc..1c8142e5ddd51af65483d1ee00bd253366345310 100644 --- a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll +++ b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll @@ -22,7 +22,7 @@ entry: ; for.body -> for.cond.backedge (100%) ; -> cond.false.i (0%) ; CHECK: BB#1: derived from LLVM BB %for.body -; CHECK: Successors according to CFG: BB#2(0x7ffffc00 / 0x80000000 = 100.00%) BB#4(0x00000400 / 0x80000000 = 0.00%) +; CHECK: Successors according to CFG: BB#2(0x80000000 / 0x80000000 = 100.00%) BB#4(0x00000001 / 0x80000000 = 0.00%) for.body: br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1 diff --git a/test/CodeGen/ARM/invalidated-save-point.ll b/test/CodeGen/ARM/invalidated-save-point.ll index 0ff153b6799d751d6edea3ce4956ebe13618850d..bb602308a1793fc537e4d98eb5c737943b9a1714 100644 --- a/test/CodeGen/ARM/invalidated-save-point.ll +++ b/test/CodeGen/ARM/invalidated-save-point.ll @@ -4,8 +4,8 @@ ; this point. Notably, if it isn't is will be invalid and reference a ; deleted block (%bb.-1.if.end) -; CHECK-NOT: savePoint: -; CHECK-NOT: restorePoint: +; CHECK: savePoint: '' +; CHECK: restorePoint: '' target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7" diff --git a/test/CodeGen/ARM/load-arm.ll b/test/CodeGen/ARM/load-arm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3807424ece81ab66165bccceafc91dcca64c08aa --- /dev/null +++ b/test/CodeGen/ARM/load-arm.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=arm %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7 %s -o - | FileCheck %s + +; We ended up feeding a deleted node back to TableGen when we converted "Off * +; 410" into "(Off * 205) << 1", where the multiplication already existed in the +; DAG. + +; CHECK-LABEL: addrmode_cse_mutation: +; CHECK: {{mul|muls}} [[OFFSET:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: {{ldrb|ldrb.w}} {{r[0-9]+}}, [r0, [[OFFSET]], lsl #3] +define i32 @addrmode_cse_mutation(i8* %base, i32 %count) { + %offset = mul i32 %count, 277288 + %ptr = getelementptr i8, i8* %base, i32 %offset + %val = load volatile i8, i8* %ptr + %res = mul i32 %count, 34661 + ret i32 %res +} + +; CHECK-LABEL: addrmode_cse_multi_use: +; CHECK-NOT: {{ldrb|ldrb.w}} {{r[0-9]+}}, [{{r[0-9]+}}, {{r[0-9]+}}, lsl #3] +define i32 @addrmode_cse_multi_use(i8* %base, i32 %count) { + %offset = mul i32 %count, 277288 + %ptr = getelementptr i8, i8* %base, i32 %offset + %val = load volatile i8, i8* %ptr + %res = mul i32 %count, 34661 + %res.1 = add i32 %res, %offset + ret i32 %res.1 +} diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index d874884dcb39309121c226bd6e092be0ac4fa6f4..b447497b270abfd5b65c70ec528377f5d651e85a 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -30,12 +30,13 @@ entry: define void @t1(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t1: -; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] -; CHECK: adds r0, #15 -; CHECK: adds r1, #15 +; CHECK: movs [[INC:r[0-9]+]], #15 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1], [[INC]] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]] ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK-T1-LABEL: t1: +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) ret void } @@ -43,13 +44,17 @@ entry: define void @t2(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t2: +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: add.w r3, r0, #16 +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]] ; CHECK: movw [[REG2:r[0-9]+]], #16716 ; CHECK: movt [[REG2:r[0-9]+]], #72 -; CHECK: str [[REG2]], [r0, #32] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! +; CHECK: str [[REG2]], [r0] ; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3] +; CHECK-T1-LABEL: t2: +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) ret void } @@ -61,6 +66,8 @@ entry: ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! ; CHECK: vldr d{{[0-9]+}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0] +; CHECK-T1-LABEL: t3: +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) ret void } @@ -71,6 +78,8 @@ entry: ; CHECK: vld1.64 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1] ; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]! ; CHECK: strh [[REG5:r[0-9]+]], [r0] +; CHECK-T1-LABEL: t4: +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false) ret void } @@ -86,10 +95,7 @@ entry: ; CHECK: movt [[REG7:r[0-9]+]], #22866 ; CHECK: str [[REG7]] ; CHECK-T1-LABEL: t5: -; CHECK-T1: movs [[TREG3:r[0-9]]], -; CHECK-T1: strb [[TREG3]], -; CHECK-T1: movs [[TREG4:r[0-9]]], -; CHECK-T1: strb [[TREG4]], +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) ret void } diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll index f6f8d5623509e3475dfbd0e74979474104b04e44..b2bd257701d3f2a705cb2ee778656c11cdd7fc29 100644 --- a/test/CodeGen/ARM/memset-inline.ll +++ b/test/CodeGen/ARM/memset-inline.ll @@ -1,22 +1,36 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s -check-prefix=CHECK-7A +; RUN: llc < %s -mtriple=thumbv6m -pre-RA-sched=source -disable-post-ra -mattr=+strict-align | FileCheck %s -check-prefix=CHECK-6M define void @t1(i8* nocapture %c) nounwind optsize { entry: -; CHECK-LABEL: t1: -; CHECK: movs r1, #0 -; CHECK: strd r1, r1, [r0] -; CHECK: str r1, [r0, #8] +; CHECK-7A-LABEL: t1: +; CHECK-7A: movs r1, #0 +; CHECK-7A: strd r1, r1, [r0] +; CHECK-7A: str r1, [r0, #8] +; CHECK-6M-LABEL: t1: +; CHECK-6M: movs r1, #0 +; CHECK-6M: str r1, [r0] +; CHECK-6M: str r1, [r0, #4] +; CHECK-6M: str r1, [r0, #8] call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) ret void } define void @t2() nounwind ssp { entry: -; CHECK-LABEL: t2: -; CHECK: add.w r1, r0, #10 -; CHECK: vmov.i32 {{q[0-9]+}}, #0x0 -; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK-7A-LABEL: t2: +; CHECK-7A: vmov.i32 {{q[0-9]+}}, #0x0 +; CHECK-7A: movs r1, #10 +; CHECK-7A: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2], r1 +; CHECK-7A: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2] +; CHECK-6M-LABEL: t2: +; CHECK-6M: movs [[REG:r[0-9]+]], #0 +; CHECK-6M: str [[REG]], [sp, #20] +; CHECK-6M: str [[REG]], [sp, #16] +; CHECK-6M: str [[REG]], [sp, #12] +; CHECK-6M: str [[REG]], [sp, #8] +; CHECK-6M: str [[REG]], [sp, #4] +; CHECK-6M: str [[REG]], [sp] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) @@ -24,6 +38,56 @@ entry: ret void } +define void @t3(i8* %p) { +entry: +; CHECK-7A-LABEL: t3: +; CHECK-7A: muls [[REG:r[0-9]+]], +; CHECK-7A: str [[REG]], +; CHECK-6M-LABEL: t3: +; CHECK-6M-NOT: muls +; CHECK-6M: strb [[REG:r[0-9]+]], +; CHECK-6M: strb [[REG]], +; CHECK-6M: strb [[REG]], +; CHECK-6M: strb [[REG]], + br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = trunc i32 %i to i8 + call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false) + call void @something(i8* %p) + %inc = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 255 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @t4(i8* %p) { +entry: +; CHECK-7A-LABEL: t4: +; CHECK-7A: muls [[REG:r[0-9]+]], +; CHECK-7A: str [[REG]], +; CHECK-6M-LABEL: t4: +; CHECK-6M: muls [[REG:r[0-9]+]], +; CHECK-6M: strh [[REG]], +; CHECK-6M: strh [[REG]], + br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = trunc i32 %i to i8 + call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false) + call void @something(i8* %p) + %inc = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 255 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + declare void @something(i8*) nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll index 330252a90d7c1113ae07e3ecba349d058d713c9e..53f8b8d15042d390bce754101e04e7bdd9ffb2f6 100644 --- a/test/CodeGen/ARM/misched-copy-arm.ll +++ b/test/CodeGen/ARM/misched-copy-arm.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=thumb-eabi -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=misched -arm-atomic-cfg-tidy=0 %s -o - 2>&1 | FileCheck %s +; RUN: llc -mtriple=thumb-eabi -mcpu=swift -pre-RA-sched=source -join-globalcopies -enable-misched -verify-misched -debug-only=machine-scheduler -arm-atomic-cfg-tidy=0 %s -o - 2>&1 | FileCheck %s ; ; Loop counter copies should be eliminated. ; There is also a MUL here, but we don't care where it is scheduled. diff --git a/test/CodeGen/ARM/misched-fp-basic.ll b/test/CodeGen/ARM/misched-fp-basic.ll index 27ad2cec34fd63eeafb9f2d085ef43a71e7d23af..2f672b0cb540febd3471f6ca4de799b81a1fc303 100644 --- a/test/CodeGen/ARM/misched-fp-basic.ll +++ b/test/CodeGen/ARM/misched-fp-basic.ll @@ -1,9 +1,9 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a9 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a9 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > \ ; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 -; RUN: llc < %s -mtriple=arm-eabi -mcpu=swift -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: llc < %s -mtriple=arm-eabi -mcpu=swift -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > \ ; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT -; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > \ ; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 ; ; Check the latency of instructions for processors with sched-models diff --git a/test/CodeGen/ARM/misched-int-basic-thumb2.mir b/test/CodeGen/ARM/misched-int-basic-thumb2.mir index 86ef1e26f63688844440dc4d4326dfd927ca1854..32d1e03d9a1bb10730485ea936ae59a726b74bf1 100644 --- a/test/CodeGen/ARM/misched-int-basic-thumb2.mir +++ b/test/CodeGen/ARM/misched-int-basic-thumb2.mir @@ -1,10 +1,10 @@ # Basic machine sched model test for Thumb2 int instructions # RUN: llc -o /dev/null %s -mtriple=thumbv7-eabi -mcpu=swift -run-pass machine-scheduler -enable-misched -verify-misched \ -# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT +# RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT # RUN: llc -o /dev/null %s -mtriple=thumbv7--eabi -mcpu=cortex-a9 -run-pass machine-scheduler -enable-misched -verify-misched \ -# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 +# RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 # RUN: llc -o /dev/null %s -mtriple=thumbv8r-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -verify-misched \ -# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 +# RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 # REQUIRES: asserts --- | ; ModuleID = 'foo.ll' diff --git a/test/CodeGen/ARM/misched-int-basic.mir b/test/CodeGen/ARM/misched-int-basic.mir index f237c0a07b2edb71cf48699b84c3d5a955f06499..d5231269d732c244921b8e7b51d095fb9e2339a9 100644 --- a/test/CodeGen/ARM/misched-int-basic.mir +++ b/test/CodeGen/ARM/misched-int-basic.mir @@ -1,9 +1,9 @@ # RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=swift -run-pass machine-scheduler -enable-misched -verify-misched \ -# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT +# RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT # RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-a9 -run-pass machine-scheduler -enable-misched -verify-misched \ -# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 +# RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 # RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -verify-misched \ -# RUN: -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 +# RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 # REQUIRES: asserts --- | ; ModuleID = 'foo.ll' diff --git a/test/CodeGen/ARM/sat-arith.ll b/test/CodeGen/ARM/sat-arith.ll deleted file mode 100644 index 4844ed1bd21e253e042073434784ff2c1b4ac763..0000000000000000000000000000000000000000 --- a/test/CodeGen/ARM/sat-arith.ll +++ /dev/null @@ -1,63 +0,0 @@ -; RUN: llc -O1 -mtriple=armv6-none-none-eabi %s -o - | FileCheck %s -check-prefix=ARM -check-prefix=CHECK -; RUN: llc -O1 -mtriple=thumbv7-none-none-eabi %s -o - | FileCheck %s -check-prefix=THUMB -check-prefix=CHECK - -; CHECK-LABEL: qadd -define i32 @qadd() nounwind { -; CHECK-DAG: mov{{s?}} [[R0:.*]], #8 -; CHECK-DAG: mov{{s?}} [[R1:.*]], #128 -; CHECK-ARM: qadd [[R0]], [[R1]], [[R0]] -; CHECK-THRUMB: qadd [[R0]], [[R0]], [[R1]] - %tmp = call i32 @llvm.arm.qadd(i32 128, i32 8) - ret i32 %tmp -} - -; CHECK-LABEL: qsub -define i32 @qsub() nounwind { -; CHECK-DAG: mov{{s?}} [[R0:.*]], #8 -; CHECK-DAG: mov{{s?}} [[R1:.*]], #128 -; CHECK-ARM: qsub [[R0]], [[R1]], [[R0]] -; CHECK-THRUMB: qadd [[R0]], [[R1]], [[R0]] - %tmp = call i32 @llvm.arm.qsub(i32 128, i32 8) - ret i32 %tmp -} - -; upper-bound of the immediate argument -; CHECK-LABEL: ssat1 -define i32 @ssat1() nounwind { -; CHECK: mov{{s?}} [[R0:.*]], #128 -; CHECK: ssat [[R1:.*]], #32, [[R0]] - %tmp = call i32 @llvm.arm.ssat(i32 128, i32 32) - ret i32 %tmp -} - -; lower-bound of the immediate argument -; CHECK-LABEL: ssat2 -define i32 @ssat2() nounwind { -; CHECK: mov{{s?}} [[R0:.*]], #128 -; CHECK: ssat [[R1:.*]], #1, [[R0]] - %tmp = call i32 @llvm.arm.ssat(i32 128, i32 1) - ret i32 %tmp -} - -; upper-bound of the immediate argument -; CHECK-LABEL: usat1 -define i32 @usat1() nounwind { -; CHECK: mov{{s?}} [[R0:.*]], #128 -; CHECK: usat [[R1:.*]], #31, [[R0]] - %tmp = call i32 @llvm.arm.usat(i32 128, i32 31) - ret i32 %tmp -} - -; lower-bound of the immediate argument -; CHECK-LABEL: usat2 -define i32 @usat2() nounwind { -; CHECK: mov{{s?}} [[R0:.*]], #128 -; CHECK: usat [[R1:.*]], #0, [[R0]] - %tmp = call i32 @llvm.arm.usat(i32 128, i32 0) - ret i32 %tmp -} - -declare i32 @llvm.arm.qadd(i32, i32) nounwind -declare i32 @llvm.arm.qsub(i32, i32) nounwind -declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone -declare i32 @llvm.arm.usat(i32, i32) nounwind readnone diff --git a/test/CodeGen/ARM/sincos.ll b/test/CodeGen/ARM/sincos.ll index 5be0044ddbd3548dd6316876a10419a3e2917e79..42a834d24b3e1d12f3d7bae57cd650c845af301b 100644 --- a/test/CodeGen/ARM/sincos.ll +++ b/test/CodeGen/ARM/sincos.ll @@ -1,10 +1,12 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios6 -mcpu=cortex-a8 | FileCheck %s --check-prefix=NOOPT ; RUN: llc < %s -mtriple=armv7-apple-ios7 -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS -; RUN: llc < %s -mtriple=armv7-linux-gnu -mcpu=cortex-a8 | FileCheck %s --check-prefix=NOOPT-GNU +; RUN: llc < %s -mtriple=armv7-linux-gnu -mcpu=cortex-a8 | FileCheck %s --check-prefix=SINCOS-GNU ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a8 \ ; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=SINCOS-GNU -; Combine sin / cos into a single call. +; Combine sin / cos into a single call unless they may write errno (as +; captured by readnone attrbiute, controlled by clang -fmath-errno +; setting). ; rdar://12856873 define float @test1(float %x) nounwind { @@ -19,12 +21,28 @@ entry: ; NOOPT: bl _sinf ; NOOPT: bl _cosf -; NOOPT-GNU-LABEL: test1: -; NOOPT-GNU: bl sinf -; NOOPT-GNU: bl cosf + %call = tail call float @sinf(float %x) readnone + %call1 = tail call float @cosf(float %x) readnone + %add = fadd float %call, %call1 + ret float %add +} + +define float @test1_errno(float %x) nounwind { +entry: +; SINCOS-LABEL: test1_errno: +; SINCOS: bl _sinf +; SINCOS: bl _cosf - %call = tail call float @sinf(float %x) nounwind readnone - %call1 = tail call float @cosf(float %x) nounwind readnone +; SINCOS-GNU-LABEL: test1_errno: +; SINCOS-GNU: bl sinf +; SINCOS-GNU: bl cosf + +; NOOPT-LABEL: test1_errno: +; NOOPT: bl _sinf +; NOOPT: bl _cosf + + %call = tail call float @sinf(float %x) + %call1 = tail call float @cosf(float %x) %add = fadd float %call, %call1 ret float %add } @@ -41,16 +59,33 @@ entry: ; NOOPT: bl _sin ; NOOPT: bl _cos -; NOOPT-GNU-LABEL: test2: -; NOOPT-GNU: bl sin -; NOOPT-GNU: bl cos - %call = tail call double @sin(double %x) nounwind readnone - %call1 = tail call double @cos(double %x) nounwind readnone + %call = tail call double @sin(double %x) readnone + %call1 = tail call double @cos(double %x) readnone + %add = fadd double %call, %call1 + ret double %add +} + +define double @test2_errno(double %x) nounwind { +entry: +; SINCOS-LABEL: test2_errno: +; SINCOS: bl _sin +; SINCOS: bl _cos + +; SINCOS-GNU-LABEL: test2_errno: +; SINCOS-GNU: bl sin +; SINCOS-GNU: bl cos + +; NOOPT-LABEL: test2_errno: +; NOOPT: bl _sin +; NOOPT: bl _cos + + %call = tail call double @sin(double %x) + %call1 = tail call double @cos(double %x) %add = fadd double %call, %call1 ret double %add } -declare float @sinf(float) readonly -declare double @sin(double) readonly -declare float @cosf(float) readonly -declare double @cos(double) readonly +declare float @sinf(float) +declare double @sin(double) +declare float @cosf(float) +declare double @cos(double) diff --git a/test/CodeGen/ARM/single-issue-r52.mir b/test/CodeGen/ARM/single-issue-r52.mir index 6c95f7603e6e0007c0e449bf384673e419d572d6..1eba074dafb3ceb4b5b8354bfea50114921ed988 100644 --- a/test/CodeGen/ARM/single-issue-r52.mir +++ b/test/CodeGen/ARM/single-issue-r52.mir @@ -1,5 +1,5 @@ -# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-topdown 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=TOPDOWN -# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched -misched-bottomup 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOTTOMUP +# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=machine-scheduler -misched-topdown 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=TOPDOWN +# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=machine-scheduler -misched-bottomup 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOTTOMUP # REQUIRES: asserts --- | ; ModuleID = 'foo.ll' diff --git a/test/CodeGen/ARM/swifterror.ll b/test/CodeGen/ARM/swifterror.ll index 78764202f62730ad6699ea6ef74970e6f0819651..3fd57c592bfb693cb36cc15518d074acd034f36c 100644 --- a/test/CodeGen/ARM/swifterror.ll +++ b/test/CodeGen/ARM/swifterror.ll @@ -528,3 +528,31 @@ entry: tail call void @acallee(i8* null) ret void } + + +declare swiftcc void @foo2(%swift_error** swifterror) + +; Make sure we properly assign registers during fast-isel. +; CHECK-O0-LABEL: testAssign +; CHECK-O0: mov r8, #0 +; CHECK-O0: bl _foo2 +; CHECK-O0: str r8, [s[[STK:p.*]]] +; CHECK-O0: ldr r0, [s[[STK]]] +; CHECK-O0: pop + +; CHECK-APPLE-LABEL: testAssign +; CHECK-APPLE: mov r8, #0 +; CHECK-APPLE: bl _foo2 +; CHECK-APPLE: mov r0, r8 + +define swiftcc %swift_error* @testAssign(i8* %error_ref) { +entry: + %error_ptr = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr + call swiftcc void @foo2(%swift_error** swifterror %error_ptr) + br label %a + +a: + %error = load %swift_error*, %swift_error** %error_ptr + ret %swift_error* %error +} diff --git a/test/CodeGen/ARM/v6-jumptable-clobber.mir b/test/CodeGen/ARM/v6-jumptable-clobber.mir index 0e9bc42565f3bbdeb9636d86034ae3217e193fc9..6577ef84867130cbeff1d933a10b80ae19c0eb6a 100644 --- a/test/CodeGen/ARM/v6-jumptable-clobber.mir +++ b/test/CodeGen/ARM/v6-jumptable-clobber.mir @@ -190,7 +190,6 @@ name: foo alignment: 1 exposesReturnsTwice: false -noVRegs: true legalized: false regBankSelected: false selected: false @@ -289,7 +288,6 @@ body: | name: bar alignment: 1 exposesReturnsTwice: false -noVRegs: true legalized: false regBankSelected: false selected: false diff --git a/test/CodeGen/ARM/v6m-smul-with-overflow.ll b/test/CodeGen/ARM/v6m-smul-with-overflow.ll new file mode 100644 index 0000000000000000000000000000000000000000..6e8a7041de2b987b7fb49eae412dbdd10e1fdba0 --- /dev/null +++ b/test/CodeGen/ARM/v6m-smul-with-overflow.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s + +define i1 @signed_multiplication_did_overflow(i32, i32) { +; CHECK-LABEL: signed_multiplication_did_overflow: +entry-block: + %2 = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %0, i32 %1) + %3 = extractvalue { i32, i1 } %2, 1 + ret i1 %3 + +; CHECK: mov r2, r1 +; CHECK: asrs r1, r0, #31 +; CHECK: asrs r3, r2, #31 +; CHECK: bl __aeabi_lmul +} + +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll index 38c6d6c28aedfb87030b5fca2077b7683d6e428c..4295b32d25fc714997c7e6c9cdb93c28209eb357 100644 --- a/test/CodeGen/ARM/vabs.ll +++ b/test/CodeGen/ARM/vabs.ll @@ -8,6 +8,22 @@ define <8 x i8> @vabss8(<8 x i8>* %A) nounwind { ret <8 x i8> %tmp2 } +define <8 x i8> @vabss8_fold(<8 x i8>* %A) nounwind { +; CHECK-LABEL: vabss8_fold: +; CHECK: vldr d16, .LCPI1_0 +; CHECK: .LCPI1_0: +; CHECK-NEXT: .byte 128 @ 0x80 +; CHECK-NEXT: .byte 127 @ 0x7f +; CHECK-NEXT: .byte 1 @ 0x1 +; CHECK-NEXT: .byte 0 @ 0x0 +; CHECK-NEXT: .byte 1 @ 0x1 +; CHECK-NEXT: .byte 127 @ 0x7f +; CHECK-NEXT: .byte 128 @ 0x80 +; CHECK-NEXT: .byte 1 @ 0x1 + %tmp1 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> ) + ret <8 x i8> %tmp1 +} + define <4 x i16> @vabss16(<4 x i16>* %A) nounwind { ;CHECK-LABEL: vabss16: ;CHECK: vabs.s16 @@ -16,6 +32,18 @@ define <4 x i16> @vabss16(<4 x i16>* %A) nounwind { ret <4 x i16> %tmp2 } +define <4 x i16> @vabss16_fold() nounwind { +; CHECK-LABEL: vabss16_fold: +; CHECK: vldr d16, .LCPI3_0 +; CHECK: .LCPI3_0: +; CHECK-NEXT: .short 32768 @ 0x8000 +; CHECK-NEXT: .short 32767 @ 0x7fff +; CHECK-NEXT: .short 255 @ 0xff +; CHECK-NEXT: .short 32768 @ 0x8000 + %tmp1 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> ) + ret <4 x i16> %tmp1 +} + define <2 x i32> @vabss32(<2 x i32>* %A) nounwind { ;CHECK-LABEL: vabss32: ;CHECK: vabs.s32 @@ -24,6 +52,16 @@ define <2 x i32> @vabss32(<2 x i32>* %A) nounwind { ret <2 x i32> %tmp2 } +define <2 x i32> @vabss32_fold() nounwind { +; CHECK-LABEL: vabss32_fold: +; CHECK: vldr d16, .LCPI5_0 +; CHECK: .LCPI5_0: +; CHECK-NEXT: .long 2147483647 @ 0x7fffffff +; CHECK-NEXT: .long 2147483648 @ 0x80000000 + %tmp1 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> ) + ret <2 x i32> %tmp1 +} + define <2 x float> @vabsf32(<2 x float>* %A) nounwind { ;CHECK-LABEL: vabsf32: ;CHECK: vabs.f32 diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll index db9bc6ccdd0c838b7108c32f73170bf9e39626f4..0a7f7698fa88c425efe6403a56e48e316a16993b 100644 --- a/test/CodeGen/ARM/vbits.ll +++ b/test/CodeGen/ARM/vbits.ll @@ -1,8 +1,14 @@ -; RUN: llc -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 %s -o - | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a8 | FileCheck %s define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_andi8: -;CHECK: vand +; CHECK-LABEL: v_andi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vand d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = and <8 x i8> %tmp1, %tmp2 @@ -10,8 +16,13 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_andi16: -;CHECK: vand +; CHECK-LABEL: v_andi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vand d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = and <4 x i16> %tmp1, %tmp2 @@ -19,8 +30,13 @@ define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_andi32: -;CHECK: vand +; CHECK-LABEL: v_andi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vand d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = and <2 x i32> %tmp1, %tmp2 @@ -28,8 +44,13 @@ define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_andi64: -;CHECK: vand +; CHECK-LABEL: v_andi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vand d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = and <1 x i64> %tmp1, %tmp2 @@ -37,8 +58,14 @@ define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_andQi8: -;CHECK: vand +; CHECK-LABEL: v_andQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vand q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = and <16 x i8> %tmp1, %tmp2 @@ -46,8 +73,14 @@ define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_andQi16: -;CHECK: vand +; CHECK-LABEL: v_andQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vand q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = and <8 x i16> %tmp1, %tmp2 @@ -55,8 +88,14 @@ define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_andQi32: -;CHECK: vand +; CHECK-LABEL: v_andQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vand q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = and <4 x i32> %tmp1, %tmp2 @@ -64,8 +103,14 @@ define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_andQi64: -;CHECK: vand +; CHECK-LABEL: v_andQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vand q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = and <2 x i64> %tmp1, %tmp2 @@ -73,8 +118,13 @@ define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_bici8: -;CHECK: vbic +; CHECK-LABEL: v_bici8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vbic d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -83,8 +133,13 @@ define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_bici16: -;CHECK: vbic +; CHECK-LABEL: v_bici16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vbic d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > @@ -93,8 +148,13 @@ define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_bici32: -;CHECK: vbic +; CHECK-LABEL: v_bici32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vbic d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > @@ -103,8 +163,13 @@ define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_bici64: -;CHECK: vbic +; CHECK-LABEL: v_bici64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vbic d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp2, < i64 -1 > @@ -113,8 +178,14 @@ define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_bicQi8: -;CHECK: vbic +; CHECK-LABEL: v_bicQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vbic q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -123,8 +194,14 @@ define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_bicQi16: -;CHECK: vbic +; CHECK-LABEL: v_bicQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vbic q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > @@ -133,8 +210,14 @@ define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_bicQi32: -;CHECK: vbic +; CHECK-LABEL: v_bicQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vbic q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > @@ -143,8 +226,14 @@ define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_bicQi64: -;CHECK: vbic +; CHECK-LABEL: v_bicQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vbic q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > @@ -153,8 +242,13 @@ define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_eori8: -;CHECK: veor +; CHECK-LABEL: v_eori8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: veor d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp1, %tmp2 @@ -162,8 +256,13 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_eori16: -;CHECK: veor +; CHECK-LABEL: v_eori16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: veor d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = xor <4 x i16> %tmp1, %tmp2 @@ -171,8 +270,13 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_eori32: -;CHECK: veor +; CHECK-LABEL: v_eori32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: veor d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp1, %tmp2 @@ -180,8 +284,13 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_eori64: -;CHECK: veor +; CHECK-LABEL: v_eori64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: veor d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp1, %tmp2 @@ -189,8 +298,14 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_eorQi8: -;CHECK: veor +; CHECK-LABEL: v_eorQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: veor q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp1, %tmp2 @@ -198,8 +313,14 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_eorQi16: -;CHECK: veor +; CHECK-LABEL: v_eorQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: veor q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp1, %tmp2 @@ -207,8 +328,14 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_eorQi32: -;CHECK: veor +; CHECK-LABEL: v_eorQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: veor q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp1, %tmp2 @@ -216,8 +343,14 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_eorQi64: -;CHECK: veor +; CHECK-LABEL: v_eorQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: veor q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp1, %tmp2 @@ -225,72 +358,113 @@ define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind { -;CHECK-LABEL: v_mvni8: -;CHECK: vmvn +; CHECK-LABEL: v_mvni8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vmvn d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > ret <8 x i8> %tmp2 } define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind { -;CHECK-LABEL: v_mvni16: -;CHECK: vmvn +; CHECK-LABEL: v_mvni16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vmvn d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 > ret <4 x i16> %tmp2 } define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind { -;CHECK-LABEL: v_mvni32: -;CHECK: vmvn +; CHECK-LABEL: v_mvni32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vmvn d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 > ret <2 x i32> %tmp2 } define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind { -;CHECK-LABEL: v_mvni64: -;CHECK: vmvn +; CHECK-LABEL: v_mvni64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vmvn d16, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = xor <1 x i64> %tmp1, < i64 -1 > ret <1 x i64> %tmp2 } define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind { -;CHECK-LABEL: v_mvnQi8: -;CHECK: vmvn +; CHECK-LABEL: v_mvnQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > ret <16 x i8> %tmp2 } define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind { -;CHECK-LABEL: v_mvnQi16: -;CHECK: vmvn +; CHECK-LABEL: v_mvnQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > ret <8 x i16> %tmp2 } define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind { -;CHECK-LABEL: v_mvnQi32: -;CHECK: vmvn +; CHECK-LABEL: v_mvnQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 > ret <4 x i32> %tmp2 } define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind { -;CHECK-LABEL: v_mvnQi64: -;CHECK: vmvn +; CHECK-LABEL: v_mvnQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 > ret <2 x i64> %tmp2 } define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_orri8: -;CHECK: vorr +; CHECK-LABEL: v_orri8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorr d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = or <8 x i8> %tmp1, %tmp2 @@ -298,8 +472,13 @@ define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_orri16: -;CHECK: vorr +; CHECK-LABEL: v_orri16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorr d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = or <4 x i16> %tmp1, %tmp2 @@ -307,8 +486,13 @@ define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_orri32: -;CHECK: vorr +; CHECK-LABEL: v_orri32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorr d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = or <2 x i32> %tmp1, %tmp2 @@ -316,8 +500,13 @@ define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_orri64: -;CHECK: vorr +; CHECK-LABEL: v_orri64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorr d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = or <1 x i64> %tmp1, %tmp2 @@ -325,8 +514,14 @@ define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_orrQi8: -;CHECK: vorr +; CHECK-LABEL: v_orrQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorr q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = or <16 x i8> %tmp1, %tmp2 @@ -334,8 +529,14 @@ define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_orrQi16: -;CHECK: vorr +; CHECK-LABEL: v_orrQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorr q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = or <8 x i16> %tmp1, %tmp2 @@ -343,8 +544,14 @@ define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_orrQi32: -;CHECK: vorr +; CHECK-LABEL: v_orrQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorr q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = or <4 x i32> %tmp1, %tmp2 @@ -352,8 +559,14 @@ define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_orrQi64: -;CHECK: vorr +; CHECK-LABEL: v_orrQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorr q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = or <2 x i64> %tmp1, %tmp2 @@ -361,8 +574,13 @@ define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: v_orni8: -;CHECK: vorn +; CHECK-LABEL: v_orni8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorn d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -371,8 +589,13 @@ define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: v_orni16: -;CHECK: vorn +; CHECK-LABEL: v_orni16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorn d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > @@ -381,8 +604,13 @@ define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: v_orni32: -;CHECK: vorn +; CHECK-LABEL: v_orni32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorn d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > @@ -391,8 +619,13 @@ define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { -;CHECK-LABEL: v_orni64: -;CHECK: vorn +; CHECK-LABEL: v_orni64: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vorn d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <1 x i64>, <1 x i64>* %A %tmp2 = load <1 x i64>, <1 x i64>* %B %tmp3 = xor <1 x i64> %tmp2, < i64 -1 > @@ -401,8 +634,14 @@ define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: v_ornQi8: -;CHECK: vorn +; CHECK-LABEL: v_ornQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorn q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > @@ -411,8 +650,14 @@ define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: v_ornQi16: -;CHECK: vorn +; CHECK-LABEL: v_ornQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorn q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > @@ -421,8 +666,14 @@ define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: v_ornQi32: -;CHECK: vorn +; CHECK-LABEL: v_ornQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorn q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > @@ -431,8 +682,14 @@ define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { -;CHECK-LABEL: v_ornQi64: -;CHECK: vorn +; CHECK-LABEL: v_ornQi64: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vorn q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i64>, <2 x i64>* %B %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > @@ -441,8 +698,13 @@ define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind { -;CHECK-LABEL: vtsti8: -;CHECK: vtst.8 +; CHECK-LABEL: vtsti8: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vtst.8 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = load <8 x i8>, <8 x i8>* %B %tmp3 = and <8 x i8> %tmp1, %tmp2 @@ -452,8 +714,13 @@ define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind { -;CHECK-LABEL: vtsti16: -;CHECK: vtst.16 +; CHECK-LABEL: vtsti16: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vtst.16 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B %tmp3 = and <4 x i16> %tmp1, %tmp2 @@ -463,8 +730,13 @@ define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind { -;CHECK-LABEL: vtsti32: -;CHECK: vtst.32 +; CHECK-LABEL: vtsti32: +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0] +; CHECK-NEXT: vtst.32 d16, d17, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B %tmp3 = and <2 x i32> %tmp1, %tmp2 @@ -474,8 +746,14 @@ define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { -;CHECK-LABEL: vtstQi8: -;CHECK: vtst.8 +; CHECK-LABEL: vtstQi8: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtst.8 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = load <16 x i8>, <16 x i8>* %B %tmp3 = and <16 x i8> %tmp1, %tmp2 @@ -485,8 +763,14 @@ define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { -;CHECK-LABEL: vtstQi16: -;CHECK: vtst.16 +; CHECK-LABEL: vtstQi16: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtst.16 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B %tmp3 = and <8 x i16> %tmp1, %tmp2 @@ -496,8 +780,14 @@ define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { -;CHECK-LABEL: vtstQi32: -;CHECK: vtst.32 +; CHECK-LABEL: vtstQi32: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r1] +; CHECK-NEXT: vld1.64 {d18, d19}, [r0] +; CHECK-NEXT: vtst.32 q8, q9, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i32>, <4 x i32>* %B %tmp3 = and <4 x i32> %tmp1, %tmp2 @@ -508,19 +798,24 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind { ; CHECK-LABEL: v_orrimm: -; CHECK-NOT: vmov -; CHECK-NOT: vmvn -; CHECK: vorr +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vorr.i32 d16, #0x1000000 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = or <8 x i8> %tmp1, ret <8 x i8> %tmp3 } define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { -; CHECK: v_orrimmQ -; CHECK-NOT: vmov -; CHECK-NOT: vmvn -; CHECK: vorr +; CHECK-LABEL: v_orrimmQ: +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vorr.i32 q8, #0x1000000 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp3 = or <16 x i8> %tmp1, ret <16 x i8> %tmp3 @@ -528,9 +823,11 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind { ; CHECK-LABEL: v_bicimm: -; CHECK-NOT: vmov -; CHECK-NOT: vmvn -; CHECK: vbic +; CHECK: @ BB#0: +; CHECK-NEXT: vldr d16, [r0] +; CHECK-NEXT: vbic.i32 d16, #0xff000000 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > ret <8 x i8> %tmp3 @@ -538,10 +835,29 @@ define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind { define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind { ; CHECK-LABEL: v_bicimmQ: -; CHECK-NOT: vmov -; CHECK-NOT: vmvn -; CHECK: vbic +; CHECK: @ BB#0: +; CHECK-NEXT: vld1.64 {d16, d17}, [r0] +; CHECK-NEXT: vbic.i32 q8, #0xff000000 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 > ret <16 x i8> %tmp3 } + +define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) nounwind { +; CHECK-LABEL: hidden_not_v4i32: +; CHECK: @ BB#0: +; CHECK-NEXT: vmov d19, r2, r3 +; CHECK-NEXT: vmov.i32 q8, #0x6 +; CHECK-NEXT: vmov d18, r0, r1 +; CHECK-NEXT: vbic q8, q8, q9 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr + %xor = xor <4 x i32> %x, + %and = and <4 x i32> %xor, + ret <4 x i32> %and +} + diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll index 81b22ee12cdd24c7b6d2a8a1067d0ec990ad2c31..c08ed81d042a4ed17030277d0671fcfd747aed23 100644 --- a/test/CodeGen/ARM/vcombine.ll +++ b/test/CodeGen/ARM/vcombine.ll @@ -99,7 +99,9 @@ define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind { define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind { ; CHECK: vget_high8 ; CHECK-NOT: vst -; CHECK-LE: vmov r0, r1, d17 +; CHECK-LE-NOT: vld1.64 {d16, d17}, [r0] +; CHECK-LE: vldr d16, [r0, #8] +; CHECK-LE: vmov r0, r1, d16 ; CHECK-BE: vmov r1, r0, d16 %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll index ed734723a86d34012e04308fed5e35e26c700548..4f7ebc938d4c7f8c5c3a316eeeab678d3ab681b8 100644 --- a/test/CodeGen/ARM/vector-load.ll +++ b/test/CodeGen/ARM/vector-load.ll @@ -253,11 +253,22 @@ define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) { } ; CHECK-LABEL: test_silly_load: -; CHECK: ldr {{r[0-9]+}}, [r0, #24] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]! -; CHECK: vldr d{{[0-9]+}}, [r0] +; CHECK: vldr d{{[0-9]+}}, [r0, #16] +; CHECK: movs r1, #24 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128], r1 +; CHECK: ldr {{r[0-9]+}}, [r0] define void @test_silly_load(<28 x i8>* %addr) { load volatile <28 x i8>, <28 x i8>* %addr ret void } + +define <4 x i32>* @test_vld1_immoffset(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) { +; CHECK-LABEL: test_vld1_immoffset: +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0], [[INC]] + %val = load <4 x i32>, <4 x i32>* %ptr.in + store <4 x i32> %val, <4 x i32>* %ptr.out + %next = getelementptr <4 x i32>, <4 x i32>* %ptr.in, i32 2 + ret <4 x i32>* %next +} diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll index 161bbf1d0fde85c16417d8776be3dc00b76bddd8..e8c1a78a9113b992e4e107607ad104cb50191c63 100644 --- a/test/CodeGen/ARM/vector-store.ll +++ b/test/CodeGen/ARM/vector-store.ll @@ -256,3 +256,13 @@ define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val) store <4 x i8>* %inc, <4 x i8>** %ptr ret void } + +define <4 x i32>* @test_vst1_1reg(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) { +; CHECK-LABEL: test_vst1_1reg: +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r1], [[INC]] + %val = load <4 x i32>, <4 x i32>* %ptr.in + store <4 x i32> %val, <4 x i32>* %ptr.out + %next = getelementptr <4 x i32>, <4 x i32>* %ptr.out, i32 2 + ret <4 x i32>* %next +} diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index e44e757a316933a93d555bd0a1f1476c1f2caf61..5742dc314978f5974547106c763cb106c4e1e8bf 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -199,10 +199,10 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind { define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; CHECK-LABEL: test_undef: ; CHECK: @ BB#0: -; CHECK-NEXT: vld1.64 {d16, d17}, [r1] -; CHECK-NEXT: vld1.64 {d18, d19}, [r0] -; CHECK-NEXT: vzip.16 d19, d16 -; CHECK-NEXT: vmov r0, r1, d19 +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d17, [r0, #8] +; CHECK-NEXT: vzip.16 d17, d16 +; CHECK-NEXT: vmov r0, r1, d17 ; CHECK-NEXT: mov pc, lr %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = load <8 x i16>, <8 x i16>* %B diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll index c6d5747f35093646bba1ba36a55539f462b10d0b..71ca0f7915242249046dc9e67af6b5e04680c782 100644 --- a/test/CodeGen/ARM/vlddup.ll +++ b/test/CodeGen/ARM/vlddup.ll @@ -310,6 +310,23 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind { ret <4 x i16> %tmp5 } +define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind { +;CHECK-LABEL: vld2dupi16_odd_update: +;CHECK: mov [[INC:r[0-9]+]], #6 +;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]] + %A = load i16*, i16** %ptr + %A2 = bitcast i16* %A to i8* + %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = add <4 x i16> %tmp2, %tmp4 + %tmp6 = getelementptr i16, i16* %A, i32 3 + store i16* %tmp6, i16** %ptr + ret <4 x i16> %tmp5 +} + define <2 x i32> @vld2dupi32(i8* %A) nounwind { ;CHECK-LABEL: vld2dupi32: ;Check the alignment value. Max for this instruction is 64 bits: diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 2c14bc2d8f4ebbef63c6e9764e9994fbd38013c3..866641f3fbbd95ffe920036fff809d30f62c2c88 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -150,6 +150,22 @@ define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind { ret <2 x i32> %tmp5 } +define <2 x i32> @vld2lanei32_odd_update(i32** %ptr, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vld2lanei32_odd_update: +;CHECK: mov [[INC:r[0-9]+]], #12 +;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}], [[INC]] + %A = load i32*, i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + %tmp6 = getelementptr i32, i32* %A, i32 3 + store i32* %tmp6, i32** %ptr + ret <2 x i32> %tmp5 +} + define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vld2lanef: ;CHECK: vld2.32 diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll index 1aa23597cf499bb79b302177aca452093def29a3..3409d37a31f4c6929ffc1f1dc67c959b17b7c02e 100644 --- a/test/CodeGen/ARM/vpadd.ll +++ b/test/CodeGen/ARM/vpadd.ll @@ -485,6 +485,26 @@ define <2 x i16> @fromExtendingExtractVectorElt_i16(<4 x i16> %in) { ret <2 x i16> %x } +; And <2 x i8> to <2 x i32> +define <2 x i8> @fromExtendingExtractVectorElt_2i8(<8 x i8> %in) { +; CHECK-LABEL: fromExtendingExtractVectorElt_2i8: +; CHECK: vadd.i32 + %tmp1 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> + %tmp2 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> + %x = add <2 x i8> %tmp2, %tmp1 + ret <2 x i8> %x +} + +define <2 x i16> @fromExtendingExtractVectorElt_2i16(<8 x i16> %in) { +; CHECK-LABEL: fromExtendingExtractVectorElt_2i16: +; CHECK: vadd.i32 + %tmp1 = shufflevector <8 x i16> %in, <8 x i16> undef, <2 x i32> + %tmp2 = shufflevector <8 x i16> %in, <8 x i16> undef, <2 x i32> + %x = add <2 x i16> %tmp2, %tmp1 + ret <2 x i16> %x +} + + declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll index e4dd572a41b4d50be3f17d7dd45aa756994d9c82..2e0718877e96dd2faa351d7d2ba1c782614c9431 100644 --- a/test/CodeGen/ARM/vtbl.ll +++ b/test/CodeGen/ARM/vtbl.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - -verify-machineinstrs | FileCheck %s %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } diff --git a/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll b/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll index 93c3cb14fb73848e00bb33c8f02dafe67af5594e..5e3c45c3454d850ca25a80434c6ddaaf0bebb16f 100644 --- a/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll +++ b/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll @@ -25,7 +25,13 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" } ; CHECK: .p2align 4 ; CHECK-NEXT: .long {{.*}}Lxray_synthetic_0 +; CHECK-NEXT: .long {{.*}}Lxray_fn_idx_synth_0 ; CHECK-NEXT: .section {{.*}}xray_instr_map{{.*}} ; CHECK-LABEL: Lxray_synthetic_0: ; CHECK: .long {{.*}}Lxray_sled_0 ; CHECK: .long {{.*}}Lxray_sled_1 +; CHECK-LABEL: Lxray_synthetic_end0: +; CHECK: .section {{.*}}xray_fn_idx{{.*}} +; CHECK-LABEL: Lxray_fn_idx_synth_0: +; CHECK: .long {{.*}}Lxray_synthetic_0 +; CHECK-NEXT: .long {{.*}}Lxray_synthetic_end0 diff --git a/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll b/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll index d14590b88679463bfd76422d70d5e494c5947384..739151fbdd5e541f4e4b85d458b0b16e5de1ae2e 100644 --- a/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll +++ b/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll @@ -25,7 +25,14 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" } ; CHECK: .p2align 4 ; CHECK-NEXT: .long {{.*}}Lxray_synthetic_0 +; CHECK-NEXT: .long {{.*}}Lxray_fn_idx_synth_0 ; CHECK-NEXT: .section {{.*}}xray_instr_map{{.*}} ; CHECK-LABEL: Lxray_synthetic_0: ; CHECK: .long {{.*}}Lxray_sled_0 ; CHECK: .long {{.*}}Lxray_sled_1 +; CHECK-LABEL: Lxray_synthetic_end0: +; CHECK: .section {{.*}}xray_fn_idx{{.*}} +; CHECK-LABEL: Lxray_fn_idx_synth_0: +; CHECK: .long {{.*}}xray_synthetic_0 +; CHECK-NEXT: .long {{.*}}xray_synthetic_end0 + diff --git a/test/CodeGen/AVR/alloca.ll b/test/CodeGen/AVR/alloca.ll index 579573c0a133db586b4ba7d54337c7d29f253373..37c0e62b55fdef5a901881941434eaf40e82dc50 100644 --- a/test/CodeGen/AVR/alloca.ll +++ b/test/CodeGen/AVR/alloca.ll @@ -45,14 +45,14 @@ entry: define i16 @alloca_write(i16 %x) { entry: ; CHECK-LABEL: alloca_write: +; Small offset here +; CHECK: std Y+23, {{.*}} +; CHECK: std Y+24, {{.*}} ; Big offset here ; CHECK: adiw r28, 57 ; CHECK: std Y+62, {{.*}} ; CHECK: std Y+63, {{.*}} ; CHECK: sbiw r28, 57 -; Small offset here -; CHECK: std Y+23, {{.*}} -; CHECK: std Y+24, {{.*}} %p = alloca [15 x i16] %k = alloca [14 x i16] %arrayidx = getelementptr inbounds [15 x i16], [15 x i16]* %p, i16 0, i16 45 diff --git a/test/CodeGen/AVR/brind.ll b/test/CodeGen/AVR/brind.ll index f92038d1082944ba8651cc4e846db56f9c133fa8..ec8262e84a952dd593e2d65ff44c631915bceaf6 100644 --- a/test/CodeGen/AVR/brind.ll +++ b/test/CodeGen/AVR/brind.ll @@ -4,8 +4,6 @@ define i8 @brind(i8 %p) { ; CHECK-LABEL: brind: -; CHECK: ld r30 -; CHECK: ldd r31 ; CHECK: ijmp entry: %idxprom = sext i8 %p to i16 diff --git a/test/CodeGen/AVR/call.ll b/test/CodeGen/AVR/call.ll index 58bffd3a6787029c2d28ed8247739bb7fb6a21e7..bc6cb198a9e5b5936db6e2e7f61eaca57ad53532 100644 --- a/test/CodeGen/AVR/call.ll +++ b/test/CodeGen/AVR/call.ll @@ -30,9 +30,9 @@ define i8 @calli8_reg() { define i8 @calli8_stack() { ; CHECK-LABEL: calli8_stack: -; CHECK: ldi [[REG1:r[0-9]+]], 11 +; CHECK: ldi [[REG1:r[0-9]+]], 10 ; CHECK: push [[REG1]] -; CHECK: ldi [[REG1]], 10 +; CHECK: ldi [[REG1]], 11 ; CHECK: push [[REG1]] ; CHECK: call foo8_3 %result1 = call i8 @foo8_3(i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11) @@ -52,14 +52,14 @@ define i16 @calli16_reg() { define i16 @calli16_stack() { ; CHECK-LABEL: calli16_stack: -; CHECK: ldi [[REG1:r[0-9]+]], 10 -; CHECK: ldi [[REG2:r[0-9]+]], 2 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] ; CHECK: ldi [[REG1:r[0-9]+]], 9 ; CHECK: ldi [[REG2:r[0-9]+]], 2 ; CHECK: push [[REG2]] ; CHECK: push [[REG1]] +; CHECK: ldi [[REG1:r[0-9]+]], 10 +; CHECK: ldi [[REG2:r[0-9]+]], 2 +; CHECK: push [[REG2]] +; CHECK: push [[REG1]] ; CHECK: call foo16_2 %result1 = call i16 @foo16_2(i16 512, i16 513, i16 514, i16 515, i16 516, i16 517, i16 518, i16 519, i16 520, i16 521, i16 522) ret i16 %result1 @@ -82,14 +82,14 @@ define i32 @calli32_reg() { define i32 @calli32_stack() { ; CHECK-LABEL: calli32_stack: -; CHECK: ldi [[REG1:r[0-9]+]], 15 -; CHECK: ldi [[REG2:r[0-9]+]], 2 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] ; CHECK: ldi [[REG1:r[0-9]+]], 64 ; CHECK: ldi [[REG2:r[0-9]+]], 66 ; CHECK: push [[REG2]] ; CHECK: push [[REG1]] +; CHECK: ldi [[REG1:r[0-9]+]], 15 +; CHECK: ldi [[REG2:r[0-9]+]], 2 +; CHECK: push [[REG2]] +; CHECK: push [[REG1]] ; CHECK: call foo32_2 %result1 = call i32 @foo32_2(i32 1, i32 2, i32 3, i32 4, i32 34554432) ret i32 %result1 @@ -112,14 +112,15 @@ define i64 @calli64_reg() { define i64 @calli64_stack() { ; CHECK-LABEL: calli64_stack: -; CHECK: ldi [[REG1:r[0-9]+]], 31 -; CHECK: ldi [[REG2:r[0-9]+]], 242 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] + ; CHECK: ldi [[REG1:r[0-9]+]], 76 ; CHECK: ldi [[REG2:r[0-9]+]], 73 ; CHECK: push [[REG2]] ; CHECK: push [[REG1]] +; CHECK: ldi [[REG1:r[0-9]+]], 31 +; CHECK: ldi [[REG2:r[0-9]+]], 242 +; CHECK: push [[REG2]] +; CHECK: push [[REG1]] ; CHECK: ldi [[REG1:r[0-9]+]], 155 ; CHECK: ldi [[REG2:r[0-9]+]], 88 ; CHECK: push [[REG2]] diff --git a/test/CodeGen/AVR/calling-conv/c/stack.ll b/test/CodeGen/AVR/calling-conv/c/stack.ll index 00ff7d1acd8022dbd78a9ed6855af98c3a2dbf7b..52b6427476ab159860010682ebbafd324be39933 100644 --- a/test/CodeGen/AVR/calling-conv/c/stack.ll +++ b/test/CodeGen/AVR/calling-conv/c/stack.ll @@ -11,15 +11,15 @@ define void @ret_void_args_i64_i64_i32(i64 %a, i64 %b, i32 %c) { ; CHECK-NEXT: in r29, 62 ; Load the top two bytes from the 32-bit int. - ; CHECK-NEXT: ldd r24, Y+7 - ; CHECK-NEXT: ldd r25, Y+8 + ; CHECK-NEXT: ldd r24, Y+5 + ; CHECK-NEXT: ldd r25, Y+6 ; Store the top two bytes of the 32-bit int to memory. ; CHECK-NEXT: sts 7, r25 ; CHECK-NEXT: sts 6, r24 ; Load the bottom two bytes from the 32-bit int. - ; CHECK-NEXT: ldd r24, Y+5 - ; CHECK-NEXT: ldd r25, Y+6 + ; CHECK-NEXT: ldd r24, Y+3 + ; CHECK-NEXT: ldd r25, Y+4 ; Store the bottom two bytes of the 32-bit int to memory. ; CHECK-NEXT: sts 5, r25 ; CHECK-NEXT: sts 4, r24 diff --git a/test/CodeGen/AVR/directmem.ll b/test/CodeGen/AVR/directmem.ll index a97e712ed625e08ed0388e90ce84dcf1e1876b06..032263a9d657e415405b07beb51a30039dd0c577 100644 --- a/test/CodeGen/AVR/directmem.ll +++ b/test/CodeGen/AVR/directmem.ll @@ -33,10 +33,10 @@ define i8 @global8_load() { define void @array8_store() { ; CHECK-LABEL: array8_store: -; CHECK: ldi [[REG1:r[0-9]+]], 1 -; CHECK: sts char.array, [[REG1]] ; CHECK: ldi [[REG2:r[0-9]+]], 2 ; CHECK: sts char.array+1, [[REG2]] +; CHECK: ldi [[REG1:r[0-9]+]], 1 +; CHECK: sts char.array, [[REG1]] ; CHECK: ldi [[REG:r[0-9]+]], 3 ; CHECK: sts char.array+2, [[REG]] store i8 1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @char.array, i32 0, i64 0) @@ -83,14 +83,18 @@ define i16 @global16_load() { define void @array16_store() { ; CHECK-LABEL: array16_store: -; CHECK: ldi [[REG1:r[0-9]+]], 187 -; CHECK: ldi [[REG2:r[0-9]+]], 170 -; CHECK: sts int.array+1, [[REG2]] -; CHECK: sts int.array, [[REG1]] + ; CHECK: ldi [[REG1:r[0-9]+]], 204 ; CHECK: ldi [[REG2:r[0-9]+]], 170 ; CHECK: sts int.array+3, [[REG2]] ; CHECK: sts int.array+2, [[REG1]] + +; CHECK: ldi [[REG1:r[0-9]+]], 187 +; CHECK: ldi [[REG2:r[0-9]+]], 170 +; CHECK: sts int.array+1, [[REG2]] +; CHECK: sts int.array, [[REG1]] + + ; CHECK: ldi [[REG1:r[0-9]+]], 221 ; CHECK: ldi [[REG2:r[0-9]+]], 170 ; CHECK: sts int.array+5, [[REG2]] @@ -148,14 +152,6 @@ define i32 @global32_load() { define void @array32_store() { ; CHECK-LABEL: array32_store: -; CHECK: ldi [[REG1:r[0-9]+]], 27 -; CHECK: ldi [[REG2:r[0-9]+]], 172 -; CHECK: sts long.array+3, [[REG2]] -; CHECK: sts long.array+2, [[REG1]] -; CHECK: ldi [[REG1:r[0-9]+]], 68 -; CHECK: ldi [[REG2:r[0-9]+]], 13 -; CHECK: sts long.array+1, [[REG2]] -; CHECK: sts long.array, [[REG1]] ; CHECK: ldi [[REG1:r[0-9]+]], 102 ; CHECK: ldi [[REG2:r[0-9]+]], 85 ; CHECK: sts long.array+7, [[REG2]] @@ -164,6 +160,14 @@ define void @array32_store() { ; CHECK: ldi [[REG2:r[0-9]+]], 119 ; CHECK: sts long.array+5, [[REG2]] ; CHECK: sts long.array+4, [[REG1]] +; CHECK: ldi [[REG1:r[0-9]+]], 27 +; CHECK: ldi [[REG2:r[0-9]+]], 172 +; CHECK: sts long.array+3, [[REG2]] +; CHECK: sts long.array+2, [[REG1]] +; CHECK: ldi [[REG1:r[0-9]+]], 68 +; CHECK: ldi [[REG2:r[0-9]+]], 13 +; CHECK: sts long.array+1, [[REG2]] +; CHECK: sts long.array, [[REG1]] ; CHECK: ldi [[REG1:r[0-9]+]], 170 ; CHECK: ldi [[REG2:r[0-9]+]], 153 ; CHECK: sts long.array+11, [[REG2]] diff --git a/test/CodeGen/AVR/dynalloca.ll b/test/CodeGen/AVR/dynalloca.ll index 13f503015f9f3344bf1d62f28c35b8060c7cc1dc..6aa776e2de6f92d1f7c08187c8b5c6a2c8d32995 100644 --- a/test/CodeGen/AVR/dynalloca.ll +++ b/test/CodeGen/AVR/dynalloca.ll @@ -69,9 +69,9 @@ define void @dynalloca2(i16 %x) { ; SP restore ; CHECK: in r0, 63 ; CHECK-NEXT: cli -; CHECK-NEXT: out 62, r29 +; CHECK-NEXT: out 62, r7 ; CHECK-NEXT: out 63, r0 -; CHECK-NEXT: out 61, r28 +; CHECK-NEXT: out 61, r6 %vla = alloca i16, i16 %x call void @foo2(i16* %vla, i64 0, i64 0, i64 0) ret void diff --git a/test/CodeGen/AVR/inline-asm/inline-asm.ll b/test/CodeGen/AVR/inline-asm/inline-asm.ll index 88d0c3af2e88511f3898d54d886755697becadb5..26f90806781ea4381eaa817b41954bf94ef4c9ca 100644 --- a/test/CodeGen/AVR/inline-asm/inline-asm.ll +++ b/test/CodeGen/AVR/inline-asm/inline-asm.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=avr -mattr=movw -no-integrated-as | FileCheck %s +; XFAIL: * ; CHECK-LABEL: no_operands: define void @no_operands() { diff --git a/test/CodeGen/AVR/inline-asm/multibyte.ll b/test/CodeGen/AVR/inline-asm/multibyte.ll deleted file mode 100644 index a7c8f6e75f0fbee7fe851f852f3fe35bff577ae1..0000000000000000000000000000000000000000 --- a/test/CodeGen/AVR/inline-asm/multibyte.ll +++ /dev/null @@ -1,135 +0,0 @@ -; RUN: llc < %s -march=avr -no-integrated-as | FileCheck %s -; XFAIL: * - -; Multibyte references - -; CHECK-LABEL: multibyte_i16 -define void @multibyte_i16(i16 %a) { -entry: -; CHECK: instr r24 r25 - call void asm sideeffect "instr ${0:A} ${0:B}", "r"(i16 %a) -; CHECK: instr r25 r24 - call void asm sideeffect "instr ${0:B} ${0:A}", "r"(i16 %a) - ret void -} - -; CHECK-LABEL: multibyte_i32 -define void @multibyte_i32(i32 %a) { -entry: -; CHECK: instr r22 r23 r24 r25 - call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "r"(i32 %a) -; CHECK: instr r25 r24 r23 r22 - call void asm sideeffect "instr ${0:D} ${0:C} ${0:B} ${0:A}", "r"(i32 %a) - ret void -} - -; CHECK-LABEL: multibyte_alternative_name -define void @multibyte_alternative_name(i16* %p) { -entry: -; CHECK: instr Z - call void asm sideeffect "instr ${0:a}", "e" (i16* %p) - ret void -} - -; CHECK-LABEL: multibyte_a_i32 -define void @multibyte_a_i32() { -entry: - %a = alloca i32 - %0 = load i32, i32* %a -; CHECK: instr r20 r21 r22 r23 - call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "a"(i32 %0) - ret void -} - -@c = internal global i32 0 - -; CHECK-LABEL: multibyte_b_i32 -define void @multibyte_b_i32() { -entry: - %0 = load i32, i32* @c -; CHECK: instr r28 r29 r30 r31 - call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "b"(i32 %0) - ret void -} - -; CHECK-LABEL: multibyte_d_i32 -define void @multibyte_d_i32() { -entry: - %a = alloca i32 - %0 = load i32, i32* %a -; CHECK: instr r18 r19 r24 r25 - call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "d"(i32 %0) - ret void -} - -; CHECK-LABEL: multibyte_e_i32 -define void @multibyte_e_i32() { -entry: - %a = alloca i32 - %0 = load i32, i32* %a -; CHECK: instr r26 r27 r30 r31 - call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "e"(i32 %0) - ret void -} - -; CHECK-LABEL: multibyte_l_i32 -define void @multibyte_l_i32() { -entry: - %a = alloca i32 - %0 = load i32, i32* %a -; CHECK: instr r12 r13 r14 r15 - call void asm sideeffect "instr ${0:A} ${0:B} ${0:C} ${0:D}", "l"(i32 %0) - ret void -} - -; CHECK-LABEL: multibyte_a_i16 -define void @multibyte_a_i16() { -entry: - %a = alloca i16 - %0 = load i16, i16* %a -; CHECK: instr r22 r23 - call void asm sideeffect "instr ${0:A} ${0:B}", "a"(i16 %0) - ret void -} - -; CHECK-LABEL: multibyte_b_i16 -define void @multibyte_b_i16() { -entry: - %a = alloca i16 - %0 = load i16, i16* %a -; CHECK: instr r30 r31 - call void asm sideeffect "instr ${0:A} ${0:B}", "b"(i16 %0) - ret void -} - -; CHECK-LABEL: multibyte_d_i16 -define void @multibyte_d_i16() { -entry: - %a = alloca i16 - %0 = load i16, i16* %a -; CHECK: instr r24 r25 - call void asm sideeffect "instr ${0:A} ${0:B}", "d"(i16 %0) - ret void -} - -; CHECK-LABEL: multibyte_e_i16 -define void @multibyte_e_i16() { -entry: - %a = alloca i16 - %0 = load i16, i16* %a -; CHECK: instr r30 r31 - call void asm sideeffect "instr ${0:A} ${0:B}", "e"(i16 %0) - ret void -} - -; CHECK-LABEL: multibyte_l_i16 -define void @multibyte_l_i16() { -entry: - %a = alloca i16 - %0 = load i16, i16* %a -; CHECK: instr r14 r15 - call void asm sideeffect "instr ${0:A} ${0:B}", "l"(i16 %0) - ret void -} - - diff --git a/test/CodeGen/AVR/pseudo/LDDWRdPtrQ-same-src-dst.mir b/test/CodeGen/AVR/pseudo/LDDWRdPtrQ-same-src-dst.mir new file mode 100644 index 0000000000000000000000000000000000000000..b19e44e29fb661c4197f5c6f6d6a9886af93703a --- /dev/null +++ b/test/CodeGen/AVR/pseudo/LDDWRdPtrQ-same-src-dst.mir @@ -0,0 +1,35 @@ +# RUN: llc -O0 %s -o - -march=avr | FileCheck %s + +# This test checks the expansion of the 16-bit 'LDDWRdPtrQ' pseudo instruction. +# +# This test ensures that the pseudo expander can correctly handle the case +# where we are expanding a 16-bit LDD instruction where the source and +# destination registers are the same. +# +# The instruction itself is earlyclobber and so ISel will never produce an +# instruction like this, but the stack slot loading can and will. + +--- | + target triple = "avr--" + define void @test_lddwrdptrq() { + entry: + ret void + } +... + +--- +name: test_lddwrdptrq +tracksRegLiveness: true +body: | + bb.0.entry: + + ; CHECK-LABEL: test_lddwrdptrq + + ; CHECK: ldd [[SCRATCH:r[0-9]+]], Z+10 + ; CHECK-NEXT: push [[SCRATCH]] + ; CHECK-NEXT: ldd [[SCRATCH]], Z+11 + ; CHECK-NEXT: mov r31, [[SCRATCH]] + ; CHECK-NEXT: pop r30 + + early-clobber %r31r30 = LDDWRdPtrQ undef %r31r30, 10 +... diff --git a/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir b/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir new file mode 100644 index 0000000000000000000000000000000000000000..3e7fdcd400d21b309e1f1922fe3c91f6ae8e5e15 --- /dev/null +++ b/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir @@ -0,0 +1,29 @@ +# RUN: llc -O0 %s -o - | FileCheck %s + +# This test checks the expansion of the 16-bit LDWRdPtr pseudo instruction. + +--- | + target triple = "avr--" + define void @test_ldwrdptr() { + entry: + ret void + } +... + +--- +name: test_ldwrdptr +tracksRegLiveness: true +body: | + bb.0.entry: + + ; CHECK-LABEL: test_ldwrdptr + + ; CHECK: ld [[SCRATCH:r[0-9]+]], Z + ; CHECK-NEXT: push [[SCRATCH]] + ; CHECK-NEXT: ldd [[SCRATCH]], Z+1 + ; CHECK-NEXT: mov r31, [[SCRATCH]] + ; CHECK-NEXT: pop r30 + + early-clobber %r31r30 = LDWRdPtr undef %r31r30 +... + diff --git a/test/CodeGen/AVR/pseudo/expand-lddw-dst-src-same.mir b/test/CodeGen/AVR/pseudo/expand-lddw-dst-src-same.mir deleted file mode 100644 index 8427a2bfb4edf2b3efaa668e5ff04ba9aded8c26..0000000000000000000000000000000000000000 --- a/test/CodeGen/AVR/pseudo/expand-lddw-dst-src-same.mir +++ /dev/null @@ -1,35 +0,0 @@ -# RUN: llc -O0 %s -o - -march=avr | FileCheck %s - -# This test ensures that the pseudo expander can correctly handle the case -# where we are expanding a 16-bit LDD instruction where the source and -# destination registers are the same. -# -# The instruction itself is earlyclobber and so ISel will never produce an -# instruction like this, but the stack slot loading can and will. - ---- | - target triple = "avr--" - - define void @test_lddw() { - entry: - ret void - } - -... ---- -name: test_lddw -tracksRegLiveness: true -stack: - - { id: 0, type: spill-slot, offset: -4, size: 1, alignment: 1, callee-saved-register: '%r28' } -body: | - bb.0.entry: - liveins: %r28, %r29 - - ; CHECK-LABEL: test_lddw - - ; CHECK: ldd [[TMPREG:r[0-9]+]], Y+0 - ; CHECK-NEXT: mov r28, [[TMPREG]] - ; CHECK-NEXT: ldd [[TMPREG]], Y+1 - ; CHECK-NEXT: mov r29, [[TMPREG]] - dead early-clobber %r29r28 = LDDWRdYQ killed %r29r28, 0 -... diff --git a/test/CodeGen/AVR/return.ll b/test/CodeGen/AVR/return.ll index d57f435fd11c65d99a14555ee6a3a3ff320afc02..1f80576af2885e472db558ddf5f14532550310aa 100644 --- a/test/CodeGen/AVR/return.ll +++ b/test/CodeGen/AVR/return.ll @@ -96,14 +96,14 @@ define i64 @return64_arg2(i64 %x, i64 %y, i64 %z) { ; CHECK-LABEL: return64_arg2: ; CHECK: push r28 ; CHECK: push r29 -; CHECK: ldd r18, Y+5 -; CHECK: ldd r19, Y+6 -; CHECK: ldd r20, Y+7 -; CHECK: ldd r21, Y+8 -; CHECK: ldd r22, Y+9 -; CHECK: ldd r23, Y+10 -; CHECK: ldd r24, Y+11 -; CHECK: ldd r25, Y+12 +; CHECK: ldd r18, Y+3 +; CHECK: ldd r19, Y+4 +; CHECK: ldd r20, Y+5 +; CHECK: ldd r21, Y+6 +; CHECK: ldd r22, Y+7 +; CHECK: ldd r23, Y+8 +; CHECK: ldd r24, Y+9 +; CHECK: ldd r25, Y+10 ; CHECK: pop r29 ; CHECK: pop r28 ret i64 %z @@ -113,10 +113,10 @@ define i32 @return64_trunc(i32 %a, i32 %b, i32 %c, i64 %d) { ; CHECK-LABEL: return64_trunc: ; CHECK: push r28 ; CHECK: push r29 -; CHECK: ldd r22, Y+5 -; CHECK: ldd r23, Y+6 -; CHECK: ldd r24, Y+7 -; CHECK: ldd r25, Y+8 +; CHECK: ldd r22, Y+3 +; CHECK: ldd r23, Y+4 +; CHECK: ldd r24, Y+5 +; CHECK: ldd r25, Y+6 ; CHECK: pop r29 ; CHECK: pop r28 %result = trunc i64 %d to i32 diff --git a/test/CodeGen/AVR/rot.ll b/test/CodeGen/AVR/rot.ll new file mode 100644 index 0000000000000000000000000000000000000000..a7b77d97ba698cb3c5f39a135fcd556956483e5b --- /dev/null +++ b/test/CodeGen/AVR/rot.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -march=avr | FileCheck %s + +; Bit rotation tests. + +; CHECK-LABEL: rol8: +define i8 @rol8(i8 %val, i8 %amt) { + ; CHECK: andi r22, 7 + + ; CHECK-NEXT: cpi r22, 0 + ; CHECK-NEXT: breq LBB0_2 + +; CHECK-NEXT: LBB0_1: + ; CHECK-NEXT: rol r24 + ; CHECK-NEXT: subi r22, 1 + ; CHECK-NEXT: brne LBB0_1 + +; CHECK-NEXT:LBB0_2: + ; CHECK-NEXT: ret + %mod = urem i8 %amt, 8 + + %inv = sub i8 8, %mod + %parta = shl i8 %val, %mod + %partb = lshr i8 %val, %inv + + %rotl = or i8 %parta, %partb + + ret i8 %rotl +} + + +; CHECK-LABEL: ror8: +define i8 @ror8(i8 %val, i8 %amt) { + ; CHECK: andi r22, 7 + + ; CHECK-NEXT: cpi r22, 0 + ; CHECK-NEXT: breq LBB1_2 + +; CHECK-NEXT: LBB1_1: + ; CHECK-NEXT: ror r24 + ; CHECK-NEXT: subi r22, 1 + ; CHECK-NEXT: brne LBB1_1 + +; CHECK-NEXT:LBB1_2: + ; CHECK-NEXT: ret + %mod = urem i8 %amt, 8 + + %inv = sub i8 8, %mod + %parta = lshr i8 %val, %mod + %partb = shl i8 %val, %inv + + %rotr = or i8 %parta, %partb + + ret i8 %rotr +} + diff --git a/test/CodeGen/AVR/select-mbb-placement-bug.ll b/test/CodeGen/AVR/select-mbb-placement-bug.ll new file mode 100644 index 0000000000000000000000000000000000000000..ca7ec1ab831ce38a8a988aa3872c526d62926b46 --- /dev/null +++ b/test/CodeGen/AVR/select-mbb-placement-bug.ll @@ -0,0 +1,35 @@ +; RUN: llc -mcpu=atmega328p < %s -march=avr | FileCheck %s + +; CHECK-LABEL: loopy +define internal fastcc void @loopy() { + +; In this case, when we expand `Select8`/`Select16`, we should be +; replacing the existing MBB instead of adding a new one. +; +; https://github.com/avr-rust/rust/issues/49 + +; CHECK: LBB0_1: +; CHECK: LBB0_2: +; CHECK-NOT: LBB0_3: +start: + br label %bb7.preheader + +bb7.preheader: ; preds = %bb10, %start + %i = phi i8 [ 0, %start ], [ %j, %bb10 ] + %j = phi i8 [ 1, %start ], [ %next, %bb10 ] + br label %bb10 + +bb4: ; preds = %bb10 + ret void + +bb10: ; preds = %bb7.preheader + tail call fastcc void @observe(i8 %i, i8 1) + %0 = icmp ult i8 %j, 20 + %1 = zext i1 %0 to i8 + %next = add i8 %j, %1 + br i1 %0, label %bb7.preheader, label %bb4 + +} + +declare void @observe(i8, i8); + diff --git a/test/CodeGen/AVR/varargs.ll b/test/CodeGen/AVR/varargs.ll index b35ce4c0f7aef52da1d33f6b160cf04f0595a692..6f727cda582d54978a22a562d91f5c6b732c0c77 100644 --- a/test/CodeGen/AVR/varargs.ll +++ b/test/CodeGen/AVR/varargs.ll @@ -7,12 +7,12 @@ declare void @llvm.va_end(i8*) define i16 @varargs1(i8* nocapture %x, ...) { ; CHECK-LABEL: varargs1: ; CHECK: movw r20, r28 -; CHECK: subi r20, 215 +; CHECK: subi r20, 217 ; CHECK: sbci r21, 255 ; CHECK: movw r24, r28 ; CHECK: adiw r24, 3 -; CHECK: ldd r22, Y+39 -; CHECK: ldd r23, Y+40 +; CHECK: ldd r22, Y+37 +; CHECK: ldd r23, Y+38 ; CHECK: call %buffer = alloca [32 x i8] %ap = alloca i8* @@ -40,14 +40,14 @@ define i16 @varargs2(i8* nocapture %x, ...) { declare void @var1223(i16, ...) define void @varargcall() { ; CHECK-LABEL: varargcall: -; CHECK: ldi [[REG1:r[0-9]+]], 191 -; CHECK: ldi [[REG2:r[0-9]+]], 223 -; CHECK: push [[REG2]] -; CHECK: push [[REG1]] ; CHECK: ldi [[REG1:r[0-9]+]], 189 ; CHECK: ldi [[REG2:r[0-9]+]], 205 ; CHECK: push [[REG2]] ; CHECK: push [[REG1]] +; CHECK: ldi [[REG1:r[0-9]+]], 191 +; CHECK: ldi [[REG2:r[0-9]+]], 223 +; CHECK: push [[REG2]] +; CHECK: push [[REG1]] ; CHECK: ldi [[REG1:r[0-9]+]], 205 ; CHECK: ldi [[REG2:r[0-9]+]], 171 ; CHECK: push [[REG2]] diff --git a/test/CodeGen/BPF/dwarfdump.ll b/test/CodeGen/BPF/dwarfdump.ll index 7ae64dfb5682701c929ee62087091dbde1c9e6e6..6a6913011e644ec25439e717625c8c91d9dcc5ae 100644 --- a/test/CodeGen/BPF/dwarfdump.ll +++ b/test/CodeGen/BPF/dwarfdump.ll @@ -1,5 +1,7 @@ ; RUN: llc -O2 -march=bpfel %s -o %t -filetype=obj ; RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s +; RUN: llc -O2 -march=bpfeb %s -o %t -filetype=obj +; RUN: llvm-dwarfdump -debug-dump=line %t | FileCheck %s source_filename = "testprog.c" target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128" diff --git a/test/CodeGen/BPF/mem_offset.ll b/test/CodeGen/BPF/mem_offset.ll new file mode 100644 index 0000000000000000000000000000000000000000..2b86e44ae592b5c583da9186b06f1bac489f9a16 --- /dev/null +++ b/test/CodeGen/BPF/mem_offset.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=bpfel -show-mc-encoding < %s | FileCheck %s + +; Function Attrs: nounwind +define i32 @bpf_prog1(i8* nocapture readnone) local_unnamed_addr #0 { +; CHECK: r1 += -1879113726 # encoding: [0x07,0x01,0x00,0x00,0x02,0x00,0xff,0x8f] +; CHECK: r0 = *(u64 *)(r1 + 0) # encoding: [0x79,0x10,0x00,0x00,0x00,0x00,0x00,0x00] + %2 = alloca i64, align 8 + %3 = bitcast i64* %2 to i8* + store volatile i64 590618314553, i64* %2, align 8 + %4 = load volatile i64, i64* %2, align 8 + %5 = add i64 %4, -1879113726 + %6 = inttoptr i64 %5 to i64* + %7 = load i64, i64* %6, align 8 + %8 = trunc i64 %7 to i32 + ret i32 %8 +} + diff --git a/test/CodeGen/BPF/mem_offset_be.ll b/test/CodeGen/BPF/mem_offset_be.ll new file mode 100644 index 0000000000000000000000000000000000000000..e5e352783d70193809804a97ada245c81d1b49a9 --- /dev/null +++ b/test/CodeGen/BPF/mem_offset_be.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=bpfeb -show-mc-encoding < %s | FileCheck %s + +; Function Attrs: nounwind +define i32 @bpf_prog1(i8* nocapture readnone) local_unnamed_addr #0 { +; CHECK: r1 = 590618314553ll # encoding: [0x18,0x10,0x00,0x00,0x83,0x98,0x47,0x39,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x89] +; CHECK: r1 += -1879113726 # encoding: [0x07,0x10,0x00,0x00,0x8f,0xff,0x00,0x02] +; CHECK: r0 = *(u64 *)(r1 + 0) # encoding: [0x79,0x01,0x00,0x00,0x00,0x00,0x00,0x00] + %2 = alloca i64, align 8 + %3 = bitcast i64* %2 to i8* + store volatile i64 590618314553, i64* %2, align 8 + %4 = load volatile i64, i64* %2, align 8 + %5 = add i64 %4, -1879113726 + %6 = inttoptr i64 %5 to i64* + %7 = load i64, i64* %6, align 8 + %8 = trunc i64 %7 to i32 + ret i32 %8 +} + diff --git a/test/CodeGen/BPF/reloc.ll b/test/CodeGen/BPF/reloc.ll new file mode 100644 index 0000000000000000000000000000000000000000..75dbebf311e3dffa7f1da1eaa3e61e0aeb539d94 --- /dev/null +++ b/test/CodeGen/BPF/reloc.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=bpfel -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s + +%struct.bpf_context = type { i64, i64, i64, i64, i64, i64, i64 } +%struct.sk_buff = type { i64, i64, i64, i64, i64, i64, i64 } +%struct.net_device = type { i64, i64, i64, i64, i64, i64, i64 } + +@bpf_prog1.devname = private unnamed_addr constant [3 x i8] c"lo\00", align 1 +@bpf_prog1.fmt = private unnamed_addr constant [15 x i8] c"skb %x dev %x\0A\00", align 1 + +; Function Attrs: norecurse +define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/net/netif_receive_skb" { + %devname = alloca [3 x i8], align 1 + %fmt = alloca [15 x i8], align 1 + %1 = getelementptr inbounds [3 x i8], [3 x i8]* %devname, i64 0, i64 0 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i32 1, i1 false) + %2 = getelementptr inbounds %struct.bpf_context, %struct.bpf_context* %ctx, i64 0, i32 0 + %3 = load i64, i64* %2, align 8 + %4 = inttoptr i64 %3 to %struct.sk_buff* + %5 = getelementptr inbounds %struct.sk_buff, %struct.sk_buff* %4, i64 0, i32 2 + %6 = bitcast i64* %5 to i8* + %7 = call i8* inttoptr (i64 4 to i8* (i8*)*)(i8* %6) #1 + %8 = call i32 inttoptr (i64 9 to i32 (i8*, i8*, i32)*)(i8* %7, i8* %1, i32 2) #1 + %9 = icmp eq i32 %8, 0 + br i1 %9, label %10, label %13 + +;